spotify-genres/genres/get_genres.py
2023-10-16 23:46:55 +02:00

76 lines
2 KiB
Python

import requests
import json
from dataclasses import dataclass, asdict
from bs4 import BeautifulSoup
from typing import List
import yaml
@dataclass
class AllGenres:
    """Result of scraping the Musicalyst genre overview page.

    Bundles the genre data with the Next.js build ID taken from the same
    page payload. The build ID is required to build the URLs of the
    per-genre JSON data routes.
    """

    # Map of genres (the ``props.pageProps.genres`` value of the page payload).
    # NOTE(review): annotated as ``dict``, but ``fetch_genres`` iterates this
    # value and indexes every item with ``["id"]`` -- confirm the real shape.
    genres: dict
    # Next.js build ID (the ``buildId`` value of the page payload).
    build_id: str
@dataclass
class ArtistTag:
    """Minimal reference to an artist: an identifier plus a name."""

    # Artist identifier (presumably the Spotify artist ID -- verify against
    # the data source).
    spotify_id: str
    # Name of the artist.
    name: str
@dataclass
class Genre:
    """Detail record for a single genre."""

    # Identifier of the genre.
    id: str
    # Name of the genre.
    name: str
    # Description text of the genre.
    description: str
    # Artists attached to the genre.
    artists: List[ArtistTag]
def get_genres():
    """Fetch the genre overview from musicalyst.com.

    Downloads the genres page, parses the JSON payload embedded in the
    ``<script id="__NEXT_DATA__">`` tag and extracts the genre data and the
    Next.js build ID from it.

    Returns:
        AllGenres: ``props.pageProps.genres`` and ``buildId`` of the payload.

    Raises:
        requests.RequestException: on connection failure, timeout, or an
            HTTP error status.
        RuntimeError: if the page contains no usable ``__NEXT_DATA__`` script.
        KeyError: if the payload lacks one of the expected keys.
    """
    # A timeout keeps the script from hanging forever on a stalled connection.
    response = requests.get("https://musicalyst.com/genres", timeout=30)
    # Fail early on 4xx/5xx instead of trying to parse an error page.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, features="lxml")
    script_tag = soup.find("script", {"id": "__NEXT_DATA__"})
    script_text = script_tag.string if script_tag is not None else None
    if script_text is None:
        # Without this check the failure would surface as an opaque
        # AttributeError/TypeError on ``None``.
        raise RuntimeError(
            "no usable __NEXT_DATA__ script found on https://musicalyst.com/genres"
        )

    payload = json.loads(script_text)
    return AllGenres(
        genres=payload["props"]["pageProps"]["genres"],
        build_id=payload["buildId"],
    )
def get_genre(build_id, genre_id):
    """Fetch the detail record of one genre from the Next.js data route.

    Args:
        build_id: Next.js build ID, used as a path segment of the data URL.
        genre_id: ID of the genre to fetch.

    Returns:
        Genre: id, name and description from ``pageProps.genresAdvancedInfo``
        plus the first ten entries of ``pageProps.topArtists``.

    Raises:
        requests.RequestException: on connection failure, timeout, or an
            HTTP error status.
        KeyError: if the response lacks one of the expected keys.
    """
    response = requests.get(
        f"https://musicalyst.com/_next/data/{build_id}/en/genre/{genre_id}.json",
        # A timeout keeps a single stalled request from blocking the whole run.
        timeout=30,
    )
    # Fail with a clear HTTP error instead of a JSON/KeyError further down.
    response.raise_for_status()

    page = response.json()["pageProps"]
    info = page["genresAdvancedInfo"]
    return Genre(
        id=info["id"],
        name=info["name"],
        description=info["description"],
        # Only the first ten top artists are kept.
        artists=[ArtistTag(a["id"], a["name"]) for a in page["topArtists"][:10]],
    )
def fetch_genres():
    """Download the details of every genre and store them in ``genres.json``.

    Fetches the genre overview, requests the detail record of each listed
    genre (printing its ID as progress output) and writes the records as a
    JSON array to ``genres.json`` in the current working directory.
    """
    overview = get_genres()

    genres_with_data = []
    for entry in overview.genres:
        # Progress output: one genre ID per detail request.
        print(entry["id"])
        genres_with_data.append(asdict(get_genre(overview.build_id, entry["id"])))

    # ensure_ascii=False writes non-ASCII characters verbatim, so the file
    # encoding is pinned to UTF-8 rather than left to the platform default,
    # which may be unable to encode them.
    with open("genres.json", "w", encoding="utf-8") as f:
        json.dump(genres_with_data, f, indent=2, ensure_ascii=False)
def convert_genres():
    """Convert ``genres.json`` into ``genres.yaml``.

    Reads the JSON array of genre records from ``genres.json`` and writes a
    YAML mapping from genre ID to its ``name`` and ``description`` to
    ``genres.yaml``. Both files are resolved relative to the current working
    directory.
    """
    from collections import OrderedDict

    # JSON is read as UTF-8 explicitly instead of relying on the platform
    # default encoding.
    with open("genres.json", "r", encoding="utf-8") as f:
        records = json.load(f)

    mapped_genres = {
        g["id"]: OrderedDict({"name": g["name"], "description": g["description"]})
        for g in records
    }

    def represent_ordered(dumper, data):
        # Emit OrderedDict as a plain YAML map; handing over the item pairs
        # keeps "name" ahead of "description".
        return dumper.represent_mapping('tag:yaml.org,2002:map', data.items())

    yaml.add_representer(OrderedDict, represent_ordered)

    with open("genres.yaml", "w", encoding="utf-8") as f:
        yaml.dump(mapped_genres, f)
if __name__ == "__main__":
    # Step 1: scrape every genre and write the result to genres.json.
    fetch_genres()
    # Step 2 (disabled by default): turn genres.json into genres.yaml.
    # convert_genres()