"""Scrape genre metadata from musicalyst.com and export it as JSON and YAML."""

import json
from collections import OrderedDict
from dataclasses import asdict, dataclass
from typing import List

import requests
import yaml
from bs4 import BeautifulSoup

# Seconds to wait for the server before giving up. Without a timeout,
# ``requests`` may block indefinitely on a stalled connection.
_REQUEST_TIMEOUT = 30


@dataclass
class AllGenres:
    """Result of scraping the genre index page."""

    # Value of ``props.pageProps.genres`` from the page's Next.js payload.
    # NOTE(review): annotated as ``dict``, but ``fetch_genres`` iterates this
    # value and indexes every element with ``["id"]``, which implies a
    # sequence of genre dicts -- confirm against the live payload.
    genres: dict
    # Next.js build ID; part of the per-genre data URL.
    build_id: str


@dataclass
class ArtistTag:
    """Identifier and display name of one artist listed for a genre."""

    # Taken from the ``id`` key of a ``topArtists`` entry.
    spotify_id: str
    name: str


@dataclass
class Genre:
    """Detailed information about a single genre."""

    id: str
    name: str
    description: str
    artists: List[ArtistTag]


def get_genres():
    """Download the genre index page and extract its embedded Next.js data.

    Returns:
        AllGenres: the ``genres`` page prop together with the ``buildId``.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        RuntimeError: if the ``__NEXT_DATA__`` script tag is missing or empty.
    """
    r = requests.get("https://musicalyst.com/genres", timeout=_REQUEST_TIMEOUT)
    # Fail early on 4xx/5xx instead of trying to parse an error page.
    r.raise_for_status()

    html = BeautifulSoup(r.text, features="lxml")
    elm = html.find("script", {"id": "__NEXT_DATA__"})
    if elm is None or not elm.string:
        raise RuntimeError(
            "Could not find the __NEXT_DATA__ script tag on the genres page"
        )

    data = json.loads(elm.string)
    genres = data["props"]["pageProps"]["genres"]
    build_id = data["buildId"]
    return AllGenres(genres, build_id)


def get_genre(build_id, genre_id, session=None):
    """Fetch the data document of one genre.

    Args:
        build_id: Next.js build ID as returned by ``get_genres``.
        genre_id: ID of the genre to fetch.
        session: optional ``requests.Session`` to reuse connections across
            calls; when omitted a one-off request is made.

    Returns:
        Genre: id, name and description of the genre plus at most the first
        ten entries of ``topArtists``.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    http = requests if session is None else session
    r = http.get(
        f"https://musicalyst.com/_next/data/{build_id}/en/genre/{genre_id}.json",
        timeout=_REQUEST_TIMEOUT,
    )
    # Fail early on 4xx/5xx instead of raising a confusing JSON/Key error.
    r.raise_for_status()

    page = r.json()["pageProps"]
    info = page["genresAdvancedInfo"]
    return Genre(
        info["id"],
        info["name"],
        info["description"],
        [ArtistTag(a["id"], a["name"]) for a in page["topArtists"][:10]],
    )


def fetch_genres():
    """Fetch every genre listed on the index page and write ``genres.json``.

    The ID of each genre is printed before it is requested, which serves as
    a progress indicator. The output file is written as UTF-8.
    """
    genres = get_genres()

    genres_with_data = []
    # One session for all per-genre requests so connections are reused.
    with requests.Session() as session:
        for g in genres.genres:
            print(g["id"])
            genre = get_genre(genres.build_id, g["id"], session=session)
            genres_with_data.append(asdict(genre))

    # ``ensure_ascii=False`` emits raw non-ASCII characters, so the file
    # encoding must be explicit rather than depend on the platform locale.
    with open("genres.json", "w", encoding="utf-8") as f:
        json.dump(genres_with_data, f, indent=2, ensure_ascii=False)


def _represent_ordered_dict(dumper, data):
    """Represent an ``OrderedDict`` as a plain YAML map.

    The items view is passed so each entry's ``name`` stays ahead of its
    ``description``, as in the original lambda.
    """
    return dumper.represent_mapping("tag:yaml.org,2002:map", data.items())


def convert_genres():
    """Convert ``genres.json`` into ``genres.yaml``.

    The YAML file maps each genre ID to its ``name`` and ``description``.
    """
    # Must match the encoding used by ``fetch_genres`` when writing the file.
    with open("genres.json", "r", encoding="utf-8") as f:
        data = json.load(f)

    mapped_genres = {
        g["id"]: OrderedDict({"name": g["name"], "description": g["description"]})
        for g in data
    }

    yaml.add_representer(OrderedDict, _represent_ordered_dict)

    with open("genres.yaml", "w", encoding="utf-8") as f:
        yaml.dump(mapped_genres, f)


if __name__ == "__main__":
    fetch_genres()
    # Second step; enable once fetch_genres() has produced genres.json:
    # convert_genres()