76 lines
2 KiB
Python
76 lines
2 KiB
Python
import requests
|
|
import json
|
|
from dataclasses import dataclass, asdict
|
|
from bs4 import BeautifulSoup
|
|
from typing import List
|
|
import yaml
|
|
|
|
@dataclass
class AllGenres:
    """Result of scraping the genre index page: genre data plus the site build ID."""

    # Value of ``props.pageProps.genres`` from the page's ``__NEXT_DATA__`` JSON.
    # NOTE(review): annotated as ``dict``, but fetch_genres iterates this value
    # and indexes every element with ["id"], which suggests a list of dicts --
    # confirm against the live payload and adjust the annotation if so.
    genres: dict

    # Next.js build ID (``buildId`` in ``__NEXT_DATA__``); get_genre interpolates
    # it into the ``/_next/data/<build_id>/...`` URL.
    build_id: str
|
|
|
|
@dataclass
class ArtistTag:
    """Lightweight reference to an artist: an identifier and a display name."""

    # Artist identifier; get_genre fills it from the "id" key of each
    # ``topArtists`` entry.
    spotify_id: str

    # Artist name; get_genre fills it from the "name" key of the same entry.
    name: str
|
|
|
|
@dataclass
class Genre:
    """Detailed record for one genre, as assembled by get_genre."""

    # Genre identifier ("id" of the page's ``genresAdvancedInfo`` object).
    id: str

    # Genre name ("name" of ``genresAdvancedInfo``).
    name: str

    # Descriptive text ("description" of ``genresAdvancedInfo``).
    description: str

    # Artists attached to the genre; get_genre takes them from the leading
    # entries of the page's ``topArtists`` list.
    artists: List[ArtistTag]
|
|
|
|
|
|
def get_genres(*, timeout: float = 30.0) -> "AllGenres":
    """Scrape the genre index and the current build ID from musicalyst.com.

    The genres page embeds its data as JSON inside the
    ``<script id="__NEXT_DATA__">`` tag. This function downloads the page,
    parses that JSON and reads ``props.pageProps.genres`` and ``buildId``.

    Args:
        timeout: Seconds to wait for the server before giving up.

    Returns:
        An ``AllGenres`` holding the genre data and the build ID.

    Raises:
        requests.RequestException: On network failure, timeout, or an HTTP
            error status.
        RuntimeError: If the page has no usable ``__NEXT_DATA__`` script tag.
        KeyError: If the embedded JSON lacks the expected keys.
    """
    # Without a timeout, requests waits indefinitely on an unresponsive server.
    response = requests.get("https://musicalyst.com/genres", timeout=timeout)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, features="lxml")
    script = soup.find("script", {"id": "__NEXT_DATA__"})
    if script is None or script.string is None:
        raise RuntimeError(
            "musicalyst.com/genres did not contain a __NEXT_DATA__ script tag"
        )

    payload = json.loads(script.string)
    genres = payload["props"]["pageProps"]["genres"]
    build_id = payload["buildId"]
    return AllGenres(genres, build_id)
|
|
|
|
|
|
def get_genre(build_id, genre_id, *, timeout: float = 30.0, max_artists: int = 10) -> "Genre":
    """Fetch the detail record for one genre from the site's Next.js data route.

    Args:
        build_id: Build ID to place in the ``/_next/data/<build_id>/`` URL
            (as returned in ``AllGenres.build_id``).
        genre_id: Identifier of the genre to fetch.
        timeout: Seconds to wait for the server before giving up.
        max_artists: Maximum number of entries taken from the front of the
            page's ``topArtists`` list.

    Returns:
        A ``Genre`` built from ``pageProps.genresAdvancedInfo`` and the
        leading ``topArtists`` entries.

    Raises:
        requests.RequestException: On network failure, timeout, or an HTTP
            error status.
        KeyError: If the response JSON lacks the expected keys.
    """
    # Without a timeout, requests waits indefinitely on an unresponsive server.
    response = requests.get(
        f"https://musicalyst.com/_next/data/{build_id}/en/genre/{genre_id}.json",
        timeout=timeout,
    )
    # Surface HTTP errors here rather than as a confusing JSON/KeyError later.
    response.raise_for_status()

    page = response.json()["pageProps"]
    info = page["genresAdvancedInfo"]
    return Genre(
        info["id"],
        info["name"],
        info["description"],
        [ArtistTag(artist["id"], artist["name"]) for artist in page["topArtists"][:max_artists]],
    )
|
|
|
|
|
|
def fetch_genres():
    """Download details for every genre and save them to ``genres.json``.

    Calls ``get_genres`` for the genre index and build ID, then ``get_genre``
    once per index entry (printing each genre ID as progress), and writes the
    resulting list of dicts as indented JSON.
    """
    index = get_genres()

    details = []
    for entry in index.genres:
        print(entry["id"])  # progress output: one HTTP request per genre
        details.append(asdict(get_genre(index.build_id, entry["id"])))

    # ensure_ascii=False emits raw non-ASCII characters, so the file is opened
    # as UTF-8 explicitly instead of relying on the platform default encoding.
    with open("genres.json", "w", encoding="utf-8") as f:
        json.dump(details, f, indent=2, ensure_ascii=False)


def convert_genres():
    """Convert ``genres.json`` into ``genres.yaml``, keyed by genre ID.

    Only each genre's name and description are kept, in that order.
    """
    from collections import OrderedDict

    # Must match the encoding fetch_genres writes the file with.
    with open("genres.json", "r", encoding="utf-8") as f:
        genres = json.load(f)

    by_id = {
        g["id"]: OrderedDict({"name": g["name"], "description": g["description"]})
        for g in genres
    }

    # Dump OrderedDict values as plain YAML mappings built from their items.
    # Note: add_representer registers on the yaml module, not just this call.
    yaml.add_representer(
        OrderedDict,
        lambda dumper, mapping: dumper.represent_mapping('tag:yaml.org,2002:map', mapping.items()),
    )
    with open("genres.yaml", "w", encoding="utf-8") as f:
        yaml.dump(by_id, f)
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: rebuild genres.json from the website.
    fetch_genres()
    # To regenerate genres.yaml from an existing genres.json, enable:
    # convert_genres()
|