81 lines
2.5 KiB
Python
81 lines
2.5 KiB
Python
"""
|
|
Load a list of all Spotify genres from everynoise.com.
|
|
|
|
This requires an env file with valid Spotify API credentials (SPOTIFY_CLIENT_ID and SPOTIFY_CLIENT_SECRET), which are used to fetch the real genre names.
|
|
"""
|
|
|
|
import asyncio
|
|
import json
|
|
import os
|
|
from typing import Dict
|
|
|
|
from dotenv import load_dotenv
|
|
from bs4 import BeautifulSoup
|
|
import spotify
|
|
import aiohttp
|
|
from aiostream import stream, pipe
|
|
|
|
import model
|
|
import util
|
|
|
|
# Page downloaded by get_genres(); it is expected to contain a <table> whose
# rows each describe one genre.
EVERYNOISE_URL = "https://everynoise.com/everynoise1d.html"
|
|
|
|
|
|
async def get_genres():
    """Scrape the everynoise.com genre table and store it in ``util.GENRE_FILE``.

    Steps:
      1. Download ``EVERYNOISE_URL`` and locate its first ``<table>``.
      2. Load previously stored genres from ``util.GENRE_FILE`` if that file
         exists, so that only unknown genres need a Spotify lookup.
      3. For every table row, either refresh the rank of an already known
         genre or fetch the linked playlist's name from Spotify and derive
         the genre name from it.
      4. Write the merged result back with ``model.store_genres_json``.

    Spotify credentials are read from the ``SPOTIFY_CLIENT_ID`` and
    ``SPOTIFY_CLIENT_SECRET`` environment variables.

    Raises:
        aiohttp.ClientResponseError: if the page request returns an HTTP
            error status.
        Exception: if the downloaded page contains no ``<table>``.
    """
    async with aiohttp.ClientSession() as session:
        async with session.get(EVERYNOISE_URL) as response:
            # Fail on HTTP errors here instead of parsing an error page and
            # reporting a misleading "no table" further down.
            response.raise_for_status()
            html_text = await response.text()

    html = BeautifulSoup(html_text, features="lxml")
    table = html.find("table")

    if table is None:
        # Dump the parsed document to help diagnose a changed page layout.
        print(html)
        raise Exception("no table")

    genre_data: Dict[str, model.GenreMetadata] = {}

    # Reuse genres stored by an earlier run.
    if os.path.isfile(util.GENRE_FILE):
        with open(util.GENRE_FILE) as f:
            genre_dict = json.load(f)
        genre_data = model.load_genre_dict(genre_dict)
        print(len(genre_data), "genres loaded from file")

    try:
        async with spotify.Client(os.getenv("SPOTIFY_CLIENT_ID"),
                                  os.getenv("SPOTIFY_CLIENT_SECRET")) as client:

            async def fetch_genre(row):
                """Record the genre described by one table row in ``genre_data``."""
                rank = int(row.find("td").string)

                pl_link = row.find("a", {"target": "spotify"})
                pl_id = util.remove_prefix(
                    pl_link["href"],
                    "https://embed.spotify.com/?uri=spotify:playlist:")
                genre_link = row.find(
                    "a", {"title": "Re-sort the list starting from here."})
                genre_id = genre_link.string

                # Genre was already fetched, just update popularity
                if genre_id in genre_data:
                    genre_data[genre_id].rank = rank
                    return

                # Fetch genre name from Spotify
                pl_data = await client.http.get_playlist(pl_id, fields=["name"])
                genre_name = util.remove_prefix(pl_data["name"], "The Sound of ")

                genre_data[genre_id] = model.GenreMetadata(
                    genre_name, playlists={"sound": pl_id}, rank=rank)
                print(f"<{genre_id}> {genre_name}")

            rows = table.find_all("tr")
            print(f"Found {len(rows)} genres")
            # Process the rows concurrently, at most 5 lookups at a time.
            sx = stream.iterate(rows) | pipe.map(fetch_genre, task_limit=5)
            await sx
    finally:
        # Persist whatever was collected even if a lookup failed part-way, so
        # the next run can resume from the stored genres instead of repeating
        # every Spotify request.
        model.store_genres_json(util.GENRE_FILE, genre_data)
|
|
|
|
|
|
if __name__ == "__main__":
    # Load the env file into the process environment before get_genres()
    # reads the Spotify credentials with os.getenv().
    load_dotenv()
    # asyncio.run() creates a fresh event loop and closes it afterwards;
    # asyncio.get_event_loop() is deprecated when no loop is running.
    asyncio.run(get_genres())
|