"""
Load a list of all Spotify genres from everynoise.com.

This requires an envfile with valid Spotify API credentials
(to fetch the real genre names).
"""
import asyncio
import json
import os
from typing import Dict

from dotenv import load_dotenv
from bs4 import BeautifulSoup
import spotify
import aiohttp
from aiostream import stream, pipe

import model
import util

EVERYNOISE_URL = "https://everynoise.com/everynoise1d.html"


async def get_genres():
    """Scrape the everynoise genre table and store the merged genre metadata.

    The page at ``EVERYNOISE_URL`` is downloaded and its first ``<table>`` is
    parsed row by row. Genres already present in ``util.GENRE_FILE`` only get
    their rank refreshed; for every new genre the linked Spotify playlist is
    looked up to obtain the genre name. The combined result is written back
    with ``model.store_genres_json``.

    Spotify credentials are read from the ``SPOTIFY_CLIENT_ID`` and
    ``SPOTIFY_CLIENT_SECRET`` environment variables.

    Raises:
        aiohttp.ClientResponseError: if everynoise.com answers with an HTTP
            error status.
        Exception: if the downloaded page contains no ``<table>`` element.
    """
    async with aiohttp.ClientSession() as session:
        async with session.get(EVERYNOISE_URL) as response:
            # Surface HTTP errors directly instead of letting an error page
            # fall through to the less helpful "no table" failure below.
            response.raise_for_status()
            html_text = await response.text()

    html = BeautifulSoup(html_text, features="lxml")
    table = html.find("table")

    if table is None:
        # Dump the parsed document so the unexpected page can be inspected.
        print(html)
        raise Exception("no table")

    genre_data: Dict[str, model.GenreMetadata] = dict()

    # Reuse previously stored genres so their names are not fetched again.
    if os.path.isfile(util.GENRE_FILE):
        with open(util.GENRE_FILE) as f:
            genre_dict = json.load(f)
            genre_data = model.load_genre_dict(genre_dict)
        print(len(genre_data), "genres loaded from file")

    async with spotify.Client(os.getenv("SPOTIFY_CLIENT_ID"),
                              os.getenv("SPOTIFY_CLIENT_SECRET")) as client:
        async def fetch_genre(row):
            """Record the genre described by one table row in ``genre_data``."""
            rank = int(row.find("td").string)

            # The playlist ID is the embed link's href minus its fixed prefix.
            pl_link = row.find("a", {"target": "spotify"})
            pl_id = util.remove_prefix(
                pl_link["href"],
                "https://embed.spotify.com/?uri=spotify:playlist:")

            genre_link = row.find(
                "a", {"title": "Re-sort the list starting from here."})
            genre_id = genre_link.string

            # Genre was already fetched, just update popularity
            if genre_id in genre_data:
                genre_data[genre_id].rank = rank
                return

            # Fetch genre name from Spotify
            pl_data = await client.http.get_playlist(pl_id, fields=["name"])
            genre_name = util.remove_prefix(pl_data["name"], "The Sound of ")

            genre_data[genre_id] = model.GenreMetadata(
                genre_name, playlists={"sound": pl_id}, rank=rank)
            print(f"<{genre_id}> {genre_name}")

        rows = table.find_all("tr")
        print(f"Found {len(rows)} genres")

        # Map fetch_genre over all rows; task_limit=5 is passed to aiostream
        # to bound how many lookups are in flight at once.
        sx = stream.iterate(rows) | pipe.map(fetch_genre, task_limit=5)
        await sx

    model.store_genres_json(util.GENRE_FILE, genre_data)


if __name__ == "__main__":
    load_dotenv()
    # asyncio.run() creates and closes the event loop itself. Calling
    # asyncio.get_event_loop() without a running loop is deprecated and
    # raises RuntimeError on newer Python versions.
    asyncio.run(get_genres())