spotify-genres/genres/get_everynoise.py

81 lines
2.5 KiB
Python

"""
Load a list of all Spotify genres from everynoise.com.

This requires a .env file with valid Spotify API credentials (SPOTIFY_CLIENT_ID and
SPOTIFY_CLIENT_SECRET), which are used to fetch the real genre names.
"""
import asyncio
import json
import os
from typing import Dict
from dotenv import load_dotenv
from bs4 import BeautifulSoup
import spotify
import aiohttp
from aiostream import stream, pipe
import model
import util
# Page downloaded by get_genres(); its first <table> is scraped, one <tr> per genre.
EVERYNOISE_URL = "https://everynoise.com/everynoise1d.html"
async def get_genres():
    """Scrape the everynoise.com genre list and merge it into the genre file.

    Downloads ``EVERYNOISE_URL`` and reads one genre per ``<tr>`` of the first
    ``<table>`` on the page. The result is merged with the genres already
    stored in ``util.GENRE_FILE`` (if that file exists):

    * genres already known only get their ``rank`` refreshed;
    * new genres are looked up on Spotify through the playlist linked in the
      row; the playlist name without its "The Sound of " prefix becomes the
      genre name.

    The merged data is written back with ``model.store_genres_json``.

    Spotify credentials are read from the ``SPOTIFY_CLIENT_ID`` and
    ``SPOTIFY_CLIENT_SECRET`` environment variables.

    Raises:
        aiohttp.ClientResponseError: if the page request returns an HTTP
            error status.
        Exception: if the downloaded page contains no ``<table>`` element.
    """
    async with aiohttp.ClientSession() as session:
        async with session.get(EVERYNOISE_URL) as response:
            # Fail on HTTP errors here instead of parsing an error page and
            # reporting a misleading "no table" further down.
            response.raise_for_status()
            html_text = await response.text()

    html = BeautifulSoup(html_text, features="lxml")
    table = html.find("table")
    if table is None:
        # Dump the document so the unexpected page layout can be inspected.
        print(html)
        raise Exception("no table")

    genre_data: Dict[str, model.GenreMetadata] = {}

    # Start from previously stored genres so only new ones hit the Spotify API.
    if os.path.isfile(util.GENRE_FILE):
        # Explicit encoding: JSON is UTF-8, the platform default may not be.
        with open(util.GENRE_FILE, encoding="utf-8") as f:
            genre_data = model.load_genre_dict(json.load(f))
        print(len(genre_data), "genres loaded from file")

    async with spotify.Client(os.getenv("SPOTIFY_CLIENT_ID"),
                              os.getenv("SPOTIFY_CLIENT_SECRET")) as client:

        async def fetch_genre(row):
            """Record the genre described by one table row in ``genre_data``."""
            rank_cell = row.find("td")
            pl_link = row.find("a", {"target": "spotify"})
            genre_link = row.find(
                "a", {"title": "Re-sort the list starting from here."})

            # A row missing any of these elements cannot describe a genre.
            # Skip it rather than letting an AttributeError/TypeError abort
            # the whole run before anything has been stored.
            if rank_cell is None or pl_link is None or genre_link is None:
                print("Skipping table row without genre data")
                return

            rank = int(rank_cell.string)
            pl_id = util.remove_prefix(
                pl_link["href"],
                "https://embed.spotify.com/?uri=spotify:playlist:")
            genre_id = genre_link.string

            # Genre was already fetched, just update popularity
            if genre_id in genre_data:
                genre_data[genre_id].rank = rank
                return

            # Fetch genre name from Spotify
            pl_data = await client.http.get_playlist(pl_id, fields=["name"])
            genre_name = util.remove_prefix(pl_data["name"], "The Sound of ")

            genre_data[genre_id] = model.GenreMetadata(
                genre_name, playlists={"sound": pl_id}, rank=rank)
            print(f"<{genre_id}> {genre_name}")

        rows = table.find_all("tr")
        print(f"Found {len(rows)} genres")

        # Awaiting an aiostream pipeline that yields no items raises
        # StreamEmpty, so only run it when there is at least one row.
        if rows:
            # task_limit bounds the number of concurrent Spotify requests.
            await (stream.iterate(rows)
                   | pipe.map(fetch_genre, task_limit=5))

    model.store_genres_json(util.GENRE_FILE, genre_data)
if __name__ == "__main__":
    # Populate the environment from the envfile before get_genres() reads the
    # Spotify credentials via os.getenv().
    load_dotenv()
    # asyncio.run() creates a fresh event loop and closes it afterwards;
    # calling asyncio.get_event_loop() without a running loop is deprecated.
    asyncio.run(get_genres())