diff --git a/.gitignore b/.gitignore index d69c82d..2bf76b0 100644 --- a/.gitignore +++ b/.gitignore @@ -4,8 +4,8 @@ /players.redb* +/*.json* /players*.parquet -/b50*.json* /region*.parquet /.python-version @@ -13,4 +13,6 @@ /.venv -*.pyc \ No newline at end of file +*.pyc + +/*html diff --git a/pyproject.toml b/pyproject.toml index 6560f86..8d9f3ad 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,4 +7,5 @@ requires-python = ">=3.12" dependencies = [ "orjson>=3.11.1", "polars>=1.32.0", + "pyecharts>=2.0.8", ] diff --git a/utils/export_b50.py b/utils/export_b50.py index 2592dd8..4ae4f00 100644 --- a/utils/export_b50.py +++ b/utils/export_b50.py @@ -1,83 +1,9 @@ import orjson as json from typing import Callable from datetime import datetime -from decimal import Decimal, getcontext +from decimal import Decimal -from hasher import salted_hash_userid - - -with open("musicDB.json", "r", encoding="utf-8") as f: - music_db = json.loads(f.read()) - -music_db = {entry["id"]: entry for entry in music_db} - -# Set Decimal precision -getcontext().prec = 28 - -# Constants -SSS_PLUS_THRESHOLD = Decimal("100.5") -SSS_PLUS_FACTOR = Decimal("0.224") -SSS_PRO_THRESHOLD = Decimal("100.4999") -SSS_PRO_FACTOR = Decimal("0.222") -SSS_THRESHOLD = Decimal("100.0") -SSS_FACTOR = Decimal("0.216") -SS_PLUS_PRO_THRESHOLD = Decimal("99.9999") -SS_PLUS_PRO_FACTOR = Decimal("0.214") -SS_PLUS_THRESHOLD = Decimal("99.5") -SS_PLUS_FACTOR = Decimal("0.211") -SS_THRESHOLD = Decimal("99.0") -SS_FACTOR = Decimal("0.208") -S_PLUS_PRO_THRESHOLD = Decimal("98.9999") -S_PLUS_PRO_FACTOR = Decimal("0.206") -S_PLUS_THRESHOLD = Decimal("98.0") -S_PLUS_FACTOR = Decimal("0.203") -S_THRESHOLD = Decimal("97.0") -S_FACTOR = Decimal("0.2") -AAA_PRO_THRESHOLD = Decimal("96.9999") -AAA_PRO_FACTOR = Decimal("0.176") -AAA_THRESHOLD = Decimal("94.0") -AAA_FACTOR = Decimal("0.168") -AA_THRESHOLD = Decimal("90.0") -AA_FACTOR = Decimal("0.152") -A_THRESHOLD = Decimal("80.0") -A_FACTOR = Decimal("0.136") - - -def dx_rating(difficulty: Decimal, achievement: int) -> int: - ach = Decimal(achievement) / Decimal("10000") - if ach > Decimal("101.0") or ach < A_THRESHOLD: - return 0 - if ach >= SSS_PLUS_THRESHOLD: - factor = SSS_PLUS_FACTOR - ach = Decimal("100.5") - elif ach >= SSS_PRO_THRESHOLD: - factor = SSS_PRO_FACTOR - elif ach >= SSS_THRESHOLD: - factor = SSS_FACTOR - elif ach >= SS_PLUS_PRO_THRESHOLD: - factor = SS_PLUS_PRO_FACTOR - elif ach >= SS_PLUS_THRESHOLD: - factor = SS_PLUS_FACTOR - elif ach >= SS_THRESHOLD: - factor = SS_FACTOR - elif ach >= S_PLUS_PRO_THRESHOLD: - factor = S_PLUS_PRO_FACTOR - elif ach >= S_PLUS_THRESHOLD: - factor = S_PLUS_FACTOR - elif ach >= S_THRESHOLD: - factor = S_FACTOR - elif ach >= AAA_PRO_THRESHOLD: - factor = AAA_PRO_FACTOR - elif ach >= AAA_THRESHOLD: - factor = AAA_FACTOR - elif ach >= AA_THRESHOLD: - factor = AA_FACTOR - elif ach >= A_THRESHOLD: - factor = A_FACTOR - else: - return 0 - result = (factor * difficulty * ach).quantize(Decimal("1."), rounding="ROUND_FLOOR") - return int(result) +from helpers import dx_rating, find_level, query_music_db, salted_hash_userid def clean_b50(b50: dict[str, str | dict]): @@ -102,25 +28,25 @@ def clean_b50(b50: dict[str, str | dict]): entry["difficulty"] = None entry["dxRating"] = 0 - music_info = music_db.get(entry["musicId"]) + music_info = query_music_db(entry["musicId"]) + if music_info is None: return entry["musicTitle"] = music_info["name"] + levels = find_level(music_info, entry["level"]) - levels = [ - level for level in music_info["levels"] if level["level"] == entry["level"] - ] + if not levels: + return - if levels: - level: dict[str, str | int] = levels.pop() - difficulty = level["difficulty"] + level: dict[str, str | int] = levels.pop() + difficulty = level["difficulty"] - entry["difficulty"] = difficulty - entry["dxRating"] = dx_rating( - difficulty=Decimal(difficulty), - achievement=entry["achievement"], - ) + entry["difficulty"] = difficulty + entry["dxRating"] = dx_rating( + difficulty=Decimal(difficulty), + achievement=entry["achievement"], + ) for b35 in urating["ratingList"]: add_rating(b35) diff --git a/utils/export_regions.py b/utils/export_regions.py index 13cf8ab..4f5e3f7 100644 --- a/utils/export_regions.py +++ b/utils/export_regions.py @@ -1,6 +1,6 @@ import polars as pl -from hasher import salted_hash_userid +from helpers import salted_hash_userid pl.scan_parquet("regions.parquet").with_columns( pl.col("user_id").map_elements(salted_hash_userid, return_dtype=pl.String) diff --git a/utils/flat_user_detail.py b/utils/flat_user_detail.py new file mode 100644 index 0000000..2c1fa80 --- /dev/null +++ b/utils/flat_user_detail.py @@ -0,0 +1,89 @@ +from sys import stderr, stdin +from decimal import Decimal +from functools import reduce +from typing import Literal + +import polars as pl +import orjson as json +import pyecharts.options as opts +from pyecharts.charts import Scatter + + +from helpers import query_music_db, find_level, dx_rating + + +def calculate_dxrating(music: dict): + music_id = music["musicId"] + level_id = music["level"] + ach = music["achievement"] + music_info = query_music_db(music_id) + level = find_level(music_info, level_id) + + try: + return music | {"dxRating": dx_rating(Decimal(level.pop()["difficulty"]), ach)} + except IndexError as _: + print(f"unknown level: {music_id} - {level_id}", file=stderr) + return music | {"dxRating": 0} + + +data = json.loads(stdin.buffer.read()) +user_id = data["userId"] +music_list: list[dict[str, dict]] = data["userMusicList"] +musics = reduce( + lambda a, b: a + b, (music["userMusicDetailList"] for music in music_list) +) + +musics = list(map(calculate_dxrating, musics)) + +df = ( + pl.LazyFrame(musics) + .filter(pl.col("dxRating") > 0) # filter out invalid play + .select(["playCount", "dxRating"]) + .sort("dxRating", descending=False) + .with_columns(pl.col("playCount").cum_sum()) + .collect() +) + +x_data = df["playCount"].to_list() +y_data = df["dxRating"].to_list() + + +def init_chart( + x_type: Literal["value", "log"], x_min: int = 1, x_max: int = 5000 +) -> Scatter: + return ( + Scatter( + init_opts=opts.InitOpts( + width="1600px", # 设置图表宽度 + height="1000px", # 设置图表高度 + ) + ) + .set_series_opts() + .set_global_opts( + xaxis_opts=opts.AxisOpts( + type_=x_type, + min_=x_min, + max_=x_max, + ), + yaxis_opts=opts.AxisOpts( + type_="value", + axistick_opts=opts.AxisTickOpts(is_show=True), + splitline_opts=opts.SplitLineOpts(is_show=True), + max_=330, + ), + tooltip_opts=opts.TooltipOpts(is_show=False), + visualmap_opts=opts.VisualMapOpts(max_=16400), + ) + .add_xaxis(xaxis_data=x_data) + .add_yaxis( + series_name="", + y_axis=y_data, + symbol_size=5, + label_opts=opts.LabelOpts(is_show=False), + ) + ) + + +x_max = (y_data[-1] // 50 * 50) + 50 +init_chart("value", 1, x_max).render(f"{user_id}-pc-rating-linear.html") +init_chart("log", 1, x_max).render(f"{user_id}-pc-rating-log.html") diff --git a/utils/hasher.py b/utils/hasher.py deleted file mode 100644 index fb5b7a3..0000000 --- a/utils/hasher.py +++ /dev/null @@ -1,8 +0,0 @@ -import hashlib - - -def salted_hash_userid(user_id: int): - SALT = b"Lt2N5xgjJOqRsT5qVt7wWYw6SqOPZDI7" - - hash_uid = hashlib.sha256(f"{user_id}".encode("utf-8") + SALT) - return hash_uid.hexdigest()[:16] diff --git a/utils/helpers.py b/utils/helpers.py new file mode 100644 index 0000000..1e55168 --- /dev/null +++ b/utils/helpers.py @@ -0,0 +1,95 @@ +from decimal import Decimal, getcontext +import hashlib + +import orjson as json + +getcontext().prec = 28 + + +def salted_hash_userid(user_id: int): + SALT = b"Lt2N5xgjJOqRsT5qVt7wWYw6SqOPZDI7" + + hash_uid = hashlib.sha256(f"{user_id}".encode("utf-8") + SALT) + return hash_uid.hexdigest()[:16] + + +def dx_rating(difficulty: Decimal, achievement: int) -> int: + # Constants + SSS_PLUS_THRESHOLD = Decimal("100.5") + SSS_PLUS_FACTOR = Decimal("0.224") + SSS_PRO_THRESHOLD = Decimal("100.4999") + SSS_PRO_FACTOR = Decimal("0.222") + SSS_THRESHOLD = Decimal("100.0") + SSS_FACTOR = Decimal("0.216") + SS_PLUS_PRO_THRESHOLD = Decimal("99.9999") + SS_PLUS_PRO_FACTOR = Decimal("0.214") + SS_PLUS_THRESHOLD = Decimal("99.5") + SS_PLUS_FACTOR = Decimal("0.211") + SS_THRESHOLD = Decimal("99.0") + SS_FACTOR = Decimal("0.208") + S_PLUS_PRO_THRESHOLD = Decimal("98.9999") + S_PLUS_PRO_FACTOR = Decimal("0.206") + S_PLUS_THRESHOLD = Decimal("98.0") + S_PLUS_FACTOR = Decimal("0.203") + S_THRESHOLD = Decimal("97.0") + S_FACTOR = Decimal("0.2") + AAA_PRO_THRESHOLD = Decimal("96.9999") + AAA_PRO_FACTOR = Decimal("0.176") + AAA_THRESHOLD = Decimal("94.0") + AAA_FACTOR = Decimal("0.168") + AA_THRESHOLD = Decimal("90.0") + AA_FACTOR = Decimal("0.152") + A_THRESHOLD = Decimal("80.0") + A_FACTOR = Decimal("0.136") + + ach = Decimal(achievement) / Decimal("10000") + if ach > Decimal("101.0") or ach < A_THRESHOLD: + return 0 + if ach >= SSS_PLUS_THRESHOLD: + factor = SSS_PLUS_FACTOR + ach = Decimal("100.5") + elif ach >= SSS_PRO_THRESHOLD: + factor = SSS_PRO_FACTOR + elif ach >= SSS_THRESHOLD: + factor = SSS_FACTOR + elif ach >= SS_PLUS_PRO_THRESHOLD: + factor = SS_PLUS_PRO_FACTOR + elif ach >= SS_PLUS_THRESHOLD: + factor = SS_PLUS_FACTOR + elif ach >= SS_THRESHOLD: + factor = SS_FACTOR + elif ach >= S_PLUS_PRO_THRESHOLD: + factor = S_PLUS_PRO_FACTOR + elif ach >= S_PLUS_THRESHOLD: + factor = S_PLUS_FACTOR + elif ach >= S_THRESHOLD: + factor = S_FACTOR + elif ach >= AAA_PRO_THRESHOLD: + factor = AAA_PRO_FACTOR + elif ach >= AAA_THRESHOLD: + factor = AAA_FACTOR + elif ach >= AA_THRESHOLD: + factor = AA_FACTOR + elif ach >= A_THRESHOLD: + factor = A_FACTOR + else: + return 0 + result = (factor * difficulty * ach).quantize(Decimal("1."), rounding="ROUND_FLOOR") + return int(result) + + +with open("musicDB.json", "r", encoding="utf-8") as f: + MUSIC_DB = json.loads(f.read()) + +MUSIC_DB = {entry["id"]: entry for entry in MUSIC_DB} + + +def query_music_db(music_id: int): + music_info = MUSIC_DB.get(music_id) + if music_info is None: + return + return music_info + + +def find_level(music_info: dict, level_id: int): + return [level for level in music_info["levels"] if level["level"] == level_id]