diff --git a/pyproject.toml b/pyproject.toml index 1427555..62ff5a2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,11 +4,4 @@ version = "0.1.0" description = "Add your description here" readme = "README.md" requires-python = ">=3.12" -dependencies = [ - "diskcache>=5.6.3", - "loguru>=0.7.3", - "orjson>=3.11.1", - "polars>=1.32.0", - "polars-hash>=0.5.4", - "pyecharts>=2.0.8", -] +dependencies = ["orjson>=3.11.1", "polars>=1.32.0", "polars-hash>=0.5.4"] diff --git a/utils/export_b50.py b/utils/export_b50.py deleted file mode 100644 index 4ae4f00..0000000 --- a/utils/export_b50.py +++ /dev/null @@ -1,106 +0,0 @@ -import orjson as json -from typing import Callable -from datetime import datetime -from decimal import Decimal - -from helpers import dx_rating, find_level, query_music_db, salted_hash_userid - - -def clean_b50(b50: dict[str, str | dict]): - urating: dict[str, list[dict[str, int]]] = b50["userRating"] - - def add_rating(entry: dict[str, int]): - """ - ``` - { - "musicId": 11638, - "level": 2, - "romVersion": 24005, - "achievement": 988145 - } - ``` - - level: EXPERT - - ver: DX, 1.40.05 - - ach: 98.8145% - """ - - entry["musicTitle"] = None - entry["difficulty"] = None - entry["dxRating"] = 0 - - music_info = query_music_db(entry["musicId"]) - - if music_info is None: - return - - entry["musicTitle"] = music_info["name"] - levels = find_level(music_info, entry["level"]) - - if not levels: - return - - level: dict[str, str | int] = levels.pop() - difficulty = level["difficulty"] - - entry["difficulty"] = difficulty - entry["dxRating"] = dx_rating( - difficulty=Decimal(difficulty), - achievement=entry["achievement"], - ) - - for b35 in urating["ratingList"]: - add_rating(b35) - for b15 in urating["newRatingList"]: - add_rating(b15) - - urating["rating"] = sum( - map( - lambda lst: sum(map(lambda entry: entry["dxRating"], urating[lst])), - ["ratingList", "newRatingList"], - ) - ) - - -def record_time(*, _: list[datetime] = []): - last_time = _ - if not last_time: - last_time.append(datetime.now()) - else: - new = datetime.now() - diff = (new - last_time.pop()).total_seconds() - last_time.append(new) - return diff - - -def process( - clean_fields: Callable[[dict], None], - input_file: str, - output_file: str, -): - record_time() - with open(input_file, "rb") as f: - data = json.loads(f.read()) - print(f"loaded, cost {record_time():.2f}s") - - for entry in data: - entry["userId"] = salted_hash_userid(entry["userId"]) - clean_fields(entry) - print(f"processed, cost {record_time():.2f}s") - - with open(output_file, "wb") as f: - f.write(json.dumps(data)) - print(f"written out, cost {record_time():.2f}s") - - return data - - -def main(): - process( - clean_b50, - "b50.json", - "b50_pub.json", - ) - - -if __name__ == "__main__": - main() diff --git a/utils/helpers.py b/utils/helpers.py deleted file mode 100644 index 80cedce..0000000 --- a/utils/helpers.py +++ /dev/null @@ -1,105 +0,0 @@ -from decimal import Decimal, getcontext -import hashlib - -import orjson as json -from diskcache import Cache - -getcontext().prec = 28 - -CACHE = Cache("target") - - -def salted_hash_userid(user_id: int): - hex = CACHE.get(user_id) - if hex is not None: - return hex - - SALT = b"Lt2N5xgjJOqRsT5qVt7wWYw6SqOPZDI7" - - hash_uid = hashlib.sha256(f"{user_id}".encode("utf-8") + SALT) - result = hash_uid.hexdigest()[:16] - - CACHE.add(user_id, result) - return result - - -def dx_rating(difficulty: Decimal, achievement: int) -> int: - # Constants - SSS_PLUS_THRESHOLD = Decimal("100.5") - SSS_PLUS_FACTOR = Decimal("0.224") - SSS_PRO_THRESHOLD = Decimal("100.4999") - SSS_PRO_FACTOR = Decimal("0.222") - SSS_THRESHOLD = Decimal("100.0") - SSS_FACTOR = Decimal("0.216") - SS_PLUS_PRO_THRESHOLD = Decimal("99.9999") - SS_PLUS_PRO_FACTOR = Decimal("0.214") - SS_PLUS_THRESHOLD = Decimal("99.5") - SS_PLUS_FACTOR = Decimal("0.211") - SS_THRESHOLD = Decimal("99.0") - SS_FACTOR = Decimal("0.208") - S_PLUS_PRO_THRESHOLD = Decimal("98.9999") - S_PLUS_PRO_FACTOR = Decimal("0.206") - S_PLUS_THRESHOLD = Decimal("98.0") - S_PLUS_FACTOR = Decimal("0.203") - S_THRESHOLD = Decimal("97.0") - S_FACTOR = Decimal("0.2") - AAA_PRO_THRESHOLD = Decimal("96.9999") - AAA_PRO_FACTOR = Decimal("0.176") - AAA_THRESHOLD = Decimal("94.0") - AAA_FACTOR = Decimal("0.168") - AA_THRESHOLD = Decimal("90.0") - AA_FACTOR = Decimal("0.152") - A_THRESHOLD = Decimal("80.0") - A_FACTOR = Decimal("0.136") - - ach = Decimal(achievement) / Decimal("10000") - if ach > Decimal("101.0") or ach < A_THRESHOLD: - return 0 - if ach >= SSS_PLUS_THRESHOLD: - factor = SSS_PLUS_FACTOR - ach = Decimal("100.5") - elif ach >= SSS_PRO_THRESHOLD: - factor = SSS_PRO_FACTOR - elif ach >= SSS_THRESHOLD: - factor = SSS_FACTOR - elif ach >= SS_PLUS_PRO_THRESHOLD: - factor = SS_PLUS_PRO_FACTOR - elif ach >= SS_PLUS_THRESHOLD: - factor = SS_PLUS_FACTOR - elif ach >= SS_THRESHOLD: - factor = SS_FACTOR - elif ach >= S_PLUS_PRO_THRESHOLD: - factor = S_PLUS_PRO_FACTOR - elif ach >= S_PLUS_THRESHOLD: - factor = S_PLUS_FACTOR - elif ach >= S_THRESHOLD: - factor = S_FACTOR - elif ach >= AAA_PRO_THRESHOLD: - factor = AAA_PRO_FACTOR - elif ach >= AAA_THRESHOLD: - factor = AAA_FACTOR - elif ach >= AA_THRESHOLD: - factor = AA_FACTOR - elif ach >= A_THRESHOLD: - factor = A_FACTOR - else: - return 0 - result = (factor * difficulty * ach).quantize(Decimal("1."), rounding="ROUND_FLOOR") - return int(result) - - -with open("musicDB.json", "r", encoding="utf-8") as f: - MUSIC_DB = json.loads(f.read()) - -MUSIC_DB = {entry["id"]: entry for entry in MUSIC_DB} - - -def query_music_db(music_id: int): - music_info = MUSIC_DB.get(music_id) - if music_info is None: - return - return music_info - - -def find_level(music_info: dict, level_id: int): - return [level for level in music_info["levels"] if level["level"] == level_id]