diff --git a/pyproject.toml b/pyproject.toml
index df5000d..1427555 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -5,8 +5,10 @@ description = "Add your description here"
 readme = "README.md"
 requires-python = ">=3.12"
 dependencies = [
+    "diskcache>=5.6.3",
     "loguru>=0.7.3",
     "orjson>=3.11.1",
     "polars>=1.32.0",
+    "polars-hash>=0.5.4",
     "pyecharts>=2.0.8",
 ]
diff --git a/utils/export_records.py b/utils/export_records.py
new file mode 100644
index 0000000..e0f6fa7
--- /dev/null
+++ b/utils/export_records.py
@@ -0,0 +1,13 @@
+import polars as pl
+import polars_hash as pl_hash
+
+# Salt appended to the stringified user_id before hashing; it must stay
+# identical to the SALT used by helpers.salted_hash_userid.
+SALT = "Lt2N5xgjJOqRsT5qVt7wWYw6SqOPZDI7"
+
+pl.scan_parquet("records.parquet").with_columns(
+    pl.col("user_id").cast(pl.String).add(SALT)
+).with_columns(
+    # Truncate to 16 hex chars to match helpers.salted_hash_userid.
+    pl_hash.col("user_id").chash.sha2_256().str.slice(0, 16)
+).collect().write_parquet("records_pub.parquet")
diff --git a/utils/export_regions.py b/utils/export_regions.py
index 4f5e3f7..086b57b 100644
--- a/utils/export_regions.py
+++ b/utils/export_regions.py
@@ -1,7 +1,13 @@
 import polars as pl
-
-from helpers import salted_hash_userid
+import polars_hash as pl_hash
 
+# Salt appended to the stringified user_id before hashing; it must stay
+# identical to the SALT used by helpers.salted_hash_userid.
+SALT = "Lt2N5xgjJOqRsT5qVt7wWYw6SqOPZDI7"
+
 pl.scan_parquet("regions.parquet").with_columns(
-    pl.col("user_id").map_elements(salted_hash_userid, return_dtype=pl.String)
-).collect().write_parquet("regions_pub.parquet")
+    pl.col("user_id").cast(pl.String).add(SALT)
+).with_columns(
+    # Truncate to 16 hex chars to match helpers.salted_hash_userid.
+    pl_hash.col("user_id").chash.sha2_256().str.slice(0, 16)
+).collect().write_parquet("regions_pub.parquet")
diff --git a/utils/helpers.py b/utils/helpers.py
index 1e55168..80cedce 100644
--- a/utils/helpers.py
+++ b/utils/helpers.py
@@ -2,15 +2,33 @@ from decimal import Decimal, getcontext
 import hashlib
 
 import orjson as json
+from diskcache import Cache
 
 
 getcontext().prec = 28
 
+# Persistent cache of user_id -> salted hash, stored in the "target" directory.
+# NOTE(review): this writes raw user_ids next to their hashes on disk, so the
+# directory must never be published alongside the exported files.
+CACHE = Cache("target")
+
 
 def salted_hash_userid(user_id: int):
+    """Return the first 16 hex characters of SHA-256(str(user_id) + SALT).
+
+    Results are memoised in the on-disk ``CACHE`` keyed by ``user_id``.
+    """
+    cached = CACHE.get(user_id)
+    if cached is not None:
+        return cached
+
     SALT = b"Lt2N5xgjJOqRsT5qVt7wWYw6SqOPZDI7"
     hash_uid = hashlib.sha256(f"{user_id}".encode("utf-8") + SALT)
-    return hash_uid.hexdigest()[:16]
+    result = hash_uid.hexdigest()[:16]
+
+    # add() only stores the value when the key is not already present.
+    CACHE.add(user_id, result)
+    return result
 
 
 def dx_rating(difficulty: Decimal, achievement: int) -> int: