From b0942e2af4834b50c548f8357edbeb36827956dd Mon Sep 17 00:00:00 2001
From: mokurin000 <1348292515a@gmail.com>
Date: Mon, 11 Aug 2025 12:12:14 +0800
Subject: [PATCH] feat: high-concurrency userid hashing

---
 pyproject.toml          |  2 ++
 utils/export_records.py |  9 +++++++++
 utils/export_regions.py | 10 ++++++----
 utils/helpers.py        | 12 +++++++++++-
 4 files changed, 28 insertions(+), 5 deletions(-)
 create mode 100644 utils/export_records.py

diff --git a/pyproject.toml b/pyproject.toml
index df5000d..1427555 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -5,8 +5,10 @@ description = "Add your description here"
 readme = "README.md"
 requires-python = ">=3.12"
 dependencies = [
+    "diskcache>=5.6.3",
     "loguru>=0.7.3",
     "orjson>=3.11.1",
     "polars>=1.32.0",
+    "polars-hash>=0.5.4",
     "pyecharts>=2.0.8",
 ]
diff --git a/utils/export_records.py b/utils/export_records.py
new file mode 100644
index 0000000..e0f6fa7
--- /dev/null
+++ b/utils/export_records.py
@@ -0,0 +1,9 @@
+import polars as pl
+import polars_hash as pl_hash
+
+pl.scan_parquet("records.parquet").with_columns(
+    pl.col("user_id").cast(pl.String).add("Lt2N5xgjJOqRsT5qVt7wWYw6SqOPZDI7")
+).with_columns(
+    # Truncate to 16 hex chars, the id length helpers.salted_hash_userid emits.
+    pl_hash.col("user_id").chash.sha2_256().str.slice(0, 16)
+).collect().write_parquet("records_pub.parquet")
diff --git a/utils/export_regions.py b/utils/export_regions.py
index 4f5e3f7..086b57b 100644
--- a/utils/export_regions.py
+++ b/utils/export_regions.py
@@ -1,7 +1,9 @@
 import polars as pl
-
-from helpers import salted_hash_userid
+import polars_hash as pl_hash
 
 pl.scan_parquet("regions.parquet").with_columns(
-    pl.col("user_id").map_elements(salted_hash_userid, return_dtype=pl.String)
-).collect().write_parquet("regions_pub.parquet")
+    pl.col("user_id").cast(pl.String).add("Lt2N5xgjJOqRsT5qVt7wWYw6SqOPZDI7")
+).with_columns(
+    # Truncate to 16 hex chars, the id length helpers.salted_hash_userid emits.
+    pl_hash.col("user_id").chash.sha2_256().str.slice(0, 16)
+).collect().write_parquet("regions_pub.parquet")
diff --git a/utils/helpers.py b/utils/helpers.py
index 1e55168..80cedce 100644
--- a/utils/helpers.py
+++ b/utils/helpers.py
@@ -2,15 +2,25 @@ from decimal import Decimal, getcontext
 import hashlib
 
 import orjson as json
+from diskcache import Cache
 
 getcontext().prec = 28
 
+CACHE = Cache("target")
+
 
 def salted_hash_userid(user_id: int):
+    """Return the first 16 hex chars of SHA-256(str(user_id) + SALT), memoized in CACHE."""
+    if (cached := CACHE.get(user_id)) is not None:
+        return cached
+
     SALT = b"Lt2N5xgjJOqRsT5qVt7wWYw6SqOPZDI7"
 
     hash_uid = hashlib.sha256(f"{user_id}".encode("utf-8") + SALT)
-    return hash_uid.hexdigest()[:16]
+    result = hash_uid.hexdigest()[:16]
+    # Per diskcache docs, add() stores only when the key is still absent.
+    CACHE.add(user_id, result)
+    return result
 
 
 def dx_rating(difficulty: Decimal, achievement: int) -> int: