feat: high-concurrency userid hashing

This commit is contained in:
mokurin000
2025-08-11 12:12:14 +08:00
parent 1d2e3fc7cc
commit b0942e2af4
4 changed files with 26 additions and 5 deletions

View File

@@ -5,8 +5,10 @@ description = "Add your description here"
readme = "README.md" readme = "README.md"
requires-python = ">=3.12" requires-python = ">=3.12"
dependencies = [ dependencies = [
"diskcache>=5.6.3",
"loguru>=0.7.3", "loguru>=0.7.3",
"orjson>=3.11.1", "orjson>=3.11.1",
"polars>=1.32.0", "polars>=1.32.0",
"polars-hash>=0.5.4",
"pyecharts>=2.0.8", "pyecharts>=2.0.8",
] ]

8
utils/export_records.py Normal file
View File

@@ -0,0 +1,8 @@
import polars as pl
import polars_hash as pl_hash
# Write a public copy of records.parquet in which every user_id is cast to a
# string, suffixed with a fixed salt, and then replaced by its SHA-256 hash.
salted_user_id = (
    pl.col("user_id").cast(pl.String).add("Lt2N5xgjJOqRsT5qVt7wWYw6SqOPZDI7")
)
hashed_user_id = pl_hash.col("user_id").chash.sha2_256()

records = pl.scan_parquet("records.parquet")
# Two separate with_columns steps: the hash expression reads "user_id", so it
# must run after the salted value has already overwritten that column.
records = records.with_columns(salted_user_id)
records = records.with_columns(hashed_user_id)
records.collect().write_parquet("records_pub.parquet")

View File

@@ -1,7 +1,8 @@
import polars as pl import polars as pl
import polars_hash as pl_hash
from helpers import salted_hash_userid
pl.scan_parquet("regions.parquet").with_columns( pl.scan_parquet("regions.parquet").with_columns(
pl.col("user_id").map_elements(salted_hash_userid, return_dtype=pl.String) pl.col("user_id").cast(pl.String).add("Lt2N5xgjJOqRsT5qVt7wWYw6SqOPZDI7")
).collect().write_parquet("regions_pub.parquet") ).with_columns(pl_hash.col("user_id").chash.sha2_256()).collect().write_parquet(
"regions_pub.parquet"
)

View File

@@ -2,15 +2,25 @@ from decimal import Decimal, getcontext
import hashlib import hashlib
import orjson as json import orjson as json
from diskcache import Cache
getcontext().prec = 28 getcontext().prec = 28
CACHE = Cache("target")
def salted_hash_userid(user_id: int): def salted_hash_userid(user_id: int):
hex = CACHE.get(user_id)
if hex is not None:
return hex
SALT = b"Lt2N5xgjJOqRsT5qVt7wWYw6SqOPZDI7" SALT = b"Lt2N5xgjJOqRsT5qVt7wWYw6SqOPZDI7"
hash_uid = hashlib.sha256(f"{user_id}".encode("utf-8") + SALT) hash_uid = hashlib.sha256(f"{user_id}".encode("utf-8") + SALT)
return hash_uid.hexdigest()[:16] result = hash_uid.hexdigest()[:16]
CACHE.add(user_id, result)
return result
def dx_rating(difficulty: Decimal, achievement: int) -> int: def dx_rating(difficulty: Decimal, achievement: int) -> int: