feat: high-concurrency userid hashing

This commit is contained in:
mokurin000
2025-08-11 12:12:14 +08:00
parent 1d2e3fc7cc
commit b0942e2af4
4 changed files with 26 additions and 5 deletions

View File

@@ -1,7 +1,8 @@
# NOTE(review): these lines come from a rendered diff hunk (-1,7 +1,8) whose
# +/- markers were lost, so old and new lines are interleaved and the text is
# not runnable as written. The old/new labels below are inferred -- confirm
# against the commit before relying on them.
import polars as pl
# Presumably the removed import: the helper applied per element further down.
from helpers import salted_hash_userid
# Presumably the added import: used below as pl_hash.col(...).chash.sha2_256().
import polars_hash as pl_hash
# Shared opening line: scan regions.parquet and start replacing columns.
pl.scan_parquet("regions.parquet").with_columns(
# Presumably old: call salted_hash_userid on every user_id, yielding strings.
pl.col("user_id").map_elements(salted_hash_userid, return_dtype=pl.String)
).collect().write_parquet("regions_pub.parquet")
# Presumably new: cast user_id to String and add a fixed literal to it.
# NOTE(review): the literal looks like a hashing salt committed in plain text
# -- confirm that publishing it alongside the hashed output is acceptable.
pl.col("user_id").cast(pl.String).add("Lt2N5xgjJOqRsT5qVt7wWYw6SqOPZDI7")
# Then, in a second with_columns, apply polars_hash's sha2_256 to user_id,
# collect the result, and write it to regions_pub.parquet.
).with_columns(pl_hash.col("user_id").chash.sha2_256()).collect().write_parquet(
"regions_pub.parquet"
)