chore: update redb for performance
This commit is contained in:
20
utils/rev_hashed_userid.py
Normal file
20
utils/rev_hashed_userid.py
Normal file
@@ -0,0 +1,20 @@
|
||||
from sys import argv
|
||||
import polars as pl
|
||||
import polars_hash as pl_hash
|
||||
|
||||
# Recover numeric user IDs from their hashed form: hash every candidate ID in
# a fixed numeric range and keep those whose hash appears in the input file.

# Path of the parquet file to match against (first command-line argument).
file = argv[1]

# Candidate numeric IDs: 11000000 through 13000000 inclusive.
candidate_ids = pl.DataFrame({"user_id_num": range(11000000, 13000001)})

# Decimal text of the ID with a fixed suffix appended, stored as "user_id".
suffixed_id = (
    pl.col("user_id_num")
    .cast(pl.String)
    .add("Lt2N5xgjJOqRsT5qVt7wWYw6SqOPZDI7")
    .alias("user_id")
)

# SHA-256 of that text, cut down to its first 16 characters. The expression
# keeps the "user_id" name, so it replaces the column built in the step above.
hashed_id = pl_hash.col("user_id").chash.sha2_256().str.head(16)

# The hash must be applied in a second with_columns call because it reads the
# "user_id" column that the first call creates.
hashed_candidates = candidate_ids.with_columns(suffixed_id).with_columns(hashed_id)

# The inner join keeps only candidates whose truncated hash occurs in the
# file's "user_id" column; only the numeric ID of each match is retained.
matched = hashed_candidates.join(pl.read_parquet(file), on="user_id", how="inner")
user_ids = matched["user_id_num"]

# Write the matched IDs one per line (no trailing newline) to id.txt.
with open("id.txt", "w", encoding="utf-8") as f:
    f.write("\n".join(str(user_id) for user_id in user_ids))
|
||||
Reference in New Issue
Block a user