From a7777d127aaa6b2cad91b3466ecf0a0c67ebf63a Mon Sep 17 00:00:00 2001 From: mokurin000 <1348292515a@gmail.com> Date: Tue, 16 Sep 2025 19:04:59 +0800 Subject: [PATCH] chore: update redb for performance --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- utils/rev_hashed_userid.py | 20 ++++++++++++++++++++ 3 files changed, 23 insertions(+), 3 deletions(-) create mode 100644 utils/rev_hashed_userid.py diff --git a/Cargo.lock b/Cargo.lock index a58a9c0..83be1a3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1769,9 +1769,9 @@ dependencies = [ [[package]] name = "redb" -version = "3.0.0" +version = "3.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "225e8bf881033e020ed87e9f10fc6254cf3ebab8d440e6fbb4c7b34bec2a0543" +checksum = "3fefa3e5ff4a369819c3d6df4195873d6f9abad109f13c0d505dbe119cfabb10" dependencies = [ "libc", ] diff --git a/Cargo.toml b/Cargo.toml index 77195fc..3b518ea 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,7 +18,7 @@ serde_json = "1.0.141" strum = { version = "0.27.2", features = ["derive"] } tokio = { version = "1.47.1", features = ["rt-multi-thread"] } compio = { version = "0.15.0", features = ["runtime"] } -redb = "3.0.0" +redb = "3.0.1" crabtime = { git = "https://github.com/wdanilo/crabtime.git", rev = "2ed856f5" } parquet = "56.0.0" diff --git a/utils/rev_hashed_userid.py b/utils/rev_hashed_userid.py new file mode 100644 index 0000000..e3a1051 --- /dev/null +++ b/utils/rev_hashed_userid.py @@ -0,0 +1,20 @@ +from sys import argv +import polars as pl +import polars_hash as pl_hash + +file = argv[1] + +user_ids = ( + pl.DataFrame({"user_id_num": range(11000000, 13000001)}) + .with_columns( + pl.col("user_id_num") + .cast(pl.String) + .add("Lt2N5xgjJOqRsT5qVt7wWYw6SqOPZDI7") + .alias("user_id"), + ) + .with_columns(pl_hash.col("user_id").chash.sha2_256().str.head(16)) + .join(pl.read_parquet(file), on="user_id", how="inner")["user_id_num"] +) + +with open("id.txt", "w", encoding="utf-8") as f: + f.write("\n".join(map(str, user_ids)))