chore: update redb for performance

This commit is contained in:
mokurin000
2025-09-16 19:04:59 +08:00
parent c45e12d1bb
commit a7777d127a
3 changed files with 23 additions and 3 deletions

4
Cargo.lock generated
View File

@@ -1769,9 +1769,9 @@ dependencies = [
[[package]]
name = "redb"
version = "3.0.0"
version = "3.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "225e8bf881033e020ed87e9f10fc6254cf3ebab8d440e6fbb4c7b34bec2a0543"
checksum = "3fefa3e5ff4a369819c3d6df4195873d6f9abad109f13c0d505dbe119cfabb10"
dependencies = [
"libc",
]

View File

@@ -18,7 +18,7 @@ serde_json = "1.0.141"
strum = { version = "0.27.2", features = ["derive"] }
tokio = { version = "1.47.1", features = ["rt-multi-thread"] }
compio = { version = "0.15.0", features = ["runtime"] }
redb = "3.0.0"
redb = "3.0.1"
crabtime = { git = "https://github.com/wdanilo/crabtime.git", rev = "2ed856f5" }
parquet = "56.0.0"

View File

@@ -0,0 +1,20 @@
from sys import argv
import polars as pl
import polars_hash as pl_hash
# Path of the parquet file to match against, taken from the first CLI argument.
file = argv[1]

# Candidate numeric ids: every integer from 11000000 through 13000000 inclusive.
_candidates = pl.DataFrame({"user_id_num": range(11000000, 13000001)})

# Build the join key in two steps:
#   1. the numeric id rendered as a string with a fixed suffix appended
#      (presumably acting as a salt -- confirm against whatever produced
#      the parquet file);
#   2. the SHA-256 hash of that string, truncated to its first 16 characters,
#      which overwrites the "user_id" column.
_suffixed_id = (
    pl.col("user_id_num")
    .cast(pl.String)
    .add("Lt2N5xgjJOqRsT5qVt7wWYw6SqOPZDI7")
    .alias("user_id")
)
_hashed_id = pl_hash.col("user_id").chash.sha2_256().str.head(16)
_keyed = _candidates.with_columns(_suffixed_id).with_columns(_hashed_id)

# Keep only the candidates whose hashed key also appears in the parquet file,
# then project back to the plain numeric id.
_matched = _keyed.join(pl.read_parquet(file), on="user_id", how="inner")
user_ids = _matched["user_id_num"]

# Write one numeric id per line (no trailing newline after the last id).
with open("id.txt", "w", encoding="utf-8") as f:
    f.write("\n".join(str(uid) for uid in user_ids))