refactor: dedup clean uid
This commit is contained in:
@@ -1,8 +0,0 @@
|
||||
import polars as pl
|
||||
import polars_hash as pl_hash
|
||||
|
||||
pl.scan_parquet("records.parquet").with_columns(
|
||||
pl.col("user_id").cast(pl.String).add("Lt2N5xgjJOqRsT5qVt7wWYw6SqOPZDI7")
|
||||
).with_columns(pl_hash.col("user_id").chash.sha2_256()).collect().write_parquet(
|
||||
"records_pub.parquet"
|
||||
)
|
||||
@@ -1,8 +0,0 @@
|
||||
import polars as pl
|
||||
import polars_hash as pl_hash
|
||||
|
||||
pl.scan_parquet("regions.parquet").with_columns(
|
||||
pl.col("user_id").cast(pl.String).add("Lt2N5xgjJOqRsT5qVt7wWYw6SqOPZDI7")
|
||||
).with_columns(pl_hash.col("user_id").chash.sha2_256()).collect().write_parquet(
|
||||
"regions_pub.parquet"
|
||||
)
|
||||
11
utils/hash_userid.py
Normal file
11
utils/hash_userid.py
Normal file
@@ -0,0 +1,11 @@
|
||||
"""Pseudonymise the ``user_id`` column of a parquet file.

Usage::

    python hash_userid.py <file.parquet>

Each ``user_id`` is cast to a string, suffixed with a fixed salt, hashed
with SHA-256 and cut down to its first 16 characters.  The result is
written next to the input as ``<name>_pub.parquet``; the input file itself
is never modified.
"""
import sys
from pathlib import Path

import polars as pl
import polars_hash as pl_hash

# NOTE(review): this salt lives in version control in clear text.  Anyone who
# can read the repository can recompute the hash for a guessed user_id, so
# consider loading it from a secret store.  The value is kept unchanged here
# because altering it would change every published identifier.
_SALT = "Lt2N5xgjJOqRsT5qVt7wWYw6SqOPZDI7"

# Number of leading characters of the hash string that are kept.
_DIGEST_PREFIX_LEN = 16


def _pub_path(source: Path) -> Path:
    """Return the sibling ``<stem>_pub.parquet`` path for *source*.

    Only the final path component is rewritten, so a directory whose name
    happens to contain ``.parquet`` is left alone, and the result can never
    be equal to *source* (which would overwrite the input).
    """
    return source.with_name(f"{source.stem}_pub.parquet")


def main(args: list) -> int:
    """Hash ``user_id`` in the parquet file named by ``args[0]``.

    Args:
        args: Command-line arguments without the program name.  Only the
            first one is used; extra arguments are ignored.

    Returns:
        Process exit status: 0 on success, 2 when no input file was given.
    """
    if not args:
        print("usage: hash_userid.py <file.parquet>", file=sys.stderr)
        return 2

    source = Path(args[0])

    salted = pl.scan_parquet(source).with_columns(
        pl.col("user_id").cast(pl.String).add(_SALT)
    )
    hashed = salted.with_columns(
        pl_hash.col("user_id").chash.sha2_256().str.head(_DIGEST_PREFIX_LEN)
    )
    hashed.collect().write_parquet(_pub_path(source))
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))
|
||||
Reference in New Issue
Block a user