14 lines
391 B
Python
14 lines
391 B
Python
from sys import argv
|
|
import polars as pl
|
|
import polars_hash as pl_hash
|
|
|
|
# String appended to every user_id before hashing.
# NOTE(review): this looks like a static salt/pepper, and it is committed in
# source, so anyone who can read this file can recompute the hash for a
# guessed user_id. Consider loading it from the environment or a secret
# store. It is left unchanged here because a different value would change
# every published hash.
USER_ID_SALT = "Lt2N5xgjJOqRsT5qVt7wWYw6SqOPZDI7"

PARQUET_SUFFIX = ".parquet"


def public_output_path(path: str) -> str:
    """Return the path of the "_pub" copy that belongs to *path*.

    Only a trailing ".parquet" is rewritten, so directory components that
    happen to contain ".parquet" are left alone. When *path* does not end in
    ".parquet", "_pub.parquet" is appended instead; the result therefore
    never equals *path*, and the input file cannot be overwritten.

    >>> public_output_path("data.parquet")
    'data_pub.parquet'
    >>> public_output_path("data.pq")
    'data.pq_pub.parquet'
    """
    if path.endswith(PARQUET_SUFFIX):
        stem = path[: -len(PARQUET_SUFFIX)]
    else:
        stem = path
    return f"{stem}_pub{PARQUET_SUFFIX}"


def write_public_copy(path: str) -> None:
    """Write a copy of the Parquet file at *path* with hashed user ids.

    The "user_id" column is cast to a string, suffixed with USER_ID_SALT,
    hashed with SHA-256 and cut down to the first 16 characters of the
    digest string. All other columns are passed through unchanged. The
    result is written next to the input (see public_output_path) using zstd
    compression at level 15.
    """
    salted = pl.scan_parquet(path).with_columns(
        pl.col("user_id").cast(pl.String).add(USER_ID_SALT)
    )
    hashed = salted.with_columns(
        pl_hash.col("user_id").chash.sha2_256().str.head(16)
    )
    hashed.collect().write_parquet(
        public_output_path(path),
        compression="zstd",
        compression_level=15,
    )


if __name__ == "__main__":
    # Fail with a readable message instead of a bare IndexError.
    if len(argv) < 2:
        raise SystemExit(f"usage: {argv[0]} <input.parquet>")
    write_public_copy(argv[1])
|