# Patch for utils/hash_userid.py: pass explicit compression settings when
# writing the hashed output file.
#
# The only code change is in the final write_parquet(...) call, which now
# receives compression="zstd" and compression_level=15 in addition to the
# output path. The output path is still derived from `file` by replacing
# ".parquet" with "_pub.parquet".
#
# Unchanged context lines: the "user_id" column is cast to pl.String, a fixed
# string literal is appended to it, and the result of chash.sha2_256() is
# truncated with str.head(16).
#
# NOTE(review): the original indentation of the hunk lines was lost; 4 spaces
# inside the call parentheses are assumed here. Confirm against the target
# file before applying.
# NOTE(review): the appended literal looks like a hashing salt kept in source
# control. Confirm that this is intended.
diff --git a/utils/hash_userid.py b/utils/hash_userid.py
index c9b40cc..b2cfd6c 100644
--- a/utils/hash_userid.py
+++ b/utils/hash_userid.py
@@ -8,4 +8,6 @@ pl.scan_parquet(file).with_columns(
     pl.col("user_id").cast(pl.String).add("Lt2N5xgjJOqRsT5qVt7wWYw6SqOPZDI7")
 ).with_columns(
     pl_hash.col("user_id").chash.sha2_256().str.head(16)
-).collect().write_parquet(file.replace(".parquet", "_pub.parquet"))
+).collect().write_parquet(
+    file.replace(".parquet", "_pub.parquet"), compression="zstd", compression_level=15
+)