Consolidate the per-dataset export scripts into a single script.

Removes utils/export_records.py and utils/export_regions.py, which differ
only in their hard-coded input and output parquet file names, and adds
utils/hash_userid.py, which reads the input path from argv[1] and writes to
that path with ".parquet" replaced by "_pub.parquet". The new script also
keeps only the first 16 characters of the hashed user_id column.

Review notes (payload below is unchanged):
  * The salt string appended to user_id is committed in plain text.
  * str.replace rewrites every ".parquet" occurrence in the path and returns
    the path unchanged when the substring is absent, in which case the output
    path equals the input path.
  * argv[1] is read without checking that an argument was supplied.

diff --git a/utils/export_records.py b/utils/export_records.py
deleted file mode 100644
index e0f6fa7..0000000
--- a/utils/export_records.py
+++ /dev/null
@@ -1,8 +0,0 @@
-import polars as pl
-import polars_hash as pl_hash
-
-pl.scan_parquet("records.parquet").with_columns(
-    pl.col("user_id").cast(pl.String).add("Lt2N5xgjJOqRsT5qVt7wWYw6SqOPZDI7")
-).with_columns(pl_hash.col("user_id").chash.sha2_256()).collect().write_parquet(
-    "records_pub.parquet"
-)
diff --git a/utils/export_regions.py b/utils/export_regions.py
deleted file mode 100644
index 086b57b..0000000
--- a/utils/export_regions.py
+++ /dev/null
@@ -1,8 +0,0 @@
-import polars as pl
-import polars_hash as pl_hash
-
-pl.scan_parquet("regions.parquet").with_columns(
-    pl.col("user_id").cast(pl.String).add("Lt2N5xgjJOqRsT5qVt7wWYw6SqOPZDI7")
-).with_columns(pl_hash.col("user_id").chash.sha2_256()).collect().write_parquet(
-    "regions_pub.parquet"
-)
diff --git a/utils/hash_userid.py b/utils/hash_userid.py
new file mode 100644
index 0000000..c9b40cc
--- /dev/null
+++ b/utils/hash_userid.py
@@ -0,0 +1,11 @@
+from sys import argv
+import polars as pl
+import polars_hash as pl_hash
+
+file = argv[1]
+
+pl.scan_parquet(file).with_columns(
+    pl.col("user_id").cast(pl.String).add("Lt2N5xgjJOqRsT5qVt7wWYw6SqOPZDI7")
+).with_columns(
+    pl_hash.col("user_id").chash.sha2_256().str.head(16)
+).collect().write_parquet(file.replace(".parquet", "_pub.parquet"))