perf: parquet based data export

This commit is contained in:
mokurin000
2025-08-04 01:49:43 +08:00
parent 89d8177180
commit 73e1046be9
10 changed files with 642 additions and 17 deletions

2
.gitignore vendored
View File

@@ -4,7 +4,7 @@
/players.redb* /players.redb*
/players*.json* /players*.parquet
/b50*.json* /b50*.json*
/region*.json* /region*.json*

523
Cargo.lock generated
View File

@@ -28,6 +28,20 @@ dependencies = [
"cpufeatures", "cpufeatures",
] ]
[[package]]
name = "ahash"
version = "0.8.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75"
dependencies = [
"cfg-if",
"const-random",
"getrandom 0.3.3",
"once_cell",
"version_check",
"zerocopy",
]
[[package]] [[package]]
name = "aligned-array" name = "aligned-array"
version = "1.0.1" version = "1.0.1"
@@ -37,6 +51,21 @@ dependencies = [
"generic-array", "generic-array",
] ]
[[package]]
name = "alloc-no-stdlib"
version = "2.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cc7bb162ec39d46ab1ca8c77bf72e890535becd1751bb45f64c597edb4c8c6b3"
[[package]]
name = "alloc-stdlib"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94fb8275041c72129eb51b7d0322c29b8387a0386127718b096429201a5d6ece"
dependencies = [
"alloc-no-stdlib",
]
[[package]] [[package]]
name = "android-tzdata" name = "android-tzdata"
version = "0.1.1" version = "0.1.1"
@@ -64,12 +93,113 @@ version = "0.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50"
[[package]]
name = "arrow-array"
version = "56.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2730bc045d62bb2e53ef8395b7d4242f5c8102f41ceac15e8395b9ac3d08461"
dependencies = [
"ahash",
"arrow-buffer",
"arrow-data",
"arrow-schema",
"chrono",
"half",
"hashbrown",
"num",
]
[[package]]
name = "arrow-buffer"
version = "56.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "54295b93beb702ee9a6f6fbced08ad7f4d76ec1c297952d4b83cf68755421d1d"
dependencies = [
"bytes",
"half",
"num",
]
[[package]]
name = "arrow-cast"
version = "56.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "67e8bcb7dc971d779a7280593a1bf0c2743533b8028909073e804552e85e75b5"
dependencies = [
"arrow-array",
"arrow-buffer",
"arrow-data",
"arrow-schema",
"arrow-select",
"atoi",
"base64",
"chrono",
"half",
"lexical-core",
"num",
"ryu",
]
[[package]]
name = "arrow-data"
version = "56.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "97c22fe3da840039c69e9f61f81e78092ea36d57037b4900151f063615a2f6b4"
dependencies = [
"arrow-buffer",
"arrow-schema",
"half",
"num",
]
[[package]]
name = "arrow-ipc"
version = "56.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "778de14c5a69aedb27359e3dd06dd5f9c481d5f6ee9fbae912dba332fd64636b"
dependencies = [
"arrow-array",
"arrow-buffer",
"arrow-data",
"arrow-schema",
"flatbuffers",
]
[[package]]
name = "arrow-schema"
version = "56.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "85fa1babc4a45fdc64a92175ef51ff00eba5ebbc0007962fecf8022ac1c6ce28"
[[package]]
name = "arrow-select"
version = "56.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d8854d15f1cf5005b4b358abeb60adea17091ff5bdd094dca5d3f73787d81170"
dependencies = [
"ahash",
"arrow-array",
"arrow-buffer",
"arrow-data",
"arrow-schema",
"num",
]
[[package]] [[package]]
name = "async-task" name = "async-task"
version = "4.7.1" version = "4.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b75356056920673b02621b35afd0f7dda9306d03c79a30f5c56c44cf256e3de" checksum = "8b75356056920673b02621b35afd0f7dda9306d03c79a30f5c56c44cf256e3de"
[[package]]
name = "atoi"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f28d99ec8bfea296261ca1af174f24225171fea9664ba9003cbebee704810528"
dependencies = [
"num-traits",
]
[[package]] [[package]]
name = "atomic" name = "atomic"
version = "0.5.3" version = "0.5.3"
@@ -97,6 +227,12 @@ dependencies = [
"windows-targets 0.52.6", "windows-targets 0.52.6",
] ]
[[package]]
name = "base64"
version = "0.22.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6"
[[package]] [[package]]
name = "bincode" name = "bincode"
version = "2.0.1" version = "2.0.1"
@@ -150,12 +286,39 @@ dependencies = [
"objc2", "objc2",
] ]
[[package]]
name = "brotli"
version = "8.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9991eea70ea4f293524138648e41ee89b0b2b12ddef3b255effa43c8056e0e0d"
dependencies = [
"alloc-no-stdlib",
"alloc-stdlib",
"brotli-decompressor",
]
[[package]]
name = "brotli-decompressor"
version = "5.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "874bb8112abecc98cbd6d81ea4fa7e94fb9449648c93cc89aa40c81c24d7de03"
dependencies = [
"alloc-no-stdlib",
"alloc-stdlib",
]
[[package]] [[package]]
name = "bumpalo" name = "bumpalo"
version = "3.19.0" version = "3.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43"
[[package]]
name = "byteorder"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
[[package]] [[package]]
name = "bytes" name = "bytes"
version = "1.10.1" version = "1.10.1"
@@ -177,6 +340,8 @@ version = "1.2.31"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c3a42d84bb6b69d3a8b3eaacf0d88f179e1929695e1ad012b6cf64d9caaa5fd2" checksum = "c3a42d84bb6b69d3a8b3eaacf0d88f179e1929695e1ad012b6cf64d9caaa5fd2"
dependencies = [ dependencies = [
"jobserver",
"libc",
"shlex", "shlex",
] ]
@@ -442,6 +607,26 @@ version = "0.9.6"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8" checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8"
[[package]]
name = "const-random"
version = "0.1.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "87e00182fe74b066627d63b85fd550ac2998d4b0bd86bfed477a0ae4c7c71359"
dependencies = [
"const-random-macro",
]
[[package]]
name = "const-random-macro"
version = "0.1.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e"
dependencies = [
"getrandom 0.2.16",
"once_cell",
"tiny-keccak",
]
[[package]] [[package]]
name = "core-foundation-sys" name = "core-foundation-sys"
version = "0.8.7" version = "0.8.7"
@@ -510,6 +695,12 @@ version = "0.8.21"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
[[package]]
name = "crunchy"
version = "0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5"
[[package]] [[package]]
name = "crypto-common" name = "crypto-common"
version = "0.1.6" version = "0.1.6"
@@ -606,6 +797,16 @@ dependencies = [
"windows-sys 0.60.2", "windows-sys 0.60.2",
] ]
[[package]]
name = "flatbuffers"
version = "25.2.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1045398c1bfd89168b5fd3f1fc11f6e70b34f6f66300c87d44d3de849463abf1"
dependencies = [
"bitflags",
"rustc_version",
]
[[package]] [[package]]
name = "flate2" name = "flate2"
version = "1.1.2" version = "1.1.2"
@@ -613,6 +814,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4a3d7db9596fecd151c5f638c0ee5d5bd487b6e0ea232e5dc96d5250f6f94b1d" checksum = "4a3d7db9596fecd151c5f638c0ee5d5bd487b6e0ea232e5dc96d5250f6f94b1d"
dependencies = [ dependencies = [
"crc32fast", "crc32fast",
"libz-rs-sys",
"miniz_oxide", "miniz_oxide",
] ]
@@ -730,6 +932,17 @@ version = "0.31.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f"
[[package]]
name = "half"
version = "2.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "459196ed295495a68f7d7fe1d84f6c4b7ff0e21fe3017b2f283c6fac3ad803c9"
dependencies = [
"cfg-if",
"crunchy",
"num-traits",
]
[[package]] [[package]]
name = "hashbrown" name = "hashbrown"
version = "0.15.4" version = "0.15.4"
@@ -813,6 +1026,12 @@ dependencies = [
"generic-array", "generic-array",
] ]
[[package]]
name = "integer-encoding"
version = "3.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02"
[[package]] [[package]]
name = "io-uring" name = "io-uring"
version = "0.7.9" version = "0.7.9"
@@ -852,6 +1071,16 @@ version = "1.0.15"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c"
[[package]]
name = "jobserver"
version = "0.1.33"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38f262f097c174adebe41eb73d66ae9c06b2844fb0da69969647bbddd9b0538a"
dependencies = [
"getrandom 0.3.3",
"libc",
]
[[package]] [[package]]
name = "js-sys" name = "js-sys"
version = "0.3.77" version = "0.3.77"
@@ -862,12 +1091,91 @@ dependencies = [
"wasm-bindgen", "wasm-bindgen",
] ]
[[package]]
name = "lexical-core"
version = "1.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b765c31809609075565a70b4b71402281283aeda7ecaf4818ac14a7b2ade8958"
dependencies = [
"lexical-parse-float",
"lexical-parse-integer",
"lexical-util",
"lexical-write-float",
"lexical-write-integer",
]
[[package]]
name = "lexical-parse-float"
version = "1.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "de6f9cb01fb0b08060209a057c048fcbab8717b4c1ecd2eac66ebfe39a65b0f2"
dependencies = [
"lexical-parse-integer",
"lexical-util",
"static_assertions",
]
[[package]]
name = "lexical-parse-integer"
version = "1.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72207aae22fc0a121ba7b6d479e42cbfea549af1479c3f3a4f12c70dd66df12e"
dependencies = [
"lexical-util",
"static_assertions",
]
[[package]]
name = "lexical-util"
version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a82e24bf537fd24c177ffbbdc6ebcc8d54732c35b50a3f28cc3f4e4c949a0b3"
dependencies = [
"static_assertions",
]
[[package]]
name = "lexical-write-float"
version = "1.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c5afc668a27f460fb45a81a757b6bf2f43c2d7e30cb5a2dcd3abf294c78d62bd"
dependencies = [
"lexical-util",
"lexical-write-integer",
"static_assertions",
]
[[package]]
name = "lexical-write-integer"
version = "1.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "629ddff1a914a836fb245616a7888b62903aae58fa771e1d83943035efa0f978"
dependencies = [
"lexical-util",
"static_assertions",
]
[[package]] [[package]]
name = "libc" name = "libc"
version = "0.2.174" version = "0.2.174"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1171693293099992e19cddea4e8b849964e9846f4acee11b3948bcc337be8776" checksum = "1171693293099992e19cddea4e8b849964e9846f4acee11b3948bcc337be8776"
[[package]]
name = "libm"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f9fbbcab51052fe104eb5e5d351cf728d30a5be1fe14d9be8a3b097481fb97de"
[[package]]
name = "libz-rs-sys"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "172a788537a2221661b480fee8dc5f96c580eb34fa88764d3205dc356c7e4221"
dependencies = [
"zlib-rs",
]
[[package]] [[package]]
name = "libz-sys" name = "libz-sys"
version = "1.1.22" version = "1.1.22"
@@ -908,6 +1216,15 @@ version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154" checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154"
[[package]]
name = "lz4_flex"
version = "0.11.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08ab2867e3eeeca90e844d1940eab391c9dc5228783db2ed999acbc0a9ed375a"
dependencies = [
"twox-hash",
]
[[package]] [[package]]
name = "md5" name = "md5"
version = "0.8.0" version = "0.8.0"
@@ -988,6 +1305,70 @@ dependencies = [
"minimal-lexical", "minimal-lexical",
] ]
[[package]]
name = "num"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23"
dependencies = [
"num-bigint",
"num-complex",
"num-integer",
"num-iter",
"num-rational",
"num-traits",
]
[[package]]
name = "num-bigint"
version = "0.4.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9"
dependencies = [
"num-integer",
"num-traits",
]
[[package]]
name = "num-complex"
version = "0.4.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495"
dependencies = [
"num-traits",
]
[[package]]
name = "num-integer"
version = "0.1.46"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f"
dependencies = [
"num-traits",
]
[[package]]
name = "num-iter"
version = "0.1.45"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf"
dependencies = [
"autocfg",
"num-integer",
"num-traits",
]
[[package]]
name = "num-rational"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824"
dependencies = [
"num-bigint",
"num-integer",
"num-traits",
]
[[package]] [[package]]
name = "num-traits" name = "num-traits"
version = "0.2.19" version = "0.2.19"
@@ -995,6 +1376,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841"
dependencies = [ dependencies = [
"autocfg", "autocfg",
"libm",
] ]
[[package]] [[package]]
@@ -1142,6 +1524,15 @@ version = "1.21.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"
[[package]]
name = "ordered-float"
version = "2.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "68f19d67e5a2795c94e73e0bb1cc1a7edeb2e28efd39e2e1c9b7a40c1108b11c"
dependencies = [
"num-traits",
]
[[package]] [[package]]
name = "os_pipe" name = "os_pipe"
version = "1.2.2" version = "1.2.2"
@@ -1181,6 +1572,51 @@ dependencies = [
"syn 2.0.104", "syn 2.0.104",
] ]
[[package]]
name = "parquet"
version = "56.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c7288a07ed5d25939a90f9cb1ca5afa6855faa08ec7700613511ae64bdb0620c"
dependencies = [
"ahash",
"arrow-array",
"arrow-buffer",
"arrow-cast",
"arrow-data",
"arrow-ipc",
"arrow-schema",
"arrow-select",
"base64",
"brotli",
"bytes",
"chrono",
"flate2",
"half",
"hashbrown",
"lz4_flex",
"num",
"num-bigint",
"paste",
"seq-macro",
"simdutf8",
"snap",
"thrift",
"twox-hash",
"zstd",
]
[[package]]
name = "parquet_derive"
version = "56.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1c7f5c3f1365cd144c2f881ef7045bc65bdb0fe1259661c0e533aedf2a93627e"
dependencies = [
"parquet",
"proc-macro2",
"quote",
"syn 2.0.104",
]
[[package]] [[package]]
name = "paste" name = "paste"
version = "1.0.15" version = "1.0.15"
@@ -1494,6 +1930,8 @@ dependencies = [
"md5", "md5",
"music-db", "music-db",
"nyquest", "nyquest",
"parquet",
"parquet_derive",
"serde", "serde",
"serde_json", "serde_json",
"snafu", "snafu",
@@ -1512,6 +1950,7 @@ dependencies = [
"futures-util", "futures-util",
"nyquest-preset", "nyquest-preset",
"palc", "palc",
"parquet",
"redb", "redb",
"sdgb-api", "sdgb-api",
"serde", "serde",
@@ -1529,6 +1968,12 @@ version = "1.0.26"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "56e6fa9c48d24d85fb3de5ad847117517440f6beceb7798af16b4a87d616b8d0" checksum = "56e6fa9c48d24d85fb3de5ad847117517440f6beceb7798af16b4a87d616b8d0"
[[package]]
name = "seq-macro"
version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1bc711410fbe7399f390ca1c3b60ad0f53f80e95c5eb935e52268a0e2cd49acc"
[[package]] [[package]]
name = "serde" name = "serde"
version = "1.0.219" version = "1.0.219"
@@ -1576,6 +2021,12 @@ version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
[[package]]
name = "simdutf8"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e"
[[package]] [[package]]
name = "slab" name = "slab"
version = "0.4.10" version = "0.4.10"
@@ -1604,6 +2055,12 @@ dependencies = [
"syn 2.0.104", "syn 2.0.104",
] ]
[[package]]
name = "snap"
version = "1.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b"
[[package]] [[package]]
name = "socket2" name = "socket2"
version = "0.5.10" version = "0.5.10"
@@ -1670,6 +2127,12 @@ dependencies = [
"lock_api", "lock_api",
] ]
[[package]]
name = "static_assertions"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
[[package]] [[package]]
name = "strum" name = "strum"
version = "0.24.1" version = "0.24.1"
@@ -1781,6 +2244,26 @@ dependencies = [
"syn 2.0.104", "syn 2.0.104",
] ]
[[package]]
name = "thrift"
version = "0.17.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7e54bc85fc7faa8bc175c4bab5b92ba8d9a3ce893d0e9f42cc455c8ab16a9e09"
dependencies = [
"byteorder",
"integer-encoding",
"ordered-float",
]
[[package]]
name = "tiny-keccak"
version = "2.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237"
dependencies = [
"crunchy",
]
[[package]] [[package]]
name = "tinyvec" name = "tinyvec"
version = "1.9.0" version = "1.9.0"
@@ -1882,6 +2365,12 @@ dependencies = [
"once_cell", "once_cell",
] ]
[[package]]
name = "twox-hash"
version = "2.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b907da542cbced5261bd3256de1b3a1bf340a3d37f93425a07362a1d687de56"
[[package]] [[package]]
name = "typenum" name = "typenum"
version = "1.18.0" version = "1.18.0"
@@ -2345,3 +2834,37 @@ name = "zeroize"
version = "1.8.1" version = "1.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde" checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde"
[[package]]
name = "zlib-rs"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "626bd9fa9734751fc50d6060752170984d7053f5a39061f524cda68023d4db8a"
[[package]]
name = "zstd"
version = "0.13.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e91ee311a569c327171651566e07972200e76fcfe2242a4fa446149a3881c08a"
dependencies = [
"zstd-safe",
]
[[package]]
name = "zstd-safe"
version = "7.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f49c4d5f0abb602a93fb8736af2a4f4dd9512e36f7f570d66e65ff867ed3b9d"
dependencies = [
"zstd-sys",
]
[[package]]
name = "zstd-sys"
version = "2.0.15+zstd.1.5.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eb81183ddd97d0c74cedf1d50d85c8d08c1b8b68ee863bdee9e706eedba1a237"
dependencies = [
"cc",
"pkg-config",
]

View File

@@ -21,8 +21,10 @@ compio = { version = "0.15.0", features = ["runtime"] }
redb = "2.6.2" redb = "2.6.2"
crabtime = { git = "https://github.com/wdanilo/crabtime.git", rev = "2ed856f5" } crabtime = { git = "https://github.com/wdanilo/crabtime.git", rev = "2ed856f5" }
parquet = "56.0.0"
[profile.release] [profile.release]
lto = true lto = "thin"
strip = true strip = true
codegen-units = 1 codegen-units = 4
panic = "abort" panic = "abort"

View File

@@ -45,3 +45,5 @@ aes = "0.8.4"
cipher = { version = "0.4.4", features = ["block-padding"] } cipher = { version = "0.4.4", features = ["block-padding"] }
bincode = { version = "2.0.1", optional = true } bincode = { version = "2.0.1", optional = true }
parquet = { workspace = true }
parquet_derive = { version = "56.0.0" }

View File

@@ -1,6 +1,7 @@
use std::fmt::Display; use std::fmt::Display;
use bincode::{Decode, Encode}; use bincode::{Decode, Encode};
use parquet_derive::ParquetRecordWriter;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
#[derive(Serialize)] #[derive(Serialize)]
@@ -15,7 +16,7 @@ impl From<u32> for GetUserPreviewApi {
} }
} }
#[derive(Debug, Clone, Serialize, Deserialize, Encode, Decode)] #[derive(Debug, Clone, Serialize, Deserialize, Encode, Decode, ParquetRecordWriter)]
#[serde(rename_all = "camelCase")] #[serde(rename_all = "camelCase")]
pub struct GetUserPreviewApiResp { pub struct GetUserPreviewApiResp {
pub user_id: u32, pub user_id: u32,

View File

@@ -1,4 +1,5 @@
use bincode::{Decode, Encode}; use bincode::{Decode, Encode};
use parquet_derive::ParquetRecordWriter;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
#[derive(Default, Debug, Clone, PartialEq, Serialize, Deserialize)] #[derive(Default, Debug, Clone, PartialEq, Serialize, Deserialize)]
@@ -13,6 +14,42 @@ impl From<u32> for GetUserRegionApi {
} }
} }
impl From<GetUserRegionApiResp> for Vec<UserRegionFlatten> {
fn from(
GetUserRegionApiResp {
user_id,
user_region_list,
..
}: GetUserRegionApiResp,
) -> Self {
user_region_list
.into_iter()
.map(
|UserRegion {
region_id,
play_count,
created,
}| {
UserRegionFlatten {
user_id,
region_id,
play_count,
created,
}
},
)
.collect()
}
}
#[derive(Default, Debug, Clone, PartialEq, ParquetRecordWriter)]
pub struct UserRegionFlatten {
pub user_id: u32,
pub region_id: u32,
pub play_count: i64,
pub created: String,
}
#[derive(Default, Debug, Clone, PartialEq, Serialize, Deserialize, Encode, Decode)] #[derive(Default, Debug, Clone, PartialEq, Serialize, Deserialize, Encode, Decode)]
#[serde(rename_all = "camelCase")] #[serde(rename_all = "camelCase")]
pub struct GetUserRegionApiResp { pub struct GetUserRegionApiResp {

View File

@@ -26,7 +26,9 @@ mod get_user_music_api;
pub use get_user_music_api::{GetUserMusicApi, GetUserMusicApiResp, UserMusic, UserMusicDetail}; pub use get_user_music_api::{GetUserMusicApi, GetUserMusicApiResp, UserMusic, UserMusicDetail};
mod get_user_region_api; mod get_user_region_api;
pub use get_user_region_api::{GetUserRegionApi, GetUserRegionApiResp, UserRegion}; pub use get_user_region_api::{
GetUserRegionApi, GetUserRegionApiResp, UserRegion, UserRegionFlatten,
};
mod dxrating; mod dxrating;
pub use dxrating::{ pub use dxrating::{

View File

@@ -42,5 +42,7 @@ ctrlc = { version = "3.4.7", features = ["termination"] }
# magic macro # magic macro
crabtime = { workspace = true } crabtime = { workspace = true }
parquet = { workspace = true }
[build-dependencies] [build-dependencies]
version_check = "0.9.5" version_check = "0.9.5"

View File

@@ -311,23 +311,35 @@ async fn main() -> Result<(), Box<dyn snafu::Error>> {
#[cfg(feature = "fetchall")] #[cfg(feature = "fetchall")]
Commands::ListAllUserDump {} => { Commands::ListAllUserDump {} => {
use crate::{cache::PLAYERS, utils::helpers::dump_cache}; use crate::{
cache::PLAYERS,
utils::helpers::{dump_parquet, read_cache},
};
dump_cache::<GetUserPreviewApiResp>("players.json", PLAYERS)?; let players: Vec<GetUserPreviewApiResp> = read_cache(PLAYERS)?;
} dump_parquet(players, "players.parquet")?;
#[cfg(feature = "fetchall")]
Commands::ScrapeAllB50Dump {} => {
use crate::{cache::B50, utils::helpers::dump_cache};
dump_cache::<GetUserRatingApiResp>("b50.json", B50)?;
} }
#[cfg(feature = "fetchall")] #[cfg(feature = "fetchall")]
Commands::ScrapeAllRegionDump {} => { Commands::ScrapeAllRegionDump {} => {
use sdgb_api::title::model::GetUserRegionApiResp; use crate::{
cache::REGIONS,
utils::helpers::{dump_parquet, read_cache},
};
use sdgb_api::title::model::{GetUserRegionApiResp, UserRegionFlatten};
use crate::{cache::REGIONS, utils::helpers::dump_cache}; let regions: Vec<GetUserRegionApiResp> = read_cache(REGIONS)?;
let regions_flat = regions
.into_iter()
.map(Vec::<UserRegionFlatten>::from)
.flatten()
.collect::<Vec<_>>();
dump_parquet(regions_flat, "regions.parquet")?;
}
#[cfg(feature = "fetchall")]
Commands::ScrapeAllB50Dump {} => {
use crate::{cache::B50, utils::helpers::dump_json};
dump_cache::<GetUserRegionApiResp>("region.json", REGIONS)?; dump_json::<GetUserRatingApiResp>("b50.json", B50)?;
} }
Commands::Userdata { user_id } => { Commands::Userdata { user_id } => {

View File

@@ -1,9 +1,14 @@
use std::sync::Arc;
use std::{fs::OpenOptions, io::BufWriter}; use std::{fs::OpenOptions, io::BufWriter};
use std::{path::Path, sync::atomic::Ordering}; use std::{path::Path, sync::atomic::Ordering};
use futures_util::StreamExt; use futures_util::StreamExt;
use nyquest_preset::nyquest::AsyncClient; use nyquest_preset::nyquest::AsyncClient;
use parquet::basic::BrotliLevel;
use parquet::file::properties::WriterProperties;
use parquet::file::writer::SerializedFileWriter;
use parquet::record::RecordWriter;
use redb::ReadableTable; use redb::ReadableTable;
use redb::TableDefinition; use redb::TableDefinition;
use serde::Serialize; use serde::Serialize;
@@ -52,7 +57,46 @@ where
.collect::<Vec<D>>()) .collect::<Vec<D>>())
} }
pub fn dump_cache<D>( pub fn dump_parquet<D>(
data: impl Into<Vec<D>>,
output_path: impl AsRef<Path>,
) -> Result<(), Box<dyn snafu::Error>>
where
for<'a> &'a [D]: RecordWriter<D>,
{
let data = data.into();
let file = OpenOptions::new()
.create(true)
.truncate(true)
.write(true)
.open(output_path)?;
#[cfg(file_lock_ready)]
file.try_lock()?;
let writer = BufWriter::new(file);
let schema = data.as_slice().schema()?;
let props = Arc::new(
WriterProperties::builder()
.set_compression(parquet::basic::Compression::BROTLI(BrotliLevel::try_new(
6,
)?))
.build(),
);
let mut writer = SerializedFileWriter::new(writer, schema, props).unwrap();
let mut row_group = writer.next_row_group().unwrap();
data.as_slice().write_to_row_group(&mut row_group)?;
row_group.close()?;
writer.close().unwrap();
info!("dumped {} user id", data.len());
Ok(())
}
pub fn dump_json<D>(
output_path: impl AsRef<Path>, output_path: impl AsRef<Path>,
definition: TableDefinition<'_, u32, Vec<u8>>, definition: TableDefinition<'_, u32, Vec<u8>>,
) -> Result<(), Box<dyn snafu::Error>> ) -> Result<(), Box<dyn snafu::Error>>