diff --git a/.gitignore b/.gitignore index bb6faca..f943528 100644 --- a/.gitignore +++ b/.gitignore @@ -4,7 +4,7 @@ /players.redb* -/players*.json* +/players*.parquet /b50*.json* /region*.json* diff --git a/Cargo.lock b/Cargo.lock index e75e5f0..b955164 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -28,6 +28,20 @@ dependencies = [ "cpufeatures", ] +[[package]] +name = "ahash" +version = "0.8.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" +dependencies = [ + "cfg-if", + "const-random", + "getrandom 0.3.3", + "once_cell", + "version_check", + "zerocopy", +] + [[package]] name = "aligned-array" version = "1.0.1" @@ -37,6 +51,21 @@ dependencies = [ "generic-array", ] +[[package]] +name = "alloc-no-stdlib" +version = "2.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc7bb162ec39d46ab1ca8c77bf72e890535becd1751bb45f64c597edb4c8c6b3" + +[[package]] +name = "alloc-stdlib" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94fb8275041c72129eb51b7d0322c29b8387a0386127718b096429201a5d6ece" +dependencies = [ + "alloc-no-stdlib", +] + [[package]] name = "android-tzdata" version = "0.1.1" @@ -64,12 +93,113 @@ version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" +[[package]] +name = "arrow-array" +version = "56.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2730bc045d62bb2e53ef8395b7d4242f5c8102f41ceac15e8395b9ac3d08461" +dependencies = [ + "ahash", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "chrono", + "half", + "hashbrown", + "num", +] + +[[package]] +name = "arrow-buffer" +version = "56.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "54295b93beb702ee9a6f6fbced08ad7f4d76ec1c297952d4b83cf68755421d1d" +dependencies = [ + "bytes", + "half", + "num", +] + +[[package]] +name = "arrow-cast" +version = "56.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67e8bcb7dc971d779a7280593a1bf0c2743533b8028909073e804552e85e75b5" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "atoi", + "base64", + "chrono", + "half", + "lexical-core", + "num", + "ryu", +] + +[[package]] +name = "arrow-data" +version = "56.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97c22fe3da840039c69e9f61f81e78092ea36d57037b4900151f063615a2f6b4" +dependencies = [ + "arrow-buffer", + "arrow-schema", + "half", + "num", +] + +[[package]] +name = "arrow-ipc" +version = "56.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "778de14c5a69aedb27359e3dd06dd5f9c481d5f6ee9fbae912dba332fd64636b" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "flatbuffers", +] + +[[package]] +name = "arrow-schema" +version = "56.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85fa1babc4a45fdc64a92175ef51ff00eba5ebbc0007962fecf8022ac1c6ce28" + +[[package]] +name = "arrow-select" +version = "56.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d8854d15f1cf5005b4b358abeb60adea17091ff5bdd094dca5d3f73787d81170" +dependencies = [ + "ahash", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "num", +] + [[package]] name = "async-task" version = "4.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b75356056920673b02621b35afd0f7dda9306d03c79a30f5c56c44cf256e3de" +[[package]] +name = "atoi" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f28d99ec8bfea296261ca1af174f24225171fea9664ba9003cbebee704810528" +dependencies = [ + "num-traits", +] + [[package]] name = "atomic" version = "0.5.3" @@ -97,6 +227,12 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] +name = "base64" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" + [[package]] name = "bincode" version = "2.0.1" @@ -150,12 +286,39 @@ dependencies = [ "objc2", ] +[[package]] +name = "brotli" +version = "8.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9991eea70ea4f293524138648e41ee89b0b2b12ddef3b255effa43c8056e0e0d" +dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", + "brotli-decompressor", +] + +[[package]] +name = "brotli-decompressor" +version = "5.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "874bb8112abecc98cbd6d81ea4fa7e94fb9449648c93cc89aa40c81c24d7de03" +dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", +] + [[package]] name = "bumpalo" version = "3.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + [[package]] name = "bytes" version = "1.10.1" @@ -177,6 +340,8 @@ version = "1.2.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c3a42d84bb6b69d3a8b3eaacf0d88f179e1929695e1ad012b6cf64d9caaa5fd2" dependencies = [ + "jobserver", + "libc", "shlex", ] @@ -442,6 +607,26 @@ version = "0.9.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8" +[[package]] +name = "const-random" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87e00182fe74b066627d63b85fd550ac2998d4b0bd86bfed477a0ae4c7c71359" +dependencies = [ + "const-random-macro", +] + +[[package]] +name = "const-random-macro" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e" +dependencies = [ + "getrandom 0.2.16", + "once_cell", + "tiny-keccak", +] + [[package]] name = "core-foundation-sys" version = "0.8.7" @@ -510,6 +695,12 @@ version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" +[[package]] +name = "crunchy" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" + [[package]] name = "crypto-common" version = "0.1.6" @@ -606,6 +797,16 @@ dependencies = [ "windows-sys 0.60.2", ] +[[package]] +name = "flatbuffers" +version = "25.2.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1045398c1bfd89168b5fd3f1fc11f6e70b34f6f66300c87d44d3de849463abf1" +dependencies = [ + "bitflags", + "rustc_version", +] + [[package]] name = "flate2" version = "1.1.2" @@ -613,6 +814,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4a3d7db9596fecd151c5f638c0ee5d5bd487b6e0ea232e5dc96d5250f6f94b1d" dependencies = [ "crc32fast", + "libz-rs-sys", "miniz_oxide", ] @@ -730,6 +932,17 @@ version = "0.31.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" +[[package]] +name = "half" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "459196ed295495a68f7d7fe1d84f6c4b7ff0e21fe3017b2f283c6fac3ad803c9" +dependencies = [ + "cfg-if", + "crunchy", + "num-traits", +] + [[package]] name = "hashbrown" version = "0.15.4" @@ -813,6 +1026,12 @@ dependencies = [ "generic-array", ] +[[package]] +name = "integer-encoding" +version = "3.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" + [[package]] name = "io-uring" version = "0.7.9" @@ -852,6 +1071,16 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" +[[package]] +name = "jobserver" +version = "0.1.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38f262f097c174adebe41eb73d66ae9c06b2844fb0da69969647bbddd9b0538a" +dependencies = [ + "getrandom 0.3.3", + "libc", +] + [[package]] name = "js-sys" version = "0.3.77" @@ -862,12 +1091,91 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "lexical-core" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b765c31809609075565a70b4b71402281283aeda7ecaf4818ac14a7b2ade8958" +dependencies = [ + "lexical-parse-float", + "lexical-parse-integer", + "lexical-util", + "lexical-write-float", + "lexical-write-integer", +] + +[[package]] +name = "lexical-parse-float" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de6f9cb01fb0b08060209a057c048fcbab8717b4c1ecd2eac66ebfe39a65b0f2" +dependencies = [ + "lexical-parse-integer", + "lexical-util", + "static_assertions", +] + +[[package]] +name = "lexical-parse-integer" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72207aae22fc0a121ba7b6d479e42cbfea549af1479c3f3a4f12c70dd66df12e" +dependencies = [ + "lexical-util", + "static_assertions", +] + +[[package]] +name = "lexical-util" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a82e24bf537fd24c177ffbbdc6ebcc8d54732c35b50a3f28cc3f4e4c949a0b3" +dependencies = [ + "static_assertions", +] + +[[package]] +name = "lexical-write-float" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c5afc668a27f460fb45a81a757b6bf2f43c2d7e30cb5a2dcd3abf294c78d62bd" +dependencies = [ + "lexical-util", + "lexical-write-integer", + "static_assertions", +] + +[[package]] +name = "lexical-write-integer" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "629ddff1a914a836fb245616a7888b62903aae58fa771e1d83943035efa0f978" +dependencies = [ + "lexical-util", + "static_assertions", +] + [[package]] name = "libc" version = "0.2.174" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1171693293099992e19cddea4e8b849964e9846f4acee11b3948bcc337be8776" +[[package]] +name = "libm" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9fbbcab51052fe104eb5e5d351cf728d30a5be1fe14d9be8a3b097481fb97de" + +[[package]] +name = "libz-rs-sys" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "172a788537a2221661b480fee8dc5f96c580eb34fa88764d3205dc356c7e4221" +dependencies = [ + "zlib-rs", +] + [[package]] name = "libz-sys" version = "1.1.22" @@ -908,6 +1216,15 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154" +[[package]] +name = "lz4_flex" +version = "0.11.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08ab2867e3eeeca90e844d1940eab391c9dc5228783db2ed999acbc0a9ed375a" +dependencies = [ + "twox-hash", +] + [[package]] name = "md5" version = "0.8.0" @@ -988,6 +1305,70 @@ dependencies = [ "minimal-lexical", ] +[[package]] +name = "num" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23" +dependencies = [ + "num-bigint", + "num-complex", + "num-integer", + "num-iter", + "num-rational", + "num-traits", +] + +[[package]] +name = "num-bigint" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" +dependencies = [ + "num-integer", + "num-traits", +] + +[[package]] +name = "num-complex" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-iter" +version = "0.1.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-rational" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824" +dependencies = [ + "num-bigint", + "num-integer", + "num-traits", +] + [[package]] name = "num-traits" version = "0.2.19" @@ -995,6 +1376,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" dependencies = [ "autocfg", + "libm", ] [[package]] @@ -1142,6 +1524,15 @@ version = "1.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" +[[package]] +name = "ordered-float" +version = "2.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68f19d67e5a2795c94e73e0bb1cc1a7edeb2e28efd39e2e1c9b7a40c1108b11c" +dependencies = [ + "num-traits", +] + [[package]] name = "os_pipe" version = "1.2.2" @@ -1181,6 +1572,51 @@ dependencies = [ "syn 2.0.104", ] +[[package]] +name = "parquet" +version = "56.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7288a07ed5d25939a90f9cb1ca5afa6855faa08ec7700613511ae64bdb0620c" +dependencies = [ + "ahash", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-ipc", + "arrow-schema", + "arrow-select", + "base64", + "brotli", + "bytes", + "chrono", + "flate2", + "half", + "hashbrown", + "lz4_flex", + "num", + "num-bigint", + "paste", + "seq-macro", + "simdutf8", + "snap", + "thrift", + "twox-hash", + "zstd", +] + +[[package]] +name = "parquet_derive" +version = "56.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c7f5c3f1365cd144c2f881ef7045bc65bdb0fe1259661c0e533aedf2a93627e" +dependencies = [ + "parquet", + "proc-macro2", + "quote", + "syn 2.0.104", +] + [[package]] name = "paste" version = "1.0.15" @@ -1494,6 +1930,8 @@ dependencies = [ "md5", "music-db", "nyquest", + "parquet", + "parquet_derive", "serde", "serde_json", "snafu", @@ -1512,6 +1950,7 @@ dependencies = [ "futures-util", "nyquest-preset", "palc", + "parquet", "redb", "sdgb-api", "serde", @@ -1529,6 +1968,12 @@ version = "1.0.26" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "56e6fa9c48d24d85fb3de5ad847117517440f6beceb7798af16b4a87d616b8d0" +[[package]] +name = "seq-macro" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bc711410fbe7399f390ca1c3b60ad0f53f80e95c5eb935e52268a0e2cd49acc" + [[package]] name = "serde" version = "1.0.219" @@ -1576,6 +2021,12 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" +[[package]] +name = "simdutf8" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e" + [[package]] name = "slab" version = "0.4.10" @@ -1604,6 +2055,12 @@ dependencies = [ "syn 2.0.104", ] +[[package]] +name = "snap" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" + [[package]] name = "socket2" version = "0.5.10" @@ -1670,6 +2127,12 @@ dependencies = [ "lock_api", ] +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + [[package]] name = "strum" version = "0.24.1" @@ -1781,6 +2244,26 @@ dependencies = [ "syn 2.0.104", ] +[[package]] +name = "thrift" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e54bc85fc7faa8bc175c4bab5b92ba8d9a3ce893d0e9f42cc455c8ab16a9e09" +dependencies = [ + "byteorder", + "integer-encoding", + "ordered-float", +] + +[[package]] +name = "tiny-keccak" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" +dependencies = [ + "crunchy", +] + [[package]] name = "tinyvec" version = "1.9.0" @@ -1882,6 +2365,12 @@ dependencies = [ "once_cell", ] +[[package]] +name = "twox-hash" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b907da542cbced5261bd3256de1b3a1bf340a3d37f93425a07362a1d687de56" + [[package]] name = "typenum" version = "1.18.0" @@ -2345,3 +2834,37 @@ name = "zeroize" version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde" + +[[package]] +name = "zlib-rs" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "626bd9fa9734751fc50d6060752170984d7053f5a39061f524cda68023d4db8a" + +[[package]] +name = "zstd" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e91ee311a569c327171651566e07972200e76fcfe2242a4fa446149a3881c08a" +dependencies = [ + "zstd-safe", +] + +[[package]] +name = "zstd-safe" +version = "7.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f49c4d5f0abb602a93fb8736af2a4f4dd9512e36f7f570d66e65ff867ed3b9d" +dependencies = [ + "zstd-sys", +] + +[[package]] +name = "zstd-sys" +version = "2.0.15+zstd.1.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb81183ddd97d0c74cedf1d50d85c8d08c1b8b68ee863bdee9e706eedba1a237" +dependencies = [ + "cc", + "pkg-config", +] diff --git a/Cargo.toml b/Cargo.toml index e3df531..fc86905 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,8 +21,10 @@ compio = { version = "0.15.0", features = ["runtime"] } redb = "2.6.2" crabtime = { git = "https://github.com/wdanilo/crabtime.git", rev = "2ed856f5" } +parquet = "56.0.0" + [profile.release] -lto = true +lto = "thin" strip = true -codegen-units = 1 +codegen-units = 4 panic = "abort" diff --git a/sdgb-api/Cargo.toml b/sdgb-api/Cargo.toml index 3ff1724..8aea64d 100644 --- a/sdgb-api/Cargo.toml +++ b/sdgb-api/Cargo.toml @@ -45,3 +45,5 @@ aes = "0.8.4" cipher = { version = "0.4.4", features = ["block-padding"] } bincode = { version = "2.0.1", optional = true } +parquet = { workspace = true } +parquet_derive = { version = "56.0.0" } diff --git a/sdgb-api/src/title/model/get_user_preview_api/mod.rs b/sdgb-api/src/title/model/get_user_preview_api/mod.rs index 61fcbd3..7f6cb9d 100644 --- a/sdgb-api/src/title/model/get_user_preview_api/mod.rs +++ b/sdgb-api/src/title/model/get_user_preview_api/mod.rs @@ -1,6 +1,7 @@ use std::fmt::Display; use bincode::{Decode, Encode}; +use parquet_derive::ParquetRecordWriter; use serde::{Deserialize, Serialize}; #[derive(Serialize)] @@ -15,7 +16,7 @@ impl From for GetUserPreviewApi { } } -#[derive(Debug, Clone, Serialize, Deserialize, Encode, Decode)] +#[derive(Debug, Clone, Serialize, Deserialize, Encode, Decode, ParquetRecordWriter)] #[serde(rename_all = "camelCase")] pub struct GetUserPreviewApiResp { pub user_id: u32, diff --git a/sdgb-api/src/title/model/get_user_region_api/mod.rs b/sdgb-api/src/title/model/get_user_region_api/mod.rs index c264418..e8232ab 100644 --- a/sdgb-api/src/title/model/get_user_region_api/mod.rs +++ b/sdgb-api/src/title/model/get_user_region_api/mod.rs @@ -1,4 +1,5 @@ use bincode::{Decode, Encode}; +use parquet_derive::ParquetRecordWriter; use serde::{Deserialize, Serialize}; #[derive(Default, Debug, Clone, PartialEq, Serialize, Deserialize)] @@ -13,6 +14,42 @@ impl From for GetUserRegionApi { } } +impl From for Vec { + fn from( + GetUserRegionApiResp { + user_id, + user_region_list, + .. + }: GetUserRegionApiResp, + ) -> Self { + user_region_list + .into_iter() + .map( + |UserRegion { + region_id, + play_count, + created, + }| { + UserRegionFlatten { + user_id, + region_id, + play_count, + created, + } + }, + ) + .collect() + } +} + +#[derive(Default, Debug, Clone, PartialEq, ParquetRecordWriter)] +pub struct UserRegionFlatten { + pub user_id: u32, + pub region_id: u32, + pub play_count: i64, + pub created: String, +} + #[derive(Default, Debug, Clone, PartialEq, Serialize, Deserialize, Encode, Decode)] #[serde(rename_all = "camelCase")] pub struct GetUserRegionApiResp { diff --git a/sdgb-api/src/title/model/mod.rs b/sdgb-api/src/title/model/mod.rs index 079d20a..ba9c431 100644 --- a/sdgb-api/src/title/model/mod.rs +++ b/sdgb-api/src/title/model/mod.rs @@ -26,7 +26,9 @@ mod get_user_music_api; pub use get_user_music_api::{GetUserMusicApi, GetUserMusicApiResp, UserMusic, UserMusicDetail}; mod get_user_region_api; -pub use get_user_region_api::{GetUserRegionApi, GetUserRegionApiResp, UserRegion}; +pub use get_user_region_api::{ + GetUserRegionApi, GetUserRegionApiResp, UserRegion, UserRegionFlatten, +}; mod dxrating; pub use dxrating::{ diff --git a/sdgb-cli/Cargo.toml b/sdgb-cli/Cargo.toml index 518110b..d2e44fb 100644 --- a/sdgb-cli/Cargo.toml +++ b/sdgb-cli/Cargo.toml @@ -42,5 +42,7 @@ ctrlc = { version = "3.4.7", features = ["termination"] } # magic macro crabtime = { workspace = true } +parquet = { workspace = true } + [build-dependencies] version_check = "0.9.5" diff --git a/sdgb-cli/src/main.rs b/sdgb-cli/src/main.rs index 82662f8..edd5f61 100644 --- a/sdgb-cli/src/main.rs +++ b/sdgb-cli/src/main.rs @@ -311,23 +311,35 @@ async fn main() -> Result<(), Box> { #[cfg(feature = "fetchall")] Commands::ListAllUserDump {} => { - use crate::{cache::PLAYERS, utils::helpers::dump_cache}; + use crate::{ + cache::PLAYERS, + utils::helpers::{dump_parquet, read_cache}, + }; - dump_cache::("players.json", PLAYERS)?; - } - #[cfg(feature = "fetchall")] - Commands::ScrapeAllB50Dump {} => { - use crate::{cache::B50, utils::helpers::dump_cache}; - - dump_cache::("b50.json", B50)?; + let players: Vec = read_cache(PLAYERS)?; + dump_parquet(players, "players.parquet")?; } #[cfg(feature = "fetchall")] Commands::ScrapeAllRegionDump {} => { - use sdgb_api::title::model::GetUserRegionApiResp; + use crate::{ + cache::REGIONS, + utils::helpers::{dump_parquet, read_cache}, + }; + use sdgb_api::title::model::{GetUserRegionApiResp, UserRegionFlatten}; - use crate::{cache::REGIONS, utils::helpers::dump_cache}; + let regions: Vec = read_cache(REGIONS)?; + let regions_flat = regions + .into_iter() + .map(Vec::::from) + .flatten() + .collect::>(); + dump_parquet(regions_flat, "regions.parquet")?; + } + #[cfg(feature = "fetchall")] + Commands::ScrapeAllB50Dump {} => { + use crate::{cache::B50, utils::helpers::dump_json}; - dump_cache::("region.json", REGIONS)?; + dump_json::("b50.json", B50)?; } Commands::Userdata { user_id } => { diff --git a/sdgb-cli/src/utils/helpers/mod.rs b/sdgb-cli/src/utils/helpers/mod.rs index 09d70d7..f989af6 100644 --- a/sdgb-cli/src/utils/helpers/mod.rs +++ b/sdgb-cli/src/utils/helpers/mod.rs @@ -1,9 +1,14 @@ +use std::sync::Arc; use std::{fs::OpenOptions, io::BufWriter}; use std::{path::Path, sync::atomic::Ordering}; use futures_util::StreamExt; use nyquest_preset::nyquest::AsyncClient; +use parquet::basic::BrotliLevel; +use parquet::file::properties::WriterProperties; +use parquet::file::writer::SerializedFileWriter; +use parquet::record::RecordWriter; use redb::ReadableTable; use redb::TableDefinition; use serde::Serialize; @@ -52,7 +57,46 @@ where .collect::>()) } -pub fn dump_cache( +pub fn dump_parquet( + data: impl Into>, + output_path: impl AsRef, +) -> Result<(), Box> +where + for<'a> &'a [D]: RecordWriter, +{ + let data = data.into(); + let file = OpenOptions::new() + .create(true) + .truncate(true) + .write(true) + .open(output_path)?; + + #[cfg(file_lock_ready)] + file.try_lock()?; + + let writer = BufWriter::new(file); + let schema = data.as_slice().schema()?; + let props = Arc::new( + WriterProperties::builder() + .set_compression(parquet::basic::Compression::BROTLI(BrotliLevel::try_new( + 6, + )?)) + .build(), + ); + + let mut writer = SerializedFileWriter::new(writer, schema, props).unwrap(); + let mut row_group = writer.next_row_group().unwrap(); + + data.as_slice().write_to_row_group(&mut row_group)?; + row_group.close()?; + + writer.close().unwrap(); + info!("dumped {} user id", data.len()); + + Ok(()) +} + +pub fn dump_json( output_path: impl AsRef, definition: TableDefinition<'_, u32, Vec>, ) -> Result<(), Box>