chore: optimize parquet fields

This commit is contained in:
mokurin000
2025-08-10 22:17:46 +08:00
parent 0ce47537fb
commit 1d2e3fc7cc
5 changed files with 92 additions and 1 deletions

1
.gitignore vendored
View File

@@ -7,6 +7,7 @@
/*.json* /*.json*
/players*.parquet /players*.parquet
/region*.parquet /region*.parquet
/records*.parquet
/.python-version /.python-version
/uv.lock /uv.lock

View File

@@ -64,6 +64,20 @@ pub struct UserMusicDetail {
/// DX 分数 /// DX 分数
pub deluxscore_max: i64, pub deluxscore_max: i64,
/// - D = 0,
/// - C = 1,
/// - B = 2,
/// - BB = 3,
/// - BBB = 4,
/// - A = 5,
/// - AA = 6,
/// - AAA = 7,
/// - S = 8,
/// - S_PLUS = 9,
/// - SS = 10,
/// - SS_PLUS = 11,
/// - SSS = 12,
/// - SSS_PLUS = 13
pub score_rank: i64, pub score_rank: i64,
/// 理论次数 /// 理论次数
@@ -71,6 +85,21 @@ pub struct UserMusicDetail {
pub ext_num2: i64, pub ext_num2: i64,
} }
/// Flattened, width-reduced row form of a [`UserMusicDetail`], tagged with the
/// id of the user the record belongs to.
///
/// Values are built with [`UserMusicDetailFlatten::new`]. When the `parquet`
/// feature is enabled the type also derives
/// `parquet_derive::ParquetRecordWriter`, so a slice of rows can be written as
/// a parquet row group.
///
/// Every field uses a narrower integer type than the `i64` fields visible on
/// [`UserMusicDetail`], which keeps the written columns compact.
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "parquet", derive(parquet_derive::ParquetRecordWriter))]
pub struct UserMusicDetailFlatten {
    /// Id of the user this record belongs to (not part of `UserMusicDetail`).
    pub user_id: u32,
    /// Copied from `UserMusicDetail::music_id`.
    pub music_id: u32,
    /// Narrowed copy of `UserMusicDetail::level`.
    pub level: u8,
    /// Narrowed copy of `UserMusicDetail::play_count`.
    pub play_count: u32,
    /// Narrowed copy of `UserMusicDetail::achievement`.
    pub achievement: u32,
    /// Narrowed copy of `UserMusicDetail::combo_status`.
    pub combo_status: u8,
    /// Narrowed copy of `UserMusicDetail::sync_status`.
    pub sync_status: u8,
    /// DX score; narrowed from the `i64` on `UserMusicDetail`.
    pub deluxscore_max: u16,
    /// Score rank code, from `0` (D) up to `13` (SSS_PLUS); see the table on
    /// `UserMusicDetail::score_rank`.
    pub score_rank: u8,
    /// Narrowed copy of `UserMusicDetail::ext_num1`.
    pub ext_num1: u32,
    /// Narrowed copy of `UserMusicDetail::ext_num2`.
    pub ext_num2: u32,
}
impl Display for UserMusicDetail { impl Display for UserMusicDetail {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
if let Some(music_title) = query_music(self.music_id).map(|i| &i.name) { if let Some(music_title) = query_music(self.music_id).map(|i| &i.name) {
@@ -119,3 +148,35 @@ impl Display for UserMusicDetail {
Ok(()) Ok(())
} }
} }
impl UserMusicDetailFlatten {
    /// Builds a flattened row from one [`UserMusicDetail`], tagging it with
    /// `user_id`.
    ///
    /// `music_id` is carried over unchanged; every other field is converted
    /// with an `as` cast to the column type declared on this struct.
    ///
    /// NOTE(review): `as` casts never fail. `deluxscore_max`, `score_rank` and
    /// `ext_num2` are `i64` on the source struct, so a value outside the
    /// target range is silently truncated rather than reported. The source
    /// types of the remaining fields are not visible here — presumably the
    /// same applies; confirm the upstream value ranges.
    pub fn new(user_id: u32, detail: UserMusicDetail) -> Self {
        // Exhaustive destructuring: adding a field to `UserMusicDetail`
        // becomes a compile error here instead of being dropped unnoticed.
        let UserMusicDetail {
            music_id,
            level,
            play_count,
            achievement,
            combo_status,
            sync_status,
            deluxscore_max,
            score_rank,
            ext_num1,
            ext_num2,
        } = detail;

        Self {
            user_id,
            music_id,
            level: level as u8,
            play_count: play_count as u32,
            achievement: achievement as u32,
            combo_status: combo_status as u8,
            sync_status: sync_status as u8,
            deluxscore_max: deluxscore_max as u16,
            score_rank: score_rank as u8,
            ext_num1: ext_num1 as u32,
            ext_num2: ext_num2 as u32,
        }
    }
}

View File

@@ -23,7 +23,9 @@ pub use get_user_rating_api::{
}; };
mod get_user_music_api; mod get_user_music_api;
pub use get_user_music_api::{GetUserMusicApi, GetUserMusicApiResp, UserMusic, UserMusicDetail}; pub use get_user_music_api::{
GetUserMusicApi, GetUserMusicApiResp, UserMusic, UserMusicDetail, UserMusicDetailFlatten,
};
mod get_user_region_api; mod get_user_region_api;
pub use get_user_region_api::{ pub use get_user_region_api::{

View File

@@ -123,6 +123,8 @@ pub enum Commands {
ScrapeAllB50Dump {}, ScrapeAllB50Dump {},
#[cfg(feature = "fetchall")] #[cfg(feature = "fetchall")]
ScrapeAllRegionDump {}, ScrapeAllRegionDump {},
#[cfg(feature = "fetchall")]
ScrapeAllRecordDump {},
Logout { Logout {
#[arg(short, long)] #[arg(short, long)]

View File

@@ -337,6 +337,31 @@ async fn main() -> Result<(), Box<dyn snafu::Error>> {
dump_parquet(regions_flat, "regions.parquet")?; dump_parquet(regions_flat, "regions.parquet")?;
} }
#[cfg(feature = "fetchall")] #[cfg(feature = "fetchall")]
Commands::ScrapeAllRecordDump {} => {
    use crate::{
        cache::RECORDS,
        utils::helpers::{dump_parquet, read_cache},
    };
    use sdgb_api::title::model::{GetUserMusicApiResp, UserMusicDetailFlatten};

    // Load every cached music-record response, then emit one flat row per
    // (user, music detail) pair.
    let records: Vec<GetUserMusicApiResp> = read_cache(RECORDS)?;
    let rows: Vec<UserMusicDetailFlatten> = records
        .into_iter()
        .flat_map(|resp| {
            // Copy the id out before `user_music_list` is moved out of
            // `resp`, so the inner closure only captures a plain integer.
            let user_id = resp.user_id;
            resp.user_music_list
                .into_iter()
                .flat_map(|music| music.user_music_detail_list)
                .map(move |detail| UserMusicDetailFlatten::new(user_id, detail))
        })
        .collect();
    dump_parquet(rows, "records.parquet")?;
}
#[cfg(feature = "fetchall")]
Commands::ScrapeAllB50Dump {} => { Commands::ScrapeAllB50Dump {} => {
use crate::{cache::B50, utils::helpers::dump_json}; use crate::{cache::B50, utils::helpers::dump_json};