Compare commits

...

2 Commits

Author SHA1 Message Date
adcb715d2c chore: scraping user data becomes impossible 2026-01-22 06:12:17 +08:00
82e2db7c90 chore: impl Default for PreviewAPI 2026-01-22 05:32:55 +08:00
15 changed files with 57 additions and 1304 deletions

697
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -6,12 +6,9 @@ edition = "2024"
license = "GPL-3.0"
[features]
default = ["compio", "bincode"]
default = ["compio"]
compio = ["dep:compio"]
tokio = ["dep:tokio"]
bincode = ["dep:bincode"]
parquet = ['dep:parquet', 'dep:parquet_derive']
[dependencies]
snafu = { workspace = true }
@@ -45,7 +42,3 @@ flate2 = "1.1.2"
cbc = { version = "0.1.2", features = ["alloc"] }
aes = "0.8.4"
cipher = { version = "0.4.4", features = ["block-padding"] }
bincode = { version = "2.0.1", optional = true }
parquet = { workspace = true, optional = true }
parquet_derive = { workspace = true, optional = true }

View File

@@ -7,8 +7,5 @@ pub mod helper;
mod error;
pub use error::ApiError;
#[cfg(feature = "bincode")]
pub use bincode;
#[cfg(all(feature = "compio", feature = "tokio"))]
compile_error!("you must not enable both `compio` and `tokio`");

View File

@@ -16,7 +16,6 @@ pub struct GetUserMusicApi {
}
#[derive(Default, Debug, Clone, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "bincode", derive(bincode::Encode, bincode::Decode))]
#[serde(rename_all = "camelCase")]
pub struct GetUserMusicApiResp {
pub user_id: u32,
@@ -25,7 +24,6 @@ pub struct GetUserMusicApiResp {
pub user_music_list: Vec<UserMusic>,
}
#[cfg_attr(feature = "bincode", derive(bincode::Encode, bincode::Decode))]
#[derive(Default, Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct UserMusic {
@@ -33,7 +31,6 @@ pub struct UserMusic {
pub length: u32,
}
#[cfg_attr(feature = "bincode", derive(bincode::Encode, bincode::Decode))]
#[derive(Default, Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct UserMusicDetail {
@@ -85,21 +82,6 @@ pub struct UserMusicDetail {
pub ext_num2: i64,
}
#[cfg_attr(feature = "parquet", derive(parquet_derive::ParquetRecordWriter))]
pub struct UserMusicDetailFlatten {
pub user_id: u32,
pub music_id: u32,
pub level: u8,
pub play_count: u32,
pub achievement: u32,
pub combo_status: u8,
pub sync_status: u8,
pub deluxscore_max: u16,
pub score_rank: u8,
pub ext_num1: u32,
pub ext_num2: u32,
}
impl Display for UserMusicDetail {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
if let Some(music_title) = query_music(self.music_id).map(|i| &i.name) {
@@ -149,35 +131,3 @@ impl Display for UserMusicDetail {
Ok(())
}
}
impl UserMusicDetailFlatten {
pub fn new(
user_id: u32,
UserMusicDetail {
music_id,
level,
play_count,
achievement,
combo_status,
sync_status,
deluxscore_max,
score_rank,
ext_num1,
ext_num2,
}: UserMusicDetail,
) -> Self {
Self {
user_id,
music_id,
level: level as _,
sync_status: sync_status as _,
deluxscore_max: deluxscore_max as _,
score_rank: score_rank as _,
play_count: play_count as _,
achievement: achievement as _,
combo_status: combo_status as _,
ext_num1: ext_num1 as _,
ext_num2: ext_num2 as _,
}
}
}

View File

@@ -20,9 +20,13 @@ impl From<u32> for GetUserPreviewApi {
}
}
impl Default for GetUserPreviewApi {
fn default() -> Self {
Self::from(0)
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
#[cfg_attr(feature = "bincode", derive(bincode::Encode, bincode::Decode))]
#[cfg_attr(feature = "parquet", derive(parquet_derive::ParquetRecordWriter))]
#[serde(rename_all = "camelCase")]
pub struct GetUserPreviewApiResp {
pub user_id: u32,

View File

@@ -19,7 +19,6 @@ impl From<u32> for GetUserRatingApi {
}
}
#[cfg_attr(feature = "bincode", derive(bincode::Encode, bincode::Decode))]
#[derive(Debug, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct GetUserRatingApiResp {
@@ -27,7 +26,6 @@ pub struct GetUserRatingApiResp {
pub user_rating: UserRating,
}
#[cfg_attr(feature = "bincode", derive(bincode::Encode, bincode::Decode))]
#[derive(Default, Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct UserRating {
@@ -44,7 +42,6 @@ pub struct UserRating {
pub udemae: Udemae,
}
#[cfg_attr(feature = "bincode", derive(bincode::Encode, bincode::Decode))]
#[derive(Default, Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct MusicRating {
@@ -66,18 +63,6 @@ pub struct MusicRating {
pub achievement: i32,
}
#[cfg_attr(feature = "parquet", derive(parquet_derive::ParquetRecordWriter))]
#[derive(Default, Debug, Clone, PartialEq)]
pub struct MusicRatingFlatten {
pub user_id: u32,
pub music_id: u32,
pub level: u32,
pub rom_version: i64,
pub achievement: i32,
pub dx_rating: u32,
}
#[cfg_attr(feature = "bincode", derive(bincode::Encode, bincode::Decode))]
#[derive(Default, Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct Udemae {

View File

@@ -12,44 +12,6 @@ impl From<u32> for GetUserRegionApi {
}
}
impl From<GetUserRegionApiResp> for Vec<UserRegionFlatten> {
fn from(
GetUserRegionApiResp {
user_id,
user_region_list,
..
}: GetUserRegionApiResp,
) -> Self {
user_region_list
.into_iter()
.map(
|UserRegion {
region_id,
play_count,
created,
}| {
UserRegionFlatten {
user_id,
region_id,
play_count,
created,
}
},
)
.collect()
}
}
#[derive(Default, Debug, Clone, PartialEq)]
#[cfg_attr(feature = "parquet", derive(parquet_derive::ParquetRecordWriter))]
pub struct UserRegionFlatten {
pub user_id: u32,
pub region_id: u32,
pub play_count: i64,
pub created: String,
}
#[cfg_attr(feature = "bincode", derive(bincode::Encode, bincode::Decode))]
#[derive(Default, Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct GetUserRegionApiResp {
@@ -58,7 +20,6 @@ pub struct GetUserRegionApiResp {
pub user_region_list: Vec<UserRegion>,
}
#[cfg_attr(feature = "bincode", derive(bincode::Encode, bincode::Decode))]
#[derive(Default, Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct UserRegion {

View File

@@ -18,20 +18,15 @@ pub use get_user_rating_api::{
GetUserRatingApi,
GetUserRatingApiResp, // api
MusicRating,
MusicRatingFlatten,
Udemae,
UserRating,
};
mod get_user_music_api;
pub use get_user_music_api::{
GetUserMusicApi, GetUserMusicApiResp, UserMusic, UserMusicDetail, UserMusicDetailFlatten,
};
pub use get_user_music_api::{GetUserMusicApi, GetUserMusicApiResp, UserMusic, UserMusicDetail};
mod get_user_region_api;
pub use get_user_region_api::{
GetUserRegionApi, GetUserRegionApiResp, UserRegion, UserRegionFlatten,
};
pub use get_user_region_api::{GetUserRegionApi, GetUserRegionApiResp, UserRegion};
mod dxrating;
pub use dxrating::{

View File

@@ -32,7 +32,7 @@ impl Default for UserLogoutApi {
client_id: "A63E01C2805".into(),
type_: 1,
access_code: "",
login_date_time: 0,
}
}

View File

@@ -6,21 +6,13 @@ authors = ["mokurin000"]
description = "CLI tool for SDGB protocol"
[features]
default = ["compio", "fetchall"]
default = ["compio"]
compio = ["dep:compio", "sdgb-api/compio"]
tokio = ["dep:tokio", "sdgb-api/tokio"]
fetchall = [
"dep:redb",
"dep:futures-util",
"dep:parquet",
"dep:music-db",
"sdgb-api/parquet",
]
[dependencies]
sdgb-api = { workspace = true, features = ["bincode"] }
sdgb-api = { workspace = true, features = [] }
music-db = { workspace = true, optional = true }
# (de)serialization
@@ -32,10 +24,6 @@ strum = { workspace = true }
spdlog-rs = { workspace = true }
snafu = { workspace = true }
# kv database
redb = { workspace = true, optional = true }
# async runtime
tokio = { workspace = true, features = ["macros"], optional = true }
compio = { workspace = true, features = ["macros"], optional = true }
@@ -49,7 +37,5 @@ ctrlc = { version = "3.4.7", features = ["termination"] }
# magic macro
crabtime = { workspace = true }
parquet = { workspace = true, optional = true }
[build-dependencies]
version_check = "0.9.5"

View File

@@ -1,64 +0,0 @@
use std::sync::LazyLock;
use redb::{ReadTransaction, ReadableDatabase as _, Table, TableDefinition, WriteTransaction};
static DATABASE: LazyLock<redb::Database> = LazyLock::new(|| {
let mut db = redb::Database::builder()
.create("players.redb")
.expect("failed to open database");
_ = db.compact();
db
});
pub fn write_txn() -> Result<WriteTransaction, redb::Error> {
Ok(DATABASE.begin_write()?)
}
pub fn read_txn() -> Result<ReadTransaction, redb::Error> {
Ok(DATABASE.begin_read()?)
}
pub fn open_table<'a>(
write: &'a WriteTransaction,
definition: TableDefinition<'_, u32, Vec<u8>>,
) -> Result<Table<'a, u32, Vec<u8>>, redb::Error> {
Ok(write.open_table(definition)?)
}
pub fn open_table_ro(
read: &ReadTransaction,
definition: TableDefinition<'_, u32, Vec<u8>>,
) -> Result<redb::ReadOnlyTable<u32, Vec<u8>>, redb::Error> {
Ok(read.open_table(definition)?)
}
#[crabtime::function]
fn table_definitions_impl(tables: Vec<String>) {
let mut defs: Vec<String> = Vec::new();
for table in tables {
let definition = table.to_uppercase();
let table_name = format!("\"{table}\"");
crabtime::output!(
pub const {{definition}}: TableDefinition<'_, u32, Vec<u8>> = redb::TableDefinition::new({{table_name}});
);
defs.push(format!("write_txn.open_table({definition})?;"));
}
let init_statements = defs.join("\n");
crabtime::output!(
pub fn init_db() -> Result<(), redb::Error> {
let write_txn = DATABASE.begin_write()?;
{
{ init_statements }
}
write_txn.commit()?;
Ok(())
}
);
}
table_definitions_impl!(["players", "b50", "records", "regions"]);

View File

@@ -85,55 +85,6 @@ pub enum Commands {
token: Option<String>,
},
/// Scrape all user, read possible id from stdin
#[cfg(feature = "fetchall")]
ListAllUser {
#[arg(short, long, default_value_t = 5)]
concurrency: usize,
},
#[cfg(feature = "fetchall")]
/// Scrape B50 data
ScrapeAllB50 {
#[arg(short, long, default_value_t = 5)]
concurrency: usize,
#[arg(long, default_value_t = 1000)]
min_rating: i64,
#[arg(long, default_value_t = 16500)]
max_rating: i64,
},
/// Scrape Region data
#[cfg(feature = "fetchall")]
ScrapeAllRegion {
#[arg(short, long, default_value_t = 5)]
concurrency: usize,
#[arg(long, default_value_t = 1000)]
min_rating: i64,
#[arg(long, default_value_t = 16500)]
max_rating: i64,
},
/// Scrape all player record
#[cfg(feature = "fetchall")]
ScrapeAllRecord {
#[arg(short, long, default_value_t = 5)]
concurrency: usize,
#[arg(long, default_value_t = 10000)]
min_rating: i64,
#[arg(long, default_value_t = 16400)]
max_rating: i64,
},
#[cfg(feature = "fetchall")]
ListAllUserDump {},
#[cfg(feature = "fetchall")]
ScrapeAllB50Dump {},
#[cfg(feature = "fetchall")]
ScrapeAllRegionDump {},
#[cfg(feature = "fetchall")]
ScrapeAllRecordDump {},
Logout {
#[arg(short, long)]
user_id: u32,

View File

@@ -32,8 +32,6 @@ use crate::{
utils::{human_readable_display, json_display, login_action},
};
#[cfg(feature = "fetchall")]
mod cache;
mod commands;
mod utils;
@@ -221,216 +219,6 @@ async fn main() -> Result<(), Box<dyn snafu::Error>> {
println!("{}", String::from_utf8_lossy(&resp));
}
#[cfg(feature = "fetchall")]
Commands::ListAllUser { concurrency } => {
use crate::{cache::PLAYERS, utils::helpers::cached_concurrent_fetch};
use sdgb_api::title::methods::GetUserPreviewApiExt;
use std::io::BufRead as _;
let mut user_ids = Vec::new();
{
let mut stdin = std::io::stdin().lock();
let mut buf = String::new();
while stdin.read_line(&mut buf).is_ok_and(|size| size != 0) {
if buf.is_empty() {
continue;
}
let user_id: u32 = buf.trim().parse()?;
buf.clear();
user_ids.push(user_id);
}
}
cached_concurrent_fetch::<GetUserPreviewApiExt>(
user_ids,
&client,
concurrency,
PLAYERS,
)
.await?;
}
#[cfg(feature = "fetchall")]
Commands::ScrapeAllRecord {
concurrency,
min_rating,
max_rating,
} => {
use crate::{
cache::{PLAYERS, RECORDS},
utils::helpers::{cached_concurrent_fetch_userfn, read_cache},
};
let players: Vec<GetUserPreviewApiResp> = read_cache(PLAYERS)?;
cached_concurrent_fetch_userfn(
players
.iter()
.filter(|p| p.player_rating >= min_rating && p.player_rating <= max_rating)
.map(|p| p.user_id)
.collect::<Vec<u32>>(),
&client,
concurrency,
RECORDS,
get_user_all_music,
)
.await?;
}
#[cfg(feature = "fetchall")]
Commands::ScrapeAllB50 {
concurrency,
min_rating,
max_rating,
} => {
use sdgb_api::title::methods::GetUserRatingApiExt;
use crate::{
cache::{B50, PLAYERS},
utils::helpers::{cached_concurrent_fetch, read_cache},
};
let players: Vec<GetUserPreviewApiResp> = read_cache(PLAYERS)?;
cached_concurrent_fetch::<GetUserRatingApiExt>(
players
.iter()
.filter(|p| p.player_rating >= min_rating && p.player_rating <= max_rating)
.map(|p| p.user_id)
.collect::<Vec<u32>>(),
&client,
concurrency,
B50,
)
.await?;
}
#[cfg(feature = "fetchall")]
Commands::ScrapeAllRegion {
concurrency,
min_rating,
max_rating,
} => {
use sdgb_api::title::methods::GetUserRegionApiExt;
use crate::{
cache::{PLAYERS, REGIONS},
utils::helpers::{cached_concurrent_fetch, read_cache},
};
let players: Vec<GetUserPreviewApiResp> = read_cache(PLAYERS)?;
cached_concurrent_fetch::<GetUserRegionApiExt>(
players
.iter()
.filter(|p| p.player_rating >= min_rating && p.player_rating <= max_rating)
.map(|p| p.user_id)
.collect::<Vec<u32>>(),
&client,
concurrency,
REGIONS,
)
.await?;
}
#[cfg(feature = "fetchall")]
Commands::ListAllUserDump {} => {
use crate::{
cache::PLAYERS,
utils::helpers::{dump_parquet, read_cache},
};
let players: Vec<GetUserPreviewApiResp> = read_cache(PLAYERS)?;
dump_parquet(players, "players.parquet")?;
}
#[cfg(feature = "fetchall")]
Commands::ScrapeAllRegionDump {} => {
use crate::{
cache::REGIONS,
utils::helpers::{dump_parquet, read_cache},
};
use sdgb_api::title::model::{GetUserRegionApiResp, UserRegionFlatten};
let regions: Vec<GetUserRegionApiResp> = read_cache(REGIONS)?;
let regions_flat = regions
.into_iter()
.map(Vec::<UserRegionFlatten>::from)
.flatten()
.collect::<Vec<_>>();
dump_parquet(regions_flat, "regions.parquet")?;
}
#[cfg(feature = "fetchall")]
Commands::ScrapeAllRecordDump {} => {
use crate::{
cache::RECORDS,
utils::helpers::{dump_parquet, read_cache},
};
use sdgb_api::title::model::GetUserMusicApiResp;
use sdgb_api::title::model::UserMusicDetailFlatten;
let records: Vec<GetUserMusicApiResp> = read_cache(RECORDS)?;
dump_parquet(
records
.into_iter()
.map(|resp| {
resp.user_music_list
.into_iter()
.map(|music| music.user_music_detail_list)
.flatten()
.map(move |detail| UserMusicDetailFlatten::new(resp.user_id, detail))
})
.flatten()
.collect::<Vec<UserMusicDetailFlatten>>(),
"records.parquet",
)?;
}
#[cfg(feature = "fetchall")]
Commands::ScrapeAllB50Dump {} => {
use sdgb_api::title::model::{MusicRating, MusicRatingFlatten};
use crate::{
cache::B50,
utils::helpers::{dump_parquet, read_cache},
};
let records: Vec<GetUserRatingApiResp> = read_cache(B50)?;
dump_parquet::<MusicRatingFlatten>(
records
.into_iter()
.map(
|GetUserRatingApiResp {
user_id,
user_rating,
}| {
user_rating
.rating_list
.into_iter()
.chain(user_rating.next_rating_list)
.filter_map(
move |MusicRating {
music_id,
level,
rom_version,
achievement,
}| {
let (_rank, dx_rating) =
music_db::query_music_level(music_id, level)?
.dx_rating(achievement);
Some(MusicRatingFlatten {
user_id,
music_id,
level,
rom_version,
achievement,
dx_rating,
})
},
)
},
)
.flatten()
.collect::<Vec<_>>(),
"b50.parquet",
)?;
}
Commands::Userdata {
user_id,
skip_login,

View File

@@ -1,183 +0,0 @@
use std::sync::Arc;
use std::{fs::OpenOptions, io::BufWriter};
use std::{path::Path, sync::atomic::Ordering};
use futures_util::StreamExt;
use nyquest_preset::nyquest::AsyncClient;
use parquet::basic::BrotliLevel;
use parquet::file::properties::WriterProperties;
use parquet::file::writer::SerializedFileWriter;
use parquet::record::RecordWriter;
use redb::ReadableTable;
use redb::TableDefinition;
use spdlog::{error, info};
use sdgb_api::title::MaiVersionExt;
use sdgb_api::title::{Sdgb1_53, methods::APIExt};
use sdgb_api::{ApiError, bincode};
use bincode::{BorrowDecode, Encode, borrow_decode_from_slice};
use crate::{EARLY_QUIT, cache};
#[allow(unused)]
pub fn read_cache_keys(
definition: TableDefinition<'_, u32, Vec<u8>>,
) -> Result<Vec<u32>, Box<dyn snafu::Error>> {
let txn = cache::read_txn()?;
let table = cache::open_table_ro(&txn, definition)?;
Ok(table
.iter()?
.flatten()
.map(|(value, _)| value.value())
.collect::<Vec<u32>>())
}
pub fn read_cache<D>(
definition: TableDefinition<'_, u32, Vec<u8>>,
) -> Result<Vec<D>, Box<dyn snafu::Error>>
where
D: for<'d> BorrowDecode<'d, ()>,
{
let txn = cache::read_txn()?;
let table = cache::open_table_ro(&txn, definition)?;
let config =
bincode::config::Configuration::<bincode::config::LittleEndian>::default().with_no_limit();
Ok(table
.iter()?
.flatten()
.map(|d| borrow_decode_from_slice(&d.1.value(), config))
.flatten()
.map(|(value, _)| value)
.collect::<Vec<D>>())
}
pub fn dump_parquet<D>(
data: impl Into<Vec<D>>,
output_path: impl AsRef<Path>,
) -> Result<(), Box<dyn snafu::Error>>
where
for<'a> &'a [D]: RecordWriter<D>,
{
let data = data.into();
let file = OpenOptions::new()
.create(true)
.truncate(true)
.write(true)
.open(output_path)?;
#[cfg(file_lock_ready)]
file.try_lock()?;
let writer = BufWriter::new(file);
let schema = data.as_slice().schema()?;
let props = Arc::new(
WriterProperties::builder()
.set_compression(parquet::basic::Compression::BROTLI(BrotliLevel::try_new(
6,
)?))
.build(),
);
let mut writer = SerializedFileWriter::new(writer, schema, props).unwrap();
let mut row_group = writer.next_row_group().unwrap();
data.as_slice().write_to_row_group(&mut row_group)?;
row_group.close()?;
writer.close().unwrap();
info!("dumped {} records", data.len());
Ok(())
}
pub async fn cached_concurrent_fetch<A: APIExt>(
user_ids: impl Into<Vec<u32>>,
client: &AsyncClient,
concurrency: usize,
definition: TableDefinition<'_, u32, Vec<u8>>,
) -> Result<(), Box<dyn snafu::Error>>
where
A::Payload: From<u32>,
A::Response: Encode + for<'a> BorrowDecode<'a, ()>,
{
cached_concurrent_fetch_userfn(
user_ids,
client,
concurrency,
definition,
async |client, user_id| {
Sdgb1_53::request_ext::<A>(client, A::Payload::from(user_id), user_id).await
},
)
.await
}
pub async fn cached_concurrent_fetch_userfn<R>(
user_ids: impl Into<Vec<u32>>,
client: &AsyncClient,
concurrency: usize,
definition: TableDefinition<'_, u32, Vec<u8>>,
scrape: impl AsyncFn(&AsyncClient, u32) -> Result<R, ApiError>,
) -> Result<(), Box<dyn snafu::Error>>
where
R: Encode + for<'a> BorrowDecode<'a, ()>,
{
let _ = cache::init_db();
let user_ids = user_ids.into();
let read = cache::read_txn()?;
let write = cache::write_txn()?;
let config = sdgb_api::bincode::config::Configuration::<
sdgb_api::bincode::config::LittleEndian,
>::default()
.with_no_limit();
info!("number of user_id: {}", user_ids.len());
let collect = futures_util::stream::iter(user_ids)
.map(async |user_id| {
{
let cache_table = cache::open_table_ro(&read, definition)?;
let data = cache_table.get(user_id)?;
if data.is_some() {
return Ok(());
}
}
if EARLY_QUIT.load(Ordering::Relaxed) {
return Err("early skip due to ctrl-c")?;
}
let resp = scrape(&client, user_id).await;
match &resp {
Ok(resp) => {
use sdgb_api::bincode::encode_to_vec;
if let Ok(mut table) = cache::open_table(&write, definition)
&& let Ok(encoded) = encode_to_vec(resp, config)
{
info!("encode length for {user_id}: {}", encoded.len());
_ = table.insert(user_id, encoded);
}
}
Err(sdgb_api::ApiError::JSON { .. }) => {}
Err(e) => {
error!("fetch failed: {e}");
}
}
Result::<_, Box<dyn snafu::Error>>::Ok(())
})
.buffer_unordered(concurrency) // slower to avoid being banned
.collect::<Vec<_>>()
.await;
drop(collect);
let _ = write.commit();
Ok(())
}

View File

@@ -65,6 +65,3 @@ pub fn human_readable_display(
Ok(())
}
#[cfg(feature = "fetchall")]
pub mod helpers;