perf: parquet-based data export
@@ -1,9 +1,14 @@
+use std::sync::Arc;
 use std::{fs::OpenOptions, io::BufWriter};
 use std::{path::Path, sync::atomic::Ordering};
 
 use futures_util::StreamExt;
 use nyquest_preset::nyquest::AsyncClient;
 
+use parquet::basic::BrotliLevel;
+use parquet::file::properties::WriterProperties;
+use parquet::file::writer::SerializedFileWriter;
+use parquet::record::RecordWriter;
 use redb::ReadableTable;
 use redb::TableDefinition;
 use serde::Serialize;
@@ -52,7 +57,46 @@ where
         .collect::<Vec<D>>())
 }
 
-pub fn dump_cache<D>(
+pub fn dump_parquet<D>(
+    data: impl Into<Vec<D>>,
+    output_path: impl AsRef<Path>,
+) -> Result<(), Box<dyn snafu::Error>>
+where
+    for<'a> &'a [D]: RecordWriter<D>,
+{
+    let data = data.into();
+    let file = OpenOptions::new()
+        .create(true)
+        .truncate(true)
+        .write(true)
+        .open(output_path)?;
+
+    #[cfg(file_lock_ready)]
+    file.try_lock()?;
+
+    let writer = BufWriter::new(file);
+    let schema = data.as_slice().schema()?;
+    let props = Arc::new(
+        WriterProperties::builder()
+            .set_compression(parquet::basic::Compression::BROTLI(BrotliLevel::try_new(
+                6,
+            )?))
+            .build(),
+    );
+
+    let mut writer = SerializedFileWriter::new(writer, schema, props)?;
+    let mut row_group = writer.next_row_group()?;
+
+    data.as_slice().write_to_row_group(&mut row_group)?;
+    row_group.close()?;
+
+    writer.close()?;
+    info!("dumped {} user ids", data.len());
+
+    Ok(())
+}
+
 pub fn dump_json<D>(
     output_path: impl AsRef<Path>,
     definition: TableDefinition<'_, u32, Vec<u8>>,
 ) -> Result<(), Box<dyn snafu::Error>>
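
Usage note: the `for<'a> &'a [D]: RecordWriter<D>` bound is the shape of the impl that `parquet_derive`'s `#[derive(ParquetRecordWriter)]` generates for slices of an annotated struct. A minimal caller sketch, assuming `dump_parquet` is in scope and using a hypothetical `UserId` record type that is not part of this commit:

use parquet_derive::ParquetRecordWriter;

// Hypothetical record type for illustration only; deriving
// ParquetRecordWriter implements `RecordWriter<UserId>` for `&[UserId]`,
// which satisfies dump_parquet's `for<'a> &'a [D]: RecordWriter<D>` bound.
#[derive(ParquetRecordWriter)]
struct UserId {
    id: u32,
}

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let ids = vec![UserId { id: 1 }, UserId { id: 2 }];
    // Writes all records to one Brotli-compressed row group on disk.
    dump_parquet(ids, "user_ids.parquet")?;
    Ok(())
}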