Commit f676a12

perf: Add benchmark for query performance
1 parent 4f46866

4 files changed

Lines changed: 134 additions & 62 deletions


Cargo.toml

Lines changed: 1 addition & 1 deletion
@@ -25,7 +25,7 @@ criterion = "0.8.1"
tempfile = "3.10.1"

[[bench]]
-name = "insertion"
+name = "db"
harness = false

[[bin]]
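
With harness = false, Criterion supplies the benchmark harness, so after this rename the old cargo bench --bench insertion invocation no longer exists; assuming the default Cargo layout, cargo bench --bench db (or a plain cargo bench) should run the insertion and query benchmarks defined in benches/db.rs below.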

benches/db.rs

Lines changed: 131 additions & 0 deletions
@@ -0,0 +1,131 @@
use argusdb::db::DB;
use argusdb::query::{BinaryOperator, Expression, LogicalPlan, execute_plan};
use criterion::{BenchmarkId, Criterion, Throughput, criterion_group, criterion_main};
use serde_json::json;
use std::hint;
use tempfile::tempdir;

fn generate_doc_with_keys(num_keys: usize, i: usize) -> serde_json::Value {
    let mut doc = serde_json::Map::new();
    for j in 0..num_keys {
        doc.insert(format!("key{}", j), serde_json::Value::from(i));
    }
    serde_json::Value::Object(doc)
}

fn generate_query_plan(collection_name: &str, selectivity: f64, total_docs: usize) -> LogicalPlan {
    let scan_plan = LogicalPlan::Scan {
        collection: collection_name.to_string(),
    };

    if selectivity >= 1.0 {
        // 100% selectivity or more, no filter needed
        return scan_plan;
    }

    let filter_value = (total_docs as f64 * selectivity).round() as i64;

    let predicate = Expression::Binary {
        left: Box::new(Expression::FieldReference("value".to_string())),
        op: BinaryOperator::Lt,
        right: Box::new(Expression::Literal(json!(filter_value))),
    };

    LogicalPlan::Filter {
        input: Box::new(scan_plan),
        predicate,
    }
}

fn insertion_benchmark(c: &mut Criterion) {
    let mut group = c.benchmark_group("insertion");
    group.sample_size(10);
    let max_docs = 10_000;

    for num_keys in [1usize, 10usize, 100usize].iter() {
        group.bench_function(BenchmarkId::new("insert", *num_keys), |b| {
            b.iter_custom(|iters| {
                let mut total_duration = std::time::Duration::new(0, 0);
                for _ in 0..iters {
                    // Setup for each iteration: Create a new DB in a temp directory
                    let dir = tempdir().unwrap();
                    let mut db = DB::new(dir.path().to_str().unwrap(), max_docs + 1, 10);
                    db.create_collection("test").unwrap();

                    let start = std::time::Instant::now();

                    // Benchmarked routine: Insert documents
                    for i in 0..(max_docs / num_keys) {
                        let doc = generate_doc_with_keys(*num_keys, i);
                        db.insert("test", hint::black_box(doc)).unwrap();
                    }

                    total_duration += start.elapsed();

                    // Teardown for each iteration: `dir` (TempDir) is dropped here,
                    // cleaning up the temporary database files.
                }
                total_duration
            })
        });
    }

    group.finish();
}

fn query_benchmark(c: &mut Criterion) {
    let mut group = c.benchmark_group("query");
    group.sample_size(10);
    let num_docs = 10_000;
    let collection_name = "test";

    // Setup: insert 10,000 documents with a `value` field.
    //
    // Unlike the insertion benchmark, which creates a fresh DB for every
    // iteration to avoid cumulative effects (e.g. flushing to disk), the query
    // benchmark builds the DB once, outside `iter_custom`: queries do not
    // modify the collection, so reusing it is safe, and doing the setup up
    // front is what allows `group.throughput` to be set per selectivity.
    let dir = tempdir().unwrap();
    let mut db = DB::new(dir.path().to_str().unwrap(), num_docs + 1, 10); // Don't flush
    db.create_collection(collection_name).unwrap();
    for i in 0..num_docs {
        db.insert(collection_name, json!({"value": i})).unwrap();
    }
    let db_arc = std::sync::Arc::new(std::sync::Mutex::new(db));

    for selectivity in [1.0, 0.1, 0.01].iter() {
        let plan = generate_query_plan(collection_name, *selectivity, num_docs);
        group.throughput(Throughput::Elements(
            (num_docs as f64 * *selectivity).round() as u64,
        ));

        group.bench_function(BenchmarkId::new("query", selectivity), |b| {
            b.iter_custom(|iters| {
                let mut total_duration = std::time::Duration::new(0, 0);
                for _ in 0..iters {
                    // For each iteration, acquire the DB lock and execute the query
                    let db_lock = db_arc.lock().unwrap();

                    let start = std::time::Instant::now();

                    // Execute the query plan and iterate over all results
                    let mut iter = execute_plan(plan.clone(), &db_lock).unwrap();
                    while iter.next().is_some() {} // Consume all results

                    total_duration += start.elapsed();
                }
                total_duration
            })
        });
    }

    group.finish();
}

criterion_group!(benches, insertion_benchmark, query_benchmark);
criterion_main!(benches);
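
To make the selectivity arithmetic concrete: for selectivity 0.1 over 10,000 documents, filter_value = (10_000.0 * 0.1).round() = 1_000, so the plan filters on value < 1000 and the group reports a throughput of 1,000 elements. A minimal illustrative check of the plan shapes, hypothetical and not part of this commit, assuming it sits in the same file so the imports above are in scope:

#[test]
fn query_plan_shape_follows_selectivity() {
    // Selectivity below 1.0 wraps the scan in a Filter on `value`.
    let filtered = generate_query_plan("test", 0.1, 10_000);
    assert!(matches!(filtered, LogicalPlan::Filter { .. }));

    // Selectivity 1.0 (or more) needs no filter and returns the bare Scan.
    let full_scan = generate_query_plan("test", 1.0, 10_000);
    assert!(matches!(full_scan, LogicalPlan::Scan { .. }));
}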

benches/insertion.rs

Lines changed: 0 additions & 59 deletions
This file was deleted.

src/query.rs

Lines changed: 2 additions & 2 deletions
@@ -38,7 +38,7 @@ pub enum LogicalOperator {
    Or,
}

-#[derive(Debug)]
+#[derive(Debug, Clone)]
pub enum LogicalPlan {
    Scan {
        collection: String,
@@ -61,7 +61,7 @@ pub enum LogicalPlan {
    },
}

-#[derive(Debug)]
+#[derive(Debug, Clone)]
pub enum Statement {
    Insert {
        collection: String,
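
For context, the Clone derive on LogicalPlan is what lets the benchmark above re-run the same plan: benches/db.rs calls execute_plan(plan.clone(), &db_lock) on every iteration. Statement receives the matching derive in the same change.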
