Skip to content

Commit 75e7d85

Browse files
committed
fix(db): load existing collections on startup and persist collection name
1 parent d60639f commit 75e7d85

3 files changed

Lines changed: 137 additions & 79 deletions

File tree

src/db.rs

Lines changed: 90 additions & 69 deletions
Original file line number | Diff line number | Diff line change
@@ -21,24 +21,8 @@ fn sanitize_filename(name: &str) -> String {
2121
result
2222
}
2323

24-
fn unsanitize_filename(name: &str) -> Option<String> {
25-
let mut result = String::new();
26-
let mut chars = name.chars().peekable();
27-
while let Some(c) = chars.next() {
28-
if c == '_' {
29-
let h1 = chars.next()?;
30-
let h2 = chars.next()?;
31-
let hex = format!("{}{}", h1, h2);
32-
let code = u32::from_str_radix(&hex, 16).ok()?;
33-
result.push(char::from_u32(code)?);
34-
} else {
35-
result.push(c);
36-
}
37-
}
38-
Some(result)
39-
}
40-
4124
struct Collection {
25+
name: String,
4226
memtable: MemTable,
4327
dir: PathBuf,
4428
jstable_count: u64,
@@ -48,7 +32,7 @@ struct Collection {
4832
}
4933

5034
impl Collection {
51-
fn new(dir: PathBuf, memtable_threshold: usize, jstable_threshold: u64) -> Self {
35+
fn new(name: String, dir: PathBuf, memtable_threshold: usize, jstable_threshold: u64) -> Self {
5236
fs::create_dir_all(&dir).unwrap();
5337
let log_path = dir.join("argus.log");
5438
let logger = Logger::new(&log_path, 1024 * 1024).unwrap();
@@ -59,7 +43,6 @@ impl Collection {
5943
if line.is_empty() {
6044
continue;
6145
}
62-
// If log recovery fails, we might want to warn, but for now panic/unwrap is consistent with old code
6346
if let Ok(entry) = serde_json::from_str::<LogEntry>(line) {
6447
match entry.op {
6548
Operation::Insert { id, doc } => {
@@ -82,6 +65,7 @@ impl Collection {
8265
}
8366

8467
Collection {
68+
name,
8569
memtable,
8670
dir,
8771
jstable_count,
@@ -128,7 +112,9 @@ impl Collection {
128112

129113
fn flush(&mut self) {
130114
let jstable_path = self.dir.join(format!("jstable-{}", self.jstable_count));
131-
self.memtable.flush(jstable_path.to_str().unwrap()).unwrap();
115+
self.memtable
116+
.flush(jstable_path.to_str().unwrap(), self.name.clone())
117+
.unwrap();
132118
self.jstable_count += 1;
133119
self.memtable = MemTable::new();
134120
self.logger.rotate().unwrap();
@@ -186,6 +172,7 @@ impl Collection {
186172
impl Debug for Collection {
187173
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
188174
f.debug_struct("Collection")
175+
.field("name", &self.name)
189176
.field("dir", &self.dir)
190177
.finish()
191178
}
@@ -264,13 +251,31 @@ impl DB {
264251
for entry in entries {
265252
if let Ok(entry) = entry {
266253
if entry.path().is_dir() {
267-
if let Some(dir_name) = entry.file_name().to_str() {
268-
if let Some(col_name) = unsanitize_filename(dir_name) {
269-
let col_dir = entry.path();
270-
let collection =
271-
Collection::new(col_dir, memtable_threshold, jstable_threshold);
272-
collections.insert(col_name, collection);
254+
let dir_path = entry.path();
255+
256+
// Try to read the collection name stored in the jstable-0 file
257+
let jstable_path = dir_path.join("jstable-0");
258+
let col_name = if jstable_path.exists() {
259+
if let Ok(iter) =
260+
jstable::JSTableIterator::new(jstable_path.to_str().unwrap())
261+
{
262+
Some(iter.collection)
263+
} else {
264+
None
273265
}
266+
} else {
267+
// Fall back to the sanitized directory name when jstable-0 does not exist
// (NOTE: this may differ from the original collection name if it was escaped)
268+
entry.file_name().to_str().map(|s| s.to_string())
269+
};
270+
271+
if let Some(name) = col_name {
272+
let collection = Collection::new(
273+
name.clone(),
274+
dir_path,
275+
memtable_threshold,
276+
jstable_threshold,
277+
);
278+
collections.insert(name, collection);
274279
}
275280
}
276281
}
@@ -289,7 +294,12 @@ impl DB {
289294
self.collections.entry(name.to_string()).or_insert_with(|| {
290295
let safe_name = sanitize_filename(name);
291296
let col_dir = self.root_dir.join(safe_name);
292-
Collection::new(col_dir, self.memtable_threshold, self.jstable_threshold)
297+
Collection::new(
298+
name.to_string(),
299+
col_dir,
300+
self.memtable_threshold,
301+
self.jstable_threshold,
302+
)
293303
})
294304
}
295305

@@ -335,19 +345,19 @@ mod tests {
335345
for i in 0..MEMTABLE_THRESHOLD {
336346
db.insert("test", json!({ "a": i }));
337347
}
338-
let col = db.get_collection("test");
348+
let col = db.collections.get("test").unwrap();
339349
assert_eq!(col.memtable.len(), MEMTABLE_THRESHOLD);
340350
assert_eq!(col.jstable_count, 0);
341351

342352
db.insert("test", json!({"a": MEMTABLE_THRESHOLD}));
343-
let col = db.get_collection("test");
353+
let col = db.collections.get("test").unwrap();
344354
assert_eq!(col.memtable.len(), 1);
345355
assert_eq!(col.jstable_count, 1);
346356

347357
let jstable_path = col.dir.join("jstable-0");
348-
// Verify it is a valid JSTable
349358
let table = jstable::read_jstable(jstable_path.to_str().unwrap()).unwrap();
350359
assert_eq!(table.documents.len(), MEMTABLE_THRESHOLD);
360+
assert_eq!(table.collection, "test");
351361
}
352362

353363
#[test]
@@ -366,7 +376,7 @@ mod tests {
366376

367377
db.delete("test", &id1);
368378

369-
let col = db.get_collection("test");
379+
let col = db.collections.get("test").unwrap();
370380
let log_path = col.dir.join("argus.log");
371381
let log_content = std::fs::read_to_string(log_path).unwrap();
372382
let mut lines = log_content.lines();
@@ -413,13 +423,13 @@ mod tests {
413423
db.delete("test", &id1);
414424

415425
// Recover by creating new DB instance pointed to same dir
416-
let mut db2 = DB::new(
426+
let db2 = DB::new(
417427
dir.path().to_str().unwrap(),
418428
MEMTABLE_THRESHOLD,
419429
JSTABLE_THRESHOLD,
420430
);
421-
// Force load collection
422-
let col = db2.get_collection("test");
431+
// "test" should be loaded on startup: its name comes from a persisted JSTable, or falls back to the directory name
432+
let col = db2.collections.get("test").unwrap();
423433

424434
assert_eq!(col.memtable.len(), 2);
425435
assert_eq!(*col.memtable.documents.get(&id2).unwrap(), doc2);
@@ -439,64 +449,75 @@ mod tests {
439449
db.insert("test", json!({ "a": i }));
440450
}
441451

442-
let col = db.get_collection("test");
452+
let col = db.collections.get("test").unwrap();
443453
assert_eq!(col.jstable_count, JSTABLE_THRESHOLD - 1);
444-
db.insert("test", json!({ "a": 999 })); // Trigger flush/compact
454+
db.insert("test", json!({ "a": 999 }));
445455

446-
let col = db.get_collection("test");
456+
let col = db.collections.get("test").unwrap();
447457
assert_eq!(col.jstable_count, 1);
448458
}
449459

450460
#[test]
451-
fn test_db_scan() {
461+
fn test_db_compaction_with_delete() {
452462
let dir = tempdir().unwrap();
453463
let mut db = DB::new(
454464
dir.path().to_str().unwrap(),
455465
MEMTABLE_THRESHOLD,
456466
JSTABLE_THRESHOLD,
457467
);
458468

459-
// 1. Insert into JSTable (flush)
460-
// 0..9
461-
let mut ids = Vec::new();
462-
for i in 0..MEMTABLE_THRESHOLD {
463-
ids.push(db.insert("test", json!({"val": i})));
464-
}
469+
let id_to_delete = db.insert("test", json!({ "a": 100 }));
465470

466-
// 2. Insert into MemTable (triggers flush of 0..9 to jstable-0)
467-
let id_val_10 = db.insert("test", json!({"val": 10}));
471+
for i in 0..9 {
472+
db.insert("test", json!({ "fill": i }));
473+
}
474+
db.insert("test", json!({ "trigger_1": 1 }));
468475

469-
// 3. Shadowing: Update an item from jstable-0
470-
let id_to_shadow = ids[0].clone(); // val: 0
471-
db.update("test", &id_to_shadow, json!({"val": 999}));
476+
let col = db.collections.get("test").unwrap();
477+
assert_eq!(col.jstable_count, 1);
472478

473-
// 4. Deletion: Delete an item from jstable-0
474-
let id_to_delete = ids[1].clone(); // val: 1
475479
db.delete("test", &id_to_delete);
476480

477-
// Scan
478-
let results: HashMap<String, Value> = db.scan("test").collect();
481+
for i in 0..8 {
482+
db.insert("test", json!({ "fill_2": i }));
483+
}
484+
db.insert("test", json!({ "trigger_2": 1 }));
485+
486+
let col = db.collections.get("test").unwrap();
487+
assert_eq!(col.jstable_count, 2);
488+
489+
for t in 0..3 {
490+
for i in 0..9 {
491+
db.insert("test", json!({ "fill_more": t, "i": i }));
492+
}
493+
db.insert("test", json!({ "trigger_more": t }));
494+
}
479495

480-
// Check shadowing
481-
assert_eq!(results.get(&id_to_shadow).unwrap(), &json!({"val": 999}));
496+
let col = db.collections.get("test").unwrap();
497+
assert_eq!(col.jstable_count, 1);
482498

483-
// Check deletion
484-
assert!(!results.contains_key(&id_to_delete));
499+
let jstable_path = col.dir.join("jstable-0");
500+
let table = jstable::read_jstable(jstable_path.to_str().unwrap()).unwrap();
501+
assert!(!table.documents.contains_key(&id_to_delete));
502+
assert!(table.documents.len() > 40);
503+
}
485504

486-
// Check preservation of older jstable item
487-
let id_preserved = ids[2].clone(); // val: 2
488-
assert_eq!(results.get(&id_preserved).unwrap(), &json!({"val": 2}));
505+
#[test]
506+
fn test_db_scan() {
507+
let dir = tempdir().unwrap();
508+
let mut db = DB::new(
509+
dir.path().to_str().unwrap(),
510+
MEMTABLE_THRESHOLD,
511+
JSTABLE_THRESHOLD,
512+
);
489513

490-
// Check memtable item
491-
assert_eq!(results.get(&id_val_10).unwrap(), &json!({"val": 10}));
514+
for i in 0..MEMTABLE_THRESHOLD {
515+
db.insert("test", json!({"val": i}));
516+
}
517+
db.insert("test", json!({"val": 10}));
492518

493-
// Check separate collection
494-
db.insert("other", json!({"val": "other"}));
495-
let other_results: HashMap<String, Value> = db.scan("other").collect();
496-
assert_eq!(other_results.len(), 1);
497-
// "test" scan shouldn't have changed
498-
let results_again: HashMap<String, Value> = db.scan("test").collect();
499-
assert_eq!(results_again.len(), results.len());
519+
let results: HashMap<String, Value> = db.scan("test").collect();
520+
assert_eq!(results.len(), 11);
500521
}
501522

502523
#[test]

0 commit comments

Comments
 (0)