B+ Tree indexing

This commit is contained in:
2026-02-25 15:58:30 +11:00
parent 77593fe3b4
commit 64b811f330
6 changed files with 1300 additions and 178 deletions

View File

@@ -49,7 +49,13 @@ function fmtOps(ops: number): string {
/**
* Run a full suite of benchmarks against a given storage instance.
*/
async function benchmarkStorage(label: string, storage: BaseStorage<Doc>, docs: Doc[], supportsRangeOps = true) {
async function benchmarkStorage(
label: string,
storage: BaseStorage<Doc>,
docs: Doc[],
options: { supportsRangeOps?: boolean; hasAgeIndex?: boolean } = {},
) {
const { supportsRangeOps = true, hasAgeIndex = false } = options;
const count = docs.length;
console.log(`\n${'='.repeat(60)}`);
console.log(` ${label} (${count.toLocaleString()} documents)`);
@@ -59,13 +65,13 @@ async function benchmarkStorage(label: string, storage: BaseStorage<Doc>, docs:
const insertMs = await time(async () => {
await storage.insertMany(docs);
});
console.log(` insertMany ${insertMs.toFixed(2)}ms (${fmtOps((count / insertMs) * 1000)} ops/sec)`);
console.log(` insertMany ${insertMs.toFixed(2)}ms (${fmtOps((count / insertMs) * 1000)} ops/sec)`);
// --- Find all (no filter) ---
const findAllMs = await time(async () => {
await storage.find();
});
console.log(` find() ${findAllMs.toFixed(2)}ms (${fmtOps((count / findAllMs) * 1000)} docs/sec)`);
console.log(` find() ${findAllMs.toFixed(2)}ms (${fmtOps((count / findAllMs) * 1000)} docs/sec)`);
// --- Find by indexed field (equality) ---
const lookupCount = Math.min(count, 1_000);
@@ -74,7 +80,7 @@ async function benchmarkStorage(label: string, storage: BaseStorage<Doc>, docs:
await storage.findOne({ id: `id-${i}` });
}
});
console.log(` findOne indexed ${findIndexedMs.toFixed(2)}ms (${fmtOps((lookupCount / findIndexedMs) * 1000)} ops/sec) [${lookupCount} lookups]`);
console.log(` findOne indexed ${findIndexedMs.toFixed(2)}ms (${fmtOps((lookupCount / findIndexedMs) * 1000)} ops/sec) [${lookupCount} lookups]`);
// --- Find by non-indexed field (full scan) ---
const scanCount = Math.min(count, 1_000);
@@ -83,28 +89,40 @@ async function benchmarkStorage(label: string, storage: BaseStorage<Doc>, docs:
await storage.findOne({ email: `user-${i}@test.com` });
}
});
console.log(` findOne scan ${findScanMs.toFixed(2)}ms (${fmtOps((scanCount / findScanMs) * 1000)} ops/sec) [${scanCount} lookups]`);
console.log(` findOne scan ${findScanMs.toFixed(2)}ms (${fmtOps((scanCount / findScanMs) * 1000)} ops/sec) [${scanCount} lookups]`);
// --- Find with $gte / $lt range (full scan) ---
// --- Range queries ---
if (supportsRangeOps) {
// Wide range: 20% selectivity (10 out of 50 age values).
const rangeCount = Math.min(count, 100);
let rangeTotal = 0;
const findRangeMs = await time(async () => {
let rangeWideTotal = 0;
const findRangeWideMs = await time(async () => {
for (let i = 0; i < rangeCount; i++) {
const results = await storage.find({ age: { $gte: 30, $lt: 40 } });
rangeTotal += results.length;
rangeWideTotal += results.length;
}
});
console.log(` find $gte/$lt ${findRangeMs.toFixed(2)}ms (${fmtOps((rangeCount / findRangeMs) * 1000)} ops/sec) [${rangeCount} queries, ~${Math.round(rangeTotal / rangeCount)} hits/query]`);
const indexLabel = hasAgeIndex ? 'B+Tree' : 'scan';
console.log(` find wide [${indexLabel}] ${findRangeWideMs.toFixed(2)}ms (${fmtOps((rangeCount / findRangeWideMs) * 1000)} ops/sec) [${rangeCount}x, ~${Math.round(rangeWideTotal / rangeCount)} hits, 20% sel.]`);
// --- Find with combined equality + operator (index narrows, operator verifies) ---
// Narrow range: 2% selectivity (1 out of 50 age values).
let rangeNarrowTotal = 0;
const findRangeNarrowMs = await time(async () => {
for (let i = 0; i < rangeCount; i++) {
const results = await storage.find({ age: { $gte: 42, $lt: 43 } });
rangeNarrowTotal += results.length;
}
});
console.log(` find narrow [${indexLabel}] ${findRangeNarrowMs.toFixed(2)}ms (${fmtOps((rangeCount / findRangeNarrowMs) * 1000)} ops/sec) [${rangeCount}x, ~${Math.round(rangeNarrowTotal / rangeCount)} hits, 2% sel.]`);
// --- Combined equality + operator ---
const comboCount = Math.min(count, 1_000);
const findComboMs = await time(async () => {
for (let i = 0; i < comboCount; i++) {
await storage.find({ id: `id-${i}`, age: { $gte: 20 } });
}
});
console.log(` find idx+operator ${findComboMs.toFixed(2)}ms (${fmtOps((comboCount / findComboMs) * 1000)} ops/sec) [${comboCount} queries]`);
console.log(` find idx+operator ${findComboMs.toFixed(2)}ms (${fmtOps((comboCount / findComboMs) * 1000)} ops/sec) [${comboCount} queries]`);
}
// --- Update by indexed field ---
@@ -114,7 +132,7 @@ async function benchmarkStorage(label: string, storage: BaseStorage<Doc>, docs:
await storage.updateOne({ id: `id-${i}` }, { name: `updated-${i}` });
}
});
console.log(` updateOne indexed ${updateMs.toFixed(2)}ms (${fmtOps((updateCount / updateMs) * 1000)} ops/sec) [${updateCount} updates]`);
console.log(` updateOne indexed ${updateMs.toFixed(2)}ms (${fmtOps((updateCount / updateMs) * 1000)} ops/sec) [${updateCount} updates]`);
// --- Delete by indexed field ---
const deleteCount = Math.min(count, 1_000);
@@ -123,11 +141,11 @@ async function benchmarkStorage(label: string, storage: BaseStorage<Doc>, docs:
await storage.deleteOne({ id: `id-${i}` });
}
});
console.log(` deleteOne indexed ${deleteMs.toFixed(2)}ms (${fmtOps((deleteCount / deleteMs) * 1000)} ops/sec) [${deleteCount} deletes]`);
console.log(` deleteOne indexed ${deleteMs.toFixed(2)}ms (${fmtOps((deleteCount / deleteMs) * 1000)} ops/sec) [${deleteCount} deletes]`);
// --- Verify remaining count ---
const remaining = await storage.find();
console.log(` remaining docs: ${remaining.length.toLocaleString()}`);
console.log(` remaining docs: ${remaining.length.toLocaleString()}`);
}
// ---------------------------------------------------------------------------
@@ -135,7 +153,7 @@ async function benchmarkStorage(label: string, storage: BaseStorage<Doc>, docs:
// ---------------------------------------------------------------------------
// ---------------------------------------------------------------------------
// StorageMemory — indexed vs non-indexed
// StorageMemory — B+ Tree range queries vs full scan
// ---------------------------------------------------------------------------
const DOC_COUNTS = [1_000, 10_000, 50_000];
@@ -143,46 +161,45 @@ const DOC_COUNTS = [1_000, 10_000, 50_000];
for (const count of DOC_COUNTS) {
const docs = generateDocs(count);
const indexed = StorageMemory.from<Doc>(['id', 'name']);
await benchmarkStorage('StorageMemory (indexed: id, name)', indexed, docs);
// Indexes on id, name, AND age — range queries on age use B+ Tree.
const indexedWithAge = StorageMemory.from<Doc>(['id', 'name', 'age']);
await benchmarkStorage('StorageMemory (indexed: id,name,age)', indexedWithAge, docs, { hasAgeIndex: true });
// Indexes on id, name only — range queries on age fall back to full scan.
const indexed = StorageMemory.from<Doc>(['id', 'name']);
await benchmarkStorage('StorageMemory (indexed: id,name)', indexed, docs);
// No indexes at all.
const noIndex = StorageMemory.from<Doc>();
await benchmarkStorage('StorageMemory (no indexes)', noIndex, docs);
}
// ---------------------------------------------------------------------------
// EncryptedStorage — with plaintextKeys for range queries
// EncryptedStorage
// ---------------------------------------------------------------------------
const ENCRYPTED_DOC_COUNTS = [100, 1_000, 10_000];
const ENCRYPTED_DOC_COUNTS = [100, 1_000];
const encryptionKey = await AESKey.fromSeed('benchmark-key');
for (const count of ENCRYPTED_DOC_COUNTS) {
const docs = generateDocs(count);
// No indexes + plaintextKeys — range queries on age work.
const encBaseNoIdx = StorageMemory.from<Record<string, any>>();
const encNoIdx = EncryptedStorage.from<Doc>(encBaseNoIdx, encryptionKey, {
plaintextKeys: ['age'],
});
await benchmarkStorage('Encrypted (no indexes, plaintextKeys: age)', encNoIdx, docs);
// Indexed + plaintextKeys — range queries on age work.
const encBaseA = StorageMemory.from<Record<string, any>>(['id', 'name']);
// Indexed + plaintextKeys (age) — range queries on age use B+ Tree via backing store.
const encBaseA = StorageMemory.from<Record<string, any>>(['id', 'name', 'age']);
const encA = EncryptedStorage.from<Doc>(encBaseA, encryptionKey, {
plaintextKeys: ['age'],
});
await benchmarkStorage('Encrypted (indexed, plaintextKeys: age)', encA, docs);
await benchmarkStorage('Encrypted (indexed+age, plaintextKeys: age)', encA, docs, { hasAgeIndex: true });
// Indexed, fully encrypted — same indexes but no plaintext keys.
// Indexed, fully encrypted — no range ops.
const encBaseB = StorageMemory.from<Record<string, any>>(['id', 'name']);
const encB = EncryptedStorage.from<Doc>(encBaseB, encryptionKey);
await benchmarkStorage('Encrypted (indexed, fully encrypted)', encB, docs, false);
await benchmarkStorage('Encrypted (indexed, fully encrypted)', encB, docs, { supportsRangeOps: false });
// No indexes, fully encrypted — worst case.
const encBaseC = StorageMemory.from<Record<string, any>>();
const encC = EncryptedStorage.from<Doc>(encBaseC, encryptionKey);
await benchmarkStorage('Encrypted (no indexes, fully encrypted)', encC, docs, false);
await benchmarkStorage('Encrypted (no indexes, fully encrypted)', encC, docs, { supportsRangeOps: false });
}
console.log('\nDone.\n');