From 64b811f3308b7f63a4b3decaab3670a7afb61a1d Mon Sep 17 00:00:00 2001 From: Harvey Zuccon Date: Wed, 25 Feb 2026 15:58:30 +1100 Subject: [PATCH] B-Tree indexing --- benchmarks/storage.ts | 83 +++-- src/storage/storage-localstorage.ts | 226 ++++++++---- src/storage/storage-memory.ts | 224 +++++++---- src/utils/btree.test.ts | 391 ++++++++++++++++++++ src/utils/btree.ts | 553 ++++++++++++++++++++++++++++ src/utils/index.ts | 1 + 6 files changed, 1300 insertions(+), 178 deletions(-) create mode 100644 src/utils/btree.test.ts create mode 100644 src/utils/btree.ts diff --git a/benchmarks/storage.ts b/benchmarks/storage.ts index 9bb350d..2460d90 100644 --- a/benchmarks/storage.ts +++ b/benchmarks/storage.ts @@ -49,7 +49,13 @@ function fmtOps(ops: number): string { /** * Run a full suite of benchmarks against a given storage instance. */ -async function benchmarkStorage(label: string, storage: BaseStorage, docs: Doc[], supportsRangeOps = true) { +async function benchmarkStorage( + label: string, + storage: BaseStorage, + docs: Doc[], + options: { supportsRangeOps?: boolean; hasAgeIndex?: boolean } = {}, +) { + const { supportsRangeOps = true, hasAgeIndex = false } = options; const count = docs.length; console.log(`\n${'='.repeat(60)}`); console.log(` ${label} (${count.toLocaleString()} documents)`); @@ -59,13 +65,13 @@ async function benchmarkStorage(label: string, storage: BaseStorage, docs: const insertMs = await time(async () => { await storage.insertMany(docs); }); - console.log(` insertMany ${insertMs.toFixed(2)}ms (${fmtOps((count / insertMs) * 1000)} ops/sec)`); + console.log(` insertMany ${insertMs.toFixed(2)}ms (${fmtOps((count / insertMs) * 1000)} ops/sec)`); // --- Find all (no filter) --- const findAllMs = await time(async () => { await storage.find(); }); - console.log(` find() ${findAllMs.toFixed(2)}ms (${fmtOps((count / findAllMs) * 1000)} docs/sec)`); + console.log(` find() ${findAllMs.toFixed(2)}ms (${fmtOps((count / findAllMs) * 1000)} docs/sec)`); // 
--- Find by indexed field (equality) --- const lookupCount = Math.min(count, 1_000); @@ -74,7 +80,7 @@ async function benchmarkStorage(label: string, storage: BaseStorage, docs: await storage.findOne({ id: `id-${i}` }); } }); - console.log(` findOne indexed ${findIndexedMs.toFixed(2)}ms (${fmtOps((lookupCount / findIndexedMs) * 1000)} ops/sec) [${lookupCount} lookups]`); + console.log(` findOne indexed ${findIndexedMs.toFixed(2)}ms (${fmtOps((lookupCount / findIndexedMs) * 1000)} ops/sec) [${lookupCount} lookups]`); // --- Find by non-indexed field (full scan) --- const scanCount = Math.min(count, 1_000); @@ -83,28 +89,40 @@ async function benchmarkStorage(label: string, storage: BaseStorage, docs: await storage.findOne({ email: `user-${i}@test.com` }); } }); - console.log(` findOne scan ${findScanMs.toFixed(2)}ms (${fmtOps((scanCount / findScanMs) * 1000)} ops/sec) [${scanCount} lookups]`); + console.log(` findOne scan ${findScanMs.toFixed(2)}ms (${fmtOps((scanCount / findScanMs) * 1000)} ops/sec) [${scanCount} lookups]`); - // --- Find with $gte / $lt range (full scan) --- + // --- Range queries --- if (supportsRangeOps) { + // Wide range: 20% selectivity (10 out of 50 age values). const rangeCount = Math.min(count, 100); - let rangeTotal = 0; - const findRangeMs = await time(async () => { + let rangeWideTotal = 0; + const findRangeWideMs = await time(async () => { for (let i = 0; i < rangeCount; i++) { const results = await storage.find({ age: { $gte: 30, $lt: 40 } }); - rangeTotal += results.length; + rangeWideTotal += results.length; } }); - console.log(` find $gte/$lt ${findRangeMs.toFixed(2)}ms (${fmtOps((rangeCount / findRangeMs) * 1000)} ops/sec) [${rangeCount} queries, ~${Math.round(rangeTotal / rangeCount)} hits/query]`); + const indexLabel = hasAgeIndex ? 
'B+Tree' : 'scan'; + console.log(` find wide [${indexLabel}] ${findRangeWideMs.toFixed(2)}ms (${fmtOps((rangeCount / findRangeWideMs) * 1000)} ops/sec) [${rangeCount}x, ~${Math.round(rangeWideTotal / rangeCount)} hits, 20% sel.]`); - // --- Find with combined equality + operator (index narrows, operator verifies) --- + // Narrow range: 2% selectivity (1 out of 50 age values). + let rangeNarrowTotal = 0; + const findRangeNarrowMs = await time(async () => { + for (let i = 0; i < rangeCount; i++) { + const results = await storage.find({ age: { $gte: 42, $lt: 43 } }); + rangeNarrowTotal += results.length; + } + }); + console.log(` find narrow [${indexLabel}] ${findRangeNarrowMs.toFixed(2)}ms (${fmtOps((rangeCount / findRangeNarrowMs) * 1000)} ops/sec) [${rangeCount}x, ~${Math.round(rangeNarrowTotal / rangeCount)} hits, 2% sel.]`); + + // --- Combined equality + operator --- const comboCount = Math.min(count, 1_000); const findComboMs = await time(async () => { for (let i = 0; i < comboCount; i++) { await storage.find({ id: `id-${i}`, age: { $gte: 20 } }); } }); - console.log(` find idx+operator ${findComboMs.toFixed(2)}ms (${fmtOps((comboCount / findComboMs) * 1000)} ops/sec) [${comboCount} queries]`); + console.log(` find idx+operator ${findComboMs.toFixed(2)}ms (${fmtOps((comboCount / findComboMs) * 1000)} ops/sec) [${comboCount} queries]`); } // --- Update by indexed field --- @@ -114,7 +132,7 @@ async function benchmarkStorage(label: string, storage: BaseStorage, docs: await storage.updateOne({ id: `id-${i}` }, { name: `updated-${i}` }); } }); - console.log(` updateOne indexed ${updateMs.toFixed(2)}ms (${fmtOps((updateCount / updateMs) * 1000)} ops/sec) [${updateCount} updates]`); + console.log(` updateOne indexed ${updateMs.toFixed(2)}ms (${fmtOps((updateCount / updateMs) * 1000)} ops/sec) [${updateCount} updates]`); // --- Delete by indexed field --- const deleteCount = Math.min(count, 1_000); @@ -123,11 +141,11 @@ async function benchmarkStorage(label: string, 
storage: BaseStorage, docs: await storage.deleteOne({ id: `id-${i}` }); } }); - console.log(` deleteOne indexed ${deleteMs.toFixed(2)}ms (${fmtOps((deleteCount / deleteMs) * 1000)} ops/sec) [${deleteCount} deletes]`); + console.log(` deleteOne indexed ${deleteMs.toFixed(2)}ms (${fmtOps((deleteCount / deleteMs) * 1000)} ops/sec) [${deleteCount} deletes]`); // --- Verify remaining count --- const remaining = await storage.find(); - console.log(` remaining docs: ${remaining.length.toLocaleString()}`); + console.log(` remaining docs: ${remaining.length.toLocaleString()}`); } // --------------------------------------------------------------------------- @@ -135,7 +153,7 @@ async function benchmarkStorage(label: string, storage: BaseStorage, docs: // --------------------------------------------------------------------------- // --------------------------------------------------------------------------- -// StorageMemory — indexed vs non-indexed +// StorageMemory — B+ Tree range queries vs full scan // --------------------------------------------------------------------------- const DOC_COUNTS = [1_000, 10_000, 50_000]; @@ -143,46 +161,45 @@ const DOC_COUNTS = [1_000, 10_000, 50_000]; for (const count of DOC_COUNTS) { const docs = generateDocs(count); - const indexed = StorageMemory.from(['id', 'name']); - await benchmarkStorage('StorageMemory (indexed: id, name)', indexed, docs); + // Indexes on id, name, AND age — range queries on age use B+ Tree. + const indexedWithAge = StorageMemory.from(['id', 'name', 'age']); + await benchmarkStorage('StorageMemory (indexed: id,name,age)', indexedWithAge, docs, { hasAgeIndex: true }); + // Indexes on id, name only — range queries on age fall back to full scan. + const indexed = StorageMemory.from(['id', 'name']); + await benchmarkStorage('StorageMemory (indexed: id,name)', indexed, docs); + + // No indexes at all. 
const noIndex = StorageMemory.from(); await benchmarkStorage('StorageMemory (no indexes)', noIndex, docs); } // --------------------------------------------------------------------------- -// EncryptedStorage — with plaintextKeys for range queries +// EncryptedStorage // --------------------------------------------------------------------------- -const ENCRYPTED_DOC_COUNTS = [100, 1_000, 10_000]; +const ENCRYPTED_DOC_COUNTS = [100, 1_000]; const encryptionKey = await AESKey.fromSeed('benchmark-key'); for (const count of ENCRYPTED_DOC_COUNTS) { const docs = generateDocs(count); - // No indexes + plaintextKeys — range queries on age work. - const encBaseNoIdx = StorageMemory.from>(); - const encNoIdx = EncryptedStorage.from(encBaseNoIdx, encryptionKey, { - plaintextKeys: ['age'], - }); - await benchmarkStorage('Encrypted (no indexes, plaintextKeys: age)', encNoIdx, docs); - - // Indexed + plaintextKeys — range queries on age work. - const encBaseA = StorageMemory.from>(['id', 'name']); + // Indexed + plaintextKeys (age) — range queries on age use B+ Tree via backing store. + const encBaseA = StorageMemory.from>(['id', 'name', 'age']); const encA = EncryptedStorage.from(encBaseA, encryptionKey, { plaintextKeys: ['age'], }); - await benchmarkStorage('Encrypted (indexed, plaintextKeys: age)', encA, docs); + await benchmarkStorage('Encrypted (indexed+age, plaintextKeys: age)', encA, docs, { hasAgeIndex: true }); - // Indexed, fully encrypted — same indexes but no plaintext keys. + // Indexed, fully encrypted — no range ops. const encBaseB = StorageMemory.from>(['id', 'name']); const encB = EncryptedStorage.from(encBaseB, encryptionKey); - await benchmarkStorage('Encrypted (indexed, fully encrypted)', encB, docs, false); + await benchmarkStorage('Encrypted (indexed, fully encrypted)', encB, docs, { supportsRangeOps: false }); // No indexes, fully encrypted — worst case. 
const encBaseC = StorageMemory.from>(); const encC = EncryptedStorage.from(encBaseC, encryptionKey); - await benchmarkStorage('Encrypted (no indexes, fully encrypted)', encC, docs, false); + await benchmarkStorage('Encrypted (no indexes, fully encrypted)', encC, docs, { supportsRangeOps: false }); } console.log('\nDone.\n'); diff --git a/src/storage/storage-localstorage.ts b/src/storage/storage-localstorage.ts index c70dc31..fac54ea 100644 --- a/src/storage/storage-localstorage.ts +++ b/src/storage/storage-localstorage.ts @@ -6,6 +6,7 @@ import { type ComparisonOperators, isOperatorObject, } from './base-storage.js'; +import { BPlusTree, type BPlusTreeEntry } from 'src/utils/btree.js'; /** * Key prefix separator used to namespace documents within localStorage. @@ -25,7 +26,7 @@ const MANIFEST_SUFFIX = '__keys__'; const COUNTER_SUFFIX = '__next__'; /** - * Separator used when joining multiple field values into a single index key. + * Separator used when joining field names to create the index map key. */ const INDEX_KEY_SEP = '\x00'; @@ -41,6 +42,18 @@ function normalizeIndexes(indexes?: IndexDefinition): string[][] { return indexes as string[][]; } +/** + * Comparator for compound index keys (arrays of raw values). + */ +function tupleCompare(a: any[], b: any[]): number { + const len = Math.min(a.length, b.length); + for (let i = 0; i < len; i++) { + if (a[i] < b[i]) return -1; + if (a[i] > b[i]) return 1; + } + return a.length - b.length; +} + /** * Implementation of BaseStorage using the browser's localStorage API. * @@ -50,9 +63,9 @@ function normalizeIndexes(indexes?: IndexDefinition): string[][] { * tracks all internal keys so that read operations avoid scanning every * key in localStorage. * - * Optional indexes provide fast lookups when a query filter matches - * an index exactly. Indexes are held in memory and rebuilt only when a - * cross-tab manifest change is detected. 
+ * Optional indexes are backed by B+ Trees, providing O(log n) equality + * lookups and O(log n + k) range queries. Indexes are held in memory and + * rebuilt only when a cross-tab manifest change is detected. * * Because localStorage is synchronous and string-only, all values are * JSON-serialised on write and parsed on read. @@ -91,12 +104,11 @@ export class StorageLocalStorage< private indexDefs: string[][]; /** - * Secondary index maps (same structure as StorageMemory). - * Outer key = index name (joined field names). - * Inner key = index value (joined field values from a document). - * Inner value = set of internal numeric keys. + * Secondary indexes backed by B+ Trees. + * Map key = index name (joined field names). + * Map value = B+ Tree mapping index keys to sets of internal document keys. */ - private indexes: Map>>; + private indexes: Map>; /** Lazily-created child storage instances. */ private children: Map>; @@ -111,7 +123,9 @@ export class StorageLocalStorage< this.indexDefs = normalizeIndexes(indexes); this.indexes = new Map(); for (const fields of this.indexDefs) { - this.indexes.set(fields.join(INDEX_KEY_SEP), new Map()); + const name = fields.join(INDEX_KEY_SEP); + const comparator = fields.length > 1 ? tupleCompare : undefined; + this.indexes.set(name, new BPlusTree(32, comparator)); } // Bootstrap from localStorage. @@ -143,18 +157,20 @@ export class StorageLocalStorage< this.refreshManifest(); let results: T[]; - const indexedKeys = this.resolveIndexKeys(filter); + const resolution = this.resolveIndexKeys(filter); + + if (resolution !== null) { + const { keys, resolvedFields } = resolution; + const filterKeys = filter ? Object.keys(filter) : []; + const needsVerification = filterKeys.some((k) => !resolvedFields.includes(k)); - if (indexedKeys !== null) { - // Use the index to narrow which documents we read from localStorage. 
results = []; - for (const key of indexedKeys) { + for (const key of keys) { const raw = localStorage.getItem(this.docKey(key)); if (raw === null) continue; const doc = JSON.parse(raw) as T; - if (this.matchesFilter(doc, filter)) { - results.push(doc); - } + if (needsVerification && !this.matchesFilter(doc, filter)) continue; + results.push(doc); } } else { // Full scan over all documents in the manifest. @@ -290,17 +306,30 @@ export class StorageLocalStorage< * Uses an index when possible, otherwise falls back to a full scan. */ private collectMatches(filter?: Filter): Array<[number, T]> { - const indexKeys = this.resolveIndexKeys(filter); - const keysToScan = indexKeys ?? this.manifest; + const resolution = this.resolveIndexKeys(filter); const results: Array<[number, T]> = []; - for (const key of keysToScan) { - const raw = localStorage.getItem(this.docKey(key)); - if (raw === null) continue; - const doc = JSON.parse(raw) as T; - if (this.matchesFilter(doc, filter)) { + if (resolution !== null) { + const { keys, resolvedFields } = resolution; + const filterKeys = filter ? Object.keys(filter) : []; + const needsVerification = filterKeys.some((k) => !resolvedFields.includes(k)); + + for (const key of keys) { + const raw = localStorage.getItem(this.docKey(key)); + if (raw === null) continue; + const doc = JSON.parse(raw) as T; + if (needsVerification && !this.matchesFilter(doc, filter)) continue; results.push([key, doc]); } + } else { + for (const key of this.manifest) { + const raw = localStorage.getItem(this.docKey(key)); + if (raw === null) continue; + const doc = JSON.parse(raw) as T; + if (this.matchesFilter(doc, filter)) { + results.push([key, doc]); + } + } } return results; @@ -325,48 +354,44 @@ export class StorageLocalStorage< // --------------------------------------------------------------------------- /** - * Build the index value string for a given document and set of fields. - * Returns `null` if any field is missing from the document. 
+ * Build the B+ Tree key for a document and a set of index fields. + * - Single-field indexes return the raw field value. + * - Compound indexes return an array of raw field values. + * Returns `null` if any required field is missing from the document. */ - private buildIndexValue(doc: Record, fields: string[]): string | null { - const parts: string[] = []; + private buildIndexKey(doc: Record, fields: string[]): any | null { + if (fields.length === 1) { + if (!(fields[0] in doc)) return null; + return doc[fields[0]]; + } + + const parts: any[] = []; for (const field of fields) { if (!(field in doc)) return null; - parts.push(String(doc[field])); + parts.push(doc[field]); } - return parts.join(INDEX_KEY_SEP); + return parts; } /** Register a document in all applicable indexes. */ private addToIndexes(internalKey: number, doc: T): void { for (const fields of this.indexDefs) { - const indexName = fields.join(INDEX_KEY_SEP); - const indexValue = this.buildIndexValue(doc, fields); - if (indexValue === null) continue; + const indexKey = this.buildIndexKey(doc, fields); + if (indexKey === null) continue; - const indexMap = this.indexes.get(indexName)!; - let bucket = indexMap.get(indexValue); - if (!bucket) { - bucket = new Set(); - indexMap.set(indexValue, bucket); - } - bucket.add(internalKey); + const name = fields.join(INDEX_KEY_SEP); + this.indexes.get(name)!.insert(indexKey, internalKey); } } /** Remove a document from all applicable indexes. 
*/ private removeFromIndexes(internalKey: number, doc: T): void { for (const fields of this.indexDefs) { - const indexName = fields.join(INDEX_KEY_SEP); - const indexValue = this.buildIndexValue(doc, fields); - if (indexValue === null) continue; + const indexKey = this.buildIndexKey(doc, fields); + if (indexKey === null) continue; - const indexMap = this.indexes.get(indexName)!; - const bucket = indexMap.get(indexValue); - if (bucket) { - bucket.delete(internalKey); - if (bucket.size === 0) indexMap.delete(indexValue); - } + const name = fields.join(INDEX_KEY_SEP); + this.indexes.get(name)!.delete(indexKey, internalKey); } } @@ -374,45 +399,104 @@ export class StorageLocalStorage< * Attempt to resolve candidate internal keys from the indexes. * Returns `null` if no index can serve the query. * - * Only plain equality values are used for index resolution — operator - * objects are excluded since hash-based indexes only support equality. + * Supports three resolution strategies: + * 1. Equality lookup via B+ Tree `.get()` — O(log n) + * 2. Range scan via B+ Tree `.range()` — O(log n + k) + * 3. 
Compound equality — B+ Tree `.get()` with a tuple key */ - private resolveIndexKeys(filter?: Filter): Set | null { + private resolveIndexKeys( + filter?: Filter, + ): { keys: Iterable; resolvedFields: string[] } | null { if (!filter) return null; const filterKeys = Object.keys(filter); if (filterKeys.length === 0) return null; - const equalityFilter: Record = {}; - for (const [key, value] of Object.entries(filter)) { - if (!isOperatorObject(value)) { - equalityFilter[key] = value; - } - } - - if (Object.keys(equalityFilter).length === 0) return null; - for (const fields of this.indexDefs) { - if (!fields.every((f) => f in equalityFilter)) continue; - const indexName = fields.join(INDEX_KEY_SEP); - const indexValue = this.buildIndexValue(equalityFilter, fields); - if (indexValue === null) continue; + const btree = this.indexes.get(indexName)!; - const indexMap = this.indexes.get(indexName)!; - const bucket = indexMap.get(indexValue); - return bucket ?? new Set(); + if (fields.length === 1) { + // --- Single-field index --- + const field = fields[0]; + if (!(field in filter)) continue; + + const filterValue = (filter as any)[field]; + + if (isOperatorObject(filterValue)) { + const keys = this.resolveOperatorViaTree(btree, filterValue); + if (keys !== null) return { keys, resolvedFields: [field] }; + continue; + } + + // Plain equality. + return { keys: btree.get(filterValue) ?? [], resolvedFields: [field] }; + } else { + // --- Compound index — all fields must be plain equality --- + if (!fields.every((f) => f in filter && !isOperatorObject((filter as any)[f]))) { + continue; + } + + const tupleKey = fields.map((f) => (filter as any)[f]); + return { keys: btree.get(tupleKey) ?? [], resolvedFields: [...fields] }; + } } return null; } /** - * Rebuild all in-memory index maps by reading every document from + * Try to resolve an operator filter against a single-field B+ Tree index. 
+ * Returns a flat array of matching internal keys, or null if the + * operators can't be served by the tree ($ne). + */ + private resolveOperatorViaTree( + btree: BPlusTree, + ops: ComparisonOperators, + ): Iterable | null { + if (ops.$ne !== undefined) return null; + + if (ops.$eq !== undefined) { + return btree.get(ops.$eq) ?? []; + } + + let min: any = undefined; + let max: any = undefined; + let lowerInclusive = true; + let upperInclusive = false; + + if (ops.$gt !== undefined) { min = ops.$gt; lowerInclusive = false; } + if (ops.$gte !== undefined) { min = ops.$gte; lowerInclusive = true; } + if (ops.$lt !== undefined) { max = ops.$lt; upperInclusive = false; } + if (ops.$lte !== undefined) { max = ops.$lte; upperInclusive = true; } + + if (min === undefined && max === undefined) return null; + + const entries = btree.range(min, max, { lowerInclusive, upperInclusive }); + return this.flattenEntryKeys(entries); + } + + /** + * Flatten B+ Tree range results into a flat array of internal keys. + * Uses an array instead of a Set — no hash overhead, no deduplication + * needed because each internal key only appears under one index key. + */ + private flattenEntryKeys(entries: BPlusTreeEntry[]): number[] { + const result: number[] = []; + for (const entry of entries) { + for (const key of entry.values) { + result.push(key); + } + } + return result; + } + + /** + * Rebuild all in-memory index B+ Trees by reading every document from * localStorage. Called only when a cross-tab manifest change is detected. 
*/ private rebuildIndexes(): void { - for (const [, indexMap] of this.indexes) { - indexMap.clear(); + for (const [, btree] of this.indexes) { + btree.clear(); } for (const key of this.manifest) { diff --git a/src/storage/storage-memory.ts b/src/storage/storage-memory.ts index 30ec34d..9a0d064 100644 --- a/src/storage/storage-memory.ts +++ b/src/storage/storage-memory.ts @@ -6,10 +6,10 @@ import { type ComparisonOperators, isOperatorObject, } from './base-storage.js'; +import { BPlusTree, type BPlusTreeEntry } from 'src/utils/btree.js'; /** - * Separator used when joining multiple field values into a single index key. - * Chosen to be unlikely to appear in real field values. + * Separator used when joining field names to create the index map key. */ const INDEX_KEY_SEP = '\x00'; @@ -29,13 +29,26 @@ function normalizeIndexes(indexes?: IndexDefinition): string[][] { return indexes as string[][]; } +/** + * Comparator for compound index keys (arrays of raw values). + * Compares element-by-element using native `<` / `>` operators. + */ +function tupleCompare(a: any[], b: any[]): number { + const len = Math.min(a.length, b.length); + for (let i = 0; i < len; i++) { + if (a[i] < b[i]) return -1; + if (a[i] > b[i]) return 1; + } + return a.length - b.length; +} + /** * Implementation of BaseStore using Memory as the storage backend. * * @remarks * Documents are keyed internally by an auto-incrementing numeric key. - * Optional indexes provide O(1) lookups when a query filter matches - * an index exactly. + * Optional indexes are backed by B+ Trees, providing O(log n) equality + * lookups and O(log n + k) range queries. */ export class StorageMemory< T extends Record = Record, @@ -53,12 +66,11 @@ export class StorageMemory< private store: Map; /** - * Secondary index maps. - * Outer key = index name (joined field names). - * Inner key = index value (joined field values from a document). - * Inner value = set of internal keys that share this index value. 
+ * Secondary indexes backed by B+ Trees. + * Map key = index name (joined field names). + * Map value = B+ Tree mapping index keys to sets of internal document keys. */ - private indexes: Map>>; + private indexes: Map>; /** The normalized index definitions supplied at construction time. */ private indexDefs: string[][]; @@ -73,10 +85,12 @@ export class StorageMemory< this.children = new Map(); this.indexDefs = normalizeIndexes(indexes); - // Initialise an empty map for each index definition. + // Create a B+ Tree for each index definition. this.indexes = new Map(); for (const fields of this.indexDefs) { - this.indexes.set(fields.join(INDEX_KEY_SEP), new Map()); + const name = fields.join(INDEX_KEY_SEP); + const comparator = fields.length > 1 ? tupleCompare : undefined; + this.indexes.set(name, new BPlusTree(32, comparator)); } } @@ -225,16 +239,19 @@ export class StorageMemory< * Uses an index when possible, otherwise falls back to a full scan. */ private collectMatches(filter?: Filter): Array<[number, T]> { - const indexKeys = this.resolveIndexKeys(filter); + const resolution = this.resolveIndexKeys(filter); + + if (resolution !== null) { + const { keys, resolvedFields } = resolution; + const filterKeys = filter ? Object.keys(filter) : []; + const needsVerification = filterKeys.some((k) => !resolvedFields.includes(k)); - if (indexKeys !== null) { - // We have candidate internal keys from the index — fetch and verify. 
const results: Array<[number, T]> = []; - for (const key of indexKeys) { + for (const key of keys) { const doc = this.store.get(key); - if (doc && this.matchesFilter(doc, filter)) { - results.push([key, doc]); - } + if (!doc) continue; + if (needsVerification && !this.matchesFilter(doc, filter)) continue; + results.push([key, doc]); } return results; } @@ -269,106 +286,165 @@ export class StorageMemory< // --------------------------------------------------------------------------- /** - * Build the index value string for a given document and set of fields. - * Returns `null` if any of the fields are missing from the document, - * since we can't meaningfully index a partial key. + * Build the B+ Tree key for a document and a set of index fields. + * - Single-field indexes return the raw field value. + * - Compound indexes return an array of raw field values. + * Returns `null` if any required field is missing from the document. */ - private buildIndexValue(doc: Record, fields: string[]): string | null { - const parts: string[] = []; + private buildIndexKey(doc: Record, fields: string[]): any | null { + if (fields.length === 1) { + if (!(fields[0] in doc)) return null; + return doc[fields[0]]; + } + + const parts: any[] = []; for (const field of fields) { if (!(field in doc)) return null; - parts.push(String(doc[field])); + parts.push(doc[field]); } - return parts.join(INDEX_KEY_SEP); + return parts; } /** Register a document in all applicable indexes. 
*/ private addToIndexes(internalKey: number, doc: T): void { for (const fields of this.indexDefs) { - const indexName = fields.join(INDEX_KEY_SEP); - const indexValue = this.buildIndexValue(doc, fields); - if (indexValue === null) continue; + const indexKey = this.buildIndexKey(doc, fields); + if (indexKey === null) continue; - const indexMap = this.indexes.get(indexName)!; - let bucket = indexMap.get(indexValue); - if (!bucket) { - bucket = new Set(); - indexMap.set(indexValue, bucket); - } - bucket.add(internalKey); + const name = fields.join(INDEX_KEY_SEP); + this.indexes.get(name)!.insert(indexKey, internalKey); } } /** Remove a document from all applicable indexes. */ private removeFromIndexes(internalKey: number, doc: T): void { for (const fields of this.indexDefs) { - const indexName = fields.join(INDEX_KEY_SEP); - const indexValue = this.buildIndexValue(doc, fields); - if (indexValue === null) continue; + const indexKey = this.buildIndexKey(doc, fields); + if (indexKey === null) continue; - const indexMap = this.indexes.get(indexName)!; - const bucket = indexMap.get(indexValue); - if (bucket) { - bucket.delete(internalKey); - if (bucket.size === 0) indexMap.delete(indexValue); - } + const name = fields.join(INDEX_KEY_SEP); + this.indexes.get(name)!.delete(indexKey, internalKey); } } /** - * Attempt to resolve a set of candidate internal keys from the indexes. - * Returns `null` if no index can serve the query. - * - * An index is used when the filter contains plain equality values for every - * field in the index. Operator objects (e.g. `{ $lt: 50 }`) are excluded - * from index resolution since hash-based indexes only support equality. + * Result of an index resolution attempt. + * `keys` is an iterable of candidate internal keys. + * `resolvedFields` lists the filter fields fully satisfied by the index, + * so callers can skip re-verifying those conditions in matchesFilter. 
*/ - private resolveIndexKeys(filter?: Filter): Set | null { + private resolveIndexKeys( + filter?: Filter, + ): { keys: Iterable; resolvedFields: string[] } | null { if (!filter) return null; const filterKeys = Object.keys(filter); if (filterKeys.length === 0) return null; - // Extract only the equality fields from the filter (skip operator objects). - const equalityFilter: Record = {}; - for (const [key, value] of Object.entries(filter)) { - if (!isOperatorObject(value)) { - equalityFilter[key] = value; - } - } - - if (Object.keys(equalityFilter).length === 0) return null; - for (const fields of this.indexDefs) { - // Every field in the index must be present as an equality value. - if (!fields.every((f) => f in equalityFilter)) continue; - const indexName = fields.join(INDEX_KEY_SEP); - const indexValue = this.buildIndexValue(equalityFilter, fields); - if (indexValue === null) continue; + const btree = this.indexes.get(indexName)!; - const indexMap = this.indexes.get(indexName)!; - const bucket = indexMap.get(indexValue); - return bucket ?? new Set(); + if (fields.length === 1) { + // --- Single-field index --- + const field = fields[0]; + if (!(field in filter)) continue; + + const filterValue = (filter as any)[field]; + + if (isOperatorObject(filterValue)) { + const keys = this.resolveOperatorViaTree(btree, filterValue); + if (keys !== null) return { keys, resolvedFields: [field] }; + continue; + } + + // Plain equality. + return { keys: btree.get(filterValue) ?? [], resolvedFields: [field] }; + } else { + // --- Compound index — all fields must be plain equality --- + if (!fields.every((f) => f in filter && !isOperatorObject((filter as any)[f]))) { + continue; + } + + const tupleKey = fields.map((f) => (filter as any)[f]); + return { keys: btree.get(tupleKey) ?? [], resolvedFields: [...fields] }; + } } return null; } + /** + * Try to resolve an operator filter against a single-field B+ Tree index. 
+ * Returns a flat array of matching internal keys, or null if the + * operators can't be served by the tree ($ne). + */ + private resolveOperatorViaTree( + btree: BPlusTree, + ops: ComparisonOperators, + ): Iterable | null { + // $ne prevents efficient index use. + if (ops.$ne !== undefined) return null; + + // $eq is a point lookup. + if (ops.$eq !== undefined) { + return btree.get(ops.$eq) ?? []; + } + + // Extract range bounds from the remaining operators. + let min: any = undefined; + let max: any = undefined; + let lowerInclusive = true; + let upperInclusive = false; + + if (ops.$gt !== undefined) { min = ops.$gt; lowerInclusive = false; } + if (ops.$gte !== undefined) { min = ops.$gte; lowerInclusive = true; } + if (ops.$lt !== undefined) { max = ops.$lt; upperInclusive = false; } + if (ops.$lte !== undefined) { max = ops.$lte; upperInclusive = true; } + + if (min === undefined && max === undefined) return null; + + const entries = btree.range(min, max, { lowerInclusive, upperInclusive }); + return this.flattenEntryKeys(entries); + } + + /** + * Flatten B+ Tree range results into a flat array of internal keys. + * Uses an array instead of a Set — no hash overhead, no deduplication + * needed because each internal key only appears under one index key. + */ + private flattenEntryKeys(entries: BPlusTreeEntry[]): number[] { + const result: number[] = []; + for (const entry of entries) { + for (const key of entry.values) { + result.push(key); + } + } + return result; + } + /** * Try to answer a `find` query entirely through an index. * Returns `null` when no index can serve the filter, signalling * the caller to fall back to a full scan. + * + * When the index covers every field in the filter, matchesFilter + * is skipped entirely — the B+ Tree has already ensured the + * conditions are met. 
*/ private findViaIndex(filter?: Filter): T[] | null { - const keys = this.resolveIndexKeys(filter); - if (keys === null) return null; + const resolution = this.resolveIndexKeys(filter); + if (resolution === null) return null; + + const { keys, resolvedFields } = resolution; + const filterKeys = filter ? Object.keys(filter) : []; + const needsVerification = filterKeys.some((k) => !resolvedFields.includes(k)); const results: T[] = []; for (const key of keys) { const doc = this.store.get(key); - if (doc && this.matchesFilter(doc, filter)) { - results.push(doc); - } + if (!doc) continue; + if (needsVerification && !this.matchesFilter(doc, filter)) continue; + results.push(doc); } return results; } diff --git a/src/utils/btree.test.ts b/src/utils/btree.test.ts new file mode 100644 index 0000000..5a49772 --- /dev/null +++ b/src/utils/btree.test.ts @@ -0,0 +1,391 @@ +import { describe, it, expect, beforeEach } from 'vitest'; +import { BPlusTree } from './btree.js'; + +describe('BPlusTree', () => { + let tree: BPlusTree; + + beforeEach(() => { + tree = new BPlusTree(); + }); + + // ------------------------------------------------------------------------- + // Construction + // ------------------------------------------------------------------------- + + describe('constructor', () => { + it('should create an empty tree', () => { + expect(tree.size).toBe(0); + }); + + it('should reject order < 3', () => { + expect(() => new BPlusTree(2)).toThrow('order must be at least 3'); + }); + }); + + // ------------------------------------------------------------------------- + // Insert & Get + // ------------------------------------------------------------------------- + + describe('insert and get', () => { + it('should insert and retrieve a single entry', () => { + tree.insert(10, 'a'); + expect(tree.get(10)).toEqual(new Set(['a'])); + expect(tree.size).toBe(1); + }); + + it('should handle multiple distinct keys', () => { + tree.insert(10, 'a'); + tree.insert(20, 'b'); + 
tree.insert(5, 'c'); + expect(tree.get(10)).toEqual(new Set(['a'])); + expect(tree.get(20)).toEqual(new Set(['b'])); + expect(tree.get(5)).toEqual(new Set(['c'])); + expect(tree.size).toBe(3); + }); + + it('should return undefined for missing keys', () => { + tree.insert(10, 'a'); + expect(tree.get(99)).toBeUndefined(); + }); + + it('should accumulate duplicate keys into a Set', () => { + tree.insert(10, 'a'); + tree.insert(10, 'b'); + tree.insert(10, 'c'); + expect(tree.get(10)).toEqual(new Set(['a', 'b', 'c'])); + expect(tree.size).toBe(3); + }); + + it('should not double-count duplicate values for the same key', () => { + tree.insert(10, 'a'); + tree.insert(10, 'a'); + expect(tree.get(10)).toEqual(new Set(['a'])); + expect(tree.size).toBe(1); + }); + }); + + // ------------------------------------------------------------------------- + // Delete + // ------------------------------------------------------------------------- + + describe('delete', () => { + it('should delete a specific value from a key', () => { + tree.insert(10, 'a'); + tree.insert(10, 'b'); + expect(tree.delete(10, 'a')).toBe(true); + expect(tree.get(10)).toEqual(new Set(['b'])); + expect(tree.size).toBe(1); + }); + + it('should remove the key entry when its last value is deleted', () => { + tree.insert(10, 'a'); + expect(tree.delete(10, 'a')).toBe(true); + expect(tree.get(10)).toBeUndefined(); + expect(tree.size).toBe(0); + }); + + it('should delete all values for a key when value is omitted', () => { + tree.insert(10, 'a'); + tree.insert(10, 'b'); + expect(tree.delete(10)).toBe(true); + expect(tree.get(10)).toBeUndefined(); + expect(tree.size).toBe(0); + }); + + it('should return false for non-existent key', () => { + expect(tree.delete(99)).toBe(false); + }); + + it('should return false for non-existent value', () => { + tree.insert(10, 'a'); + expect(tree.delete(10, 'z')).toBe(false); + expect(tree.size).toBe(1); + }); + }); + + // 
------------------------------------------------------------------------- + // Range queries + // ------------------------------------------------------------------------- + + describe('range', () => { + beforeEach(() => { + for (let i = 0; i < 100; i++) { + tree.insert(i, `v${i}`); + } + }); + + it('should return all entries when no bounds given', () => { + const result = tree.range(); + expect(result.length).toBe(100); + expect(result[0].key).toBe(0); + expect(result[99].key).toBe(99); + }); + + it('should return entries in key order', () => { + const keys = tree.range().map((e) => e.key); + for (let i = 1; i < keys.length; i++) { + expect(keys[i]).toBeGreaterThan(keys[i - 1]); + } + }); + + it('should respect lower bound (inclusive by default)', () => { + const result = tree.range(50); + expect(result.length).toBe(50); + expect(result[0].key).toBe(50); + }); + + it('should respect upper bound (exclusive by default)', () => { + const result = tree.range(undefined, 10); + expect(result.length).toBe(10); + expect(result[result.length - 1].key).toBe(9); + }); + + it('should support inclusive upper bound', () => { + const result = tree.range(undefined, 10, { upperInclusive: true }); + expect(result.length).toBe(11); + expect(result[result.length - 1].key).toBe(10); + }); + + it('should support exclusive lower bound', () => { + const result = tree.range(50, undefined, { lowerInclusive: false }); + expect(result.length).toBe(49); + expect(result[0].key).toBe(51); + }); + + it('should handle combined bounds', () => { + const result = tree.range(20, 30); + expect(result.length).toBe(10); + expect(result[0].key).toBe(20); + expect(result[result.length - 1].key).toBe(29); + }); + + it('should return empty array for no-result range', () => { + const result = tree.range(200, 300); + expect(result).toEqual([]); + }); + + it('should return empty for inverted bounds', () => { + const result = tree.range(50, 10); + expect(result).toEqual([]); + }); + }); + + // 
------------------------------------------------------------------------- + // Edge cases + // ------------------------------------------------------------------------- + + describe('edge cases', () => { + it('should handle get on empty tree', () => { + expect(tree.get(1)).toBeUndefined(); + }); + + it('should handle range on empty tree', () => { + expect(tree.range()).toEqual([]); + }); + + it('should handle delete on empty tree', () => { + expect(tree.delete(1)).toBe(false); + }); + + it('should handle insert-then-delete-all back to empty', () => { + for (let i = 0; i < 50; i++) { + tree.insert(i, `v${i}`); + } + for (let i = 0; i < 50; i++) { + expect(tree.delete(i, `v${i}`)).toBe(true); + } + expect(tree.size).toBe(0); + expect(tree.range()).toEqual([]); + // Verify we can still insert after emptying. + tree.insert(1, 'new'); + expect(tree.get(1)).toEqual(new Set(['new'])); + }); + }); + + // ------------------------------------------------------------------------- + // Clear + // ------------------------------------------------------------------------- + + describe('clear', () => { + it('should reset the tree to empty', () => { + for (let i = 0; i < 100; i++) tree.insert(i, `v${i}`); + expect(tree.size).toBe(100); + tree.clear(); + expect(tree.size).toBe(0); + expect(tree.get(0)).toBeUndefined(); + expect(tree.range()).toEqual([]); + }); + }); + + // ------------------------------------------------------------------------- + // Entries iterator + // ------------------------------------------------------------------------- + + describe('entries', () => { + it('should yield all entries in key order', () => { + tree.insert(30, 'c'); + tree.insert(10, 'a'); + tree.insert(20, 'b'); + + const result = [...tree.entries()]; + expect(result.map((e) => e.key)).toEqual([10, 20, 30]); + }); + + it('should yield nothing for empty tree', () => { + expect([...tree.entries()]).toEqual([]); + }); + }); + + // 
------------------------------------------------------------------------- + // Large dataset + // ------------------------------------------------------------------------- + + describe('large dataset', () => { + const N = 10_000; + + it('should correctly store and retrieve N items', () => { + for (let i = 0; i < N; i++) { + tree.insert(i, `v${i}`); + } + expect(tree.size).toBe(N); + + // Spot-check some values. + expect(tree.get(0)).toEqual(new Set(['v0'])); + expect(tree.get(N - 1)).toEqual(new Set([`v${N - 1}`])); + expect(tree.get(Math.floor(N / 2))).toEqual(new Set([`v${Math.floor(N / 2)}`])); + }); + + it('should produce correct range results on large dataset', () => { + for (let i = 0; i < N; i++) { + tree.insert(i, `v${i}`); + } + + const result = tree.range(5000, 5010); + expect(result.length).toBe(10); + expect(result[0].key).toBe(5000); + expect(result[9].key).toBe(5009); + }); + + it('should survive inserting and deleting many items', () => { + for (let i = 0; i < N; i++) { + tree.insert(i, `v${i}`); + } + + // Delete the first half. + for (let i = 0; i < N / 2; i++) { + expect(tree.delete(i, `v${i}`)).toBe(true); + } + + expect(tree.size).toBe(N / 2); + expect(tree.get(0)).toBeUndefined(); + expect(tree.get(N / 2)).toEqual(new Set([`v${N / 2}`])); + + // Remaining range should start at N/2. 
+ const remaining = tree.range(); + expect(remaining.length).toBe(N / 2); + expect(remaining[0].key).toBe(N / 2); + }); + }); + + // ------------------------------------------------------------------------- + // Custom comparator + // ------------------------------------------------------------------------- + + describe('custom comparator', () => { + it('should support reverse ordering', () => { + const reverseTree = new BPlusTree(32, (a, b) => b - a); + reverseTree.insert(1, 'a'); + reverseTree.insert(2, 'b'); + reverseTree.insert(3, 'c'); + + const entries = [...reverseTree.entries()]; + expect(entries.map((e) => e.key)).toEqual([3, 2, 1]); + }); + }); + + // ------------------------------------------------------------------------- + // Node splitting (small order to force splits) + // ------------------------------------------------------------------------- + + describe('node splitting with small order', () => { + let smallTree: BPlusTree; + + beforeEach(() => { + smallTree = new BPlusTree(4); + }); + + it('should handle splits correctly', () => { + // Order 4 means max 3 keys per node — splits after the 4th insert. + for (let i = 0; i < 20; i++) { + smallTree.insert(i, `v${i}`); + } + expect(smallTree.size).toBe(20); + + // All values should be retrievable. + for (let i = 0; i < 20; i++) { + expect(smallTree.get(i)).toEqual(new Set([`v${i}`])); + } + }); + + it('should maintain sorted order after many splits', () => { + // Insert in random order to stress split logic. 
+ const values = Array.from({ length: 50 }, (_, i) => i); + for (let i = values.length - 1; i > 0; i--) { + const j = Math.floor(Math.random() * (i + 1)); + [values[i], values[j]] = [values[j], values[i]]; + } + + for (const v of values) { + smallTree.insert(v, `v${v}`); + } + + const entries = [...smallTree.entries()]; + const keys = entries.map((e) => e.key); + expect(keys).toEqual([...keys].sort((a, b) => a - b)); + }); + + it('should handle delete with merging at small order', () => { + for (let i = 0; i < 20; i++) { + smallTree.insert(i, `v${i}`); + } + + // Delete enough to trigger merges. + for (let i = 0; i < 15; i++) { + expect(smallTree.delete(i, `v${i}`)).toBe(true); + } + + expect(smallTree.size).toBe(5); + + // Remaining keys should be intact. + for (let i = 15; i < 20; i++) { + expect(smallTree.get(i)).toEqual(new Set([`v${i}`])); + } + }); + }); + + // ------------------------------------------------------------------------- + // String keys + // ------------------------------------------------------------------------- + + describe('string keys', () => { + it('should work with string keys using default comparator', () => { + const strTree = new BPlusTree(); + strTree.insert('banana', 1); + strTree.insert('apple', 2); + strTree.insert('cherry', 3); + + const entries = [...strTree.entries()]; + expect(entries.map((e) => e.key)).toEqual(['apple', 'banana', 'cherry']); + expect(strTree.get('banana')).toEqual(new Set([1])); + }); + + it('should support string range queries', () => { + const strTree = new BPlusTree(); + const words = ['apple', 'banana', 'cherry', 'date', 'elderberry', 'fig']; + words.forEach((w, i) => strTree.insert(w, i)); + + const result = strTree.range('banana', 'elderberry'); + expect(result.map((e) => e.key)).toEqual(['banana', 'cherry', 'date']); + }); + }); +}); diff --git a/src/utils/btree.ts b/src/utils/btree.ts new file mode 100644 index 0000000..509ab64 --- /dev/null +++ b/src/utils/btree.ts @@ -0,0 +1,553 @@ +/** + * Generic 
/**
 * Generic comparator function. Returns negative if a < b, positive if a > b, 0 if equal.
 */
export type Comparator<K> = (a: K, b: K) => number;

/**
 * Options for range queries.
 */
export type RangeOptions = {
  /** Whether the lower bound is inclusive (default: true). */
  lowerInclusive?: boolean;
  /** Whether the upper bound is inclusive (default: false). */
  upperInclusive?: boolean;
};

/**
 * A single entry returned by range queries and iteration.
 *
 * `values` is the tree's own Set for that key, not a copy: mutating it
 * bypasses the tree's size bookkeeping.
 */
export type BPlusTreeEntry<K, V> = {
  key: K;
  values: Set<V>;
};

// ---------------------------------------------------------------------------
// Node types
// ---------------------------------------------------------------------------

/**
 * Internal (non-leaf) node. Stores keys that guide searches and pointers
 * to child nodes. Does NOT store values — all values live in leaves.
 * A node with `n` keys has `n + 1` children.
 */
class InternalNode<K, V> {
  keys: K[] = [];
  children: Array<InternalNode<K, V> | LeafNode<K, V>> = [];
}

/**
 * Leaf node. Stores key/value-set pairs (`keys[i]` pairs with `values[i]`)
 * and maintains a doubly-linked list across all leaves for range scans.
 */
class LeafNode<K, V> {
  keys: K[] = [];
  values: Array<Set<V>> = [];
  next: LeafNode<K, V> | null = null;
  prev: LeafNode<K, V> | null = null;
}

type TreeNode<K, V> = InternalNode<K, V> | LeafNode<K, V>;

/**
 * One step of a root-to-leaf descent: the internal node visited and the
 * index of the child that was followed out of it.
 */
type PathStep<K, V> = {
  node: InternalNode<K, V>;
  childIndex: number;
};

function isLeaf<K, V>(node: TreeNode<K, V>): node is LeafNode<K, V> {
  return node instanceof LeafNode;
}

// ---------------------------------------------------------------------------
// Default comparator
// ---------------------------------------------------------------------------

/**
 * Default comparator using native `<` / `>` operators.
 * Works correctly for numbers, strings, and Dates.
 *
 * Values for which neither `a < b` nor `a > b` holds (e.g. `NaN` against
 * anything) are reported as equal.
 */
function defaultComparator<K>(a: K, b: K): number {
  if (a < b) return -1;
  if (a > b) return 1;
  return 0;
}

// ---------------------------------------------------------------------------
// B+ Tree
// ---------------------------------------------------------------------------

/**
 * In-memory B+ Tree with duplicate-key support.
 *
 * Each unique key maps to a `Set<V>`, allowing multiple values to share
 * the same key (e.g. many documents with the same indexed field value).
 *
 * Leaf nodes are linked in a doubly-linked list, so a range scan descends
 * once to the first matching leaf and then walks the leaf chain.
 *
 * Structural updates (splits, borrows, merges) locate parents through the
 * descent path recorded on the way down instead of re-searching the tree.
 *
 * @typeParam K - Key type (must be comparable via the provided comparator)
 * @typeParam V - Value type stored in each key's Set
 */
export class BPlusTree<K, V> {
  /** Maximum number of keys per node. A node splits when it exceeds this. */
  private readonly maxKeys: number;

  /** Minimum number of keys a non-root node must hold after deletion. */
  private readonly minKeys: number;

  private readonly compare: Comparator<K>;

  private root: TreeNode<K, V>;

  /** Total number of individual values across all keys. */
  private _size = 0;

  /**
   * @param order - Maximum number of children per internal node (default 32).
   * @param comparator - Key ordering; defaults to native `<` / `>`.
   * @throws Error if `order` is not a number >= 3 (this includes `NaN`,
   *   which would otherwise disable splitting altogether).
   */
  constructor(order = 32, comparator?: Comparator<K>) {
    // Written as a negated `>=` so that NaN is rejected too.
    if (!(order >= 3)) throw new Error('B+ Tree order must be at least 3');
    this.maxKeys = order - 1;
    this.minKeys = Math.ceil(order / 2) - 1;
    this.compare = comparator ?? defaultComparator;
    this.root = new LeafNode<K, V>();
  }

  // -------------------------------------------------------------------------
  // Public API
  // -------------------------------------------------------------------------

  /** Total number of individual values stored in the tree. */
  get size(): number {
    return this._size;
  }

  /** Remove all entries from the tree. */
  clear(): void {
    this.root = new LeafNode<K, V>();
    this._size = 0;
  }

  /**
   * Insert a value under the given key. If the key already exists the
   * value is added to its Set; otherwise a new key entry is created.
   * Inserting a value already present under that key is a no-op.
   */
  insert(key: K, value: V): void {
    const path: PathStep<K, V>[] = [];
    const leaf = this.findLeaf(key, path);
    const idx = this.leafKeyIndex(leaf, key);

    if (idx < leaf.keys.length && this.compare(leaf.keys[idx], key) === 0) {
      // Key exists — add to its value set; no structural change possible.
      const bucket = leaf.values[idx];
      if (!bucket.has(value)) {
        bucket.add(value);
        this._size++;
      }
      return;
    }

    // New key — splice into its sorted position.
    leaf.keys.splice(idx, 0, key);
    leaf.values.splice(idx, 0, new Set<V>([value]));
    this._size++;

    // Split if the leaf overflows.
    if (leaf.keys.length > this.maxKeys) {
      this.splitLeaf(leaf, path);
    }
  }

  /**
   * Look up all values associated with the exact key.
   * Returns `undefined` if the key is not present.
   *
   * The returned Set is the tree's own storage, not a copy.
   */
  get(key: K): Set<V> | undefined {
    const leaf = this.findLeaf(key);
    const idx = this.leafKeyIndex(leaf, key);
    if (idx < leaf.keys.length && this.compare(leaf.keys[idx], key) === 0) {
      return leaf.values[idx];
    }
    return undefined;
  }

  /**
   * Delete a value (or all values) for the given key.
   *
   * - If `value` is provided, only that value is removed from the key's Set.
   *   The key entry is removed when its Set becomes empty.
   * - If `value` is omitted (or `undefined`), the entire key entry with all
   *   its values is removed.
   *
   * @returns `true` if something was removed, `false` if the key/value wasn't found.
   */
  delete(key: K, value?: V): boolean {
    const path: PathStep<K, V>[] = [];
    const leaf = this.findLeaf(key, path);
    const idx = this.leafKeyIndex(leaf, key);

    if (idx >= leaf.keys.length || this.compare(leaf.keys[idx], key) !== 0) {
      return false;
    }

    const bucket = leaf.values[idx];
    if (value !== undefined) {
      // Set.delete reports whether the value was present.
      if (!bucket.delete(value)) return false;
      this._size--;
      if (bucket.size > 0) return true;
      // Set empty — fall through to remove the key entry entirely.
    } else {
      this._size -= bucket.size;
    }

    leaf.keys.splice(idx, 1);
    leaf.values.splice(idx, 1);

    // Rebalance if needed. An empty path means the leaf is the root, which
    // is allowed to hold fewer than `minKeys` keys.
    if (path.length > 0 && leaf.keys.length < this.minKeys) {
      this.rebalanceLeaf(leaf, path);
    }

    // Shrink tree height if the root internal node has a single child.
    if (!isLeaf(this.root) && this.root.children.length === 1) {
      this.root = this.root.children[0];
    }

    return true;
  }

  /**
   * Range query. Returns all entries whose keys fall between `min` and `max`
   * (bound inclusivity configurable via `opts`), in key order.
   *
   * - Omit `min` for an unbounded lower end.
   * - Omit `max` for an unbounded upper end.
   * - Omit both to return the entire tree in key order.
   *
   * Default bounds: lower inclusive, upper exclusive (half-open interval).
   * Inverted bounds (`min` after `max`) yield an empty array.
   */
  range(min?: K, max?: K, opts?: RangeOptions): BPlusTreeEntry<K, V>[] {
    const lowerInc = opts?.lowerInclusive ?? true;
    const upperInc = opts?.upperInclusive ?? false;

    const results: BPlusTreeEntry<K, V>[] = [];

    // Locate the first candidate position.
    let leaf: LeafNode<K, V> | null;
    let i = 0;
    if (min !== undefined) {
      leaf = this.findLeaf(min);
      i = this.leafKeyIndex(leaf, min);
      // Adjust for exclusive lower bound.
      if (!lowerInc && i < leaf.keys.length && this.compare(leaf.keys[i], min) === 0) {
        i++;
      }
    } else {
      leaf = this.firstLeaf();
    }

    // Walk the leaf chain collecting entries until the upper bound is hit.
    for (; leaf !== null; leaf = leaf.next, i = 0) {
      for (; i < leaf.keys.length; i++) {
        const key = leaf.keys[i];
        if (max !== undefined) {
          const cmp = this.compare(key, max);
          if (cmp > 0 || (cmp === 0 && !upperInc)) return results;
        }
        results.push({ key, values: leaf.values[i] });
      }
    }

    return results;
  }

  /**
   * Iterate over all entries in key order.
   */
  *entries(): IterableIterator<BPlusTreeEntry<K, V>> {
    for (let leaf: LeafNode<K, V> | null = this.firstLeaf(); leaf !== null; leaf = leaf.next) {
      for (let i = 0; i < leaf.keys.length; i++) {
        yield { key: leaf.keys[i], values: leaf.values[i] };
      }
    }
  }

  // -------------------------------------------------------------------------
  // Tree navigation
  // -------------------------------------------------------------------------

  /**
   * Descend to the leaf node that should contain the given key.
   *
   * @param path - When supplied, receives one step per internal node
   *   visited (root first), for use by split/rebalance.
   */
  private findLeaf(key: K, path?: PathStep<K, V>[]): LeafNode<K, V> {
    let node: TreeNode<K, V> = this.root;
    while (!isLeaf(node)) {
      const childIndex = this.childIndexFor(node, key);
      path?.push({ node, childIndex });
      node = node.children[childIndex];
    }
    return node;
  }

  /**
   * Binary search within an internal node for the child to follow:
   * the index of the first separator strictly greater than `key`
   * (keys equal to a separator live in the right-hand child).
   */
  private childIndexFor(node: InternalNode<K, V>, key: K): number {
    let lo = 0;
    let hi = node.keys.length;
    while (lo < hi) {
      const mid = (lo + hi) >>> 1;
      if (this.compare(key, node.keys[mid]) < 0) {
        hi = mid;
      } else {
        lo = mid + 1;
      }
    }
    return lo;
  }

  /** Get the leftmost leaf in the tree. */
  private firstLeaf(): LeafNode<K, V> {
    let node: TreeNode<K, V> = this.root;
    while (!isLeaf(node)) {
      node = node.children[0];
    }
    return node;
  }

  /**
   * Binary search within a leaf for the insertion position of `key`.
   * Returns the index of the first key >= `key`.
   */
  private leafKeyIndex(leaf: LeafNode<K, V>, key: K): number {
    let lo = 0;
    let hi = leaf.keys.length;
    while (lo < hi) {
      const mid = (lo + hi) >>> 1;
      if (this.compare(leaf.keys[mid], key) < 0) {
        lo = mid + 1;
      } else {
        hi = mid;
      }
    }
    return lo;
  }

  /** Child of `parent` at `index` if it exists and is a leaf. */
  private leafAt(parent: InternalNode<K, V>, index: number): LeafNode<K, V> | undefined {
    const child: TreeNode<K, V> | undefined = parent.children[index];
    return child !== undefined && isLeaf(child) ? child : undefined;
  }

  /** Child of `parent` at `index` if it exists and is an internal node. */
  private internalAt(parent: InternalNode<K, V>, index: number): InternalNode<K, V> | undefined {
    const child: TreeNode<K, V> | undefined = parent.children[index];
    return child !== undefined && !isLeaf(child) ? child : undefined;
  }

  // -------------------------------------------------------------------------
  // Splitting
  // -------------------------------------------------------------------------

  /**
   * Split an overflowing leaf node. The right half becomes a new leaf,
   * and a copy of its first key is promoted to the parent.
   */
  private splitLeaf(leaf: LeafNode<K, V>, path: PathStep<K, V>[]): void {
    const mid = Math.ceil(leaf.keys.length / 2);
    const right = new LeafNode<K, V>();

    right.keys = leaf.keys.splice(mid);
    right.values = leaf.values.splice(mid);

    // Maintain the doubly-linked list.
    right.next = leaf.next;
    right.prev = leaf;
    if (leaf.next) leaf.next.prev = right;
    leaf.next = right;

    this.insertIntoParent(leaf, right.keys[0], right, path, path.length - 1);
  }

  /**
   * Split an overflowing internal node. The middle key is pushed up
   * to the parent (not copied — it's removed from this level).
   *
   * @param depth - Index of `node` within `path`.
   */
  private splitInternal(node: InternalNode<K, V>, path: PathStep<K, V>[], depth: number): void {
    const mid = Math.floor(node.keys.length / 2);
    const promotedKey = node.keys[mid];

    const right = new InternalNode<K, V>();
    right.keys = node.keys.splice(mid + 1);
    right.children = node.children.splice(mid + 1);
    node.keys.splice(mid, 1); // remove the promoted key

    this.insertIntoParent(node, promotedKey, right, path, depth - 1);
  }

  /**
   * Insert a promoted key and new right child into the parent of `left`.
   *
   * @param parentDepth - Index of `left`'s parent within `path`; a negative
   *   value means `left` is the root, in which case a new root is created.
   */
  private insertIntoParent(
    left: TreeNode<K, V>,
    key: K,
    right: TreeNode<K, V>,
    path: PathStep<K, V>[],
    parentDepth: number,
  ): void {
    if (parentDepth < 0) {
      const newRoot = new InternalNode<K, V>();
      newRoot.keys = [key];
      newRoot.children = [left, right];
      this.root = newRoot;
      return;
    }

    // `childIndex` is the position of `left` in its parent; it is unaffected
    // by the split because `left` stays in place and `right` goes after it.
    const { node: parent, childIndex } = path[parentDepth];
    parent.keys.splice(childIndex, 0, key);
    parent.children.splice(childIndex + 1, 0, right);

    if (parent.keys.length > this.maxKeys) {
      this.splitInternal(parent, path, parentDepth);
    }
  }

  // -------------------------------------------------------------------------
  // Rebalancing (deletion)
  // -------------------------------------------------------------------------

  /**
   * Rebalance a leaf that has fewer than `minKeys` entries after deletion.
   * Tries to borrow from the right sibling, then the left; if neither can
   * spare a key, merges with a sibling (right preferred).
   *
   * @param path - Descent path that led to `leaf`; must be non-empty.
   */
  private rebalanceLeaf(leaf: LeafNode<K, V>, path: PathStep<K, V>[]): void {
    const depth = path.length - 1;
    const { node: parent, childIndex: idx } = path[depth];
    const right = this.leafAt(parent, idx + 1);
    const left = this.leafAt(parent, idx - 1);

    // Try borrowing the right sibling's first entry.
    if (right && right.keys.length > this.minKeys) {
      leaf.keys.push(...right.keys.splice(0, 1));
      leaf.values.push(...right.values.splice(0, 1));
      parent.keys[idx] = right.keys[0];
      return;
    }

    // Try borrowing the left sibling's last entry.
    if (left && left.keys.length > this.minKeys) {
      leaf.keys.unshift(...left.keys.splice(-1, 1));
      leaf.values.unshift(...left.values.splice(-1, 1));
      parent.keys[idx - 1] = leaf.keys[0];
      return;
    }

    // Merge with a sibling.
    if (right) {
      this.mergeLeaves(leaf, right, parent, idx, path, depth);
    } else if (left) {
      this.mergeLeaves(left, leaf, parent, idx - 1, path, depth);
    }
  }

  /**
   * Merge `right` leaf into `left` leaf and remove the separator key
   * from the parent.
   *
   * @param parentDepth - Index of `parent` within `path`.
   */
  private mergeLeaves(
    left: LeafNode<K, V>,
    right: LeafNode<K, V>,
    parent: InternalNode<K, V>,
    separatorIdx: number,
    path: PathStep<K, V>[],
    parentDepth: number,
  ): void {
    left.keys.push(...right.keys);
    left.values.push(...right.values);

    // Fix linked list pointers.
    left.next = right.next;
    if (right.next) right.next.prev = left;

    // Remove the separator key and right child from the parent.
    parent.keys.splice(separatorIdx, 1);
    parent.children.splice(separatorIdx + 1, 1);

    // Propagate the underflow upward (the root, at depth 0, is exempt).
    if (parentDepth > 0 && parent.keys.length < this.minKeys) {
      this.rebalanceInternal(path, parentDepth);
    }
  }

  /**
   * Rebalance the internal node at `path[depth]`, which has too few keys
   * after a merge below it. Requires `depth >= 1` (the node is not the root).
   */
  private rebalanceInternal(path: PathStep<K, V>[], depth: number): void {
    const node = path[depth].node;
    const { node: parent, childIndex: idx } = path[depth - 1];
    const right = this.internalAt(parent, idx + 1);
    const left = this.internalAt(parent, idx - 1);

    // Borrow through the parent from the right sibling: the separator comes
    // down, the sibling's first key goes up, its first child moves over.
    if (right && right.keys.length > this.minKeys) {
      node.keys.push(parent.keys[idx]);
      parent.keys.splice(idx, 1, ...right.keys.splice(0, 1));
      node.children.push(...right.children.splice(0, 1));
      return;
    }

    // Mirror image for the left sibling.
    if (left && left.keys.length > this.minKeys) {
      node.keys.unshift(parent.keys[idx - 1]);
      parent.keys.splice(idx - 1, 1, ...left.keys.splice(-1, 1));
      node.children.unshift(...left.children.splice(-1, 1));
      return;
    }

    // Merge with a sibling.
    if (right) {
      this.mergeInternal(node, right, parent, idx, path, depth - 1);
    } else if (left) {
      this.mergeInternal(left, node, parent, idx - 1, path, depth - 1);
    }
  }

  /**
   * Merge two internal nodes by pulling down the separator key from the
   * parent and concatenating children.
   *
   * @param parentDepth - Index of `parent` within `path`.
   */
  private mergeInternal(
    left: InternalNode<K, V>,
    right: InternalNode<K, V>,
    parent: InternalNode<K, V>,
    separatorIdx: number,
    path: PathStep<K, V>[],
    parentDepth: number,
  ): void {
    left.keys.push(parent.keys[separatorIdx], ...right.keys);
    left.children.push(...right.children);

    parent.keys.splice(separatorIdx, 1);
    parent.children.splice(separatorIdx + 1, 1);

    if (parentDepth > 0 && parent.keys.length < this.minKeys) {
      this.rebalanceInternal(path, parentDepth);
    }
  }
}

// diff --git a/src/utils/index.ts b/src/utils/index.ts
// index 3b3921d..1619f37 100644
// --- a/src/utils/index.ts
// +++ b/src/utils/index.ts
// @@ -1,3 +1,4 @@
//  export * from './event-emitter.js';
//  export * from './exponential-backoff.js';
//  export * from './sse-session.js';
// +export * from './btree.js';