B-Tree indexing

This commit is contained in:
2026-02-25 15:58:30 +11:00
parent 77593fe3b4
commit 64b811f330
6 changed files with 1300 additions and 178 deletions

View File

@@ -6,10 +6,10 @@ import {
type ComparisonOperators,
isOperatorObject,
} from './base-storage.js';
import { BPlusTree, type BPlusTreeEntry } from 'src/utils/btree.js';
/**
* Separator used when joining multiple field values into a single index key.
* Chosen to be unlikely to appear in real field values.
* Separator used when joining field names to create the index map key.
*/
const INDEX_KEY_SEP = '\x00';
@@ -29,13 +29,26 @@ function normalizeIndexes(indexes?: IndexDefinition): string[][] {
return indexes as string[][];
}
/**
 * Rank a value by its runtime type so that values of different types never
 * compare as equal inside an index key.
 *
 * Order: undefined < null < boolean < number < bigint < string < symbol <
 * everything else (objects, functions).
 */
function indexTypeRank(value: unknown): number {
  if (value === undefined) return 0;
  if (value === null) return 1;
  switch (typeof value) {
    case 'boolean':
      return 2;
    case 'number':
      return 3;
    case 'bigint':
      return 4;
    case 'string':
      return 5;
    case 'symbol':
      return 6;
    default:
      return 7;
  }
}

/**
 * Comparator for compound index keys (arrays of raw values).
 *
 * Compares element-by-element. Elements of different runtime types are
 * ordered by type first (see `indexTypeRank`); elements of the same type are
 * ordered with the native `<` / `>` operators. Comparing by type first is
 * what keeps distinct values such as `1` and `'1'`, or `null` and `0`, from
 * collapsing into the same key: with `<` / `>` alone neither comparison holds
 * for such pairs, so they would be reported as equal.
 *
 * `NaN` sorts before every other number and is equal to itself. Two values of
 * the same type that the native operators cannot order (for example two plain
 * objects) are still treated as equal at that position.
 *
 * @param a - First tuple.
 * @param b - Second tuple.
 * @returns A negative number if `a` sorts first, a positive number if `b`
 *          sorts first, and 0 if they are equal. When one tuple is a prefix
 *          of the other, the shorter one sorts first.
 */
function tupleCompare(a: any[], b: any[]): number {
  const len = Math.min(a.length, b.length);
  for (let i = 0; i < len; i++) {
    const x = a[i];
    const y = b[i];

    // Identical values (including NaN vs NaN and the same object reference).
    if (Object.is(x, y)) continue;

    // Different runtime types are never equal; order them by type rank.
    const rankDiff = indexTypeRank(x) - indexTypeRank(y);
    if (rankDiff !== 0) return rankDiff < 0 ? -1 : 1;

    if (x < y) return -1;
    if (x > y) return 1;

    // Same type but unordered: for numbers this means exactly one side is
    // NaN (both-NaN was handled by Object.is). Pin NaN below other numbers.
    if (x !== x) return -1;
    if (y !== y) return 1;
  }
  return a.length - b.length;
}
/**
* Implementation of BaseStore using Memory as the storage backend.
*
* @remarks
* Documents are keyed internally by an auto-incrementing numeric key.
* Optional indexes provide O(1) lookups when a query filter matches
* an index exactly.
* Optional indexes are backed by B+ Trees, providing O(log n) equality
* lookups and O(log n + k) range queries.
*/
export class StorageMemory<
T extends Record<string, any> = Record<string, any>,
@@ -53,12 +66,11 @@ export class StorageMemory<
private store: Map<number, T>;
/**
* Secondary index maps.
* Outer key = index name (joined field names).
* Inner key = index value (joined field values from a document).
* Inner value = set of internal keys that share this index value.
* Secondary indexes backed by B+ Trees.
* Map key = index name (joined field names).
* Map value = B+ Tree mapping index keys to sets of internal document keys.
*/
private indexes: Map<string, Map<string, Set<number>>>;
private indexes: Map<string, BPlusTree<any, number>>;
/** The normalized index definitions supplied at construction time. */
private indexDefs: string[][];
@@ -73,10 +85,12 @@ export class StorageMemory<
this.children = new Map();
this.indexDefs = normalizeIndexes(indexes);
// Initialise an empty map for each index definition.
// Create a B+ Tree for each index definition.
this.indexes = new Map();
for (const fields of this.indexDefs) {
this.indexes.set(fields.join(INDEX_KEY_SEP), new Map());
const name = fields.join(INDEX_KEY_SEP);
const comparator = fields.length > 1 ? tupleCompare : undefined;
this.indexes.set(name, new BPlusTree<any, number>(32, comparator));
}
}
@@ -225,16 +239,19 @@ export class StorageMemory<
* Uses an index when possible, otherwise falls back to a full scan.
*/
private collectMatches(filter?: Filter<T>): Array<[number, T]> {
const indexKeys = this.resolveIndexKeys(filter);
const resolution = this.resolveIndexKeys(filter);
if (resolution !== null) {
const { keys, resolvedFields } = resolution;
const filterKeys = filter ? Object.keys(filter) : [];
const needsVerification = filterKeys.some((k) => !resolvedFields.includes(k));
if (indexKeys !== null) {
// We have candidate internal keys from the index — fetch and verify.
const results: Array<[number, T]> = [];
for (const key of indexKeys) {
for (const key of keys) {
const doc = this.store.get(key);
if (doc && this.matchesFilter(doc, filter)) {
results.push([key, doc]);
}
if (!doc) continue;
if (needsVerification && !this.matchesFilter(doc, filter)) continue;
results.push([key, doc]);
}
return results;
}
@@ -269,106 +286,165 @@ export class StorageMemory<
// ---------------------------------------------------------------------------
/**
* Build the index value string for a given document and set of fields.
* Returns `null` if any of the fields are missing from the document,
* since we can't meaningfully index a partial key.
* Build the B+ Tree key for a document and a set of index fields.
* - Single-field indexes return the raw field value.
* - Compound indexes return an array of raw field values.
* Returns `null` if any required field is missing from the document.
*/
private buildIndexValue(doc: Record<string, any>, fields: string[]): string | null {
const parts: string[] = [];
private buildIndexKey(doc: Record<string, any>, fields: string[]): any | null {
if (fields.length === 1) {
if (!(fields[0] in doc)) return null;
return doc[fields[0]];
}
const parts: any[] = [];
for (const field of fields) {
if (!(field in doc)) return null;
parts.push(String(doc[field]));
parts.push(doc[field]);
}
return parts.join(INDEX_KEY_SEP);
return parts;
}
/** Register a document in all applicable indexes. */
private addToIndexes(internalKey: number, doc: T): void {
for (const fields of this.indexDefs) {
const indexName = fields.join(INDEX_KEY_SEP);
const indexValue = this.buildIndexValue(doc, fields);
if (indexValue === null) continue;
const indexKey = this.buildIndexKey(doc, fields);
if (indexKey === null) continue;
const indexMap = this.indexes.get(indexName)!;
let bucket = indexMap.get(indexValue);
if (!bucket) {
bucket = new Set();
indexMap.set(indexValue, bucket);
}
bucket.add(internalKey);
const name = fields.join(INDEX_KEY_SEP);
this.indexes.get(name)!.insert(indexKey, internalKey);
}
}
/** Remove a document from all applicable indexes. */
private removeFromIndexes(internalKey: number, doc: T): void {
for (const fields of this.indexDefs) {
const indexName = fields.join(INDEX_KEY_SEP);
const indexValue = this.buildIndexValue(doc, fields);
if (indexValue === null) continue;
const indexKey = this.buildIndexKey(doc, fields);
if (indexKey === null) continue;
const indexMap = this.indexes.get(indexName)!;
const bucket = indexMap.get(indexValue);
if (bucket) {
bucket.delete(internalKey);
if (bucket.size === 0) indexMap.delete(indexValue);
}
const name = fields.join(INDEX_KEY_SEP);
this.indexes.get(name)!.delete(indexKey, internalKey);
}
}
/**
* Attempt to resolve a set of candidate internal keys from the indexes.
* Returns `null` if no index can serve the query.
*
* An index is used when the filter contains plain equality values for every
* field in the index. Operator objects (e.g. `{ $lt: 50 }`) are excluded
* from index resolution since hash-based indexes only support equality.
* Result of an index resolution attempt.
* `keys` is an iterable of candidate internal keys.
* `resolvedFields` lists the filter fields fully satisfied by the index,
* so callers can skip re-verifying those conditions in matchesFilter.
*/
private resolveIndexKeys(filter?: Filter<T>): Set<number> | null {
private resolveIndexKeys(
filter?: Filter<T>,
): { keys: Iterable<number>; resolvedFields: string[] } | null {
if (!filter) return null;
const filterKeys = Object.keys(filter);
if (filterKeys.length === 0) return null;
// Extract only the equality fields from the filter (skip operator objects).
const equalityFilter: Record<string, any> = {};
for (const [key, value] of Object.entries(filter)) {
if (!isOperatorObject(value)) {
equalityFilter[key] = value;
}
}
if (Object.keys(equalityFilter).length === 0) return null;
for (const fields of this.indexDefs) {
// Every field in the index must be present as an equality value.
if (!fields.every((f) => f in equalityFilter)) continue;
const indexName = fields.join(INDEX_KEY_SEP);
const indexValue = this.buildIndexValue(equalityFilter, fields);
if (indexValue === null) continue;
const btree = this.indexes.get(indexName)!;
const indexMap = this.indexes.get(indexName)!;
const bucket = indexMap.get(indexValue);
return bucket ?? new Set();
if (fields.length === 1) {
// --- Single-field index ---
const field = fields[0];
if (!(field in filter)) continue;
const filterValue = (filter as any)[field];
if (isOperatorObject(filterValue)) {
const keys = this.resolveOperatorViaTree(btree, filterValue);
if (keys !== null) return { keys, resolvedFields: [field] };
continue;
}
// Plain equality.
return { keys: btree.get(filterValue) ?? [], resolvedFields: [field] };
} else {
// --- Compound index — all fields must be plain equality ---
if (!fields.every((f) => f in filter && !isOperatorObject((filter as any)[f]))) {
continue;
}
const tupleKey = fields.map((f) => (filter as any)[f]);
return { keys: btree.get(tupleKey) ?? [], resolvedFields: [...fields] };
}
}
return null;
}
/**
 * Try to resolve an operator filter against a single-field B+ Tree index.
 *
 * The caller treats a non-null result as fully satisfying the field's
 * condition, so this method only answers when the tree lookup expresses the
 * *entire* operator object. It returns `null` (meaning "cannot be served by
 * this index") when:
 * - no operator has a defined value;
 * - any operator other than `$eq`, `$gt`, `$gte`, `$lt`, `$lte` is present
 *   (this includes `$ne`, which cannot use the index efficiently);
 * - `$eq` is combined with any other operator;
 * - both `$gt` and `$gte`, or both `$lt` and `$lte`, are present, since
 *   picking the tighter bound would need the tree's own ordering.
 *
 * @param btree - The index tree for the filtered field.
 * @param ops - The operator object taken from the filter.
 * @returns The matching internal keys, or `null` if the operators can't be
 *          served by the tree.
 */
private resolveOperatorViaTree(
  btree: BPlusTree<any, number>,
  ops: ComparisonOperators<any>,
): Iterable<number> | null {
  const indexable = ['$eq', '$gt', '$gte', '$lt', '$lte'];

  // Only operators that actually carry a value take part in the decision.
  const present = Object.keys(ops).filter(
    (op) => (ops as Record<string, unknown>)[op] !== undefined,
  );
  if (present.length === 0) return null;

  // Anything outside the indexable set ($ne or an operator this method does
  // not understand) would be silently dropped, so refuse instead.
  if (!present.every((op) => indexable.includes(op))) return null;

  // $eq is a point lookup, but only when it is the sole condition.
  if (ops.$eq !== undefined) {
    if (present.length > 1) return null;
    return btree.get(ops.$eq) ?? [];
  }

  // Two bounds on the same side are ambiguous here; let the caller fall back.
  if (ops.$gt !== undefined && ops.$gte !== undefined) return null;
  if (ops.$lt !== undefined && ops.$lte !== undefined) return null;

  // A missing lower bound defaults to inclusive and a missing upper bound to
  // exclusive, as before.
  const lowerInclusive = ops.$gt === undefined;
  const min: any = lowerInclusive ? ops.$gte : ops.$gt;
  const upperInclusive = ops.$lte !== undefined;
  const max: any = upperInclusive ? ops.$lte : ops.$lt;

  const entries = btree.range(min, max, { lowerInclusive, upperInclusive });
  return this.flattenEntryKeys(entries);
}
/**
 * Collapse B+ Tree range results into one flat list of internal keys.
 *
 * The keys of each entry are appended in entry order. No de-duplication is
 * performed, so the result is a plain array rather than a Set; this relies
 * on a given internal key being stored under only one index key.
 *
 * @param entries - Entries returned by a tree range query.
 * @returns Every internal key from every entry, in iteration order.
 */
private flattenEntryKeys(entries: BPlusTreeEntry<any, number>[]): number[] {
  return entries.flatMap(({ values }) => Array.from(values));
}
/**
* Try to answer a `find` query entirely through an index.
* Returns `null` when no index can serve the filter, signalling
* the caller to fall back to a full scan.
*
* When the index covers every field in the filter, matchesFilter
* is skipped entirely — the B+ Tree has already ensured the
* conditions are met.
*/
private findViaIndex(filter?: Filter<T>): T[] | null {
const keys = this.resolveIndexKeys(filter);
if (keys === null) return null;
const resolution = this.resolveIndexKeys(filter);
if (resolution === null) return null;
const { keys, resolvedFields } = resolution;
const filterKeys = filter ? Object.keys(filter) : [];
const needsVerification = filterKeys.some((k) => !resolvedFields.includes(k));
const results: T[] = [];
for (const key of keys) {
const doc = this.store.get(key);
if (doc && this.matchesFilter(doc, filter)) {
results.push(doc);
}
if (!doc) continue;
if (needsVerification && !this.matchesFilter(doc, filter)) continue;
results.push(doc);
}
return results;
}