B-Tree indexing

This commit is contained in:
2026-02-25 15:58:30 +11:00
parent 77593fe3b4
commit 64b811f330
6 changed files with 1300 additions and 178 deletions

553
src/utils/btree.ts Normal file
View File

@@ -0,0 +1,553 @@
/**
* Generic comparator function used to order keys.
*
* Returns a negative number if `a` sorts before `b`, a positive number if
* `a` sorts after `b`, and 0 if the two keys are considered equal.
*
* The tree binary-searches sorted key arrays with this function, so it should
* give consistent answers for the same pair of keys.
*/
export type Comparator<K> = (a: K, b: K) => number;
/**
* Options for range queries. Both flags are optional; the default noted on
* each one applies when it is omitted, giving a half-open interval.
*/
export type RangeOptions = {
/** Whether keys equal to the lower bound are included (default: true). */
lowerInclusive?: boolean;
/** Whether keys equal to the upper bound are included (default: false). */
upperInclusive?: boolean;
};
/**
* A single entry returned by range queries and iteration: one key together
* with the set of values stored under it.
*/
export type BPlusTreeEntry<K, V> = {
/** The entry's key. */
key: K;
/**
* Values stored under `key`. `range()` and `entries()` place the tree's own
* Set here rather than a copy, so callers should treat it as read-only.
*/
values: Set<V>;
};
// ---------------------------------------------------------------------------
// Node types
// ---------------------------------------------------------------------------
/**
 * Non-leaf node. It carries routing information only: keys that steer a
 * search towards the right subtree, and the child pointers themselves.
 * Values are never stored here; they all live in leaves.
 */
class InternalNode<K, V> {
  /** Keys that guide searches through this node. */
  keys: K[];
  /** Child pointers; each child is either another internal node or a leaf. */
  children: (InternalNode<K, V> | LeafNode<K, V>)[];

  /** Creates an empty node with no keys and no children. */
  constructor() {
    this.keys = [];
    this.children = [];
  }
}
/**
 * Leaf node. Holds the actual data as two parallel arrays (`values[i]` is the
 * value set stored under `keys[i]`) and is chained to its neighbours through
 * `next` / `prev`, forming a doubly-linked list across all leaves for
 * efficient range scans.
 */
class LeafNode<K, V> {
  /** Keys held by this leaf. */
  keys: K[];
  /** Value sets, index-aligned with `keys`. */
  values: Set<V>[];
  /** Following leaf in the chain, or `null` when there is none. */
  next: LeafNode<K, V> | null;
  /** Preceding leaf in the chain, or `null` when there is none. */
  prev: LeafNode<K, V> | null;

  /** Creates an empty, unlinked leaf. */
  constructor() {
    this.keys = [];
    this.values = [];
    this.next = null;
    this.prev = null;
  }
}
/** Any node of the tree: either an internal (routing) node or a leaf. */
type Node<K, V> = InternalNode<K, V> | LeafNode<K, V>;
/**
 * Type guard telling leaves apart from internal nodes.
 *
 * @returns `true` (narrowing `node` to `LeafNode`) when `node` was created
 *   by the `LeafNode` class, `false` otherwise.
 */
function isLeaf<K, V>(node: Node<K, V>): node is LeafNode<K, V> {
  const createdAsLeaf = node instanceof LeafNode;
  return createdAsLeaf;
}
// ---------------------------------------------------------------------------
// Default comparator
// ---------------------------------------------------------------------------
/**
 * Fallback ordering built on the native `<` / `>` operators.
 *
 * Suitable for numbers, strings and Dates. Any pair for which neither
 * `a < b` nor `a > b` holds is reported as equal (0); note that this
 * includes pairs involving `NaN`.
 *
 * @returns -1 if `a < b`, 1 if `a > b`, otherwise 0.
 */
function defaultComparator<K>(a: K, b: K): number {
  return a < b ? -1 : a > b ? 1 : 0;
}
// ---------------------------------------------------------------------------
// B+ Tree
// ---------------------------------------------------------------------------
/**
 * In-memory B+ Tree with duplicate-key support.
 *
 * Each unique key maps to a `Set<V>`, allowing multiple values to share
 * the same key (e.g. many documents with the same indexed field value).
 * All values live in leaves; internal nodes only hold keys that steer the
 * descent.
 *
 * Leaf nodes are linked in a doubly-linked list, so a range scan is a single
 * descent followed by a walk along the leaf chain.
 *
 * The `Set` objects handed out by `get`, `range` and `entries` are the
 * tree's own storage, not copies. Treat them as read-only: changing one
 * directly bypasses the bookkeeping behind `size`.
 *
 * @typeParam K - Key type (must be comparable via the provided comparator)
 * @typeParam V - Value type stored in each key's Set
 */
export class BPlusTree<K, V> {
  /** Maximum number of keys per node. A node splits when it exceeds this. */
  private readonly maxKeys: number;
  /** Minimum number of keys a non-root node must hold after deletion. */
  private readonly minKeys: number;
  /** Ordering used for every key comparison performed by the tree. */
  private readonly compare: Comparator<K>;
  /** Top of the tree; a leaf while the tree is small, an internal node later. */
  private root: Node<K, V>;
  /** Total number of individual values across all keys. */
  private _size = 0;

  /**
   * @param order - Maximum number of children of an internal node; every
   *   node holds at most `order - 1` keys. Must be an integer >= 3.
   * @param comparator - Key ordering. Defaults to `defaultComparator`.
   * @throws Error if `order` is less than 3 or is not an integer.
   */
  constructor(order = 32, comparator?: Comparator<K>) {
    if (order < 3) throw new Error('B+ Tree order must be at least 3');
    // NaN, Infinity and fractional orders slip past the `< 3` check but give
    // key limits that never trigger a split or that break node invariants.
    if (!Number.isInteger(order)) {
      throw new Error('B+ Tree order must be an integer');
    }
    this.maxKeys = order - 1;
    this.minKeys = Math.ceil(order / 2) - 1;
    this.compare = comparator ?? defaultComparator;
    this.root = new LeafNode<K, V>();
  }

  // -------------------------------------------------------------------------
  // Public API
  // -------------------------------------------------------------------------

  /** Total number of individual values stored in the tree. */
  get size(): number {
    return this._size;
  }

  /** Remove all entries from the tree. */
  clear(): void {
    this.root = new LeafNode<K, V>();
    this._size = 0;
  }

  /**
   * Insert a value under the given key. If the key already exists the
   * value is added to its Set; otherwise a new key entry is created.
   * Re-inserting a value that is already stored under the key is a no-op.
   */
  insert(key: K, value: V): void {
    // Ancestors are recorded during the descent so that a split can find
    // each parent directly instead of searching the whole tree for it.
    const path: InternalNode<K, V>[] = [];
    const leaf = this.findLeaf(key, path);
    const idx = this.leafKeyIndex(leaf, key);

    if (idx < leaf.keys.length && this.compare(leaf.keys[idx], key) === 0) {
      // Key exists: add to its value set, counting the value only if the
      // Set actually grew. No key was added, so the leaf cannot overflow.
      const set = leaf.values[idx];
      const before = set.size;
      set.add(value);
      this._size += set.size - before;
      return;
    }

    // New key: splice into its sorted position.
    leaf.keys.splice(idx, 0, key);
    leaf.values.splice(idx, 0, new Set<V>([value]));
    this._size++;

    if (leaf.keys.length > this.maxKeys) {
      this.splitLeaf(leaf, path);
    }
  }

  /**
   * Look up all values associated with the exact key.
   *
   * @returns The tree's own Set for that key (not a copy), or `undefined`
   *   if the key is not present.
   */
  get(key: K): Set<V> | undefined {
    const leaf = this.findLeaf(key);
    const idx = this.leafKeyIndex(leaf, key);
    if (idx < leaf.keys.length && this.compare(leaf.keys[idx], key) === 0) {
      return leaf.values[idx];
    }
    return undefined;
  }

  /**
   * Delete a value (or all values) for the given key.
   *
   * - If `value` is provided, only that value is removed from the key's Set.
   *   The key entry is removed when its Set becomes empty.
   * - If `value` is omitted (or `undefined`), the entire key entry, with all
   *   of its values, is removed.
   *
   * @returns `true` if something was removed, `false` if the key/value wasn't found.
   */
  delete(key: K, value?: V): boolean {
    const path: InternalNode<K, V>[] = [];
    const leaf = this.findLeaf(key, path);
    const idx = this.leafKeyIndex(leaf, key);
    if (idx >= leaf.keys.length || this.compare(leaf.keys[idx], key) !== 0) {
      return false;
    }

    if (value !== undefined) {
      const set = leaf.values[idx];
      // Set.delete reports whether the value was actually present.
      if (!set.delete(value)) return false;
      this._size--;
      if (set.size > 0) return true;
      // Set is now empty: fall through and drop the key entry itself.
    } else {
      this._size -= leaf.values[idx].size;
    }

    leaf.keys.splice(idx, 1);
    leaf.values.splice(idx, 1);

    // Rebalance if needed (a root leaf may hold any number of keys).
    if (leaf !== this.root && leaf.keys.length < this.minKeys) {
      this.rebalanceLeaf(leaf, path);
    }

    // Shrink tree height if the root internal node has a single child.
    if (!isLeaf(this.root) && this.root.children.length === 1) {
      this.root = this.root.children[0];
    }
    return true;
  }

  /**
   * Range query. Returns, in key order, all entries whose keys lie between
   * `min` and `max`.
   *
   * - Omit `min` for an unbounded lower end.
   * - Omit `max` for an unbounded upper end.
   * - Omit both to return every entry of the tree.
   *
   * Default bounds: lower inclusive, upper exclusive, i.e. the half-open
   * interval `[min, max)`. Use `opts` to change either end.
   *
   * @returns A new array of entries; each entry's `values` is the tree's own
   *   Set for that key, not a copy.
   */
  range(
    min?: K,
    max?: K,
    opts?: RangeOptions,
  ): BPlusTreeEntry<K, V>[] {
    const lowerInc = opts?.lowerInclusive ?? true;
    const upperInc = opts?.upperInclusive ?? false;
    const results: BPlusTreeEntry<K, V>[] = [];

    // Locate the leaf and the slot where the scan starts.
    let startLeaf: LeafNode<K, V>;
    let startIdx: number;
    if (min !== undefined) {
      startLeaf = this.findLeaf(min);
      startIdx = this.leafKeyIndex(startLeaf, min);
      // Exclusive lower bound: step over an exact match on `min`.
      if (
        !lowerInc &&
        startIdx < startLeaf.keys.length &&
        this.compare(startLeaf.keys[startIdx], min) === 0
      ) {
        startIdx++;
      }
    } else {
      startLeaf = this.firstLeaf();
      startIdx = 0;
    }

    // Walk the leaf chain, stopping at the first key beyond the upper bound.
    let current: LeafNode<K, V> | null = startLeaf;
    let i = startIdx;
    while (current) {
      for (; i < current.keys.length; i++) {
        const key = current.keys[i];
        if (max !== undefined) {
          const cmp = this.compare(key, max);
          if (cmp > 0 || (cmp === 0 && !upperInc)) {
            return results;
          }
        }
        results.push({ key, values: current.values[i] });
      }
      current = current.next;
      i = 0;
    }
    return results;
  }

  /**
   * Iterate over all entries in key order. Each yielded entry's `values` is
   * the tree's own Set for that key, not a copy.
   */
  *entries(): IterableIterator<BPlusTreeEntry<K, V>> {
    let leaf: LeafNode<K, V> | null = this.firstLeaf();
    while (leaf) {
      for (let i = 0; i < leaf.keys.length; i++) {
        yield { key: leaf.keys[i], values: leaf.values[i] };
      }
      leaf = leaf.next;
    }
  }

  // -------------------------------------------------------------------------
  // Tree navigation
  // -------------------------------------------------------------------------

  /**
   * Descend to the leaf node that should contain the given key.
   *
   * @param path - Optional output array. When supplied, every internal node
   *   visited is appended to it, root first, so the last element ends up
   *   being the parent of the returned leaf.
   */
  private findLeaf(key: K, path?: InternalNode<K, V>[]): LeafNode<K, V> {
    let node: Node<K, V> = this.root;
    while (!isLeaf(node)) {
      path?.push(node);
      node = node.children[this.childIndex(node, key)];
    }
    return node;
  }

  /**
   * Binary search within an internal node for the child to descend into.
   * Returns the index of the first key strictly greater than `key`
   * (or `keys.length` if there is none), so a key equal to a separator is
   * routed to the child on the separator's right.
   */
  private childIndex(node: InternalNode<K, V>, key: K): number {
    let lo = 0;
    let hi = node.keys.length;
    while (lo < hi) {
      const mid = (lo + hi) >>> 1;
      if (this.compare(key, node.keys[mid]) < 0) {
        hi = mid;
      } else {
        lo = mid + 1;
      }
    }
    return lo;
  }

  /** Get the leftmost leaf in the tree. */
  private firstLeaf(): LeafNode<K, V> {
    let node: Node<K, V> = this.root;
    while (!isLeaf(node)) {
      node = node.children[0];
    }
    return node;
  }

  /**
   * Binary search within a leaf for the insertion position of `key`.
   * Returns the index of the first key >= `key`.
   */
  private leafKeyIndex(leaf: LeafNode<K, V>, key: K): number {
    let lo = 0;
    let hi = leaf.keys.length;
    while (lo < hi) {
      const mid = (lo + hi) >>> 1;
      if (this.compare(leaf.keys[mid], key) < 0) {
        lo = mid + 1;
      } else {
        hi = mid;
      }
    }
    return lo;
  }

  // -------------------------------------------------------------------------
  // Splitting
  // -------------------------------------------------------------------------

  /**
   * Split an overflowing leaf node. The right half becomes a new leaf,
   * and a copy of its first key is promoted to the parent.
   *
   * @param path - Ancestors of `leaf`, root first (consumed from the end).
   */
  private splitLeaf(leaf: LeafNode<K, V>, path: InternalNode<K, V>[]): void {
    const mid = Math.ceil(leaf.keys.length / 2);
    const newLeaf = new LeafNode<K, V>();
    newLeaf.keys = leaf.keys.splice(mid);
    newLeaf.values = leaf.values.splice(mid);

    // Maintain the doubly-linked list.
    newLeaf.next = leaf.next;
    newLeaf.prev = leaf;
    if (leaf.next) leaf.next.prev = newLeaf;
    leaf.next = newLeaf;

    this.insertIntoParent(leaf, newLeaf.keys[0], newLeaf, path);
  }

  /**
   * Split an overflowing internal node. The middle key is pushed up
   * to the parent (not copied; it is removed from this level).
   *
   * @param path - Ancestors of `node`, root first (consumed from the end).
   */
  private splitInternal(
    node: InternalNode<K, V>,
    path: InternalNode<K, V>[],
  ): void {
    const mid = Math.floor(node.keys.length / 2);
    const promotedKey = node.keys[mid];
    const newNode = new InternalNode<K, V>();
    newNode.keys = node.keys.splice(mid + 1);
    newNode.children = node.children.splice(mid + 1);
    node.keys.splice(mid, 1); // remove the promoted key
    this.insertIntoParent(node, promotedKey, newNode, path);
  }

  /**
   * Insert a promoted key and new right child into the parent of `left`.
   * If `left` is the root (no ancestors remain in `path`), a new root is
   * created above it.
   *
   * @param path - Ancestors of `left`, root first; its last element is
   *   popped and used as the parent.
   */
  private insertIntoParent(
    left: Node<K, V>,
    key: K,
    right: Node<K, V>,
    path: InternalNode<K, V>[],
  ): void {
    const parent = path.pop();
    if (parent === undefined) {
      // `left` was the root: grow the tree by one level.
      const newRoot = new InternalNode<K, V>();
      newRoot.keys = [key];
      newRoot.children = [left, right];
      this.root = newRoot;
      return;
    }

    const idx = parent.children.indexOf(left);
    parent.keys.splice(idx, 0, key);
    parent.children.splice(idx + 1, 0, right);
    if (parent.keys.length > this.maxKeys) {
      this.splitInternal(parent, path);
    }
  }

  // -------------------------------------------------------------------------
  // Rebalancing (deletion)
  // -------------------------------------------------------------------------

  /**
   * Rebalance a leaf that has fewer than `minKeys` entries after deletion.
   * Tries to borrow from a sibling first; if neither sibling can spare
   * a key, merges with a sibling.
   *
   * @param path - Ancestors of `leaf`, root first; its last element is
   *   popped and used as the parent.
   */
  private rebalanceLeaf(leaf: LeafNode<K, V>, path: InternalNode<K, V>[]): void {
    const parent = path.pop();
    if (parent === undefined) return; // root leaf: nothing to balance against
    const idx = parent.children.indexOf(leaf);
    const hasRight = idx < parent.children.length - 1;

    // Try borrowing the smallest entry of the right sibling.
    if (hasRight) {
      const rightSibling = parent.children[idx + 1] as LeafNode<K, V>;
      if (rightSibling.keys.length > this.minKeys) {
        leaf.keys.push(rightSibling.keys.shift()!);
        leaf.values.push(rightSibling.values.shift()!);
        // The separator must track the right sibling's new first key.
        parent.keys[idx] = rightSibling.keys[0];
        return;
      }
    }

    // Try borrowing the largest entry of the left sibling.
    if (idx > 0) {
      const leftSibling = parent.children[idx - 1] as LeafNode<K, V>;
      if (leftSibling.keys.length > this.minKeys) {
        leaf.keys.unshift(leftSibling.keys.pop()!);
        leaf.values.unshift(leftSibling.values.pop()!);
        // The borrowed key is now this leaf's first key and the new separator.
        parent.keys[idx - 1] = leaf.keys[0];
        return;
      }
    }

    // Neither sibling can spare a key: merge with one of them.
    if (hasRight) {
      this.mergeLeaves(leaf, parent.children[idx + 1] as LeafNode<K, V>, parent, idx, path);
    } else {
      this.mergeLeaves(parent.children[idx - 1] as LeafNode<K, V>, leaf, parent, idx - 1, path);
    }
  }

  /**
   * Merge `right` leaf into `left` leaf and remove the separator key
   * from the parent.
   *
   * @param path - Ancestors of `parent`, root first.
   */
  private mergeLeaves(
    left: LeafNode<K, V>,
    right: LeafNode<K, V>,
    parent: InternalNode<K, V>,
    separatorIdx: number,
    path: InternalNode<K, V>[],
  ): void {
    left.keys.push(...right.keys);
    left.values.push(...right.values);

    // Unlink `right` from the leaf chain.
    left.next = right.next;
    if (right.next) right.next.prev = left;

    // Remove the separator key and right child from the parent.
    parent.keys.splice(separatorIdx, 1);
    parent.children.splice(separatorIdx + 1, 1);

    // The parent lost a key; rebalance it too if it is now underfull.
    if (parent !== this.root && parent.keys.length < this.minKeys) {
      this.rebalanceInternal(parent, path);
    }
  }

  /**
   * Rebalance an internal node that has too few keys after a merge.
   * Borrowing rotates a key through the parent: the separator moves down
   * into `node` and the sibling's nearest key moves up to replace it.
   *
   * @param path - Ancestors of `node`, root first; its last element is
   *   popped and used as the parent.
   */
  private rebalanceInternal(
    node: InternalNode<K, V>,
    path: InternalNode<K, V>[],
  ): void {
    const parent = path.pop();
    if (parent === undefined) return; // root: allowed to be underfull
    const idx = parent.children.indexOf(node);
    const hasRight = idx < parent.children.length - 1;

    // Try borrowing from the right sibling.
    if (hasRight) {
      const rightSibling = parent.children[idx + 1] as InternalNode<K, V>;
      if (rightSibling.keys.length > this.minKeys) {
        node.keys.push(parent.keys[idx]);
        parent.keys[idx] = rightSibling.keys.shift()!;
        node.children.push(rightSibling.children.shift()!);
        return;
      }
    }

    // Try borrowing from the left sibling.
    if (idx > 0) {
      const leftSibling = parent.children[idx - 1] as InternalNode<K, V>;
      if (leftSibling.keys.length > this.minKeys) {
        node.keys.unshift(parent.keys[idx - 1]);
        parent.keys[idx - 1] = leftSibling.keys.pop()!;
        node.children.unshift(leftSibling.children.pop()!);
        return;
      }
    }

    // Merge with a sibling.
    if (hasRight) {
      const rightSibling = parent.children[idx + 1] as InternalNode<K, V>;
      this.mergeInternal(node, rightSibling, parent, idx, path);
    } else {
      const leftSibling = parent.children[idx - 1] as InternalNode<K, V>;
      this.mergeInternal(leftSibling, node, parent, idx - 1, path);
    }
  }

  /**
   * Merge two internal nodes by pulling down the separator key from the
   * parent and concatenating children.
   *
   * @param path - Ancestors of `parent`, root first.
   */
  private mergeInternal(
    left: InternalNode<K, V>,
    right: InternalNode<K, V>,
    parent: InternalNode<K, V>,
    separatorIdx: number,
    path: InternalNode<K, V>[],
  ): void {
    left.keys.push(parent.keys[separatorIdx]);
    left.keys.push(...right.keys);
    left.children.push(...right.children);

    parent.keys.splice(separatorIdx, 1);
    parent.children.splice(separatorIdx + 1, 1);

    if (parent !== this.root && parent.keys.length < this.minKeys) {
      this.rebalanceInternal(parent, path);
    }
  }
}