diff --git a/benchmarks/storage.ts b/benchmarks/storage.ts index 3e75c7e..d2bbaf2 100644 --- a/benchmarks/storage.ts +++ b/benchmarks/storage.ts @@ -189,7 +189,7 @@ for (const count of DOC_COUNTS) { // EncryptedStorage // --------------------------------------------------------------------------- -const ENCRYPTED_DOC_COUNTS = [100, 1_000]; +const ENCRYPTED_DOC_COUNTS = [100, 1_000, 10_000]; const encryptionKey = await AESKey.fromSeed('benchmark-key'); for (const count of ENCRYPTED_DOC_COUNTS) { diff --git a/src/storage/V2-Readme.md b/src/storage/V2-Readme.md new file mode 100644 index 0000000..bf7bc25 --- /dev/null +++ b/src/storage/V2-Readme.md @@ -0,0 +1,717 @@ +# Storage Architecture + +This document describes the intended storage architecture for the project. + +The main design goal is to keep the primitives small and composable, so the same fundamental pieces can be used in: + +- browser clients +- local desktop/mobile apps +- trusted servers +- opaque or low-trust remote storage backends + +It also aims to separate: + +- raw persistence +- query mechanics +- immutable event truth +- derived current-state views +- encryption policy + +--- + +## Core idea + +There are **two fundamental storage primitives**: + +1. **BlobStorage**: key → binary blob +2. **DocumentStorage**: store of current-state documents + +Then there are **higher-level layers** built on top: + +- **EventLog**: append-only immutable event history, usually backed by `BlobStorage` +- **Queryable**: query/filter/sort/update helper, built on top of `DocumentStorage` +- **MaterializedView**: domain-specific projection that reads an `EventLog` and writes derived documents into a `DocumentStorage` + +This means the architecture is not one giant storage abstraction. It is a set of small pieces that can be combined. 
+ +--- + +## Layer overview + +```text +BlobStorage + -> EventLog + +DocumentStorage + -> Queryable + +MaterializedView + -> consumes EventLog + -> writes DocumentStorage + -> may expose Queryable for reads +``` + +This is the key relationship: + +- `BlobStorage` is not event-specific +- `DocumentStorage` is not query-specific +- `Queryable` is not domain-specific +- `MaterializedView` is where domain-specific projection logic lives + +--- + +## Why this split exists + +### BlobStorage +`BlobStorage` is the smallest persistence primitive. + +It is appropriate when the caller already knows the key and wants to read/write an opaque payload. + +Good for: + +- encrypted remote storage +- snapshots +- immutable event entries +- simple per-key settings blobs +- replication substrates + +### DocumentStorage +`DocumentStorage` is a primitive for storing current-state documents. + +It does **not** need to know about events or query languages. It is just the place where document data lives. + +Good for: + +- current state +- materialized views +- structured settings +- drafts +- cached API data +- queryable local/server projections + +### Queryable +`Queryable` is a helper or engine layered on top of `DocumentStorage`. + +It adds things like: + +- filtering +- sorting +- indexing +- query-based updates/deletes + +It should stay generic. It should not know what a wallet event is, what a BCH UTXO is, or what a LinkedIn post lifecycle means. + +### EventLog +`EventLog` is an append-only immutable log of events. It is usually backed by `BlobStorage`. + +It is the source of truth when using event-sourced flows. + +### MaterializedView +`MaterializedView` is a domain-specific component that: + +- reads events from an `EventLog` +- interprets those events +- writes derived documents into a `DocumentStorage` +- optionally exposes `Queryable` for reads + +This is where event semantics belong. + +--- + +## High-level architecture diagrams + +### 1. 
Simple client-side settings + +```text +DocumentStorage + -> Queryable +``` + +or, if settings are small and always read/written as one blob: + +```text +BlobStorage +``` + +### 2. Opaque remote event system + +```text +BlobStorage + -> EncryptedBlobStorage + -> EventLog + -> MaterializedView + -> DocumentStorage + -> Queryable +``` + +### 3. Trusted server, plain document service + +```text +DocumentStorage + -> Queryable +``` + +### 4. Trusted server, event-sourced read model + +```text +BlobStorage + -> EventLog + +DocumentStorage + -> Queryable + +MaterializedView + -> consumes EventLog + -> writes DocumentStorage +``` + +--- + +## Concrete primitive interfaces + +These are intentionally small. + +## BlobStorage + +```ts +export interface BlobStorage { + get(key: string): Promise<Uint8Array | undefined> + set(key: string, value: Uint8Array): Promise<void> + keys(prefix?: string): Promise<string[]> + has?(key: string): Promise<boolean> + delete?(key: string): Promise<void> + close?(): Promise<void> +} +``` + +Example adapters: + +- `MemoryBlobStorage` +- `BrowserLocalBlobStorage` +- `IndexedDbBlobStorage` +- `SqliteBlobStorage` +- `RemoteBlobStorage` + +--- + +## DocumentStorage + +```ts +export interface DocumentStorage<TDocument extends Record<string, unknown>> { + insert(document: TDocument): Promise<void> + insertMany(documents: TDocument[]): Promise<void> + + list(): Promise<TDocument[]> + clear(): Promise<void> + + replaceById?( + id: string, + document: TDocument, + ): Promise<void> + + deleteById?(id: string): Promise<void> + + close?(): Promise<void> +} +``` + +This interface is intentionally minimal. + +It stores documents. It does not define filtering, indexes, or event semantics. + +Example adapters: + +- `MemoryDocumentStorage` +- `IndexedDbDocumentStorage` +- `SqliteDocumentStorage` + +--- + +## Queryable + +`Queryable` is built **on top of** a `DocumentStorage`. 
+ +```ts +export type Filter<T> = Partial<{ + [K in keyof T]: T[K] +}> + +export interface Queryable<TDocument extends Record<string, unknown>> { + find(filter?: Filter<TDocument>): Promise<TDocument[]> + findOne(filter?: Filter<TDocument>): Promise<TDocument | undefined> + + updateMany?( + filter: Filter<TDocument>, + update: Partial<TDocument>, + ): Promise<number> + + deleteMany?( + filter: Filter<TDocument>, + ): Promise<number> +} +``` + +A simple implementation might scan all documents from `DocumentStorage`. +A richer implementation might maintain indexes. + +The important point is: + +**Queryable is a utility/engine over document data, not the fundamental storage primitive.** + +--- + +## EventLog + +`EventLog` is built **on top of** `BlobStorage`. + +```ts +export interface EventEnvelope<TEvent> { + id: string + streamId: string + type: string + timestamp: number + payload: TEvent +} + +export interface EventLog<TEvent> { + append(event: EventEnvelope<TEvent>): Promise<void> + list(streamId?: string): Promise<EventEnvelope<TEvent>[]> + get?(id: string): Promise<EventEnvelope<TEvent> | undefined> +} +``` + +A typical implementation stores one event per blob key: + +- key: `streamId/events/eventId` +- value: serialized event envelope + +--- + +## MaterializedView + +`MaterializedView` is domain-specific. It knows how to translate events into current-state documents. + +```ts +export interface MaterializedView<TEvent> { + rebuild(): Promise<void> + apply(event: EventEnvelope<TEvent>): Promise<void> +} +``` + +A materialized view usually owns or uses: + +- one `EventLog` +- one `DocumentStorage` +- optionally one `Queryable` + +--- + +# Relationship between MaterializedView and Queryable + +This is an important conceptual point. + +A **materialized view is not the same thing as a query engine**. 
+ +Instead: + +- the **materialized view** is the derived dataset and the logic that maintains it +- the **document storage** is where that derived dataset is stored +- the **queryable** is how that derived dataset is read efficiently + +So the relationship is: + +```text +MaterializedView + -> writes to DocumentStorage + -> may expose Queryable +``` + +or more concretely: + +```text +WalletMaterializedView + -> MemoryDocumentStorage + -> Queryable +``` + +--- + +# Example 1: client-side settings + +If settings are simple and always loaded as one object, just use `BlobStorage` directly. + +```ts +const blobs = new BrowserLocalBlobStorage() + +await blobs.set( + 'settings', + new TextEncoder().encode(JSON.stringify({ darkMode: true })), +) +``` + +If settings are structured and you want querying or incremental updates, use `DocumentStorage` and `Queryable`. + +```ts +type SettingDoc = { + key: string + value: unknown +} + +const documents = new MemoryDocumentStorage<SettingDoc>() +const queryable = new BasicQueryable(documents) + +await documents.insert({ key: 'darkMode', value: true }) +await documents.insert({ key: 'language', value: 'en' }) + +const darkMode = await queryable.findOne({ key: 'darkMode' }) +``` + +No event log is required unless you specifically want history. 
+ +--- + +# Example 2: wallet event sourcing + +## Event types + +```ts +type WalletEvent = + | { + kind: 'UtxoObserved' + outpoint: string + value: number + lockingBytecode: string + } + | { + kind: 'UtxoConfirmed' + outpoint: string + minedAtHeight: number + } + | { + kind: 'UtxoSpent' + outpoint: string + } +``` + +## Derived document shape + +```ts +type UtxoDoc = { + outpoint: string + value: number + lockingBytecode: string + status: 'pending' | 'confirmed' + minedAtHeight?: number +} +``` + +## Materialized view implementation + +```ts +class WalletMaterializedView + implements MaterializedView<WalletEvent> +{ + public readonly query: Queryable<UtxoDoc> + + constructor( + private readonly eventLog: EventLog<WalletEvent>, + private readonly documents: DocumentStorage<UtxoDoc>, + ) { + this.query = new BasicQueryable(documents) + } + + async rebuild(): Promise<void> { + await this.documents.clear() + + const events = await this.eventLog.list('wallet') + for (const event of events) { + await this.apply(event) + } + } + + async apply(event: EventEnvelope<WalletEvent>): Promise<void> { + switch (event.payload.kind) { + case 'UtxoObserved': + await this.documents.insert({ + outpoint: event.payload.outpoint, + value: event.payload.value, + lockingBytecode: event.payload.lockingBytecode, + status: 'pending', + }) + return + + case 'UtxoConfirmed': { + const docs = await this.documents.list() + const updated = docs.map((doc) => + doc.outpoint === event.payload.outpoint + ? 
{ + ...doc, + status: 'confirmed' as const, + minedAtHeight: event.payload.minedAtHeight, + } + : doc, + ) + + await this.documents.clear() + await this.documents.insertMany(updated) + return + } + + case 'UtxoSpent': { + const docs = await this.documents.list() + const remaining = docs.filter( + (doc) => doc.outpoint !== event.payload.outpoint, + ) + + await this.documents.clear() + await this.documents.insertMany(remaining) + return + } + } + } +} +``` + +## Usage + +```ts +const eventBlobs = new MemoryBlobStorage() +const eventLog = new BasicEventLog<WalletEvent>(eventBlobs) + +const utxoDocuments = new MemoryDocumentStorage<UtxoDoc>() +const walletView = new WalletMaterializedView(eventLog, utxoDocuments) + +await eventLog.append({ + id: '001', + streamId: 'wallet', + type: 'wallet', + timestamp: Date.now(), + payload: { + kind: 'UtxoObserved', + outpoint: 'tx1:0', + value: 1000, + lockingBytecode: '76a914...', + }, +}) + +await eventLog.append({ + id: '002', + streamId: 'wallet', + type: 'wallet', + timestamp: Date.now(), + payload: { + kind: 'UtxoConfirmed', + outpoint: 'tx1:0', + minedAtHeight: 900000, + }, +}) + +await walletView.rebuild() + +const confirmed = await walletView.query.find({ status: 'confirmed' }) +``` + +--- + +# Example 3: trusted server storing a LinkedIn post + +If this is a normal service and there is no need for append-only event truth, use `DocumentStorage` directly. + +```ts +type LinkedInPostDoc = { + id: string + userId: string + content: string + status: 'draft' | 'scheduled' | 'published' + updatedAt: number +} + +const postDocuments = new SqliteDocumentStorage<LinkedInPostDoc>('posts') +const posts = new BasicQueryable(postDocuments) +``` + +This is appropriate when the current state is what matters. + +Use an event log only if you actually want: + +- change history +- replay +- auditability +- sync +- derived read models + +--- + +# Encryption policies + +Encryption can be applied at different layers depending on what is being protected. 
+ +## Policy A: encrypted blob/event storage + +Use when you want opaque storage, especially for remote or low-trust systems. + +```text +BlobStorage + -> EncryptedBlobStorage + -> EventLog +``` + +This protects the contents of event payloads or opaque blobs. +The server or adapter only sees: + +- key +- ciphertext blob +- access patterns + +It does **not** understand the event structure. + +## Policy B: encrypted document storage + +Use when you want queryable document storage, but still want encrypted-at-rest fields. + +```text +DocumentStorage + -> EncryptedDocumentStorage + -> Queryable +``` + +This is appropriate when the backend is trusted or semi-trusted and queryability matters. +It leaks more than encrypted blob storage, because queries and indexes reveal some structure. + +## Policy C: both + +Use encrypted blob storage for event truth and encrypted document storage for local/server projections. + +```text +BlobStorage + -> EncryptedBlobStorage + -> EventLog + -> MaterializedView + -> DocumentStorage + -> EncryptedDocumentStorage + -> Queryable +``` + +--- + +# Shared client/server model + +A key design goal is that the same architecture can be used on both client and server by swapping adapters. + +## Client + +- `BrowserLocalBlobStorage` +- `IndexedDbDocumentStorage` +- `MemoryDocumentStorage` + +## Server + +- `SqliteBlobStorage` +- `SqliteDocumentStorage` +- `MemoryDocumentStorage` for ephemeral processing/tests + +The important point is that the **abstractions stay the same**: + +- event log is still built on `BlobStorage` +- queryable is still built on `DocumentStorage` +- materialized views still consume event logs and write document stores + +Only the backing adapters change. + +--- + +# Why not one giant storage abstraction? 
+ +A single giant abstraction tends to mix too many responsibilities: + +- persistence +- query logic +- event semantics +- business-specific derived queries +- remote sync assumptions +- encryption policy + +That leads to tight coupling and awkward public APIs. + +This design deliberately avoids that by using small primitives and explicit higher-level layers. + +--- + +# Rules of thumb + +## Use BlobStorage directly when + +- you know the key already +- you read/write whole opaque values +- you do not need querying +- you want server-oblivious or encrypted blob semantics + +## Use DocumentStorage + Queryable when + +- you want current-state structured data +- filtering/sorting/updating is useful +- you do not need immutable event truth +- you are building a normal CRUD-style app/service + +## Use EventLog when + +- you want immutable source-of-truth history +- you want replay or auditability +- you want syncable append-only state +- you want projections/materialized views + +## Use MaterializedView when + +- current state is derived from immutable events +- consumers need fast queries over the current state +- you want to keep domain event semantics out of generic storage primitives + +--- + +# Recommended naming + +To keep the codebase easy to reason about, prefer names like: + +- `BlobStorage` +- `DocumentStorage` +- `Queryable` +- `EventLog` +- `MaterializedView` + +Avoid using one class name for both a primitive and a high-level domain role. + +For example, `QueryableStorage` is often ambiguous because it sounds like both: + +- a storage primitive +- and a query utility + +Splitting that into `DocumentStorage` and `Queryable` is clearer. 
+ +--- + +# Summary + +The intended architecture is: + +```text +BlobStorage + -> EventLog + +DocumentStorage + -> Queryable + +MaterializedView + -> consumes EventLog + -> writes DocumentStorage + -> may expose Queryable +``` + +This gives: + +- small reusable primitives +- consistent client/server layering +- clear separation of truth vs derived state +- explicit query behavior +- explicit encryption policies +- less coupling than a monolithic storage/state interface +