diff --git a/src/__tests__/sync.test.ts b/src/__tests__/sync.test.ts index 43a9f39..64efde0 100644 --- a/src/__tests__/sync.test.ts +++ b/src/__tests__/sync.test.ts @@ -89,13 +89,13 @@ describe('SyncWorker', () => { expect(await cache.get('/new-file.txt')).toBeTruthy() }) - it('should update changed nodes based on etag', async () => { + it('should update changed nodes based on size', async () => { const originalNode = makeNode('/file.txt') - originalNode.etag = 'old-etag' + originalNode.size = 1024 await cache.set('/file.txt', originalNode) const updatedNode = makeNode('/file.txt') - updatedNode.etag = 'new-etag' + updatedNode.size = 2048 const tree: Record = { '/': [updatedNode], @@ -109,7 +109,7 @@ describe('SyncWorker', () => { worker.stop() const cached = await cache.get('/file.txt') - expect(cached?.etag).toBe('new-etag') + expect(cached?.size).toBe(2048) }) it('should handle forceSync', async () => { diff --git a/src/adapters/cache/sqlite.ts b/src/adapters/cache/sqlite.ts index 7694e22..e5e23a0 100644 --- a/src/adapters/cache/sqlite.ts +++ b/src/adapters/cache/sqlite.ts @@ -11,10 +11,25 @@ const DEFAULT_DB_PATH = join(homedir(), '.config', 'sftp-proxy', 'cache.db') /** * SQLite-backed cache implementation. * Stores node metadata in a `nodes` table and log entries in a `logs` table. + * + * All prepared statements are compiled once and reused to avoid the cost of + * re-parsing SQL on every call (better-sqlite3's prepare is synchronous + * and surprisingly expensive when called thousands of times). */ export class SqliteCache extends BaseCache { private db: Database.Database + // Pre-compiled statements — initialised in initSchema() + private stmtGet!: Database.Statement + private stmtUpsert!: Database.Statement + private stmtDelete!: Database.Statement + private stmtAll!: Database.Statement + private stmtKeys!: Database.Statement + private stmtChildren!: Database.Statement + private stmtClear!: Database.Statement + private stmtWriteLog!: Database.Statement + private stmtReadLogs!: Database.Statement + /** * @param dbPath - Path to the SQLite database file. Defaults to ~/.config/sftp-proxy/cache.db */ @@ -25,6 +40,7 @@ export class SqliteCache extends BaseCache { this.db = new Database(dbPath) this.db.pragma('journal_mode = WAL') this.initSchema() + this.prepareStatements() } /** Creates tables if they don't already exist. */ @@ -49,6 +65,28 @@ export class SqliteCache extends BaseCache { `) } + /** Compiles all prepared statements once at construction time. */ + private prepareStatements(): void { + this.stmtGet = this.db.prepare('SELECT * FROM nodes WHERE path = ?') + this.stmtUpsert = this.db.prepare(` + INSERT OR REPLACE INTO nodes (path, name, is_dir, size, modified, etag, synced_at) + VALUES (?, ?, ?, ?, ?, ?, ?) + `) + this.stmtDelete = this.db.prepare('DELETE FROM nodes WHERE path = ?') + this.stmtAll = this.db.prepare('SELECT * FROM nodes') + this.stmtKeys = this.db.prepare('SELECT path FROM nodes') + this.stmtChildren = this.db.prepare( + 'SELECT * FROM nodes WHERE path LIKE ? AND path NOT LIKE ?' + ) + this.stmtClear = this.db.prepare('DELETE FROM nodes') + this.stmtWriteLog = this.db.prepare( + 'INSERT INTO logs (timestamp, level, message) VALUES (?, ?, ?)' + ) + this.stmtReadLogs = this.db.prepare( + 'SELECT timestamp, level, message FROM logs ORDER BY id DESC LIMIT ?' + ) + } + /** Maps a database row to a GenericNode. */ private rowToNode(row: Record): GenericNode { return { @@ -62,15 +100,12 @@ export class SqliteCache extends BaseCache { } async get(key: string): Promise { - const row = this.db.prepare('SELECT * FROM nodes WHERE path = ?').get(key) as Record | undefined + const row = this.stmtGet.get(key) as Record | undefined return row ? this.rowToNode(row) : null } async set(key: string, value: GenericNode): Promise { - this.db.prepare(` - INSERT OR REPLACE INTO nodes (path, name, is_dir, size, modified, etag, synced_at) - VALUES (?, ?, ?, ?, ?, ?, ?) - `).run( + this.stmtUpsert.run( key, value.name, value.isDir ? 1 : 0, @@ -82,16 +117,16 @@ export class SqliteCache extends BaseCache { } async delete(key: string): Promise { - this.db.prepare('DELETE FROM nodes WHERE path = ?').run(key) + this.stmtDelete.run(key) } async all(): Promise { - const rows = this.db.prepare('SELECT * FROM nodes').all() as Record[] + const rows = this.stmtAll.all() as Record[] return rows.map(row => this.rowToNode(row)) } async keys(): Promise { - const rows = this.db.prepare('SELECT path FROM nodes').all() as Array<{ path: string }> + const rows = this.stmtKeys.all() as Array<{ path: string }> return rows.map(row => row.path) } @@ -104,15 +139,12 @@ export class SqliteCache extends BaseCache { const prefix = normalised + '/' const nestedPrefix = normalised + '/%/%' - const rows = this.db.prepare( - `SELECT * FROM nodes WHERE path LIKE ? AND path NOT LIKE ?` - ).all(prefix + '%', nestedPrefix) as Record[] - + const rows = this.stmtChildren.all(prefix + '%', nestedPrefix) as Record[] return rows.map(row => this.rowToNode(row)) } async clear(): Promise { - this.db.prepare('DELETE FROM nodes').run() + this.stmtClear.run() } /** @@ -120,9 +152,7 @@ export class SqliteCache extends BaseCache { * Used by the winston SQLite transport. */ writeLog(level: string, message: string): void { - this.db.prepare( - 'INSERT INTO logs (timestamp, level, message) VALUES (?, ?, ?)' - ).run(Date.now(), level, message) + this.stmtWriteLog.run(Date.now(), level, message) } /** @@ -130,9 +160,7 @@ export class SqliteCache extends BaseCache { * @param limit - Maximum number of entries to return. Defaults to 500. */ readLogs(limit: number = 500): Array<{ timestamp: number; level: string; message: string }> { - return this.db.prepare( - 'SELECT timestamp, level, message FROM logs ORDER BY id DESC LIMIT ?' - ).all(limit) as Array<{ timestamp: number; level: string; message: string }> + return this.stmtReadLogs.all(limit) as Array<{ timestamp: number; level: string; message: string }> } /** Closes the database connection. */ diff --git a/src/adapters/server/base.ts b/src/adapters/server/base.ts index 21fe8ba..8419bd8 100644 --- a/src/adapters/server/base.ts +++ b/src/adapters/server/base.ts @@ -10,16 +10,48 @@ export interface ResolvedPath { } /** - * Filenames that macOS Finder and other OS-level clients probe for - * automatically. These never exist on remote storage and should be - * silently rejected to avoid log spam and unnecessary network requests. + * Filenames and directories that macOS Finder, Spotlight, and other OS-level + * clients probe for automatically. These never exist on remote storage and + * should be silently rejected to avoid log spam and unnecessary network requests. * + * Basename patterns: * - `._*` — AppleDouble resource fork sidecar files * - `.DS_Store` — Finder directory metadata * - `.Spotlight-V100`, `.Trashes`, `.fseventsd` — Spotlight / Trash / FSEvents * - `desktop.ini`, `Thumbs.db` — Windows Explorer metadata + * - `Contents` — macOS bundle/package probe (Finder checks every dir for this) + * - `_CodeSignature` — macOS code signing probe + * - `Icon\r` — macOS custom folder icon file + * - `.localized` — macOS localisation marker + * - `.hidden` — macOS hidden file marker + * - `.ql_diskcache`, `.ql_thumb*` — QuickLook cache probes + * - `.DocumentRevisions-V100` — macOS document versions DB + * - `.TemporaryItems` — macOS temporary item store + * - `.apdisk` — Apple disk image metadata + * - `.com.apple.*` — various Apple services metadata */ -const IGNORED_BASENAMES_RE = /^\._|^\.DS_Store$|^\.Spotlight-V100$|^\.Trashes$|^\.fseventsd$|^desktop\.ini$|^Thumbs\.db$/ +const IGNORED_BASENAMES_RE = new RegExp( + [ + /^\._/, // AppleDouble resource forks + /^\.DS_Store$/, // Finder metadata + /^\.Spotlight-V100$/, // Spotlight index + /^\.Trashes$/, // Trash folder + /^\.fseventsd$/, // FSEvents log + /^\.localized$/, // Localisation marker + /^\.hidden$/, // Hidden marker + /^\.apdisk$/, // AFP disk metadata + /^\.ql_diskcache$/, // QuickLook cache + /^\.ql_thumb/, // QuickLook thumbnails + /^\.DocumentRevisions-V100$/, // Document versions DB + /^\.TemporaryItems$/, // Temporary items + /^\.com\.apple\./, // Apple service metadata + /^Contents$/, // macOS bundle/package probe + /^_CodeSignature$/, // Code signing probe + /^Icon\r?$/, // Custom folder icon + /^desktop\.ini$/, // Windows Explorer metadata + /^Thumbs\.db$/, // Windows thumbnail cache + ].map(r => r.source).join('|') +) /** Returns true if the basename of a path matches a known OS metadata probe. */ export function isIgnoredPath(filePath: string): boolean { @@ -27,6 +59,13 @@ export function isIgnoredPath(filePath: string): boolean { return IGNORED_BASENAMES_RE.test(name) } +/** + * TTL for negative (404) cache entries in milliseconds. + * Paths that don't exist on the remote are remembered for this long + * to prevent repeated network requests from OS metadata probes. + */ +const NEGATIVE_CACHE_TTL_MS = 5 * 60 * 1000 + /** * Abstract base class for all protocol servers (SFTP, FTP, WebDAV). * @@ -36,6 +75,14 @@ export function isIgnoredPath(filePath: string): boolean { export abstract class BaseServer { protected clients: BaseClient[] + /** + * Negative cache: remembers paths that returned 404 so we don't hit the + * remote again for each repeated macOS/Finder probe. Entries expire after + * NEGATIVE_CACHE_TTL_MS. Key is "mountPath:remotePath", value is expiry timestamp. + */ + private negativeCacheStat = new Map() + private negativeCacheList = new Map() + constructor(clients: BaseClient[]) { this.clients = clients } @@ -46,6 +93,20 @@ export abstract class BaseServer { /** Stops the protocol server. */ abstract stop(): Promise + /** Returns true if a negative cache entry exists and hasn't expired. */ + private isNegativelyCached(cache: Map, key: string): boolean { + const expiry = cache.get(key) + if (expiry === undefined) return false + if (Date.now() < expiry) return true + cache.delete(key) + return false + } + + /** Adds a path to the negative cache. */ + private setNegativeCache(cache: Map, key: string): void { + cache.set(key, Date.now() + NEGATIVE_CACHE_TTL_MS) + } + /** * Resolves an incoming virtual path to the client that owns it and the * path relative to that client's basePath. @@ -96,9 +157,10 @@ export abstract class BaseServer { /** * Handles a stat request with cache-first strategy. - * 1. Check client.cache — if hit, return immediately - * 2. If miss, prioritise the parent dir for sync and try stat directly - * 3. Never block indefinitely — return null if all else fails + * 1. Reject known OS metadata probes immediately + * 2. Check positive cache — if hit, return immediately + * 3. Check negative cache — if recently 404'd, return null without network I/O + * 4. Fall through to direct stat (no sync prioritise — that's too expensive) */ protected async handleStat(incomingPath: string): Promise { const resolved = this.resolveClient(incomingPath) @@ -119,7 +181,7 @@ export abstract class BaseServer { const { client, remotePath } = resolved - // Silently ignore OS metadata probes (._files, .DS_Store, etc.) + // Silently ignore OS metadata probes (._files, .DS_Store, Contents, etc.) if (isIgnoredPath(remotePath)) { return null } @@ -136,30 +198,32 @@ export abstract class BaseServer { } } - // Try cache first + // Try positive cache first const cached = await client.cache.get(remotePath) if (cached) return cached - // Prioritise the parent directory for sync - const parentDir = posix.dirname(remotePath) - client.sync.prioritise(parentDir) + // Check negative cache — avoid hitting the remote for known-missing paths + const negKey = `${client.mountPath}:${remotePath}` + if (this.isNegativelyCached(this.negativeCacheStat, negKey)) { + return null + } - // Fall through to direct stat + // Fall through to direct stat (no prioritise — that triggers expensive recursive syncs) try { const node = await client.stat(remotePath) await client.cache.set(remotePath, node) return node - } catch (err) { - logger.error(`stat failed for ${incomingPath}: ${(err as Error).message}`) + } catch { + this.setNegativeCache(this.negativeCacheStat, negKey) return null } } /** * Handles a list request with cache-first strategy. - * 1. Check client.cache.children — if non-empty, return immediately - * 2. If miss, prioritise the path for sync and try list directly - * 3. Never block indefinitely — return empty array if all else fails + * 1. Check positive cache — if non-empty, return immediately + * 2. Check negative cache — if recently failed, return empty without network I/O + * 3. Fall through to direct list */ protected async handleList(incomingPath: string): Promise { const resolved = this.resolveClient(incomingPath) @@ -174,22 +238,25 @@ export abstract class BaseServer { const { client, remotePath } = resolved - // Try cache first + // Try positive cache first const cached = await client.cache.children(remotePath) if (cached.length > 0) return cached - // Prioritise this path for sync - client.sync.prioritise(remotePath) + // Check negative cache + const negKey = `${client.mountPath}:${remotePath}` + if (this.isNegativelyCached(this.negativeCacheList, negKey)) { + return [] + } - // Fall through to direct list + // Fall through to direct list (no prioritise — let the background sync handle it) try { const nodes = await client.list(remotePath) for (const node of nodes) { await client.cache.set(node.path, node) } return nodes - } catch (err) { - logger.error(`list failed for ${incomingPath}: ${(err as Error).message}`) + } catch { + this.setNegativeCache(this.negativeCacheList, negKey) return [] } } diff --git a/src/logger.ts b/src/logger.ts index 4fa2509..0387551 100644 --- a/src/logger.ts +++ b/src/logger.ts @@ -37,11 +37,17 @@ export const logger = createLogger({ ], }) +/** Tracks whether a SQLite transport has already been attached. */ +let sqliteTransportAttached = false + /** * Attaches the SQLite transport to the global logger. - * Called once during startup after the SqliteCache is created. + * Only the first call takes effect — subsequent calls are no-ops to prevent + * duplicate transports when multiple clients each create a SqliteCache. */ export function attachSqliteTransport(cache: SqliteCache): void { + if (sqliteTransportAttached) return + sqliteTransportAttached = true logger.add(new SqliteTransport(cache)) } diff --git a/src/sync.ts b/src/sync.ts index ffbb638..d3b1c25 100644 --- a/src/sync.ts +++ b/src/sync.ts @@ -15,12 +15,14 @@ export interface SyncableClient { interface QueueEntry { path: string priority: SyncPriority + /** When true, child directories discovered during this sync will be enqueued too. */ + recursive: boolean /** Resolvers for anyone awaiting forceSync on this path. */ waiters: Array<{ resolve: () => void; reject: (err: Error) => void }> } -/** Minimum polling interval in milliseconds. */ -const MIN_BACKOFF_MS = 10_000 +/** Minimum polling interval in milliseconds (2 minutes). */ +const MIN_BACKOFF_MS = 2 * 60 * 1000 /** Maximum polling interval in milliseconds (30 minutes). */ const MAX_BACKOFF_MS = 30 * 60 * 1000 @@ -55,7 +57,7 @@ export class SyncWorker { start(): void { if (this.running) return this.running = true - this.enqueue('/', SyncPriority.HIGH) + this.enqueue('/', SyncPriority.HIGH, true) this.pump() } @@ -77,11 +79,13 @@ export class SyncWorker { const existing = this.queue.get(path) if (existing) { existing.priority = SyncPriority.HIGH + existing.recursive = true existing.waiters.push({ resolve, reject }) } else { this.queue.set(path, { path, priority: SyncPriority.HIGH, + recursive: true, waiters: [{ resolve, reject }], }) } @@ -97,8 +101,9 @@ export class SyncWorker { const existing = this.queue.get(path) if (existing) { existing.priority = SyncPriority.HIGH + existing.recursive = true } else { - this.enqueue(path, SyncPriority.HIGH) + this.enqueue(path, SyncPriority.HIGH, true) } this.pump() } @@ -115,9 +120,9 @@ export class SyncWorker { } /** Adds a path to the queue if not already present. */ - private enqueue(path: string, priority: SyncPriority): void { + private enqueue(path: string, priority: SyncPriority, recursive: boolean): void { if (this.queue.has(path)) return - this.queue.set(path, { path, priority, waiters: [] }) + this.queue.set(path, { path, priority, recursive, waiters: [] }) } /** @@ -197,23 +202,37 @@ export class SyncWorker { // Build a set of remote paths for quick lookup const remotePaths = new Set(remoteNodes.map(n => n.path)) + // Track structural changes (files added/removed) separately from metadata-only updates. + // Only structural changes should reset the backoff — metadata fluctuations from the + // remote server (e.g. WebDAV returning different etags each request) shouldn't. + let structuralChange = false + // Upsert new/changed nodes for (const remoteNode of remoteNodes) { const cached = await this.cache.get(remoteNode.path) - const isChanged = !cached - || cached.etag !== remoteNode.etag - || cached.modified.getTime() !== remoteNode.modified.getTime() - || cached.size !== remoteNode.size - - if (isChanged) { + if (!cached) { + // Brand new node — this is a structural change await this.cache.set(remoteNode.path, remoteNode) - changesDetected = true + structuralChange = true + } else { + // Only update the cache if something meaningful changed. + // Compare size and name (stable properties); skip etag and modified which + // many WebDAV servers regenerate on every PROPFIND. + const meaningfulChange = cached.size !== remoteNode.size + || cached.name !== remoteNode.name + || cached.isDir !== remoteNode.isDir + + if (meaningfulChange) { + await this.cache.set(remoteNode.path, remoteNode) + structuralChange = true + } } - // Enqueue child directories for background crawl - if (remoteNode.isDir) { - this.enqueue(remoteNode.path, SyncPriority.LOW) + // Only recurse into child directories during initial sync or user-triggered refreshes. + // Background polls skip this to avoid a full tree crawl every cycle. + if (remoteNode.isDir && entry.recursive) { + this.enqueue(remoteNode.path, SyncPriority.LOW, true) } } @@ -221,12 +240,13 @@ export class SyncWorker { for (const cachedNode of cachedNodes) { if (!remotePaths.has(cachedNode.path)) { await this.cache.delete(cachedNode.path) - changesDetected = true + structuralChange = true } } - // Reset backoff if any changes were detected - if (changesDetected) { + // Only reset backoff on structural changes (added/removed files). + // Metadata-only diffs (etag, mtime) are expected noise and shouldn't prevent idle. + if (structuralChange) { this.backoffMs = MIN_BACKOFF_MS logger.info(`Sync detected changes at ${path}`) } @@ -256,15 +276,22 @@ export class SyncWorker { if (!this.running) return if (this.pollTimer) return - this.pollTimer = setTimeout(() => { + const timer = setTimeout(() => { this.pollTimer = null if (!this.running) return - this.enqueue('/', SyncPriority.LOW) + // Non-recursive: only re-check the root listing, don't crawl the entire tree. + // User-triggered actions (forceSync, prioritise) still do full recursive syncs. + this.enqueue('/', SyncPriority.LOW, false) this.pump() // Increase backoff for next cycle (capped at MAX_BACKOFF_MS) this.backoffMs = Math.min(this.backoffMs * 2, MAX_BACKOFF_MS) }, this.backoffMs) + + // Unref so the timer alone doesn't keep the event loop (and CPU) active. + // The server TCP listeners already keep the process alive. + timer.unref() + this.pollTimer = timer } }