feat: added session indexer logic

2026-05-09 05:58:27 +00:00 · 2026-04-23 17:32:08 +03:00
parent 7b75ed0b72
commit f99af1ff67
14 changed files with 1193 additions and 6 deletions
--- a/server/shared/utils.ts
+++ b/server/shared/utils.ts
@@ -1,6 +1,8 @@
 import { randomUUID } from 'node:crypto';
-import { mkdir, readFile, writeFile } from 'node:fs/promises';
+import fs from 'node:fs';
+import { mkdir, readFile, readdir, stat, writeFile } from 'node:fs/promises';
 import path from 'node:path';
+import readline from 'node:readline';

 import type { NextFunction, Request, RequestHandler, Response } from 'express';

@@ -215,3 +217,167 @@ export const writeJsonConfig = async (filePath: string, data: Record<string, unk
  await writeFile(filePath, `${JSON.stringify(data, null, 2)}\n`, 'utf8');
 };

+// ---------------------------
+//----------------- SESSION SYNCHRONIZER TITLE HELPERS ------------
+/**
+ * Produces a compact session title suitable for UI rendering and DB storage.
+ *
+ * Use this when converting provider-native names into a consistent title value.
+ * Every run of whitespace (spaces, tabs, newlines) is collapsed to one space and
+ * the result is trimmed. Titles longer than 120 UTF-16 code units are truncated.
+ * The cut never leaves half of a surrogate pair or a trailing space behind, so a
+ * truncated title may be slightly shorter than the limit.
+ *
+ * @param rawValue - Provider-native name; `undefined` is treated as empty input.
+ * @param fallback - Returned unchanged when the normalized input is empty.
+ * @returns The normalized title (at most 120 UTF-16 code units) or `fallback`.
+ */
+export function normalizeSessionName(rawValue: string | undefined, fallback: string): string {
+  const maxTitleLength = 120;
+
+  const normalized = (rawValue ?? '').replace(/\s+/g, ' ').trim();
+  if (!normalized) {
+    return fallback;
+  }
+
+  if (normalized.length <= maxTitleLength) {
+    return normalized;
+  }
+
+  let truncated = normalized.slice(0, maxTitleLength);
+
+  // A cut between the two halves of a surrogate pair leaves a lone high
+  // surrogate, which renders as a replacement glyph and is not valid UTF-16.
+  const lastCodeUnit = truncated.charCodeAt(truncated.length - 1);
+  if (lastCodeUnit >= 0xd800 && lastCodeUnit <= 0xdbff) {
+    truncated = truncated.slice(0, -1);
+  }
+
+  // The cut may land right after a collapsed space; keep the result trimmed.
+  return truncated.trimEnd();
+}
+
+// ---------------------------
+//----------------- SESSION SYNCHRONIZER FILESYSTEM HELPERS ------------
+/**
+ * Recursively discovers files that match one extension, with optional incremental filtering.
+ *
+ * Provider synchronizers call this to find transcript artifacts under provider
+ * home directories. Pass `lastScanAt` to include only files whose `birthtime`
+ * is strictly later than the previous scan, or pass `null` to perform a full
+ * rescan. Only the creation time is compared, so files that were modified but
+ * not re-created since `lastScanAt` are not returned.
+ *
+ * Directories that cannot be listed are treated as empty because not every
+ * provider exists on every machine. A file whose metadata cannot be read is
+ * skipped on its own; the remaining entries of that directory are still scanned.
+ *
+ * NOTE(review): `birthtime` reporting varies by platform and filesystem; confirm
+ * it is populated on the systems where provider transcripts are stored.
+ *
+ * @param rootDir - Directory to walk.
+ * @param extension - Suffix a file name must end with (e.g. `.jsonl`).
+ * @param lastScanAt - Creation-time threshold, or `null` to accept every match.
+ * @param fileList - Accumulator shared across recursive calls; matches are appended to it.
+ * @returns The same `fileList` array, containing every matching path found.
+ */
+export async function findFilesRecursivelyCreatedAfter(
+  rootDir: string,
+  extension: string,
+  lastScanAt: Date | null,
+  fileList: string[] = []
+): Promise<string[]> {
+  const entries = await readDirEntriesOrEmpty(rootDir);
+
+  for (const entry of entries) {
+    const fullPath = path.join(rootDir, entry.name);
+
+    if (entry.isDirectory()) {
+      await findFilesRecursivelyCreatedAfter(fullPath, extension, lastScanAt, fileList);
+      continue;
+    }
+
+    if (!entry.isFile() || !entry.name.endsWith(extension)) {
+      continue;
+    }
+
+    // A full rescan (`null`) skips the stat call entirely.
+    if (!lastScanAt || (await isFileCreatedAfter(fullPath, lastScanAt))) {
+      fileList.push(fullPath);
+    }
+  }
+
+  return fileList;
+}
+
+/** Lists a directory's entries, yielding an empty list when it cannot be read. */
+async function readDirEntriesOrEmpty(directory: string) {
+  try {
+    return await readdir(directory, { withFileTypes: true });
+  } catch {
+    // Missing provider folders are expected in first-run or partial setups.
+    return [];
+  }
+}
+
+/** Reports whether a file's birthtime is strictly after `threshold`; unreadable files report `false`. */
+async function isFileCreatedAfter(filePath: string, threshold: Date): Promise<boolean> {
+  try {
+    const fileStat = await stat(filePath);
+    return fileStat.birthtime > threshold;
+  } catch {
+    // The file may have been removed between readdir and stat; skip only this file.
+    return false;
+  }
+}
+
+/**
+ * Looks up a file's creation and modification times as ISO-8601 strings.
+ *
+ * Session indexers use the result to fill `created_at` and `updated_at` when
+ * upserting sessions. Any failure while reading or formatting the timestamps
+ * yields an empty object, so one bad file does not stop the indexing of others.
+ *
+ * @param filePath - Path of the file to inspect.
+ * @returns `createdAt` (from `birthtime`) and `updatedAt` (from `mtime`), or `{}` on failure.
+ */
+export async function readFileTimestamps(
+  filePath: string
+): Promise<{ createdAt?: string; updatedAt?: string }> {
+  try {
+    const { birthtime: created, mtime: modified } = await stat(filePath);
+    const createdAt = created.toISOString();
+    const updatedAt = modified.toISOString();
+    return { createdAt, updatedAt };
+  } catch {
+    return {};
+  }
+}
+
+// ---------------------------
+//----------------- SESSION SYNCHRONIZER JSONL PARSING HELPERS ------------
+/**
+ * Builds a first-seen key/value lookup map from a JSONL file.
+ *
+ * Use this for provider index files where session id -> display name metadata
+ * is stored line-by-line. The first value for each key wins, preserving the
+ * earliest known label. Rows are skipped individually when they are blank, are
+ * not valid JSON, are not JSON objects, or do not hold string values in both
+ * fields, so one corrupt line does not discard the rows that follow it.
+ *
+ * @param filePath - Path of the JSONL file to read.
+ * @param keyField - Property name whose string value becomes the map key.
+ * @param valueField - Property name whose string value becomes the map value.
+ * @returns The lookup map; it is empty or partial when the file is missing or unreadable.
+ */
+export async function buildLookupMap(
+  filePath: string,
+  keyField: string,
+  valueField: string
+): Promise<Map<string, string>> {
+  const lookup = new Map<string, string>();
+
+  try {
+    const fileStream = fs.createReadStream(filePath);
+    const lineReader = readline.createInterface({ input: fileStream, crlfDelay: Infinity });
+
+    try {
+      for await (const line of lineReader) {
+        const trimmed = line.trim();
+        if (!trimmed) {
+          continue;
+        }
+
+        let parsed: unknown;
+        try {
+          parsed = JSON.parse(trimmed);
+        } catch {
+          // Skip only the malformed row; later rows may still be valid.
+          continue;
+        }
+
+        // Valid JSON is not necessarily an object (`null`, numbers, strings).
+        if (typeof parsed !== 'object' || parsed === null) {
+          continue;
+        }
+
+        const row = parsed as Record<string, unknown>;
+        const key = row[keyField];
+        const value = row[valueField];
+
+        if (typeof key === 'string' && typeof value === 'string' && !lookup.has(key)) {
+          lookup.set(key, value);
+        }
+      }
+    } finally {
+      // Closing the reader does not release the input stream, so destroy it explicitly.
+      lineReader.close();
+      fileStream.destroy();
+    }
+  } catch {
+    // Missing or unreadable lookup files should not block session sync.
+  }
+
+  return lookup;
+}
+
+/**
+ * Reads a JSONL file and returns the first extracted payload that matches caller criteria.
+ *
+ * The caller supplies an `extractor` that validates provider-specific row
+ * shapes. This helper centralizes line-by-line parsing and stops scanning as
+ * soon as the extractor returns a truthy value. Blank lines and lines that are
+ * not valid JSON are skipped individually, so a corrupt row does not hide a
+ * valid row that follows it.
+ *
+ * Falsy extractor results (`null`, `undefined`, `''`, `0`, `false`) all count as
+ * "no match" and scanning continues. If the extractor throws, or the file is
+ * missing or unreadable, `null` is returned.
+ *
+ * @param filePath - Path of the JSONL file to scan.
+ * @param extractor - Receives each parsed row; returns the payload to accept, or a falsy value to keep scanning.
+ * @returns The first truthy extractor result, or `null` when no row matches.
+ */
+export async function extractFirstValidJsonlData<T>(
+  filePath: string,
+  extractor: (parsedJson: unknown) => T | null | undefined
+): Promise<T | null> {
+  try {
+    const fileStream = fs.createReadStream(filePath);
+    const lineReader = readline.createInterface({ input: fileStream, crlfDelay: Infinity });
+
+    try {
+      for await (const line of lineReader) {
+        const trimmed = line.trim();
+        if (!trimmed) {
+          continue;
+        }
+
+        let parsed: unknown;
+        try {
+          parsed = JSON.parse(trimmed);
+        } catch {
+          // Skip only the malformed row; a later row may still match.
+          continue;
+        }
+
+        const extracted = extractor(parsed);
+        if (extracted) {
+          return extracted;
+        }
+      }
+    } finally {
+      // Runs on match, exhaustion, and failure alike, so the stream is always released.
+      lineReader.close();
+      fileStream.destroy();
+    }
+  } catch {
+    // Ignore missing or unreadable artifacts so full scans keep progressing.
+  }
+
+  return null;
+}
+