feat: added session indexer logic

This commit is contained in:
Haileyesus
2026-04-23 17:32:08 +03:00
parent 7b75ed0b72
commit f99af1ff67
14 changed files with 1193 additions and 6 deletions

View File

@@ -1,6 +1,8 @@
import { randomUUID } from 'node:crypto';
import { mkdir, readFile, writeFile } from 'node:fs/promises';
import fs from 'node:fs';
import { mkdir, readFile, readdir, stat, writeFile } from 'node:fs/promises';
import path from 'node:path';
import readline from 'node:readline';
import type { NextFunction, Request, RequestHandler, Response } from 'express';
@@ -215,3 +217,167 @@ export const writeJsonConfig = async (filePath: string, data: Record<string, unk
await writeFile(filePath, `${JSON.stringify(data, null, 2)}\n`, 'utf8');
};
// ---------------------------
//----------------- SESSION SYNCHRONIZER TITLE HELPERS ------------
/**
* Produces a compact session title suitable for UI rendering and DB storage.
*
* Use this when converting provider-native names into a consistent title value.
* The helper collapses repeated whitespace, trims the result, and truncates it
* to 120 characters so every provider writes stable and bounded metadata.
* If the normalized input is empty, it returns the supplied fallback title.
*/
export function normalizeSessionName(rawValue: string | undefined, fallback: string): string {
const normalized = (rawValue ?? '').replace(/\s+/g, ' ').trim();
if (!normalized) {
return fallback;
}
return normalized.slice(0, 120);
}
// ---------------------------
//----------------- SESSION SYNCHRONIZER FILESYSTEM HELPERS ------------
/**
* Recursively discovers files that match one extension, with optional incremental filtering.
*
* Provider synchronizers call this to find transcript artifacts under provider
* home directories. Pass `lastScanAt` to include only files created after the
* previous scan, or pass `null` to perform a full rescan. Missing directories
* are treated as empty because not every provider exists on every machine.
*/
export async function findFilesRecursivelyCreatedAfter(
rootDir: string,
extension: string,
lastScanAt: Date | null,
fileList: string[] = []
): Promise<string[]> {
try {
const entries = await readdir(rootDir, { withFileTypes: true });
for (const entry of entries) {
const fullPath = path.join(rootDir, entry.name);
if (entry.isDirectory()) {
await findFilesRecursivelyCreatedAfter(fullPath, extension, lastScanAt, fileList);
continue;
}
if (!entry.isFile() || !entry.name.endsWith(extension)) {
continue;
}
if (!lastScanAt) {
fileList.push(fullPath);
continue;
}
const fileStat = await stat(fullPath);
if (fileStat.birthtime > lastScanAt) {
fileList.push(fullPath);
}
}
} catch {
// Missing provider folders are expected in first-run or partial setups.
}
return fileList;
}
/**
* Reads file creation/update timestamps and maps them to DB-friendly ISO strings.
*
* Session indexers use this to persist `created_at` and `updated_at` metadata
* when upserting sessions. If the file cannot be read, an empty object is
* returned so indexing can continue for other files.
*/
export async function readFileTimestamps(
filePath: string
): Promise<{ createdAt?: string; updatedAt?: string }> {
try {
const fileStat = await stat(filePath);
return {
createdAt: fileStat.birthtime.toISOString(),
updatedAt: fileStat.mtime.toISOString(),
};
} catch {
return {};
}
}
// ---------------------------
//----------------- SESSION SYNCHRONIZER JSONL PARSING HELPERS ------------
/**
* Builds a first-seen key/value lookup map from a JSONL file.
*
* Use this for provider index files where session id -> display name metadata
* is stored line-by-line. The first value for each key wins, preserving the
* earliest known label while avoiding repeated map overwrites.
*/
export async function buildLookupMap(
filePath: string,
keyField: string,
valueField: string
): Promise<Map<string, string>> {
const lookup = new Map<string, string>();
try {
const fileStream = fs.createReadStream(filePath);
const lineReader = readline.createInterface({ input: fileStream, crlfDelay: Infinity });
for await (const line of lineReader) {
const trimmed = line.trim();
if (!trimmed) {
continue;
}
const parsed = JSON.parse(trimmed) as Record<string, unknown>;
const key = parsed[keyField];
const value = parsed[valueField];
if (typeof key === 'string' && typeof value === 'string' && !lookup.has(key)) {
lookup.set(key, value);
}
}
} catch {
// Missing or unreadable lookup files should not block session sync.
}
return lookup;
}
/**
* Reads a JSONL file and returns the first extracted payload that matches caller criteria.
*
* The caller supplies an `extractor` that validates provider-specific row
* shapes. This helper centralizes line-by-line parsing and lets indexers stop
* scanning as soon as one valid row is found.
*/
export async function extractFirstValidJsonlData<T>(
filePath: string,
extractor: (parsedJson: unknown) => T | null | undefined
): Promise<T | null> {
try {
const fileStream = fs.createReadStream(filePath);
const lineReader = readline.createInterface({ input: fileStream, crlfDelay: Infinity });
for await (const line of lineReader) {
const trimmed = line.trim();
if (!trimmed) {
continue;
}
const parsed = JSON.parse(trimmed);
const extracted = extractor(parsed);
if (extracted) {
lineReader.close();
fileStream.close();
return extracted;
}
}
} catch {
// Ignore malformed or missing artifacts so full scans keep progressing.
}
return null;
}