Compare commits

..

2 Commits

Author SHA1 Message Date
Simos Mikelatos
80ce5b8313 Merge branch 'main' into fix/shell-user-npm-path-priority 2026-06-24 11:10:47 +02:00
Haileyesus
9a33426eed fix(shell): prioritize user npm binaries
Interactive shells could resolve bundled or system CLIs before user-installed npm binaries.

Move existing user npm global directories to the front of PATH while preserving all other entries.
2026-06-22 15:27:13 +03:00
41 changed files with 211 additions and 2498 deletions

View File

@@ -69,7 +69,7 @@ const sessionIdSchema = {
const tools: ToolDefinition[] = [
{
name: 'browser_create_session',
description: 'Create a Browser session that the agent can control. Provide profileName to use a specific persistent profile; when omitted, the configured persistent profile is used only if session persistence is enabled, otherwise a temporary session is created.',
description: 'Create a temporary Browser session that the agent can control. Optionally provide a background profileName to reuse cookies and storage.',
inputSchema: {
type: 'object',
properties: {

View File

@@ -61,10 +61,9 @@ import userRoutes from './routes/user.js';
import geminiRoutes from './routes/gemini.js';
import pluginsRoutes from './routes/plugins.js';
import providerRoutes from './modules/providers/provider.routes.js';
import voiceRoutes from './voice-proxy.js';
import browserUseRoutes from './modules/browser-use/browser-use.routes.js';
import browserUseMcpRoutes from './modules/browser-use/browser-use-mcp.routes.js';
import { browserUseService, VIEWER_COOKIE_NAME } from './modules/browser-use/index.js';
import { browserUseService } from './modules/browser-use/browser-use.service.js';
import { startEnabledPluginServers, stopAllPlugins, getPluginPort } from './utils/plugin-process-manager.js';
import { initializeDatabase, projectsDb, sessionsDb } from './modules/database/index.js';
import { configureWebPush } from './services/vapid-keys.js';
@@ -146,8 +145,6 @@ const wss = createWebSocketServer(server, {
shouldAutoOpenUrlFromOutput,
},
getPluginPort,
browserUseViewer: (ws, pathname) => browserUseService.handleViewerWebSocket(ws, pathname),
authenticateBrowserUseViewer: authenticateBrowserUseViewerPath,
});
// Make WebSocket server available to routes
@@ -213,42 +210,11 @@ app.use('/api/gemini', authenticateToken, geminiRoutes);
// Plugins API Routes (protected)
app.use('/api/plugins', authenticateToken, pluginsRoutes);
function readCookieValue(header, name) {
if (!header) return null;
const prefix = `${name}=`;
const cookie = String(header).split(';').map((part) => part.trim()).find((part) => part.startsWith(prefix));
return cookie ? decodeURIComponent(cookie.slice(prefix.length)) : null;
}
function authenticateBrowserUseViewerPath(pathname, token) {
const parts = String(pathname || '').split('/');
const sessionId = parts[4];
if (parts[1] !== 'api' || parts[2] !== 'browser-use' || parts[3] !== 'sessions' || parts[5] !== 'viewer' || parts[6] !== 'websockify') {
return false;
}
return browserUseService.validateViewerToken(decodeURIComponent(sessionId), token);
}
function authenticateBrowserUse(req, res, next) {
const match = /^\/sessions\/([^/]+)\/viewer(?:\/|$)/.exec(req.path || '');
if (match) {
const sessionId = decodeURIComponent(match[1]);
const token = typeof req.query.viewerToken === 'string'
? req.query.viewerToken
: readCookieValue(req.headers.cookie, VIEWER_COOKIE_NAME);
if (browserUseService.validateViewerToken(sessionId, token)) {
return next();
}
return res.status(401).json({ error: 'Browser viewer access requires a valid session token.' });
}
return authenticateToken(req, res, next);
}
// Browser MCP bridge API (local token protected)
app.use('/api/browser-use-mcp', browserUseMcpRoutes);
// Browser API Routes (protected)
app.use('/api/browser-use', authenticateBrowserUse, browserUseRoutes);
app.use('/api/browser-use', authenticateToken, browserUseRoutes);
// Unified provider MCP routes (protected)
app.use('/api/providers', authenticateToken, providerRoutes);
@@ -256,8 +222,6 @@ app.use('/api/providers', authenticateToken, providerRoutes);
// Agent API Routes (uses API key authentication)
app.use('/api/agent', agentRoutes);
app.use('/api/voice', authenticateToken, voiceRoutes);
// Serve public files (like api-docs.html)
app.use(express.static(path.join(APP_ROOT, 'public')));

View File

@@ -1,7 +1,6 @@
import express from 'express';
import { browserUseService } from '@/modules/browser-use/browser-use.service.js';
import { VIEWER_COOKIE_NAME, VIEWER_TOKEN_TTL_MS } from '@/modules/browser-use/browser-use.viewer.js';
const router = express.Router();
@@ -9,45 +8,6 @@ function readParam(value: string | string[] | undefined): string {
return Array.isArray(value) ? value[0] || '' : value || '';
}
const SAFE_VIEWER_ROOT_FILES = new Set(['vnc.html', 'favicon.ico', 'manifest.json']);
const SAFE_VIEWER_ROOT_DIRS = new Set(['app', 'core', 'vendor', 'assets', 'images', 'utils']);
function isSafeViewerPath(viewerPath: string): boolean {
if (!viewerPath || viewerPath.startsWith('/') || viewerPath.includes('..') || viewerPath.includes('\\')) {
return false;
}
if (!/^[A-Za-z0-9][A-Za-z0-9._~/-]*$/.test(viewerPath)) {
return false;
}
if (SAFE_VIEWER_ROOT_FILES.has(viewerPath)) {
return true;
}
const [rootDir] = viewerPath.split('/');
return Boolean(rootDir && SAFE_VIEWER_ROOT_DIRS.has(rootDir));
}
function isSecureRequest(req: express.Request): boolean {
const forwardedProto = String(req.headers['x-forwarded-proto'] || '')
.split(',')[0]
.trim()
.toLowerCase();
return req.secure || forwardedProto === 'https';
}
function readQueryString(originalUrl: string): string {
const queryIndex = originalUrl.indexOf('?');
if (queryIndex < 0) {
return '';
}
const params = new URLSearchParams(originalUrl.slice(queryIndex + 1));
params.delete('viewerToken');
const nextQuery = params.toString();
return nextQuery ? `?${nextQuery}` : '';
}
router.get('/status', async (_req, res) => {
try {
res.json({ success: true, data: await browserUseService.getStatus() });
@@ -102,60 +62,13 @@ router.get('/sessions', async (_req, res) => {
try {
res.json({ success: true, data: { sessions: await browserUseService.listSessions() } });
} catch (error) {
res.status(500).json({
res.status(401).json({
success: false,
error: error instanceof Error ? error.message : 'Failed to list browser sessions.',
});
}
});
router.get('/sessions/:sessionId/viewer/*', async (req, res) => {
try {
const sessionId = readParam(req.params.sessionId);
const originalPath = req.originalUrl.split('?')[0] || '';
const viewerMarker = `/sessions/${sessionId}/viewer/`;
const markerIndex = originalPath.indexOf(viewerMarker);
const rawViewerPath = markerIndex >= 0 ? originalPath.slice(markerIndex + viewerMarker.length) : 'vnc.html';
const viewerPath = decodeURIComponent(rawViewerPath).replace(/^\/+/, '') || 'vnc.html';
if (!isSafeViewerPath(viewerPath)) {
res.status(400).json({ success: false, error: 'Invalid Browser viewer path.' });
return;
}
const viewerToken = readParam(req.query.viewerToken as string | string[] | undefined);
if (viewerPath === 'vnc.html' && browserUseService.validateViewerToken(sessionId, viewerToken)) {
res.cookie(VIEWER_COOKIE_NAME, viewerToken, {
httpOnly: true,
sameSite: 'lax',
secure: isSecureRequest(req),
maxAge: VIEWER_TOKEN_TTL_MS,
path: '/api/browser-use/sessions/' + encodeURIComponent(sessionId) + '/viewer',
});
}
const target = browserUseService.getViewerProxyTarget(sessionId);
const query = readQueryString(req.originalUrl);
const upstream = await fetch(`http://127.0.0.1:${target.websockifyPort}/${viewerPath}${query}`, {
headers: {
accept: String(req.headers.accept || '*/*'),
},
});
const contentType = upstream.headers.get('content-type');
if (contentType) {
res.setHeader('content-type', contentType);
}
const cacheControl = viewerPath === 'vnc.html' ? 'no-store' : 'public, max-age=3600';
res.setHeader('cache-control', cacheControl);
res.status(upstream.status);
const body = Buffer.from(await upstream.arrayBuffer());
res.send(body);
} catch (error) {
res.status(404).json({
success: false,
error: error instanceof Error ? error.message : 'Browser viewer is not available.',
});
}
});
router.post('/sessions/:sessionId/stop', async (req, res) => {
try {
const result = await browserUseService.stopSession(readParam(req.params.sessionId));

View File

@@ -1,86 +1,128 @@
import { createRequire } from 'node:module';
import { randomUUID } from 'node:crypto';
import { execFileSync, spawn } from 'node:child_process';
import { randomBytes, randomUUID } from 'node:crypto';
import { spawn } from 'node:child_process';
import fs from 'node:fs';
import net from 'node:net';
import os from 'node:os';
import path from 'node:path';
import { WebSocket } from 'ws';
import { appConfigDb } from '@/modules/database/index.js';
import { providerMcpService } from '@/modules/providers/index.js';
import { getModuleDir } from '@/utils/runtime-paths.js';
import {
getOrCreateMcpToken,
getProfilePath,
normalizeBrowserBackend,
PROFILE_ROOT,
readSettings,
resolveSessionProfileName,
useVisibleCamoufoxBackend,
writeSettings,
} from './browser-use.settings.js';
import type {
BrowserUseSession,
BrowserUseSettings,
PublicBrowserUseSession,
RuntimeHandle,
RuntimeProbe,
RuntimeReadiness,
} from './browser-use.types.js';
import { getViewerUrl, handleViewerWebSocket, VIEWER_TOKEN_TTL_MS } from './browser-use.viewer.js';
const require = createRequire(import.meta.url);
const __dirname = getModuleDir(import.meta.url);
const IS_PLATFORM = process.env.VITE_IS_PLATFORM === 'true';
const MAX_SESSIONS_PER_OWNER = Number.parseInt(process.env.CLOUDCLI_BROWSER_USE_MAX_SESSIONS_PER_OWNER || '3', 10);
const SESSION_TTL_MS = Number.parseInt(process.env.CLOUDCLI_BROWSER_USE_SESSION_TTL_MS || String(30 * 60 * 1000), 10);
const BROWSER_USE_SETTINGS_KEY = 'browser_use_settings';
const BROWSER_USE_MCP_TOKEN_KEY = 'browser_use_mcp_token';
type BrowserUseRuntime = 'cloud' | 'local';
type BrowserUseSessionStatus = 'ready' | 'stopped' | 'unavailable';
type BrowserUseSession = {
id: string;
ownerId: string;
createdBy: 'agent';
runtime: BrowserUseRuntime;
status: BrowserUseSessionStatus;
url: string | null;
title: string | null;
screenshotDataUrl: string | null;
createdAt: string;
updatedAt: string;
lastAction: string | null;
message: string | null;
profileName: string | null;
viewport: {
width: number;
height: number;
} | null;
cursor: {
x: number;
y: number;
actor: 'agent';
} | null;
};
type PublicBrowserUseSession = Omit<BrowserUseSession, 'ownerId'>;
type RuntimeHandle = {
browser?: any;
context?: any;
page?: any;
};
type BrowserUseSettings = {
enabled: boolean;
};
type RuntimeReadiness = {
playwright: any | null;
playwrightInstalled: boolean;
chromiumInstalled: boolean;
chromiumExecutablePath: string | null;
installInProgress: boolean;
installMessage: string | null;
};
type RuntimeProbe = Omit<RuntimeReadiness, 'installInProgress' | 'installMessage'>;
const sessions = new Map<string, BrowserUseSession>();
const handles = new Map<string, RuntimeHandle>();
const reservedDisplays = new Set<string>();
const viewerTokens = new Map<string, { token: string; expiresAt: number }>();
let installPromise: Promise<{ success: boolean; message: string }> | null = null;
let lastInstallMessage: string | null = null;
let runtimeProbeCache: { value: RuntimeProbe; updatedAt: number } | null = null;
const DEFAULT_SETTINGS: BrowserUseSettings = {
enabled: false,
};
const AGENT_OWNER_ID = 'agent';
const PROFILE_ROOT = path.join(os.homedir(), '.cloudcli', 'browser-use', 'profiles');
const MCP_SERVER_NAME = 'cloudcli-browser';
const LEGACY_MCP_SERVER_NAMES = ['cloudcli-browser-use'];
const RUNTIME_READINESS_CACHE_TTL_MS = 30_000;
const VISIBLE_BROWSER_ENABLED = process.env.CLOUDCLI_BROWSER_USE_VISIBLE !== 'false';
const RUNTIME_ROOT = process.env.CLOUDCLI_BROWSER_USE_RUNTIME_ROOT || '/opt/claudecodeui/.runtime-browser';
const NOVNC_ROOT = process.env.CLOUDCLI_BROWSER_USE_NOVNC_ROOT || path.join(RUNTIME_ROOT, 'novnc');
const X11VNC_BIN = process.env.CLOUDCLI_BROWSER_USE_X11VNC_BIN || path.join(RUNTIME_ROOT, 'rootfs/usr/bin/x11vnc');
const X11VNC_LIB_DIR = process.env.CLOUDCLI_BROWSER_USE_X11VNC_LIB_DIR || path.join(RUNTIME_ROOT, 'rootfs/usr/lib/x86_64-linux-gnu');
const X11VNC_EXTRA_LIB_DIR = process.env.CLOUDCLI_BROWSER_USE_X11VNC_EXTRA_LIB_DIR || path.join(RUNTIME_ROOT, 'rootfs/lib/x86_64-linux-gnu');
const LOG_RUNTIME_PROCESS_OUTPUT = process.env.CLOUDCLI_BROWSER_USE_RUNTIME_LOGS === 'true';
function getRuntime(): 'cloud' | 'local' {
function getRuntime(): BrowserUseRuntime {
return IS_PLATFORM ? 'cloud' : 'local';
}
function getCamoufoxExecutablePath(): string | null {
const configured = process.env.CLOUDCLI_BROWSER_USE_CAMOUFOX_EXECUTABLE;
if (configured && fs.existsSync(configured)) {
return configured;
}
function readSettings(): BrowserUseSettings {
try {
const output = execFileSync(path.join(os.homedir(), '.local/bin/camoufox'), ['path'], {
encoding: 'utf8',
stdio: ['ignore', 'pipe', 'ignore'],
}).trim();
const executablePath = fs.statSync(output).isDirectory()
? path.join(output, 'camoufox')
: output;
return fs.existsSync(executablePath) ? executablePath : null;
} catch {
return null;
const raw = appConfigDb.get(BROWSER_USE_SETTINGS_KEY);
if (!raw) {
return DEFAULT_SETTINGS;
}
const parsed = JSON.parse(raw) as Partial<BrowserUseSettings>;
return {
enabled: parsed.enabled === true,
};
} catch (error: any) {
console.warn('[Browser] Failed to read settings:', error?.message || error);
return DEFAULT_SETTINGS;
}
}
function writeSettings(settings: BrowserUseSettings): BrowserUseSettings {
const normalized = {
enabled: settings.enabled === true,
};
appConfigDb.set(BROWSER_USE_SETTINGS_KEY, JSON.stringify(normalized));
return normalized;
}
function getOrCreateMcpToken(): string {
const existing = appConfigDb.get(BROWSER_USE_MCP_TOKEN_KEY);
if (existing) {
return existing;
}
const token = randomBytes(32).toString('hex');
appConfigDb.set(BROWSER_USE_MCP_TOKEN_KEY, token);
return token;
}
function getSetupMessage(settings: BrowserUseSettings, readiness: RuntimeReadiness): string {
if (!settings.enabled) {
return 'Browser is disabled in settings.';
@@ -90,26 +132,6 @@ function getSetupMessage(settings: BrowserUseSettings, readiness: RuntimeReadine
return 'Install Playwright and Chromium to use browser sessions.';
}
if (settings.browserBackend === 'camoufox-vnc' && !getCamoufoxExecutablePath()) {
return 'Camoufox is selected, but Camoufox is not installed.';
}
if (useVisibleCamoufoxBackend(settings)) {
if (!VISIBLE_BROWSER_ENABLED) {
return 'Camoufox is selected, but visible browser sessions are disabled.';
}
if (!getCamoufoxExecutablePath()) {
return 'Camoufox is selected, but Camoufox is not installed.';
}
if (!fs.existsSync(X11VNC_BIN)) {
return 'Camoufox is selected, but x11vnc is missing.';
}
if (!fs.existsSync(path.join(NOVNC_ROOT, 'vnc.html'))) {
return 'Camoufox is selected, but noVNC is missing.';
}
return readiness.installMessage || 'Camoufox runtime is not ready.';
}
if (!readiness.chromiumInstalled) {
return 'Playwright is installed, but Chromium is missing. Install the Chromium runtime to continue.';
}
@@ -154,6 +176,24 @@ async function removeMcpServerFromAllProviders(name: string) {
return results.map((result) => ({ ...result, name }));
}
function normalizeProfileName(profileName?: string | null): string | null {
const normalized = String(profileName || '').trim();
if (!normalized) {
return null;
}
return normalized.slice(0, 80);
}
function getProfilePath(profileName: string): string {
const safeName = profileName
.toLowerCase()
.replace(/[^a-z0-9._-]+/g, '-')
.replace(/^-+|-+$/g, '')
.slice(0, 80) || 'default';
return path.join(PROFILE_ROOT, safeName);
}
function probeRuntime(): RuntimeProbe {
const playwright = getPlaywright();
const readiness: RuntimeProbe = {
@@ -198,175 +238,6 @@ function getRuntimeReadiness(options: { force?: boolean } = {}): RuntimeReadines
};
}
function findAvailablePort(): Promise<number> {
return new Promise((resolve, reject) => {
const server = net.createServer();
server.on('error', reject);
server.listen(0, '127.0.0.1', () => {
const address = server.address();
server.close(() => {
if (typeof address === 'object' && address?.port) {
resolve(address.port);
} else {
reject(new Error('Failed to reserve a browser runtime port.'));
}
});
});
});
}
function delay(ms: number): Promise<void> {
return new Promise((resolve) => setTimeout(resolve, ms));
}
function isRuntimeProcessAlive(child: ReturnType<typeof spawn>): boolean {
return child.exitCode === null && child.signalCode === null && !child.killed;
}
function assertRuntimeProcessesAlive(processes: Array<ReturnType<typeof spawn>>, label: string) {
const exited = processes.find((child) => !isRuntimeProcessAlive(child));
if (exited) {
throw new Error(`${label} exited before the Browser viewer runtime was ready.`);
}
}
async function isPortListening(port: number): Promise<boolean> {
return new Promise((resolve) => {
const socket = net.createConnection({ host: '127.0.0.1', port });
let settled = false;
const finish = (listening: boolean) => {
if (settled) {
return;
}
settled = true;
socket.destroy();
resolve(listening);
};
socket.setTimeout(250);
socket.once('connect', () => finish(true));
socket.once('timeout', () => finish(false));
socket.once('error', () => finish(false));
});
}
async function waitForRuntimePort(
port: number,
label: string,
processes: Array<ReturnType<typeof spawn>>,
timeoutMs = 5_000,
) {
const deadline = Date.now() + timeoutMs;
while (Date.now() < deadline) {
assertRuntimeProcessesAlive(processes, label);
if (await isPortListening(port)) {
return;
}
await delay(100);
}
assertRuntimeProcessesAlive(processes, label);
throw new Error(`${label} did not start listening on 127.0.0.1:${port}.`);
}
function killRuntimeProcesses(processes?: Array<ReturnType<typeof spawn>>) {
processes?.forEach((child) => child.kill('SIGTERM'));
}
function reserveDisplay(): string {
for (let index = 90; index < 140; index += 1) {
const display = `:${index}`;
if (!reservedDisplays.has(display)) {
reservedDisplays.add(display);
return display;
}
}
throw new Error('No browser display slots are available.');
}
function spawnRuntimeProcess(command: string, args: string[], options: { env?: NodeJS.ProcessEnv } = {}) {
const child = spawn(command, args, {
env: { ...process.env, ...options.env },
stdio: ['ignore', 'ignore', 'pipe'],
});
child.stderr?.on('data', (chunk) => {
if (!LOG_RUNTIME_PROCESS_OUTPUT) {
return;
}
const text = String(chunk).trim();
if (text) {
console.warn(`[Browser runtime] ${path.basename(command)}: ${text}`);
}
});
child.on('error', (error) => {
console.warn(`[Browser runtime] ${path.basename(command)} failed:`, error.message);
});
return child;
}
async function startVisibleRuntime(): Promise<NonNullable<RuntimeHandle['viewer']> & { processes: Array<ReturnType<typeof spawn>> }> {
const display = reserveDisplay();
const vncPort = await findAvailablePort();
const websockifyPort = await findAvailablePort();
const processes: Array<ReturnType<typeof spawn>> = [];
try {
processes.push(spawnRuntimeProcess('Xvfb', [
display,
'-screen',
'0',
'1440x900x24',
'-ac',
'-nolisten',
'tcp',
]));
await delay(700);
assertRuntimeProcessesAlive(processes, 'Xvfb');
if (!fs.existsSync(X11VNC_BIN)) {
throw new Error(`x11vnc is missing at ${X11VNC_BIN}.`);
}
processes.push(spawnRuntimeProcess(X11VNC_BIN, [
'-display',
display,
'-localhost',
'-forever',
'-shared',
'-rfbport',
String(vncPort),
'-nopw',
'-quiet',
], {
env: {
LD_LIBRARY_PATH: `${X11VNC_LIB_DIR}:${X11VNC_EXTRA_LIB_DIR}:${process.env.LD_LIBRARY_PATH || ''}`,
},
}));
await waitForRuntimePort(vncPort, 'x11vnc', processes);
if (!fs.existsSync(path.join(NOVNC_ROOT, 'vnc.html'))) {
throw new Error(`noVNC is missing at ${NOVNC_ROOT}.`);
}
processes.push(spawnRuntimeProcess(path.join(os.homedir(), '.local/bin/websockify'), [
'--web',
NOVNC_ROOT,
`127.0.0.1:${websockifyPort}`,
`127.0.0.1:${vncPort}`,
]));
await waitForRuntimePort(websockifyPort, 'websockify', processes);
return {
display,
vncPort,
websockifyPort,
noVncRoot: NOVNC_ROOT,
processes,
};
} catch (error) {
killRuntimeProcesses(processes);
reservedDisplays.delete(display);
throw error;
}
}
const INSTALL_COMMAND_TIMEOUT_MS = Number.parseInt(
process.env.CLOUDCLI_BROWSER_USE_INSTALL_TIMEOUT_MS || String(10 * 60 * 1000),
10,
@@ -479,45 +350,6 @@ function publicSession(session: BrowserUseSession): PublicBrowserUseSession {
return publicFields;
}
function getSessionViewer(sessionId: string): RuntimeHandle['viewer'] | null {
const session = sessions.get(sessionId);
if (!session || session.ownerId !== AGENT_OWNER_ID || session.status !== 'ready') {
return null;
}
return handles.get(sessionId)?.viewer || null;
}
function createViewerToken(sessionId: string): string {
const token = randomUUID();
viewerTokens.set(sessionId, {
token,
expiresAt: Date.now() + VIEWER_TOKEN_TTL_MS,
});
return token;
}
function deleteViewerToken(sessionId: string) {
viewerTokens.delete(sessionId);
}
function validateViewerTokenForSession(sessionId: string, token: string | null | undefined): boolean {
if (!token) {
return false;
}
const session = sessions.get(sessionId);
const viewer = session?.ownerId === AGENT_OWNER_ID && session.status === 'ready'
? handles.get(sessionId)?.viewer || null
: null;
const stored = viewerTokens.get(sessionId);
if (!viewer || !stored || stored.token !== token || stored.expiresAt < Date.now()) {
if (stored?.expiresAt && stored.expiresAt < Date.now()) {
viewerTokens.delete(sessionId);
}
return false;
}
return true;
}
function ownerSessions(ownerId: string): BrowserUseSession[] {
return [...sessions.values()].filter((session) => session.ownerId === ownerId);
}
@@ -525,13 +357,8 @@ function ownerSessions(ownerId: string): BrowserUseSession[] {
async function closeHandle(sessionId: string): Promise<void> {
const handle = handles.get(sessionId);
handles.delete(sessionId);
deleteViewerToken(sessionId);
await handle?.context?.close?.().catch(() => undefined);
await handle?.browser?.close().catch(() => undefined);
killRuntimeProcesses(handle?.processes);
if (handle?.viewer?.display) {
reservedDisplays.delete(handle.viewer.display);
}
}
async function expireStaleSessions(now = Date.now()): Promise<void> {
@@ -597,11 +424,6 @@ export const browserUseService = {
const current = readSettings();
const nextSettings = {
enabled: typeof settings.enabled === 'boolean' ? settings.enabled : current.enabled,
persistSessions: typeof settings.persistSessions === 'boolean' ? settings.persistSessions : current.persistSessions,
defaultProfileName: typeof settings.defaultProfileName === 'string'
? settings.defaultProfileName
: current.defaultProfileName,
browserBackend: settings.browserBackend ? normalizeBrowserBackend(settings.browserBackend) : current.browserBackend,
};
const next = writeSettings(nextSettings);
@@ -617,28 +439,14 @@ export const browserUseService = {
async getStatus() {
const settings = readSettings();
const readiness = getRuntimeReadiness();
const useVisibleBackend = useVisibleCamoufoxBackend(settings);
const visibleCamoufoxReady = useVisibleBackend
&& VISIBLE_BROWSER_ENABLED
&& readiness.playwrightInstalled
&& Boolean(getCamoufoxExecutablePath())
&& fs.existsSync(X11VNC_BIN)
&& fs.existsSync(path.join(NOVNC_ROOT, 'vnc.html'));
const available = settings.enabled
&& readiness.playwrightInstalled
&& (useVisibleBackend ? visibleCamoufoxReady : readiness.chromiumInstalled);
const available = settings.enabled && readiness.playwrightInstalled && readiness.chromiumInstalled;
return {
enabled: settings.enabled,
runtime: getRuntime(),
backend: useVisibleBackend ? 'camoufox-vnc' : 'playwright',
browserBackend: settings.browserBackend,
available,
playwrightInstalled: readiness.playwrightInstalled,
chromiumInstalled: readiness.chromiumInstalled,
camoufoxInstalled: Boolean(getCamoufoxExecutablePath()),
noVncInstalled: fs.existsSync(path.join(NOVNC_ROOT, 'vnc.html')),
x11vncInstalled: fs.existsSync(X11VNC_BIN),
installInProgress: readiness.installInProgress,
sessionCount: sessions.size,
message: available
@@ -697,7 +505,7 @@ export const browserUseService = {
}
await expireStaleSessions();
const profileName = resolveSessionProfileName(settings, options?.profileName);
const profileName = normalizeProfileName(options?.profileName);
const now = new Date().toISOString();
const session: BrowserUseSession = {
@@ -713,9 +521,6 @@ export const browserUseService = {
updatedAt: now,
lastAction: 'create',
message: null,
backend: useVisibleCamoufoxBackend(settings) ? 'camoufox-vnc' : 'playwright',
viewerUrl: null,
viewerEmbedUrl: null,
profileName,
viewport: { width: 1440, height: 900 },
cursor: null,
@@ -727,13 +532,7 @@ export const browserUseService = {
}
const readiness = getRuntimeReadiness();
const useVisibleBackend = useVisibleCamoufoxBackend(settings);
const visibleCamoufoxReady = useVisibleBackend
&& VISIBLE_BROWSER_ENABLED
&& Boolean(getCamoufoxExecutablePath())
&& fs.existsSync(X11VNC_BIN)
&& fs.existsSync(path.join(NOVNC_ROOT, 'vnc.html'));
if (!settings.enabled || !readiness.playwrightInstalled || !readiness.playwright || (useVisibleBackend ? !visibleCamoufoxReady : !readiness.chromiumInstalled)) {
if (!settings.enabled || !readiness.playwrightInstalled || !readiness.chromiumInstalled || !readiness.playwright) {
session.message = getSetupMessage(settings, readiness);
sessions.set(session.id, session);
return publicSession(session);
@@ -742,73 +541,31 @@ export const browserUseService = {
let browser: any | undefined;
let context: any | undefined;
let page: any;
let viewer: RuntimeHandle['viewer'];
let processes: RuntimeHandle['processes'];
const launchOptions: Record<string, unknown> = {
headless: !useVisibleBackend,
const launchOptions = {
headless: true,
args: ['--disable-dev-shm-usage'],
};
const contextOptions = useVisibleBackend
? { viewport: null }
: {
viewport: { width: 1440, height: 900 },
serviceWorkers: 'block',
};
const contextOptions = {
viewport: { width: 1440, height: 900 },
serviceWorkers: 'block',
};
try {
if (useVisibleBackend) {
const camoufoxExecutable = getCamoufoxExecutablePath();
if (!camoufoxExecutable) {
throw new Error('Camoufox is not installed.');
}
const runtime = await startVisibleRuntime();
viewer = {
display: runtime.display,
vncPort: runtime.vncPort,
websockifyPort: runtime.websockifyPort,
noVncRoot: runtime.noVncRoot,
};
processes = runtime.processes;
launchOptions.executablePath = camoufoxExecutable;
launchOptions.env = {
...process.env,
DISPLAY: runtime.display,
LD_LIBRARY_PATH: `${X11VNC_LIB_DIR}:${X11VNC_EXTRA_LIB_DIR}:${process.env.LD_LIBRARY_PATH || ''}`,
};
launchOptions.args = [];
session.backend = 'camoufox-vnc';
const viewerToken = createViewerToken(session.id);
session.viewerUrl = getViewerUrl(session.id, viewerToken);
session.viewerEmbedUrl = session.viewerUrl;
}
if (profileName) {
fs.mkdirSync(PROFILE_ROOT, { recursive: true });
const browserType = useVisibleBackend ? readiness.playwright.firefox : readiness.playwright.chromium;
context = await browserType.launchPersistentContext(getProfilePath(profileName), {
...launchOptions,
...contextOptions,
});
page = context.pages()[0] || await context.newPage();
} else {
const browserType = useVisibleBackend ? readiness.playwright.firefox : readiness.playwright.chromium;
browser = await browserType.launch(launchOptions);
context = await browser.newContext(contextOptions);
page = await context.newPage();
}
} catch (error) {
await context?.close?.().catch(() => undefined);
await browser?.close?.().catch(() => undefined);
killRuntimeProcesses(processes);
if (viewer?.display) {
reservedDisplays.delete(viewer.display);
}
throw error;
if (profileName) {
fs.mkdirSync(PROFILE_ROOT, { recursive: true });
context = await readiness.playwright.chromium.launchPersistentContext(getProfilePath(profileName), {
...launchOptions,
...contextOptions,
});
page = context.pages()[0] || await context.newPage();
} else {
browser = await readiness.playwright.chromium.launch(launchOptions);
context = await browser.newContext(contextOptions);
page = await context.newPage();
}
session.status = 'ready';
session.message = 'Browser session is ready.';
sessions.set(session.id, session);
handles.set(session.id, { browser, context, page, processes, viewer });
handles.set(session.id, { browser, context, page });
await captureSession(session, page);
return publicSession(session);
},
@@ -1055,25 +812,6 @@ export const browserUseService = {
return { deleted: true, sessionId };
},
getViewerProxyTarget(sessionId: string) {
const viewer = getSessionViewer(sessionId);
if (!viewer) {
throw new Error('Browser viewer is not available for this session.');
}
return {
websockifyPort: viewer.websockifyPort,
noVncRoot: viewer.noVncRoot,
};
},
validateViewerToken(sessionId: string, token: string | null | undefined) {
return validateViewerTokenForSession(sessionId, token);
},
handleViewerWebSocket(clientWs: WebSocket, pathname: string) {
handleViewerWebSocket(clientWs, pathname, getSessionViewer);
},
async agentStopSession(sessionId: string) {
await this.getAgentSession(sessionId);
return this.stopSession(sessionId);

View File

@@ -1,147 +0,0 @@
import { randomBytes } from 'node:crypto';
import fs from 'node:fs';
import os from 'node:os';
import path from 'node:path';
import { appConfigDb } from '@/modules/database/index.js';
import type { BrowserUseBackend, BrowserUseSettings } from './browser-use.types.js';
const IS_PLATFORM = process.env.VITE_IS_PLATFORM === 'true';
const BROWSER_USE_SETTINGS_KEY = 'browser_use_settings';
const BROWSER_USE_MCP_TOKEN_KEY = 'browser_use_mcp_token';
const MAX_PROFILE_NAME_LENGTH = 80;
export const DEFAULT_BROWSER_USE_SETTINGS: BrowserUseSettings = {
enabled: false,
persistSessions: false,
defaultProfileName: 'default',
browserBackend: IS_PLATFORM ? 'camoufox-vnc' : 'playwright',
};
export const PROFILE_ROOT = process.env.CLOUDCLI_BROWSER_USE_PROFILE_ROOT
|| path.join(os.homedir(), '.cloudcli', 'browser-use', 'profiles');
export function normalizeBrowserBackend(value: unknown): BrowserUseBackend {
return value === 'playwright' || value === 'camoufox-vnc'
? value
: DEFAULT_BROWSER_USE_SETTINGS.browserBackend;
}
function trimEdgeDashes(value: string): string {
let start = 0;
let end = value.length;
while (start < end && value[start] === '-') {
start += 1;
}
while (end > start && value[end - 1] === '-') {
end -= 1;
}
return value.slice(start, end);
}
export function normalizeProfileName(profileName?: string | null): string | null {
const sanitized = trimEdgeDashes(String(profileName || '')
.trim()
.toLowerCase()
.replace(/[^a-z0-9._-]+/g, '-'));
const normalized = sanitized
.slice(0, MAX_PROFILE_NAME_LENGTH)
.replace(/^-+|-+$/g, '');
if (!normalized) {
return null;
}
return /[a-z0-9]/.test(normalized) ? normalized : null;
}
export function normalizeDefaultProfileName(profileName?: string | null): string {
return normalizeProfileName(profileName) || DEFAULT_BROWSER_USE_SETTINGS.defaultProfileName;
}
export function resolveSessionProfileName(settings: BrowserUseSettings, profileName?: string | null): string | null {
const requestedProfileName = normalizeProfileName(profileName);
if (String(profileName || '').trim() && !requestedProfileName) {
throw new Error('Browser profile name must include at least one letter or number.');
}
if (requestedProfileName) {
validateRequestedProfileName(profileName, requestedProfileName);
return requestedProfileName;
}
return settings.persistSessions ? normalizeDefaultProfileName(settings.defaultProfileName) : null;
}
export function getProfilePath(profileName: string): string {
return path.join(PROFILE_ROOT, normalizeDefaultProfileName(profileName));
}
function validateRequestedProfileName(profileName: string | null | undefined, normalizedProfileName: string): void {
const requestedProfileName = String(profileName || '').trim();
const existingProfileName = findExistingProfileName(normalizedProfileName);
if (
existingProfileName
&& (requestedProfileName !== normalizedProfileName || existingProfileName !== normalizedProfileName)
) {
throw new Error(`Browser profile "${requestedProfileName}" resolves to existing profile "${existingProfileName}". Use "${normalizedProfileName}" instead.`);
}
}
function findExistingProfileName(normalizedProfileName: string): string | null {
try {
if (!fs.existsSync(PROFILE_ROOT)) {
return null;
}
const entries = fs.readdirSync(PROFILE_ROOT, { withFileTypes: true });
const match = entries.find((entry) => entry.isDirectory() && normalizeProfileName(entry.name) === normalizedProfileName);
return match?.name || null;
} catch {
return null;
}
}
export function useVisibleCamoufoxBackend(settings: BrowserUseSettings): boolean {
return settings.browserBackend === 'camoufox-vnc';
}
export function readSettings(): BrowserUseSettings {
try {
const raw = appConfigDb.get(BROWSER_USE_SETTINGS_KEY);
if (!raw) {
return DEFAULT_BROWSER_USE_SETTINGS;
}
const parsed = JSON.parse(raw) as Partial<BrowserUseSettings>;
return {
enabled: parsed.enabled === true,
persistSessions: parsed.persistSessions === true,
defaultProfileName: normalizeDefaultProfileName(parsed.defaultProfileName),
browserBackend: normalizeBrowserBackend(parsed.browserBackend),
};
} catch (error: any) {
console.warn('[Browser] Failed to read settings:', error?.message || error);
return DEFAULT_BROWSER_USE_SETTINGS;
}
}
export function writeSettings(settings: BrowserUseSettings): BrowserUseSettings {
const normalized = {
enabled: settings.enabled === true,
persistSessions: settings.persistSessions === true,
defaultProfileName: normalizeDefaultProfileName(settings.defaultProfileName),
browserBackend: normalizeBrowserBackend(settings.browserBackend),
};
appConfigDb.set(BROWSER_USE_SETTINGS_KEY, JSON.stringify(normalized));
return normalized;
}
export function getOrCreateMcpToken(): string {
const existing = appConfigDb.get(BROWSER_USE_MCP_TOKEN_KEY);
if (existing) {
return existing;
}
const token = randomBytes(32).toString('hex');
appConfigDb.set(BROWSER_USE_MCP_TOKEN_KEY, token);
return token;
}

View File

@@ -1,66 +0,0 @@
import type { spawn } from 'node:child_process';
export type BrowserUseRuntime = 'cloud' | 'local';
export type BrowserUseBackend = 'playwright' | 'camoufox-vnc';
export type BrowserUseSessionStatus = 'ready' | 'stopped' | 'unavailable';
export type BrowserUseSession = {
id: string;
ownerId: string;
createdBy: 'agent';
runtime: BrowserUseRuntime;
status: BrowserUseSessionStatus;
url: string | null;
title: string | null;
screenshotDataUrl: string | null;
createdAt: string;
updatedAt: string;
lastAction: string | null;
message: string | null;
backend: BrowserUseBackend;
viewerUrl: string | null;
viewerEmbedUrl: string | null;
profileName: string | null;
viewport: {
width: number;
height: number;
} | null;
cursor: {
x: number;
y: number;
actor: 'agent';
} | null;
};
export type PublicBrowserUseSession = Omit<BrowserUseSession, 'ownerId'>;
export type RuntimeHandle = {
browser?: any;
context?: any;
page?: any;
processes?: Array<ReturnType<typeof spawn>>;
viewer?: {
display: string;
vncPort: number;
websockifyPort: number;
noVncRoot: string;
};
};
export type BrowserUseSettings = {
enabled: boolean;
persistSessions: boolean;
defaultProfileName: string;
browserBackend: BrowserUseBackend;
};
export type RuntimeReadiness = {
playwright: any | null;
playwrightInstalled: boolean;
chromiumInstalled: boolean;
chromiumExecutablePath: string | null;
installInProgress: boolean;
installMessage: string | null;
};
export type RuntimeProbe = Omit<RuntimeReadiness, 'installInProgress' | 'installMessage'>;

View File

@@ -1,76 +0,0 @@
import { WebSocket } from 'ws';
import type { RuntimeHandle } from './browser-use.types.js';
type BrowserUseViewer = NonNullable<RuntimeHandle['viewer']>;
export const VIEWER_COOKIE_NAME = 'browser_use_viewer_token';
const DEFAULT_VIEWER_TOKEN_TTL_MS = 30 * 60 * 1000;
const parsedViewerTokenTtlMs = Number.parseInt(
process.env.CLOUDCLI_BROWSER_USE_VIEWER_TOKEN_TTL_MS || String(DEFAULT_VIEWER_TOKEN_TTL_MS),
10,
);
export const VIEWER_TOKEN_TTL_MS =
Number.isFinite(parsedViewerTokenTtlMs) && parsedViewerTokenTtlMs > 0
? parsedViewerTokenTtlMs
: DEFAULT_VIEWER_TOKEN_TTL_MS;
export function getViewerUrl(sessionId: string, viewerToken?: string): string {
const basePath = `/api/browser-use/sessions/${encodeURIComponent(sessionId)}/viewer`;
const websockifyPath = viewerToken
? `${basePath}/websockify?viewerToken=${encodeURIComponent(viewerToken)}`
: `${basePath}/websockify`;
const params = new URLSearchParams({
autoconnect: '1',
resize: 'scale',
reconnect: '1',
path: websockifyPath,
});
if (viewerToken) {
params.set('viewerToken', viewerToken);
}
return `${basePath}/vnc.html?${params.toString()}`;
}
export function handleViewerWebSocket(
clientWs: WebSocket,
pathname: string,
getSessionViewer: (sessionId: string) => BrowserUseViewer | null | undefined,
) {
const match = /^\/api\/browser-use\/sessions\/([^/]+)\/viewer\/websockify\/?$/.exec(pathname);
const sessionId = match ? decodeURIComponent(match[1]) : '';
const viewer = sessionId ? getSessionViewer(sessionId) : null;
if (!viewer) {
clientWs.close(4404, 'Browser viewer not found');
return;
}
const upstream = new WebSocket(`ws://127.0.0.1:${viewer.websockifyPort}`);
upstream.on('open', () => {
clientWs.on('message', (data) => {
if (upstream.readyState === WebSocket.OPEN) {
upstream.send(data);
}
});
upstream.on('message', (data) => {
if (clientWs.readyState === WebSocket.OPEN) {
clientWs.send(data);
}
});
});
upstream.on('close', (code, reason) => {
if (clientWs.readyState === WebSocket.OPEN) {
clientWs.close(code, reason);
}
});
upstream.on('error', () => {
if (clientWs.readyState === WebSocket.OPEN) {
clientWs.close(4502, 'Browser viewer upstream error');
}
});
clientWs.on('close', () => {
if (upstream.readyState === WebSocket.OPEN) {
upstream.close();
}
});
}

View File

@@ -1,2 +0,0 @@
export { browserUseService } from './browser-use.service.js';
export { VIEWER_COOKIE_NAME } from './browser-use.viewer.js';

View File

@@ -1,73 +0,0 @@
import assert from 'node:assert/strict';
import fs from 'node:fs';
import os from 'node:os';
import path from 'node:path';
import test from 'node:test';
const originalProfileRoot = process.env.CLOUDCLI_BROWSER_USE_PROFILE_ROOT;
const testProfileRoot = fs.mkdtempSync(path.join(os.tmpdir(), 'browser-use-profiles-'));
process.env.CLOUDCLI_BROWSER_USE_PROFILE_ROOT = testProfileRoot;
const {
getProfilePath,
normalizeDefaultProfileName,
normalizeProfileName,
PROFILE_ROOT,
resolveSessionProfileName,
} = await import('@/modules/browser-use/browser-use.settings.js');
test.after(() => {
if (originalProfileRoot === undefined) {
delete process.env.CLOUDCLI_BROWSER_USE_PROFILE_ROOT;
} else {
process.env.CLOUDCLI_BROWSER_USE_PROFILE_ROOT = originalProfileRoot;
}
fs.rmSync(testProfileRoot, { recursive: true, force: true });
});
test('browser profile names are canonicalized before storage and path resolution', () => {
assert.equal(normalizeProfileName(' Work Profile!! '), 'work-profile');
assert.equal(normalizeProfileName(`${'-'.repeat(100)}Work Profile`), 'work-profile');
assert.equal(normalizeDefaultProfileName(' Work Profile!! '), 'work-profile');
assert.equal(
getProfilePath(' Work Profile!! '),
`${PROFILE_ROOT}/work-profile`,
);
assert.equal(
resolveSessionProfileName({
enabled: true,
persistSessions: true,
defaultProfileName: ' Work Profile!! ',
browserBackend: 'playwright',
}),
'work-profile',
);
});
test('browser profile aliases are rejected when the normalized profile already exists', () => {
const profileName = `alias-test-${Date.now()}`;
fs.mkdirSync(getProfilePath(profileName), { recursive: true });
try {
assert.throws(
() => resolveSessionProfileName({
enabled: true,
persistSessions: false,
defaultProfileName: 'default',
browserBackend: 'playwright',
}, profileName.toUpperCase()),
/resolves to existing profile/,
);
assert.equal(
resolveSessionProfileName({
enabled: true,
persistSessions: false,
defaultProfileName: 'default',
browserBackend: 'playwright',
}, profileName),
profileName,
);
} finally {
fs.rmSync(getProfilePath(profileName), { recursive: true, force: true });
}
});

View File

@@ -1,9 +1,8 @@
import type { Server as HttpServer } from 'node:http';
import { WebSocket, WebSocketServer, type VerifyClientCallbackSync } from 'ws';
import { WebSocketServer, type VerifyClientCallbackSync } from 'ws';
import { handleChatConnection } from '@/modules/websocket/services/chat-websocket.service.js';
import { VIEWER_COOKIE_NAME } from '@/modules/browser-use/index.js';
import { verifyWebSocketClient } from '@/modules/websocket/services/websocket-auth.service.js';
import { handlePluginWsProxy } from '@/modules/websocket/services/plugin-websocket-proxy.service.js';
import { handleShellConnection } from '@/modules/websocket/services/shell-websocket.service.js';
@@ -14,21 +13,8 @@ type WebSocketServerDependencies = {
chat: Parameters<typeof handleChatConnection>[2];
shell: Parameters<typeof handleShellConnection>[1];
getPluginPort: Parameters<typeof handlePluginWsProxy>[2];
browserUseViewer?: (ws: WebSocket, pathname: string) => void;
authenticateBrowserUseViewer?: (pathname: string, token: string | null) => boolean;
};
function readCookieValue(header: unknown, name: string): string | null {
if (!header) return null;
const prefix = `${name}=`;
const cookie = String(header).split(';').map((part) => part.trim()).find((part) => part.startsWith(prefix));
return cookie ? decodeURIComponent(cookie.slice(prefix.length)) : null;
}
function getBrowserUseViewerToken(url: URL, headers: Record<string, unknown>): string | null {
return url.searchParams.get('viewerToken') || readCookieValue(headers.cookie, VIEWER_COOKIE_NAME);
}
/**
* Creates and wires the server-wide websocket gateway used for chat, shell, and
* plugin proxy routes.
@@ -41,17 +27,7 @@ export function createWebSocketServer(
server,
verifyClient: ((
info: Parameters<VerifyClientCallbackSync<AuthenticatedWebSocketRequest>>[0]
) => {
const requestUrl = new URL(info.req.url ?? '/', 'http://localhost');
if (
requestUrl.pathname.startsWith('/api/browser-use/sessions/')
&& requestUrl.pathname.endsWith('/viewer/websockify')
) {
const token = getBrowserUseViewerToken(requestUrl, info.req.headers as Record<string, unknown>);
return Boolean(dependencies.authenticateBrowserUseViewer?.(requestUrl.pathname, token));
}
return verifyWebSocketClient(info, dependencies.verifyClient);
}),
) => verifyWebSocketClient(info, dependencies.verifyClient)),
});
wss.on('connection', (ws, request) => {
@@ -92,11 +68,6 @@ export function createWebSocketServer(
return;
}
if (pathname.startsWith('/api/browser-use/sessions/') && pathname.endsWith('/viewer/websockify')) {
dependencies.browserUseViewer?.(ws, pathname);
return;
}
console.log('[WARN] Unknown WebSocket path:', pathname);
ws.close();
});

View File

@@ -1,224 +0,0 @@
// Optional voice proxy — forwards STT/TTS to an OpenAI-compatible audio backend.
//
// The backend is whatever the user points at: OpenAI, Groq, or a local server
// (LocalAI / Speaches / Kokoro-FastAPI / openedai-speech / etc.). It must expose the
// standard OpenAI audio endpoints:
// POST {base}/audio/transcriptions (multipart 'file' + 'model') -> { text }
// POST {base}/audio/speech ({ model, voice, input }) -> audio bytes
//
// Config is resolved per-request from headers (set by the client's voice settings),
// falling back to server env defaults. Mounted at /api/voice behind authenticateToken.
import { Readable } from 'node:stream';
import express from 'express';
const ENV = {
baseUrl: (process.env.VOICE_API_BASE_URL || '').replace(/\/$/, ''),
apiKey: process.env.VOICE_API_KEY || '',
sttModel: process.env.VOICE_STT_MODEL || 'whisper-1',
ttsModel: process.env.VOICE_TTS_MODEL || 'tts-1',
ttsVoice: process.env.VOICE_TTS_VOICE || 'alloy',
};
/**
* Resolve the voice backend config for a request. Client headers (set from the
* user's in-app voice settings) take precedence over the server env defaults.
* @param {import('express').Request} req
* @returns {{baseUrl: string, apiKey: string, sttModel: string, ttsModel: string, ttsVoice: string, ttsFormat: string}}
*/
function resolveConfig(req) {
const h = req.headers;
return {
// Security: do not allow clients to control the outbound backend host.
// Always use the server-side configured base URL.
baseUrl: ENV.baseUrl,
apiKey: String(h['x-voice-api-key'] || '') || ENV.apiKey,
sttModel: String(h['x-voice-stt-model'] || '') || ENV.sttModel,
ttsModel: String(h['x-voice-tts-model'] || '') || ENV.ttsModel,
ttsVoice: String(h['x-voice-tts-voice'] || '') || ENV.ttsVoice,
ttsFormat: String(h['x-voice-tts-format'] || '').trim(),
};
}
const router = express.Router();
// Generous by default — local TTS can synthesize long messages at ~real-time on CPU.
// Guard against a non-numeric/zero override that would make setTimeout fire immediately.
const DEFAULT_VOICE_TIMEOUT_MS = 300000;
const _parsedTimeout = Number(process.env.VOICE_TIMEOUT_MS);
const VOICE_TIMEOUT_MS = Number.isFinite(_parsedTimeout) && _parsedTimeout > 0
? _parsedTimeout
: DEFAULT_VOICE_TIMEOUT_MS;
/**
* fetch() with an AbortController timeout so a stalled backend can't hold the
* request open indefinitely. Aborts after VOICE_TIMEOUT_MS.
* @param {string} url
* @param {RequestInit} [options]
* @returns {Promise<Response>}
*/
async function fetchWithTimeout(url, options = {}) {
const parsed = new URL(url);
if (!['http:', 'https:'].includes(parsed.protocol) || !isAllowedBackendUrl(parsed.origin)) {
throw new Error('Blocked outbound voice backend URL');
}
const controller = new AbortController();
const timer = setTimeout(() => controller.abort(), VOICE_TIMEOUT_MS);
try {
return await fetch(parsed.toString(), { redirect: 'manual', ...options, signal: controller.signal });
} finally {
clearTimeout(timer);
}
}
/**
* Turn a backend fetch failure into a clear, actionable client response:
* 504 on timeout (AbortError), 502 otherwise.
* @param {import('express').Response} res
* @param {Error} e
*/
function backendError(res, e) {
if (e && e.name === 'AbortError') {
return res.status(504).json({
error: `Voice backend timed out after ${Math.round(VOICE_TIMEOUT_MS / 1000)}s. Check your voice backend.`,
});
}
return res.status(502).json({ error: `Voice backend unreachable: ${e.message}` });
}
/**
* SSRF guard for the user-configurable backend URL: allow http/https only and
* block the link-local / cloud-metadata range (169.254.x). localhost and private
* ranges are allowed on purpose so users can point at a local voice server
* (LocalAI, Speaches, Kokoro-FastAPI, etc.).
* @param {string} raw
* @returns {boolean}
*/
function isAllowedBackendUrl(raw) {
let u;
try {
u = new URL(raw);
} catch {
return false;
}
if (u.protocol !== 'http:' && u.protocol !== 'https:') return false;
if (u.hostname === '169.254.169.254' || u.hostname.startsWith('169.254.')) return false;
return true;
}
/**
* Relay an upstream (backend) error to the client without making an upstream
* 401/403 look like the user's own app login failed.
* @param {import('express').Response} res
* @param {number} status
* @param {string} [text]
*/
function upstreamError(res, status, text) {
if (status === 401 || status === 403) {
return res.status(502).json({ error: 'Voice backend rejected the request (check the API key).' });
}
return res.status(status).json({ error: text || 'voice backend error' });
}
let _upload = null;
/**
* Lazily build a memory-storage multer instance (25 MB cap) for audio uploads,
* so multer is only imported when the voice feature is actually used.
* @returns {Promise<import('multer').Multer>}
*/
async function getUpload() {
if (!_upload) {
const multer = (await import('multer')).default;
_upload = multer({ storage: multer.memoryStorage(), limits: { fileSize: 25 * 1024 * 1024 } });
}
return _upload;
}
/**
* Build the Authorization header for the backend, or an empty object when no
* key is configured (e.g. a local server that needs none).
* @param {string} apiKey
* @returns {Record<string, string>}
*/
function authHeader(apiKey) {
return apiKey ? { Authorization: `Bearer ${apiKey}` } : {};
}
/**
* GET /api/voice/health -> { configured } (true when a backend base URL is set).
*/
router.get('/health', (req, res) => {
res.json({ configured: Boolean(resolveConfig(req).baseUrl) });
});
/**
* POST /api/voice/transcribe (multipart 'audio') -> { text }.
* Forwards the uploaded audio to the backend's /audio/transcriptions endpoint.
*/
router.post('/transcribe', async (req, res) => {
const cfg = resolveConfig(req);
if (!cfg.baseUrl) return res.status(503).json({ error: 'No voice backend configured' });
if (!isAllowedBackendUrl(cfg.baseUrl)) return res.status(400).json({ error: 'Invalid voice backend URL.' });
const upload = await getUpload();
upload.single('audio')(req, res, async (err) => {
if (err) return res.status(400).json({ error: err.message });
if (!req.file) return res.status(400).json({ error: 'No audio uploaded' });
try {
const fd = new FormData();
fd.append(
'file',
new Blob([req.file.buffer], { type: req.file.mimetype || 'audio/webm' }),
req.file.originalname || 'recording.webm',
);
fd.append('model', cfg.sttModel);
const r = await fetchWithTimeout(`${cfg.baseUrl}/audio/transcriptions`, {
method: 'POST',
headers: authHeader(cfg.apiKey),
body: fd,
});
const text = await r.text();
if (!r.ok) return upstreamError(res, r.status, text);
let data;
try { data = JSON.parse(text); } catch { data = { text }; }
res.json({ text: data.text ?? '' });
} catch (e) {
backendError(res, e);
}
});
});
/**
* POST /api/voice/tts { text } -> audio bytes.
* Forwards the text to the backend's /audio/speech endpoint and streams the audio back.
*/
router.post('/tts', async (req, res) => {
const cfg = resolveConfig(req);
if (!cfg.baseUrl) return res.status(503).json({ error: 'No voice backend configured' });
if (!isAllowedBackendUrl(cfg.baseUrl)) return res.status(400).json({ error: 'Invalid voice backend URL.' });
const text = req.body?.text;
if (typeof text !== 'string' || !text.trim()) return res.status(400).json({ error: 'text required' });
try {
const r = await fetchWithTimeout(`${cfg.baseUrl}/audio/speech`, {
method: 'POST',
headers: { 'Content-Type': 'application/json', ...authHeader(cfg.apiKey) },
body: JSON.stringify({
model: cfg.ttsModel,
voice: cfg.ttsVoice,
input: text,
...(cfg.ttsFormat ? { response_format: cfg.ttsFormat } : {}),
}),
});
if (!r.ok) {
const errText = await r.text().catch(() => 'tts failed');
return upstreamError(res, r.status, errText);
}
res.setHeader('Content-Type', r.headers.get('content-type') || 'audio/mpeg');
res.setHeader('Cache-Control', 'no-store');
if (!r.body) return res.end();
Readable.fromWeb(r.body).on('error', (error) => res.destroy(error)).pipe(res);
} catch (e) {
backendError(res, e);
}
});
export default router;

View File

@@ -1,4 +1,4 @@
import { useCallback, useEffect, useMemo, useRef, useState } from 'react';
import { useCallback, useEffect, useMemo, useState } from 'react';
import {
Bot,
Clock3,
@@ -7,7 +7,6 @@ import {
ExternalLink,
Loader2,
MonitorPlay,
MousePointer2,
RefreshCw,
Settings,
Square,
@@ -20,14 +19,9 @@ import { Badge, Button } from '../../../shared/view/ui';
import { authenticatedFetch } from '../../../utils/api';
import type { SettingsMainTab } from '../../settings/types/types';
const BROWSER_USE_GUIDE_URL = 'https://cloudcli.ai/docs/browser-use';
const BROWSER_USE_CACHE_TTL_MS = 30_000;
type BrowserUseStatus = {
enabled: boolean;
available: boolean;
backend: 'playwright' | 'camoufox-vnc';
browserBackend: 'playwright' | 'camoufox-vnc';
playwrightInstalled: boolean;
chromiumInstalled: boolean;
installInProgress: boolean;
@@ -45,9 +39,6 @@ type BrowserUseSession = {
updatedAt: string;
lastAction: string | null;
message: string | null;
backend?: 'playwright' | 'camoufox-vnc';
viewerUrl?: string | null;
viewerEmbedUrl?: string | null;
createdBy: 'agent';
profileName: string | null;
viewport: {
@@ -63,48 +54,17 @@ type BrowserUseSession = {
type BrowserUsePanelProps = {
isVisible: boolean;
projectId?: string | null;
onShowSettings?: (tab?: SettingsMainTab) => void;
};
type BrowserUsePanelCacheEntry = {
status: BrowserUseStatus | null;
sessions: BrowserUseSession[];
selectedSessionId: string | null;
updatedAt: number;
};
const browserUsePanelCache = new Map<string, BrowserUsePanelCacheEntry>();
async function readJson<T>(response: Response): Promise<T> {
const text = await response.text();
let data: any = {};
if (text) {
try {
data = JSON.parse(text);
} catch {
throw new Error(response.ok ? 'Received an invalid Browser response.' : `Browser request failed (${response.status}).`);
}
}
const data = await response.json();
if (!response.ok || data.success === false) {
throw new Error(data.error || data.details || `Request failed (${response.status})`);
}
return data as T;
}
async function fetchBrowserPanelData() {
const [statusResponse, sessionsResponse] = await Promise.all([
authenticatedFetch('/api/browser-use/status'),
authenticatedFetch('/api/browser-use/sessions'),
]);
const statusData = await readJson<{ data: BrowserUseStatus }>(statusResponse);
const sessionsData = await readJson<{ data: { sessions: BrowserUseSession[] } }>(sessionsResponse);
return {
status: statusData.data,
sessions: [...sessionsData.data.sessions].sort((a, b) => Date.parse(b.createdAt) - Date.parse(a.createdAt)),
};
}
function formatRelativeTime(value: string | null): string {
if (!value) return 'Never';
@@ -159,42 +119,20 @@ function getStatusDot(status: BrowserUseSession['status']): string {
return 'bg-border';
}
function getEngineLabel(backend?: BrowserUseStatus['backend'] | BrowserUseSession['backend']): string {
return backend === 'camoufox-vnc' ? 'Visible browser' : 'Playwright';
}
const PROMPTS = [
'Use Browser to inspect the checkout flow and report any broken UI states.',
'Open <url> with Browser, interact with the page, and summarize what changed after each step.',
];
function getBrowserUseCacheKey(projectId?: string | null): string {
return projectId ? `browser-use:project:${projectId}` : 'browser-use:global';
}
function getFreshCacheEntry(cacheKey: string): BrowserUsePanelCacheEntry | null {
const entry = browserUsePanelCache.get(cacheKey);
if (!entry || Date.now() - entry.updatedAt > BROWSER_USE_CACHE_TTL_MS) {
return null;
}
return entry;
}
export default function BrowserUsePanel({ isVisible, projectId, onShowSettings }: BrowserUsePanelProps) {
const cacheKey = getBrowserUseCacheKey(projectId);
const initialCacheEntry = getFreshCacheEntry(cacheKey);
const [status, setStatus] = useState<BrowserUseStatus | null>(() => initialCacheEntry?.status ?? null);
const [sessions, setSessions] = useState<BrowserUseSession[]>(() => initialCacheEntry?.sessions ?? []);
const [selectedSessionId, setSelectedSessionId] = useState<string | null>(() => (
initialCacheEntry?.selectedSessionId || initialCacheEntry?.sessions[0]?.id || null
));
const [hasLoadedOnce, setHasLoadedOnce] = useState(Boolean(initialCacheEntry));
export default function BrowserUsePanel({ isVisible, onShowSettings }: BrowserUsePanelProps) {
const [status, setStatus] = useState<BrowserUseStatus | null>(null);
const [sessions, setSessions] = useState<BrowserUseSession[]>([]);
const [selectedSessionId, setSelectedSessionId] = useState<string | null>(null);
const [isRefreshing, setIsRefreshing] = useState(false);
const [isBusy, setIsBusy] = useState(false);
const [isInstalling, setIsInstalling] = useState(false);
const [isFullscreen, setIsFullscreen] = useState(false);
const [error, setError] = useState<string | null>(null);
const activeLoadIdRef = useRef(0);
const selectedSession = useMemo(
() => sessions.find((session) => session.id === selectedSessionId) || sessions[0] || null,
@@ -202,12 +140,8 @@ export default function BrowserUsePanel({ isVisible, projectId, onShowSettings }
);
const activeSessions = sessions.filter((session) => session.status === 'ready');
const isInitialLoading = isRefreshing && !hasLoadedOnce && sessions.length === 0;
const isBackgroundRefreshing = isRefreshing && !isInitialLoading;
const needsBrowserBinaries = Boolean(status?.enabled && !status.available);
const runtimeLabel = isInitialLoading
? 'Loading'
: !status?.enabled
const needsBrowserBinaries = Boolean(status?.enabled && (!status.playwrightInstalled || !status.chromiumInstalled));
const runtimeLabel = !status?.enabled
? 'Disabled'
: status.available
? 'Ready'
@@ -223,72 +157,29 @@ export default function BrowserUsePanel({ isVisible, projectId, onShowSettings }
: null;
const refresh = useCallback(async () => {
const loadId = activeLoadIdRef.current + 1;
activeLoadIdRef.current = loadId;
setIsRefreshing(true);
try {
let nextData: Awaited<ReturnType<typeof fetchBrowserPanelData>>;
try {
nextData = await fetchBrowserPanelData();
} catch (error) {
if (loadId !== activeLoadIdRef.current) {
return;
}
await new Promise((resolve) => setTimeout(resolve, 350));
nextData = await fetchBrowserPanelData();
}
if (activeLoadIdRef.current !== loadId) {
return;
}
const nextSessions = nextData.sessions;
setStatus(nextData.status);
const [statusResponse, sessionsResponse] = await Promise.all([
authenticatedFetch('/api/browser-use/status'),
authenticatedFetch('/api/browser-use/sessions'),
]);
const statusData = await readJson<{ data: BrowserUseStatus }>(statusResponse);
const sessionsData = await readJson<{ data: { sessions: BrowserUseSession[] } }>(sessionsResponse);
const nextSessions = sessionsData.data.sessions;
setStatus(statusData.data);
setSessions(nextSessions);
setHasLoadedOnce(true);
let nextSelectedSessionId: string | null = null;
setSelectedSessionId((current) => {
nextSelectedSessionId = current && nextSessions.some((session) => session.id === current)
setSelectedSessionId((current) => (
current && nextSessions.some((session) => session.id === current)
? current
: nextSessions[0]?.id || null;
return nextSelectedSessionId;
});
browserUsePanelCache.set(cacheKey, {
status: nextData.status,
sessions: nextSessions,
selectedSessionId: nextSelectedSessionId,
updatedAt: Date.now(),
});
: nextSessions[0]?.id || null
));
setError(null);
} catch (err) {
if (activeLoadIdRef.current !== loadId) {
return;
}
setHasLoadedOnce(true);
setError(err instanceof Error ? err.message : 'Failed to load Browser');
} finally {
if (activeLoadIdRef.current === loadId) {
setIsRefreshing(false);
}
setIsRefreshing(false);
}
}, [cacheKey]);
useEffect(() => {
const cachedEntry = browserUsePanelCache.get(cacheKey);
if (!cachedEntry) return;
browserUsePanelCache.set(cacheKey, {
...cachedEntry,
selectedSessionId,
});
}, [cacheKey, selectedSessionId]);
useEffect(() => {
const cachedEntry = getFreshCacheEntry(cacheKey);
setStatus(cachedEntry?.status ?? null);
setSessions(cachedEntry?.sessions ?? []);
setSelectedSessionId(cachedEntry?.selectedSessionId || cachedEntry?.sessions[0]?.id || null);
setHasLoadedOnce(Boolean(cachedEntry));
setError(null);
activeLoadIdRef.current += 1;
}, [cacheKey]);
}, []);
useEffect(() => {
if (!isVisible) return;
@@ -362,10 +253,6 @@ export default function BrowserUsePanel({ isVisible, projectId, onShowSettings }
<span>{formatRelativeTime(session.updatedAt)}</span>
<span className="truncate">- {formatAction(session.lastAction)}</span>
</div>
<div className="mt-2 flex flex-wrap gap-1.5 pl-3.5 text-[10px] text-muted-foreground">
<span className="rounded border border-border/70 bg-background/70 px-1.5 py-0.5">{getEngineLabel(session.backend)}</span>
<span className="rounded border border-border/70 bg-background/70 px-1.5 py-0.5">{session.profileName || 'Temporary'}</span>
</div>
</button>
);
};
@@ -383,18 +270,9 @@ export default function BrowserUsePanel({ isVisible, projectId, onShowSettings }
</div>
<p className="mt-1 max-w-xl text-sm leading-6 text-muted-foreground">
{status?.enabled
? 'When an agent opens a browser, you can watch the latest screenshot, take control in a new tab, or end the running session.'
: 'Enable Browser to let agents open websites, test flows, capture screenshots, and debug UI from a real page.'}
? 'Agent browser sessions appear here while an AI task is using Browser.'
: 'Enable Browser in settings to let agents open monitored browser sessions.'}
</p>
<a
href={BROWSER_USE_GUIDE_URL}
target="_blank"
rel="noopener noreferrer"
className="mt-2 inline-flex items-center gap-1.5 text-sm font-medium text-primary hover:underline"
>
Read the Browser guide
<ExternalLink className="h-3.5 w-3.5" />
</a>
</div>
</div>
@@ -434,19 +312,10 @@ export default function BrowserUsePanel({ isVisible, projectId, onShowSettings }
</div>
);
const renderLoadingState = () => (
<div className="flex min-h-0 flex-1 items-center justify-center p-6">
<div className="flex items-center gap-3 rounded-md border border-border bg-card/40 px-4 py-3 text-sm text-muted-foreground shadow-sm">
<Loader2 className="h-4 w-4 animate-spin text-primary" />
Loading browser sessions...
</div>
</div>
);
const renderBrowserSurface = (fullscreen = false) => (
<div className={cn('flex flex-1 items-center justify-center bg-neutral-950', fullscreen ? 'min-h-[80vh]' : 'min-h-[420px]')}>
{selectedSession?.screenshotDataUrl ? (
<div className="group relative inline-block max-h-full">
<div className="relative inline-block max-h-full">
<img
src={selectedSession.screenshotDataUrl}
alt="Browser session screenshot"
@@ -460,18 +329,6 @@ export default function BrowserUsePanel({ isVisible, projectId, onShowSettings }
<div className="absolute left-1/2 top-1/2 h-2 w-2 -translate-x-1/2 -translate-y-1/2 rounded-full bg-white" />
</div>
)}
{selectedSession?.viewerEmbedUrl && selectedSession.status === 'ready' && (
<button
type="button"
onClick={() => window.open(selectedSession.viewerUrl || selectedSession.viewerEmbedUrl || '', '_blank', 'noopener,noreferrer')}
className="absolute inset-0 flex items-center justify-center bg-black/0 opacity-0 transition focus-visible:bg-black/30 focus-visible:opacity-100 focus-visible:outline-none group-hover:bg-black/30 group-hover:opacity-100"
>
<span className="inline-flex items-center gap-2 rounded-md border border-white/20 bg-black/80 px-3 py-2 text-sm font-medium text-white shadow-lg">
<MousePointer2 className="h-4 w-4" />
Take control
</span>
</button>
)}
</div>
) : (
<div className="px-6 text-center">
@@ -493,29 +350,10 @@ export default function BrowserUsePanel({ isVisible, projectId, onShowSettings }
<Badge variant="outline" className={cn('text-[10px]', getRuntimeTone(status, isInstalling))}>
{runtimeLabel}
</Badge>
<Badge variant="outline" className="border-border bg-background text-[10px] text-muted-foreground">
{getEngineLabel(status?.backend)}
</Badge>
</div>
<p className="mt-0.5 text-xs text-muted-foreground">Watch and manage browser sessions agents use to test real websites.</p>
{isBackgroundRefreshing && (
<div className="mt-1 flex items-center gap-1.5 text-xs text-muted-foreground">
<RefreshCw className="h-3 w-3 animate-spin" />
Refreshing sessions...
</div>
)}
<p className="mt-0.5 text-xs text-muted-foreground">Monitor browser sessions opened by AI agents.</p>
</div>
<div className="flex items-center gap-1.5">
<Button
variant="ghost"
size="sm"
className="h-7 w-7 p-0"
onClick={() => window.open(BROWSER_USE_GUIDE_URL, '_blank', 'noopener,noreferrer')}
title="Open Browser guide"
aria-label="Open Browser guide"
>
<ExternalLink className="h-3.5 w-3.5" />
</Button>
{onShowSettings && (
<Button
variant="ghost"
@@ -587,7 +425,7 @@ export default function BrowserUsePanel({ isVisible, projectId, onShowSettings }
</div>
{sessions.length === 0 ? (
isInitialLoading ? renderLoadingState() : renderEmptyState()
renderEmptyState()
) : (
<div className="min-h-0 flex-1 overflow-auto bg-muted/20 p-4">
<div className="mx-auto flex min-h-[500px] max-w-7xl flex-col overflow-hidden rounded-md border border-border bg-background shadow-sm">
@@ -603,32 +441,14 @@ export default function BrowserUsePanel({ isVisible, projectId, onShowSettings }
<ExternalLink className="h-3.5 w-3.5 shrink-0" />
<span className="truncate">{selectedSession?.url || 'No page loaded'}</span>
</div>
<div className="mt-1 flex flex-wrap gap-1.5 text-[10px] text-muted-foreground">
<span className="rounded border border-border/70 bg-muted/30 px-1.5 py-0.5">{getEngineLabel(selectedSession?.backend || status?.backend)}</span>
<span className="rounded border border-border/70 bg-muted/30 px-1.5 py-0.5">Profile: {selectedSession?.profileName || 'Temporary'}</span>
<span className="rounded border border-border/70 bg-muted/30 px-1.5 py-0.5">Updated {formatRelativeTime(selectedSession?.updatedAt || null)}</span>
</div>
</div>
<div className="hidden text-xs text-muted-foreground md:block">
{formatAction(selectedSession?.lastAction || null)}
</div>
{selectedSession?.viewerUrl && selectedSession.status === 'ready' && (
<Button
variant="outline"
size="sm"
className="h-8"
onClick={() => window.open(selectedSession.viewerUrl || '', '_blank', 'noopener,noreferrer')}
title="Open live browser control in a new tab"
aria-label="Open live browser control in a new tab"
>
<MousePointer2 className="h-4 w-4" />
Take control
</Button>
)}
<Button variant="ghost" size="sm" className="h-8 w-8 p-0" onClick={() => setIsFullscreen(true)} disabled={!selectedSession?.screenshotDataUrl} title="Full screen" aria-label="Full screen">
<Expand className="h-4 w-4" />
</Button>
<Button variant="ghost" size="sm" className="h-8 w-8 p-0 lg:hidden" onClick={stopSession} disabled={isBusy || !selectedSession || selectedSession.status !== 'ready'} title="End session" aria-label="End session">
<Button variant="ghost" size="sm" className="h-8 w-8 p-0 lg:hidden" onClick={stopSession} disabled={isBusy || !selectedSession || selectedSession.status !== 'ready'} title="Stop session" aria-label="Stop session">
<Square className="h-4 w-4" />
</Button>
<Button variant="ghost" size="sm" className="h-8 w-8 p-0 lg:hidden" onClick={deleteSession} disabled={isBusy || !selectedSession} title="Delete session" aria-label="Delete session">
@@ -655,11 +475,6 @@ export default function BrowserUsePanel({ isVisible, projectId, onShowSettings }
<div className="min-h-0 flex-1 overflow-y-auto p-3">
{sessions.length > 0 ? (
<div className="space-y-2">{sessions.map(renderSessionItem)}</div>
) : isInitialLoading ? (
<div className="flex items-center justify-center gap-2 rounded-md border border-dashed border-border/70 px-3 py-8 text-center text-xs text-muted-foreground">
<Loader2 className="h-3.5 w-3.5 animate-spin" />
Loading sessions...
</div>
) : (
<div className="rounded-md border border-dashed border-border/70 px-3 py-8 text-center text-xs text-muted-foreground">
No agent browser sessions.
@@ -690,7 +505,7 @@ export default function BrowserUsePanel({ isVisible, projectId, onShowSettings }
<div className="mt-3 grid grid-cols-2 gap-2">
<Button variant="outline" size="sm" onClick={stopSession} disabled={isBusy || !selectedSession || selectedSession.status !== 'ready'}>
<Square className="h-4 w-4" />
End
Stop
</Button>
<Button variant="outline" size="sm" onClick={deleteSession} disabled={isBusy || !selectedSession}>
<Trash2 className="h-4 w-4" />

View File

@@ -775,17 +775,6 @@ export function useChatComposerState({
handleSubmitRef.current = handleSubmit;
}, [handleSubmit]);
// A voice transcript either fills the input (to edit before sending) or, when the
// user tapped "stop and send", is submitted straight away. Mirror the value into
// inputValueRef synchronously so handleSubmit reads the new text, not the stale state.
const handleVoiceTranscript = useCallback((text: string, send?: boolean) => {
const base = inputValueRef.current.trim();
const next = base ? `${base} ${text}` : text;
setInput(next);
inputValueRef.current = next;
if (send) handleSubmitRef.current?.(createFakeSubmitEvent());
}, [setInput]);
useEffect(() => {
inputValueRef.current = input;
}, [input]);
@@ -1024,7 +1013,6 @@ export function useChatComposerState({
isDragActive,
openImagePicker: open,
handleSubmit,
handleVoiceTranscript,
handleInputChange,
handleKeyDown,
handlePaste,

View File

@@ -114,6 +114,7 @@ export function useChatProviderState({ selectedSession, selectedProject }: UseCh
const [providerModelsLoading, setProviderModelsLoading] = useState(true);
const [providerModelsRefreshing, setProviderModelsRefreshing] = useState(false);
const lastProviderRef = useRef(provider);
const providerModelsRequestIdRef = useRef(0);
const setStoredProviderModel = useCallback((targetProvider: LLMProvider, model: string) => {
@@ -343,8 +344,14 @@ export function useChatProviderState({ selectedSession, selectedProject }: UseCh
localStorage.setItem('selected-provider', selectedSession.__provider);
}, [provider, selectedSession]);
// Permission prompts belong to a session, not to the transient provider
// selection that is synchronized after navigation.
useEffect(() => {
if (lastProviderRef.current === provider) {
return;
}
setPendingPermissionRequests([]);
lastProviderRef.current = provider;
}, [provider]);
useEffect(() => {
setPendingPermissionRequests((previous) =>
previous.filter((request) => !request.sessionId || request.sessionId === selectedSession?.id),

View File

@@ -1,29 +1,20 @@
import { useEffect, useRef } from 'react';
import { useEffect } from 'react';
import type { Dispatch, MutableRefObject, SetStateAction } from 'react';
import type { ServerEvent } from '../../../contexts/WebSocketContext';
import { showCompletionTitleIndicator } from '../../../utils/pageTitleNotification';
import { playChatCompletionSound, playNotificationSound } from '../../../utils/notificationSound';
import { playChatCompletionSound } from '../../../utils/notificationSound';
import type { MarkSessionIdle, MarkSessionProcessing } from '../../../hooks/useSessionProtection';
import type { PendingPermissionRequest } from '../types/types';
import type { ProjectSession, LLMProvider } from '../../../types/app';
import type { SessionStore, NormalizedMessage } from '../../../stores/useSessionStore';
const isActionablePermissionRequest = (request: { toolName?: unknown } | null | undefined): boolean => {
return request?.toolName !== 'ExitPlanMode' && request?.toolName !== 'exit_plan_mode';
};
const hasActionablePermissionRequests = (requests: Array<{ toolName?: unknown }> | null | undefined): boolean => {
return Array.isArray(requests) && requests.some((request) => isActionablePermissionRequest(request));
};
interface UseChatRealtimeHandlersArgs {
subscribe: (listener: (event: ServerEvent) => void) => () => void;
provider: LLMProvider;
selectedSession: ProjectSession | null;
currentSessionId: string | null;
setTokenBudget: (budget: Record<string, unknown> | null) => void;
pendingPermissionRequests: PendingPermissionRequest[];
setPendingPermissionRequests: Dispatch<SetStateAction<PendingPermissionRequest[]>>;
streamTimerRef: MutableRefObject<number | null>;
accumulatedStreamRef: MutableRefObject<string>;
@@ -61,7 +52,6 @@ export function useChatRealtimeHandlers({
selectedSession,
currentSessionId,
setTokenBudget,
pendingPermissionRequests,
setPendingPermissionRequests,
streamTimerRef,
accumulatedStreamRef,
@@ -72,29 +62,13 @@ export function useChatRealtimeHandlers({
onWebSocketReconnect,
sessionStore,
}: UseChatRealtimeHandlersArgs) {
// Session switches can send `chat.subscribe` before this effect has a chance
// to rebind the websocket listener. Read the visible session id from a ref
// so a fast `chat_subscribed` ack is matched against the current view, not
// the previous render's closed-over selection.
const activeViewSessionIdRef = useRef<string | null>(selectedSession?.id || currentSessionId || null);
activeViewSessionIdRef.current = selectedSession?.id || currentSessionId || null;
// Keep the latest pending-permission snapshot available to the websocket
// listener so back-to-back permission events can dedupe and re-arm the
// notification sound before React finishes a rerender.
const pendingPermissionRequestsRef = useRef(pendingPermissionRequests);
useEffect(() => {
pendingPermissionRequestsRef.current = pendingPermissionRequests;
}, [pendingPermissionRequests]);
useEffect(() => {
const handleEvent = (msg: ServerEvent) => {
if (!msg.kind) {
return;
}
const activeViewSessionId = activeViewSessionIdRef.current;
const activeViewSessionId = selectedSession?.id || currentSessionId || null;
const sid = (typeof msg.sessionId === 'string' && msg.sessionId) || activeViewSessionId;
// Record replay progress for every sequenced live event.
@@ -127,16 +101,7 @@ export function useChatRealtimeHandlers({
const isViewedSession = sid === activeViewSessionId;
if (isViewedSession && Array.isArray(msg.pendingPermissions)) {
const nextPendingPermissionRequests = msg.pendingPermissions as PendingPermissionRequest[];
const hadActionablePermissionRequests = hasActionablePermissionRequests(pendingPermissionRequestsRef.current);
const hasPendingActionablePermissionRequests = hasActionablePermissionRequests(nextPendingPermissionRequests);
pendingPermissionRequestsRef.current = nextPendingPermissionRequests;
setPendingPermissionRequests(nextPendingPermissionRequests);
if (hasPendingActionablePermissionRequests && !hadActionablePermissionRequests) {
void playNotificationSound();
}
setPendingPermissionRequests(msg.pendingPermissions as PendingPermissionRequest[]);
}
return;
}
@@ -238,7 +203,6 @@ export function useChatRealtimeHandlers({
// hides it immediately and atomically.
onSessionIdle?.(sid);
if (sid === activeViewSessionId) {
pendingPermissionRequestsRef.current = [];
setPendingPermissionRequests([]);
}
@@ -270,14 +234,10 @@ export function useChatRealtimeHandlers({
case 'permission_request': {
if (!msg.requestId) break;
if (isActionablePermissionRequest({ toolName: msg.toolName })) {
void playNotificationSound();
}
if (sid === activeViewSessionId) {
const previousPendingPermissionRequests = pendingPermissionRequestsRef.current;
if (!previousPendingPermissionRequests.some((request) => request.requestId === msg.requestId)) {
const nextPendingPermissionRequests = [...previousPendingPermissionRequests, {
setPendingPermissionRequests((prev) => {
if (prev.some((r: PendingPermissionRequest) => r.requestId === msg.requestId)) return prev;
return [...prev, {
requestId: msg.requestId as string,
toolName: (msg.toolName as string) || 'UnknownTool',
input: msg.input,
@@ -285,10 +245,7 @@ export function useChatRealtimeHandlers({
sessionId: sid || null,
receivedAt: new Date(),
}];
pendingPermissionRequestsRef.current = nextPendingPermissionRequests;
setPendingPermissionRequests(nextPendingPermissionRequests);
}
});
}
if (sid) {
onSessionProcessing?.(sid);
@@ -298,12 +255,7 @@ export function useChatRealtimeHandlers({
case 'permission_cancelled': {
if (msg.requestId && sid === activeViewSessionId) {
const nextPendingPermissionRequests = pendingPermissionRequestsRef.current.filter(
(request: PendingPermissionRequest) => request.requestId !== msg.requestId,
);
pendingPermissionRequestsRef.current = nextPendingPermissionRequests;
setPendingPermissionRequests(nextPendingPermissionRequests);
setPendingPermissionRequests((prev) => prev.filter((r: PendingPermissionRequest) => r.requestId !== msg.requestId));
}
break;
}
@@ -334,7 +286,6 @@ export function useChatRealtimeHandlers({
selectedSession,
currentSessionId,
setTokenBudget,
pendingPermissionRequests,
setPendingPermissionRequests,
streamTimerRef,
accumulatedStreamRef,

View File

@@ -1,33 +0,0 @@
import { useCallback, useEffect, useState } from 'react';
import { voicePlayer, voiceId, type VoiceSnapshot } from '../../../lib/voicePlayer';
export type TtsState = VoiceSnapshot['state'];
/**
* Thin adapter over the app-level voicePlayer. Playback lives outside React (see
* lib/voicePlayer), so switching chats or re-rendering a message no longer cuts the
* audio off. This hook just reflects the player's state for one message and forwards taps.
*/
export function useTts(getText: () => string) {
const content = getText();
const id = voiceId(content);
const [snap, setSnap] = useState<VoiceSnapshot>(() => voicePlayer.getSnapshot(id));
useEffect(() => {
const update = () =>
setSnap((prev) => {
const next = voicePlayer.getSnapshot(id);
return prev.state === next.state && prev.error === next.error ? prev : next;
});
update();
return voicePlayer.subscribe(update);
}, [id]);
const toggle = useCallback(() => {
voicePlayer.unlock(); // synchronous, within the click gesture (iOS)
voicePlayer.toggle(content);
}, [content]);
return { state: snap.state, toggle, error: snap.error };
}

View File

@@ -1,85 +0,0 @@
import { useEffect, useState } from 'react';
import { authenticatedFetch } from '../../../utils/api';
import { readVoiceConfig, VOICE_CONFIG_SYNC_EVENT } from '../../../hooks/useVoiceConfig';
// Voice UI is gated on the `voiceEnabled` UI preference (toggled in Quick Settings /
// the Settings modal) and a configured voice backend.
const STORAGE_KEY = 'uiPreferences';
const SYNC_EVENT = 'ui-preferences:sync';
let healthRequest: Promise<boolean> | null = null;
function checkVoiceHealth(): Promise<boolean> {
if (healthRequest) return healthRequest;
const request = authenticatedFetch('/api/voice/health')
.then(async (response) => {
if (!response.ok) throw new Error(`Voice health check failed (${response.status})`);
const data = await response.json();
return data?.configured === true;
})
.finally(() => {
healthRequest = null;
});
healthRequest = request;
return request;
}
function readVoiceEnabled(): boolean {
try {
const raw = localStorage.getItem(STORAGE_KEY);
if (!raw) return false;
const parsed = JSON.parse(raw);
return parsed?.voiceEnabled === true || parsed?.voiceEnabled === 'true';
} catch {
return false;
}
}
export function useVoiceAvailable(): boolean {
const [enabled, setEnabled] = useState<boolean>(() =>
typeof window === 'undefined' ? false : readVoiceEnabled(),
);
const [available, setAvailable] = useState(false);
useEffect(() => {
const update = () => setEnabled(readVoiceEnabled());
window.addEventListener('storage', update);
window.addEventListener(SYNC_EVENT, update as EventListener);
return () => {
window.removeEventListener('storage', update);
window.removeEventListener(SYNC_EVENT, update as EventListener);
};
}, []);
useEffect(() => {
let active = true;
let requestId = 0;
const check = async () => {
if (!enabled) {
setAvailable(false);
return;
}
if (readVoiceConfig().baseUrl.trim()) {
setAvailable(true);
return;
}
const id = ++requestId;
try {
const result = await checkVoiceHealth();
if (active && id === requestId) setAvailable(result);
} catch {
if (active && id === requestId) setAvailable(false);
}
};
void check();
window.addEventListener(VOICE_CONFIG_SYNC_EVENT, check);
return () => {
active = false;
window.removeEventListener(VOICE_CONFIG_SYNC_EVENT, check);
};
}, [enabled]);
return enabled && available;
}

View File

@@ -1,149 +0,0 @@
import { useCallback, useEffect, useRef, useState } from 'react';
import { transcribeVoice } from '../../../lib/voiceApi';
// Mobile-safe recording: iOS Safari 18.4+ supports webm/opus; older iOS needs mp4.
const MIME_CANDIDATES = [
'audio/webm;codecs=opus',
'audio/webm',
'audio/mp4',
'audio/ogg;codecs=opus',
'audio/ogg',
];
function pickMime(): string {
for (const t of MIME_CANDIDATES) {
try {
if (typeof MediaRecorder !== 'undefined' && MediaRecorder.isTypeSupported(t)) return t;
} catch {
/* isTypeSupported can throw on some iOS versions */
}
}
return '';
}
export type VoiceInputState = 'idle' | 'recording' | 'transcribing';
/**
* Push-to-talk dictation. Records the mic, uploads to /api/voice/transcribe
* (an OpenAI-compatible speech-to-text backend via the Express proxy), and
* returns the transcript through onTranscript.
*/
export function useVoiceInput(
onTranscript: (text: string, send?: boolean) => void,
onError?: (msg: string) => void,
) {
const [state, setState] = useState<VoiceInputState>('idle');
const recorderRef = useRef<MediaRecorder | null>(null);
const chunksRef = useRef<Blob[]>([]);
const streamRef = useRef<MediaStream | null>(null);
const cancelledRef = useRef(false);
const startingRef = useRef(false);
// Whether the in-progress stop should auto-send the transcript (vs just fill the box).
const sendRef = useRef(false);
const stopTracks = () => {
streamRef.current?.getTracks().forEach((t) => t.stop());
streamRef.current = null;
};
// Stop the mic if the component unmounts mid-recording.
useEffect(() => {
cancelledRef.current = false;
return () => {
cancelledRef.current = true;
startingRef.current = false;
streamRef.current?.getTracks().forEach((t) => t.stop());
streamRef.current = null;
recorderRef.current = null;
};
}, []);
const start = useCallback(async () => {
if (startingRef.current || (recorderRef.current && recorderRef.current.state !== 'inactive')) return;
startingRef.current = true;
try {
const stream = await navigator.mediaDevices.getUserMedia({
audio: { echoCancellation: true, noiseSuppression: true },
});
if (cancelledRef.current) {
stream.getTracks().forEach((t) => t.stop());
return;
}
streamRef.current = stream;
const mimeType = pickMime();
const rec = mimeType ? new MediaRecorder(stream, { mimeType }) : new MediaRecorder(stream);
recorderRef.current = rec;
chunksRef.current = [];
rec.ondataavailable = (e) => {
if (e.data.size > 0) chunksRef.current.push(e.data);
};
rec.onstop = async () => {
stopTracks();
if (cancelledRef.current) return;
// Capture and clear the send intent for this stop before any async work.
const shouldSend = sendRef.current;
sendRef.current = false;
const type = rec.mimeType || 'audio/webm';
const blob = new Blob(chunksRef.current, { type });
if (blob.size < 800) {
setState('idle');
onError?.('Recording too short');
return;
}
setState('transcribing');
try {
const ext = type.includes('mp4') ? 'm4a' : type.includes('ogg') ? 'ogg' : 'webm';
const res = await transcribeVoice(blob, `recording.${ext}`);
if (!res.ok) throw new Error(`transcribe ${res.status}`);
const data = await res.json();
if (cancelledRef.current) return;
const text = String(data?.text || '').trim();
if (text) onTranscript(text, shouldSend);
else onError?.('No speech detected');
} catch (e) {
if (!cancelledRef.current) {
onError?.(`Transcription failed: ${e instanceof Error ? e.message : String(e)}`);
}
} finally {
if (!cancelledRef.current) setState('idle');
}
};
rec.start();
setState('recording');
} catch (e) {
recorderRef.current = null;
stopTracks();
if (cancelledRef.current) return;
const err = e as { name?: string; message?: string };
let msg = `Mic error: ${err?.message || e}`;
if (err?.name === 'NotAllowedError') msg = 'Microphone access denied.';
else if (err?.name === 'NotFoundError') msg = 'No microphone found.';
onError?.(msg);
setState('idle');
} finally {
startingRef.current = false;
}
}, [onTranscript, onError]);
// Stop recording. Pass { send: true } to auto-send the transcript once it's ready.
// Guard on the recorder's own state (not React state) so a double tap, or the mic
// and Send buttons both firing, can't call stop() on an already-inactive recorder.
const stop = useCallback((opts?: { send?: boolean }) => {
const rec = recorderRef.current;
if (rec && rec.state !== 'inactive') {
sendRef.current = opts?.send ?? false;
rec.stop();
}
}, []);
const toggle = useCallback(() => {
if (state === 'recording') stop();
else if (state === 'idle') start();
}, [state, start, stop]);
return { state, toggle, stop };
}

View File

@@ -173,7 +173,6 @@ function ChatInterface({
isDragActive,
openImagePicker,
handleSubmit,
handleVoiceTranscript,
handleInputChange,
handleKeyDown,
handlePaste,
@@ -240,7 +239,6 @@ function ChatInterface({
selectedSession,
currentSessionId,
setTokenBudget,
pendingPermissionRequests,
setPendingPermissionRequests,
streamTimerRef,
accumulatedStreamRef,
@@ -407,7 +405,6 @@ function ChatInterface({
renderInputWithMentions={renderInputWithMentions}
textareaRef={textareaRef}
input={input}
onVoiceTranscript={handleVoiceTranscript}
onInputChange={handleInputChange}
onTextareaClick={handleTextareaClick}
onTextareaKeyDown={handleKeyDown}

View File

@@ -1,5 +1,4 @@
import { useTranslation } from 'react-i18next';
import { useCallback, useEffect, useRef, useState } from 'react';
import type {
ChangeEvent,
ClipboardEvent,
@@ -10,10 +9,8 @@ import type {
RefObject,
TouchEvent,
} from 'react';
import { ImageIcon, MessageSquareIcon, XIcon, ArrowDownIcon, Loader2 } from 'lucide-react';
import { ImageIcon, MessageSquareIcon, XIcon, ArrowDownIcon } from 'lucide-react';
import { useVoiceInput } from '../../hooks/useVoiceInput';
import { useVoiceAvailable } from '../../hooks/useVoiceAvailable';
import type { SessionActivity } from '../../../../hooks/useSessionProtection';
import type { PendingPermissionRequest, PermissionMode } from '../../types/types';
import {
@@ -30,7 +27,6 @@ import {
import CommandMenu from './CommandMenu';
import ActivityIndicator from './ActivityIndicator';
import ImageAttachment from './ImageAttachment';
import VoiceInputButton from './VoiceInputButton';
import PermissionRequestsBanner from './PermissionRequestsBanner';
import TokenUsageSummary from './TokenUsageSummary';
@@ -93,7 +89,6 @@ interface ChatComposerProps {
renderInputWithMentions: (text: string) => ReactNode;
textareaRef: RefObject<HTMLTextAreaElement>;
input: string;
onVoiceTranscript?: (text: string, send?: boolean) => void;
onInputChange: (event: ChangeEvent<HTMLTextAreaElement>) => void;
onTextareaClick: (event: MouseEvent<HTMLTextAreaElement>) => void;
onTextareaKeyDown: (event: KeyboardEvent<HTMLTextAreaElement>) => void;
@@ -147,7 +142,6 @@ export default function ChatComposer({
renderInputWithMentions,
textareaRef,
input,
onVoiceTranscript,
onInputChange,
onTextareaClick,
onTextareaKeyDown,
@@ -160,28 +154,6 @@ export default function ChatComposer({
sendByCtrlEnter,
}: ChatComposerProps) {
const { t } = useTranslation('chat');
// Voice state is hosted here (not in the mic button) so the main Send button can stop
// recording and send the transcript in one tap, the way the mic button drops it in the box.
const voiceAvailable = useVoiceAvailable();
const [voiceError, setVoiceError] = useState<string | null>(null);
const voiceErrorTimer = useRef<ReturnType<typeof setTimeout> | null>(null);
const handleVoiceError = useCallback((msg: string) => {
setVoiceError(msg);
if (voiceErrorTimer.current) clearTimeout(voiceErrorTimer.current);
voiceErrorTimer.current = setTimeout(() => setVoiceError(null), 4000);
}, []);
useEffect(() => () => {
if (voiceErrorTimer.current) clearTimeout(voiceErrorTimer.current);
}, []);
const noopTranscript = useCallback(() => {}, []);
const { state: voiceState, toggle: voiceToggle, stop: voiceStop } = useVoiceInput(
onVoiceTranscript ?? noopTranscript,
handleVoiceError,
);
const isRecording = voiceState === 'recording';
const isTranscribing = voiceState === 'transcribing';
const textareaRect = textareaRef.current?.getBoundingClientRect();
const commandMenuPosition = {
top: textareaRect ? Math.max(16, textareaRect.top - 316) : 0,
@@ -337,10 +309,6 @@ export default function ChatComposer({
<ImageIcon />
</PromptInputButton>
{onVoiceTranscript && voiceAvailable && (
<VoiceInputButton state={voiceState} onToggle={voiceToggle} errorMsg={voiceError} />
)}
<button
type="button"
onClick={onModeSwitch}
@@ -419,21 +387,10 @@ export default function ChatComposer({
{sendByCtrlEnter ? t('input.hintText.ctrlEnter') : t('input.hintText.enter')}
</div>
<PromptInputSubmit
onClick={
isLoading
? onAbortSession
: isRecording
? (e: MouseEvent<HTMLButtonElement>) => {
e.preventDefault();
voiceStop({ send: true });
}
: undefined
}
disabled={isLoading ? false : isRecording ? false : isTranscribing ? true : !input.trim()}
onClick={isLoading ? onAbortSession : undefined}
disabled={!isLoading && !input.trim()}
className="h-10 w-10 sm:h-10 sm:w-10"
>
{isTranscribing ? <Loader2 className="h-4 w-4 animate-spin" /> : undefined}
</PromptInputSubmit>
/>
</div>
</PromptInputFooter>
</PromptInput>

View File

@@ -15,7 +15,6 @@ import { Reasoning, ReasoningTrigger, ReasoningContent } from '../../../../share
import { Markdown } from './Markdown';
import MessageCopyControl from './MessageCopyControl';
import MessageSpeakControl from './MessageSpeakControl';
type DiffLine = {
type: string;
@@ -416,9 +415,6 @@ const MessageComponent = memo(({ message, prevMessage, createDiff, onFileOpen, a
{shouldShowAssistantCopyControl && (
<MessageCopyControl content={assistantCopyContent} messageType="assistant" />
)}
{shouldShowAssistantCopyControl && (
<MessageSpeakControl content={assistantCopyContent} />
)}
{!isGrouped && <span>{formattedTime}</span>}
</div>
)}

View File

@@ -1,44 +0,0 @@
import { Volume2, Loader2, Square } from 'lucide-react';
import { useTranslation } from 'react-i18next';
import { useTts } from '../../hooks/useTts';
import { useVoiceAvailable } from '../../hooks/useVoiceAvailable';
// Tap-to-speak button beside the copy control on assistant messages.
// Renders nothing unless the optional voice feature is enabled.
const MessageSpeakControl = ({ content }: { content: string }) => {
const { t } = useTranslation('chat');
const available = useVoiceAvailable();
const { state, toggle, error } = useTts(() => content);
if (!available) return null;
const title =
state === 'playing' ? t('voice.stopSpeaking') : state === 'loading' ? t('voice.loading') : t('voice.speak');
return (
<span className="relative inline-flex">
{error && (
<span className="absolute bottom-full left-1/2 z-10 mb-1 max-w-[240px] -translate-x-1/2 whitespace-normal rounded bg-red-600 px-2 py-1 text-center text-xs text-white shadow-lg">
{error}
</span>
)}
<button
type="button"
onClick={toggle}
title={title}
aria-label={title}
className="inline-flex items-center gap-1 rounded px-1 py-0.5 text-gray-400 transition-colors hover:text-gray-600 dark:text-gray-500 dark:hover:text-gray-300"
>
{state === 'playing' ? (
<Square className="h-3.5 w-3.5" />
) : state === 'loading' ? (
<Loader2 className="h-3.5 w-3.5 animate-spin" />
) : (
<Volume2 className="h-3.5 w-3.5" />
)}
</button>
</span>
);
};
export default MessageSpeakControl;

View File

@@ -1,46 +0,0 @@
import { useTranslation } from 'react-i18next';
import { Mic, Square, Loader2 } from 'lucide-react';
import { PromptInputButton } from '../../../../shared/view/ui';
import type { VoiceInputState } from '../../hooks/useVoiceInput';
type Props = {
state: VoiceInputState;
onToggle: () => void;
errorMsg?: string | null;
};
// Push-to-talk mic button (presentational). Recording state and the stop-and-send action
// are owned by the composer so the main Send button can drive them too. This button just
// starts recording and, while recording, stops and drops the transcript into the input box.
export default function VoiceInputButton({ state, onToggle, errorMsg }: Props) {
const { t } = useTranslation('chat');
const icon =
state === 'recording' ? (
<Square className="text-red-500" />
) : state === 'transcribing' ? (
<Loader2 className="animate-spin" />
) : (
<Mic />
);
return (
<span className="relative inline-flex">
{errorMsg && (
<span className="absolute bottom-full left-1/2 mb-1 -translate-x-1/2 whitespace-nowrap rounded bg-red-600 px-2 py-1 text-xs text-white shadow-lg">
{errorMsg}
</span>
)}
<PromptInputButton
tooltip={{ content: state === 'recording' ? t('voice.stopRecording') : t('voice.input') }}
onClick={(e: { preventDefault: () => void }) => {
e.preventDefault();
onToggle();
}}
>
{icon}
</PromptInputButton>
</span>
);
}

View File

@@ -200,11 +200,7 @@ function MainContent({
{shouldShowBrowserTab && activeTab === 'browser' && (
<div className="h-full overflow-hidden">
<BrowserUsePanel
isVisible={activeTab === 'browser'}
projectId={selectedProject.projectId}
onShowSettings={onShowSettings}
/>
<BrowserUsePanel isVisible={activeTab === 'browser'} onShowSettings={onShowSettings} />
</div>
)}

View File

@@ -73,15 +73,7 @@ export default function MainContentTitle({
<h2 className="scrollbar-hide overflow-x-auto whitespace-nowrap text-sm font-semibold leading-tight text-foreground">
{getSessionTitle(selectedSession)}
</h2>
<div className="flex min-w-0 items-center gap-2 text-[11px] leading-tight text-muted-foreground">
<span className="min-w-0 truncate">{selectedProject.displayName}</span>
<span
className="hidden min-w-0 max-w-[45%] flex-shrink truncate border-l border-border/60 pl-2 font-mono text-[10px] sm:block"
title={selectedSession.id}
>
{selectedSession.id}
</span>
</div>
<div className="truncate text-[11px] leading-tight text-muted-foreground">{selectedProject.displayName}</div>
</div>
) : showChatNewSession ? (
<div className="min-w-0">

View File

@@ -4,7 +4,6 @@ import {
Eye,
Languages,
Maximize2,
Mic,
} from 'lucide-react';
import type { PreferenceToggleItem } from './types';
@@ -55,9 +54,4 @@ export const INPUT_SETTING_TOGGLES: PreferenceToggleItem[] = [
labelKey: 'quickSettings.sendByCtrlEnter',
icon: Languages,
},
{
key: 'voiceEnabled',
labelKey: 'quickSettings.voiceEnabled',
icon: Mic,
},
];

View File

@@ -6,8 +6,7 @@ export type PreferenceToggleKey =
| 'showRawParameters'
| 'showThinking'
| 'autoScrollToBottom'
| 'sendByCtrlEnter'
| 'voiceEnabled';
| 'sendByCtrlEnter';
export type QuickSettingsPreferences = Record<PreferenceToggleKey, boolean>;

View File

@@ -28,9 +28,6 @@ export default function QuickSettingsContent({
onPreferenceChange,
}: QuickSettingsContentProps) {
const { t } = useTranslation('settings');
const inputSettingToggles = preferences.voiceEnabled
? INPUT_SETTING_TOGGLES
: INPUT_SETTING_TOGGLES.filter(({ key }) => key !== 'voiceEnabled');
const renderToggleRows = (items: PreferenceToggleItem[]) => (
items.map(({ key, labelKey, icon }) => (
@@ -70,7 +67,7 @@ export default function QuickSettingsContent({
</QuickSettingsSection>
<QuickSettingsSection title={t('quickSettings.sections.inputSettings')}>
{renderToggleRows(inputSettingToggles)}
{renderToggleRows(INPUT_SETTING_TOGGLES)}
<p className="ml-3 text-xs text-gray-500 dark:text-gray-400">
{t('quickSettings.sendByCtrlEnterDescription')}
</p>

View File

@@ -27,14 +27,12 @@ export default function QuickSettingsPanelView() {
showThinking: preferences.showThinking,
autoScrollToBottom: preferences.autoScrollToBottom,
sendByCtrlEnter: preferences.sendByCtrlEnter,
voiceEnabled: preferences.voiceEnabled,
}), [
preferences.autoExpandTools,
preferences.autoScrollToBottom,
preferences.sendByCtrlEnter,
preferences.showRawParameters,
preferences.showThinking,
preferences.voiceEnabled,
]);
const handlePreferenceChange = useCallback(

View File

@@ -3,7 +3,7 @@ import type { Dispatch, SetStateAction } from 'react';
import type { LLMProvider } from '../../../types/app';
import type { ProviderAuthStatus } from '../../provider-auth/types';
export type SettingsMainTab = 'agents' | 'appearance' | 'git' | 'api' | 'voice' | 'tasks' | 'browser' | 'notifications' | 'plugins' | 'about';
export type SettingsMainTab = 'agents' | 'appearance' | 'git' | 'api' | 'tasks' | 'browser' | 'notifications' | 'plugins' | 'about';
export type AgentProvider = LLMProvider;
export type AgentCategory = 'account' | 'permissions' | 'mcp' | 'skills';
export type ProjectSortOrder = 'name' | 'date';

View File

@@ -7,7 +7,6 @@ import SettingsSidebar from '../view/SettingsSidebar';
import AgentsSettingsTab from '../view/tabs/agents-settings/AgentsSettingsTab';
import AppearanceSettingsTab from '../view/tabs/AppearanceSettingsTab';
import CredentialsSettingsTab from '../view/tabs/api-settings/CredentialsSettingsTab';
import VoiceSettingsTab from '../view/tabs/VoiceSettingsTab';
import GitSettingsTab from '../view/tabs/git-settings/GitSettingsTab';
import BrowserUseSettingsTab from '../view/tabs/browser-use-settings/BrowserUseSettingsTab';
import NotificationsSettingsTab from '../view/tabs/NotificationsSettingsTab';
@@ -158,8 +157,6 @@ function Settings({ isOpen, onClose, projects = [], initialTab = 'agents' }: Set
{activeTab === 'api' && <CredentialsSettingsTab />}
{activeTab === 'voice' && <VoiceSettingsTab />}
{activeTab === 'plugins' && <PluginSettingsTab />}
{activeTab === 'about' && <AboutTab />}

View File

@@ -1,6 +1,5 @@
import { Bell, Bot, GitBranch, Info, Key, ListChecks, Mic, MonitorPlay, Palette, Puzzle } from 'lucide-react';
import { Bell, Bot, GitBranch, Info, Key, ListChecks, MonitorPlay, Palette, Puzzle } from 'lucide-react';
import { useTranslation } from 'react-i18next';
import { cn } from '../../../lib/utils';
import { PillBar, Pill } from '../../../shared/view/ui';
import type { SettingsMainTab } from '../types/types';
@@ -21,7 +20,6 @@ const NAV_ITEMS: NavItem[] = [
{ id: 'appearance', labelKey: 'mainTabs.appearance', icon: Palette },
{ id: 'git', labelKey: 'mainTabs.git', icon: GitBranch },
{ id: 'api', labelKey: 'mainTabs.apiTokens', icon: Key },
{ id: 'voice', labelKey: 'mainTabs.voice', icon: Mic },
{ id: 'tasks', labelKey: 'mainTabs.tasks', icon: ListChecks },
{ id: 'browser', labelKey: 'mainTabs.browser', icon: MonitorPlay },
{ id: 'plugins', labelKey: 'mainTabs.plugins', icon: Puzzle },

View File

@@ -1,91 +0,0 @@
import type { InputHTMLAttributes } from 'react';
import { useTranslation } from 'react-i18next';
import SettingsSection from '../SettingsSection';
import SettingsToggle from '../SettingsToggle';
import { useUiPreferences } from '../../../../hooks/useUiPreferences';
import { useVoiceConfig } from '../../../../hooks/useVoiceConfig';
const inputClass =
'w-full rounded-md border border-border bg-background px-3 py-2 text-sm text-foreground placeholder:text-muted-foreground focus:outline-none focus:ring-2 focus:ring-ring';
function Field({ label, ...props }: { label: string } & InputHTMLAttributes<HTMLInputElement>) {
return (
<label className="block space-y-1">
<span className="text-sm font-medium text-foreground">{label}</span>
<input className={inputClass} {...props} />
</label>
);
}
export default function VoiceSettingsTab() {
const { t } = useTranslation('settings');
const { preferences, setPreference } = useUiPreferences();
const { config, update } = useVoiceConfig();
const voiceEnabled = preferences.voiceEnabled;
return (
<div className="space-y-8">
<SettingsSection title={t('voiceSettings.title')} description={t('voiceSettings.description')}>
<div className="flex items-center justify-between rounded-lg border border-border p-3">
<div className="pr-3">
<div className="text-sm font-medium text-foreground">{t('voiceSettings.enable')}</div>
<div className="text-xs text-muted-foreground">{t('voiceSettings.enableDescription')}</div>
</div>
<SettingsToggle
checked={voiceEnabled}
onChange={(v) => setPreference('voiceEnabled', v)}
ariaLabel={t('voiceSettings.enable')}
/>
</div>
</SettingsSection>
{voiceEnabled && (
<SettingsSection title={t('voiceSettings.backendTitle')} description={t('voiceSettings.backendDescription')}>
<div className="space-y-4">
<Field
label={t('voiceSettings.baseUrl')}
placeholder="https://api.openai.com/v1"
value={config.baseUrl}
onChange={(e) => update({ baseUrl: e.target.value })}
/>
<Field
label={t('voiceSettings.apiKey')}
type="password"
autoComplete="off"
placeholder="sk-…"
value={config.apiKey}
onChange={(e) => update({ apiKey: e.target.value })}
/>
<div className="grid grid-cols-1 gap-4 sm:grid-cols-4">
<Field
label={t('voiceSettings.sttModel')}
placeholder="whisper-1"
value={config.sttModel}
onChange={(e) => update({ sttModel: e.target.value })}
/>
<Field
label={t('voiceSettings.ttsModel')}
placeholder="tts-1"
value={config.ttsModel}
onChange={(e) => update({ ttsModel: e.target.value })}
/>
<Field
label={t('voiceSettings.voice')}
placeholder="alloy"
value={config.ttsVoice}
onChange={(e) => update({ ttsVoice: e.target.value })}
/>
<Field
label={t('voiceSettings.format')}
placeholder="mp3"
value={config.ttsFormat}
onChange={(e) => update({ ttsFormat: e.target.value })}
/>
</div>
<p className="text-xs text-muted-foreground">{t('voiceSettings.note')}</p>
</div>
</SettingsSection>
)}
</div>
);
}

View File

@@ -1,32 +1,22 @@
import { useCallback, useEffect, useState } from 'react';
import { Download, ExternalLink, Eye, Loader2, Zap } from 'lucide-react';
import { Download, Loader2 } from 'lucide-react';
import { Button, Input } from '../../../../../shared/view/ui';
import { Button } from '../../../../../shared/view/ui';
import { authenticatedFetch } from '../../../../../utils/api';
import SettingsCard from '../../SettingsCard';
import SettingsRow from '../../SettingsRow';
import SettingsSection from '../../SettingsSection';
import SettingsToggle from '../../SettingsToggle';
const BROWSER_USE_GUIDE_URL = 'https://cloudcli.ai/docs/browser-use';
type BrowserUseSettings = {
enabled: boolean;
persistSessions: boolean;
defaultProfileName: string;
browserBackend: 'playwright' | 'camoufox-vnc';
};
type BrowserUseStatus = {
enabled: boolean;
available: boolean;
backend: 'playwright' | 'camoufox-vnc';
browserBackend: 'playwright' | 'camoufox-vnc';
playwrightInstalled: boolean;
chromiumInstalled: boolean;
camoufoxInstalled: boolean;
noVncInstalled: boolean;
x11vncInstalled: boolean;
installInProgress: boolean;
message: string;
};
@@ -42,20 +32,16 @@ async function readJson<T>(response: Response): Promise<T> {
export default function BrowserUseSettingsTab() {
const [settings, setSettings] = useState<BrowserUseSettings | null>(null);
const [status, setStatus] = useState<BrowserUseStatus | null>(null);
const [hasLoadedSettings, setHasLoadedSettings] = useState(false);
const [isSettingsLoading, setIsSettingsLoading] = useState(true);
const [isStatusLoading, setIsStatusLoading] = useState(true);
const [isSaving, setIsSaving] = useState(false);
const [isInstalling, setIsInstalling] = useState(false);
const [error, setError] = useState<string | null>(null);
const [profileNameDraft, setProfileNameDraft] = useState('default');
const loadSettings = useCallback(async () => {
const settingsResponse = await authenticatedFetch('/api/browser-use/settings');
const settingsData = await readJson<{ data: { settings: BrowserUseSettings } }>(settingsResponse);
setSettings(settingsData.data.settings);
setHasLoadedSettings(true);
setProfileNameDraft(settingsData.data.settings.defaultProfileName || 'default');
}, []);
const loadStatus = useCallback(async () => {
@@ -66,7 +52,6 @@ export default function BrowserUseSettingsTab() {
useEffect(() => {
setError(null);
setHasLoadedSettings(false);
setIsSettingsLoading(true);
setIsStatusLoading(true);
@@ -89,7 +74,6 @@ export default function BrowserUseSettingsTab() {
});
const data = await readJson<{ data: { settings: BrowserUseSettings } }>(response);
setSettings(data.data.settings);
setHasLoadedSettings(true);
window.dispatchEvent(new Event('browserUseSettingsChanged'));
setIsStatusLoading(true);
await loadStatus();
@@ -117,21 +101,8 @@ export default function BrowserUseSettingsTab() {
}
};
const saveProfileName = async () => {
const nextName = profileNameDraft.trim() || 'default';
setProfileNameDraft(nextName);
if (nextName === settings?.defaultProfileName) {
return;
}
await updateSettings({ defaultProfileName: nextName });
};
const browserEnabled = settings?.enabled === true;
const browserDisabled = hasLoadedSettings && settings?.enabled === false;
const persistSessions = settings?.persistSessions === true;
const selectedBackend = settings?.browserBackend || 'playwright';
const effectiveBackend = status?.backend || 'playwright';
const needsBrowserBinaries = Boolean(browserEnabled && status && !status.available);
const needsBrowserBinaries = Boolean(browserEnabled && status && (!status.playwrightInstalled || !status.chromiumInstalled));
const runtimeLabel = (installed?: boolean) => {
if (isStatusLoading && !status) {
return 'checking...';
@@ -143,165 +114,33 @@ export default function BrowserUseSettingsTab() {
<div className="space-y-8">
<SettingsSection
title="Browser"
description="Give coding agents a working browser so they can open websites, test flows, capture screenshots, and help debug what users actually see."
description="Allow agents to create guarded Playwright browser sessions that you can monitor from the Browser tab."
>
<SettingsCard divided>
<SettingsRow
label="Give Agents Browser Access"
description="Let agents use a browser during coding tasks while you can watch live sessions, open them in a tab, and stop them at any time."
label="Enable Browser"
description="Registers Browser for supported agents. Agents can create browser sessions; you can watch, stop, and delete them."
>
{isSettingsLoading && !hasLoadedSettings ? (
{isSettingsLoading && !settings ? (
<Loader2 className="h-4 w-4 animate-spin text-muted-foreground" />
) : hasLoadedSettings ? (
) : (
<SettingsToggle
checked={browserEnabled}
onChange={(value) => void updateSettings({ enabled: value })}
ariaLabel="Give Agents Browser Access"
ariaLabel="Enable Browser"
disabled={isSaving}
/>
) : (
<span className="text-sm text-muted-foreground">Unavailable</span>
)}
</SettingsRow>
{browserDisabled && (
<div className="px-4 py-4">
<a
href={BROWSER_USE_GUIDE_URL}
target="_blank"
rel="noopener noreferrer"
className="inline-flex items-center gap-1.5 text-sm font-medium text-primary hover:underline"
>
Read the Browser guide
<ExternalLink className="h-3.5 w-3.5" />
</a>
</div>
)}
{error && (
<div className="px-4 py-4">
<div className="rounded-md border border-red-200 bg-red-50 px-3 py-2 text-sm text-red-700 dark:border-red-900/50 dark:bg-red-950/30 dark:text-red-200">
{error}
</div>
</div>
)}
{browserEnabled && (
<>
<div className="space-y-3 px-4 py-4">
<div className="min-w-0">
<div className="text-sm font-medium text-foreground">Browser Engine</div>
<div className="mt-0.5 text-sm text-muted-foreground">
Pick the kind of browser experience agents should use for new sessions.
</div>
</div>
<div className="grid gap-2 sm:grid-cols-2">
{([
{
value: 'playwright' as const,
label: 'Playwright',
description: 'Best for quick checks, screenshots, and automated page interaction when no manual login is needed.',
icon: Zap,
},
{
value: 'camoufox-vnc' as const,
label: 'Camoufox + noVNC',
description: 'Best when a person may need to log in, approve a step, or watch the browser session live.',
icon: Eye,
},
]).map((option) => {
const Icon = option.icon;
const selected = selectedBackend === option.value;
return (
<button
key={option.value}
type="button"
onClick={() => void updateSettings({ browserBackend: option.value })}
disabled={isSaving || isSettingsLoading}
className={[
'group flex min-h-[88px] items-start gap-3 rounded-lg border px-3 py-3 text-left transition-colors',
'focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 focus-visible:ring-offset-background',
selected
? 'border-primary bg-primary/5 text-foreground shadow-sm'
: 'border-border bg-background hover:border-foreground/20 hover:bg-muted/40',
(isSaving || isSettingsLoading) ? 'cursor-not-allowed opacity-60' : '',
].join(' ')}
aria-pressed={selected}
>
<span className={[
'mt-0.5 flex h-8 w-8 flex-shrink-0 items-center justify-center rounded-md border',
selected ? 'border-primary/30 bg-primary/10 text-primary' : 'border-border bg-muted/40 text-muted-foreground',
].join(' ')}
>
<Icon className="h-4 w-4" />
</span>
<span className="min-w-0">
<span className="block text-sm font-medium">{option.label}</span>
<span className="mt-1 block text-xs leading-relaxed text-muted-foreground">{option.description}</span>
</span>
</button>
);
})}
</div>
</div>
<SettingsRow
label="Remember Browser Logins"
description="Keep cookies and site storage in a named profile so agents can reuse signed-in sessions instead of starting from scratch."
>
{isSettingsLoading && !settings ? (
<Loader2 className="h-4 w-4 animate-spin text-muted-foreground" />
) : (
<SettingsToggle
checked={persistSessions}
onChange={(value) => void updateSettings({ persistSessions: value })}
ariaLabel="Remember Browser Logins"
disabled={isSaving}
/>
)}
</SettingsRow>
{persistSessions && (
<SettingsRow
label="Default Browser Profile"
description="New browser sessions use this profile by default, so saved logins stay tied to a predictable workspace."
>
<Input
value={profileNameDraft}
onChange={(event) => setProfileNameDraft(event.target.value)}
onBlur={() => void saveProfileName()}
onKeyDown={(event) => {
if (event.key === 'Enter') {
event.currentTarget.blur();
}
}}
disabled={isSaving || isSettingsLoading}
className="w-40"
aria-label="Default Browser Profile"
/>
</SettingsRow>
)}
</>
)}
{browserEnabled && (
<div className="space-y-4 px-4 py-4">
<div className="flex flex-wrap gap-2 text-xs text-muted-foreground">
<span className="rounded-md border border-border px-2 py-1">
Backend: {effectiveBackend === 'camoufox-vnc' ? 'Camoufox + noVNC' : 'Playwright'}
</span>
<span className="rounded-md border border-border px-2 py-1">
Playwright: {runtimeLabel(status?.playwrightInstalled)}
</span>
<span className="rounded-md border border-border px-2 py-1">
Chromium: {runtimeLabel(status?.chromiumInstalled)}
</span>
<span className="rounded-md border border-border px-2 py-1">
Camoufox: {runtimeLabel(status?.camoufoxInstalled)}
</span>
<span className="rounded-md border border-border px-2 py-1">
noVNC: {runtimeLabel(status?.noVncInstalled)}
</span>
<span className="rounded-md border border-border px-2 py-1">
Status: {isStatusLoading && !status ? 'checking...' : status?.available ? 'ready' : browserEnabled ? 'setup required' : 'disabled'}
</span>
@@ -333,17 +172,12 @@ export default function BrowserUseSettingsTab() {
</div>
)}
<a
href={BROWSER_USE_GUIDE_URL}
target="_blank"
rel="noopener noreferrer"
className="inline-flex items-center gap-1.5 text-sm font-medium text-primary hover:underline"
>
Read the Browser guide
<ExternalLink className="h-3.5 w-3.5" />
</a>
{error && (
<div className="rounded-md border border-red-200 bg-red-50 px-3 py-2 text-sm text-red-700 dark:border-red-900/50 dark:bg-red-950/30 dark:text-red-200">
{error}
</div>
)}
</div>
)}
</SettingsCard>
</SettingsSection>
</div>

View File

@@ -7,7 +7,6 @@ type UiPreferences = {
autoScrollToBottom: boolean;
sendByCtrlEnter: boolean;
sidebarVisible: boolean;
voiceEnabled: boolean;
};
type UiPreferenceKey = keyof UiPreferences;
@@ -40,7 +39,6 @@ const DEFAULTS: UiPreferences = {
autoScrollToBottom: true,
sendByCtrlEnter: false,
sidebarVisible: true,
voiceEnabled: false,
};
const PREFERENCE_KEYS = Object.keys(DEFAULTS) as UiPreferenceKey[];

View File

@@ -1,68 +0,0 @@
import { useState } from 'react';
export type VoiceConfig = {
baseUrl: string;
apiKey: string;
sttModel: string;
ttsModel: string;
ttsVoice: string;
ttsFormat: string;
};
const STORAGE_KEY = 'voiceConfig';
export const VOICE_CONFIG_SYNC_EVENT = 'voice-config:sync';
const DEFAULTS: VoiceConfig = { baseUrl: '', apiKey: '', sttModel: '', ttsModel: '', ttsVoice: '', ttsFormat: '' };
export function readVoiceConfig(): VoiceConfig {
try {
const raw = localStorage.getItem(STORAGE_KEY);
if (!raw) return { ...DEFAULTS };
const parsed = JSON.parse(raw);
if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) return { ...DEFAULTS };
const config = { ...DEFAULTS };
for (const key of Object.keys(DEFAULTS) as (keyof VoiceConfig)[]) {
if (typeof parsed[key] === 'string') config[key] = parsed[key];
}
return config;
} catch {
return { ...DEFAULTS };
}
}
// Headers the voice proxy reads to target a per-user OpenAI-compatible backend.
// Empty fields are omitted so the server's env defaults apply.
export function voiceConfigHeaders(): Record<string, string> {
if (typeof window === 'undefined') return {};
const c = readVoiceConfig();
const h: Record<string, string> = {};
if (c.apiKey) h['x-voice-api-key'] = c.apiKey;
if (c.sttModel) h['x-voice-stt-model'] = c.sttModel;
if (c.ttsModel) h['x-voice-tts-model'] = c.ttsModel;
if (c.ttsVoice) h['x-voice-tts-voice'] = c.ttsVoice;
if (c.ttsFormat.trim()) h['x-voice-tts-format'] = c.ttsFormat.trim();
return h;
}
export function useVoiceConfig() {
const [config, setConfig] = useState<VoiceConfig>(() =>
typeof window === 'undefined' ? { ...DEFAULTS } : readVoiceConfig(),
);
const update = (patch: Partial<VoiceConfig>) => {
setConfig((prev) => {
const next = { ...prev, ...patch };
try {
const stored: Partial<VoiceConfig> = { ...next };
if (next.ttsFormat.trim()) stored.ttsFormat = next.ttsFormat.trim();
else delete stored.ttsFormat;
localStorage.setItem(STORAGE_KEY, JSON.stringify(stored));
window.dispatchEvent(new Event(VOICE_CONFIG_SYNC_EVENT));
} catch {
/* ignore persistence errors */
}
return next;
});
};
return { config, update };
}

View File

@@ -122,14 +122,6 @@
}
}
},
"voice": {
"input": "Voice input",
"stopRecording": "Stop recording",
"transcribing": "Transcribing…",
"speak": "Read aloud",
"stopSpeaking": "Stop",
"loading": "Loading…"
},
"input": {
"placeholder": "Type / for commands, @ for files, or ask {{provider}} anything...",
"placeholderDefault": "Type your message...",

View File

@@ -50,21 +50,6 @@
"resetToDefaults": "Reset to Defaults",
"cancelChanges": "Cancel Changes"
},
"voiceSettings": {
"title": "Voice",
"description": "Speech-to-text input and read-aloud, via an OpenAI-compatible audio backend.",
"enable": "Enable voice",
"enableDescription": "Show the mic button and the read-aloud button on messages.",
"backendTitle": "Backend",
"backendDescription": "Point at OpenAI, Groq, or a local server (LocalAI, Speaches, Kokoro-FastAPI). Leave blank to use the server default.",
"baseUrl": "Base URL",
"apiKey": "API key",
"sttModel": "Speech-to-text model",
"ttsModel": "Text-to-speech model",
"voice": "Voice",
"format": "Audio format",
"note": "A custom base URL is called directly by your browser and must allow browser CORS requests. Leave it blank to use the server-configured backend."
},
"quickSettings": {
"title": "Quick Settings",
"sections": {
@@ -79,7 +64,6 @@
"showThinking": "Show thinking",
"autoScrollToBottom": "Auto-scroll to bottom",
"sendByCtrlEnter": "Send by Ctrl+Enter",
"voiceEnabled": "Voice (mic + read aloud)",
"sendByCtrlEnterDescription": "When enabled, pressing Ctrl+Enter will send the message instead of just Enter. This is useful for IME users to avoid accidental sends.",
"dragHandle": {
"dragging": "Dragging handle",
@@ -110,7 +94,6 @@
"appearance": "Appearance",
"git": "Git",
"apiTokens": "API & Tokens",
"voice": "Voice",
"tasks": "Tasks",
"browser": "Browser",
"notifications": "Notifications",
@@ -131,7 +114,7 @@
},
"sound": {
"title": "Sound",
"description": "Play a short tone when a chat run finishes or needs tool approval.",
"description": "Play a short tone when a chat run finishes.",
"enabled": "Enabled",
"test": "Test sound"
},

View File

@@ -1,60 +0,0 @@
import { authenticatedFetch } from '../utils/api';
import { readVoiceConfig, voiceConfigHeaders } from '../hooks/useVoiceConfig';
function directUrl(baseUrl: string, path: string): string {
return `${baseUrl.replace(/\/$/, '')}${path}`;
}
export function voiceConfigSignature(): string {
return JSON.stringify(readVoiceConfig());
}
export function transcribeVoice(blob: Blob, filename: string): Promise<Response> {
const config = readVoiceConfig();
const body = new FormData();
if (config.baseUrl.trim()) {
body.append('file', blob, filename);
body.append('model', config.sttModel || 'whisper-1');
return fetch(directUrl(config.baseUrl.trim(), '/audio/transcriptions'), {
method: 'POST',
headers: config.apiKey ? { Authorization: `Bearer ${config.apiKey}` } : {},
body,
});
}
body.append('audio', blob, filename);
return authenticatedFetch('/api/voice/transcribe', {
method: 'POST',
headers: voiceConfigHeaders(),
body,
});
}
export function synthesizeVoice(text: string, signal: AbortSignal): Promise<Response> {
const config = readVoiceConfig();
if (config.baseUrl.trim()) {
return fetch(directUrl(config.baseUrl.trim(), '/audio/speech'), {
method: 'POST',
headers: {
'Content-Type': 'application/json',
...(config.apiKey ? { Authorization: `Bearer ${config.apiKey}` } : {}),
},
body: JSON.stringify({
model: config.ttsModel || 'tts-1',
voice: config.ttsVoice || 'alloy',
input: text,
...(config.ttsFormat.trim() ? { response_format: config.ttsFormat.trim() } : {}),
}),
signal,
});
}
return authenticatedFetch('/api/voice/tts', {
method: 'POST',
body: JSON.stringify({ text }),
headers: voiceConfigHeaders(),
signal,
});
}

View File

@@ -1,196 +0,0 @@
import { synthesizeVoice, voiceConfigSignature } from './voiceApi';
// A single app-level audio player for read-aloud. It owns one <audio> element, lives
// outside the React tree, and caches generated audio by content. Because playback is not
// tied to a component, switching chats or re-rendering a message can't revoke the blob URL
// out from under it (the cause of mid-play cutoffs). v1 plays one message at a time
// (a new play replaces the current one); the design leaves room for a queue later.
export type VoicePlayState = 'idle' | 'loading' | 'playing';
export type VoiceSnapshot = { state: VoicePlayState; error: string | null };
const IDLE: VoiceSnapshot = { state: 'idle', error: null };
const CACHE_MAX = 24;
const CLIENT_TIMEOUT_MS = 330000; // backstop; the server proxy already times out at 5 min
// Stable id / cache key from the text and voice settings that affect its audio (djb2).
export function voiceId(content: string, signature = voiceConfigSignature()): string {
const input = JSON.stringify([content, signature]);
let h = 5381;
for (let i = 0; i < input.length; i++) h = (((h << 5) + h) + input.charCodeAt(i)) | 0;
return (h >>> 0).toString(36);
}
class VoicePlayer {
private audio: HTMLAudioElement | null = null;
private unlocked = false;
private cache = new Map<string, string>(); // id -> blob URL (insertion order = LRU)
private currentId: string | null = null;
private state: VoicePlayState = 'idle';
private errorId: string | null = null;
private errorMsg: string | null = null;
private token = 0; // bumps to ignore stale in-flight results
private activeController: AbortController | null = null; // aborts the in-flight TTS fetch
private errorTimer: ReturnType<typeof setTimeout> | null = null;
private listeners = new Set<() => void>();
subscribe(listener: () => void): () => void {
this.listeners.add(listener);
return () => {
this.listeners.delete(listener);
};
}
private emit() {
this.listeners.forEach((l) => l());
}
getSnapshot(id: string): VoiceSnapshot {
const state = this.currentId === id ? this.state : 'idle';
const error = this.errorId === id ? this.errorMsg : null;
if (state === 'idle' && error === null) return IDLE;
return { state, error };
}
private ensureAudio(): HTMLAudioElement {
if (!this.audio) {
const audio = new Audio();
audio.addEventListener('ended', () => this.onEnded());
audio.addEventListener('error', () => {
// Only meaningful while we believe we're playing.
if (this.state === 'playing') this.onEnded();
});
this.audio = audio;
}
return this.audio;
}
// Call synchronously from the click handler so iOS grants the (reused) element playback.
unlock() {
if (this.unlocked) return;
const audio = this.ensureAudio();
try {
const p = audio.play();
if (p && typeof p.catch === 'function') p.catch(() => {});
audio.pause();
} catch {
/* priming attempt; ignore */
}
this.unlocked = true;
}
toggle(content: string) {
const id = voiceId(content);
if (this.currentId === id && (this.state === 'playing' || this.state === 'loading')) {
this.stop();
return;
}
void this.play(id, content);
}
stop() {
this.token++; // ignore any stale in-flight result
this.abortActive(); // and actually cancel the network request
if (this.audio) this.audio.pause();
this.state = 'idle';
this.currentId = null;
this.emit();
}
private abortActive() {
if (this.activeController) {
this.activeController.abort();
this.activeController = null;
}
}
private onEnded() {
this.state = 'idle';
this.currentId = null;
this.emit();
// (queue auto-advance would hook in here)
}
private setError(id: string, msg: string) {
this.state = 'idle';
this.currentId = id;
this.errorId = id;
this.errorMsg = msg;
this.emit();
if (this.errorTimer) clearTimeout(this.errorTimer);
this.errorTimer = setTimeout(() => {
if (this.errorId === id) {
this.errorId = null;
this.errorMsg = null;
if (this.currentId === id) this.currentId = null;
this.emit();
}
}, 6000);
}
private async play(id: string, content: string) {
const audio = this.ensureAudio();
audio.pause();
this.currentId = id;
this.errorId = null;
this.errorMsg = null;
this.state = 'loading';
this.emit();
const myToken = ++this.token;
this.abortActive(); // cancel any request this play supersedes
try {
let url = this.cache.get(id);
if (!url) {
const controller = new AbortController();
this.activeController = controller;
const timer = setTimeout(() => controller.abort(), CLIENT_TIMEOUT_MS);
const res = await synthesizeVoice(content, controller.signal).finally(() => {
clearTimeout(timer);
if (this.activeController === controller) this.activeController = null;
});
if (myToken !== this.token) return; // superseded by another play/stop
if (!res.ok) {
let msg = `Read-aloud failed (${res.status})`;
try {
const j = await res.json();
if (j?.error) msg = String(j.error);
} catch {
/* non-JSON error body */
}
throw new Error(msg);
}
const blob = await res.blob();
if (myToken !== this.token) return;
url = URL.createObjectURL(blob);
this.cacheSet(id, url);
}
if (myToken !== this.token) return;
audio.src = url;
audio.load();
await audio.play();
if (myToken !== this.token) return;
this.state = 'playing';
this.emit();
} catch (e) {
if (myToken !== this.token) return;
const aborted = e instanceof Error && e.name === 'AbortError';
this.setError(id, aborted ? 'Read-aloud timed out.' : e instanceof Error ? e.message : 'Read-aloud failed');
}
}
private cacheSet(id: string, url: string) {
this.cache.set(id, url);
while (this.cache.size > CACHE_MAX) {
const oldest = this.cache.keys().next().value as string | undefined;
if (oldest === undefined) break;
const oldUrl = this.cache.get(oldest);
this.cache.delete(oldest);
if (oldUrl && oldUrl !== this.audio?.src) URL.revokeObjectURL(oldUrl);
}
}
}
export const voicePlayer = new VoicePlayer();

View File

@@ -58,7 +58,7 @@ const playTone = (
oscillator.stop(startsAt + duration + 0.02);
};
export const playNotificationSound = async ({ force = false } = {}): Promise<void> => {
export const playChatCompletionSound = async ({ force = false } = {}): Promise<void> => {
if (!force && !isNotificationSoundEnabled()) {
return;
}
@@ -81,5 +81,3 @@ export const playNotificationSound = async ({ force = false } = {}): Promise<voi
console.warn('Unable to play notification sound:', error);
}
};
export const playChatCompletionSound = (options = {}): Promise<void> => playNotificationSound(options);