Add browser use as MCP to providers (#889)

This commit is contained in:
Simos Mikelatos
2026-06-17 22:06:17 +02:00
committed by GitHub
parent c03ddb25fe
commit e88539170e
39 changed files with 5480 additions and 117 deletions

384
server/browser-use-mcp.ts Normal file
View File

@@ -0,0 +1,384 @@
#!/usr/bin/env node
import './load-env.js';
/** Shape of a JSON-RPC 2.0 message read from stdin, one message per line. */
type JsonRpcRequest = {
jsonrpc: '2.0';
// Omitted on notifications; the response writers below emit nothing when the id is undefined.
id?: string | number | null;
method: string;
params?: Record<string, unknown>;
};
/** Descriptor for one tool as returned by the `tools/list` method. */
type ToolDefinition = {
name: string;
description: string;
// JSON-Schema-style object describing the arguments the tool accepts.
inputSchema: Record<string, unknown>;
};
/** Wraps a plain string in the `{ content: [...] }` envelope used for tool results. */
const textResponse = (text: string) => {
  const part = { type: 'text', text };
  return { content: [part] };
};
/** Serialises any value as 2-space-indented JSON and wraps it as a text tool result. */
const jsonResponse = (value: unknown) => {
  const pretty = JSON.stringify(value, null, 2);
  return textResponse(pretty);
};
/**
 * Reads a required string argument.
 *
 * @param value Raw argument value of unknown type.
 * @param name Argument name used in the error message.
 * @returns The value with surrounding whitespace removed.
 * @throws Error when the value is not a string or is blank after trimming.
 */
const readString = (value: unknown, name: string): string => {
  const trimmed = typeof value === 'string' ? value.trim() : '';
  if (trimmed === '') {
    throw new Error(`${name} is required.`);
  }
  return trimmed;
};
/** Returns the trimmed string, or `undefined` when the value is not a string or is blank. */
const readOptionalString = (value: unknown): string | undefined => {
  if (typeof value !== 'string') {
    return undefined;
  }
  const trimmed = value.trim();
  return trimmed === '' ? undefined : trimmed;
};
/** Returns the value when it is a finite number; anything else (including NaN and ±Infinity) yields `undefined`. */
const readNumber = (value: unknown): number | undefined => {
  if (typeof value !== 'number') {
    return undefined;
  }
  return Number.isFinite(value) ? value : undefined;
};
// Base URL of the bridge API. One trailing slash is stripped so paths can be appended with '/'.
const apiUrl = (process.env.CLOUDCLI_BROWSER_USE_API_URL || 'http://127.0.0.1:3001/api/browser-use-mcp').replace(/\/$/, '');
// Bearer token sent with every bridge request; an empty string means "not configured".
const apiToken = process.env.CLOUDCLI_BROWSER_USE_MCP_TOKEN || '';
const DEFAULT_API_TIMEOUT_MS = 60000;
/**
 * Parses the request timeout from its environment string.
 * Non-numeric, zero or negative input falls back to the default so a typo in the
 * environment can never hand NaN to `AbortSignal.timeout`.
 */
const parseApiTimeoutMs = (raw: string | undefined): number => {
  const parsed = Number.parseInt(raw || '', 10);
  return Number.isFinite(parsed) && parsed > 0 ? parsed : DEFAULT_API_TIMEOUT_MS;
};
const API_TIMEOUT_MS = parseApiTimeoutMs(process.env.CLOUDCLI_BROWSER_USE_API_TIMEOUT_MS);
/**
 * Calls one tool endpoint of the bridge API and returns its `data` payload.
 *
 * @param toolName Tool identifier; URL-encoded into the request path.
 * @param input JSON-serialisable tool arguments sent as the POST body.
 * @returns The `data` field of the API response.
 * @throws Error when the token is not configured, the request times out, the server
 *   answers with a non-2xx status or `success: false`, or a 2xx body is not JSON.
 */
async function callBrowserUseApi(toolName: string, input: Record<string, unknown>) {
  if (!apiToken) {
    throw new Error('CLOUDCLI_BROWSER_USE_MCP_TOKEN is not configured.');
  }
  const response = await fetch(`${apiUrl}/tools/${encodeURIComponent(toolName)}`, {
    method: 'POST',
    headers: {
      Authorization: `Bearer ${apiToken}`,
      'Content-Type': 'application/json',
    },
    body: JSON.stringify(input),
    signal: AbortSignal.timeout(API_TIMEOUT_MS),
  });
  // Read the body as text first: error pages are often HTML or empty, and
  // response.json() would replace the useful HTTP status with a bare SyntaxError.
  const rawBody = await response.text();
  let data: { success?: boolean; data?: unknown; error?: string } = {};
  if (rawBody.trim() !== '') {
    try {
      const parsed: unknown = JSON.parse(rawBody);
      if (parsed !== null && typeof parsed === 'object') {
        data = parsed as { success?: boolean; data?: unknown; error?: string };
      }
    } catch {
      if (response.ok) {
        throw new Error(`Browser API returned a non-JSON response (${response.status}).`);
      }
      // Non-2xx with an unparseable body falls through to the status-based error below.
    }
  }
  if (!response.ok || data.success === false) {
    throw new Error(data.error || `Browser API request failed (${response.status})`);
  }
  return data.data;
}
// Shared input schema for the tools whose only argument is the target session id
// (referenced by several entries of the `tools` list below).
const sessionIdSchema = {
type: 'object',
properties: {
sessionId: { type: 'string', description: 'Browser session id.' },
},
required: ['sessionId'],
};
// Catalogue of tools advertised through `tools/list`. Every name here has a matching
// `case` in callTool, which forwards the call to the bridge API under the same name.
const tools: ToolDefinition[] = [
// --- Session lifecycle -------------------------------------------------------
{
name: 'browser_create_session',
description: 'Create a temporary Browser session that the agent can control. Optionally provide a background profileName to reuse cookies and storage.',
inputSchema: {
type: 'object',
properties: {
profileName: { type: 'string', description: 'Optional background profile name for persistent browser storage.' },
},
},
},
{
name: 'browser_list_sessions',
description: 'List Browser sessions currently available to agents.',
inputSchema: { type: 'object', properties: {} },
},
// --- Page inspection ---------------------------------------------------------
{
name: 'browser_snapshot',
description: 'Capture current page metadata, screenshot data URL, and visible body text for a Browser session.',
inputSchema: sessionIdSchema,
},
{
name: 'browser_take_screenshot',
description: 'Capture the latest screenshot for a Browser session.',
inputSchema: sessionIdSchema,
},
// --- Navigation and input ----------------------------------------------------
{
name: 'browser_navigate',
description: 'Navigate a Browser session to an HTTP or HTTPS URL.',
inputSchema: {
type: 'object',
properties: {
sessionId: { type: 'string' },
url: { type: 'string' },
},
required: ['sessionId', 'url'],
},
},
{
name: 'browser_click',
description: 'Click an element by CSS selector, visible text, or x/y coordinates.',
// Only sessionId is marked required; selector, text and x/y are alternative ways to pick the target.
inputSchema: {
type: 'object',
properties: {
sessionId: { type: 'string' },
selector: { type: 'string' },
text: { type: 'string' },
x: { type: 'number' },
y: { type: 'number' },
},
required: ['sessionId'],
},
},
{
name: 'browser_type',
description: 'Type text into the focused page or fill a CSS selector. Set submit to press Enter after typing.',
inputSchema: {
type: 'object',
properties: {
sessionId: { type: 'string' },
selector: { type: 'string' },
text: { type: 'string' },
submit: { type: 'boolean' },
},
required: ['sessionId', 'text'],
},
},
{
name: 'browser_fill_form',
description: 'Fill multiple form fields using CSS selectors.',
inputSchema: {
type: 'object',
properties: {
sessionId: { type: 'string' },
fields: {
type: 'array',
items: {
type: 'object',
properties: {
selector: { type: 'string' },
value: { type: 'string' },
},
required: ['selector', 'value'],
},
},
},
required: ['sessionId', 'fields'],
},
},
{
name: 'browser_press_key',
description: 'Press a keyboard key, for example Enter, Escape, Tab, or Control+A.',
inputSchema: {
type: 'object',
properties: {
sessionId: { type: 'string' },
key: { type: 'string' },
},
required: ['sessionId', 'key'],
},
},
{
name: 'browser_select_option',
description: 'Select option values in a select element found by CSS selector.',
inputSchema: {
type: 'object',
properties: {
sessionId: { type: 'string' },
selector: { type: 'string' },
values: { type: 'array', items: { type: 'string' } },
},
required: ['sessionId', 'selector', 'values'],
},
},
// --- Waiting and tabs --------------------------------------------------------
{
name: 'browser_wait_for',
description: 'Wait for visible text, a URL pattern, or a short timeout.',
inputSchema: {
type: 'object',
properties: {
sessionId: { type: 'string' },
text: { type: 'string' },
url: { type: 'string' },
timeoutMs: { type: 'number' },
},
required: ['sessionId'],
},
},
{
name: 'browser_tabs',
description: 'List, open, select, or close tabs in a Browser session.',
inputSchema: {
type: 'object',
properties: {
sessionId: { type: 'string' },
action: { type: 'string', enum: ['list', 'new', 'select', 'close'] },
index: { type: 'number' },
url: { type: 'string' },
},
required: ['sessionId'],
},
},
// --- Teardown ----------------------------------------------------------------
{
name: 'browser_close_session',
description: 'Stop a Browser session controlled by agents.',
inputSchema: sessionIdSchema,
},
];
/**
 * Reads a required string argument exactly as supplied (no trimming).
 * Used for text that is typed into the page, where leading/trailing whitespace is
 * meaningful and, for form values, an empty string is a legitimate "clear this field".
 *
 * @throws Error when the value is not a string, or is empty while `allowEmpty` is false.
 */
const readVerbatimString = (value: unknown, name: string, allowEmpty = false): string => {
  if (typeof value !== 'string' || (!allowEmpty && value === '')) {
    throw new Error(`${name} is required.`);
  }
  return value;
};

/**
 * Validates the arguments of one tool call and forwards it to the bridge API.
 *
 * @param name Tool name as advertised in `tools`.
 * @param args Raw, untrusted arguments from the `tools/call` request.
 * @returns The API result wrapped as a JSON text tool response.
 * @throws Error for an unknown tool, a missing/invalid required argument, or an API failure.
 */
async function callTool(name: string, args: Record<string, unknown>) {
  switch (name) {
    case 'browser_create_session':
      return jsonResponse(await callBrowserUseApi(name, {
        profileName: readOptionalString(args.profileName),
      }));
    case 'browser_list_sessions':
      return jsonResponse(await callBrowserUseApi(name, {}));
    case 'browser_snapshot':
    case 'browser_take_screenshot':
    case 'browser_close_session':
      // These three take nothing but the session id.
      return jsonResponse(await callBrowserUseApi(name, { sessionId: readString(args.sessionId, 'sessionId') }));
    case 'browser_navigate':
      return jsonResponse(await callBrowserUseApi(name, {
        sessionId: readString(args.sessionId, 'sessionId'),
        url: readString(args.url, 'url'),
      }));
    case 'browser_click':
      return jsonResponse(await callBrowserUseApi(name, {
        sessionId: readString(args.sessionId, 'sessionId'),
        selector: readOptionalString(args.selector),
        text: readOptionalString(args.text),
        x: readNumber(args.x),
        y: readNumber(args.y),
      }));
    case 'browser_type':
      return jsonResponse(await callBrowserUseApi(name, {
        sessionId: readString(args.sessionId, 'sessionId'),
        selector: readOptionalString(args.selector),
        // Typed text is passed through untouched: trimming would silently drop
        // spaces the caller asked for and reject whitespace-only input.
        text: readVerbatimString(args.text, 'text'),
        submit: args.submit === true,
      }));
    case 'browser_fill_form': {
      const fields = Array.isArray(args.fields)
        ? args.fields.map((field: unknown) => {
            // Guard against null/primitive entries so the caller gets a clear message
            // instead of a "cannot read properties of null" TypeError.
            if (typeof field !== 'object' || field === null) {
              throw new Error('Each fields entry must be an object with selector and value.');
            }
            const record = field as Record<string, unknown>;
            return {
              selector: readString(record.selector, 'field.selector'),
              // An empty value is allowed so a field can be cleared.
              value: readVerbatimString(record.value, 'field.value', true),
            };
          })
        : [];
      return jsonResponse(await callBrowserUseApi(name, {
        sessionId: readString(args.sessionId, 'sessionId'),
        fields,
      }));
    }
    case 'browser_press_key':
      return jsonResponse(await callBrowserUseApi(name, {
        sessionId: readString(args.sessionId, 'sessionId'),
        key: readString(args.key, 'key'),
      }));
    case 'browser_select_option':
      return jsonResponse(await callBrowserUseApi(name, {
        sessionId: readString(args.sessionId, 'sessionId'),
        selector: readString(args.selector, 'selector'),
        // Non-string entries are dropped rather than rejected.
        values: Array.isArray(args.values) ? args.values.filter((value): value is string => typeof value === 'string') : [],
      }));
    case 'browser_wait_for':
      return jsonResponse(await callBrowserUseApi(name, {
        sessionId: readString(args.sessionId, 'sessionId'),
        text: readOptionalString(args.text),
        url: readOptionalString(args.url),
        timeoutMs: readNumber(args.timeoutMs),
      }));
    case 'browser_tabs':
      return jsonResponse(await callBrowserUseApi(name, {
        sessionId: readString(args.sessionId, 'sessionId'),
        // Anything outside the advertised enum is sent as undefined.
        action: args.action === 'new' || args.action === 'select' || args.action === 'close' || args.action === 'list'
          ? args.action
          : undefined,
        index: readNumber(args.index),
        url: readOptionalString(args.url),
      }));
    default:
      throw new Error(`Unknown tool: ${name}`);
  }
}
/**
 * Dispatches one JSON-RPC message to its handler.
 *
 * Supported methods: `initialize`, `ping`, `tools/list`, `tools/call`, and any
 * `notifications/*` (accepted and ignored).
 *
 * @param message Parsed JSON-RPC request or notification.
 * @returns The result payload for the response, or `undefined` for notifications.
 * @throws Error for a missing/non-string method, an unsupported method, or a failing tool call.
 */
async function handleMessage(message: JsonRpcRequest) {
  // The message comes from untrusted JSON, so check the method before calling string methods on it.
  const method: unknown = message.method;
  if (typeof method !== 'string' || method === '') {
    throw new Error('Invalid request: method must be a non-empty string.');
  }
  switch (method) {
    case 'initialize':
      return {
        protocolVersion: '2024-11-05',
        capabilities: { tools: {} },
        serverInfo: { name: 'cloudcli-browser', version: '1.0.0' },
      };
    case 'ping':
      // MCP liveness check: the reply is an empty result object.
      return {};
    case 'tools/list':
      return { tools };
    case 'tools/call': {
      const params = message.params || {};
      const name = readString(params.name, 'name');
      const rawArguments = params.arguments;
      // Only a plain object is accepted as the argument bag; anything else means "no arguments".
      const args = (rawArguments && typeof rawArguments === 'object' && !Array.isArray(rawArguments)
        ? rawArguments
        : {}) as Record<string, unknown>;
      return callTool(name, args);
    }
    default:
      break;
  }
  if (method.startsWith('notifications/')) {
    return undefined;
  }
  throw new Error(`Unsupported method: ${method}`);
}
/**
 * Writes one JSON-RPC message to stdout.
 * The MCP stdio transport is newline-delimited JSON: exactly one message per line,
 * with no embedded newlines. It is not the LSP Content-Length framing.
 */
function writeMessage(message: Record<string, unknown>) {
  const line = JSON.stringify(message);
  process.stdout.write(`${line}\n`);
}
/**
 * Emits a success response for the given request id.
 * A message without an id is a notification, so nothing is written for it.
 */
function sendResult(id: string | number | null | undefined, result: unknown) {
  if (id !== undefined) {
    writeMessage({ jsonrpc: '2.0', id, result });
  }
}
/**
 * Emits an error response (code -32000) for the given request id.
 * Nothing is written when the id is undefined, i.e. for notifications.
 */
function sendError(id: string | number | null | undefined, error: unknown) {
  if (id === undefined) {
    return;
  }
  let detail: string;
  if (error instanceof Error) {
    detail = error.message;
  } else {
    detail = String(error);
  }
  const payload = { code: -32000, message: detail };
  writeMessage({ jsonrpc: '2.0', id, error: payload });
}
// Decode at the stream level so a multi-byte UTF-8 character that straddles two
// chunks is reassembled instead of being turned into replacement characters.
process.stdin.setEncoding('utf8');
// Holds the trailing partial line until its terminating newline arrives.
let buffer = '';

/**
 * Parses and handles one raw line from stdin, always answering through
 * sendResult/sendError and never rejecting (it is invoked fire-and-forget).
 */
async function dispatchLine(rawMessage: string): Promise<void> {
  let parsed: unknown;
  try {
    parsed = JSON.parse(rawMessage);
  } catch (error) {
    sendError(null, error);
    return;
  }
  // Valid JSON is not necessarily a request object: `null`, numbers, strings and arrays
  // would otherwise throw while the catch handler reads `request.id`, which would
  // surface as an unhandled rejection.
  if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
    sendError(null, new Error('Invalid request: expected a JSON-RPC object.'));
    return;
  }
  const request = parsed as JsonRpcRequest;
  try {
    const result = await handleMessage(request);
    sendResult(request.id, result);
  } catch (error) {
    sendError(request.id, error);
  }
}

process.stdin.on('data', (chunk) => {
  buffer += typeof chunk === 'string' ? chunk : chunk.toString('utf8');
  let newlineIndex: number;
  while ((newlineIndex = buffer.indexOf('\n')) !== -1) {
    const rawMessage = buffer.slice(0, newlineIndex).trim();
    buffer = buffer.slice(newlineIndex + 1);
    if (!rawMessage) {
      continue;
    }
    // Messages are handled concurrently; responses are matched to requests by id.
    void dispatchLine(rawMessage);
  }
});

View File

@@ -8,6 +8,7 @@
* (no args) - Start the server (default)
* start - Start the server
* sandbox - Manage Docker sandbox environments
* browser-use-mcp - Run Browser MCP stdio server
* status - Show configuration and data locations
* help - Show help information
* version - Show version information
@@ -154,12 +155,13 @@ Usage:
cloudcli [command] [options]
Commands:
start Start the CloudCLI server (default)
sandbox Manage Docker sandbox environments
status Show configuration and data locations
update Update to the latest version
help Show this help information
version Show version information
start Start the CloudCLI server (default)
sandbox Manage Docker sandbox environments
browser-use-mcp Run the Browser MCP stdio server
status Show configuration and data locations
update Update to the latest version
help Show this help information
version Show version information
Options:
-p, --port <port> Set server port (default: 3001)
@@ -605,6 +607,10 @@ async function startServer() {
await import('./index.js');
}
// Entry point for the `browser-use-mcp` command: loads ./browser-use-mcp.js.
// Nothing exported by that module is called here; the import itself is the whole action.
async function startBrowserUseMcp() {
await import('./browser-use-mcp.js');
}
// Parse CLI arguments
function parseArgs(args) {
const parsed = { command: 'start', options: {} };
@@ -658,6 +664,9 @@ async function main() {
case 'sandbox':
await sandboxCommand(remainingArgs || []);
break;
case 'browser-use-mcp':
await startBrowserUseMcp();
break;
case 'status':
case 'info':
showStatus();

View File

@@ -61,6 +61,9 @@ import userRoutes from './routes/user.js';
import geminiRoutes from './routes/gemini.js';
import pluginsRoutes from './routes/plugins.js';
import providerRoutes from './modules/providers/provider.routes.js';
import browserUseRoutes from './modules/browser-use/browser-use.routes.js';
import browserUseMcpRoutes from './modules/browser-use/browser-use-mcp.routes.js';
import { browserUseService } from './modules/browser-use/browser-use.service.js';
import { startEnabledPluginServers, stopAllPlugins, getPluginPort } from './utils/plugin-process-manager.js';
import { initializeDatabase, projectsDb, sessionsDb } from './modules/database/index.js';
import { configureWebPush } from './services/vapid-keys.js';
@@ -193,6 +196,12 @@ app.use('/api/gemini', authenticateToken, geminiRoutes);
// Plugins API Routes (protected)
app.use('/api/plugins', authenticateToken, pluginsRoutes);
// Browser MCP bridge API (local token protected)
app.use('/api/browser-use-mcp', browserUseMcpRoutes);
// Browser API Routes (protected)
app.use('/api/browser-use', authenticateToken, browserUseRoutes);
// Unified provider MCP routes (protected)
app.use('/api/providers', authenticateToken, providerRoutes);
@@ -1704,12 +1713,21 @@ async function startServer() {
await closeSessionsWatcher();
// Clean up plugin processes on shutdown
const shutdownPlugins = async () => {
await stopAllPlugins();
// Shutdown sequence: stop browser sessions, then plugin processes, then exit with code 0.
// Each step has its own try/catch so a failure in one is logged and does not prevent
// the next step or the final exit.
const shutdownRuntimeServices = async () => {
try {
await browserUseService.stopAllSessions();
} catch (err) {
console.error('[Browser] Error stopping sessions during shutdown:', err?.message || err);
}
try {
await stopAllPlugins();
} catch (err) {
console.error('[Plugins] Error stopping plugins during shutdown:', err?.message || err);
}
process.exit(0);
};
process.on('SIGTERM', () => void shutdownPlugins());
process.on('SIGINT', () => void shutdownPlugins());
process.on('SIGTERM', () => void shutdownRuntimeServices());
process.on('SIGINT', () => void shutdownRuntimeServices());
} catch (error) {
console.error('[ERROR] Failed to start server:', error);
process.exit(1);

View File

@@ -0,0 +1,120 @@
import { timingSafeEqual } from 'node:crypto';
import express from 'express';
import { browserUseService } from '@/modules/browser-use/browser-use.service.js';
const router = express.Router();
/**
 * Extracts the credential from an `Authorization: Bearer <token>` header value.
 * The scheme is matched case-insensitively.
 *
 * @returns The trimmed token, or `null` when the header is not a string, uses another
 *   scheme, or carries no token.
 */
function readBearerToken(header: unknown): string | null {
  if (typeof header === 'string') {
    const parts = /^Bearer\s+(\S.*)$/i.exec(header.trim());
    const credential = parts?.[1]?.trim();
    if (credential) {
      return credential;
    }
  }
  return null;
}
// Token gate for every route on this router. The token may arrive as a Bearer
// Authorization header or in the x-browser-use-mcp-token header.
router.use((req, res, next) => {
  const expected = browserUseService.getMcpToken();
  const token = readBearerToken(req.headers.authorization) || String(req.headers['x-browser-use-mcp-token'] || '');
  const provided = Buffer.from(token, 'utf8');
  const wanted = Buffer.from(String(expected || ''), 'utf8');
  // Compare in constant time so response timing does not reveal how many leading
  // characters of a guess are correct. timingSafeEqual requires equal lengths,
  // so the length check comes first (only the length can leak).
  const authorized = provided.length > 0
    && provided.length === wanted.length
    && timingSafeEqual(provided, wanted);
  if (!authorized) {
    res.status(401).json({ success: false, error: 'Invalid Browser MCP token.' });
    return;
  }
  next();
});
// POST /tools/:toolName — runs one Browser MCP tool against browserUseService.
// The JSON body carries the tool arguments. Each case coerces the fields it needs,
// so a missing or mistyped argument reaches the service as '' / undefined / [].
// Responses: 200 { success: true, data } on success, 404 for an unknown tool name,
// and 400 { success: false, error } for anything thrown while running the tool.
router.post('/tools/:toolName', async (req, res) => {
try {
// A non-object body is treated as "no arguments".
const input = (req.body && typeof req.body === 'object' ? req.body : {}) as Record<string, unknown>;
// Empty string when absent or not a string.
const sessionId = typeof input.sessionId === 'string' ? input.sessionId : '';
const toolName = req.params.toolName;
let result: unknown;
switch (toolName) {
case 'browser_create_session':
result = await browserUseService.createAgentSession({
profileName: typeof input.profileName === 'string' ? input.profileName : null,
});
break;
case 'browser_list_sessions':
result = await browserUseService.listAgentSessions();
break;
// Both tool names are served by the same snapshot call.
case 'browser_snapshot':
case 'browser_take_screenshot':
result = await browserUseService.agentSnapshot(sessionId);
break;
case 'browser_navigate':
result = await browserUseService.agentNavigate(sessionId, String(input.url || ''));
break;
case 'browser_click':
result = await browserUseService.agentClick(sessionId, {
selector: typeof input.selector === 'string' ? input.selector : undefined,
text: typeof input.text === 'string' ? input.text : undefined,
x: typeof input.x === 'number' ? input.x : undefined,
y: typeof input.y === 'number' ? input.y : undefined,
});
break;
case 'browser_type':
result = await browserUseService.agentType(sessionId, {
selector: typeof input.selector === 'string' ? input.selector : undefined,
text: String(input.text || ''),
submit: input.submit === true,
});
break;
case 'browser_fill_form':
result = await browserUseService.agentFillForm(
sessionId,
Array.isArray(input.fields)
? input.fields.map((field) => {
// NOTE(review): a null entry makes the property reads below throw; the outer catch turns that into a 400.
const record = field as Record<string, unknown>;
return {
selector: String(record.selector || ''),
value: String(record.value || ''),
};
})
: [],
);
break;
case 'browser_press_key':
result = await browserUseService.agentPressKey(sessionId, String(input.key || ''));
break;
case 'browser_select_option':
result = await browserUseService.agentSelectOption(
sessionId,
String(input.selector || ''),
// Non-string entries are dropped.
Array.isArray(input.values) ? input.values.filter((value): value is string => typeof value === 'string') : [],
);
break;
case 'browser_wait_for':
result = await browserUseService.agentWaitFor(sessionId, {
text: typeof input.text === 'string' ? input.text : undefined,
url: typeof input.url === 'string' ? input.url : undefined,
timeoutMs: typeof input.timeoutMs === 'number' ? input.timeoutMs : undefined,
});
break;
case 'browser_tabs':
result = await browserUseService.agentTabs(sessionId, {
// Only the four known actions are forwarded; anything else becomes undefined.
action: input.action === 'new' || input.action === 'select' || input.action === 'close' || input.action === 'list'
? input.action
: undefined,
index: typeof input.index === 'number' ? input.index : undefined,
url: typeof input.url === 'string' ? input.url : undefined,
});
break;
case 'browser_close_session':
result = await browserUseService.agentStopSession(sessionId);
break;
default:
res.status(404).json({ success: false, error: `Unknown Browser MCP tool "${toolName}".` });
return;
}
res.json({ success: true, data: result });
} catch (error) {
res.status(400).json({
success: false,
error: error instanceof Error ? error.message : 'Browser MCP tool failed.',
});
}
});
export default router;

View File

@@ -0,0 +1,96 @@
import express from 'express';
import { browserUseService } from '@/modules/browser-use/browser-use.service.js';
const router = express.Router();
/** Normalises a route parameter to a single string: first element of an array, or '' when missing. */
function readParam(value: string | string[] | undefined): string {
  const first = Array.isArray(value) ? value[0] : value;
  return first || '';
}
/** Returns the error's own message when it is an Error, otherwise the route-specific fallback text. */
function routeErrorMessage(error: unknown, fallback: string): string {
  return error instanceof Error ? error.message : fallback;
}

// GET /status — runtime availability summary.
router.get('/status', async (_req, res) => {
  try {
    res.json({ success: true, data: await browserUseService.getStatus() });
  } catch (error) {
    res.status(500).json({
      success: false,
      error: routeErrorMessage(error, 'Failed to load Browser status.'),
    });
  }
});

// GET /settings — current Browser settings.
router.get('/settings', async (_req, res) => {
  try {
    res.json({ success: true, data: { settings: await browserUseService.getSettings() } });
  } catch (error) {
    res.status(500).json({
      success: false,
      error: routeErrorMessage(error, 'Failed to load Browser settings.'),
    });
  }
});

// PUT /settings — update settings from the request body; failures are reported as 400.
router.put('/settings', async (req, res) => {
  try {
    const settings = await browserUseService.updateSettings(req.body || {});
    res.json({ success: true, data: { settings } });
  } catch (error) {
    res.status(400).json({
      success: false,
      error: routeErrorMessage(error, 'Failed to save Browser settings.'),
    });
  }
});

// POST /runtime/install — install the browser runtime. A completed-but-failed install
// is reported as 500 with the installer's message in `error`.
router.post('/runtime/install', async (_req, res) => {
  try {
    const result = await browserUseService.installRuntime();
    res.status(result.success ? 200 : 500).json({
      success: result.success,
      data: result,
      error: result.success ? undefined : result.message,
    });
  } catch (error) {
    res.status(500).json({
      success: false,
      error: routeErrorMessage(error, 'Failed to install Browser runtime.'),
    });
  }
});

// GET /sessions — list browser sessions.
router.get('/sessions', async (_req, res) => {
  try {
    res.json({ success: true, data: { sessions: await browserUseService.listSessions() } });
  } catch (error) {
    // A failure while listing is a server-side error, not an authentication problem:
    // answer 500 like the other read routes instead of 401, which clients may treat
    // as "credentials rejected".
    res.status(500).json({
      success: false,
      error: routeErrorMessage(error, 'Failed to list browser sessions.'),
    });
  }
});

// POST /sessions/:sessionId/stop — stop one session; failures are reported as 400.
router.post('/sessions/:sessionId/stop', async (req, res) => {
  try {
    const result = await browserUseService.stopSession(readParam(req.params.sessionId));
    res.json({ success: true, data: result });
  } catch (error) {
    res.status(400).json({
      success: false,
      error: routeErrorMessage(error, 'Failed to stop browser session.'),
    });
  }
});

// DELETE /sessions/:sessionId — delete one session; failures are reported as 400.
router.delete('/sessions/:sessionId', async (req, res) => {
  try {
    const result = await browserUseService.deleteSession(readParam(req.params.sessionId));
    res.json({ success: true, data: result });
  } catch (error) {
    res.status(400).json({
      success: false,
      error: routeErrorMessage(error, 'Failed to delete browser session.'),
    });
  }
});
export default router;

View File

@@ -0,0 +1,836 @@
import { createRequire } from 'node:module';
import { randomBytes, randomUUID } from 'node:crypto';
import { spawn } from 'node:child_process';
import fs from 'node:fs';
import os from 'node:os';
import path from 'node:path';
import { appConfigDb } from '@/modules/database/index.js';
import { providerMcpService } from '@/modules/providers/index.js';
import { getModuleDir } from '@/utils/runtime-paths.js';
const require = createRequire(import.meta.url);
const __dirname = getModuleDir(import.meta.url);
// True only when VITE_IS_PLATFORM is exactly the string 'true'.
const IS_PLATFORM = process.env.VITE_IS_PLATFORM === 'true';
/**
 * Reads a non-negative integer from an environment variable.
 * Missing, non-numeric or negative values yield `fallback`, so a typo can never turn
 * a limit into NaN (every comparison against NaN is false).
 */
const readNonNegativeIntEnv = (name: string, fallback: number): number => {
  const parsed = Number.parseInt(process.env[name] || '', 10);
  return Number.isFinite(parsed) && parsed >= 0 ? parsed : fallback;
};
const MAX_SESSIONS_PER_OWNER = readNonNegativeIntEnv('CLOUDCLI_BROWSER_USE_MAX_SESSIONS_PER_OWNER', 3);
// Idle time after which a ready session is expired (default: 30 minutes).
const SESSION_TTL_MS = readNonNegativeIntEnv('CLOUDCLI_BROWSER_USE_SESSION_TTL_MS', 30 * 60 * 1000);
// Keys under which settings and the MCP token are stored in appConfigDb.
const BROWSER_USE_SETTINGS_KEY = 'browser_use_settings';
const BROWSER_USE_MCP_TOKEN_KEY = 'browser_use_mcp_token';
// Where the browser runs; getRuntime() picks 'cloud' when IS_PLATFORM is set, else 'local'.
type BrowserUseRuntime = 'cloud' | 'local';
type BrowserUseSessionStatus = 'ready' | 'stopped' | 'unavailable';
/** In-memory record describing one browser session. */
type BrowserUseSession = {
id: string;
// Internal owner key; removed by publicSession() before a session is returned.
ownerId: string;
createdBy: 'agent';
runtime: BrowserUseRuntime;
status: BrowserUseSessionStatus;
url: string | null;
title: string | null;
// JPEG screenshot as a data URL (written by captureSession).
screenshotDataUrl: string | null;
// ISO-8601 timestamps; updatedAt is what session expiry is measured against.
createdAt: string;
updatedAt: string;
lastAction: string | null;
message: string | null;
profileName: string | null;
viewport: {
width: number;
height: number;
} | null;
cursor: {
x: number;
y: number;
actor: 'agent';
} | null;
};
/** Session shape without the internal ownerId field. */
type PublicBrowserUseSession = Omit<BrowserUseSession, 'ownerId'>;
// Live runtime objects for a session — presumably Playwright browser/context/page; all untyped.
type RuntimeHandle = {
browser?: any;
context?: any;
page?: any;
};
type BrowserUseSettings = {
enabled: boolean;
};
/** Result of probing the runtime, plus the state of any install in progress. */
type RuntimeReadiness = {
// The loaded playwright module, or null when it cannot be required.
playwright: any | null;
playwrightInstalled: boolean;
chromiumInstalled: boolean;
chromiumExecutablePath: string | null;
installInProgress: boolean;
installMessage: string | null;
};
/** The cacheable part of RuntimeReadiness (no install state). */
type RuntimeProbe = Omit<RuntimeReadiness, 'installInProgress' | 'installMessage'>;
// In-memory session registry — presumably keyed by session id (the insertion site is not shown here).
const sessions = new Map<string, BrowserUseSession>();
// Live runtime handles, looked up and removed by session id in closeHandle().
const handles = new Map<string, RuntimeHandle>();
// The in-flight install, shared by concurrent callers of installRuntime(); null when idle.
let installPromise: Promise<{ success: boolean; message: string }> | null = null;
// Most recent install progress or result text, surfaced through readiness/status.
let lastInstallMessage: string | null = null;
// Cached runtime probe with the time it was taken; cleared around installs.
let runtimeProbeCache: { value: RuntimeProbe; updatedAt: number } | null = null;
// Used when nothing is stored or the stored value cannot be read: the feature is off.
const DEFAULT_SETTINGS: BrowserUseSettings = {
enabled: false,
};
const AGENT_OWNER_ID = 'agent';
// Root directory for per-profile storage, under the user's home directory.
const PROFILE_ROOT = path.join(os.homedir(), '.cloudcli', 'browser-use', 'profiles');
// Name under which the MCP server is registered with providers.
const MCP_SERVER_NAME = 'cloudcli-browser';
// Earlier registration names; these are removed whenever the server is (un)registered.
const LEGACY_MCP_SERVER_NAMES = ['cloudcli-browser-use'];
// How long a runtime probe may be reused (30 seconds).
const RUNTIME_READINESS_CACHE_TTL_MS = 30_000;
/** Reports which runtime flavour this process uses: 'cloud' on the platform build, otherwise 'local'. */
function getRuntime(): BrowserUseRuntime {
  if (IS_PLATFORM) {
    return 'cloud';
  }
  return 'local';
}
/**
 * Loads the persisted Browser settings.
 * Falls back to DEFAULT_SETTINGS when nothing is stored; a read or parse failure is
 * logged as a warning and also yields the defaults.
 */
function readSettings(): BrowserUseSettings {
  try {
    const serialized = appConfigDb.get(BROWSER_USE_SETTINGS_KEY);
    if (serialized) {
      const stored = JSON.parse(serialized) as Partial<BrowserUseSettings>;
      // Only a literal `true` enables the feature.
      const enabled = stored.enabled === true;
      return { enabled };
    }
    return DEFAULT_SETTINGS;
  } catch (error: any) {
    console.warn('[Browser] Failed to read settings:', error?.message || error);
    return DEFAULT_SETTINGS;
  }
}
/**
 * Persists the settings after normalising them to the known fields.
 *
 * @returns The normalised settings object that was stored.
 */
function writeSettings(settings: BrowserUseSettings): BrowserUseSettings {
  const enabled = settings.enabled === true;
  const normalized = { enabled };
  const serialized = JSON.stringify(normalized);
  appConfigDb.set(BROWSER_USE_SETTINGS_KEY, serialized);
  return normalized;
}
/**
 * Returns the stored MCP bridge token. When none is stored, generates 32 random
 * bytes (hex-encoded), stores them, and returns the new token.
 */
function getOrCreateMcpToken(): string {
  const stored = appConfigDb.get(BROWSER_USE_MCP_TOKEN_KEY);
  if (!stored) {
    const fresh = randomBytes(32).toString('hex');
    appConfigDb.set(BROWSER_USE_MCP_TOKEN_KEY, fresh);
    return fresh;
  }
  return stored;
}
/**
 * Picks the message explaining why the Browser runtime is not available.
 * The first unmet prerequisite wins: feature enabled, then Playwright, then Chromium.
 * With all three met, the last install message (or a generic text) is returned.
 */
function getSetupMessage(settings: BrowserUseSettings, readiness: RuntimeReadiness): string {
  const blockers: Array<[boolean, string]> = [
    [!settings.enabled, 'Browser is disabled in settings.'],
    [!readiness.playwrightInstalled, 'Install Playwright and Chromium to use browser sessions.'],
    [!readiness.chromiumInstalled, 'Playwright is installed, but Chromium is missing. Install the Chromium runtime to continue.'],
  ];
  const firstBlocker = blockers.find(([blocked]) => blocked);
  if (firstBlocker) {
    return firstBlocker[1];
  }
  return readiness.installMessage || 'Browser runtime is not ready.';
}
/** Tries to load the `playwright` package; returns the module, or `null` when requiring it fails. */
function getPlaywright(): any | null {
  let loaded: any | null = null;
  try {
    loaded = require('playwright');
  } catch {
    // Deliberately best-effort: a failed require is reported as "not installed".
    loaded = null;
  }
  return loaded;
}
/**
 * Works out how the MCP stdio server should be launched.
 * Prefers running `browser-use-mcp.js` (two directories above this module) with the
 * current Node executable; when that script does not exist, falls back to the
 * `cloudcli browser-use-mcp` command.
 */
function getMcpCommand(): { command: string; args: string[] } {
  const scriptPath = path.join(path.resolve(__dirname, '..', '..'), 'browser-use-mcp.js');
  return fs.existsSync(scriptPath)
    ? { command: process.execPath, args: [scriptPath] }
    : { command: 'cloudcli', args: ['browser-use-mcp'] };
}
/** Builds the loopback URL of the MCP bridge API from SERVER_PORT, then PORT, then the default 3001. */
function getMcpApiUrl(): string {
  const { SERVER_PORT, PORT } = process.env;
  const port = SERVER_PORT || PORT || '3001';
  return `http://127.0.0.1:${port}/api/browser-use-mcp`;
}
/**
 * Removes the named user-scope MCP server from every provider.
 *
 * @returns The per-provider results, each tagged with the server name that was removed.
 */
async function removeMcpServerFromAllProviders(name: string) {
  const removalRequest = { name, scope: 'user' as const };
  const removals = await providerMcpService.removeMcpServerFromAllProviders(removalRequest);
  return removals.map((removal) => {
    return { ...removal, name };
  });
}
/** Trims a profile name and caps it at 80 characters; blank or missing names become `null`. */
function normalizeProfileName(profileName?: string | null): string | null {
  const trimmed = String(profileName || '').trim();
  return trimmed ? trimmed.slice(0, 80) : null;
}
/**
 * Maps a profile name to its storage directory under PROFILE_ROOT.
 * The name is lower-cased, every run of characters outside `a-z 0-9 . _ -` becomes
 * a single '-', leading/trailing dashes are dropped, and the result is capped at 80
 * characters. Names that end up empty use 'default'.
 *
 * @param profileName Caller-supplied (untrusted) profile name.
 * @returns Absolute path of a directory directly inside PROFILE_ROOT.
 */
function getProfilePath(profileName: string): string {
  const sanitized = profileName
    .toLowerCase()
    .replace(/[^a-z0-9._-]+/g, '-')
    .replace(/^-+|-+$/g, '')
    .slice(0, 80);
  // Dots survive the character filter, so '.' and '..' would resolve to PROFILE_ROOT
  // itself or to its parent. Any all-dots name is therefore treated like an empty one.
  const safeName = sanitized !== '' && !/^\.+$/.test(sanitized) ? sanitized : 'default';
  return path.join(PROFILE_ROOT, safeName);
}
/**
 * Checks whether Playwright can be loaded and whether its Chromium executable exists on disk.
 * Any error while asking Playwright for the executable path counts as "Chromium not installed".
 */
function probeRuntime(): RuntimeProbe {
  const playwright = getPlaywright();
  let chromiumExecutablePath: string | null = null;
  let chromiumInstalled = false;
  if (playwright) {
    try {
      const executablePath = playwright.chromium.executablePath();
      chromiumExecutablePath = executablePath;
      chromiumInstalled = Boolean(executablePath && fs.existsSync(executablePath));
    } catch {
      chromiumInstalled = false;
    }
  }
  return {
    playwright,
    playwrightInstalled: Boolean(playwright),
    chromiumInstalled,
    chromiumExecutablePath,
  };
}
/**
 * Returns the runtime readiness, reusing a recent probe when allowed.
 * The cached probe is used only when `force` is not set, no install is running, and
 * the cache is younger than RUNTIME_READINESS_CACHE_TTL_MS. A fresh probe is stored
 * in the cache unless an install is in progress.
 */
function getRuntimeReadiness(options: { force?: boolean } = {}): RuntimeReadiness {
  const now = Date.now();
  const installing = Boolean(installPromise);
  const cached = runtimeProbeCache;
  let probe: RuntimeProbe;
  if (!options.force && !installing && cached && now - cached.updatedAt < RUNTIME_READINESS_CACHE_TTL_MS) {
    probe = cached.value;
  } else {
    probe = probeRuntime();
    if (!installing) {
      runtimeProbeCache = { value: probe, updatedAt: now };
    }
  }
  return {
    ...probe,
    installInProgress: installing,
    installMessage: lastInstallMessage,
  };
}
const DEFAULT_INSTALL_COMMAND_TIMEOUT_MS = 10 * 60 * 1000;
// Largest delay setTimeout honours; anything above it is treated as a 1 ms delay.
const MAX_TIMER_DELAY_MS = 2_147_483_647;
/**
 * Parses the per-command install timeout from its environment string.
 * Non-numeric, zero or negative input uses the 10-minute default, and very large
 * values are clamped, so the kill timer can never fire immediately by accident.
 */
const parseInstallTimeoutMs = (raw: string | undefined): number => {
  const parsed = Number.parseInt(raw || '', 10);
  if (!Number.isFinite(parsed) || parsed <= 0) {
    return DEFAULT_INSTALL_COMMAND_TIMEOUT_MS;
  }
  return Math.min(parsed, MAX_TIMER_DELAY_MS);
};
const INSTALL_COMMAND_TIMEOUT_MS = parseInstallTimeoutMs(process.env.CLOUDCLI_BROWSER_USE_INSTALL_TIMEOUT_MS);
/**
 * Runs a command to completion, capturing stdout and stderr together.
 *
 * @param command Executable to run. Must be trusted: on Windows, `.cmd`/`.bat` commands
 *   are run through the shell, where arguments are not escaped.
 * @param args Arguments passed to the command (trusted, see above).
 * @returns Resolves when the process exits with code 0.
 * @throws Rejects with the captured output (or an exit-code message) on a non-zero
 *   exit, with the spawn error if the process cannot be started, or with a timeout
 *   error after INSTALL_COMMAND_TIMEOUT_MS, in which case the child is killed.
 */
function runCommand(command: string, args: string[]): Promise<void> {
  return new Promise((resolve, reject) => {
    // Windows batch shims such as npm.cmd cannot be spawned directly: current Node
    // releases reject them with EINVAL unless the `shell` option is set.
    const needsShell = process.platform === 'win32' && /\.(cmd|bat)$/i.test(command);
    const child = spawn(command, args, {
      cwd: process.cwd(),
      env: process.env,
      shell: needsShell,
      stdio: ['ignore', 'pipe', 'pipe'],
    });
    const output: string[] = [];
    let settled = false;
    // Settles the promise at most once, whichever of timeout/error/close happens first.
    const finish = (fn: () => void) => {
      if (settled) {
        return;
      }
      settled = true;
      clearTimeout(timer);
      fn();
    };
    const timer = setTimeout(() => {
      child.kill('SIGKILL');
      finish(() => reject(new Error(
        `${command} ${args.join(' ')} timed out after ${INSTALL_COMMAND_TIMEOUT_MS}ms.`,
      )));
    }, INSTALL_COMMAND_TIMEOUT_MS);
    // Do not let a pending timeout keep the process alive on its own.
    timer.unref?.();
    child.stdout.on('data', (chunk) => output.push(String(chunk)));
    child.stderr.on('data', (chunk) => output.push(String(chunk)));
    child.on('error', (error) => finish(() => reject(error)));
    child.on('close', (code) => finish(() => {
      if (code === 0) {
        resolve();
        return;
      }
      reject(new Error(output.join('').trim() || `${command} ${args.join(' ')} exited with code ${code}`));
    }));
  });
}
/**
 * Turns an install failure into user-facing text.
 * Output mentioning both "sudo" and "password" is replaced by guidance about
 * administrator privileges; otherwise the message is passed through, with a generic
 * fallback when it is empty.
 */
function formatInstallError(error: unknown): string {
  const detail = error instanceof Error ? error.message : String(error);
  const needsAdminRights = ['sudo', 'password'].every((needle) => detail.includes(needle));
  if (needsAdminRights) {
    return 'Installing Chromium system dependencies requires administrator privileges. Run `npx playwright install-deps chromium` on the machine where CloudCLI runs, then try again.';
  }
  return detail || 'Failed to install Browser runtime.';
}
/**
 * Installs the Playwright package and the Chromium runtime (plus Chromium's system
 * dependencies on Linux) via npm.
 * Concurrent callers share the single in-flight install. Progress and the final
 * outcome are recorded in `lastInstallMessage`, and the runtime probe cache is
 * cleared before and after the install.
 *
 * @returns `{ success, message }`; failures are reported in the result, not thrown.
 */
async function installRuntime(): Promise<{ success: boolean; message: string }> {
  // Join an install that is already running instead of starting a second one.
  if (installPromise) {
    return installPromise;
  }
  const npm = process.platform === 'win32' ? 'npm.cmd' : 'npm';
  const playwrightCli = (...subcommand: string[]) => runCommand(npm, ['exec', '--', 'playwright', ...subcommand]);
  // Records the current step so status polling can show it, and hands the text back.
  const announce = (text: string): string => {
    lastInstallMessage = text;
    return text;
  };
  const performInstall = async (): Promise<{ success: boolean; message: string }> => {
    try {
      announce('Installing Playwright package...');
      await runCommand(npm, ['install', '--no-save', '--no-package-lock', 'playwright']);
      if (process.platform === 'linux') {
        announce('Installing Chromium system dependencies...');
        await playwrightCli('install-deps', 'chromium');
      }
      announce('Installing Chromium runtime...');
      await playwrightCli('install', 'chromium');
      return { success: true, message: announce('Browser runtime installed.') };
    } catch (error) {
      return { success: false, message: announce(formatInstallError(error)) };
    }
  };
  runtimeProbeCache = null;
  installPromise = performInstall();
  try {
    return await installPromise;
  } finally {
    installPromise = null;
    runtimeProbeCache = null;
  }
}
/**
 * Normalises user-supplied navigation input into an absolute http(s) URL.
 * Input without a scheme gets `https://` prepended. That includes bare
 * `host:port` forms such as `localhost:3000`, which would otherwise be misread as a
 * URL whose scheme is the host name.
 *
 * @param rawUrl Untrusted URL text.
 * @returns The serialised URL.
 * @throws Error when the input is blank or uses a scheme other than http/https;
 *   TypeError (from `new URL`) when it cannot be parsed at all.
 */
function normalizeUrl(rawUrl: string): string {
  const trimmed = rawUrl.trim();
  if (!trimmed) {
    throw new Error('URL is required.');
  }
  const hasScheme = /^[a-zA-Z][a-zA-Z\d+\-.]*:/.test(trimmed);
  // "name:digits" followed by end-of-input or a path/query/fragment is a host with a
  // port, not a scheme (host names use the same characters as URL schemes).
  const looksLikeHostAndPort = /^[^/?#@:]+:\d{1,5}(?:[/?#]|$)/.test(trimmed);
  const withProtocol = hasScheme && !looksLikeHostAndPort
    ? trimmed
    : `https://${trimmed}`;
  const parsed = new URL(withProtocol);
  if (!['http:', 'https:'].includes(parsed.protocol)) {
    throw new Error('Only http and https URLs are supported.');
  }
  return parsed.toString();
}
/** Returns a shallow copy of the session without the internal `ownerId` field. */
function publicSession(session: BrowserUseSession): PublicBrowserUseSession {
  const visibleEntries = Object.entries(session).filter(([key]) => key !== 'ownerId');
  return Object.fromEntries(visibleEntries) as PublicBrowserUseSession;
}
/** Collects every registered session belonging to the given owner, in registry order. */
function ownerSessions(ownerId: string): BrowserUseSession[] {
  const owned: BrowserUseSession[] = [];
  for (const session of sessions.values()) {
    if (session.ownerId === ownerId) {
      owned.push(session);
    }
  }
  return owned;
}
/**
 * Forgets the runtime handle of a session and closes its context, then its browser.
 * Rejections from either close call are ignored, and a session without a handle is a no-op.
 */
async function closeHandle(sessionId: string): Promise<void> {
  const ignoreFailure = () => undefined;
  const handle = handles.get(sessionId);
  // Unregister first so the handle is gone even if closing takes a while.
  handles.delete(sessionId);
  const context = handle?.context;
  if (context != null && context.close != null) {
    await context.close().catch(ignoreFailure);
  }
  const browser = handle?.browser;
  if (browser != null) {
    await browser.close().catch(ignoreFailure);
  }
}
/**
 * Stops every 'ready' session whose last update is older than SESSION_TTL_MS.
 * Sessions with an unparseable `updatedAt` are left alone. Stale sessions are closed
 * concurrently and then marked as stopped with an expiry note.
 *
 * @param now Reference time in epoch milliseconds (defaults to the current time).
 */
async function expireStaleSessions(now = Date.now()): Promise<void> {
  const isStale = (session: BrowserUseSession): boolean => {
    if (session.status !== 'ready') {
      return false;
    }
    const lastTouched = Date.parse(session.updatedAt);
    // Written as a negated "<=" on purpose so the comparison keeps its exact semantics.
    return Number.isFinite(lastTouched) && !(now - lastTouched <= SESSION_TTL_MS);
  };
  const expiredAt = new Date(now).toISOString();
  const stale = [...sessions.values()].filter(isStale);
  await Promise.all(stale.map(async (session) => {
    await closeHandle(session.id);
    session.status = 'stopped';
    session.updatedAt = expiredAt;
    session.lastAction = 'expire';
    session.message = 'Browser session expired after inactivity.';
  }));
}
/**
 * Refreshes a session record from its live page: viewport-only JPEG screenshot (as a
 * data URL), title, URL and viewport size, and bumps `updatedAt`.
 * A failing title lookup yields `null`; an empty URL or missing viewport keeps the
 * previous value. A screenshot failure propagates to the caller.
 */
async function captureSession(session: BrowserUseSession, page: any): Promise<void> {
  const jpegBytes = await page.screenshot({ type: 'jpeg', quality: 72, fullPage: false });
  const encoded = Buffer.from(jpegBytes).toString('base64');
  session.screenshotDataUrl = `data:image/jpeg;base64,${encoded}`;
  const pageTitle = await page.title().catch(() => null);
  session.title = pageTitle;
  const currentUrl = page.url();
  session.url = currentUrl || session.url;
  const currentViewport = page.viewportSize?.();
  session.viewport = currentViewport || session.viewport;
  session.updatedAt = new Date().toISOString();
}
/**
 * Resolves the page coordinates an action targets.
 * Explicit x/y (both numbers) win. Otherwise the first element matching `selector`,
 * or else the first element containing `text`, is located and its centre returned.
 *
 * @returns The point, or `null` when no target is given or its bounding box is unavailable.
 */
async function getActionPoint(page: any, input: { selector?: string; text?: string; x?: number; y?: number }) {
  const { x, y, selector, text } = input;
  if (typeof x === 'number' && typeof y === 'number') {
    return { x, y };
  }
  let target: any = null;
  if (selector) {
    target = page.locator(selector).first();
  } else if (text) {
    target = page.getByText(text, { exact: false }).first();
  }
  if (!target) {
    return null;
  }
  const bounds = await target.boundingBox().catch(() => null);
  if (!bounds) {
    return null;
  }
  const centerX = Math.round(bounds.x + bounds.width / 2);
  const centerY = Math.round(bounds.y + bounds.height / 2);
  return { x: centerX, y: centerY };
}
/**
 * Returns the page currently registered for a session.
 *
 * @throws Error when no runtime handle (or no page) is registered for the session.
 */
function requireActivePage(sessionId: string) {
  const page = handles.get(sessionId)?.page;
  if (!page) {
    throw new Error('Browser runtime handle is not available.');
  }
  return page;
}

/**
 * Resolves the action point for `target` and tags it as an agent cursor.
 * Returns `null` when no point can be resolved.
 */
async function getAgentCursor(
  page: any,
  target: { selector?: string; text?: string; x?: number; y?: number },
) {
  const point = await getActionPoint(page, target);
  return point ? { ...point, actor: 'agent' as const } : null;
}

/**
 * Service facade for agent-driven browser sessions: settings, MCP
 * registration, runtime installation, session lifecycle and page actions.
 *
 * Every session created here is owned by `AGENT_OWNER_ID`; lookups reject
 * sessions belonging to any other owner.
 */
export const browserUseService = {
  /** Returns the stored browser-use settings. */
  async getSettings() {
    return readSettings();
  },

  /**
   * Persists the `enabled` flag (other fields of `settings` are ignored).
   *
   * When the result is enabled the MCP server is (re-)registered with all
   * providers. When it flips from enabled to disabled, the MCP server is
   * unregistered and all sessions are stopped.
   *
   * @returns The settings as written.
   */
  async updateSettings(settings: Partial<BrowserUseSettings>) {
    const current = readSettings();
    const nextSettings = {
      enabled: typeof settings.enabled === 'boolean' ? settings.enabled : current.enabled,
    };
    const next = writeSettings(nextSettings);
    if (next.enabled) {
      await this.registerAgentMcp();
    } else if (current.enabled) {
      await this.unregisterAgentMcp();
      // Use a message that matches the cause; the default one talks about server shutdown.
      await this.stopAllSessions('Browser session stopped because browser agent tools were disabled.');
    }
    return next;
  },

  /**
   * Reports whether the browser runtime can be used.
   *
   * `available` requires the feature to be enabled and both Playwright and
   * Chromium to be installed. `sessionCount` counts every tracked session,
   * regardless of status.
   */
  async getStatus() {
    const settings = readSettings();
    const readiness = getRuntimeReadiness();
    const available = settings.enabled && readiness.playwrightInstalled && readiness.chromiumInstalled;
    return {
      enabled: settings.enabled,
      runtime: getRuntime(),
      available,
      playwrightInstalled: readiness.playwrightInstalled,
      chromiumInstalled: readiness.chromiumInstalled,
      installInProgress: readiness.installInProgress,
      sessionCount: sessions.size,
      message: available
        ? 'Browser runtime is available.'
        : getSetupMessage(settings, readiness),
    };
  },

  /**
   * Registers the browser-use MCP server (stdio, user scope) with all
   * providers, after removing any registrations under legacy server names.
   *
   * @returns The server name, the launch command/args and per-provider results.
   */
  async registerAgentMcp() {
    const { command, args } = getMcpCommand();
    await Promise.all(LEGACY_MCP_SERVER_NAMES.map((name) => removeMcpServerFromAllProviders(name)));
    const results = await providerMcpService.addMcpServerToAllProviders({
      name: MCP_SERVER_NAME,
      scope: 'user',
      transport: 'stdio',
      command,
      args,
      env: {
        CLOUDCLI_BROWSER_USE_MCP_TOKEN: getOrCreateMcpToken(),
        CLOUDCLI_BROWSER_USE_API_URL: getMcpApiUrl(),
      },
    });
    return { name: MCP_SERVER_NAME, command, args, results };
  },

  /** Returns the MCP token, creating it when none exists yet. */
  getMcpToken() {
    return getOrCreateMcpToken();
  },

  /**
   * Removes the MCP server from all providers, under both its current and
   * its legacy names.
   *
   * @returns The server name and the flattened per-name removal results.
   */
  async unregisterAgentMcp() {
    const results = (await Promise.all(
      [MCP_SERVER_NAME, ...LEGACY_MCP_SERVER_NAMES].map((name) => removeMcpServerFromAllProviders(name)),
    )).flat();
    return { name: MCP_SERVER_NAME, results };
  },

  /** Runs the runtime installer and returns its result plus the refreshed status. */
  async installRuntime() {
    const result = await installRuntime();
    return {
      ...result,
      status: await this.getStatus(),
    };
  },

  /**
   * Lists the public view of all agent-owned sessions after expiring stale
   * ones. Unlike `listAgentSessions`, this does not check the enabled flag.
   */
  async listSessions() {
    await expireStaleSessions();
    return ownerSessions(AGENT_OWNER_ID).map(publicSession);
  },

  /**
   * Creates a new agent session and, when the runtime is ready, launches a
   * headless Chromium page for it.
   *
   * When the runtime is not installed, the session is still recorded with
   * status `unavailable` and a setup message, and no browser is launched.
   *
   * @param options.profileName Optional profile name; when it normalizes to a
   *        truthy value a persistent context under `PROFILE_ROOT` is used.
   * @returns The public view of the created session.
   * @throws Error when browser tools are disabled, when the number of ready
   *         agent sessions has reached `MAX_SESSIONS_PER_OWNER`, or when the
   *         launch or the initial capture fails.
   */
  async createAgentSession(options?: { profileName?: string | null }) {
    const settings = readSettings();
    if (!settings.enabled) {
      throw new Error('Browser agent tools are disabled.');
    }
    await expireStaleSessions();
    const profileName = normalizeProfileName(options?.profileName);
    const now = new Date().toISOString();
    const session: BrowserUseSession = {
      id: randomUUID(),
      ownerId: AGENT_OWNER_ID,
      createdBy: 'agent',
      runtime: getRuntime(),
      status: 'unavailable',
      url: null,
      title: null,
      screenshotDataUrl: null,
      createdAt: now,
      updatedAt: now,
      lastAction: 'create',
      message: null,
      profileName,
      viewport: { width: 1440, height: 900 },
      cursor: null,
    };

    const activeOwnerSessions = ownerSessions(AGENT_OWNER_ID).filter((item) => item.status === 'ready');
    if (activeOwnerSessions.length >= MAX_SESSIONS_PER_OWNER) {
      throw new Error(`Browser is limited to ${MAX_SESSIONS_PER_OWNER} active agent sessions.`);
    }

    // `settings.enabled` is already guaranteed above, so only the runtime is checked here.
    const readiness = getRuntimeReadiness();
    if (!readiness.playwrightInstalled || !readiness.chromiumInstalled || !readiness.playwright) {
      session.message = getSetupMessage(settings, readiness);
      sessions.set(session.id, session);
      return publicSession(session);
    }

    let browser: any | undefined;
    let context: any | undefined;
    let page: any;
    const launchOptions = {
      headless: true,
      args: ['--disable-dev-shm-usage'],
    };
    const contextOptions = {
      viewport: { width: 1440, height: 900 },
      serviceWorkers: 'block',
    };

    try {
      if (profileName) {
        fs.mkdirSync(PROFILE_ROOT, { recursive: true });
        context = await readiness.playwright.chromium.launchPersistentContext(getProfilePath(profileName), {
          ...launchOptions,
          ...contextOptions,
        });
        page = context.pages()[0] || await context.newPage();
      } else {
        browser = await readiness.playwright.chromium.launch(launchOptions);
        context = await browser.newContext(contextOptions);
        page = await context.newPage();
      }
    } catch (error) {
      // Nothing is registered in `handles` yet, so whatever was launched must
      // be closed here or it would be orphaned.
      await context?.close?.().catch(() => undefined);
      await browser?.close?.().catch(() => undefined);
      throw error;
    }

    session.status = 'ready';
    session.message = 'Browser session is ready.';
    sessions.set(session.id, session);
    handles.set(session.id, { browser, context, page });
    await captureSession(session, page);
    return publicSession(session);
  },

  /**
   * Lists the public view of all agent-owned sessions after expiring stale
   * ones. Returns an empty list when browser tools are disabled.
   */
  async listAgentSessions() {
    const settings = readSettings();
    if (!settings.enabled) {
      return [];
    }
    await expireStaleSessions();
    return ownerSessions(AGENT_OWNER_ID).map(publicSession);
  },

  /**
   * Looks up an agent-owned session by id.
   *
   * @returns The internal session record (not the public view).
   * @throws Error when browser tools are disabled, or when the session does
   *         not exist or belongs to another owner.
   */
  async getAgentSession(sessionId: string) {
    const settings = readSettings();
    if (!settings.enabled) {
      throw new Error('Browser agent tools are disabled.');
    }
    const session = sessions.get(sessionId);
    if (!session || session.ownerId !== AGENT_OWNER_ID) {
      throw new Error('Browser session not found.');
    }
    return session;
  },

  /**
   * Navigates the session's active page to `rawUrl` (after normalization),
   * waiting for `domcontentloaded` with a 30 s timeout, then refreshes the
   * session capture and clears the cursor.
   *
   * @throws Error when the session is missing, not `ready`, has no runtime
   *         handle, or when URL normalization or navigation fails.
   */
  async agentNavigate(sessionId: string, rawUrl: string) {
    await this.getAgentSession(sessionId);
    await expireStaleSessions();
    // Re-read after the await: the session may have been removed or expired meanwhile.
    const session = sessions.get(sessionId);
    if (!session || session.ownerId !== AGENT_OWNER_ID) {
      throw new Error('Browser session not found.');
    }
    if (session.status !== 'ready') {
      throw new Error(session.message || 'Browser session is not available.');
    }
    const page = requireActivePage(sessionId);
    const url = normalizeUrl(rawUrl);
    await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 30_000 });
    session.lastAction = `navigate:${url}`;
    session.cursor = null;
    await captureSession(session, page);
    return publicSession(session);
  },

  /**
   * Refreshes the session capture and returns it together with the page's
   * body text, truncated to 30,000 characters. A failing text read yields
   * an empty string.
   */
  async agentSnapshot(sessionId: string) {
    const session = await this.getAgentSession(sessionId);
    const page = requireActivePage(sessionId);
    await captureSession(session, page);
    const text = await page.locator('body').innerText({ timeout: 5_000 }).catch(() => '');
    return {
      session: publicSession(session),
      text: text.slice(0, 30_000),
    };
  },

  /**
   * Clicks a target on the active page. Target precedence is `selector`,
   * then `text` (non-exact match), then `x`/`y` coordinates.
   *
   * The cursor recorded on the session is resolved for the same target that
   * is clicked, and is resolved before the click because the click may
   * change or remove the element.
   *
   * @throws Error when no target is provided or the click fails.
   */
  async agentClick(sessionId: string, input: { selector?: string; text?: string; x?: number; y?: number }) {
    const session = await this.getAgentSession(sessionId);
    const page = requireActivePage(sessionId);

    // Narrow the input to the single target that will actually be clicked, so
    // extra fields (e.g. stray x/y next to a selector) cannot skew the cursor.
    const target = input.selector
      ? { selector: input.selector }
      : input.text
        ? { text: input.text }
        : { x: input.x, y: input.y };
    const cursor = await getAgentCursor(page, target);

    if (input.selector) {
      await page.locator(input.selector).first().click({ timeout: 10_000 });
    } else if (input.text) {
      await page.getByText(input.text, { exact: false }).first().click({ timeout: 10_000 });
    } else if (typeof input.x === 'number' && typeof input.y === 'number') {
      await page.mouse.click(input.x, input.y);
    } else {
      throw new Error('Provide selector, text, or x/y coordinates.');
    }

    session.lastAction = 'click';
    session.cursor = cursor;
    await captureSession(session, page);
    return publicSession(session);
  },

  /**
   * Enters text on the active page. With a `selector`, the first matching
   * element is filled and the cursor moves to it; without one, the text is
   * typed through the keyboard. When `submit` is set, Enter is pressed
   * afterwards.
   */
  async agentType(sessionId: string, input: { selector?: string; text: string; submit?: boolean }) {
    const session = await this.getAgentSession(sessionId);
    const page = requireActivePage(sessionId);
    if (input.selector) {
      await page.locator(input.selector).first().fill(input.text, { timeout: 10_000 });
      // Only the selector identifies the target; `input.text` is the value typed.
      session.cursor = await getAgentCursor(page, { selector: input.selector });
    } else {
      await page.keyboard.type(input.text);
    }
    if (input.submit) {
      await page.keyboard.press('Enter');
    }
    session.lastAction = 'type';
    await captureSession(session, page);
    return publicSession(session);
  },

  /**
   * Fills each field in order (first match per selector, 10 s timeout each).
   * The cursor moves to the first field when at least one field is given.
   */
  async agentFillForm(sessionId: string, fields: Array<{ selector: string; value: string }>) {
    const session = await this.getAgentSession(sessionId);
    const page = requireActivePage(sessionId);
    for (const field of fields) {
      await page.locator(field.selector).first().fill(field.value, { timeout: 10_000 });
    }
    session.lastAction = 'fill_form';
    if (fields[0]) {
      session.cursor = await getAgentCursor(page, { selector: fields[0].selector });
    }
    await captureSession(session, page);
    return publicSession(session);
  },

  /** Presses `key` on the active page's keyboard and refreshes the capture. */
  async agentPressKey(sessionId: string, key: string) {
    const session = await this.getAgentSession(sessionId);
    const page = requireActivePage(sessionId);
    await page.keyboard.press(key);
    session.lastAction = `press_key:${key}`;
    await captureSession(session, page);
    return publicSession(session);
  },

  /**
   * Selects `values` on the first element matching `selector` and moves the
   * cursor to that element.
   */
  async agentSelectOption(sessionId: string, selector: string, values: string[]) {
    const session = await this.getAgentSession(sessionId);
    const page = requireActivePage(sessionId);
    await page.locator(selector).first().selectOption(values, { timeout: 10_000 });
    session.lastAction = 'select_option';
    session.cursor = await getAgentCursor(page, { selector });
    await captureSession(session, page);
    return publicSession(session);
  },

  /**
   * Waits for text to appear, for the URL to match, or - when neither is
   * given - simply for the timeout to elapse.
   *
   * `timeoutMs` defaults to 5,000 and is clamped to the range 250-30,000.
   */
  async agentWaitFor(sessionId: string, input: { text?: string; url?: string; timeoutMs?: number }) {
    const session = await this.getAgentSession(sessionId);
    const page = requireActivePage(sessionId);
    const timeout = Math.max(250, Math.min(input.timeoutMs || 5_000, 30_000));
    if (input.text) {
      await page.getByText(input.text, { exact: false }).first().waitFor({ timeout });
    } else if (input.url) {
      await page.waitForURL(input.url, { timeout });
    } else {
      await page.waitForTimeout(timeout);
    }
    session.lastAction = 'wait_for';
    await captureSession(session, page);
    return publicSession(session);
  },

  /**
   * Manages the tabs (pages) of the session's browser context.
   *
   * - `list` (default): no change.
   * - `new`: opens a page, makes it active, and navigates to `url` if given.
   * - `select`: makes the page at `index` (default 0) active.
   * - `close`: closes the page at `index` (default: the active page). The
   *   active page only changes when it is no longer open afterwards; it then
   *   falls back to the first remaining page, or a fresh page if none remain.
   *
   * @returns The refreshed public session and the tab list with the active flag.
   * @throws Error with `Tab not found.` when the index does not resolve to a page.
   */
  async agentTabs(sessionId: string, input: { action?: 'list' | 'new' | 'select' | 'close'; index?: number; url?: string }) {
    const session = await this.getAgentSession(sessionId);
    const handle = handles.get(sessionId);
    if (!handle?.context || !handle?.page) {
      throw new Error('Browser runtime handle is not available.');
    }
    const action = input.action || 'list';
    if (action === 'new') {
      const page = await handle.context.newPage();
      handles.set(sessionId, { ...handle, page });
      if (input.url) {
        await this.agentNavigate(sessionId, input.url);
      }
    } else if (action === 'select') {
      const page = handle.context.pages()[input.index || 0];
      if (!page) {
        throw new Error('Tab not found.');
      }
      handles.set(sessionId, { ...handle, page });
    } else if (action === 'close') {
      const pages = handle.context.pages();
      const page = pages[input.index ?? pages.indexOf(handle.page)];
      if (!page) {
        throw new Error('Tab not found.');
      }
      await page.close();
      // Closing a background tab must not steal focus from the active one;
      // only pick a replacement when the active page is gone.
      const remaining = handle.context.pages();
      if (!remaining.includes(handle.page)) {
        handles.set(sessionId, { ...handle, page: remaining[0] || await handle.context.newPage() });
      }
    }
    const updatedHandle = handles.get(sessionId);
    const activePage = updatedHandle?.page || handle.page;
    await captureSession(session, activePage);
    return {
      session: publicSession(session),
      tabs: handle.context.pages().map((page: any, index: number) => ({
        index,
        url: page.url(),
        active: page === activePage,
      })),
    };
  },

  /**
   * Closes the session's browser and marks the session `stopped`. The record
   * itself is kept.
   *
   * @returns `{ stopped: false }` when the session is unknown or not
   *          agent-owned; otherwise `{ stopped: true, session }`.
   */
  async stopSession(sessionId: string) {
    const session = sessions.get(sessionId);
    if (!session || session.ownerId !== AGENT_OWNER_ID) {
      return { stopped: false };
    }
    await closeHandle(sessionId);
    session.status = 'stopped';
    session.updatedAt = new Date().toISOString();
    session.lastAction = 'stop';
    session.message = 'Browser session stopped. Create a new session to continue browsing.';
    return { stopped: true, session: publicSession(session) };
  },

  /**
   * Closes the session's browser and removes the session record.
   *
   * @returns `{ deleted: false }` when the session is unknown or not
   *          agent-owned; otherwise `{ deleted: true, sessionId }`.
   */
  async deleteSession(sessionId: string) {
    const session = sessions.get(sessionId);
    if (!session || session.ownerId !== AGENT_OWNER_ID) {
      return { deleted: false };
    }
    await closeHandle(sessionId);
    sessions.delete(sessionId);
    return { deleted: true, sessionId };
  },

  /**
   * Agent-facing variant of `stopSession`: throws instead of returning
   * `{ stopped: false }` when tools are disabled or the session is not found.
   */
  async agentStopSession(sessionId: string) {
    await this.getAgentSession(sessionId);
    return this.stopSession(sessionId);
  },

  /**
   * Closes every tracked browser handle and marks every session `stopped`
   * with `lastAction` set to `shutdown`.
   *
   * @param message Text stored on each session; defaults to the server
   *        shutdown message.
   */
  async stopAllSessions(message = 'Browser session stopped during server shutdown.') {
    await Promise.all([...sessions.keys()].map(async (sessionId) => {
      await closeHandle(sessionId);
      const session = sessions.get(sessionId);
      if (session) {
        session.status = 'stopped';
        session.updatedAt = new Date().toISOString();
        session.lastAction = 'shutdown';
        session.message = message;
      }
    }));
  },
};
// Best-effort cleanup: stop all browser sessions when Node is about to exit
// because its event loop has drained. Node does not emit 'beforeExit' on
// explicit termination (e.g. process.exit() or an uncaught exception), so this
// does not cover every shutdown path. `once` keeps the asynchronous cleanup
// from re-triggering the handler.
process.once('beforeExit', function stopBrowserSessionsBeforeExit(): void {
  void browserUseService.stopAllSessions();
});

View File

@@ -0,0 +1,10 @@
import assert from 'node:assert/strict';
import test from 'node:test';
import { browserUseService } from '@/modules/browser-use/browser-use.service.js';
// No agent session has been created in this process, so the monitor list
// returned by the service is expected to be empty.
test('browser monitor list starts empty without agent sessions', async () => {
  const listed = await browserUseService.listSessions();
  const expected: unknown[] = [];
  assert.deepEqual(listed, expected);
});

View File

@@ -1,5 +1,6 @@
export { sessionSynchronizerService } from './services/session-synchronizer.service.js';
export { providerSkillsService } from './services/skills.service.js';
export { providerMcpService } from './services/mcp.service.js';
export { initializeSessionsWatcher } from './services/sessions-watcher.service.js';
export { closeSessionsWatcher } from './services/sessions-watcher.service.js';

View File

@@ -80,4 +80,30 @@ export const providerMcpService = {
return results;
},
/**
 * Removes one MCP server from every provider in the live provider registry,
 * mirroring `addMcpServerToAllProviders` so callers do not have to maintain
 * their own provider list.
 *
 * Providers are processed one at a time. A failure for one provider is
 * recorded in its result entry (`removed: false` plus `error`) and does not
 * stop the remaining providers from being processed.
 *
 * @param input Server name plus optional scope and workspace path, passed
 *        unchanged to each provider's `mcp.removeServer`.
 * @returns One result entry per provider, in registry order.
 */
async removeMcpServerFromAllProviders(
  input: { name: string; scope?: McpScope; workspacePath?: string },
): Promise<Array<{ provider: LLMProvider; removed: boolean; error?: string }>> {
  type RemovalOutcome = { provider: LLMProvider; removed: boolean; error?: string };
  const outcomes: RemovalOutcome[] = [];

  for (const provider of providerRegistry.listProviders()) {
    let outcome: RemovalOutcome;
    try {
      const { removed } = await provider.mcp.removeServer(input);
      outcome = { provider: provider.id, removed };
    } catch (error) {
      const message = error instanceof Error ? error.message : 'Unknown error';
      outcome = { provider: provider.id, removed: false, error: message };
    }
    outcomes.push(outcome);
  }

  return outcomes;
},
};