diff --git a/server/browser-use-mcp.ts b/server/browser-use-mcp.ts new file mode 100644 index 00000000..22a4c3e4 --- /dev/null +++ b/server/browser-use-mcp.ts @@ -0,0 +1,390 @@ +#!/usr/bin/env node +import './load-env.js'; + +type JsonRpcRequest = { + jsonrpc: '2.0'; + id?: string | number | null; + method: string; + params?: Record; +}; + +type ToolDefinition = { + name: string; + description: string; + inputSchema: Record; +}; + +const textResponse = (text: string) => ({ + content: [{ type: 'text', text }], +}); + +const jsonResponse = (value: unknown) => textResponse(JSON.stringify(value, null, 2)); + +const readString = (value: unknown, name: string): string => { + if (typeof value !== 'string' || value.trim() === '') { + throw new Error(`${name} is required.`); + } + return value.trim(); +}; + +const readOptionalString = (value: unknown): string | undefined => + typeof value === 'string' && value.trim() ? value.trim() : undefined; + +const readNumber = (value: unknown): number | undefined => + typeof value === 'number' && Number.isFinite(value) ? value : undefined; + +const apiUrl = (process.env.CLOUDCLI_BROWSER_USE_API_URL || 'http://127.0.0.1:3001/api/browser-use-mcp').replace(/\/$/, ''); +const apiToken = process.env.CLOUDCLI_BROWSER_USE_MCP_TOKEN || ''; + +async function callBrowserUseApi(toolName: string, input: Record) { + if (!apiToken) { + throw new Error('CLOUDCLI_BROWSER_USE_MCP_TOKEN is not configured.'); + } + + const response = await fetch(`${apiUrl}/tools/${encodeURIComponent(toolName)}`, { + method: 'POST', + headers: { + Authorization: `Bearer ${apiToken}`, + 'Content-Type': 'application/json', + }, + body: JSON.stringify(input), + }); + const data = await response.json() as { success?: boolean; data?: unknown; error?: string }; + if (!response.ok || data.success === false) { + throw new Error(data.error || `Browser Use API request failed (${response.status})`); + } + return data.data; +} + +const sessionIdSchema = { + type: 'object', + properties: { + sessionId: { type: 'string', description: 'Browser Use session id.' }, + }, + required: ['sessionId'], +}; + +const tools: ToolDefinition[] = [ + { + name: 'browser_create_session', + description: 'Create a temporary Browser Use session that the agent can control. Optionally provide a background profileName to reuse cookies and storage.', + inputSchema: { + type: 'object', + properties: { + profileName: { type: 'string', description: 'Optional background profile name for persistent browser storage.' }, + }, + }, + }, + { + name: 'browser_list_sessions', + description: 'List Browser Use sessions currently available to agents.', + inputSchema: { type: 'object', properties: {} }, + }, + { + name: 'browser_snapshot', + description: 'Capture current page metadata, screenshot data URL, and visible body text for a Browser Use session.', + inputSchema: sessionIdSchema, + }, + { + name: 'browser_take_screenshot', + description: 'Capture the latest screenshot for a Browser Use session.', + inputSchema: sessionIdSchema, + }, + { + name: 'browser_navigate', + description: 'Navigate a Browser Use session to an HTTP or HTTPS URL.', + inputSchema: { + type: 'object', + properties: { + sessionId: { type: 'string' }, + url: { type: 'string' }, + }, + required: ['sessionId', 'url'], + }, + }, + { + name: 'browser_click', + description: 'Click an element by CSS selector, visible text, or x/y coordinates.', + inputSchema: { + type: 'object', + properties: { + sessionId: { type: 'string' }, + selector: { type: 'string' }, + text: { type: 'string' }, + x: { type: 'number' }, + y: { type: 'number' }, + }, + required: ['sessionId'], + }, + }, + { + name: 'browser_type', + description: 'Type text into the focused page or fill a CSS selector. Set submit to press Enter after typing.', + inputSchema: { + type: 'object', + properties: { + sessionId: { type: 'string' }, + selector: { type: 'string' }, + text: { type: 'string' }, + submit: { type: 'boolean' }, + }, + required: ['sessionId', 'text'], + }, + }, + { + name: 'browser_fill_form', + description: 'Fill multiple form fields using CSS selectors.', + inputSchema: { + type: 'object', + properties: { + sessionId: { type: 'string' }, + fields: { + type: 'array', + items: { + type: 'object', + properties: { + selector: { type: 'string' }, + value: { type: 'string' }, + }, + required: ['selector', 'value'], + }, + }, + }, + required: ['sessionId', 'fields'], + }, + }, + { + name: 'browser_press_key', + description: 'Press a keyboard key, for example Enter, Escape, Tab, or Control+A.', + inputSchema: { + type: 'object', + properties: { + sessionId: { type: 'string' }, + key: { type: 'string' }, + }, + required: ['sessionId', 'key'], + }, + }, + { + name: 'browser_select_option', + description: 'Select option values in a select element found by CSS selector.', + inputSchema: { + type: 'object', + properties: { + sessionId: { type: 'string' }, + selector: { type: 'string' }, + values: { type: 'array', items: { type: 'string' } }, + }, + required: ['sessionId', 'selector', 'values'], + }, + }, + { + name: 'browser_wait_for', + description: 'Wait for visible text, a URL pattern, or a short timeout.', + inputSchema: { + type: 'object', + properties: { + sessionId: { type: 'string' }, + text: { type: 'string' }, + url: { type: 'string' }, + timeoutMs: { type: 'number' }, + }, + required: ['sessionId'], + }, + }, + { + name: 'browser_tabs', + description: 'List, open, select, or close tabs in a Browser Use session.', + inputSchema: { + type: 'object', + properties: { + sessionId: { type: 'string' }, + action: { type: 'string', enum: ['list', 'new', 'select', 'close'] }, + index: { type: 'number' }, + url: { type: 'string' }, + }, + required: ['sessionId'], + }, + }, + { + name: 'browser_close_session', + description: 'Stop a Browser Use session controlled by agents.', + inputSchema: sessionIdSchema, + }, +]; + +async function callTool(name: string, args: Record) { + switch (name) { + case 'browser_create_session': + return jsonResponse(await callBrowserUseApi(name, { + profileName: readOptionalString(args.profileName), + })); + case 'browser_list_sessions': + return jsonResponse(await callBrowserUseApi(name, {})); + case 'browser_snapshot': + return jsonResponse(await callBrowserUseApi(name, { sessionId: readString(args.sessionId, 'sessionId') })); + case 'browser_take_screenshot': { + return jsonResponse(await callBrowserUseApi(name, { sessionId: readString(args.sessionId, 'sessionId') })); + } + case 'browser_navigate': + return jsonResponse(await callBrowserUseApi(name, { + sessionId: readString(args.sessionId, 'sessionId'), + url: readString(args.url, 'url'), + })); + case 'browser_click': + return jsonResponse(await callBrowserUseApi(name, { + sessionId: readString(args.sessionId, 'sessionId'), + selector: readOptionalString(args.selector), + text: readOptionalString(args.text), + x: readNumber(args.x), + y: readNumber(args.y), + })); + case 'browser_type': + return jsonResponse(await callBrowserUseApi(name, { + sessionId: readString(args.sessionId, 'sessionId'), + selector: readOptionalString(args.selector), + text: readString(args.text, 'text'), + submit: args.submit === true, + })); + case 'browser_fill_form': { + const fields = Array.isArray(args.fields) + ? args.fields.map((field) => { + const record = field as Record; + return { + selector: readString(record.selector, 'field.selector'), + value: readString(record.value, 'field.value'), + }; + }) + : []; + return jsonResponse(await callBrowserUseApi(name, { + sessionId: readString(args.sessionId, 'sessionId'), + fields, + })); + } + case 'browser_press_key': + return jsonResponse(await callBrowserUseApi(name, { + sessionId: readString(args.sessionId, 'sessionId'), + key: readString(args.key, 'key'), + })); + case 'browser_select_option': + return jsonResponse(await callBrowserUseApi(name, { + sessionId: readString(args.sessionId, 'sessionId'), + selector: readString(args.selector, 'selector'), + values: Array.isArray(args.values) ? args.values.filter((value): value is string => typeof value === 'string') : [], + })); + case 'browser_wait_for': + return jsonResponse(await callBrowserUseApi(name, { + sessionId: readString(args.sessionId, 'sessionId'), + text: readOptionalString(args.text), + url: readOptionalString(args.url), + timeoutMs: readNumber(args.timeoutMs), + })); + case 'browser_tabs': + return jsonResponse(await callBrowserUseApi(name, { + sessionId: readString(args.sessionId, 'sessionId'), + action: args.action === 'new' || args.action === 'select' || args.action === 'close' || args.action === 'list' + ? args.action + : undefined, + index: readNumber(args.index), + url: readOptionalString(args.url), + })); + case 'browser_close_session': + return jsonResponse(await callBrowserUseApi(name, { sessionId: readString(args.sessionId, 'sessionId') })); + default: + throw new Error(`Unknown tool: ${name}`); + } +} + +async function handleMessage(message: JsonRpcRequest) { + if (message.method === 'initialize') { + return { + protocolVersion: '2024-11-05', + capabilities: { tools: {} }, + serverInfo: { name: 'cloudcli-browser-use', version: '1.0.0' }, + }; + } + + if (message.method === 'tools/list') { + return { tools }; + } + + if (message.method === 'tools/call') { + const params = message.params || {}; + const name = readString(params.name, 'name'); + const args = (params.arguments && typeof params.arguments === 'object' + ? params.arguments + : {}) as Record; + return callTool(name, args); + } + + if (message.method.startsWith('notifications/')) { + return undefined; + } + + throw new Error(`Unsupported method: ${message.method}`); +} + +function writeMessage(message: Record) { + const payload = JSON.stringify(message); + process.stdout.write(`Content-Length: ${Buffer.byteLength(payload, 'utf8')}\r\n\r\n${payload}`); +} + +function sendResult(id: string | number | null | undefined, result: unknown) { + if (id === undefined) { + return; + } + writeMessage({ jsonrpc: '2.0', id, result }); +} + +function sendError(id: string | number | null | undefined, error: unknown) { + if (id === undefined) { + return; + } + writeMessage({ + jsonrpc: '2.0', + id, + error: { + code: -32000, + message: error instanceof Error ? error.message : String(error), + }, + }); +} + +let buffer = Buffer.alloc(0); + +process.stdin.on('data', (chunk) => { + buffer = Buffer.concat([buffer, chunk]); + while (true) { + const headerEnd = buffer.indexOf('\r\n\r\n'); + if (headerEnd === -1) { + return; + } + + const header = buffer.slice(0, headerEnd).toString('utf8'); + const lengthMatch = /Content-Length:\s*(\d+)/i.exec(header); + if (!lengthMatch) { + buffer = buffer.slice(headerEnd + 4); + continue; + } + + const length = Number.parseInt(lengthMatch[1], 10); + const messageStart = headerEnd + 4; + const messageEnd = messageStart + length; + if (buffer.length < messageEnd) { + return; + } + + const rawMessage = buffer.slice(messageStart, messageEnd).toString('utf8'); + buffer = buffer.slice(messageEnd); + + void (async () => { + const request = JSON.parse(rawMessage) as JsonRpcRequest; + try { + const result = await handleMessage(request); + sendResult(request.id, result); + } catch (error) { + sendError(request.id, error); + } + })(); + } +}); diff --git a/server/cli.js b/server/cli.js index 9fa99ae3..e6daacc4 100755 --- a/server/cli.js +++ b/server/cli.js @@ -8,6 +8,7 @@ * (no args) - Start the server (default) * start - Start the server * sandbox - Manage Docker sandbox environments + * browser-use-mcp - Run Browser Use MCP stdio server * status - Show configuration and data locations * help - Show help information * version - Show version information @@ -605,6 +606,10 @@ async function startServer() { await import('./index.js'); } +async function startBrowserUseMcp() { + await import('./browser-use-mcp.js'); +} + // Parse CLI arguments function parseArgs(args) { const parsed = { command: 'start', options: {} }; @@ -658,6 +663,9 @@ async function main() { case 'sandbox': await sandboxCommand(remainingArgs || []); break; + case 'browser-use-mcp': + await startBrowserUseMcp(); + break; case 'status': case 'info': showStatus(); diff --git a/server/index.js b/server/index.js index 0a812920..3fcd438a 100755 --- a/server/index.js +++ b/server/index.js @@ -62,6 +62,7 @@ import geminiRoutes from './routes/gemini.js'; import pluginsRoutes from './routes/plugins.js'; import providerRoutes from './modules/providers/provider.routes.js'; import browserUseRoutes from './modules/browser-use/browser-use.routes.js'; +import browserUseMcpRoutes from './modules/browser-use/browser-use-mcp.routes.js'; import { browserUseService } from './modules/browser-use/browser-use.service.js'; import { startEnabledPluginServers, stopAllPlugins, getPluginPort } from './utils/plugin-process-manager.js'; import { initializeDatabase, projectsDb, sessionsDb } from './modules/database/index.js'; @@ -195,6 +196,9 @@ app.use('/api/gemini', authenticateToken, geminiRoutes); // Plugins API Routes (protected) app.use('/api/plugins', authenticateToken, pluginsRoutes); +// Browser Use MCP bridge API (local token protected) +app.use('/api/browser-use-mcp', browserUseMcpRoutes); + // Browser Use API Routes (protected) app.use('/api/browser-use', authenticateToken, browserUseRoutes); diff --git a/server/modules/browser-use/browser-use-mcp.routes.ts b/server/modules/browser-use/browser-use-mcp.routes.ts new file mode 100644 index 00000000..335ffa18 --- /dev/null +++ b/server/modules/browser-use/browser-use-mcp.routes.ts @@ -0,0 +1,120 @@ +import express from 'express'; + +import { browserUseService } from '@/modules/browser-use/browser-use.service.js'; + +const router = express.Router(); + +function readBearerToken(header: unknown): string | null { + if (typeof header !== 'string') { + return null; + } + const match = /^Bearer\s+(.+)$/i.exec(header.trim()); + return match?.[1] || null; +} + +router.use((req, res, next) => { + const expected = browserUseService.getMcpToken(); + const token = readBearerToken(req.headers.authorization) || String(req.headers['x-browser-use-mcp-token'] || ''); + if (!token || token !== expected) { + res.status(401).json({ success: false, error: 'Invalid Browser Use MCP token.' }); + return; + } + next(); +}); + +router.post('/tools/:toolName', async (req, res) => { + try { + const input = (req.body && typeof req.body === 'object' ? req.body : {}) as Record; + const sessionId = typeof input.sessionId === 'string' ? input.sessionId : ''; + const toolName = req.params.toolName; + let result: unknown; + + switch (toolName) { + case 'browser_create_session': + result = await browserUseService.createAgentSession({ + profileName: typeof input.profileName === 'string' ? input.profileName : null, + }); + break; + case 'browser_list_sessions': + result = await browserUseService.listAgentSessions(); + break; + case 'browser_snapshot': + case 'browser_take_screenshot': + result = await browserUseService.agentSnapshot(sessionId); + break; + case 'browser_navigate': + result = await browserUseService.agentNavigate(sessionId, String(input.url || '')); + break; + case 'browser_click': + result = await browserUseService.agentClick(sessionId, { + selector: typeof input.selector === 'string' ? input.selector : undefined, + text: typeof input.text === 'string' ? input.text : undefined, + x: typeof input.x === 'number' ? input.x : undefined, + y: typeof input.y === 'number' ? input.y : undefined, + }); + break; + case 'browser_type': + result = await browserUseService.agentType(sessionId, { + selector: typeof input.selector === 'string' ? input.selector : undefined, + text: String(input.text || ''), + submit: input.submit === true, + }); + break; + case 'browser_fill_form': + result = await browserUseService.agentFillForm( + sessionId, + Array.isArray(input.fields) + ? input.fields.map((field) => { + const record = field as Record; + return { + selector: String(record.selector || ''), + value: String(record.value || ''), + }; + }) + : [], + ); + break; + case 'browser_press_key': + result = await browserUseService.agentPressKey(sessionId, String(input.key || '')); + break; + case 'browser_select_option': + result = await browserUseService.agentSelectOption( + sessionId, + String(input.selector || ''), + Array.isArray(input.values) ? input.values.filter((value): value is string => typeof value === 'string') : [], + ); + break; + case 'browser_wait_for': + result = await browserUseService.agentWaitFor(sessionId, { + text: typeof input.text === 'string' ? input.text : undefined, + url: typeof input.url === 'string' ? input.url : undefined, + timeoutMs: typeof input.timeoutMs === 'number' ? input.timeoutMs : undefined, + }); + break; + case 'browser_tabs': + result = await browserUseService.agentTabs(sessionId, { + action: input.action === 'new' || input.action === 'select' || input.action === 'close' || input.action === 'list' + ? input.action + : undefined, + index: typeof input.index === 'number' ? input.index : undefined, + url: typeof input.url === 'string' ? input.url : undefined, + }); + break; + case 'browser_close_session': + result = await browserUseService.agentStopSession(sessionId); + break; + default: + res.status(404).json({ success: false, error: `Unknown Browser Use MCP tool "${toolName}".` }); + return; + } + + res.json({ success: true, data: result }); + } catch (error) { + res.status(400).json({ + success: false, + error: error instanceof Error ? error.message : 'Browser Use MCP tool failed.', + }); + } +}); + +export default router; diff --git a/server/modules/browser-use/browser-use.routes.ts b/server/modules/browser-use/browser-use.routes.ts index c730dd53..cab7e592 100644 --- a/server/modules/browser-use/browser-use.routes.ts +++ b/server/modules/browser-use/browser-use.routes.ts @@ -56,6 +56,18 @@ router.put('/settings', async (req, res) => { } }); +router.post('/agent-tools/register', async (_req, res) => { + try { + const result = await browserUseService.registerAgentMcp(); + res.status(201).json({ success: true, data: result }); + } catch (error) { + res.status(400).json({ + success: false, + error: error instanceof Error ? error.message : 'Failed to register Browser Use MCP.', + }); + } +}); + router.post('/runtime/install', async (_req, res) => { try { const result = await browserUseService.installRuntime(); @@ -107,6 +119,30 @@ router.post('/sessions/:sessionId/navigate', async (req: AuthenticatedRequest, r } }); +router.post('/sessions/:sessionId/agent-access/grant', async (req: AuthenticatedRequest, res) => { + try { + const session = await browserUseService.grantAgentAccess(requireUser(req), readParam(req.params.sessionId)); + res.json({ success: true, data: { session } }); + } catch (error) { + res.status(400).json({ + success: false, + error: error instanceof Error ? error.message : 'Failed to grant agent access.', + }); + } +}); + +router.post('/sessions/:sessionId/agent-access/revoke', async (req: AuthenticatedRequest, res) => { + try { + const session = await browserUseService.revokeAgentAccess(requireUser(req), readParam(req.params.sessionId)); + res.json({ success: true, data: { session } }); + } catch (error) { + res.status(400).json({ + success: false, + error: error instanceof Error ? error.message : 'Failed to revoke agent access.', + }); + } +}); + router.post('/sessions/:sessionId/stop', async (req: AuthenticatedRequest, res) => { try { const result = await browserUseService.stopSession(requireUser(req), readParam(req.params.sessionId)); diff --git a/server/modules/browser-use/browser-use.service.ts b/server/modules/browser-use/browser-use.service.ts index bb53147f..e4ab8bec 100644 --- a/server/modules/browser-use/browser-use.service.ts +++ b/server/modules/browser-use/browser-use.service.ts @@ -1,18 +1,24 @@ import { createRequire } from 'node:module'; -import { randomUUID } from 'node:crypto'; +import { randomBytes, randomUUID } from 'node:crypto'; import { spawn } from 'node:child_process'; import dns from 'node:dns/promises'; import fs from 'node:fs'; +import os from 'node:os'; import net from 'node:net'; +import path from 'node:path'; import { appConfigDb } from '@/modules/database/repositories/app-config.js'; +import { providerMcpService } from '@/modules/providers/services/mcp.service.js'; +import { getModuleDir } from '@/utils/runtime-paths.js'; const require = createRequire(import.meta.url); +const __dirname = getModuleDir(import.meta.url); const IS_PLATFORM = process.env.VITE_IS_PLATFORM === 'true'; const MAX_SESSIONS_PER_OWNER = Number.parseInt(process.env.CLOUDCLI_BROWSER_USE_MAX_SESSIONS_PER_OWNER || '3', 10); const SESSION_TTL_MS = Number.parseInt(process.env.CLOUDCLI_BROWSER_USE_SESSION_TTL_MS || String(30 * 60 * 1000), 10); const ALLOW_PRIVATE_NETWORKS = process.env.CLOUDCLI_BROWSER_USE_ALLOW_PRIVATE_NETWORKS === '1'; const BROWSER_USE_SETTINGS_KEY = 'browser_use_settings'; +const BROWSER_USE_MCP_TOKEN_KEY = 'browser_use_mcp_token'; type BrowserUseRuntime = 'cloud' | 'local'; type BrowserUseSessionStatus = 'ready' | 'stopped' | 'unavailable'; @@ -20,6 +26,7 @@ type BrowserUseSessionStatus = 'ready' | 'stopped' | 'unavailable'; type BrowserUseSession = { id: string; ownerId: string; + createdBy: 'user' | 'agent'; runtime: BrowserUseRuntime; status: BrowserUseSessionStatus; url: string | null; @@ -29,12 +36,15 @@ type BrowserUseSession = { updatedAt: string; lastAction: string | null; message: string | null; + agentAccessEnabled: boolean; + profileName: string | null; }; type PublicBrowserUseSession = Omit; type RuntimeHandle = { browser?: any; + context?: any; page?: any; }; @@ -44,6 +54,7 @@ type BrowserUseOwner = { type BrowserUseSettings = { enabled: boolean; + agentToolsEnabled: boolean; }; type RuntimeReadiness = { @@ -62,7 +73,12 @@ let lastInstallMessage: string | null = null; const DEFAULT_SETTINGS: BrowserUseSettings = { enabled: false, + agentToolsEnabled: false, }; +const AGENT_OWNER_ID = 'agent'; +const PROFILE_ROOT = path.join(os.homedir(), '.cloudcli', 'browser-use', 'profiles'); +const MCP_SERVER_NAME = 'cloudcli-browser-use'; +const MCP_PROVIDERS = ['claude', 'codex', 'cursor', 'gemini', 'opencode']; function getRuntime(): BrowserUseRuntime { return IS_PLATFORM ? 'cloud' : 'local'; @@ -78,6 +94,7 @@ function readSettings(): BrowserUseSettings { const parsed = JSON.parse(raw) as Partial; return { enabled: parsed.enabled === true, + agentToolsEnabled: parsed.agentToolsEnabled === true, }; } catch (error: any) { console.warn('[Browser Use] Failed to read settings:', error?.message || error); @@ -88,12 +105,23 @@ function readSettings(): BrowserUseSettings { function writeSettings(settings: BrowserUseSettings): BrowserUseSettings { const normalized = { enabled: settings.enabled === true, + agentToolsEnabled: settings.agentToolsEnabled === true, }; appConfigDb.set(BROWSER_USE_SETTINGS_KEY, JSON.stringify(normalized)); return normalized; } +function getOrCreateMcpToken(): string { + const existing = appConfigDb.get(BROWSER_USE_MCP_TOKEN_KEY); + if (existing) { + return existing; + } + const token = randomBytes(32).toString('hex'); + appConfigDb.set(BROWSER_USE_MCP_TOKEN_KEY, token); + return token; +} + function getSetupMessage(settings: BrowserUseSettings, readiness: RuntimeReadiness): string { if (!settings.enabled) { return 'Browser Use is disabled in settings.'; @@ -118,6 +146,45 @@ function getPlaywright(): any | null { } } +function getMcpCommand(): { command: string; args: string[] } { + const serverDir = path.resolve(__dirname, '..', '..'); + const mcpScriptPath = path.join(serverDir, 'browser-use-mcp.js'); + if (fs.existsSync(mcpScriptPath)) { + return { + command: process.execPath, + args: [mcpScriptPath], + }; + } + + return { + command: 'cloudcli', + args: ['browser-use-mcp'], + }; +} + +function getMcpApiUrl(): string { + const port = process.env.SERVER_PORT || process.env.PORT || '3001'; + return `http://127.0.0.1:${port}/api/browser-use-mcp`; +} + +function normalizeProfileName(profileName?: string | null): string | null { + const normalized = String(profileName || '').trim(); + if (!normalized) { + return null; + } + + return normalized.slice(0, 80); +} + +function getProfilePath(profileName: string): string { + const safeName = profileName + .toLowerCase() + .replace(/[^a-z0-9._-]+/g, '-') + .replace(/^-+|-+$/g, '') + .slice(0, 80) || 'default'; + return path.join(PROFILE_ROOT, safeName); +} + function getRuntimeReadiness(): RuntimeReadiness { const playwright = getPlaywright(); const readiness: RuntimeReadiness = { @@ -333,6 +400,7 @@ function ownerSessions(ownerId: string): BrowserUseSession[] { async function closeHandle(sessionId: string): Promise { const handle = handles.get(sessionId); handles.delete(sessionId); + await handle?.context?.close?.().catch(() => undefined); await handle?.browser?.close().catch(() => undefined); } @@ -370,10 +438,24 @@ export const browserUseService = { async updateSettings(settings: Partial) { const current = readSettings(); - return writeSettings({ + const nextSettings = { ...current, enabled: typeof settings.enabled === 'boolean' ? settings.enabled : current.enabled, - }); + agentToolsEnabled: typeof settings.agentToolsEnabled === 'boolean' + ? settings.agentToolsEnabled + : current.agentToolsEnabled, + }; + if (!nextSettings.enabled) { + nextSettings.agentToolsEnabled = false; + } + + const next = writeSettings(nextSettings); + if (next.agentToolsEnabled) { + await this.registerAgentMcp(); + } else if (current.agentToolsEnabled) { + await this.unregisterAgentMcp(); + } + return next; }, async getStatus() { @@ -389,13 +471,53 @@ export const browserUseService = { chromiumInstalled: readiness.chromiumInstalled, installInProgress: readiness.installInProgress, sessionCount: sessions.size, - mcpRecommended: true, + agentToolsEnabled: settings.agentToolsEnabled, + mcpRecommended: !settings.agentToolsEnabled, message: available ? 'Browser Use runtime is available.' : getSetupMessage(settings, readiness), }; }, + async registerAgentMcp() { + const { command, args } = getMcpCommand(); + const results = await providerMcpService.addMcpServerToAllProviders({ + name: MCP_SERVER_NAME, + scope: 'user', + transport: 'stdio', + command, + args, + env: { + CLOUDCLI_BROWSER_USE_MCP_TOKEN: getOrCreateMcpToken(), + CLOUDCLI_BROWSER_USE_API_URL: getMcpApiUrl(), + }, + }); + return { name: MCP_SERVER_NAME, command, args, results }; + }, + + getMcpToken() { + return getOrCreateMcpToken(); + }, + + async unregisterAgentMcp() { + const results = await Promise.all(MCP_PROVIDERS.map(async (provider) => { + try { + const result = await providerMcpService.removeProviderMcpServer(provider, { + name: MCP_SERVER_NAME, + scope: 'user', + }); + return { provider, removed: result.removed }; + } catch (error) { + return { + provider, + removed: false, + error: error instanceof Error ? error.message : 'Unknown error', + }; + } + })); + return { name: MCP_SERVER_NAME, results }; + }, + async installRuntime() { const result = await installRuntime(); return { @@ -407,17 +529,22 @@ export const browserUseService = { async listSessions(owner: BrowserUseOwner) { const ownerId = getOwnerId(owner); await expireStaleSessions(); - return ownerSessions(ownerId).map(publicSession); + return [...sessions.values()] + .filter((session) => session.ownerId === ownerId || session.ownerId === AGENT_OWNER_ID || session.agentAccessEnabled) + .map(publicSession); }, - async createSession(owner: BrowserUseOwner) { + async createSession(owner: BrowserUseOwner, options?: { createdBy?: 'user' | 'agent'; profileName?: string | null; agentAccessEnabled?: boolean }) { const ownerId = getOwnerId(owner); await expireStaleSessions(); + const createdBy = options?.createdBy ?? 'user'; + const profileName = normalizeProfileName(options?.profileName); const now = new Date().toISOString(); const session: BrowserUseSession = { id: randomUUID(), ownerId, + createdBy, runtime: getRuntime(), status: 'unavailable', url: null, @@ -427,6 +554,8 @@ export const browserUseService = { updatedAt: now, lastAction: 'create', message: null, + agentAccessEnabled: options?.agentAccessEnabled ?? createdBy === 'agent', + profileName, }; const activeOwnerSessions = ownerSessions(ownerId).filter((item) => item.status === 'ready'); @@ -442,20 +571,97 @@ export const browserUseService = { return publicSession(session); } - const browser = await readiness.playwright.chromium.launch({ + let browser: any | undefined; + let context: any | undefined; + let page: any; + const launchOptions = { headless: true, args: ['--disable-dev-shm-usage'], - }); - const page = await browser.newPage({ viewport: { width: 1440, height: 900 } }); + }; + const contextOptions = { + viewport: { width: 1440, height: 900 }, + serviceWorkers: 'block', + }; + + if (profileName) { + fs.mkdirSync(PROFILE_ROOT, { recursive: true }); + context = await readiness.playwright.chromium.launchPersistentContext(getProfilePath(profileName), { + ...launchOptions, + ...contextOptions, + }); + page = context.pages()[0] || await context.newPage(); + } else { + browser = await readiness.playwright.chromium.launch(launchOptions); + context = await browser.newContext(contextOptions); + page = await context.newPage(); + } await attachRequestGuard(page); session.status = 'ready'; session.message = 'Browser session is ready.'; sessions.set(session.id, session); - handles.set(session.id, { browser, page }); + handles.set(session.id, { browser, context, page }); await captureSession(session, page); return publicSession(session); }, + async grantAgentAccess(owner: BrowserUseOwner, sessionId: string) { + const ownerId = getOwnerId(owner); + const session = sessions.get(sessionId); + if (!session || (session.ownerId !== ownerId && session.ownerId !== AGENT_OWNER_ID)) { + throw new Error('Browser session not found.'); + } + session.agentAccessEnabled = true; + session.updatedAt = new Date().toISOString(); + session.lastAction = 'agent_access:grant'; + return publicSession(session); + }, + + async revokeAgentAccess(owner: BrowserUseOwner, sessionId: string) { + const ownerId = getOwnerId(owner); + const session = sessions.get(sessionId); + if (!session || (session.ownerId !== ownerId && session.ownerId !== AGENT_OWNER_ID)) { + throw new Error('Browser session not found.'); + } + session.agentAccessEnabled = false; + session.updatedAt = new Date().toISOString(); + session.lastAction = 'agent_access:revoke'; + return publicSession(session); + }, + + async listAgentSessions() { + const settings = readSettings(); + if (!settings.enabled || !settings.agentToolsEnabled) { + return []; + } + await expireStaleSessions(); + return [...sessions.values()] + .filter((session) => session.agentAccessEnabled || session.ownerId === AGENT_OWNER_ID) + .map(publicSession); + }, + + async createAgentSession(options?: { profileName?: string | null }) { + const settings = readSettings(); + if (!settings.enabled || !settings.agentToolsEnabled) { + throw new Error('Browser Use agent tools are disabled.'); + } + return this.createSession( + { id: AGENT_OWNER_ID }, + { createdBy: 'agent', profileName: options?.profileName, agentAccessEnabled: true }, + ); + }, + + async getAgentSession(sessionId: string) { + const settings = readSettings(); + if (!settings.enabled || !settings.agentToolsEnabled) { + throw new Error('Browser Use agent tools are disabled.'); + } + const session = sessions.get(sessionId); + if (!session || (!session.agentAccessEnabled && session.ownerId !== AGENT_OWNER_ID)) { + throw new Error('Browser session is not shared with agents.'); + } + return session; + }, + async navigate(owner: BrowserUseOwner, sessionId: string, rawUrl: string) { const ownerId = getOwnerId(owner); await expireStaleSessions(); @@ -481,10 +687,184 @@ export const browserUseService = { return publicSession(session); }, + async agentNavigate(sessionId: string, rawUrl: string) { + await this.getAgentSession(sessionId); + return this.navigate({ id: AGENT_OWNER_ID }, sessionId, rawUrl).catch(async (error) => { + const session = await this.getAgentSession(sessionId); + if (session.ownerId !== AGENT_OWNER_ID) { + const url = await normalizeUrl(rawUrl); + const handle = handles.get(sessionId); + if (!handle?.page) { + throw new Error('Browser runtime handle is not available.'); + } + await handle.page.goto(url, { waitUntil: 'domcontentloaded', timeout: 30_000 }); + session.lastAction = `navigate:${url}`; + await captureSession(session, handle.page); + return publicSession(session); + } + throw error; + }); + }, + + async agentSnapshot(sessionId: string) { + const session = await this.getAgentSession(sessionId); + const handle = handles.get(sessionId); + if (!handle?.page) { + throw new Error('Browser runtime handle is not available.'); + } + await captureSession(session, handle.page); + const text = await handle.page.locator('body').innerText({ timeout: 5_000 }).catch(() => ''); + return { + session: publicSession(session), + text: text.slice(0, 30_000), + }; + }, + + async agentClick(sessionId: string, input: { selector?: string; text?: string; x?: number; y?: number }) { + const session = await this.getAgentSession(sessionId); + const handle = handles.get(sessionId); + if (!handle?.page) { + throw new Error('Browser runtime handle is not available.'); + } + + if (input.selector) { + await handle.page.locator(input.selector).first().click({ timeout: 10_000 }); + } else if (input.text) { + await handle.page.getByText(input.text, { exact: false }).first().click({ timeout: 10_000 }); + } else if (typeof input.x === 'number' && typeof input.y === 'number') { + await handle.page.mouse.click(input.x, input.y); + } else { + throw new Error('Provide selector, text, or x/y coordinates.'); + } + + session.lastAction = 'click'; + await captureSession(session, handle.page); + return publicSession(session); + }, + + async agentType(sessionId: string, input: { selector?: string; text: string; submit?: boolean }) { + const session = await this.getAgentSession(sessionId); + const handle = handles.get(sessionId); + if (!handle?.page) { + throw new Error('Browser runtime handle is not available.'); + } + + if (input.selector) { + await handle.page.locator(input.selector).first().fill(input.text, { timeout: 10_000 }); + } else { + await handle.page.keyboard.type(input.text); + } + if (input.submit) { + await handle.page.keyboard.press('Enter'); + } + + session.lastAction = 'type'; + await captureSession(session, handle.page); + return publicSession(session); + }, + + async agentFillForm(sessionId: string, fields: Array<{ selector: string; value: string }>) { + const session = await this.getAgentSession(sessionId); + const handle = handles.get(sessionId); + if (!handle?.page) { + throw new Error('Browser runtime handle is not available.'); + } + for (const field of fields) { + await handle.page.locator(field.selector).first().fill(field.value, { timeout: 10_000 }); + } + session.lastAction = 'fill_form'; + await captureSession(session, handle.page); + return publicSession(session); + }, + + async agentPressKey(sessionId: string, key: string) { + const session = await this.getAgentSession(sessionId); + const handle = handles.get(sessionId); + if (!handle?.page) { + throw new Error('Browser runtime handle is not available.'); + } + await handle.page.keyboard.press(key); + session.lastAction = `press_key:${key}`; + await captureSession(session, handle.page); + return publicSession(session); + }, + + async agentSelectOption(sessionId: string, selector: string, values: string[]) { + const session = await this.getAgentSession(sessionId); + const handle = handles.get(sessionId); + if (!handle?.page) { + throw new Error('Browser runtime handle is not available.'); + } + await handle.page.locator(selector).first().selectOption(values, { timeout: 10_000 }); + session.lastAction = 'select_option'; + await captureSession(session, handle.page); + return publicSession(session); + }, + + async agentWaitFor(sessionId: string, input: { text?: string; url?: string; timeoutMs?: number }) { + const session = await this.getAgentSession(sessionId); + const handle = handles.get(sessionId); + if (!handle?.page) { + throw new Error('Browser runtime handle is not available.'); + } + const timeout = Math.max(250, Math.min(input.timeoutMs || 5_000, 30_000)); + if (input.text) { + await handle.page.getByText(input.text, { exact: false }).first().waitFor({ timeout }); + } else if (input.url) { + await handle.page.waitForURL(input.url, { timeout }); + } else { + await handle.page.waitForTimeout(timeout); + } + session.lastAction = 'wait_for'; + await captureSession(session, handle.page); + return publicSession(session); + }, + + async agentTabs(sessionId: string, input: { action?: 'list' | 'new' | 'select' | 'close'; index?: number; url?: string }) { + const session = await this.getAgentSession(sessionId); + const handle = handles.get(sessionId); + if (!handle?.context || !handle?.page) { + throw new Error('Browser runtime handle is not available.'); + } + const action = input.action || 'list'; + if (action === 'new') { + const page = await handle.context.newPage(); + handles.set(sessionId, { ...handle, page }); + await attachRequestGuard(page); + if (input.url) { + await this.agentNavigate(sessionId, input.url); + } + } else if (action === 'select') { + const page = handle.context.pages()[input.index || 0]; + if (!page) { + throw new Error('Tab not found.'); + } + handles.set(sessionId, { ...handle, page }); + } else if (action === 'close') { + const pages = handle.context.pages(); + const page = pages[input.index ?? pages.indexOf(handle.page)]; + if (!page) { + throw new Error('Tab not found.'); + } + await page.close(); + handles.set(sessionId, { ...handle, page: handle.context.pages()[0] || await handle.context.newPage() }); + } + const updatedHandle = handles.get(sessionId); + await captureSession(session, updatedHandle?.page || handle.page); + return { + session: publicSession(session), + tabs: handle.context.pages().map((page: any, index: number) => ({ + index, + url: page.url(), + active: page === (updatedHandle?.page || handle.page), + })), + }; + }, + async stopSession(owner: BrowserUseOwner, sessionId: string) { const ownerId = getOwnerId(owner); const session = sessions.get(sessionId); - if (!session || session.ownerId !== ownerId) { + if (!session || (session.ownerId !== ownerId && session.ownerId !== AGENT_OWNER_ID && !session.agentAccessEnabled)) { return { stopped: false }; } @@ -497,6 +877,11 @@ export const browserUseService = { return { stopped: true, session: publicSession(session) }; }, + async agentStopSession(sessionId: string) { + await this.getAgentSession(sessionId); + return this.stopSession({ id: AGENT_OWNER_ID }, sessionId); + }, + async stopAllSessions() { await Promise.all([...sessions.keys()].map(async (sessionId) => { await closeHandle(sessionId); diff --git a/src/components/browser-use/view/BrowserUsePanel.tsx b/src/components/browser-use/view/BrowserUsePanel.tsx index 22d1153b..41e25e8e 100644 --- a/src/components/browser-use/view/BrowserUsePanel.tsx +++ b/src/components/browser-use/view/BrowserUsePanel.tsx @@ -1,5 +1,5 @@ import { useCallback, useEffect, useMemo, useState } from 'react'; -import { Download, ExternalLink, Globe, Loader2, MonitorPlay, Navigation, Pause, RefreshCw, Square } from 'lucide-react'; +import { Bot, Download, ExternalLink, Globe, Loader2, MonitorPlay, Navigation, Pause, RefreshCw, Share2, Square, X } from 'lucide-react'; import { Badge, Button } from '../../../shared/view/ui'; import { authenticatedFetch } from '../../../utils/api'; @@ -12,6 +12,7 @@ type BrowserUseStatus = { chromiumInstalled: boolean; installInProgress: boolean; sessionCount: number; + agentToolsEnabled: boolean; mcpRecommended: boolean; message: string; }; @@ -27,6 +28,9 @@ type BrowserUseSession = { updatedAt: string; lastAction: string | null; message: string | null; + agentAccessEnabled: boolean; + createdBy: 'user' | 'agent'; + profileName: string | null; }; type BrowserUsePanelProps = { @@ -112,6 +116,18 @@ export default function BrowserUsePanel({ isVisible }: BrowserUsePanelProps) { await readJson(response); }); + const grantAgentAccess = () => runAction(async () => { + if (!selectedSession) return; + const response = await authenticatedFetch(`/api/browser-use/sessions/${selectedSession.id}/agent-access/grant`, { method: 'POST' }); + await readJson(response); + }); + + const revokeAgentAccess = () => runAction(async () => { + if (!selectedSession) return; + const response = await authenticatedFetch(`/api/browser-use/sessions/${selectedSession.id}/agent-access/revoke`, { method: 'POST' }); + await readJson(response); + }); + const installRuntime = () => runAction(async () => { setIsInstalling(true); try { @@ -138,7 +154,7 @@ export default function BrowserUsePanel({ isVisible }: BrowserUsePanelProps) { )}

- Managed Playwright browser sessions with owner-scoped screenshots and navigation. + Create browser sessions, watch agent activity, and decide which sessions agents may control.

@@ -159,6 +175,11 @@ export default function BrowserUsePanel({ isVisible }: BrowserUsePanelProps) {
Runtime
{status?.available ? 'Available' : 'Setup required'}

{status?.message || 'Loading Browser Use status...'}

+ {status?.enabled && ( +
+ Agent tools: {status.agentToolsEnabled ? 'enabled' : 'disabled in settings'} +
+ )} {canInstallRuntime && (
+
+ {session.createdBy === 'agent' && ( + agent + )} + {session.agentAccessEnabled && ( + + shared + + )} + {session.profileName && ( + profile: {session.profileName} + )} +
{session.url || session.message || session.id}
))} @@ -215,6 +254,17 @@ export default function BrowserUsePanel({ isVisible }: BrowserUsePanelProps) { Go + {selectedSession?.agentAccessEnabled ? ( + + ) : ( + + )}