feat(browser-use): simplify agent session monitoring

This commit is contained in:
Simos Mikelatos
2026-06-17 17:04:11 +00:00
parent a0d56429a7
commit 086df034b4
7 changed files with 301 additions and 582 deletions

View File

@@ -4,20 +4,6 @@ import { browserUseService } from '@/modules/browser-use/browser-use.service.js'
const router = express.Router();
type AuthenticatedRequest = express.Request & {
user?: {
id?: string | number;
};
};
function requireUser(req: AuthenticatedRequest): { id: string | number } {
const userId = req.user?.id;
if (userId === undefined || userId === null) {
throw new Error('Authenticated user is required.');
}
return { id: userId };
}
function readParam(value: string | string[] | undefined): string {
return Array.isArray(value) ? value[0] || '' : value || '';
}
@@ -56,18 +42,6 @@ router.put('/settings', async (req, res) => {
}
});
router.post('/agent-tools/register', async (_req, res) => {
try {
const result = await browserUseService.registerAgentMcp();
res.status(201).json({ success: true, data: result });
} catch (error) {
res.status(400).json({
success: false,
error: error instanceof Error ? error.message : 'Failed to register Browser Use MCP.',
});
}
});
router.post('/runtime/install', async (_req, res) => {
try {
const result = await browserUseService.installRuntime();
@@ -84,9 +58,9 @@ router.post('/runtime/install', async (_req, res) => {
}
});
router.get('/sessions', async (req: AuthenticatedRequest, res) => {
router.get('/sessions', async (_req, res) => {
try {
res.json({ success: true, data: { sessions: await browserUseService.listSessions(requireUser(req)) } });
res.json({ success: true, data: { sessions: await browserUseService.listSessions() } });
} catch (error) {
res.status(401).json({
success: false,
@@ -95,90 +69,9 @@ router.get('/sessions', async (req: AuthenticatedRequest, res) => {
}
});
router.post('/sessions', async (req: AuthenticatedRequest, res) => {
router.post('/sessions/:sessionId/stop', async (req, res) => {
try {
const session = await browserUseService.createSession(requireUser(req));
res.status(session.status === 'unavailable' ? 202 : 201).json({ success: true, data: { session } });
} catch (error) {
res.status(400).json({
success: false,
error: error instanceof Error ? error.message : 'Failed to create browser session.',
});
}
});
router.post('/sessions/:sessionId/navigate', async (req: AuthenticatedRequest, res) => {
try {
const session = await browserUseService.navigate(requireUser(req), readParam(req.params.sessionId), String(req.body?.url || ''));
res.json({ success: true, data: { session } });
} catch (error) {
res.status(400).json({
success: false,
error: error instanceof Error ? error.message : 'Failed to navigate browser session.',
});
}
});
router.post('/sessions/:sessionId/click', async (req: AuthenticatedRequest, res) => {
try {
const x = Number(req.body?.x);
const y = Number(req.body?.y);
if (!Number.isFinite(x) || !Number.isFinite(y)) {
throw new Error('Click requires numeric x and y coordinates.');
}
const session = await browserUseService.userClick(requireUser(req), readParam(req.params.sessionId), { x, y });
res.json({ success: true, data: { session } });
} catch (error) {
res.status(400).json({
success: false,
error: error instanceof Error ? error.message : 'Failed to click browser session.',
});
}
});
router.post('/sessions/:sessionId/press-key', async (req: AuthenticatedRequest, res) => {
try {
const key = String(req.body?.key || '').trim();
if (!key) {
throw new Error('A key is required.');
}
const session = await browserUseService.userPressKey(requireUser(req), readParam(req.params.sessionId), key);
res.json({ success: true, data: { session } });
} catch (error) {
res.status(400).json({
success: false,
error: error instanceof Error ? error.message : 'Failed to send browser key input.',
});
}
});
router.post('/sessions/:sessionId/agent-access/grant', async (req: AuthenticatedRequest, res) => {
try {
const session = await browserUseService.grantAgentAccess(requireUser(req), readParam(req.params.sessionId));
res.json({ success: true, data: { session } });
} catch (error) {
res.status(400).json({
success: false,
error: error instanceof Error ? error.message : 'Failed to grant agent access.',
});
}
});
router.post('/sessions/:sessionId/agent-access/revoke', async (req: AuthenticatedRequest, res) => {
try {
const session = await browserUseService.revokeAgentAccess(requireUser(req), readParam(req.params.sessionId));
res.json({ success: true, data: { session } });
} catch (error) {
res.status(400).json({
success: false,
error: error instanceof Error ? error.message : 'Failed to revoke agent access.',
});
}
});
router.post('/sessions/:sessionId/stop', async (req: AuthenticatedRequest, res) => {
try {
const result = await browserUseService.stopSession(requireUser(req), readParam(req.params.sessionId));
const result = await browserUseService.stopSession(readParam(req.params.sessionId));
res.json({ success: true, data: result });
} catch (error) {
res.status(400).json({
@@ -188,9 +81,9 @@ router.post('/sessions/:sessionId/stop', async (req: AuthenticatedRequest, res)
}
});
router.delete('/sessions/:sessionId', async (req: AuthenticatedRequest, res) => {
router.delete('/sessions/:sessionId', async (req, res) => {
try {
const result = await browserUseService.deleteSession(requireUser(req), readParam(req.params.sessionId));
const result = await browserUseService.deleteSession(readParam(req.params.sessionId));
res.json({ success: true, data: result });
} catch (error) {
res.status(400).json({

View File

@@ -8,7 +8,7 @@ import net from 'node:net';
import path from 'node:path';
import { appConfigDb } from '@/modules/database/index.js';
import { providerMcpService } from '@/modules/providers/services/mcp.service.js';
import { providerMcpService } from '@/modules/providers/index.js';
import { getModuleDir } from '@/utils/runtime-paths.js';
const require = createRequire(import.meta.url);
@@ -26,7 +26,7 @@ type BrowserUseSessionStatus = 'ready' | 'stopped' | 'unavailable';
type BrowserUseSession = {
id: string;
ownerId: string;
createdBy: 'user' | 'agent';
createdBy: 'agent';
runtime: BrowserUseRuntime;
status: BrowserUseSessionStatus;
url: string | null;
@@ -36,7 +36,6 @@ type BrowserUseSession = {
updatedAt: string;
lastAction: string | null;
message: string | null;
agentAccessEnabled: boolean;
profileName: string | null;
viewport: {
width: number;
@@ -45,7 +44,7 @@ type BrowserUseSession = {
cursor: {
x: number;
y: number;
actor: 'agent' | 'user';
actor: 'agent';
} | null;
};
@@ -57,13 +56,8 @@ type RuntimeHandle = {
page?: any;
};
type BrowserUseOwner = {
id: string | number;
};
type BrowserUseSettings = {
enabled: boolean;
agentToolsEnabled: boolean;
};
type RuntimeReadiness = {
@@ -82,7 +76,6 @@ let lastInstallMessage: string | null = null;
const DEFAULT_SETTINGS: BrowserUseSettings = {
enabled: false,
agentToolsEnabled: false,
};
const AGENT_OWNER_ID = 'agent';
const PROFILE_ROOT = path.join(os.homedir(), '.cloudcli', 'browser-use', 'profiles');
@@ -103,7 +96,6 @@ function readSettings(): BrowserUseSettings {
const parsed = JSON.parse(raw) as Partial<BrowserUseSettings>;
return {
enabled: parsed.enabled === true,
agentToolsEnabled: parsed.agentToolsEnabled === true,
};
} catch (error: any) {
console.warn('[Browser Use] Failed to read settings:', error?.message || error);
@@ -114,7 +106,6 @@ function readSettings(): BrowserUseSettings {
function writeSettings(settings: BrowserUseSettings): BrowserUseSettings {
const normalized = {
enabled: settings.enabled === true,
agentToolsEnabled: settings.agentToolsEnabled === true,
};
appConfigDb.set(BROWSER_USE_SETTINGS_KEY, JSON.stringify(normalized));
@@ -244,7 +235,6 @@ function runCommand(command: string, args: string[]): Promise<void> {
fn();
};
// Guard against a stuck npm/playwright process hanging the install request forever.
const timer = setTimeout(() => {
child.kill('SIGKILL');
finish(() => reject(new Error(
@@ -309,14 +299,6 @@ async function installRuntime(): Promise<{ success: boolean; message: string }>
}
}
function getOwnerId(owner: BrowserUseOwner): string {
if (owner.id === undefined || owner.id === null || String(owner.id).trim() === '') {
throw new Error('Authenticated user is required.');
}
return String(owner.id);
}
function isPrivateIpv4(address: string): boolean {
const parts = address.split('.').map((part) => Number.parseInt(part, 10));
if (parts.length !== 4 || parts.some((part) => Number.isNaN(part) || part < 0 || part > 255)) {
@@ -410,8 +392,6 @@ async function assertAllowedBrowserRequest(rawUrl: string): Promise<void> {
}
async function attachRequestGuard(context: any): Promise<void> {
// Attach at the context level so the guard also covers popups, window.open targets,
// and any replacement pages created during the session lifecycle.
await context.route('**/*', async (route: any) => {
try {
await assertAllowedBrowserRequest(route.request().url());
@@ -431,10 +411,6 @@ function ownerSessions(ownerId: string): BrowserUseSession[] {
return [...sessions.values()].filter((session) => session.ownerId === ownerId);
}
function canAccessSession(ownerId: string, session: BrowserUseSession): boolean {
return session.ownerId === ownerId || session.ownerId === AGENT_OWNER_ID || session.agentAccessEnabled;
}
async function closeHandle(sessionId: string): Promise<void> {
const handle = handles.get(sessionId);
handles.delete(sessionId);
@@ -504,21 +480,15 @@ export const browserUseService = {
async updateSettings(settings: Partial<BrowserUseSettings>) {
const current = readSettings();
const nextSettings = {
...current,
enabled: typeof settings.enabled === 'boolean' ? settings.enabled : current.enabled,
agentToolsEnabled: typeof settings.agentToolsEnabled === 'boolean'
? settings.agentToolsEnabled
: current.agentToolsEnabled,
};
if (!nextSettings.enabled) {
nextSettings.agentToolsEnabled = false;
}
const next = writeSettings(nextSettings);
if (next.agentToolsEnabled) {
if (next.enabled) {
await this.registerAgentMcp();
} else if (current.agentToolsEnabled) {
} else if (current.enabled) {
await this.unregisterAgentMcp();
await this.stopAllSessions();
}
return next;
},
@@ -536,8 +506,6 @@ export const browserUseService = {
chromiumInstalled: readiness.chromiumInstalled,
installInProgress: readiness.installInProgress,
sessionCount: sessions.size,
agentToolsEnabled: settings.agentToolsEnabled,
mcpRecommended: !settings.agentToolsEnabled,
message: available
? 'Browser Use runtime is available.'
: getSetupMessage(settings, readiness),
@@ -591,25 +559,27 @@ export const browserUseService = {
};
},
async listSessions(owner: BrowserUseOwner) {
const ownerId = getOwnerId(owner);
async listSessions() {
await expireStaleSessions();
return [...sessions.values()]
.filter((session) => canAccessSession(ownerId, session))
.filter((session) => session.ownerId === AGENT_OWNER_ID)
.map(publicSession);
},
async createSession(owner: BrowserUseOwner, options?: { createdBy?: 'user' | 'agent'; profileName?: string | null; agentAccessEnabled?: boolean }) {
const ownerId = getOwnerId(owner);
async createAgentSession(options?: { profileName?: string | null }) {
const settings = readSettings();
if (!settings.enabled) {
throw new Error('Browser Use agent tools are disabled.');
}
await expireStaleSessions();
const createdBy = options?.createdBy ?? 'user';
const profileName = normalizeProfileName(options?.profileName);
const now = new Date().toISOString();
const session: BrowserUseSession = {
id: randomUUID(),
ownerId,
createdBy,
ownerId: AGENT_OWNER_ID,
createdBy: 'agent',
runtime: getRuntime(),
status: 'unavailable',
url: null,
@@ -619,18 +589,16 @@ export const browserUseService = {
updatedAt: now,
lastAction: 'create',
message: null,
agentAccessEnabled: options?.agentAccessEnabled ?? createdBy === 'agent',
profileName,
viewport: { width: 1440, height: 900 },
cursor: null,
};
const activeOwnerSessions = ownerSessions(ownerId).filter((item) => item.status === 'ready');
const activeOwnerSessions = ownerSessions(AGENT_OWNER_ID).filter((item) => item.status === 'ready');
if (activeOwnerSessions.length >= MAX_SESSIONS_PER_OWNER) {
throw new Error(`Browser Use is limited to ${MAX_SESSIONS_PER_OWNER} active sessions per user.`);
throw new Error(`Browser Use is limited to ${MAX_SESSIONS_PER_OWNER} active agent sessions.`);
}
const settings = readSettings();
const readiness = getRuntimeReadiness();
if (!settings.enabled || !readiness.playwrightInstalled || !readiness.chromiumInstalled || !readiness.playwright) {
session.message = getSetupMessage(settings, readiness);
@@ -671,70 +639,35 @@ export const browserUseService = {
return publicSession(session);
},
async grantAgentAccess(owner: BrowserUseOwner, sessionId: string) {
const ownerId = getOwnerId(owner);
const session = sessions.get(sessionId);
if (!session || (session.ownerId !== ownerId && session.ownerId !== AGENT_OWNER_ID)) {
throw new Error('Browser session not found.');
}
session.agentAccessEnabled = true;
session.updatedAt = new Date().toISOString();
session.lastAction = 'agent_access:grant';
return publicSession(session);
},
async revokeAgentAccess(owner: BrowserUseOwner, sessionId: string) {
const ownerId = getOwnerId(owner);
const session = sessions.get(sessionId);
if (!session || (session.ownerId !== ownerId && session.ownerId !== AGENT_OWNER_ID)) {
throw new Error('Browser session not found.');
}
session.agentAccessEnabled = false;
session.updatedAt = new Date().toISOString();
session.lastAction = 'agent_access:revoke';
return publicSession(session);
},
async listAgentSessions() {
const settings = readSettings();
if (!settings.enabled || !settings.agentToolsEnabled) {
if (!settings.enabled) {
return [];
}
await expireStaleSessions();
return [...sessions.values()]
.filter((session) => session.agentAccessEnabled || session.ownerId === AGENT_OWNER_ID)
.filter((session) => session.ownerId === AGENT_OWNER_ID)
.map(publicSession);
},
async createAgentSession(options?: { profileName?: string | null }) {
const settings = readSettings();
if (!settings.enabled || !settings.agentToolsEnabled) {
throw new Error('Browser Use agent tools are disabled.');
}
return this.createSession(
{ id: AGENT_OWNER_ID },
{ createdBy: 'agent', profileName: options?.profileName, agentAccessEnabled: true },
);
},
async getAgentSession(sessionId: string) {
const settings = readSettings();
if (!settings.enabled || !settings.agentToolsEnabled) {
if (!settings.enabled) {
throw new Error('Browser Use agent tools are disabled.');
}
const session = sessions.get(sessionId);
if (!session || (!session.agentAccessEnabled && session.ownerId !== AGENT_OWNER_ID)) {
throw new Error('Browser session is not shared with agents.');
if (!session || session.ownerId !== AGENT_OWNER_ID) {
throw new Error('Browser session not found.');
}
return session;
},
async navigate(owner: BrowserUseOwner, sessionId: string, rawUrl: string) {
const ownerId = getOwnerId(owner);
async agentNavigate(sessionId: string, rawUrl: string) {
await this.getAgentSession(sessionId);
await expireStaleSessions();
const session = sessions.get(sessionId);
if (!session || !canAccessSession(ownerId, session)) {
if (!session || session.ownerId !== AGENT_OWNER_ID) {
throw new Error('Browser session not found.');
}
@@ -755,25 +688,6 @@ export const browserUseService = {
return publicSession(session);
},
async agentNavigate(sessionId: string, rawUrl: string) {
await this.getAgentSession(sessionId);
return this.navigate({ id: AGENT_OWNER_ID }, sessionId, rawUrl).catch(async (error) => {
const session = await this.getAgentSession(sessionId);
if (session.ownerId !== AGENT_OWNER_ID) {
const url = await normalizeUrl(rawUrl);
const handle = handles.get(sessionId);
if (!handle?.page) {
throw new Error('Browser runtime handle is not available.');
}
await handle.page.goto(url, { waitUntil: 'domcontentloaded', timeout: 30_000 });
session.lastAction = `navigate:${url}`;
await captureSession(session, handle.page);
return publicSession(session);
}
throw error;
});
},
async agentSnapshot(sessionId: string) {
const session = await this.getAgentSession(sessionId);
const handle = handles.get(sessionId);
@@ -911,7 +825,6 @@ export const browserUseService = {
if (action === 'new') {
const page = await handle.context.newPage();
handles.set(sessionId, { ...handle, page });
// Request guard is attached at the context level, so new pages are already covered.
if (input.url) {
await this.agentNavigate(sessionId, input.url);
}
@@ -942,10 +855,9 @@ export const browserUseService = {
};
},
async stopSession(owner: BrowserUseOwner, sessionId: string) {
const ownerId = getOwnerId(owner);
async stopSession(sessionId: string) {
const session = sessions.get(sessionId);
if (!session || !canAccessSession(ownerId, session)) {
if (!session || session.ownerId !== AGENT_OWNER_ID) {
return { stopped: false };
}
@@ -958,10 +870,9 @@ export const browserUseService = {
return { stopped: true, session: publicSession(session) };
},
async deleteSession(owner: BrowserUseOwner, sessionId: string) {
const ownerId = getOwnerId(owner);
async deleteSession(sessionId: string) {
const session = sessions.get(sessionId);
if (!session || !canAccessSession(ownerId, session)) {
if (!session || session.ownerId !== AGENT_OWNER_ID) {
return { deleted: false };
}
@@ -970,52 +881,9 @@ export const browserUseService = {
return { deleted: true, sessionId };
},
async userClick(owner: BrowserUseOwner, sessionId: string, input: { x: number; y: number }) {
const ownerId = getOwnerId(owner);
const session = sessions.get(sessionId);
if (!session || !canAccessSession(ownerId, session)) {
throw new Error('Browser session not found.');
}
if (session.status !== 'ready') {
throw new Error(session.message || 'Browser session is not available.');
}
const handle = handles.get(sessionId);
if (!handle?.page) {
throw new Error('Browser runtime handle is not available.');
}
await handle.page.mouse.click(input.x, input.y);
session.lastAction = 'click';
session.cursor = { x: input.x, y: input.y, actor: 'user' };
await captureSession(session, handle.page);
return publicSession(session);
},
async userPressKey(owner: BrowserUseOwner, sessionId: string, key: string) {
const ownerId = getOwnerId(owner);
const session = sessions.get(sessionId);
if (!session || !canAccessSession(ownerId, session)) {
throw new Error('Browser session not found.');
}
if (session.status !== 'ready') {
throw new Error(session.message || 'Browser session is not available.');
}
const handle = handles.get(sessionId);
if (!handle?.page) {
throw new Error('Browser runtime handle is not available.');
}
await handle.page.keyboard.press(key);
session.lastAction = `press_key:${key}`;
await captureSession(session, handle.page);
return publicSession(session);
},
async agentStopSession(sessionId: string) {
await this.getAgentSession(sessionId);
return this.stopSession({ id: AGENT_OWNER_ID }, sessionId);
return this.stopSession(sessionId);
},
async stopAllSessions() {

View File

@@ -14,17 +14,8 @@ test('browser use blocks private and local network addresses by default', () =>
assert.equal(isBlockedBrowserUseAddress('2001:4860:4860::8888'), false);
});
test('browser use sessions are listed only for their owner', async () => {
const ownerA = { id: `owner-a-${Date.now()}-${Math.random()}` };
const ownerB = { id: `owner-b-${Date.now()}-${Math.random()}` };
test('browser use monitor list starts empty without agent sessions', async () => {
const sessions = await browserUseService.listSessions();
const ownerASession = await browserUseService.createSession(ownerA);
await browserUseService.createSession(ownerB);
const ownerASessions = await browserUseService.listSessions(ownerA);
const ownerBSessions = await browserUseService.listSessions(ownerB);
assert.equal(ownerASessions.some((session) => session.id === ownerASession.id), true);
assert.equal(ownerBSessions.some((session) => session.id === ownerASession.id), false);
assert.equal(Object.hasOwn(ownerASession, 'ownerId'), false);
assert.deepEqual(sessions, []);
});