diff --git a/server/modules/browser-use/browser-use.routes.ts b/server/modules/browser-use/browser-use.routes.ts index cab7e592..16f65d7e 100644 --- a/server/modules/browser-use/browser-use.routes.ts +++ b/server/modules/browser-use/browser-use.routes.ts @@ -119,6 +119,33 @@ router.post('/sessions/:sessionId/navigate', async (req: AuthenticatedRequest, r } }); +router.post('/sessions/:sessionId/click', async (req: AuthenticatedRequest, res) => { + try { + const session = await browserUseService.userClick(requireUser(req), readParam(req.params.sessionId), { + x: Number(req.body?.x), + y: Number(req.body?.y), + }); + res.json({ success: true, data: { session } }); + } catch (error) { + res.status(400).json({ + success: false, + error: error instanceof Error ? error.message : 'Failed to click browser session.', + }); + } +}); + +router.post('/sessions/:sessionId/press-key', async (req: AuthenticatedRequest, res) => { + try { + const session = await browserUseService.userPressKey(requireUser(req), readParam(req.params.sessionId), String(req.body?.key || '')); + res.json({ success: true, data: { session } }); + } catch (error) { + res.status(400).json({ + success: false, + error: error instanceof Error ? error.message : 'Failed to send browser key input.', + }); + } +}); + router.post('/sessions/:sessionId/agent-access/grant', async (req: AuthenticatedRequest, res) => { try { const session = await browserUseService.grantAgentAccess(requireUser(req), readParam(req.params.sessionId)); @@ -155,4 +182,16 @@ router.post('/sessions/:sessionId/stop', async (req: AuthenticatedRequest, res) } }); +router.delete('/sessions/:sessionId', async (req: AuthenticatedRequest, res) => { + try { + const result = await browserUseService.deleteSession(requireUser(req), readParam(req.params.sessionId)); + res.json({ success: true, data: result }); + } catch (error) { + res.status(400).json({ + success: false, + error: error instanceof Error ? error.message : 'Failed to delete browser session.', + }); + } +}); + export default router; diff --git a/server/modules/browser-use/browser-use.service.ts b/server/modules/browser-use/browser-use.service.ts index e4ab8bec..06fe255b 100644 --- a/server/modules/browser-use/browser-use.service.ts +++ b/server/modules/browser-use/browser-use.service.ts @@ -38,6 +38,15 @@ type BrowserUseSession = { message: string | null; agentAccessEnabled: boolean; profileName: string | null; + viewport: { + width: number; + height: number; + } | null; + cursor: { + x: number; + y: number; + actor: 'agent' | 'user'; + } | null; }; type PublicBrowserUseSession = Omit; @@ -397,6 +406,10 @@ function ownerSessions(ownerId: string): BrowserUseSession[] { return [...sessions.values()].filter((session) => session.ownerId === ownerId); } +function canAccessSession(ownerId: string, session: BrowserUseSession): boolean { + return session.ownerId === ownerId || session.ownerId === AGENT_OWNER_ID || session.agentAccessEnabled; +} + async function closeHandle(sessionId: string): Promise { const handle = handles.get(sessionId); handles.delete(sessionId); @@ -428,9 +441,36 @@ async function captureSession(session: BrowserUseSession, page: any): Promise null); session.url = page.url() || session.url; + session.viewport = page.viewportSize?.() || session.viewport; session.updatedAt = new Date().toISOString(); } +async function getActionPoint(page: any, input: { selector?: string; text?: string; x?: number; y?: number }) { + if (typeof input.x === 'number' && typeof input.y === 'number') { + return { x: input.x, y: input.y }; + } + + const locator = input.selector + ? page.locator(input.selector).first() + : input.text + ? page.getByText(input.text, { exact: false }).first() + : null; + + if (!locator) { + return null; + } + + const box = await locator.boundingBox().catch(() => null); + if (!box) { + return null; + } + + return { + x: Math.round(box.x + box.width / 2), + y: Math.round(box.y + box.height / 2), + }; +} + export const browserUseService = { async getSettings() { return readSettings(); @@ -530,7 +570,7 @@ export const browserUseService = { const ownerId = getOwnerId(owner); await expireStaleSessions(); return [...sessions.values()] - .filter((session) => session.ownerId === ownerId || session.ownerId === AGENT_OWNER_ID || session.agentAccessEnabled) + .filter((session) => canAccessSession(ownerId, session)) .map(publicSession); }, @@ -556,6 +596,8 @@ export const browserUseService = { message: null, agentAccessEnabled: options?.agentAccessEnabled ?? createdBy === 'agent', profileName, + viewport: { width: 1440, height: 900 }, + cursor: null, }; const activeOwnerSessions = ownerSessions(ownerId).filter((item) => item.status === 'ready'); @@ -667,7 +709,7 @@ export const browserUseService = { await expireStaleSessions(); const session = sessions.get(sessionId); - if (!session || session.ownerId !== ownerId) { + if (!session || !canAccessSession(ownerId, session)) { throw new Error('Browser session not found.'); } @@ -683,6 +725,7 @@ export const browserUseService = { const url = await normalizeUrl(rawUrl); await handle.page.goto(url, { waitUntil: 'domcontentloaded', timeout: 30_000 }); session.lastAction = `navigate:${url}`; + session.cursor = null; await captureSession(session, handle.page); return publicSession(session); }, @@ -726,6 +769,7 @@ export const browserUseService = { if (!handle?.page) { throw new Error('Browser runtime handle is not available.'); } + const point = await getActionPoint(handle.page, input); if (input.selector) { await handle.page.locator(input.selector).first().click({ timeout: 10_000 }); @@ -738,6 +782,7 @@ export const browserUseService = { } session.lastAction = 'click'; + session.cursor = point ? { ...point, actor: 'agent' } : null; await captureSession(session, handle.page); return publicSession(session); }, @@ -751,6 +796,9 @@ export const browserUseService = { if (input.selector) { await handle.page.locator(input.selector).first().fill(input.text, { timeout: 10_000 }); + session.cursor = await getActionPoint(handle.page, input).then((point) => ( + point ? { ...point, actor: 'agent' as const } : null + )); } else { await handle.page.keyboard.type(input.text); } @@ -773,6 +821,11 @@ export const browserUseService = { await handle.page.locator(field.selector).first().fill(field.value, { timeout: 10_000 }); } session.lastAction = 'fill_form'; + if (fields[0]) { + session.cursor = await getActionPoint(handle.page, { selector: fields[0].selector }).then((point) => ( + point ? { ...point, actor: 'agent' as const } : null + )); + } await captureSession(session, handle.page); return publicSession(session); }, @@ -797,6 +850,9 @@ export const browserUseService = { } await handle.page.locator(selector).first().selectOption(values, { timeout: 10_000 }); session.lastAction = 'select_option'; + session.cursor = await getActionPoint(handle.page, { selector }).then((point) => ( + point ? { ...point, actor: 'agent' as const } : null + )); await captureSession(session, handle.page); return publicSession(session); }, @@ -864,7 +920,7 @@ export const browserUseService = { async stopSession(owner: BrowserUseOwner, sessionId: string) { const ownerId = getOwnerId(owner); const session = sessions.get(sessionId); - if (!session || (session.ownerId !== ownerId && session.ownerId !== AGENT_OWNER_ID && !session.agentAccessEnabled)) { + if (!session || !canAccessSession(ownerId, session)) { return { stopped: false }; } @@ -873,10 +929,65 @@ export const browserUseService = { session.status = 'stopped'; session.updatedAt = new Date().toISOString(); session.lastAction = 'stop'; - session.message = 'Browser session stopped.'; + session.message = 'Browser session stopped. Create a new session to continue browsing.'; return { stopped: true, session: publicSession(session) }; }, + async deleteSession(owner: BrowserUseOwner, sessionId: string) { + const ownerId = getOwnerId(owner); + const session = sessions.get(sessionId); + if (!session || !canAccessSession(ownerId, session)) { + return { deleted: false }; + } + + await closeHandle(sessionId); + sessions.delete(sessionId); + return { deleted: true, sessionId }; + }, + + async userClick(owner: BrowserUseOwner, sessionId: string, input: { x: number; y: number }) { + const ownerId = getOwnerId(owner); + const session = sessions.get(sessionId); + if (!session || !canAccessSession(ownerId, session)) { + throw new Error('Browser session not found.'); + } + if (session.status !== 'ready') { + throw new Error(session.message || 'Browser session is not available.'); + } + + const handle = handles.get(sessionId); + if (!handle?.page) { + throw new Error('Browser runtime handle is not available.'); + } + + await handle.page.mouse.click(input.x, input.y); + session.lastAction = 'click'; + session.cursor = { x: input.x, y: input.y, actor: 'user' }; + await captureSession(session, handle.page); + return publicSession(session); + }, + + async userPressKey(owner: BrowserUseOwner, sessionId: string, key: string) { + const ownerId = getOwnerId(owner); + const session = sessions.get(sessionId); + if (!session || !canAccessSession(ownerId, session)) { + throw new Error('Browser session not found.'); + } + if (session.status !== 'ready') { + throw new Error(session.message || 'Browser session is not available.'); + } + + const handle = handles.get(sessionId); + if (!handle?.page) { + throw new Error('Browser runtime handle is not available.'); + } + + await handle.page.keyboard.press(key); + session.lastAction = `press_key:${key}`; + await captureSession(session, handle.page); + return publicSession(session); + }, + async agentStopSession(sessionId: string) { await this.getAgentSession(sessionId); return this.stopSession({ id: AGENT_OWNER_ID }, sessionId); diff --git a/src/components/browser-use/view/BrowserUsePanel.tsx b/src/components/browser-use/view/BrowserUsePanel.tsx index c6f1a128..73cc41d9 100644 --- a/src/components/browser-use/view/BrowserUsePanel.tsx +++ b/src/components/browser-use/view/BrowserUsePanel.tsx @@ -1,5 +1,5 @@ -import { useCallback, useEffect, useMemo, useState } from 'react'; -import { Bot, Download, ExternalLink, Globe, Loader2, MonitorPlay, Navigation, Pause, RefreshCw, Share2, Square, X } from 'lucide-react'; +import { useCallback, useEffect, useMemo, useRef, useState, type KeyboardEvent, type MouseEvent } from 'react'; +import { Bot, Download, Expand, ExternalLink, Globe, Loader2, MonitorPlay, Navigation, RefreshCw, Share2, Square, Trash2, X } from 'lucide-react'; import { Badge, Button } from '../../../shared/view/ui'; import { authenticatedFetch } from '../../../utils/api'; @@ -29,6 +29,15 @@ type BrowserUseSession = { agentAccessEnabled: boolean; createdBy: 'user' | 'agent'; profileName: string | null; + viewport: { + width: number; + height: number; + } | null; + cursor: { + x: number; + y: number; + actor: 'agent' | 'user'; + } | null; }; type BrowserUsePanelProps = { @@ -50,7 +59,9 @@ export default function BrowserUsePanel({ isVisible }: BrowserUsePanelProps) { const [targetUrl, setTargetUrl] = useState('https://example.com'); const [isBusy, setIsBusy] = useState(false); const [isInstalling, setIsInstalling] = useState(false); + const [isFullscreen, setIsFullscreen] = useState(false); const [error, setError] = useState(null); + const viewerRef = useRef(null); const selectedSession = useMemo( () => sessions.find((session) => session.id === selectedSessionId) || sessions[0] || null, @@ -78,6 +89,11 @@ export default function BrowserUsePanel({ isVisible }: BrowserUsePanelProps) { void refresh().catch((err) => setError(err instanceof Error ? err.message : 'Failed to load Browser Use')); }, [isVisible, refresh]); + useEffect(() => { + if (!selectedSession?.url) return; + setTargetUrl(selectedSession.url); + }, [selectedSession?.id, selectedSession?.url]); + const runAction = useCallback(async (action: () => Promise) => { setIsBusy(true); setError(null); @@ -114,6 +130,13 @@ export default function BrowserUsePanel({ isVisible }: BrowserUsePanelProps) { await readJson(response); }); + const deleteSession = () => runAction(async () => { + if (!selectedSession) return; + const response = await authenticatedFetch(`/api/browser-use/sessions/${selectedSession.id}`, { method: 'DELETE' }); + await readJson(response); + setIsFullscreen(false); + }); + const grantAgentAccess = () => runAction(async () => { if (!selectedSession) return; const response = await authenticatedFetch(`/api/browser-use/sessions/${selectedSession.id}/agent-access/grant`, { method: 'POST' }); @@ -126,7 +149,7 @@ export default function BrowserUsePanel({ isVisible }: BrowserUsePanelProps) { await readJson(response); }); - const installRuntime = () => runAction(async () => { + const installBrowserBinaries = () => runAction(async () => { setIsInstalling(true); try { const response = await authenticatedFetch('/api/browser-use/runtime/install', { method: 'POST' }); @@ -136,7 +159,99 @@ export default function BrowserUsePanel({ isVisible }: BrowserUsePanelProps) { } }); - const canInstallRuntime = Boolean(status?.enabled && (!status.playwrightInstalled || !status.chromiumInstalled)); + const clickViewer = useCallback((event: MouseEvent) => { + if (!selectedSession || selectedSession.status !== 'ready' || !selectedSession.viewport) { + return; + } + viewerRef.current?.focus(); + + const bounds = event.currentTarget.getBoundingClientRect(); + const scaleX = selectedSession.viewport.width / bounds.width; + const scaleY = selectedSession.viewport.height / bounds.height; + const x = Math.round((event.clientX - bounds.left) * scaleX); + const y = Math.round((event.clientY - bounds.top) * scaleY); + + void runAction(async () => { + const response = await authenticatedFetch(`/api/browser-use/sessions/${selectedSession.id}/click`, { + method: 'POST', + body: JSON.stringify({ x, y }), + }); + await readJson(response); + }); + }, [runAction, selectedSession]); + + const keyForEvent = useCallback((event: KeyboardEvent) => { + if (event.key === ' ') return 'Space'; + return event.key; + }, []); + + const pressViewerKey = useCallback((event: KeyboardEvent) => { + if (!selectedSession || selectedSession.status !== 'ready') { + return; + } + + const ignoredKeys = new Set(['Shift', 'Control', 'Alt', 'Meta', 'CapsLock']); + if (ignoredKeys.has(event.key)) { + return; + } + + event.preventDefault(); + const key = keyForEvent(event); + void runAction(async () => { + const response = await authenticatedFetch(`/api/browser-use/sessions/${selectedSession.id}/press-key`, { + method: 'POST', + body: JSON.stringify({ key }), + }); + await readJson(response); + }); + }, [keyForEvent, runAction, selectedSession]); + + const needsBrowserBinaries = Boolean(status?.enabled && (!status.playwrightInstalled || !status.chromiumInstalled)); + + const cursorStyle = selectedSession?.cursor && selectedSession.viewport + ? { + left: `${(selectedSession.cursor.x / selectedSession.viewport.width) * 100}%`, + top: `${(selectedSession.cursor.y / selectedSession.viewport.height) * 100}%`, + } + : null; + + const renderBrowserSurface = (fullscreen = false) => ( +
+ {selectedSession?.screenshotDataUrl ? ( +
+ Browser session screenshot + {cursorStyle && ( +
+
+
+ )} +
+ ) : ( +
+ +
+ {selectedSession?.message || 'Create a browser session to start.'} +
+

+ Install browser binaries from this panel or enable Browser Use from Settings. +

+
+ )} +
+ ); return (
@@ -164,21 +279,25 @@ export default function BrowserUsePanel({ isVisible }: BrowserUsePanelProps) {
+ {isFullscreen && selectedSession && ( +
+
+
+
{selectedSession.title || selectedSession.url || 'Browser session'}
+ +
+ {renderBrowserSurface(true)} +
+
+ )} ); } diff --git a/src/components/settings/view/tabs/browser-use-settings/BrowserUseSettingsTab.tsx b/src/components/settings/view/tabs/browser-use-settings/BrowserUseSettingsTab.tsx index 8a2a1ef8..d19f4593 100644 --- a/src/components/settings/view/tabs/browser-use-settings/BrowserUseSettingsTab.tsx +++ b/src/components/settings/view/tabs/browser-use-settings/BrowserUseSettingsTab.tsx @@ -1,5 +1,5 @@ import { useCallback, useEffect, useState } from 'react'; -import { Download, Loader2, MonitorPlay, RefreshCw } from 'lucide-react'; +import { Download, ExternalLink, Loader2 } from 'lucide-react'; import { Button } from '../../../../../shared/view/ui'; import { authenticatedFetch } from '../../../../../utils/api'; @@ -77,7 +77,7 @@ export default function BrowserUseSettingsTab() { } }; - const installRuntime = async () => { + const installBrowserBinaries = async () => { setIsInstalling(true); setError(null); try { @@ -85,13 +85,13 @@ export default function BrowserUseSettingsTab() { await readJson(response); await loadState(); } catch (err) { - setError(err instanceof Error ? err.message : 'Failed to install Browser Use runtime'); + setError(err instanceof Error ? err.message : 'Failed to install browser binaries'); } finally { setIsInstalling(false); } }; - const needsRuntime = Boolean(settings.enabled && status && (!status.playwrightInstalled || !status.chromiumInstalled)); + const needsBrowserBinaries = Boolean(settings.enabled && status && (!status.playwrightInstalled || !status.chromiumInstalled)); return (
@@ -100,6 +100,24 @@ export default function BrowserUseSettingsTab() { description="Manage local Playwright browser sessions used for captured browser screenshots and guarded navigation." > +
+
+
How Browser Use Works
+

+ Learn what agents can do with browser sessions, when to share access, and what the current limitations are. +

+
+ + Open Guide + + +
+ -
-
-
-
- - Runtime -
-

- {status?.message || (isLoading ? 'Checking Browser Use runtime...' : 'Runtime status unavailable.')} -

- {status && ( -
- - Playwright: {status.playwrightInstalled ? 'installed' : 'missing'} - - - Chromium: {status.chromiumInstalled ? 'installed' : 'missing'} - + {(needsBrowserBinaries || error) && ( +
+ {needsBrowserBinaries && ( +
+
+
Browser binaries required
+

+ {status?.message || 'Install the browser binaries needed to create Browser Use sessions.'} +

+
+ + Playwright: {status?.playwrightInstalled ? 'installed' : 'missing'} + + + Chromium: {status?.chromiumInstalled ? 'installed' : 'missing'} + +
- )} -
-
- - {needsRuntime && ( - - )} -
-
+
+ )} - {error && ( -
- {error} -
- )} -
+ {error && ( +
+ {error} +
+ )} +
+ )}