feat: add desktop computer use runtime

This commit is contained in:
Simos Mikelatos
2026-06-17 19:01:15 +00:00
parent fc71fc7d2b
commit 7e6028b113
28 changed files with 4741 additions and 126 deletions

View File

@@ -0,0 +1,77 @@
name: Desktop macOS Branch Build
on:
workflow_dispatch:
jobs:
build-macos:
name: Build macOS desktop artifact
runs-on: macos-latest
permissions:
contents: read
steps:
- name: Checkout
uses: actions/checkout@v6
with:
fetch-depth: 0
- name: Set up Node.js
uses: actions/setup-node@v6
with:
node-version: 22
cache: npm
- name: Install dependencies
run: npm ci
- name: Typecheck
run: npm run typecheck
- name: Resolve artifact metadata
id: artifact
run: |
SAFE_REF="$(printf '%s' "${GITHUB_REF_NAME}" | tr -c 'A-Za-z0-9._-' '-')"
echo "name=CloudCLI-macOS-${SAFE_REF}-${GITHUB_RUN_NUMBER}" >> "$GITHUB_OUTPUT"
- name: Verify signing secrets are configured
run: |
test -n "$CSC_LINK"
test -n "$CSC_KEY_PASSWORD"
test -n "$APPLE_ID"
test -n "$APPLE_APP_SPECIFIC_PASSWORD"
test -n "$APPLE_TEAM_ID"
env:
CSC_LINK: ${{ secrets.CSC_LINK }}
CSC_KEY_PASSWORD: ${{ secrets.CSC_KEY_PASSWORD }}
APPLE_ID: ${{ secrets.APPLE_ID }}
APPLE_APP_SPECIFIC_PASSWORD: ${{ secrets.APPLE_APP_SPECIFIC_PASSWORD }}
APPLE_TEAM_ID: ${{ secrets.APPLE_TEAM_ID }}
- name: Build signed and notarized macOS artifacts
run: npm run desktop:dist:mac -- --publish never
env:
CSC_LINK: ${{ secrets.CSC_LINK }}
CSC_KEY_PASSWORD: ${{ secrets.CSC_KEY_PASSWORD }}
APPLE_ID: ${{ secrets.APPLE_ID }}
APPLE_APP_SPECIFIC_PASSWORD: ${{ secrets.APPLE_APP_SPECIFIC_PASSWORD }}
APPLE_TEAM_ID: ${{ secrets.APPLE_TEAM_ID }}
- name: Verify macOS artifacts
run: |
test -n "$(find release -maxdepth 1 -name '*.dmg' -print -quit)"
test -n "$(find release -maxdepth 1 -name '*.zip' -print -quit)"
shasum -a 256 release/*.{dmg,zip} > release/SHASUMS256.txt
cat release/SHASUMS256.txt
- name: Upload branch build artifacts
uses: actions/upload-artifact@v4
with:
name: ${{ steps.artifact.outputs.name }}
path: |
release/*.dmg
release/*.zip
release/*.yml
release/*.blockmap
release/SHASUMS256.txt
if-no-files-found: error
retention-days: 14

View File

@@ -92,6 +92,7 @@ jobs:
uses: softprops/action-gh-release@v2
with:
tag_name: ${{ steps.release.outputs.tag }}
target_commitish: ${{ github.sha }}
name: ${{ steps.release.outputs.release_name }}
prerelease: ${{ inputs.prerelease }}
fail_on_unmatched_files: false

225
electron/computerAgent.js Normal file
View File

@@ -0,0 +1,225 @@
import { spawn } from 'node:child_process';
import fs from 'node:fs/promises';
import path from 'node:path';
const IPC_PREFIX = '@@CUAGENT@@';
function getDesktopPath() {
const currentPath = process.env.PATH || '';
const commonPaths = process.platform === 'win32'
? []
: ['/opt/homebrew/bin', '/usr/local/bin', '/usr/bin', '/bin', '/usr/sbin', '/sbin'];
return [...commonPaths, currentPath].filter(Boolean).join(path.delimiter);
}
function getNodeRuntime(isPackaged) {
if (isPackaged && process.versions.electron) {
return { command: process.execPath, env: { ELECTRON_RUN_AS_NODE: '1' } };
}
if (process.env.npm_node_execpath) {
return { command: process.env.npm_node_execpath, env: {} };
}
return { command: 'node', env: {} };
}
/** Converts an environment access URL (https://x) to its desktop-agent ws URL. */
function toAgentWsUrl(httpUrl) {
try {
const parsed = new URL(httpUrl);
parsed.protocol = parsed.protocol === 'http:' ? 'ws:' : 'wss:';
parsed.pathname = '/desktop-agent';
parsed.search = '';
parsed.hash = '';
return parsed.toString();
} catch {
return null;
}
}
/**
* Manages the standalone Computer Use desktop agent process. While the user has
* Computer Use enabled, this keeps an agent connected to every running cloud
* environment so hosted sessions can drive this machine. The local CloudCLI
* server is not involved.
*/
export class ComputerAgentController {
constructor({ appRoot, settingsPath, isPackaged = false, getRunningEnvironmentUrls, promptConsent, onChange }) {
this.appRoot = appRoot;
this.settingsPath = settingsPath;
this.isPackaged = isPackaged;
this.getRunningEnvironmentUrls = getRunningEnvironmentUrls;
this.promptConsent = promptConsent;
this.onChange = onChange;
this.settings = { enabled: false, consentMode: 'ask' };
this.child = null;
this.connectedUrls = new Set();
this.currentTargets = [];
this.stdoutBuffer = '';
}
getSettings() {
return { ...this.settings };
}
getState() {
return {
enabled: this.settings.enabled,
consentMode: this.settings.consentMode,
running: Boolean(this.child),
connectedCount: this.connectedUrls.size,
targetCount: this.currentTargets.length,
};
}
async loadSettings() {
try {
const raw = await fs.readFile(this.settingsPath, 'utf8');
const stored = JSON.parse(raw);
this.settings = {
enabled: Boolean(stored.enabled),
consentMode: stored.consentMode === 'auto' ? 'auto' : 'ask',
};
} catch {
this.settings = { enabled: false, consentMode: 'ask' };
}
return this.settings;
}
async saveSettings(next) {
this.settings = {
enabled: Boolean(next.enabled),
consentMode: next.consentMode === 'auto' ? 'auto' : 'ask',
};
await fs.mkdir(path.dirname(this.settingsPath), { recursive: true });
await fs.writeFile(this.settingsPath, JSON.stringify(this.settings, null, 2), 'utf8');
await this.sync();
this.onChange?.();
return this.settings;
}
/** Reconciles the agent process with the current settings + environments. */
async sync() {
const targets = this.settings.enabled ? (this.getRunningEnvironmentUrls?.() || []) : [];
const wsTargets = targets.map(toAgentWsUrl).filter(Boolean);
const sameTargets =
wsTargets.length === this.currentTargets.length &&
wsTargets.every((url) => this.currentTargets.includes(url));
if (!this.settings.enabled || wsTargets.length === 0) {
this.stop();
this.currentTargets = [];
return;
}
if (this.child && sameTargets) {
return; // already running with the right targets
}
this.currentTargets = wsTargets;
this.restart(wsTargets);
}
restart(wsTargets) {
this.stop();
const agentEntry = process.env.CLOUDCLI_COMPUTER_AGENT_ENTRY
|| path.join(this.appRoot, 'dist-server', 'server', 'computer-use-agent.js');
const runtime = getNodeRuntime(this.isPackaged);
this.child = spawn(runtime.command, [agentEntry], {
cwd: this.appRoot,
env: {
...process.env,
...runtime.env,
PATH: getDesktopPath(),
CLOUDCLI_DESKTOP_AGENT_URLS: wsTargets.join(','),
CLOUDCLI_COMPUTER_USE_CONSENT_MODE: this.settings.consentMode,
},
stdio: ['pipe', 'pipe', 'pipe'],
windowsHide: true,
});
this.connectedUrls = new Set();
this.child.once('error', (error) => {
console.error('[ComputerAgent] failed to start:', error.message);
this.child = null;
this.onChange?.();
});
this.child.stdout?.on('data', (chunk) => this.handleStdout(String(chunk)));
this.child.stderr?.on('data', (chunk) => {
for (const line of String(chunk).split(/\r?\n/)) {
if (line.trim()) console.error('[ComputerAgent]', line);
}
});
this.child.once('exit', (code) => {
console.log(`[ComputerAgent] exited (code ${code ?? 'null'})`);
this.child = null;
this.connectedUrls = new Set();
this.onChange?.();
});
this.onChange?.();
}
handleStdout(chunk) {
this.stdoutBuffer += chunk;
const lines = this.stdoutBuffer.split('\n');
this.stdoutBuffer = lines.pop() || '';
for (const line of lines) {
const trimmed = line.trim();
if (!trimmed.startsWith(IPC_PREFIX)) {
if (trimmed) console.log('[ComputerAgent]', trimmed);
continue;
}
let payload;
try {
payload = JSON.parse(trimmed.slice(IPC_PREFIX.length).trim());
} catch {
continue;
}
void this.handleAgentEvent(payload);
}
}
async handleAgentEvent(payload) {
switch (payload.type) {
case 'connected':
this.connectedUrls.add(payload.url);
this.onChange?.();
break;
case 'disconnected':
this.connectedUrls.delete(payload.url);
this.onChange?.();
break;
case 'consent-request': {
const allow = await this.promptConsent?.(payload.sessionId);
this.sendToChild({ type: 'consent-response', sessionId: payload.sessionId, allow: Boolean(allow) });
break;
}
default:
break;
}
}
sendToChild(message) {
if (this.child?.stdin?.writable) {
this.child.stdin.write(`${IPC_PREFIX} ${JSON.stringify(message)}\n`);
}
}
revokeSession(sessionId) {
this.sendToChild({ type: 'revoke-session', sessionId });
}
stop() {
if (!this.child) return;
const child = this.child;
this.child = null;
this.connectedUrls = new Set();
try { child.kill('SIGTERM'); } catch { /* noop */ }
}
}

View File

@@ -1,9 +1,10 @@
import { app, BrowserWindow, clipboard, dialog, ipcMain, shell } from 'electron';
import { app, BrowserWindow, clipboard, dialog, ipcMain, shell, systemPreferences } from 'electron';
import { spawn } from 'node:child_process';
import path from 'node:path';
import { fileURLToPath } from 'node:url';
import { CloudController } from './cloud.js';
import { ComputerAgentController } from './computerAgent.js';
import { DesktopWindowManager } from './desktopWindow.js';
import { LocalServerController } from './localServer.js';
import { TabsController } from './tabs.js';
@@ -22,6 +23,7 @@ let activeTarget = { kind: 'launcher', name: APP_NAME, url: null };
let desktopWindow = null;
let localServer = null;
let cloud = null;
let computerAgent = null;
let isQuitting = false;
let isRefreshingCloud = false;
@@ -52,6 +54,34 @@ function getSettingsPath() {
return path.join(app.getPath('userData'), 'desktop-settings.json');
}
function getComputerUseSettingsPath() {
return path.join(app.getPath('userData'), 'computer-use-settings.json');
}
function getRunningEnvironmentUrls() {
return cloud.getEnvironments()
.filter((environment) => environment.status === 'running')
.map((environment) => cloud.getEnvironmentUrl(environment))
.filter(Boolean);
}
async function promptComputerUseConsent(sessionId) {
const { response } = await dialog.showMessageBox(desktopWindow?.getMainWindow() || undefined, {
type: 'warning',
buttons: ['Allow this session', 'Deny'],
defaultId: 0,
cancelId: 1,
title: 'Computer Use request',
message: 'An agent wants to control this computer',
detail: [
'A cloud agent is requesting control of your mouse, keyboard, and screen for this session.',
'Approval lasts for this session only. You can stop it any time from the Computer panel.',
sessionId ? `\nSession: ${sessionId}` : '',
].join('\n'),
});
return response === 0;
}
function getDisplayTargetName() {
return activeTarget?.name || APP_NAME;
}
@@ -108,6 +138,7 @@ function getDesktopState() {
tabs: tabs.getSerializableTabs(),
activeTabId: tabs.activeTabId,
environments: cloud.getEnvironments().map(serializeEnvironment),
computerUse: computerAgent?.getState() || { enabled: false, consentMode: 'ask', running: false, connectedCount: 0, targetCount: 0 },
};
}
@@ -217,18 +248,87 @@ async function copyDiagnostics() {
});
}
async function showComputerUsePreview() {
await dialog.showMessageBox(desktopWindow?.getMainWindow() || undefined, {
async function showMacComputerUsePermissions() {
if (process.platform !== 'darwin') return;
const screenStatus = systemPreferences.getMediaAccessStatus('screen');
const accessibilityTrusted = systemPreferences.isTrustedAccessibilityClient(false);
const detail = [
`Screen Recording: ${screenStatus === 'granted' ? 'granted' : 'not granted'}`,
`Accessibility: ${accessibilityTrusted ? 'granted' : 'not granted'}`,
'',
'Computer Use needs both permissions to capture the screen and control the mouse and keyboard.',
'After granting a permission, fully quit and reopen CloudCLI so the change takes effect.',
].join('\n');
const { response } = await dialog.showMessageBox(desktopWindow?.getMainWindow() || undefined, {
type: 'info',
buttons: ['OK'],
title: 'Computer Use Preview',
message: 'Computer use needs an explicit safety gate before it can run.',
buttons: ['Open Screen Recording', 'Open Accessibility', 'Close'],
defaultId: 0,
cancelId: 2,
title: 'Computer Use Permissions',
message: 'Grant macOS permissions for Computer Use',
detail,
});
if (response === 0) {
await shell.openExternal('x-apple.systempreferences:com.apple.preference.security?Privacy_ScreenCapture');
} else if (response === 1) {
await shell.openExternal('x-apple.systempreferences:com.apple.preference.security?Privacy_Accessibility');
}
}
// Desktop control for cloud Computer Use: the desktop acts as a TeamViewer-style
// agent for hosted environments. Enabling here lets cloud agents drive THIS
// machine; the user picks whether to auto-connect or be asked per session.
async function showComputerUsePreview() {
const state = computerAgent?.getState() || { enabled: false, consentMode: 'ask' };
const buttons = [];
const actions = [];
if (!state.enabled) {
buttons.push('Enable — ask each session'); actions.push({ kind: 'enable', consentMode: 'ask' });
buttons.push('Enable — auto-connect'); actions.push({ kind: 'enable', consentMode: 'auto' });
} else {
buttons.push('Disable Computer Use'); actions.push({ kind: 'disable' });
const otherMode = state.consentMode === 'auto' ? 'ask' : 'auto';
buttons.push(`Switch to ${otherMode === 'auto' ? 'auto-connect' : 'ask each session'}`);
actions.push({ kind: 'enable', consentMode: otherMode });
}
if (process.platform === 'darwin') {
buttons.push('macOS Permissions…'); actions.push({ kind: 'permissions' });
}
buttons.push('Close'); actions.push({ kind: 'close' });
const statusLine = state.enabled
? `Enabled — ${state.consentMode === 'auto' ? 'auto-connect' : 'ask each session'} · ${state.connectedCount || 0} environment(s) linked`
: 'Disabled';
const { response } = await dialog.showMessageBox(desktopWindow?.getMainWindow() || undefined, {
type: 'question',
buttons,
defaultId: 0,
cancelId: buttons.length - 1,
title: 'Computer Use (Desktop Agent)',
message: 'Let cloud agents control this computer',
detail: [
'The desktop shell is ready for controlled automation hooks, but full computer use is not enabled yet.',
`Status: ${statusLine}`,
'',
'Before this is exposed, CloudCLI needs per-session consent, a stop control, screen-capture permission checks, app/window scoping, and a provider-specific action loop.',
'When enabled, agents running in your CloudCLI cloud environments can see this screen and drive its mouse and keyboard.',
'• Ask each session: you approve a prompt the first time each session wants control.',
'• Auto-connect: sessions can act without a prompt.',
process.platform === 'linux' ? '\nLinux needs X utilities (libxtst, imagemagick) installed to capture the screen and drive input.' : '',
].join('\n'),
});
const action = actions[response];
if (!action) return;
if (action.kind === 'enable') {
await computerAgent?.saveSettings({ enabled: true, consentMode: action.consentMode });
} else if (action.kind === 'disable') {
await computerAgent?.saveSettings({ enabled: false, consentMode: state.consentMode });
} else if (action.kind === 'permissions') {
await showMacComputerUsePermissions();
}
}
async function refreshCloudEnvironments({ showErrors = false } = {}) {
@@ -253,6 +353,8 @@ async function refreshCloudEnvironments({ showErrors = false } = {}) {
throw error;
} finally {
isRefreshingCloud = false;
// Reconcile the Computer Use desktop agent with the latest running environments.
void computerAgent?.sync().catch((error) => console.error('[ComputerAgent] sync failed:', error?.message || error));
syncDesktopState();
}
}
@@ -658,6 +760,10 @@ function registerAppEvents() {
}
});
app.on('before-quit', () => {
computerAgent?.stop();
});
app.on('before-quit', (event) => {
if (isQuitting || !localServer?.hasOwnedServer()) return;
if (localServer.getSettings().keepLocalServerRunning) {
@@ -770,9 +876,18 @@ async function bootstrap() {
callbackUrl: CALLBACK_URL,
onChange: syncDesktopState,
});
computerAgent = new ComputerAgentController({
appRoot: getAppRoot(),
settingsPath: getComputerUseSettingsPath(),
isPackaged: app.isPackaged,
getRunningEnvironmentUrls,
promptConsent: promptComputerUseConsent,
onChange: syncDesktopState,
});
await localServer.loadDesktopSettings();
await cloud.loadCloudAccount();
await computerAgent.loadSettings();
registerProtocolHandler();
registerIpcHandlers();

1416
package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@@ -227,5 +227,9 @@
"lint-staged": {
"src/**/*.{ts,tsx,js,jsx}": "eslint",
"server/**/*.{js,ts}": "eslint"
},
"optionalDependencies": {
"@nut-tree-fork/nut-js": "^4.2.6",
"screenshot-desktop": "^1.15.4"
}
}

View File

@@ -0,0 +1,260 @@
#!/usr/bin/env node
/**
* CloudCLI Computer Use — Desktop Agent.
*
* Standalone executor for the cloud relay. The Electron desktop app spawns this
* process (via ELECTRON_RUN_AS_NODE) whenever Computer Use is enabled and the
* user has running cloud environments. It opens an outbound websocket to each
* environment's `/desktop-agent` endpoint and executes the `computer_*` actions
* the hosted server relays, returning a fresh screenshot each time.
*
* It is fully self-contained: it reuses the shared nut-js executor module and
* does NOT depend on the local CloudCLI server. Consent is enforced here (the
* controlled machine is the authority): in `ask` mode the agent asks the parent
* Electron process for a per-session decision before the first action runs.
*/
import readline from 'node:readline';
import { WebSocket } from 'ws';
import {
executor,
captureScreenshot,
getRuntimeReadiness,
type ExecutorTarget,
type Point,
type ClickButton,
type ScrollDirection,
} from './modules/computer-use/computer-executor.js';
type ConsentMode = 'ask' | 'auto';
type RelayMessage = {
kind?: string;
type?: string;
id?: string;
params?: Record<string, unknown>;
};
const IPC_PREFIX = '@@CUAGENT@@';
const RECONNECT_BASE_MS = 2000;
const RECONNECT_MAX_MS = 30_000;
const consentMode: ConsentMode = process.env.CLOUDCLI_COMPUTER_USE_CONSENT_MODE === 'auto' ? 'auto' : 'ask';
const agentLabel = process.env.CLOUDCLI_DESKTOP_AGENT_LABEL || 'cloudcli-desktop';
function parseTargets(): string[] {
const raw =
process.env.CLOUDCLI_DESKTOP_AGENT_URLS ||
process.env.CLOUDCLI_DESKTOP_AGENT_URL ||
'';
return raw
.split(',')
.map((value) => value.trim())
.filter(Boolean);
}
// --- Parent (Electron) IPC over stdout/stdin -------------------------------
function emitToParent(message: Record<string, unknown>): void {
process.stdout.write(`${IPC_PREFIX} ${JSON.stringify(message)}\n`);
}
/** Per-session consent decisions, and resolvers awaiting a parent reply. */
const sessionConsent = new Map<string, 'granted' | 'denied'>();
const pendingConsent = new Map<string, Array<(allow: boolean) => void>>();
const stdinReader = readline.createInterface({ input: process.stdin });
stdinReader.on('line', (line) => {
const trimmed = line.trim();
if (!trimmed.startsWith(IPC_PREFIX)) {
return;
}
try {
const payload = JSON.parse(trimmed.slice(IPC_PREFIX.length).trim()) as Record<string, unknown>;
if (payload.type === 'consent-response' && typeof payload.sessionId === 'string') {
const allow = payload.allow === true;
sessionConsent.set(payload.sessionId, allow ? 'granted' : 'denied');
const waiters = pendingConsent.get(payload.sessionId) || [];
pendingConsent.delete(payload.sessionId);
for (const resolve of waiters) {
resolve(allow);
}
} else if (payload.type === 'revoke-session' && typeof payload.sessionId === 'string') {
sessionConsent.delete(payload.sessionId);
}
} catch {
// ignore malformed control lines
}
});
async function ensureConsent(sessionId: string): Promise<boolean> {
if (consentMode === 'auto') {
return true;
}
const existing = sessionConsent.get(sessionId);
if (existing === 'granted') return true;
if (existing === 'denied') return false;
// Ask the parent (Electron) to prompt the user, and wait for the decision.
return new Promise<boolean>((resolve) => {
const waiters = pendingConsent.get(sessionId) || [];
waiters.push(resolve);
pendingConsent.set(sessionId, waiters);
emitToParent({ type: 'consent-request', sessionId });
});
}
// --- Action execution ------------------------------------------------------
function asPoint(value: unknown): Point | undefined {
if (value && typeof value === 'object') {
const point = value as Record<string, unknown>;
if (typeof point.x === 'number' && typeof point.y === 'number') {
return { x: point.x, y: point.y };
}
}
return undefined;
}
async function snapshot(target: ExecutorTarget) {
const { dataUrl, size } = await captureScreenshot();
return { screenshotDataUrl: dataUrl, displaySize: size || target.displaySize };
}
async function runAction(type: string, params: Record<string, unknown>): Promise<Record<string, unknown>> {
const readiness = getRuntimeReadiness();
if (!readiness.nutInstalled || !readiness.screenshotInstalled) {
throw new Error('Computer Use runtime is not installed on the desktop agent.');
}
const target: ExecutorTarget = {
displaySize: (params.displaySize as ExecutorTarget['displaySize']) ?? null,
};
const point = asPoint(params.point);
switch (type) {
case 'screenshot':
return snapshot(target);
case 'cursor_position': {
const position = await executor.cursorPosition(target);
return { ...(await snapshot(target)), position, cursor: position };
}
case 'mouse_move':
await executor.moveTo(target, point as Point);
return { ...(await snapshot(target)), cursor: point };
case 'click':
await executor.click(target, (params.button as ClickButton) || 'left', point, params.double === true);
return { ...(await snapshot(target)), cursor: point ?? null };
case 'drag':
await executor.drag(target, asPoint(params.from) as Point, asPoint(params.to) as Point, (params.button as ClickButton) || 'left');
return { ...(await snapshot(target)), cursor: asPoint(params.to) ?? null };
case 'type':
await executor.type(String(params.text ?? ''));
return snapshot(target);
case 'key':
await executor.pressChord(String(params.key ?? ''));
return snapshot(target);
case 'scroll':
await executor.scroll(
target,
(params.direction as ScrollDirection) || 'down',
typeof params.amount === 'number' ? params.amount : 3,
point,
);
return { ...(await snapshot(target)), cursor: point ?? null };
case 'wait':
await new Promise((resolve) => setTimeout(resolve, Math.max(0, Math.min(Number(params.ms) || 1000, 10_000))));
return snapshot(target);
default:
throw new Error(`Unsupported computer action: ${type}`);
}
}
// --- Relay connection ------------------------------------------------------
function connect(url: string): void {
let reconnectMs = RECONNECT_BASE_MS;
let socket: WebSocket | null = null;
const open = () => {
socket = new WebSocket(url, {
headers: process.env.CLOUDCLI_DESKTOP_AGENT_TOKEN
? { 'x-cloudcli-agent-token': process.env.CLOUDCLI_DESKTOP_AGENT_TOKEN }
: undefined,
});
socket.on('open', () => {
reconnectMs = RECONNECT_BASE_MS;
emitToParent({ type: 'connected', url });
socket?.send(JSON.stringify({ kind: 'register', label: agentLabel, consentMode }));
});
socket.on('message', async (raw) => {
let message: RelayMessage;
try {
message = JSON.parse(String(raw)) as RelayMessage;
} catch {
return;
}
const kind = message.kind || message.type;
if (kind !== 'computer_relay' || typeof message.id !== 'string') {
return;
}
const id = message.id;
const type = String(message.type || (message.params?.type as string) || '');
const params = message.params || {};
const sessionId = typeof params.sessionId === 'string' ? params.sessionId : 'default';
if (type === 'stop_session') {
sessionConsent.delete(sessionId);
socket?.send(JSON.stringify({ kind: 'computer_relay_result', id, result: { ok: true } }));
return;
}
try {
const allowed = await ensureConsent(sessionId);
if (!allowed) {
socket?.send(JSON.stringify({ kind: 'computer_relay_result', id, error: 'The user denied desktop control for this session.' }));
return;
}
const result = await runAction(type, params);
socket?.send(JSON.stringify({ kind: 'computer_relay_result', id, result }));
} catch (error) {
socket?.send(JSON.stringify({
kind: 'computer_relay_result',
id,
error: error instanceof Error ? error.message : 'Desktop agent action failed.',
}));
}
});
const scheduleReconnect = () => {
emitToParent({ type: 'disconnected', url });
setTimeout(open, reconnectMs);
reconnectMs = Math.min(reconnectMs * 2, RECONNECT_MAX_MS);
};
socket.on('close', scheduleReconnect);
socket.on('error', () => {
try { socket?.close(); } catch { /* noop */ }
});
};
open();
}
function main(): void {
const targets = parseTargets();
if (targets.length === 0) {
emitToParent({ type: 'error', message: 'No desktop-agent target URLs provided.' });
return;
}
emitToParent({ type: 'starting', targets, consentMode });
for (const url of targets) {
connect(url);
}
}
main();

388
server/computer-use-mcp.ts Normal file
View File

@@ -0,0 +1,388 @@
#!/usr/bin/env node
import './load-env.js';
type JsonRpcRequest = {
jsonrpc: '2.0';
id?: string | number | null;
method: string;
params?: Record<string, unknown>;
};
type ToolDefinition = {
name: string;
description: string;
inputSchema: Record<string, unknown>;
};
const readString = (value: unknown, name: string): string => {
if (typeof value !== 'string' || value.trim() === '') {
throw new Error(`${name} is required.`);
}
return value.trim();
};
const readNumber = (value: unknown): number | undefined =>
typeof value === 'number' && Number.isFinite(value) ? value : undefined;
const apiUrl = (process.env.CLOUDCLI_COMPUTER_USE_API_URL || 'http://127.0.0.1:3001/api/computer-use-mcp').replace(/\/$/, '');
const apiToken = process.env.CLOUDCLI_COMPUTER_USE_MCP_TOKEN || '';
async function callComputerUseApi(toolName: string, input: Record<string, unknown>) {
if (!apiToken) {
throw new Error('CLOUDCLI_COMPUTER_USE_MCP_TOKEN is not configured.');
}
const response = await fetch(`${apiUrl}/tools/${encodeURIComponent(toolName)}`, {
method: 'POST',
headers: {
Authorization: `Bearer ${apiToken}`,
'Content-Type': 'application/json',
},
body: JSON.stringify(input),
});
const data = await response.json() as { success?: boolean; data?: unknown; error?: string };
if (!response.ok || data.success === false) {
throw new Error(data.error || `Computer Use API request failed (${response.status})`);
}
return data.data;
}
/** Pulls the most recent screenshot data URL out of an API result, if present. */
function findScreenshot(value: unknown): string | null {
if (!value || typeof value !== 'object') {
return null;
}
const record = value as Record<string, unknown>;
if (typeof record.screenshotDataUrl === 'string') {
return record.screenshotDataUrl;
}
if (record.session && typeof record.session === 'object') {
const session = record.session as Record<string, unknown>;
if (typeof session.screenshotDataUrl === 'string') {
return session.screenshotDataUrl;
}
}
return null;
}
/** Removes the large data URL from JSON so the text block stays small. */
function stripScreenshot(value: unknown): unknown {
if (Array.isArray(value)) {
return value.map(stripScreenshot);
}
if (value && typeof value === 'object') {
const out: Record<string, unknown> = {};
for (const [key, val] of Object.entries(value as Record<string, unknown>)) {
if (key === 'screenshotDataUrl' && typeof val === 'string') {
out.screenshot = '[returned as image]';
continue;
}
out[key] = stripScreenshot(val);
}
return out;
}
return value;
}
/**
* Builds an MCP tool result. Screenshots are returned as an `image` content block so
* vision-capable models actually see the desktop — a JSON data-URL string would not work.
*/
function toolResult(value: unknown) {
const content: Array<Record<string, unknown>> = [
{ type: 'text', text: JSON.stringify(stripScreenshot(value), null, 2) },
];
const screenshot = findScreenshot(value);
const match = screenshot ? /^data:(image\/[a-z]+);base64,(.+)$/i.exec(screenshot) : null;
if (match) {
content.push({ type: 'image', data: match[2], mimeType: match[1] });
}
return { content };
}
const sessionIdSchema = {
type: 'object',
properties: {
sessionId: { type: 'string', description: 'Computer Use session id.' },
},
required: ['sessionId'],
};
const pointSchema = {
type: 'object',
properties: {
sessionId: { type: 'string' },
x: { type: 'number', description: 'X coordinate in screenshot pixel space.' },
y: { type: 'number', description: 'Y coordinate in screenshot pixel space.' },
},
required: ['sessionId'],
};
const tools: ToolDefinition[] = [
{
name: 'computer_create_session',
description: 'Create a Computer Use session that controls the user desktop. The session starts WITHOUT control: the user must grant control in the Computer panel before any action will work. Returns a screenshot once available.',
inputSchema: { type: 'object', properties: {} },
},
{
name: 'computer_list_sessions',
description: 'List Computer Use sessions and whether the user has granted control.',
inputSchema: { type: 'object', properties: {} },
},
{
name: 'computer_screenshot',
description: 'Capture the current desktop screenshot. Returns the image plus the display size to use for coordinates.',
inputSchema: sessionIdSchema,
},
{
name: 'computer_cursor_position',
description: 'Get the current mouse cursor position in screenshot pixel space.',
inputSchema: sessionIdSchema,
},
{
name: 'computer_mouse_move',
description: 'Move the mouse cursor to x/y (screenshot pixel space).',
inputSchema: {
type: 'object',
properties: { sessionId: { type: 'string' }, x: { type: 'number' }, y: { type: 'number' } },
required: ['sessionId', 'x', 'y'],
},
},
{
name: 'computer_left_click',
description: 'Left-click. Optionally provide x/y to move there first.',
inputSchema: pointSchema,
},
{
name: 'computer_right_click',
description: 'Right-click. Optionally provide x/y to move there first.',
inputSchema: pointSchema,
},
{
name: 'computer_middle_click',
description: 'Middle-click. Optionally provide x/y to move there first.',
inputSchema: pointSchema,
},
{
name: 'computer_double_click',
description: 'Double-click. Optionally provide x/y to move there first.',
inputSchema: pointSchema,
},
{
name: 'computer_left_click_drag',
description: 'Press the left button at start coordinates and release at end coordinates (drag).',
inputSchema: {
type: 'object',
properties: {
sessionId: { type: 'string' },
startX: { type: 'number' }, startY: { type: 'number' },
endX: { type: 'number' }, endY: { type: 'number' },
},
required: ['sessionId', 'startX', 'startY', 'endX', 'endY'],
},
},
{
name: 'computer_type',
description: 'Type a string of text at the current focus.',
inputSchema: {
type: 'object',
properties: { sessionId: { type: 'string' }, text: { type: 'string' } },
required: ['sessionId', 'text'],
},
},
{
name: 'computer_key',
description: 'Press a key or key chord using xdotool-style names, e.g. "Return", "Escape", "ctrl+a", "Page_Down".',
inputSchema: {
type: 'object',
properties: { sessionId: { type: 'string' }, key: { type: 'string' } },
required: ['sessionId', 'key'],
},
},
{
name: 'computer_scroll',
description: 'Scroll the mouse wheel. direction is up/down/left/right; amount is the number of steps. Optionally provide x/y to move there first.',
inputSchema: {
type: 'object',
properties: {
sessionId: { type: 'string' },
direction: { type: 'string', enum: ['up', 'down', 'left', 'right'] },
amount: { type: 'number' },
x: { type: 'number' },
y: { type: 'number' },
},
required: ['sessionId', 'direction'],
},
},
{
name: 'computer_wait',
description: 'Wait for a short period (milliseconds, max 10000) then return a fresh screenshot.',
inputSchema: {
type: 'object',
properties: { sessionId: { type: 'string' }, timeoutMs: { type: 'number' } },
required: ['sessionId'],
},
},
{
name: 'computer_close_session',
description: 'Stop a Computer Use session and revoke control.',
inputSchema: sessionIdSchema,
},
];
async function callTool(name: string, args: Record<string, unknown>) {
switch (name) {
case 'computer_create_session':
return toolResult(await callComputerUseApi(name, {}));
case 'computer_list_sessions':
return toolResult(await callComputerUseApi(name, {}));
case 'computer_screenshot':
case 'computer_cursor_position':
case 'computer_close_session':
return toolResult(await callComputerUseApi(name, { sessionId: readString(args.sessionId, 'sessionId') }));
case 'computer_mouse_move':
return toolResult(await callComputerUseApi(name, {
sessionId: readString(args.sessionId, 'sessionId'),
x: readNumber(args.x),
y: readNumber(args.y),
}));
case 'computer_left_click':
case 'computer_right_click':
case 'computer_middle_click':
case 'computer_double_click':
return toolResult(await callComputerUseApi(name, {
sessionId: readString(args.sessionId, 'sessionId'),
x: readNumber(args.x),
y: readNumber(args.y),
}));
case 'computer_left_click_drag':
return toolResult(await callComputerUseApi(name, {
sessionId: readString(args.sessionId, 'sessionId'),
startX: readNumber(args.startX),
startY: readNumber(args.startY),
endX: readNumber(args.endX),
endY: readNumber(args.endY),
}));
case 'computer_type':
return toolResult(await callComputerUseApi(name, {
sessionId: readString(args.sessionId, 'sessionId'),
text: readString(args.text, 'text'),
}));
case 'computer_key':
return toolResult(await callComputerUseApi(name, {
sessionId: readString(args.sessionId, 'sessionId'),
key: readString(args.key, 'key'),
}));
case 'computer_scroll':
return toolResult(await callComputerUseApi(name, {
sessionId: readString(args.sessionId, 'sessionId'),
direction: typeof args.direction === 'string' ? args.direction : 'up',
amount: readNumber(args.amount),
x: readNumber(args.x),
y: readNumber(args.y),
}));
case 'computer_wait':
return toolResult(await callComputerUseApi(name, {
sessionId: readString(args.sessionId, 'sessionId'),
timeoutMs: readNumber(args.timeoutMs),
}));
default:
throw new Error(`Unknown tool: ${name}`);
}
}
async function handleMessage(message: JsonRpcRequest) {
if (message.method === 'initialize') {
return {
protocolVersion: '2024-11-05',
capabilities: { tools: {} },
serverInfo: { name: 'cloudcli-computer-use', version: '1.0.0' },
};
}
if (message.method === 'tools/list') {
return { tools };
}
if (message.method === 'tools/call') {
const params = message.params || {};
const name = readString(params.name, 'name');
const args = (params.arguments && typeof params.arguments === 'object'
? params.arguments
: {}) as Record<string, unknown>;
return callTool(name, args);
}
if (message.method.startsWith('notifications/')) {
return undefined;
}
throw new Error(`Unsupported method: ${message.method}`);
}
function writeMessage(message: Record<string, unknown>) {
const payload = JSON.stringify(message);
process.stdout.write(`Content-Length: ${Buffer.byteLength(payload, 'utf8')}\r\n\r\n${payload}`);
}
function sendResult(id: string | number | null | undefined, result: unknown) {
if (id === undefined) {
return;
}
writeMessage({ jsonrpc: '2.0', id, result });
}
function sendError(id: string | number | null | undefined, error: unknown) {
if (id === undefined) {
return;
}
writeMessage({
jsonrpc: '2.0',
id,
error: {
code: -32000,
message: error instanceof Error ? error.message : String(error),
},
});
}
let buffer = Buffer.alloc(0);
process.stdin.on('data', (chunk) => {
buffer = Buffer.concat([buffer, chunk]);
while (true) {
const headerEnd = buffer.indexOf('\r\n\r\n');
if (headerEnd === -1) {
return;
}
const header = buffer.slice(0, headerEnd).toString('utf8');
const lengthMatch = /Content-Length:\s*(\d+)/i.exec(header);
if (!lengthMatch) {
buffer = buffer.slice(headerEnd + 4);
continue;
}
const length = Number.parseInt(lengthMatch[1], 10);
const messageStart = headerEnd + 4;
const messageEnd = messageStart + length;
if (buffer.length < messageEnd) {
return;
}
const rawMessage = buffer.slice(messageStart, messageEnd).toString('utf8');
buffer = buffer.slice(messageEnd);
void (async () => {
const request = JSON.parse(rawMessage) as JsonRpcRequest;
try {
const result = await handleMessage(request);
sendResult(request.id, result);
} catch (error) {
sendError(request.id, error);
}
})();
}
});

View File

@@ -65,6 +65,8 @@ import browserUseRoutes from './modules/browser-use/browser-use.routes.js';
import browserUseMcpRoutes from './modules/browser-use/browser-use-mcp.routes.js';
import { browserUseService } from './modules/browser-use/browser-use.service.js';
import computerUseRoutes from './modules/computer-use/computer-use.routes.js';
import computerUseMcpRoutes from './modules/computer-use/computer-use-mcp.routes.js';
import { computerUseService } from './modules/computer-use/computer-use.service.js';
import { startEnabledPluginServers, stopAllPlugins, getPluginPort } from './utils/plugin-process-manager.js';
import { initializeDatabase, projectsDb, sessionsDb } from './modules/database/index.js';
import { configureWebPush } from './services/vapid-keys.js';
@@ -203,6 +205,9 @@ app.use('/api/browser-use-mcp', browserUseMcpRoutes);
// Browser Use API Routes (protected)
app.use('/api/browser-use', authenticateToken, browserUseRoutes);
// Computer Use MCP bridge API (local token protected)
app.use('/api/computer-use-mcp', computerUseMcpRoutes);
// Computer Use API Routes (protected)
app.use('/api/computer-use', authenticateToken, computerUseRoutes);
@@ -1760,6 +1765,11 @@ async function startServer() {
} catch (err) {
console.error('[Browser Use] Error stopping sessions during shutdown:', err?.message || err);
}
try {
await computerUseService.stopAllSessions();
} catch (err) {
console.error('[Computer Use] Error stopping sessions during shutdown:', err?.message || err);
}
try {
await stopAllPlugins();
} catch (err) {

View File

@@ -0,0 +1,242 @@
import { createRequire } from 'node:module';
const require = createRequire(import.meta.url);
export type Point = { x: number; y: number };
export type ClickButton = 'left' | 'right' | 'middle';
export type ScrollDirection = 'up' | 'down' | 'left' | 'right';
export type DisplaySize = { width: number; height: number };
export type RuntimeReadiness = {
nut: any | null;
screenshot: any | null;
nutInstalled: boolean;
screenshotInstalled: boolean;
};
/**
* Coordinate space the executor reports/accepts. The screenshot pixel space is
* the canonical space agents and users address; it is mapped to the nut-js
* logical mouse space before any action runs.
*/
export type ExecutorTarget = {
displaySize: DisplaySize | null;
};
export function getNut(): any | null {
try {
return require('@nut-tree-fork/nut-js');
} catch {
return null;
}
}
export function getScreenshot(): any | null {
try {
const mod = require('screenshot-desktop');
return mod?.default || mod;
} catch {
return null;
}
}
export function getRuntimeReadiness(): RuntimeReadiness {
const nut = getNut();
const screenshot = getScreenshot();
return {
nut,
screenshot,
nutInstalled: Boolean(nut),
screenshotInstalled: typeof screenshot === 'function',
};
}
/** Reads the pixel dimensions from a PNG/JPEG buffer header without decoding it. */
export function readImageSize(buffer: Buffer): DisplaySize | null {
// PNG: 8-byte signature, then IHDR chunk with width/height as big-endian uint32.
if (buffer.length >= 24 && buffer[0] === 0x89 && buffer[1] === 0x50) {
return { width: buffer.readUInt32BE(16), height: buffer.readUInt32BE(20) };
}
// JPEG: scan for a Start-Of-Frame marker (0xFFC0..0xFFCF, excluding C4/C8/CC).
if (buffer.length >= 4 && buffer[0] === 0xff && buffer[1] === 0xd8) {
let offset = 2;
while (offset + 9 < buffer.length) {
if (buffer[offset] !== 0xff) {
offset += 1;
continue;
}
const marker = buffer[offset + 1];
if (marker >= 0xc0 && marker <= 0xcf && marker !== 0xc4 && marker !== 0xc8 && marker !== 0xcc) {
return { height: buffer.readUInt16BE(offset + 5), width: buffer.readUInt16BE(offset + 7) };
}
offset += 2 + buffer.readUInt16BE(offset + 2);
}
}
return null;
}
export async function captureScreenshot(): Promise<{ dataUrl: string; size: DisplaySize | null }> {
const screenshot = getScreenshot();
if (typeof screenshot !== 'function') {
throw new Error('Computer Use runtime is not available.');
}
const buffer: Buffer = await screenshot({ format: 'png' });
return {
dataUrl: `data:image/png;base64,${buffer.toString('base64')}`,
size: readImageSize(buffer),
};
}
/** Returns the mouse coordinate space size (logical screen pixels). */
export async function getMouseSpaceSize(): Promise<DisplaySize> {
const nut = getNut();
if (!nut) {
throw new Error('Computer Use runtime is not available.');
}
const width = await nut.screen.width();
const height = await nut.screen.height();
return { width, height };
}
/** Maps a point from screenshot/image space to the mouse coordinate space. */
export async function toMouseSpace(target: ExecutorTarget, point: Point): Promise<Point> {
const mouseSize = await getMouseSpaceSize();
const image = target.displaySize || mouseSize;
const scaleX = image.width ? mouseSize.width / image.width : 1;
const scaleY = image.height ? mouseSize.height / image.height : 1;
return {
x: Math.round(point.x * scaleX),
y: Math.round(point.y * scaleY),
};
}
/** Maps a point from the mouse coordinate space back to screenshot/image space. */
export function toImageSpace(target: ExecutorTarget, point: Point, mouseSize: DisplaySize): Point {
const image = target.displaySize || mouseSize;
const scaleX = mouseSize.width ? image.width / mouseSize.width : 1;
const scaleY = mouseSize.height ? image.height / mouseSize.height : 1;
return {
x: Math.round(point.x * scaleX),
y: Math.round(point.y * scaleY),
};
}
function nutButton(nut: any, button: ClickButton) {
if (button === 'right') return nut.Button.RIGHT;
if (button === 'middle') return nut.Button.MIDDLE;
return nut.Button.LEFT;
}
/** Maps a key name (xdotool-style, as Anthropic's computer tool emits) to a nut-js Key. */
function nutKey(nut: any, token: string): any {
const map: Record<string, string> = {
return: 'Enter', enter: 'Enter', esc: 'Escape', escape: 'Escape', tab: 'Tab',
space: 'Space', backspace: 'Backspace', delete: 'Delete', del: 'Delete', insert: 'Insert',
up: 'Up', down: 'Down', left: 'Left', right: 'Right',
home: 'Home', end: 'End', pageup: 'PageUp', page_up: 'PageUp', pagedown: 'PageDown', page_down: 'PageDown',
ctrl: 'LeftControl', control: 'LeftControl', alt: 'LeftAlt', shift: 'LeftShift',
meta: 'LeftSuper', super: 'LeftSuper', cmd: 'LeftSuper', win: 'LeftSuper',
capslock: 'CapsLock',
};
const lower = token.toLowerCase();
if (map[lower]) {
return nut.Key[map[lower]];
}
if (/^f([1-9]|1[0-9]|2[0-4])$/.test(lower)) {
return nut.Key[`F${lower.slice(1)}`];
}
if (token.length === 1) {
const upper = token.toUpperCase();
if (nut.Key[upper] !== undefined) {
return nut.Key[upper];
}
if (nut.Key[`Num${token}`] !== undefined && /[0-9]/.test(token)) {
return nut.Key[`Num${token}`];
}
}
throw new Error(`Unsupported key: ${token}`);
}
/**
* The cross-platform OS executor. It is intentionally free of any server,
* database, or session dependencies so it can run both inside the local server
* process (OSS mode) and inside the standalone desktop agent (cloud relay).
*/
export const executor = {
async configure() {
const nut = getNut();
if (nut) {
// Make actions responsive; the agent loop already paces itself with screenshots.
nut.mouse.config.autoDelayMs = 2;
nut.keyboard.config.autoDelayMs = 2;
}
return nut;
},
async cursorPosition(target: ExecutorTarget): Promise<Point> {
const nut = await this.configure();
const mouseSize = await getMouseSpaceSize();
const pos = await nut.mouse.getPosition();
return toImageSpace(target, { x: pos.x, y: pos.y }, mouseSize);
},
async moveTo(target: ExecutorTarget, point: Point): Promise<void> {
const nut = await this.configure();
const dest = await toMouseSpace(target, point);
await nut.mouse.setPosition(new nut.Point(dest.x, dest.y));
},
async click(target: ExecutorTarget, button: ClickButton, point?: Point, doubleClick = false): Promise<void> {
const nut = await this.configure();
if (point) {
await this.moveTo(target, point);
}
if (doubleClick) {
await nut.mouse.doubleClick(nutButton(nut, button));
} else {
await nut.mouse.click(nutButton(nut, button));
}
},
async drag(target: ExecutorTarget, from: Point, to: Point, button: ClickButton = 'left'): Promise<void> {
const nut = await this.configure();
const start = await toMouseSpace(target, from);
const end = await toMouseSpace(target, to);
await nut.mouse.setPosition(new nut.Point(start.x, start.y));
await nut.mouse.pressButton(nutButton(nut, button));
await nut.mouse.setPosition(new nut.Point(end.x, end.y));
await nut.mouse.releaseButton(nutButton(nut, button));
},
async type(text: string): Promise<void> {
const nut = await this.configure();
await nut.keyboard.type(text);
},
async pressChord(chord: string): Promise<void> {
const nut = await this.configure();
const tokens = chord.split('+').map((token) => token.trim()).filter(Boolean);
if (tokens.length === 0) {
return;
}
const keys = tokens.map((token) => nutKey(nut, token));
for (const key of keys) {
await nut.keyboard.pressKey(key);
}
for (const key of [...keys].reverse()) {
await nut.keyboard.releaseKey(key);
}
},
async scroll(target: ExecutorTarget, direction: ScrollDirection, amount: number, point?: Point): Promise<void> {
const nut = await this.configure();
if (point) {
await this.moveTo(target, point);
}
const steps = Math.max(1, Math.round(amount));
if (direction === 'up') await nut.mouse.scrollUp(steps);
else if (direction === 'down') await nut.mouse.scrollDown(steps);
else if (direction === 'left') await nut.mouse.scrollLeft(steps);
else await nut.mouse.scrollRight(steps);
},
};

View File

@@ -0,0 +1,118 @@
import express from 'express';
import { computerUseService } from '@/modules/computer-use/computer-use.service.js';
const router = express.Router();
function readBearerToken(header: unknown): string | null {
if (typeof header !== 'string') {
return null;
}
const match = /^Bearer\s+(.+)$/i.exec(header.trim());
return match?.[1] || null;
}
function toButton(value: unknown): 'left' | 'right' | 'middle' {
return value === 'right' || value === 'middle' ? value : 'left';
}
function toScrollDirection(value: unknown): 'up' | 'down' | 'left' | 'right' {
return value === 'down' || value === 'left' || value === 'right' ? value : 'up';
}
function point(input: Record<string, unknown>): { x: number; y: number } | undefined {
return typeof input.x === 'number' && typeof input.y === 'number'
? { x: input.x, y: input.y }
: undefined;
}
router.use((req, res, next) => {
const expected = computerUseService.getMcpToken();
const token = readBearerToken(req.headers.authorization) || String(req.headers['x-computer-use-mcp-token'] || '');
if (!token || token !== expected) {
res.status(401).json({ success: false, error: 'Invalid Computer Use MCP token.' });
return;
}
next();
});
router.post('/tools/:toolName', async (req, res) => {
try {
const input = (req.body && typeof req.body === 'object' ? req.body : {}) as Record<string, unknown>;
const sessionId = typeof input.sessionId === 'string' ? input.sessionId : '';
const toolName = req.params.toolName;
let result: unknown;
switch (toolName) {
case 'computer_create_session':
result = await computerUseService.createAgentSession();
break;
case 'computer_list_sessions':
result = await computerUseService.listAgentSessions();
break;
case 'computer_screenshot':
result = await computerUseService.agentScreenshot(sessionId);
break;
case 'computer_cursor_position':
result = await computerUseService.agentCursorPosition(sessionId);
break;
case 'computer_mouse_move':
result = await computerUseService.agentMouseMove(sessionId, point(input) || { x: 0, y: 0 });
break;
case 'computer_left_click':
result = await computerUseService.agentClick(sessionId, 'left', point(input));
break;
case 'computer_right_click':
result = await computerUseService.agentClick(sessionId, 'right', point(input));
break;
case 'computer_middle_click':
result = await computerUseService.agentClick(sessionId, 'middle', point(input));
break;
case 'computer_double_click':
result = await computerUseService.agentClick(sessionId, toButton(input.button), point(input), true);
break;
case 'computer_left_click_drag': {
const from = typeof input.startX === 'number' && typeof input.startY === 'number'
? { x: input.startX, y: input.startY }
: { x: 0, y: 0 };
const to = typeof input.endX === 'number' && typeof input.endY === 'number'
? { x: input.endX, y: input.endY }
: { x: 0, y: 0 };
result = await computerUseService.agentDrag(sessionId, from, to, 'left');
break;
}
case 'computer_type':
result = await computerUseService.agentType(sessionId, String(input.text || ''));
break;
case 'computer_key':
result = await computerUseService.agentKey(sessionId, String(input.key || ''));
break;
case 'computer_scroll':
result = await computerUseService.agentScroll(sessionId, {
direction: toScrollDirection(input.direction),
amount: typeof input.amount === 'number' ? input.amount : undefined,
x: typeof input.x === 'number' ? input.x : undefined,
y: typeof input.y === 'number' ? input.y : undefined,
});
break;
case 'computer_wait':
result = await computerUseService.agentWait(sessionId, typeof input.timeoutMs === 'number' ? input.timeoutMs : undefined);
break;
case 'computer_close_session':
result = await computerUseService.agentStopSession(sessionId);
break;
default:
res.status(404).json({ success: false, error: `Unknown Computer Use MCP tool "${toolName}".` });
return;
}
res.json({ success: true, data: result });
} catch (error) {
res.status(400).json({
success: false,
error: error instanceof Error ? error.message : 'Computer Use MCP tool failed.',
});
}
});
export default router;

View File

@@ -4,16 +4,212 @@ import { computerUseService } from '@/modules/computer-use/computer-use.service.
const router = express.Router();
router.get('/status', (_req, res) => {
res.json({ success: true, data: computerUseService.getStatus() });
type AuthenticatedRequest = express.Request & {
user?: {
id?: string | number;
};
};
function requireUser(req: AuthenticatedRequest): { id: string | number } {
const userId = req.user?.id;
if (userId === undefined || userId === null) {
throw new Error('Authenticated user is required.');
}
return { id: userId };
}
function readParam(value: string | string[] | undefined): string {
return Array.isArray(value) ? value[0] || '' : value || '';
}
function toButton(value: unknown): 'left' | 'right' | 'middle' {
return value === 'right' || value === 'middle' ? value : 'left';
}
router.get('/status', async (_req, res) => {
try {
res.json({ success: true, data: await computerUseService.getStatus() });
} catch (error) {
res.status(500).json({
success: false,
error: error instanceof Error ? error.message : 'Failed to load Computer Use status.',
});
}
});
router.post('/sessions', (_req, res) => {
res.status(409).json({
success: false,
error: 'Computer Use is not enabled until a local CloudCLI Desktop Agent is connected and approved by the user.',
data: computerUseService.getStatus(),
});
router.get('/settings', async (_req, res) => {
try {
res.json({ success: true, data: { settings: await computerUseService.getSettings() } });
} catch (error) {
res.status(500).json({
success: false,
error: error instanceof Error ? error.message : 'Failed to load Computer Use settings.',
});
}
});
router.put('/settings', async (req, res) => {
try {
const settings = await computerUseService.updateSettings(req.body || {});
res.json({ success: true, data: { settings } });
} catch (error) {
res.status(400).json({
success: false,
error: error instanceof Error ? error.message : 'Failed to save Computer Use settings.',
});
}
});
router.post('/agent-tools/register', async (_req, res) => {
try {
const result = await computerUseService.registerAgentMcp();
res.status(201).json({ success: true, data: result });
} catch (error) {
res.status(400).json({
success: false,
error: error instanceof Error ? error.message : 'Failed to register Computer Use MCP.',
});
}
});
router.post('/runtime/install', async (_req, res) => {
try {
const result = await computerUseService.installRuntime();
res.status(result.success ? 200 : 500).json({
success: result.success,
data: result,
error: result.success ? undefined : result.message,
});
} catch (error) {
res.status(500).json({
success: false,
error: error instanceof Error ? error.message : 'Failed to install Computer Use runtime.',
});
}
});
router.get('/sessions', async (req: AuthenticatedRequest, res) => {
try {
res.json({ success: true, data: { sessions: await computerUseService.listSessions(requireUser(req)) } });
} catch (error) {
res.status(401).json({
success: false,
error: error instanceof Error ? error.message : 'Failed to list Computer Use sessions.',
});
}
});
router.post('/sessions', async (req: AuthenticatedRequest, res) => {
try {
const session = await computerUseService.createSession(requireUser(req));
res.status(session.status === 'unavailable' ? 202 : 201).json({ success: true, data: { session } });
} catch (error) {
res.status(400).json({
success: false,
error: error instanceof Error ? error.message : 'Failed to create Computer Use session.',
});
}
});
router.post('/sessions/:sessionId/screenshot', async (req: AuthenticatedRequest, res) => {
try {
const session = await computerUseService.userScreenshot(requireUser(req), readParam(req.params.sessionId));
res.json({ success: true, data: { session } });
} catch (error) {
res.status(400).json({
success: false,
error: error instanceof Error ? error.message : 'Failed to capture the screen.',
});
}
});
router.post('/sessions/:sessionId/click', async (req: AuthenticatedRequest, res) => {
try {
const session = await computerUseService.userClick(requireUser(req), readParam(req.params.sessionId), {
x: Number(req.body?.x),
y: Number(req.body?.y),
button: toButton(req.body?.button),
double: req.body?.double === true,
});
res.json({ success: true, data: { session } });
} catch (error) {
res.status(400).json({
success: false,
error: error instanceof Error ? error.message : 'Failed to click.',
});
}
});
router.post('/sessions/:sessionId/type', async (req: AuthenticatedRequest, res) => {
try {
const session = await computerUseService.userType(requireUser(req), readParam(req.params.sessionId), String(req.body?.text || ''));
res.json({ success: true, data: { session } });
} catch (error) {
res.status(400).json({
success: false,
error: error instanceof Error ? error.message : 'Failed to type text.',
});
}
});
router.post('/sessions/:sessionId/press-key', async (req: AuthenticatedRequest, res) => {
try {
const session = await computerUseService.userPressKey(requireUser(req), readParam(req.params.sessionId), String(req.body?.key || ''));
res.json({ success: true, data: { session } });
} catch (error) {
res.status(400).json({
success: false,
error: error instanceof Error ? error.message : 'Failed to send key input.',
});
}
});
router.post('/sessions/:sessionId/consent/grant', async (req: AuthenticatedRequest, res) => {
try {
const session = await computerUseService.grantAgentAccess(requireUser(req), readParam(req.params.sessionId));
res.json({ success: true, data: { session } });
} catch (error) {
res.status(400).json({
success: false,
error: error instanceof Error ? error.message : 'Failed to grant control.',
});
}
});
router.post('/sessions/:sessionId/consent/revoke', async (req: AuthenticatedRequest, res) => {
try {
const session = await computerUseService.revokeAgentAccess(requireUser(req), readParam(req.params.sessionId));
res.json({ success: true, data: { session } });
} catch (error) {
res.status(400).json({
success: false,
error: error instanceof Error ? error.message : 'Failed to revoke control.',
});
}
});
router.post('/sessions/:sessionId/stop', async (req: AuthenticatedRequest, res) => {
try {
const result = await computerUseService.stopSession(requireUser(req), readParam(req.params.sessionId));
res.json({ success: true, data: result });
} catch (error) {
res.status(400).json({
success: false,
error: error instanceof Error ? error.message : 'Failed to stop Computer Use session.',
});
}
});
router.delete('/sessions/:sessionId', async (req: AuthenticatedRequest, res) => {
try {
const result = await computerUseService.deleteSession(requireUser(req), readParam(req.params.sessionId));
res.json({ success: true, data: result });
} catch (error) {
res.status(400).json({
success: false,
error: error instanceof Error ? error.message : 'Failed to delete Computer Use session.',
});
}
});
export default router;

View File

@@ -1,22 +1,883 @@
import { createRequire } from 'node:module';
import { randomBytes, randomUUID } from 'node:crypto';
import { spawn } from 'node:child_process';
import fs from 'node:fs';
import path from 'node:path';
import { appConfigDb } from '@/modules/database/repositories/app-config.js';
import { providerMcpService } from '@/modules/providers/services/mcp.service.js';
import { getModuleDir } from '@/utils/runtime-paths.js';
import {
executor,
captureScreenshot as captureScreenshotRuntime,
getRuntimeReadiness as getExecutorReadiness,
type Point,
type ClickButton,
type ScrollDirection,
} from '@/modules/computer-use/computer-executor.js';
import { desktopAgentRelay } from '@/modules/computer-use/desktop-agent-relay.service.js';
const __dirname = getModuleDir(import.meta.url);
const IS_PLATFORM = process.env.VITE_IS_PLATFORM === 'true';
const MAX_SESSIONS_PER_OWNER = Number.parseInt(process.env.CLOUDCLI_COMPUTER_USE_MAX_SESSIONS_PER_OWNER || '1', 10);
const SESSION_TTL_MS = Number.parseInt(process.env.CLOUDCLI_COMPUTER_USE_SESSION_TTL_MS || String(30 * 60 * 1000), 10);
const COMPUTER_USE_SETTINGS_KEY = 'computer_use_settings';
const COMPUTER_USE_MCP_TOKEN_KEY = 'computer_use_mcp_token';
type ComputerUseRuntime = 'cloud' | 'local';
type ComputerUseSessionStatus = 'ready' | 'stopped' | 'unavailable';
type ComputerUseSession = {
id: string;
ownerId: string;
createdBy: 'user' | 'agent';
runtime: ComputerUseRuntime;
status: ComputerUseSessionStatus;
screenshotDataUrl: string | null;
createdAt: string;
updatedAt: string;
lastAction: string | null;
message: string | null;
/** Per-session consent: agents may act only while this is true. */
agentAccessEnabled: boolean;
/** Size of the captured screenshot in pixels — the coordinate space agents/users use. */
displaySize: {
width: number;
height: number;
} | null;
cursor: {
x: number;
y: number;
actor: 'agent' | 'user';
} | null;
};
type PublicComputerUseSession = Omit<ComputerUseSession, 'ownerId'>;
type ComputerUseOwner = {
id: string | number;
};
type ComputerUseSettings = {
enabled: boolean;
agentToolsEnabled: boolean;
};
type RuntimeReadiness = {
nut: any | null;
screenshot: any | null;
nutInstalled: boolean;
screenshotInstalled: boolean;
installInProgress: boolean;
installMessage: string | null;
};
const sessions = new Map<string, ComputerUseSession>();
let installPromise: Promise<{ success: boolean; message: string }> | null = null;
let lastInstallMessage: string | null = null;
const DEFAULT_SETTINGS: ComputerUseSettings = {
enabled: false,
agentToolsEnabled: false,
};
const AGENT_OWNER_ID = 'agent';
const MCP_SERVER_NAME = 'cloudcli-computer-use';
const MCP_PROVIDERS = ['claude', 'codex', 'cursor', 'gemini', 'opencode'];
function getRuntime(): ComputerUseRuntime {
return IS_PLATFORM ? 'cloud' : 'local';
}
function readSettings(): ComputerUseSettings {
try {
const raw = appConfigDb.get(COMPUTER_USE_SETTINGS_KEY);
if (!raw) {
return DEFAULT_SETTINGS;
}
const parsed = JSON.parse(raw) as Partial<ComputerUseSettings>;
return {
enabled: parsed.enabled === true,
agentToolsEnabled: parsed.agentToolsEnabled === true,
};
} catch (error: any) {
console.warn('[Computer Use] Failed to read settings:', error?.message || error);
return DEFAULT_SETTINGS;
}
}
function writeSettings(settings: ComputerUseSettings): ComputerUseSettings {
const normalized = {
enabled: settings.enabled === true,
agentToolsEnabled: settings.agentToolsEnabled === true,
};
appConfigDb.set(COMPUTER_USE_SETTINGS_KEY, JSON.stringify(normalized));
return normalized;
}
function getOrCreateMcpToken(): string {
const existing = appConfigDb.get(COMPUTER_USE_MCP_TOKEN_KEY);
if (existing) {
return existing;
}
const token = randomBytes(32).toString('hex');
appConfigDb.set(COMPUTER_USE_MCP_TOKEN_KEY, token);
return token;
}
function getSetupMessage(settings: ComputerUseSettings, readiness: RuntimeReadiness): string {
if (getRuntime() === 'cloud') {
return 'Cloud Computer Use requires a linked CloudCLI Desktop Agent on the user machine.';
}
if (!settings.enabled) {
return 'Computer Use is disabled in settings.';
}
if (!readiness.nutInstalled || !readiness.screenshotInstalled) {
return 'Install the desktop control runtime to capture the screen and drive the mouse and keyboard.';
}
return readiness.installMessage || 'Computer Use runtime is not ready.';
}
function getMcpCommand(): { command: string; args: string[] } {
const serverDir = path.resolve(__dirname, '..', '..');
const mcpScriptPath = path.join(serverDir, 'computer-use-mcp.js');
if (fs.existsSync(mcpScriptPath)) {
return {
command: process.execPath,
args: [mcpScriptPath],
};
}
return {
command: 'cloudcli',
args: ['computer-use-mcp'],
};
}
function getMcpApiUrl(): string {
const port = process.env.SERVER_PORT || process.env.PORT || '3001';
return `http://127.0.0.1:${port}/api/computer-use-mcp`;
}
function getRuntimeReadiness(): RuntimeReadiness {
const base = getExecutorReadiness();
return {
...base,
installInProgress: Boolean(installPromise),
installMessage: lastInstallMessage,
};
}
function runCommand(command: string, args: string[]): Promise<void> {
return new Promise((resolve, reject) => {
const child = spawn(command, args, {
cwd: process.cwd(),
env: process.env,
shell: false,
stdio: ['ignore', 'pipe', 'pipe'],
});
const output: string[] = [];
child.stdout.on('data', (chunk) => output.push(String(chunk)));
child.stderr.on('data', (chunk) => output.push(String(chunk)));
child.on('error', reject);
child.on('close', (code) => {
if (code === 0) {
resolve();
return;
}
reject(new Error(output.join('').trim() || `${command} ${args.join(' ')} exited with code ${code}`));
});
});
}
function formatInstallError(error: unknown): string {
const message = error instanceof Error ? error.message : String(error);
if (process.platform === 'linux' && /libxtst|x11|xtst|libpng|imagemagick|scrot/i.test(message)) {
return [
'Installing the desktop control runtime needs system packages.',
'On Debian/Ubuntu run: sudo apt-get install -y libxtst-dev libpng-dev imagemagick',
'then try again.',
].join(' ');
}
return message || 'Failed to install the Computer Use runtime.';
}
function isPackagedElectronNodeRuntime(): boolean {
return process.env.ELECTRON_RUN_AS_NODE === '1' && Boolean(process.versions.electron);
}
async function installRuntime(): Promise<{ success: boolean; message: string }> {
if (installPromise) {
return installPromise;
}
const readiness = getExecutorReadiness();
if (readiness.nutInstalled && readiness.screenshotInstalled) {
lastInstallMessage = 'Computer Use runtime is available.';
return { success: true, message: lastInstallMessage };
}
if (isPackagedElectronNodeRuntime()) {
lastInstallMessage = 'Computer Use runtime was not bundled with this desktop build.';
return { success: false, message: lastInstallMessage };
}
const npmCommand = process.platform === 'win32' ? 'npm.cmd' : 'npm';
installPromise = (async () => {
try {
lastInstallMessage = 'Installing desktop control runtime…';
await runCommand(npmCommand, [
'install',
'--no-save',
'--no-package-lock',
'@nut-tree-fork/nut-js',
'screenshot-desktop',
]);
lastInstallMessage = 'Computer Use runtime installed.';
return { success: true, message: lastInstallMessage };
} catch (error) {
lastInstallMessage = formatInstallError(error);
return { success: false, message: lastInstallMessage };
}
})();
try {
return await installPromise;
} finally {
installPromise = null;
}
}
function getOwnerId(owner: ComputerUseOwner): string {
if (owner.id === undefined || owner.id === null || String(owner.id).trim() === '') {
throw new Error('Authenticated user is required.');
}
return String(owner.id);
}
function publicSession(session: ComputerUseSession): PublicComputerUseSession {
const { ownerId: _ownerId, ...publicFields } = session;
return publicFields;
}
function ownerSessions(ownerId: string): ComputerUseSession[] {
return [...sessions.values()].filter((session) => session.ownerId === ownerId);
}
function canAccessSession(ownerId: string, session: ComputerUseSession): boolean {
return session.ownerId === ownerId || session.ownerId === AGENT_OWNER_ID;
}
async function expireStaleSessions(now = Date.now()): Promise<void> {
for (const session of sessions.values()) {
if (session.status !== 'ready') {
continue;
}
const updatedAt = Date.parse(session.updatedAt);
if (!Number.isFinite(updatedAt) || now - updatedAt <= SESSION_TTL_MS) {
continue;
}
session.status = 'stopped';
session.agentAccessEnabled = false;
session.updatedAt = new Date(now).toISOString();
session.lastAction = 'expire';
session.message = 'Computer Use session expired after inactivity.';
}
}
// --- Action layer: local executor (OSS) or cloud relay to the desktop agent --
//
// Every desktop interaction goes through `performAction` / `getCursorPosition`.
// In local mode it drives the in-process nut-js executor (computer-executor.ts);
// in cloud mode it forwards the action to the linked desktop agent over
// `desktopAgentRelay` and applies the returned screenshot. The local server
// itself never touches the OS in cloud mode.
/** One desktop interaction expressed in screenshot-pixel coordinate space. */
export type ComputerAction =
| { type: 'screenshot' }
| { type: 'mouse_move'; point: Point }
| { type: 'click'; button: ClickButton; point?: Point; double?: boolean }
| { type: 'drag'; from: Point; to: Point; button?: ClickButton }
| { type: 'type'; text: string }
| { type: 'key'; key: string }
| { type: 'scroll'; direction: ScrollDirection; amount?: number; point?: Point }
| { type: 'wait'; ms?: number };
/** Shape the desktop agent returns for any relayed action. */
type RelayResult = {
screenshotDataUrl?: string | null;
displaySize?: { width: number; height: number } | null;
cursor?: { x: number; y: number } | null;
position?: Point | null;
};
function applyRelayResult(session: ComputerUseSession, result: RelayResult): void {
if (typeof result.screenshotDataUrl === 'string') {
session.screenshotDataUrl = result.screenshotDataUrl;
}
if (result.displaySize) {
session.displaySize = result.displaySize;
}
if (result.cursor) {
session.cursor = { x: result.cursor.x, y: result.cursor.y, actor: session.cursor?.actor ?? 'agent' };
}
session.updatedAt = new Date().toISOString();
}
async function refreshScreenshot(session: ComputerUseSession): Promise<void> {
if (getRuntime() === 'cloud') {
const result = (await desktopAgentRelay.relay('screenshot', { sessionId: session.id })) as RelayResult;
applyRelayResult(session, result);
return;
}
const { dataUrl, size } = await captureScreenshotRuntime();
session.screenshotDataUrl = dataUrl;
if (size) {
session.displaySize = size;
}
session.updatedAt = new Date().toISOString();
}
/** Runs one action and refreshes the session screenshot afterwards. */
async function performAction(session: ComputerUseSession, action: ComputerAction): Promise<void> {
if (getRuntime() === 'cloud') {
const result = (await desktopAgentRelay.relay(action.type, {
...action,
sessionId: session.id,
displaySize: session.displaySize,
})) as RelayResult;
applyRelayResult(session, result);
return;
}
switch (action.type) {
case 'screenshot':
break;
case 'mouse_move':
await executor.moveTo(session, action.point);
break;
case 'click':
await executor.click(session, action.button, action.point, action.double === true);
break;
case 'drag':
await executor.drag(session, action.from, action.to, action.button ?? 'left');
break;
case 'type':
await executor.type(action.text);
break;
case 'key':
await executor.pressChord(action.key);
break;
case 'scroll':
await executor.scroll(session, action.direction, action.amount ?? 3, action.point);
break;
case 'wait':
await new Promise((resolve) => setTimeout(resolve, Math.max(0, Math.min(action.ms ?? 1000, 10_000))));
break;
}
await refreshScreenshot(session);
}
/** Reads the current cursor position in screenshot-pixel space. */
async function getCursorPosition(session: ComputerUseSession): Promise<Point> {
if (getRuntime() === 'cloud') {
const result = (await desktopAgentRelay.relay('cursor_position', {
sessionId: session.id,
displaySize: session.displaySize,
})) as RelayResult;
applyRelayResult(session, result);
if (result.position) {
return result.position;
}
return session.cursor ? { x: session.cursor.x, y: session.cursor.y } : { x: 0, y: 0 };
}
return executor.cursorPosition(session);
}
function assertReady(session: ComputerUseSession): void {
if (session.status !== 'ready') {
throw new Error(session.message || 'Computer Use session is not available.');
}
}
/**
* Whether agent tools may operate right now. Cloud mode depends purely on a
* connected desktop agent; local mode depends on the two opt-in settings.
*/
function agentToolsAvailable(): boolean {
if (getRuntime() === 'cloud') {
return desktopAgentRelay.isConnected();
}
const settings = readSettings();
return settings.enabled && settings.agentToolsEnabled;
}
function assertAgentToolsAvailable(): void {
if (agentToolsAvailable()) {
return;
}
throw new Error(
getRuntime() === 'cloud'
? 'No desktop agent is connected. Open the CloudCLI desktop app with Computer Use enabled.'
: 'Computer Use agent tools are disabled.'
);
}
export const computerUseService = {
getStatus() {
async getSettings() {
return readSettings();
},
async updateSettings(settings: Partial<ComputerUseSettings>) {
const current = readSettings();
const nextSettings = {
...current,
enabled: typeof settings.enabled === 'boolean' ? settings.enabled : current.enabled,
agentToolsEnabled: typeof settings.agentToolsEnabled === 'boolean'
? settings.agentToolsEnabled
: current.agentToolsEnabled,
};
if (!nextSettings.enabled) {
nextSettings.agentToolsEnabled = false;
}
const next = writeSettings(nextSettings);
if (next.agentToolsEnabled) {
await this.registerAgentMcp();
} else if (current.agentToolsEnabled) {
await this.unregisterAgentMcp();
}
return next;
},
async getStatus() {
const settings = readSettings();
const readiness = getRuntimeReadiness();
const isCloud = getRuntime() === 'cloud';
const runtimeReady = readiness.nutInstalled && readiness.screenshotInstalled;
// Cloud availability is purely a function of a connected desktop agent; the
// hosted server has no screen of its own. Local availability needs the
// in-process nut-js runtime installed and the feature enabled.
const desktopAgentConnected = desktopAgentRelay.isConnected();
const available = isCloud
? desktopAgentConnected
: settings.enabled && runtimeReady;
return {
available: false,
bridgeConnected: false,
runtime: IS_PLATFORM ? 'cloud' : 'local',
requiresDesktopBridge: true,
message: IS_PLATFORM
? 'Cloud Computer Use requires a linked CloudCLI Desktop Agent on the user machine.'
: 'Local Computer Use requires a desktop bridge with screen recording and accessibility permissions.',
capabilities: {
screenshots: false,
mouse: false,
keyboard: false,
clipboard: false,
stopControl: false,
},
enabled: isCloud ? true : settings.enabled,
runtime: getRuntime(),
available,
requiresDesktopBridge: isCloud,
desktopAgentConnected,
nutInstalled: readiness.nutInstalled,
screenshotInstalled: readiness.screenshotInstalled,
installInProgress: readiness.installInProgress,
sessionCount: sessions.size,
agentToolsEnabled: isCloud ? desktopAgentConnected : settings.agentToolsEnabled,
mcpRecommended: !settings.agentToolsEnabled,
message: available ? 'Computer Use runtime is available.' : getSetupMessage(settings, readiness),
};
},
async registerAgentMcp() {
const { command, args } = getMcpCommand();
const results = await providerMcpService.addMcpServerToAllProviders({
name: MCP_SERVER_NAME,
scope: 'user',
transport: 'stdio',
command,
args,
env: {
CLOUDCLI_COMPUTER_USE_MCP_TOKEN: getOrCreateMcpToken(),
CLOUDCLI_COMPUTER_USE_API_URL: getMcpApiUrl(),
},
});
return { name: MCP_SERVER_NAME, command, args, results };
},
getMcpToken() {
return getOrCreateMcpToken();
},
async unregisterAgentMcp() {
const results = await Promise.all(MCP_PROVIDERS.map(async (provider) => {
try {
const result = await providerMcpService.removeProviderMcpServer(provider, {
name: MCP_SERVER_NAME,
scope: 'user',
});
return { provider, removed: result.removed };
} catch (error) {
return {
provider,
removed: false,
error: error instanceof Error ? error.message : 'Unknown error',
};
}
}));
return { name: MCP_SERVER_NAME, results };
},
async installRuntime() {
const result = await installRuntime();
return {
...result,
status: await this.getStatus(),
};
},
async listSessions(owner: ComputerUseOwner) {
const ownerId = getOwnerId(owner);
await expireStaleSessions();
return [...sessions.values()]
.filter((session) => canAccessSession(ownerId, session))
.map(publicSession);
},
async createSession(owner: ComputerUseOwner, options?: { createdBy?: 'user' | 'agent' }) {
const ownerId = getOwnerId(owner);
await expireStaleSessions();
const createdBy = options?.createdBy ?? 'user';
const now = new Date().toISOString();
const session: ComputerUseSession = {
id: randomUUID(),
ownerId,
createdBy,
runtime: getRuntime(),
status: 'unavailable',
screenshotDataUrl: null,
createdAt: now,
updatedAt: now,
lastAction: 'create',
// Consent is always OFF at creation — the user must explicitly grant control,
// even for agent-initiated sessions controlling the full desktop.
agentAccessEnabled: false,
displaySize: null,
message: null,
cursor: null,
};
const activeOwnerSessions = ownerSessions(ownerId).filter((item) => item.status === 'ready');
if (activeOwnerSessions.length >= MAX_SESSIONS_PER_OWNER) {
throw new Error(`Computer Use is limited to ${MAX_SESSIONS_PER_OWNER} active session(s).`);
}
const settings = readSettings();
const readiness = getRuntimeReadiness();
const isCloud = getRuntime() === 'cloud';
const runtimeReady = readiness.nutInstalled && readiness.screenshotInstalled;
const ready = isCloud
? desktopAgentRelay.isConnected()
: settings.enabled && runtimeReady;
if (!ready) {
session.message = getSetupMessage(settings, readiness);
sessions.set(session.id, session);
return publicSession(session);
}
// In cloud mode the linked desktop agent is the consent authority and prompts
// the user per its own consent mode, so the relay is allowed to act. In local
// mode the user must still grant control from the panel.
if (isCloud) {
session.agentAccessEnabled = true;
}
session.status = 'ready';
session.message = isCloud
? 'Computer Use session is ready on the linked desktop.'
: 'Computer Use session is ready. Grant control to let agents act.';
sessions.set(session.id, session);
try {
await refreshScreenshot(session);
} catch (error) {
session.status = 'unavailable';
session.message = error instanceof Error ? error.message : 'Failed to capture the screen.';
}
return publicSession(session);
},
async grantAgentAccess(owner: ComputerUseOwner, sessionId: string) {
const ownerId = getOwnerId(owner);
const session = sessions.get(sessionId);
if (!session || !canAccessSession(ownerId, session)) {
throw new Error('Computer Use session not found.');
}
session.agentAccessEnabled = true;
session.updatedAt = new Date().toISOString();
session.lastAction = 'consent:grant';
return publicSession(session);
},
async revokeAgentAccess(owner: ComputerUseOwner, sessionId: string) {
const ownerId = getOwnerId(owner);
const session = sessions.get(sessionId);
if (!session || !canAccessSession(ownerId, session)) {
throw new Error('Computer Use session not found.');
}
session.agentAccessEnabled = false;
session.updatedAt = new Date().toISOString();
session.lastAction = 'consent:revoke';
return publicSession(session);
},
async stopSession(owner: ComputerUseOwner, sessionId: string) {
const ownerId = getOwnerId(owner);
const session = sessions.get(sessionId);
if (!session || !canAccessSession(ownerId, session)) {
return { stopped: false };
}
session.status = 'stopped';
session.agentAccessEnabled = false;
session.updatedAt = new Date().toISOString();
session.lastAction = 'stop';
session.message = 'Computer Use session stopped. Agent control is revoked.';
if (getRuntime() === 'cloud' && desktopAgentRelay.isConnected()) {
// Best-effort: tell the desktop agent to forget this session's consent.
void desktopAgentRelay.relay('stop_session', { sessionId }).catch(() => undefined);
}
return { stopped: true, session: publicSession(session) };
},
async deleteSession(owner: ComputerUseOwner, sessionId: string) {
const ownerId = getOwnerId(owner);
const session = sessions.get(sessionId);
if (!session || !canAccessSession(ownerId, session)) {
return { deleted: false };
}
sessions.delete(sessionId);
return { deleted: true, sessionId };
},
// --- User-initiated actions (from the panel) -------------------------------
async userScreenshot(owner: ComputerUseOwner, sessionId: string) {
const ownerId = getOwnerId(owner);
const session = sessions.get(sessionId);
if (!session || !canAccessSession(ownerId, session)) {
throw new Error('Computer Use session not found.');
}
assertReady(session);
await refreshScreenshot(session);
session.lastAction = 'screenshot';
return publicSession(session);
},
async userClick(owner: ComputerUseOwner, sessionId: string, input: { x: number; y: number; button?: ClickButton; double?: boolean }) {
const ownerId = getOwnerId(owner);
const session = sessions.get(sessionId);
if (!session || !canAccessSession(ownerId, session)) {
throw new Error('Computer Use session not found.');
}
assertReady(session);
await performAction(session, {
type: 'click',
button: input.button || 'left',
point: { x: input.x, y: input.y },
double: input.double === true,
});
session.cursor = { x: input.x, y: input.y, actor: 'user' };
session.lastAction = input.double ? 'double_click' : 'click';
return publicSession(session);
},
async userType(owner: ComputerUseOwner, sessionId: string, text: string) {
const ownerId = getOwnerId(owner);
const session = sessions.get(sessionId);
if (!session || !canAccessSession(ownerId, session)) {
throw new Error('Computer Use session not found.');
}
assertReady(session);
await performAction(session, { type: 'type', text });
session.lastAction = 'type';
return publicSession(session);
},
async userPressKey(owner: ComputerUseOwner, sessionId: string, key: string) {
const ownerId = getOwnerId(owner);
const session = sessions.get(sessionId);
if (!session || !canAccessSession(ownerId, session)) {
throw new Error('Computer Use session not found.');
}
assertReady(session);
await performAction(session, { type: 'key', key });
session.lastAction = `key:${key}`;
return publicSession(session);
},
// --- Agent-initiated actions (via MCP) ------------------------------------
async createAgentSession() {
assertAgentToolsAvailable();
return this.createSession({ id: AGENT_OWNER_ID }, { createdBy: 'agent' });
},
async listAgentSessions() {
if (!agentToolsAvailable()) {
return [];
}
await expireStaleSessions();
return [...sessions.values()].map(publicSession);
},
/**
* Resolves a session the agent is allowed to act on. In local mode this
* enforces the in-process per-session consent flag. In cloud mode the linked
* desktop agent is the consent authority (it prompts the user per its own
* consent mode), so this only requires the relay to be connected.
*/
async getConsentedSession(sessionId: string): Promise<ComputerUseSession> {
assertAgentToolsAvailable();
const session = sessions.get(sessionId);
if (!session) {
throw new Error('Computer Use session not found.');
}
if (getRuntime() !== 'cloud' && !session.agentAccessEnabled) {
throw new Error('Computer Use session is awaiting user consent. Ask the user to grant control in the Computer panel.');
}
assertReady(session);
return session;
},
async agentScreenshot(sessionId: string) {
const session = await this.getConsentedSession(sessionId);
await refreshScreenshot(session);
session.lastAction = 'screenshot';
return publicSession(session);
},
async agentCursorPosition(sessionId: string) {
const session = await this.getConsentedSession(sessionId);
const point = await getCursorPosition(session);
session.cursor = { ...point, actor: 'agent' };
session.lastAction = 'cursor_position';
return { session: publicSession(session), position: point };
},
async agentMouseMove(sessionId: string, point: Point) {
const session = await this.getConsentedSession(sessionId);
await performAction(session, { type: 'mouse_move', point });
session.cursor = { ...point, actor: 'agent' };
session.lastAction = 'mouse_move';
return publicSession(session);
},
async agentClick(sessionId: string, button: ClickButton, point?: Point, doubleClick = false) {
const session = await this.getConsentedSession(sessionId);
await performAction(session, { type: 'click', button, point, double: doubleClick });
if (point) {
session.cursor = { ...point, actor: 'agent' };
}
session.lastAction = doubleClick ? 'double_click' : `${button}_click`;
return publicSession(session);
},
async agentDrag(sessionId: string, from: Point, to: Point, button: ClickButton = 'left') {
const session = await this.getConsentedSession(sessionId);
await performAction(session, { type: 'drag', from, to, button });
session.cursor = { ...to, actor: 'agent' };
session.lastAction = 'left_click_drag';
return publicSession(session);
},
async agentType(sessionId: string, text: string) {
const session = await this.getConsentedSession(sessionId);
await performAction(session, { type: 'type', text });
session.lastAction = 'type';
return publicSession(session);
},
async agentKey(sessionId: string, key: string) {
const session = await this.getConsentedSession(sessionId);
await performAction(session, { type: 'key', key });
session.lastAction = `key:${key}`;
return publicSession(session);
},
async agentScroll(sessionId: string, input: { direction: ScrollDirection; amount?: number; x?: number; y?: number }) {
const session = await this.getConsentedSession(sessionId);
const point = typeof input.x === 'number' && typeof input.y === 'number' ? { x: input.x, y: input.y } : undefined;
await performAction(session, { type: 'scroll', direction: input.direction, amount: input.amount, point });
if (point) {
session.cursor = { ...point, actor: 'agent' };
}
session.lastAction = `scroll:${input.direction}`;
return publicSession(session);
},
async agentWait(sessionId: string, timeoutMs?: number) {
const session = await this.getConsentedSession(sessionId);
await performAction(session, { type: 'wait', ms: timeoutMs });
session.lastAction = 'wait';
return publicSession(session);
},
async agentStopSession(sessionId: string) {
assertAgentToolsAvailable();
return this.stopSession({ id: AGENT_OWNER_ID }, sessionId);
},
/**
* Cloud only: when a desktop agent links to this hosted environment, expose
* the computer_* MCP tools to every provider so the running agent can use
* them. Mirrors `registerAgentMcp` but is driven by relay connectivity rather
* than a settings toggle.
*/
async onDesktopAgentConnected() {
if (getRuntime() !== 'cloud') {
return;
}
try {
await this.registerAgentMcp();
} catch (error) {
console.warn('[Computer Use] Failed to register MCP for linked desktop agent:', error instanceof Error ? error.message : error);
}
},
/** Cloud only: tear down sessions when the last desktop agent disconnects. */
async onDesktopAgentDisconnected() {
if (getRuntime() !== 'cloud' || desktopAgentRelay.isConnected()) {
return;
}
for (const session of sessions.values()) {
if (session.status === 'ready') {
session.status = 'stopped';
session.agentAccessEnabled = false;
session.updatedAt = new Date().toISOString();
session.lastAction = 'agent-disconnected';
session.message = 'The linked desktop agent disconnected.';
}
}
},
async stopAllSessions() {
for (const session of sessions.values()) {
session.status = 'stopped';
session.agentAccessEnabled = false;
session.updatedAt = new Date().toISOString();
session.lastAction = 'shutdown';
session.message = 'Computer Use session stopped during server shutdown.';
}
},
};
// Drive cloud MCP exposure + session teardown off desktop-agent connectivity.
desktopAgentRelay.setHooks({
onFirstConnect: () => computerUseService.onDesktopAgentConnected(),
onLastDisconnect: () => computerUseService.onDesktopAgentDisconnected(),
});
process.once('beforeExit', () => {
void computerUseService.stopAllSessions();
});

View File

@@ -0,0 +1,129 @@
import { randomUUID } from 'node:crypto';
import type { WebSocket } from 'ws';
const RELAY_TIMEOUT_MS = Number.parseInt(process.env.CLOUDCLI_COMPUTER_USE_RELAY_TIMEOUT_MS || '60000', 10);
const WS_OPEN = 1;
type PendingRelay = {
resolve: (value: unknown) => void;
reject: (reason: Error) => void;
timer: ReturnType<typeof setTimeout>;
};
type ConnectedAgent = {
ws: WebSocket;
label: string;
registeredAt: string;
};
type RelayLifecycleHooks = {
onFirstConnect?: () => void | Promise<void>;
onLastDisconnect?: () => void | Promise<void>;
};
const agents = new Map<WebSocket, ConnectedAgent>();
const pending = new Map<string, PendingRelay>();
let hooks: RelayLifecycleHooks = {};
function rejectAllPending(reason: string): void {
for (const [callId, call] of pending.entries()) {
clearTimeout(call.timer);
call.reject(new Error(reason));
pending.delete(callId);
}
}
function pickAgent(): ConnectedAgent | undefined {
for (const agent of agents.values()) {
if (agent.ws.readyState === WS_OPEN) {
return agent;
}
}
return undefined;
}
/**
* Cloud-side registry of linked desktop agents and the request/response relay
* used to drive the user's real desktop. The hosted server never touches the OS
* itself — it only forwards `computer_*` actions to a connected desktop agent
* and awaits the screenshot it returns.
*/
export const desktopAgentRelay = {
setHooks(next: RelayLifecycleHooks): void {
hooks = next;
},
register(ws: WebSocket, label = 'desktop-agent'): void {
const wasEmpty = pickAgent() === undefined;
agents.set(ws, { ws, label, registeredAt: new Date().toISOString() });
console.log(`[DesktopAgent] Registered (${label}); ${agents.size} connected.`);
ws.on('close', () => {
agents.delete(ws);
console.log(`[DesktopAgent] Disconnected (${label}); ${agents.size} remain.`);
if (pickAgent() === undefined) {
rejectAllPending('Desktop agent disconnected.');
void hooks.onLastDisconnect?.();
}
});
if (wasEmpty) {
void hooks.onFirstConnect?.();
}
},
/** Resolves a pending relay call with the desktop agent's reply. */
handleResult(id: string, result: unknown, error?: string): void {
const call = pending.get(id);
if (!call) {
return;
}
clearTimeout(call.timer);
pending.delete(id);
if (error) {
call.reject(new Error(error));
} else {
call.resolve(result);
}
},
isConnected(): boolean {
return pickAgent() !== undefined;
},
connectedCount(): number {
let count = 0;
for (const agent of agents.values()) {
if (agent.ws.readyState === WS_OPEN) {
count++;
}
}
return count;
},
async relay(type: string, params: Record<string, unknown>): Promise<unknown> {
const agent = pickAgent();
if (!agent) {
throw new Error(
'No desktop agent connected. Open the CloudCLI desktop app with Computer Use enabled to control this machine.'
);
}
const id = randomUUID();
return new Promise<unknown>((resolve, reject) => {
const timer = setTimeout(() => {
pending.delete(id);
reject(new Error('Desktop agent did not respond in time.'));
}, RELAY_TIMEOUT_MS);
pending.set(id, { resolve, reject, timer });
try {
agent.ws.send(JSON.stringify({ kind: 'computer_relay', id, type, params }));
} catch (error) {
clearTimeout(timer);
pending.delete(id);
reject(error instanceof Error ? error : new Error('Failed to send to desktop agent.'));
}
});
},
};

View File

@@ -0,0 +1,2 @@
export { computerUseService } from '@/modules/computer-use/computer-use.service.js';
export { desktopAgentRelay } from '@/modules/computer-use/desktop-agent-relay.service.js';

View File

@@ -0,0 +1,42 @@
import type { WebSocket } from 'ws';
import { desktopAgentRelay } from '@/modules/computer-use/index.js';
import type { AuthenticatedWebSocketRequest } from '@/shared/types.js';
import { parseIncomingJsonObject } from '@/shared/utils.js';
/**
* Handles the `/desktop-agent` websocket — the inbound side of the cloud
* Computer Use relay. A linked CloudCLI desktop app connects here and registers
* itself as the executor for this hosted environment. The server then forwards
* `computer_*` actions via `desktopAgentRelay`, and the agent returns results as
* `computer_relay_result` frames correlated by `id`.
*/
export function handleDesktopAgentConnection(
ws: WebSocket,
request: AuthenticatedWebSocketRequest
): void {
const label = request.user?.username ? `desktop:${request.user.username}` : 'desktop-agent';
console.log('[INFO] Desktop agent websocket connected:', label);
desktopAgentRelay.register(ws, label);
ws.on('message', (rawMessage) => {
const data = parseIncomingJsonObject(rawMessage);
if (!data) {
return;
}
const kind = typeof data.kind === 'string' ? data.kind : typeof data.type === 'string' ? data.type : '';
if (kind === 'computer_relay_result' && typeof data.id === 'string') {
desktopAgentRelay.handleResult(
data.id,
(data as Record<string, unknown>).result,
typeof (data as Record<string, unknown>).error === 'string'
? ((data as Record<string, unknown>).error as string)
: undefined
);
}
});
ws.on('close', () => {
console.log('[INFO] Desktop agent websocket disconnected:', label);
});
}

View File

@@ -6,6 +6,7 @@ import { handleChatConnection } from '@/modules/websocket/services/chat-websocke
import { verifyWebSocketClient } from '@/modules/websocket/services/websocket-auth.service.js';
import { handlePluginWsProxy } from '@/modules/websocket/services/plugin-websocket-proxy.service.js';
import { handleShellConnection } from '@/modules/websocket/services/shell-websocket.service.js';
import { handleDesktopAgentConnection } from '@/modules/websocket/services/desktop-agent-websocket.service.js';
import type { AuthenticatedWebSocketRequest } from '@/shared/types.js';
type WebSocketServerDependencies = {
@@ -63,6 +64,11 @@ export function createWebSocketServer(
return;
}
if (pathname === '/desktop-agent') {
handleDesktopAgentConnection(ws, incomingRequest);
return;
}
if (pathname.startsWith('/plugin-ws/')) {
handlePluginWsProxy(ws, pathname, dependencies.getPluginPort);
return;

View File

@@ -1,63 +1,263 @@
import { useCallback, useEffect, useState } from 'react';
import { Cable, MonitorCog, RefreshCw, ShieldCheck } from 'lucide-react';
import { useCallback, useEffect, useMemo, useRef, useState, type KeyboardEvent, type MouseEvent } from 'react';
import { Bot, Camera, Download, Expand, Loader2, MonitorCog, RefreshCw, ShieldCheck, Square, Trash2, X } from 'lucide-react';
import { Badge, Button } from '../../../shared/view/ui';
import { authenticatedFetch } from '../../../utils/api';
type ComputerUseStatus = {
available: boolean;
bridgeConnected: boolean;
enabled: boolean;
runtime: 'cloud' | 'local';
available: boolean;
requiresDesktopBridge: boolean;
nutInstalled: boolean;
screenshotInstalled: boolean;
installInProgress: boolean;
sessionCount: number;
agentToolsEnabled: boolean;
mcpRecommended: boolean;
message: string;
capabilities: {
screenshots: boolean;
mouse: boolean;
keyboard: boolean;
clipboard: boolean;
stopControl: boolean;
};
};
type ComputerUseSession = {
id: string;
status: 'ready' | 'stopped' | 'unavailable';
screenshotDataUrl: string | null;
createdAt: string;
updatedAt: string;
lastAction: string | null;
message: string | null;
agentAccessEnabled: boolean;
createdBy: 'user' | 'agent';
displaySize: {
width: number;
height: number;
} | null;
cursor: {
x: number;
y: number;
actor: 'agent' | 'user';
} | null;
};
type ComputerUsePanelProps = {
isVisible: boolean;
};
async function readStatus(response: Response): Promise<ComputerUseStatus> {
async function readJson<T>(response: Response): Promise<T> {
const data = await response.json();
if (!response.ok || data.success === false) {
throw new Error(data.error || `Request failed (${response.status})`);
throw new Error(data.error || data.details || `Request failed (${response.status})`);
}
return data.data;
return data as T;
}
export default function ComputerUsePanel({ isVisible }: ComputerUsePanelProps) {
const [status, setStatus] = useState<ComputerUseStatus | null>(null);
const [sessions, setSessions] = useState<ComputerUseSession[]>([]);
const [selectedSessionId, setSelectedSessionId] = useState<string | null>(null);
const [isBusy, setIsBusy] = useState(false);
const [isInstalling, setIsInstalling] = useState(false);
const [isFullscreen, setIsFullscreen] = useState(false);
const [error, setError] = useState<string | null>(null);
const viewerRef = useRef<HTMLDivElement | null>(null);
const selectedSession = useMemo(
() => sessions.find((session) => session.id === selectedSessionId) || sessions[0] || null,
[selectedSessionId, sessions],
);
const refresh = useCallback(async () => {
setError(null);
try {
const response = await authenticatedFetch('/api/computer-use/status');
setStatus(await readStatus(response));
} catch (err) {
setError(err instanceof Error ? err.message : 'Failed to load Computer Use status');
}
const [statusResponse, sessionsResponse] = await Promise.all([
authenticatedFetch('/api/computer-use/status'),
authenticatedFetch('/api/computer-use/sessions'),
]);
const statusData = await readJson<{ data: ComputerUseStatus }>(statusResponse);
const sessionsData = await readJson<{ data: { sessions: ComputerUseSession[] } }>(sessionsResponse);
setStatus(statusData.data);
setSessions(sessionsData.data.sessions);
setSelectedSessionId((current) => (
current && sessionsData.data.sessions.some((session) => session.id === current)
? current
: sessionsData.data.sessions[0]?.id || null
));
}, []);
useEffect(() => {
if (isVisible) {
void refresh();
}
if (!isVisible) return;
void refresh().catch((err) => setError(err instanceof Error ? err.message : 'Failed to load Computer Use'));
}, [isVisible, refresh]);
const capabilities = status?.capabilities || {
screenshots: false,
mouse: false,
keyboard: false,
clipboard: false,
stopControl: false,
};
// Poll while an active session exists so agent-driven changes show up live.
useEffect(() => {
if (!isVisible || !selectedSession || selectedSession.status !== 'ready') return;
const timer = window.setInterval(() => {
void refresh().catch(() => undefined);
}, 1500);
return () => window.clearInterval(timer);
}, [isVisible, selectedSession, refresh]);
const runAction = useCallback(async (action: () => Promise<void>) => {
setIsBusy(true);
setError(null);
try {
await action();
await refresh();
} catch (err) {
setError(err instanceof Error ? err.message : 'Computer Use action failed');
} finally {
setIsBusy(false);
}
}, [refresh]);
const createSession = () => runAction(async () => {
const response = await authenticatedFetch('/api/computer-use/sessions', { method: 'POST' });
const data = await readJson<{ data: { session: ComputerUseSession } }>(response);
setSelectedSessionId(data.data.session.id);
});
const captureScreenshot = () => runAction(async () => {
if (!selectedSession) return;
const response = await authenticatedFetch(`/api/computer-use/sessions/${selectedSession.id}/screenshot`, { method: 'POST' });
await readJson(response);
});
const stopSession = () => runAction(async () => {
if (!selectedSession) return;
const response = await authenticatedFetch(`/api/computer-use/sessions/${selectedSession.id}/stop`, { method: 'POST' });
await readJson(response);
});
const deleteSession = () => runAction(async () => {
if (!selectedSession) return;
const response = await authenticatedFetch(`/api/computer-use/sessions/${selectedSession.id}`, { method: 'DELETE' });
await readJson(response);
setIsFullscreen(false);
});
const grantControl = () => runAction(async () => {
if (!selectedSession) return;
const response = await authenticatedFetch(`/api/computer-use/sessions/${selectedSession.id}/consent/grant`, { method: 'POST' });
await readJson(response);
});
const revokeControl = () => runAction(async () => {
if (!selectedSession) return;
const response = await authenticatedFetch(`/api/computer-use/sessions/${selectedSession.id}/consent/revoke`, { method: 'POST' });
await readJson(response);
});
const installRuntime = () => runAction(async () => {
setIsInstalling(true);
try {
const response = await authenticatedFetch('/api/computer-use/runtime/install', { method: 'POST' });
await readJson(response);
} finally {
setIsInstalling(false);
}
});
const clickViewer = useCallback((event: MouseEvent<HTMLImageElement>) => {
if (!selectedSession || selectedSession.status !== 'ready' || !selectedSession.displaySize) {
return;
}
viewerRef.current?.focus();
const bounds = event.currentTarget.getBoundingClientRect();
const scaleX = selectedSession.displaySize.width / bounds.width;
const scaleY = selectedSession.displaySize.height / bounds.height;
const x = Math.round((event.clientX - bounds.left) * scaleX);
const y = Math.round((event.clientY - bounds.top) * scaleY);
void runAction(async () => {
const response = await authenticatedFetch(`/api/computer-use/sessions/${selectedSession.id}/click`, {
method: 'POST',
body: JSON.stringify({ x, y, double: event.detail === 2 }),
});
await readJson(response);
});
}, [runAction, selectedSession]);
const keyForEvent = useCallback((event: KeyboardEvent<HTMLDivElement>) => {
if (event.key === ' ') return 'Space';
const parts: string[] = [];
if (event.ctrlKey) parts.push('ctrl');
if (event.altKey) parts.push('alt');
if (event.shiftKey && event.key.length > 1) parts.push('shift');
if (event.metaKey) parts.push('meta');
parts.push(event.key);
return parts.join('+');
}, []);
const pressViewerKey = useCallback((event: KeyboardEvent<HTMLDivElement>) => {
if (!selectedSession || selectedSession.status !== 'ready') {
return;
}
const ignoredKeys = new Set(['Shift', 'Control', 'Alt', 'Meta', 'CapsLock']);
if (ignoredKeys.has(event.key)) {
return;
}
event.preventDefault();
const key = keyForEvent(event);
void runAction(async () => {
const response = await authenticatedFetch(`/api/computer-use/sessions/${selectedSession.id}/press-key`, {
method: 'POST',
body: JSON.stringify({ key }),
});
await readJson(response);
});
}, [keyForEvent, runAction, selectedSession]);
const needsRuntime = Boolean(status?.enabled && status.runtime === 'local' && (!status.nutInstalled || !status.screenshotInstalled));
const isCloud = status?.runtime === 'cloud';
const cursorStyle = selectedSession?.cursor && selectedSession.displaySize
? {
left: `${(selectedSession.cursor.x / selectedSession.displaySize.width) * 100}%`,
top: `${(selectedSession.cursor.y / selectedSession.displaySize.height) * 100}%`,
}
: null;
const renderSurface = (fullscreen = false) => (
<div
ref={viewerRef}
tabIndex={selectedSession?.status === 'ready' ? 0 : -1}
onKeyDown={pressViewerKey}
className={`flex min-h-[360px] flex-1 items-center justify-center bg-neutral-950 outline-none ${fullscreen ? 'min-h-[80vh]' : ''}`}
>
{selectedSession?.screenshotDataUrl ? (
<div className="relative inline-block max-h-full">
<img
src={selectedSession.screenshotDataUrl}
alt="Desktop screenshot"
className={fullscreen ? 'block max-h-[80vh] w-auto max-w-full object-contain' : 'block max-h-[70vh] w-auto max-w-full object-contain'}
onClick={clickViewer}
/>
{cursorStyle && (
<div
className="pointer-events-none absolute h-5 w-5 -translate-x-1/2 -translate-y-1/2 rounded-full border-2 border-white/90 bg-sky-500/80 shadow-[0_0_0_6px_rgba(14,165,233,0.18)]"
style={cursorStyle}
>
<div className="absolute left-1/2 top-1/2 h-2 w-2 -translate-x-1/2 -translate-y-1/2 rounded-full bg-white" />
</div>
)}
</div>
) : (
<div className="max-w-md px-6 text-center">
<MonitorCog className="mx-auto h-10 w-10 text-neutral-500" />
<div className="mt-3 text-sm font-medium text-neutral-100">
{selectedSession?.message || 'Start a Computer Use session to capture your desktop.'}
</div>
<p className="mt-2 text-xs leading-relaxed text-neutral-400">
{isCloud
? 'Cloud Computer Use requires a linked local CloudCLI Desktop Agent.'
: 'Install the desktop control runtime from this panel or enable Computer Use from Settings.'}
</p>
</div>
)}
</div>
);
return (
<div className="flex h-full min-h-0 flex-col bg-background">
@@ -69,64 +269,186 @@ export default function ComputerUsePanel({ isVisible }: ComputerUsePanelProps) {
{status && <Badge variant="outline" className="text-[11px]">{status.runtime}</Badge>}
</div>
<p className="mt-0.5 text-xs text-muted-foreground">
Local desktop control through a user-approved CloudCLI Desktop Agent.
Capture your desktop and let agents drive the mouse and keyboard only while you grant control.
</p>
</div>
<Button variant="outline" size="sm" onClick={() => void refresh()}>
<RefreshCw className="h-4 w-4" />
Refresh
</Button>
<div className="flex flex-wrap items-center gap-2">
<Button variant="outline" size="sm" onClick={() => void refresh()} disabled={isBusy}>
<RefreshCw className="h-4 w-4" />
Refresh
</Button>
<Button size="sm" onClick={createSession} disabled={isBusy || !status?.available}>
<MonitorCog className="h-4 w-4" />
New Session
</Button>
</div>
</div>
{error && (
<div className="border-b border-red-200 bg-red-50 px-4 py-2 text-sm text-red-700 dark:border-red-900/50 dark:bg-red-950/30 dark:text-red-200">
{error}
</div>
)}
<div className="grid flex-1 grid-cols-1 gap-4 overflow-auto p-4 xl:grid-cols-[minmax(0,1fr)_340px]">
<section className="rounded-lg border border-border bg-card/40 p-5">
<div className="flex items-start gap-3">
<div className="flex h-10 w-10 flex-shrink-0 items-center justify-center rounded-md bg-muted text-muted-foreground">
<Cable className="h-5 w-5" />
</div>
<div className="min-w-0 flex-1">
<div className="flex flex-wrap items-center gap-2">
<h4 className="text-sm font-semibold text-foreground">Desktop bridge</h4>
<Badge variant={status?.bridgeConnected ? 'default' : 'outline'} className="text-[11px]">
{status?.bridgeConnected ? 'connected' : 'not connected'}
</Badge>
</div>
<p className="mt-2 max-w-3xl text-sm leading-relaxed text-muted-foreground">
{status?.message || 'Loading Computer Use status...'}
<div className="grid min-h-0 flex-1 grid-cols-1 lg:grid-cols-[300px_minmax(0,1fr)]">
<aside className="border-b border-border/60 p-3 lg:border-b-0 lg:border-r">
{needsRuntime && (
<div className="rounded-lg border border-border/70 bg-card/40 p-3">
<div className="text-xs font-medium uppercase tracking-wide text-muted-foreground">Desktop runtime required</div>
<p className="mt-2 text-xs leading-relaxed text-muted-foreground">
{status?.message || 'Install the desktop control runtime to enable Computer Use.'}
</p>
<div className="mt-4 rounded-lg border border-dashed border-border/70 bg-background/60 p-4">
<div className="text-sm font-medium text-foreground">Architecture boundary</div>
<p className="mt-1 text-sm leading-relaxed text-muted-foreground">
Hosted CloudCLI can request Computer Use only through a linked local agent. The hosted server should never receive a permanent raw ability to control a user machine.
</p>
<div className="mt-3 flex flex-wrap gap-2 text-xs text-muted-foreground">
<span className="rounded-md border border-border px-2 py-1">
Control lib: {status?.nutInstalled ? 'installed' : 'missing'}
</span>
<span className="rounded-md border border-border px-2 py-1">
Screen capture: {status?.screenshotInstalled ? 'installed' : 'missing'}
</span>
</div>
<Button
type="button"
size="sm"
className="mt-3 w-full"
onClick={installRuntime}
disabled={isBusy || isInstalling || status?.installInProgress}
>
{isInstalling || status?.installInProgress ? (
<Loader2 className="h-4 w-4 animate-spin" />
) : (
<Download className="h-4 w-4" />
)}
{isInstalling || status?.installInProgress ? 'Installing…' : 'Install Runtime'}
</Button>
</div>
</div>
</section>
)}
<aside className="rounded-lg border border-border bg-card/40 p-4">
<div className="flex items-center gap-2">
<ShieldCheck className="h-4 w-4 text-primary" />
<h4 className="text-sm font-semibold text-foreground">Required controls</h4>
</div>
<div className="mt-3 space-y-2">
{Object.entries(capabilities).map(([name, enabled]) => (
<div key={name} className="flex items-center justify-between rounded-md border border-border/60 px-3 py-2 text-sm">
<span className="capitalize text-foreground">{name.replace(/([A-Z])/g, ' $1')}</span>
<Badge variant={enabled ? 'default' : 'outline'} className="text-[10px]">
{enabled ? 'ready' : 'blocked'}
</Badge>
<div className="rounded-lg border border-border/70 bg-muted/30 p-3 text-xs leading-relaxed text-muted-foreground">
<div className="flex items-center gap-1.5 font-medium text-foreground">
<ShieldCheck className="h-3.5 w-3.5" />
Safety
</div>
<p className="mt-1.5">
Agents can act on a session only while you have granted control. Use
<span className="font-medium text-foreground"> Grant Control </span>
to allow agent actions, and
<span className="font-medium text-foreground"> Stop </span>
to revoke instantly.
</p>
</div>
{sessions.map((session) => (
<button
key={session.id}
type="button"
onClick={() => setSelectedSessionId(session.id)}
className={`w-full rounded-lg border px-3 py-2 text-left text-sm transition-colors ${selectedSession?.id === session.id
? 'border-primary/50 bg-primary/10 text-foreground'
: 'border-border/60 bg-card/30 text-muted-foreground hover:bg-muted/50'
}`}
>
<div className="flex items-center justify-between gap-2">
<span className="truncate font-medium">
{session.createdBy === 'agent' ? 'Agent session' : 'Desktop session'}
</span>
<Badge variant="outline" className="text-[10px]">{session.status}</Badge>
</div>
<div className="mt-1 flex flex-wrap gap-1">
{session.agentAccessEnabled ? (
<span className="rounded border border-emerald-500/30 px-1.5 py-0.5 text-[10px] text-emerald-600 dark:text-emerald-300">
control granted
</span>
) : (
<span className="rounded border border-amber-500/30 px-1.5 py-0.5 text-[10px] text-amber-600 dark:text-amber-300">
awaiting consent
</span>
)}
</div>
<div className="mt-1 truncate text-xs">{session.lastAction || session.message || session.id}</div>
</button>
))}
{sessions.length === 0 && (
<div className="rounded-lg border border-dashed border-border/70 px-3 py-8 text-center text-xs text-muted-foreground">
No Computer Use sessions yet.
</div>
)}
</div>
</aside>
<main className="flex min-h-0 flex-col">
<div className="flex flex-wrap items-center gap-2 border-b border-border/60 px-3 py-2">
<Button variant="outline" size="sm" onClick={captureScreenshot} disabled={isBusy || !selectedSession || selectedSession.status !== 'ready'}>
<Camera className="h-4 w-4" />
Screenshot
</Button>
{selectedSession?.agentAccessEnabled ? (
<Button variant="outline" size="sm" onClick={revokeControl} disabled={isBusy || !selectedSession}>
<X className="h-4 w-4" />
Revoke Control
</Button>
) : (
<Button
variant="outline"
size="sm"
onClick={grantControl}
disabled={isBusy || !selectedSession || selectedSession.status !== 'ready' || !status?.agentToolsEnabled}
>
<Bot className="h-4 w-4" />
Grant Control
</Button>
)}
<Button variant="outline" size="sm" onClick={() => setIsFullscreen(true)} disabled={!selectedSession?.screenshotDataUrl}>
<Expand className="h-4 w-4" />
Full Screen
</Button>
<Button variant="outline" size="sm" onClick={stopSession} disabled={isBusy || !selectedSession || selectedSession.status !== 'ready'}>
<Square className="h-4 w-4" />
Stop
</Button>
<Button variant="outline" size="sm" onClick={deleteSession} disabled={isBusy || !selectedSession}>
<Trash2 className="h-4 w-4" />
Delete
</Button>
</div>
{error && (
<div className="border-b border-red-200 bg-red-50 px-4 py-2 text-sm text-red-700 dark:border-red-900/50 dark:bg-red-950/30 dark:text-red-200">
{error}
</div>
)}
<div className="min-h-0 flex-1 overflow-auto bg-muted/20 p-4">
<div className="mx-auto flex min-h-[420px] max-w-6xl flex-col overflow-hidden rounded-lg border border-border bg-background shadow-sm">
<div className="flex items-center gap-2 border-b border-border/60 px-3 py-2 text-xs text-muted-foreground">
<MonitorCog className="h-3.5 w-3.5" />
<span className="truncate">
{selectedSession?.displaySize
? `${selectedSession.displaySize.width}×${selectedSession.displaySize.height}`
: 'No screen captured'}
</span>
{selectedSession?.agentAccessEnabled && (
<span className="ml-auto inline-flex items-center gap-1 rounded border border-emerald-500/30 px-2 py-0.5 text-emerald-600 dark:text-emerald-300">
<Bot className="h-3.5 w-3.5" />
Agent control active
</span>
)}
</div>
{renderSurface()}
</div>
<p className="mx-auto mt-2 max-w-6xl text-center text-xs text-muted-foreground">
Click the screenshot to click the real desktop. Focus the view and type to send keystrokes.
</p>
</div>
</main>
</div>
{isFullscreen && selectedSession && (
<div className="fixed inset-0 z-50 bg-black/90 p-6">
<div className="flex h-full flex-col rounded-lg border border-white/10 bg-black">
<div className="flex items-center justify-between border-b border-white/10 px-4 py-3 text-sm text-white/80">
<div className="min-w-0 truncate">Desktop session</div>
<Button variant="outline" size="sm" onClick={() => setIsFullscreen(false)}>
<X className="h-4 w-4" />
Close
</Button>
</div>
{renderSurface(true)}
</div>
</div>
)}
</div>
);
}

View File

@@ -65,6 +65,7 @@ export type MainContentHeaderProps = {
selectedSession: ProjectSession | null;
shouldShowTasksTab: boolean;
shouldShowBrowserTab: boolean;
shouldShowComputerTab: boolean;
isMobile: boolean;
onMenuClick: () => void;
};

View File

@@ -59,9 +59,11 @@ function MainContent({
const { currentProject, setCurrentProject } = useTaskMaster() as TaskMasterContextValue;
const { tasksEnabled, isTaskMasterInstalled } = useTasksSettings() as TasksSettingsContextValue;
const [browserUseEnabled, setBrowserUseEnabled] = useState(false);
const [computerUseEnabled, setComputerUseEnabled] = useState(false);
const shouldShowTasksTab = Boolean(tasksEnabled && isTaskMasterInstalled);
const shouldShowBrowserTab = browserUseEnabled;
const shouldShowComputerTab = computerUseEnabled;
const {
editingFile,
@@ -117,6 +119,28 @@ function MainContent({
}
}, [shouldShowBrowserTab, activeTab, setActiveTab]);
const loadComputerUseSettings = useCallback(async () => {
try {
const response = await authenticatedFetch('/api/computer-use/settings');
const data = await response.json();
setComputerUseEnabled(Boolean(response.ok && data?.success !== false && data?.data?.settings?.enabled));
} catch {
setComputerUseEnabled(false);
}
}, []);
useEffect(() => {
void loadComputerUseSettings();
window.addEventListener('computerUseSettingsChanged', loadComputerUseSettings);
return () => window.removeEventListener('computerUseSettingsChanged', loadComputerUseSettings);
}, [loadComputerUseSettings]);
useEffect(() => {
if (!shouldShowComputerTab && activeTab === 'computer') {
setActiveTab('chat');
}
}, [shouldShowComputerTab, activeTab, setActiveTab]);
usePaletteOpsRegister({
openFile: (filePath: string) => {
setActiveTab('files');
@@ -141,6 +165,7 @@ function MainContent({
selectedSession={selectedSession}
shouldShowTasksTab={shouldShowTasksTab}
shouldShowBrowserTab={shouldShowBrowserTab}
shouldShowComputerTab={shouldShowComputerTab}
isMobile={isMobile}
onMenuClick={onMenuClick}
/>
@@ -205,7 +230,7 @@ function MainContent({
</div>
)}
{activeTab === 'computer' && (
{shouldShowComputerTab && activeTab === 'computer' && (
<div className="h-full overflow-hidden">
<ComputerUsePanel isVisible={activeTab === 'computer'} />
</div>

View File

@@ -11,6 +11,7 @@ export default function MainContentHeader({
selectedSession,
shouldShowTasksTab,
shouldShowBrowserTab,
shouldShowComputerTab,
isMobile,
onMenuClick,
}: MainContentHeaderProps) {
@@ -61,6 +62,7 @@ export default function MainContentHeader({
setActiveTab={setActiveTab}
shouldShowTasksTab={shouldShowTasksTab}
shouldShowBrowserTab={shouldShowBrowserTab}
shouldShowComputerTab={shouldShowComputerTab}
/>
</div>
{canScrollRight && (

View File

@@ -12,6 +12,7 @@ type MainContentTabSwitcherProps = {
setActiveTab: Dispatch<SetStateAction<AppTab>>;
shouldShowTasksTab: boolean;
shouldShowBrowserTab: boolean;
shouldShowComputerTab: boolean;
};
type BuiltInTab = {
@@ -36,7 +37,6 @@ const BASE_TABS: BuiltInTab[] = [
{ kind: 'builtin', id: 'shell', labelKey: 'tabs.shell', icon: Terminal },
{ kind: 'builtin', id: 'files', labelKey: 'tabs.files', icon: Folder },
{ kind: 'builtin', id: 'git', labelKey: 'tabs.git', icon: GitBranch },
{ kind: 'builtin', id: 'computer', labelKey: 'tabs.computer', icon: MonitorCog },
];
const BROWSER_TAB: BuiltInTab = {
@@ -46,6 +46,13 @@ const BROWSER_TAB: BuiltInTab = {
icon: MonitorPlay,
};
const COMPUTER_TAB: BuiltInTab = {
kind: 'builtin',
id: 'computer',
labelKey: 'tabs.computer',
icon: MonitorCog,
};
const TASKS_TAB: BuiltInTab = {
kind: 'builtin',
id: 'tasks',
@@ -58,6 +65,7 @@ export default function MainContentTabSwitcher({
setActiveTab,
shouldShowTasksTab,
shouldShowBrowserTab,
shouldShowComputerTab,
}: MainContentTabSwitcherProps) {
const { t } = useTranslation();
const { plugins } = usePlugins();
@@ -65,6 +73,7 @@ export default function MainContentTabSwitcher({
const builtInTabs: BuiltInTab[] = [
...BASE_TABS,
...(shouldShowBrowserTab ? [BROWSER_TAB] : []),
...(shouldShowComputerTab ? [COMPUTER_TAB] : []),
...(shouldShowTasksTab ? [TASKS_TAB] : []),
];

View File

@@ -54,7 +54,7 @@ type NotificationPreferencesResponse = {
type ActiveLoginProvider = AgentProvider | '';
const KNOWN_MAIN_TABS: SettingsMainTab[] = ['agents', 'appearance', 'git', 'api', 'tasks', 'browser', 'notifications', 'plugins', 'about'];
const KNOWN_MAIN_TABS: SettingsMainTab[] = ['agents', 'appearance', 'git', 'api', 'tasks', 'browser', 'computer', 'notifications', 'plugins', 'about'];
const normalizeMainTab = (tab: string): SettingsMainTab => {
// Keep backwards compatibility with older callers that still pass "tools".

View File

@@ -3,7 +3,7 @@ import type { Dispatch, SetStateAction } from 'react';
import type { LLMProvider } from '../../../types/app';
import type { ProviderAuthStatus } from '../../provider-auth/types';
export type SettingsMainTab = 'agents' | 'appearance' | 'git' | 'api' | 'tasks' | 'browser' | 'notifications' | 'plugins' | 'about';
export type SettingsMainTab = 'agents' | 'appearance' | 'git' | 'api' | 'tasks' | 'browser' | 'computer' | 'notifications' | 'plugins' | 'about';
export type AgentProvider = LLMProvider;
export type AgentCategory = 'account' | 'permissions' | 'mcp';
export type ProjectSortOrder = 'name' | 'date';

View File

@@ -8,6 +8,7 @@ import AppearanceSettingsTab from '../view/tabs/AppearanceSettingsTab';
import CredentialsSettingsTab from '../view/tabs/api-settings/CredentialsSettingsTab';
import GitSettingsTab from '../view/tabs/git-settings/GitSettingsTab';
import BrowserUseSettingsTab from '../view/tabs/browser-use-settings/BrowserUseSettingsTab';
import ComputerUseSettingsTab from '../view/tabs/computer-use-settings/ComputerUseSettingsTab';
import NotificationsSettingsTab from '../view/tabs/NotificationsSettingsTab';
import TasksSettingsTab from '../view/tabs/tasks-settings/TasksSettingsTab';
import PluginSettingsTab from '../../plugins/view/PluginSettingsTab';
@@ -142,6 +143,8 @@ function Settings({ isOpen, onClose, projects = [], initialTab = 'agents' }: Set
{activeTab === 'browser' && <BrowserUseSettingsTab />}
{activeTab === 'computer' && <ComputerUseSettingsTab />}
{activeTab === 'notifications' && (
<NotificationsSettingsTab
notificationPreferences={notificationPreferences}

View File

@@ -1,4 +1,4 @@
import { Bell, Bot, GitBranch, Info, Key, ListChecks, MonitorPlay, Palette, Puzzle } from 'lucide-react';
import { Bell, Bot, GitBranch, Info, Key, ListChecks, MonitorCog, MonitorPlay, Palette, Puzzle } from 'lucide-react';
import { useTranslation } from 'react-i18next';
import { cn } from '../../../lib/utils';
import { PillBar, Pill } from '../../../shared/view/ui';
@@ -22,6 +22,7 @@ const NAV_ITEMS: NavItem[] = [
{ id: 'api', labelKey: 'mainTabs.apiTokens', icon: Key },
{ id: 'tasks', labelKey: 'mainTabs.tasks', icon: ListChecks },
{ id: 'browser', labelKey: 'mainTabs.browser', icon: MonitorPlay },
{ id: 'computer', labelKey: 'mainTabs.computer', icon: MonitorCog },
{ id: 'plugins', labelKey: 'mainTabs.plugins', icon: Puzzle },
{ id: 'notifications', labelKey: 'mainTabs.notifications', icon: Bell },
{ id: 'about', labelKey: 'mainTabs.about', icon: Info },

View File

@@ -0,0 +1,189 @@
import { useCallback, useEffect, useState } from 'react';
import { Download, Loader2 } from 'lucide-react';
import { Button } from '../../../../../shared/view/ui';
import { authenticatedFetch } from '../../../../../utils/api';
import SettingsCard from '../../SettingsCard';
import SettingsRow from '../../SettingsRow';
import SettingsSection from '../../SettingsSection';
import SettingsToggle from '../../SettingsToggle';
type ComputerUseSettings = {
enabled: boolean;
agentToolsEnabled: boolean;
};
type ComputerUseStatus = {
enabled: boolean;
runtime: 'cloud' | 'local';
available: boolean;
nutInstalled: boolean;
screenshotInstalled: boolean;
installInProgress: boolean;
agentToolsEnabled: boolean;
message: string;
};
async function readJson<T>(response: Response): Promise<T> {
const data = await response.json();
if (!response.ok || data.success === false) {
throw new Error(data.error || data.details || `Request failed (${response.status})`);
}
return data as T;
}
export default function ComputerUseSettingsTab() {
const [settings, setSettings] = useState<ComputerUseSettings>({ enabled: false, agentToolsEnabled: false });
const [status, setStatus] = useState<ComputerUseStatus | null>(null);
const [isLoading, setIsLoading] = useState(true);
const [isSaving, setIsSaving] = useState(false);
const [isInstalling, setIsInstalling] = useState(false);
const [error, setError] = useState<string | null>(null);
const loadState = useCallback(async () => {
setError(null);
const [settingsResponse, statusResponse] = await Promise.all([
authenticatedFetch('/api/computer-use/settings'),
authenticatedFetch('/api/computer-use/status'),
]);
const settingsData = await readJson<{ data: { settings: ComputerUseSettings } }>(settingsResponse);
const statusData = await readJson<{ data: ComputerUseStatus }>(statusResponse);
setSettings(settingsData.data.settings);
setStatus(statusData.data);
}, []);
useEffect(() => {
setIsLoading(true);
void loadState()
.catch((err) => setError(err instanceof Error ? err.message : 'Failed to load Computer Use settings'))
.finally(() => setIsLoading(false));
}, [loadState]);
const updateSettings = async (nextSettings: Partial<ComputerUseSettings>) => {
setIsSaving(true);
setError(null);
try {
const response = await authenticatedFetch('/api/computer-use/settings', {
method: 'PUT',
body: JSON.stringify(nextSettings),
});
const data = await readJson<{ data: { settings: ComputerUseSettings } }>(response);
setSettings(data.data.settings);
window.dispatchEvent(new Event('computerUseSettingsChanged'));
await loadState();
} catch (err) {
setError(err instanceof Error ? err.message : 'Failed to save Computer Use settings');
} finally {
setIsSaving(false);
}
};
const installRuntime = async () => {
setIsInstalling(true);
setError(null);
try {
const response = await authenticatedFetch('/api/computer-use/runtime/install', { method: 'POST' });
await readJson(response);
await loadState();
} catch (err) {
setError(err instanceof Error ? err.message : 'Failed to install Computer Use runtime');
} finally {
setIsInstalling(false);
}
};
const isCloud = status?.runtime === 'cloud';
const needsRuntime = Boolean(settings.enabled && !isCloud && status && (!status.nutInstalled || !status.screenshotInstalled));
return (
<div className="space-y-8">
<SettingsSection
title="Computer Use"
description="Let agents see your desktop and drive the mouse and keyboard through a guarded, consent-gated control loop."
>
<SettingsCard divided>
<div className="flex flex-col gap-3 px-4 py-4">
<div className="rounded-md border border-amber-300/50 bg-amber-50 px-3 py-2 text-sm text-amber-800 dark:border-amber-900/50 dark:bg-amber-950/30 dark:text-amber-200">
Computer Use can control your entire desktop. Agents act only while you grant control from the
Computer panel, and any action stops the moment you press Stop.
</div>
</div>
<SettingsRow
label="Enable Computer Use"
description="Allow CloudCLI to capture the screen and create desktop control sessions on this machine."
>
<SettingsToggle
checked={settings.enabled}
onChange={(value) => void updateSettings({ enabled: value })}
ariaLabel="Enable Computer Use"
disabled={isLoading || isSaving}
/>
</SettingsRow>
<SettingsRow
label="Enable Computer Tools for Agents"
description="Register the Computer Use MCP server for all agent providers. Agents can request desktop control, but actions require your explicit per-session consent."
>
<SettingsToggle
checked={settings.agentToolsEnabled}
onChange={(value) => void updateSettings({ agentToolsEnabled: value })}
ariaLabel="Enable Computer Tools for Agents"
disabled={isLoading || isSaving || !settings.enabled}
/>
</SettingsRow>
{(needsRuntime || isCloud || error) && (
<div className="space-y-4 px-4 py-4">
{isCloud && (
<div className="rounded-md border border-border bg-muted/40 px-3 py-2 text-sm text-muted-foreground">
{status?.message || 'Cloud Computer Use requires a linked CloudCLI Desktop Agent on the user machine.'}
</div>
)}
{needsRuntime && (
<div className="flex flex-col gap-3 sm:flex-row sm:items-start sm:justify-between">
<div className="min-w-0 space-y-1">
<div className="text-sm font-medium text-foreground">Desktop runtime required</div>
<p className="text-sm text-muted-foreground">
{status?.message || 'Install the desktop control runtime needed to capture the screen and drive input.'}
</p>
<div className="flex flex-wrap gap-2 pt-1 text-xs text-muted-foreground">
<span className="rounded-md border border-border px-2 py-1">
Control lib: {status?.nutInstalled ? 'installed' : 'missing'}
</span>
<span className="rounded-md border border-border px-2 py-1">
Screen capture: {status?.screenshotInstalled ? 'installed' : 'missing'}
</span>
</div>
</div>
<Button
type="button"
size="sm"
onClick={() => void installRuntime()}
disabled={isInstalling || status?.installInProgress}
className="flex-shrink-0"
>
{isInstalling || status?.installInProgress ? (
<Loader2 className="h-4 w-4 animate-spin" />
) : (
<Download className="h-4 w-4" />
)}
{isInstalling || status?.installInProgress ? 'Installing…' : 'Install Runtime'}
</Button>
</div>
)}
{error && (
<div className="rounded-md border border-red-200 bg-red-50 px-3 py-2 text-sm text-red-700 dark:border-red-900/50 dark:bg-red-950/30 dark:text-red-200">
{error}
</div>
)}
</div>
)}
</SettingsCard>
</SettingsSection>
</div>
);
}

View File

@@ -95,6 +95,7 @@
"apiTokens": "API & Tokens",
"tasks": "Tasks",
"browser": "Browser Use",
"computer": "Computer Use",
"notifications": "Notifications",
"plugins": "Plugins",
"about": "About"