feat: add desktop computer use runtime

This commit is contained in:
Simos Mikelatos
2026-06-17 19:01:15 +00:00
parent fc71fc7d2b
commit 7e6028b113
28 changed files with 4741 additions and 126 deletions

225
electron/computerAgent.js Normal file
View File

@@ -0,0 +1,225 @@
import { spawn } from 'node:child_process';
import fs from 'node:fs/promises';
import path from 'node:path';
// Marker that prefixes every structured JSON message exchanged with the agent
// child process over stdio. Stdout lines that do not start with it are treated
// as plain log output.
const IPC_PREFIX = '@@CUAGENT@@';
/**
 * Builds the PATH value handed to the agent child process.
 *
 * On every platform except Windows a fixed list of Homebrew/system bin
 * directories is placed ahead of the inherited PATH; on Windows only the
 * inherited PATH is used. An unset or empty PATH contributes nothing.
 *
 * @returns {string} Directories joined with the platform's path delimiter.
 */
function getDesktopPath() {
  const segments = [];
  if (process.platform !== 'win32') {
    segments.push('/opt/homebrew/bin', '/usr/local/bin', '/usr/bin', '/bin', '/usr/sbin', '/sbin');
  }
  const inherited = process.env.PATH;
  if (inherited) {
    segments.push(inherited);
  }
  return segments.join(path.delimiter);
}
/**
 * Picks the executable used to run the agent script.
 *
 * - Packaged build running under Electron: the current executable, with
 *   ELECTRON_RUN_AS_NODE=1 added to the environment.
 * - Otherwise: the path in the npm_node_execpath environment variable when it
 *   is set, else the bare `node` command.
 *
 * @param {boolean} isPackaged - Whether the app runs from a packaged build.
 * @returns {{ command: string, env: Record<string, string> }} Command plus
 *   extra environment variables to merge into the child's environment.
 */
function getNodeRuntime(isPackaged) {
  const insidePackagedElectron = Boolean(isPackaged && process.versions.electron);
  if (insidePackagedElectron) {
    return { command: process.execPath, env: { ELECTRON_RUN_AS_NODE: '1' } };
  }
  const npmNode = process.env.npm_node_execpath;
  return { command: npmNode ? npmNode : 'node', env: {} };
}
/**
 * Converts an environment access URL (https://x) to its desktop-agent ws URL.
 *
 * The scheme is mapped http -> ws and https -> wss (ws/wss inputs keep their
 * scheme), the path is replaced with `/desktop-agent`, and any query string or
 * fragment is dropped.
 *
 * @param {string} httpUrl - Absolute environment URL.
 * @returns {string|null} The WebSocket URL, or null when the input is not a
 *   parseable URL or uses a scheme that cannot be turned into a WebSocket URL.
 */
function toAgentWsUrl(httpUrl) {
  let parsed;
  try {
    parsed = new URL(httpUrl);
  } catch {
    return null;
  }

  // Map schemes explicitly. Blindly assigning `wss:` does not work for
  // non-special schemes (custom://, mailto:, ...): the URL protocol setter
  // silently ignores the change and a non-WebSocket URL would be returned.
  let wsProtocol;
  switch (parsed.protocol) {
    case 'http:':
    case 'ws:':
      wsProtocol = 'ws:';
      break;
    case 'https:':
    case 'wss:':
      wsProtocol = 'wss:';
      break;
    default:
      return null;
  }

  parsed.protocol = wsProtocol;
  parsed.pathname = '/desktop-agent';
  parsed.search = '';
  parsed.hash = '';
  return parsed.toString();
}
/**
 * Manages the standalone Computer Use desktop agent process. While the user has
 * Computer Use enabled, this keeps an agent connected to every running cloud
 * environment so hosted sessions can drive this machine. The local CloudCLI
 * server is not involved.
 */
export class ComputerAgentController {
  /**
   * @param {object} options
   * @param {string} options.appRoot - Working directory for the agent process and
   *   base directory of the default agent entry script.
   * @param {string} options.settingsPath - JSON file the settings are persisted to.
   * @param {boolean} [options.isPackaged=false] - Forwarded to getNodeRuntime()
   *   when choosing the executable.
   * @param {() => string[]} [options.getRunningEnvironmentUrls] - Returns the
   *   access URLs of the environments the agent should connect to.
   * @param {(sessionId: string) => (boolean|Promise<boolean>)} [options.promptConsent]
   *   - Asks the user whether a session may take control.
   * @param {() => void} [options.onChange] - Invoked after observable state changes.
   */
  constructor({ appRoot, settingsPath, isPackaged = false, getRunningEnvironmentUrls, promptConsent, onChange }) {
    this.appRoot = appRoot;
    this.settingsPath = settingsPath;
    this.isPackaged = isPackaged;
    this.getRunningEnvironmentUrls = getRunningEnvironmentUrls;
    this.promptConsent = promptConsent;
    this.onChange = onChange;
    this.settings = { enabled: false, consentMode: 'ask' };
    this.child = null;
    this.connectedUrls = new Set();
    this.currentTargets = [];
    // Consent mode the running child was started with; it is only passed to
    // the child through its environment, so a change requires a restart.
    this.currentConsentMode = null;
    this.stdoutBuffer = '';
  }

  /**
   * Coerces arbitrary stored/submitted settings into the canonical shape.
   * Anything other than consentMode === 'auto' falls back to 'ask'.
   * @param {{ enabled?: unknown, consentMode?: unknown }} raw
   * @returns {{ enabled: boolean, consentMode: 'ask'|'auto' }}
   */
  static normalizeSettings(raw) {
    return {
      enabled: Boolean(raw.enabled),
      consentMode: raw.consentMode === 'auto' ? 'auto' : 'ask',
    };
  }

  /** @returns {{ enabled: boolean, consentMode: 'ask'|'auto' }} A copy of the current settings. */
  getSettings() {
    return { ...this.settings };
  }

  /** @returns {object} Snapshot of settings plus process/connection counters. */
  getState() {
    return {
      enabled: this.settings.enabled,
      consentMode: this.settings.consentMode,
      running: Boolean(this.child),
      connectedCount: this.connectedUrls.size,
      targetCount: this.currentTargets.length,
    };
  }

  /**
   * Loads settings from disk. A missing, unreadable or malformed file yields
   * the defaults (disabled, ask); anything but "file not found" is logged.
   * @returns {Promise<{ enabled: boolean, consentMode: 'ask'|'auto' }>}
   */
  async loadSettings() {
    try {
      const raw = await fs.readFile(this.settingsPath, 'utf8');
      this.settings = ComputerAgentController.normalizeSettings(JSON.parse(raw));
    } catch (error) {
      if (error?.code !== 'ENOENT') {
        console.error('[ComputerAgent] could not read settings, using defaults:', error?.message || error);
      }
      this.settings = { enabled: false, consentMode: 'ask' };
    }
    return this.settings;
  }

  /**
   * Applies and persists new settings, then reconciles the agent process.
   * @param {{ enabled?: unknown, consentMode?: unknown }} next
   * @returns {Promise<{ enabled: boolean, consentMode: 'ask'|'auto' }>}
   */
  async saveSettings(next) {
    this.settings = ComputerAgentController.normalizeSettings(next);
    await fs.mkdir(path.dirname(this.settingsPath), { recursive: true });
    await fs.writeFile(this.settingsPath, JSON.stringify(this.settings, null, 2), 'utf8');
    await this.sync();
    this.onChange?.();
    return this.settings;
  }

  /** Reconciles the agent process with the current settings + environments. */
  async sync() {
    const { enabled, consentMode } = this.settings;
    const targets = enabled ? (this.getRunningEnvironmentUrls?.() || []) : [];
    // De-duplicate so the comparison below cannot be fooled by repeated URLs.
    const wsTargets = [...new Set(targets.map(toAgentWsUrl).filter(Boolean))];

    if (!enabled || wsTargets.length === 0) {
      this.stop();
      this.currentTargets = [];
      this.currentConsentMode = null;
      return;
    }

    const known = new Set(this.currentTargets);
    const sameTargets = wsTargets.length === known.size && wsTargets.every((url) => known.has(url));
    const sameConsentMode = this.currentConsentMode === consentMode;
    if (this.child && sameTargets && sameConsentMode) {
      return; // already running with the right targets and consent mode
    }

    this.currentTargets = wsTargets;
    this.currentConsentMode = consentMode;
    this.restart(wsTargets);
  }

  /**
   * Stops any running agent and spawns a fresh one for the given targets.
   * @param {string[]} wsTargets - Agent WebSocket URLs, passed comma-joined via
   *   the CLOUDCLI_DESKTOP_AGENT_URLS environment variable.
   */
  restart(wsTargets) {
    this.stop();
    const agentEntry = process.env.CLOUDCLI_COMPUTER_AGENT_ENTRY
      || path.join(this.appRoot, 'dist-server', 'server', 'computer-use-agent.js');
    const runtime = getNodeRuntime(this.isPackaged);
    const child = spawn(runtime.command, [agentEntry], {
      cwd: this.appRoot,
      env: {
        ...process.env,
        ...runtime.env,
        PATH: getDesktopPath(),
        CLOUDCLI_DESKTOP_AGENT_URLS: wsTargets.join(','),
        CLOUDCLI_COMPUTER_USE_CONSENT_MODE: this.settings.consentMode,
      },
      stdio: ['pipe', 'pipe', 'pipe'],
      windowsHide: true,
    });
    this.child = child;
    this.connectedUrls = new Set();
    this.stdoutBuffer = ''; // never prepend a previous process's partial line
    this.attachChildListeners(child);
    this.onChange?.();
  }

  /**
   * Wires lifecycle and stdio listeners onto a child process. Internal helper
   * of restart().
   *
   * Every state-changing handler first checks that `child` is still the
   * current process: a process replaced by restart() emits its 'exit' later,
   * and must not clear the reference to (or the connections of) its successor.
   *
   * @param {import('node:child_process').ChildProcess} child
   */
  attachChildListeners(child) {
    const isCurrent = () => this.child === child;

    child.once('error', (error) => {
      console.error('[ComputerAgent] failed to start:', error.message);
      if (!isCurrent()) return;
      this.child = null;
      this.connectedUrls = new Set();
      this.onChange?.();
    });

    child.stdout?.on('data', (chunk) => {
      // Output of a superseded process must not touch the shared line buffer
      // or the connection set of the current one.
      if (isCurrent()) this.handleStdout(String(chunk));
    });

    child.stderr?.on('data', (chunk) => {
      for (const line of String(chunk).split(/\r?\n/)) {
        if (line.trim()) console.error('[ComputerAgent]', line);
      }
    });

    // Writing to the stdin of a process that has gone away emits 'error' on
    // the stream; without a listener that would be an uncaught exception.
    child.stdin?.on('error', (error) => {
      console.error('[ComputerAgent] stdin error:', error?.message || error);
    });

    child.once('exit', (code) => {
      console.log(`[ComputerAgent] exited (code ${code ?? 'null'})`);
      if (!isCurrent()) return;
      this.child = null;
      this.connectedUrls = new Set();
      this.onChange?.();
    });
  }

  /**
   * Consumes a chunk of the agent's stdout. Complete lines starting with
   * IPC_PREFIX are parsed as JSON events; other non-empty lines are logged.
   * A trailing partial line is buffered until the next chunk.
   * @param {string} chunk
   */
  handleStdout(chunk) {
    this.stdoutBuffer += chunk;
    const lines = this.stdoutBuffer.split('\n');
    this.stdoutBuffer = lines.pop() || '';
    for (const line of lines) {
      const trimmed = line.trim();
      if (!trimmed.startsWith(IPC_PREFIX)) {
        if (trimmed) console.log('[ComputerAgent]', trimmed);
        continue;
      }
      let payload;
      try {
        payload = JSON.parse(trimmed.slice(IPC_PREFIX.length).trim());
      } catch {
        continue; // malformed IPC line: ignore it
      }
      // Fire-and-forget is safe: handleAgentEvent never rejects.
      void this.handleAgentEvent(payload);
    }
  }

  /**
   * Handles one parsed agent event. Never rejects: payloads that are not
   * objects are ignored and a failing consent prompt is answered with a deny.
   * @param {{ type?: string, url?: string, sessionId?: string }} payload
   * @returns {Promise<void>}
   */
  async handleAgentEvent(payload) {
    // JSON.parse can legitimately yield null or a primitive.
    if (payload === null || typeof payload !== 'object') return;
    switch (payload.type) {
      case 'connected':
        this.connectedUrls.add(payload.url);
        this.onChange?.();
        break;
      case 'disconnected':
        this.connectedUrls.delete(payload.url);
        this.onChange?.();
        break;
      case 'consent-request': {
        let allow = false;
        try {
          allow = Boolean(await this.promptConsent?.(payload.sessionId));
        } catch (error) {
          // Fail closed: if the prompt cannot be shown, control is not granted.
          console.error('[ComputerAgent] consent prompt failed:', error?.message || error);
        }
        this.sendToChild({ type: 'consent-response', sessionId: payload.sessionId, allow });
        break;
      }
      default:
        break;
    }
  }

  /**
   * Sends one IPC message to the agent over its stdin; a no-op when there is
   * no running child or its stdin is not writable.
   * @param {object} message - JSON-serialisable message.
   */
  sendToChild(message) {
    if (this.child?.stdin?.writable) {
      this.child.stdin.write(`${IPC_PREFIX} ${JSON.stringify(message)}\n`);
    }
  }

  /**
   * Tells the agent to revoke a session.
   * @param {string} sessionId
   */
  revokeSession(sessionId) {
    this.sendToChild({ type: 'revoke-session', sessionId });
  }

  /** Terminates the running agent (SIGTERM) and clears connection state. */
  stop() {
    if (!this.child) return;
    const child = this.child;
    this.child = null;
    this.connectedUrls = new Set();
    this.stdoutBuffer = '';
    try {
      child.kill('SIGTERM');
    } catch {
      /* best effort: the process may already be gone */
    }
  }
}

View File

@@ -1,9 +1,10 @@
import { app, BrowserWindow, clipboard, dialog, ipcMain, shell } from 'electron';
import { app, BrowserWindow, clipboard, dialog, ipcMain, shell, systemPreferences } from 'electron';
import { spawn } from 'node:child_process';
import path from 'node:path';
import { fileURLToPath } from 'node:url';
import { CloudController } from './cloud.js';
import { ComputerAgentController } from './computerAgent.js';
import { DesktopWindowManager } from './desktopWindow.js';
import { LocalServerController } from './localServer.js';
import { TabsController } from './tabs.js';
@@ -22,6 +23,7 @@ let activeTarget = { kind: 'launcher', name: APP_NAME, url: null };
let desktopWindow = null;
let localServer = null;
let cloud = null;
let computerAgent = null;
let isQuitting = false;
let isRefreshingCloud = false;
@@ -52,6 +54,34 @@ function getSettingsPath() {
return path.join(app.getPath('userData'), 'desktop-settings.json');
}
/**
 * Returns the location of the Computer Use settings file:
 * `computer-use-settings.json` inside the directory Electron reports for
 * `app.getPath('userData')`.
 * @returns {string}
 */
function getComputerUseSettingsPath() {
  const userDataDir = app.getPath('userData');
  return path.join(userDataDir, 'computer-use-settings.json');
}
/**
 * Collects the URL of every cloud environment whose status is 'running'.
 * Environments for which `cloud.getEnvironmentUrl` returns a falsy value are
 * left out.
 * @returns {Array} URLs in the order the environments are listed.
 */
function getRunningEnvironmentUrls() {
  const runningEnvironments = cloud.getEnvironments().filter(({ status }) => status === 'running');
  const urls = [];
  for (const environment of runningEnvironments) {
    const url = cloud.getEnvironmentUrl(environment);
    if (url) {
      urls.push(url);
    }
  }
  return urls;
}
/**
 * Asks the user whether a cloud session may take control of this computer.
 *
 * Shows a warning message box with "Allow this session" and "Deny" buttons.
 * "Deny" is both the default and the cancel button, so a stray Enter keypress
 * or dismissing the dialog never grants control.
 *
 * @param {string} [sessionId] - Identifier of the requesting session; shown in
 *   the dialog when provided.
 * @returns {Promise<boolean>} true only when the user chose "Allow this session".
 */
async function promptComputerUseConsent(sessionId) {
  const ALLOW_BUTTON = 0;
  const DENY_BUTTON = 1;
  const { response } = await dialog.showMessageBox(desktopWindow?.getMainWindow() || undefined, {
    type: 'warning',
    buttons: ['Allow this session', 'Deny'],
    // Safe default: granting mouse/keyboard control must be a deliberate choice.
    defaultId: DENY_BUTTON,
    cancelId: DENY_BUTTON,
    title: 'Computer Use request',
    message: 'An agent wants to control this computer',
    detail: [
      'A cloud agent is requesting control of your mouse, keyboard, and screen for this session.',
      'Approval lasts for this session only. You can stop it any time from the Computer panel.',
      sessionId ? `\nSession: ${sessionId}` : '',
    ].join('\n'),
  });
  return response === ALLOW_BUTTON;
}
/**
 * Returns the name of the active target, falling back to the application
 * name when there is no active target or its name is empty.
 * @returns {string}
 */
function getDisplayTargetName() {
  const targetName = activeTarget?.name;
  if (targetName) {
    return targetName;
  }
  return APP_NAME;
}
@@ -108,6 +138,7 @@ function getDesktopState() {
tabs: tabs.getSerializableTabs(),
activeTabId: tabs.activeTabId,
environments: cloud.getEnvironments().map(serializeEnvironment),
computerUse: computerAgent?.getState() || { enabled: false, consentMode: 'ask', running: false, connectedCount: 0, targetCount: 0 },
};
}
@@ -217,18 +248,87 @@ async function copyDiagnostics() {
});
}
async function showComputerUsePreview() {
await dialog.showMessageBox(desktopWindow?.getMainWindow() || undefined, {
/**
 * macOS only: shows the current Screen Recording and Accessibility permission
 * status in a dialog and, depending on the button chosen, opens the matching
 * pane via an `x-apple.systempreferences:` URL. Does nothing on other platforms.
 *
 * @returns {Promise<void>}
 */
async function showMacComputerUsePermissions() {
if (process.platform !== 'darwin') return;
const screenStatus = systemPreferences.getMediaAccessStatus('screen');
// NOTE(review): the `false` argument presumably means "check without
// prompting the user" — confirm against the Electron systemPreferences docs.
const accessibilityTrusted = systemPreferences.isTrustedAccessibilityClient(false);
const detail = [
`Screen Recording: ${screenStatus === 'granted' ? 'granted' : 'not granted'}`,
`Accessibility: ${accessibilityTrusted ? 'granted' : 'not granted'}`,
'',
'Computer Use needs both permissions to capture the screen and control the mouse and keyboard.',
'After granting a permission, fully quit and reopen CloudCLI so the change takes effect.',
].join('\n');
const { response } = await dialog.showMessageBox(desktopWindow?.getMainWindow() || undefined, {
type: 'info',
// NOTE(review): the next three properties are overridden by the `buttons`,
// `title` and `message` keys that follow (the last duplicate key wins). They
// look like leftovers from the previous preview dialog — confirm and remove.
buttons: ['OK'],
title: 'Computer Use Preview',
message: 'Computer use needs an explicit safety gate before it can run.',
// Button order defines the `response` indices handled below.
buttons: ['Open Screen Recording', 'Open Accessibility', 'Close'],
defaultId: 0,
cancelId: 2,
title: 'Computer Use Permissions',
message: 'Grant macOS permissions for Computer Use',
detail,
});
// response 0 = "Open Screen Recording", 1 = "Open Accessibility", 2 = "Close" (no action).
if (response === 0) {
await shell.openExternal('x-apple.systempreferences:com.apple.preference.security?Privacy_ScreenCapture');
} else if (response === 1) {
await shell.openExternal('x-apple.systempreferences:com.apple.preference.security?Privacy_Accessibility');
}
}
// Desktop control for cloud Computer Use: the desktop acts as a TeamViewer-style
// agent for hosted environments. Enabling here lets cloud agents drive THIS
// machine; the user picks whether to auto-connect or be asked per session.
/**
 * Shows the Computer Use settings dialog and applies the chosen action.
 *
 * `buttons` and `actions` are built in lockstep, so the index returned by the
 * dialog selects the action to run: enable (with a consent mode), disable,
 * open the macOS permissions dialog, or close.
 *
 * @returns {Promise<void>}
 */
async function showComputerUsePreview() {
// Fall back to a disabled state when the controller has not been created yet.
const state = computerAgent?.getState() || { enabled: false, consentMode: 'ask' };
const buttons = [];
const actions = [];
if (!state.enabled) {
buttons.push('Enable — ask each session'); actions.push({ kind: 'enable', consentMode: 'ask' });
buttons.push('Enable — auto-connect'); actions.push({ kind: 'enable', consentMode: 'auto' });
} else {
buttons.push('Disable Computer Use'); actions.push({ kind: 'disable' });
// While enabled, offer switching to whichever consent mode is not active.
const otherMode = state.consentMode === 'auto' ? 'ask' : 'auto';
buttons.push(`Switch to ${otherMode === 'auto' ? 'auto-connect' : 'ask each session'}`);
actions.push({ kind: 'enable', consentMode: otherMode });
}
if (process.platform === 'darwin') {
buttons.push('macOS Permissions…'); actions.push({ kind: 'permissions' });
}
// "Close" is always last so it can double as the cancel button below.
buttons.push('Close'); actions.push({ kind: 'close' });
const statusLine = state.enabled
? `Enabled — ${state.consentMode === 'auto' ? 'auto-connect' : 'ask each session'} · ${state.connectedCount || 0} environment(s) linked`
: 'Disabled';
const { response } = await dialog.showMessageBox(desktopWindow?.getMainWindow() || undefined, {
type: 'question',
buttons,
defaultId: 0,
cancelId: buttons.length - 1,
title: 'Computer Use (Desktop Agent)',
message: 'Let cloud agents control this computer',
detail: [
// NOTE(review): the next string and the 'Before this is exposed…' string
// further down describe Computer Use as not enabled yet, which contradicts
// the rest of this dialog. They look like leftovers from the previous
// preview text — confirm and remove.
'The desktop shell is ready for controlled automation hooks, but full computer use is not enabled yet.',
`Status: ${statusLine}`,
'',
'Before this is exposed, CloudCLI needs per-session consent, a stop control, screen-capture permission checks, app/window scoping, and a provider-specific action loop.',
'When enabled, agents running in your CloudCLI cloud environments can see this screen and drive its mouse and keyboard.',
'• Ask each session: you approve a prompt the first time each session wants control.',
'• Auto-connect: sessions can act without a prompt.',
process.platform === 'linux' ? '\nLinux needs X utilities (libxtst, imagemagick) installed to capture the screen and drive input.' : '',
].join('\n'),
});
const action = actions[response];
if (!action) return;
if (action.kind === 'enable') {
await computerAgent?.saveSettings({ enabled: true, consentMode: action.consentMode });
} else if (action.kind === 'disable') {
// Keep the stored consent mode so re-enabling later restores the same choice.
await computerAgent?.saveSettings({ enabled: false, consentMode: state.consentMode });
} else if (action.kind === 'permissions') {
await showMacComputerUsePermissions();
}
// kind === 'close' needs no action.
}
async function refreshCloudEnvironments({ showErrors = false } = {}) {
@@ -253,6 +353,8 @@ async function refreshCloudEnvironments({ showErrors = false } = {}) {
throw error;
} finally {
isRefreshingCloud = false;
// Reconcile the Computer Use desktop agent with the latest running environments.
void computerAgent?.sync().catch((error) => console.error('[ComputerAgent] sync failed:', error?.message || error));
syncDesktopState();
}
}
@@ -658,6 +760,10 @@ function registerAppEvents() {
}
});
app.on('before-quit', () => {
computerAgent?.stop();
});
app.on('before-quit', (event) => {
if (isQuitting || !localServer?.hasOwnedServer()) return;
if (localServer.getSettings().keepLocalServerRunning) {
@@ -770,9 +876,18 @@ async function bootstrap() {
callbackUrl: CALLBACK_URL,
onChange: syncDesktopState,
});
computerAgent = new ComputerAgentController({
appRoot: getAppRoot(),
settingsPath: getComputerUseSettingsPath(),
isPackaged: app.isPackaged,
getRunningEnvironmentUrls,
promptConsent: promptComputerUseConsent,
onChange: syncDesktopState,
});
await localServer.loadDesktopSettings();
await cloud.loadCloudAccount();
await computerAgent.loadSettings();
registerProtocolHandler();
registerIpcHandlers();