feat: add desktop computer use runtime

This commit is contained in:
Simos Mikelatos
2026-06-17 19:01:15 +00:00
parent fc71fc7d2b
commit 7e6028b113
28 changed files with 4741 additions and 126 deletions

View File

@@ -0,0 +1,242 @@
import { createRequire } from 'node:module';
const require = createRequire(import.meta.url);
/** A 2D coordinate. Whether it is in screenshot or mouse space depends on the function using it. */
export type Point = { x: number; y: number };
/** Mouse buttons the executor can press. */
export type ClickButton = 'left' | 'right' | 'middle';
/** Directions accepted by the executor's scroll action. */
export type ScrollDirection = 'up' | 'down' | 'left' | 'right';
/** Width/height pair, used both for screenshot sizes and for the mouse coordinate space. */
export type DisplaySize = { width: number; height: number };
/**
 * Result of probing the optional desktop-control modules.
 * `nut` and `screenshot` hold whatever the loaders returned (null when loading failed);
 * the two booleans summarize whether each module looks usable.
 */
export type RuntimeReadiness = {
nut: any | null;
screenshot: any | null;
nutInstalled: boolean;
screenshotInstalled: boolean;
};
/**
* Coordinate space the executor reports/accepts. The screenshot pixel space is
* the canonical space agents and users address; it is mapped to the nut-js
* logical mouse space before any action runs.
*/
export type ExecutorTarget = {
// Size of the screenshot the coordinates refer to; when null the mouse space size is used instead.
displaySize: DisplaySize | null;
};
/**
 * Loads the optional nut-js module on demand.
 *
 * @returns The required module, or null when requiring it throws.
 */
export function getNut(): any | null {
  let loaded: any | null;
  try {
    loaded = require('@nut-tree-fork/nut-js');
  } catch {
    loaded = null;
  }
  return loaded;
}
/**
 * Loads the optional screenshot-desktop module on demand.
 *
 * @returns The module's `default` export when it is truthy, otherwise the
 * module itself; null when requiring it throws.
 */
export function getScreenshot(): any | null {
  try {
    const loaded = require('screenshot-desktop');
    const defaultExport = loaded?.default;
    return defaultExport ? defaultExport : loaded;
  } catch {
    return null;
  }
}
/**
 * Probes both optional modules and reports whether each one is usable.
 * nut-js counts as installed when it loads at all; the screenshot module
 * counts only when the loaded value is callable.
 */
export function getRuntimeReadiness(): RuntimeReadiness {
  const nutModule = getNut();
  const screenshotModule = getScreenshot();
  const readiness: RuntimeReadiness = {
    nut: nutModule,
    screenshot: screenshotModule,
    nutInstalled: nutModule ? true : false,
    screenshotInstalled: typeof screenshotModule === 'function',
  };
  return readiness;
}
/**
 * Reads the pixel dimensions from a PNG/JPEG buffer header without decoding it.
 *
 * @param buffer Raw image bytes.
 * @returns The image size, or null when the buffer is neither format or no
 * frame header is found within the readable range.
 */
export function readImageSize(buffer: Buffer): DisplaySize | null {
  // PNG: 8-byte signature, then IHDR chunk with width/height as big-endian uint32.
  if (buffer.length >= 24 && buffer[0] === 0x89 && buffer[1] === 0x50) {
    return { width: buffer.readUInt32BE(16), height: buffer.readUInt32BE(20) };
  }
  // JPEG: scan for a Start-Of-Frame marker (0xFFC0..0xFFCF, excluding C4/C8/CC).
  if (buffer.length >= 4 && buffer[0] === 0xff && buffer[1] === 0xd8) {
    let offset = 2;
    while (offset + 9 < buffer.length) {
      if (buffer[offset] !== 0xff) {
        offset += 1;
        continue;
      }
      const marker = buffer[offset + 1];
      // 0xFF may be repeated as padding before a marker; step over one fill byte at a time.
      if (marker === 0xff) {
        offset += 1;
        continue;
      }
      // Stuffed 0xFF00, TEM (0x01), RSTn (0xD0-0xD7), SOI/EOI (0xD8/0xD9) carry no length field.
      if (marker === 0x00 || marker === 0x01 || (marker >= 0xd0 && marker <= 0xd9)) {
        offset += 2;
        continue;
      }
      const isStartOfFrame =
        marker >= 0xc0 && marker <= 0xcf && marker !== 0xc4 && marker !== 0xc8 && marker !== 0xcc;
      if (isStartOfFrame) {
        // SOF layout: marker(2) length(2) precision(1) height(2) width(2).
        return { height: buffer.readUInt16BE(offset + 5), width: buffer.readUInt16BE(offset + 7) };
      }
      // Any other segment: skip the marker plus its declared length.
      offset += 2 + buffer.readUInt16BE(offset + 2);
    }
  }
  return null;
}
/**
 * Captures the screen as a PNG and returns it as a data URL together with
 * the pixel size parsed from the image header.
 *
 * @throws Error when the screenshot module is not callable.
 */
export async function captureScreenshot(): Promise<{ dataUrl: string; size: DisplaySize | null }> {
  const capture = getScreenshot();
  if (typeof capture === 'function') {
    const png: Buffer = await capture({ format: 'png' });
    const encoded = png.toString('base64');
    return { dataUrl: `data:image/png;base64,${encoded}`, size: readImageSize(png) };
  }
  throw new Error('Computer Use runtime is not available.');
}
/**
 * Returns the mouse coordinate space size (logical screen pixels) as reported
 * by nut-js.
 *
 * @throws Error when nut-js cannot be loaded.
 */
export async function getMouseSpaceSize(): Promise<DisplaySize> {
  const nut = getNut();
  if (nut) {
    // Width is queried before height, one after the other.
    const size: DisplaySize = {
      width: await nut.screen.width(),
      height: await nut.screen.height(),
    };
    return size;
  }
  throw new Error('Computer Use runtime is not available.');
}
/**
 * Maps a point from screenshot/image space to the mouse coordinate space.
 * When the target has no display size, or an image dimension is 0, that axis
 * is left unscaled.
 */
export async function toMouseSpace(target: ExecutorTarget, point: Point): Promise<Point> {
  const mouse = await getMouseSpaceSize();
  const source = target.displaySize || mouse;
  let ratioX = 1;
  let ratioY = 1;
  if (source.width) {
    ratioX = mouse.width / source.width;
  }
  if (source.height) {
    ratioY = mouse.height / source.height;
  }
  return { x: Math.round(point.x * ratioX), y: Math.round(point.y * ratioY) };
}
/**
 * Maps a point from the mouse coordinate space back to screenshot/image space.
 * When the target has no display size, or a mouse dimension is 0, that axis
 * is left unscaled.
 */
export function toImageSpace(target: ExecutorTarget, point: Point, mouseSize: DisplaySize): Point {
  const destination = target.displaySize || mouseSize;
  let ratioX = 1;
  let ratioY = 1;
  if (mouseSize.width) {
    ratioX = destination.width / mouseSize.width;
  }
  if (mouseSize.height) {
    ratioY = destination.height / mouseSize.height;
  }
  return { x: Math.round(point.x * ratioX), y: Math.round(point.y * ratioY) };
}
/** Translates a button name into the matching nut-js `Button` value; anything but right/middle maps to LEFT. */
function nutButton(nut: any, button: ClickButton) {
  switch (button) {
    case 'right':
      return nut.Button.RIGHT;
    case 'middle':
      return nut.Button.MIDDLE;
    default:
      return nut.Button.LEFT;
  }
}
/**
 * Maps a key name (xdotool-style, as Anthropic's computer tool emits) to a nut-js Key.
 *
 * Resolution order: named aliases (case-insensitive), F1-F24, then single
 * characters (digits map to `Num0`..`Num9`, everything else to its upper-case
 * name).
 *
 * @param nut The loaded nut-js module (only `nut.Key` is read).
 * @param token One key name from a chord, e.g. "ctrl", "Return", "a", "7".
 * @returns The matching member of `nut.Key`.
 * @throws Error when the token does not resolve to an own member of `nut.Key`.
 */
function nutKey(nut: any, token: string): any {
  const aliases: Record<string, string> = {
    return: 'Enter', enter: 'Enter', esc: 'Escape', escape: 'Escape', tab: 'Tab',
    space: 'Space', backspace: 'Backspace', delete: 'Delete', del: 'Delete', insert: 'Insert',
    up: 'Up', down: 'Down', left: 'Left', right: 'Right',
    home: 'Home', end: 'End', pageup: 'PageUp', page_up: 'PageUp', pagedown: 'PageDown', page_down: 'PageDown',
    ctrl: 'LeftControl', control: 'LeftControl', alt: 'LeftAlt', shift: 'LeftShift',
    meta: 'LeftSuper', super: 'LeftSuper', cmd: 'LeftSuper', win: 'LeftSuper',
    capslock: 'CapsLock',
  };
  const hasOwn = (object: object, property: string): boolean =>
    Object.prototype.hasOwnProperty.call(object, property);

  const lower = token.toLowerCase();
  let keyName: string | undefined;
  // Own-property check so inherited names such as "constructor" are not treated as aliases.
  if (hasOwn(aliases, lower)) {
    keyName = aliases[lower];
  } else if (/^f([1-9]|1[0-9]|2[0-4])$/.test(lower)) {
    keyName = `F${lower.slice(1)}`;
  } else if (token.length === 1) {
    // Digits are resolved first: if nut.Key is a numeric enum with reverse mappings
    // (assumed — confirm against nut-js), nut.Key['1'] would yield a key *name*, not Num1.
    keyName = /[0-9]/.test(token) ? `Num${token}` : token.toUpperCase();
  }

  const keyTable = nut.Key;
  if (keyName !== undefined && keyTable && hasOwn(keyTable, keyName) && keyTable[keyName] !== undefined) {
    return keyTable[keyName];
  }
  throw new Error(`Unsupported key: ${token}`);
}
/**
 * Returns the configured nut-js module, or fails with the same
 * "runtime is not available" error the other helpers in this file use.
 */
async function requireConfiguredNut(): Promise<any> {
  const nut = await executor.configure();
  if (!nut) {
    throw new Error('Computer Use runtime is not available.');
  }
  return nut;
}
/**
 * The cross-platform OS executor. It is intentionally free of any server,
 * database, or session dependencies so it can run both inside the local server
 * process (OSS mode) and inside the standalone desktop agent (cloud relay).
 *
 * All points passed in are in the target's screenshot space and are converted
 * with `toMouseSpace` before use. Every action throws
 * "Computer Use runtime is not available." when nut-js cannot be loaded.
 */
export const executor = {
  /** Loads nut-js and lowers its auto-delays; resolves to null when nut-js is missing. */
  async configure(): Promise<any | null> {
    const nut = getNut();
    if (nut) {
      // Make actions responsive; the agent loop already paces itself with screenshots.
      nut.mouse.config.autoDelayMs = 2;
      nut.keyboard.config.autoDelayMs = 2;
    }
    return nut;
  },
  /** Reads the current mouse position and converts it to the target's screenshot space. */
  async cursorPosition(target: ExecutorTarget): Promise<Point> {
    const nut = await requireConfiguredNut();
    const mouseSize = await getMouseSpaceSize();
    const pos = await nut.mouse.getPosition();
    return toImageSpace(target, { x: pos.x, y: pos.y }, mouseSize);
  },
  /** Moves the mouse to a screenshot-space point. */
  async moveTo(target: ExecutorTarget, point: Point): Promise<void> {
    const nut = await requireConfiguredNut();
    const dest = await toMouseSpace(target, point);
    await nut.mouse.setPosition(new nut.Point(dest.x, dest.y));
  },
  /** Optionally moves to `point`, then single- or double-clicks with `button`. */
  async click(target: ExecutorTarget, button: ClickButton, point?: Point, doubleClick = false): Promise<void> {
    const nut = await requireConfiguredNut();
    if (point) {
      await executor.moveTo(target, point);
    }
    if (doubleClick) {
      await nut.mouse.doubleClick(nutButton(nut, button));
    } else {
      await nut.mouse.click(nutButton(nut, button));
    }
  },
  /** Presses `button` at `from`, moves to `to`, and always releases the button again. */
  async drag(target: ExecutorTarget, from: Point, to: Point, button: ClickButton = 'left'): Promise<void> {
    const nut = await requireConfiguredNut();
    const start = await toMouseSpace(target, from);
    const end = await toMouseSpace(target, to);
    const held = nutButton(nut, button);
    await nut.mouse.setPosition(new nut.Point(start.x, start.y));
    await nut.mouse.pressButton(held);
    try {
      await nut.mouse.setPosition(new nut.Point(end.x, end.y));
    } finally {
      // Never leave the button held down if the move fails.
      await nut.mouse.releaseButton(held);
    }
  },
  /** Types `text` through the nut-js keyboard. */
  async type(text: string): Promise<void> {
    const nut = await requireConfiguredNut();
    await nut.keyboard.type(text);
  },
  /**
   * Presses a "+"-separated chord (e.g. "ctrl+shift+t") in order and releases
   * it in reverse order. An empty chord is a no-op; unknown key names throw
   * before anything is pressed.
   */
  async pressChord(chord: string): Promise<void> {
    const nut = await requireConfiguredNut();
    const tokens = chord.split('+').map((token) => token.trim()).filter(Boolean);
    if (tokens.length === 0) {
      return;
    }
    const keys = tokens.map((token) => nutKey(nut, token));
    const pressed: any[] = [];
    try {
      for (const key of keys) {
        await nut.keyboard.pressKey(key);
        pressed.push(key);
      }
    } finally {
      // Release whatever was actually pressed, even when a later press failed.
      for (const key of pressed.reverse()) {
        await nut.keyboard.releaseKey(key);
      }
    }
  },
  /** Optionally moves to `point`, then scrolls at least one step in `direction`. */
  async scroll(target: ExecutorTarget, direction: ScrollDirection, amount: number, point?: Point): Promise<void> {
    const nut = await requireConfiguredNut();
    if (point) {
      await executor.moveTo(target, point);
    }
    // Math.max(1, NaN) is NaN, so non-finite amounts fall back to a single step.
    const steps = Number.isFinite(amount) ? Math.max(1, Math.round(amount)) : 1;
    if (direction === 'up') await nut.mouse.scrollUp(steps);
    else if (direction === 'down') await nut.mouse.scrollDown(steps);
    else if (direction === 'left') await nut.mouse.scrollLeft(steps);
    else await nut.mouse.scrollRight(steps);
  },
};

View File

@@ -0,0 +1,118 @@
import express from 'express';
import { computerUseService } from '@/modules/computer-use/computer-use.service.js';
const router = express.Router();
/**
 * Extracts the credential from an `Authorization: Bearer <token>` header value.
 *
 * @returns The token text, or null when the header is not a string or does
 * not use the Bearer scheme (matched case-insensitively).
 */
function readBearerToken(header: unknown): string | null {
  if (typeof header === 'string') {
    const parsed = header.trim().match(/^Bearer\s+(.+)$/i);
    const credential = parsed ? parsed[1] : undefined;
    return credential ? credential : null;
  }
  return null;
}
/** Normalizes untrusted input to a mouse button name; anything unrecognized becomes 'left'. */
function toButton(value: unknown): 'left' | 'right' | 'middle' {
  switch (value) {
    case 'right':
      return 'right';
    case 'middle':
      return 'middle';
    default:
      return 'left';
  }
}
/** Normalizes untrusted input to a scroll direction; anything unrecognized becomes 'up'. */
function toScrollDirection(value: unknown): 'up' | 'down' | 'left' | 'right' {
  switch (value) {
    case 'down':
      return 'down';
    case 'left':
      return 'left';
    case 'right':
      return 'right';
    default:
      return 'up';
  }
}
/** Reads `x`/`y` from a tool payload; returns undefined unless both are numbers. */
function point(input: Record<string, unknown>): { x: number; y: number } | undefined {
  const { x, y } = input;
  if (typeof x !== 'number' || typeof y !== 'number') {
    return undefined;
  }
  return { x, y };
}
// Authenticates every request on this router: the caller must present the
// service's MCP token, either as a Bearer Authorization header or in the
// `x-computer-use-mcp-token` header. Anything else is answered with 401.
router.use((req, res, next) => {
  const expected = computerUseService.getMcpToken();
  const bearer = readBearerToken(req.headers.authorization);
  const token = bearer ? bearer : String(req.headers['x-computer-use-mcp-token'] || '');
  const authorized = token !== '' && token === expected;
  if (authorized) {
    next();
    return;
  }
  res.status(401).json({ success: false, error: 'Invalid Computer Use MCP token.' });
});
// Dispatches one MCP tool call to the matching computerUseService method.
// The JSON body carries the tool arguments (plus `sessionId` for session-bound
// tools). Responses: 200 with `{ success, data }`, 404 for an unknown tool
// name, 400 with the error message for anything that throws.
router.post('/tools/:toolName', async (req, res) => {
  try {
    const input = (req.body && typeof req.body === 'object' ? req.body : {}) as Record<string, unknown>;
    const sessionId = typeof input.sessionId === 'string' ? input.sessionId : '';
    const toolName = req.params.toolName;
    // Tools that always move the pointer must receive real coordinates; silently
    // defaulting to (0, 0) would drive the real cursor to the screen origin.
    const requirePoint = (x: unknown, y: unknown, label: string): { x: number; y: number } => {
      if (typeof x !== 'number' || typeof y !== 'number' || !Number.isFinite(x) || !Number.isFinite(y)) {
        throw new Error(`${toolName} requires numeric ${label} coordinates.`);
      }
      return { x, y };
    };
    let result: unknown;
    switch (toolName) {
      case 'computer_create_session':
        result = await computerUseService.createAgentSession();
        break;
      case 'computer_list_sessions':
        result = await computerUseService.listAgentSessions();
        break;
      case 'computer_screenshot':
        result = await computerUseService.agentScreenshot(sessionId);
        break;
      case 'computer_cursor_position':
        result = await computerUseService.agentCursorPosition(sessionId);
        break;
      case 'computer_mouse_move':
        result = await computerUseService.agentMouseMove(sessionId, requirePoint(input.x, input.y, 'x/y'));
        break;
      // For clicks the point is optional and is passed through as undefined when absent.
      case 'computer_left_click':
        result = await computerUseService.agentClick(sessionId, 'left', point(input));
        break;
      case 'computer_right_click':
        result = await computerUseService.agentClick(sessionId, 'right', point(input));
        break;
      case 'computer_middle_click':
        result = await computerUseService.agentClick(sessionId, 'middle', point(input));
        break;
      case 'computer_double_click':
        result = await computerUseService.agentClick(sessionId, toButton(input.button), point(input), true);
        break;
      case 'computer_left_click_drag': {
        const from = requirePoint(input.startX, input.startY, 'startX/startY');
        const to = requirePoint(input.endX, input.endY, 'endX/endY');
        result = await computerUseService.agentDrag(sessionId, from, to, 'left');
        break;
      }
      case 'computer_type':
        result = await computerUseService.agentType(sessionId, String(input.text || ''));
        break;
      case 'computer_key':
        result = await computerUseService.agentKey(sessionId, String(input.key || ''));
        break;
      case 'computer_scroll':
        result = await computerUseService.agentScroll(sessionId, {
          direction: toScrollDirection(input.direction),
          amount: typeof input.amount === 'number' ? input.amount : undefined,
          x: typeof input.x === 'number' ? input.x : undefined,
          y: typeof input.y === 'number' ? input.y : undefined,
        });
        break;
      case 'computer_wait':
        result = await computerUseService.agentWait(sessionId, typeof input.timeoutMs === 'number' ? input.timeoutMs : undefined);
        break;
      case 'computer_close_session':
        result = await computerUseService.agentStopSession(sessionId);
        break;
      default:
        res.status(404).json({ success: false, error: `Unknown Computer Use MCP tool "${toolName}".` });
        return;
    }
    res.json({ success: true, data: result });
  } catch (error) {
    res.status(400).json({
      success: false,
      error: error instanceof Error ? error.message : 'Computer Use MCP tool failed.',
    });
  }
});
export default router;

View File

@@ -4,16 +4,212 @@ import { computerUseService } from '@/modules/computer-use/computer-use.service.
const router = express.Router();
router.get('/status', (_req, res) => {
res.json({ success: true, data: computerUseService.getStatus() });
/**
 * Express request that may carry a `user` object with an `id`.
 * NOTE(review): presumably populated by authentication middleware mounted
 * upstream of this router — not visible in this file; confirm.
 */
type AuthenticatedRequest = express.Request & {
user?: {
id?: string | number;
};
};
/**
 * Returns the requesting user's id wrapped as an owner object.
 *
 * @throws Error when the request has no user id (undefined or null).
 */
function requireUser(req: AuthenticatedRequest): { id: string | number } {
  const id = req.user?.id;
  if (id !== undefined && id !== null) {
    return { id };
  }
  throw new Error('Authenticated user is required.');
}
/** Collapses a route parameter to a single string: first array element, the value itself, or ''. */
function readParam(value: string | string[] | undefined): string {
  if (Array.isArray(value)) {
    return value[0] || '';
  }
  return value || '';
}
/** Normalizes a request value to a mouse button name, defaulting to 'left'. */
function toButton(value: unknown): 'left' | 'right' | 'middle' {
  if (value === 'right') {
    return 'right';
  }
  if (value === 'middle') {
    return 'middle';
  }
  return 'left';
}
// GET /status — reports the Computer Use status; 500 with the error message on failure.
router.get('/status', async (_req, res) => {
  try {
    const status = await computerUseService.getStatus();
    res.json({ success: true, data: status });
  } catch (error) {
    const message = error instanceof Error ? error.message : 'Failed to load Computer Use status.';
    res.status(500).json({ success: false, error: message });
  }
});
router.post('/sessions', (_req, res) => {
res.status(409).json({
success: false,
error: 'Computer Use is not enabled until a local CloudCLI Desktop Agent is connected and approved by the user.',
data: computerUseService.getStatus(),
});
// GET /settings — returns the stored settings; 500 on failure.
router.get('/settings', async (_req, res) => {
  try {
    const settings = await computerUseService.getSettings();
    res.json({ success: true, data: { settings } });
  } catch (error) {
    const message = error instanceof Error ? error.message : 'Failed to load Computer Use settings.';
    res.status(500).json({ success: false, error: message });
  }
});
// PUT /settings — applies the request body as a settings patch; 400 on failure.
router.put('/settings', async (req, res) => {
  try {
    const patch = req.body || {};
    const settings = await computerUseService.updateSettings(patch);
    res.json({ success: true, data: { settings } });
  } catch (error) {
    const message = error instanceof Error ? error.message : 'Failed to save Computer Use settings.';
    res.status(400).json({ success: false, error: message });
  }
});
// POST /agent-tools/register — registers the Computer Use MCP server; 201 on success, 400 on failure.
router.post('/agent-tools/register', async (_req, res) => {
  try {
    const registration = await computerUseService.registerAgentMcp();
    res.status(201).json({ success: true, data: registration });
  } catch (error) {
    const message = error instanceof Error ? error.message : 'Failed to register Computer Use MCP.';
    res.status(400).json({ success: false, error: message });
  }
});
// POST /runtime/install — runs the runtime installer. The installer reports
// failure through its result object, which is mapped to a 500 response.
router.post('/runtime/install', async (_req, res) => {
  try {
    const outcome = await computerUseService.installRuntime();
    const succeeded = outcome.success;
    res.status(succeeded ? 200 : 500).json({
      success: succeeded,
      data: outcome,
      error: succeeded ? undefined : outcome.message,
    });
  } catch (error) {
    const message = error instanceof Error ? error.message : 'Failed to install Computer Use runtime.';
    res.status(500).json({ success: false, error: message });
  }
});
// GET /sessions — lists the requesting user's sessions; any failure is answered with 401.
router.get('/sessions', async (req: AuthenticatedRequest, res) => {
  try {
    const owner = requireUser(req);
    const sessions = await computerUseService.listSessions(owner);
    res.json({ success: true, data: { sessions } });
  } catch (error) {
    const message = error instanceof Error ? error.message : 'Failed to list Computer Use sessions.';
    res.status(401).json({ success: false, error: message });
  }
});
// POST /sessions — creates a session for the requesting user. A session that
// comes back 'unavailable' is answered with 202, otherwise 201; failures get 400.
router.post('/sessions', async (req: AuthenticatedRequest, res) => {
  try {
    const owner = requireUser(req);
    const session = await computerUseService.createSession(owner);
    const statusCode = session.status === 'unavailable' ? 202 : 201;
    res.status(statusCode).json({ success: true, data: { session } });
  } catch (error) {
    const message = error instanceof Error ? error.message : 'Failed to create Computer Use session.';
    res.status(400).json({ success: false, error: message });
  }
});
// POST /sessions/:sessionId/screenshot — captures the screen for the session; 400 on failure.
router.post('/sessions/:sessionId/screenshot', async (req: AuthenticatedRequest, res) => {
  try {
    const owner = requireUser(req);
    const sessionId = readParam(req.params.sessionId);
    const session = await computerUseService.userScreenshot(owner, sessionId);
    res.json({ success: true, data: { session } });
  } catch (error) {
    const message = error instanceof Error ? error.message : 'Failed to capture the screen.';
    res.status(400).json({ success: false, error: message });
  }
});
// POST /sessions/:sessionId/click — clicks at the body's `x`/`y` with the
// requested button (default 'left'); `double: true` requests a double click.
// Coordinates are coerced with Number(); values that do not yield a finite
// number are rejected with 400 instead of being forwarded as NaN/Infinity.
router.post('/sessions/:sessionId/click', async (req: AuthenticatedRequest, res) => {
  try {
    const owner = requireUser(req);
    const x = Number(req.body?.x);
    const y = Number(req.body?.y);
    if (!Number.isFinite(x) || !Number.isFinite(y)) {
      throw new Error('Click coordinates must be finite numbers.');
    }
    const session = await computerUseService.userClick(owner, readParam(req.params.sessionId), {
      x,
      y,
      button: toButton(req.body?.button),
      double: req.body?.double === true,
    });
    res.json({ success: true, data: { session } });
  } catch (error) {
    res.status(400).json({
      success: false,
      error: error instanceof Error ? error.message : 'Failed to click.',
    });
  }
});
// POST /sessions/:sessionId/type — types the body's `text` (missing/falsy text becomes ''); 400 on failure.
router.post('/sessions/:sessionId/type', async (req: AuthenticatedRequest, res) => {
  try {
    const owner = requireUser(req);
    const sessionId = readParam(req.params.sessionId);
    const text = String(req.body?.text || '');
    const session = await computerUseService.userType(owner, sessionId, text);
    res.json({ success: true, data: { session } });
  } catch (error) {
    const message = error instanceof Error ? error.message : 'Failed to type text.';
    res.status(400).json({ success: false, error: message });
  }
});
// POST /sessions/:sessionId/press-key — sends the body's `key` (missing/falsy key becomes ''); 400 on failure.
router.post('/sessions/:sessionId/press-key', async (req: AuthenticatedRequest, res) => {
  try {
    const owner = requireUser(req);
    const sessionId = readParam(req.params.sessionId);
    const key = String(req.body?.key || '');
    const session = await computerUseService.userPressKey(owner, sessionId, key);
    res.json({ success: true, data: { session } });
  } catch (error) {
    const message = error instanceof Error ? error.message : 'Failed to send key input.';
    res.status(400).json({ success: false, error: message });
  }
});
// POST /sessions/:sessionId/consent/grant — grants agent access for the session; 400 on failure.
router.post('/sessions/:sessionId/consent/grant', async (req: AuthenticatedRequest, res) => {
  try {
    const owner = requireUser(req);
    const sessionId = readParam(req.params.sessionId);
    const session = await computerUseService.grantAgentAccess(owner, sessionId);
    res.json({ success: true, data: { session } });
  } catch (error) {
    const message = error instanceof Error ? error.message : 'Failed to grant control.';
    res.status(400).json({ success: false, error: message });
  }
});
// POST /sessions/:sessionId/consent/revoke — revokes agent access for the session; 400 on failure.
router.post('/sessions/:sessionId/consent/revoke', async (req: AuthenticatedRequest, res) => {
  try {
    const owner = requireUser(req);
    const sessionId = readParam(req.params.sessionId);
    const session = await computerUseService.revokeAgentAccess(owner, sessionId);
    res.json({ success: true, data: { session } });
  } catch (error) {
    const message = error instanceof Error ? error.message : 'Failed to revoke control.';
    res.status(400).json({ success: false, error: message });
  }
});
// POST /sessions/:sessionId/stop — stops the session and returns the service result; 400 on failure.
router.post('/sessions/:sessionId/stop', async (req: AuthenticatedRequest, res) => {
  try {
    const owner = requireUser(req);
    const sessionId = readParam(req.params.sessionId);
    const outcome = await computerUseService.stopSession(owner, sessionId);
    res.json({ success: true, data: outcome });
  } catch (error) {
    const message = error instanceof Error ? error.message : 'Failed to stop Computer Use session.';
    res.status(400).json({ success: false, error: message });
  }
});
// DELETE /sessions/:sessionId — deletes the session and returns the service result; 400 on failure.
router.delete('/sessions/:sessionId', async (req: AuthenticatedRequest, res) => {
  try {
    const owner = requireUser(req);
    const sessionId = readParam(req.params.sessionId);
    const outcome = await computerUseService.deleteSession(owner, sessionId);
    res.json({ success: true, data: outcome });
  } catch (error) {
    const message = error instanceof Error ? error.message : 'Failed to delete Computer Use session.';
    res.status(400).json({ success: false, error: message });
  }
});
export default router;

View File

@@ -1,22 +1,883 @@
import { createRequire } from 'node:module';
import { randomBytes, randomUUID } from 'node:crypto';
import { spawn } from 'node:child_process';
import fs from 'node:fs';
import path from 'node:path';
import { appConfigDb } from '@/modules/database/repositories/app-config.js';
import { providerMcpService } from '@/modules/providers/services/mcp.service.js';
import { getModuleDir } from '@/utils/runtime-paths.js';
import {
executor,
captureScreenshot as captureScreenshotRuntime,
getRuntimeReadiness as getExecutorReadiness,
type Point,
type ClickButton,
type ScrollDirection,
} from '@/modules/computer-use/computer-executor.js';
import { desktopAgentRelay } from '@/modules/computer-use/desktop-agent-relay.service.js';
// Directory of this module, derived from import.meta.url; getMcpCommand() resolves the MCP script relative to it.
const __dirname = getModuleDir(import.meta.url);
// True only when VITE_IS_PLATFORM is exactly 'true'; getRuntime() maps it to the 'cloud' runtime.
const IS_PLATFORM = process.env.VITE_IS_PLATFORM === 'true';
/**
 * Parses an integer environment value.
 *
 * @param raw The raw environment string (may be undefined or empty).
 * @param fallback Value used when `raw` is unset, empty, or not an integer prefix.
 * @returns The parsed integer, never NaN.
 */
function parseIntegerEnv(raw: string | undefined, fallback: number): number {
  const parsed = Number.parseInt(raw || '', 10);
  return Number.isNaN(parsed) ? fallback : parsed;
}
// Per-owner session cap; defaults to 1. A non-numeric override falls back to the default instead of NaN.
const MAX_SESSIONS_PER_OWNER = parseIntegerEnv(process.env.CLOUDCLI_COMPUTER_USE_MAX_SESSIONS_PER_OWNER, 1);
// Idle time after which a ready session is expired; defaults to 30 minutes. A NaN here would make
// every `<=` comparison in expireStaleSessions false and expire all sessions immediately.
const SESSION_TTL_MS = parseIntegerEnv(process.env.CLOUDCLI_COMPUTER_USE_SESSION_TTL_MS, 30 * 60 * 1000);
// appConfigDb key under which the JSON-encoded settings are stored (see readSettings/writeSettings).
const COMPUTER_USE_SETTINGS_KEY = 'computer_use_settings';
// appConfigDb key under which the generated MCP token is stored (see getOrCreateMcpToken).
const COMPUTER_USE_MCP_TOKEN_KEY = 'computer_use_mcp_token';
/** Where desktop actions execute: relayed to a desktop agent ('cloud') or in-process ('local'). */
type ComputerUseRuntime = 'cloud' | 'local';
/** Lifecycle state of a session; only 'ready' sessions pass assertReady(). */
type ComputerUseSessionStatus = 'ready' | 'stopped' | 'unavailable';
/** In-memory record of one Computer Use session, as kept in the `sessions` map. */
type ComputerUseSession = {
id: string;
// Owner key used for access checks; removed by publicSession() before a session is returned.
ownerId: string;
createdBy: 'user' | 'agent';
runtime: ComputerUseRuntime;
status: ComputerUseSessionStatus;
// Latest screenshot as a data URL, or null when none has been stored.
screenshotDataUrl: string | null;
createdAt: string;
// ISO timestamp; expireStaleSessions() measures inactivity from this value.
updatedAt: string;
lastAction: string | null;
message: string | null;
/** Per-session consent: agents may act only while this is true. */
agentAccessEnabled: boolean;
/** Size of the captured screenshot in pixels — the coordinate space agents/users use. */
displaySize: {
width: number;
height: number;
} | null;
// Last known cursor position and which actor it is attributed to.
cursor: {
x: number;
y: number;
actor: 'agent' | 'user';
} | null;
};
/** Session shape produced by publicSession(): everything except `ownerId`. */
type PublicComputerUseSession = Omit<ComputerUseSession, 'ownerId'>;
/** Identity a session is attributed to; the id is stringified by getOwnerId(). */
type ComputerUseOwner = {
id: string | number;
};
/** Persisted feature switches; both default to false (see DEFAULT_SETTINGS). */
type ComputerUseSettings = {
enabled: boolean;
agentToolsEnabled: boolean;
};
/** Executor readiness extended with the state of any runtime installation. */
type RuntimeReadiness = {
nut: any | null;
screenshot: any | null;
nutInstalled: boolean;
screenshotInstalled: boolean;
// True while an installRuntime() call is in flight.
installInProgress: boolean;
// Most recent install status or error text, or null if none has been recorded.
installMessage: string | null;
};
// All sessions, held in a module-level Map.
// NOTE(review): the key is presumably the session id — insertion is not visible in this view.
const sessions = new Map<string, ComputerUseSession>();
// Set while installRuntime() is running so concurrent callers share one install; reset to null afterwards.
let installPromise: Promise<{ success: boolean; message: string }> | null = null;
// Last status/error text written by installRuntime(); surfaced through getRuntimeReadiness().
let lastInstallMessage: string | null = null;
// Settings used when nothing is stored or the stored value cannot be read.
const DEFAULT_SETTINGS: ComputerUseSettings = {
enabled: false,
agentToolsEnabled: false,
};
// Owner id that canAccessSession() treats as accessible to every owner.
const AGENT_OWNER_ID = 'agent';
// NOTE(review): name and provider list are presumably used when registering the MCP server — usage is outside this view.
const MCP_SERVER_NAME = 'cloudcli-computer-use';
const MCP_PROVIDERS = ['claude', 'codex', 'cursor', 'gemini', 'opencode'];
/** Reports which runtime this process uses: 'cloud' on the platform build, otherwise 'local'. */
function getRuntime(): ComputerUseRuntime {
  if (IS_PLATFORM) {
    return 'cloud';
  }
  return 'local';
}
/**
 * Loads the persisted settings.
 *
 * Only literal `true` values enable a flag. When nothing is stored, or the
 * stored value cannot be read or parsed, a fresh copy of the defaults is
 * returned (never the shared DEFAULT_SETTINGS object, so callers cannot
 * mutate the defaults by accident). Read failures are logged, not thrown.
 */
function readSettings(): ComputerUseSettings {
  try {
    const raw = appConfigDb.get(COMPUTER_USE_SETTINGS_KEY);
    if (!raw) {
      return { ...DEFAULT_SETTINGS };
    }
    const parsed = JSON.parse(raw) as Partial<ComputerUseSettings> | null;
    return {
      enabled: parsed?.enabled === true,
      agentToolsEnabled: parsed?.agentToolsEnabled === true,
    };
  } catch (error: unknown) {
    console.warn('[Computer Use] Failed to read settings:', error instanceof Error ? error.message : error);
    return { ...DEFAULT_SETTINGS };
  }
}
/**
 * Persists the settings as JSON and returns what was stored.
 * Each flag is coerced so that only literal `true` is saved as enabled.
 */
function writeSettings(settings: ComputerUseSettings): ComputerUseSettings {
  const enabled = settings.enabled === true;
  const agentToolsEnabled = settings.agentToolsEnabled === true;
  const stored = { enabled, agentToolsEnabled };
  appConfigDb.set(COMPUTER_USE_SETTINGS_KEY, JSON.stringify(stored));
  return stored;
}
/**
 * Returns the stored MCP token, generating and storing a new 32-byte
 * hex token the first time it is requested.
 */
function getOrCreateMcpToken(): string {
  const stored = appConfigDb.get(COMPUTER_USE_MCP_TOKEN_KEY);
  if (!stored) {
    const generated = randomBytes(32).toString('hex');
    appConfigDb.set(COMPUTER_USE_MCP_TOKEN_KEY, generated);
    return generated;
  }
  return stored;
}
/**
 * Picks the user-facing explanation for why Computer Use is not ready.
 * Checked in order: cloud runtime, feature disabled, runtime modules missing,
 * then the last install message (or a generic fallback).
 */
function getSetupMessage(settings: ComputerUseSettings, readiness: RuntimeReadiness): string {
  const isCloud = getRuntime() === 'cloud';
  if (isCloud) {
    return 'Cloud Computer Use requires a linked CloudCLI Desktop Agent on the user machine.';
  }
  if (!settings.enabled) {
    return 'Computer Use is disabled in settings.';
  }
  const runtimeInstalled = readiness.nutInstalled && readiness.screenshotInstalled;
  if (!runtimeInstalled) {
    return 'Install the desktop control runtime to capture the screen and drive the mouse and keyboard.';
  }
  return readiness.installMessage ? readiness.installMessage : 'Computer Use runtime is not ready.';
}
/**
 * Builds the command used to launch the Computer Use MCP server.
 * Prefers running `computer-use-mcp.js` (two directories above this module)
 * with the current Node binary; falls back to the `cloudcli` CLI when that
 * script does not exist.
 */
function getMcpCommand(): { command: string; args: string[] } {
  const scriptPath = path.join(path.resolve(__dirname, '..', '..'), 'computer-use-mcp.js');
  if (!fs.existsSync(scriptPath)) {
    return { command: 'cloudcli', args: ['computer-use-mcp'] };
  }
  return { command: process.execPath, args: [scriptPath] };
}
/**
 * Returns the loopback URL of the MCP HTTP API.
 * The port comes from SERVER_PORT, then PORT, then defaults to 3001.
 */
function getMcpApiUrl(): string {
  const { SERVER_PORT, PORT } = process.env;
  const port = SERVER_PORT || PORT || '3001';
  return 'http://127.0.0.1:' + port + '/api/computer-use-mcp';
}
/** Combines the executor's module readiness with the current install state of this service. */
function getRuntimeReadiness(): RuntimeReadiness {
  const executorReadiness = getExecutorReadiness();
  const installState = {
    installInProgress: installPromise ? true : false,
    installMessage: lastInstallMessage,
  };
  return { ...executorReadiness, ...installState };
}
/**
 * Runs a command to completion, capturing stdout and stderr.
 *
 * @param command Executable to run.
 * @param args Arguments passed to it.
 * @returns Resolves when the process exits with code 0.
 * @throws Rejects with the spawn error, or with an Error carrying the
 * captured output (or an "exited with code" message when there was none).
 */
function runCommand(command: string, args: string[]): Promise<void> {
  return new Promise((resolve, reject) => {
    // On Windows, Node refuses to spawn .cmd/.bat files (such as npm.cmd) without a
    // shell and fails with EINVAL, so a shell is enabled for exactly that case.
    // With a shell the arguments are joined unquoted, so callers must pass trusted
    // arguments without spaces or shell metacharacters.
    const needsShell = process.platform === 'win32' && /\.(cmd|bat)$/i.test(command);
    const child = spawn(command, args, {
      cwd: process.cwd(),
      env: process.env,
      shell: needsShell,
      stdio: ['ignore', 'pipe', 'pipe'],
    });
    const output: string[] = [];
    child.stdout.on('data', (chunk) => output.push(String(chunk)));
    child.stderr.on('data', (chunk) => output.push(String(chunk)));
    child.on('error', reject);
    child.on('close', (code) => {
      if (code === 0) {
        resolve();
        return;
      }
      reject(new Error(output.join('').trim() || `${command} ${args.join(' ')} exited with code ${code}`));
    });
  });
}
/**
 * Turns an install failure into a user-facing message.
 * On Linux, errors mentioning the native X11/PNG/screenshot dependencies are
 * replaced with instructions for installing the system packages; otherwise
 * the original message is returned, or a generic one when it is empty.
 */
function formatInstallError(error: unknown): string {
  const message = error instanceof Error ? error.message : String(error);
  const looksLikeMissingSystemPackage =
    process.platform === 'linux' && /libxtst|x11|xtst|libpng|imagemagick|scrot/i.test(message);
  if (!looksLikeMissingSystemPackage) {
    return message || 'Failed to install the Computer Use runtime.';
  }
  const hint = [
    'Installing the desktop control runtime needs system packages.',
    'On Debian/Ubuntu run: sudo apt-get install -y libxtst-dev libpng-dev imagemagick',
    'then try again.',
  ];
  return hint.join(' ');
}
/** True when running under Electron with ELECTRON_RUN_AS_NODE set to '1'. */
function isPackagedElectronNodeRuntime(): boolean {
  if (process.env.ELECTRON_RUN_AS_NODE !== '1') {
    return false;
  }
  return process.versions.electron ? true : false;
}
/**
 * Installs the optional desktop-control packages with npm.
 *
 * Never rejects for an npm failure: the outcome is reported through the
 * returned `{ success, message }` object, and the same message is stored in
 * `lastInstallMessage`.
 */
async function installRuntime(): Promise<{ success: boolean; message: string }> {
// An install is already running: share its promise instead of starting a second npm process.
if (installPromise) {
return installPromise;
}
const readiness = getExecutorReadiness();
// Both modules already load — nothing to install.
if (readiness.nutInstalled && readiness.screenshotInstalled) {
lastInstallMessage = 'Computer Use runtime is available.';
return { success: true, message: lastInstallMessage };
}
// Under Electron-as-Node no npm install is attempted; the missing runtime is reported as a build problem.
if (isPackagedElectronNodeRuntime()) {
lastInstallMessage = 'Computer Use runtime was not bundled with this desktop build.';
return { success: false, message: lastInstallMessage };
}
const npmCommand = process.platform === 'win32' ? 'npm.cmd' : 'npm';
// Assigned synchronously (before the first await below) so concurrent callers hit the guard above.
installPromise = (async () => {
try {
lastInstallMessage = 'Installing desktop control runtime…';
// --no-save / --no-package-lock: install without touching package.json or the lockfile.
await runCommand(npmCommand, [
'install',
'--no-save',
'--no-package-lock',
'@nut-tree-fork/nut-js',
'screenshot-desktop',
]);
lastInstallMessage = 'Computer Use runtime installed.';
return { success: true, message: lastInstallMessage };
} catch (error) {
lastInstallMessage = formatInstallError(error);
return { success: false, message: lastInstallMessage };
}
})();
try {
return await installPromise;
} finally {
// Clear the in-flight marker whether the install succeeded or failed.
installPromise = null;
}
}
/**
 * Converts an owner's id to its string form.
 *
 * @throws Error when the id is undefined, null, or blank after trimming.
 */
function getOwnerId(owner: ComputerUseOwner): string {
  const { id } = owner;
  const missing = id === undefined || id === null;
  if (missing || String(id).trim() === '') {
    throw new Error('Authenticated user is required.');
  }
  return String(id);
}
/** Returns a shallow copy of the session without its `ownerId`; the input is not modified. */
function publicSession(session: ComputerUseSession): PublicComputerUseSession {
  const copy: Partial<ComputerUseSession> = { ...session };
  delete copy.ownerId;
  return copy as PublicComputerUseSession;
}
/** Collects every session whose `ownerId` equals the given owner, in map iteration order. */
function ownerSessions(ownerId: string): ComputerUseSession[] {
  const owned: ComputerUseSession[] = [];
  for (const session of sessions.values()) {
    if (session.ownerId === ownerId) {
      owned.push(session);
    }
  }
  return owned;
}
/** An owner may access their own sessions and any session owned by the shared agent owner. */
function canAccessSession(ownerId: string, session: ComputerUseSession): boolean {
  if (session.ownerId === ownerId) {
    return true;
  }
  return session.ownerId === AGENT_OWNER_ID;
}
/**
 * Stops every 'ready' session that has been idle for longer than SESSION_TTL_MS.
 * Sessions whose `updatedAt` cannot be parsed are left untouched. Expiring a
 * session also revokes agent access and records why it stopped.
 *
 * @param now Reference time in epoch milliseconds (defaults to the current time).
 */
async function expireStaleSessions(now = Date.now()): Promise<void> {
  for (const session of sessions.values()) {
    if (session.status === 'ready') {
      const lastTouched = Date.parse(session.updatedAt);
      const stillFresh = !Number.isFinite(lastTouched) || now - lastTouched <= SESSION_TTL_MS;
      if (!stillFresh) {
        session.status = 'stopped';
        session.agentAccessEnabled = false;
        session.updatedAt = new Date(now).toISOString();
        session.lastAction = 'expire';
        session.message = 'Computer Use session expired after inactivity.';
      }
    }
  }
}
// --- Action layer: local executor (OSS) or cloud relay to the desktop agent --
//
// Every desktop interaction goes through `performAction` / `getCursorPosition`.
// In local mode it drives the in-process nut-js executor (computer-executor.ts);
// in cloud mode it forwards the action to the linked desktop agent over
// `desktopAgentRelay` and applies the returned screenshot. The local server
// itself never touches the OS in cloud mode.
/**
 * One desktop interaction expressed in screenshot-pixel coordinate space.
 * Defaults applied by `performAction` in local mode: drag button 'left',
 * scroll amount 3, wait 1000 ms (clamped to the range 0..10000 ms).
 */
export type ComputerAction =
| { type: 'screenshot' }
| { type: 'mouse_move'; point: Point }
| { type: 'click'; button: ClickButton; point?: Point; double?: boolean }
| { type: 'drag'; from: Point; to: Point; button?: ClickButton }
| { type: 'type'; text: string }
| { type: 'key'; key: string }
| { type: 'scroll'; direction: ScrollDirection; amount?: number; point?: Point }
| { type: 'wait'; ms?: number };
/**
 * Shape the desktop agent returns for any relayed action.
 * Every field is optional; `applyRelayResult` copies only the ones present,
 * and `getCursorPosition` prefers `position` when it is set.
 */
type RelayResult = {
screenshotDataUrl?: string | null;
displaySize?: { width: number; height: number } | null;
cursor?: { x: number; y: number } | null;
position?: Point | null;
};
/**
 * Copies whatever the desktop agent returned onto the session and bumps
 * `updatedAt`. Absent fields leave the session's current values in place; a
 * returned cursor keeps the session's existing actor, defaulting to 'agent'.
 */
function applyRelayResult(session: ComputerUseSession, result: RelayResult): void {
  const { screenshotDataUrl, displaySize, cursor } = result;
  if (typeof screenshotDataUrl === 'string') {
    session.screenshotDataUrl = screenshotDataUrl;
  }
  if (displaySize) {
    session.displaySize = displaySize;
  }
  if (cursor) {
    const actor = session.cursor?.actor ?? 'agent';
    session.cursor = { x: cursor.x, y: cursor.y, actor };
  }
  session.updatedAt = new Date().toISOString();
}
/**
 * Updates the session's screenshot. In cloud mode it is requested from the
 * desktop agent; in local mode it is captured in-process and the display size
 * is updated only when the image header could be read.
 */
async function refreshScreenshot(session: ComputerUseSession): Promise<void> {
  const isCloud = getRuntime() === 'cloud';
  if (!isCloud) {
    const captured = await captureScreenshotRuntime();
    session.screenshotDataUrl = captured.dataUrl;
    if (captured.size) {
      session.displaySize = captured.size;
    }
    session.updatedAt = new Date().toISOString();
    return;
  }
  const relayed = (await desktopAgentRelay.relay('screenshot', { sessionId: session.id })) as RelayResult;
  applyRelayResult(session, relayed);
}
/**
 * Runs one action and refreshes the session screenshot afterwards.
 *
 * In cloud mode the action is relayed to the desktop agent along with the
 * session id and display size, and the agent's result is applied to the
 * session. In local mode the in-process executor performs the action and a
 * fresh screenshot is captured.
 */
async function performAction(session: ComputerUseSession, action: ComputerAction): Promise<void> {
  if (getRuntime() === 'cloud') {
    const payload = { ...action, sessionId: session.id, displaySize: session.displaySize };
    const relayed = (await desktopAgentRelay.relay(action.type, payload)) as RelayResult;
    applyRelayResult(session, relayed);
    return;
  }
  // A 'screenshot' action performs nothing here; the refresh below does the capture.
  if (action.type === 'mouse_move') {
    await executor.moveTo(session, action.point);
  } else if (action.type === 'click') {
    await executor.click(session, action.button, action.point, action.double === true);
  } else if (action.type === 'drag') {
    await executor.drag(session, action.from, action.to, action.button ?? 'left');
  } else if (action.type === 'type') {
    await executor.type(action.text);
  } else if (action.type === 'key') {
    await executor.pressChord(action.key);
  } else if (action.type === 'scroll') {
    await executor.scroll(session, action.direction, action.amount ?? 3, action.point);
  } else if (action.type === 'wait') {
    // Default 1 s, clamped to the range 0..10 s.
    const requestedMs = action.ms ?? 1000;
    const delayMs = Math.max(0, Math.min(requestedMs, 10_000));
    await new Promise((resolve) => setTimeout(resolve, delayMs));
  }
  await refreshScreenshot(session);
}
/**
 * Reads the current cursor position in screenshot-pixel space.
 *
 * In cloud mode the desktop agent is asked; its result is applied to the
 * session and the answer is taken from `position`, then from the session's
 * cursor, then falls back to (0, 0). In local mode the executor is queried.
 */
async function getCursorPosition(session: ComputerUseSession): Promise<Point> {
  if (getRuntime() !== 'cloud') {
    return executor.cursorPosition(session);
  }
  const payload = { sessionId: session.id, displaySize: session.displaySize };
  const relayed = (await desktopAgentRelay.relay('cursor_position', payload)) as RelayResult;
  applyRelayResult(session, relayed);
  if (relayed.position) {
    return relayed.position;
  }
  if (session.cursor) {
    return { x: session.cursor.x, y: session.cursor.y };
  }
  return { x: 0, y: 0 };
}
/**
 * Guards actions against sessions that are not 'ready'.
 *
 * @throws Error carrying the session's own message when it has one,
 * otherwise a generic "not available" message.
 */
function assertReady(session: ComputerUseSession): void {
  if (session.status === 'ready') {
    return;
  }
  throw new Error(session.message || 'Computer Use session is not available.');
}
/**
 * Tells whether agent tools may operate at this moment.
 * Cloud: true exactly when a desktop agent is connected.
 * Local: true only when both the `enabled` and `agentToolsEnabled` settings are on.
 */
function agentToolsAvailable(): boolean {
  if (getRuntime() !== 'cloud') {
    const { enabled, agentToolsEnabled } = readSettings();
    return enabled && agentToolsEnabled;
  }
  return desktopAgentRelay.isConnected();
}
/**
 * Throws when agent tools may not operate right now, with a message that
 * depends on the runtime (no desktop agent connected vs. tools disabled).
 */
function assertAgentToolsAvailable(): void {
  if (!agentToolsAvailable()) {
    const reason =
      getRuntime() === 'cloud'
        ? 'No desktop agent is connected. Open the CloudCLI desktop app with Computer Use enabled.'
        : 'Computer Use agent tools are disabled.';
    throw new Error(reason);
  }
}
export const computerUseService = {
/** Returns the current Computer Use settings as produced by `readSettings`. */
async getSettings() {
  return readSettings();
},
/**
 * Merges a partial settings patch into the stored settings and keeps the MCP
 * registration in step with the result.
 *
 * Only boolean values in the patch are honoured; anything else keeps the
 * current value. Turning the feature off always turns agent tools off too.
 * After writing, the MCP server is registered when agent tools end up on, or
 * unregistered when they were on before and are now off.
 *
 * @returns The settings object returned by `writeSettings`.
 */
async updateSettings(settings: Partial<ComputerUseSettings>) {
  const previous = readSettings();
  const enabled = typeof settings.enabled === 'boolean' ? settings.enabled : previous.enabled;
  const requestedAgentTools = typeof settings.agentToolsEnabled === 'boolean'
    ? settings.agentToolsEnabled
    : previous.agentToolsEnabled;

  const saved = writeSettings({
    ...previous,
    enabled,
    // Agent tools can never stay on while the master switch is off.
    agentToolsEnabled: enabled ? requestedAgentTools : false,
  });

  if (saved.agentToolsEnabled) {
    await this.registerAgentMcp();
  } else if (previous.agentToolsEnabled) {
    await this.unregisterAgentMcp();
  }
  return saved;
},
/**
 * Builds the status snapshot for the Computer Use feature.
 *
 * Cloud availability is purely a function of a connected desktop agent; the
 * hosted server has no screen of its own. Local availability needs the
 * in-process nut-js and screenshot runtime installed and the feature enabled.
 *
 * Each key appears exactly once in the returned object: the values are derived
 * from live settings, runtime readiness and relay connectivity rather than
 * from fixed placeholder values.
 */
async getStatus() {
  const settings = readSettings();
  const readiness = getRuntimeReadiness();
  const isCloud = getRuntime() === 'cloud';
  const runtimeReady = readiness.nutInstalled && readiness.screenshotInstalled;
  const desktopAgentConnected = desktopAgentRelay.isConnected();
  const available = isCloud
    ? desktopAgentConnected
    : settings.enabled && runtimeReady;
  return {
    // In cloud mode the feature is always reported as enabled; only connectivity gates it.
    enabled: isCloud ? true : settings.enabled,
    runtime: getRuntime(),
    available,
    requiresDesktopBridge: isCloud,
    desktopAgentConnected,
    nutInstalled: readiness.nutInstalled,
    screenshotInstalled: readiness.screenshotInstalled,
    installInProgress: readiness.installInProgress,
    sessionCount: sessions.size,
    agentToolsEnabled: isCloud ? desktopAgentConnected : settings.agentToolsEnabled,
    mcpRecommended: !settings.agentToolsEnabled,
    message: available ? 'Computer Use runtime is available.' : getSetupMessage(settings, readiness),
  };
},
/**
 * Adds the Computer Use MCP server (stdio transport, user scope) to every
 * provider, passing the MCP token and API URL through the server environment.
 *
 * @returns The server name, the launch command/args used, and the per-provider results.
 */
async registerAgentMcp() {
  const launch = getMcpCommand();
  const env = {
    CLOUDCLI_COMPUTER_USE_MCP_TOKEN: getOrCreateMcpToken(),
    CLOUDCLI_COMPUTER_USE_API_URL: getMcpApiUrl(),
  };
  const results = await providerMcpService.addMcpServerToAllProviders({
    name: MCP_SERVER_NAME,
    scope: 'user',
    transport: 'stdio',
    command: launch.command,
    args: launch.args,
    env,
  });
  return { name: MCP_SERVER_NAME, command: launch.command, args: launch.args, results };
},
/** Returns the MCP token, delegating to `getOrCreateMcpToken`. */
getMcpToken() {
  const token = getOrCreateMcpToken();
  return token;
},
/**
 * Removes the Computer Use MCP server (user scope) from every provider in
 * `MCP_PROVIDERS`. Removals run concurrently; a failure for one provider is
 * captured in that provider's result instead of rejecting the whole call.
 */
async unregisterAgentMcp() {
  const attempts = MCP_PROVIDERS.map(async (provider) => {
    try {
      const { removed } = await providerMcpService.removeProviderMcpServer(provider, {
        name: MCP_SERVER_NAME,
        scope: 'user',
      });
      return { provider, removed };
    } catch (failure) {
      const error = failure instanceof Error ? failure.message : 'Unknown error';
      return { provider, removed: false, error };
    }
  });
  return { name: MCP_SERVER_NAME, results: await Promise.all(attempts) };
},
/**
 * Runs the module-level `installRuntime` and returns its result together with
 * a fresh status snapshot under `status`.
 */
async installRuntime() {
  const outcome = await installRuntime();
  const status = await this.getStatus();
  return { ...outcome, status };
},
/**
 * Lists the public view of every session the given owner may access, after
 * first expiring stale sessions.
 */
async listSessions(owner: ComputerUseOwner) {
  const ownerId = getOwnerId(owner);
  await expireStaleSessions();
  const everySession = Array.from(sessions.values());
  const accessible = everySession.filter((candidate) => canAccessSession(ownerId, candidate));
  return accessible.map(publicSession);
},
/**
 * Creates and stores a new Computer Use session for `owner`.
 *
 * The session starts as `unavailable` with agent access off. If the runtime is
 * not ready it is stored as-is with a setup message. Otherwise it becomes
 * `ready` and a first screenshot is captured; a capture failure flips the
 * session back to `unavailable` with the error text as its message.
 *
 * @param options.createdBy Who initiated the session; defaults to `'user'`.
 * @returns The public view of the created session.
 * @throws Error when the owner already has `MAX_SESSIONS_PER_OWNER` ready sessions.
 */
async createSession(owner: ComputerUseOwner, options?: { createdBy?: 'user' | 'agent' }) {
  const ownerId = getOwnerId(owner);
  await expireStaleSessions();

  const timestamp = new Date().toISOString();
  const session: ComputerUseSession = {
    id: randomUUID(),
    ownerId,
    createdBy: options?.createdBy ?? 'user',
    runtime: getRuntime(),
    status: 'unavailable',
    screenshotDataUrl: null,
    createdAt: timestamp,
    updatedAt: timestamp,
    lastAction: 'create',
    // Consent is always OFF at creation — the user must explicitly grant control,
    // even for agent-initiated sessions controlling the full desktop.
    agentAccessEnabled: false,
    displaySize: null,
    message: null,
    cursor: null,
  };

  // Only sessions that are currently `ready` count against the per-owner cap.
  const readyCount = ownerSessions(ownerId).filter((existing) => existing.status === 'ready').length;
  if (readyCount >= MAX_SESSIONS_PER_OWNER) {
    throw new Error(`Computer Use is limited to ${MAX_SESSIONS_PER_OWNER} active session(s).`);
  }

  const settings = readSettings();
  const readiness = getRuntimeReadiness();
  const cloud = getRuntime() === 'cloud';
  const runtimeInstalled = readiness.nutInstalled && readiness.screenshotInstalled;
  const canStart = cloud
    ? desktopAgentRelay.isConnected()
    : settings.enabled && runtimeInstalled;

  if (!canStart) {
    session.message = getSetupMessage(settings, readiness);
    sessions.set(session.id, session);
    return publicSession(session);
  }

  // In cloud mode the linked desktop agent is the consent authority and prompts
  // the user per its own consent mode, so the relay is allowed to act. In local
  // mode the user must still grant control from the panel.
  session.agentAccessEnabled = cloud;
  session.status = 'ready';
  session.message = cloud
    ? 'Computer Use session is ready on the linked desktop.'
    : 'Computer Use session is ready. Grant control to let agents act.';
  sessions.set(session.id, session);

  try {
    await refreshScreenshot(session);
  } catch (failure) {
    session.status = 'unavailable';
    session.message = failure instanceof Error ? failure.message : 'Failed to capture the screen.';
  }
  return publicSession(session);
},
/**
 * Turns agent access on for a session the owner may access.
 *
 * @throws Error when the session does not exist or is not accessible to the owner.
 */
async grantAgentAccess(owner: ComputerUseOwner, sessionId: string) {
  const ownerId = getOwnerId(owner);
  const session = sessions.get(sessionId);
  if (!(session && canAccessSession(ownerId, session))) {
    throw new Error('Computer Use session not found.');
  }
  Object.assign(session, {
    agentAccessEnabled: true,
    updatedAt: new Date().toISOString(),
    lastAction: 'consent:grant',
  });
  return publicSession(session);
},
/**
 * Turns agent access off for a session the owner may access.
 *
 * @throws Error when the session does not exist or is not accessible to the owner.
 */
async revokeAgentAccess(owner: ComputerUseOwner, sessionId: string) {
  const ownerId = getOwnerId(owner);
  const session = sessions.get(sessionId);
  if (!(session && canAccessSession(ownerId, session))) {
    throw new Error('Computer Use session not found.');
  }
  Object.assign(session, {
    agentAccessEnabled: false,
    updatedAt: new Date().toISOString(),
    lastAction: 'consent:revoke',
  });
  return publicSession(session);
},
/**
 * Marks a session as stopped and revokes agent access.
 *
 * Returns `{ stopped: false }` when the session is missing or not accessible to
 * the owner. In cloud mode with a connected desktop agent, a `stop_session`
 * message is also relayed without awaiting it; relay errors are ignored.
 */
async stopSession(owner: ComputerUseOwner, sessionId: string) {
  const ownerId = getOwnerId(owner);
  const session = sessions.get(sessionId);
  const accessible = session ? canAccessSession(ownerId, session) : false;
  if (!session || !accessible) {
    return { stopped: false };
  }

  session.status = 'stopped';
  session.agentAccessEnabled = false;
  session.updatedAt = new Date().toISOString();
  session.lastAction = 'stop';
  session.message = 'Computer Use session stopped. Agent control is revoked.';

  const shouldNotifyAgent = getRuntime() === 'cloud' && desktopAgentRelay.isConnected();
  if (shouldNotifyAgent) {
    // Best-effort: tell the desktop agent to forget this session's consent.
    void desktopAgentRelay.relay('stop_session', { sessionId }).catch(() => undefined);
  }
  return { stopped: true, session: publicSession(session) };
},
/**
 * Removes a session from the registry when the owner may access it.
 *
 * @returns `{ deleted: true, sessionId }` on removal, otherwise `{ deleted: false }`.
 */
async deleteSession(owner: ComputerUseOwner, sessionId: string) {
  const ownerId = getOwnerId(owner);
  const session = sessions.get(sessionId);
  if (session && canAccessSession(ownerId, session)) {
    sessions.delete(sessionId);
    return { deleted: true, sessionId };
  }
  return { deleted: false };
},
// --- User-initiated actions (from the panel) -------------------------------
/**
 * Captures a fresh screenshot for a ready session on behalf of its owner.
 *
 * @throws Error when the session is missing, not accessible, or not ready.
 */
async userScreenshot(owner: ComputerUseOwner, sessionId: string) {
  const ownerId = getOwnerId(owner);
  const session = sessions.get(sessionId);
  if (!(session && canAccessSession(ownerId, session))) {
    throw new Error('Computer Use session not found.');
  }
  assertReady(session);
  await refreshScreenshot(session);
  session.lastAction = 'screenshot';
  return publicSession(session);
},
/**
 * Performs a user-initiated click (left button by default) at the given
 * screenshot coordinates and records the cursor as moved by the user.
 *
 * @throws Error when the session is missing, not accessible, or not ready.
 */
async userClick(owner: ComputerUseOwner, sessionId: string, input: { x: number; y: number; button?: ClickButton; double?: boolean }) {
  const ownerId = getOwnerId(owner);
  const session = sessions.get(sessionId);
  if (!(session && canAccessSession(ownerId, session))) {
    throw new Error('Computer Use session not found.');
  }
  assertReady(session);

  const { x, y, button, double: isDouble } = input;
  await performAction(session, {
    type: 'click',
    button: button || 'left',
    point: { x, y },
    double: isDouble === true,
  });

  session.cursor = { x, y, actor: 'user' };
  session.lastAction = isDouble ? 'double_click' : 'click';
  return publicSession(session);
},
/**
 * Types `text` into a ready session on behalf of its owner.
 *
 * @throws Error when the session is missing, not accessible, or not ready.
 */
async userType(owner: ComputerUseOwner, sessionId: string, text: string) {
  const ownerId = getOwnerId(owner);
  const session = sessions.get(sessionId);
  const accessible = session ? canAccessSession(ownerId, session) : false;
  if (!session || !accessible) {
    throw new Error('Computer Use session not found.');
  }
  assertReady(session);
  await performAction(session, { type: 'type', text });
  session.lastAction = 'type';
  return publicSession(session);
},
/**
 * Presses a key (or chord) in a ready session on behalf of its owner and
 * records it as `key:<key>`.
 *
 * @throws Error when the session is missing, not accessible, or not ready.
 */
async userPressKey(owner: ComputerUseOwner, sessionId: string, key: string) {
  const ownerId = getOwnerId(owner);
  const session = sessions.get(sessionId);
  const accessible = session ? canAccessSession(ownerId, session) : false;
  if (!session || !accessible) {
    throw new Error('Computer Use session not found.');
  }
  assertReady(session);
  await performAction(session, { type: 'key', key });
  session.lastAction = `key:${key}`;
  return publicSession(session);
},
// --- Agent-initiated actions (via MCP) ------------------------------------
/**
 * Creates a session owned by the agent identity (`AGENT_OWNER_ID`) and marked
 * as created by an agent.
 *
 * @throws Error when agent tools are not available.
 */
async createAgentSession() {
  assertAgentToolsAvailable();
  const agentOwner = { id: AGENT_OWNER_ID };
  return this.createSession(agentOwner, { createdBy: 'agent' });
},
/**
 * Lists the public view of every session in the registry for agent callers,
 * or an empty list when agent tools are unavailable.
 *
 * NOTE(review): no owner filter is applied here, unlike `listSessions` —
 * confirm this is intended for multi-user deployments.
 */
async listAgentSessions() {
  if (agentToolsAvailable()) {
    await expireStaleSessions();
    return Array.from(sessions.values()).map(publicSession);
  }
  return [];
},
/**
 * Looks up a session the agent is allowed to drive.
 *
 * Local mode enforces the in-process per-session consent flag. In cloud mode
 * the linked desktop agent is the consent authority (it prompts the user per
 * its own consent mode), so only relay connectivity and session readiness are
 * required.
 *
 * @throws Error when agent tools are unavailable, the session is unknown,
 *   consent is missing (local mode), or the session is not ready.
 */
async getConsentedSession(sessionId: string): Promise<ComputerUseSession> {
  assertAgentToolsAvailable();

  const session = sessions.get(sessionId);
  if (!session) {
    throw new Error('Computer Use session not found.');
  }

  const consentEnforcedHere = getRuntime() !== 'cloud';
  if (consentEnforcedHere && !session.agentAccessEnabled) {
    throw new Error('Computer Use session is awaiting user consent. Ask the user to grant control in the Computer panel.');
  }

  assertReady(session);
  return session;
},
/** Agent tool: refreshes the screenshot of a consented session and returns its public view. */
async agentScreenshot(sessionId: string) {
  const target = await this.getConsentedSession(sessionId);
  await refreshScreenshot(target);
  target.lastAction = 'screenshot';
  return publicSession(target);
},
/**
 * Agent tool: reads the current cursor position, records it on the session as
 * an agent cursor, and returns both the public session and the position.
 */
async agentCursorPosition(sessionId: string) {
  const target = await this.getConsentedSession(sessionId);
  const position = await getCursorPosition(target);
  target.cursor = { ...position, actor: 'agent' };
  target.lastAction = 'cursor_position';
  return { session: publicSession(target), position };
},
/** Agent tool: moves the pointer to `point` and records it as the agent cursor. */
async agentMouseMove(sessionId: string, point: Point) {
  const target = await this.getConsentedSession(sessionId);
  await performAction(target, { type: 'mouse_move', point });
  target.cursor = { ...point, actor: 'agent' };
  target.lastAction = 'mouse_move';
  return publicSession(target);
},
/**
 * Agent tool: clicks with `button`, optionally at `point` and optionally as a
 * double click. The agent cursor is only updated when a point was supplied.
 * `lastAction` becomes `double_click` or `<button>_click`.
 */
async agentClick(sessionId: string, button: ClickButton, point?: Point, doubleClick = false) {
  const target = await this.getConsentedSession(sessionId);
  await performAction(target, { type: 'click', button, point, double: doubleClick });
  if (point) {
    target.cursor = { ...point, actor: 'agent' };
  }
  const actionLabel = doubleClick ? 'double_click' : `${button}_click`;
  target.lastAction = actionLabel;
  return publicSession(target);
},
/**
 * Agent tool: drags from `from` to `to` while holding `button` (left by
 * default) and records the drop point as the agent cursor.
 *
 * `lastAction` reflects the button actually used (`<button>_click_drag`), in
 * line with how `agentClick` records `<button>_click`; for the default left
 * button this is `left_click_drag`.
 */
async agentDrag(sessionId: string, from: Point, to: Point, button: ClickButton = 'left') {
  const session = await this.getConsentedSession(sessionId);
  await performAction(session, { type: 'drag', from, to, button });
  session.cursor = { ...to, actor: 'agent' };
  session.lastAction = `${button}_click_drag`;
  return publicSession(session);
},
/** Agent tool: types `text` into a consented session. */
async agentType(sessionId: string, text: string) {
  const target = await this.getConsentedSession(sessionId);
  await performAction(target, { type: 'type', text });
  target.lastAction = 'type';
  return publicSession(target);
},
/** Agent tool: presses a key (or chord) in a consented session; recorded as `key:<key>`. */
async agentKey(sessionId: string, key: string) {
  const target = await this.getConsentedSession(sessionId);
  await performAction(target, { type: 'key', key });
  target.lastAction = `key:${key}`;
  return publicSession(target);
},
/**
 * Agent tool: scrolls in `input.direction`. A target point is used only when
 * both `x` and `y` are numbers; in that case it also becomes the agent cursor.
 */
async agentScroll(sessionId: string, input: { direction: ScrollDirection; amount?: number; x?: number; y?: number }) {
  const target = await this.getConsentedSession(sessionId);

  const hasCoordinates = typeof input.x === 'number' && typeof input.y === 'number';
  const point = hasCoordinates ? { x: input.x as number, y: input.y as number } : undefined;

  await performAction(target, { type: 'scroll', direction: input.direction, amount: input.amount, point });
  if (point) {
    target.cursor = { ...point, actor: 'agent' };
  }
  target.lastAction = `scroll:${input.direction}`;
  return publicSession(target);
},
/** Agent tool: performs a `wait` action with the optional duration `timeoutMs` on a consented session. */
async agentWait(sessionId: string, timeoutMs?: number) {
  const target = await this.getConsentedSession(sessionId);
  await performAction(target, { type: 'wait', ms: timeoutMs });
  target.lastAction = 'wait';
  return publicSession(target);
},
/**
 * Agent tool: stops a session using the agent identity (`AGENT_OWNER_ID`).
 *
 * @throws Error when agent tools are not available.
 */
async agentStopSession(sessionId: string) {
  assertAgentToolsAvailable();
  const agentOwner = { id: AGENT_OWNER_ID };
  return this.stopSession(agentOwner, sessionId);
},
/**
 * Cloud only: when a desktop agent links to this hosted environment, expose
 * the computer_* MCP tools to every provider so the running agent can use
 * them. Driven by relay connectivity rather than a settings toggle. A
 * registration failure is logged as a warning and never rethrown.
 */
async onDesktopAgentConnected() {
  if (getRuntime() === 'cloud') {
    try {
      await this.registerAgentMcp();
    } catch (failure) {
      const detail = failure instanceof Error ? failure.message : failure;
      console.warn('[Computer Use] Failed to register MCP for linked desktop agent:', detail);
    }
  }
},
/**
 * Cloud only: once no desktop agent is connected any more, marks every `ready`
 * session as stopped and revokes its agent access. Does nothing in local mode
 * or while an agent is still connected.
 */
async onDesktopAgentDisconnected() {
  const lastAgentGone = getRuntime() === 'cloud' && !desktopAgentRelay.isConnected();
  if (!lastAgentGone) {
    return;
  }
  sessions.forEach((session) => {
    if (session.status !== 'ready') {
      return;
    }
    session.status = 'stopped';
    session.agentAccessEnabled = false;
    session.updatedAt = new Date().toISOString();
    session.lastAction = 'agent-disconnected';
    session.message = 'The linked desktop agent disconnected.';
  });
},
/**
 * Marks every session in the registry as stopped with a shutdown message and
 * revokes agent access, regardless of the session's previous status.
 */
async stopAllSessions() {
  sessions.forEach((session) => {
    session.status = 'stopped';
    session.agentAccessEnabled = false;
    session.updatedAt = new Date().toISOString();
    session.lastAction = 'shutdown';
    session.message = 'Computer Use session stopped during server shutdown.';
  });
},
};
// Keep cloud MCP exposure and session teardown in step with desktop-agent
// connectivity: the relay calls these when the first agent links and when the
// last one goes away.
desktopAgentRelay.setHooks({
  onFirstConnect() {
    return computerUseService.onDesktopAgentConnected();
  },
  onLastDisconnect() {
    return computerUseService.onDesktopAgentDisconnected();
  },
});

// Mark every session as stopped when the process emits `beforeExit`.
process.once('beforeExit', function stopSessionsBeforeExit() {
  void computerUseService.stopAllSessions();
});

View File

@@ -0,0 +1,129 @@
import { randomUUID } from 'node:crypto';
import type { WebSocket } from 'ws';
/** Fallback relay timeout used when the environment override is absent or unusable. */
const DEFAULT_RELAY_TIMEOUT_MS = 60000;

/**
 * Parses the relay timeout override.
 *
 * A missing, empty, non-numeric, zero or negative value falls back to the
 * default: passing NaN or a non-positive delay to `setTimeout` would make
 * every relay call time out almost immediately.
 */
function parseRelayTimeoutMs(raw: string | undefined): number {
  const parsed = Number.parseInt(raw ?? '', 10);
  return Number.isFinite(parsed) && parsed > 0 ? parsed : DEFAULT_RELAY_TIMEOUT_MS;
}

/** How long a relay call waits for the desktop agent's reply, in milliseconds. */
const RELAY_TIMEOUT_MS = parseRelayTimeoutMs(process.env.CLOUDCLI_COMPUTER_USE_RELAY_TIMEOUT_MS);
/** `readyState` value compared against to decide whether an agent socket is open. */
const WS_OPEN = 1;
/** Bookkeeping for one in-flight relay call that is waiting for a reply. */
type PendingRelay = {
// Settles the promise returned by `relay` with the desktop agent's result.
resolve: (value: unknown) => void;
// Fails the promise returned by `relay` (agent error, timeout, send failure, disconnect).
reject: (reason: Error) => void;
// Timeout timer for this call; cleared whenever the call is settled another way.
timer: ReturnType<typeof setTimeout>;
};
/** One registered desktop agent connection. */
type ConnectedAgent = {
ws: WebSocket;
label: string;
// ISO-8601 timestamp taken when the agent was registered.
registeredAt: string;
};
/** Optional callbacks fired on connectivity edges of the agent registry. */
type RelayLifecycleHooks = {
// Invoked by `register` when no open agent existed before the new one was added.
onFirstConnect?: () => void | Promise<void>;
// Invoked from a socket's `close` handler when no open agent remains afterwards.
onLastDisconnect?: () => void | Promise<void>;
};
// Registered agents, keyed by their socket.
const agents = new Map<WebSocket, ConnectedAgent>();
// In-flight relay calls, keyed by the call id sent to the desktop agent.
const pending = new Map<string, PendingRelay>();
// Currently installed lifecycle hooks; replaced wholesale by `setHooks`.
let hooks: RelayLifecycleHooks = {};
/**
 * Fails every in-flight relay call with `reason`: each call's timeout timer is
 * cancelled, its promise rejected, and its entry removed from `pending`.
 */
function rejectAllPending(reason: string): void {
  pending.forEach((call, callId) => {
    clearTimeout(call.timer);
    call.reject(new Error(reason));
    pending.delete(callId);
  });
}
/**
 * Returns the first registered agent (in registration order) whose socket is
 * currently open, or `undefined` when there is none.
 */
function pickAgent(): ConnectedAgent | undefined {
  const registered = Array.from(agents.values());
  return registered.find((candidate) => candidate.ws.readyState === WS_OPEN);
}
/**
 * Cloud-side registry of linked desktop agents and the request/response relay
 * used to drive the user's real desktop. The hosted server never touches the OS
 * itself — it only forwards `computer_*` actions to a connected desktop agent
 * and awaits the reply it sends back.
 */
export const desktopAgentRelay = {
  /** Replaces the lifecycle hooks fired on first connect / last disconnect. */
  setHooks(next: RelayLifecycleHooks): void {
    hooks = next;
  },

  /**
   * Adds an agent socket to the registry and wires its `close` handler.
   *
   * `onFirstConnect` fires when no open agent existed before this one. When the
   * socket closes and no open agent remains, every pending relay call is
   * rejected and `onLastDisconnect` fires.
   */
  register(ws: WebSocket, label = 'desktop-agent'): void {
    const wasEmpty = pickAgent() === undefined;
    agents.set(ws, { ws, label, registeredAt: new Date().toISOString() });
    console.log(`[DesktopAgent] Registered (${label}); ${agents.size} connected.`);
    ws.on('close', () => {
      agents.delete(ws);
      console.log(`[DesktopAgent] Disconnected (${label}); ${agents.size} remain.`);
      if (pickAgent() === undefined) {
        rejectAllPending('Desktop agent disconnected.');
        void hooks.onLastDisconnect?.();
      }
    });
    if (wasEmpty) {
      void hooks.onFirstConnect?.();
    }
  },

  /**
   * Settles a pending relay call with the desktop agent's reply. Unknown ids
   * (already settled or timed out) are ignored. A non-empty `error` rejects the
   * call; otherwise it resolves with `result`.
   */
  handleResult(id: string, result: unknown, error?: string): void {
    const call = pending.get(id);
    if (!call) {
      return;
    }
    clearTimeout(call.timer);
    pending.delete(id);
    if (error) {
      call.reject(new Error(error));
    } else {
      call.resolve(result);
    }
  },

  /** True when at least one registered agent has an open socket. */
  isConnected(): boolean {
    return pickAgent() !== undefined;
  },

  /** Number of registered agents whose socket is currently open. */
  connectedCount(): number {
    let count = 0;
    for (const agent of agents.values()) {
      if (agent.ws.readyState === WS_OPEN) {
        count++;
      }
    }
    return count;
  },

  /**
   * Sends one `computer_relay` message to the first open agent and resolves
   * with the reply delivered through `handleResult`.
   *
   * The call rejects when no agent is connected, when the agent reports an
   * error, when the message cannot be sent (synchronously or via the socket's
   * send callback), or when no reply arrives within `RELAY_TIMEOUT_MS`.
   *
   * @param type Relay action name forwarded to the desktop agent.
   * @param params Action parameters; must be JSON-serialisable.
   */
  async relay(type: string, params: Record<string, unknown>): Promise<unknown> {
    const agent = pickAgent();
    if (!agent) {
      throw new Error(
        'No desktop agent connected. Open the CloudCLI desktop app with Computer Use enabled to control this machine.'
      );
    }
    const id = randomUUID();
    return new Promise<unknown>((resolve, reject) => {
      // Rejects this call and drops its bookkeeping, unless it was already settled.
      const fail = (reason: Error): void => {
        const call = pending.get(id);
        if (!call) {
          return;
        }
        clearTimeout(call.timer);
        pending.delete(id);
        reject(reason);
      };

      const timer = setTimeout(() => {
        fail(new Error('Desktop agent did not respond in time.'));
      }, RELAY_TIMEOUT_MS);
      pending.set(id, { resolve, reject, timer });

      try {
        // The send callback surfaces asynchronous write failures right away
        // instead of leaving the caller to wait for the full timeout.
        agent.ws.send(
          JSON.stringify({ kind: 'computer_relay', id, type, params }),
          (sendError?: Error | null) => {
            if (sendError) {
              fail(sendError);
            }
          }
        );
      } catch (error) {
        fail(error instanceof Error ? error : new Error('Failed to send to desktop agent.'));
      }
    });
  },
};

View File

@@ -0,0 +1,2 @@
export { computerUseService } from '@/modules/computer-use/computer-use.service.js';
export { desktopAgentRelay } from '@/modules/computer-use/desktop-agent-relay.service.js';