mirror of
https://github.com/siteboon/claudecodeui.git
synced 2026-06-18 14:32:06 +08:00
261 lines
8.7 KiB
JavaScript
261 lines
8.7 KiB
JavaScript
#!/usr/bin/env node
|
|
/**
|
|
* CloudCLI Computer Use — Desktop Agent.
|
|
*
|
|
* Standalone executor for the cloud relay. The Electron desktop app spawns this
|
|
* process (via ELECTRON_RUN_AS_NODE) whenever Computer Use is enabled and the
|
|
* user has running cloud environments. It opens an outbound websocket to each
|
|
* environment's `/desktop-agent` endpoint and executes the `computer_*` actions
|
|
* the hosted server relays, returning a fresh screenshot each time.
|
|
*
|
|
* It is fully self-contained: it reuses the shared nut-js executor module and
|
|
* does NOT depend on the local CloudCLI server. Consent is enforced here (the
|
|
* controlled machine is the authority): in `ask` mode the agent asks the parent
|
|
* Electron process for a per-session decision before the first action runs.
|
|
*/
|
|
import readline from 'node:readline';
|
|
|
|
import { WebSocket } from 'ws';
|
|
|
|
import {
|
|
executor,
|
|
captureScreenshot,
|
|
getRuntimeReadiness,
|
|
type ExecutorTarget,
|
|
type Point,
|
|
type ClickButton,
|
|
type ScrollDirection,
|
|
} from './modules/computer-use/computer-executor.js';
|
|
|
|
type ConsentMode = 'ask' | 'auto';
|
|
|
|
type RelayMessage = {
|
|
kind?: string;
|
|
type?: string;
|
|
id?: string;
|
|
params?: Record<string, unknown>;
|
|
};
|
|
|
|
const IPC_PREFIX = '@@CUAGENT@@';
|
|
const RECONNECT_BASE_MS = 2000;
|
|
const RECONNECT_MAX_MS = 30_000;
|
|
|
|
const consentMode: ConsentMode = process.env.CLOUDCLI_COMPUTER_USE_CONSENT_MODE === 'auto' ? 'auto' : 'ask';
|
|
const agentLabel = process.env.CLOUDCLI_DESKTOP_AGENT_LABEL || 'cloudcli-desktop';
|
|
|
|
function parseTargets(): string[] {
|
|
const raw =
|
|
process.env.CLOUDCLI_DESKTOP_AGENT_URLS ||
|
|
process.env.CLOUDCLI_DESKTOP_AGENT_URL ||
|
|
'';
|
|
return raw
|
|
.split(',')
|
|
.map((value) => value.trim())
|
|
.filter(Boolean);
|
|
}
|
|
|
|
// --- Parent (Electron) IPC over stdout/stdin -------------------------------
|
|
|
|
function emitToParent(message: Record<string, unknown>): void {
|
|
process.stdout.write(`${IPC_PREFIX} ${JSON.stringify(message)}\n`);
|
|
}
|
|
|
|
/** Per-session consent decisions, and resolvers awaiting a parent reply. */
|
|
const sessionConsent = new Map<string, 'granted' | 'denied'>();
|
|
const pendingConsent = new Map<string, Array<(allow: boolean) => void>>();
|
|
|
|
const stdinReader = readline.createInterface({ input: process.stdin });
|
|
stdinReader.on('line', (line) => {
|
|
const trimmed = line.trim();
|
|
if (!trimmed.startsWith(IPC_PREFIX)) {
|
|
return;
|
|
}
|
|
try {
|
|
const payload = JSON.parse(trimmed.slice(IPC_PREFIX.length).trim()) as Record<string, unknown>;
|
|
if (payload.type === 'consent-response' && typeof payload.sessionId === 'string') {
|
|
const allow = payload.allow === true;
|
|
sessionConsent.set(payload.sessionId, allow ? 'granted' : 'denied');
|
|
const waiters = pendingConsent.get(payload.sessionId) || [];
|
|
pendingConsent.delete(payload.sessionId);
|
|
for (const resolve of waiters) {
|
|
resolve(allow);
|
|
}
|
|
} else if (payload.type === 'revoke-session' && typeof payload.sessionId === 'string') {
|
|
sessionConsent.delete(payload.sessionId);
|
|
}
|
|
} catch {
|
|
// ignore malformed control lines
|
|
}
|
|
});
|
|
|
|
async function ensureConsent(sessionId: string): Promise<boolean> {
|
|
if (consentMode === 'auto') {
|
|
return true;
|
|
}
|
|
const existing = sessionConsent.get(sessionId);
|
|
if (existing === 'granted') return true;
|
|
if (existing === 'denied') return false;
|
|
|
|
// Ask the parent (Electron) to prompt the user, and wait for the decision.
|
|
return new Promise<boolean>((resolve) => {
|
|
const waiters = pendingConsent.get(sessionId) || [];
|
|
waiters.push(resolve);
|
|
pendingConsent.set(sessionId, waiters);
|
|
emitToParent({ type: 'consent-request', sessionId });
|
|
});
|
|
}
|
|
|
|
// --- Action execution ------------------------------------------------------
|
|
|
|
function asPoint(value: unknown): Point | undefined {
|
|
if (value && typeof value === 'object') {
|
|
const point = value as Record<string, unknown>;
|
|
if (typeof point.x === 'number' && typeof point.y === 'number') {
|
|
return { x: point.x, y: point.y };
|
|
}
|
|
}
|
|
return undefined;
|
|
}
|
|
|
|
async function snapshot(target: ExecutorTarget) {
|
|
const { dataUrl, size } = await captureScreenshot();
|
|
return { screenshotDataUrl: dataUrl, displaySize: size || target.displaySize };
|
|
}
|
|
|
|
async function runAction(type: string, params: Record<string, unknown>): Promise<Record<string, unknown>> {
|
|
const readiness = getRuntimeReadiness();
|
|
if (!readiness.nutInstalled || !readiness.screenshotInstalled) {
|
|
throw new Error('Computer Use runtime is not installed on the desktop agent.');
|
|
}
|
|
|
|
const target: ExecutorTarget = {
|
|
displaySize: (params.displaySize as ExecutorTarget['displaySize']) ?? null,
|
|
};
|
|
const point = asPoint(params.point);
|
|
|
|
switch (type) {
|
|
case 'screenshot':
|
|
return snapshot(target);
|
|
case 'cursor_position': {
|
|
const position = await executor.cursorPosition(target);
|
|
return { ...(await snapshot(target)), position, cursor: position };
|
|
}
|
|
case 'mouse_move':
|
|
await executor.moveTo(target, point as Point);
|
|
return { ...(await snapshot(target)), cursor: point };
|
|
case 'click':
|
|
await executor.click(target, (params.button as ClickButton) || 'left', point, params.double === true);
|
|
return { ...(await snapshot(target)), cursor: point ?? null };
|
|
case 'drag':
|
|
await executor.drag(target, asPoint(params.from) as Point, asPoint(params.to) as Point, (params.button as ClickButton) || 'left');
|
|
return { ...(await snapshot(target)), cursor: asPoint(params.to) ?? null };
|
|
case 'type':
|
|
await executor.type(String(params.text ?? ''));
|
|
return snapshot(target);
|
|
case 'key':
|
|
await executor.pressChord(String(params.key ?? ''));
|
|
return snapshot(target);
|
|
case 'scroll':
|
|
await executor.scroll(
|
|
target,
|
|
(params.direction as ScrollDirection) || 'down',
|
|
typeof params.amount === 'number' ? params.amount : 3,
|
|
point,
|
|
);
|
|
return { ...(await snapshot(target)), cursor: point ?? null };
|
|
case 'wait':
|
|
await new Promise((resolve) => setTimeout(resolve, Math.max(0, Math.min(Number(params.ms) || 1000, 10_000))));
|
|
return snapshot(target);
|
|
default:
|
|
throw new Error(`Unsupported computer action: ${type}`);
|
|
}
|
|
}
|
|
|
|
// --- Relay connection ------------------------------------------------------
|
|
|
|
function connect(url: string): void {
|
|
let reconnectMs = RECONNECT_BASE_MS;
|
|
let socket: WebSocket | null = null;
|
|
|
|
const open = () => {
|
|
socket = new WebSocket(url, {
|
|
headers: process.env.CLOUDCLI_DESKTOP_AGENT_TOKEN
|
|
? { 'x-cloudcli-agent-token': process.env.CLOUDCLI_DESKTOP_AGENT_TOKEN }
|
|
: undefined,
|
|
});
|
|
|
|
socket.on('open', () => {
|
|
reconnectMs = RECONNECT_BASE_MS;
|
|
emitToParent({ type: 'connected', url });
|
|
socket?.send(JSON.stringify({ kind: 'register', label: agentLabel, consentMode }));
|
|
});
|
|
|
|
socket.on('message', async (raw) => {
|
|
let message: RelayMessage;
|
|
try {
|
|
message = JSON.parse(String(raw)) as RelayMessage;
|
|
} catch {
|
|
return;
|
|
}
|
|
const kind = message.kind || message.type;
|
|
if (kind !== 'computer_relay' || typeof message.id !== 'string') {
|
|
return;
|
|
}
|
|
|
|
const id = message.id;
|
|
const type = String(message.type || (message.params?.type as string) || '');
|
|
const params = message.params || {};
|
|
const sessionId = typeof params.sessionId === 'string' ? params.sessionId : 'default';
|
|
|
|
if (type === 'stop_session') {
|
|
sessionConsent.delete(sessionId);
|
|
socket?.send(JSON.stringify({ kind: 'computer_relay_result', id, result: { ok: true } }));
|
|
return;
|
|
}
|
|
|
|
try {
|
|
const allowed = await ensureConsent(sessionId);
|
|
if (!allowed) {
|
|
socket?.send(JSON.stringify({ kind: 'computer_relay_result', id, error: 'The user denied desktop control for this session.' }));
|
|
return;
|
|
}
|
|
const result = await runAction(type, params);
|
|
socket?.send(JSON.stringify({ kind: 'computer_relay_result', id, result }));
|
|
} catch (error) {
|
|
socket?.send(JSON.stringify({
|
|
kind: 'computer_relay_result',
|
|
id,
|
|
error: error instanceof Error ? error.message : 'Desktop agent action failed.',
|
|
}));
|
|
}
|
|
});
|
|
|
|
const scheduleReconnect = () => {
|
|
emitToParent({ type: 'disconnected', url });
|
|
setTimeout(open, reconnectMs);
|
|
reconnectMs = Math.min(reconnectMs * 2, RECONNECT_MAX_MS);
|
|
};
|
|
|
|
socket.on('close', scheduleReconnect);
|
|
socket.on('error', () => {
|
|
try { socket?.close(); } catch { /* noop */ }
|
|
});
|
|
};
|
|
|
|
open();
|
|
}
|
|
|
|
function main(): void {
|
|
const targets = parseTargets();
|
|
if (targets.length === 0) {
|
|
emitToParent({ type: 'error', message: 'No desktop-agent target URLs provided.' });
|
|
return;
|
|
}
|
|
emitToParent({ type: 'starting', targets, consentMode });
|
|
for (const url of targets) {
|
|
connect(url);
|
|
}
|
|
}
|
|
|
|
main();
|