mirror of
https://github.com/siteboon/claudecodeui.git
synced 2026-06-30 00:32:57 +08:00
Compare commits
78 Commits
main
...
camoufox-n
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
0907d873f6 | ||
|
|
ec437072ad | ||
|
|
54f4d8aa36 | ||
|
|
261690935f | ||
|
|
46ba8e56b4 | ||
|
|
a0899a252e | ||
|
|
fff89e6132 | ||
|
|
3bc2c777a3 | ||
|
|
63f3c3941d | ||
|
|
e6c6f89dda | ||
|
|
8adcdaa0e5 | ||
|
|
6f712269e8 | ||
|
|
52244404a3 | ||
|
|
8ad18f8587 | ||
|
|
fe116a7138 | ||
|
|
490e66ebdb | ||
|
|
81eb966904 | ||
|
|
0d68dc2cd0 | ||
|
|
0610cc8333 | ||
|
|
9457651fdd | ||
|
|
8c31ebcc63 | ||
|
|
bb630ef739 | ||
|
|
1c05fe0905 | ||
|
|
077baee5f2 | ||
|
|
f150fa6b09 | ||
|
|
9f8cee8919 | ||
|
|
bb323fc566 | ||
|
|
5ef40be2d3 | ||
|
|
cf4b28273e | ||
|
|
f4c68942a5 | ||
|
|
4d70a2588c | ||
|
|
218e8e2e38 | ||
|
|
53c3c4c27a | ||
|
|
901c6fc956 | ||
|
|
278fe4f7b1 | ||
|
|
d7f4d4c342 | ||
|
|
d1930fecdb | ||
|
|
1726705459 | ||
|
|
a35200f340 | ||
|
|
06c9745489 | ||
|
|
0dd22db2bb | ||
|
|
e7aa72c41e | ||
|
|
9f24f80f33 | ||
|
|
25ab273b05 | ||
|
|
5be100ea1b | ||
|
|
2af3d38afe | ||
|
|
531833bc87 | ||
|
|
b2333e7d93 | ||
|
|
f75ae385dd | ||
|
|
7786763dd1 | ||
|
|
1dbf545fd9 | ||
|
|
ac37213269 | ||
|
|
65fdc38f2e | ||
|
|
6c2652aee6 | ||
|
|
bf50d29c20 | ||
|
|
ffc0cd7501 | ||
|
|
59194d1502 | ||
|
|
7e6028b113 | ||
|
|
9881e5e366 | ||
|
|
496a895e8a | ||
|
|
086df034b4 | ||
|
|
fc71fc7d2b | ||
|
|
a0d56429a7 | ||
|
|
6af4afe6f2 | ||
|
|
7aeca52669 | ||
|
|
56532af33a | ||
|
|
9438a365f2 | ||
|
|
e5c6e5e596 | ||
|
|
0426522406 | ||
|
|
6e7e2ff4c1 | ||
|
|
e6263dbd1f | ||
|
|
260070bae0 | ||
|
|
daac6e3fd3 | ||
|
|
861cfecbaa | ||
|
|
a182765e10 | ||
|
|
828d1a2302 | ||
|
|
d427004bd7 | ||
|
|
243e6cecd5 |
151
.github/workflows/desktop-macos-release.yml
vendored
Normal file
151
.github/workflows/desktop-macos-release.yml
vendored
Normal file
@@ -0,0 +1,151 @@
|
||||
# Manually dispatched pipeline that builds, signs, notarizes and publishes the
# macOS desktop app together with the local server runtime it downloads at
# run time. Every artifact is verified before anything is published so a
# broken build cannot leave a half-populated set of releases behind.
name: Desktop macOS Release

on:
  workflow_dispatch:
    inputs:
      tag:
        description: 'Release tag to create or update (defaults to v<package version>)'
        required: false
        type: string
      release_name:
        description: 'Release name (defaults to "CloudCLI Desktop macOS <tag>")'
        required: false
        type: string
      prerelease:
        description: 'Mark the GitHub release as a prerelease'
        required: true
        default: false
        type: boolean

jobs:
  build-macos:
    name: Build signed macOS desktop app
    runs-on: macos-latest
    permissions:
      # Needed to create/update releases and upload assets.
      contents: write
    steps:
      - name: Checkout
        uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6
        with:
          fetch-depth: 0
          persist-credentials: false

      - name: Set up Node.js
        uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6
        with:
          node-version: 22
          cache: npm

      - name: Install dependencies
        run: npm ci

      - name: Typecheck
        run: npm run typecheck

      # Inputs are passed through env (never interpolated into the script) and
      # the tag is normalized to [A-Za-z0-9._-] before it is used anywhere.
      - name: Resolve release metadata
        id: release
        env:
          TAG_INPUT: ${{ inputs.tag }}
          RELEASE_NAME_INPUT: ${{ inputs.release_name }}
        run: |
          VERSION="$(node -p "require('./package.json').version")"
          TAG="$TAG_INPUT"
          if [ -z "$TAG" ]; then
            TAG="v${VERSION}"
          fi
          TAG="$(printf '%s' "$TAG" | tr -d '\r\n' | sed 's/[^A-Za-z0-9._-]/-/g')"
          if [ -z "$TAG" ]; then
            echo "Resolved release tag is empty after normalization." >&2
            exit 1
          fi

          RELEASE_NAME="$RELEASE_NAME_INPUT"
          if [ -z "$RELEASE_NAME" ]; then
            RELEASE_NAME="CloudCLI Desktop macOS ${TAG}"
          fi
          # Random heredoc delimiter so a crafted release name cannot terminate
          # the multi-line output early and inject additional outputs.
          RELEASE_NAME_DELIMITER="release_name_$(uuidgen)"

          {
            echo "tag=$TAG"
            echo "release_name<<$RELEASE_NAME_DELIMITER"
            printf '%s\n' "$RELEASE_NAME"
            echo "$RELEASE_NAME_DELIMITER"
            echo "server_bundle_tag=cloudcli-local-server-${TAG}"
          } >> "$GITHUB_OUTPUT"

      - name: Configure release server bundle source
        env:
          SERVER_BUNDLE_TAG: ${{ steps.release.outputs.server_bundle_tag }}
        run: printf '{"releaseTag":"%s"}\n' "$SERVER_BUNDLE_TAG" > electron/server-bundle-config.json

      # Fail early with a clear step name instead of deep inside the build.
      - name: Verify signing secrets are configured
        run: |
          test -n "$CSC_LINK"
          test -n "$CSC_KEY_PASSWORD"
          test -n "$APPLE_ID"
          test -n "$APPLE_APP_SPECIFIC_PASSWORD"
          test -n "$APPLE_TEAM_ID"
        env:
          CSC_LINK: ${{ secrets.CSC_LINK }}
          CSC_KEY_PASSWORD: ${{ secrets.CSC_KEY_PASSWORD }}
          APPLE_ID: ${{ secrets.APPLE_ID }}
          APPLE_APP_SPECIFIC_PASSWORD: ${{ secrets.APPLE_APP_SPECIFIC_PASSWORD }}
          APPLE_TEAM_ID: ${{ secrets.APPLE_TEAM_ID }}

      - name: Build signed and notarized macOS artifacts
        run: npm run desktop:dist:mac -- --publish never
        env:
          CLOUDCLI_SEMANTICS_BUILD_REQUIRED: "1"
          CSC_LINK: ${{ secrets.CSC_LINK }}
          CSC_KEY_PASSWORD: ${{ secrets.CSC_KEY_PASSWORD }}
          APPLE_ID: ${{ secrets.APPLE_ID }}
          APPLE_APP_SPECIFIC_PASSWORD: ${{ secrets.APPLE_APP_SPECIFIC_PASSWORD }}
          APPLE_TEAM_ID: ${{ secrets.APPLE_TEAM_ID }}

      - name: Build local server bundle
        run: node scripts/release/build-server-bundle.js

      - name: Verify local server runtime artifacts
        run: |
          test -n "$(find release/local-server -maxdepth 1 -name 'cloudcli-local-server-*.tar.gz' -print -quit)"
          test -n "$(find release/local-server -maxdepth 1 -name 'cloudcli-local-server-*.tar.gz.sha256' -print -quit)"

      # Runs before any publish step so a missing installer does not leave an
      # already-published runtime prerelease without a matching desktop release.
      - name: Verify macOS artifacts
        run: |
          test -n "$(find release/desktop -maxdepth 1 -name '*.dmg' -print -quit)"
          # Hash from inside the artifact directory so SHASUMS256.txt lists bare
          # file names and `shasum -c SHASUMS256.txt` works next to a downloaded
          # installer.
          (cd release/desktop && shasum -a 256 -- *.dmg) > release/SHASUMS256.txt
          cat release/SHASUMS256.txt

      - name: Publish local server runtime assets
        uses: softprops/action-gh-release@b4309332981a82ec1c5618f44dd2e27cc8bfbfda # v3.0.0
        with:
          tag_name: ${{ steps.release.outputs.server_bundle_tag }}
          target_commitish: ${{ github.sha }}
          name: CloudCLI Local Server Runtime (${{ steps.release.outputs.tag }})
          body: |
            This prerelease contains the Local mode runtime for CloudCLI Desktop.

            Download CloudCLI Desktop from the main ${{ steps.release.outputs.tag }} release. When you open Local CloudCLI, the desktop app automatically downloads the matching runtime from this prerelease.

            You do not need to download these runtime files manually.
          prerelease: true
          fail_on_unmatched_files: false
          overwrite_files: true
          files: |
            release/local-server/*

      - name: Publish GitHub release assets
        uses: softprops/action-gh-release@b4309332981a82ec1c5618f44dd2e27cc8bfbfda # v3.0.0
        with:
          tag_name: ${{ steps.release.outputs.tag }}
          target_commitish: ${{ github.sha }}
          name: ${{ steps.release.outputs.release_name }}
          body: |
            Download the CloudCLI Desktop installer for your Mac.

            The local server runtime used by local mode is installed automatically by the desktop app. You do not need to download any server bundle manually.
          prerelease: ${{ inputs.prerelease }}
          fail_on_unmatched_files: false
          files: |
            release/desktop/*.dmg
            release/SHASUMS256.txt
|
||||
290
electron/computerAgent.js
Normal file
290
electron/computerAgent.js
Normal file
@@ -0,0 +1,290 @@
|
||||
import { spawn } from 'node:child_process';
|
||||
import fs from 'node:fs/promises';
|
||||
import path from 'node:path';
|
||||
|
||||
const IPC_PREFIX = '@@CUAGENT@@';
|
||||
const TARGET_STATUS_TIMEOUT_MS = 5000;
|
||||
|
||||
/**
 * Builds the PATH value handed to the agent child process.
 *
 * On non-Windows platforms a fixed list of well-known binary directories is
 * placed in front of the inherited PATH. Entries are de-duplicated (first
 * occurrence wins, so lookup order is unchanged) because the inherited PATH
 * usually already contains most of the well-known directories.
 *
 * @returns {string} PATH entries joined with the platform delimiter.
 */
function getDesktopPath() {
  const inheritedPath = process.env.PATH || '';
  const wellKnownDirs = process.platform === 'win32'
    ? []
    : ['/opt/homebrew/bin', '/usr/local/bin', '/usr/bin', '/bin', '/usr/sbin', '/sbin'];
  // An unset/empty PATH contributes nothing; otherwise keep its segments as-is.
  const inheritedDirs = inheritedPath ? inheritedPath.split(path.delimiter) : [];
  return [...new Set([...wellKnownDirs, ...inheritedDirs])].join(path.delimiter);
}
|
||||
|
||||
/**
 * Chooses the executable used to run the agent script.
 *
 * @param {boolean} isPackaged - Whether the app runs from a packaged build.
 * @returns {{ command: string, env: Record<string, string> }} The binary to
 *   spawn plus extra environment variables it needs.
 */
function getNodeRuntime(isPackaged) {
  const runningInElectron = Boolean(process.versions.electron);
  if (isPackaged && runningInElectron) {
    // Reuse the current executable, asking it to behave like plain Node.
    return { command: process.execPath, env: { ELECTRON_RUN_AS_NODE: '1' } };
  }

  // Prefer the Node binary npm launched us with; otherwise rely on PATH.
  const npmNodeBinary = process.env.npm_node_execpath;
  return { command: npmNodeBinary ? npmNodeBinary : 'node', env: {} };
}
|
||||
|
||||
/**
 * Converts an environment base URL into the WebSocket URL of its
 * `/desktop-agent` endpoint.
 *
 * `http:` maps to `ws:` and `https:` maps to `wss:`; URLs that already use a
 * WebSocket scheme keep it. Any other scheme is rejected, because it cannot
 * be turned into a usable WebSocket URL (for example, a `file:` URL keeps its
 * scheme when a different one is assigned). Path, query and fragment of the
 * input are discarded.
 *
 * @param {string} httpUrl - Absolute base URL of the environment.
 * @returns {string|null} The agent WebSocket URL, or `null` when the input is
 *   not a valid absolute URL or uses an unsupported scheme.
 */
function toAgentWsUrl(httpUrl) {
  const wsSchemeFor = new Map([
    ['http:', 'ws:'],
    ['https:', 'wss:'],
    ['ws:', 'ws:'],
    ['wss:', 'wss:'],
  ]);

  let parsed;
  try {
    parsed = new URL(httpUrl);
  } catch {
    return null;
  }

  const wsScheme = wsSchemeFor.get(parsed.protocol);
  if (!wsScheme) {
    return null;
  }

  parsed.protocol = wsScheme;
  parsed.pathname = '/desktop-agent';
  parsed.search = '';
  parsed.hash = '';
  return parsed.toString();
}
|
||||
|
||||
/**
 * Asks an environment whether Computer Use is enabled on it.
 *
 * Sends a GET to `/api/computer-use/status` (resolved against `httpUrl`) and
 * aborts it after TARGET_STATUS_TIMEOUT_MS. Never throws: any failure
 * (invalid URL, network error, timeout, unreadable body) yields `false`.
 *
 * @param {string} httpUrl - Base URL of the environment.
 * @param {string} apiKey - Sent as `X-API-Key` when non-empty.
 * @returns {Promise<boolean>} `true` only for an OK response whose JSON body
 *   has `data.enabled === true` and does not have `success === false`.
 */
async function isComputerUseEnabledTarget(httpUrl, apiKey) {
  let endpoint;
  try {
    endpoint = new URL('/api/computer-use/status', httpUrl).toString();
  } catch {
    return false;
  }

  const aborter = new AbortController();
  const timer = setTimeout(() => aborter.abort(), TARGET_STATUS_TIMEOUT_MS);
  const requestInit = {
    signal: aborter.signal,
    headers: apiKey ? { 'X-API-Key': apiKey } : undefined,
  };

  try {
    const response = await fetch(endpoint, requestInit);
    // A body that is not valid JSON is treated as "no information".
    const body = await response.json().catch(() => null);
    if (!response.ok) {
      return false;
    }
    if (body?.success === false) {
      return false;
    }
    return body?.data?.enabled === true;
  } catch {
    return false;
  } finally {
    clearTimeout(timer);
  }
}
|
||||
|
||||
/**
 * Keeps only the targets that report Computer Use as enabled.
 *
 * All targets are probed concurrently; the input order is preserved.
 *
 * @param {string[]} targets - Environment base URLs to probe.
 * @param {string} apiKey - API key forwarded to every probe.
 * @returns {Promise<string[]>} The subset of `targets` that are enabled.
 */
async function filterEnabledComputerUseTargets(targets, apiKey) {
  const probes = targets.map((target) => isComputerUseEnabledTarget(target, apiKey));
  const enabledFlags = await Promise.all(probes);
  return targets.filter((_target, index) => enabledFlags[index]);
}
|
||||
|
||||
/**
 * Keeps a Computer Use desktop agent connected to running cloud environments
 * while desktop access is enabled.
 *
 * The agent runs as a child process. It reports events on stdout as lines
 * that start with IPC_PREFIX followed by a JSON payload; messages to the
 * agent are written to its stdin using the same framing. Any other stdout
 * line is forwarded to the console.
 */
export class ComputerAgentController {
  /**
   * @param {object} options
   * @param {string} options.appRoot - Working directory of the agent and base
   *   of the default agent entry script.
   * @param {string} options.settingsPath - JSON file the settings are stored in.
   * @param {boolean} [options.isPackaged=false] - Forwarded to getNodeRuntime().
   * @param {() => string[]} [options.getRunningEnvironmentUrls] - Returns the
   *   candidate environment URLs to connect to.
   * @param {() => string} [options.getApiKey] - Returns the API key used for
   *   status probes and passed to the agent.
   * @param {(sessionId: string) => (boolean|Promise<boolean>)} [options.promptConsent]
   *   - Called for each consent request coming from the agent.
   * @param {() => void} [options.onChange] - Called whenever observable state changes.
   */
  constructor({ appRoot, settingsPath, isPackaged = false, getRunningEnvironmentUrls, getApiKey, promptConsent, onChange }) {
    this.appRoot = appRoot;
    this.settingsPath = settingsPath;
    this.isPackaged = isPackaged;
    this.getRunningEnvironmentUrls = getRunningEnvironmentUrls;
    this.getApiKey = getApiKey;
    this.promptConsent = promptConsent;
    this.onChange = onChange;
    this.settings = { enabled: false, consentMode: 'ask' };
    this.child = null;
    // Consent mode the running child was spawned with (null when no child).
    this.childConsentMode = null;
    this.connectedUrls = new Set();
    this.currentTargets = [];
    this.stdoutBuffer = '';
    this.lastEvent = null;
    this.lastError = null;
    // Incremented by every sync() so an older, slower run can detect that it
    // has been superseded.
    this.syncGeneration = 0;
  }

  /**
   * Coerces arbitrary input into a valid settings object.
   *
   * @param {unknown} raw - Parsed settings of unknown shape.
   * @returns {{ enabled: boolean, consentMode: 'ask'|'auto' }}
   */
  static normalizeSettings(raw) {
    return {
      enabled: Boolean(raw?.enabled),
      // Anything other than an explicit 'auto' falls back to asking.
      consentMode: raw?.consentMode === 'auto' ? 'auto' : 'ask',
    };
  }

  /** @returns {{ enabled: boolean, consentMode: 'ask'|'auto' }} A copy of the settings. */
  getSettings() {
    return { ...this.settings };
  }

  /** @returns {object} Serializable snapshot of settings and connection state. */
  getState() {
    return {
      enabled: this.settings.enabled,
      consentMode: this.settings.consentMode,
      running: Boolean(this.child),
      connectedCount: this.connectedUrls.size,
      targetCount: this.currentTargets.length,
      targetUrls: [...this.currentTargets],
      lastEvent: this.lastEvent,
      lastError: this.lastError,
    };
  }

  /**
   * Reads the settings file. A missing or unreadable file resets the settings
   * to the disabled defaults.
   *
   * @returns {Promise<{ enabled: boolean, consentMode: 'ask'|'auto' }>}
   */
  async loadSettings() {
    try {
      const raw = await fs.readFile(this.settingsPath, 'utf8');
      this.settings = ComputerAgentController.normalizeSettings(JSON.parse(raw));
    } catch {
      this.settings = { enabled: false, consentMode: 'ask' };
    }
    return this.settings;
  }

  /**
   * Applies and persists new settings, then re-syncs the agent.
   *
   * @param {{ enabled?: unknown, consentMode?: unknown }} next
   * @returns {Promise<{ enabled: boolean, consentMode: 'ask'|'auto' }>}
   */
  async saveSettings(next) {
    this.settings = ComputerAgentController.normalizeSettings(next);
    await fs.mkdir(path.dirname(this.settingsPath), { recursive: true });
    await fs.writeFile(this.settingsPath, JSON.stringify(this.settings, null, 2), 'utf8');
    await this.sync();
    this.onChange?.();
    return this.settings;
  }

  /**
   * Reconciles the running agent with the current settings and environments:
   * stops it when disabled or when no environment has Computer Use enabled,
   * and (re)starts it when the target set or the consent mode changed.
   *
   * @returns {Promise<void>}
   */
  async sync() {
    const generation = ++this.syncGeneration;
    const candidates = this.settings.enabled ? (this.getRunningEnvironmentUrls?.() || []) : [];
    const enabledTargets = this.settings.enabled
      ? await filterEnabledComputerUseTargets(candidates, this.getApiKey?.() || '')
      : [];

    // A newer sync() started while the probes were in flight; let it decide.
    if (generation !== this.syncGeneration) {
      return;
    }

    const wsTargets = [...new Set(enabledTargets.map((url) => toAgentWsUrl(url)).filter(Boolean))];

    if (!this.settings.enabled || wsTargets.length === 0) {
      this.stop();
      this.currentTargets = [];
      this.lastEvent = this.settings.enabled ? 'no-targets' : 'disabled';
      return;
    }

    const current = new Set(this.currentTargets);
    const sameTargets = wsTargets.length === current.size && wsTargets.every((url) => current.has(url));
    // The consent mode is handed to the child through its environment at spawn
    // time, so a changed mode only takes effect after a restart.
    const sameConsentMode = this.childConsentMode === this.settings.consentMode;
    if (this.child && sameTargets && sameConsentMode) {
      return;
    }

    this.currentTargets = wsTargets;
    this.lastEvent = 'restarting';
    this.lastError = null;
    this.restart(wsTargets);
  }

  /**
   * Stops any running agent and spawns a new one for the given targets.
   *
   * @param {string[]} wsTargets - WebSocket URLs the agent should connect to.
   */
  restart(wsTargets) {
    this.stop();

    const agentEntry = process.env.CLOUDCLI_COMPUTER_AGENT_ENTRY
      || path.join(this.appRoot, 'dist-server', 'server', 'computer-use-agent.js');
    const runtime = getNodeRuntime(this.isPackaged);
    const consentMode = this.settings.consentMode;

    const child = spawn(runtime.command, [agentEntry], {
      cwd: this.appRoot,
      env: {
        ...process.env,
        ...runtime.env,
        PATH: getDesktopPath(),
        CLOUDCLI_DESKTOP_AGENT_URLS: wsTargets.join(','),
        CLOUDCLI_DESKTOP_AGENT_API_KEY: this.getApiKey?.() || '',
        CLOUDCLI_COMPUTER_USE_CONSENT_MODE: consentMode,
      },
      stdio: ['pipe', 'pipe', 'pipe'],
      windowsHide: true,
    });

    this.child = child;
    this.childConsentMode = consentMode;
    this.connectedUrls = new Set();
    // Never let a partial line of the previous process prefix the new output.
    this.stdoutBuffer = '';
    this.attachChildListeners(child);
    this.onChange?.();
  }

  /**
   * Wires the lifecycle and output listeners of an agent process.
   *
   * Every handler checks that `child` is still the active process before it
   * touches controller state: a replaced process exits asynchronously, and
   * its late events must not clear the reference to its replacement.
   *
   * @param {import('node:child_process').ChildProcess} child
   */
  attachChildListeners(child) {
    const isActive = () => this.child === child;

    child.once('error', (error) => {
      console.error('[ComputerAgent] failed to start:', error.message);
      if (!isActive()) return;
      this.lastEvent = 'start-error';
      this.lastError = error.message;
      this.child = null;
      this.childConsentMode = null;
      this.onChange?.();
    });

    // Decode as UTF-8 at the stream level so multi-byte characters split
    // across chunks are not corrupted.
    child.stdout?.setEncoding('utf8');
    child.stderr?.setEncoding('utf8');

    // Writing to a pipe whose reader has gone away raises an 'error' event on
    // the stream; without a listener that would be an uncaught exception.
    child.stdin?.on('error', (error) => {
      console.error('[ComputerAgent] stdin error:', error.message);
    });

    child.stdout?.on('data', (chunk) => {
      if (isActive()) this.handleStdout(String(chunk));
    });

    child.stderr?.on('data', (chunk) => {
      for (const line of String(chunk).split(/\r?\n/)) {
        if (line.trim()) {
          if (isActive()) this.lastError = line.trim();
          console.error('[ComputerAgent]', line);
        }
      }
    });

    child.once('exit', (code) => {
      console.log(`[ComputerAgent] exited (code ${code ?? 'null'})`);
      if (!isActive()) return;
      this.lastEvent = `exit:${code ?? 'null'}`;
      this.child = null;
      this.childConsentMode = null;
      this.connectedUrls = new Set();
      this.onChange?.();
    });
  }

  /**
   * Consumes a chunk of agent stdout, dispatching every complete IPC line and
   * buffering a trailing partial line until the rest arrives.
   *
   * @param {string} chunk
   */
  handleStdout(chunk) {
    this.stdoutBuffer += chunk;
    const lines = this.stdoutBuffer.split('\n');
    this.stdoutBuffer = lines.pop() || '';
    for (const line of lines) {
      const trimmed = line.trim();
      if (!trimmed.startsWith(IPC_PREFIX)) {
        if (trimmed) console.log('[ComputerAgent]', trimmed);
        continue;
      }
      let payload;
      try {
        payload = JSON.parse(trimmed.slice(IPC_PREFIX.length).trim());
      } catch {
        // Malformed IPC line: drop it.
        continue;
      }
      this.handleAgentEvent(payload).catch((error) => {
        this.lastError = error instanceof Error ? error.message : 'Failed to handle Computer Use agent event.';
      });
    }
  }

  /**
   * Applies one event reported by the agent.
   *
   * @param {{ type?: string, url?: string, reason?: string, message?: string, sessionId?: string }} payload
   * @returns {Promise<void>}
   */
  async handleAgentEvent(payload) {
    // JSON such as `null` or `42` parses fine but is not an event.
    if (!payload || typeof payload !== 'object') {
      return;
    }

    switch (payload.type) {
      case 'connected':
        this.connectedUrls.add(payload.url);
        this.lastEvent = 'connected';
        this.lastError = null;
        this.onChange?.();
        break;
      case 'disconnected':
        this.connectedUrls.delete(payload.url);
        this.lastEvent = 'disconnected';
        this.onChange?.();
        if (payload.reason && /computer use.*disabled/i.test(payload.reason)) {
          // The environment turned Computer Use off: recompute the targets.
          void this.sync().catch((error) => {
            this.lastError = error instanceof Error ? error.message : 'Failed to sync Computer Use targets.';
            this.onChange?.();
          });
        }
        break;
      case 'starting':
        this.lastEvent = 'starting';
        this.lastError = null;
        this.onChange?.();
        break;
      case 'error':
        this.lastEvent = 'error';
        this.lastError = payload.message || 'Computer agent error.';
        this.onChange?.();
        break;
      case 'consent-request': {
        const requester = this.child;
        let allow = false;
        try {
          allow = Boolean(await this.promptConsent?.(payload.sessionId));
        } catch (error) {
          // A failing prompt must never grant access; answer with a denial.
          this.lastError = error instanceof Error ? error.message : 'Failed to prompt for Computer Use consent.';
          this.onChange?.();
        }
        // Only answer the process that asked. If it was replaced while the
        // prompt was open, the session belongs to a process that is gone.
        if (this.child === requester) {
          this.sendToChild({ type: 'consent-response', sessionId: payload.sessionId, allow });
        }
        break;
      }
      default:
        break;
    }
  }

  /**
   * Writes one IPC message to the agent's stdin; a no-op without a writable child.
   *
   * @param {object} message - JSON-serializable message.
   */
  sendToChild(message) {
    if (this.child?.stdin?.writable) {
      this.child.stdin.write(`${IPC_PREFIX} ${JSON.stringify(message)}\n`);
    }
  }

  /**
   * Asks the agent to revoke a session.
   *
   * @param {string} sessionId
   */
  revokeSession(sessionId) {
    this.sendToChild({ type: 'revoke-session', sessionId });
  }

  /** Terminates the agent process, if any, and clears connection state. */
  stop() {
    if (!this.child) return;
    const child = this.child;
    // Detach first so the late 'exit' event of this process is ignored.
    this.child = null;
    this.childConsentMode = null;
    this.connectedUrls = new Set();
    this.stdoutBuffer = '';
    try {
      child.kill('SIGTERM');
    } catch {
      /* best effort: the process may already be gone */
    }
  }
}
|
||||
@@ -4,6 +4,10 @@ import { ViewHost } from './viewHost.js';
|
||||
|
||||
const TITLEBAR_HEIGHT = 44;
|
||||
const AUTH_TOKEN_STORAGE_KEY = 'auth-token';
|
||||
// TODO: Re-enable Computer Use menus after fixing the MCP server connection
|
||||
// between the desktop app and the web UI.
|
||||
const COMPUTER_USE_MENUS_ENABLED = false;
|
||||
|
||||
function isAllowedPermissionOrigin(sourceUrl, controlPlaneUrl) {
|
||||
try {
|
||||
const source = new URL(sourceUrl);
|
||||
@@ -433,6 +437,17 @@ export class DesktopWindowManager {
|
||||
accelerator: 'CmdOrCtrl+Shift+E',
|
||||
click: () => void this.actions.showEnvironmentPicker().catch((error) => this.actions.showError('Could not switch environment', error)),
|
||||
},
|
||||
{ type: 'separator' },
|
||||
{
|
||||
label: 'Services',
|
||||
visible: COMPUTER_USE_MENUS_ENABLED,
|
||||
submenu: [
|
||||
{
|
||||
label: 'Computer Use',
|
||||
click: () => void this.showDesktopSettings(),
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
label: 'Diagnostics',
|
||||
submenu: [
|
||||
|
||||
@@ -8,6 +8,14 @@ window.__MOCK_STATE__ = {
|
||||
shareableWebUrl: 'http://localhost:3001',
|
||||
localServerRunning: false,
|
||||
localStartupLogs: [],
|
||||
computerUse: { enabled: false, consentMode: 'ask', running: false, connectedCount: 0, targetCount: 0 },
|
||||
computerUsePermissions: {
|
||||
platform: 'darwin',
|
||||
supported: true,
|
||||
accessibility: 'not_granted',
|
||||
screenRecording: 'not_determined',
|
||||
message: 'macOS requires Accessibility and Screen Recording for Computer Use.',
|
||||
},
|
||||
environments: [
|
||||
{ id: 'env-api', name: 'api-gateway', subdomain: 'api-gateway', access_url: 'https://api-gateway.cloudcli.ai', status: 'running', region: 'fra1', agent: 'Claude Code' },
|
||||
{ id: 'env-web', name: 'web-frontend', subdomain: 'web-frontend', access_url: 'https://web-frontend.cloudcli.ai', status: 'stopped', region: 'sfo1', agent: 'Codex' },
|
||||
@@ -54,6 +62,7 @@ window.__MOCK_STATE__ = {
|
||||
refreshEnvironments: function () { return Promise.resolve(clone(mockState)); },
|
||||
refreshActiveTab: function () { return Promise.resolve(clone(mockState)); },
|
||||
copyDiagnostics: function () { return Promise.resolve(clone(mockState)); },
|
||||
showComputerAccess: function () { return Promise.resolve(clone(mockState)); },
|
||||
showEnvironmentPicker: function () { return Promise.resolve(clone(mockState)); },
|
||||
showLauncher: function () { return Promise.resolve(clone(mockState)); },
|
||||
showLocalSettings: function () { return Promise.resolve(clone(mockState)); },
|
||||
@@ -73,6 +82,23 @@ window.__MOCK_STATE__ = {
|
||||
mockState.desktopSettings[key] = key === 'themeMode' ? value : !!value;
|
||||
return Promise.resolve(clone(mockState));
|
||||
},
|
||||
updateComputerUse: function (settings) {
|
||||
mockState.computerUse = mockState.computerUse || { enabled: false, consentMode: 'ask', running: false, connectedCount: 0, targetCount: 0 };
|
||||
if (typeof settings.enabled === 'boolean') mockState.computerUse.enabled = settings.enabled;
|
||||
if (settings.consentMode === 'auto' || settings.consentMode === 'ask') mockState.computerUse.consentMode = settings.consentMode;
|
||||
mockState.computerUse.running = mockState.computerUse.enabled;
|
||||
return Promise.resolve(clone(mockState));
|
||||
},
|
||||
requestComputerUsePermission: function (permission) {
|
||||
mockState.computerUsePermissions = mockState.computerUsePermissions || {};
|
||||
if (permission === 'accessibility') mockState.computerUsePermissions.accessibility = 'granted';
|
||||
if (permission === 'screen') mockState.computerUsePermissions.screenRecording = 'granted';
|
||||
if (permission === 'all') {
|
||||
mockState.computerUsePermissions.accessibility = 'granted';
|
||||
mockState.computerUsePermissions.screenRecording = 'granted';
|
||||
}
|
||||
return Promise.resolve(clone(mockState));
|
||||
},
|
||||
openEnvironment: function (id) {
|
||||
var env = (mockState.environments || []).filter(function (item) { return item.id === id; })[0];
|
||||
if (env) {
|
||||
@@ -163,6 +189,22 @@ window.__MOCK_STATE__ = {
|
||||
return window.matchMedia && window.matchMedia('(prefers-color-scheme: dark)').matches ? 'dark' : 'light';
|
||||
}
|
||||
|
||||
/**
 * Summarizes the Computer Use state for display as a badge plus detail text.
 *
 * @param {object} state - Desktop state; only `state.computerUse` is read.
 * @returns {{ label: string, tone: string, detail: string }}
 */
function computerUseStatus(state) {
  var settings = (state && state.computerUse) || {};
  if (!settings.enabled) {
    return { label: 'Disabled', tone: 'idle', detail: 'CloudCLI cannot use this computer.' };
  }

  var linked = settings.connectedCount || 0;
  if (!linked) {
    return { label: 'Not connected', tone: 'warn', detail: 'No environment connected.' };
  }

  var summary = linked + ' environment' + (linked === 1 ? '' : 's') + ' connected.';
  // Unattended access is surfaced as a warning even though it is connected.
  var unattended = settings.consentMode === 'auto';
  return {
    label: 'Connected',
    tone: unattended ? 'warn' : 'ok',
    detail: unattended ? summary + ' Unattended access is on.' : summary,
  };
}
|
||||
|
||||
var CC = {
|
||||
icon: icon,
|
||||
esc: esc,
|
||||
@@ -172,6 +214,7 @@ window.__MOCK_STATE__ = {
|
||||
accountLabel: accountLabel,
|
||||
localUrl: localUrl,
|
||||
envCount: envCount,
|
||||
computerUseStatus: computerUseStatus,
|
||||
version: VERSION,
|
||||
logoUrl: LOGO_URL,
|
||||
platform: 'win',
|
||||
@@ -309,12 +352,42 @@ window.__MOCK_STATE__ = {
|
||||
return CC.run('Saved', function () { return bridge.updateSetting(node.key, node.value); });
|
||||
case 'set-theme-mode':
|
||||
return CC.run('Saved', function () { return bridge.updateSetting('themeMode', node.value); });
|
||||
case 'set-computer-mode':
|
||||
CC.state.computerUse = {
|
||||
...((CC.state && CC.state.computerUse) || {}),
|
||||
enabled: true,
|
||||
consentMode: node.value === 'auto' ? 'auto' : 'ask',
|
||||
};
|
||||
return CC.run('Saved', function () {
|
||||
return bridge.updateComputerUse({
|
||||
enabled: true,
|
||||
consentMode: node.value,
|
||||
});
|
||||
});
|
||||
case 'set-computer-enabled':
|
||||
CC.state.computerUse = {
|
||||
...((CC.state && CC.state.computerUse) || {}),
|
||||
enabled: !!node.value,
|
||||
};
|
||||
return CC.run('Saved', function () {
|
||||
var current = (CC.state && CC.state.computerUse) || { consentMode: 'ask' };
|
||||
return bridge.updateComputerUse({
|
||||
enabled: !!node.value,
|
||||
consentMode: current.consentMode === 'auto' ? 'auto' : 'ask',
|
||||
});
|
||||
});
|
||||
case 'computer-permission':
|
||||
return CC.run('Opening permission settings...', function () {
|
||||
return bridge.requestComputerUsePermission(node.getAttribute('data-cc-computer-permission'));
|
||||
});
|
||||
case 'settings-toggle':
|
||||
return CC.run('Opening desktop settings...', function () { return bridge.showDesktopSettings(); });
|
||||
case 'desktop-settings-toggle':
|
||||
return CC.run('Opening desktop settings...', function () { return bridge.showDesktopSettings(); });
|
||||
case 'local-settings-toggle':
|
||||
return CC.run('Opening local settings...', function () { return bridge.showLocalSettings(); });
|
||||
case 'computer-settings-toggle':
|
||||
return CC.run('Opening desktop settings...', function () { return bridge.showDesktopSettings(); });
|
||||
case 'settings-close':
|
||||
return CC.closeSheet();
|
||||
case 'dashboard':
|
||||
@@ -468,6 +541,62 @@ window.__MOCK_STATE__ = {
|
||||
);
|
||||
};
|
||||
|
||||
/**
 * Maps a permission status value to its badge text.
 *
 * @param {string} value - Permission status.
 * @returns {string} Display label; unrecognized values read as "Not granted".
 */
function permissionLabel(value) {
  switch (value) {
    case 'granted':
      return 'Granted';
    case 'denied':
    case 'restricted':
      return 'Needs attention';
    case 'not_applicable':
      return 'Not required';
    default:
      return 'Not granted';
  }
}
|
||||
|
||||
/**
 * Maps a permission status value to the badge tone class.
 *
 * @param {string} value - Permission status.
 * @returns {string} 'ok', 'warn' or 'idle' (the fallback).
 */
function permissionTone(value) {
  switch (value) {
    case 'granted':
    case 'not_applicable':
      return 'ok';
    case 'denied':
    case 'restricted':
      return 'warn';
    default:
      return 'idle';
  }
}
|
||||
|
||||
// TODO: Re-enable Computer Use menus after fixing the MCP server connection
|
||||
// between the desktop app and the web UI.
|
||||
var COMPUTER_USE_MENUS_ENABLED = false;
|
||||
|
||||
/**
 * Renders one OS permission row: title, explanation, status badge and, unless
 * the permission is granted or not applicable, an "Open settings" button.
 *
 * @param {string} key - Permission id placed in `data-cc-computer-permission`.
 * @param {string} label - Row title.
 * @param {string} detail - Explanatory text under the title.
 * @param {string} status - Permission status value.
 * @returns {string} HTML markup for the row.
 */
function renderComputerPermissionRow(key, label, detail, status) {
  var description =
    '<div><div class="cc-permission-title">' + CC.esc(label) + '</div><div class="cc-permission-detail">' + CC.esc(detail) + '</div></div>';
  var badge =
    '<span class="badge ' + permissionTone(status) + '">' + CC.esc(permissionLabel(status)) + '</span>';

  // Nothing left for the user to do once granted or when not applicable.
  var settled = status === 'granted' || status === 'not_applicable';
  var action = '';
  if (!settled) {
    action = '<button class="btn sm" data-cc-action="computer-permission" data-cc-computer-permission="' + CC.esc(key) + '">Open settings</button>';
  }

  return '<div class="cc-permission-row">' +
    description +
    '<div class="cc-permission-actions">' + badge + action + '</div>' +
    '</div>';
}
|
||||
|
||||
/**
 * Renders the OS permission block of the Computer Use section.
 *
 * @param {object} state - Desktop state; reads `state.computerUsePermissions`.
 * @returns {string} A note only when permissions are not supported, otherwise
 *   a note followed by the Accessibility and Screen Recording rows.
 */
function renderComputerPermissions(state) {
  var perms = state.computerUsePermissions || {};
  var fallbackNote = perms.supported
    ? 'Grant the required OS permissions before approving agent control.'
    : 'No additional OS permission setup is required from CloudCLI on this platform.';
  var note = '<div class="cc-note">' + CC.esc(perms.message || fallbackNote) + '</div>';

  if (!perms.supported) {
    return note;
  }

  var accessibilityRow = renderComputerPermissionRow('accessibility', 'Accessibility', 'Allows CloudCLI to click, type, and use accessibility actions.', perms.accessibility);
  var screenRow = renderComputerPermissionRow('screen', 'Screen Recording', 'Allows CloudCLI to capture screenshots for agent observation.', perms.screenRecording);
  return note + accessibilityRow + screenRow;
}
|
||||
|
||||
/**
 * Builds the "COMPUTER USE" settings section: the enable toggle, a status row
 * with a refresh button and, while enabled, the OS permission rows and the
 * consent-mode radio group.
 *
 * @param {object} state - Desktop state; reads `state.computerUse`.
 * @returns {*} Whatever `CC.renderSection` returns for the section.
 */
CC.buildComputerUseSection = function (state) {
  var settings = state.computerUse || {};
  var summary = computerUseStatus(state);

  var toggleHtml =
    '<label class="cc-toggle"><input type="checkbox" data-cc-computer-enabled="true"' + (settings.enabled ? ' checked' : '') + '><span><b>Enable Computer Use</b><br>Let CloudCLI use the computer. Agents cannot act until you approve a session.</span></label>';
  var statusHtml =
    '<div class="cc-row2"><span class="badge ' + CC.esc(summary.tone) + '">' + CC.esc(summary.label) + '</span><span class="cc-meta">' + CC.esc(summary.detail) + '</span><button class="btn sm" data-cc-action="refresh-environments">' + CC.icon('refresh', 14) + 'Refresh / relink</button></div>';

  var html = '<div class="cc-surface">' + toggleHtml + statusHtml;

  if (settings.enabled) {
    var unattended = settings.consentMode === 'auto';
    html += '<div class="cc-permissions">' + renderComputerPermissions(state) + '</div>';
    html += '<div class="cc-choice-group">' +
      CC.renderRadioOption('computer-access-mode', 'ask', !unattended, 'Ask before each session', 'Agents can request control, but you approve every session.') +
      CC.renderRadioOption('computer-access-mode', 'auto', unattended, 'Unattended access', 'Trusted agents can use this computer without a local approval prompt.') +
      '</div>';
  }

  html += '</div>';
  return CC.renderSection('COMPUTER USE', 'Control how agents can use this computer', html);
};
|
||||
|
||||
CC.renderLocalSettings = function () {
|
||||
var state = CC.state || {};
|
||||
var sections = [
|
||||
@@ -483,9 +612,13 @@ window.__MOCK_STATE__ = {
|
||||
};
|
||||
|
||||
CC.renderDesktopSettings = function () {
|
||||
var state = CC.state || {};
|
||||
var sections = [
|
||||
CC.buildThemeSection(CC.state || {}),
|
||||
CC.buildThemeSection(state),
|
||||
];
|
||||
if (COMPUTER_USE_MENUS_ENABLED) {
|
||||
sections.push(CC.buildComputerUseSection(state));
|
||||
}
|
||||
CC.renderSheet('Desktop Settings', 'Manage the desktop app appearance.', sections);
|
||||
};
|
||||
|
||||
@@ -548,6 +681,15 @@ window.__MOCK_STATE__ = {
|
||||
CC.act('set-theme-mode', { value: theme.value });
|
||||
return;
|
||||
}
|
||||
var computerMode = event.target.closest('[name="computer-access-mode"]');
|
||||
if (computerMode) {
|
||||
CC.act('set-computer-mode', { value: computerMode.value });
|
||||
return;
|
||||
}
|
||||
var computerEnabled = event.target.closest('[data-cc-computer-enabled]');
|
||||
if (computerEnabled) {
|
||||
CC.act('set-computer-enabled', { value: computerEnabled.checked });
|
||||
}
|
||||
});
|
||||
|
||||
document.addEventListener('keydown', function (event) {
|
||||
|
||||
118
electron/main.js
118
electron/main.js
@@ -1,9 +1,10 @@
|
||||
import { app, BrowserWindow, clipboard, dialog, ipcMain, session, shell } from 'electron';
|
||||
import { app, BrowserWindow, clipboard, dialog, ipcMain, session, shell, systemPreferences } from 'electron';
|
||||
import { spawn } from 'node:child_process';
|
||||
import path from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
|
||||
import { CloudController } from './cloud.js';
|
||||
import { ComputerAgentController } from './computerAgent.js';
|
||||
import { DesktopWindowManager } from './desktopWindow.js';
|
||||
import { DesktopNotificationsController } from './desktopNotifications.js';
|
||||
import { LocalServerController } from './localServer.js';
|
||||
@@ -29,6 +30,7 @@ let activeTarget = { kind: 'launcher', name: APP_NAME, url: null };
|
||||
let desktopWindow = null;
|
||||
let localServer = null;
|
||||
let cloud = null;
|
||||
let computerAgent = null;
|
||||
let desktopNotifications = null;
|
||||
let isQuitting = false;
|
||||
let isRefreshingCloud = false;
|
||||
@@ -61,6 +63,10 @@ function getSettingsPath() {
|
||||
return path.join(app.getPath('userData'), 'desktop-settings.json');
|
||||
}
|
||||
|
||||
/**
 * @returns {string} Path of `computer-use-settings.json` inside the app's
 *   `userData` directory.
 */
function getComputerUseSettingsPath() {
  const userDataDir = app.getPath('userData');
  return path.join(userDataDir, 'computer-use-settings.json');
}
|
||||
|
||||
function getDesktopNotificationsSettingsPath() {
|
||||
return path.join(app.getPath('userData'), 'desktop-notifications-settings.json');
|
||||
}
|
||||
@@ -72,6 +78,23 @@ function getRunningEnvironmentUrls() {
|
||||
.filter(Boolean);
|
||||
}
|
||||
|
||||
/**
 * Shows a warning dialog asking whether an agent may control this computer.
 *
 * @param {string} [sessionId] - Shown in the dialog detail when provided.
 * @returns {Promise<boolean>} `true` only when the "Allow this session"
 *   button was chosen.
 */
async function promptComputerUseConsent(sessionId) {
  const ALLOW_BUTTON = 0;
  const DENY_BUTTON = 1;

  const parentWindow = desktopWindow?.getMainWindow() || undefined;
  const detailLines = [
    'A cloud agent is requesting control of your mouse, keyboard, and screen for this session.',
    'Approval lasts for this session only. You can stop it any time from the Computer panel.',
    sessionId ? `\nSession: ${sessionId}` : '',
  ];

  const result = await dialog.showMessageBox(parentWindow, {
    type: 'warning',
    buttons: ['Allow this session', 'Deny'],
    defaultId: ALLOW_BUTTON,
    // Dismissing the dialog counts as a denial.
    cancelId: DENY_BUTTON,
    title: 'Computer Use request',
    message: 'An agent wants to control this computer',
    detail: detailLines.join('\n'),
  });
  return result.response === ALLOW_BUTTON;
}
|
||||
|
||||
/**
 * Picks the name to display for the currently active target.
 *
 * @returns {string} The active target's name, or the application name when the target has none.
 */
function getDisplayTargetName() {
  const targetName = activeTarget?.name;
  return targetName || APP_NAME;
}
|
||||
@@ -128,10 +151,66 @@ function getDesktopState() {
|
||||
tabs: tabs.getSerializableTabs(),
|
||||
activeTabId: tabs.activeTabId,
|
||||
environments: cloud.getEnvironments().map(serializeEnvironment),
|
||||
computerUse: computerAgent?.getState() || { enabled: false, consentMode: 'ask', running: false, connectedCount: 0, targetCount: 0 },
|
||||
desktopNotifications: desktopNotifications?.getState() || { enabled: false, supported: false, connectedCount: 0, targetCount: 0 },
|
||||
computerUsePermissions: getComputerUsePermissions(),
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Reports the OS-level permission status relevant to Computer Use.
 *
 * On platforms other than macOS no onboarding is needed and both permissions
 * are reported as `not_applicable`. On macOS the Accessibility trust flag and
 * the screen-capture media access status are queried without prompting; a
 * query that throws is reported as `unknown`.
 *
 * @returns {{platform: string, supported: boolean, accessibility: string, screenRecording: string, message: string}}
 */
function getComputerUsePermissions() {
  const { platform } = process;

  if (platform !== 'darwin') {
    return {
      platform,
      supported: false,
      accessibility: 'not_applicable',
      screenRecording: 'not_applicable',
      message: 'No OS permission onboarding is required from CloudCLI on this platform.',
    };
  }

  // Runs one status query and maps any failure to 'unknown'.
  const probe = (readStatus) => {
    try {
      return readStatus();
    } catch {
      return 'unknown';
    }
  };

  const accessibility = probe(() => (
    systemPreferences.isTrustedAccessibilityClient(false) ? 'granted' : 'not_granted'
  ));
  const screenRecording = probe(() => systemPreferences.getMediaAccessStatus('screen'));

  const everythingGranted = accessibility === 'granted' && screenRecording === 'granted';
  const message = everythingGranted
    ? 'macOS permissions are granted.'
    : 'macOS requires Accessibility and Screen Recording for Computer Use.';

  return {
    platform: 'darwin',
    supported: true,
    accessibility,
    screenRecording,
    message,
  };
}
|
||||
|
||||
/**
 * Starts the macOS permission flow for Computer Use.
 *
 * `accessibility` triggers the system Accessibility trust prompt, `screen`
 * opens the Screen Recording pane of System Settings, and `all` does both.
 * On other platforms nothing is requested and the permission name is not
 * validated.
 *
 * @param {'accessibility' | 'screen' | 'all'} permission - Which permission to request.
 * @returns {Promise<object>} The current desktop state.
 * @throws {Error} On macOS, when `permission` is not one of the known names.
 */
async function requestComputerUsePermission(permission) {
  if (process.platform === 'darwin') {
    const promptAccessibility = permission === 'accessibility' || permission === 'all';
    const openScreenRecordingPane = permission === 'screen' || permission === 'all';

    if (!promptAccessibility && !openScreenRecordingPane) {
      throw new Error(`Unknown Computer Use permission: ${permission}`);
    }

    if (promptAccessibility) {
      // Passing `true` asks macOS to show its trust prompt.
      systemPreferences.isTrustedAccessibilityClient(true);
    }
    if (openScreenRecordingPane) {
      await shell.openExternal('x-apple.systempreferences:com.apple.preference.security?Privacy_ScreenCapture');
    }
  }

  return getDesktopState();
}
|
||||
|
||||
async function openExternalUrl(url) {
|
||||
if (String(url).startsWith(CALLBACK_PROTOCOL + "://")) {
|
||||
await handleDeepLink(url);
|
||||
@@ -237,6 +316,8 @@ function getDiagnosticsText() {
|
||||
cloudEnvironmentCount: cloud.getEnvironments().length,
|
||||
cloudRunningEnvironmentCount: getRunningEnvironmentUrls().length,
|
||||
cloudAuthState: cloud.getAuthState(),
|
||||
computerUse: computerAgent?.getState() || null,
|
||||
computerUseSettingsPath: getComputerUseSettingsPath(),
|
||||
cloudAccountPath: getStorePath(),
|
||||
controlPlaneUrl: CLOUDCLI_CONTROL_PLANE_URL,
|
||||
}, null, 2);
|
||||
@@ -251,6 +332,22 @@ async function copyDiagnostics() {
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Opens the desktop settings view (when a window manager exists) so the user
 * can review Computer Use access.
 *
 * @returns {Promise<object>} The current desktop state.
 */
async function showComputerAccess() {
  const pendingShow = desktopWindow == null ? undefined : desktopWindow.showDesktopSettings();
  await pendingShow;
  return getDesktopState();
}
|
||||
|
||||
/**
 * Applies a (possibly partial) Computer Use settings update and persists it.
 *
 * Fields missing from `settings` keep their current value, so toggling
 * `enabled` alone does not reset the consent mode. A `consentMode` that is
 * supplied but is not `'auto'` is stored as `'ask'`.
 *
 * @param {{enabled?: boolean, consentMode?: 'ask' | 'auto'}} [settings] - Requested changes.
 * @returns {Promise<object>} The desktop state after the update.
 */
async function updateComputerUse(settings) {
  const current = computerAgent?.getSettings() || { enabled: false, consentMode: 'ask' };

  // Anything other than an explicit 'auto' resolves to the prompting mode.
  const normalizeConsentMode = (mode) => (mode === 'auto' ? 'auto' : 'ask');

  const requestedConsentMode = settings?.consentMode;
  const next = {
    enabled: typeof settings?.enabled === 'boolean' ? settings.enabled : current.enabled,
    consentMode: requestedConsentMode === undefined || requestedConsentMode === null
      ? normalizeConsentMode(current.consentMode)
      : normalizeConsentMode(requestedConsentMode),
  };

  await computerAgent?.saveSettings(next);
  syncDesktopState();
  return getDesktopState();
}
|
||||
|
||||
async function refreshCloudEnvironments({ showErrors = false } = {}) {
|
||||
isRefreshingCloud = true;
|
||||
syncDesktopState();
|
||||
@@ -273,6 +370,7 @@ async function refreshCloudEnvironments({ showErrors = false } = {}) {
|
||||
throw error;
|
||||
} finally {
|
||||
isRefreshingCloud = false;
|
||||
void computerAgent?.sync().catch((error) => console.error('[ComputerAgent] sync failed:', error?.message || error));
|
||||
void desktopNotifications?.sync().catch((error) => console.error('[DesktopNotifications] sync failed:', error?.message || error));
|
||||
syncDesktopState();
|
||||
}
|
||||
@@ -754,10 +852,16 @@ function registerIpcHandlers() {
|
||||
await desktopWindow.showLauncher();
|
||||
return getDesktopState();
|
||||
});
|
||||
ipcMain.handle('cloudcli-desktop:show-computer-access', async () => {
|
||||
await showComputerAccess();
|
||||
return getDesktopState();
|
||||
});
|
||||
ipcMain.handle('cloudcli-desktop:update-computer-use', async (_event, settings) => updateComputerUse(settings));
|
||||
ipcMain.handle('cloudcli-desktop:update-desktop-notifications', async (_event, settings) => {
|
||||
await desktopNotifications?.saveSettings(settings);
|
||||
return getDesktopState();
|
||||
});
|
||||
ipcMain.handle('cloudcli-desktop:request-computer-use-permission', async (_event, permission) => requestComputerUsePermission(permission));
|
||||
ipcMain.handle('cloudcli-desktop:show-desktop-settings', async () => desktopWindow.showDesktopSettings());
|
||||
ipcMain.handle('cloudcli-desktop:show-local-settings', async () => desktopWindow.showLocalSettings());
|
||||
ipcMain.handle('cloudcli-desktop:close-settings-window', async () => {
|
||||
@@ -795,6 +899,7 @@ function registerAppEvents() {
|
||||
});
|
||||
|
||||
app.on('before-quit', () => {
|
||||
computerAgent?.stop();
|
||||
desktopNotifications?.stop();
|
||||
});
|
||||
|
||||
@@ -846,6 +951,7 @@ async function createDesktopWindow() {
|
||||
openCloudDashboard,
|
||||
refreshCloudEnvironments: () => refreshCloudEnvironments({ showErrors: true }),
|
||||
setActiveTarget,
|
||||
showComputerAccess,
|
||||
showEnvironmentPicker,
|
||||
showError,
|
||||
startEnvironment,
|
||||
@@ -911,6 +1017,15 @@ async function bootstrap() {
|
||||
callbackUrl: CALLBACK_URL,
|
||||
onChange: syncDesktopState,
|
||||
});
|
||||
computerAgent = new ComputerAgentController({
|
||||
appRoot: getAppRoot(),
|
||||
settingsPath: getComputerUseSettingsPath(),
|
||||
isPackaged: app.isPackaged,
|
||||
getRunningEnvironmentUrls,
|
||||
getApiKey: () => cloud.getAccount()?.apiKey || '',
|
||||
promptConsent: promptComputerUseConsent,
|
||||
onChange: syncDesktopState,
|
||||
});
|
||||
desktopNotifications = new DesktopNotificationsController({
|
||||
settingsPath: getDesktopNotificationsSettingsPath(),
|
||||
appVersion: app.getVersion(),
|
||||
@@ -927,6 +1042,7 @@ async function bootstrap() {
|
||||
|
||||
await localServer.loadDesktopSettings();
|
||||
await cloud.loadCloudAccount();
|
||||
await computerAgent.loadSettings();
|
||||
await desktopNotifications.loadSettings();
|
||||
|
||||
registerProtocolHandler();
|
||||
|
||||
@@ -44,7 +44,10 @@ if (window.location.protocol === 'file:') {
|
||||
refreshActiveTab: () => ipcRenderer.invoke('cloudcli-desktop:reload-active-tab'),
|
||||
showEnvironmentPicker: () => ipcRenderer.invoke('cloudcli-desktop:show-environment-picker'),
|
||||
showLauncher: () => ipcRenderer.invoke('cloudcli-desktop:show-launcher'),
|
||||
showComputerAccess: () => ipcRenderer.invoke('cloudcli-desktop:show-computer-access'),
|
||||
showLocalSettings: () => ipcRenderer.invoke('cloudcli-desktop:show-local-settings'),
|
||||
updateComputerUse: (settings) => ipcRenderer.invoke('cloudcli-desktop:update-computer-use', settings),
|
||||
requestComputerUsePermission: (permission) => ipcRenderer.invoke('cloudcli-desktop:request-computer-use-permission', permission),
|
||||
showDesktopSettings: () => ipcRenderer.invoke('cloudcli-desktop:show-desktop-settings'),
|
||||
closeSettingsWindow: () => ipcRenderer.invoke('cloudcli-desktop:close-settings-window'),
|
||||
showActiveEnvironmentActionsMenu: () => ipcRenderer.invoke('cloudcli-desktop:show-active-environment-actions-menu'),
|
||||
|
||||
@@ -29,10 +29,13 @@
|
||||
"scripts": {
|
||||
"dev": "concurrently --kill-others \"npm run server:dev\" \"npm run client\"",
|
||||
"server": "node dist-server/server/index.js",
|
||||
"preserver:dev": "npm run build:semantics",
|
||||
"server:dev": "tsx --tsconfig server/tsconfig.json server/index.js",
|
||||
"preserver:dev-watch": "npm run build:semantics",
|
||||
"server:dev-watch": "tsx watch --tsconfig server/tsconfig.json server/index.js",
|
||||
"client": "vite",
|
||||
"desktop": "electron electron/main.js",
|
||||
"predesktop:dev": "npm run build:semantics",
|
||||
"desktop:dev": "cross-env ELECTRON_DEV_URL=http://127.0.0.1:5173 electron electron/main.js",
|
||||
"desktop:stage": "node scripts/release/prepare-desktop-app.js",
|
||||
"desktop:pack": "npm run build && npm run desktop:stage && electron-builder --projectDir .desktop-build/desktop-app --dir",
|
||||
@@ -40,10 +43,12 @@
|
||||
"desktop:dist:win": "npm run build && npm run desktop:stage && electron-builder --projectDir .desktop-build/desktop-app --win nsis",
|
||||
"server:bundle": "npm run build && node scripts/release/build-server-bundle.js",
|
||||
"desktop:icon:mac": "node electron/scripts/generate-macos-icon.js",
|
||||
"build": "npm run build:client && npm run build:server",
|
||||
"build": "npm run build:semantics && npm run build:client && npm run build:server",
|
||||
"build:client": "vite build",
|
||||
"build:semantics": "node scripts/build-computer-semantics.mjs",
|
||||
"prebuild:server": "node -e \"require('node:fs').rmSync('dist-server', { recursive: true, force: true })\"",
|
||||
"build:server": "tsc -p server/tsconfig.json && tsc-alias -p server/tsconfig.json",
|
||||
"postbuild:server": "node scripts/copy-computer-semantics-bin.mjs",
|
||||
"preview": "vite preview",
|
||||
"typecheck": "tsc --noEmit -p tsconfig.json && tsc --noEmit -p server/tsconfig.json",
|
||||
"lint": "eslint src/ server/",
|
||||
@@ -51,7 +56,7 @@
|
||||
"start": "npm run build && npm run server",
|
||||
"release": "./release.sh",
|
||||
"prepublishOnly": "npm run build",
|
||||
"postinstall": "node scripts/fix-node-pty.js",
|
||||
"postinstall": "node scripts/fix-node-pty.js && npm run build:semantics",
|
||||
"prepare": "husky",
|
||||
"update:platform": "./update-platform.sh"
|
||||
},
|
||||
|
||||
133
scripts/build-computer-semantics.mjs
Normal file
133
scripts/build-computer-semantics.mjs
Normal file
@@ -0,0 +1,133 @@
|
||||
#!/usr/bin/env node
|
||||
import { spawn } from 'node:child_process';
|
||||
import fs from 'node:fs/promises';
|
||||
import path from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
|
||||
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
||||
const rootDir = path.resolve(__dirname, '..');
|
||||
const platform = process.env.CLOUDCLI_SEMANTICS_PLATFORM || process.platform;
|
||||
const arch = process.env.CLOUDCLI_SEMANTICS_ARCH || process.arch;
|
||||
const platformArch = `${platform}-${arch}`;
|
||||
const semanticsRoot = path.join(rootDir, 'server', 'modules', 'computer-use', 'semantics');
|
||||
const outDir = path.join(semanticsRoot, 'bin', platformArch);
|
||||
const requireBuild = process.env.CLOUDCLI_SEMANTICS_BUILD_REQUIRED === '1';
|
||||
|
||||
/**
 * Runs a command to completion, streaming its output to this process.
 *
 * @param {string} command - Executable to launch.
 * @param {string[]} args - Arguments passed to the executable.
 * @param {object} [options] - Extra `child_process.spawn` options; they override the defaults.
 * @returns {Promise<void>} Resolves when the command exits with code 0.
 * @throws {Error} Rejects when the process cannot be spawned, exits with a
 *   non-zero code, or is terminated by a signal.
 */
function run(command, args, options = {}) {
  return new Promise((resolve, reject) => {
    const child = spawn(command, args, {
      stdio: 'inherit',
      shell: process.platform === 'win32',
      ...options,
    });

    child.once('error', reject);
    child.once('exit', (code, signal) => {
      if (code === 0) {
        resolve();
        return;
      }
      // `code` is null when the child was killed by a signal; name the signal
      // instead of printing "exited with code null".
      const outcome = code === null
        ? `was terminated by signal ${signal}`
        : `exited with code ${code}`;
      reject(new Error(`${command} ${args.join(' ')} ${outcome}`));
    });
  });
}
|
||||
|
||||
/**
 * Checks whether a command can be launched by running `<command> --version`.
 *
 * @param {string} command - Executable name to probe.
 * @returns {Promise<boolean>} `true` when the probe exits with code 0; `false`
 *   when it cannot be spawned or exits with any other status.
 */
function commandExists(command) {
  const probeOptions = {
    stdio: 'ignore',
    shell: process.platform === 'win32',
  };

  return new Promise((settle) => {
    const probe = spawn(command, ['--version'], probeOptions);
    const onSpawnFailure = () => settle(false);
    const onExit = (exitCode) => settle(exitCode === 0);
    probe.once('error', onSpawnFailure);
    probe.once('exit', onExit);
  });
}
|
||||
|
||||
/**
 * Tells whether a path is accessible.
 *
 * @param {string} filePath - Path to check.
 * @returns {Promise<boolean>} `true` when `fs.access` succeeds, `false` when it fails for any reason.
 */
async function pathExists(filePath) {
  return fs.access(filePath).then(
    () => true,
    () => false,
  );
}
|
||||
|
||||
/**
 * Decides whether a build output is at least as new as all of its inputs.
 *
 * @param {string} output - Path of the built artifact.
 * @param {string[]} inputs - Paths of the files the artifact is built from.
 * @returns {Promise<boolean>} `false` when the output cannot be stat'ed or any
 *   input has a newer modification time; `true` otherwise.
 * @throws {Error} When an input file cannot be stat'ed.
 */
async function isUpToDate(output, inputs) {
  let outputStat;
  try {
    // A single stat replaces the former access-then-stat pair, so an output
    // that disappears between the two calls can no longer crash the script.
    outputStat = await fs.stat(output);
  } catch {
    // Missing or unreadable output: report it as stale so the caller rebuilds.
    return false;
  }

  const inputStats = await Promise.all(inputs.map((input) => fs.stat(input)));
  return inputStats.every((inputStat) => inputStat.mtimeMs <= outputStat.mtimeMs);
}
|
||||
|
||||
/**
 * Verifies that a build tool is available before it is used.
 *
 * @param {string} command - Executable that must be present.
 * @param {string} helpText - Hint appended to the "not found" message.
 * @returns {Promise<boolean>} `true` when the command is available; `false`
 *   when it is missing and the build is optional (a skip notice is logged).
 * @throws {Error} When the command is missing and the build is required.
 */
async function ensureCommand(command, helpText) {
  const found = await commandExists(command);
  if (!found) {
    const message = `${command} was not found. ${helpText}`;
    if (requireBuild) {
      throw new Error(message);
    }
    console.log(`Skipping semantic helper build: ${message}`);
    return false;
  }
  return true;
}
|
||||
|
||||
/**
 * Compiles the macOS semantic helper with `swiftc` into the platform output
 * directory. Does nothing when `swiftc` is unavailable (optional build) or the
 * existing binary is not older than its source.
 */
async function buildMacHelper() {
  const source = path.join(semanticsRoot, 'helpers', 'macos', 'CloudCLISemantics.swift');
  const output = path.join(outDir, 'CloudCLISemantics');

  if (!(await ensureCommand('swiftc', 'Install Xcode Command Line Tools to compile the macOS helper.'))) {
    return;
  }
  if (await isUpToDate(output, [source])) {
    console.log(`Semantic helper is up to date: ${path.relative(rootDir, output)}`);
    return;
  }

  await fs.mkdir(outDir, { recursive: true });
  await run('swiftc', [
    source,
    '-o',
    output,
    '-framework',
    'AppKit',
    '-framework',
    'ApplicationServices',
  ]);
  await fs.chmod(output, 0o755);
  console.log(`Built ${path.relative(rootDir, output)}`);
}

/**
 * Publishes the Windows semantic helper with `dotnet publish` into the
 * platform output directory. Does nothing when `dotnet` is unavailable
 * (optional build) or the existing binary is not older than its inputs.
 */
async function buildWindowsHelper() {
  const project = path.join(semanticsRoot, 'helpers', 'windows', 'CloudCLISemantics.csproj');
  const source = path.join(semanticsRoot, 'helpers', 'windows', 'Program.cs');
  const output = path.join(outDir, 'CloudCLISemantics.exe');

  if (!(await ensureCommand('dotnet', '.NET SDK is required to compile the Windows helper.'))) {
    return;
  }
  if (await isUpToDate(output, [project, source])) {
    console.log(`Semantic helper is up to date: ${path.relative(rootDir, output)}`);
    return;
  }

  await fs.mkdir(outDir, { recursive: true });
  await run('dotnet', [
    'publish',
    project,
    '-c',
    'Release',
    '-r',
    arch === 'arm64' ? 'win-arm64' : 'win-x64',
    '--self-contained',
    'false',
    '-p:PublishSingleFile=true',
    '-o',
    outDir,
  ]);
  console.log(`Built ${path.relative(rootDir, output)}`);
}

try {
  if (platform === 'darwin') {
    await buildMacHelper();
  } else if (platform === 'win32') {
    await buildWindowsHelper();
  } else {
    console.log(`Semantic helper build is not supported for ${platform}-${arch}.`);
  }
} catch (error) {
  // This script also runs from `postinstall`. Unless the build is explicitly
  // required, a compiler failure or missing helper source must not fail the
  // whole install; it is reported and skipped, like a missing toolchain.
  if (requireBuild) {
    throw error;
  }
  console.warn(`Skipping semantic helper build: ${error?.message || error}`);
}
|
||||
24
scripts/copy-computer-semantics-bin.mjs
Normal file
24
scripts/copy-computer-semantics-bin.mjs
Normal file
@@ -0,0 +1,24 @@
|
||||
#!/usr/bin/env node
|
||||
import fs from 'node:fs/promises';
|
||||
import path from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
|
||||
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
||||
const rootDir = path.resolve(__dirname, '..');
|
||||
const sourceDir = path.join(rootDir, 'server', 'modules', 'computer-use', 'semantics', 'bin');
|
||||
const targetDir = path.join(rootDir, 'dist-server', 'server', 'modules', 'computer-use', 'semantics', 'bin');
|
||||
|
||||
/**
 * Tells whether a path is accessible.
 *
 * @param {string} filePath - Path to check.
 * @returns {Promise<boolean>} `true` when `fs.access` succeeds, `false` when it fails for any reason.
 */
async function pathExists(filePath) {
  let accessible = true;
  await fs.access(filePath).catch(() => {
    accessible = false;
  });
  return accessible;
}
|
||||
|
||||
// Copy the compiled semantic helpers next to the compiled server output.
// Nothing happens when no helper has been built.
const helpersAvailable = await pathExists(sourceDir);
if (helpersAvailable) {
  const targetParent = path.dirname(targetDir);
  await fs.mkdir(targetParent, { recursive: true });
  await fs.cp(sourceDir, targetDir, { recursive: true });

  const relativeTarget = path.relative(rootDir, targetDir);
  console.log(`Copied Computer Use semantic helpers to ${relativeTarget}`);
}
|
||||
@@ -113,6 +113,12 @@ await copyRequired('electron');
|
||||
await copyRequired('dist');
|
||||
await copyRequired('public');
|
||||
|
||||
// The desktop app still ships the standalone Computer Use desktop agent, but
|
||||
// not the full local server. Local CloudCLI is downloaded on demand.
|
||||
await copyRequired('dist-server/server/computer-use-agent.js');
|
||||
await copyIfExists('dist-server/server/computer-use-agent.js.map');
|
||||
await copyRequired('dist-server/server/modules/computer-use');
|
||||
|
||||
const copiedRuntimeDependencies = [];
|
||||
if (await copyNodeModule('ws')) {
|
||||
copiedRuntimeDependencies.push('ws');
|
||||
|
||||
@@ -69,7 +69,7 @@ const sessionIdSchema = {
|
||||
const tools: ToolDefinition[] = [
|
||||
{
|
||||
name: 'browser_create_session',
|
||||
description: 'Create a temporary Browser session that the agent can control. Optionally provide a background profileName to reuse cookies and storage.',
|
||||
description: 'Create a Browser session that the agent can control. Provide profileName to use a specific persistent profile; when omitted, the configured persistent profile is used only if session persistence is enabled, otherwise a temporary session is created.',
|
||||
inputSchema: {
|
||||
type: 'object',
|
||||
properties: {
|
||||
|
||||
279
server/computer-use-agent.ts
Normal file
279
server/computer-use-agent.ts
Normal file
@@ -0,0 +1,279 @@
|
||||
#!/usr/bin/env node
|
||||
/**
|
||||
* CloudCLI Computer Use — Desktop Agent.
|
||||
*
|
||||
* Standalone executor for the cloud relay. The Electron desktop app spawns this
|
||||
* process (via ELECTRON_RUN_AS_NODE) whenever Computer Use is enabled and the
|
||||
* user has running cloud environments. It opens an outbound websocket to each
|
||||
* environment's `/desktop-agent` endpoint and executes the `computer_*` actions
|
||||
* the hosted server relays, returning a fresh screenshot each time.
|
||||
*
|
||||
* It is fully self-contained: it reuses the shared nut-js executor module and
|
||||
* does NOT depend on the local CloudCLI server. Consent is enforced here (the
|
||||
* controlled machine is the authority): in `ask` mode the agent asks the parent
|
||||
* Electron process for a per-session decision before the first action runs.
|
||||
*/
|
||||
import readline from 'node:readline';
|
||||
|
||||
import { WebSocket } from 'ws';
|
||||
|
||||
import {
|
||||
getRuntimeReadiness,
|
||||
type Point,
|
||||
type ClickButton,
|
||||
type ScrollDirection,
|
||||
} from './modules/computer-use/computer-executor.js';
|
||||
import { runRawComputerAction } from './modules/computer-use/actions/raw-action-dispatcher.js';
|
||||
import type { RawActionTarget, RawComputerAction } from './modules/computer-use/actions/raw-action-types.js';
|
||||
import { computerSemanticsService } from './modules/computer-use/computer-semantics.service.js';
|
||||
|
||||
type ConsentMode = 'ask' | 'auto';
|
||||
|
||||
type RelayMessage = {
|
||||
kind?: string;
|
||||
type?: string;
|
||||
id?: string;
|
||||
params?: Record<string, unknown>;
|
||||
};
|
||||
|
||||
const IPC_PREFIX = '@@CUAGENT@@';
|
||||
const RECONNECT_BASE_MS = 2000;
|
||||
const RECONNECT_MAX_MS = 30_000;
|
||||
|
||||
const consentMode: ConsentMode = process.env.CLOUDCLI_COMPUTER_USE_CONSENT_MODE === 'auto' ? 'auto' : 'ask';
|
||||
const agentLabel = process.env.CLOUDCLI_DESKTOP_AGENT_LABEL || 'cloudcli-desktop';
|
||||
const desktopAgentApiKey = process.env.CLOUDCLI_DESKTOP_AGENT_API_KEY || '';
|
||||
|
||||
/**
 * Reads the relay endpoints this agent should connect to.
 *
 * `CLOUDCLI_DESKTOP_AGENT_URLS` takes precedence over
 * `CLOUDCLI_DESKTOP_AGENT_URL`; the value is a comma-separated list.
 *
 * @returns The trimmed, non-empty entries in their original order.
 */
function parseTargets(): string[] {
  const {
    CLOUDCLI_DESKTOP_AGENT_URLS: urlList,
    CLOUDCLI_DESKTOP_AGENT_URL: singleUrl,
  } = process.env;
  const raw = urlList || singleUrl || '';

  const targets: string[] = [];
  for (const piece of raw.split(',')) {
    const url = piece.trim();
    if (url) {
      targets.push(url);
    }
  }
  return targets;
}
|
||||
|
||||
// --- Parent (Electron) IPC over stdout/stdin -------------------------------
|
||||
|
||||
/**
 * Sends one control message to the parent process as a single stdout line:
 * the IPC prefix, a space, and the JSON-encoded message.
 */
function emitToParent(message: Record<string, unknown>): void {
  const serialized = JSON.stringify(message);
  const line = [IPC_PREFIX, serialized].join(' ');
  process.stdout.write(`${line}\n`);
}
|
||||
|
||||
/** Per-session consent decisions, and resolvers awaiting a parent reply. */
|
||||
const sessionConsent = new Map<string, 'granted' | 'denied'>();
|
||||
const pendingConsent = new Map<string, Array<(allow: boolean) => void>>();
|
||||
|
||||
/**
 * Applies one control line received from the parent process on stdin.
 *
 * Lines that do not start with the IPC prefix are ignored. A
 * `consent-response` records the decision for the session and releases every
 * caller waiting on it; a `revoke-session` forgets the stored decision.
 * Malformed lines are ignored.
 */
function handleParentLine(line: string): void {
  const trimmed = line.trim();
  if (!trimmed.startsWith(IPC_PREFIX)) {
    return;
  }

  try {
    const payload = JSON.parse(trimmed.slice(IPC_PREFIX.length).trim()) as Record<string, unknown>;
    const { type, sessionId } = payload;
    if (typeof sessionId !== 'string') {
      return;
    }

    if (type === 'consent-response') {
      const allow = payload.allow === true;
      sessionConsent.set(sessionId, allow ? 'granted' : 'denied');

      const waiters = pendingConsent.get(sessionId) || [];
      pendingConsent.delete(sessionId);
      waiters.forEach((release) => {
        release(allow);
      });
      return;
    }

    if (type === 'revoke-session') {
      sessionConsent.delete(sessionId);
    }
  } catch {
    // ignore malformed control lines
  }
}

const stdinReader = readline.createInterface({ input: process.stdin });
stdinReader.on('line', handleParentLine);
|
||||
|
||||
/**
 * Resolves whether actions may run for the given session.
 *
 * In `auto` consent mode every session is allowed. Otherwise a stored
 * decision is reused; when none exists the parent process is asked once and
 * all callers for that session wait for the same answer.
 *
 * @param sessionId - Session the pending action belongs to.
 * @returns `true` when the session is allowed to control the desktop.
 */
async function ensureConsent(sessionId: string): Promise<boolean> {
  if (consentMode === 'auto') {
    return true;
  }

  const decision = sessionConsent.get(sessionId);
  if (decision !== undefined) {
    return decision === 'granted';
  }

  return new Promise<boolean>((resolve) => {
    const waiters = pendingConsent.get(sessionId);
    if (waiters) {
      // A request for this session is already in flight; join it rather than
      // making the parent show a second prompt for the same session.
      waiters.push(resolve);
      return;
    }
    pendingConsent.set(sessionId, [resolve]);
    emitToParent({ type: 'consent-request', sessionId });
  });
}
|
||||
|
||||
// --- Action execution ------------------------------------------------------
|
||||
|
||||
/**
 * Converts an untrusted value into a screen point.
 *
 * @param value - Candidate value, typically parsed from a relay message.
 * @returns A `{ x, y }` point when both coordinates are finite numbers,
 *   otherwise `undefined`.
 */
function asPoint(value: unknown): Point | undefined {
  if (!value || typeof value !== 'object') {
    return undefined;
  }
  const { x, y } = value as Record<string, unknown>;
  // `typeof` alone would accept NaN and Infinity; JSON such as `1e999` parses
  // to Infinity, so non-finite coordinates are rejected explicitly.
  if (typeof x === 'number' && typeof y === 'number' && Number.isFinite(x) && Number.isFinite(y)) {
    return { x, y };
  }
  return undefined;
}
|
||||
|
||||
/**
 * Translates a relayed action (type + loosely typed params) into a raw
 * computer action.
 *
 * @param type - Action name sent by the relay.
 * @param params - Untrusted action parameters.
 * @returns The action to hand to the raw action dispatcher.
 * @throws {Error} When the action type is unsupported, a required point is
 *   missing or invalid, or an optional point is present but malformed.
 */
function rawActionFromRelay(type: string, params: Record<string, unknown>): RawComputerAction {
  // For click/scroll an absent point means "act at the current cursor
  // position". A point that is present but malformed is rejected, so a bad
  // payload cannot silently turn into an action at the cursor.
  const optionalPoint = (): Point | undefined => {
    if (params.point === undefined || params.point === null) {
      return undefined;
    }
    const parsed = asPoint(params.point);
    if (!parsed) {
      throw new Error(`${type} received an invalid point.`);
    }
    return parsed;
  };

  // Keeps NaN/Infinity away from the executor.
  const finiteNumber = (value: unknown): number | undefined =>
    (typeof value === 'number' && Number.isFinite(value) ? value : undefined);

  switch (type) {
    case 'screenshot':
      return { type: 'screenshot' };
    case 'cursor_position':
      return { type: 'cursor_position' };
    case 'mouse_move': {
      const point = asPoint(params.point);
      if (!point) {
        throw new Error('mouse_move requires a valid point.');
      }
      return { type: 'mouse_move', point };
    }
    case 'click':
      return {
        type: 'click',
        button: (params.button as ClickButton) || 'left',
        point: optionalPoint(),
        double: params.double === true,
      };
    case 'drag': {
      const from = asPoint(params.from);
      const to = asPoint(params.to);
      if (!from || !to) {
        throw new Error('drag requires valid from and to points.');
      }
      return { type: 'drag', from, to, button: (params.button as ClickButton) || 'left' };
    }
    case 'type':
      return { type: 'type', text: String(params.text ?? '') };
    case 'key':
      return { type: 'key', key: String(params.key ?? '') };
    case 'scroll':
      return {
        type: 'scroll',
        direction: (params.direction as ScrollDirection) || 'down',
        amount: finiteNumber(params.amount) ?? 3,
        point: optionalPoint(),
      };
    case 'wait':
      return { type: 'wait', ms: finiteNumber(params.ms) };
    default:
      throw new Error(`Unsupported computer action: ${type}`);
  }
}
|
||||
|
||||
/**
 * Executes one relayed action on this machine.
 *
 * `semantic_tool` actions are forwarded to the semantics service with the
 * session id merged into the tool arguments. Every other action goes through
 * the raw action dispatcher, which requires the runtime to be installed.
 *
 * @param type - Relayed action name.
 * @param params - Untrusted action parameters.
 * @returns The result produced by the semantics service or the dispatcher.
 * @throws {Error} When `semantic_tool` has no tool name, the runtime is not
 *   installed, or the action itself fails.
 */
async function runAction(type: string, params: Record<string, unknown>): Promise<Record<string, unknown>> {
  if (type !== 'semantic_tool') {
    const { nutInstalled, screenshotInstalled } = getRuntimeReadiness();
    if (!(nutInstalled && screenshotInstalled)) {
      throw new Error('Computer Use runtime is not installed on the desktop agent.');
    }

    const displaySize = (params.displaySize as RawActionTarget['displaySize']) ?? null;
    const target: RawActionTarget = { displaySize };
    const action = rawActionFromRelay(type, params);
    return await runRawComputerAction(action, target) as Record<string, unknown>;
  }

  const { toolName: rawToolName, arguments: rawArguments, sessionId: rawSessionId } = params;
  const toolName = typeof rawToolName === 'string' ? rawToolName : '';
  if (!toolName) {
    throw new Error('semantic_tool requires toolName.');
  }

  const toolArguments = rawArguments && typeof rawArguments === 'object'
    ? rawArguments as Record<string, unknown>
    : {};
  const sessionId = typeof rawSessionId === 'string' ? rawSessionId : 'default';

  return await computerSemanticsService.callTool(toolName, { ...toolArguments, sessionId }) as Record<string, unknown>;
}
|
||||
|
||||
// --- Relay connection ------------------------------------------------------
|
||||
|
||||
/**
 * Maintains a relay connection to one environment.
 *
 * Opens a websocket to `url`, registers this agent, executes the
 * `computer_relay` requests it receives (after the consent check), and
 * replies with a `computer_relay_result`. When the connection closes it is
 * reopened with exponential backoff, unless the server closed it because
 * Computer Use is disabled.
 *
 * @param url - The environment's desktop-agent websocket endpoint.
 */
function connect(url: string): void {
  let reconnectMs = RECONNECT_BASE_MS;

  const open = () => {
    // Each attempt owns its socket. Replies for a request are written to the
    // connection that delivered it, never to a newer connection that may
    // still be connecting.
    const socket = new WebSocket(url, {
      headers: desktopAgentApiKey ? { 'X-API-Key': desktopAgentApiKey } : undefined,
    });

    /** Sends a payload if this connection is still open; never throws. */
    const reply = (payload: Record<string, unknown>): void => {
      if (socket.readyState !== WebSocket.OPEN) {
        return;
      }
      try {
        socket.send(JSON.stringify(payload));
      } catch {
        // The socket closed mid-send; the close handler takes care of reconnecting.
      }
    };

    socket.on('open', () => {
      reconnectMs = RECONNECT_BASE_MS;
      emitToParent({ type: 'connected', url });
      reply({ kind: 'register', label: agentLabel, consentMode });
    });

    socket.on('message', async (raw) => {
      let message: RelayMessage | null;
      try {
        message = JSON.parse(String(raw)) as RelayMessage | null;
      } catch {
        return;
      }
      // Valid JSON is not necessarily an object (e.g. `null` or a number).
      if (!message || typeof message !== 'object') {
        return;
      }

      const kind = message.kind || message.type;
      if (kind !== 'computer_relay' || typeof message.id !== 'string') {
        return;
      }

      const id = message.id;
      const params: Record<string, unknown> = message.params && typeof message.params === 'object'
        ? message.params
        : {};
      // When the envelope has no `kind`, its `type` field is the envelope
      // discriminator ('computer_relay'), so the action name can only come
      // from `params.type`.
      const envelopeType = message.kind ? message.type : undefined;
      const type = String(envelopeType || (params.type as string) || '');
      const sessionId = typeof params.sessionId === 'string' ? params.sessionId : 'default';

      if (type === 'stop_session') {
        sessionConsent.delete(sessionId);
        reply({ kind: 'computer_relay_result', id, result: { ok: true } });
        return;
      }

      try {
        const allowed = await ensureConsent(sessionId);
        if (!allowed) {
          reply({ kind: 'computer_relay_result', id, error: 'The user denied desktop control for this session.' });
          return;
        }
        const result = await runAction(type, params);
        reply({ kind: 'computer_relay_result', id, result });
      } catch (error) {
        reply({
          kind: 'computer_relay_result',
          id,
          error: error instanceof Error ? error.message : 'Desktop agent action failed.',
        });
      }
    });

    const scheduleReconnect = (code?: number, reason?: Buffer) => {
      const reasonText = reason?.toString() || '';
      emitToParent({ type: 'disconnected', url, code, reason: reasonText });
      // The server rejected the agent because Computer Use is disabled there;
      // retrying would only repeat the rejection.
      if (code === 1008 && /computer use.*disabled/i.test(reasonText)) {
        return;
      }
      setTimeout(open, reconnectMs);
      reconnectMs = Math.min(reconnectMs * 2, RECONNECT_MAX_MS);
    };

    socket.on('close', scheduleReconnect);
    socket.on('error', () => {
      try { socket.close(); } catch { /* noop */ }
    });
  };

  open();
}
|
||||
|
||||
/**
 * Entry point: announces the configured targets to the parent process and
 * opens one relay connection per target. Reports an error to the parent when
 * no target URL is configured.
 */
function main(): void {
  const targets = parseTargets();

  if (targets.length > 0) {
    emitToParent({ type: 'starting', targets, consentMode });
    targets.forEach((url) => {
      connect(url);
    });
    return;
  }

  emitToParent({ type: 'error', message: 'No desktop-agent target URLs provided.' });
}
|
||||
|
||||
main();
|
||||
574
server/computer-use-mcp.ts
Normal file
574
server/computer-use-mcp.ts
Normal file
@@ -0,0 +1,574 @@
|
||||
#!/usr/bin/env node
|
||||
import './load-env.js';
|
||||
|
||||
type JsonRpcRequest = {
|
||||
jsonrpc: '2.0';
|
||||
id?: string | number | null;
|
||||
method: string;
|
||||
params?: Record<string, unknown>;
|
||||
};
|
||||
|
||||
type ToolDefinition = {
|
||||
name: string;
|
||||
description: string;
|
||||
inputSchema: Record<string, unknown>;
|
||||
};
|
||||
|
||||
/**
 * Reads a required string argument.
 *
 * @param value - Untrusted input value.
 * @param name - Argument name used in the error message.
 * @returns The trimmed string.
 * @throws {Error} When the value is not a string or is blank.
 */
const readString = (value: unknown, name: string): string => {
  const text = typeof value === 'string' ? value.trim() : '';
  if (text === '') {
    throw new Error(`${name} is required.`);
  }
  return text;
};
|
||||
|
||||
/**
 * Reads an optional string argument.
 *
 * @param value - Untrusted input value.
 * @returns The trimmed string, or `undefined` when the value is not a string or is blank.
 */
const readOptionalString = (value: unknown): string | undefined => {
  if (typeof value !== 'string') {
    return undefined;
  }
  const trimmed = value.trim();
  return trimmed === '' ? undefined : trimmed;
};
|
||||
|
||||
/**
 * Reads an optional numeric argument.
 *
 * @param value - Untrusted input value.
 * @returns The value when it is a finite number, otherwise `undefined`.
 */
const readNumber = (value: unknown): number | undefined => {
  // Number.isFinite is false for every non-number, so no separate typeof check is needed.
  if (Number.isFinite(value)) {
    return value as number;
  }
  return undefined;
};
|
||||
|
||||
/**
 * Reads a mouse button argument.
 *
 * @param value - Untrusted input value.
 * @returns `'right'` or `'middle'` when given exactly; `'left'` for anything else.
 */
const readMouseButton = (value: unknown): 'left' | 'right' | 'middle' => {
  if (value === 'right') {
    return 'right';
  }
  if (value === 'middle') {
    return 'middle';
  }
  return 'left';
};
|
||||
|
||||
const apiUrl = (process.env.CLOUDCLI_COMPUTER_USE_API_URL || 'http://127.0.0.1:3001/api/computer-use-mcp').replace(/\/$/, '');
|
||||
const apiToken = process.env.CLOUDCLI_COMPUTER_USE_MCP_TOKEN || '';
|
||||
|
||||
const computerUseInstructions = `
|
||||
CloudCLI Computer Use lets you operate the user's real desktop through guarded sessions. Use it deliberately: observe first, act second, then verify.
|
||||
|
||||
Recommended app workflow:
|
||||
1. If you do not know the target app name, call computer_list_apps.
|
||||
2. Call computer_get_app_state for the target app before app-scoped actions. It returns a screenshot, accessibility elements, and a stateId.
|
||||
3. Prefer semantic element actions using stateId + element_index from the latest computer_get_app_state result. Do not guess element indexes or reuse them after large UI changes without refreshing state.
|
||||
4. Use x/y coordinates from the returned screenshot only when no suitable element_index is available.
|
||||
5. After every action, inspect the returned screenshot/state before deciding the next action.
|
||||
|
||||
Use app-scoped tools when the target app is known: computer_list_apps, computer_get_app_state, computer_click_element, computer_perform_secondary_action, computer_set_value, computer_type_text, computer_press_key, computer_scroll_element, and computer_app_drag.
|
||||
|
||||
Use raw desktop tools only when you need full-screen coordinate control, cursor position, or current-focus input: computer_screenshot, computer_cursor_position, computer_mouse_move, computer_click, computer_drag, computer_type, computer_key, computer_scroll, computer_wait, and computer_close_session. Raw coordinates are screenshot pixels, so call computer_screenshot first when you need a coordinate frame.
|
||||
|
||||
Most tools can use or create the active agent session automatically when sessionId is omitted. In local mode, input actions require the user to grant control in the Computer tab before they work. In cloud mode, approval is handled by the linked CloudCLI desktop app.
|
||||
|
||||
If a tool reports missing permission, denied control, or no available desktop session, stop retrying and ask the user to fix access. For local mode, ask them to open CloudCLI Desktop, go to the Computer tab, enable Computer Use, grant the requested OS permissions, and allow the session. On macOS this usually means Accessibility and Screen Recording. For cloud mode, ask them to keep the linked CloudCLI Desktop app running and approve the cloud agent's Computer Use request there.
|
||||
|
||||
Ask before sending, deleting, purchasing, approving, uploading, publishing, changing account settings, or making other externally visible or destructive changes. Do not inspect unrelated private content unless the user explicitly asked for that task.
|
||||
`.trim();
|
||||
|
||||
/**
 * Calls one Computer Use tool on the bridge API and returns its payload.
 *
 * Sends `input` as a JSON POST to `${apiUrl}/tools/<toolName>` with the bearer token and
 * unwraps the `{ success, data, error }` response envelope.
 *
 * @param toolName Tool identifier; URL-encoded into the request path.
 * @param input JSON-serializable tool arguments sent as the request body.
 * @returns The `data` field of the response envelope.
 * @throws Error when the token is not configured, the HTTP status is not ok, the envelope
 *   reports `success: false`, or the body is not a JSON object.
 */
async function callComputerUseApi(toolName: string, input: Record<string, unknown>) {
  if (!apiToken) {
    throw new Error('CLOUDCLI_COMPUTER_USE_MCP_TOKEN is not configured.');
  }

  const response = await fetch(`${apiUrl}/tools/${encodeURIComponent(toolName)}`, {
    method: 'POST',
    headers: {
      Authorization: `Bearer ${apiToken}`,
      'Content-Type': 'application/json',
    },
    body: JSON.stringify(input),
  });

  // Read the body as text first: an HTML or empty error body would make response.json()
  // throw a bare SyntaxError and hide the HTTP status from the caller.
  const rawBody = await response.text();
  let data: { success?: boolean; data?: unknown; error?: string } = {};
  let isEnvelope = false;
  try {
    const decoded: unknown = JSON.parse(rawBody);
    if (decoded && typeof decoded === 'object') {
      data = decoded as { success?: boolean; data?: unknown; error?: string };
      isEnvelope = true;
    }
  } catch {
    // Not JSON; reported below together with the HTTP status.
  }

  if (!response.ok || data.success === false) {
    throw new Error(data.error || `Computer Use API request failed (${response.status})`);
  }
  if (!isEnvelope) {
    throw new Error(`Computer Use API returned an invalid response (${response.status})`);
  }
  return data.data;
}
|
||||
|
||||
/**
 * Looks up a screenshot data URL on an API result.
 *
 * Checks the result's own `screenshotDataUrl` first, then `session.screenshotDataUrl`.
 * Returns null when the value is not an object or neither field is a string.
 */
function findScreenshot(value: unknown): string | null {
  const isRecord = (candidate: unknown): candidate is Record<string, unknown> =>
    Boolean(candidate) && typeof candidate === 'object';

  if (!isRecord(value)) {
    return null;
  }

  const direct = value.screenshotDataUrl;
  if (typeof direct === 'string') {
    return direct;
  }

  const session = value.session;
  const nested = isRecord(session) ? session.screenshotDataUrl : undefined;
  return typeof nested === 'string' ? nested : null;
}
|
||||
|
||||
/**
 * Returns a deep copy of `value` in which every string-valued `screenshotDataUrl` key is
 * replaced by `screenshot: '[returned as image]'`, so the JSON text block stays small.
 * Arrays are mapped element by element; primitives and null are returned unchanged.
 */
function stripScreenshot(value: unknown): unknown {
  if (Array.isArray(value)) {
    return value.map(stripScreenshot);
  }
  if (!value || typeof value !== 'object') {
    return value;
  }

  const source = value as Record<string, unknown>;
  const cleaned: Record<string, unknown> = {};
  Object.keys(source).forEach((key) => {
    const entry = source[key];
    if (key === 'screenshotDataUrl' && typeof entry === 'string') {
      cleaned.screenshot = '[returned as image]';
    } else {
      cleaned[key] = stripScreenshot(entry);
    }
  });
  return cleaned;
}
|
||||
|
||||
/**
 * Wraps an API result as MCP tool content.
 *
 * The first block is always pretty-printed JSON with screenshot data URLs stripped. When the
 * result carries a base64 `data:image/...` URL, its payload is appended as an `image` block,
 * because a model cannot view a screenshot that is only present as a JSON string.
 */
function toolResult(value: unknown) {
  const textBlock = { type: 'text', text: JSON.stringify(stripScreenshot(value), null, 2) };
  const content: Array<Record<string, unknown>> = [textBlock];

  const dataUrl = findScreenshot(value);
  if (dataUrl) {
    const parts = /^data:(image\/[a-z]+);base64,(.+)$/i.exec(dataUrl);
    if (parts) {
      const [, mimeType, data] = parts;
      content.push({ type: 'image', data, mimeType });
    }
  }

  return { content };
}
|
||||
|
||||
/** JSON-schema fragment for the optional `sessionId` argument accepted by every tool. */
const optionalSessionProperty = {
  type: 'string',
  description: 'Optional. Omit to use or create the active agent session automatically.',
};

/** Complete input schema for tools whose only argument is the optional session id. */
const sessionIdSchema = {
  type: 'object',
  properties: {
    sessionId: optionalSessionProperty,
  },
};

/** Returns `properties` with the optional `sessionId` schema placed first; a caller-supplied `sessionId` key wins. */
function withOptionalSession(properties: Record<string, unknown> = {}) {
  return { sessionId: optionalSessionProperty, ...properties };
}

/** Builds an API input from the caller's optional `sessionId` plus the already-normalized `extra` fields. */
function optionalSessionInput(args: Record<string, unknown>, extra: Record<string, unknown> = {}) {
  return { sessionId: readOptionalString(args.sessionId), ...extra };
}

/** Schema for the `stateId` argument of element-targeted tools. */
const stateIdProperty = {
  type: 'string',
  description: 'State id returned by the latest computer_get_app_state call for this app. Send it with element_index so the runtime can resolve the cached element.',
};

/** Schema for the `element_index` argument of element-targeted tools. */
const elementIndexProperty = {
  type: 'string',
  description: 'Element index from the latest computer_get_app_state result for this app. Use with stateId when possible.',
};
|
||||
|
||||
/** Schema for the `app` argument shared by the app-scoped action tools. */
const appTargetProperty = {
  type: 'string',
  description: 'Target app name, process name, bundle identifier, or window title.',
};

/** Arguments shared by tools that target an element (or a fallback point) inside an app. */
const elementTargetProperties = {
  app: appTargetProperty,
  stateId: stateIdProperty,
  element_index: elementIndexProperty,
  x: { type: 'number', description: 'X coordinate in screenshot pixel coordinates from computer_get_app_state.' },
  y: { type: 'number', description: 'Y coordinate in screenshot pixel coordinates from computer_get_app_state.' },
};

/** Optional pointer position accepted by the raw desktop click and scroll tools. */
const optionalDesktopPoint = {
  x: { type: 'number', description: 'Optional X coordinate in full-desktop screenshot pixels.' },
  y: { type: 'number', description: 'Optional Y coordinate in full-desktop screenshot pixels.' },
};

/** Schema for the `key` argument of both key-press tools. */
const keyChordProperty = {
  type: 'string',
  description: 'Key or chord, using names such as Return, Escape, Tab, ctrl+s, cmd+a, Up, or Page_Down.',
};

const mouseButtonNames = ['left', 'right', 'middle'];
const scrollDirectionNames = ['up', 'down', 'left', 'right'];

/** Builds an object schema whose properties start with the optional sessionId; `required` is omitted when not given. */
const objectSchema = (properties: Record<string, unknown> = {}, required?: string[]) => (
  required
    ? { type: 'object', properties: withOptionalSession(properties), required }
    : { type: 'object', properties: withOptionalSession(properties) }
);

/**
 * Tool catalogue returned for `tools/list`: the app-scoped tools first, then the raw
 * full-desktop tools. Names, descriptions, and schemas are part of the wire contract.
 */
const tools: ToolDefinition[] = [
  {
    name: 'computer_list_apps',
    description: 'Discover app names, bundle identifiers, process names, and window titles that can be used as the app target for app-scoped Computer Use tools. Call this first when you are unsure which app string to pass to computer_get_app_state.',
    inputSchema: objectSchema(),
  },
  {
    name: 'computer_get_app_state',
    description: 'Inspect a target app and return its current screenshot, accessibility elements, and stateId. Call this before element-targeted actions, after navigation, and whenever the UI may have changed enough that old element indexes could be stale.',
    inputSchema: objectSchema({
      app: { type: 'string', description: 'App name, process name, bundle identifier, or window title from computer_list_apps or the user request.' },
    }, ['app']),
  },
  {
    name: 'computer_click_element',
    description: 'Click a target inside an app. Prefer stateId + element_index from computer_get_app_state; use x/y screenshot coordinates only when the target is not represented in the accessibility elements.',
    inputSchema: objectSchema({
      ...elementTargetProperties,
      click_count: { type: 'integer', description: 'Number of clicks, usually 1. Defaults to 1 and is capped by the runtime.' },
      mouse_button: { type: 'string', enum: mouseButtonNames, description: 'Button for the click; omitted means left.' },
    }, ['app']),
  },
  {
    name: 'computer_perform_secondary_action',
    description: 'Open the secondary action for a target inside an app, typically a context menu. Prefer stateId + element_index; if native secondary actions are unavailable, the runtime falls back to a right-click at the resolved point.',
    inputSchema: objectSchema({ ...elementTargetProperties }, ['app']),
  },
  {
    name: 'computer_set_value',
    description: 'Set the value of a specific editable element in an app. Prefer stateId + element_index for a settable accessibility element; coordinate fallback focuses the resolved point and replaces the current value, so do not call this unless the target is resolved.',
    inputSchema: objectSchema({
      ...elementTargetProperties,
      value: { type: 'string', description: 'Exact value to put into the target element.' },
    }, ['app', 'value']),
  },
  {
    name: 'computer_type_text',
    description: 'Type literal text into the target app using keyboard input. Use after you have focused the intended field with computer_click_element or verified the correct focus in computer_get_app_state.',
    inputSchema: objectSchema({
      app: appTargetProperty,
      text: { type: 'string', description: 'Text to enter exactly as provided.' },
    }, ['app', 'text']),
  },
  {
    name: 'computer_press_key',
    description: 'Press a key or key combination in the target app. Use for navigation, shortcuts, and confirmation keys after verifying the intended app/focus.',
    inputSchema: objectSchema({
      app: appTargetProperty,
      key: keyChordProperty,
    }, ['app', 'key']),
  },
  {
    name: 'computer_scroll_element',
    description: 'Scroll a target area inside an app. Prefer stateId + element_index for scrollable elements; use x/y screenshot coordinates only when the scroll target is visible but not represented as an element.',
    inputSchema: objectSchema({
      ...elementTargetProperties,
      direction: { type: 'string', enum: scrollDirectionNames, description: 'Direction to scroll the target.' },
      pages: { type: 'number', description: 'How far to scroll, measured in page units. Fractional values are allowed; default is 1.' },
    }, ['app', 'direction']),
  },
  {
    name: 'computer_app_drag',
    description: 'Drag inside a target app from one screenshot coordinate to another. Use for sliders, selections, map/canvas gestures, or drag-and-drop when no semantic element action is available.',
    inputSchema: objectSchema({
      app: appTargetProperty,
      from_x: { type: 'number', description: 'Start X coordinate in screenshot pixels.' },
      from_y: { type: 'number', description: 'Start Y coordinate in screenshot pixels.' },
      to_x: { type: 'number', description: 'End X coordinate in screenshot pixels.' },
      to_y: { type: 'number', description: 'End Y coordinate in screenshot pixels.' },
    }, ['app', 'from_x', 'from_y', 'to_x', 'to_y']),
  },
  {
    name: 'computer_screenshot',
    description: 'Capture the full desktop screenshot and current display size. Use this before raw coordinate actions when an app-specific accessibility state is unavailable or the task spans multiple apps.',
    inputSchema: sessionIdSchema,
  },
  {
    name: 'computer_cursor_position',
    description: 'Get the current mouse cursor position in desktop screenshot pixel coordinates. Useful after a raw action misses or when coordinating pointer-relative steps.',
    inputSchema: sessionIdSchema,
  },
  {
    name: 'computer_mouse_move',
    description: 'Move the mouse cursor to an exact full-desktop screenshot coordinate. Call computer_screenshot first if you do not already have a current coordinate frame.',
    inputSchema: objectSchema({
      x: { type: 'number', description: 'X coordinate in full-desktop screenshot pixels.' },
      y: { type: 'number', description: 'Y coordinate in full-desktop screenshot pixels.' },
    }, ['x', 'y']),
  },
  {
    name: 'computer_click',
    description: 'Raw desktop click at the current cursor or at optional full-desktop screenshot coordinates. Prefer computer_click_element when the target app and element are known.',
    inputSchema: objectSchema({
      ...optionalDesktopPoint,
      mouseButton: { type: 'string', enum: mouseButtonNames, description: 'Button for the click; omitted means left.' },
      clickCount: { type: 'integer', description: 'How many times to click; omitted means 1.' },
    }),
  },
  {
    name: 'computer_drag',
    description: 'Raw desktop drag from start coordinates to end coordinates in full-desktop screenshot pixels. Prefer computer_app_drag for app-scoped drags when the target app is known.',
    inputSchema: objectSchema({
      startX: { type: 'number', description: 'Start X coordinate in full-desktop screenshot pixels.' },
      startY: { type: 'number', description: 'Start Y coordinate in full-desktop screenshot pixels.' },
      endX: { type: 'number', description: 'End X coordinate in full-desktop screenshot pixels.' },
      endY: { type: 'number', description: 'End Y coordinate in full-desktop screenshot pixels.' },
      mouseButton: { type: 'string', enum: mouseButtonNames, description: 'Button to hold during the drag; omitted means left.' },
    }, ['startX', 'startY', 'endX', 'endY']),
  },
  {
    name: 'computer_type',
    description: 'Type literal text at the current desktop focus. This is not app-scoped; use only after verifying the intended field is focused.',
    inputSchema: objectSchema({
      text: { type: 'string', description: 'Text to enter exactly as provided at current focus.' },
    }, ['text']),
  },
  {
    name: 'computer_key',
    description: 'Press a key or key chord at the current desktop focus. This is not app-scoped; use computer_press_key when the target app is known.',
    inputSchema: objectSchema({ key: keyChordProperty }, ['key']),
  },
  {
    name: 'computer_scroll',
    description: 'Raw desktop scroll at the current cursor or optional full-desktop screenshot coordinates. Prefer computer_scroll_element when the target app/element is known.',
    inputSchema: objectSchema({
      direction: { type: 'string', enum: scrollDirectionNames, description: 'Direction to scroll the desktop target.' },
      amount: { type: 'number', description: 'Scroll amount in wheel/page-like units. Defaults are runtime-defined.' },
      ...optionalDesktopPoint,
    }, ['direction']),
  },
  {
    name: 'computer_wait',
    description: 'Wait briefly, up to 10000 ms, then return an updated desktop screenshot. Use after actions that trigger loading, animation, or delayed UI changes.',
    inputSchema: objectSchema({
      timeoutMs: { type: 'number', description: 'Milliseconds to wait. The runtime caps long waits.' },
    }),
  },
  {
    name: 'computer_close_session',
    description: 'Stop the active auto-created Computer Use session, or the specified session, and revoke agent input control for that session.',
    inputSchema: sessionIdSchema,
  },
];
|
||||
|
||||
/**
 * Runs one MCP tool call against the Computer Use API and wraps the reply as tool content.
 *
 * App-scoped tools forward `args` unchanged. Raw desktop tools send only the fields
 * normalized here, together with the optional session id.
 *
 * @param name Tool name from the `tools/call` request.
 * @param args Tool arguments supplied by the client.
 * @throws Error `Unknown tool: <name>` when the name is not handled here.
 */
async function callTool(name: string, args: Record<string, unknown>) {
  const forward = async (input: Record<string, unknown>) => toolResult(await callComputerUseApi(name, input));
  // Raw tools accept the button under three spellings.
  const pickMouseButton = () => readMouseButton(args.mouseButton ?? args.mouse_button ?? args.button);

  switch (name) {
    case 'computer_app_drag':
    case 'computer_click_element':
    case 'computer_get_app_state':
    case 'computer_list_apps':
    case 'computer_perform_secondary_action':
    case 'computer_press_key':
    case 'computer_scroll_element':
    case 'computer_set_value':
    case 'computer_type_text':
      return forward(args);

    case 'computer_screenshot':
    case 'computer_cursor_position':
    case 'computer_close_session':
      return forward(optionalSessionInput(args));

    case 'computer_mouse_move':
      return forward(optionalSessionInput(args, {
        x: readNumber(args.x),
        y: readNumber(args.y),
      }));

    case 'computer_click':
      return forward(optionalSessionInput(args, {
        x: readNumber(args.x),
        y: readNumber(args.y),
        mouseButton: pickMouseButton(),
        clickCount: readNumber(args.clickCount ?? args.click_count),
      }));

    case 'computer_drag':
      return forward(optionalSessionInput(args, {
        startX: readNumber(args.startX),
        startY: readNumber(args.startY),
        endX: readNumber(args.endX),
        endY: readNumber(args.endY),
        mouseButton: pickMouseButton(),
      }));

    case 'computer_type':
      return forward(optionalSessionInput(args, { text: readString(args.text, 'text') }));

    case 'computer_key':
      return forward(optionalSessionInput(args, { key: readString(args.key, 'key') }));

    case 'computer_scroll':
      return forward(optionalSessionInput(args, {
        direction: typeof args.direction === 'string' ? args.direction : 'up',
        amount: readNumber(args.amount),
        x: readNumber(args.x),
        y: readNumber(args.y),
      }));

    case 'computer_wait':
      return forward(optionalSessionInput(args, { timeoutMs: readNumber(args.timeoutMs) }));

    default:
      throw new Error(`Unknown tool: ${name}`);
  }
}
|
||||
|
||||
/**
 * Handles one decoded JSON-RPC message and returns the value to send as its `result`.
 *
 * Supported methods: `initialize`, `ping`, `tools/list`, `tools/call`, and any
 * `notifications/*` method, which is accepted and yields `undefined`.
 *
 * @param message Parsed JSON-RPC request or notification.
 * @returns The result payload, or `undefined` for notifications.
 * @throws Error `Unsupported method: ...` for anything else, including a message whose
 *   `method` is missing or not a string.
 */
async function handleMessage(message: JsonRpcRequest) {
  // Guard the type so a malformed message is reported as an unsupported method rather
  // than as a TypeError from calling startsWith on undefined.
  const method = typeof message?.method === 'string' ? message.method : undefined;

  switch (method) {
    case 'initialize':
      return {
        protocolVersion: '2024-11-05',
        capabilities: { tools: {} },
        serverInfo: { name: 'cloudcli-computer-use', version: '1.0.0' },
        instructions: computerUseInstructions,
      };

    case 'ping':
      // MCP liveness check: the receiver must answer with an empty result.
      return {};

    case 'tools/list':
      return { tools };

    case 'tools/call': {
      const params = message.params || {};
      const name = readString(params.name, 'name');
      const args = (params.arguments && typeof params.arguments === 'object'
        ? params.arguments
        : {}) as Record<string, unknown>;
      return callTool(name, args);
    }

    default:
      if (method?.startsWith('notifications/')) {
        return undefined;
      }
      throw new Error(`Unsupported method: ${String(message?.method)}`);
  }
}
|
||||
|
||||
/** How a stdio message is delimited: a `Content-Length` header block before the payload, or a newline after it. */
type MessageFraming = 'content-length' | 'line';
|
||||
|
||||
/**
 * Serializes `message` and writes it to stdout in a single write.
 *
 * `line` framing appends a newline. Otherwise the payload is preceded by a `Content-Length`
 * header that carries its UTF-8 byte length.
 */
function writeMessage(message: Record<string, unknown>, framing: MessageFraming) {
  const json = JSON.stringify(message);
  const frame = framing === 'line'
    ? `${json}\n`
    : `Content-Length: ${Buffer.byteLength(json, 'utf8')}\r\n\r\n${json}`;
  process.stdout.write(frame);
}
|
||||
|
||||
/**
 * Writes a JSON-RPC success response for `id`.
 * Does nothing when `id` is undefined, i.e. the message was a notification.
 */
function sendResult(id: string | number | null | undefined, result: unknown, framing: MessageFraming) {
  if (id !== undefined) {
    writeMessage({ jsonrpc: '2.0', id, result }, framing);
  }
}
|
||||
|
||||
/**
 * Writes a JSON-RPC error response for `id` with code -32000.
 * The message is `error.message` for Error instances and `String(error)` otherwise.
 * Does nothing when `id` is undefined.
 */
function sendError(id: string | number | null | undefined, error: unknown, framing: MessageFraming) {
  if (id === undefined) {
    return;
  }
  const message = error instanceof Error ? error.message : String(error);
  writeMessage({ jsonrpc: '2.0', id, error: { code: -32000, message } }, framing);
}
|
||||
|
||||
// Unconsumed stdin bytes; the stdin data handler appends each chunk and removes complete messages.
let buffer = Buffer.alloc(0);
|
||||
|
||||
/**
 * Parses one raw JSON-RPC message, dispatches it, and writes the response using the same
 * framing the message arrived with. Runs asynchronously; the caller does not wait for it.
 *
 * Error replies follow JSON-RPC 2.0: a message that cannot be parsed or is not a
 * well-formed request is answered with `id: null`, while a failing notification (a string
 * `method` and no `id`) gets no reply at all.
 *
 * @param rawMessage JSON text of a single message.
 * @param framing Framing used for the response.
 */
function handleRawMessage(rawMessage: string, framing: MessageFraming) {
  void (async () => {
    let request: JsonRpcRequest | null = null;
    try {
      request = JSON.parse(rawMessage) as JsonRpcRequest;
      const result = await handleMessage(request);
      sendResult(request.id, result, framing);
    } catch (error) {
      const isNotification = request !== null
        && typeof request === 'object'
        && typeof request.method === 'string'
        && request.id === undefined;
      // Passing undefined lets sendError's own guard suppress the reply for notifications.
      sendError(isNotification ? undefined : request?.id ?? null, error, framing);
    }
  })();
}
|
||||
|
||||
/**
 * Locates the first blank-line terminator of a header block in `input`.
 *
 * Accepts both `\r\n\r\n` and `\n\n` and returns whichever occurs first. Always preferring
 * CRLF would let a terminator that belongs to a later message hide an earlier LF-terminated
 * header.
 *
 * @param input Bytes to scan.
 * @returns The terminator's start offset and byte length, or null when there is none.
 */
function findHeaderEnd(input: Buffer): { index: number; length: number } | null {
  const crlf = input.indexOf('\r\n\r\n');
  const lf = input.indexOf('\n\n');

  if (crlf === -1 && lf === -1) {
    return null;
  }
  // The two patterns cannot overlap: "\r\n\r\n" contains no "\n\n".
  if (lf === -1 || (crlf !== -1 && crlf < lf)) {
    return { index: crlf, length: 4 };
  }
  return { index: lf, length: 2 };
}
|
||||
|
||||
/**
 * Stdin reader: appends each chunk to `buffer` and dispatches every complete message.
 *
 * Two framings are accepted on the same stream:
 *  - a header block containing `Content-Length: <n>`, a blank line, then n bytes of payload;
 *  - one JSON document per line.
 *
 * A blank-line-terminated block without a usable Content-Length is read as ordinary lines
 * instead of being thrown away, so a stray empty line after newline-delimited messages no
 * longer discards the messages that precede it.
 */
process.stdin.on('data', (chunk) => {
  buffer = Buffer.concat([buffer, chunk]);

  while (true) {
    const headerEnd = findHeaderEnd(buffer);

    if (headerEnd) {
      const header = buffer.subarray(0, headerEnd.index).toString('utf8');
      const lengthMatch = /Content-Length:\s*(\d+)/i.exec(header);
      if (lengthMatch) {
        const messageStart = headerEnd.index + headerEnd.length;
        const messageEnd = messageStart + Number.parseInt(lengthMatch[1], 10);
        if (buffer.length < messageEnd) {
          // Payload not fully received yet; wait for the next chunk.
          return;
        }

        const rawMessage = buffer.subarray(messageStart, messageEnd).toString('utf8');
        buffer = buffer.subarray(messageEnd);
        handleRawMessage(rawMessage, 'content-length');
        continue;
      }
      // No usable length: fall through and consume the block line by line.
    } else if (/^Content-Length:/i.test(buffer.toString('utf8', 0, Math.min(buffer.length, 32)))) {
      // A header has started but its terminating blank line has not arrived yet.
      return;
    }

    const newline = buffer.indexOf('\n');
    if (newline === -1) {
      return;
    }

    const rawLine = buffer.subarray(0, newline).toString('utf8').trim();
    buffer = buffer.subarray(newline + 1);
    if (rawLine) {
      handleRawMessage(rawLine, 'line');
    }
  }
});
|
||||
@@ -65,7 +65,10 @@ import providerRoutes from './modules/providers/provider.routes.js';
|
||||
import voiceRoutes from './voice-proxy.js';
|
||||
import browserUseRoutes from './modules/browser-use/browser-use.routes.js';
|
||||
import browserUseMcpRoutes from './modules/browser-use/browser-use-mcp.routes.js';
|
||||
import { browserUseService } from './modules/browser-use/browser-use.service.js';
|
||||
import { browserUseService, VIEWER_COOKIE_NAME } from './modules/browser-use/index.js';
|
||||
import computerUseRoutes from './modules/computer-use/computer-use.routes.js';
|
||||
import computerUseMcpRoutes from './modules/computer-use/computer-use-mcp.routes.js';
|
||||
import { computerUseService } from './modules/computer-use/computer-use.service.js';
|
||||
import { startEnabledPluginServers, stopAllPlugins, getPluginPort } from './utils/plugin-process-manager.js';
|
||||
import { initializeDatabase, projectsDb, sessionsDb } from './modules/database/index.js';
|
||||
import { configureWebPush } from './services/vapid-keys.js';
|
||||
@@ -147,6 +150,8 @@ const wss = createWebSocketServer(server, {
|
||||
shouldAutoOpenUrlFromOutput,
|
||||
},
|
||||
getPluginPort,
|
||||
browserUseViewer: (ws, pathname) => browserUseService.handleViewerWebSocket(ws, pathname),
|
||||
authenticateBrowserUseViewer: authenticateBrowserUseViewerPath,
|
||||
});
|
||||
|
||||
// Make WebSocket server available to routes
|
||||
@@ -214,11 +219,48 @@ app.use('/api/gemini', authenticateToken, geminiRoutes);
|
||||
// Plugins API Routes (protected)
|
||||
app.use('/api/plugins', authenticateToken, pluginsRoutes);
|
||||
|
||||
/**
 * Reads one cookie from a raw `Cookie` header.
 *
 * @param {string | undefined} header Raw Cookie header value.
 * @param {string} name Cookie name to look up.
 * @returns {string | null} The URL-decoded value, or null when the header is empty, the
 *   cookie is absent, or its value is not valid percent-encoding.
 */
function readCookieValue(header, name) {
  if (!header) return null;

  const prefix = `${name}=`;
  const cookie = String(header)
    .split(';')
    .map((part) => part.trim())
    .find((part) => part.startsWith(prefix));
  if (!cookie) return null;

  try {
    return decodeURIComponent(cookie.slice(prefix.length));
  } catch {
    // The header is client-controlled; a malformed escape must read as "no cookie"
    // rather than throw a URIError out of the auth path.
    return null;
  }
}
|
||||
|
||||
/**
 * Authorizes a viewer WebSocket request by path and token.
 *
 * The path must have the shape `/api/browser-use/sessions/<sessionId>/viewer/websockify`;
 * the decoded session id and the token are then checked by the browser-use service.
 *
 * @param {string} pathname Request path without the query string.
 * @param {string} token Viewer token supplied by the client.
 * @returns {boolean} False for any other path or a session id that cannot be decoded;
 *   otherwise the service's validation result.
 */
function authenticateBrowserUseViewerPath(pathname, token) {
  const parts = String(pathname || '').split('/');
  if (parts[1] !== 'api' || parts[2] !== 'browser-use' || parts[3] !== 'sessions' || parts[5] !== 'viewer' || parts[6] !== 'websockify') {
    return false;
  }

  let sessionId;
  try {
    sessionId = decodeURIComponent(parts[4]);
  } catch {
    // Malformed percent-encoding in a client-controlled path: reject instead of throwing.
    return false;
  }
  return browserUseService.validateViewerToken(sessionId, token);
}
|
||||
|
||||
/**
 * Express middleware for `/api/browser-use`.
 *
 * Viewer paths (`/sessions/<id>/viewer...`) are authorized by a per-session viewer token,
 * taken from the `viewerToken` query parameter or else from the viewer cookie. Every other
 * path falls through to the regular `authenticateToken` middleware.
 */
function authenticateBrowserUse(req, res, next) {
  const match = /^\/sessions\/([^/]+)\/viewer(?:\/|$)/.exec(req.path || '');
  if (!match) {
    return authenticateToken(req, res, next);
  }

  let authorized = false;
  try {
    const sessionId = decodeURIComponent(match[1]);
    const token = typeof req.query.viewerToken === 'string'
      ? req.query.viewerToken
      : readCookieValue(req.headers.cookie, VIEWER_COOKIE_NAME);
    authorized = browserUseService.validateViewerToken(sessionId, token);
  } catch (error) {
    // Malformed percent-encoding in the path or cookie is a failed authentication,
    // not a server error; anything else is unexpected and is rethrown.
    if (!(error instanceof URIError)) {
      throw error;
    }
  }

  if (authorized) {
    return next();
  }
  return res.status(401).json({ error: 'Browser viewer access requires a valid session token.' });
}
|
||||
|
||||
// Browser MCP bridge API (local token protected)
|
||||
app.use('/api/browser-use-mcp', browserUseMcpRoutes);
|
||||
|
||||
// Browser API Routes (protected)
|
||||
app.use('/api/browser-use', authenticateToken, browserUseRoutes);
|
||||
app.use('/api/browser-use', authenticateBrowserUse, browserUseRoutes);
|
||||
|
||||
// Computer Use MCP bridge API (local token protected)
|
||||
app.use('/api/computer-use-mcp', computerUseMcpRoutes);
|
||||
|
||||
// Computer Use API Routes (protected)
|
||||
app.use('/api/computer-use', authenticateToken, computerUseRoutes);
|
||||
|
||||
// Unified provider MCP routes (protected)
|
||||
app.use('/api/providers', authenticateToken, providerRoutes);
|
||||
@@ -1776,6 +1818,11 @@ async function startServer() {
|
||||
} catch (err) {
|
||||
console.error('[Browser] Error stopping sessions during shutdown:', err?.message || err);
|
||||
}
|
||||
try {
|
||||
await computerUseService.stopAllSessions();
|
||||
} catch (err) {
|
||||
console.error('[Computer Use] Error stopping sessions during shutdown:', err?.message || err);
|
||||
}
|
||||
try {
|
||||
await stopAllPlugins();
|
||||
} catch (err) {
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import express from 'express';
|
||||
|
||||
import { browserUseService } from '@/modules/browser-use/browser-use.service.js';
|
||||
import { VIEWER_COOKIE_NAME, VIEWER_TOKEN_TTL_MS } from '@/modules/browser-use/browser-use.viewer.js';
|
||||
|
||||
const router = express.Router();
|
||||
|
||||
@@ -8,6 +9,45 @@ function readParam(value: string | string[] | undefined): string {
|
||||
return Array.isArray(value) ? value[0] || '' : value || '';
|
||||
}
|
||||
|
||||
// Allow-list of what the viewer proxy may fetch: exact root-level files and top-level directories.
const SAFE_VIEWER_ROOT_FILES = new Set(['vnc.html', 'favicon.ico', 'manifest.json']);
const SAFE_VIEWER_ROOT_DIRS = new Set(['app', 'core', 'vendor', 'assets', 'images', 'utils']);

/**
 * Decides whether a decoded viewer asset path may be proxied.
 *
 * The path must be non-empty and relative, contain no `..` or backslash, consist only of
 * `[A-Za-z0-9._~/-]` starting with an alphanumeric, and be either an allow-listed root file
 * or have an allow-listed first path segment.
 */
function isSafeViewerPath(viewerPath: string): boolean {
  const hasForbiddenShape = !viewerPath
    || viewerPath.startsWith('/')
    || viewerPath.includes('..')
    || viewerPath.includes('\\');
  if (hasForbiddenShape || !/^[A-Za-z0-9][A-Za-z0-9._~/-]*$/.test(viewerPath)) {
    return false;
  }

  if (SAFE_VIEWER_ROOT_FILES.has(viewerPath)) {
    return true;
  }

  const rootDir = viewerPath.split('/')[0];
  return Boolean(rootDir && SAFE_VIEWER_ROOT_DIRS.has(rootDir));
}
|
||||
|
||||
/**
 * Reports whether the request arrived over HTTPS: either Express marks it secure or the
 * first entry of `X-Forwarded-Proto` is `https`, compared case-insensitively.
 */
function isSecureRequest(req: express.Request): boolean {
  const headerValue = req.headers['x-forwarded-proto'] || '';
  const [firstProto] = String(headerValue).split(',');
  return req.secure || firstProto.trim().toLowerCase() === 'https';
}
|
||||
|
||||
/**
 * Returns the query string of `originalUrl` with `viewerToken` removed, so the token is not
 * forwarded upstream. The result includes the leading `?`, or is empty when nothing remains.
 */
function readQueryString(originalUrl: string): string {
  const separatorAt = originalUrl.indexOf('?');
  if (separatorAt === -1) {
    return '';
  }

  const forwarded = new URLSearchParams(originalUrl.slice(separatorAt + 1));
  forwarded.delete('viewerToken');
  const serialized = forwarded.toString();
  return serialized === '' ? '' : `?${serialized}`;
}
|
||||
|
||||
router.get('/status', async (_req, res) => {
|
||||
try {
|
||||
res.json({ success: true, data: await browserUseService.getStatus() });
|
||||
@@ -62,13 +102,60 @@ router.get('/sessions', async (_req, res) => {
|
||||
try {
|
||||
res.json({ success: true, data: { sessions: await browserUseService.listSessions() } });
|
||||
} catch (error) {
|
||||
res.status(401).json({
|
||||
res.status(500).json({
|
||||
success: false,
|
||||
error: error instanceof Error ? error.message : 'Failed to list browser sessions.',
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
/**
 * Proxies viewer assets for a browser session from its local websockify port.
 *
 * The asset path is taken from the original URL, decoded, and checked against the viewer
 * allow-list (400 when rejected). A request for `vnc.html` with a valid `viewerToken` also
 * stores the token in an http-only cookie scoped to this session's viewer path. The
 * upstream status, content type, and body are relayed; `vnc.html` is never cached and
 * other assets are cached for an hour. Any failure is reported as 404.
 */
router.get('/sessions/:sessionId/viewer/*', async (req, res) => {
  try {
    const sessionId = readParam(req.params.sessionId);

    // Everything after ".../viewer/" in the undecoded path is the requested asset.
    const [requestPath = ''] = req.originalUrl.split('?');
    const marker = `/sessions/${sessionId}/viewer/`;
    const markerAt = requestPath.indexOf(marker);
    const encodedAssetPath = markerAt >= 0 ? requestPath.slice(markerAt + marker.length) : 'vnc.html';
    const viewerPath = decodeURIComponent(encodedAssetPath).replace(/^\/+/, '') || 'vnc.html';
    if (!isSafeViewerPath(viewerPath)) {
      res.status(400).json({ success: false, error: 'Invalid Browser viewer path.' });
      return;
    }

    const isEntryPage = viewerPath === 'vnc.html';
    const viewerToken = readParam(req.query.viewerToken as string | string[] | undefined);
    if (isEntryPage && browserUseService.validateViewerToken(sessionId, viewerToken)) {
      const cookieOptions = {
        httpOnly: true,
        sameSite: 'lax' as const,
        secure: isSecureRequest(req),
        maxAge: VIEWER_TOKEN_TTL_MS,
        path: `/api/browser-use/sessions/${encodeURIComponent(sessionId)}/viewer`,
      };
      res.cookie(VIEWER_COOKIE_NAME, viewerToken, cookieOptions);
    }

    const target = browserUseService.getViewerProxyTarget(sessionId);
    const query = readQueryString(req.originalUrl);
    const upstreamUrl = `http://127.0.0.1:${target.websockifyPort}/${viewerPath}${query}`;
    const upstream = await fetch(upstreamUrl, {
      headers: {
        accept: String(req.headers.accept || '*/*'),
      },
    });

    const contentType = upstream.headers.get('content-type');
    if (contentType) {
      res.setHeader('content-type', contentType);
    }
    res.setHeader('cache-control', isEntryPage ? 'no-store' : 'public, max-age=3600');
    res.status(upstream.status);
    res.send(Buffer.from(await upstream.arrayBuffer()));
  } catch (error) {
    res.status(404).json({
      success: false,
      error: error instanceof Error ? error.message : 'Browser viewer is not available.',
    });
  }
});
|
||||
|
||||
router.post('/sessions/:sessionId/stop', async (req, res) => {
|
||||
try {
|
||||
const result = await browserUseService.stopSession(readParam(req.params.sessionId));
|
||||
|
||||
@@ -1,128 +1,86 @@
|
||||
import { createRequire } from 'node:module';
|
||||
import { randomBytes, randomUUID } from 'node:crypto';
|
||||
import { spawn } from 'node:child_process';
|
||||
import { randomUUID } from 'node:crypto';
|
||||
import { execFileSync, spawn } from 'node:child_process';
|
||||
import fs from 'node:fs';
|
||||
import net from 'node:net';
|
||||
import os from 'node:os';
|
||||
import path from 'node:path';
|
||||
|
||||
import { appConfigDb } from '@/modules/database/index.js';
|
||||
import { WebSocket } from 'ws';
|
||||
|
||||
import { providerMcpService } from '@/modules/providers/index.js';
|
||||
import { getModuleDir } from '@/utils/runtime-paths.js';
|
||||
|
||||
import {
|
||||
getOrCreateMcpToken,
|
||||
getProfilePath,
|
||||
normalizeBrowserBackend,
|
||||
PROFILE_ROOT,
|
||||
readSettings,
|
||||
resolveSessionProfileName,
|
||||
useVisibleCamoufoxBackend,
|
||||
writeSettings,
|
||||
} from './browser-use.settings.js';
|
||||
import type {
|
||||
BrowserUseSession,
|
||||
BrowserUseSettings,
|
||||
PublicBrowserUseSession,
|
||||
RuntimeHandle,
|
||||
RuntimeProbe,
|
||||
RuntimeReadiness,
|
||||
} from './browser-use.types.js';
|
||||
import { getViewerUrl, handleViewerWebSocket, VIEWER_TOKEN_TTL_MS } from './browser-use.viewer.js';
|
||||
|
||||
const require = createRequire(import.meta.url);
|
||||
const __dirname = getModuleDir(import.meta.url);
|
||||
const IS_PLATFORM = process.env.VITE_IS_PLATFORM === 'true';
|
||||
const MAX_SESSIONS_PER_OWNER = Number.parseInt(process.env.CLOUDCLI_BROWSER_USE_MAX_SESSIONS_PER_OWNER || '3', 10);
|
||||
const SESSION_TTL_MS = Number.parseInt(process.env.CLOUDCLI_BROWSER_USE_SESSION_TTL_MS || String(30 * 60 * 1000), 10);
|
||||
const BROWSER_USE_SETTINGS_KEY = 'browser_use_settings';
|
||||
const BROWSER_USE_MCP_TOKEN_KEY = 'browser_use_mcp_token';
|
||||
|
||||
type BrowserUseRuntime = 'cloud' | 'local';
|
||||
type BrowserUseSessionStatus = 'ready' | 'stopped' | 'unavailable';
|
||||
|
||||
type BrowserUseSession = {
|
||||
id: string;
|
||||
ownerId: string;
|
||||
createdBy: 'agent';
|
||||
runtime: BrowserUseRuntime;
|
||||
status: BrowserUseSessionStatus;
|
||||
url: string | null;
|
||||
title: string | null;
|
||||
screenshotDataUrl: string | null;
|
||||
createdAt: string;
|
||||
updatedAt: string;
|
||||
lastAction: string | null;
|
||||
message: string | null;
|
||||
profileName: string | null;
|
||||
viewport: {
|
||||
width: number;
|
||||
height: number;
|
||||
} | null;
|
||||
cursor: {
|
||||
x: number;
|
||||
y: number;
|
||||
actor: 'agent';
|
||||
} | null;
|
||||
};
|
||||
|
||||
type PublicBrowserUseSession = Omit<BrowserUseSession, 'ownerId'>;
|
||||
|
||||
type RuntimeHandle = {
|
||||
browser?: any;
|
||||
context?: any;
|
||||
page?: any;
|
||||
};
|
||||
|
||||
type BrowserUseSettings = {
|
||||
enabled: boolean;
|
||||
};
|
||||
|
||||
type RuntimeReadiness = {
|
||||
playwright: any | null;
|
||||
playwrightInstalled: boolean;
|
||||
chromiumInstalled: boolean;
|
||||
chromiumExecutablePath: string | null;
|
||||
installInProgress: boolean;
|
||||
installMessage: string | null;
|
||||
};
|
||||
|
||||
type RuntimeProbe = Omit<RuntimeReadiness, 'installInProgress' | 'installMessage'>;
|
||||
|
||||
const sessions = new Map<string, BrowserUseSession>();
|
||||
const handles = new Map<string, RuntimeHandle>();
|
||||
const reservedDisplays = new Set<string>();
|
||||
const viewerTokens = new Map<string, { token: string; expiresAt: number }>();
|
||||
let installPromise: Promise<{ success: boolean; message: string }> | null = null;
|
||||
let lastInstallMessage: string | null = null;
|
||||
let runtimeProbeCache: { value: RuntimeProbe; updatedAt: number } | null = null;
|
||||
|
||||
const DEFAULT_SETTINGS: BrowserUseSettings = {
|
||||
enabled: false,
|
||||
};
|
||||
const AGENT_OWNER_ID = 'agent';
|
||||
const PROFILE_ROOT = path.join(os.homedir(), '.cloudcli', 'browser-use', 'profiles');
|
||||
const MCP_SERVER_NAME = 'cloudcli-browser';
|
||||
const LEGACY_MCP_SERVER_NAMES = ['cloudcli-browser-use'];
|
||||
const RUNTIME_READINESS_CACHE_TTL_MS = 30_000;
|
||||
const VISIBLE_BROWSER_ENABLED = process.env.CLOUDCLI_BROWSER_USE_VISIBLE !== 'false';
|
||||
const RUNTIME_ROOT = process.env.CLOUDCLI_BROWSER_USE_RUNTIME_ROOT || '/opt/claudecodeui/.runtime-browser';
|
||||
const NOVNC_ROOT = process.env.CLOUDCLI_BROWSER_USE_NOVNC_ROOT || path.join(RUNTIME_ROOT, 'novnc');
|
||||
const X11VNC_BIN = process.env.CLOUDCLI_BROWSER_USE_X11VNC_BIN || path.join(RUNTIME_ROOT, 'rootfs/usr/bin/x11vnc');
|
||||
const X11VNC_LIB_DIR = process.env.CLOUDCLI_BROWSER_USE_X11VNC_LIB_DIR || path.join(RUNTIME_ROOT, 'rootfs/usr/lib/x86_64-linux-gnu');
|
||||
const X11VNC_EXTRA_LIB_DIR = process.env.CLOUDCLI_BROWSER_USE_X11VNC_EXTRA_LIB_DIR || path.join(RUNTIME_ROOT, 'rootfs/lib/x86_64-linux-gnu');
|
||||
const LOG_RUNTIME_PROCESS_OUTPUT = process.env.CLOUDCLI_BROWSER_USE_RUNTIME_LOGS === 'true';
|
||||
|
||||
function getRuntime(): BrowserUseRuntime {
|
||||
function getRuntime(): 'cloud' | 'local' {
|
||||
return IS_PLATFORM ? 'cloud' : 'local';
|
||||
}
|
||||
|
||||
function readSettings(): BrowserUseSettings {
|
||||
function getCamoufoxExecutablePath(): string | null {
|
||||
const configured = process.env.CLOUDCLI_BROWSER_USE_CAMOUFOX_EXECUTABLE;
|
||||
if (configured && fs.existsSync(configured)) {
|
||||
return configured;
|
||||
}
|
||||
|
||||
try {
|
||||
const raw = appConfigDb.get(BROWSER_USE_SETTINGS_KEY);
|
||||
if (!raw) {
|
||||
return DEFAULT_SETTINGS;
|
||||
}
|
||||
|
||||
const parsed = JSON.parse(raw) as Partial<BrowserUseSettings>;
|
||||
return {
|
||||
enabled: parsed.enabled === true,
|
||||
};
|
||||
} catch (error: any) {
|
||||
console.warn('[Browser] Failed to read settings:', error?.message || error);
|
||||
return DEFAULT_SETTINGS;
|
||||
const output = execFileSync(path.join(os.homedir(), '.local/bin/camoufox'), ['path'], {
|
||||
encoding: 'utf8',
|
||||
stdio: ['ignore', 'pipe', 'ignore'],
|
||||
}).trim();
|
||||
const executablePath = fs.statSync(output).isDirectory()
|
||||
? path.join(output, 'camoufox')
|
||||
: output;
|
||||
return fs.existsSync(executablePath) ? executablePath : null;
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
function writeSettings(settings: BrowserUseSettings): BrowserUseSettings {
|
||||
const normalized = {
|
||||
enabled: settings.enabled === true,
|
||||
};
|
||||
|
||||
appConfigDb.set(BROWSER_USE_SETTINGS_KEY, JSON.stringify(normalized));
|
||||
return normalized;
|
||||
}
|
||||
|
||||
function getOrCreateMcpToken(): string {
|
||||
const existing = appConfigDb.get(BROWSER_USE_MCP_TOKEN_KEY);
|
||||
if (existing) {
|
||||
return existing;
|
||||
}
|
||||
const token = randomBytes(32).toString('hex');
|
||||
appConfigDb.set(BROWSER_USE_MCP_TOKEN_KEY, token);
|
||||
return token;
|
||||
}
|
||||
|
||||
function getSetupMessage(settings: BrowserUseSettings, readiness: RuntimeReadiness): string {
|
||||
if (!settings.enabled) {
|
||||
return 'Browser is disabled in settings.';
|
||||
@@ -132,6 +90,26 @@ function getSetupMessage(settings: BrowserUseSettings, readiness: RuntimeReadine
|
||||
return 'Install Playwright and Chromium to use browser sessions.';
|
||||
}
|
||||
|
||||
if (settings.browserBackend === 'camoufox-vnc' && !getCamoufoxExecutablePath()) {
|
||||
return 'Camoufox is selected, but Camoufox is not installed.';
|
||||
}
|
||||
|
||||
if (useVisibleCamoufoxBackend(settings)) {
|
||||
if (!VISIBLE_BROWSER_ENABLED) {
|
||||
return 'Camoufox is selected, but visible browser sessions are disabled.';
|
||||
}
|
||||
if (!getCamoufoxExecutablePath()) {
|
||||
return 'Camoufox is selected, but Camoufox is not installed.';
|
||||
}
|
||||
if (!fs.existsSync(X11VNC_BIN)) {
|
||||
return 'Camoufox is selected, but x11vnc is missing.';
|
||||
}
|
||||
if (!fs.existsSync(path.join(NOVNC_ROOT, 'vnc.html'))) {
|
||||
return 'Camoufox is selected, but noVNC is missing.';
|
||||
}
|
||||
return readiness.installMessage || 'Camoufox runtime is not ready.';
|
||||
}
|
||||
|
||||
if (!readiness.chromiumInstalled) {
|
||||
return 'Playwright is installed, but Chromium is missing. Install the Chromium runtime to continue.';
|
||||
}
|
||||
@@ -176,24 +154,6 @@ async function removeMcpServerFromAllProviders(name: string) {
|
||||
return results.map((result) => ({ ...result, name }));
|
||||
}
|
||||
|
||||
function normalizeProfileName(profileName?: string | null): string | null {
|
||||
const normalized = String(profileName || '').trim();
|
||||
if (!normalized) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return normalized.slice(0, 80);
|
||||
}
|
||||
|
||||
function getProfilePath(profileName: string): string {
|
||||
const safeName = profileName
|
||||
.toLowerCase()
|
||||
.replace(/[^a-z0-9._-]+/g, '-')
|
||||
.replace(/^-+|-+$/g, '')
|
||||
.slice(0, 80) || 'default';
|
||||
return path.join(PROFILE_ROOT, safeName);
|
||||
}
|
||||
|
||||
function probeRuntime(): RuntimeProbe {
|
||||
const playwright = getPlaywright();
|
||||
const readiness: RuntimeProbe = {
|
||||
@@ -238,6 +198,175 @@ function getRuntimeReadiness(options: { force?: boolean } = {}): RuntimeReadines
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Asks the OS for a free TCP port on 127.0.0.1 by binding a throwaway server
 * to port 0, reading the assigned port, and closing the server again.
 *
 * @returns A promise resolving to the port number that was assigned.
 *   Rejects if the server errors or no port can be read from its address.
 */
function findAvailablePort(): Promise<number> {
  return new Promise((resolve, reject) => {
    const probe = net.createServer();
    probe.on('error', reject);
    probe.listen(0, '127.0.0.1', () => {
      // Capture the address while the server is still bound.
      const bound = probe.address();
      const port = typeof bound === 'object' && bound?.port ? bound.port : null;
      probe.close(() => {
        if (port === null) {
          reject(new Error('Failed to reserve a browser runtime port.'));
          return;
        }
        resolve(port);
      });
    });
  });
}
|
||||
|
||||
/**
 * Returns a promise that resolves after `ms` milliseconds.
 */
function delay(ms: number): Promise<void> {
  return new Promise((done) => {
    setTimeout(done, ms);
  });
}
|
||||
|
||||
/**
 * Reports whether a spawned helper process still counts as running: it has no
 * exit code, no terminating signal, and has not been sent a kill signal.
 */
function isRuntimeProcessAlive(child: ReturnType<typeof spawn>): boolean {
  if (child.killed) {
    return false;
  }
  const hasExited = child.exitCode !== null || child.signalCode !== null;
  return !hasExited;
}
|
||||
|
||||
/**
 * Throws if any of the given helper processes is no longer alive.
 *
 * @param processes Child processes that must all still be running.
 * @param label Name used in the error message.
 * @throws Error when at least one process has exited.
 */
function assertRuntimeProcessesAlive(processes: Array<ReturnType<typeof spawn>>, label: string) {
  const anyExited = processes.some((child) => !isRuntimeProcessAlive(child));
  if (!anyExited) {
    return;
  }
  throw new Error(`${label} exited before the Browser viewer runtime was ready.`);
}
|
||||
|
||||
/**
 * Checks whether something accepts TCP connections on 127.0.0.1:`port`.
 *
 * Resolves `true` on a successful connect, and `false` on a connection error
 * or if nothing happens within 250 ms. The probe socket is always destroyed.
 */
async function isPortListening(port: number): Promise<boolean> {
  return new Promise((resolve) => {
    const probe = net.createConnection({ host: '127.0.0.1', port });
    let done = false;

    // Only the first of connect/timeout/error decides the outcome.
    const settle = (listening: boolean) => {
      if (!done) {
        done = true;
        probe.destroy();
        resolve(listening);
      }
    };

    probe.setTimeout(250);
    probe.once('connect', () => settle(true));
    probe.once('timeout', () => settle(false));
    probe.once('error', () => settle(false));
  });
}
|
||||
|
||||
/**
 * Polls until a helper process is listening on 127.0.0.1:`port`.
 *
 * Every 100 ms the tracked processes are checked for early exit and the port
 * is probed. Returns as soon as the port accepts connections.
 *
 * @param port Local port the process is expected to open.
 * @param label Process name used in error messages.
 * @param processes Processes that must stay alive while waiting.
 * @param timeoutMs Maximum time to wait (default 5 seconds).
 * @throws Error if a process exits or the port is not open before the timeout.
 */
async function waitForRuntimePort(
  port: number,
  label: string,
  processes: Array<ReturnType<typeof spawn>>,
  timeoutMs = 5_000,
) {
  const expiresAt = Date.now() + timeoutMs;

  while (Date.now() < expiresAt) {
    assertRuntimeProcessesAlive(processes, label);
    const listening = await isPortListening(port);
    if (listening) {
      return;
    }
    await delay(100);
  }

  // Prefer the more specific "exited" error over the generic timeout.
  assertRuntimeProcessesAlive(processes, label);
  throw new Error(`${label} did not start listening on 127.0.0.1:${port}.`);
}
|
||||
|
||||
/**
 * Sends SIGTERM to every given helper process. Does nothing when no list is passed.
 */
function killRuntimeProcesses(processes?: Array<ReturnType<typeof spawn>>) {
  if (!processes) {
    return;
  }
  for (const child of processes) {
    child.kill('SIGTERM');
  }
}
|
||||
|
||||
/**
 * Claims the lowest free X display name in the range `:90` to `:139` and
 * records it in `reservedDisplays`.
 *
 * @returns The reserved display name, e.g. `:90`.
 * @throws Error when all 50 slots are already reserved.
 */
function reserveDisplay(): string {
  let index = 90;
  while (index < 140) {
    const candidate = `:${index}`;
    if (reservedDisplays.has(candidate)) {
      index += 1;
      continue;
    }
    reservedDisplays.add(candidate);
    return candidate;
  }

  throw new Error('No browser display slots are available.');
}
|
||||
|
||||
/**
 * Spawns a helper process with stdin/stdout ignored and stderr piped.
 *
 * The child inherits the current environment with `options.env` layered on
 * top. Stderr output is logged only when LOG_RUNTIME_PROCESS_OUTPUT is set;
 * spawn errors are always logged as warnings.
 *
 * @returns The spawned child process.
 */
function spawnRuntimeProcess(command: string, args: string[], options: { env?: NodeJS.ProcessEnv } = {}) {
  const child = spawn(command, args, {
    stdio: ['ignore', 'ignore', 'pipe'],
    env: { ...process.env, ...options.env },
  });

  // The listener is attached even when logging is off, so stderr is always consumed.
  child.stderr?.on('data', (chunk) => {
    const text = LOG_RUNTIME_PROCESS_OUTPUT ? String(chunk).trim() : '';
    if (text) {
      console.warn(`[Browser runtime] ${path.basename(command)}: ${text}`);
    }
  });

  child.on('error', (error) => {
    console.warn(`[Browser runtime] ${path.basename(command)} failed:`, error.message);
  });

  return child;
}
|
||||
|
||||
/**
 * Starts the helper processes behind a visible (VNC-backed) browser session:
 * Xvfb on a reserved display, x11vnc exporting that display on a local port,
 * and websockify serving noVNC and bridging WebSocket traffic to x11vnc.
 *
 * @returns The reserved display, the VNC and websockify ports, the noVNC root,
 *   and the spawned processes (in start order).
 * @throws Error when a port cannot be reserved, x11vnc or noVNC is missing, or
 *   a process exits or fails to open its port. On any failure the spawned
 *   processes are sent SIGTERM and the display reservation is released.
 */
async function startVisibleRuntime(): Promise<NonNullable<RuntimeHandle['viewer']> & { processes: Array<ReturnType<typeof spawn>> }> {
  const display = reserveDisplay();
  const processes: Array<ReturnType<typeof spawn>> = [];

  try {
    // Ports are reserved inside the try block so that a failure here still
    // releases the display slot in the catch below.
    const vncPort = await findAvailablePort();
    const websockifyPort = await findAvailablePort();

    processes.push(spawnRuntimeProcess('Xvfb', [
      display,
      '-screen',
      '0',
      '1440x900x24',
      '-ac',
      '-nolisten',
      'tcp',
    ]));
    // Xvfb opens no TCP port to poll, so wait a fixed interval and then check it is still running.
    await delay(700);
    assertRuntimeProcessesAlive(processes, 'Xvfb');

    if (!fs.existsSync(X11VNC_BIN)) {
      throw new Error(`x11vnc is missing at ${X11VNC_BIN}.`);
    }
    // Drop empty entries: an empty LD_LIBRARY_PATH element (e.g. a trailing ':')
    // makes the dynamic loader search the current working directory.
    const libraryPath = [X11VNC_LIB_DIR, X11VNC_EXTRA_LIB_DIR, process.env.LD_LIBRARY_PATH]
      .filter(Boolean)
      .join(':');
    processes.push(spawnRuntimeProcess(X11VNC_BIN, [
      '-display',
      display,
      '-localhost',
      '-forever',
      '-shared',
      '-rfbport',
      String(vncPort),
      '-nopw',
      '-quiet',
    ], {
      env: {
        LD_LIBRARY_PATH: libraryPath,
      },
    }));
    await waitForRuntimePort(vncPort, 'x11vnc', processes);

    if (!fs.existsSync(path.join(NOVNC_ROOT, 'vnc.html'))) {
      throw new Error(`noVNC is missing at ${NOVNC_ROOT}.`);
    }
    processes.push(spawnRuntimeProcess(path.join(os.homedir(), '.local/bin/websockify'), [
      '--web',
      NOVNC_ROOT,
      `127.0.0.1:${websockifyPort}`,
      `127.0.0.1:${vncPort}`,
    ]));
    await waitForRuntimePort(websockifyPort, 'websockify', processes);

    return {
      display,
      vncPort,
      websockifyPort,
      noVncRoot: NOVNC_ROOT,
      processes,
    };
  } catch (error) {
    killRuntimeProcesses(processes);
    reservedDisplays.delete(display);
    throw error;
  }
}
|
||||
|
||||
const INSTALL_COMMAND_TIMEOUT_MS = Number.parseInt(
|
||||
process.env.CLOUDCLI_BROWSER_USE_INSTALL_TIMEOUT_MS || String(10 * 60 * 1000),
|
||||
10,
|
||||
@@ -350,6 +479,45 @@ function publicSession(session: BrowserUseSession): PublicBrowserUseSession {
|
||||
return publicFields;
|
||||
}
|
||||
|
||||
/**
 * Looks up the viewer details for a session.
 *
 * @returns The viewer stored on the session's runtime handle, or `null` when
 *   the session is unknown, not owned by the agent, not `ready`, or has no viewer.
 */
function getSessionViewer(sessionId: string): RuntimeHandle['viewer'] | null {
  const session = sessions.get(sessionId);
  const isReadyAgentSession = session?.ownerId === AGENT_OWNER_ID && session.status === 'ready';
  return isReadyAgentSession
    ? handles.get(sessionId)?.viewer || null
    : null;
}
|
||||
|
||||
/**
 * Issues a fresh viewer token for a session, replacing any stored one.
 * The token expires VIEWER_TOKEN_TTL_MS after creation.
 *
 * @returns The newly generated token.
 */
function createViewerToken(sessionId: string): string {
  const entry = {
    token: randomUUID(),
    expiresAt: Date.now() + VIEWER_TOKEN_TTL_MS,
  };
  viewerTokens.set(sessionId, entry);
  return entry.token;
}
|
||||
|
||||
/**
 * Forgets the viewer token stored for a session, if there is one.
 */
function deleteViewerToken(sessionId: string): void {
  viewerTokens.delete(sessionId);
}
|
||||
|
||||
/**
 * Checks a viewer token against the one stored for the session.
 *
 * The token is accepted only when the session currently has a viewer (a ready
 * agent session), a token is stored, it matches, and it has not expired.
 * An expired stored token is removed as a side effect.
 *
 * @returns `true` when the token is valid for the session, otherwise `false`.
 */
function validateViewerTokenForSession(sessionId: string, token: string | null | undefined): boolean {
  if (!token) {
    return false;
  }

  const stored = viewerTokens.get(sessionId);
  const now = Date.now();

  // Expired entries are dropped regardless of why validation fails.
  if (stored?.expiresAt && stored.expiresAt < now) {
    viewerTokens.delete(sessionId);
    return false;
  }

  if (!getSessionViewer(sessionId)) {
    return false;
  }

  return stored !== undefined && stored.token === token && !(stored.expiresAt < now);
}
|
||||
|
||||
/**
 * Collects every tracked session that belongs to the given owner.
 */
function ownerSessions(ownerId: string): BrowserUseSession[] {
  const owned: BrowserUseSession[] = [];
  for (const session of sessions.values()) {
    if (session.ownerId === ownerId) {
      owned.push(session);
    }
  }
  return owned;
}
|
||||
@@ -357,8 +525,13 @@ function ownerSessions(ownerId: string): BrowserUseSession[] {
|
||||
/**
 * Releases everything held for a session: removes its runtime handle and
 * viewer token, closes the browser context and browser (ignoring close
 * failures), sends SIGTERM to its helper processes, and frees its display slot.
 */
async function closeHandle(sessionId: string): Promise<void> {
  const handle = handles.get(sessionId);
  handles.delete(sessionId);
  deleteViewerToken(sessionId);

  if (!handle) {
    return;
  }

  const { context, browser, processes, viewer } = handle;
  await context?.close?.().catch(() => undefined);
  await browser?.close().catch(() => undefined);
  killRuntimeProcesses(processes);

  const display = viewer?.display;
  if (display) {
    reservedDisplays.delete(display);
  }
}
|
||||
|
||||
async function expireStaleSessions(now = Date.now()): Promise<void> {
|
||||
@@ -424,6 +597,11 @@ export const browserUseService = {
|
||||
const current = readSettings();
|
||||
const nextSettings = {
|
||||
enabled: typeof settings.enabled === 'boolean' ? settings.enabled : current.enabled,
|
||||
persistSessions: typeof settings.persistSessions === 'boolean' ? settings.persistSessions : current.persistSessions,
|
||||
defaultProfileName: typeof settings.defaultProfileName === 'string'
|
||||
? settings.defaultProfileName
|
||||
: current.defaultProfileName,
|
||||
browserBackend: settings.browserBackend ? normalizeBrowserBackend(settings.browserBackend) : current.browserBackend,
|
||||
};
|
||||
|
||||
const next = writeSettings(nextSettings);
|
||||
@@ -439,14 +617,28 @@ export const browserUseService = {
|
||||
async getStatus() {
|
||||
const settings = readSettings();
|
||||
const readiness = getRuntimeReadiness();
|
||||
const available = settings.enabled && readiness.playwrightInstalled && readiness.chromiumInstalled;
|
||||
const useVisibleBackend = useVisibleCamoufoxBackend(settings);
|
||||
const visibleCamoufoxReady = useVisibleBackend
|
||||
&& VISIBLE_BROWSER_ENABLED
|
||||
&& readiness.playwrightInstalled
|
||||
&& Boolean(getCamoufoxExecutablePath())
|
||||
&& fs.existsSync(X11VNC_BIN)
|
||||
&& fs.existsSync(path.join(NOVNC_ROOT, 'vnc.html'));
|
||||
const available = settings.enabled
|
||||
&& readiness.playwrightInstalled
|
||||
&& (useVisibleBackend ? visibleCamoufoxReady : readiness.chromiumInstalled);
|
||||
|
||||
return {
|
||||
enabled: settings.enabled,
|
||||
runtime: getRuntime(),
|
||||
backend: useVisibleBackend ? 'camoufox-vnc' : 'playwright',
|
||||
browserBackend: settings.browserBackend,
|
||||
available,
|
||||
playwrightInstalled: readiness.playwrightInstalled,
|
||||
chromiumInstalled: readiness.chromiumInstalled,
|
||||
camoufoxInstalled: Boolean(getCamoufoxExecutablePath()),
|
||||
noVncInstalled: fs.existsSync(path.join(NOVNC_ROOT, 'vnc.html')),
|
||||
x11vncInstalled: fs.existsSync(X11VNC_BIN),
|
||||
installInProgress: readiness.installInProgress,
|
||||
sessionCount: sessions.size,
|
||||
message: available
|
||||
@@ -505,7 +697,7 @@ export const browserUseService = {
|
||||
}
|
||||
|
||||
await expireStaleSessions();
|
||||
const profileName = normalizeProfileName(options?.profileName);
|
||||
const profileName = resolveSessionProfileName(settings, options?.profileName);
|
||||
|
||||
const now = new Date().toISOString();
|
||||
const session: BrowserUseSession = {
|
||||
@@ -521,6 +713,9 @@ export const browserUseService = {
|
||||
updatedAt: now,
|
||||
lastAction: 'create',
|
||||
message: null,
|
||||
backend: useVisibleCamoufoxBackend(settings) ? 'camoufox-vnc' : 'playwright',
|
||||
viewerUrl: null,
|
||||
viewerEmbedUrl: null,
|
||||
profileName,
|
||||
viewport: { width: 1440, height: 900 },
|
||||
cursor: null,
|
||||
@@ -532,7 +727,13 @@ export const browserUseService = {
|
||||
}
|
||||
|
||||
const readiness = getRuntimeReadiness();
|
||||
if (!settings.enabled || !readiness.playwrightInstalled || !readiness.chromiumInstalled || !readiness.playwright) {
|
||||
const useVisibleBackend = useVisibleCamoufoxBackend(settings);
|
||||
const visibleCamoufoxReady = useVisibleBackend
|
||||
&& VISIBLE_BROWSER_ENABLED
|
||||
&& Boolean(getCamoufoxExecutablePath())
|
||||
&& fs.existsSync(X11VNC_BIN)
|
||||
&& fs.existsSync(path.join(NOVNC_ROOT, 'vnc.html'));
|
||||
if (!settings.enabled || !readiness.playwrightInstalled || !readiness.playwright || (useVisibleBackend ? !visibleCamoufoxReady : !readiness.chromiumInstalled)) {
|
||||
session.message = getSetupMessage(settings, readiness);
|
||||
sessions.set(session.id, session);
|
||||
return publicSession(session);
|
||||
@@ -541,31 +742,73 @@ export const browserUseService = {
|
||||
let browser: any | undefined;
|
||||
let context: any | undefined;
|
||||
let page: any;
|
||||
const launchOptions = {
|
||||
headless: true,
|
||||
let viewer: RuntimeHandle['viewer'];
|
||||
let processes: RuntimeHandle['processes'];
|
||||
const launchOptions: Record<string, unknown> = {
|
||||
headless: !useVisibleBackend,
|
||||
args: ['--disable-dev-shm-usage'],
|
||||
};
|
||||
const contextOptions = {
|
||||
viewport: { width: 1440, height: 900 },
|
||||
serviceWorkers: 'block',
|
||||
};
|
||||
const contextOptions = useVisibleBackend
|
||||
? { viewport: null }
|
||||
: {
|
||||
viewport: { width: 1440, height: 900 },
|
||||
serviceWorkers: 'block',
|
||||
};
|
||||
|
||||
if (profileName) {
|
||||
fs.mkdirSync(PROFILE_ROOT, { recursive: true });
|
||||
context = await readiness.playwright.chromium.launchPersistentContext(getProfilePath(profileName), {
|
||||
...launchOptions,
|
||||
...contextOptions,
|
||||
});
|
||||
page = context.pages()[0] || await context.newPage();
|
||||
} else {
|
||||
browser = await readiness.playwright.chromium.launch(launchOptions);
|
||||
context = await browser.newContext(contextOptions);
|
||||
page = await context.newPage();
|
||||
try {
|
||||
if (useVisibleBackend) {
|
||||
const camoufoxExecutable = getCamoufoxExecutablePath();
|
||||
if (!camoufoxExecutable) {
|
||||
throw new Error('Camoufox is not installed.');
|
||||
}
|
||||
const runtime = await startVisibleRuntime();
|
||||
viewer = {
|
||||
display: runtime.display,
|
||||
vncPort: runtime.vncPort,
|
||||
websockifyPort: runtime.websockifyPort,
|
||||
noVncRoot: runtime.noVncRoot,
|
||||
};
|
||||
processes = runtime.processes;
|
||||
launchOptions.executablePath = camoufoxExecutable;
|
||||
launchOptions.env = {
|
||||
...process.env,
|
||||
DISPLAY: runtime.display,
|
||||
LD_LIBRARY_PATH: `${X11VNC_LIB_DIR}:${X11VNC_EXTRA_LIB_DIR}:${process.env.LD_LIBRARY_PATH || ''}`,
|
||||
};
|
||||
launchOptions.args = [];
|
||||
session.backend = 'camoufox-vnc';
|
||||
const viewerToken = createViewerToken(session.id);
|
||||
session.viewerUrl = getViewerUrl(session.id, viewerToken);
|
||||
session.viewerEmbedUrl = session.viewerUrl;
|
||||
}
|
||||
|
||||
if (profileName) {
|
||||
fs.mkdirSync(PROFILE_ROOT, { recursive: true });
|
||||
const browserType = useVisibleBackend ? readiness.playwright.firefox : readiness.playwright.chromium;
|
||||
context = await browserType.launchPersistentContext(getProfilePath(profileName), {
|
||||
...launchOptions,
|
||||
...contextOptions,
|
||||
});
|
||||
page = context.pages()[0] || await context.newPage();
|
||||
} else {
|
||||
const browserType = useVisibleBackend ? readiness.playwright.firefox : readiness.playwright.chromium;
|
||||
browser = await browserType.launch(launchOptions);
|
||||
context = await browser.newContext(contextOptions);
|
||||
page = await context.newPage();
|
||||
}
|
||||
} catch (error) {
|
||||
await context?.close?.().catch(() => undefined);
|
||||
await browser?.close?.().catch(() => undefined);
|
||||
killRuntimeProcesses(processes);
|
||||
if (viewer?.display) {
|
||||
reservedDisplays.delete(viewer.display);
|
||||
}
|
||||
throw error;
|
||||
}
|
||||
session.status = 'ready';
|
||||
session.message = 'Browser session is ready.';
|
||||
sessions.set(session.id, session);
|
||||
handles.set(session.id, { browser, context, page });
|
||||
handles.set(session.id, { browser, context, page, processes, viewer });
|
||||
await captureSession(session, page);
|
||||
return publicSession(session);
|
||||
},
|
||||
@@ -812,6 +1055,25 @@ export const browserUseService = {
|
||||
return { deleted: true, sessionId };
|
||||
},
|
||||
|
||||
/**
 * Returns where viewer requests for a session should be proxied to.
 *
 * @returns The session's websockify port and noVNC root directory.
 * @throws Error when the session has no available viewer.
 */
getViewerProxyTarget(sessionId: string) {
  const viewer = getSessionViewer(sessionId);
  if (viewer) {
    const { websockifyPort, noVncRoot } = viewer;
    return { websockifyPort, noVncRoot };
  }
  throw new Error('Browser viewer is not available for this session.');
},
|
||||
|
||||
/**
 * Public wrapper around `validateViewerTokenForSession`.
 *
 * @returns `true` when `token` is currently valid for the session.
 */
validateViewerToken(sessionId: string, token: string | null | undefined) {
  const isValid = validateViewerTokenForSession(sessionId, token);
  return isValid;
},
|
||||
|
||||
/**
 * Hands a viewer WebSocket connection to the viewer module, giving it
 * `getSessionViewer` as the session-to-viewer lookup.
 */
handleViewerWebSocket(clientWs: WebSocket, pathname: string) {
  const resolveViewer = getSessionViewer;
  handleViewerWebSocket(clientWs, pathname, resolveViewer);
},
|
||||
|
||||
async agentStopSession(sessionId: string) {
|
||||
await this.getAgentSession(sessionId);
|
||||
return this.stopSession(sessionId);
|
||||
|
||||
147
server/modules/browser-use/browser-use.settings.ts
Normal file
147
server/modules/browser-use/browser-use.settings.ts
Normal file
@@ -0,0 +1,147 @@
|
||||
import { randomBytes } from 'node:crypto';
|
||||
import fs from 'node:fs';
|
||||
import os from 'node:os';
|
||||
import path from 'node:path';
|
||||
|
||||
import { appConfigDb } from '@/modules/database/index.js';
|
||||
|
||||
import type { BrowserUseBackend, BrowserUseSettings } from './browser-use.types.js';
|
||||
|
||||
const IS_PLATFORM = process.env.VITE_IS_PLATFORM === 'true';
|
||||
const BROWSER_USE_SETTINGS_KEY = 'browser_use_settings';
|
||||
const BROWSER_USE_MCP_TOKEN_KEY = 'browser_use_mcp_token';
|
||||
const MAX_PROFILE_NAME_LENGTH = 80;
|
||||
|
||||
export const DEFAULT_BROWSER_USE_SETTINGS: BrowserUseSettings = {
|
||||
enabled: false,
|
||||
persistSessions: false,
|
||||
defaultProfileName: 'default',
|
||||
browserBackend: IS_PLATFORM ? 'camoufox-vnc' : 'playwright',
|
||||
};
|
||||
|
||||
export const PROFILE_ROOT = process.env.CLOUDCLI_BROWSER_USE_PROFILE_ROOT
|
||||
|| path.join(os.homedir(), '.cloudcli', 'browser-use', 'profiles');
|
||||
|
||||
/**
 * Coerces an arbitrary value to a supported browser backend.
 *
 * @returns `value` when it is `'playwright'` or `'camoufox-vnc'`, otherwise
 *   the default backend from DEFAULT_BROWSER_USE_SETTINGS.
 */
export function normalizeBrowserBackend(value: unknown): BrowserUseBackend {
  if (value === 'playwright' || value === 'camoufox-vnc') {
    return value;
  }
  return DEFAULT_BROWSER_USE_SETTINGS.browserBackend;
}
|
||||
|
||||
/**
 * Strips all leading and trailing `-` characters from a string.
 * Dashes in the middle are kept; an all-dash string becomes empty.
 */
function trimEdgeDashes(value: string): string {
  const isDash = (index: number) => value.charAt(index) === '-';

  let head = 0;
  let tail = value.length - 1;
  while (head <= tail && isDash(head)) {
    head += 1;
  }
  while (tail >= head && isDash(tail)) {
    tail -= 1;
  }
  return value.slice(head, tail + 1);
}
|
||||
|
||||
/**
 * Turns a requested profile name into a safe directory-style name.
 *
 * The input is trimmed and lower-cased, every run of characters outside
 * `a-z 0-9 . _ -` becomes a single `-`, edge dashes are removed, and the
 * result is capped at MAX_PROFILE_NAME_LENGTH (edge dashes are removed again
 * after truncation).
 *
 * @returns The normalized name, or `null` when nothing with at least one
 *   letter or digit remains.
 */
export function normalizeProfileName(profileName?: string | null): string | null {
  const lowered = String(profileName || '').trim().toLowerCase();
  const dashed = lowered.replace(/[^a-z0-9._-]+/g, '-');
  const candidate = trimEdgeDashes(dashed)
    .slice(0, MAX_PROFILE_NAME_LENGTH)
    .replace(/^-+|-+$/g, '');

  // An empty candidate also fails this test, so it covers the "nothing left" case.
  return /[a-z0-9]/.test(candidate) ? candidate : null;
}
|
||||
|
||||
/**
 * Normalizes a profile name, falling back to the default profile name from
 * DEFAULT_BROWSER_USE_SETTINGS when the input does not normalize to anything.
 */
export function normalizeDefaultProfileName(profileName?: string | null): string {
  const normalized = normalizeProfileName(profileName);
  if (normalized) {
    return normalized;
  }
  return DEFAULT_BROWSER_USE_SETTINGS.defaultProfileName;
}
|
||||
|
||||
/**
 * Decides which browser profile a new session should use.
 *
 * An explicitly requested name wins after normalization and validation. With
 * no requested name, the configured default profile is used only when
 * `settings.persistSessions` is on.
 *
 * @returns The profile name to use, or `null` for no named profile.
 * @throws Error when a non-blank requested name normalizes to nothing, or when
 *   `validateRequestedProfileName` rejects it.
 */
export function resolveSessionProfileName(settings: BrowserUseSettings, profileName?: string | null): string | null {
  const normalized = normalizeProfileName(profileName);
  if (normalized) {
    validateRequestedProfileName(profileName, normalized);
    return normalized;
  }

  // Something was requested but nothing usable survived normalization.
  const requestedSomething = Boolean(String(profileName || '').trim());
  if (requestedSomething) {
    throw new Error('Browser profile name must include at least one letter or number.');
  }

  if (!settings.persistSessions) {
    return null;
  }
  return normalizeDefaultProfileName(settings.defaultProfileName);
}
|
||||
|
||||
/**
 * Builds the directory path for a profile under PROFILE_ROOT. The name is
 * normalized first (falling back to the default profile name).
 */
export function getProfilePath(profileName: string): string {
  const directoryName = normalizeDefaultProfileName(profileName);
  return path.join(PROFILE_ROOT, directoryName);
}
|
||||
|
||||
/**
 * Rejects a requested profile name that would silently resolve to an existing
 * profile directory under a different spelling.
 *
 * Passes when no existing directory normalizes to `normalizedProfileName`, or
 * when both the trimmed request and the existing directory name already equal it.
 *
 * @throws Error naming the existing profile and the normalized name to use.
 */
function validateRequestedProfileName(profileName: string | null | undefined, normalizedProfileName: string): void {
  const requestedProfileName = String(profileName || '').trim();
  const existingProfileName = findExistingProfileName(normalizedProfileName);
  if (!existingProfileName) {
    return;
  }

  const isExactMatch = requestedProfileName === normalizedProfileName
    && existingProfileName === normalizedProfileName;
  if (isExactMatch) {
    return;
  }

  throw new Error(`Browser profile "${requestedProfileName}" resolves to existing profile "${existingProfileName}". Use "${normalizedProfileName}" instead.`);
}
|
||||
|
||||
/**
 * Finds the first directory under PROFILE_ROOT whose normalized name equals
 * `normalizedProfileName`.
 *
 * @returns The directory's on-disk name, or `null` when PROFILE_ROOT does not
 *   exist, nothing matches, or the directory cannot be read.
 */
function findExistingProfileName(normalizedProfileName: string): string | null {
  try {
    if (!fs.existsSync(PROFILE_ROOT)) {
      return null;
    }

    for (const entry of fs.readdirSync(PROFILE_ROOT, { withFileTypes: true })) {
      if (entry.isDirectory() && normalizeProfileName(entry.name) === normalizedProfileName) {
        return entry.name || null;
      }
    }
    return null;
  } catch {
    // Best effort: an unreadable profile root is treated as "no existing profile".
    return null;
  }
}
|
||||
|
||||
/**
 * Tells whether the settings select the visible Camoufox (VNC) backend.
 */
export function useVisibleCamoufoxBackend(settings: BrowserUseSettings): boolean {
  const { browserBackend } = settings;
  return browserBackend === 'camoufox-vnc';
}
|
||||
|
||||
/**
 * Loads the browser settings stored in the app config database.
 *
 * Stored values are normalized field by field. When nothing is stored, or the
 * stored value cannot be read or parsed, DEFAULT_BROWSER_USE_SETTINGS is
 * returned (a warning is logged in the failure case).
 */
export function readSettings(): BrowserUseSettings {
  try {
    const raw = appConfigDb.get(BROWSER_USE_SETTINGS_KEY);
    if (raw) {
      const {
        enabled,
        persistSessions,
        defaultProfileName,
        browserBackend,
      } = JSON.parse(raw) as Partial<BrowserUseSettings>;

      return {
        enabled: enabled === true,
        persistSessions: persistSessions === true,
        defaultProfileName: normalizeDefaultProfileName(defaultProfileName),
        browserBackend: normalizeBrowserBackend(browserBackend),
      };
    }
    return DEFAULT_BROWSER_USE_SETTINGS;
  } catch (error: any) {
    console.warn('[Browser] Failed to read settings:', error?.message || error);
    return DEFAULT_BROWSER_USE_SETTINGS;
  }
}
|
||||
|
||||
/**
 * Normalizes the given settings, stores them as JSON in the app config
 * database, and returns the normalized copy that was stored.
 */
export function writeSettings(settings: BrowserUseSettings): BrowserUseSettings {
  const { enabled, persistSessions, defaultProfileName, browserBackend } = settings;
  const normalized = {
    enabled: enabled === true,
    persistSessions: persistSessions === true,
    defaultProfileName: normalizeDefaultProfileName(defaultProfileName),
    browserBackend: normalizeBrowserBackend(browserBackend),
  };

  const serialized = JSON.stringify(normalized);
  appConfigDb.set(BROWSER_USE_SETTINGS_KEY, serialized);
  return normalized;
}
|
||||
|
||||
/**
 * Returns the MCP token stored in the app config database. On first use it
 * generates one (32 random bytes, hex-encoded) and stores it.
 */
export function getOrCreateMcpToken(): string {
  const stored = appConfigDb.get(BROWSER_USE_MCP_TOKEN_KEY);
  if (!stored) {
    const fresh = randomBytes(32).toString('hex');
    appConfigDb.set(BROWSER_USE_MCP_TOKEN_KEY, fresh);
    return fresh;
  }
  return stored;
}
|
||||
66
server/modules/browser-use/browser-use.types.ts
Normal file
66
server/modules/browser-use/browser-use.types.ts
Normal file
@@ -0,0 +1,66 @@
|
||||
import type { spawn } from 'node:child_process';
|
||||
|
||||
export type BrowserUseRuntime = 'cloud' | 'local';
|
||||
export type BrowserUseBackend = 'playwright' | 'camoufox-vnc';
|
||||
export type BrowserUseSessionStatus = 'ready' | 'stopped' | 'unavailable';
|
||||
|
||||
export type BrowserUseSession = {
|
||||
id: string;
|
||||
ownerId: string;
|
||||
createdBy: 'agent';
|
||||
runtime: BrowserUseRuntime;
|
||||
status: BrowserUseSessionStatus;
|
||||
url: string | null;
|
||||
title: string | null;
|
||||
screenshotDataUrl: string | null;
|
||||
createdAt: string;
|
||||
updatedAt: string;
|
||||
lastAction: string | null;
|
||||
message: string | null;
|
||||
backend: BrowserUseBackend;
|
||||
viewerUrl: string | null;
|
||||
viewerEmbedUrl: string | null;
|
||||
profileName: string | null;
|
||||
viewport: {
|
||||
width: number;
|
||||
height: number;
|
||||
} | null;
|
||||
cursor: {
|
||||
x: number;
|
||||
y: number;
|
||||
actor: 'agent';
|
||||
} | null;
|
||||
};
|
||||
|
||||
export type PublicBrowserUseSession = Omit<BrowserUseSession, 'ownerId'>;
|
||||
|
||||
export type RuntimeHandle = {
|
||||
browser?: any;
|
||||
context?: any;
|
||||
page?: any;
|
||||
processes?: Array<ReturnType<typeof spawn>>;
|
||||
viewer?: {
|
||||
display: string;
|
||||
vncPort: number;
|
||||
websockifyPort: number;
|
||||
noVncRoot: string;
|
||||
};
|
||||
};
|
||||
|
||||
export type BrowserUseSettings = {
|
||||
enabled: boolean;
|
||||
persistSessions: boolean;
|
||||
defaultProfileName: string;
|
||||
browserBackend: BrowserUseBackend;
|
||||
};
|
||||
|
||||
export type RuntimeReadiness = {
|
||||
playwright: any | null;
|
||||
playwrightInstalled: boolean;
|
||||
chromiumInstalled: boolean;
|
||||
chromiumExecutablePath: string | null;
|
||||
installInProgress: boolean;
|
||||
installMessage: string | null;
|
||||
};
|
||||
|
||||
export type RuntimeProbe = Omit<RuntimeReadiness, 'installInProgress' | 'installMessage'>;
|
||||
76
server/modules/browser-use/browser-use.viewer.ts
Normal file
76
server/modules/browser-use/browser-use.viewer.ts
Normal file
@@ -0,0 +1,76 @@
|
||||
import { WebSocket } from 'ws';
|
||||
|
||||
import type { RuntimeHandle } from './browser-use.types.js';
|
||||
|
||||
type BrowserUseViewer = NonNullable<RuntimeHandle['viewer']>;
|
||||
|
||||
export const VIEWER_COOKIE_NAME = 'browser_use_viewer_token';

const DEFAULT_VIEWER_TOKEN_TTL_MS = 30 * 60 * 1000;

/**
 * Parses a viewer-token TTL override (milliseconds, base 10).
 * Falls back to the default when the value is missing, not a number, or not positive.
 */
function resolveViewerTokenTtlMs(rawValue: string | undefined): number {
  const candidate = Number.parseInt(rawValue || String(DEFAULT_VIEWER_TOKEN_TTL_MS), 10);
  if (!Number.isFinite(candidate) || candidate <= 0) {
    return DEFAULT_VIEWER_TOKEN_TTL_MS;
  }
  return candidate;
}

/** Viewer token lifetime in ms; overridable via `CLOUDCLI_BROWSER_USE_VIEWER_TOKEN_TTL_MS`. */
export const VIEWER_TOKEN_TTL_MS = resolveViewerTokenTtlMs(
  process.env.CLOUDCLI_BROWSER_USE_VIEWER_TOKEN_TTL_MS,
);
|
||||
|
||||
/**
 * Builds the relative URL of a session's viewer page (`vnc.html`).
 *
 * The query string always carries `autoconnect`, `resize`, `reconnect` and a
 * `path` pointing at the session's `websockify` endpoint. When a viewer token
 * is given it is added both to that websockify path and as a top-level
 * `viewerToken` parameter.
 *
 * @param sessionId Session identifier (URI-encoded into the path).
 * @param viewerToken Optional viewer token.
 * @returns The relative viewer URL.
 */
export function getViewerUrl(sessionId: string, viewerToken?: string): string {
  const viewerRoot = `/api/browser-use/sessions/${encodeURIComponent(sessionId)}/viewer`;

  let socketPath = `${viewerRoot}/websockify`;
  if (viewerToken) {
    socketPath += `?viewerToken=${encodeURIComponent(viewerToken)}`;
  }

  const query = new URLSearchParams();
  query.append('autoconnect', '1');
  query.append('resize', 'scale');
  query.append('reconnect', '1');
  query.append('path', socketPath);
  if (viewerToken) {
    query.append('viewerToken', viewerToken);
  }

  return `${viewerRoot}/vnc.html?${query.toString()}`;
}
|
||||
|
||||
/**
 * Bridges a client WebSocket to the local websockify endpoint of a session viewer.
 *
 * The session id is taken from `pathname`
 * (`/api/browser-use/sessions/<id>/viewer/websockify`). When the path does not
 * match, the id is not valid percent-encoding, or no viewer is registered for
 * the session, the client is closed with code 4404. Otherwise frames are
 * relayed in both directions once the upstream connection is open.
 *
 * @param clientWs Accepted client socket.
 * @param pathname Request pathname (without query string).
 * @param getSessionViewer Looks up the viewer details for a session id.
 */
export function handleViewerWebSocket(
  clientWs: WebSocket,
  pathname: string,
  getSessionViewer: (sessionId: string) => BrowserUseViewer | null | undefined,
) {
  const match = /^\/api\/browser-use\/sessions\/([^/]+)\/viewer\/websockify\/?$/.exec(pathname);
  let sessionId = '';
  if (match) {
    try {
      sessionId = decodeURIComponent(match[1]);
    } catch {
      // Malformed percent-encoding: treat it as an unknown session instead of
      // letting the URIError escape the connection handler.
      sessionId = '';
    }
  }
  const viewer = sessionId ? getSessionViewer(sessionId) : null;
  if (!viewer) {
    clientWs.close(4404, 'Browser viewer not found');
    return;
  }

  const upstream = new WebSocket(`ws://127.0.0.1:${viewer.websockifyPort}`);
  upstream.on('open', () => {
    clientWs.on('message', (data) => {
      if (upstream.readyState === WebSocket.OPEN) {
        upstream.send(data);
      }
    });
    upstream.on('message', (data) => {
      if (clientWs.readyState === WebSocket.OPEN) {
        clientWs.send(data);
      }
    });
  });
  upstream.on('close', (code, reason) => {
    if (clientWs.readyState !== WebSocket.OPEN) {
      return;
    }
    // Only forward codes that may legally appear in a close frame. Reserved
    // codes such as 1005 (no status) and 1006 (abnormal closure) are reported
    // by the 'close' event but are rejected by close(), which would throw here.
    const sendable =
      (code >= 1000 && code <= 1003) ||
      (code >= 1007 && code <= 1011) ||
      (code >= 3000 && code <= 4999);
    if (sendable) {
      clientWs.close(code, reason);
    } else if (code === 1005) {
      clientWs.close();
    } else {
      clientWs.close(4502, 'Browser viewer upstream error');
    }
  });
  upstream.on('error', () => {
    if (clientWs.readyState === WebSocket.OPEN) {
      clientWs.close(4502, 'Browser viewer upstream error');
    }
  });
  clientWs.on('close', () => {
    // Also abort an upstream handshake that is still in flight; otherwise it
    // would complete later and stay open with no client attached.
    if (upstream.readyState === WebSocket.OPEN || upstream.readyState === WebSocket.CONNECTING) {
      upstream.close();
    }
  });
}
|
||||
2
server/modules/browser-use/index.ts
Normal file
2
server/modules/browser-use/index.ts
Normal file
@@ -0,0 +1,2 @@
|
||||
export { browserUseService } from './browser-use.service.js';
|
||||
export { VIEWER_COOKIE_NAME } from './browser-use.viewer.js';
|
||||
@@ -0,0 +1,73 @@
|
||||
import assert from 'node:assert/strict';
|
||||
import fs from 'node:fs';
|
||||
import os from 'node:os';
|
||||
import path from 'node:path';
|
||||
import test from 'node:test';
|
||||
|
||||
const originalProfileRoot = process.env.CLOUDCLI_BROWSER_USE_PROFILE_ROOT;
|
||||
const testProfileRoot = fs.mkdtempSync(path.join(os.tmpdir(), 'browser-use-profiles-'));
|
||||
process.env.CLOUDCLI_BROWSER_USE_PROFILE_ROOT = testProfileRoot;
|
||||
|
||||
const {
|
||||
getProfilePath,
|
||||
normalizeDefaultProfileName,
|
||||
normalizeProfileName,
|
||||
PROFILE_ROOT,
|
||||
resolveSessionProfileName,
|
||||
} = await import('@/modules/browser-use/browser-use.settings.js');
|
||||
|
||||
test.after(() => {
|
||||
if (originalProfileRoot === undefined) {
|
||||
delete process.env.CLOUDCLI_BROWSER_USE_PROFILE_ROOT;
|
||||
} else {
|
||||
process.env.CLOUDCLI_BROWSER_USE_PROFILE_ROOT = originalProfileRoot;
|
||||
}
|
||||
fs.rmSync(testProfileRoot, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
// Profile names must normalize to the same canonical slug everywhere they are
// used: direct normalization, default-name normalization, path resolution and
// session profile resolution.
test('browser profile names are canonicalized before storage and path resolution', () => {
  assert.equal(normalizeProfileName(' Work Profile!! '), 'work-profile');
  assert.equal(normalizeProfileName(`${'-'.repeat(100)}Work Profile`), 'work-profile');
  assert.equal(normalizeDefaultProfileName(' Work Profile!! '), 'work-profile');
  assert.equal(
    getProfilePath(' Work Profile!! '),
    // Build the expectation with path.join so the separator matches the host
    // platform instead of assuming '/'.
    // NOTE(review): assumes getProfilePath joins with node:path — confirm.
    path.join(PROFILE_ROOT, 'work-profile'),
  );
  assert.equal(
    resolveSessionProfileName({
      enabled: true,
      persistSessions: true,
      defaultProfileName: ' Work Profile!! ',
      browserBackend: 'playwright',
    }),
    'work-profile',
  );
});
|
||||
|
||||
test('browser profile aliases are rejected when the normalized profile already exists', () => {
|
||||
const profileName = `alias-test-${Date.now()}`;
|
||||
fs.mkdirSync(getProfilePath(profileName), { recursive: true });
|
||||
|
||||
try {
|
||||
assert.throws(
|
||||
() => resolveSessionProfileName({
|
||||
enabled: true,
|
||||
persistSessions: false,
|
||||
defaultProfileName: 'default',
|
||||
browserBackend: 'playwright',
|
||||
}, profileName.toUpperCase()),
|
||||
/resolves to existing profile/,
|
||||
);
|
||||
assert.equal(
|
||||
resolveSessionProfileName({
|
||||
enabled: true,
|
||||
persistSessions: false,
|
||||
defaultProfileName: 'default',
|
||||
browserBackend: 'playwright',
|
||||
}, profileName),
|
||||
profileName,
|
||||
);
|
||||
} finally {
|
||||
fs.rmSync(getProfilePath(profileName), { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
67
server/modules/computer-use/actions/raw-action-dispatcher.ts
Normal file
67
server/modules/computer-use/actions/raw-action-dispatcher.ts
Normal file
@@ -0,0 +1,67 @@
|
||||
import {
|
||||
captureScreenshot,
|
||||
executor,
|
||||
type ExecutorTarget,
|
||||
} from '@/modules/computer-use/computer-executor.js';
|
||||
import type { RawActionResult, RawComputerAction, RawActionTarget } from '@/modules/computer-use/actions/raw-action-types.js';
|
||||
|
||||
const DEFAULT_WAIT_MS = 1000;
const MAX_WAIT_MS = 10_000;

/**
 * Resolves the duration of a `wait` action in whole milliseconds.
 *
 * @param ms Requested duration; `undefined` selects the default.
 * @returns The duration clamped to `[0, MAX_WAIT_MS]` and truncated to an integer.
 * @throws Error when `ms` is given but is not a finite number.
 */
function normalizeWaitMs(ms: number | undefined): number {
  if (ms === undefined) {
    return DEFAULT_WAIT_MS;
  }
  const isUsable = Number.isFinite(ms);
  if (!isUsable) {
    throw new Error('Computer Use wait duration must be a finite number.');
  }
  const nonNegative = Math.max(0, ms);
  const capped = Math.min(MAX_WAIT_MS, nonNegative);
  return Math.trunc(capped);
}
|
||||
|
||||
/**
 * Captures a screenshot and packages it as an action result.
 * The reported display size is the captured image size when available,
 * otherwise the target's known display size.
 */
async function snapshot(target: RawActionTarget): Promise<RawActionResult> {
  const capture = await captureScreenshot();
  const displaySize = capture.size || target.displaySize;
  return { screenshotDataUrl: capture.dataUrl, displaySize };
}
|
||||
|
||||
/**
 * Executes one raw computer action through the executor and returns a fresh
 * screenshot, plus the cursor/position data relevant to that action.
 *
 * @param action The action to perform.
 * @param target Display information used to map coordinates.
 * @returns The screenshot result, with `cursor`/`position` set for pointer actions.
 * @throws Error for an unsupported action type or an invalid wait duration.
 */
export async function runRawComputerAction(
  action: RawComputerAction,
  target: RawActionTarget,
): Promise<RawActionResult> {
  const executorTarget: ExecutorTarget = { displaySize: target.displaySize };

  // Screenshot result extended with the cursor location implied by the action.
  const snapshotWithCursor = async (cursor: RawActionResult['cursor']): Promise<RawActionResult> => {
    const shot = await snapshot(target);
    return { ...shot, cursor };
  };

  switch (action.type) {
    case 'screenshot': {
      return snapshot(target);
    }
    case 'cursor_position': {
      const position = await executor.cursorPosition(executorTarget);
      const shot = await snapshot(target);
      return { ...shot, position, cursor: position };
    }
    case 'mouse_move': {
      await executor.moveTo(executorTarget, action.point);
      return snapshotWithCursor(action.point);
    }
    case 'click': {
      const isDouble = action.double === true;
      await executor.click(executorTarget, action.button, action.point, isDouble);
      return snapshotWithCursor(action.point ?? null);
    }
    case 'drag': {
      await executor.drag(executorTarget, action.from, action.to, action.button ?? 'left');
      return snapshotWithCursor(action.to);
    }
    case 'type': {
      await executor.type(action.text);
      return snapshot(target);
    }
    case 'key': {
      await executor.pressChord(action.key);
      return snapshot(target);
    }
    case 'scroll': {
      await executor.scroll(executorTarget, action.direction, action.amount ?? 3, action.point);
      return snapshotWithCursor(action.point ?? null);
    }
    case 'wait': {
      const delayMs = normalizeWaitMs(action.ms);
      await new Promise((resolve) => setTimeout(resolve, delayMs));
      return snapshot(target);
    }
    default: {
      // Compile-time exhaustiveness check; at runtime report the unknown type.
      const exhaustive: never = action;
      throw new Error(`Unsupported computer action: ${(exhaustive as { type?: string }).type || 'unknown'}`);
    }
  }
}
|
||||
28
server/modules/computer-use/actions/raw-action-types.ts
Normal file
28
server/modules/computer-use/actions/raw-action-types.ts
Normal file
@@ -0,0 +1,28 @@
|
||||
import type {
|
||||
ClickButton,
|
||||
DisplaySize,
|
||||
Point,
|
||||
ScrollDirection,
|
||||
} from '@/modules/computer-use/computer-executor.js';
|
||||
|
||||
export type RawComputerAction =
|
||||
| { type: 'screenshot' }
|
||||
| { type: 'cursor_position' }
|
||||
| { type: 'mouse_move'; point: Point }
|
||||
| { type: 'click'; button: ClickButton; point?: Point; double?: boolean }
|
||||
| { type: 'drag'; from: Point; to: Point; button?: ClickButton }
|
||||
| { type: 'type'; text: string }
|
||||
| { type: 'key'; key: string }
|
||||
| { type: 'scroll'; direction: ScrollDirection; amount?: number; point?: Point }
|
||||
| { type: 'wait'; ms?: number };
|
||||
|
||||
export type RawActionTarget = {
|
||||
displaySize: DisplaySize | null;
|
||||
};
|
||||
|
||||
export type RawActionResult = {
|
||||
screenshotDataUrl?: string | null;
|
||||
displaySize?: DisplaySize | null;
|
||||
cursor?: Point | null;
|
||||
position?: Point | null;
|
||||
};
|
||||
242
server/modules/computer-use/computer-executor.ts
Normal file
242
server/modules/computer-use/computer-executor.ts
Normal file
@@ -0,0 +1,242 @@
|
||||
import { createRequire } from 'node:module';
|
||||
|
||||
const require = createRequire(import.meta.url);
|
||||
|
||||
export type Point = { x: number; y: number };
|
||||
export type ClickButton = 'left' | 'right' | 'middle';
|
||||
export type ScrollDirection = 'up' | 'down' | 'left' | 'right';
|
||||
export type DisplaySize = { width: number; height: number };
|
||||
|
||||
export type RuntimeReadiness = {
|
||||
nut: any | null;
|
||||
screenshot: any | null;
|
||||
nutInstalled: boolean;
|
||||
screenshotInstalled: boolean;
|
||||
};
|
||||
|
||||
/**
|
||||
* Coordinate space the executor reports/accepts. The screenshot pixel space is
|
||||
* the canonical space agents and users address; it is mapped to the nut-js
|
||||
* logical mouse space before any action runs.
|
||||
*/
|
||||
export type ExecutorTarget = {
|
||||
displaySize: DisplaySize | null;
|
||||
};
|
||||
|
||||
/**
 * Loads `@nut-tree-fork/nut-js` through `require`.
 *
 * @returns The loaded module, or `null` when loading it throws.
 */
export function getNut(): any | null {
  let loaded: any | null = null;
  try {
    loaded = require('@nut-tree-fork/nut-js');
  } catch {
    loaded = null;
  }
  return loaded;
}
|
||||
|
||||
/**
 * Loads `screenshot-desktop` through `require`, unwrapping a `default` export
 * when one is present.
 *
 * @returns The module (or its default export), or `null` when loading it throws.
 */
export function getScreenshot(): any | null {
  let loaded: any;
  try {
    loaded = require('screenshot-desktop');
  } catch {
    return null;
  }
  return loaded?.default || loaded;
}
|
||||
|
||||
/**
 * Probes both optional runtime modules and reports which ones loaded.
 *
 * @returns The loaded modules (or `null`) together with install flags;
 *   the screenshot module only counts as installed when it is a function.
 */
export function getRuntimeReadiness(): RuntimeReadiness {
  const nut = getNut();
  const screenshot = getScreenshot();
  const nutInstalled = Boolean(nut);
  const screenshotInstalled = typeof screenshot === 'function';
  return { nut, screenshot, nutInstalled, screenshotInstalled };
}
|
||||
|
||||
/**
 * Reads the pixel dimensions from a PNG/JPEG buffer header without decoding it.
 *
 * @param buffer Raw image bytes.
 * @returns The image size, or `null` when the buffer is not recognized as a
 *   PNG and no JPEG Start-Of-Frame segment is found in it.
 */
export function readImageSize(buffer: Buffer): DisplaySize | null {
  // PNG: 8-byte signature, then IHDR chunk with width/height as big-endian uint32.
  if (buffer.length >= 24 && buffer[0] === 0x89 && buffer[1] === 0x50) {
    return { width: buffer.readUInt32BE(16), height: buffer.readUInt32BE(20) };
  }
  // JPEG: scan for a Start-Of-Frame marker (0xFFC0..0xFFCF, excluding C4/C8/CC).
  if (buffer.length >= 4 && buffer[0] === 0xff && buffer[1] === 0xd8) {
    let offset = 2;
    while (offset + 9 < buffer.length) {
      if (buffer[offset] !== 0xff) {
        offset += 1;
        continue;
      }
      const marker = buffer[offset + 1];
      if (marker === 0xff) {
        // Fill byte: a marker may be preceded by any number of 0xFF bytes.
        offset += 1;
        continue;
      }
      if (marker === 0x00 || marker === 0x01 || (marker >= 0xd0 && marker <= 0xd9)) {
        // Stuffed byte / TEM / RSTn / SOI / EOI carry no length field.
        offset += 2;
        continue;
      }
      if (marker >= 0xc0 && marker <= 0xcf && marker !== 0xc4 && marker !== 0xc8 && marker !== 0xcc) {
        return { height: buffer.readUInt16BE(offset + 5), width: buffer.readUInt16BE(offset + 7) };
      }
      // Any other segment: skip the marker plus its big-endian length field.
      offset += 2 + buffer.readUInt16BE(offset + 2);
    }
  }
  return null;
}
|
||||
|
||||
/**
 * Takes a PNG screenshot through the screenshot module.
 *
 * @returns The image as a `data:image/png;base64,…` URL and its pixel size
 *   (`null` when the size cannot be read from the image header).
 * @throws Error when the screenshot module is not available as a function.
 */
export async function captureScreenshot(): Promise<{ dataUrl: string; size: DisplaySize | null }> {
  const takeScreenshot = getScreenshot();
  if (typeof takeScreenshot !== 'function') {
    throw new Error('Computer Use runtime is not available.');
  }

  const png: Buffer = await takeScreenshot({ format: 'png' });
  const dataUrl = `data:image/png;base64,${png.toString('base64')}`;
  const size = readImageSize(png);
  return { dataUrl, size };
}
|
||||
|
||||
/**
 * Returns the mouse coordinate space size (logical screen pixels).
 *
 * @throws Error when the nut-js module cannot be loaded.
 */
export async function getMouseSpaceSize(): Promise<DisplaySize> {
  const nut = getNut();
  if (nut === null || !nut) {
    throw new Error('Computer Use runtime is not available.');
  }
  const { screen } = nut;
  const width = await screen.width();
  const height = await screen.height();
  return { width, height };
}
|
||||
|
||||
/**
 * Maps a point from screenshot/image space to the mouse coordinate space.
 *
 * Without a known display size the image space is taken to equal the mouse
 * space; a zero image dimension leaves that axis unscaled.
 */
export async function toMouseSpace(target: ExecutorTarget, point: Point): Promise<Point> {
  const mouseSize = await getMouseSpaceSize();
  const { width: imageWidth, height: imageHeight } = target.displaySize || mouseSize;

  const scaleX = imageWidth ? mouseSize.width / imageWidth : 1;
  const scaleY = imageHeight ? mouseSize.height / imageHeight : 1;

  const x = Math.round(point.x * scaleX);
  const y = Math.round(point.y * scaleY);
  return { x, y };
}
|
||||
|
||||
/**
 * Maps a point from the mouse coordinate space back to screenshot/image space.
 *
 * Without a known display size the image space is taken to equal the mouse
 * space; a zero mouse dimension leaves that axis unscaled.
 */
export function toImageSpace(target: ExecutorTarget, point: Point, mouseSize: DisplaySize): Point {
  const { width: imageWidth, height: imageHeight } = target.displaySize || mouseSize;

  let scaleX = 1;
  if (mouseSize.width) {
    scaleX = imageWidth / mouseSize.width;
  }
  let scaleY = 1;
  if (mouseSize.height) {
    scaleY = imageHeight / mouseSize.height;
  }

  return { x: Math.round(point.x * scaleX), y: Math.round(point.y * scaleY) };
}
|
||||
|
||||
/** Translates a button name into the matching `nut.Button` value (left by default). */
function nutButton(nut: any, button: ClickButton) {
  switch (button) {
    case 'right':
      return nut.Button.RIGHT;
    case 'middle':
      return nut.Button.MIDDLE;
    default:
      return nut.Button.LEFT;
  }
}
|
||||
|
||||
/** Lower-cased key aliases mapped to the name of the matching `nut.Key` member. */
const NUT_KEY_ALIASES = new Map<string, string>([
  ['return', 'Enter'], ['enter', 'Enter'], ['esc', 'Escape'], ['escape', 'Escape'], ['tab', 'Tab'],
  ['space', 'Space'], ['backspace', 'Backspace'], ['delete', 'Delete'], ['del', 'Delete'], ['insert', 'Insert'],
  ['up', 'Up'], ['down', 'Down'], ['left', 'Left'], ['right', 'Right'],
  ['home', 'Home'], ['end', 'End'], ['pageup', 'PageUp'], ['page_up', 'PageUp'],
  ['pagedown', 'PageDown'], ['page_down', 'PageDown'],
  ['ctrl', 'LeftControl'], ['control', 'LeftControl'], ['alt', 'LeftAlt'], ['shift', 'LeftShift'],
  ['meta', 'LeftSuper'], ['super', 'LeftSuper'], ['cmd', 'LeftSuper'], ['win', 'LeftSuper'],
  ['capslock', 'CapsLock'],
]);

/**
 * Maps a key name (xdotool-style, as Anthropic's computer tool emits) to a nut-js Key.
 *
 * Resolution order: named aliases, function keys `f1`..`f24`, single digits
 * (`Num0`..`Num9`), then single ASCII letters.
 *
 * @param nut The loaded nut-js module.
 * @param token One key name from a chord, in any letter case.
 * @returns The matching `nut.Key` member.
 * @throws Error when the token does not resolve to a defined `nut.Key` member.
 */
function nutKey(nut: any, token: string): any {
  const lower = token.toLowerCase();

  // A Map (rather than a plain object) keeps tokens such as "constructor" or
  // "toString" from matching inherited properties.
  let memberName = NUT_KEY_ALIASES.get(lower);
  if (memberName === undefined && /^f([1-9]|1[0-9]|2[0-4])$/.test(lower)) {
    memberName = `F${lower.slice(1)}`;
  }
  if (memberName === undefined && token.length === 1) {
    if (/[0-9]/.test(token)) {
      // Digits must go through Num<digit>: indexing a numeric enum with "1"
      // hits its reverse mapping and yields a member name, not a key.
      memberName = `Num${token}`;
    } else if (/[a-z]/.test(lower)) {
      memberName = token.toUpperCase();
    }
  }

  const key = memberName === undefined ? undefined : nut.Key[memberName];
  if (key === undefined) {
    throw new Error(`Unsupported key: ${token}`);
  }
  return key;
}
|
||||
|
||||
/**
 * Returns the given nut-js module, or throws the same "runtime not available"
 * error used elsewhere in this file when it is missing.
 */
function requireNutRuntime(nut: any): any {
  if (!nut) {
    throw new Error('Computer Use runtime is not available.');
  }
  return nut;
}

/**
 * The cross-platform OS executor. It is intentionally free of any server,
 * database, or session dependencies so it can run both inside the local server
 * process (OSS mode) and inside the standalone desktop agent (cloud relay).
 *
 * Every action except `configure` throws
 * `Error('Computer Use runtime is not available.')` when nut-js cannot be loaded.
 */
export const executor = {
  /**
   * Loads nut-js and applies the short auto-delays.
   * @returns The nut-js module, or `null` when it is not available.
   */
  async configure() {
    const nut = getNut();
    if (nut) {
      // Make actions responsive; the agent loop already paces itself with screenshots.
      nut.mouse.config.autoDelayMs = 2;
      nut.keyboard.config.autoDelayMs = 2;
    }
    return nut;
  },

  /** Reads the current mouse position, converted to screenshot/image space. */
  async cursorPosition(target: ExecutorTarget): Promise<Point> {
    const nut = requireNutRuntime(await this.configure());
    const mouseSize = await getMouseSpaceSize();
    const pos = await nut.mouse.getPosition();
    return toImageSpace(target, { x: pos.x, y: pos.y }, mouseSize);
  },

  /** Moves the mouse to `point` (given in screenshot/image space). */
  async moveTo(target: ExecutorTarget, point: Point): Promise<void> {
    const nut = requireNutRuntime(await this.configure());
    const dest = await toMouseSpace(target, point);
    await nut.mouse.setPosition(new nut.Point(dest.x, dest.y));
  },

  /** Clicks (or double-clicks) `button`, first moving to `point` when one is given. */
  async click(target: ExecutorTarget, button: ClickButton, point?: Point, doubleClick = false): Promise<void> {
    const nut = requireNutRuntime(await this.configure());
    if (point) {
      await this.moveTo(target, point);
    }
    if (doubleClick) {
      await nut.mouse.doubleClick(nutButton(nut, button));
    } else {
      await nut.mouse.click(nutButton(nut, button));
    }
  },

  /** Presses `button` at `from`, moves to `to`, and releases the button. */
  async drag(target: ExecutorTarget, from: Point, to: Point, button: ClickButton = 'left'): Promise<void> {
    const nut = requireNutRuntime(await this.configure());
    const start = await toMouseSpace(target, from);
    const end = await toMouseSpace(target, to);
    const heldButton = nutButton(nut, button);
    await nut.mouse.setPosition(new nut.Point(start.x, start.y));
    await nut.mouse.pressButton(heldButton);
    try {
      await nut.mouse.setPosition(new nut.Point(end.x, end.y));
    } finally {
      // Always release, so a failed move cannot leave the button held down.
      await nut.mouse.releaseButton(heldButton);
    }
  },

  /** Types `text` through the keyboard. */
  async type(text: string): Promise<void> {
    const nut = requireNutRuntime(await this.configure());
    await nut.keyboard.type(text);
  },

  /**
   * Presses a `+`-separated key chord (e.g. `ctrl+c`): keys go down in order
   * and are released in reverse order. An empty chord is a no-op.
   */
  async pressChord(chord: string): Promise<void> {
    const loaded = await this.configure();
    const tokens = chord.split('+').map((token) => token.trim()).filter(Boolean);
    if (tokens.length === 0) {
      return;
    }
    const nut = requireNutRuntime(loaded);
    const keys = tokens.map((token) => nutKey(nut, token));
    const pressed: any[] = [];
    try {
      for (const key of keys) {
        await nut.keyboard.pressKey(key);
        pressed.push(key);
      }
    } finally {
      // Release whatever went down, even when a later press failed, so no
      // modifier stays stuck.
      for (const key of pressed.reverse()) {
        await nut.keyboard.releaseKey(key);
      }
    }
  },

  /** Scrolls at least one step in `direction`, first moving to `point` when one is given. */
  async scroll(target: ExecutorTarget, direction: ScrollDirection, amount: number, point?: Point): Promise<void> {
    const nut = requireNutRuntime(await this.configure());
    if (point) {
      await this.moveTo(target, point);
    }
    const steps = Math.max(1, Math.round(amount));
    if (direction === 'up') await nut.mouse.scrollUp(steps);
    else if (direction === 'down') await nut.mouse.scrollDown(steps);
    else if (direction === 'left') await nut.mouse.scrollLeft(steps);
    else await nut.mouse.scrollRight(steps);
  },
};
|
||||
460
server/modules/computer-use/computer-semantics.service.ts
Normal file
460
server/modules/computer-use/computer-semantics.service.ts
Normal file
@@ -0,0 +1,460 @@
|
||||
import { execFile } from 'node:child_process';
|
||||
import { promisify } from 'node:util';
|
||||
|
||||
import {
|
||||
captureScreenshot,
|
||||
executor,
|
||||
type ClickButton,
|
||||
type ExecutorTarget,
|
||||
type Point,
|
||||
type ScrollDirection,
|
||||
} from '@/modules/computer-use/computer-executor.js';
|
||||
import type { SemanticAdapter } from '@/modules/computer-use/semantics/adapters/semantic-adapter.js';
|
||||
import { createMacOsSemanticAdapter } from '@/modules/computer-use/semantics/adapters/macos/macos-semantic-adapter.js';
|
||||
import { createWindowsSemanticAdapter } from '@/modules/computer-use/semantics/adapters/windows/windows-semantic-adapter.js';
|
||||
import { resolveSemanticHelper } from '@/modules/computer-use/semantics/helpers/semantic-helper-resolver.js';
|
||||
import { semanticSessionStore } from '@/modules/computer-use/semantics/semantic-session-store.js';
|
||||
import type { SemanticAppState, SemanticElement } from '@/modules/computer-use/semantics/semantic-types.js';
|
||||
|
||||
const execFileAsync = promisify(execFile);
|
||||
const MAX_APP_STATE_ELEMENTS = 250;
|
||||
let helperAdapter: SemanticAdapter | null | undefined;
|
||||
|
||||
/** Returns the trimmed value when it is a string, otherwise an empty string. */
function readString(value: unknown): string {
  if (typeof value !== 'string') {
    return '';
  }
  return value.trim();
}
|
||||
|
||||
/**
 * Extracts the trimmed `app` string from the input.
 *
 * @throws Error when `app` is missing, not a string, or blank.
 */
function requireApp(input: Record<string, unknown>): string {
  const rawApp = input.app;
  const app = typeof rawApp === 'string' ? rawApp.trim() : '';
  if (app.length === 0) {
    throw new Error('app is required.');
  }
  return app;
}
|
||||
|
||||
/** Returns the value when it is a finite number, otherwise `undefined`. */
function readNumber(value: unknown): number | undefined {
  if (typeof value === 'number' && Number.isFinite(value)) {
    return value;
  }
  return undefined;
}
|
||||
|
||||
/** Accepts `'right'` or `'middle'`; every other value maps to `'left'`. */
function readButton(value: unknown): ClickButton {
  switch (value) {
    case 'right':
    case 'middle':
      return value;
    default:
      return 'left';
  }
}
|
||||
|
||||
/**
 * Reads a click count: non-numeric or non-finite input yields 1, otherwise the
 * value is truncated to an integer and clamped to the range 1..5.
 */
function readClickCount(value: unknown): number {
  if (typeof value !== 'number' || !Number.isFinite(value)) {
    return 1;
  }
  const whole = Math.trunc(value);
  if (whole < 1) {
    return 1;
  }
  return whole > 5 ? 5 : whole;
}
|
||||
|
||||
/** Accepts `'up'`, `'left'` or `'right'`; every other value maps to `'down'`. */
function readDirection(value: unknown): ScrollDirection {
  switch (value) {
    case 'up':
    case 'left':
    case 'right':
      return value;
    default:
      return 'down';
  }
}
|
||||
|
||||
/** Returns the trimmed `sessionId` string from the input, or `'default'` when it is absent or blank. */
function readSessionId(input: Record<string, unknown>): string {
  const rawSessionId = input.sessionId;
  const trimmed = typeof rawSessionId === 'string' ? rawSessionId.trim() : '';
  return trimmed === '' ? 'default' : trimmed;
}
|
||||
|
||||
/**
 * Computes the rounded center point of an element's bounds.
 *
 * @returns The center, or `null` when the element has no bounds.
 */
function centerOf(element: SemanticElement): Point | null {
  const { bounds } = element;
  if (!bounds) {
    return null;
  }
  const centerX = bounds.x + bounds.width / 2;
  const centerY = bounds.y + bounds.height / 2;
  return { x: Math.round(centerX), y: Math.round(centerY) };
}
|
||||
|
||||
/** Looks up an element in the semantic session store (thin pass-through to `getElement`). */
function getCachedElement(sessionId: string, app: string, index: string, stateId?: string): SemanticElement | null {
  const cached = semanticSessionStore.getElement(sessionId, app, index, stateId);
  return cached;
}
|
||||
|
||||
/**
 * Resolves the point an action should target.
 *
 * Explicit finite `x`/`y` values win. Otherwise `element_index` (with an
 * optional `stateId`) is looked up in the session cache and the center of
 * that element's bounds is used.
 *
 * @returns The point, or `undefined` when neither source yields one.
 */
function getPoint(input: Record<string, unknown>, sessionId: string, app: string): Point | undefined {
  const x = readNumber(input.x);
  const y = readNumber(input.y);

  if (x === undefined || y === undefined) {
    const elementIndex = readString(input.element_index);
    if (!elementIndex) {
      return undefined;
    }
    const stateId = readString(input.stateId) || undefined;
    const element = getCachedElement(sessionId, app, elementIndex, stateId);
    if (!element) {
      return undefined;
    }
    return centerOf(element) ?? undefined;
  }

  return { x, y };
}
|
||||
|
||||
/**
 * Returns the cached semantic helper adapter, resolving it on first use.
 *
 * The result (including `null`) is memoized in `helperAdapter`. It is `null`
 * on platforms other than macOS/Windows or when the helper is not available.
 */
function getHelperAdapter(): SemanticAdapter | null {
  if (helperAdapter !== undefined) {
    return helperAdapter;
  }

  const { platform } = process;
  const isSupportedPlatform = platform === 'darwin' || platform === 'win32';

  let resolved: SemanticAdapter | null;
  if (!isSupportedPlatform || !resolveSemanticHelper().available) {
    resolved = null;
  } else if (platform === 'darwin') {
    resolved = createMacOsSemanticAdapter();
  } else {
    resolved = createWindowsSemanticAdapter();
  }

  helperAdapter = resolved;
  return resolved;
}
|
||||
|
||||
/**
 * Decides whether a helper failure should fall back to the built-in path,
 * based on case-insensitive phrases in the error message.
 */
function shouldFallbackFromHelper(error: unknown): boolean {
  const fallbackPattern = /not implemented|unavailable|not found|does not exist|timed out|not running|exited with code|failed to start/i;
  let message: string;
  if (error instanceof Error) {
    message = error.message;
  } else {
    message = String(error);
  }
  return fallbackPattern.test(message);
}
|
||||
|
||||
/**
 * Runs `operation` against the helper adapter and saves the resulting state
 * in the session store.
 *
 * @returns The saved state, or `null` when no adapter exists or the failure
 *   is one that should fall back (a warning is logged in that case).
 * @throws The original error when it is not a fallback case.
 */
async function withHelperState(
  sessionId: string,
  operation: (adapter: SemanticAdapter) => Promise<SemanticAppState>,
): Promise<SemanticAppState | null> {
  const adapter = getHelperAdapter();
  if (!adapter) {
    return null;
  }

  try {
    const state = await operation(adapter);
    return semanticSessionStore.save(sessionId, state);
  } catch (error) {
    if (!shouldFallbackFromHelper(error)) {
      throw error;
    }
    console.warn('[ComputerSemantics] Falling back from helper:', error instanceof Error ? error.message : String(error));
    return null;
  }
}
|
||||
|
||||
/**
 * Runs a command without a shell and returns its stdout.
 *
 * @param command Executable to run.
 * @param args Argument vector.
 * @param timeout Time limit in milliseconds (default 5000).
 */
async function run(command: string, args: string[], timeout = 5000): Promise<string> {
  const result = await execFileAsync(command, args, {
    timeout,
    windowsHide: true,
    maxBuffer: 1024 * 1024 * 4,
  });
  return result.stdout;
}
|
||||
|
||||
/**
 * Lists the names of macOS application processes that are not background-only,
 * using System Events via `osascript`.
 *
 * @returns One `{ name, running: true }` row per process.
 */
async function listMacApps(): Promise<Array<Record<string, unknown>>> {
  const script = [
    'tell application "System Events"',
    'set appRows to {}',
    'repeat with p in (application processes whose background only is false)',
    'set end of appRows to (name of p as text)',
    'end repeat',
    // Join with linefeeds instead of relying on osascript's ", " list
    // rendering, which is ambiguous for names that contain a comma.
    "set AppleScript's text item delimiters to linefeed",
    'return appRows as text',
    'end tell',
  ].join('\n');
  const output = await run('osascript', ['-e', script]);
  return output.split(/\r?\n/)
    .map((name) => name.trim())
    .filter(Boolean)
    .map((name) => ({ name, running: true }));
}
|
||||
|
||||
/**
 * Lists Windows processes that have a main window title, via PowerShell.
 *
 * @returns One row per process with its name, pid and window title.
 */
async function listWindowsApps(): Promise<Array<Record<string, unknown>>> {
  const script =
    'Get-Process | Where-Object { $_.MainWindowTitle } |' +
    ' ' +
    'Select-Object ProcessName, Id, MainWindowTitle | ConvertTo-Json -Depth 3';
  const output = await run('powershell.exe', ['-NoProfile', '-Command', script]);

  // ConvertTo-Json emits a bare object for a single row; normalize to an array.
  const parsed = JSON.parse(output || '[]');
  const rows = Array.isArray(parsed) ? parsed : [parsed];

  return rows.map(({ ProcessName, Id, MainWindowTitle }) => ({
    name: ProcessName,
    pid: Id,
    windowTitle: MainWindowTitle,
    running: true,
  }));
}
|
||||
|
||||
/**
 * Lists open windows on Linux via `wmctrl -lx`; when that fails, falls back to
 * the unique process names from `ps` (capped at 200).
 *
 * @returns Window rows (`windowId`, `desktop`, `host`, `className`,
 *   `windowTitle`) or, in the fallback, `{ name }` rows.
 */
async function listLinuxApps(): Promise<Array<Record<string, unknown>>> {
  try {
    const output = await run('wmctrl', ['-lx']);
    return output.split(/\r?\n/)
      .map((line) => line.trim())
      .filter(Boolean)
      .map((line) => {
        // `wmctrl -lx` columns: window id, desktop, WM_CLASS, client machine,
        // title. The class comes BEFORE the host name.
        const [windowId, desktop, className, host, ...titleParts] = line.split(/\s+/);
        return {
          windowId,
          desktop,
          host,
          className,
          windowTitle: titleParts.join(' '),
          running: true,
        };
      });
  } catch {
    // wmctrl missing or failing: fall back to a plain process list.
    const output = await run('ps', ['-eo', 'comm=']);
    return [...new Set(output.split(/\r?\n/).map((name) => name.trim()).filter(Boolean))]
      .slice(0, 200)
      .map((name) => ({ name, running: true }));
  }
}
|
||||
|
||||
/** Lists running apps using the implementation for the current platform (Linux path by default). */
async function listApps(): Promise<Array<Record<string, unknown>>> {
  switch (process.platform) {
    case 'darwin':
      return listMacApps();
    case 'win32':
      return listWindowsApps();
    default:
      return listLinuxApps();
  }
}
|
||||
|
||||
/**
 * Captures a flattened accessibility tree of a running macOS app through an
 * AppleScript run by `osascript`.
 *
 * The script walks each window's UI elements (depth limit 4, stopping around
 * `MAX_APP_STATE_ELEMENTS` rows) and emits one tab-separated row per element:
 * index, role, title, value, and `x,y,width,height` bounds. Rows are joined
 * with linefeeds so that titles or values containing ", " stay on one row.
 *
 * @param app Process name as known to System Events.
 * @returns The parsed elements; `bounds` is omitted when it could not be read.
 * @throws When `osascript` fails, including when the app is not running.
 */
async function macAccessibilityTree(app: string): Promise<SemanticElement[]> {
  // Escape backslashes and double quotes for embedding in an AppleScript string literal.
  const escapedApp = app.replace(/\\/g, '\\\\').replace(/"/g, '\\"');
  const script = `
on safeText(v)
  try
    return v as text
  on error
    return ""
  end try
end safeText

on emitElement(e, depth, maxDepth, counter)
  if depth > maxDepth then return {}
  set rows to {}
  try
    set roleText to my safeText(role of e)
  on error
    set roleText to "element"
  end try
  try
    set titleText to my safeText(title of e)
  on error
    set titleText to ""
  end try
  try
    set valueText to my safeText(value of e)
  on error
    set valueText to ""
  end try
  try
    set posValue to position of e
    set sizeValue to size of e
    set boundsText to ((item 1 of posValue) as text) & "," & ((item 2 of posValue) as text) & "," & ((item 1 of sizeValue) as text) & "," & ((item 2 of sizeValue) as text)
  on error
    set boundsText to ""
  end try
  set end of rows to ((counter as text) & tab & roleText & tab & titleText & tab & valueText & tab & boundsText)
  if counter > ${MAX_APP_STATE_ELEMENTS} then return rows
  try
    repeat with childElement in UI elements of e
      set childRows to my emitElement(childElement, depth + 1, maxDepth, counter + (count of rows))
      set rows to rows & childRows
      if (count of rows) > ${MAX_APP_STATE_ELEMENTS} then return rows
    end repeat
  end try
  return rows
end emitElement

tell application "System Events"
  if not (exists process "${escapedApp}") then error "App is not running: ${escapedApp}"
  tell process "${escapedApp}"
    set rows to {}
    repeat with w in windows
      set rows to rows & my emitElement(w, 0, 4, (count of rows) + 1)
      if (count of rows) > ${MAX_APP_STATE_ELEMENTS} then exit repeat
    end repeat
    set AppleScript's text item delimiters to linefeed
    return rows as text
  end tell
end tell
`;
  const output = await run('osascript', ['-e', script], 10000);
  return output.split(/\r?\n/)
    .map((line) => line.trim())
    .filter(Boolean)
    .map((line, index) => {
      const [rawIndex, role, title, value, boundsText] = line.split('\t');
      const boundsParts = (boundsText || '').split(',').map((part) => Number.parseFloat(part));
      const hasBounds = boundsParts.length === 4 && boundsParts.every(Number.isFinite);
      return {
        index: rawIndex || String(index + 1),
        role: role || 'element',
        title: title || undefined,
        value: value || undefined,
        bounds: hasBounds
          ? { x: boundsParts[0], y: boundsParts[1], width: boundsParts[2], height: boundsParts[3] }
          : undefined,
      };
    });
}
|
||||
|
||||
/**
 * Collects the accessibility elements of `app`.
 *
 * Only macOS ('darwin') is handled; every other platform gets an empty list
 * plus an explanatory message. A failure of the macOS capture is not thrown:
 * it is reported through `message` with an empty element list.
 */
async function getAccessibilityTree(app: string): Promise<{ elements: SemanticElement[]; message?: string }> {
  if (process.platform !== 'darwin') {
    return {
      elements: [],
      message: 'Native accessibility tree capture is not implemented for this platform yet.',
    };
  }

  try {
    const elements = await macAccessibilityTree(app);
    return { elements };
  } catch (failure) {
    const message = failure instanceof Error ? failure.message : String(failure);
    return { elements: [], message };
  }
}
|
||||
|
||||
/**
 * Produces the semantic state of `app` for the given session.
 *
 * The helper adapter is asked first (through `withHelperState`); a truthy
 * result is returned untouched. Otherwise a screenshot and the accessibility
 * tree are captured locally, combined into a new state and saved in the
 * semantic session store.
 *
 * @throws Error when `app` is empty.
 */
async function getAppState(sessionId: string, app: string): Promise<SemanticAppState> {
  if (!app) {
    throw new Error('app is required.');
  }

  const fromHelper = await withHelperState(sessionId, (adapter) => adapter.getAppState({ sessionId, app }));
  if (fromHelper) {
    return fromHelper;
  }

  // Local fallback: screenshot first, then the accessibility tree.
  const screenshot = await captureScreenshot();
  const { elements, message } = await getAccessibilityTree(app);

  const snapshot: SemanticAppState = {
    stateId: semanticSessionStore.createStateId(),
    app,
    platform: process.platform,
    screenshotDataUrl: screenshot.dataUrl,
    displaySize: screenshot.size,
    // The same element list is exposed under both keys.
    elements,
    accessibilityTree: elements,
    message,
  };
  return semanticSessionStore.save(sessionId, snapshot);
}
|
||||
|
||||
/**
 * Builds the executor target for an action: the display size of the cached
 * state when one exists, otherwise the size of a freshly captured screenshot.
 */
async function targetFor(sessionId: string, app: string, stateId?: string): Promise<ExecutorTarget> {
  const cachedSize = semanticSessionStore.getState(sessionId, app, stateId)?.displaySize;
  if (cachedSize) {
    return { displaySize: cachedSize };
  }

  const freshScreenshot = await captureScreenshot();
  return { displaySize: freshScreenshot.size };
}
|
||||
|
||||
export const computerSemanticsService = {
  /**
   * Runs one semantic Computer Use tool.
   *
   * Every app-scoped tool first offers the call to the helper adapter through
   * `withHelperState`; a truthy helper result is returned as-is. Otherwise the
   * action is carried out with `executor` and a fresh `getAppState` result is
   * returned.
   *
   * @param name  Tool name (e.g. 'click', 'drag', 'type_text').
   * @param input Raw tool arguments.
   * @throws Error for an unknown tool name or when required coordinates are missing.
   */
  async callTool(name: string, input: Record<string, unknown>): Promise<unknown> {
    const sessionId = readSessionId(input);

    if (name === 'list_apps') {
      const platform = process.platform;
      const helper = getHelperAdapter();
      if (helper) {
        try {
          const helperApps = await helper.listApps();
          return { apps: helperApps, platform };
        } catch (failure) {
          if (!shouldFallbackFromHelper(failure)) {
            throw failure;
          }
          const reason = failure instanceof Error ? failure.message : String(failure);
          console.warn('[ComputerSemantics] Falling back from helper:', reason);
        }
      }
      const localApps = await listApps();
      return { apps: localApps, platform };
    }

    if (name === 'get_app_state') {
      return getAppState(sessionId, readString(input.app));
    }

    if (name === 'click' || name === 'click_element') {
      const app = requireApp(input);
      const viaHelper = await withHelperState(sessionId, (adapter) => adapter.clickElement({ ...input, sessionId, app }));
      if (viaHelper) {
        return viaHelper;
      }

      const stateId = readString(input.stateId) || undefined;
      const clickPoint = getPoint(input, sessionId, app);
      if (!clickPoint) {
        throw new Error('click requires x/y or an element_index from computer_get_app_state.');
      }

      const target = await targetFor(sessionId, app, stateId);
      const button = readButton(input.mouse_button ?? input.mouseButton);
      const repetitions = readClickCount(input.click_count ?? input.clickCount);
      // Multi-clicks are issued as sequential single clicks.
      for (let pass = 0; pass < repetitions; pass += 1) {
        await executor.click(target, button, clickPoint, false);
      }
      return getAppState(sessionId, app);
    }

    if (name === 'drag') {
      const app = requireApp(input);
      const viaHelper = await withHelperState(sessionId, (adapter) => adapter.drag({ ...input, sessionId, app }));
      if (viaHelper) {
        return viaHelper;
      }

      const stateId = readString(input.stateId) || undefined;
      const startX = readNumber(input.from_x);
      const startY = readNumber(input.from_y);
      const endX = readNumber(input.to_x);
      const endY = readNumber(input.to_y);
      if (startX === undefined || startY === undefined || endX === undefined || endY === undefined) {
        throw new Error('drag requires from_x/from_y/to_x/to_y.');
      }

      const target = await targetFor(sessionId, app, stateId);
      const button = readButton(input.mouse_button ?? input.mouseButton);
      await executor.drag(target, { x: startX, y: startY }, { x: endX, y: endY }, button);
      return getAppState(sessionId, app);
    }

    if (name === 'scroll' || name === 'scroll_element') {
      const app = requireApp(input);
      const viaHelper = await withHelperState(sessionId, (adapter) => adapter.scrollElement({ ...input, sessionId, app }));
      if (viaHelper) {
        return viaHelper;
      }

      const stateId = readString(input.stateId) || undefined;
      const scrollPoint = getPoint(input, sessionId, app);
      if (!scrollPoint) {
        throw new Error('scroll requires x/y or an element_index from computer_get_app_state.');
      }

      const target = await targetFor(sessionId, app, stateId);
      const direction = readDirection(input.direction);
      const pages = readNumber(input.pages) ?? 1;
      await executor.scroll(target, direction, pages, scrollPoint);
      return getAppState(sessionId, app);
    }

    if (name === 'type_text') {
      const app = requireApp(input);
      const viaHelper = await withHelperState(sessionId, (adapter) => adapter.typeText({ ...input, sessionId, app }));
      if (viaHelper) {
        return viaHelper;
      }

      const text = readString(input.text);
      await executor.type(text);
      return getAppState(sessionId, app);
    }

    if (name === 'press_key') {
      const app = requireApp(input);
      const viaHelper = await withHelperState(sessionId, (adapter) => adapter.pressKey({ ...input, sessionId, app }));
      if (viaHelper) {
        return viaHelper;
      }

      const chord = readString(input.key);
      await executor.pressChord(chord);
      return getAppState(sessionId, app);
    }

    if (name === 'set_value') {
      const app = requireApp(input);
      const viaHelper = await withHelperState(sessionId, (adapter) => adapter.setValue({ ...input, sessionId, app }));
      if (viaHelper) {
        return viaHelper;
      }

      const stateId = readString(input.stateId) || undefined;
      const fieldPoint = getPoint(input, sessionId, app);
      if (!fieldPoint) {
        throw new Error('set_value requires x/y or an element_index from computer_get_app_state.');
      }

      // Fallback emulation: click the point, select everything, then type the new value.
      const target = await targetFor(sessionId, app, stateId);
      await executor.click(target, 'left', fieldPoint, false);
      const selectAllChord = process.platform === 'darwin' ? 'cmd+a' : 'ctrl+a';
      await executor.pressChord(selectAllChord);
      await executor.type(readString(input.value));
      return getAppState(sessionId, app);
    }

    if (name === 'perform_secondary_action') {
      const app = requireApp(input);
      const viaHelper = await withHelperState(sessionId, (adapter) => adapter.performSecondaryAction({ ...input, sessionId, app }));
      if (viaHelper) {
        return viaHelper;
      }

      const stateId = readString(input.stateId) || undefined;
      const menuPoint = getPoint(input, sessionId, app);
      if (!menuPoint) {
        throw new Error('perform_secondary_action requires x/y or an element_index from computer_get_app_state.');
      }

      // The fallback for a secondary action is a right click at the point.
      const target = await targetFor(sessionId, app, stateId);
      await executor.click(target, 'right', menuPoint, false);
      return getAppState(sessionId, app);
    }

    throw new Error(`Unknown semantic Computer Use tool: ${name}`);
  },
};
|
||||
141
server/modules/computer-use/computer-use-mcp.routes.ts
Normal file
141
server/modules/computer-use/computer-use-mcp.routes.ts
Normal file
@@ -0,0 +1,141 @@
|
||||
import { createHash, timingSafeEqual } from 'node:crypto';

import express from 'express';

import { computerUseService } from '@/modules/computer-use/computer-use.service.js';
import { semanticOperationForMcpTool } from '@/modules/computer-use/semantics/semantic-tool-dispatcher.js';
|
||||
|
||||
const router = express.Router();
|
||||
|
||||
/**
 * Extracts the credentials from an `Authorization: Bearer <token>` header value.
 *
 * The scheme is matched case-insensitively and must be followed by a space or
 * a tab. Returns null for non-string input, a different scheme, a missing
 * separator, or empty credentials.
 */
function readBearerToken(header: unknown): string | null {
  if (typeof header !== 'string') {
    return null;
  }

  const lowerScheme = 'bearer';
  const candidate = header.trim();
  const schemeMatches = candidate.slice(0, lowerScheme.length).toLowerCase() === lowerScheme;
  const delimiter = candidate.charAt(lowerScheme.length);
  const delimiterMatches = delimiter === ' ' || delimiter === '\t';
  if (!schemeMatches || !delimiterMatches) {
    return null;
  }

  const credentials = candidate.slice(lowerScheme.length + 1).trimStart();
  return credentials === '' ? null : credentials;
}
|
||||
|
||||
/** Normalizes a mouse-button value; anything other than 'right' or 'middle' becomes 'left'. */
function toButton(value: unknown): 'left' | 'right' | 'middle' {
  if (value === 'right') {
    return 'right';
  }
  if (value === 'middle') {
    return 'middle';
  }
  return 'left';
}
|
||||
|
||||
/** Normalizes a scroll direction; anything other than 'down', 'left' or 'right' becomes 'up'. */
function toScrollDirection(value: unknown): 'up' | 'down' | 'left' | 'right' {
  const explicitDirections = ['down', 'left', 'right'] as const;
  const matched = explicitDirections.find((direction) => direction === value);
  return matched ?? 'up';
}
|
||||
|
||||
/**
 * Reads an optional `{ x, y }` point from the tool input.
 * Returns undefined unless both `x` and `y` are of type number.
 */
function point(input: Record<string, unknown>): { x: number; y: number } | undefined {
  const { x, y } = input;
  if (typeof x !== 'number' || typeof y !== 'number') {
    return undefined;
  }
  return { x, y };
}
|
||||
|
||||
/**
 * Returns `input[name]` when it is a finite number.
 *
 * @throws Error naming the field when the value is missing, not a number, NaN or infinite.
 */
function requireNumber(input: Record<string, unknown>, name: string): number {
  const candidate = input[name];
  if (typeof candidate === 'number' && Number.isFinite(candidate)) {
    return candidate;
  }
  throw new Error(`${name} is required and must be a finite number.`);
}
|
||||
|
||||
/** Reads the mandatory `x`/`y` pair from the input; throws (via requireNumber) when either is invalid. */
function requirePoint(input: Record<string, unknown>): { x: number; y: number } {
  const x = requireNumber(input, 'x');
  const y = requireNumber(input, 'y');
  return { x, y };
}
|
||||
|
||||
/**
 * Reads a mandatory point whose coordinates live under custom field names
 * (e.g. `startX`/`startY`); throws (via requireNumber) when either is invalid.
 */
function requireNamedPoint(input: Record<string, unknown>, xName: string, yName: string): { x: number; y: number } {
  const x = requireNumber(input, xName);
  const y = requireNumber(input, yName);
  return { x, y };
}
|
||||
|
||||
/**
 * Authentication guard for every MCP route on this router.
 *
 * The caller must present the Computer Use MCP token either as a Bearer
 * credential in `Authorization` or in the `x-computer-use-mcp-token` header.
 * Requests with a missing or wrong token are answered with 401.
 */
router.use((req, res, next) => {
  const expected = computerUseService.getMcpToken();
  const token = readBearerToken(req.headers.authorization) || String(req.headers['x-computer-use-mcp-token'] || '');

  // Compare fixed-length SHA-256 digests in constant time: a plain `!==` leaks
  // how many leading characters match, and timingSafeEqual throws on buffers of
  // different length, which hashing both sides avoids.
  const digest = (secret: string): Buffer => createHash('sha256').update(secret).digest();
  const authorized =
    token !== '' &&
    typeof expected === 'string' &&
    expected !== '' &&
    timingSafeEqual(digest(token), digest(expected));

  if (!authorized) {
    res.status(401).json({ success: false, error: 'Invalid Computer Use MCP token.' });
    return;
  }
  next();
});
|
||||
|
||||
/**
 * POST /tools/:toolName: executes one Computer Use MCP tool.
 *
 * Tool names that map to a semantic operation are forwarded to
 * `callSemanticTool`; the remaining raw tools are dispatched to the matching
 * `computerUseService.agent*` method. Unknown tools yield 404 and any thrown
 * error yields 400 with the error message.
 */
router.post('/tools/:toolName', async (req, res) => {
  try {
    const input = (req.body && typeof req.body === 'object' ? req.body : {}) as Record<string, unknown>;
    const sessionId = typeof input.sessionId === 'string' ? input.sessionId : undefined;
    const toolName = req.params.toolName;

    const semanticOperation = semanticOperationForMcpTool(toolName);
    if (semanticOperation) {
      const semanticResult = await computerUseService.callSemanticTool(semanticOperation, input);
      res.json({ success: true, data: semanticResult });
      return;
    }

    let data: unknown;
    switch (toolName) {
      case 'computer_screenshot': {
        data = await computerUseService.agentScreenshot(sessionId);
        break;
      }
      case 'computer_cursor_position': {
        data = await computerUseService.agentCursorPosition(sessionId);
        break;
      }
      case 'computer_mouse_move': {
        const destination = requirePoint(input);
        data = await computerUseService.agentMouseMove(sessionId, destination);
        break;
      }
      case 'computer_click': {
        const button = toButton(input.mouseButton ?? input.mouse_button ?? input.button);
        const clickPoint = point(input);
        // camelCase wins over snake_case; default is a single click.
        let clickCount = 1;
        if (typeof input.clickCount === 'number') {
          clickCount = input.clickCount;
        } else if (typeof input.click_count === 'number') {
          clickCount = input.click_count;
        }
        data = await computerUseService.agentUnifiedClick(sessionId, { button, point: clickPoint, clickCount });
        break;
      }
      case 'computer_drag': {
        const from = requireNamedPoint(input, 'startX', 'startY');
        const to = requireNamedPoint(input, 'endX', 'endY');
        const button = toButton(input.mouseButton ?? input.mouse_button ?? input.button);
        data = await computerUseService.agentDrag(sessionId, from, to, button);
        break;
      }
      case 'computer_type': {
        const text = String(input.text || '');
        data = await computerUseService.agentType(sessionId, text);
        break;
      }
      case 'computer_key': {
        const key = String(input.key || '');
        data = await computerUseService.agentKey(sessionId, key);
        break;
      }
      case 'computer_scroll': {
        const optionalNumber = (raw: unknown): number | undefined => (typeof raw === 'number' ? raw : undefined);
        data = await computerUseService.agentScroll(sessionId, {
          direction: toScrollDirection(input.direction),
          amount: optionalNumber(input.amount),
          x: optionalNumber(input.x),
          y: optionalNumber(input.y),
        });
        break;
      }
      case 'computer_wait': {
        const timeoutMs = typeof input.timeoutMs === 'number' ? input.timeoutMs : undefined;
        data = await computerUseService.agentWait(sessionId, timeoutMs);
        break;
      }
      case 'computer_close_session': {
        data = await computerUseService.agentStopSession(sessionId);
        break;
      }
      default: {
        res.status(404).json({ success: false, error: `Unknown Computer Use MCP tool "${toolName}".` });
        return;
      }
    }

    res.json({ success: true, data });
  } catch (failure) {
    const message = failure instanceof Error ? failure.message : 'Computer Use MCP tool failed.';
    res.status(400).json({ success: false, error: message });
  }
});
|
||||
|
||||
export default router;
|
||||
211
server/modules/computer-use/computer-use.routes.ts
Normal file
211
server/modules/computer-use/computer-use.routes.ts
Normal file
@@ -0,0 +1,211 @@
|
||||
import express from 'express';
|
||||
|
||||
import { computerUseService } from '@/modules/computer-use/computer-use.service.js';
|
||||
import { AppError } from '@/shared/utils.js';
|
||||
|
||||
const router = express.Router();
|
||||
|
||||
/**
 * Express request that may carry a `user` object with an `id`.
 * NOTE(review): presumably attached by an upstream auth middleware that is not visible here.
 */
type AuthenticatedRequest = express.Request & { user?: { id?: string | number } };
|
||||
|
||||
/**
 * Returns the authenticated user's id wrapped as an owner object.
 *
 * @throws AppError (401, code AUTHENTICATED_USER_REQUIRED) when the request has no
 *         user id or the id is blank.
 */
function requireUser(req: AuthenticatedRequest): { id: string | number } {
  const id = req.user?.id;
  if (id !== undefined && id !== null && String(id).trim() !== '') {
    return { id };
  }

  throw new AppError('Authenticated user is required.', {
    code: 'AUTHENTICATED_USER_REQUIRED',
    statusCode: 401,
  });
}
|
||||
|
||||
/**
 * Chooses the HTTP status for a failed request.
 *
 * An AppError supplies its own `statusCode`. Any other object is honoured when
 * its `statusCode` (or, if that property is absent, `status`) is an integer in
 * 400..599. Everything else maps to `fallbackStatusCode`.
 */
function getErrorStatusCode(error: unknown, fallbackStatusCode: number): number {
  if (error instanceof AppError) {
    return error.statusCode;
  }
  if (!error || typeof error !== 'object') {
    return fallbackStatusCode;
  }

  // `statusCode` takes precedence whenever the property exists, even if its value is unusable.
  let candidate: unknown;
  if ('statusCode' in error) {
    candidate = error.statusCode;
  } else if ('status' in error) {
    candidate = error.status;
  }

  if (typeof candidate === 'number' && Number.isInteger(candidate) && candidate >= 400 && candidate <= 599) {
    return candidate;
  }
  return fallbackStatusCode;
}
|
||||
|
||||
/** Collapses a route parameter to a string: first entry of an array, '' when missing or empty. */
function readParam(value: string | string[] | undefined): string {
  const first = Array.isArray(value) ? value[0] : value;
  return first || '';
}
|
||||
|
||||
/** Maps a request value to a mouse button, defaulting to 'left' for anything but 'right'/'middle'. */
function toButton(value: unknown): 'left' | 'right' | 'middle' {
  const nonDefaultButtons = ['right', 'middle'] as const;
  const matched = nonDefaultButtons.find((candidate) => candidate === value);
  return matched ?? 'left';
}
|
||||
|
||||
/** GET /status: returns the Computer Use status; failures are reported as 500. */
router.get('/status', async (_req, res) => {
  try {
    const data = await computerUseService.getStatus();
    res.json({ success: true, data });
  } catch (failure) {
    const message = failure instanceof Error ? failure.message : 'Failed to load Computer Use status.';
    res.status(500).json({ success: false, error: message });
  }
});
|
||||
|
||||
/** GET /settings: returns the Computer Use settings for an authenticated user (fallback status 500). */
router.get('/settings', async (req: AuthenticatedRequest, res) => {
  try {
    requireUser(req);
    const settings = await computerUseService.getSettings();
    res.json({ success: true, data: { settings } });
  } catch (failure) {
    const message = failure instanceof Error ? failure.message : 'Failed to load Computer Use settings.';
    res.status(getErrorStatusCode(failure, 500)).json({ success: false, error: message });
  }
});
|
||||
|
||||
/** PUT /settings: stores the settings from the request body for an authenticated user (fallback status 400). */
router.put('/settings', async (req: AuthenticatedRequest, res) => {
  try {
    requireUser(req);
    const payload = req.body || {};
    const settings = await computerUseService.updateSettings(payload);
    res.json({ success: true, data: { settings } });
  } catch (failure) {
    const message = failure instanceof Error ? failure.message : 'Failed to save Computer Use settings.';
    res.status(getErrorStatusCode(failure, 400)).json({ success: false, error: message });
  }
});
|
||||
|
||||
/**
 * POST /runtime/install: installs the desktop control runtime.
 * Responds 200 when the install result reports success, 500 (with the result
 * message as `error`) when it does not.
 */
router.post('/runtime/install', async (req: AuthenticatedRequest, res) => {
  try {
    requireUser(req);
    const result = await computerUseService.installRuntime();
    const { success } = result;
    const status = success ? 200 : 500;
    const error = success ? undefined : result.message;
    res.status(status).json({ success, data: result, error });
  } catch (failure) {
    const message = failure instanceof Error ? failure.message : 'Failed to install Computer Use runtime.';
    res.status(getErrorStatusCode(failure, 500)).json({ success: false, error: message });
  }
});
|
||||
|
||||
/** GET /sessions: lists the Computer Use sessions visible to the authenticated user (fallback status 500). */
router.get('/sessions', async (req: AuthenticatedRequest, res) => {
  try {
    const owner = requireUser(req);
    const sessions = await computerUseService.listSessions(owner);
    res.json({ success: true, data: { sessions } });
  } catch (failure) {
    const message = failure instanceof Error ? failure.message : 'Failed to list Computer Use sessions.';
    res.status(getErrorStatusCode(failure, 500)).json({ success: false, error: message });
  }
});
|
||||
|
||||
/**
 * POST /sessions/:sessionId/screenshot: captures the screen for a session.
 *
 * Errors carrying an HTTP status (e.g. the 401 thrown by requireUser) keep that
 * status instead of being flattened to 400, matching the settings routes.
 */
router.post('/sessions/:sessionId/screenshot', async (req: AuthenticatedRequest, res) => {
  try {
    const session = await computerUseService.userScreenshot(requireUser(req), readParam(req.params.sessionId));
    res.json({ success: true, data: { session } });
  } catch (error) {
    res.status(getErrorStatusCode(error, 400)).json({
      success: false,
      error: error instanceof Error ? error.message : 'Failed to capture the screen.',
    });
  }
});
|
||||
|
||||
/**
 * POST /sessions/:sessionId/click: performs a user click at body `x`/`y`.
 *
 * Body: `x`, `y` (numbers or numeric strings), optional `button`
 * ('left' | 'right' | 'middle') and `double` (must be exactly `true`).
 * Responds 400 for invalid coordinates; other failures use the status carried
 * by the error (e.g. 401 from requireUser) and fall back to 400.
 */
router.post('/sessions/:sessionId/click', async (req: AuthenticatedRequest, res) => {
  try {
    // Authenticate first so anonymous callers get 401 rather than payload feedback.
    const owner = requireUser(req);

    // Number(null), Number('') and Number(true) coerce to 0/1, which would silently
    // click near the screen origin; only real numbers and non-blank strings are parsed.
    const toCoordinate = (raw: unknown): number =>
      typeof raw === 'number' || (typeof raw === 'string' && raw.trim() !== '') ? Number(raw) : Number.NaN;

    const x = toCoordinate(req.body?.x);
    const y = toCoordinate(req.body?.y);
    if (!Number.isFinite(x) || !Number.isFinite(y)) {
      res.status(400).json({
        success: false,
        error: 'Valid numeric coordinates are required.',
      });
      return;
    }

    const session = await computerUseService.userClick(owner, readParam(req.params.sessionId), {
      x,
      y,
      button: toButton(req.body?.button),
      double: req.body?.double === true,
    });
    res.json({ success: true, data: { session } });
  } catch (error) {
    res.status(getErrorStatusCode(error, 400)).json({
      success: false,
      error: error instanceof Error ? error.message : 'Failed to click.',
    });
  }
});
|
||||
|
||||
/**
 * POST /sessions/:sessionId/press-key: sends the key named in body `key`.
 *
 * Errors carrying an HTTP status (e.g. the 401 thrown by requireUser) keep that
 * status instead of being flattened to 400, matching the settings routes.
 */
router.post('/sessions/:sessionId/press-key', async (req: AuthenticatedRequest, res) => {
  try {
    const session = await computerUseService.userPressKey(requireUser(req), readParam(req.params.sessionId), String(req.body?.key || ''));
    res.json({ success: true, data: { session } });
  } catch (error) {
    res.status(getErrorStatusCode(error, 400)).json({
      success: false,
      error: error instanceof Error ? error.message : 'Failed to send key input.',
    });
  }
});
|
||||
|
||||
/**
 * POST /sessions/:sessionId/consent/grant: grants agent access to the session.
 *
 * Errors carrying an HTTP status (e.g. the 401 thrown by requireUser) keep that
 * status instead of being flattened to 400, matching the settings routes.
 */
router.post('/sessions/:sessionId/consent/grant', async (req: AuthenticatedRequest, res) => {
  try {
    const session = await computerUseService.grantAgentAccess(requireUser(req), readParam(req.params.sessionId));
    res.json({ success: true, data: { session } });
  } catch (error) {
    res.status(getErrorStatusCode(error, 400)).json({
      success: false,
      error: error instanceof Error ? error.message : 'Failed to grant control.',
    });
  }
});
|
||||
|
||||
/**
 * POST /sessions/:sessionId/consent/revoke: revokes agent access to the session.
 *
 * Errors carrying an HTTP status (e.g. the 401 thrown by requireUser) keep that
 * status instead of being flattened to 400, matching the settings routes.
 */
router.post('/sessions/:sessionId/consent/revoke', async (req: AuthenticatedRequest, res) => {
  try {
    const session = await computerUseService.revokeAgentAccess(requireUser(req), readParam(req.params.sessionId));
    res.json({ success: true, data: { session } });
  } catch (error) {
    res.status(getErrorStatusCode(error, 400)).json({
      success: false,
      error: error instanceof Error ? error.message : 'Failed to revoke control.',
    });
  }
});
|
||||
|
||||
/**
 * POST /sessions/:sessionId/stop: stops the session.
 *
 * Errors carrying an HTTP status (e.g. the 401 thrown by requireUser) keep that
 * status instead of being flattened to 400, matching the settings routes.
 */
router.post('/sessions/:sessionId/stop', async (req: AuthenticatedRequest, res) => {
  try {
    const result = await computerUseService.stopSession(requireUser(req), readParam(req.params.sessionId));
    res.json({ success: true, data: result });
  } catch (error) {
    res.status(getErrorStatusCode(error, 400)).json({
      success: false,
      error: error instanceof Error ? error.message : 'Failed to stop Computer Use session.',
    });
  }
});
|
||||
|
||||
/**
 * DELETE /sessions/:sessionId: deletes the session.
 *
 * Errors carrying an HTTP status (e.g. the 401 thrown by requireUser) keep that
 * status instead of being flattened to 400, matching the settings routes.
 */
router.delete('/sessions/:sessionId', async (req: AuthenticatedRequest, res) => {
  try {
    const result = await computerUseService.deleteSession(requireUser(req), readParam(req.params.sessionId));
    res.json({ success: true, data: result });
  } catch (error) {
    res.status(getErrorStatusCode(error, 400)).json({
      success: false,
      error: error instanceof Error ? error.message : 'Failed to delete Computer Use session.',
    });
  }
});
|
||||
|
||||
export default router;
|
||||
920
server/modules/computer-use/computer-use.service.ts
Normal file
920
server/modules/computer-use/computer-use.service.ts
Normal file
@@ -0,0 +1,920 @@
|
||||
import { randomBytes, randomUUID } from 'node:crypto';
|
||||
import { spawn } from 'node:child_process';
|
||||
import fs from 'node:fs';
|
||||
import path from 'node:path';
|
||||
|
||||
import { appConfigDb } from '@/modules/database/index.js';
|
||||
import { providerMcpService } from '@/modules/providers/index.js';
|
||||
import { getModuleDir } from '@/utils/runtime-paths.js';
|
||||
import {
|
||||
getRuntimeReadiness as getExecutorReadiness,
|
||||
type Point,
|
||||
type ClickButton,
|
||||
type ScrollDirection,
|
||||
} from '@/modules/computer-use/computer-executor.js';
|
||||
import { runRawComputerAction } from '@/modules/computer-use/actions/raw-action-dispatcher.js';
|
||||
import type { RawComputerAction } from '@/modules/computer-use/actions/raw-action-types.js';
|
||||
import { desktopAgentRelay } from '@/modules/computer-use/desktop-agent-relay.service.js';
|
||||
import { computerSemanticsService } from '@/modules/computer-use/computer-semantics.service.js';
|
||||
import { semanticOperationNames } from '@/modules/computer-use/semantics/semantic-tool-dispatcher.js';
|
||||
|
||||
// Directory of this module, resolved from its import.meta URL.
const __dirname = getModuleDir(import.meta.url);

// True only when VITE_IS_PLATFORM is exactly the string 'true'.
const IS_PLATFORM = process.env.VITE_IS_PLATFORM === 'true';

// Numeric limits, each overridable through an environment variable. An unset or
// empty variable uses the default; an unparsable value yields NaN.
const MAX_SESSIONS_PER_OWNER = Number.parseInt(
  process.env.CLOUDCLI_COMPUTER_USE_MAX_SESSIONS_PER_OWNER || '1',
  10,
);
const SESSION_TTL_MS = Number.parseInt(
  process.env.CLOUDCLI_COMPUTER_USE_SESSION_TTL_MS || String(30 * 60 * 1000), // 30 minutes
  10,
);
const STOPPED_SESSION_RETENTION_MS = Number.parseInt(
  process.env.CLOUDCLI_COMPUTER_USE_STOPPED_SESSION_RETENTION_MS || String(30 * 60 * 1000), // 30 minutes
  10,
);
const MAX_STORED_SESSIONS = Number.parseInt(
  process.env.CLOUDCLI_COMPUTER_USE_MAX_STORED_SESSIONS || '100',
  10,
);

// Keys under which settings and the MCP token are persisted in appConfigDb.
const COMPUTER_USE_SETTINGS_KEY = 'computer_use_settings';
const COMPUTER_USE_MCP_TOKEN_KEY = 'computer_use_mcp_token';
|
||||
/** Runtime flavour of a session, as reported by getRuntime(). */
type ComputerUseRuntime = 'cloud' | 'local';

/** Lifecycle state of a session. */
type ComputerUseSessionStatus = 'ready' | 'stopped' | 'unavailable';

/** In-memory record of one Computer Use session. */
type ComputerUseSession = {
  id: string;
  ownerId: string;
  createdBy: 'user' | 'agent';
  runtime: ComputerUseRuntime;
  status: ComputerUseSessionStatus;
  screenshotDataUrl: string | null;
  // createdAt / updatedAt are passed to Date.parse elsewhere in this file, so they are date strings.
  createdAt: string;
  updatedAt: string;
  lastAction: string | null;
  message: string | null;
  /** Per-session consent: agents may act only while this is true. */
  agentAccessEnabled: boolean;
  /** Size of the captured screenshot in pixels — the coordinate space agents/users use. */
  displaySize: { width: number; height: number } | null;
  cursor: { x: number; y: number; actor: 'agent' | 'user' } | null;
};

/** Session shape returned to callers: everything except `ownerId`. */
type PublicComputerUseSession = Omit<ComputerUseSession, 'ownerId'>;

/** Identity on whose behalf a session operation runs. */
type ComputerUseOwner = {
  id: string | number;
};

/** Persisted Computer Use settings. */
type ComputerUseSettings = {
  enabled: boolean;
};

/** Executor readiness plus the state of the runtime installation. */
type RuntimeReadiness = {
  nut: any | null;
  screenshot: any | null;
  nutInstalled: boolean;
  screenshotInstalled: boolean;
  installInProgress: boolean;
  installMessage: string | null;
};
|
||||
|
||||
// All known sessions, keyed by session id (in-memory only).
const sessions: Map<string, ComputerUseSession> = new Map();

// Pending runtime installation, or null when none is running.
let installPromise: Promise<{ success: boolean; message: string }> | null = null;

// Message left behind by the most recent installation attempt.
let lastInstallMessage: string | null = null;

// Settings used when nothing valid is stored: Computer Use is off.
const DEFAULT_SETTINGS: ComputerUseSettings = { enabled: false };

// Owner id under which agent-created sessions are looked up.
const AGENT_OWNER_ID = 'agent';

const MCP_SERVER_NAME = 'cloudcli-computer-use';
const MCP_PROVIDERS = [
  'claude',
  'codex',
  'cursor',
  'gemini',
  'opencode',
];
|
||||
|
||||
/** Reports 'cloud' when IS_PLATFORM is set, otherwise 'local'. */
function getRuntime(): ComputerUseRuntime {
  if (IS_PLATFORM) {
    return 'cloud';
  }
  return 'local';
}
|
||||
|
||||
/**
 * Loads the persisted Computer Use settings.
 *
 * Falls back to the defaults when nothing is stored or when the stored value
 * cannot be read or parsed (the failure is logged as a warning). `enabled` is
 * true only when the stored value is exactly `true`.
 *
 * @returns A fresh settings object on every call.
 */
function readSettings(): ComputerUseSettings {
  try {
    const raw = appConfigDb.get(COMPUTER_USE_SETTINGS_KEY);
    if (!raw) {
      // Hand out a copy: returning DEFAULT_SETTINGS itself would let a caller's
      // mutation change the defaults for every later read.
      return { ...DEFAULT_SETTINGS };
    }

    const parsed = JSON.parse(raw) as Partial<ComputerUseSettings>;
    return {
      enabled: parsed.enabled === true,
    };
  } catch (error: any) {
    console.warn('[Computer Use] Failed to read settings:', error?.message || error);
    return { ...DEFAULT_SETTINGS };
  }
}
|
||||
|
||||
/**
 * Persists the settings as JSON and returns the normalized copy that was stored.
 * `enabled` is coerced to a strict boolean (true only for exactly `true`).
 */
function writeSettings(settings: ComputerUseSettings): ComputerUseSettings {
  const enabled = settings.enabled === true;
  const normalized = { enabled };
  const serialized = JSON.stringify(normalized);
  appConfigDb.set(COMPUTER_USE_SETTINGS_KEY, serialized);
  return normalized;
}
|
||||
|
||||
/**
 * Returns the stored MCP token, creating and persisting a new one
 * (32 random bytes, hex-encoded) when none is stored yet.
 */
function getOrCreateMcpToken(): string {
  const stored = appConfigDb.get(COMPUTER_USE_MCP_TOKEN_KEY);
  if (stored) {
    return stored;
  }

  const generated = randomBytes(32).toString('hex');
  appConfigDb.set(COMPUTER_USE_MCP_TOKEN_KEY, generated);
  return generated;
}
|
||||
|
||||
/**
 * Picks the setup hint to show, checked in this order: feature disabled,
 * cloud runtime, runtime packages missing, then the last install message
 * (or a generic "not ready" text).
 */
function getSetupMessage(settings: ComputerUseSettings, readiness: RuntimeReadiness): string {
  if (!settings.enabled) {
    return 'Computer Use is disabled in settings.';
  }

  const runtime = getRuntime();
  if (runtime === 'cloud') {
    return 'Open CloudCLI Desktop on this computer, connect the same account, and enable Computer Use.';
  }

  const runtimeInstalled = readiness.nutInstalled && readiness.screenshotInstalled;
  if (!runtimeInstalled) {
    return 'Install the desktop control runtime to capture the screen and drive the mouse and keyboard.';
  }

  return readiness.installMessage || 'Computer Use runtime is not ready.';
}
|
||||
|
||||
/**
 * Resolves the command that starts the Computer Use MCP server: the
 * `computer-use-mcp.js` script two directories above this module (run with the
 * current Node executable) when that file exists, otherwise the `cloudcli` CLI.
 */
function getMcpCommand(): { command: string; args: string[] } {
  const bundledScript = path.join(path.resolve(__dirname, '..', '..'), 'computer-use-mcp.js');

  return fs.existsSync(bundledScript)
    ? { command: process.execPath, args: [bundledScript] }
    : { command: 'cloudcli', args: ['computer-use-mcp'] };
}
|
||||
|
||||
/**
 * Builds the loopback URL of the Computer Use MCP API.
 * The port comes from SERVER_PORT, then PORT, then defaults to 3001.
 */
function getMcpApiUrl(): string {
  const { SERVER_PORT, PORT } = process.env;
  const port = SERVER_PORT || PORT || '3001';
  return `http://127.0.0.1:${port}/api/computer-use-mcp`;
}
|
||||
|
||||
/** Extends the executor's readiness report with the current installation state. */
function getRuntimeReadiness(): RuntimeReadiness {
  const executorReadiness = getExecutorReadiness();
  const installInProgress = Boolean(installPromise);
  return {
    ...executorReadiness,
    installInProgress,
    installMessage: lastInstallMessage,
  };
}
|
||||
|
||||
/**
 * Runs a command to completion in the current working directory.
 *
 * @param command Executable to launch.
 * @param args    Arguments passed to it. On Windows `.cmd`/`.bat` launches these are
 *                handed to the shell unescaped, so pass trusted values only.
 * @returns Resolves on exit code 0.
 * @throws Rejects with the combined stdout/stderr text (or a generic
 *         "exited with code" message) on a non-zero exit, and with the spawn
 *         error when the process cannot be started.
 */
function runCommand(command: string, args: string[]): Promise<void> {
  // Current Node releases refuse to spawn .cmd/.bat files on Windows without a
  // shell (EINVAL), which made `npm.cmd` launches fail. Use a shell only there.
  const needsShell = process.platform === 'win32' && /\.(cmd|bat)$/i.test(command);

  return new Promise((resolve, reject) => {
    const child = spawn(command, args, {
      cwd: process.cwd(),
      env: process.env,
      shell: needsShell,
      stdio: ['ignore', 'pipe', 'pipe'],
    });
    // stdout and stderr are interleaved so the error message shows them in arrival order.
    const output: string[] = [];

    child.stdout.on('data', (chunk) => output.push(String(chunk)));
    child.stderr.on('data', (chunk) => output.push(String(chunk)));
    child.on('error', reject);
    child.on('close', (code) => {
      if (code === 0) {
        resolve();
        return;
      }

      reject(new Error(output.join('').trim() || `${command} ${args.join(' ')} exited with code ${code}`));
    });
  });
}
|
||||
|
||||
/**
 * Turns an installation failure into a user-facing message.
 *
 * On Linux, output mentioning the X11/PNG/screenshot tooling is replaced by a
 * hint about the required system packages. Otherwise the error text is
 * returned, or a generic message when that text is empty.
 */
function formatInstallError(error: unknown): string {
  const message = error instanceof Error ? error.message : String(error);

  const missingSystemPackages =
    process.platform === 'linux' && /libxtst|x11|xtst|libpng|imagemagick|scrot/i.test(message);
  if (!missingSystemPackages) {
    return message || 'Failed to install the Computer Use runtime.';
  }

  const hintParts = [
    'Installing the desktop control runtime needs system packages.',
    'On Debian/Ubuntu run: sudo apt-get install -y libxtst-dev libpng-dev imagemagick',
    'then try again.',
  ];
  return hintParts.join(' ');
}
|
||||
|
||||
/** True when running under Electron (process.versions.electron is set) with ELECTRON_RUN_AS_NODE=1. */
function isPackagedElectronNodeRuntime(): boolean {
  const runningAsNode = process.env.ELECTRON_RUN_AS_NODE === '1';
  return runningAsNode && Boolean(process.versions.electron);
}
|
||||
|
||||
/**
 * Installs the desktop control runtime packages with npm.
 *
 * - A running installation is shared: concurrent callers get the same promise.
 * - Nothing is installed when both packages are already available, or when
 *   running inside a packaged Electron build (reported as a failure).
 * - Failures never throw; they are returned as `{ success: false, message }`.
 *   `lastInstallMessage` always mirrors the returned message.
 */
async function installRuntime(): Promise<{ success: boolean; message: string }> {
  if (installPromise) {
    return installPromise;
  }

  const { nutInstalled, screenshotInstalled } = getExecutorReadiness();
  if (nutInstalled && screenshotInstalled) {
    lastInstallMessage = 'Computer Use runtime is available.';
    return { success: true, message: lastInstallMessage };
  }

  if (isPackagedElectronNodeRuntime()) {
    lastInstallMessage = 'Computer Use runtime was not bundled with this desktop build.';
    return { success: false, message: lastInstallMessage };
  }

  const npmBinary = process.platform === 'win32' ? 'npm.cmd' : 'npm';
  const npmArguments = [
    'install',
    '--no-save',
    '--no-package-lock',
    '@nut-tree-fork/nut-js',
    'screenshot-desktop',
  ];

  const performInstall = async (): Promise<{ success: boolean; message: string }> => {
    try {
      lastInstallMessage = 'Installing desktop control runtime…';
      await runCommand(npmBinary, npmArguments);

      lastInstallMessage = 'Computer Use runtime installed.';
      return { success: true, message: lastInstallMessage };
    } catch (failure) {
      lastInstallMessage = formatInstallError(failure);
      return { success: false, message: lastInstallMessage };
    }
  };

  installPromise = performInstall();
  try {
    return await installPromise;
  } finally {
    // Clear the marker so a later call can start a new attempt.
    installPromise = null;
  }
}
|
||||
|
||||
/**
 * Returns the owner's id as a string.
 *
 * @throws Error when the id is undefined, null or blank after trimming.
 */
function getOwnerId(owner: ComputerUseOwner): string {
  const { id } = owner;
  const idText = id === undefined || id === null ? '' : String(id);
  if (idText.trim() === '') {
    throw new Error('Authenticated user is required.');
  }
  return idText;
}
|
||||
|
||||
/** Returns a copy of the session without `ownerId`; the input object is left untouched. */
function publicSession(session: ComputerUseSession): PublicComputerUseSession {
  const { ownerId: _omittedOwner, ...visibleFields } = session;
  return visibleFields;
}
|
||||
|
||||
/** Returns all stored sessions that belong to `ownerId`, in map iteration order. */
function ownerSessions(ownerId: string): ComputerUseSession[] {
  const owned: ComputerUseSession[] = [];
  for (const session of sessions.values()) {
    if (session.ownerId === ownerId) {
      owned.push(session);
    }
  }
  return owned;
}
|
||||
|
||||
/**
 * Access rule for a session: the caller owns it, or the session is owned by
 * the shared agent owner (`AGENT_OWNER_ID`).
 */
function canAccessSession(ownerId: string, session: ComputerUseSession): boolean {
  if (session.ownerId === ownerId) {
    return true;
  }
  return session.ownerId === AGENT_OWNER_ID;
}
|
||||
|
||||
/**
 * Normalizes an optional session id.
 *
 * @returns The trimmed id, or `null` when the input is not a string or is
 *          empty after trimming.
 */
function normalizeSessionId(sessionId?: string | null): string | null {
  const candidate = typeof sessionId === 'string' ? sessionId.trim() : '';
  return candidate === '' ? null : candidate;
}
|
||||
|
||||
/**
 * Finds the most recently updated `ready` session owned by the agent owner.
 *
 * Uses a single pass instead of sorting the whole list. A session whose
 * `updatedAt` cannot be parsed ranks below every parseable one, so the result
 * does not depend on how a sort comparator treats NaN. Ties keep the
 * earliest-inserted session.
 *
 * @returns The newest ready agent session, or `null` when there is none.
 */
function findActiveAgentSession(): ComputerUseSession | null {
  let newest: ComputerUseSession | null = null;
  let newestTime = Number.NEGATIVE_INFINITY;

  for (const session of ownerSessions(AGENT_OWNER_ID)) {
    if (session.status !== 'ready') {
      continue;
    }
    const parsed = Date.parse(session.updatedAt);
    const time = Number.isNaN(parsed) ? Number.NEGATIVE_INFINITY : parsed;
    if (newest === null || time > newestTime) {
      newest = session;
      newestTime = time;
    }
  }

  return newest;
}
|
||||
|
||||
/**
 * Returns `value` when it is a finite number greater than zero; otherwise
 * returns `fallback`.
 */
function positiveDuration(value: number, fallback: number): number {
  const usable = Number.isFinite(value) && value > 0;
  if (usable) {
    return value;
  }
  return fallback;
}
|
||||
|
||||
/**
 * Housekeeping pass over the session store.
 *
 * 1. `ready` sessions idle for longer than the session TTL are marked
 *    `stopped` (agent access revoked, `lastAction` set to `expire`).
 * 2. Non-ready sessions idle for longer than the stopped-session retention
 *    are deleted.
 * 3. If the store still exceeds the maximum size, the oldest non-ready
 *    sessions are deleted until it fits. Ready sessions are never trimmed.
 *
 * Sessions whose `updatedAt` cannot be parsed are skipped in steps 1 and 2.
 *
 * @param now - Reference time in epoch milliseconds (defaults to `Date.now()`).
 */
async function expireStaleSessions(now = Date.now()): Promise<void> {
  const sessionTtl = positiveDuration(SESSION_TTL_MS, 30 * 60 * 1000);
  const stoppedRetention = positiveDuration(STOPPED_SESSION_RETENTION_MS, sessionTtl);

  for (const [sessionId, session] of sessions.entries()) {
    const updatedAt = Date.parse(session.updatedAt);
    if (!Number.isFinite(updatedAt)) {
      continue;
    }
    const idleFor = now - updatedAt;

    if (session.status !== 'ready') {
      if (idleFor > stoppedRetention) {
        sessions.delete(sessionId);
      }
      continue;
    }

    if (idleFor > sessionTtl) {
      session.status = 'stopped';
      session.agentAccessEnabled = false;
      session.updatedAt = new Date(now).toISOString();
      session.lastAction = 'expire';
      session.message = 'Computer Use session expired after inactivity.';
    }
  }

  // Same "finite and > 0, else default" rule as the durations above.
  const maxStoredSessions = positiveDuration(MAX_STORED_SESSIONS, 100);
  if (sessions.size <= maxStoredSessions) {
    return;
  }

  const oldestFirst = [...sessions.values()]
    .filter((session) => session.status !== 'ready')
    .sort((a, b) => Date.parse(a.updatedAt) - Date.parse(b.updatedAt));

  for (const session of oldestFirst) {
    if (sessions.size <= maxStoredSessions) {
      break;
    }
    sessions.delete(session.id);
  }
}
|
||||
|
||||
// --- Action layer: local executor (OSS) or cloud relay to the desktop agent --
|
||||
//
|
||||
// Every desktop interaction goes through `performAction` / `getCursorPosition`.
|
||||
// In local mode it drives the in-process nut-js executor (computer-executor.ts);
|
||||
// in cloud mode it forwards the action to the linked desktop agent over
|
||||
// `desktopAgentRelay` and applies the returned screenshot. The local server
|
||||
// itself never touches the OS in cloud mode.
|
||||
|
||||
/**
 * Result payload of a desktop action, as returned by the desktop agent for a
 * relayed action. Every field is optional and may be `null`.
 */
type RelayResult = {
  /** Screenshot as a data URL; copied onto the session when it is a string. */
  screenshotDataUrl?: string | null;
  /** Display dimensions; copied onto the session when present. */
  displaySize?: { width: number; height: number } | null;
  /** Cursor coordinates; copied onto the session when present. */
  cursor?: { x: number; y: number } | null;
  /** Position returned for a `cursor_position` request. */
  position?: Point | null;
};
|
||||
|
||||
/**
 * Copies the parts of an action result that are present onto the session and
 * bumps `updatedAt`.
 *
 * The result may originate from a remote desktop agent and is only cast to
 * `RelayResult`, so a `null`, `undefined`, or non-object reply is tolerated:
 * in that case nothing is copied and only `updatedAt` is refreshed, instead
 * of throwing a TypeError on property access.
 *
 * @param session - Session to update in place.
 * @param result - Action result; missing/`null` fields leave the session's
 *                 existing values untouched.
 */
function applyRelayResult(session: ComputerUseSession, result: RelayResult | null | undefined): void {
  if (result !== null && typeof result === 'object') {
    if (typeof result.screenshotDataUrl === 'string') {
      session.screenshotDataUrl = result.screenshotDataUrl;
    }
    if (result.displaySize) {
      session.displaySize = result.displaySize;
    }
    if (result.cursor) {
      // Keep whoever last moved the cursor; default to the agent when unknown.
      session.cursor = {
        x: result.cursor.x,
        y: result.cursor.y,
        actor: session.cursor?.actor ?? 'agent',
      };
    }
  }
  session.updatedAt = new Date().toISOString();
}
|
||||
|
||||
/** Returns a shallow copy of `args` without its `sessionId` key. */
function stripSessionArgs(args: Record<string, unknown>): Record<string, unknown> {
  const { sessionId: _droppedSessionId, ...remaining } = args;
  return remaining;
}
|
||||
|
||||
/**
 * Captures a fresh screenshot and applies it to the session. In the `cloud`
 * runtime the request is relayed to the desktop agent; otherwise it runs
 * through `runRawComputerAction`.
 */
async function refreshScreenshot(session: ComputerUseSession): Promise<void> {
  const captured = getRuntime() === 'cloud'
    ? ((await desktopAgentRelay.relay('screenshot', { sessionId: session.id })) as RelayResult)
    : await runRawComputerAction({ type: 'screenshot' }, session);

  applyRelayResult(session, captured);
}
|
||||
|
||||
/**
 * Executes a single desktop action and applies its result (screenshot,
 * display size, cursor) to the session.
 *
 * In the `cloud` runtime the action is relayed under its own `type`, with the
 * session id and current display size attached; otherwise it runs through
 * `runRawComputerAction`.
 */
async function performAction(session: ComputerUseSession, action: RawComputerAction): Promise<void> {
  const isCloud = getRuntime() === 'cloud';

  if (!isCloud) {
    applyRelayResult(session, await runRawComputerAction(action, session));
    return;
  }

  const relayParams = {
    ...action,
    sessionId: session.id,
    displaySize: session.displaySize,
  };
  const relayed = (await desktopAgentRelay.relay(action.type, relayParams)) as RelayResult;
  applyRelayResult(session, relayed);
}
|
||||
|
||||
/**
 * Reads the current cursor position in screenshot-pixel space.
 *
 * The request goes to the desktop agent in the `cloud` runtime and through
 * `runRawComputerAction` otherwise; either way the result is applied to the
 * session first.
 *
 * Both runtimes now share one fallback chain and always return a plain
 * `{ x, y }` point. Previously the local branch could return `session.cursor`
 * itself, which also carries an `actor` field and is the session's own
 * mutable object.
 *
 * @returns The reported position, else the session's last known cursor,
 *          else `{ x: 0, y: 0 }`.
 */
async function getCursorPosition(session: ComputerUseSession): Promise<Point> {
  const result = getRuntime() === 'cloud'
    ? ((await desktopAgentRelay.relay('cursor_position', {
        sessionId: session.id,
        displaySize: session.displaySize,
      })) as RelayResult)
    : await runRawComputerAction({ type: 'cursor_position' }, session);

  applyRelayResult(session, result);

  if (result?.position) {
    return result.position;
  }
  return session.cursor ? { x: session.cursor.x, y: session.cursor.y } : { x: 0, y: 0 };
}
|
||||
|
||||
/**
 * Guard that passes only for sessions in the `ready` state.
 *
 * @throws Error carrying the session's own `message`, or a generic
 *         "not available" message when the session has none.
 */
function assertReady(session: ComputerUseSession): void {
  if (session.status === 'ready') {
    return;
  }
  throw new Error(session.message || 'Computer Use session is not available.');
}
|
||||
|
||||
/**
 * Whether agent tools can be used right now: the feature must be enabled in
 * settings and, in the `cloud` runtime, a desktop agent must be connected.
 */
function agentToolsAvailable(): boolean {
  if (!readSettings().enabled) {
    return false;
  }
  return getRuntime() === 'cloud' ? desktopAgentRelay.isConnected() : true;
}
|
||||
|
||||
/**
 * Throws unless agent tools are currently available.
 *
 * @throws Error "agent tools are disabled" when the setting is off (or in a
 *         non-cloud runtime), or "No desktop is linked…" when the setting is
 *         on in the `cloud` runtime but no desktop agent is connected.
 */
function assertAgentToolsAvailable(): void {
  if (!agentToolsAvailable()) {
    const { enabled } = readSettings();
    const missingDesktop = enabled && getRuntime() === 'cloud';
    throw new Error(
      missingDesktop
        ? 'No desktop is linked. Open CloudCLI Desktop on this computer, connect the same account, and enable Computer Use.'
        : 'Computer Use agent tools are disabled.'
    );
  }
}
|
||||
|
||||
/**
 * Marks every session that is not already stopped as `stopped`, revoking
 * agent access and recording why.
 *
 * Sessions already in the `stopped` state are left alone so their original
 * stop reason (`lastAction` / `message`) is preserved and their `updatedAt`
 * is not refreshed, which would otherwise restart their retention window.
 *
 * @param lastAction - Value stored in each affected session's `lastAction`.
 * @param message - Message stored on each affected session.
 */
function stopSessions(lastAction: string, message: string): void {
  const stoppedAt = new Date().toISOString();
  for (const session of sessions.values()) {
    if (session.status === 'stopped') {
      continue;
    }
    session.status = 'stopped';
    session.agentAccessEnabled = false;
    session.updatedAt = stoppedAt;
    session.lastAction = lastAction;
    session.message = message;
  }
}
|
||||
|
||||
/**
 * Looks up a session by id on behalf of `owner`.
 *
 * @returns The session when it exists and `owner` may access it, else `null`.
 * @throws Error (from `getOwnerId`) when the owner has no usable id.
 */
function findAccessibleSession(owner: ComputerUseOwner, sessionId: string): ComputerUseSession | null {
  const ownerId = getOwnerId(owner);
  const session = sessions.get(sessionId);
  return session && canAccessSession(ownerId, session) ? session : null;
}

/**
 * Same as `findAccessibleSession`, but a missing or inaccessible session is
 * reported as an error.
 *
 * @throws Error 'Computer Use session not found.'
 */
function requireAccessibleSession(owner: ComputerUseOwner, sessionId: string): ComputerUseSession {
  const session = findAccessibleSession(owner, sessionId);
  if (!session) {
    throw new Error('Computer Use session not found.');
  }
  return session;
}

/**
 * Service facade for Computer Use: settings, runtime status, MCP
 * registration, session lifecycle, and user- and agent-initiated actions.
 */
export const computerUseService = {
  /** Returns the current Computer Use settings. */
  async getSettings() {
    return readSettings();
  },

  /**
   * Persists the `enabled` flag (other fields are ignored) and applies it.
   * Enabling registers the MCP server. Disabling revokes control first —
   * desktop agents are disconnected and sessions stopped — and only then
   * unregisters the MCP server, so nothing can act while the unregister
   * calls are in flight.
   */
  async updateSettings(settings: Partial<ComputerUseSettings>) {
    const current = readSettings();
    const enabled = typeof settings.enabled === 'boolean' ? settings.enabled : current.enabled;
    const next = writeSettings({ enabled });
    if (next.enabled) {
      await this.registerAgentMcp();
    } else {
      desktopAgentRelay.disconnectAll('Computer Use was disabled in this environment.');
      stopSessions('settings:disabled', 'Computer Use was disabled in settings.');
      await this.unregisterAgentMcp();
    }
    return next;
  },

  /** Summarizes settings, runtime readiness, relay connectivity and session count. */
  async getStatus() {
    const settings = readSettings();
    const readiness = getRuntimeReadiness();
    const isCloud = getRuntime() === 'cloud';
    const runtimeReady = readiness.nutInstalled && readiness.screenshotInstalled;
    // Cloud mode still respects the saved feature setting. When enabled, cloud
    // availability comes from a linked desktop agent because the hosted server
    // has no screen of its own.
    const desktopAgentConnected = desktopAgentRelay.isConnected();
    const available = settings.enabled && (isCloud ? desktopAgentConnected : runtimeReady);

    return {
      enabled: settings.enabled,
      runtime: getRuntime(),
      available,
      desktopAgentConnected,
      desktopAgentCount: desktopAgentRelay.connectedCount(),
      nutInstalled: readiness.nutInstalled,
      screenshotInstalled: readiness.screenshotInstalled,
      installInProgress: readiness.installInProgress,
      sessionCount: sessions.size,
      message: available ? 'Computer Use runtime is available.' : getSetupMessage(settings, readiness),
    };
  },

  /** Registers the Computer Use MCP server (stdio, user scope) with all providers. */
  async registerAgentMcp() {
    const { command, args } = getMcpCommand();
    const results = await providerMcpService.addMcpServerToAllProviders({
      name: MCP_SERVER_NAME,
      scope: 'user',
      transport: 'stdio',
      command,
      args,
      env: {
        CLOUDCLI_COMPUTER_USE_MCP_TOKEN: getOrCreateMcpToken(),
        CLOUDCLI_COMPUTER_USE_API_URL: getMcpApiUrl(),
      },
    });
    return { name: MCP_SERVER_NAME, command, args, results };
  },

  /** Returns the MCP token, creating it if needed. */
  getMcpToken() {
    return getOrCreateMcpToken();
  },

  /**
   * Removes the MCP server from every provider in `MCP_PROVIDERS`. A failure
   * for one provider is reported in its result entry rather than thrown.
   */
  async unregisterAgentMcp() {
    const results = await Promise.all(MCP_PROVIDERS.map(async (provider) => {
      try {
        const result = await providerMcpService.removeProviderMcpServer(provider, {
          name: MCP_SERVER_NAME,
          scope: 'user',
        });
        return { provider, removed: result.removed };
      } catch (error) {
        return {
          provider,
          removed: false,
          error: error instanceof Error ? error.message : 'Unknown error',
        };
      }
    }));
    return { name: MCP_SERVER_NAME, results };
  },

  /** Runs the runtime installer and returns its result plus a fresh status. */
  async installRuntime() {
    const result = await installRuntime();
    return {
      ...result,
      status: await this.getStatus(),
    };
  },

  /** Lists the public view of every session `owner` may access. */
  async listSessions(owner: ComputerUseOwner) {
    const ownerId = getOwnerId(owner);
    await expireStaleSessions();
    return [...sessions.values()]
      .filter((session) => canAccessSession(ownerId, session))
      .map(publicSession);
  },

  /**
   * Creates and stores a session for `owner`.
   *
   * The session is `ready` only when the feature is enabled and the runtime
   * (or, in cloud mode, a linked desktop agent) is available and the initial
   * screenshot succeeds; otherwise it is stored as `unavailable` with an
   * explanatory message.
   *
   * @throws Error when the owner already has `MAX_SESSIONS_PER_OWNER` ready sessions.
   */
  async createSession(owner: ComputerUseOwner, options?: { createdBy?: 'user' | 'agent' }) {
    const ownerId = getOwnerId(owner);
    await expireStaleSessions();
    const createdBy = options?.createdBy ?? 'user';

    const activeOwnerSessions = ownerSessions(ownerId).filter((item) => item.status === 'ready');
    if (activeOwnerSessions.length >= MAX_SESSIONS_PER_OWNER) {
      throw new Error(`Computer Use is limited to ${MAX_SESSIONS_PER_OWNER} active session(s).`);
    }

    const now = new Date().toISOString();
    const session: ComputerUseSession = {
      id: randomUUID(),
      ownerId,
      createdBy,
      runtime: getRuntime(),
      status: 'unavailable',
      screenshotDataUrl: null,
      createdAt: now,
      updatedAt: now,
      lastAction: 'create',
      // Consent is always OFF at creation — the user must explicitly grant control,
      // even for agent-initiated sessions controlling the full desktop.
      agentAccessEnabled: false,
      displaySize: null,
      message: null,
      cursor: null,
    };

    const settings = readSettings();
    const readiness = getRuntimeReadiness();
    const isCloud = getRuntime() === 'cloud';
    const runtimeReady = readiness.nutInstalled && readiness.screenshotInstalled;
    const ready = settings.enabled && (isCloud ? desktopAgentRelay.isConnected() : runtimeReady);

    if (!ready) {
      session.message = getSetupMessage(settings, readiness);
      sessions.set(session.id, session);
      return publicSession(session);
    }

    // In cloud mode the linked desktop agent is the consent authority and prompts
    // the user per its own consent mode, so the relay is allowed to act. In local
    // mode the user must still grant control from the panel.
    if (isCloud) {
      session.agentAccessEnabled = true;
    }

    session.status = 'ready';
    session.message = isCloud
      ? 'Computer Use session is ready on the linked desktop.'
      : 'Computer Use session is ready. Grant control to let agents act.';
    sessions.set(session.id, session);

    try {
      await refreshScreenshot(session);
    } catch (error) {
      session.status = 'unavailable';
      // Like every other non-ready state, an unusable session must not
      // advertise agent access.
      session.agentAccessEnabled = false;
      session.message = error instanceof Error ? error.message : 'Failed to capture the screen.';
    }
    return publicSession(session);
  },

  /** Turns on the session's agent-access flag. Throws when the session is not found. */
  async grantAgentAccess(owner: ComputerUseOwner, sessionId: string) {
    const session = requireAccessibleSession(owner, sessionId);
    session.agentAccessEnabled = true;
    session.updatedAt = new Date().toISOString();
    session.lastAction = 'consent:grant';
    return publicSession(session);
  },

  /** Turns off the session's agent-access flag. Throws when the session is not found. */
  async revokeAgentAccess(owner: ComputerUseOwner, sessionId: string) {
    const session = requireAccessibleSession(owner, sessionId);
    session.agentAccessEnabled = false;
    session.updatedAt = new Date().toISOString();
    session.lastAction = 'consent:revoke';
    return publicSession(session);
  },

  /**
   * Stops a session and revokes agent access.
   *
   * @returns `{ stopped: false }` when the session is missing or inaccessible.
   */
  async stopSession(owner: ComputerUseOwner, sessionId: string) {
    const session = findAccessibleSession(owner, sessionId);
    if (!session) {
      return { stopped: false };
    }

    session.status = 'stopped';
    session.agentAccessEnabled = false;
    session.updatedAt = new Date().toISOString();
    session.lastAction = 'stop';
    session.message = 'Computer Use session stopped. Agent control is revoked.';
    if (getRuntime() === 'cloud' && desktopAgentRelay.isConnected()) {
      // Best-effort: tell the desktop agent to forget this session's consent.
      void desktopAgentRelay.relay('stop_session', { sessionId }).catch(() => undefined);
    }
    return { stopped: true, session: publicSession(session) };
  },

  /**
   * Removes a session from the store.
   *
   * @returns `{ deleted: false }` when the session is missing or inaccessible.
   */
  async deleteSession(owner: ComputerUseOwner, sessionId: string) {
    const session = findAccessibleSession(owner, sessionId);
    if (!session) {
      return { deleted: false };
    }

    sessions.delete(sessionId);
    return { deleted: true, sessionId };
  },

  // --- User-initiated actions (from the panel) -------------------------------

  /** Refreshes the screenshot of a ready session. */
  async userScreenshot(owner: ComputerUseOwner, sessionId: string) {
    const session = requireAccessibleSession(owner, sessionId);
    assertReady(session);
    await refreshScreenshot(session);
    session.lastAction = 'screenshot';
    return publicSession(session);
  },

  /** Performs a single or double click (default button: left) at the given point. */
  async userClick(owner: ComputerUseOwner, sessionId: string, input: { x: number; y: number; button?: ClickButton; double?: boolean }) {
    const session = requireAccessibleSession(owner, sessionId);
    assertReady(session);
    await performAction(session, {
      type: 'click',
      button: input.button || 'left',
      point: { x: input.x, y: input.y },
      double: input.double === true,
    });
    session.cursor = { x: input.x, y: input.y, actor: 'user' };
    session.lastAction = input.double ? 'double_click' : 'click';
    return publicSession(session);
  },

  /** Presses `key` in a ready session. */
  async userPressKey(owner: ComputerUseOwner, sessionId: string, key: string) {
    const session = requireAccessibleSession(owner, sessionId);
    assertReady(session);
    await performAction(session, { type: 'key', key });
    session.lastAction = `key:${key}`;
    return publicSession(session);
  },

  // --- Agent-initiated actions (via MCP) ------------------------------------

  /**
   * Returns the most recent ready agent-owned session, creating one when none
   * exists.
   *
   * @throws Error when agent tools are unavailable or the session cannot be created.
   */
  async getOrCreateAgentSession(): Promise<ComputerUseSession> {
    assertAgentToolsAvailable();
    await expireStaleSessions();
    const existing = findActiveAgentSession();
    if (existing) {
      return existing;
    }

    const created = await this.createSession({ id: AGENT_OWNER_ID }, { createdBy: 'agent' });
    const session = sessions.get(created.id);
    if (!session) {
      throw new Error('Computer Use session could not be created.');
    }
    return session;
  },

  /**
   * Resolves a session the agent is allowed to act on. In local mode this
   * enforces the in-process per-session consent flag. In cloud mode the linked
   * desktop agent is the consent authority (it prompts the user per its own
   * consent mode), so this only requires the relay to be connected.
   *
   * Stale sessions are expired before an explicitly requested session is
   * looked up, so the inactivity TTL also applies to sessions addressed by
   * id. Readiness is checked before consent so a stopped or expired session
   * reports its real state instead of "awaiting user consent".
   */
  async getConsentedSession(sessionId?: string): Promise<ComputerUseSession> {
    assertAgentToolsAvailable();
    const normalizedSessionId = normalizeSessionId(sessionId);

    let session: ComputerUseSession | undefined;
    if (normalizedSessionId) {
      await expireStaleSessions();
      session = sessions.get(normalizedSessionId);
    } else {
      session = await this.getOrCreateAgentSession();
    }
    if (!session) {
      throw new Error('Computer Use session not found.');
    }

    assertReady(session);
    if (getRuntime() !== 'cloud' && !session.agentAccessEnabled) {
      throw new Error(`Computer Use session ${session.id} is awaiting user consent. Ask the user to grant control in the Computer panel.`);
    }
    return session;
  },

  /** Agent: refreshes and returns the session screenshot. */
  async agentScreenshot(sessionId?: string) {
    const session = await this.getConsentedSession(sessionId);
    await refreshScreenshot(session);
    session.lastAction = 'screenshot';
    return publicSession(session);
  },

  /** Agent: reads the cursor position and records it on the session. */
  async agentCursorPosition(sessionId?: string) {
    const session = await this.getConsentedSession(sessionId);
    const point = await getCursorPosition(session);
    session.cursor = { ...point, actor: 'agent' };
    session.lastAction = 'cursor_position';
    return { session: publicSession(session), position: point };
  },

  /** Agent: moves the mouse to `point`. */
  async agentMouseMove(sessionId: string | undefined, point: Point) {
    const session = await this.getConsentedSession(sessionId);
    await performAction(session, { type: 'mouse_move', point });
    session.cursor = { ...point, actor: 'agent' };
    session.lastAction = 'mouse_move';
    return publicSession(session);
  },

  /**
   * Agent: clicks `clickCount` times (truncated and clamped to 1–5, default 1)
   * with `button` (default left), optionally at `point`. Each click is sent
   * as a separate action.
   */
  async agentUnifiedClick(sessionId: string | undefined, input: { button?: ClickButton; point?: Point; clickCount?: number }) {
    const session = await this.getConsentedSession(sessionId);
    const button = input.button || 'left';
    const clickCount = Math.max(1, Math.min(Math.trunc(input.clickCount || 1), 5));
    for (let index = 0; index < clickCount; index += 1) {
      await performAction(session, { type: 'click', button, point: input.point, double: false });
    }
    if (input.point) {
      session.cursor = { ...input.point, actor: 'agent' };
    }
    session.lastAction = clickCount > 1 ? `${button}_click:${clickCount}` : `${button}_click`;
    return publicSession(session);
  },

  /** Agent: drags from `from` to `to` with `button` (default left). */
  async agentDrag(sessionId: string | undefined, from: Point, to: Point, button: ClickButton = 'left') {
    const session = await this.getConsentedSession(sessionId);
    await performAction(session, { type: 'drag', from, to, button });
    session.cursor = { ...to, actor: 'agent' };
    session.lastAction = `${button}_drag`;
    return publicSession(session);
  },

  /** Agent: types `text`. */
  async agentType(sessionId: string | undefined, text: string) {
    const session = await this.getConsentedSession(sessionId);
    await performAction(session, { type: 'type', text });
    session.lastAction = 'type';
    return publicSession(session);
  },

  /** Agent: presses `key`. */
  async agentKey(sessionId: string | undefined, key: string) {
    const session = await this.getConsentedSession(sessionId);
    await performAction(session, { type: 'key', key });
    session.lastAction = `key:${key}`;
    return publicSession(session);
  },

  /** Agent: scrolls in `direction`; a point is used only when both `x` and `y` are numbers. */
  async agentScroll(sessionId: string | undefined, input: { direction: ScrollDirection; amount?: number; x?: number; y?: number }) {
    const session = await this.getConsentedSession(sessionId);
    const point = typeof input.x === 'number' && typeof input.y === 'number' ? { x: input.x, y: input.y } : undefined;
    await performAction(session, { type: 'scroll', direction: input.direction, amount: input.amount, point });
    if (point) {
      session.cursor = { ...point, actor: 'agent' };
    }
    session.lastAction = `scroll:${input.direction}`;
    return publicSession(session);
  },

  /** Agent: issues a `wait` action with the given duration. */
  async agentWait(sessionId?: string, timeoutMs?: number) {
    const session = await this.getConsentedSession(sessionId);
    await performAction(session, { type: 'wait', ms: timeoutMs });
    session.lastAction = 'wait';
    return publicSession(session);
  },

  /**
   * Agent: stops the given session, or the most recent ready agent session
   * when no id is supplied. Runs with the agent owner's access rights.
   */
  async agentStopSession(sessionId?: string) {
    assertAgentToolsAvailable();
    const normalizedSessionId = normalizeSessionId(sessionId);
    if (normalizedSessionId) {
      return this.stopSession({ id: AGENT_OWNER_ID }, normalizedSessionId);
    }

    await expireStaleSessions();
    const existing = findActiveAgentSession();
    if (!existing) {
      return { stopped: false };
    }
    return this.stopSession({ id: AGENT_OWNER_ID }, existing.id);
  },

  /**
   * Agent: runs a semantic tool against a consented session. In cloud mode
   * the call is relayed to the desktop agent; otherwise it goes to
   * `computerSemanticsService`. The resolved session id replaces any
   * `sessionId` supplied in `args`.
   *
   * @throws Error when `toolName` is not a known semantic operation.
   */
  async callSemanticTool(toolName: string, args: Record<string, unknown>) {
    if (!semanticOperationNames.has(toolName)) {
      throw new Error(`Unsupported semantic Computer Use tool: ${toolName}`);
    }

    // getConsentedSession normalizes the id itself (blank → default session).
    const sessionId = typeof args.sessionId === 'string' ? args.sessionId : undefined;
    const session = await this.getConsentedSession(sessionId);
    const toolArgs = { ...stripSessionArgs(args), sessionId: session.id };
    const semanticResult = getRuntime() === 'cloud'
      ? await desktopAgentRelay.relay('semantic_tool', {
          sessionId: session.id,
          displaySize: session.displaySize,
          toolName,
          arguments: toolArgs,
        })
      : await computerSemanticsService.callTool(toolName, toolArgs);

    applyRelayResult(session, semanticResult as RelayResult);
    session.lastAction = `semantic:${toolName}`;
    return { session: publicSession(session), result: semanticResult };
  },

  /**
   * Cloud only: when a desktop agent links to this hosted environment, expose
   * the computer_* MCP tools only if the user enabled Computer Use in settings.
   * Registration failures are logged, not thrown.
   */
  async onDesktopAgentConnected() {
    if (getRuntime() !== 'cloud') {
      return;
    }
    if (!readSettings().enabled) {
      return;
    }
    try {
      await this.registerAgentMcp();
    } catch (error) {
      console.warn('[Computer Use] Failed to register MCP for linked desktop agent:', error instanceof Error ? error.message : error);
    }
  },

  /** Cloud only: tear down sessions when the last desktop agent disconnects. */
  async onDesktopAgentDisconnected() {
    if (getRuntime() !== 'cloud' || desktopAgentRelay.isConnected()) {
      return;
    }
    for (const session of sessions.values()) {
      if (session.status === 'ready') {
        session.status = 'stopped';
        session.agentAccessEnabled = false;
        session.updatedAt = new Date().toISOString();
        session.lastAction = 'agent-disconnected';
        session.message = 'The linked desktop agent disconnected.';
      }
    }
  },

  /** Stops all sessions with a shutdown reason. */
  async stopAllSessions() {
    stopSessions('shutdown', 'Computer Use session stopped during server shutdown.');
  },
};
|
||||
|
||||
// Wire the relay's lifecycle to the service: connections are accepted only in
// the cloud runtime with Computer Use enabled; the first connect and the last
// disconnect drive MCP exposure and session teardown.
desktopAgentRelay.setHooks({
  canAcceptConnection() {
    return getRuntime() === 'cloud' && readSettings().enabled;
  },
  onFirstConnect() {
    return computerUseService.onDesktopAgentConnected();
  },
  onLastDisconnect() {
    return computerUseService.onDesktopAgentDisconnected();
  },
});

// Mark every session stopped once, just before the process exits.
process.once('beforeExit', () => {
  void computerUseService.stopAllSessions();
});
|
||||
158
server/modules/computer-use/desktop-agent-relay.service.ts
Normal file
158
server/modules/computer-use/desktop-agent-relay.service.ts
Normal file
@@ -0,0 +1,158 @@
|
||||
import { randomUUID } from 'node:crypto';
|
||||
|
||||
import type { WebSocket } from 'ws';
|
||||
|
||||
/**
 * How long a relayed call waits for the desktop agent's reply, in ms.
 *
 * Read from `CLOUDCLI_COMPUTER_USE_RELAY_TIMEOUT_MS`. A missing, non-numeric
 * or non-positive value falls back to 60000, and the value is capped at the
 * largest delay `setTimeout` supports: Node coerces NaN, values below 1 and
 * values above 2147483647 to 1 ms, which would make every relay call time
 * out immediately.
 */
const RELAY_TIMEOUT_MS = (() => {
  const DEFAULT_TIMEOUT_MS = 60000;
  const MAX_TIMER_DELAY_MS = 2147483647;
  const parsed = Number.parseInt(process.env.CLOUDCLI_COMPUTER_USE_RELAY_TIMEOUT_MS ?? '', 10);
  if (!Number.isFinite(parsed) || parsed <= 0) {
    return DEFAULT_TIMEOUT_MS;
  }
  return Math.min(parsed, MAX_TIMER_DELAY_MS);
})();

/** `readyState` value of an open WebSocket. */
const WS_OPEN = 1;
|
||||
|
||||
/** Bookkeeping for one in-flight relay call awaiting a reply. */
type PendingRelay = {
  /** Settles the caller's promise with the reply payload. */
  resolve: (value: unknown) => void;
  /** Fails the caller's promise. */
  reject: (reason: Error) => void;
  /** Timeout handle; cleared when the call settles. */
  timer: ReturnType<typeof setTimeout>;
};

/** A registered desktop-agent connection. */
type ConnectedAgent = {
  ws: WebSocket;
  /** Name used in log lines. */
  label: string;
  /** ISO timestamp taken at registration. */
  registeredAt: string;
};

/** Optional callbacks invoked by the relay around connection changes. */
type RelayLifecycleHooks = {
  /** Consulted on registration; returning `false` rejects the connection. */
  canAcceptConnection?: () => boolean;
  /** Invoked when an agent registers while no open agent existed. */
  onFirstConnect?: () => void | Promise<void>;
  /** Invoked when no open agent remains after a disconnect. */
  onLastDisconnect?: () => void | Promise<void>;
};

// Module state: registered agents keyed by socket, in-flight calls keyed by
// call id, and the currently installed lifecycle hooks.
const agents = new Map<WebSocket, ConnectedAgent>();
const pending = new Map<string, PendingRelay>();
let hooks: RelayLifecycleHooks = {};
|
||||
|
||||
/**
 * Fails every in-flight relay call with `reason`, clearing each call's
 * timeout and removing it from the pending map.
 */
function rejectAllPending(reason: string): void {
  for (const [callId, { timer, reject }] of pending.entries()) {
    clearTimeout(timer);
    reject(new Error(reason));
    pending.delete(callId);
  }
}
|
||||
|
||||
/**
 * Returns the first registered agent (in registration order) whose socket is
 * open, or `undefined` when there is none.
 */
function pickAgent(): ConnectedAgent | undefined {
  return [...agents.values()].find((agent) => agent.ws.readyState === WS_OPEN);
}
|
||||
|
||||
/**
 * Cloud-side registry of linked desktop agents and the request/response relay
 * used to drive the user's real desktop. The hosted server never touches the OS
 * itself — it only forwards `computer_*` actions to a connected desktop agent
 * and awaits the screenshot it returns.
 */
export const desktopAgentRelay = {
  /** Replaces the lifecycle hooks. */
  setHooks(next: RelayLifecycleHooks): void {
    hooks = next;
  },

  /**
   * Registers a desktop-agent socket.
   *
   * When `canAcceptConnection` vetoes the connection the socket is closed
   * with code 1008 and `false` is returned. Otherwise the agent is stored, a
   * `close` listener is attached for cleanup, and `onFirstConnect` fires if
   * no open agent existed beforehand.
   *
   * @returns `true` when the agent was registered.
   */
  register(ws: WebSocket, label = 'desktop-agent'): boolean {
    if (hooks.canAcceptConnection && !hooks.canAcceptConnection()) {
      console.log(`[DesktopAgent] Rejected (${label}); Computer Use is disabled.`);
      try {
        ws.close(1008, 'Computer Use is disabled in this environment.');
      } catch {
        // ignore close failures
      }
      return false;
    }

    const wasEmpty = pickAgent() === undefined;
    agents.set(ws, { ws, label, registeredAt: new Date().toISOString() });
    console.log(`[DesktopAgent] Registered (${label}); ${agents.size} connected.`);

    ws.on('close', () => {
      // `wasRegistered` is false when disconnectAll() already removed this
      // socket, which keeps the last-disconnect hook from firing twice.
      const wasRegistered = agents.delete(ws);
      console.log(`[DesktopAgent] Disconnected (${label}); ${agents.size} remain.`);
      if (wasRegistered && pickAgent() === undefined) {
        rejectAllPending('Desktop agent disconnected.');
        void hooks.onLastDisconnect?.();
      }
    });

    if (wasEmpty) {
      void hooks.onFirstConnect?.();
    }
    return true;
  },

  /**
   * Drops every registered agent: closes each socket with code 1008, fails
   * all pending calls with `reason`, and fires `onLastDisconnect` if an open
   * agent existed.
   */
  disconnectAll(reason = 'Desktop agent disconnected.'): void {
    const hadAgent = pickAgent() !== undefined;
    const sockets = [...agents.keys()];
    agents.clear();
    for (const ws of sockets) {
      try {
        ws.close(1008, reason);
      } catch {
        // ignore close failures
      }
    }
    rejectAllPending(reason);
    if (hadAgent) {
      void hooks.onLastDisconnect?.();
    }
  },

  /** Resolves a pending relay call with the desktop agent's reply. */
  handleResult(id: string, result: unknown, error?: string): void {
    const call = pending.get(id);
    if (!call) {
      // Unknown, timed-out or already-settled call: nothing to do.
      return;
    }
    clearTimeout(call.timer);
    pending.delete(id);
    if (error) {
      call.reject(new Error(error));
    } else {
      call.resolve(result);
    }
  },

  /** Whether at least one registered agent has an open socket. */
  isConnected(): boolean {
    return pickAgent() !== undefined;
  },

  /** Number of registered agents whose socket is open. */
  connectedCount(): number {
    let count = 0;
    for (const agent of agents.values()) {
      if (agent.ws.readyState === WS_OPEN) {
        count++;
      }
    }
    return count;
  },

  /**
   * Sends one request to the first open agent and resolves with its reply.
   *
   * The promise rejects when no agent is open, when the send fails (either
   * synchronously or through the socket's send callback), when the agent
   * answers with an error, or when no reply arrives within
   * `RELAY_TIMEOUT_MS`. Reporting asynchronous send failures through the
   * callback lets callers fail promptly instead of waiting out the timeout.
   */
  async relay(type: string, params: Record<string, unknown>): Promise<unknown> {
    const agent = pickAgent();
    if (!agent) {
      throw new Error(
        'No desktop is linked. Open CloudCLI Desktop on this computer, connect the same account, and enable Computer Use.'
      );
    }

    const id = randomUUID();
    return new Promise<unknown>((resolve, reject) => {
      // Rejects the call once; a no-op if it has already settled.
      const failSend = (error: unknown): void => {
        const call = pending.get(id);
        if (!call) {
          return;
        }
        clearTimeout(call.timer);
        pending.delete(id);
        reject(error instanceof Error ? error : new Error('Failed to send to desktop agent.'));
      };

      const timer = setTimeout(() => {
        pending.delete(id);
        reject(new Error('Desktop agent did not respond in time.'));
      }, RELAY_TIMEOUT_MS);
      pending.set(id, { resolve, reject, timer });

      try {
        agent.ws.send(JSON.stringify({ kind: 'computer_relay', id, type, params }), (error) => {
          if (error) {
            failSend(error);
          }
        });
      } catch (error) {
        failSend(error);
      }
    });
  },
};
|
||||
2
server/modules/computer-use/index.ts
Normal file
2
server/modules/computer-use/index.ts
Normal file
@@ -0,0 +1,2 @@
|
||||
export { computerUseService } from '@/modules/computer-use/computer-use.service.js';
|
||||
export { desktopAgentRelay } from '@/modules/computer-use/desktop-agent-relay.service.js';
|
||||
@@ -0,0 +1,82 @@
|
||||
import { SemanticHelperProcess } from '@/modules/computer-use/semantics/helpers/semantic-helper-process.js';
|
||||
import { resolveSemanticHelper } from '@/modules/computer-use/semantics/helpers/semantic-helper-resolver.js';
|
||||
import type { SemanticAdapter, SemanticAdapterCapabilities } from '@/modules/computer-use/semantics/adapters/semantic-adapter.js';
|
||||
import type { SemanticApp, SemanticAppState, SemanticToolInput } from '@/modules/computer-use/semantics/semantic-types.js';
|
||||
|
||||
/** Method names that can be passed to the semantic helper via `request`. */
type HelperMethod =
  | 'list_apps' | 'get_app_state'
  | 'click_element' | 'perform_secondary_action'
  | 'set_value' | 'type_text' | 'press_key'
  | 'scroll_element' | 'drag';
|
||||
|
||||
export class HelperSemanticAdapter implements SemanticAdapter {
|
||||
private helper: SemanticHelperProcess | null = null;
|
||||
|
||||
constructor(
|
||||
private readonly platform: NodeJS.Platform,
|
||||
private readonly arch: NodeJS.Architecture = process.arch,
|
||||
) {}
|
||||
|
||||
capabilities(): SemanticAdapterCapabilities {
|
||||
return {
|
||||
platform: this.platform,
|
||||
appDiscovery: true,
|
||||
accessibilityTree: true,
|
||||
nativeElementActions: true,
|
||||
nativeValueSetting: true,
|
||||
targetedInput: true,
|
||||
};
|
||||
}
|
||||
|
||||
async listApps(): Promise<SemanticApp[]> {
|
||||
return await this.request('list_apps', {}) as SemanticApp[];
|
||||
}
|
||||
|
||||
async getAppState(input: SemanticToolInput): Promise<SemanticAppState> {
|
||||
return await this.request('get_app_state', input) as SemanticAppState;
|
||||
}
|
||||
|
||||
async clickElement(input: SemanticToolInput): Promise<SemanticAppState> {
|
||||
return await this.request('click_element', input) as SemanticAppState;
|
||||
}
|
||||
|
||||
async performSecondaryAction(input: SemanticToolInput): Promise<SemanticAppState> {
|
||||
return await this.request('perform_secondary_action', input) as SemanticAppState;
|
||||
}
|
||||
|
||||
async setValue(input: SemanticToolInput): Promise<SemanticAppState> {
|
||||
return await this.request('set_value', input) as SemanticAppState;
|
||||
}
|
||||
|
||||
async typeText(input: SemanticToolInput): Promise<SemanticAppState> {
|
||||
return await this.request('type_text', input) as SemanticAppState;
|
||||
}
|
||||
|
||||
async pressKey(input: SemanticToolInput): Promise<SemanticAppState> {
|
||||
return await this.request('press_key', input) as SemanticAppState;
|
||||
}
|
||||
|
||||
async scrollElement(input: SemanticToolInput): Promise<SemanticAppState> {
|
||||
return await this.request('scroll_element', input) as SemanticAppState;
|
||||
}
|
||||
|
||||
async drag(input: SemanticToolInput): Promise<SemanticAppState> {
|
||||
return await this.request('drag', input) as SemanticAppState;
|
||||
}
|
||||
|
||||
private async request(method: HelperMethod, params: Record<string, unknown>): Promise<unknown> {
|
||||
if (!this.helper) {
|
||||
const resolution = resolveSemanticHelper(this.platform, this.arch);
|
||||
if (!resolution.available || !resolution.path) {
|
||||
throw new Error(resolution.reason || `Semantic helper is unavailable for ${this.platform}-${this.arch}.`);
|
||||
}
|
||||
this.helper = new SemanticHelperProcess(resolution.path);
|
||||
}
|
||||
return this.helper.request(method, params);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,5 @@
|
||||
import { HelperSemanticAdapter } from '@/modules/computer-use/semantics/adapters/helper-semantic-adapter.js';
|
||||
|
||||
/**
 * Build the semantic adapter for macOS.
 *
 * @returns A `HelperSemanticAdapter` constructed for the `'darwin'` platform.
 */
export function createMacOsSemanticAdapter(): HelperSemanticAdapter {
  const adapter = new HelperSemanticAdapter('darwin');
  return adapter;
}
|
||||
@@ -0,0 +1,23 @@
|
||||
import type { SemanticApp, SemanticAppState, SemanticToolInput } from '@/modules/computer-use/semantics/semantic-types.js';
|
||||
|
||||
/**
 * Capability flags returned by `SemanticAdapter.capabilities()`.
 *
 * `platform` names the platform the adapter targets; the remaining fields are
 * boolean feature flags. NOTE(review): the precise meaning of each flag is not
 * defined in this file — confirm against the consumers before relying on it.
 */
export type SemanticAdapterCapabilities = {
|
||||
platform: NodeJS.Platform;
|
||||
appDiscovery: boolean;
|
||||
accessibilityTree: boolean;
|
||||
nativeElementActions: boolean;
|
||||
nativeValueSetting: boolean;
|
||||
targetedInput: boolean;
|
||||
};
|
||||
|
||||
/**
 * Contract for a platform semantic adapter.
 *
 * `capabilities()` is synchronous; every other member is asynchronous.
 * `listApps()` yields a list of `SemanticApp`; each remaining operation takes a
 * `SemanticToolInput` and yields a `SemanticAppState`.
 */
export type SemanticAdapter = {
|
||||
capabilities(): SemanticAdapterCapabilities;
|
||||
listApps(): Promise<SemanticApp[]>;
|
||||
getAppState(input: SemanticToolInput): Promise<SemanticAppState>;
|
||||
clickElement(input: SemanticToolInput): Promise<SemanticAppState>;
|
||||
performSecondaryAction(input: SemanticToolInput): Promise<SemanticAppState>;
|
||||
setValue(input: SemanticToolInput): Promise<SemanticAppState>;
|
||||
typeText(input: SemanticToolInput): Promise<SemanticAppState>;
|
||||
pressKey(input: SemanticToolInput): Promise<SemanticAppState>;
|
||||
scrollElement(input: SemanticToolInput): Promise<SemanticAppState>;
|
||||
drag(input: SemanticToolInput): Promise<SemanticAppState>;
|
||||
};
|
||||
@@ -0,0 +1,5 @@
|
||||
import { HelperSemanticAdapter } from '@/modules/computer-use/semantics/adapters/helper-semantic-adapter.js';
|
||||
|
||||
/**
 * Build the semantic adapter for Windows.
 *
 * @returns A `HelperSemanticAdapter` constructed for the `'win32'` platform.
 */
export function createWindowsSemanticAdapter(): HelperSemanticAdapter {
  const adapter = new HelperSemanticAdapter('win32');
  return adapter;
}
|
||||
@@ -0,0 +1,467 @@
|
||||
import AppKit
|
||||
import ApplicationServices
|
||||
import Foundation
|
||||
|
||||
/// Loosely typed JSON object (string keys, arbitrary values).
typealias JSON = [String: Any]
|
||||
|
||||
/// Snapshot of one accessibility element as captured by `record(_:index:)`.
struct ElementRecord {
|
||||
// Position of the element in the walk, stored as a string.
let index: String
|
||||
// Accessibility role; `record` substitutes "AXUnknown" when none is available.
let role: String
|
||||
// Title, or the description when the element has no title.
let title: String?
|
||||
// Value attribute, only when it is readable as a string.
let value: String?
|
||||
// Keys "x", "y", "width", "height" as produced by `bounds(_:)`.
let bounds: [String: Double]?
|
||||
// Action names reported for the element.
let actions: [String]
|
||||
}
|
||||
|
||||
// Per-state caches filled by `getAppState` and trimmed by `pruneStoredStates`.
// Element records for each state id.
var stateElements: [String: [ElementRecord]] = [:]
|
||||
// Live AXUIElement handles for each state id, keyed by element index.
var stateAxElements: [String: [String: AXUIElement]] = [:]
|
||||
// State ids in insertion order; the oldest entries are evicted first.
var stateOrder: [String] = []
|
||||
// Upper bound on the number of states kept in the caches above.
let maxStoredStates = 100
|
||||
|
||||
/// Write `object` to stdout as a single JSON line and flush.
///
/// When the object cannot be serialised, a fixed error line is written instead.
func jsonLine(_ object: Any) {
    var line = "{\"error\":\"Failed to encode JSON\"}"
    if JSONSerialization.isValidJSONObject(object),
       let encoded = try? JSONSerialization.data(withJSONObject: object),
       let text = String(data: encoded, encoding: .utf8) {
        line = text
    }
    print(line)
    fflush(stdout)
}
|
||||
|
||||
/// Emit a success reply carrying `result`; a missing `id` is sent as JSON null.
func respond(id: Any?, result: Any) {
    let envelope: [String: Any] = ["id": id ?? NSNull(), "result": result]
    jsonLine(envelope)
}
|
||||
|
||||
/// Emit an error reply carrying `message`; a missing `id` is sent as JSON null.
func respondError(id: Any?, _ message: String) {
    let envelope: [String: Any] = ["id": id ?? NSNull(), "error": message]
    jsonLine(envelope)
}
|
||||
|
||||
/// Read accessibility attribute `attr` of `element` as a string.
///
/// Returns nil when the attribute cannot be copied or is not a string.
func stringAttr(_ element: AXUIElement, _ attr: CFString) -> String? {
    var raw: CFTypeRef?
    let status = AXUIElementCopyAttributeValue(element, attr, &raw)
    if status != .success {
        return nil
    }
    return raw as? String
}
|
||||
|
||||
/// Read accessibility attribute `attr` of `element` as a boolean.
///
/// Returns nil when the attribute cannot be copied or is not a boolean.
func boolAttr(_ element: AXUIElement, _ attr: CFString) -> Bool? {
    var raw: CFTypeRef?
    let status = AXUIElementCopyAttributeValue(element, attr, &raw)
    if status != .success {
        return nil
    }
    return raw as? Bool
}
|
||||
|
||||
/// Read accessibility attribute `attr` of `element` as a list of elements.
///
/// Returns an empty list when the attribute cannot be copied or has another type.
func arrayAttr(_ element: AXUIElement, _ attr: CFString) -> [AXUIElement] {
    var raw: CFTypeRef?
    let status = AXUIElementCopyAttributeValue(element, attr, &raw)
    if status != .success {
        return []
    }
    return raw as? [AXUIElement] ?? []
}
|
||||
|
||||
/// List the action names `element` reports; empty when the query fails.
func actions(_ element: AXUIElement) -> [String] {
    var actionNames: CFArray?
    let status = AXUIElementCopyActionNames(element, &actionNames)
    if status != .success {
        return []
    }
    return actionNames as? [String] ?? []
}
|
||||
|
||||
/// Read the position and size of `element`.
///
/// Returns a dictionary with keys "x", "y", "width" and "height", or nil when
/// either attribute is unavailable or is not an `AXValue` of the expected kind.
func bounds(_ element: AXUIElement) -> [String: Double]? {
    var rawPosition: CFTypeRef?
    var rawSize: CFTypeRef?

    if AXUIElementCopyAttributeValue(element, kAXPositionAttribute as CFString, &rawPosition) == .success,
       AXUIElementCopyAttributeValue(element, kAXSizeAttribute as CFString, &rawSize) == .success,
       let positionRef = rawPosition,
       let sizeRef = rawSize,
       CFGetTypeID(positionRef) == AXValueGetTypeID(),
       CFGetTypeID(sizeRef) == AXValueGetTypeID() {
        // The type-id checks above make these forced casts safe.
        let positionBox = positionRef as! AXValue
        let sizeBox = sizeRef as! AXValue

        var origin = CGPoint.zero
        var extent = CGSize.zero
        if AXValueGetValue(positionBox, .cgPoint, &origin),
           AXValueGetValue(sizeBox, .cgSize, &extent) {
            return [
                "x": Double(origin.x),
                "y": Double(origin.y),
                "width": Double(extent.width),
                "height": Double(extent.height),
            ]
        }
    }
    return nil
}
|
||||
|
||||
/// Capture the attributes of `element` into an `ElementRecord` labelled `index`.
///
/// The role falls back to "AXUnknown"; the title falls back to the description.
func record(_ element: AXUIElement, index: String) -> ElementRecord {
    let role = stringAttr(element, kAXRoleAttribute as CFString) ?? "AXUnknown"
    let title = stringAttr(element, kAXTitleAttribute as CFString)
        ?? stringAttr(element, kAXDescriptionAttribute as CFString)
    let value = stringAttr(element, kAXValueAttribute as CFString)
    let frame = bounds(element)
    let supportedActions = actions(element)

    return ElementRecord(
        index: index,
        role: role,
        title: title,
        value: value,
        bounds: frame,
        actions: supportedActions
    )
}
|
||||
|
||||
/// Look up the cached accessibility element named by `stateId` and
/// `element_index` in `params`; nil when either key is missing or nothing is cached.
func cachedElement(_ params: JSON) -> AXUIElement? {
    if let stateId = params["stateId"] as? String,
       let elementIndex = params["element_index"] as? String {
        return stateAxElements[stateId]?[elementIndex]
    }
    return nil
}
|
||||
|
||||
/// Convert `record` into its JSON form.
///
/// "index", "role" and "actions" are always present; "title", "value" and
/// "bounds" are included only when the record has them.
func dictionary(_ record: ElementRecord) -> JSON {
    var json: JSON = [:]
    json["index"] = record.index
    json["role"] = record.role
    json["actions"] = record.actions

    if let title = record.title {
        json["title"] = title
    }
    if let value = record.value {
        json["value"] = value
    }
    if let frame = record.bounds {
        json["bounds"] = frame
    }
    return json
}
|
||||
|
||||
/// Evict the oldest cached states until at most `maxStoredStates` remain.
func pruneStoredStates() {
    let overflow = stateOrder.count - maxStoredStates
    guard overflow > 0 else { return }

    for evicted in stateOrder.prefix(overflow) {
        stateElements.removeValue(forKey: evicted)
        stateAxElements.removeValue(forKey: evicted)
    }
    stateOrder.removeFirst(overflow)
}
|
||||
|
||||
/// Find a running regular-activation-policy application matching `query`.
///
/// Matching is case-insensitive and tried in order: exact bundle identifier,
/// exact localized name, then localized name containing the query.
/// Throws an `NSError` (domain "CloudCLISemantics", code 404) when nothing matches.
func resolveApp(_ query: String) throws -> NSRunningApplication {
    let needle = query.lowercased()
    let candidates = NSWorkspace.shared.runningApplications.filter { $0.activationPolicy == .regular }

    // Strategies in priority order; the first one that yields an app wins.
    let strategies: [(NSRunningApplication) -> Bool] = [
        { $0.bundleIdentifier?.lowercased() == needle },
        { ($0.localizedName ?? "").lowercased() == needle },
        { ($0.localizedName ?? "").lowercased().contains(needle) },
    ]
    for matches in strategies {
        if let app = candidates.first(where: matches) {
            return app
        }
    }

    throw NSError(domain: "CloudCLISemantics", code: 404, userInfo: [NSLocalizedDescriptionKey: "App is not running: \(query)"])
}
|
||||
|
||||
/// Describe every running application whose activation policy is `.regular`.
///
/// Each entry carries "id", "name", "bundleIdentifier", "pid" and "running".
func listApps() -> [[String: Any]] {
    var entries: [[String: Any]] = []
    for app in NSWorkspace.shared.runningApplications where app.activationPolicy == .regular {
        let bundleId = app.bundleIdentifier
        let displayName = app.localizedName
        let entry: [String: Any] = [
            "id": bundleId ?? displayName ?? "\(app.processIdentifier)",
            "name": displayName ?? bundleId ?? "Unknown",
            "bundleIdentifier": bundleId ?? "",
            "pid": Int(app.processIdentifier),
            "running": true,
        ]
        entries.append(entry)
    }
    return entries
}
|
||||
|
||||
/// Depth-first walk of the accessibility tree rooted at `element`.
///
/// Each visited element is appended to `records` and stored in `axRecords`
/// under a 1-based index string. The walk stops descending beyond `maxDepth`
/// and stops entirely once `limit` records have been collected.
func walk(_ element: AXUIElement, depth: Int, maxDepth: Int, records: inout [ElementRecord], axRecords: inout [String: AXUIElement], limit: Int) {
    guard depth <= maxDepth, records.count < limit else { return }

    let index = String(records.count + 1)
    records.append(record(element, index: index))
    axRecords[index] = element

    let children = arrayAttr(element, kAXChildrenAttribute as CFString)
    for child in children {
        walk(child, depth: depth + 1, maxDepth: maxDepth, records: &records, axRecords: &axRecords, limit: limit)
        if records.count >= limit {
            return
        }
    }
}
|
||||
|
||||
/// Capture the screen with `/usr/sbin/screencapture` and return it as a
/// `data:image/png;base64,...` URL.
///
/// Returns nil when the tool cannot be launched, exits non-zero, or produces
/// an empty or unreadable file.
func pngDataUrlForMainDisplay() -> String? {
    let fileURL = URL(fileURLWithPath: NSTemporaryDirectory()).appendingPathComponent("cloudcli-semantics-\(UUID().uuidString).png")
    // Remove the temp file on every exit path, including a non-zero exit
    // status, which previously left the file behind.
    defer { try? FileManager.default.removeItem(at: fileURL) }

    let process = Process()
    process.executableURL = URL(fileURLWithPath: "/usr/sbin/screencapture")
    process.arguments = ["-x", "-t", "png", fileURL.path]
    // This helper's stdout carries the JSON-lines replies; keep any child
    // output from being interleaved with them.
    process.standardOutput = FileHandle.nullDevice

    do {
        try process.run()
        process.waitUntilExit()
        guard process.terminationStatus == 0 else { return nil }
        let png = try Data(contentsOf: fileURL)
        return png.isEmpty ? nil : "data:image/png;base64,\(png.base64EncodedString())"
    } catch {
        return nil
    }
}
|
||||
|
||||
/// Snapshot the accessibility tree of the app named by `params["app"]`.
///
/// Walks the app's first window (or the application element when it has no
/// windows) to depth 5 and at most 300 elements, caches the result under a
/// fresh state id, and returns the state together with a screenshot and the
/// main display size. Throws whatever `resolveApp` throws.
func getAppState(_ params: JSON) throws -> JSON {
    let requestedName = params["app"] as? String ?? ""
    let app = try resolveApp(requestedName)

    let appElement = AXUIElementCreateApplication(app.processIdentifier)
    let root = arrayAttr(appElement, kAXWindowsAttribute as CFString).first ?? appElement

    var collected: [ElementRecord] = []
    var handles: [String: AXUIElement] = [:]
    walk(root, depth: 0, maxDepth: 5, records: &collected, axRecords: &handles, limit: 300)

    // Cache the snapshot so later actions can refer to elements by index.
    let stateId = "state_\(UUID().uuidString)"
    stateElements[stateId] = collected
    stateAxElements[stateId] = handles
    stateOrder.append(stateId)
    pruneStoredStates()

    let elements = collected.map { dictionary($0) }
    let screenshot: Any = pngDataUrlForMainDisplay() ?? NSNull()
    let mainDisplay = CGMainDisplayID()
    let displaySize: [String: Int] = [
        "width": Int(CGDisplayPixelsWide(mainDisplay)),
        "height": Int(CGDisplayPixelsHigh(mainDisplay)),
    ]
    // One "index role title" line per element.
    let treeLines = collected.map { "\($0.index) \($0.role) \($0.title ?? "")" }

    return [
        "stateId": stateId,
        "app": app.localizedName ?? app.bundleIdentifier ?? requestedName,
        "platform": "darwin",
        "screenshotDataUrl": screenshot,
        "displaySize": displaySize,
        "elements": elements,
        "accessibilityTree": elements,
        "treeText": treeLines.joined(separator: "\n"),
    ]
}
|
||||
|
||||
/// Map a button name ("right", "middle") to a `CGMouseButton`.
///
/// Anything else, including a non-string value, maps to `.left`.
func cgMouseButton(_ value: Any?) -> CGMouseButton {
    let namedButtons: [String: CGMouseButton] = ["right": .right, "middle": .center]
    if let name = value as? String, let button = namedButtons[name] {
        return button
    }
    return .left
}
|
||||
|
||||
/// Return the (down, up) event types that correspond to `button`.
func mouseEventTypes(_ button: CGMouseButton) -> (CGEventType, CGEventType) {
    if button == .right {
        return (.rightMouseDown, .rightMouseUp)
    }
    if button == .center {
        return (.otherMouseDown, .otherMouseUp)
    }
    return (.leftMouseDown, .leftMouseUp)
}
|
||||
|
||||
/// Post `clickCount` synthetic clicks (at least one) of `button` at `point`.
///
/// Each down/up pair carries its ordinal in the `mouseEventClickState` field so
/// that the second click of a pair is delivered as a double click rather than
/// as a second independent single click.
/// Throws an `NSError` (code 500) when no event source can be created.
func postMouseClick(point: CGPoint, button: CGMouseButton, clickCount: Int = 1) throws {
    guard let source = CGEventSource(stateID: .combinedSessionState) else {
        throw NSError(domain: "CloudCLISemantics", code: 500, userInfo: [NSLocalizedDescriptionKey: "Failed to create CGEventSource"])
    }
    let (downType, upType) = mouseEventTypes(button)

    for ordinal in 1...max(1, clickCount) {
        let down = CGEvent(mouseEventSource: source, mouseType: downType, mouseCursorPosition: point, mouseButton: button)
        let up = CGEvent(mouseEventSource: source, mouseType: upType, mouseCursorPosition: point, mouseButton: button)
        // 1 = single click, 2 = double click, and so on.
        down?.setIntegerValueField(.mouseEventClickState, value: Int64(ordinal))
        up?.setIntegerValueField(.mouseEventClickState, value: Int64(ordinal))
        down?.post(tap: .cghidEventTap)
        up?.post(tap: .cghidEventTap)
        // Short pause between clicks.
        usleep(80_000)
    }
}
|
||||
|
||||
/// Post a synthetic drag of `button` from `from` to `to`: button down at the
/// start, one dragged event at the destination, then button up.
///
/// The dragged event type follows the button (left / right / other); it was
/// previously always `.leftMouseDragged`, which did not match right- or
/// middle-button drags.
/// Throws an `NSError` (code 500) when no event source can be created.
func postDrag(from: CGPoint, to: CGPoint, button: CGMouseButton) throws {
    guard let source = CGEventSource(stateID: .combinedSessionState) else {
        throw NSError(domain: "CloudCLISemantics", code: 500, userInfo: [NSLocalizedDescriptionKey: "Failed to create CGEventSource"])
    }
    let (downType, upType) = mouseEventTypes(button)

    let dragType: CGEventType
    switch button {
    case .right: dragType = .rightMouseDragged
    case .center: dragType = .otherMouseDragged
    default: dragType = .leftMouseDragged
    }

    CGEvent(mouseEventSource: source, mouseType: downType, mouseCursorPosition: from, mouseButton: button)?.post(tap: .cghidEventTap)
    usleep(80_000)
    CGEvent(mouseEventSource: source, mouseType: dragType, mouseCursorPosition: to, mouseButton: button)?.post(tap: .cghidEventTap)
    usleep(80_000)
    CGEvent(mouseEventSource: source, mouseType: upType, mouseCursorPosition: to, mouseButton: button)?.post(tap: .cghidEventTap)
}
|
||||
|
||||
/// Run `script` through `/usr/bin/osascript` and wait for it to finish.
///
/// Throws an `NSError` whose code is the exit status and whose description is
/// the script's stderr output (or "AppleScript failed." when stderr is empty).
func runAppleScript(_ script: String) throws {
    let process = Process()
    process.executableURL = URL(fileURLWithPath: "/usr/bin/osascript")
    process.arguments = ["-e", script]
    // The script's stdout is never used; discard it rather than leaving an
    // unread pipe that the child could block on.
    process.standardOutput = FileHandle.nullDevice
    let errorPipe = Pipe()
    process.standardError = errorPipe

    try process.run()
    // Drain stderr before waiting so a chatty child cannot stall on a full pipe.
    let errorData = errorPipe.fileHandleForReading.readDataToEndOfFile()
    process.waitUntilExit()

    guard process.terminationStatus != 0 else { return }

    let detail = String(data: errorData, encoding: .utf8)?
        .trimmingCharacters(in: .whitespacesAndNewlines) ?? ""
    let message = detail.isEmpty ? "AppleScript failed." : detail
    throw NSError(domain: "CloudCLISemantics", code: Int(process.terminationStatus), userInfo: [NSLocalizedDescriptionKey: message])
}
|
||||
|
||||
/// Escape `value` for use inside a double-quoted AppleScript string literal.
///
/// Backslashes are doubled first, then double quotes are backslash-escaped.
func escapedAppleScriptString(_ value: String) -> String {
    let backslashesEscaped = value.replacingOccurrences(of: "\\", with: "\\\\")
    let quotesEscaped = backslashesEscaped.replacingOccurrences(of: "\"", with: "\\\"")
    return quotesEscaped
}
|
||||
|
||||
/// Work out the screen point an action should target.
///
/// Explicit "x"/"y" in `params` win. Otherwise the centre of the cached
/// element named by "stateId" + "element_index" is used. Returns nil when
/// neither is available or the element has no bounds.
func pointForElement(_ params: JSON) -> CGPoint? {
    if let explicitX = params["x"] as? Double, let explicitY = params["y"] as? Double {
        return CGPoint(x: explicitX, y: explicitY)
    }

    if let stateId = params["stateId"] as? String,
       let elementIndex = params["element_index"] as? String,
       let match = stateElements[stateId]?.first(where: { $0.index == elementIndex }),
       let frame = match.bounds,
       let originX = frame["x"], let originY = frame["y"],
       let width = frame["width"], let height = frame["height"] {
        return CGPoint(x: originX + width / 2, y: originY + height / 2)
    }
    return nil
}
|
||||
|
||||
/// Handle `click_element`.
///
/// A single left click on a cached element that offers the press action is
/// performed through the accessibility API. In every other case (or when that
/// action fails) a synthetic mouse click is posted at the resolved point.
/// Returns a fresh app state; throws code 400 when no target point can be resolved.
func click(_ params: JSON) throws -> JSON {
    let button = cgMouseButton(params["mouse_button"])
    let clickCount = params["click_count"] as? Int ?? 1

    let isPlainLeftClick = button == .left && clickCount == 1
    if isPlainLeftClick,
       let element = cachedElement(params),
       actions(element).contains(kAXPressAction as String),
       AXUIElementPerformAction(element, kAXPressAction as CFString) == .success {
        return try getAppState(params)
    }

    guard let target = pointForElement(params) else {
        throw NSError(domain: "CloudCLISemantics", code: 400, userInfo: [NSLocalizedDescriptionKey: "click_element requires x/y or stateId + element_index"])
    }
    try postMouseClick(point: target, button: button, clickCount: clickCount)
    return try getAppState(params)
}
|
||||
|
||||
/// Handle `perform_secondary_action`.
///
/// Uses the show-menu accessibility action on the cached element when it is
/// offered and succeeds; otherwise posts a right click at the resolved point.
/// Returns a fresh app state; throws code 400 when no target point can be resolved.
func performSecondaryAction(_ params: JSON) throws -> JSON {
    var handledNatively = false
    if let element = cachedElement(params),
       actions(element).contains(kAXShowMenuAction as String) {
        handledNatively = AXUIElementPerformAction(element, kAXShowMenuAction as CFString) == .success
    }

    if !handledNatively {
        guard let target = pointForElement(params) else {
            throw NSError(domain: "CloudCLISemantics", code: 400, userInfo: [NSLocalizedDescriptionKey: "perform_secondary_action requires x/y or stateId + element_index"])
        }
        try postMouseClick(point: target, button: .right)
    }
    return try getAppState(params)
}
|
||||
|
||||
/// Handle `set_value`.
///
/// First tries to assign `params["value"]` to the cached element's value
/// attribute. If that is not possible, clicks the target point, sends
/// Command-A and then types the value via System Events keystrokes.
/// Returns a fresh app state; throws code 400 when "value" or a target is missing.
func setValue(_ params: JSON) throws -> JSON {
    guard let value = params["value"] as? String else {
        throw NSError(domain: "CloudCLISemantics", code: 400, userInfo: [NSLocalizedDescriptionKey: "set_value requires value"])
    }

    var assignedNatively = false
    if let element = cachedElement(params) {
        assignedNatively = AXUIElementSetAttributeValue(element, kAXValueAttribute as CFString, value as CFTypeRef) == .success
    }

    if !assignedNatively {
        guard let target = pointForElement(params) else {
            throw NSError(domain: "CloudCLISemantics", code: 400, userInfo: [NSLocalizedDescriptionKey: "set_value requires x/y or stateId + element_index"])
        }
        try postMouseClick(point: target, button: .left)
        try runAppleScript("tell application \"System Events\" to keystroke \"a\" using command down")
        try runAppleScript("tell application \"System Events\" to keystroke \"\(escapedAppleScriptString(value))\"")
    }
    return try getAppState(params)
}
|
||||
|
||||
/// Handle `type_text`: type `params["text"]` (empty when absent) via a
/// System Events keystroke, then return a fresh app state.
func typeText(_ params: JSON) throws -> JSON {
    let text = params["text"] as? String ?? ""
    let literal = escapedAppleScriptString(text)
    try runAppleScript("tell application \"System Events\" to keystroke \"\(literal)\"")
    return try getAppState(params)
}
|
||||
|
||||
/// Build the AppleScript ` using {...}` clause for the modifier names in `parts`.
///
/// Names are matched case-insensitively; unknown names are ignored. Returns an
/// empty string when no known modifier is present.
func appleScriptModifiers(_ parts: [String]) -> String {
    let clauseByName: [String: String] = [
        "cmd": "command down", "command": "command down", "meta": "command down",
        "ctrl": "control down", "control": "control down",
        "alt": "option down", "option": "option down",
        "shift": "shift down",
    ]
    let clauses = parts.compactMap { clauseByName[$0.lowercased()] }
    if clauses.isEmpty {
        return ""
    }
    return " using {\(clauses.joined(separator: ", "))}"
}
|
||||
|
||||
/// Return the key code used for a named special key, matched
/// case-insensitively, or nil when `key` is not one of the known names.
func appleScriptKeyCode(_ key: String) -> Int? {
    let codeByName: [String: Int] = [
        "return": 36, "enter": 36,
        "tab": 48,
        "space": 49,
        "delete": 51, "backspace": 51,
        "escape": 53, "esc": 53,
        "left": 123,
        "right": 124,
        "down": 125,
        "up": 126,
    ]
    return codeByName[key.lowercased()]
}
|
||||
|
||||
/// Handle `press_key`.
///
/// `params["key"]` is a "+"-separated chord such as "cmd+shift+a": the last
/// part is the key, the parts before it are modifiers. Known special keys are
/// sent as key codes, everything else as a keystroke. Returns a fresh app state.
func pressKey(_ params: JSON) throws -> JSON {
    let raw = params["key"] as? String ?? ""

    var parts: [String] = []
    for piece in raw.split(separator: "+") {
        let trimmed = String(piece).trimmingCharacters(in: .whitespacesAndNewlines)
        if !trimmed.isEmpty {
            parts.append(trimmed)
        }
    }

    let key = parts.last ?? raw
    let modifiers = appleScriptModifiers(Array(parts.dropLast()))

    let command: String
    if let keyCode = appleScriptKeyCode(key) {
        command = "key code \(keyCode)\(modifiers)"
    } else {
        command = "keystroke \"\(escapedAppleScriptString(key))\"\(modifiers)"
    }
    try runAppleScript("tell application \"System Events\" to \(command)")
    return try getAppState(params)
}
|
||||
|
||||
/// Handle `scroll_element`.
///
/// Moves the cursor to the resolved point and posts one line-based scroll
/// event. `params["direction"]` is "up", "down" (default), "left" or "right";
/// `params["pages"]` (default 1) is scaled to 8 lines per page, with a minimum
/// of one line. The line count is capped so that a huge or non-finite `pages`
/// value can no longer overflow the `Int32` conversion and crash the helper.
/// Returns a fresh app state; throws code 400 when no target point can be resolved.
func scrollElement(_ params: JSON) throws -> JSON {
    guard let point = pointForElement(params) else {
        throw NSError(domain: "CloudCLISemantics", code: 400, userInfo: [NSLocalizedDescriptionKey: "scroll_element requires x/y or stateId + element_index"])
    }
    CGWarpMouseCursorPosition(point)

    let direction = params["direction"] as? String ?? "down"
    let pages = params["pages"] as? Double ?? 1.0

    // Upper bound on lines per event; far beyond any realistic request.
    let maxLines = 10_000.0
    let requestedLines = pages.isFinite ? abs(pages) * 8.0 : 8.0
    let amount = Int32(min(max(1.0, requestedLines), maxLines))

    let vertical = direction == "up" ? amount : direction == "down" ? -amount : 0
    let horizontal = direction == "left" ? amount : direction == "right" ? -amount : 0
    CGEvent(scrollWheelEvent2Source: nil, units: .line, wheelCount: 2, wheel1: vertical, wheel2: horizontal, wheel3: 0)?.post(tap: .cghidEventTap)
    return try getAppState(params)
}
|
||||
|
||||
/// Handle `drag`: drag with the requested mouse button from
/// (`from_x`, `from_y`) to (`to_x`, `to_y`), then return a fresh app state.
///
/// Throws code 400 when any of the four coordinates is missing or not a number.
func drag(_ params: JSON) throws -> JSON {
    let coordinate: (String) -> Double? = { name in params[name] as? Double }

    guard let startX = coordinate("from_x"),
          let startY = coordinate("from_y"),
          let endX = coordinate("to_x"),
          let endY = coordinate("to_y")
    else {
        throw NSError(domain: "CloudCLISemantics", code: 400, userInfo: [NSLocalizedDescriptionKey: "drag requires from_x/from_y/to_x/to_y"])
    }

    let origin = CGPoint(x: startX, y: startY)
    let destination = CGPoint(x: endX, y: endY)
    try postDrag(from: origin, to: destination, button: cgMouseButton(params["mouse_button"]))
    return try getAppState(params)
}
|
||||
|
||||
/// Dispatch one decoded request to its handler and write the reply.
///
/// `request` carries "id", "method" and optional "params". Unknown methods and
/// thrown errors are reported as error replies carrying the same id.
func handle(_ request: JSON) {
    let id = request["id"]
    let method = request["method"] as? String ?? ""
    let params = request["params"] as? JSON ?? [:]

    do {
        let result: Any
        switch method {
        case "list_apps": result = listApps()
        case "get_app_state": result = try getAppState(params)
        case "click_element": result = try click(params)
        case "perform_secondary_action": result = try performSecondaryAction(params)
        case "set_value": result = try setValue(params)
        case "type_text": result = try typeText(params)
        case "press_key": result = try pressKey(params)
        case "scroll_element": result = try scrollElement(params)
        case "drag": result = try drag(params)
        default:
            respondError(id: id, "Method is not implemented yet: \(method)")
            return
        }
        respond(id: id, result: result)
    } catch {
        respondError(id: id, error.localizedDescription)
    }
}
|
||||
|
||||
// Request loop: read one JSON object per stdin line until end of input.
// Lines that are not a JSON object get an error reply with a null id.
while let line = readLine() {
    if let payload = line.data(using: .utf8),
       let decoded = try? JSONSerialization.jsonObject(with: payload),
       let request = decoded as? JSON {
        handle(request)
    } else {
        respondError(id: nil, "Invalid JSON request")
    }
}
|
||||
@@ -0,0 +1,124 @@
|
||||
import { spawn, type ChildProcessWithoutNullStreams } from 'node:child_process';
|
||||
import readline from 'node:readline';
|
||||
|
||||
/** Generic JSON object: string keys with values of unknown type. */
type JsonRecord = Record<string, unknown>;
|
||||
|
||||
/** Bookkeeping for one in-flight helper request. */
type PendingRequest = {
|
||||
// Settles the caller's promise with the helper's result.
resolve: (value: unknown) => void;
|
||||
// Settles the caller's promise with a failure.
reject: (error: Error) => void;
|
||||
// Timeout timer; cleared once the request is settled.
timer: ReturnType<typeof setTimeout>;
|
||||
};
|
||||
|
||||
/** Raw request timeout, read once from `CLOUDCLI_SEMANTICS_HELPER_TIMEOUT_MS` (default `'60000'`). */
const DEFAULT_TIMEOUT_MS = Number.parseInt(process.env.CLOUDCLI_SEMANTICS_HELPER_TIMEOUT_MS || '60000', 10);

/**
 * Effective request timeout in milliseconds.
 *
 * @returns The configured value when it is a positive finite number, otherwise 60000.
 */
function timeoutMs(): number {
  const configured = DEFAULT_TIMEOUT_MS;
  if (!Number.isFinite(configured) || configured <= 0) {
    return 60000;
  }
  return configured;
}
|
||||
|
||||
/**
 * Turn any thrown value into a printable message.
 *
 * @param error The caught value.
 * @returns `error.message` for `Error` instances, otherwise `String(error)`.
 */
function errorMessage(error: unknown): string {
  if (error instanceof Error) {
    return error.message;
  }
  return String(error);
}
|
||||
|
||||
/**
 * Wrapper around the native semantic helper executable.
 *
 * Requests are written to the child's stdin as one JSON object per line
 * (`{ id, method, params }`); replies are read line by line from stdout and
 * matched to the waiting caller by numeric `id`. The child is spawned on the
 * first request and spawned again on the next request after it has gone away.
 */
export class SemanticHelperProcess {
  private child: ChildProcessWithoutNullStreams | null = null;
  private reader: readline.Interface | null = null;
  private nextId = 1;
  private pending = new Map<number, PendingRequest>();

  /** @param executablePath Path of the helper executable to spawn. */
  constructor(private readonly executablePath: string) {}

  /**
   * Send one request to the helper and wait for its reply.
   *
   * @param method Helper method name.
   * @param params Parameters forwarded verbatim.
   * @returns The `result` field of the matching reply.
   * @throws Error when the helper's stdin is not writable; the returned promise
   *   rejects on helper errors, timeout, write failure, or helper exit.
   */
  async request(method: string, params: JsonRecord): Promise<unknown> {
    this.ensureStarted();
    const child = this.child;
    if (!child?.stdin.writable) {
      throw new Error('Semantic helper process is not running.');
    }

    const id = this.nextId++;
    return new Promise((resolve, reject) => {
      const timer = setTimeout(() => {
        this.pending.delete(id);
        reject(new Error(`Semantic helper request timed out: ${method}`));
      }, timeoutMs());
      this.pending.set(id, { resolve, reject, timer });

      try {
        child.stdin.write(`${JSON.stringify({ id, method, params })}\n`);
      } catch (error) {
        // Serialising or writing failed: release the timer and the pending
        // entry now instead of leaving them until the timeout fires.
        clearTimeout(timer);
        this.pending.delete(id);
        reject(error instanceof Error ? error : new Error(String(error)));
      }
    });
  }

  /** Terminate the helper (SIGTERM) and reject every in-flight request. */
  stop(): void {
    const child = this.child;
    this.child = null;
    this.reader?.close();
    this.reader = null;
    this.rejectAll('Semantic helper stopped.');
    if (child) {
      try { child.kill('SIGTERM'); } catch { /* noop */ }
    }
  }

  /** Spawn the helper and wire up its streams, unless it is already running. */
  private ensureStarted(): void {
    if (this.child) {
      return;
    }

    const child = spawn(this.executablePath, [], {
      stdio: ['pipe', 'pipe', 'pipe'],
      windowsHide: true,
    });
    this.child = child;

    const reader = readline.createInterface({ input: child.stdout });
    this.reader = reader;
    reader.on('line', (line) => this.handleLine(line));

    child.stderr.on('data', (chunk) => {
      const text = String(chunk).trim();
      if (text) {
        console.error('[SemanticHelper]', text);
      }
    });

    // Without a listener, a write failure on stdin (e.g. the helper died)
    // is emitted as an unhandled 'error' event and crashes the process.
    child.stdin.on('error', (error) => {
      console.error('[SemanticHelper] stdin error:', error.message);
    });

    // Shared teardown for 'error' and 'exit'. It only touches shared state when
    // this child is still the current one: after stop() followed by a new
    // request, a late event from the old child must not null out the new child
    // or reject the new child's requests.
    const onGone = (reason: string): void => {
      reader.close();
      if (this.reader === reader) {
        this.reader = null;
      }
      if (this.child !== child) {
        return;
      }
      this.child = null;
      this.rejectAll(reason);
    };

    child.once('error', (error) => {
      onGone(`Failed to start semantic helper: ${error.message}`);
    });
    child.once('exit', (code) => {
      onGone(`Semantic helper exited with code ${code ?? 'null'}.`);
    });
  }

  /** Parse one stdout line and settle the request it answers, if any. */
  private handleLine(line: string): void {
    let parsed: unknown;
    try {
      parsed = JSON.parse(line);
    } catch (error) {
      console.error('[SemanticHelper] Invalid JSON response:', errorMessage(error));
      return;
    }

    // Valid JSON that is not an object (e.g. `null`) cannot be a reply;
    // ignore it instead of throwing on the property access below.
    if (typeof parsed !== 'object' || parsed === null) {
      return;
    }
    const message = parsed as JsonRecord;

    const id = typeof message.id === 'number' ? message.id : null;
    if (id === null) {
      return;
    }
    const pending = this.pending.get(id);
    if (!pending) {
      return;
    }
    clearTimeout(pending.timer);
    this.pending.delete(id);

    if (message.error) {
      pending.reject(new Error(typeof message.error === 'string' ? message.error : 'Semantic helper request failed.'));
      return;
    }
    pending.resolve(message.result);
  }

  /** Reject and forget every in-flight request with `reason`. */
  private rejectAll(reason: string): void {
    const outstanding = [...this.pending.values()];
    this.pending.clear();
    for (const request of outstanding) {
      clearTimeout(request.timer);
      request.reject(new Error(reason));
    }
  }
}
|
||||
@@ -0,0 +1,97 @@
|
||||
import fs from 'node:fs';
|
||||
import path from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
|
||||
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
||||
|
||||
/** Platforms for which a helper executable name is defined. */
export type SemanticHelperPlatform = 'darwin' | 'win32';
|
||||
|
||||
/** Outcome of looking for the helper executable for a platform/arch pair. */
export type SemanticHelperResolution = {
|
||||
// True when a candidate path was found.
available: boolean;
|
||||
// Path of the executable that was found, or null.
path: string | null;
|
||||
// Which candidate location matched; 'missing' when none did.
source: 'bundled' | 'dev' | 'missing';
|
||||
// Platform the lookup was performed for.
platform: NodeJS.Platform;
|
||||
// Architecture the lookup was performed for.
arch: NodeJS.Architecture;
|
||||
// Explanation, set when the helper is unsupported or was not found.
reason?: string;
|
||||
};
|
||||
|
||||
/**
 * File name of the helper executable for `platform`.
 *
 * @returns `'CloudCLISemantics'` on darwin, `'CloudCLISemantics.exe'` on win32,
 *   and `null` for every other platform.
 */
function helperExecutableName(platform: NodeJS.Platform): string | null {
  switch (platform) {
    case 'darwin':
      return 'CloudCLISemantics';
    case 'win32':
      return 'CloudCLISemantics.exe';
    default:
      return null;
  }
}
|
||||
|
||||
/**
 * Check whether `filePath` is usable as a helper candidate.
 *
 * @returns `true` when the path passes the executable check or, failing that,
 *   the plain existence check; `false` when both checks fail.
 */
function pathExists(filePath: string): boolean {
  const checks = [fs.constants.X_OK, fs.constants.F_OK];
  for (const mode of checks) {
    try {
      fs.accessSync(filePath, mode);
      return true;
    } catch {
      // This check failed; fall through to the next, more lenient one.
    }
  }
  return false;
}
|
||||
|
||||
/**
 * List the locations where the helper executable may live, in lookup order:
 * the bundled `bin/<platform>-<arch>` directory resolved relative to this
 * module, then the same directory under the current working directory's
 * `server/modules/computer-use/semantics` tree.
 *
 * @returns An empty list when the platform has no helper executable.
 */
function candidatePaths(platform: NodeJS.Platform, arch: NodeJS.Architecture): Array<{ source: 'bundled' | 'dev'; path: string }> {
  const executable = helperExecutableName(platform);
  if (executable === null) {
    return [];
  }

  const binDir = `${platform}-${arch}`;
  const bundledPath = path.resolve(__dirname, '..', 'bin', binDir, executable);
  const devPath = path.resolve(
    process.cwd(), 'server', 'modules', 'computer-use', 'semantics', 'bin', binDir, executable,
  );

  return [
    { source: 'bundled', path: bundledPath },
    { source: 'dev', path: devPath },
  ];
}
|
||||
|
||||
/**
 * Locate the semantic helper executable for a platform/arch pair.
 *
 * @param platform Platform to resolve for; defaults to `process.platform`.
 * @param arch     Architecture to resolve for; defaults to `process.arch`.
 * @returns A resolution with `available: true` and the first existing candidate
 *   path, or `available: false` with a `reason` when the platform is
 *   unsupported or no candidate exists.
 */
export function resolveSemanticHelper(
  platform: NodeJS.Platform = process.platform,
  arch: NodeJS.Architecture = process.arch,
): SemanticHelperResolution {
  // Both failure outcomes share the same shape and differ only in the reason.
  const missing = (reason: string): SemanticHelperResolution => ({
    available: false,
    path: null,
    source: 'missing',
    platform,
    arch,
    reason,
  });

  const executable = helperExecutableName(platform);
  if (!executable) {
    return missing(`Semantic Computer Use helper is not supported on ${platform}.`);
  }

  const found = candidatePaths(platform, arch).find((candidate) => pathExists(candidate.path));
  if (found) {
    return {
      available: true,
      path: found.path,
      source: found.source,
      platform,
      arch,
    };
  }

  return missing(`Bundled semantic helper was not found for ${platform}-${arch} (${executable}).`);
}
|
||||
@@ -0,0 +1,11 @@
|
||||
<!--
  Build definition for the Windows semantic helper: a .NET 8 (Windows) console
  executable named CloudCLISemantics.
  NOTE(review): UseWindowsForms / UseWPF are presumably enabled for the
  System.Drawing and System.Windows.Automation namespaces that Program.cs
  imports; confirm before removing either flag.
-->
<Project Sdk="Microsoft.NET.Sdk">
|
||||
<PropertyGroup>
|
||||
<OutputType>Exe</OutputType>
|
||||
<TargetFramework>net8.0-windows</TargetFramework>
|
||||
<Nullable>enable</Nullable>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<UseWindowsForms>true</UseWindowsForms>
|
||||
<UseWPF>true</UseWPF>
|
||||
<AssemblyName>CloudCLISemantics</AssemblyName>
|
||||
</PropertyGroup>
|
||||
</Project>
|
||||
534
server/modules/computer-use/semantics/helpers/windows/Program.cs
Normal file
534
server/modules/computer-use/semantics/helpers/windows/Program.cs
Normal file
@@ -0,0 +1,534 @@
|
||||
using System.Diagnostics;
|
||||
using System.Drawing;
|
||||
using System.Drawing.Imaging;
|
||||
using System.IO;
|
||||
using System.Runtime.InteropServices;
|
||||
using System.Text.Json;
|
||||
using System.Windows.Automation;
|
||||
|
||||
static class Program
|
||||
{
|
||||
private const int MaxStoredStates = 100;
|
||||
private static readonly Dictionary<string, List<ElementRecord>> StateElements = new();
|
||||
private static readonly Dictionary<string, Dictionary<string, AutomationElement>> StateAutomationElements = new();
|
||||
private static readonly Queue<string> StateOrder = new();
|
||||
|
||||
// Entry point: serves line-delimited JSON requests read from stdin.
//
// Each input line is parsed as an object with optional "id", "method" and
// "params" members. The method is dispatched to the matching handler and a
// single JSON line is written back: { id, result } on success or
// { id, error } when the handler throws. A line that cannot be parsed as a
// request produces { id: null, error: "Invalid JSON request: ..." }.
// The loop ends when stdin is closed.
public static void Main()
{
    string? line;
    while ((line = Console.ReadLine()) != null)
    {
        try
        {
            using var doc = JsonDocument.Parse(line);
            var root = doc.RootElement;
            var id = root.TryGetProperty("id", out var idValue) ? idValue.Clone() : default;
            var method = root.TryGetProperty("method", out var methodValue) ? methodValue.GetString() ?? "" : "";

            // Handlers always receive a JSON object. When "params" is missing or is
            // not an object, substitute an empty one. The temporary document is
            // disposed right after cloning so its pooled buffer is returned.
            JsonElement parameters;
            if (root.TryGetProperty("params", out var paramsValue) && paramsValue.ValueKind == JsonValueKind.Object)
            {
                parameters = paramsValue.Clone();
            }
            else
            {
                using var emptyDoc = JsonDocument.Parse("{}");
                parameters = emptyDoc.RootElement.Clone();
            }

            try
            {
                object result = method switch
                {
                    "list_apps" => ListApps(),
                    "get_app_state" => GetAppState(parameters),
                    "click_element" => ClickElement(parameters),
                    "perform_secondary_action" => PerformSecondaryAction(parameters),
                    "set_value" => SetValue(parameters),
                    "type_text" => TypeText(parameters),
                    "press_key" => PressKey(parameters),
                    "scroll_element" => ScrollElement(parameters),
                    "drag" => Drag(parameters),
                    _ => throw new InvalidOperationException($"Method is not implemented yet: {method}")
                };
                Write(new Dictionary<string, object?> { ["id"] = JsonValue(id), ["result"] = result });
            }
            catch (Exception ex)
            {
                // Handler failures are reported against the request id.
                Write(new Dictionary<string, object?> { ["id"] = JsonValue(id), ["error"] = ex.Message });
            }
        }
        catch (Exception ex)
        {
            // The request itself could not be read, so no id is available.
            Write(new Dictionary<string, object?> { ["id"] = null, ["error"] = $"Invalid JSON request: {ex.Message}" });
        }
    }
}
|
||||
|
||||
// Converts a JSON scalar into a plain .NET value that can be echoed back in a
// response (used for the request "id").
//
// Strings, booleans and numbers are preserved; every other kind (including an
// undefined/default element, null, objects and arrays) becomes null.
// Integers that fit in Int64 are returned as long so large ids round-trip
// exactly; a `cond ? long : double` conditional would silently promote them
// to double and lose precision above 2^53.
private static object? JsonValue(JsonElement element)
{
    switch (element.ValueKind)
    {
        case JsonValueKind.String:
            return element.GetString();
        case JsonValueKind.Number:
            if (element.TryGetInt64(out var integral))
            {
                return integral;
            }
            return element.GetDouble();
        case JsonValueKind.True:
            return true;
        case JsonValueKind.False:
            return false;
        default:
            return null;
    }
}
|
||||
|
||||
// Serializes a response as one JSON line on stdout and flushes immediately so
// the reader on the other end of the pipe sees it without delay.
private static void Write(object value)
{
    var payload = JsonSerializer.Serialize(value);
    Console.WriteLine(payload);
    Console.Out.Flush();
}
|
||||
|
||||
// Lists every process that currently owns a main window, ordered by process
// name. Each entry carries the pid (as both "id" string and "pid" number),
// the process name and the main window title.
private static List<Dictionary<string, object?>> ListApps()
{
    var windowedApps =
        from candidate in Process.GetProcesses()
        where candidate.MainWindowHandle != IntPtr.Zero
        orderby candidate.ProcessName
        select new Dictionary<string, object?>
        {
            ["id"] = candidate.Id.ToString(),
            ["name"] = candidate.ProcessName,
            ["processName"] = candidate.ProcessName,
            ["pid"] = candidate.Id,
            ["running"] = true,
            ["windowTitle"] = candidate.MainWindowTitle
        };

    return windowedApps.ToList();
}
|
||||
|
||||
// Finds the windowed process that an "app" query refers to.
//
// Matching order (first hit wins, all comparisons case-insensitive):
//   1. exact process name
//   2. exact main window title
//   3. main window title containing the query
//   4. numeric process id, so the "id"/"pid" reported by ListApps can be used
//      to target one specific instance. This is tried last so queries that
//      resolved before keep resolving to the same process.
//
// Throws InvalidOperationException when the query is blank or nothing matches.
private static Process ResolveProcess(string query)
{
    var normalized = query.Trim();
    if (string.IsNullOrWhiteSpace(normalized))
    {
        throw new InvalidOperationException("app is required.");
    }

    var processes = Process.GetProcesses()
        .Where(process => process.MainWindowHandle != IntPtr.Zero)
        .ToList();

    var match = processes.FirstOrDefault(process => process.ProcessName.Equals(normalized, StringComparison.OrdinalIgnoreCase))
        ?? processes.FirstOrDefault(process => process.MainWindowTitle.Equals(normalized, StringComparison.OrdinalIgnoreCase))
        ?? processes.FirstOrDefault(process => process.MainWindowTitle.Contains(normalized, StringComparison.OrdinalIgnoreCase));

    if (match == null && int.TryParse(normalized, out var pid))
    {
        match = processes.FirstOrDefault(process => process.Id == pid);
    }

    return match ?? throw new InvalidOperationException($"App is not running: {query}");
}
|
||||
|
||||
// Captures the current state of the app named by the "app" parameter.
//
// The app's main window is walked through UI Automation (root plus up to five
// levels of descendants, at most 300 elements). The snapshot is stored under a
// freshly generated state id so later calls can refer to elements by index,
// and the oldest snapshots are pruned. The returned dictionary also carries a
// screenshot and the size of the primary screen.
//
// Throws InvalidOperationException when the app cannot be resolved or has no
// UI Automation root.
private static Dictionary<string, object?> GetAppState(JsonElement parameters)
{
    var process = ResolveProcess(ReadString(parameters, "app"));
    var root = AutomationElement.FromHandle(process.MainWindowHandle);
    if (root == null)
    {
        throw new InvalidOperationException("No UI Automation root window is available.");
    }

    var records = new List<ElementRecord>();
    var liveElements = new Dictionary<string, AutomationElement>();
    Walk(root, records, liveElements, 0, 5, 300);

    // Remember the snapshot so element indexes can be resolved later.
    var stateId = $"state_{Guid.NewGuid()}";
    StateElements[stateId] = records;
    StateAutomationElements[stateId] = liveElements;
    StateOrder.Enqueue(stateId);
    PruneStoredStates();

    var elements = new List<Dictionary<string, object?>>(records.Count);
    foreach (var record in records)
    {
        elements.Add(record.ToDictionary());
    }
    var windowBounds = root.Current.BoundingRectangle;

    var state = new Dictionary<string, object?>();
    state["stateId"] = stateId;
    state["app"] = process.ProcessName;
    state["platform"] = "win32";
    state["screenshotDataUrl"] = CaptureScreen();

    var screen = System.Windows.Forms.Screen.PrimaryScreen!.Bounds;
    state["displaySize"] = new Dictionary<string, object?>
    {
        ["width"] = screen.Width,
        ["height"] = screen.Height
    };
    state["window"] = new Dictionary<string, object?>
    {
        ["title"] = process.MainWindowTitle,
        ["bounds"] = BoundsDictionary(windowBounds)
    };
    state["elements"] = elements;
    state["accessibilityTree"] = elements;

    // One "index role title" line per element; the title is blank when absent.
    var treeLines = elements.Select(entry => $"{entry["index"]} {entry["role"]} {entry.GetValueOrDefault("title")}");
    state["treeText"] = string.Join("\n", treeLines);
    return state;
}
|
||||
|
||||
// Clicks an element or a screen point and returns the refreshed app state.
//
// A plain single left click on a stored element is first attempted through
// the UI Automation Invoke pattern. If that is not possible, the click is
// synthesized with the mouse at the explicit x/y or at the centre of the
// stored element bounds.
//
// Throws InvalidOperationException when no target point can be determined.
private static Dictionary<string, object?> ClickElement(JsonElement parameters)
{
    var button = ReadString(parameters, "mouse_button");
    var isLeftButton = button is "" or "left";

    if (isLeftButton && ReadInt(parameters, "click_count", 1) == 1)
    {
        if (AutomationElementFor(parameters) is { } target && TryInvoke(target))
        {
            return GetAppState(parameters);
        }
    }

    if (PointFor(parameters) is not { } point)
    {
        throw new InvalidOperationException("click_element requires x/y or stateId + element_index.");
    }

    SendMouseClick(point.X, point.Y, button, ReadInt(parameters, "click_count", 1));
    return GetAppState(parameters);
}
|
||||
|
||||
// Performs a single right click at the requested point (explicit x/y or the
// centre of a stored element) and returns the refreshed app state.
//
// Throws InvalidOperationException when no target point can be determined.
private static Dictionary<string, object?> PerformSecondaryAction(JsonElement parameters)
{
    if (PointFor(parameters) is not { } target)
    {
        throw new InvalidOperationException("perform_secondary_action requires x/y or stateId + element_index.");
    }

    SendMouseClick(target.X, target.Y, "right", 1);
    return GetAppState(parameters);
}
|
||||
|
||||
// Replaces the value of a target element and returns the refreshed app state.
//
// Strategy, in order:
//   1. If the stored element exposes the UI Automation Value pattern, set the
//      value through it and return.
//   2. Otherwise try to focus the element, and click the target point when one
//      is available (explicit x/y or stored bounds).
//   3. With focus established, send Ctrl+A followed by the escaped value.
//
// Throws InvalidOperationException when there is neither an element nor a
// point, or when the element could be neither focused nor clicked.
private static Dictionary<string, object?> SetValue(JsonElement parameters)
{
    static bool TryFocus(AutomationElement candidate)
    {
        try
        {
            candidate.SetFocus();
            return true;
        }
        catch
        {
            // Fall through to coordinate focus below.
            return false;
        }
    }

    var value = ReadString(parameters, "value");
    var element = AutomationElementFor(parameters);

    if (element != null && element.TryGetCurrentPattern(ValuePattern.Pattern, out var rawPattern))
    {
        ((ValuePattern)rawPattern).SetValue(value);
        return GetAppState(parameters);
    }

    var focused = element != null && TryFocus(element);

    if (PointFor(parameters) is { } target)
    {
        SendMouseClick(target.X, target.Y, "left", 1);
    }
    else if (!focused)
    {
        throw new InvalidOperationException(element == null
            ? "set_value requires x/y or stateId + element_index."
            : "set_value could not focus the requested element.");
    }

    // Select the existing content, then type over it.
    System.Windows.Forms.SendKeys.SendWait("^a");
    System.Windows.Forms.SendKeys.SendWait(EscapeSendKeys(value));
    return GetAppState(parameters);
}
|
||||
|
||||
// Types the "text" parameter into whatever currently has keyboard focus and
// returns the refreshed app state. The text is escaped so SendKeys treats
// every character literally.
private static Dictionary<string, object?> TypeText(JsonElement parameters)
{
    var keystrokes = EscapeSendKeys(ReadString(parameters, "text"));
    System.Windows.Forms.SendKeys.SendWait(keystrokes);
    return GetAppState(parameters);
}
|
||||
|
||||
// Sends the key or key chord named by the "key" parameter (for example
// "enter" or "ctrl+c") to the focused window and returns the refreshed app
// state.
private static Dictionary<string, object?> PressKey(JsonElement parameters)
{
    var chord = ToSendKeysChord(ReadString(parameters, "key"));
    System.Windows.Forms.SendKeys.SendWait(chord);
    return GetAppState(parameters);
}
|
||||
|
||||
// Scrolls an element (or the control under a point) and returns the refreshed
// app state.
//
// Parameters read: "direction" ("up", "down", "left", "right") and "pages"
// (defaults to 1).
//
// When the stored element exposes the UI Automation Scroll pattern, one large
// increment/decrement is issued per requested page. Otherwise the cursor is
// moved to the target point and a mouse wheel event is synthesized: the
// vertical wheel for up/down and the horizontal wheel for left/right.
//
// Throws InvalidOperationException when no Scroll pattern is available and no
// target point can be determined.
private static Dictionary<string, object?> ScrollElement(JsonElement parameters)
{
    const uint MouseEventWheel = 0x0800;   // MOUSEEVENTF_WHEEL
    const uint MouseEventHWheel = 0x1000;  // MOUSEEVENTF_HWHEEL
    const int WheelDelta = 120;            // one wheel notch
    const int MaxPatternSteps = 50;        // keeps a huge "pages" value from looping for ages

    var element = AutomationElementFor(parameters);
    var direction = ReadString(parameters, "direction");
    var pages = ReadDouble(parameters, "pages", 1);

    if (element != null && element.TryGetCurrentPattern(ScrollPattern.Pattern, out var scrollPatternValue))
    {
        var scrollPattern = (ScrollPattern)scrollPatternValue;
        var vertical = direction == "up" ? ScrollAmount.LargeDecrement : direction == "down" ? ScrollAmount.LargeIncrement : ScrollAmount.NoAmount;
        var horizontal = direction == "left" ? ScrollAmount.LargeDecrement : direction == "right" ? ScrollAmount.LargeIncrement : ScrollAmount.NoAmount;

        // Honour "pages": one large step per page, always at least one step.
        var steps = double.IsFinite(pages) ? (int)Math.Clamp(Math.Round(pages), 1, MaxPatternSteps) : 1;
        for (var step = 0; step < steps; step++)
        {
            scrollPattern.Scroll(horizontal, vertical);
        }
        return GetAppState(parameters);
    }

    var point = PointFor(parameters);
    if (point == null)
    {
        throw new InvalidOperationException("scroll_element requires x/y or stateId + element_index.");
    }

    SetCursorPos(point.Value.X, point.Value.Y);

    // Positive wheel data scrolls up (vertical wheel) or right (horizontal
    // wheel); negative scrolls down or left.
    var isHorizontal = direction is "left" or "right";
    var wheel = (int)Math.Round(Math.Max(1, pages) * WheelDelta);
    if (direction is "down" or "left") wheel = -wheel;
    mouse_event(isHorizontal ? MouseEventHWheel : MouseEventWheel, 0, 0, unchecked((uint)wheel), UIntPtr.Zero);
    return GetAppState(parameters);
}
|
||||
|
||||
// Evicts the oldest stored snapshots until at most MaxStoredStates remain.
// Eviction removes both the element records and the live automation elements.
private static void PruneStoredStates()
{
    for (var excess = StateOrder.Count - MaxStoredStates; excess > 0; excess--)
    {
        var oldest = StateOrder.Dequeue();
        StateElements.Remove(oldest);
        StateAutomationElements.Remove(oldest);
    }
}
|
||||
|
||||
// Drags with the left mouse button from (from_x, from_y) to (to_x, to_y) in
// screen coordinates and returns the refreshed app state.
//
// Throws InvalidOperationException when any of the four coordinates is
// missing.
private static Dictionary<string, object?> Drag(JsonElement parameters)
{
    const uint LeftDown = 0x0002;
    const uint LeftUp = 0x0004;

    var coordinates = new[] { "from_x", "from_y", "to_x", "to_y" }
        .Select(name => ReadDouble(parameters, name, double.NaN))
        .ToArray();
    if (coordinates.Any(double.IsNaN))
    {
        throw new InvalidOperationException("drag requires from_x/from_y/to_x/to_y.");
    }

    var startX = (int)Math.Round(coordinates[0]);
    var startY = (int)Math.Round(coordinates[1]);
    var endX = (int)Math.Round(coordinates[2]);
    var endY = (int)Math.Round(coordinates[3]);

    // Press at the start, move, then release; the short pauses sit between
    // the press, the move and the release.
    SetCursorPos(startX, startY);
    mouse_event(LeftDown, 0, 0, 0, UIntPtr.Zero);
    Thread.Sleep(80);
    SetCursorPos(endX, endY);
    Thread.Sleep(80);
    mouse_event(LeftUp, 0, 0, 0, UIntPtr.Zero);
    return GetAppState(parameters);
}
|
||||
|
||||
// Depth-first walk of the UI Automation tree starting at `element`.
//
// Every visited element is appended to `records` and stored in
// `automationElements` under a 1-based index (as a string) that reflects
// visiting order. The walk stops descending past `maxDepth` and stops
// entirely once `limit` records have been collected.
private static void Walk(AutomationElement element, List<ElementRecord> records, Dictionary<string, AutomationElement> automationElements, int depth, int maxDepth, int limit)
{
    if (depth > maxDepth || records.Count >= limit)
    {
        return;
    }

    var index = (records.Count + 1).ToString();
    records.Add(ElementRecord.From(element, index));
    automationElements[index] = element;

    var children = element.FindAll(TreeScope.Children, Condition.TrueCondition);
    for (var i = 0; i < children.Count && records.Count < limit; i++)
    {
        Walk(children[i], records, automationElements, depth + 1, maxDepth, limit);
    }
}
|
||||
|
||||
// Reads a string property from a JSON object. Returns "" when the property
// is missing or is not a JSON string.
private static string ReadString(JsonElement element, string property)
{
    if (!element.TryGetProperty(property, out var value))
    {
        return "";
    }

    if (value.ValueKind != JsonValueKind.String)
    {
        return "";
    }

    return value.GetString() ?? "";
}
|
||||
|
||||
// Reads a 32-bit integer property from a JSON object.
//
// Returns `defaultValue` when the property is missing, is not a JSON number,
// or does not fit in an Int32. The kind is checked first because
// JsonElement.TryGetInt32 throws InvalidOperationException for non-number
// values (for example "click_count": "2") instead of returning false.
private static int ReadInt(JsonElement element, string property, int defaultValue)
{
    return element.TryGetProperty(property, out var value)
        && value.ValueKind == JsonValueKind.Number
        && value.TryGetInt32(out var number)
        ? number
        : defaultValue;
}
|
||||
|
||||
// Reads a floating-point property from a JSON object.
//
// Returns `defaultValue` when the property is missing or is not a JSON
// number. The kind is checked first because JsonElement.TryGetDouble throws
// InvalidOperationException for non-number values instead of returning false.
private static double ReadDouble(JsonElement element, string property, double defaultValue)
{
    return element.TryGetProperty(property, out var value)
        && value.ValueKind == JsonValueKind.Number
        && value.TryGetDouble(out var number)
        ? number
        : defaultValue;
}
|
||||
|
||||
// Looks up the live UI Automation element referenced by the "stateId" and
// "element_index" parameters. Returns null when either parameter is blank or
// the snapshot/element is not stored (any more).
private static AutomationElement? AutomationElementFor(JsonElement parameters)
{
    var stateId = ReadString(parameters, "stateId");
    var elementIndex = ReadString(parameters, "element_index");

    if (string.IsNullOrWhiteSpace(stateId) || string.IsNullOrWhiteSpace(elementIndex))
    {
        return null;
    }

    if (!StateAutomationElements.TryGetValue(stateId, out var snapshot))
    {
        return null;
    }

    return snapshot.TryGetValue(elementIndex, out var found) ? found : null;
}
|
||||
|
||||
// Determines the screen point an action should target.
//
// Numeric "x" and "y" parameters take precedence. Otherwise the centre of the
// bounds recorded for "stateId" + "element_index" is used. Returns null when
// neither source yields a usable point.
private static System.Drawing.Point? PointFor(JsonElement parameters)
{
    // The kind is checked before TryGetDouble because that call throws for
    // non-number values (for example "x": "12") instead of returning false.
    if (parameters.TryGetProperty("x", out var xValue) && parameters.TryGetProperty("y", out var yValue)
        && xValue.ValueKind == JsonValueKind.Number && yValue.ValueKind == JsonValueKind.Number
        && xValue.TryGetDouble(out var x) && yValue.TryGetDouble(out var y))
    {
        return new System.Drawing.Point((int)Math.Round(x), (int)Math.Round(y));
    }

    var stateId = ReadString(parameters, "stateId");
    var elementIndex = ReadString(parameters, "element_index");
    if (string.IsNullOrWhiteSpace(stateId) || string.IsNullOrWhiteSpace(elementIndex)) return null;
    if (!StateElements.TryGetValue(stateId, out var elements)) return null;

    var element = elements.FirstOrDefault(item => item.Index == elementIndex);

    // Rect.Empty has infinite coordinates, so its "centre" would be NaN and
    // the click would land on an arbitrary position; treat it as no point.
    if (element?.Bounds is not { } bounds || bounds.IsEmpty) return null;

    return new System.Drawing.Point(
        (int)Math.Round(bounds.Left + bounds.Width / 2),
        (int)Math.Round(bounds.Top + bounds.Height / 2)
    );
}
|
||||
|
||||
// Captures the primary screen and returns it as a PNG data URL
// ("data:image/png;base64,...").
private static string CaptureScreen()
{
    var screen = System.Windows.Forms.Screen.PrimaryScreen!.Bounds;

    using (var bitmap = new Bitmap(screen.Width, screen.Height))
    {
        using (var graphics = Graphics.FromImage(bitmap))
        {
            graphics.CopyFromScreen(screen.Left, screen.Top, 0, 0, screen.Size);
        }

        using (var stream = new MemoryStream())
        {
            bitmap.Save(stream, ImageFormat.Png);
            var encoded = Convert.ToBase64String(stream.ToArray());
            return $"data:image/png;base64,{encoded}";
        }
    }
}
|
||||
|
||||
// Converts a rectangle into the { x, y, width, height } shape used in
// responses.
private static Dictionary<string, object?> BoundsDictionary(System.Windows.Rect rect)
{
    var bounds = new Dictionary<string, object?>(4);
    bounds["x"] = rect.X;
    bounds["y"] = rect.Y;
    bounds["width"] = rect.Width;
    bounds["height"] = rect.Height;
    return bounds;
}
|
||||
|
||||
[DllImport("user32.dll")]
|
||||
private static extern bool SetCursorPos(int x, int y);
|
||||
|
||||
[DllImport("user32.dll")]
|
||||
private static extern void mouse_event(uint dwFlags, uint dx, uint dy, uint dwData, UIntPtr dwExtraInfo);
|
||||
|
||||
// Moves the cursor to (x, y) and synthesizes `clickCount` clicks (at least
// one) with the requested button. "right" and "middle" select those buttons;
// any other value clicks the left button. Each click is followed by an 80 ms
// pause.
private static void SendMouseClick(int x, int y, string button, int clickCount)
{
    uint down;
    uint up;
    switch (button)
    {
        case "right":
            down = 0x0008u;
            up = 0x0010u;
            break;
        case "middle":
            down = 0x0020u;
            up = 0x0040u;
            break;
        default:
            down = 0x0002u;
            up = 0x0004u;
            break;
    }

    SetCursorPos(x, y);

    var remaining = Math.Max(1, clickCount);
    while (remaining-- > 0)
    {
        mouse_event(down, 0, 0, 0, UIntPtr.Zero);
        mouse_event(up, 0, 0, 0, UIntPtr.Zero);
        Thread.Sleep(80);
    }
}
|
||||
|
||||
// Activates an element through the UI Automation Invoke pattern.
// Returns false, without throwing, when the pattern is not supported or the
// invocation fails, so the caller can fall back to a synthesized click.
private static bool TryInvoke(AutomationElement element)
{
    try
    {
        if (element.TryGetCurrentPattern(InvokePattern.Pattern, out var raw) && raw is InvokePattern invoker)
        {
            invoker.Invoke();
            return true;
        }

        return false;
    }
    catch
    {
        return false;
    }
}
|
||||
|
||||
// Escapes text so SendKeys types it literally.
//
// The characters { } + ^ % ~ ( ) [ ] have special meaning to SendKeys and
// must each be wrapped in braces. The text is escaped in a single pass:
// chaining string.Replace calls would re-escape the closing brace that was
// just inserted for "{" (turning "{" into "{{{}}" rather than "{{}").
private static string EscapeSendKeys(string value)
{
    var escaped = new System.Text.StringBuilder(value.Length + 8);
    foreach (var character in value)
    {
        switch (character)
        {
            case '{':
            case '}':
            case '+':
            case '^':
            case '%':
            case '~':
            case '(':
            case ')':
            case '[':
            case ']':
                escaped.Append('{').Append(character).Append('}');
                break;
            default:
                escaped.Append(character);
                break;
        }
    }
    return escaped.ToString();
}
|
||||
|
||||
// Translates a key description such as "enter", "ctrl+c" or "ctrl+shift+tab"
// into SendKeys syntax.
//
// Parts are separated by "+"; the last part is the key and the preceding
// parts are modifiers: ctrl/control -> ^, alt -> %, shift -> +, and
// cmd/win/windows -> ^ (mapped onto Ctrl). Unknown modifiers are ignored.
//
// A literal plus key is written as "+" on its own or as a trailing "++"
// (for example "ctrl++"); without this special case the split would discard
// the key and emit an invalid sequence.
private static string ToSendKeysChord(string key)
{
    var normalized = key.Trim();
    if (!normalized.Contains('+'))
    {
        return SendKeyName(normalized);
    }

    var plusIsKey = normalized == "+" || normalized.EndsWith("++", StringComparison.Ordinal);
    var chordText = plusIsKey ? normalized[..^1] : normalized;
    var parts = chordText.Split('+', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries);

    // When "+" is the key, every remaining part is a modifier; otherwise the
    // last part is the key.
    var modifierParts = plusIsKey ? parts : parts.Take(parts.Length - 1);
    var target = plusIsKey ? "+" : parts.LastOrDefault() ?? "";

    var modifiers = "";
    foreach (var part in modifierParts)
    {
        modifiers += part.ToLowerInvariant() switch
        {
            "ctrl" or "control" => "^",
            "alt" => "%",
            "shift" => "+",
            "cmd" or "win" or "windows" => "^",
            _ => ""
        };
    }
    return modifiers + SendKeyName(target);
}
|
||||
|
||||
// Maps a single key name to its SendKeys representation.
//
// Well-known names (case-insensitive) map to their SendKeys codes. Page
// up/down need an explicit mapping because SendKeys only accepts {PGUP} and
// {PGDN}, not {PAGEUP}/{PAGEDOWN}. A single character is escaped and sent
// literally; any other name is upper-cased and wrapped in braces (for example
// "f5" -> "{F5}", "home" -> "{HOME}").
private static string SendKeyName(string key)
{
    return key.ToLowerInvariant() switch
    {
        "return" or "enter" => "{ENTER}",
        "escape" or "esc" => "{ESC}",
        "tab" => "{TAB}",
        "backspace" => "{BACKSPACE}",
        "delete" or "del" => "{DELETE}",
        "left" => "{LEFT}",
        "right" => "{RIGHT}",
        "up" => "{UP}",
        "down" => "{DOWN}",
        "pageup" or "page_up" => "{PGUP}",
        "pagedown" or "page_down" => "{PGDN}",
        "space" => " ",
        _ => key.Length == 1 ? EscapeSendKeys(key) : $"{{{key.ToUpperInvariant()}}}"
    };
}
|
||||
|
||||
// Serializable snapshot of one UI Automation element.
//
// Index   - 1-based position (as a string) assigned by the caller.
// Role    - control type name with the "ControlType." prefix removed.
// Title   - the element's Name property.
// Value   - current ValuePattern value, or null when unavailable.
// Bounds  - bounding rectangle, or null when the element has none.
// Actions - programmatic names of the patterns the element supports.
private sealed record ElementRecord(
    string Index,
    string Role,
    string? Title,
    string? Value,
    System.Windows.Rect? Bounds,
    List<string> Actions)
{
    // Builds a record from a live automation element.
    public static ElementRecord From(AutomationElement element, string index)
    {
        var patterns = element.GetSupportedPatterns().Select(pattern => pattern.ProgrammaticName).ToList();

        // Rect.Empty carries infinite coordinates, which System.Text.Json
        // refuses to write and which would make the whole state response fail.
        // Record such elements as having no bounds.
        var rect = element.Current.BoundingRectangle;
        System.Windows.Rect? bounds = rect.IsEmpty ? null : rect;

        return new ElementRecord(
            index,
            element.Current.ControlType.ProgrammaticName.Replace("ControlType.", ""),
            element.Current.Name,
            TryValue(element),
            bounds,
            patterns
        );
    }

    // Converts the record into the response shape. "title", "value" and
    // "bounds" are omitted when empty or absent.
    public Dictionary<string, object?> ToDictionary()
    {
        var output = new Dictionary<string, object?>
        {
            ["index"] = Index,
            ["role"] = Role,
            ["actions"] = Actions
        };
        if (!string.IsNullOrEmpty(Title)) output["title"] = Title;
        if (!string.IsNullOrEmpty(Value)) output["value"] = Value;
        if (Bounds != null) output["bounds"] = BoundsDictionary(Bounds.Value);
        return output;
    }

    // Reads the element's ValuePattern value; returns null when the pattern
    // is unsupported or reading it fails.
    private static string? TryValue(AutomationElement element)
    {
        try
        {
            if (element.TryGetCurrentPattern(ValuePattern.Pattern, out var pattern))
            {
                return ((ValuePattern)pattern).Current.Value;
            }
        }
        catch
        {
            return null;
        }
        return null;
    }
}
|
||||
}
|
||||
@@ -0,0 +1,87 @@
|
||||
import { randomUUID } from 'node:crypto';
|
||||
|
||||
import type { SemanticAppState, SemanticElement } from '@/modules/computer-use/semantics/semantic-types.js';
|
||||
|
||||
const DEFAULT_STATE_TTL_MS = Number.parseInt(process.env.CLOUDCLI_COMPUTER_SEMANTIC_STATE_TTL_MS || String(10 * 60 * 1000), 10);
|
||||
|
||||
type StoredState = {
|
||||
sessionId: string;
|
||||
appKey: string;
|
||||
state: SemanticAppState;
|
||||
updatedAt: number;
|
||||
};
|
||||
|
||||
/**
 * Normalizes an app name into a lookup key: surrounding whitespace is removed
 * and the rest is lower-cased.
 */
function normalizeAppKey(app: string): string {
  const trimmed = app.trim();
  return trimmed.toLowerCase();
}
|
||||
|
||||
/**
 * In-memory store of semantic app states.
 *
 * States are indexed by state id, and the most recent state id is tracked per
 * session + app pair. Entries older than the configured TTL are dropped.
 */
export class SemanticSessionStore {
  private states = new Map<string, StoredState>();
  private latestBySessionApp = new Map<string, string>();

  /** Generates a new unique state id of the form `state_<uuid>`. */
  createStateId(): string {
    return `state_${randomUUID()}`;
  }

  /**
   * Stores a state for a session and marks it as the latest one for its app.
   *
   * @param sessionId - Session that owns the state.
   * @param state - State to store; a state id is generated when it has none.
   * @returns The stored state, including the state id that was used.
   */
  save(sessionId: string, state: SemanticAppState): SemanticAppState {
    // Expire on write as well as on read. States can carry a screenshot data
    // URL, so a session that only saves must not accumulate them indefinitely.
    this.expire();

    const appKey = normalizeAppKey(state.app);
    const nextState = {
      ...state,
      stateId: state.stateId || this.createStateId(),
    };
    this.states.set(nextState.stateId, {
      sessionId,
      appKey,
      state: nextState,
      updatedAt: Date.now(),
    });
    this.latestBySessionApp.set(this.latestKey(sessionId, appKey), nextState.stateId);
    return nextState;
  }

  /**
   * Looks up a stored state.
   *
   * With `stateId`, the entry must belong to the given session and app,
   * otherwise `null` is returned. Without it, the latest state saved for the
   * session + app pair is returned.
   */
  getState(sessionId: string, app: string, stateId?: string): SemanticAppState | null {
    this.expire();
    if (stateId) {
      const entry = this.states.get(stateId);
      const appKey = normalizeAppKey(app);
      return entry && entry.sessionId === sessionId && entry.appKey === appKey ? entry.state : null;
    }
    const latestStateId = this.latestBySessionApp.get(this.latestKey(sessionId, normalizeAppKey(app)));
    return latestStateId ? this.states.get(latestStateId)?.state || null : null;
  }

  /** Returns the element with the given index from the resolved state, or `null`. */
  getElement(sessionId: string, app: string, elementIndex: string, stateId?: string): SemanticElement | null {
    const state = this.getState(sessionId, app, stateId);
    return state?.elements.find((element) => element.index === elementIndex) || null;
  }

  /** Removes every state, and every latest-state pointer, owned by a session. */
  clearSession(sessionId: string): void {
    for (const [stateId, entry] of this.states.entries()) {
      if (entry.sessionId === sessionId) {
        this.states.delete(stateId);
        this.latestBySessionApp.delete(this.latestKey(entry.sessionId, entry.appKey));
      }
    }
  }

  /**
   * Drops states older than the TTL.
   *
   * The TTL comes from `DEFAULT_STATE_TTL_MS`; when that is not a positive
   * finite number, ten minutes is used. A latest-state pointer is removed only
   * if it still points at the expired state.
   *
   * @param now - Reference time in milliseconds; defaults to `Date.now()`.
   */
  expire(now = Date.now()): void {
    const ttl = Number.isFinite(DEFAULT_STATE_TTL_MS) && DEFAULT_STATE_TTL_MS > 0
      ? DEFAULT_STATE_TTL_MS
      : 10 * 60 * 1000;
    for (const [stateId, entry] of this.states.entries()) {
      if (now - entry.updatedAt > ttl) {
        this.states.delete(stateId);
        const key = this.latestKey(entry.sessionId, entry.appKey);
        if (this.latestBySessionApp.get(key) === stateId) {
          this.latestBySessionApp.delete(key);
        }
      }
    }
  }

  /** Builds the key used to track the latest state of a session + app pair. */
  private latestKey(sessionId: string, appKey: string): string {
    return `${sessionId}:${appKey}`;
  }
}
|
||||
|
||||
export const semanticSessionStore = new SemanticSessionStore();
|
||||
@@ -0,0 +1,17 @@
|
||||
/** Maps each semantic Computer Use MCP tool name to its helper operation name. */
export const semanticMcpToolMap: Record<string, string> = {
  computer_app_drag: 'drag',
  computer_click_element: 'click_element',
  computer_get_app_state: 'get_app_state',
  computer_list_apps: 'list_apps',
  computer_perform_secondary_action: 'perform_secondary_action',
  computer_press_key: 'press_key',
  computer_scroll_element: 'scroll_element',
  computer_set_value: 'set_value',
  computer_type_text: 'type_text',
};

/** Set of all semantic operation names that appear in the tool map. */
export const semanticOperationNames = new Set(Object.values(semanticMcpToolMap));

/**
 * Returns the semantic operation for an MCP tool name, or `null` when the
 * tool is not a semantic tool.
 *
 * Only the map's own keys are considered. A plain property read would also
 * return inherited `Object.prototype` members for names such as
 * `constructor` or `toString`, which are not operations.
 */
export function semanticOperationForMcpTool(toolName: string): string | null {
  if (!Object.prototype.hasOwnProperty.call(semanticMcpToolMap, toolName)) {
    return null;
  }
  return semanticMcpToolMap[toolName] || null;
}
|
||||
58
server/modules/computer-use/semantics/semantic-types.ts
Normal file
58
server/modules/computer-use/semantics/semantic-types.ts
Normal file
@@ -0,0 +1,58 @@
|
||||
import type { DisplaySize, Point } from '@/modules/computer-use/computer-executor.js';
|
||||
|
||||
/** Rectangle describing where an element or window is located. */
export type SemanticBounds = {
  x: number;
  y: number;
  width: number;
  height: number;
};

/** An application as reported by the semantic helper's app listing. */
export type SemanticApp = {
  id?: string;
  name: string;
  // NOTE(review): presumably only populated on macOS; confirm against the helpers.
  bundleIdentifier?: string;
  processName?: string;
  pid?: number;
  running: boolean;
  windowTitle?: string;
};

/** One element of an app's accessibility tree. */
export type SemanticElement = {
  /** Index used to refer to this element within a state. */
  index: string;
  role: string;
  title?: string;
  value?: string;
  description?: string;
  enabled?: boolean;
  focused?: boolean;
  selected?: boolean;
  bounds?: SemanticBounds;
  actions?: string[];
  settableValue?: boolean;
};

/** Snapshot of an app: its elements plus an optional screenshot. */
export type SemanticAppState = {
  stateId: string;
  app: string;
  platform: NodeJS.Platform;
  screenshotDataUrl: string | null;
  displaySize: DisplaySize | null;
  elements: SemanticElement[];
  accessibilityTree: SemanticElement[];
  treeText?: string;
  message?: string;
};

/** Input accepted by semantic tools: arbitrary keys plus the common selectors. */
export type SemanticToolInput = Record<string, unknown> & {
  sessionId?: string;
  app?: string;
  stateId?: string;
  element_index?: string;
};

/** Result of a semantic tool: either an app state or an app listing. */
export type SemanticToolResult =
  | SemanticAppState
  | {
      apps: SemanticApp[];
      platform: NodeJS.Platform;
    };

/** Point targeted by a semantic action. */
export type SemanticActionPoint = Point;
|
||||
@@ -0,0 +1,51 @@
|
||||
import type { WebSocket } from 'ws';
|
||||
|
||||
import { desktopAgentRelay } from '@/modules/computer-use/index.js';
|
||||
import type { AuthenticatedWebSocketRequest } from '@/shared/types.js';
|
||||
import { parseIncomingJsonObject } from '@/shared/utils.js';
|
||||
|
||||
/**
 * Handles the `/desktop-agent` websocket, the inbound side of the cloud
 * Computer Use relay.
 *
 * A linked CloudCLI desktop app connects here and sends a `register` frame to
 * become the executor for this hosted environment. Only the first `register`
 * frame on a connection is honoured. The agent reports outcomes as
 * `computer_relay_result` frames, which are handed to `desktopAgentRelay`
 * together with their `id`.
 *
 * The frame kind is read from `kind`, falling back to `type`. Frames that
 * cannot be parsed as a JSON object are ignored.
 */
export function handleDesktopAgentConnection(
  ws: WebSocket,
  request: AuthenticatedWebSocketRequest
): void {
  let registered = false;

  // Prefer the label sent by the agent, then the authenticated username.
  const resolveLabel = (candidate: unknown): string => {
    if (typeof candidate === 'string' && candidate.trim()) {
      return candidate.trim();
    }
    const username = request.user?.username;
    return username ? `desktop:${username}` : 'desktop-agent';
  };

  ws.on('message', (rawMessage) => {
    const data = parseIncomingJsonObject(rawMessage);
    if (!data) {
      return;
    }

    let kind = '';
    if (typeof data.kind === 'string') {
      kind = data.kind;
    } else if (typeof data.type === 'string') {
      kind = data.type;
    }

    if (kind === 'register' && !registered) {
      const label = resolveLabel(data.label);
      registered = true;
      console.log('[INFO] Desktop agent websocket registered:', label);
      desktopAgentRelay.register(ws, label);
      return;
    }

    // NOTE(review): results are accepted even before this socket registered;
    // confirm whether that is intended.
    if (kind === 'computer_relay_result' && typeof data.id === 'string') {
      const frame = data as Record<string, unknown>;
      const error = typeof frame.error === 'string' ? frame.error : undefined;
      desktopAgentRelay.handleResult(data.id, frame.result, error);
    }
  });

  ws.on('close', () => {
    console.log('[INFO] Desktop agent websocket disconnected');
  });
}
|
||||
@@ -1,11 +1,13 @@
|
||||
import type { Server as HttpServer } from 'node:http';
|
||||
|
||||
import { WebSocketServer, type VerifyClientCallbackSync } from 'ws';
|
||||
import { WebSocket, WebSocketServer, type VerifyClientCallbackSync } from 'ws';
|
||||
|
||||
import { handleChatConnection } from '@/modules/websocket/services/chat-websocket.service.js';
|
||||
import { VIEWER_COOKIE_NAME } from '@/modules/browser-use/index.js';
|
||||
import { verifyWebSocketClient } from '@/modules/websocket/services/websocket-auth.service.js';
|
||||
import { handlePluginWsProxy } from '@/modules/websocket/services/plugin-websocket-proxy.service.js';
|
||||
import { handleShellConnection } from '@/modules/websocket/services/shell-websocket.service.js';
|
||||
import { handleDesktopAgentConnection } from '@/modules/websocket/services/desktop-agent-websocket.service.js';
|
||||
import { handleDesktopNotificationsConnection } from '@/modules/notifications/index.js';
|
||||
import type { AuthenticatedWebSocketRequest } from '@/shared/types.js';
|
||||
|
||||
@@ -14,8 +16,21 @@ type WebSocketServerDependencies = {
|
||||
chat: Parameters<typeof handleChatConnection>[2];
|
||||
shell: Parameters<typeof handleShellConnection>[1];
|
||||
getPluginPort: Parameters<typeof handlePluginWsProxy>[2];
|
||||
browserUseViewer?: (ws: WebSocket, pathname: string) => void;
|
||||
authenticateBrowserUseViewer?: (pathname: string, token: string | null) => boolean;
|
||||
};
|
||||
|
||||
/**
 * Extracts and URL-decodes one cookie value from a `Cookie` header.
 *
 * @param header - Raw `Cookie` header value, if any.
 * @param name - Name of the cookie to read.
 * @returns The decoded value, or `null` when the header is absent, the cookie
 *   is not present, or its value is not valid percent-encoding.
 */
function readCookieValue(header: unknown, name: string): string | null {
  if (!header) return null;
  const prefix = `${name}=`;
  const cookie = String(header)
    .split(';')
    .map((part) => part.trim())
    .find((part) => part.startsWith(prefix));
  if (!cookie) return null;
  try {
    return decodeURIComponent(cookie.slice(prefix.length));
  } catch {
    // decodeURIComponent throws URIError on malformed escapes (e.g. "%E0%A4%A").
    // The header comes from the client, so treat that as "no cookie" rather
    // than letting the exception escape.
    return null;
  }
}
|
||||
|
||||
/**
 * Reads the Browser Use viewer token for a websocket upgrade request.
 * A non-empty `viewerToken` query parameter wins; otherwise the viewer cookie
 * from the request headers is used.
 */
function getBrowserUseViewerToken(url: URL, headers: Record<string, unknown>): string | null {
  const queryToken = url.searchParams.get('viewerToken');
  if (queryToken) {
    return queryToken;
  }
  return readCookieValue(headers.cookie, VIEWER_COOKIE_NAME);
}
|
||||
|
||||
/**
|
||||
* Creates and wires the server-wide websocket gateway used for chat, shell, and
|
||||
* plugin proxy routes.
|
||||
@@ -28,7 +43,17 @@ export function createWebSocketServer(
|
||||
server,
|
||||
verifyClient: ((
|
||||
info: Parameters<VerifyClientCallbackSync<AuthenticatedWebSocketRequest>>[0]
|
||||
) => verifyWebSocketClient(info, dependencies.verifyClient)),
|
||||
) => {
|
||||
const requestUrl = new URL(info.req.url ?? '/', 'http://localhost');
|
||||
if (
|
||||
requestUrl.pathname.startsWith('/api/browser-use/sessions/')
|
||||
&& requestUrl.pathname.endsWith('/viewer/websockify')
|
||||
) {
|
||||
const token = getBrowserUseViewerToken(requestUrl, info.req.headers as Record<string, unknown>);
|
||||
return Boolean(dependencies.authenticateBrowserUseViewer?.(requestUrl.pathname, token));
|
||||
}
|
||||
return verifyWebSocketClient(info, dependencies.verifyClient);
|
||||
}),
|
||||
});
|
||||
|
||||
wss.on('connection', (ws, request) => {
|
||||
@@ -64,6 +89,11 @@ export function createWebSocketServer(
|
||||
return;
|
||||
}
|
||||
|
||||
if (pathname === '/desktop-agent') {
|
||||
handleDesktopAgentConnection(ws, incomingRequest);
|
||||
return;
|
||||
}
|
||||
|
||||
if (pathname === '/desktop-notifications') {
|
||||
handleDesktopNotificationsConnection(ws, incomingRequest);
|
||||
return;
|
||||
@@ -74,6 +104,11 @@ export function createWebSocketServer(
|
||||
return;
|
||||
}
|
||||
|
||||
if (pathname.startsWith('/api/browser-use/sessions/') && pathname.endsWith('/viewer/websockify')) {
|
||||
dependencies.browserUseViewer?.(ws, pathname);
|
||||
return;
|
||||
}
|
||||
|
||||
console.log('[WARN] Unknown WebSocket path:', pathname);
|
||||
ws.close();
|
||||
});
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import { useCallback, useEffect, useMemo, useState } from 'react';
|
||||
import { useCallback, useEffect, useMemo, useRef, useState } from 'react';
|
||||
import {
|
||||
Bot,
|
||||
Clock3,
|
||||
@@ -7,6 +7,7 @@ import {
|
||||
ExternalLink,
|
||||
Loader2,
|
||||
MonitorPlay,
|
||||
MousePointer2,
|
||||
RefreshCw,
|
||||
Settings,
|
||||
Square,
|
||||
@@ -19,9 +20,14 @@ import { Badge, Button } from '../../../shared/view/ui';
|
||||
import { authenticatedFetch } from '../../../utils/api';
|
||||
import type { SettingsMainTab } from '../../settings/types/types';
|
||||
|
||||
const BROWSER_USE_GUIDE_URL = 'https://cloudcli.ai/docs/browser-use';
|
||||
const BROWSER_USE_CACHE_TTL_MS = 30_000;
|
||||
|
||||
type BrowserUseStatus = {
|
||||
enabled: boolean;
|
||||
available: boolean;
|
||||
backend: 'playwright' | 'camoufox-vnc';
|
||||
browserBackend: 'playwright' | 'camoufox-vnc';
|
||||
playwrightInstalled: boolean;
|
||||
chromiumInstalled: boolean;
|
||||
installInProgress: boolean;
|
||||
@@ -39,6 +45,9 @@ type BrowserUseSession = {
|
||||
updatedAt: string;
|
||||
lastAction: string | null;
|
||||
message: string | null;
|
||||
backend?: 'playwright' | 'camoufox-vnc';
|
||||
viewerUrl?: string | null;
|
||||
viewerEmbedUrl?: string | null;
|
||||
createdBy: 'agent';
|
||||
profileName: string | null;
|
||||
viewport: {
|
||||
@@ -54,17 +63,48 @@ type BrowserUseSession = {
|
||||
|
||||
/**
 * Props accepted by BrowserUsePanel.
 *
 * - isVisible: an effect in the panel returns early while this is false.
 * - projectId: optional; fed to getBrowserUseCacheKey to pick the cache slot
 *   (a per-project key when truthy, the shared global key otherwise).
 * - onShowSettings: optional; when provided the header renders one extra
 *   button (presumably a settings shortcut; its handler is outside this view).
 */
type BrowserUsePanelProps = {
|
||||
isVisible: boolean;
|
||||
projectId?: string | null;
|
||||
onShowSettings?: (tab?: SettingsMainTab) => void;
|
||||
};
|
||||
|
||||
/**
 * Snapshot of the panel state stored in `browserUsePanelCache`, keyed by the
 * string returned from getBrowserUseCacheKey.
 *
 * - status / sessions: the last successfully fetched panel data.
 * - selectedSessionId: the session that was selected when the entry was written.
 * - updatedAt: a `Date.now()` timestamp (milliseconds); getFreshCacheEntry
 *   ignores entries older than BROWSER_USE_CACHE_TTL_MS.
 */
type BrowserUsePanelCacheEntry = {
|
||||
status: BrowserUseStatus | null;
|
||||
sessions: BrowserUseSession[];
|
||||
selectedSessionId: string | null;
|
||||
updatedAt: number;
|
||||
};
|
||||
|
||||
const browserUsePanelCache = new Map<string, BrowserUsePanelCacheEntry>();
|
||||
|
||||
/**
 * Reads a Browser API response and returns its parsed JSON payload.
 *
 * The body is read exactly once, as text, so that empty bodies and non-JSON
 * error pages can be reported with the HTTP status instead of surfacing a raw
 * JSON SyntaxError.
 *
 * @param response - Fetch response to decode.
 * @returns The parsed payload cast to `T`; `{}` when the body is empty.
 * @throws Error when the body is not valid JSON, when the HTTP status is not
 *   OK, or when the payload carries `success: false`. The message prefers the
 *   payload's `error`, then `details`, then a status-based fallback.
 */
async function readJson<T>(response: Response): Promise<T> {
  // A fetch body can be consumed only once, so never combine this with response.json().
  const text = await response.text();
  let data: any = {};

  if (text) {
    try {
      // A literal JSON `null` body is treated like an empty body so the checks below cannot crash.
      data = JSON.parse(text) ?? {};
    } catch {
      throw new Error(response.ok ? 'Received an invalid Browser response.' : `Browser request failed (${response.status}).`);
    }
  }

  if (!response.ok || data.success === false) {
    throw new Error(data.error || data.details || `Request failed (${response.status})`);
  }

  return data as T;
}
|
||||
|
||||
/**
 * Loads everything the Browser panel renders in one round trip.
 *
 * Both endpoints are requested concurrently; the status payload is decoded
 * before the sessions payload, so a bad status response fails first.
 *
 * @returns The runtime status plus a copy of the session list ordered by
 *   `createdAt`, newest first.
 * @throws Whatever `authenticatedFetch` or `readJson` throws.
 */
async function fetchBrowserPanelData() {
  const statusRequest = authenticatedFetch('/api/browser-use/status');
  const sessionsRequest = authenticatedFetch('/api/browser-use/sessions');
  const [statusResponse, sessionsResponse] = await Promise.all([statusRequest, sessionsRequest]);

  const { data: status } = await readJson<{ data: BrowserUseStatus }>(statusResponse);
  const { data: sessionsPayload } = await readJson<{ data: { sessions: BrowserUseSession[] } }>(sessionsResponse);

  // Sort a copy so the decoded payload itself is left untouched.
  const newestFirst = Array.from(sessionsPayload.sessions);
  newestFirst.sort((left, right) => Date.parse(right.createdAt) - Date.parse(left.createdAt));

  return { status, sessions: newestFirst };
}
|
||||
|
||||
function formatRelativeTime(value: string | null): string {
|
||||
if (!value) return 'Never';
|
||||
|
||||
@@ -119,20 +159,42 @@ function getStatusDot(status: BrowserUseSession['status']): string {
|
||||
return 'bg-border';
|
||||
}
|
||||
|
||||
/**
 * Maps a browser backend identifier to the label shown in the UI.
 *
 * @param backend - Backend reported by the status or by a session; may be absent.
 * @returns 'Visible browser' for 'camoufox-vnc', otherwise 'Playwright'
 *   (including when no backend is given).
 */
function getEngineLabel(backend?: BrowserUseStatus['backend'] | BrowserUseSession['backend']): string {
  if (backend === 'camoufox-vnc') {
    return 'Visible browser';
  }
  return 'Playwright';
}
|
||||
|
||||
const PROMPTS = [
|
||||
'Use Browser to inspect the checkout flow and report any broken UI states.',
|
||||
'Open <url> with Browser, interact with the page, and summarize what changed after each step.',
|
||||
];
|
||||
|
||||
export default function BrowserUsePanel({ isVisible, onShowSettings }: BrowserUsePanelProps) {
|
||||
const [status, setStatus] = useState<BrowserUseStatus | null>(null);
|
||||
const [sessions, setSessions] = useState<BrowserUseSession[]>([]);
|
||||
const [selectedSessionId, setSelectedSessionId] = useState<string | null>(null);
|
||||
/**
 * Builds the key under which panel data is cached.
 *
 * @param projectId - Project identifier; any falsy value (undefined, null or
 *   an empty string) selects the shared global slot.
 * @returns `browser-use:project:<id>` or `browser-use:global`.
 */
function getBrowserUseCacheKey(projectId?: string | null): string {
  const scope = projectId ? `project:${projectId}` : 'global';
  return `browser-use:${scope}`;
}
|
||||
|
||||
/**
 * Looks up a cached panel snapshot and returns it only while it is still fresh.
 *
 * @param cacheKey - Key produced by getBrowserUseCacheKey.
 * @returns The cached entry, or null when nothing is cached or the entry is
 *   older than BROWSER_USE_CACHE_TTL_MS. Stale entries are not evicted here.
 */
function getFreshCacheEntry(cacheKey: string): BrowserUsePanelCacheEntry | null {
  const cached = browserUsePanelCache.get(cacheKey);
  if (!cached) {
    return null;
  }

  const ageMs = Date.now() - cached.updatedAt;
  return ageMs > BROWSER_USE_CACHE_TTL_MS ? null : cached;
}
|
||||
|
||||
export default function BrowserUsePanel({ isVisible, projectId, onShowSettings }: BrowserUsePanelProps) {
|
||||
const cacheKey = getBrowserUseCacheKey(projectId);
|
||||
const initialCacheEntry = getFreshCacheEntry(cacheKey);
|
||||
const [status, setStatus] = useState<BrowserUseStatus | null>(() => initialCacheEntry?.status ?? null);
|
||||
const [sessions, setSessions] = useState<BrowserUseSession[]>(() => initialCacheEntry?.sessions ?? []);
|
||||
const [selectedSessionId, setSelectedSessionId] = useState<string | null>(() => (
|
||||
initialCacheEntry?.selectedSessionId || initialCacheEntry?.sessions[0]?.id || null
|
||||
));
|
||||
const [hasLoadedOnce, setHasLoadedOnce] = useState(Boolean(initialCacheEntry));
|
||||
const [isRefreshing, setIsRefreshing] = useState(false);
|
||||
const [isBusy, setIsBusy] = useState(false);
|
||||
const [isInstalling, setIsInstalling] = useState(false);
|
||||
const [isFullscreen, setIsFullscreen] = useState(false);
|
||||
const [error, setError] = useState<string | null>(null);
|
||||
const activeLoadIdRef = useRef(0);
|
||||
|
||||
const selectedSession = useMemo(
|
||||
() => sessions.find((session) => session.id === selectedSessionId) || sessions[0] || null,
|
||||
@@ -140,8 +202,12 @@ export default function BrowserUsePanel({ isVisible, onShowSettings }: BrowserUs
|
||||
);
|
||||
|
||||
const activeSessions = sessions.filter((session) => session.status === 'ready');
|
||||
const needsBrowserBinaries = Boolean(status?.enabled && (!status.playwrightInstalled || !status.chromiumInstalled));
|
||||
const runtimeLabel = !status?.enabled
|
||||
const isInitialLoading = isRefreshing && !hasLoadedOnce && sessions.length === 0;
|
||||
const isBackgroundRefreshing = isRefreshing && !isInitialLoading;
|
||||
const needsBrowserBinaries = Boolean(status?.enabled && !status.available);
|
||||
const runtimeLabel = isInitialLoading
|
||||
? 'Loading'
|
||||
: !status?.enabled
|
||||
? 'Disabled'
|
||||
: status.available
|
||||
? 'Ready'
|
||||
@@ -157,29 +223,72 @@ export default function BrowserUsePanel({ isVisible, onShowSettings }: BrowserUs
|
||||
: null;
|
||||
|
||||
const refresh = useCallback(async () => {
|
||||
const loadId = activeLoadIdRef.current + 1;
|
||||
activeLoadIdRef.current = loadId;
|
||||
setIsRefreshing(true);
|
||||
try {
|
||||
const [statusResponse, sessionsResponse] = await Promise.all([
|
||||
authenticatedFetch('/api/browser-use/status'),
|
||||
authenticatedFetch('/api/browser-use/sessions'),
|
||||
]);
|
||||
const statusData = await readJson<{ data: BrowserUseStatus }>(statusResponse);
|
||||
const sessionsData = await readJson<{ data: { sessions: BrowserUseSession[] } }>(sessionsResponse);
|
||||
const nextSessions = sessionsData.data.sessions;
|
||||
setStatus(statusData.data);
|
||||
let nextData: Awaited<ReturnType<typeof fetchBrowserPanelData>>;
|
||||
try {
|
||||
nextData = await fetchBrowserPanelData();
|
||||
} catch (error) {
|
||||
if (loadId !== activeLoadIdRef.current) {
|
||||
return;
|
||||
}
|
||||
await new Promise((resolve) => setTimeout(resolve, 350));
|
||||
nextData = await fetchBrowserPanelData();
|
||||
}
|
||||
if (activeLoadIdRef.current !== loadId) {
|
||||
return;
|
||||
}
|
||||
const nextSessions = nextData.sessions;
|
||||
setStatus(nextData.status);
|
||||
setSessions(nextSessions);
|
||||
setSelectedSessionId((current) => (
|
||||
current && nextSessions.some((session) => session.id === current)
|
||||
setHasLoadedOnce(true);
|
||||
let nextSelectedSessionId: string | null = null;
|
||||
setSelectedSessionId((current) => {
|
||||
nextSelectedSessionId = current && nextSessions.some((session) => session.id === current)
|
||||
? current
|
||||
: nextSessions[0]?.id || null
|
||||
));
|
||||
: nextSessions[0]?.id || null;
|
||||
return nextSelectedSessionId;
|
||||
});
|
||||
browserUsePanelCache.set(cacheKey, {
|
||||
status: nextData.status,
|
||||
sessions: nextSessions,
|
||||
selectedSessionId: nextSelectedSessionId,
|
||||
updatedAt: Date.now(),
|
||||
});
|
||||
setError(null);
|
||||
} catch (err) {
|
||||
if (activeLoadIdRef.current !== loadId) {
|
||||
return;
|
||||
}
|
||||
setHasLoadedOnce(true);
|
||||
setError(err instanceof Error ? err.message : 'Failed to load Browser');
|
||||
} finally {
|
||||
setIsRefreshing(false);
|
||||
if (activeLoadIdRef.current === loadId) {
|
||||
setIsRefreshing(false);
|
||||
}
|
||||
}
|
||||
}, []);
|
||||
}, [cacheKey]);
|
||||
|
||||
useEffect(() => {
|
||||
const cachedEntry = browserUsePanelCache.get(cacheKey);
|
||||
if (!cachedEntry) return;
|
||||
browserUsePanelCache.set(cacheKey, {
|
||||
...cachedEntry,
|
||||
selectedSessionId,
|
||||
});
|
||||
}, [cacheKey, selectedSessionId]);
|
||||
|
||||
useEffect(() => {
|
||||
const cachedEntry = getFreshCacheEntry(cacheKey);
|
||||
setStatus(cachedEntry?.status ?? null);
|
||||
setSessions(cachedEntry?.sessions ?? []);
|
||||
setSelectedSessionId(cachedEntry?.selectedSessionId || cachedEntry?.sessions[0]?.id || null);
|
||||
setHasLoadedOnce(Boolean(cachedEntry));
|
||||
setError(null);
|
||||
activeLoadIdRef.current += 1;
|
||||
}, [cacheKey]);
|
||||
|
||||
useEffect(() => {
|
||||
if (!isVisible) return;
|
||||
@@ -253,6 +362,10 @@ export default function BrowserUsePanel({ isVisible, onShowSettings }: BrowserUs
|
||||
<span>{formatRelativeTime(session.updatedAt)}</span>
|
||||
<span className="truncate">- {formatAction(session.lastAction)}</span>
|
||||
</div>
|
||||
<div className="mt-2 flex flex-wrap gap-1.5 pl-3.5 text-[10px] text-muted-foreground">
|
||||
<span className="rounded border border-border/70 bg-background/70 px-1.5 py-0.5">{getEngineLabel(session.backend)}</span>
|
||||
<span className="rounded border border-border/70 bg-background/70 px-1.5 py-0.5">{session.profileName || 'Temporary'}</span>
|
||||
</div>
|
||||
</button>
|
||||
);
|
||||
};
|
||||
@@ -270,9 +383,18 @@ export default function BrowserUsePanel({ isVisible, onShowSettings }: BrowserUs
|
||||
</div>
|
||||
<p className="mt-1 max-w-xl text-sm leading-6 text-muted-foreground">
|
||||
{status?.enabled
|
||||
? 'Agent browser sessions appear here while an AI task is using Browser.'
|
||||
: 'Enable Browser in settings to let agents open monitored browser sessions.'}
|
||||
? 'When an agent opens a browser, you can watch the latest screenshot, take control in a new tab, or end the running session.'
|
||||
: 'Enable Browser to let agents open websites, test flows, capture screenshots, and debug UI from a real page.'}
|
||||
</p>
|
||||
<a
|
||||
href={BROWSER_USE_GUIDE_URL}
|
||||
target="_blank"
|
||||
rel="noopener noreferrer"
|
||||
className="mt-2 inline-flex items-center gap-1.5 text-sm font-medium text-primary hover:underline"
|
||||
>
|
||||
Read the Browser guide
|
||||
<ExternalLink className="h-3.5 w-3.5" />
|
||||
</a>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -312,10 +434,19 @@ export default function BrowserUsePanel({ isVisible, onShowSettings }: BrowserUs
|
||||
</div>
|
||||
);
|
||||
|
||||
const renderLoadingState = () => (
|
||||
<div className="flex min-h-0 flex-1 items-center justify-center p-6">
|
||||
<div className="flex items-center gap-3 rounded-md border border-border bg-card/40 px-4 py-3 text-sm text-muted-foreground shadow-sm">
|
||||
<Loader2 className="h-4 w-4 animate-spin text-primary" />
|
||||
Loading browser sessions...
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
|
||||
const renderBrowserSurface = (fullscreen = false) => (
|
||||
<div className={cn('flex flex-1 items-center justify-center bg-neutral-950', fullscreen ? 'min-h-[80vh]' : 'min-h-[420px]')}>
|
||||
{selectedSession?.screenshotDataUrl ? (
|
||||
<div className="relative inline-block max-h-full">
|
||||
<div className="group relative inline-block max-h-full">
|
||||
<img
|
||||
src={selectedSession.screenshotDataUrl}
|
||||
alt="Browser session screenshot"
|
||||
@@ -329,6 +460,18 @@ export default function BrowserUsePanel({ isVisible, onShowSettings }: BrowserUs
|
||||
<div className="absolute left-1/2 top-1/2 h-2 w-2 -translate-x-1/2 -translate-y-1/2 rounded-full bg-white" />
|
||||
</div>
|
||||
)}
|
||||
{selectedSession?.viewerEmbedUrl && selectedSession.status === 'ready' && (
|
||||
<button
|
||||
type="button"
|
||||
onClick={() => window.open(selectedSession.viewerUrl || selectedSession.viewerEmbedUrl || '', '_blank', 'noopener,noreferrer')}
|
||||
className="absolute inset-0 flex items-center justify-center bg-black/0 opacity-0 transition focus-visible:bg-black/30 focus-visible:opacity-100 focus-visible:outline-none group-hover:bg-black/30 group-hover:opacity-100"
|
||||
>
|
||||
<span className="inline-flex items-center gap-2 rounded-md border border-white/20 bg-black/80 px-3 py-2 text-sm font-medium text-white shadow-lg">
|
||||
<MousePointer2 className="h-4 w-4" />
|
||||
Take control
|
||||
</span>
|
||||
</button>
|
||||
)}
|
||||
</div>
|
||||
) : (
|
||||
<div className="px-6 text-center">
|
||||
@@ -350,10 +493,29 @@ export default function BrowserUsePanel({ isVisible, onShowSettings }: BrowserUs
|
||||
<Badge variant="outline" className={cn('text-[10px]', getRuntimeTone(status, isInstalling))}>
|
||||
{runtimeLabel}
|
||||
</Badge>
|
||||
<Badge variant="outline" className="border-border bg-background text-[10px] text-muted-foreground">
|
||||
{getEngineLabel(status?.backend)}
|
||||
</Badge>
|
||||
</div>
|
||||
<p className="mt-0.5 text-xs text-muted-foreground">Monitor browser sessions opened by AI agents.</p>
|
||||
<p className="mt-0.5 text-xs text-muted-foreground">Watch and manage browser sessions agents use to test real websites.</p>
|
||||
{isBackgroundRefreshing && (
|
||||
<div className="mt-1 flex items-center gap-1.5 text-xs text-muted-foreground">
|
||||
<RefreshCw className="h-3 w-3 animate-spin" />
|
||||
Refreshing sessions...
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
<div className="flex items-center gap-1.5">
|
||||
<Button
|
||||
variant="ghost"
|
||||
size="sm"
|
||||
className="h-7 w-7 p-0"
|
||||
onClick={() => window.open(BROWSER_USE_GUIDE_URL, '_blank', 'noopener,noreferrer')}
|
||||
title="Open Browser guide"
|
||||
aria-label="Open Browser guide"
|
||||
>
|
||||
<ExternalLink className="h-3.5 w-3.5" />
|
||||
</Button>
|
||||
{onShowSettings && (
|
||||
<Button
|
||||
variant="ghost"
|
||||
@@ -425,7 +587,7 @@ export default function BrowserUsePanel({ isVisible, onShowSettings }: BrowserUs
|
||||
</div>
|
||||
|
||||
{sessions.length === 0 ? (
|
||||
renderEmptyState()
|
||||
isInitialLoading ? renderLoadingState() : renderEmptyState()
|
||||
) : (
|
||||
<div className="min-h-0 flex-1 overflow-auto bg-muted/20 p-4">
|
||||
<div className="mx-auto flex min-h-[500px] max-w-7xl flex-col overflow-hidden rounded-md border border-border bg-background shadow-sm">
|
||||
@@ -441,14 +603,32 @@ export default function BrowserUsePanel({ isVisible, onShowSettings }: BrowserUs
|
||||
<ExternalLink className="h-3.5 w-3.5 shrink-0" />
|
||||
<span className="truncate">{selectedSession?.url || 'No page loaded'}</span>
|
||||
</div>
|
||||
<div className="mt-1 flex flex-wrap gap-1.5 text-[10px] text-muted-foreground">
|
||||
<span className="rounded border border-border/70 bg-muted/30 px-1.5 py-0.5">{getEngineLabel(selectedSession?.backend || status?.backend)}</span>
|
||||
<span className="rounded border border-border/70 bg-muted/30 px-1.5 py-0.5">Profile: {selectedSession?.profileName || 'Temporary'}</span>
|
||||
<span className="rounded border border-border/70 bg-muted/30 px-1.5 py-0.5">Updated {formatRelativeTime(selectedSession?.updatedAt || null)}</span>
|
||||
</div>
|
||||
</div>
|
||||
<div className="hidden text-xs text-muted-foreground md:block">
|
||||
{formatAction(selectedSession?.lastAction || null)}
|
||||
</div>
|
||||
{selectedSession?.viewerUrl && selectedSession.status === 'ready' && (
|
||||
<Button
|
||||
variant="outline"
|
||||
size="sm"
|
||||
className="h-8"
|
||||
onClick={() => window.open(selectedSession.viewerUrl || '', '_blank', 'noopener,noreferrer')}
|
||||
title="Open live browser control in a new tab"
|
||||
aria-label="Open live browser control in a new tab"
|
||||
>
|
||||
<MousePointer2 className="h-4 w-4" />
|
||||
Take control
|
||||
</Button>
|
||||
)}
|
||||
<Button variant="ghost" size="sm" className="h-8 w-8 p-0" onClick={() => setIsFullscreen(true)} disabled={!selectedSession?.screenshotDataUrl} title="Full screen" aria-label="Full screen">
|
||||
<Expand className="h-4 w-4" />
|
||||
</Button>
|
||||
<Button variant="ghost" size="sm" className="h-8 w-8 p-0 lg:hidden" onClick={stopSession} disabled={isBusy || !selectedSession || selectedSession.status !== 'ready'} title="Stop session" aria-label="Stop session">
|
||||
<Button variant="ghost" size="sm" className="h-8 w-8 p-0 lg:hidden" onClick={stopSession} disabled={isBusy || !selectedSession || selectedSession.status !== 'ready'} title="End session" aria-label="End session">
|
||||
<Square className="h-4 w-4" />
|
||||
</Button>
|
||||
<Button variant="ghost" size="sm" className="h-8 w-8 p-0 lg:hidden" onClick={deleteSession} disabled={isBusy || !selectedSession} title="Delete session" aria-label="Delete session">
|
||||
@@ -475,6 +655,11 @@ export default function BrowserUsePanel({ isVisible, onShowSettings }: BrowserUs
|
||||
<div className="min-h-0 flex-1 overflow-y-auto p-3">
|
||||
{sessions.length > 0 ? (
|
||||
<div className="space-y-2">{sessions.map(renderSessionItem)}</div>
|
||||
) : isInitialLoading ? (
|
||||
<div className="flex items-center justify-center gap-2 rounded-md border border-dashed border-border/70 px-3 py-8 text-center text-xs text-muted-foreground">
|
||||
<Loader2 className="h-3.5 w-3.5 animate-spin" />
|
||||
Loading sessions...
|
||||
</div>
|
||||
) : (
|
||||
<div className="rounded-md border border-dashed border-border/70 px-3 py-8 text-center text-xs text-muted-foreground">
|
||||
No agent browser sessions.
|
||||
@@ -505,7 +690,7 @@ export default function BrowserUsePanel({ isVisible, onShowSettings }: BrowserUs
|
||||
<div className="mt-3 grid grid-cols-2 gap-2">
|
||||
<Button variant="outline" size="sm" onClick={stopSession} disabled={isBusy || !selectedSession || selectedSession.status !== 'ready'}>
|
||||
<Square className="h-4 w-4" />
|
||||
Stop
|
||||
End
|
||||
</Button>
|
||||
<Button variant="outline" size="sm" onClick={deleteSession} disabled={isBusy || !selectedSession}>
|
||||
<Trash2 className="h-4 w-4" />
|
||||
|
||||
1
src/components/computer-use/index.ts
Normal file
1
src/components/computer-use/index.ts
Normal file
@@ -0,0 +1 @@
|
||||
export { default as ComputerUsePanel } from './view/ComputerUsePanel';
|
||||
537
src/components/computer-use/view/ComputerUsePanel.tsx
Normal file
537
src/components/computer-use/view/ComputerUsePanel.tsx
Normal file
@@ -0,0 +1,537 @@
|
||||
import { useCallback, useEffect, useMemo, useRef, useState, type KeyboardEvent, type MouseEvent } from 'react';
|
||||
import { Bot, Camera, Download, Expand, Loader2, MonitorCog, RefreshCw, Settings, ShieldCheck, Square, Trash2, X } from 'lucide-react';
|
||||
|
||||
import { cn } from '../../../lib/utils';
|
||||
import { Badge, Button } from '../../../shared/view/ui';
|
||||
import { authenticatedFetch } from '../../../utils/api';
|
||||
import type { SettingsMainTab } from '../../settings/types/types';
|
||||
|
||||
/**
 * Runtime status decoded from the `data` field of the
 * `/api/computer-use/status` response.
 *
 * - runtime: for 'cloud' the header label and tone follow the desktop-agent
 *   link state; for 'local' they follow `available` and the install flags
 *   (see getRuntimeLabel / getRuntimeTone).
 * - desktopAgentCount: optional; when absent, consumers fall back to
 *   `desktopAgentConnected ? 1 : 0`.
 * - nutInstalled / screenshotInstalled: for the 'local' runtime the panel
 *   reports that the runtime is needed when either flag is false.
 */
type ComputerUseStatus = {
|
||||
enabled: boolean;
|
||||
runtime: 'cloud' | 'local';
|
||||
available: boolean;
|
||||
desktopAgentConnected?: boolean;
|
||||
desktopAgentCount?: number;
|
||||
nutInstalled: boolean;
|
||||
screenshotInstalled: boolean;
|
||||
installInProgress: boolean;
|
||||
sessionCount: number;
|
||||
message: string;
|
||||
};
|
||||
|
||||
/**
 * One entry of `data.sessions` decoded from the `/api/computer-use/sessions`
 * response.
 *
 * - status: only 'ready' sessions are polled and accept forwarded clicks and
 *   key presses from the panel.
 * - screenshotDataUrl: used directly as the `src` of the viewer image; null
 *   shows the placeholder with `message` instead.
 * - displaySize: size used to scale a click on the rendered image to display
 *   coordinates; click forwarding is skipped while it is null.
 * - cursor: drawn as an overlay positioned at x/width and y/height percent;
 *   needs `displaySize` to be present.
 */
type ComputerUseSession = {
|
||||
id: string;
|
||||
status: 'ready' | 'stopped' | 'unavailable';
|
||||
screenshotDataUrl: string | null;
|
||||
createdAt: string;
|
||||
updatedAt: string;
|
||||
lastAction: string | null;
|
||||
message: string | null;
|
||||
agentAccessEnabled: boolean;
|
||||
createdBy: 'user' | 'agent';
|
||||
displaySize: {
|
||||
width: number;
|
||||
height: number;
|
||||
} | null;
|
||||
cursor: {
|
||||
x: number;
|
||||
y: number;
|
||||
actor: 'agent' | 'user';
|
||||
} | null;
|
||||
};
|
||||
|
||||
/**
 * Props accepted by ComputerUsePanel.
 *
 * - isVisible: the initial load and the 1500 ms polling timer only run while
 *   this is true.
 * - onShowSettings: optional callback taking an optional settings tab
 *   (NOTE(review): its call site is outside this view; confirm usage).
 */
type ComputerUsePanelProps = {
|
||||
isVisible: boolean;
|
||||
onShowSettings?: (tab?: SettingsMainTab) => void;
|
||||
};
|
||||
|
||||
/**
 * Reads a Computer Use API response and returns its parsed JSON payload.
 *
 * The body is read as text first: `response.json()` would throw a bare
 * SyntaxError on an empty body or on a non-JSON error page, hiding the HTTP
 * status from the user.
 *
 * @param response - Fetch response to decode.
 * @returns The parsed payload cast to `T`; `{}` when the body is empty.
 * @throws Error when the body is not valid JSON, when the HTTP status is not
 *   OK, or when the payload carries `success: false`. The message prefers the
 *   payload's `error`, then `details`, then a status-based fallback.
 */
async function readJson<T>(response: Response): Promise<T> {
  const text = await response.text();
  let data: any = {};

  if (text) {
    try {
      // A literal JSON `null` body is treated like an empty body so the checks below cannot crash.
      data = JSON.parse(text) ?? {};
    } catch {
      throw new Error(
        response.ok
          ? 'Received an invalid Computer Use response.'
          : `Computer Use request failed (${response.status}).`,
      );
    }
  }

  if (!response.ok || data.success === false) {
    throw new Error(data.error || data.details || `Request failed (${response.status})`);
  }

  return data as T;
}
|
||||
|
||||
/**
 * Chooses the badge class list that reflects the Computer Use runtime state.
 *
 * @param status - Latest status, or null before the first load.
 * @param installing - True while this panel has an install request in flight.
 * @returns Muted classes when disabled or unknown; for the cloud runtime,
 *   primary classes when a desktop agent is connected and amber otherwise;
 *   for the local runtime, primary classes when available or installing and
 *   neutral classes otherwise.
 */
function getRuntimeTone(status: ComputerUseStatus | null, installing: boolean): string {
  const mutedTone = 'border-border bg-muted text-muted-foreground';
  const activeTone = 'border-primary/30 bg-primary/5 text-foreground';
  const warningTone = 'border-amber-500/30 bg-amber-500/10 text-amber-700 dark:text-amber-300';
  const neutralTone = 'border-border bg-background text-muted-foreground';

  if (!status || !status.enabled) {
    return mutedTone;
  }

  if (status.runtime === 'cloud') {
    if (status.desktopAgentConnected) {
      return activeTone;
    }
    return warningTone;
  }

  // Local runtime: ready and mid-install share the same highlighted tone.
  const isHighlighted = status.available || status.installInProgress || installing;
  return isHighlighted ? activeTone : neutralTone;
}
|
||||
|
||||
/**
 * Produces the short runtime label shown next to the panel title.
 *
 * @param status - Latest status, or null before the first load.
 * @param installing - True while this panel has an install request in flight.
 * @returns 'Disabled' when disabled or unknown. For the cloud runtime, a
 *   label based on the number of linked desktops (`desktopAgentCount`, or
 *   1/0 derived from `desktopAgentConnected` when the count is absent). For
 *   the local runtime, 'Ready', 'Installing' or 'Setup required'.
 */
function getRuntimeLabel(status: ComputerUseStatus | null, installing: boolean): string {
  if (!status || !status.enabled) {
    return 'Disabled';
  }

  if (status.runtime !== 'cloud') {
    if (status.available) {
      return 'Ready';
    }
    return status.installInProgress || installing ? 'Installing' : 'Setup required';
  }

  const connectedFallback = status.desktopAgentConnected ? 1 : 0;
  const linkedDesktops = status.desktopAgentCount ?? connectedFallback;
  if (linkedDesktops === 1) {
    return 'Desktop linked';
  }
  return linkedDesktops > 1 ? `${linkedDesktops} desktops linked` : 'Desktop not linked';
}
|
||||
|
||||
export default function ComputerUsePanel({ isVisible, onShowSettings }: ComputerUsePanelProps) {
|
||||
const [status, setStatus] = useState<ComputerUseStatus | null>(null);
|
||||
const [sessions, setSessions] = useState<ComputerUseSession[]>([]);
|
||||
const [selectedSessionId, setSelectedSessionId] = useState<string | null>(null);
|
||||
const [isRefreshing, setIsRefreshing] = useState(false);
|
||||
const [isBusy, setIsBusy] = useState(false);
|
||||
const [isInstalling, setIsInstalling] = useState(false);
|
||||
const [isFullscreen, setIsFullscreen] = useState(false);
|
||||
const [error, setError] = useState<string | null>(null);
|
||||
const viewerRef = useRef<HTMLDivElement | null>(null);
|
||||
|
||||
const selectedSession = useMemo(
|
||||
() => sessions.find((session) => session.id === selectedSessionId) || sessions[0] || null,
|
||||
[selectedSessionId, sessions],
|
||||
);
|
||||
|
||||
const refresh = useCallback(async () => {
|
||||
setIsRefreshing(true);
|
||||
try {
|
||||
const [statusResponse, sessionsResponse] = await Promise.all([
|
||||
authenticatedFetch('/api/computer-use/status'),
|
||||
authenticatedFetch('/api/computer-use/sessions'),
|
||||
]);
|
||||
const statusData = await readJson<{ data: ComputerUseStatus }>(statusResponse);
|
||||
const sessionsData = await readJson<{ data: { sessions: ComputerUseSession[] } }>(sessionsResponse);
|
||||
setStatus(statusData.data);
|
||||
setSessions(sessionsData.data.sessions);
|
||||
setSelectedSessionId((current) => (
|
||||
current && sessionsData.data.sessions.some((session) => session.id === current)
|
||||
? current
|
||||
: sessionsData.data.sessions[0]?.id || null
|
||||
));
|
||||
setError(null);
|
||||
} finally {
|
||||
setIsRefreshing(false);
|
||||
}
|
||||
}, []);
|
||||
|
||||
useEffect(() => {
|
||||
if (!isVisible) return;
|
||||
void refresh().catch((err) => setError(err instanceof Error ? err.message : 'Failed to load Computer Use'));
|
||||
}, [isVisible, refresh]);
|
||||
|
||||
const handleRefresh = useCallback(() => {
|
||||
void refresh().catch((err) => setError(err instanceof Error ? err.message : 'Failed to refresh Computer Use'));
|
||||
}, [refresh]);
|
||||
|
||||
// Poll while an active session exists so agent-driven changes show up live.
|
||||
useEffect(() => {
|
||||
if (!isVisible || !selectedSession || selectedSession.status !== 'ready') return;
|
||||
const timer = window.setInterval(() => {
|
||||
void refresh().catch(() => undefined);
|
||||
}, 1500);
|
||||
return () => window.clearInterval(timer);
|
||||
}, [isVisible, selectedSession, refresh]);
|
||||
|
||||
const runAction = useCallback(async (action: () => Promise<void>) => {
|
||||
setIsBusy(true);
|
||||
setError(null);
|
||||
try {
|
||||
await action();
|
||||
await refresh();
|
||||
} catch (err) {
|
||||
setError(err instanceof Error ? err.message : 'Computer Use action failed');
|
||||
} finally {
|
||||
setIsBusy(false);
|
||||
}
|
||||
}, [refresh]);
|
||||
|
||||
const captureScreenshot = () => runAction(async () => {
|
||||
if (!selectedSession) return;
|
||||
const response = await authenticatedFetch(`/api/computer-use/sessions/${selectedSession.id}/screenshot`, { method: 'POST' });
|
||||
await readJson(response);
|
||||
});
|
||||
|
||||
const stopSession = () => runAction(async () => {
|
||||
if (!selectedSession) return;
|
||||
const response = await authenticatedFetch(`/api/computer-use/sessions/${selectedSession.id}/stop`, { method: 'POST' });
|
||||
await readJson(response);
|
||||
});
|
||||
|
||||
const deleteSession = () => runAction(async () => {
|
||||
if (!selectedSession) return;
|
||||
const response = await authenticatedFetch(`/api/computer-use/sessions/${selectedSession.id}`, { method: 'DELETE' });
|
||||
await readJson(response);
|
||||
setIsFullscreen(false);
|
||||
});
|
||||
|
||||
const grantControl = () => runAction(async () => {
|
||||
if (!selectedSession) return;
|
||||
const response = await authenticatedFetch(`/api/computer-use/sessions/${selectedSession.id}/consent/grant`, { method: 'POST' });
|
||||
await readJson(response);
|
||||
});
|
||||
|
||||
const revokeControl = () => runAction(async () => {
|
||||
if (!selectedSession) return;
|
||||
const response = await authenticatedFetch(`/api/computer-use/sessions/${selectedSession.id}/consent/revoke`, { method: 'POST' });
|
||||
await readJson(response);
|
||||
});
|
||||
|
||||
const installRuntime = () => runAction(async () => {
|
||||
setIsInstalling(true);
|
||||
try {
|
||||
const response = await authenticatedFetch('/api/computer-use/runtime/install', { method: 'POST' });
|
||||
await readJson(response);
|
||||
} finally {
|
||||
setIsInstalling(false);
|
||||
}
|
||||
});
|
||||
|
||||
const clickViewer = useCallback((event: MouseEvent<HTMLImageElement>) => {
|
||||
if (!selectedSession || selectedSession.status !== 'ready' || !selectedSession.displaySize) {
|
||||
return;
|
||||
}
|
||||
viewerRef.current?.focus();
|
||||
|
||||
const bounds = event.currentTarget.getBoundingClientRect();
|
||||
const scaleX = selectedSession.displaySize.width / bounds.width;
|
||||
const scaleY = selectedSession.displaySize.height / bounds.height;
|
||||
const x = Math.round((event.clientX - bounds.left) * scaleX);
|
||||
const y = Math.round((event.clientY - bounds.top) * scaleY);
|
||||
|
||||
void runAction(async () => {
|
||||
const response = await authenticatedFetch(`/api/computer-use/sessions/${selectedSession.id}/click`, {
|
||||
method: 'POST',
|
||||
body: JSON.stringify({ x, y, double: event.detail === 2 }),
|
||||
});
|
||||
await readJson(response);
|
||||
});
|
||||
}, [runAction, selectedSession]);
|
||||
|
||||
const keyForEvent = useCallback((event: KeyboardEvent<HTMLDivElement>) => {
|
||||
if (event.key === ' ') return 'Space';
|
||||
const parts: string[] = [];
|
||||
if (event.ctrlKey) parts.push('ctrl');
|
||||
if (event.altKey) parts.push('alt');
|
||||
if (event.shiftKey && event.key.length > 1) parts.push('shift');
|
||||
if (event.metaKey) parts.push('meta');
|
||||
parts.push(event.key);
|
||||
return parts.join('+');
|
||||
}, []);
|
||||
|
||||
const pressViewerKey = useCallback((event: KeyboardEvent<HTMLDivElement>) => {
|
||||
if (!selectedSession || selectedSession.status !== 'ready') {
|
||||
return;
|
||||
}
|
||||
|
||||
const ignoredKeys = new Set(['Shift', 'Control', 'Alt', 'Meta', 'CapsLock']);
|
||||
if (ignoredKeys.has(event.key)) {
|
||||
return;
|
||||
}
|
||||
|
||||
event.preventDefault();
|
||||
const key = keyForEvent(event);
|
||||
void runAction(async () => {
|
||||
const response = await authenticatedFetch(`/api/computer-use/sessions/${selectedSession.id}/press-key`, {
|
||||
method: 'POST',
|
||||
body: JSON.stringify({ key }),
|
||||
});
|
||||
await readJson(response);
|
||||
});
|
||||
}, [keyForEvent, runAction, selectedSession]);
|
||||
|
||||
const needsRuntime = Boolean(status?.enabled && status.runtime === 'local' && (!status.nutInstalled || !status.screenshotInstalled));
|
||||
const isCloud = status?.runtime === 'cloud';
|
||||
const desktopAgentCount = status?.desktopAgentCount ?? (status?.desktopAgentConnected ? 1 : 0);
|
||||
const runtimeLabel = getRuntimeLabel(status, isInstalling);
|
||||
|
||||
const cursorStyle = selectedSession?.cursor && selectedSession.displaySize
|
||||
? {
|
||||
left: `${(selectedSession.cursor.x / selectedSession.displaySize.width) * 100}%`,
|
||||
top: `${(selectedSession.cursor.y / selectedSession.displaySize.height) * 100}%`,
|
||||
}
|
||||
: null;
|
||||
|
||||
/**
 * Renders the viewer surface for the selected session.
 *
 * When the session has a screenshot, shows it (clicks go to clickViewer) with
 * an optional cursor marker positioned from cursorStyle; otherwise shows a
 * placeholder message. The container is focusable only while the session is
 * 'ready' and forwards key presses to pressViewerKey.
 *
 * @param fullscreen - when true, uses the taller fullscreen sizing (80vh).
 */
const renderSurface = (fullscreen = false) => {
  // Emit exactly one min-height utility. Applying both min-h-[360px] and
  // min-h-[80vh] leaves the winner to stylesheet order, not class order.
  const minHeightClass = fullscreen ? 'min-h-[80vh]' : 'min-h-[360px]';
  const imageClass = fullscreen
    ? 'block max-h-[80vh] w-auto max-w-full object-contain'
    : 'block max-h-[70vh] w-auto max-w-full object-contain';

  return (
    <div
      ref={viewerRef}
      tabIndex={selectedSession?.status === 'ready' ? 0 : -1}
      onKeyDown={pressViewerKey}
      className={`flex flex-1 items-center justify-center bg-neutral-950 outline-none ${minHeightClass}`}
    >
      {selectedSession?.screenshotDataUrl ? (
        <div className="relative inline-block max-h-full">
          <img
            src={selectedSession.screenshotDataUrl}
            alt="Desktop screenshot"
            className={imageClass}
            onClick={clickViewer}
          />
          {cursorStyle && (
            <div
              className="pointer-events-none absolute h-5 w-5 -translate-x-1/2 -translate-y-1/2 rounded-full border-2 border-white/90 bg-sky-500/80 shadow-[0_0_0_6px_rgba(14,165,233,0.18)]"
              style={cursorStyle}
            >
              <div className="absolute left-1/2 top-1/2 h-2 w-2 -translate-x-1/2 -translate-y-1/2 rounded-full bg-white" />
            </div>
          )}
        </div>
      ) : (
        <div className="max-w-md px-6 text-center">
          <MonitorCog className="mx-auto h-10 w-10 text-neutral-500" />
          <div className="mt-3 text-sm font-medium text-neutral-100">
            {selectedSession?.message || 'No active Computer Use session.'}
          </div>
          <p className="mt-2 text-xs leading-relaxed text-neutral-400">
            {isCloud
              ? 'Agents create sessions automatically. Keep the CloudCLI desktop app connected to approve control requests.'
              : 'Agents create sessions automatically. Enable Computer Use and install the local runtime if needed.'}
          </p>
        </div>
      )}
    </div>
  );
};
|
||||
|
||||
return (
|
||||
<div className="flex h-full min-h-0 flex-col bg-background">
|
||||
<div className="flex flex-wrap items-center justify-between gap-3 border-b border-border/60 px-4 py-3">
|
||||
<div className="min-w-0">
|
||||
<div className="flex items-center gap-2">
|
||||
<MonitorCog className="h-4 w-4 text-primary" />
|
||||
<h3 className="text-sm font-semibold text-foreground">Computer Use</h3>
|
||||
<Badge variant="outline" className={cn('text-[10px]', getRuntimeTone(status, isInstalling))}>
|
||||
{runtimeLabel}
|
||||
</Badge>
|
||||
</div>
|
||||
<p className="mt-0.5 text-xs text-muted-foreground">
|
||||
{isCloud
|
||||
? 'Monitor cloud agent desktop sessions and linked desktops.'
|
||||
: 'Monitor local desktop sessions and grant control only when an agent needs it.'}
|
||||
</p>
|
||||
</div>
|
||||
<div className="flex items-center gap-1.5">
|
||||
{onShowSettings && (
|
||||
<Button
|
||||
variant="ghost"
|
||||
size="sm"
|
||||
className="h-7 w-7 p-0"
|
||||
onClick={() => onShowSettings('computer')}
|
||||
title="Open Computer Use settings"
|
||||
aria-label="Open Computer Use settings"
|
||||
>
|
||||
<Settings className="h-3.5 w-3.5" />
|
||||
</Button>
|
||||
)}
|
||||
<Button
|
||||
variant="ghost"
|
||||
size="sm"
|
||||
className="h-7 w-7 p-0"
|
||||
onClick={handleRefresh}
|
||||
disabled={isRefreshing || isBusy}
|
||||
title="Refresh Computer Use"
|
||||
aria-label="Refresh Computer Use"
|
||||
>
|
||||
<RefreshCw className={cn('h-3.5 w-3.5', isRefreshing && 'animate-spin')} />
|
||||
</Button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div className="grid min-h-0 flex-1 grid-cols-1 lg:grid-cols-[300px_minmax(0,1fr)]">
|
||||
<aside className="border-b border-border/60 p-3 lg:border-b-0 lg:border-r">
|
||||
{isCloud && (
|
||||
<div className="rounded-lg border border-border/70 bg-card/40 p-3">
|
||||
<div className="flex items-start justify-between gap-3">
|
||||
<div className="min-w-0">
|
||||
<div className="text-xs font-medium uppercase tracking-wide text-muted-foreground">Cloud desktop access</div>
|
||||
<div className="mt-1 text-sm font-medium text-foreground">{runtimeLabel}</div>
|
||||
</div>
|
||||
<Badge variant="outline" className={cn('shrink-0 text-[10px]', getRuntimeTone(status, isInstalling))}>
|
||||
{desktopAgentCount > 0 ? `${desktopAgentCount} linked` : 'Not linked'}
|
||||
</Badge>
|
||||
</div>
|
||||
<p className="mt-2 text-xs leading-relaxed text-muted-foreground">
|
||||
{desktopAgentCount > 1
|
||||
? 'More than one CloudCLI Desktop app is linked. Agents will use one available desktop.'
|
||||
: desktopAgentCount === 1
|
||||
? 'CloudCLI Desktop is connected. Approval prompts appear on that computer.'
|
||||
: 'Open CloudCLI Desktop on the computer you want agents to use, connect the same account, and enable Computer Use.'}
|
||||
</p>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{needsRuntime && (
|
||||
<div className={cn('rounded-lg border border-border/70 bg-card/40 p-3', isCloud && 'mt-3')}>
|
||||
<div className="text-xs font-medium uppercase tracking-wide text-muted-foreground">Desktop runtime required</div>
|
||||
<p className="mt-2 text-xs leading-relaxed text-muted-foreground">
|
||||
{status?.message || 'Install the desktop control runtime to enable Computer Use.'}
|
||||
</p>
|
||||
<div className="mt-3 flex flex-wrap gap-2 text-xs text-muted-foreground">
|
||||
<span className="rounded-md border border-border px-2 py-1">
|
||||
Control lib: {status?.nutInstalled ? 'installed' : 'missing'}
|
||||
</span>
|
||||
<span className="rounded-md border border-border px-2 py-1">
|
||||
Screen capture: {status?.screenshotInstalled ? 'installed' : 'missing'}
|
||||
</span>
|
||||
</div>
|
||||
<Button
|
||||
type="button"
|
||||
size="sm"
|
||||
className="mt-3 w-full"
|
||||
onClick={installRuntime}
|
||||
disabled={isBusy || isInstalling || status?.installInProgress}
|
||||
>
|
||||
{isInstalling || status?.installInProgress ? (
|
||||
<Loader2 className="h-4 w-4 animate-spin" />
|
||||
) : (
|
||||
<Download className="h-4 w-4" />
|
||||
)}
|
||||
{isInstalling || status?.installInProgress ? 'Installing…' : 'Install Runtime'}
|
||||
</Button>
|
||||
</div>
|
||||
)}
|
||||
|
||||
<div className="mt-3 space-y-2">
|
||||
<div className="rounded-lg border border-border/70 bg-muted/30 p-3 text-xs leading-relaxed text-muted-foreground">
|
||||
<div className="flex items-center gap-1.5 font-medium text-foreground">
|
||||
<ShieldCheck className="h-3.5 w-3.5" />
|
||||
Safety
|
||||
</div>
|
||||
{isCloud ? (
|
||||
<p className="mt-1.5">
|
||||
Agents create sessions automatically through MCP. The CloudCLI desktop app asks for approval on this
|
||||
computer, and <span className="font-medium text-foreground">Stop</span> ends the session and clears access.
|
||||
</p>
|
||||
) : (
|
||||
<p className="mt-1.5">
|
||||
Agents create sessions automatically through MCP but cannot act until you grant control here. Use
|
||||
<span className="font-medium text-foreground"> Grant Control </span>
|
||||
to allow agent actions, and
|
||||
<span className="font-medium text-foreground"> Stop </span>
|
||||
to revoke instantly.
|
||||
</p>
|
||||
)}
|
||||
</div>
|
||||
{sessions.map((session) => (
|
||||
<button
|
||||
key={session.id}
|
||||
type="button"
|
||||
onClick={() => setSelectedSessionId(session.id)}
|
||||
className={`w-full rounded-lg border px-3 py-2 text-left text-sm transition-colors ${selectedSession?.id === session.id
|
||||
? 'border-primary/50 bg-primary/10 text-foreground'
|
||||
: 'border-border/60 bg-card/30 text-muted-foreground hover:bg-muted/50'
|
||||
}`}
|
||||
>
|
||||
<div className="flex items-center justify-between gap-2">
|
||||
<span className="truncate font-medium">
|
||||
{session.createdBy === 'agent' ? 'Agent session' : 'Desktop session'}
|
||||
</span>
|
||||
<Badge variant="outline" className="text-[10px]">{session.status}</Badge>
|
||||
</div>
|
||||
<div className="mt-1 flex flex-wrap gap-1">
|
||||
{session.agentAccessEnabled ? (
|
||||
<span className="rounded border border-emerald-500/30 px-1.5 py-0.5 text-[10px] text-emerald-600 dark:text-emerald-300">
|
||||
control granted
|
||||
</span>
|
||||
) : (
|
||||
<span className="rounded border border-amber-500/30 px-1.5 py-0.5 text-[10px] text-amber-600 dark:text-amber-300">
|
||||
awaiting consent
|
||||
</span>
|
||||
)}
|
||||
</div>
|
||||
<div className="mt-1 truncate text-xs">{session.lastAction || session.message || session.id}</div>
|
||||
</button>
|
||||
))}
|
||||
{sessions.length === 0 && (
|
||||
<div className="rounded-lg border border-dashed border-border/70 px-3 py-8 text-center text-xs text-muted-foreground">
|
||||
Agents will create sessions automatically when they need desktop access.
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</aside>
|
||||
|
||||
<main className="flex min-h-0 flex-col">
|
||||
<div className="flex flex-wrap items-center gap-2 border-b border-border/60 px-3 py-2">
|
||||
<Button variant="outline" size="sm" onClick={captureScreenshot} disabled={isBusy || !selectedSession || selectedSession.status !== 'ready'}>
|
||||
<Camera className="h-4 w-4" />
|
||||
Screenshot
|
||||
</Button>
|
||||
{!isCloud && selectedSession?.agentAccessEnabled ? (
|
||||
<Button variant="outline" size="sm" onClick={revokeControl} disabled={isBusy || !selectedSession}>
|
||||
<X className="h-4 w-4" />
|
||||
Revoke Control
|
||||
</Button>
|
||||
) : !isCloud ? (
|
||||
<Button
|
||||
variant="outline"
|
||||
size="sm"
|
||||
onClick={grantControl}
|
||||
disabled={isBusy || !selectedSession || selectedSession.status !== 'ready' || !status?.enabled}
|
||||
>
|
||||
<Bot className="h-4 w-4" />
|
||||
Grant Control
|
||||
</Button>
|
||||
) : null}
|
||||
<Button variant="outline" size="sm" onClick={() => setIsFullscreen(true)} disabled={!selectedSession?.screenshotDataUrl}>
|
||||
<Expand className="h-4 w-4" />
|
||||
Full Screen
|
||||
</Button>
|
||||
<Button variant="outline" size="sm" onClick={stopSession} disabled={isBusy || !selectedSession || selectedSession.status !== 'ready'}>
|
||||
<Square className="h-4 w-4" />
|
||||
Stop
|
||||
</Button>
|
||||
<Button variant="outline" size="sm" onClick={deleteSession} disabled={isBusy || !selectedSession}>
|
||||
<Trash2 className="h-4 w-4" />
|
||||
Delete
|
||||
</Button>
|
||||
</div>
|
||||
|
||||
{error && (
|
||||
<div className="border-b border-red-200 bg-red-50 px-4 py-2 text-sm text-red-700 dark:border-red-900/50 dark:bg-red-950/30 dark:text-red-200">
|
||||
{error}
|
||||
</div>
|
||||
)}
|
||||
|
||||
<div className="min-h-0 flex-1 overflow-auto bg-muted/20 p-4">
|
||||
<div className="mx-auto flex min-h-[420px] max-w-6xl flex-col overflow-hidden rounded-lg border border-border bg-background shadow-sm">
|
||||
<div className="flex items-center gap-2 border-b border-border/60 px-3 py-2 text-xs text-muted-foreground">
|
||||
<MonitorCog className="h-3.5 w-3.5" />
|
||||
<span className="truncate">
|
||||
{selectedSession?.displaySize
|
||||
? `${selectedSession.displaySize.width}×${selectedSession.displaySize.height}`
|
||||
: 'No screen captured'}
|
||||
</span>
|
||||
{selectedSession?.agentAccessEnabled && (
|
||||
<span className="ml-auto inline-flex items-center gap-1 rounded border border-emerald-500/30 px-2 py-0.5 text-emerald-600 dark:text-emerald-300">
|
||||
<Bot className="h-3.5 w-3.5" />
|
||||
{isCloud ? 'Desktop-approved session' : 'Agent control active'}
|
||||
</span>
|
||||
)}
|
||||
</div>
|
||||
{renderSurface()}
|
||||
</div>
|
||||
<p className="mx-auto mt-2 max-w-6xl text-center text-xs text-muted-foreground">
|
||||
{selectedSession
|
||||
? 'Click the screenshot to click the real desktop. Focus the view and type to send keystrokes.'
|
||||
: 'Computer Use sessions appear here after an agent requests desktop access.'}
|
||||
</p>
|
||||
</div>
|
||||
</main>
|
||||
</div>
|
||||
{isFullscreen && selectedSession && (
|
||||
<div className="fixed inset-0 z-50 bg-black/90 p-6">
|
||||
<div className="flex h-full flex-col rounded-lg border border-white/10 bg-black">
|
||||
<div className="flex items-center justify-between border-b border-white/10 px-4 py-3 text-sm text-white/80">
|
||||
<div className="min-w-0 truncate">Desktop session</div>
|
||||
<Button variant="outline" size="sm" onClick={() => setIsFullscreen(false)}>
|
||||
<X className="h-4 w-4" />
|
||||
Close
|
||||
</Button>
|
||||
</div>
|
||||
{renderSurface(true)}
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
@@ -66,6 +66,7 @@ export type MainContentHeaderProps = {
|
||||
selectedSession: ProjectSession | null;
|
||||
shouldShowTasksTab: boolean;
|
||||
shouldShowBrowserTab: boolean;
|
||||
shouldShowComputerTab: boolean;
|
||||
isMobile: boolean;
|
||||
onMenuClick: () => void;
|
||||
};
|
||||
|
||||
@@ -6,12 +6,14 @@ import StandaloneShell from '../../standalone-shell/view/StandaloneShell';
|
||||
import GitPanel from '../../git-panel/view/GitPanel';
|
||||
import PluginTabContent from '../../plugins/view/PluginTabContent';
|
||||
import { BrowserUsePanel } from '../../browser-use';
|
||||
import { ComputerUsePanel } from '../../computer-use';
|
||||
import type { MainContentProps } from '../types/types';
|
||||
import { useTaskMaster } from '../../../contexts/TaskMasterContext';
|
||||
import { usePaletteOpsRegister } from '../../../contexts/PaletteOpsContext';
|
||||
import { useTasksSettings } from '../../../contexts/TasksSettingsContext';
|
||||
import { useUiPreferences } from '../../../hooks/useUiPreferences';
|
||||
import { useFileOpenResolver } from '../../../hooks/useFileOpenResolver';
|
||||
import { COMPUTER_USE_MENUS_ENABLED } from '../../../constants/featureFlags';
|
||||
import { authenticatedFetch } from '../../../utils/api';
|
||||
import { useEditorSidebar } from '../../code-editor/hooks/useEditorSidebar';
|
||||
import EditorSidebar from '../../code-editor/view/EditorSidebar';
|
||||
@@ -59,9 +61,11 @@ function MainContent({
|
||||
const { currentProject, setCurrentProject } = useTaskMaster() as TaskMasterContextValue;
|
||||
const { tasksEnabled, isTaskMasterInstalled } = useTasksSettings() as TasksSettingsContextValue;
|
||||
const [browserUseEnabled, setBrowserUseEnabled] = useState(false);
|
||||
const [computerUseEnabled, setComputerUseEnabled] = useState<boolean | undefined>(undefined);
|
||||
|
||||
const shouldShowTasksTab = Boolean(tasksEnabled && isTaskMasterInstalled);
|
||||
const shouldShowBrowserTab = browserUseEnabled;
|
||||
const shouldShowComputerTab = COMPUTER_USE_MENUS_ENABLED && computerUseEnabled === true;
|
||||
|
||||
const {
|
||||
editingFile,
|
||||
@@ -121,6 +125,60 @@ function MainContent({
|
||||
}
|
||||
}, [shouldShowBrowserTab, activeTab, setActiveTab]);
|
||||
|
||||
/**
 * Fetches the Computer Use settings and status endpoints and updates
 * computerUseEnabled, which gates the Computer tab.
 *
 * Both requests run in parallel and either may fail independently. Resolution:
 *  - status reports runtime 'cloud': enabled follows the status payload;
 *  - otherwise, usable settings: enabled follows settings.enabled;
 *  - otherwise, usable status: enabled follows the status payload;
 *  - neither usable: the current value is left untouched.
 *
 * A response is "usable" only when it is HTTP-ok, its body parsed as JSON,
 * and the payload does not carry success === false.
 */
const loadComputerUseSettings = useCallback(async () => {
  // Best-effort JSON parsing: a missing response or malformed body yields null.
  const parseBody = async (response: Response | null) => {
    if (!response) return null;
    try {
      return await response.json();
    } catch {
      return null;
    }
  };

  try {
    const [settingsResult, statusResult] = await Promise.allSettled([
      authenticatedFetch('/api/computer-use/settings'),
      authenticatedFetch('/api/computer-use/status'),
    ]);
    const settingsRes = settingsResult.status === 'fulfilled' ? settingsResult.value : null;
    const statusRes = statusResult.status === 'fulfilled' ? statusResult.value : null;

    const settingsData = await parseBody(settingsRes);
    const statusData = await parseBody(statusRes);

    // Require a parsed body: an ok response with an unreadable body is a
    // transient failure and must not be read as "disabled".
    const settingsUsable = Boolean(settingsRes?.ok && settingsData && settingsData.success !== false);
    const statusUsable = Boolean(statusRes?.ok && statusData && statusData.success !== false);
    const runtime = statusData?.data?.runtime;

    if (runtime === 'cloud') {
      setComputerUseEnabled(Boolean(statusUsable && statusData?.data?.enabled));
    } else if (settingsUsable) {
      setComputerUseEnabled(Boolean(settingsData?.data?.settings?.enabled));
    } else if (statusUsable) {
      setComputerUseEnabled(Boolean(statusData?.data?.enabled));
    }
  } catch {
    // Keep the current tab availability on transient status/settings failures.
  }
}, []);
|
||||
|
||||
// Load Computer Use availability once, then reload whenever the
// 'computerUseSettingsChanged' window event fires; unsubscribe on cleanup.
useEffect(() => {
  const settingsChangedEvent = 'computerUseSettingsChanged';

  void loadComputerUseSettings();
  window.addEventListener(settingsChangedEvent, loadComputerUseSettings);

  return () => {
    window.removeEventListener(settingsChangedEvent, loadComputerUseSettings);
  };
}, [loadComputerUseSettings]);
|
||||
|
||||
// If the Computer tab is active but no longer available, fall back to chat.
useEffect(() => {
  const computerTabIsActive = activeTab === 'computer';
  if (shouldShowComputerTab || !computerTabIsActive) {
    return;
  }
  setActiveTab('chat');
}, [shouldShowComputerTab, activeTab, setActiveTab]);
|
||||
|
||||
usePaletteOpsRegister({
|
||||
openFile: (filePath: string) => {
|
||||
setActiveTab('files');
|
||||
@@ -149,6 +207,7 @@ function MainContent({
|
||||
selectedSession={selectedSession}
|
||||
shouldShowTasksTab={shouldShowTasksTab}
|
||||
shouldShowBrowserTab={shouldShowBrowserTab}
|
||||
shouldShowComputerTab={shouldShowComputerTab}
|
||||
isMobile={isMobile}
|
||||
onMenuClick={onMenuClick}
|
||||
/>
|
||||
@@ -209,7 +268,17 @@ function MainContent({
|
||||
|
||||
{shouldShowBrowserTab && activeTab === 'browser' && (
|
||||
<div className="h-full overflow-hidden">
|
||||
<BrowserUsePanel isVisible={activeTab === 'browser'} onShowSettings={onShowSettings} />
|
||||
<BrowserUsePanel
|
||||
isVisible={activeTab === 'browser'}
|
||||
projectId={selectedProject.projectId}
|
||||
onShowSettings={onShowSettings}
|
||||
/>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{shouldShowComputerTab && activeTab === 'computer' && (
|
||||
<div className="h-full overflow-hidden">
|
||||
<ComputerUsePanel isVisible={activeTab === 'computer'} onShowSettings={onShowSettings} />
|
||||
</div>
|
||||
)}
|
||||
|
||||
|
||||
@@ -11,6 +11,7 @@ export default function MainContentHeader({
|
||||
selectedSession,
|
||||
shouldShowTasksTab,
|
||||
shouldShowBrowserTab,
|
||||
shouldShowComputerTab,
|
||||
isMobile,
|
||||
onMenuClick,
|
||||
}: MainContentHeaderProps) {
|
||||
@@ -61,6 +62,7 @@ export default function MainContentHeader({
|
||||
setActiveTab={setActiveTab}
|
||||
shouldShowTasksTab={shouldShowTasksTab}
|
||||
shouldShowBrowserTab={shouldShowBrowserTab}
|
||||
shouldShowComputerTab={shouldShowComputerTab}
|
||||
/>
|
||||
</div>
|
||||
{canScrollRight && (
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import { MessageSquare, Terminal, Folder, GitBranch, ClipboardCheck, MonitorPlay, type LucideIcon } from 'lucide-react';
|
||||
import { MessageSquare, Terminal, Folder, GitBranch, ClipboardCheck, MonitorCog, MonitorPlay, type LucideIcon } from 'lucide-react';
|
||||
import type { Dispatch, SetStateAction } from 'react';
|
||||
import { useTranslation } from 'react-i18next';
|
||||
|
||||
@@ -12,6 +12,7 @@ type MainContentTabSwitcherProps = {
|
||||
setActiveTab: Dispatch<SetStateAction<AppTab>>;
|
||||
shouldShowTasksTab: boolean;
|
||||
shouldShowBrowserTab: boolean;
|
||||
shouldShowComputerTab: boolean;
|
||||
};
|
||||
|
||||
type BuiltInTab = {
|
||||
@@ -45,6 +46,13 @@ const BROWSER_TAB: BuiltInTab = {
|
||||
icon: MonitorPlay,
|
||||
};
|
||||
|
||||
// Built-in tab descriptor for the Computer Use panel: tab id 'computer',
// label looked up via the 'tabs.computer' translation key, MonitorCog icon.
const COMPUTER_TAB: BuiltInTab = {
|
||||
kind: 'builtin',
|
||||
id: 'computer',
|
||||
labelKey: 'tabs.computer',
|
||||
icon: MonitorCog,
|
||||
};
|
||||
|
||||
const TASKS_TAB: BuiltInTab = {
|
||||
kind: 'builtin',
|
||||
id: 'tasks',
|
||||
@@ -57,6 +65,7 @@ export default function MainContentTabSwitcher({
|
||||
setActiveTab,
|
||||
shouldShowTasksTab,
|
||||
shouldShowBrowserTab,
|
||||
shouldShowComputerTab,
|
||||
}: MainContentTabSwitcherProps) {
|
||||
const { t } = useTranslation();
|
||||
const { plugins } = usePlugins();
|
||||
@@ -64,6 +73,7 @@ export default function MainContentTabSwitcher({
|
||||
const builtInTabs: BuiltInTab[] = [
|
||||
...BASE_TABS,
|
||||
...(shouldShowBrowserTab ? [BROWSER_TAB] : []),
|
||||
...(shouldShowComputerTab ? [COMPUTER_TAB] : []),
|
||||
...(shouldShowTasksTab ? [TASKS_TAB] : []),
|
||||
];
|
||||
|
||||
|
||||
@@ -32,6 +32,10 @@ function getTabTitle(activeTab: AppTab, shouldShowTasksTab: boolean, t: (key: st
|
||||
return t('tabs.browser');
|
||||
}
|
||||
|
||||
if (activeTab === 'computer') {
|
||||
return t('tabs.computer');
|
||||
}
|
||||
|
||||
return 'Project';
|
||||
}
|
||||
|
||||
@@ -73,7 +77,15 @@ export default function MainContentTitle({
|
||||
<h2 className="scrollbar-hide overflow-x-auto whitespace-nowrap text-sm font-semibold leading-tight text-foreground">
|
||||
{getSessionTitle(selectedSession)}
|
||||
</h2>
|
||||
<div className="truncate text-[11px] leading-tight text-muted-foreground">{selectedProject.displayName}</div>
|
||||
<div className="flex min-w-0 items-center gap-2 text-[11px] leading-tight text-muted-foreground">
|
||||
<span className="min-w-0 truncate">{selectedProject.displayName}</span>
|
||||
<span
|
||||
className="hidden min-w-0 max-w-[45%] flex-shrink truncate border-l border-border/60 pl-2 font-mono text-[10px] sm:block"
|
||||
title={selectedSession.id}
|
||||
>
|
||||
{selectedSession.id}
|
||||
</span>
|
||||
</div>
|
||||
</div>
|
||||
) : showChatNewSession ? (
|
||||
<div className="min-w-0">
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import { useCallback, useEffect, useRef, useState } from 'react';
|
||||
|
||||
import { useTheme } from '../../../contexts/ThemeContext';
|
||||
import { COMPUTER_USE_MENUS_ENABLED } from '../../../constants/featureFlags';
|
||||
import { authenticatedFetch } from '../../../utils/api';
|
||||
import { setNotificationSoundEnabled } from '../../../utils/notificationSound';
|
||||
import { useProviderAuthStatus } from '../../provider-auth/hooks/useProviderAuthStatus';
|
||||
@@ -54,11 +55,11 @@ type NotificationPreferencesResponse = {
|
||||
|
||||
type ActiveLoginProvider = AgentProvider | '';
|
||||
|
||||
const KNOWN_MAIN_TABS: SettingsMainTab[] = ['agents', 'appearance', 'git', 'api', 'tasks', 'browser', 'notifications', 'plugins', 'about'];
|
||||
const KNOWN_MAIN_TABS: SettingsMainTab[] = ['agents', 'appearance', 'git', 'api', 'tasks', 'browser', 'computer', 'notifications', 'plugins', 'about'];
|
||||
|
||||
const normalizeMainTab = (tab: string): SettingsMainTab => {
|
||||
// Keep backwards compatibility with older callers that still pass "tools".
|
||||
if (tab === 'tools') {
|
||||
if (tab === 'tools' || (tab === 'computer' && !COMPUTER_USE_MENUS_ENABLED)) {
|
||||
return 'agents';
|
||||
}
|
||||
|
||||
|
||||
@@ -3,7 +3,7 @@ import type { Dispatch, SetStateAction } from 'react';
|
||||
import type { LLMProvider } from '../../../types/app';
|
||||
import type { ProviderAuthStatus } from '../../provider-auth/types';
|
||||
|
||||
export type SettingsMainTab = 'agents' | 'appearance' | 'git' | 'api' | 'voice' | 'tasks' | 'browser' | 'notifications' | 'plugins' | 'about';
|
||||
export type SettingsMainTab = 'agents' | 'appearance' | 'git' | 'api' | 'voice' | 'tasks' | 'browser' | 'computer' | 'notifications' | 'plugins' | 'about';
|
||||
export type AgentProvider = LLMProvider;
|
||||
export type AgentCategory = 'account' | 'permissions' | 'mcp' | 'skills';
|
||||
export type ProjectSortOrder = 'name' | 'date';
|
||||
|
||||
@@ -11,6 +11,7 @@ import CredentialsSettingsTab from '../view/tabs/api-settings/CredentialsSetting
|
||||
import VoiceSettingsTab from '../view/tabs/VoiceSettingsTab';
|
||||
import GitSettingsTab from '../view/tabs/git-settings/GitSettingsTab';
|
||||
import BrowserUseSettingsTab from '../view/tabs/browser-use-settings/BrowserUseSettingsTab';
|
||||
import ComputerUseSettingsTab from '../view/tabs/computer-use-settings/ComputerUseSettingsTab';
|
||||
import NotificationsSettingsTab from '../view/tabs/NotificationsSettingsTab';
|
||||
import TasksSettingsTab from '../view/tabs/tasks-settings/TasksSettingsTab';
|
||||
import PluginSettingsTab from '../../plugins/view/PluginSettingsTab';
|
||||
@@ -198,6 +199,8 @@ function Settings({ isOpen, onClose, projects = [], initialTab = 'agents' }: Set
|
||||
|
||||
{activeTab === 'browser' && <BrowserUseSettingsTab />}
|
||||
|
||||
{activeTab === 'computer' && <ComputerUseSettingsTab />}
|
||||
|
||||
{activeTab === 'notifications' && (
|
||||
<NotificationsSettingsTab
|
||||
notificationPreferences={notificationPreferences}
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import { Bell, Bot, GitBranch, Info, Key, ListChecks, Mic, MonitorPlay, Palette, Puzzle } from 'lucide-react';
|
||||
import { Bell, Bot, GitBranch, Info, Key, ListChecks,Mic, MonitorCog, MonitorPlay, Palette, Puzzle } from 'lucide-react';
|
||||
import { useTranslation } from 'react-i18next';
|
||||
|
||||
import { COMPUTER_USE_MENUS_ENABLED } from '../../../constants/featureFlags';
|
||||
import { cn } from '../../../lib/utils';
|
||||
import { PillBar, Pill } from '../../../shared/view/ui';
|
||||
import type { SettingsMainTab } from '../types/types';
|
||||
@@ -24,11 +25,16 @@ const NAV_ITEMS: NavItem[] = [
|
||||
{ id: 'voice', labelKey: 'mainTabs.voice', icon: Mic },
|
||||
{ id: 'tasks', labelKey: 'mainTabs.tasks', icon: ListChecks },
|
||||
{ id: 'browser', labelKey: 'mainTabs.browser', icon: MonitorPlay },
|
||||
{ id: 'computer', labelKey: 'mainTabs.computer', icon: MonitorCog },
|
||||
{ id: 'plugins', labelKey: 'mainTabs.plugins', icon: Puzzle },
|
||||
{ id: 'notifications', labelKey: 'mainTabs.notifications', icon: Bell },
|
||||
{ id: 'about', labelKey: 'mainTabs.about', icon: Info },
|
||||
];
|
||||
|
||||
// Navigation entries actually rendered: everything in NAV_ITEMS, except that
// the 'computer' entry is kept only when COMPUTER_USE_MENUS_ENABLED is set.
const VISIBLE_NAV_ITEMS = NAV_ITEMS.filter(({ id }) => {
  if (id === 'computer') {
    return COMPUTER_USE_MENUS_ENABLED;
  }
  return true;
});
|
||||
|
||||
export default function SettingsSidebar({ activeTab, onChange }: SettingsSidebarProps) {
|
||||
const { t } = useTranslation('settings');
|
||||
|
||||
@@ -37,7 +43,7 @@ export default function SettingsSidebar({ activeTab, onChange }: SettingsSidebar
|
||||
{/* Desktop sidebar */}
|
||||
<aside className="hidden w-56 flex-shrink-0 border-r border-border bg-muted/30 md:flex md:flex-col">
|
||||
<nav className="flex flex-col gap-1 p-3">
|
||||
{NAV_ITEMS.map((item) => {
|
||||
{VISIBLE_NAV_ITEMS.map((item) => {
|
||||
const Icon = item.icon;
|
||||
const isActive = activeTab === item.id;
|
||||
|
||||
@@ -63,7 +69,7 @@ export default function SettingsSidebar({ activeTab, onChange }: SettingsSidebar
|
||||
{/* Mobile horizontal nav — pill bar */}
|
||||
<div className="flex-shrink-0 border-b border-border px-3 py-2 md:hidden">
|
||||
<PillBar className="scrollbar-hide w-full overflow-x-auto">
|
||||
{NAV_ITEMS.map((item) => {
|
||||
{VISIBLE_NAV_ITEMS.map((item) => {
|
||||
const Icon = item.icon;
|
||||
|
||||
return (
|
||||
|
||||
@@ -1,22 +1,32 @@
|
||||
import { useCallback, useEffect, useState } from 'react';
|
||||
import { Download, Loader2 } from 'lucide-react';
|
||||
import { Download, ExternalLink, Eye, Loader2, Zap } from 'lucide-react';
|
||||
|
||||
import { Button } from '../../../../../shared/view/ui';
|
||||
import { Button, Input } from '../../../../../shared/view/ui';
|
||||
import { authenticatedFetch } from '../../../../../utils/api';
|
||||
import SettingsCard from '../../SettingsCard';
|
||||
import SettingsRow from '../../SettingsRow';
|
||||
import SettingsSection from '../../SettingsSection';
|
||||
import SettingsToggle from '../../SettingsToggle';
|
||||
|
||||
const BROWSER_USE_GUIDE_URL = 'https://cloudcli.ai/docs/browser-use';
|
||||
|
||||
type BrowserUseSettings = {
|
||||
enabled: boolean;
|
||||
persistSessions: boolean;
|
||||
defaultProfileName: string;
|
||||
browserBackend: 'playwright' | 'camoufox-vnc';
|
||||
};
|
||||
|
||||
type BrowserUseStatus = {
|
||||
enabled: boolean;
|
||||
available: boolean;
|
||||
backend: 'playwright' | 'camoufox-vnc';
|
||||
browserBackend: 'playwright' | 'camoufox-vnc';
|
||||
playwrightInstalled: boolean;
|
||||
chromiumInstalled: boolean;
|
||||
camoufoxInstalled: boolean;
|
||||
noVncInstalled: boolean;
|
||||
x11vncInstalled: boolean;
|
||||
installInProgress: boolean;
|
||||
message: string;
|
||||
};
|
||||
@@ -32,16 +42,20 @@ async function readJson<T>(response: Response): Promise<T> {
|
||||
export default function BrowserUseSettingsTab() {
|
||||
const [settings, setSettings] = useState<BrowserUseSettings | null>(null);
|
||||
const [status, setStatus] = useState<BrowserUseStatus | null>(null);
|
||||
const [hasLoadedSettings, setHasLoadedSettings] = useState(false);
|
||||
const [isSettingsLoading, setIsSettingsLoading] = useState(true);
|
||||
const [isStatusLoading, setIsStatusLoading] = useState(true);
|
||||
const [isSaving, setIsSaving] = useState(false);
|
||||
const [isInstalling, setIsInstalling] = useState(false);
|
||||
const [error, setError] = useState<string | null>(null);
|
||||
const [profileNameDraft, setProfileNameDraft] = useState('default');
|
||||
|
||||
const loadSettings = useCallback(async () => {
|
||||
const settingsResponse = await authenticatedFetch('/api/browser-use/settings');
|
||||
const settingsData = await readJson<{ data: { settings: BrowserUseSettings } }>(settingsResponse);
|
||||
setSettings(settingsData.data.settings);
|
||||
setHasLoadedSettings(true);
|
||||
setProfileNameDraft(settingsData.data.settings.defaultProfileName || 'default');
|
||||
}, []);
|
||||
|
||||
const loadStatus = useCallback(async () => {
|
||||
@@ -52,6 +66,7 @@ export default function BrowserUseSettingsTab() {
|
||||
|
||||
useEffect(() => {
|
||||
setError(null);
|
||||
setHasLoadedSettings(false);
|
||||
setIsSettingsLoading(true);
|
||||
setIsStatusLoading(true);
|
||||
|
||||
@@ -74,6 +89,7 @@ export default function BrowserUseSettingsTab() {
|
||||
});
|
||||
const data = await readJson<{ data: { settings: BrowserUseSettings } }>(response);
|
||||
setSettings(data.data.settings);
|
||||
setHasLoadedSettings(true);
|
||||
window.dispatchEvent(new Event('browserUseSettingsChanged'));
|
||||
setIsStatusLoading(true);
|
||||
await loadStatus();
|
||||
@@ -101,8 +117,21 @@ export default function BrowserUseSettingsTab() {
|
||||
}
|
||||
};
|
||||
|
||||
// Normalizes the profile-name draft (trimmed, falling back to 'default'),
// writes the normalized value back to the draft, and persists it through
// updateSettings only when it differs from the stored defaultProfileName.
const saveProfileName = async () => {
  const trimmedDraft = profileNameDraft.trim();
  const nextName = trimmedDraft === '' ? 'default' : trimmedDraft;
  setProfileNameDraft(nextName);

  const isUnchanged = nextName === settings?.defaultProfileName;
  if (!isUnchanged) {
    await updateSettings({ defaultProfileName: nextName });
  }
};
|
||||
|
||||
const browserEnabled = settings?.enabled === true;
|
||||
const needsBrowserBinaries = Boolean(browserEnabled && status && (!status.playwrightInstalled || !status.chromiumInstalled));
|
||||
const browserDisabled = hasLoadedSettings && settings?.enabled === false;
|
||||
const persistSessions = settings?.persistSessions === true;
|
||||
const selectedBackend = settings?.browserBackend || 'playwright';
|
||||
const effectiveBackend = status?.backend || 'playwright';
|
||||
const needsBrowserBinaries = Boolean(browserEnabled && status && !status.available);
|
||||
const runtimeLabel = (installed?: boolean) => {
|
||||
if (isStatusLoading && !status) {
|
||||
return 'checking...';
|
||||
@@ -114,33 +143,165 @@ export default function BrowserUseSettingsTab() {
|
||||
<div className="space-y-8">
|
||||
<SettingsSection
|
||||
title="Browser"
|
||||
description="Allow agents to create guarded Playwright browser sessions that you can monitor from the Browser tab."
|
||||
description="Give coding agents a working browser so they can open websites, test flows, capture screenshots, and help debug what users actually see."
|
||||
>
|
||||
<SettingsCard divided>
|
||||
<SettingsRow
|
||||
label="Enable Browser"
|
||||
description="Registers Browser for supported agents. Agents can create browser sessions; you can watch, stop, and delete them."
|
||||
label="Give Agents Browser Access"
|
||||
description="Let agents use a browser during coding tasks while you can watch live sessions, open them in a tab, and stop them at any time."
|
||||
>
|
||||
{isSettingsLoading && !settings ? (
|
||||
{isSettingsLoading && !hasLoadedSettings ? (
|
||||
<Loader2 className="h-4 w-4 animate-spin text-muted-foreground" />
|
||||
) : (
|
||||
) : hasLoadedSettings ? (
|
||||
<SettingsToggle
|
||||
checked={browserEnabled}
|
||||
onChange={(value) => void updateSettings({ enabled: value })}
|
||||
ariaLabel="Enable Browser"
|
||||
ariaLabel="Give Agents Browser Access"
|
||||
disabled={isSaving}
|
||||
/>
|
||||
) : (
|
||||
<span className="text-sm text-muted-foreground">Unavailable</span>
|
||||
)}
|
||||
</SettingsRow>
|
||||
|
||||
{browserDisabled && (
|
||||
<div className="px-4 py-4">
|
||||
<a
|
||||
href={BROWSER_USE_GUIDE_URL}
|
||||
target="_blank"
|
||||
rel="noopener noreferrer"
|
||||
className="inline-flex items-center gap-1.5 text-sm font-medium text-primary hover:underline"
|
||||
>
|
||||
Read the Browser guide
|
||||
<ExternalLink className="h-3.5 w-3.5" />
|
||||
</a>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{error && (
|
||||
<div className="px-4 py-4">
|
||||
<div className="rounded-md border border-red-200 bg-red-50 px-3 py-2 text-sm text-red-700 dark:border-red-900/50 dark:bg-red-950/30 dark:text-red-200">
|
||||
{error}
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{browserEnabled && (
|
||||
<>
|
||||
<div className="space-y-3 px-4 py-4">
|
||||
<div className="min-w-0">
|
||||
<div className="text-sm font-medium text-foreground">Browser Engine</div>
|
||||
<div className="mt-0.5 text-sm text-muted-foreground">
|
||||
Pick the kind of browser experience agents should use for new sessions.
|
||||
</div>
|
||||
</div>
|
||||
<div className="grid gap-2 sm:grid-cols-2">
|
||||
{([
|
||||
{
|
||||
value: 'playwright' as const,
|
||||
label: 'Playwright',
|
||||
description: 'Best for quick checks, screenshots, and automated page interaction when no manual login is needed.',
|
||||
icon: Zap,
|
||||
},
|
||||
{
|
||||
value: 'camoufox-vnc' as const,
|
||||
label: 'Camoufox + noVNC',
|
||||
description: 'Best when a person may need to log in, approve a step, or watch the browser session live.',
|
||||
icon: Eye,
|
||||
},
|
||||
]).map((option) => {
|
||||
const Icon = option.icon;
|
||||
const selected = selectedBackend === option.value;
|
||||
return (
|
||||
<button
|
||||
key={option.value}
|
||||
type="button"
|
||||
onClick={() => void updateSettings({ browserBackend: option.value })}
|
||||
disabled={isSaving || isSettingsLoading}
|
||||
className={[
|
||||
'group flex min-h-[88px] items-start gap-3 rounded-lg border px-3 py-3 text-left transition-colors',
|
||||
'focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 focus-visible:ring-offset-background',
|
||||
selected
|
||||
? 'border-primary bg-primary/5 text-foreground shadow-sm'
|
||||
: 'border-border bg-background hover:border-foreground/20 hover:bg-muted/40',
|
||||
(isSaving || isSettingsLoading) ? 'cursor-not-allowed opacity-60' : '',
|
||||
].join(' ')}
|
||||
aria-pressed={selected}
|
||||
>
|
||||
<span className={[
|
||||
'mt-0.5 flex h-8 w-8 flex-shrink-0 items-center justify-center rounded-md border',
|
||||
selected ? 'border-primary/30 bg-primary/10 text-primary' : 'border-border bg-muted/40 text-muted-foreground',
|
||||
].join(' ')}
|
||||
>
|
||||
<Icon className="h-4 w-4" />
|
||||
</span>
|
||||
<span className="min-w-0">
|
||||
<span className="block text-sm font-medium">{option.label}</span>
|
||||
<span className="mt-1 block text-xs leading-relaxed text-muted-foreground">{option.description}</span>
|
||||
</span>
|
||||
</button>
|
||||
);
|
||||
})}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<SettingsRow
|
||||
label="Remember Browser Logins"
|
||||
description="Keep cookies and site storage in a named profile so agents can reuse signed-in sessions instead of starting from scratch."
|
||||
>
|
||||
{isSettingsLoading && !settings ? (
|
||||
<Loader2 className="h-4 w-4 animate-spin text-muted-foreground" />
|
||||
) : (
|
||||
<SettingsToggle
|
||||
checked={persistSessions}
|
||||
onChange={(value) => void updateSettings({ persistSessions: value })}
|
||||
ariaLabel="Remember Browser Logins"
|
||||
disabled={isSaving}
|
||||
/>
|
||||
)}
|
||||
</SettingsRow>
|
||||
|
||||
{persistSessions && (
|
||||
<SettingsRow
|
||||
label="Default Browser Profile"
|
||||
description="New browser sessions use this profile by default, so saved logins stay tied to a predictable workspace."
|
||||
>
|
||||
<Input
|
||||
value={profileNameDraft}
|
||||
onChange={(event) => setProfileNameDraft(event.target.value)}
|
||||
onBlur={() => void saveProfileName()}
|
||||
onKeyDown={(event) => {
|
||||
if (event.key === 'Enter') {
|
||||
event.currentTarget.blur();
|
||||
}
|
||||
}}
|
||||
disabled={isSaving || isSettingsLoading}
|
||||
className="w-40"
|
||||
aria-label="Default Browser Profile"
|
||||
/>
|
||||
</SettingsRow>
|
||||
)}
|
||||
</>
|
||||
)}
|
||||
|
||||
{browserEnabled && (
|
||||
<div className="space-y-4 px-4 py-4">
|
||||
<div className="flex flex-wrap gap-2 text-xs text-muted-foreground">
|
||||
<span className="rounded-md border border-border px-2 py-1">
|
||||
Backend: {effectiveBackend === 'camoufox-vnc' ? 'Camoufox + noVNC' : 'Playwright'}
|
||||
</span>
|
||||
<span className="rounded-md border border-border px-2 py-1">
|
||||
Playwright: {runtimeLabel(status?.playwrightInstalled)}
|
||||
</span>
|
||||
<span className="rounded-md border border-border px-2 py-1">
|
||||
Chromium: {runtimeLabel(status?.chromiumInstalled)}
|
||||
</span>
|
||||
<span className="rounded-md border border-border px-2 py-1">
|
||||
Camoufox: {runtimeLabel(status?.camoufoxInstalled)}
|
||||
</span>
|
||||
<span className="rounded-md border border-border px-2 py-1">
|
||||
noVNC: {runtimeLabel(status?.noVncInstalled)}
|
||||
</span>
|
||||
<span className="rounded-md border border-border px-2 py-1">
|
||||
Status: {isStatusLoading && !status ? 'checking...' : status?.available ? 'ready' : browserEnabled ? 'setup required' : 'disabled'}
|
||||
</span>
|
||||
@@ -172,12 +333,17 @@ export default function BrowserUseSettingsTab() {
|
||||
</div>
|
||||
)}
|
||||
|
||||
{error && (
|
||||
<div className="rounded-md border border-red-200 bg-red-50 px-3 py-2 text-sm text-red-700 dark:border-red-900/50 dark:bg-red-950/30 dark:text-red-200">
|
||||
{error}
|
||||
</div>
|
||||
)}
|
||||
<a
|
||||
href={BROWSER_USE_GUIDE_URL}
|
||||
target="_blank"
|
||||
rel="noopener noreferrer"
|
||||
className="inline-flex items-center gap-1.5 text-sm font-medium text-primary hover:underline"
|
||||
>
|
||||
Read the Browser guide
|
||||
<ExternalLink className="h-3.5 w-3.5" />
|
||||
</a>
|
||||
</div>
|
||||
)}
|
||||
</SettingsCard>
|
||||
</SettingsSection>
|
||||
</div>
|
||||
|
||||
@@ -0,0 +1,247 @@
|
||||
import { useCallback, useEffect, useState } from 'react';
|
||||
import { Download, Loader2, RefreshCw } from 'lucide-react';
|
||||
|
||||
import { Button } from '../../../../../shared/view/ui';
|
||||
import { authenticatedFetch } from '../../../../../utils/api';
|
||||
import SettingsCard from '../../SettingsCard';
|
||||
import SettingsRow from '../../SettingsRow';
|
||||
import SettingsSection from '../../SettingsSection';
|
||||
import SettingsToggle from '../../SettingsToggle';
|
||||
|
||||
// Settings payload exchanged with /api/computer-use/settings: read from
// `data.settings` in the response and sent (partially) as the PUT body.
type ComputerUseSettings = {
|
||||
// Value bound to the "Enable Computer Use" toggle.
enabled: boolean;
|
||||
};
|
||||
|
||||
// Status payload read from `data` of the /api/computer-use/status response.
type ComputerUseStatus = {
|
||||
// In the 'cloud' runtime this value (not settings.enabled) decides whether
// the feature is treated as enabled by the settings tab.
enabled: boolean;
|
||||
// Selects the cloud vs. local wording and which sections the tab shows.
runtime: 'cloud' | 'local';
|
||||
// NOTE(review): not read by the settings tab in this file — presumably an
// overall readiness flag; confirm against the server.
available: boolean;
|
||||
// Drives the "linked" / "Not linked" badge in the cloud desktop access row.
desktopAgentConnected?: boolean;
|
||||
// Number of linked desktops; when absent the tab falls back to 1 or 0
// depending on desktopAgentConnected.
desktopAgentCount?: number;
|
||||
// Shown as "Control lib: installed/missing".
nutInstalled: boolean;
|
||||
// Shown as "Screen capture: installed/missing".
screenshotInstalled: boolean;
|
||||
// While true the install button is disabled and shows a spinner.
installInProgress: boolean;
|
||||
// Shown under "Desktop runtime required"; a default text is used when empty.
message: string;
|
||||
};
|
||||
|
||||
/**
 * Parses a fetch response as JSON and returns the parsed body typed as `T`.
 *
 * Throws an `Error` when the HTTP status is not OK or when the body carries
 * `success: false`. The message is taken from the body's `error` or `details`
 * field, falling back to `Request failed (<status>)`.
 *
 * A failed response whose body is not valid JSON (for example an HTML error
 * page from a proxy) is reported as `Request failed (<status>)` instead of
 * the JSON parser's SyntaxError. A parse failure on a successful response is
 * rethrown unchanged.
 */
async function readJson<T>(response: Response): Promise<T> {
  let data;
  try {
    data = await response.json();
  } catch (parseError) {
    if (!response.ok) {
      // The status code is more useful to the user than "Unexpected token <".
      throw new Error(`Request failed (${response.status})`);
    }
    throw parseError;
  }

  // Optional chaining guards against a literal JSON `null` body.
  if (!response.ok || data?.success === false) {
    throw new Error(data?.error || data?.details || `Request failed (${response.status})`);
  }
  return data as T;
}
|
||||
|
||||
// Returns the thrown Error's own message, or the caller's fallback text when
// the thrown value is not an Error.
function describeFailure(caught: unknown, fallback: string): string {
  if (caught instanceof Error) {
    return caught.message;
  }
  return fallback;
}

/**
 * Settings tab for Computer Use.
 *
 * On mount it loads the settings and the status from the
 * /api/computer-use endpoints. It renders the enable toggle, and depending on
 * the reported runtime either the cloud desktop link state (with a manual
 * refresh) or the local runtime install prompt. Failures from loading, saving
 * or installing are shown in a red notice at the bottom of the card.
 */
export default function ComputerUseSettingsTab() {
  const [settings, setSettings] = useState<ComputerUseSettings>({ enabled: false });
  const [status, setStatus] = useState<ComputerUseStatus | null>(null);
  const [isLoading, setIsLoading] = useState(true);
  const [isSaving, setIsSaving] = useState(false);
  const [isInstalling, setIsInstalling] = useState(false);
  const [error, setError] = useState<string | null>(null);

  // Fetches settings and status in parallel. State is only updated once both
  // bodies have been read successfully; any failure propagates to the caller.
  const loadState = useCallback(async () => {
    setError(null);
    const responses = await Promise.all([
      authenticatedFetch('/api/computer-use/settings'),
      authenticatedFetch('/api/computer-use/status'),
    ]);
    const settingsBody = await readJson<{ data: { settings: ComputerUseSettings } }>(responses[0]);
    const statusBody = await readJson<{ data: ComputerUseStatus }>(responses[1]);
    setSettings(settingsBody.data.settings);
    setStatus(statusBody.data);
  }, []);

  // loadState wrapped with the loading flag and error reporting.
  const refreshState = useCallback(async () => {
    setIsLoading(true);
    try {
      await loadState();
    } catch (caught) {
      setError(describeFailure(caught, 'Failed to load Computer Use settings'));
    } finally {
      setIsLoading(false);
    }
  }, [loadState]);

  useEffect(() => {
    void refreshState();
  }, [refreshState]);

  // Sends the changed settings, announces the change on `window`, then
  // reloads settings and status.
  const updateSettings = async (nextSettings: Partial<ComputerUseSettings>) => {
    setIsSaving(true);
    setError(null);
    try {
      const saveResponse = await authenticatedFetch('/api/computer-use/settings', {
        method: 'PUT',
        body: JSON.stringify(nextSettings),
      });
      const saved = await readJson<{ data: { settings: ComputerUseSettings } }>(saveResponse);
      setSettings(saved.data.settings);
      window.dispatchEvent(new Event('computerUseSettingsChanged'));
      await loadState();
    } catch (caught) {
      setError(describeFailure(caught, 'Failed to save Computer Use settings'));
    } finally {
      setIsSaving(false);
    }
  };

  // Requests the runtime install and reloads state once the request returns.
  const installRuntime = async () => {
    setIsInstalling(true);
    setError(null);
    try {
      const installResponse = await authenticatedFetch('/api/computer-use/runtime/install', { method: 'POST' });
      await readJson(installResponse);
      await loadState();
    } catch (caught) {
      setError(describeFailure(caught, 'Failed to install Computer Use runtime'));
    } finally {
      setIsInstalling(false);
    }
  };

  // ---- Values derived from settings/status for rendering ----
  const isCloud = status?.runtime === 'cloud';
  // Cloud mode trusts the status report; local mode trusts the saved setting.
  const effectiveEnabled = (isCloud && status?.enabled === true) || (!isCloud && settings.enabled);
  const showCloudDesktopAccess = Boolean(isCloud && effectiveEnabled);
  const runtimeIncomplete = status ? !(status.nutInstalled && status.screenshotInstalled) : false;
  const needsRuntime = Boolean(effectiveEnabled && !isCloud && runtimeIncomplete);
  const isDesktopLinked = Boolean(status?.desktopAgentConnected);
  const fallbackAgentCount = isDesktopLinked ? 1 : 0;
  const desktopAgentCount = status?.desktopAgentCount ?? fallbackAgentCount;
  const installBusy = isInstalling || status?.installInProgress;
  const showDetails = needsRuntime || showCloudDesktopAccess || error;

  // ---- Copy that depends on the runtime ----
  const modeDescription = isCloud
    ? 'Let cloud agents request access to your own computer through CloudCLI Desktop.'
    : 'Let local agents request access to this computer.';
  const safetyNotice = isCloud
    ? 'A cloud agent can use your desktop only after you approve the request in CloudCLI Desktop. Stop ends access immediately.'
    : 'Agents can use your desktop only while you grant control from the Computer tab. Stop ends access immediately.';
  const usageHint = isCloud
    ? 'Keep CloudCLI Desktop open on the computer you want agents to use.'
    : 'Open the Computer tab to review requests, grant control, or stop a session.';
  const toggleDescription = isCloud
    ? 'Registers Computer Use MCP servers for supported agents and allows cloud agents to request guarded access to a linked desktop.'
    : 'Registers Computer Use for supported agents and allows CloudCLI to create guarded desktop control sessions on this machine.';
  const linkDescription = isDesktopLinked
    ? `${desktopAgentCount} ${desktopAgentCount === 1 ? 'desktop app is' : 'desktop apps are'} connected to this environment.`
    : 'Not connected yet. Link happens from CloudCLI Desktop on your computer.';
  const linkBadgeTone = isDesktopLinked
    ? 'border-emerald-500/30 text-emerald-600 dark:text-emerald-300'
    : 'border-amber-500/30 text-amber-600 dark:text-amber-300';
  const linkBadgeText = isDesktopLinked ? `${desktopAgentCount} linked` : 'Not linked';
  const linkedNote = desktopAgentCount > 1
    ? `${desktopAgentCount} desktops are linked. Agents will use one available desktop; stop Computer Use on any desktop you do not want agents to control.`
    : 'CloudCLI Desktop is linked. Approval prompts will appear there when an agent requests desktop access.';

  return (
    <div className="space-y-8">
      <SettingsSection title="Computer Use" description={modeDescription}>
        <SettingsCard divided>
          <div className="flex flex-col gap-3 px-4 py-4">
            <div className="rounded-md border border-amber-300/50 bg-amber-50 px-3 py-2 text-sm text-amber-800 dark:border-amber-900/50 dark:bg-amber-950/30 dark:text-amber-200">
              {safetyNotice}
            </div>
            {effectiveEnabled && (
              <div className="rounded-md border border-border bg-muted/40 px-3 py-2 text-sm text-muted-foreground">
                {usageHint}
              </div>
            )}
          </div>

          <SettingsRow label="Enable Computer Use" description={toggleDescription}>
            <SettingsToggle
              checked={settings.enabled}
              onChange={(value) => void updateSettings({ enabled: value })}
              ariaLabel="Enable Computer Use"
              disabled={isLoading || isSaving}
            />
          </SettingsRow>

          {showCloudDesktopAccess && (
            <SettingsRow label="Cloud desktop access" description={linkDescription}>
              <div className="flex items-center gap-2">
                <Button
                  type="button"
                  size="sm"
                  onClick={() => void refreshState()}
                  disabled={isLoading}
                  className="h-8"
                >
                  <RefreshCw className={`h-4 w-4 ${isLoading ? 'animate-spin' : ''}`} />
                  Refresh
                </Button>
                <div className={`rounded-md border px-2.5 py-1 text-xs font-medium ${linkBadgeTone}`}>
                  {linkBadgeText}
                </div>
              </div>
            </SettingsRow>
          )}

          {showDetails && (
            <div className="space-y-4 px-4 py-4">
              {showCloudDesktopAccess && !isDesktopLinked && (
                <div className="rounded-md border border-border bg-muted/40 px-3 py-3 text-sm text-muted-foreground">
                  <div className="font-medium text-foreground">To link this computer</div>
                  <ol className="mt-2 list-decimal space-y-1 pl-5">
                    <li>Open CloudCLI Desktop on the computer you want agents to use.</li>
                    <li>Connect the same CloudCLI account used for this cloud environment.</li>
                    <li>Open Desktop Settings and turn on Computer Use.</li>
                    <li>Keep the desktop app running. This status changes to Desktop linked automatically.</li>
                  </ol>
                </div>
              )}

              {showCloudDesktopAccess && isDesktopLinked && (
                <div className="rounded-md border border-border bg-muted/40 px-3 py-2 text-sm text-muted-foreground">
                  {linkedNote}
                </div>
              )}

              {needsRuntime && (
                <div className="flex flex-col gap-3 sm:flex-row sm:items-start sm:justify-between">
                  <div className="min-w-0 space-y-1">
                    <div className="text-sm font-medium text-foreground">Desktop runtime required</div>
                    <p className="text-sm text-muted-foreground">
                      {status?.message || 'Install the desktop control runtime needed to capture the screen and drive input.'}
                    </p>
                    <div className="flex flex-wrap gap-2 pt-1 text-xs text-muted-foreground">
                      <span className="rounded-md border border-border px-2 py-1">
                        Control lib: {status?.nutInstalled ? 'installed' : 'missing'}
                      </span>
                      <span className="rounded-md border border-border px-2 py-1">
                        Screen capture: {status?.screenshotInstalled ? 'installed' : 'missing'}
                      </span>
                    </div>
                  </div>

                  <Button
                    type="button"
                    size="sm"
                    onClick={() => void installRuntime()}
                    disabled={installBusy}
                    className="flex-shrink-0"
                  >
                    {installBusy ? (
                      <Loader2 className="h-4 w-4 animate-spin" />
                    ) : (
                      <Download className="h-4 w-4" />
                    )}
                    {installBusy ? 'Installing…' : 'Install Runtime'}
                  </Button>
                </div>
              )}

              {error && (
                <div className="rounded-md border border-red-200 bg-red-50 px-3 py-2 text-sm text-red-700 dark:border-red-900/50 dark:bg-red-950/30 dark:text-red-200">
                  {error}
                </div>
              )}
            </div>
          )}
        </SettingsCard>
      </SettingsSection>
    </div>
  );
}
|
||||
3
src/constants/featureFlags.ts
Normal file
3
src/constants/featureFlags.ts
Normal file
@@ -0,0 +1,3 @@
|
||||
// Feature flag for the Computer Use menus. It is currently switched off.
// TODO: Re-enable Computer Use menus after fixing the MCP server connection
|
||||
// between the desktop app and the web UI.
|
||||
export const COMPUTER_USE_MENUS_ENABLED = false;
|
||||
@@ -324,7 +324,7 @@ const removeSessionFromProject = (project: Project, sessionIdToDelete: string):
|
||||
return updatedProject;
|
||||
};
|
||||
|
||||
const VALID_TABS: Set<string> = new Set(['chat', 'files', 'shell', 'git', 'tasks', 'browser']);
|
||||
const VALID_TABS: Set<string> = new Set(['chat', 'files', 'shell', 'git', 'tasks', 'browser', 'computer']);
|
||||
|
||||
const isValidTab = (tab: string): tab is AppTab => {
|
||||
return VALID_TABS.has(tab) || tab.startsWith('plugin:');
|
||||
@@ -776,7 +776,7 @@ export function useProjectsState({
|
||||
(session: ProjectSession) => {
|
||||
setSelectedSession(session);
|
||||
|
||||
if (activeTab === 'tasks' || activeTab === 'browser') {
|
||||
if (activeTab === 'tasks' || activeTab === 'browser' || activeTab === 'computer') {
|
||||
setActiveTab('chat');
|
||||
}
|
||||
|
||||
|
||||
@@ -94,6 +94,7 @@
|
||||
"git": "Git",
|
||||
"apiTokens": "API & Token",
|
||||
"tasks": "Aufgaben",
|
||||
"computer": "Computer Use",
|
||||
"notifications": "Benachrichtigungen",
|
||||
"plugins": "Plugins",
|
||||
"about": "Info"
|
||||
|
||||
@@ -113,6 +113,7 @@
|
||||
"voice": "Voice",
|
||||
"tasks": "Tasks",
|
||||
"browser": "Browser",
|
||||
"computer": "Computer Use",
|
||||
"notifications": "Notifications",
|
||||
"plugins": "Plugins",
|
||||
"about": "About"
|
||||
|
||||
@@ -94,6 +94,7 @@
|
||||
"git": "Git",
|
||||
"apiTokens": "API & Tokens",
|
||||
"tasks": "Tâches",
|
||||
"computer": "Computer Use",
|
||||
"notifications": "Notifications",
|
||||
"plugins": "Plugins",
|
||||
"about": "À propos"
|
||||
|
||||
@@ -94,6 +94,7 @@
|
||||
"git": "Git",
|
||||
"apiTokens": "API e Token",
|
||||
"tasks": "Attività",
|
||||
"computer": "Computer Use",
|
||||
"notifications": "Notifiche",
|
||||
"plugins": "Plugin",
|
||||
"about": "Informazioni"
|
||||
|
||||
@@ -94,6 +94,7 @@
|
||||
"git": "Git",
|
||||
"apiTokens": "API & トークン",
|
||||
"tasks": "タスク",
|
||||
"computer": "Computer Use",
|
||||
"notifications": "通知",
|
||||
"plugins": "プラグイン",
|
||||
"about": "概要"
|
||||
|
||||
@@ -94,6 +94,7 @@
|
||||
"git": "Git",
|
||||
"apiTokens": "API & 토큰",
|
||||
"tasks": "작업",
|
||||
"computer": "Computer Use",
|
||||
"notifications": "알림",
|
||||
"plugins": "플러그인",
|
||||
"about": "정보"
|
||||
|
||||
@@ -94,6 +94,7 @@
|
||||
"git": "Git",
|
||||
"apiTokens": "API и токены",
|
||||
"tasks": "Задачи",
|
||||
"computer": "Computer Use",
|
||||
"notifications": "Уведомления",
|
||||
"plugins": "Плагины",
|
||||
"about": "О программе"
|
||||
|
||||
@@ -17,7 +17,7 @@ export type ProviderModelsCacheInfo = {
|
||||
source: 'memory' | 'disk' | 'fresh';
|
||||
};
|
||||
|
||||
export type AppTab = 'chat' | 'files' | 'shell' | 'git' | 'tasks' | 'browser' | `plugin:${string}`;
|
||||
export type AppTab = 'chat' | 'files' | 'shell' | 'git' | 'tasks' | 'browser' | 'computer' | `plugin:${string}`;
|
||||
|
||||
export interface ProjectSession {
|
||||
id: string;
|
||||
|
||||
Reference in New Issue
Block a user