Compare commits

..

11 Commits

Author SHA1 Message Date
Simos Mikelatos
f188648a2a fix(skills): show add skill dialog above settings 2026-06-30 10:34:59 +00:00
Simos Mikelatos
cdf1a04e26 fix(redesign): redesign hermes skills add flow 2026-06-30 10:29:19 +00:00
Simos Mikelatos
048c671b13 feat: add Hermes provider 2026-06-30 09:51:18 +00:00
Haile
2ebe64f218 fix: preview video on new tab (#933) 2026-06-29 15:36:31 +02:00
Haile
b6cf33308d fix: resolve mobile shell issues (#923) 2026-06-29 14:19:01 +02:00
Simos Mikelatos
6761f31a56 chore: remove computer use 2026-06-29 10:31:11 +00:00
viper151
35da5d090d chore(release): v1.35.0 2026-06-29 10:07:59 +00:00
Simos Mikelatos
d882f80b6d Consolidate desktop release workflow 2026-06-29 09:40:28 +00:00
Haile
053f244d14 Chat & sidebar UX improvements (#929) 2026-06-29 09:37:24 +00:00
Simos Mikelatos
97c9b67bfc feat: add Electron desktop app 2026-06-29 09:37:21 +00:00
turato
ed4ae3114a fix(chat): prevent chat interface crash on malformed AskUserQuestion payload (#920)
* fix(chat): prevent chat interface crash when AskUserQuestion payload is malformed

Loading a session that contains an AskUserQuestion tool call could crash the
entire chat interface with "TypeError: e.map is not a function".

The AskUserQuestion tool is configured with `defaultOpen: true`, so
QuestionAnswerContent renders as soon as the session loads. Its array guard
(`!questions || questions.length === 0`) only checked for truthiness, and
`q.options` was mapped/iterated with no guard at all. When `questions` or
`options` arrive from the session transcript as a non-array value, the
`.map()` / `.some()` calls throw and take down the whole chat view via the
error boundary.

Guard both with `Array.isArray()` so a single malformed message can no longer
crash the interface.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>

* test(chat): cover QuestionAnswerContent against malformed AskUserQuestion payloads

Adds the first frontend regression test, guarding the crash fixed in the
previous commit: a non-array `questions` value or a question missing its
`options` array must render gracefully instead of throwing
"e.map is not a function" and taking down the whole chat interface.

Follows the repo's existing test convention (node:test + tsx); uses
react-dom/server renderToStaticMarkup so no DOM/jsdom is required.
Run with: npx tsx --test src/**/QuestionAnswerContent.test.tsx

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>

* fix(chat): harden QuestionAnswerContent against malformed question entries

Addresses review feedback: even with the array guards, a malformed transcript
could still crash before the options fallback ran —

- a `questions` entry that is null/non-object threw on `q.question` access
- a non-string `answers[q.question]` threw on `answer.split(', ')`

Skip entries that aren't a proper question object with a string prompt, and
only call string methods on the answer when it is actually a string. Extends
the regression test to cover both vectors.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>

* fix(chat): guard malformed question options

---------

Co-authored-by: hustuhao <hustuhao@users.noreply.github.com>
Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Co-authored-by: Simos Mikelatos <simosmik@gmail.com>
2026-06-26 16:47:24 +02:00
137 changed files with 5608 additions and 7110 deletions

View File

@@ -28,6 +28,9 @@ HOST=0.0.0.0
# Uncomment the following line if you have a custom claude cli path other than the default "claude"
# CLAUDE_CLI_PATH=claude
# Uncomment the following line if you want a custom Hermes ACP launcher
# HERMES_CLI_PATH=hermes acp
# =============================================================================
# DATABASE CONFIGURATION
# =============================================================================
@@ -42,4 +45,3 @@ HOST=0.0.0.0
VITE_CONTEXT_WINDOW=160000
CONTEXT_WINDOW=160000

View File

@@ -1,151 +0,0 @@
name: Desktop macOS Release
on:
workflow_dispatch:
inputs:
tag:
description: 'Release tag to create or update (defaults to v<package version>)'
required: false
type: string
release_name:
description: 'Release name (defaults to "CloudCLI Desktop macOS <tag>")'
required: false
type: string
prerelease:
description: 'Mark the GitHub release as a prerelease'
required: true
default: false
type: boolean
jobs:
build-macos:
name: Build signed macOS desktop app
runs-on: macos-latest
permissions:
contents: write
steps:
- name: Checkout
uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6
with:
fetch-depth: 0
persist-credentials: false
- name: Set up Node.js
uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6
with:
node-version: 22
cache: npm
- name: Install dependencies
run: npm ci
- name: Typecheck
run: npm run typecheck
- name: Resolve release metadata
id: release
env:
TAG_INPUT: ${{ inputs.tag }}
RELEASE_NAME_INPUT: ${{ inputs.release_name }}
run: |
VERSION="$(node -p "require('./package.json').version")"
TAG="$TAG_INPUT"
if [ -z "$TAG" ]; then
TAG="v${VERSION}"
fi
TAG="$(printf '%s' "$TAG" | tr -d '\r\n' | sed 's/[^A-Za-z0-9._-]/-/g')"
if [ -z "$TAG" ]; then
echo "Resolved release tag is empty after normalization." >&2
exit 1
fi
RELEASE_NAME="$RELEASE_NAME_INPUT"
if [ -z "$RELEASE_NAME" ]; then
RELEASE_NAME="CloudCLI Desktop macOS ${TAG}"
fi
RELEASE_NAME_DELIMITER="release_name_$(uuidgen)"
{
echo "tag=$TAG"
echo "release_name<<$RELEASE_NAME_DELIMITER"
printf '%s\n' "$RELEASE_NAME"
echo "$RELEASE_NAME_DELIMITER"
echo "server_bundle_tag=cloudcli-local-server-${TAG}"
} >> "$GITHUB_OUTPUT"
- name: Configure release server bundle source
env:
SERVER_BUNDLE_TAG: ${{ steps.release.outputs.server_bundle_tag }}
run: printf '{"releaseTag":"%s"}\n' "$SERVER_BUNDLE_TAG" > electron/server-bundle-config.json
- name: Verify signing secrets are configured
run: |
test -n "$CSC_LINK"
test -n "$CSC_KEY_PASSWORD"
test -n "$APPLE_ID"
test -n "$APPLE_APP_SPECIFIC_PASSWORD"
test -n "$APPLE_TEAM_ID"
env:
CSC_LINK: ${{ secrets.CSC_LINK }}
CSC_KEY_PASSWORD: ${{ secrets.CSC_KEY_PASSWORD }}
APPLE_ID: ${{ secrets.APPLE_ID }}
APPLE_APP_SPECIFIC_PASSWORD: ${{ secrets.APPLE_APP_SPECIFIC_PASSWORD }}
APPLE_TEAM_ID: ${{ secrets.APPLE_TEAM_ID }}
- name: Build signed and notarized macOS artifacts
run: npm run desktop:dist:mac -- --publish never
env:
CLOUDCLI_SEMANTICS_BUILD_REQUIRED: "1"
CSC_LINK: ${{ secrets.CSC_LINK }}
CSC_KEY_PASSWORD: ${{ secrets.CSC_KEY_PASSWORD }}
APPLE_ID: ${{ secrets.APPLE_ID }}
APPLE_APP_SPECIFIC_PASSWORD: ${{ secrets.APPLE_APP_SPECIFIC_PASSWORD }}
APPLE_TEAM_ID: ${{ secrets.APPLE_TEAM_ID }}
- name: Build local server bundle
run: node scripts/release/build-server-bundle.js
- name: Verify local server runtime artifacts
run: |
test -n "$(find release/local-server -maxdepth 1 -name 'cloudcli-local-server-*.tar.gz' -print -quit)"
test -n "$(find release/local-server -maxdepth 1 -name 'cloudcli-local-server-*.tar.gz.sha256' -print -quit)"
- name: Publish local server runtime assets
uses: softprops/action-gh-release@b4309332981a82ec1c5618f44dd2e27cc8bfbfda # v3.0.0
with:
tag_name: ${{ steps.release.outputs.server_bundle_tag }}
target_commitish: ${{ github.sha }}
name: CloudCLI Local Server Runtime (${{ steps.release.outputs.tag }})
body: |
This prerelease contains the Local mode runtime for CloudCLI Desktop.
Download CloudCLI Desktop from the main ${{ steps.release.outputs.tag }} release. When you open Local CloudCLI, the desktop app automatically downloads the matching runtime from this prerelease.
You do not need to download these runtime files manually.
prerelease: true
fail_on_unmatched_files: false
overwrite_files: true
files: |
release/local-server/*
- name: Verify macOS artifacts
run: |
test -n "$(find release/desktop -maxdepth 1 -name '*.dmg' -print -quit)"
shasum -a 256 release/desktop/*.dmg > release/SHASUMS256.txt
cat release/SHASUMS256.txt
- name: Publish GitHub release assets
uses: softprops/action-gh-release@b4309332981a82ec1c5618f44dd2e27cc8bfbfda # v3.0.0
with:
tag_name: ${{ steps.release.outputs.tag }}
target_commitish: ${{ github.sha }}
name: ${{ steps.release.outputs.release_name }}
body: |
Download the CloudCLI Desktop installer for your Mac.
The local server runtime used by local mode is installed automatically by the desktop app. You do not need to download any server bundle manually.
prerelease: ${{ inputs.prerelease }}
fail_on_unmatched_files: false
files: |
release/desktop/*.dmg
release/SHASUMS256.txt

305
.github/workflows/desktop-release.yml vendored Normal file
View File

@@ -0,0 +1,305 @@
name: Desktop Release
on:
workflow_dispatch:
inputs:
tag:
description: "Release tag to create or update (defaults to v<package version>)"
required: false
type: string
release_name:
description: 'Release name (defaults to "CloudCLI Desktop <tag>")'
required: false
type: string
prerelease:
description: "Mark the GitHub release as a prerelease"
required: true
default: false
type: boolean
jobs:
resolve-release:
name: Resolve release metadata
runs-on: ubuntu-latest
permissions:
contents: read
outputs:
tag: ${{ steps.release.outputs.tag }}
release_name: ${{ steps.release.outputs.release_name }}
server_bundle_tag: ${{ steps.release.outputs.server_bundle_tag }}
steps:
- name: Checkout
uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6
with:
persist-credentials: false
- name: Resolve release metadata
id: release
env:
TAG_INPUT: ${{ inputs.tag }}
RELEASE_NAME_INPUT: ${{ inputs.release_name }}
run: |
VERSION="$(node -p "require('./package.json').version")"
TAG="$TAG_INPUT"
if [ -z "$TAG" ]; then
TAG="v${VERSION}"
fi
TAG="$(printf '%s' "$TAG" | tr -d '\r\n' | sed 's/[^A-Za-z0-9._-]/-/g')"
if [ -z "$TAG" ]; then
echo "Resolved release tag is empty after normalization." >&2
exit 1
fi
RELEASE_NAME="$RELEASE_NAME_INPUT"
if [ -z "$RELEASE_NAME" ]; then
RELEASE_NAME="CloudCLI Desktop ${TAG}"
fi
RELEASE_NAME_DELIMITER="release_name_${GITHUB_RUN_ID}_${GITHUB_RUN_ATTEMPT}"
{
echo "tag=$TAG"
echo "release_name<<$RELEASE_NAME_DELIMITER"
printf '%s\n' "$RELEASE_NAME"
echo "$RELEASE_NAME_DELIMITER"
echo "server_bundle_tag=cloudcli-local-server-${TAG}"
} >> "$GITHUB_OUTPUT"
build-macos:
name: Build signed macOS desktop app
needs: resolve-release
runs-on: macos-latest
permissions:
contents: read
steps:
- name: Checkout
uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6
with:
fetch-depth: 0
persist-credentials: false
- name: Set up Node.js
uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6
with:
node-version: 22
cache: npm
- name: Install dependencies
run: npm ci
- name: Typecheck
run: npm run typecheck
- name: Configure release server bundle source
env:
SERVER_BUNDLE_TAG: ${{ needs.resolve-release.outputs.server_bundle_tag }}
run: printf '{"releaseTag":"%s"}\n' "$SERVER_BUNDLE_TAG" > electron/server-bundle-config.json
- name: Verify macOS signing secrets are configured
run: |
test -n "$CSC_LINK"
test -n "$CSC_KEY_PASSWORD"
test -n "$APPLE_ID"
test -n "$APPLE_APP_SPECIFIC_PASSWORD"
test -n "$APPLE_TEAM_ID"
env:
CSC_LINK: ${{ secrets.CSC_LINK }}
CSC_KEY_PASSWORD: ${{ secrets.CSC_KEY_PASSWORD }}
APPLE_ID: ${{ secrets.APPLE_ID }}
APPLE_APP_SPECIFIC_PASSWORD: ${{ secrets.APPLE_APP_SPECIFIC_PASSWORD }}
APPLE_TEAM_ID: ${{ secrets.APPLE_TEAM_ID }}
- name: Build signed and notarized macOS artifacts
run: npm run desktop:dist:mac -- --publish never
env:
CLOUDCLI_SEMANTICS_BUILD_REQUIRED: "1"
CSC_LINK: ${{ secrets.CSC_LINK }}
CSC_KEY_PASSWORD: ${{ secrets.CSC_KEY_PASSWORD }}
APPLE_ID: ${{ secrets.APPLE_ID }}
APPLE_APP_SPECIFIC_PASSWORD: ${{ secrets.APPLE_APP_SPECIFIC_PASSWORD }}
APPLE_TEAM_ID: ${{ secrets.APPLE_TEAM_ID }}
- name: Build macOS local server bundle
run: node scripts/release/build-server-bundle.js
- name: Stage macOS release assets
run: |
mkdir -p desktop-release-assets server-release-assets
test -n "$(find release/desktop -maxdepth 1 -name '*.dmg' -print -quit)"
shasum -a 256 release/desktop/*.dmg > desktop-release-assets/SHASUMS256-macos.txt
cp release/desktop/*.dmg desktop-release-assets/
test -n "$(find release/local-server -maxdepth 1 -name 'cloudcli-local-server-*.tar.gz' -print -quit)"
test -n "$(find release/local-server -maxdepth 1 -name 'cloudcli-local-server-*.tar.gz.sha256' -print -quit)"
cp release/local-server/* server-release-assets/
- name: Upload macOS desktop assets
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6
with:
name: desktop-release-macos
path: desktop-release-assets/*
if-no-files-found: error
- name: Upload macOS server assets
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6
with:
name: server-release-macos
path: server-release-assets/*
if-no-files-found: error
build-windows:
name: Build Windows desktop app
needs: resolve-release
runs-on: windows-latest
permissions:
contents: read
steps:
- name: Checkout
uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6
with:
fetch-depth: 0
persist-credentials: false
- name: Set up Node.js
uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6
with:
node-version: 22
cache: npm
- name: Install dependencies
run: npm ci
env:
GITHUB_TOKEN: ${{ github.token }}
- name: Typecheck
run: npm run typecheck
- name: Configure release server bundle source
shell: bash
env:
SERVER_BUNDLE_TAG: ${{ needs.resolve-release.outputs.server_bundle_tag }}
run: printf '{"releaseTag":"%s"}\n' "$SERVER_BUNDLE_TAG" > electron/server-bundle-config.json
- name: Check Windows signing secrets
id: windows-signing
shell: bash
env:
WINDOWS_CSC_LINK: ${{ secrets.WINDOWS_CSC_LINK }}
WINDOWS_CSC_KEY_PASSWORD: ${{ secrets.WINDOWS_CSC_KEY_PASSWORD }}
run: |
if [ -n "$WINDOWS_CSC_LINK" ] && [ -n "$WINDOWS_CSC_KEY_PASSWORD" ]; then
echo "enabled=true" >> "$GITHUB_OUTPUT"
else
echo "enabled=false" >> "$GITHUB_OUTPUT"
fi
- name: Build signed Windows artifacts
if: steps.windows-signing.outputs.enabled == 'true'
run: npm run desktop:dist:win -- --publish never
env:
CLOUDCLI_SEMANTICS_BUILD_REQUIRED: "1"
CSC_LINK: ${{ secrets.WINDOWS_CSC_LINK }}
CSC_KEY_PASSWORD: ${{ secrets.WINDOWS_CSC_KEY_PASSWORD }}
- name: Build unsigned Windows artifacts
if: steps.windows-signing.outputs.enabled != 'true'
run: npm run desktop:dist:win -- --publish never
env:
CLOUDCLI_SEMANTICS_BUILD_REQUIRED: "1"
CSC_IDENTITY_AUTO_DISCOVERY: "false"
- name: Build Windows local server bundle
run: node scripts/release/build-server-bundle.js
- name: Stage Windows release assets
shell: bash
run: |
mkdir -p desktop-release-assets server-release-assets
test -n "$(find release/desktop -maxdepth 1 -name '*.exe' -print -quit)"
sha256sum release/desktop/*.exe > desktop-release-assets/SHASUMS256-windows.txt
cp release/desktop/*.exe desktop-release-assets/
test -n "$(find release/local-server -maxdepth 1 -name 'cloudcli-local-server-*.tar.gz' -print -quit)"
test -n "$(find release/local-server -maxdepth 1 -name 'cloudcli-local-server-*.tar.gz.sha256' -print -quit)"
cp release/local-server/* server-release-assets/
- name: Upload Windows desktop assets
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6
with:
name: desktop-release-windows
path: desktop-release-assets/*
if-no-files-found: error
- name: Upload Windows server assets
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6
with:
name: server-release-windows
path: server-release-assets/*
if-no-files-found: error
publish:
name: Publish desktop release
needs:
- resolve-release
- build-macos
- build-windows
runs-on: ubuntu-latest
permissions:
contents: write
steps:
- name: Download desktop assets
uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6
with:
pattern: desktop-release-*
path: release/desktop
merge-multiple: true
- name: Download server assets
uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6
with:
pattern: server-release-*
path: release/local-server
merge-multiple: true
- name: Verify release assets
run: |
test -n "$(find release/desktop -maxdepth 1 -name '*.dmg' -print -quit)"
test -n "$(find release/desktop -maxdepth 1 -name '*.exe' -print -quit)"
test -f release/desktop/SHASUMS256-macos.txt
test -f release/desktop/SHASUMS256-windows.txt
test -n "$(find release/local-server -maxdepth 1 -name 'cloudcli-local-server-*.tar.gz' -print -quit)"
test -n "$(find release/local-server -maxdepth 1 -name 'cloudcli-local-server-*.tar.gz.sha256' -print -quit)"
find release -maxdepth 2 -type f -print | sort
- name: Publish local server runtime assets
uses: softprops/action-gh-release@b4309332981a82ec1c5618f44dd2e27cc8bfbfda # v3.0.0
with:
tag_name: ${{ needs.resolve-release.outputs.server_bundle_tag }}
target_commitish: ${{ github.sha }}
name: CloudCLI Local Server Runtime (${{ needs.resolve-release.outputs.tag }})
body: |
This prerelease contains the Local mode runtime for CloudCLI Desktop.
Download CloudCLI Desktop from the main ${{ needs.resolve-release.outputs.tag }} release. When you open Local CloudCLI, the desktop app automatically downloads the matching runtime from this prerelease.
You do not need to download these runtime files manually.
prerelease: true
fail_on_unmatched_files: false
overwrite_files: true
files: |
release/local-server/*
- name: Publish GitHub release assets
uses: softprops/action-gh-release@b4309332981a82ec1c5618f44dd2e27cc8bfbfda # v3.0.0
with:
tag_name: ${{ needs.resolve-release.outputs.tag }}
target_commitish: ${{ github.sha }}
name: ${{ needs.resolve-release.outputs.release_name }}
body: |
Download the CloudCLI Desktop installer for your platform.
The local server runtime used by local mode is installed automatically by the desktop app. You do not need to download any server bundle manually.
prerelease: ${{ inputs.prerelease }}
fail_on_unmatched_files: false
overwrite_files: true
files: |
release/desktop/*

View File

@@ -4,9 +4,9 @@ on:
workflow_dispatch:
inputs:
increment:
description: 'Version bump: patch, minor, major, or explicit (e.g. 1.27.0)'
description: "Version bump: patch, minor, major, or explicit (e.g. 1.27.0)"
required: true
default: 'patch'
default: "patch"
type: string
release_name:
description: 'Custom release name (optional, defaults to "CloudCLI UI vX.Y.Z")'
@@ -124,6 +124,9 @@ jobs:
path: server/modules/computer-use/semantics/bin
merge-multiple: true
- name: Restore semantic helper permissions
run: find server/modules/computer-use/semantics/bin -path '*/darwin-*/CloudCLISemantics' -type f -exec chmod 755 {} +
- name: Verify bundled semantic helpers
run: |
test -x server/modules/computer-use/semantics/bin/darwin-arm64/CloudCLISemantics

View File

@@ -3,6 +3,59 @@
All notable changes to CloudCLI UI will be documented in this file.
## [1.35.0](https://github.com/siteboon/claudecodeui/compare/v1.34.0...v1.35.0) (2026-06-29)
### New Features
* add Electron desktop app ([97c9b67](https://github.com/siteboon/claudecodeui/commit/97c9b67bfc2d803560cd1559a4e79eea9731c7b5))
* **chat:** derive activity indicator from per-session state and unify provider lifecycle events ([afc717e](https://github.com/siteboon/claudecodeui/commit/afc717e69e67f53173c30d2230722236f9180d39))
* **chat:** unify session gateway with stable IDs and a single WS protocol ([f5eac2e](https://github.com/siteboon/claudecodeui/commit/f5eac2ec12c8575bf80202fafe807d9e04720105))
* **i18n:** add French (fr) locale ([#878](https://github.com/siteboon/claudecodeui/issues/878)) ([f319d2c](https://github.com/siteboon/claudecodeui/commit/f319d2cf8d61452deaf6adf345494dd3e6898284))
* play sound for pending tool requests ([#918](https://github.com/siteboon/claudecodeui/issues/918)) ([c947eaa](https://github.com/siteboon/claudecodeui/commit/c947eaaee5fbc959563efb917f4ec7c88847dd6b))
* render changelog as markdown in version upgrade modal ([6a53c31](https://github.com/siteboon/claudecodeui/commit/6a53c31e907fffa79320997c27f99660c946b4a6))
* **sidebar:** improve running session state tracking ([591b18e](https://github.com/siteboon/claudecodeui/commit/591b18e9e343fda23affe100a53911f76aaa8f57))
* **skills:** add provider skill management ([#909](https://github.com/siteboon/claudecodeui/issues/909)) ([c5fe127](https://github.com/siteboon/claudecodeui/commit/c5fe127958d830eee19d008d8634c0e7d77fe1b9))
* **version:** warn when the server was updated but not restarted ([#898](https://github.com/siteboon/claudecodeui/issues/898)) ([f6326c8](https://github.com/siteboon/claudecodeui/commit/f6326c8082dfbe8a65dcdb836d3e71c635594c26))
### Bug Fixes
* changes provider logos to svg for fast load ([7bed675](https://github.com/siteboon/claudecodeui/commit/7bed675ad5fd1ecf7912d1a04afe9db5b1032823))
* **chat:** prevent chat interface crash on malformed AskUserQuestion payload ([#920](https://github.com/siteboon/claudecodeui/issues/920)) ([ed4ae31](https://github.com/siteboon/claudecodeui/commit/ed4ae3114aafc1d4ecb0b621eaf9d3b26dbca5b1))
* **chat:** prevent normalizeInlineCodeFences from breaking adjacent fenced code blocks ([#903](https://github.com/siteboon/claudecodeui/issues/903)) ([4712431](https://github.com/siteboon/claudecodeui/commit/4712431be81718dfb559ef43d7d7d5315bf4e01a))
* **chat:** sort messages appropriately ([123ae31](https://github.com/siteboon/claudecodeui/commit/123ae310207fe5969c3b313f62b9dee27e5d7489))
* **claude-sync:** skip subagent transcripts to prevent main session corruption ([#854](https://github.com/siteboon/claudecodeui/issues/854)) ([a12ca8e](https://github.com/siteboon/claudecodeui/commit/a12ca8eed373ef56cd37fbdd097845eaab34dee9))
* correct notification session id ([881e72d](https://github.com/siteboon/claudecodeui/commit/881e72d4a00ec9c1a5e1ae4799bffa900f27c1f8))
* create one unified function for frontend session processing ([677d330](https://github.com/siteboon/claudecodeui/commit/677d330981ef29a856f09e62b9f69bac0fa580d4))
* **i18n:** add missing sidebar message keys to all locales ([#896](https://github.com/siteboon/claudecodeui/issues/896)) ([7ca3556](https://github.com/siteboon/claudecodeui/commit/7ca355651f0a805965bc27af3d75def626c5fb96))
* keep running-session polling active ([39b0473](https://github.com/siteboon/claudecodeui/commit/39b0473e38201c29ff1e5388946452d2eed44527))
* normalize project session payloads ([d0adddb](https://github.com/siteboon/claudecodeui/commit/d0adddbbdafecfd5713a8ac5b95c87a8f7fc54f8))
* **opencode:** bind watcher sessions to app rows early ([5b9adbb](https://github.com/siteboon/claudecodeui/commit/5b9adbbdee8561439a27ad90744388225823427b))
* **opencode:** pass workspace dir explicitly ([416a737](https://github.com/siteboon/claudecodeui/commit/416a737d76e654d2fc649206c2b921a7db150775))
* recover pending permission requests ([56b2e14](https://github.com/siteboon/claudecodeui/commit/56b2e1405967c50301d0c773567349763edc8560))
* remove provider specific token usage calculator ([2abb456](https://github.com/siteboon/claudecodeui/commit/2abb45636b5e1109733cfa58c8ab92fd4c812165))
* resolve session provider on backend reads ([9fb2d91](https://github.com/siteboon/claudecodeui/commit/9fb2d91b26bef9579337d953a29718802c466fed))
* **sessions:** canonicalize sidebar ids and timestamps ([3bbb42c](https://github.com/siteboon/claudecodeui/commit/3bbb42c23324c3cbb5587f2bcab09b1dc23086a8))
* **shell:** prioritize user npm binaries ([#913](https://github.com/siteboon/claudecodeui/issues/913)) ([4a503b1](https://github.com/siteboon/claudecodeui/commit/4a503b1dc87ff58821670c8bfb1d8a8c1dab2bcf))
* **shell:** use correct session id ([89f0524](https://github.com/siteboon/claudecodeui/commit/89f05247eddec4fe53bd1616c6a5563e3ae2427a))
* **sidebar:** align session status controls across layouts ([1b336e9](https://github.com/siteboon/claudecodeui/commit/1b336e9aa9d2cccf0676d852815d9ba613ac04d2))
* upgrade gemini logo ([9cb2afd](https://github.com/siteboon/claudecodeui/commit/9cb2afd67eb25a4f869b88abcf86f7748b2b6d71))
* voice tts format settings ([#919](https://github.com/siteboon/claudecodeui/issues/919)) ([591e8e7](https://github.com/siteboon/claudecodeui/commit/591e8e7642589b0584f9b29b46b881aaab54624e))
### Documentation
* update available plugin readmes ([f549bd9](https://github.com/siteboon/claudecodeui/commit/f549bd99e7106362a27cf4ccee6e9d434b8b5363))
* update session activity guard comment ([e23e6af](https://github.com/siteboon/claudecodeui/commit/e23e6af06a44cc4b016df5778984602d49e52629))
### Maintenance
* add github issues board plugin ([21b0f14](https://github.com/siteboon/claudecodeui/commit/21b0f14e7a86f257c65484742c43b9f85152b32c))
* add more plugins list ([bc34085](https://github.com/siteboon/claudecodeui/commit/bc34085af9912da8d8592881a5845cff84a53f7d))
* move tests to appropriate folder ([d7a38a5](https://github.com/siteboon/claudecodeui/commit/d7a38a567a5e9039935353a886310b3c32b25a79))
* move tests to appropriate folder ([c6c153e](https://github.com/siteboon/claudecodeui/commit/c6c153e7f2a60572b08d687b59f010b4ad4f5d72))
* remove a log ([00e526b](https://github.com/siteboon/claudecodeui/commit/00e526b6e90ee0baf09ebf48873bc10824ab80ba))
* remove unused modelConstants from the project ([92de0ed](https://github.com/siteboon/claudecodeui/commit/92de0ed6137bf4571056deb3b930cc9fd22e2a08))
* upgrade gemini models ([3d94821](https://github.com/siteboon/claudecodeui/commit/3d948217ef3084e764171ebc5dda55f663150b2c))
## [1.34.0](https://github.com/siteboon/claudecodeui/compare/v1.33.3...v1.34.0) (2026-06-09)
### New Features

View File

@@ -1,290 +0,0 @@
import { spawn } from 'node:child_process';
import fs from 'node:fs/promises';
import path from 'node:path';
const IPC_PREFIX = '@@CUAGENT@@';
const TARGET_STATUS_TIMEOUT_MS = 5000;
/**
 * Builds the PATH value passed to the spawned agent process.
 *
 * On non-Windows platforms a fixed list of common binary directories is placed
 * in front of the inherited PATH; on Windows only the inherited PATH is used.
 * Empty segments are dropped before joining with the platform delimiter.
 *
 * @returns {string} The PATH string for the child process environment.
 */
function getDesktopPath() {
  const segments = [];
  if (process.platform !== 'win32') {
    segments.push('/opt/homebrew/bin', '/usr/local/bin', '/usr/bin', '/bin', '/usr/sbin', '/sbin');
  }

  const inheritedPath = process.env.PATH || '';
  if (inheritedPath) {
    segments.push(inheritedPath);
  }

  return segments.join(path.delimiter);
}
/**
 * Picks the executable used to run the agent script.
 *
 * Order of preference:
 *   1. Packaged build running under Electron: the current executable, with
 *      ELECTRON_RUN_AS_NODE=1 added to the child environment.
 *   2. The interpreter named by the `npm_node_execpath` environment variable.
 *   3. Plain `node`, resolved through PATH.
 *
 * @param {boolean} isPackaged - Whether the app runs from a packaged build.
 * @returns {{ command: string, env: Record<string, string> }} The command and
 *   the extra environment variables to merge into the child environment.
 */
function getNodeRuntime(isPackaged) {
  const insidePackagedElectron = isPackaged && process.versions.electron;
  if (insidePackagedElectron) {
    return { command: process.execPath, env: { ELECTRON_RUN_AS_NODE: '1' } };
  }

  const npmNodePath = process.env.npm_node_execpath;
  return { command: npmNodePath ? npmNodePath : 'node', env: {} };
}
/**
 * Converts an environment base URL into the WebSocket URL of its desktop-agent
 * endpoint.
 *
 * `http:` maps to `ws:` and `https:` maps to `wss:`; URLs that already use
 * `ws:`/`wss:` keep their scheme. Any other scheme (for example `file:`) is
 * rejected, because it cannot be turned into a WebSocket URL. The path is
 * replaced with `/desktop-agent` and the query string and fragment are removed.
 *
 * @param {string} httpUrl - Base URL of the target environment.
 * @returns {string | null} The WebSocket URL, or null when the input cannot be
 *   parsed or uses an unsupported scheme.
 */
function toAgentWsUrl(httpUrl) {
  let parsed;
  try {
    parsed = new URL(httpUrl);
  } catch {
    return null;
  }

  let wsProtocol;
  switch (parsed.protocol) {
    case 'http:':
    case 'ws:':
      wsProtocol = 'ws:';
      break;
    case 'https:':
    case 'wss:':
      wsProtocol = 'wss:';
      break;
    default:
      // Not a scheme a WebSocket client can connect to; callers drop nulls.
      return null;
  }

  parsed.protocol = wsProtocol;
  parsed.pathname = '/desktop-agent';
  parsed.search = '';
  parsed.hash = '';
  return parsed.toString();
}
/**
 * Asks a target environment whether Computer Use is enabled on it.
 *
 * Sends a GET request to `/api/computer-use/status` on the target, aborted
 * after TARGET_STATUS_TIMEOUT_MS. The target counts as enabled only when the
 * response is OK, the JSON body does not carry `success: false`, and
 * `data.enabled` is exactly `true`. Every failure (invalid URL, network error,
 * timeout, unparsable body) resolves to false instead of throwing.
 *
 * @param {string} httpUrl - Base URL of the target environment.
 * @param {string} apiKey - Sent as the `X-API-Key` header when non-empty.
 * @returns {Promise<boolean>} Whether the target reports Computer Use enabled.
 */
async function isComputerUseEnabledTarget(httpUrl, apiKey) {
  let endpoint;
  try {
    endpoint = new URL('/api/computer-use/status', httpUrl).toString();
  } catch {
    return false;
  }

  const abortController = new AbortController();
  const timer = setTimeout(() => abortController.abort(), TARGET_STATUS_TIMEOUT_MS);
  const requestInit = { signal: abortController.signal, headers: undefined };
  if (apiKey) {
    requestInit.headers = { 'X-API-Key': apiKey };
  }

  try {
    const response = await fetch(endpoint, requestInit);
    // A body that is not valid JSON is treated as "no body" rather than an error.
    const payload = await response.json().catch(() => null);
    if (!response.ok) {
      return false;
    }
    if (payload?.success === false) {
      return false;
    }
    return payload?.data?.enabled === true;
  } catch {
    return false;
  } finally {
    clearTimeout(timer);
  }
}
/**
 * Keeps only the targets that report Computer Use as enabled.
 *
 * All targets are probed concurrently; the relative order of the surviving
 * targets matches the input order.
 *
 * @param {string[]} targets - Base URLs of candidate environments.
 * @param {string} apiKey - API key forwarded to each status probe.
 * @returns {Promise<string[]>} The subset of `targets` that is enabled.
 */
async function filterEnabledComputerUseTargets(targets, apiKey) {
  const enabledFlags = await Promise.all(
    targets.map((target) => isComputerUseEnabledTarget(target, apiKey)),
  );
  return targets.filter((_target, index) => enabledFlags[index]);
}
/**
 * Keeps a Computer Use desktop agent connected to running cloud environments
 * while desktop access is enabled.
 *
 * The agent runs as a child process. The controller talks to it over stdio:
 * lines starting with IPC_PREFIX carry JSON events from the agent (stdout) and
 * JSON commands to the agent (stdin); every other stdout line is logged.
 *
 * Only the most recently spawned child may change controller state. Handlers
 * belonging to a child that has been replaced or stopped are ignored, so a late
 * `exit` from an old process cannot clear the reference to the new one.
 */
export class ComputerAgentController {
  /**
   * @param {object} options
   * @param {string} options.appRoot - Working directory for the agent; also the
   *   base for the default agent entry script path.
   * @param {string} options.settingsPath - JSON file where settings are persisted.
   * @param {boolean} [options.isPackaged=false] - Whether the app is a packaged build.
   * @param {() => string[]} [options.getRunningEnvironmentUrls] - Supplies the
   *   base URLs of candidate environments.
   * @param {() => string} [options.getApiKey] - Supplies the API key used for
   *   status probes and handed to the agent.
   * @param {(sessionId: string) => Promise<boolean> | boolean} [options.promptConsent]
   *   - Asked when the agent requests consent for a session.
   * @param {() => void} [options.onChange] - Called whenever observable state changes.
   */
  constructor({ appRoot, settingsPath, isPackaged = false, getRunningEnvironmentUrls, getApiKey, promptConsent, onChange }) {
    this.appRoot = appRoot;
    this.settingsPath = settingsPath;
    this.isPackaged = isPackaged;
    this.getRunningEnvironmentUrls = getRunningEnvironmentUrls;
    this.getApiKey = getApiKey;
    this.promptConsent = promptConsent;
    this.onChange = onChange;
    this.settings = { enabled: false, consentMode: 'ask' };
    this.child = null;
    this.connectedUrls = new Set();
    this.currentTargets = [];
    this.stdoutBuffer = '';
    this.lastEvent = null;
    this.lastError = null;
  }

  /** @returns {{ enabled: boolean, consentMode: 'ask' | 'auto' }} A copy of the current settings. */
  getSettings() {
    return { ...this.settings };
  }

  /** @returns {object} A snapshot of settings, process status and the last event/error. */
  getState() {
    return {
      enabled: this.settings.enabled,
      consentMode: this.settings.consentMode,
      running: Boolean(this.child),
      connectedCount: this.connectedUrls.size,
      targetCount: this.currentTargets.length,
      targetUrls: [...this.currentTargets],
      lastEvent: this.lastEvent,
      lastError: this.lastError,
    };
  }

  /**
   * Reads settings from `settingsPath`. A missing or unreadable file falls back
   * to the defaults (disabled, consent mode `ask`).
   *
   * @returns {Promise<{ enabled: boolean, consentMode: 'ask' | 'auto' }>}
   */
  async loadSettings() {
    try {
      const raw = await fs.readFile(this.settingsPath, 'utf8');
      const stored = JSON.parse(raw);
      this.settings = {
        enabled: Boolean(stored.enabled),
        consentMode: stored.consentMode === 'auto' ? 'auto' : 'ask',
      };
    } catch {
      // Best effort: any read/parse problem means "use defaults".
      this.settings = { enabled: false, consentMode: 'ask' };
    }
    return this.settings;
  }

  /**
   * Normalizes and persists settings, then re-syncs the agent with them.
   *
   * @param {{ enabled?: unknown, consentMode?: unknown }} next - Requested settings.
   * @returns {Promise<{ enabled: boolean, consentMode: 'ask' | 'auto' }>}
   */
  async saveSettings(next) {
    this.settings = {
      enabled: Boolean(next.enabled),
      consentMode: next.consentMode === 'auto' ? 'auto' : 'ask',
    };
    await fs.mkdir(path.dirname(this.settingsPath), { recursive: true });
    await fs.writeFile(this.settingsPath, JSON.stringify(this.settings, null, 2), 'utf8');
    await this.sync();
    this.onChange?.();
    return this.settings;
  }

  /**
   * Reconciles the agent process with the current settings and targets:
   * stops it when disabled or when no target is enabled, leaves it alone when
   * it already serves the same targets, and restarts it otherwise.
   *
   * @returns {Promise<void>}
   */
  async sync() {
    const targets = this.settings.enabled ? (this.getRunningEnvironmentUrls?.() || []) : [];
    const enabledTargets = this.settings.enabled ? await filterEnabledComputerUseTargets(targets, this.getApiKey?.() || '') : [];
    const wsTargets = enabledTargets.map(toAgentWsUrl).filter(Boolean);
    const sameTargets =
      wsTargets.length === this.currentTargets.length &&
      wsTargets.every((url) => this.currentTargets.includes(url));

    if (!this.settings.enabled || wsTargets.length === 0) {
      this.stop();
      this.currentTargets = [];
      this.lastEvent = this.settings.enabled ? 'no-targets' : 'disabled';
      return;
    }

    if (this.child && sameTargets) {
      return;
    }

    this.currentTargets = wsTargets;
    this.lastEvent = 'restarting';
    this.lastError = null;
    this.restart(wsTargets);
  }

  /**
   * Stops any running agent and spawns a new one for the given targets.
   *
   * @param {string[]} wsTargets - WebSocket URLs the agent should connect to.
   */
  restart(wsTargets) {
    this.stop();
    // A partial line from the previous process must not be glued onto the
    // first line of the new one.
    this.stdoutBuffer = '';

    const agentEntry = process.env.CLOUDCLI_COMPUTER_AGENT_ENTRY
      || path.join(this.appRoot, 'dist-server', 'server', 'computer-use-agent.js');
    const runtime = getNodeRuntime(this.isPackaged);

    const child = spawn(runtime.command, [agentEntry], {
      cwd: this.appRoot,
      env: {
        ...process.env,
        ...runtime.env,
        PATH: getDesktopPath(),
        CLOUDCLI_DESKTOP_AGENT_URLS: wsTargets.join(','),
        CLOUDCLI_DESKTOP_AGENT_API_KEY: this.getApiKey?.() || '',
        CLOUDCLI_COMPUTER_USE_CONSENT_MODE: this.settings.consentMode,
      },
      stdio: ['pipe', 'pipe', 'pipe'],
      windowsHide: true,
    });
    this.child = child;
    this.connectedUrls = new Set();

    // Events of a replaced/stopped child arrive asynchronously; only the child
    // currently referenced by the controller is allowed to touch its state.
    const isCurrent = () => this.child === child;

    // Decode as UTF-8 at the stream level so multi-byte characters split
    // across chunks are not corrupted.
    child.stdout?.setEncoding('utf8');
    child.stderr?.setEncoding('utf8');

    child.once('error', (error) => {
      console.error('[ComputerAgent] failed to start:', error.message);
      if (!isCurrent()) return;
      this.lastEvent = 'start-error';
      this.lastError = error.message;
      this.child = null;
      this.onChange?.();
    });

    // Without a listener a failed write (e.g. EPIPE after the agent died)
    // would surface as an uncaught exception.
    child.stdin?.on('error', (error) => {
      console.error('[ComputerAgent] stdin error:', error.message);
      if (!isCurrent()) return;
      this.lastError = error.message;
      this.onChange?.();
    });

    child.stdout?.on('data', (chunk) => {
      if (isCurrent()) this.handleStdout(String(chunk));
    });

    child.stderr?.on('data', (chunk) => {
      for (const line of String(chunk).split(/\r?\n/)) {
        if (line.trim()) {
          if (isCurrent()) this.lastError = line.trim();
          console.error('[ComputerAgent]', line);
        }
      }
    });

    child.once('exit', (code) => {
      console.log(`[ComputerAgent] exited (code ${code ?? 'null'})`);
      if (isCurrent()) {
        this.lastEvent = `exit:${code ?? 'null'}`;
        this.child = null;
        this.connectedUrls = new Set();
      }
      // Still notify for a superseded child so observers can refresh; the
      // state itself is left as the newer child (or stop()) defined it.
      this.onChange?.();
    });

    this.onChange?.();
  }

  /**
   * Feeds raw stdout text from the agent. Complete lines are processed; a
   * trailing partial line is buffered until the rest arrives.
   *
   * @param {string} chunk - Text received from the agent's stdout.
   */
  handleStdout(chunk) {
    this.stdoutBuffer += chunk;
    const lines = this.stdoutBuffer.split('\n');
    this.stdoutBuffer = lines.pop() || '';

    for (const line of lines) {
      const trimmed = line.trim();
      if (!trimmed.startsWith(IPC_PREFIX)) {
        if (trimmed) console.log('[ComputerAgent]', trimmed);
        continue;
      }

      let payload;
      try {
        payload = JSON.parse(trimmed.slice(IPC_PREFIX.length).trim());
      } catch {
        // Malformed IPC line: ignore it and keep processing the rest.
        continue;
      }

      // Fire-and-forget, but never leave the rejection unhandled.
      this.handleAgentEvent(payload).catch((error) => {
        this.lastError = error instanceof Error ? error.message : 'Failed to handle Computer agent event.';
        this.onChange?.();
      });
    }
  }

  /**
   * Applies one decoded IPC event from the agent.
   *
   * @param {{ type?: string, url?: string, reason?: string, message?: string, sessionId?: string }} payload
   * @returns {Promise<void>}
   */
  async handleAgentEvent(payload) {
    // JSON.parse can yield null or a primitive; those carry no event.
    if (payload === null || typeof payload !== 'object') {
      return;
    }

    switch (payload.type) {
      case 'connected':
        this.connectedUrls.add(payload.url);
        this.lastEvent = 'connected';
        this.lastError = null;
        this.onChange?.();
        break;
      case 'disconnected':
        this.connectedUrls.delete(payload.url);
        this.lastEvent = 'disconnected';
        this.onChange?.();
        // The target turned Computer Use off: re-evaluate which targets remain.
        if (payload.reason && /computer use.*disabled/i.test(payload.reason)) {
          void this.sync().catch((error) => {
            this.lastError = error instanceof Error ? error.message : 'Failed to sync Computer Use targets.';
            this.onChange?.();
          });
        }
        break;
      case 'starting':
        this.lastEvent = 'starting';
        this.lastError = null;
        this.onChange?.();
        break;
      case 'error':
        this.lastEvent = 'error';
        this.lastError = payload.message || 'Computer agent error.';
        this.onChange?.();
        break;
      case 'consent-request': {
        let allow = false;
        try {
          allow = Boolean(await this.promptConsent?.(payload.sessionId));
        } catch (error) {
          // Fail closed: a broken prompt must deny access, and the agent must
          // still get an answer instead of waiting forever.
          this.lastError = error instanceof Error ? error.message : 'Consent prompt failed.';
          this.onChange?.();
        }
        this.sendToChild({ type: 'consent-response', sessionId: payload.sessionId, allow });
        break;
      }
      default:
        break;
    }
  }

  /**
   * Sends one IPC message to the agent; a no-op when no agent is running or
   * its stdin is no longer writable.
   *
   * @param {object} message - JSON-serializable command.
   */
  sendToChild(message) {
    if (this.child?.stdin?.writable) {
      this.child.stdin.write(`${IPC_PREFIX} ${JSON.stringify(message)}\n`);
    }
  }

  /**
   * Tells the agent to revoke access for a session.
   *
   * @param {string} sessionId
   */
  revokeSession(sessionId) {
    this.sendToChild({ type: 'revoke-session', sessionId });
  }

  /** Terminates the running agent, if any, and forgets its connections. */
  stop() {
    if (!this.child) return;
    const child = this.child;
    // Clear the reference first so the child's own handlers see it as stale.
    this.child = null;
    this.connectedUrls = new Set();
    this.stdoutBuffer = '';
    try { child.kill('SIGTERM'); } catch { /* noop */ }
  }
}

View File

@@ -4,10 +4,6 @@ import { ViewHost } from './viewHost.js';
const TITLEBAR_HEIGHT = 44;
const AUTH_TOKEN_STORAGE_KEY = 'auth-token';
// TODO: Re-enable Computer Use menus after fixing the MCP server connection
// between the desktop app and the web UI.
const COMPUTER_USE_MENUS_ENABLED = false;
function isAllowedPermissionOrigin(sourceUrl, controlPlaneUrl) {
try {
const source = new URL(sourceUrl);
@@ -437,17 +433,6 @@ export class DesktopWindowManager {
accelerator: 'CmdOrCtrl+Shift+E',
click: () => void this.actions.showEnvironmentPicker().catch((error) => this.actions.showError('Could not switch environment', error)),
},
{ type: 'separator' },
{
label: 'Services',
visible: COMPUTER_USE_MENUS_ENABLED,
submenu: [
{
label: 'Computer Use',
click: () => void this.showDesktopSettings(),
},
],
},
{
label: 'Diagnostics',
submenu: [

View File

@@ -8,14 +8,6 @@ window.__MOCK_STATE__ = {
shareableWebUrl: 'http://localhost:3001',
localServerRunning: false,
localStartupLogs: [],
computerUse: { enabled: false, consentMode: 'ask', running: false, connectedCount: 0, targetCount: 0 },
computerUsePermissions: {
platform: 'darwin',
supported: true,
accessibility: 'not_granted',
screenRecording: 'not_determined',
message: 'macOS requires Accessibility and Screen Recording for Computer Use.',
},
environments: [
{ id: 'env-api', name: 'api-gateway', subdomain: 'api-gateway', access_url: 'https://api-gateway.cloudcli.ai', status: 'running', region: 'fra1', agent: 'Claude Code' },
{ id: 'env-web', name: 'web-frontend', subdomain: 'web-frontend', access_url: 'https://web-frontend.cloudcli.ai', status: 'stopped', region: 'sfo1', agent: 'Codex' },
@@ -62,7 +54,6 @@ window.__MOCK_STATE__ = {
refreshEnvironments: function () { return Promise.resolve(clone(mockState)); },
refreshActiveTab: function () { return Promise.resolve(clone(mockState)); },
copyDiagnostics: function () { return Promise.resolve(clone(mockState)); },
showComputerAccess: function () { return Promise.resolve(clone(mockState)); },
showEnvironmentPicker: function () { return Promise.resolve(clone(mockState)); },
showLauncher: function () { return Promise.resolve(clone(mockState)); },
showLocalSettings: function () { return Promise.resolve(clone(mockState)); },
@@ -82,23 +73,6 @@ window.__MOCK_STATE__ = {
mockState.desktopSettings[key] = key === 'themeMode' ? value : !!value;
return Promise.resolve(clone(mockState));
},
updateComputerUse: function (settings) {
mockState.computerUse = mockState.computerUse || { enabled: false, consentMode: 'ask', running: false, connectedCount: 0, targetCount: 0 };
if (typeof settings.enabled === 'boolean') mockState.computerUse.enabled = settings.enabled;
if (settings.consentMode === 'auto' || settings.consentMode === 'ask') mockState.computerUse.consentMode = settings.consentMode;
mockState.computerUse.running = mockState.computerUse.enabled;
return Promise.resolve(clone(mockState));
},
requestComputerUsePermission: function (permission) {
mockState.computerUsePermissions = mockState.computerUsePermissions || {};
if (permission === 'accessibility') mockState.computerUsePermissions.accessibility = 'granted';
if (permission === 'screen') mockState.computerUsePermissions.screenRecording = 'granted';
if (permission === 'all') {
mockState.computerUsePermissions.accessibility = 'granted';
mockState.computerUsePermissions.screenRecording = 'granted';
}
return Promise.resolve(clone(mockState));
},
openEnvironment: function (id) {
var env = (mockState.environments || []).filter(function (item) { return item.id === id; })[0];
if (env) {
@@ -189,22 +163,6 @@ window.__MOCK_STATE__ = {
return window.matchMedia && window.matchMedia('(prefers-color-scheme: dark)').matches ? 'dark' : 'light';
}
/**
 * Summarizes the Computer Use state as a badge descriptor.
 *
 * @param {{computerUse?: {enabled?: boolean, consentMode?: string, connectedCount?: number}}} state
 *   Desktop state; a missing state or missing `computerUse` counts as disabled.
 * @returns {{label: string, tone: string, detail: string}} Badge label, tone
 *   ('idle', 'warn' or 'ok') and a one-line explanation.
 */
function computerUseStatus(state) {
  var settings = (state && state.computerUse) || {};
  if (!settings.enabled) {
    return { label: 'Disabled', tone: 'idle', detail: 'CloudCLI cannot use this computer.' };
  }

  var total = settings.connectedCount || 0;
  if (!total) {
    return { label: 'Not connected', tone: 'warn', detail: 'No environment connected.' };
  }

  var summary = total + ' environment' + (total === 1 ? '' : 's') + ' connected.';
  // Unattended access is flagged with the warning tone even when connected.
  var unattended = settings.consentMode === 'auto';
  return {
    label: 'Connected',
    tone: unattended ? 'warn' : 'ok',
    detail: unattended ? summary + ' Unattended access is on.' : summary,
  };
}
var CC = {
icon: icon,
esc: esc,
@@ -214,7 +172,6 @@ window.__MOCK_STATE__ = {
accountLabel: accountLabel,
localUrl: localUrl,
envCount: envCount,
computerUseStatus: computerUseStatus,
version: VERSION,
logoUrl: LOGO_URL,
platform: 'win',
@@ -352,42 +309,12 @@ window.__MOCK_STATE__ = {
return CC.run('Saved', function () { return bridge.updateSetting(node.key, node.value); });
case 'set-theme-mode':
return CC.run('Saved', function () { return bridge.updateSetting('themeMode', node.value); });
case 'set-computer-mode':
CC.state.computerUse = {
...((CC.state && CC.state.computerUse) || {}),
enabled: true,
consentMode: node.value === 'auto' ? 'auto' : 'ask',
};
return CC.run('Saved', function () {
return bridge.updateComputerUse({
enabled: true,
consentMode: node.value,
});
});
case 'set-computer-enabled':
CC.state.computerUse = {
...((CC.state && CC.state.computerUse) || {}),
enabled: !!node.value,
};
return CC.run('Saved', function () {
var current = (CC.state && CC.state.computerUse) || { consentMode: 'ask' };
return bridge.updateComputerUse({
enabled: !!node.value,
consentMode: current.consentMode === 'auto' ? 'auto' : 'ask',
});
});
case 'computer-permission':
return CC.run('Opening permission settings...', function () {
return bridge.requestComputerUsePermission(node.getAttribute('data-cc-computer-permission'));
});
case 'settings-toggle':
return CC.run('Opening desktop settings...', function () { return bridge.showDesktopSettings(); });
case 'desktop-settings-toggle':
return CC.run('Opening desktop settings...', function () { return bridge.showDesktopSettings(); });
case 'local-settings-toggle':
return CC.run('Opening local settings...', function () { return bridge.showLocalSettings(); });
case 'computer-settings-toggle':
return CC.run('Opening desktop settings...', function () { return bridge.showDesktopSettings(); });
case 'settings-close':
return CC.closeSheet();
case 'dashboard':
@@ -541,62 +468,6 @@ window.__MOCK_STATE__ = {
);
};
/**
 * Maps an OS permission status to the text shown in its badge.
 *
 * @param {string} value - Permission status string.
 * @returns {string} 'Granted', 'Needs attention', 'Not required', or
 *   'Not granted' for every other value.
 */
function permissionLabel(value) {
  switch (value) {
    case 'granted':
      return 'Granted';
    case 'denied':
    case 'restricted':
      return 'Needs attention';
    case 'not_applicable':
      return 'Not required';
    default:
      return 'Not granted';
  }
}
/**
 * Maps an OS permission status to a badge tone.
 *
 * @param {string} value - Permission status string.
 * @returns {string} 'ok' when granted or not applicable, 'warn' when denied or
 *   restricted, 'idle' otherwise.
 */
function permissionTone(value) {
  var satisfied = ['granted', 'not_applicable'];
  var blocked = ['denied', 'restricted'];
  if (satisfied.indexOf(value) !== -1) {
    return 'ok';
  }
  if (blocked.indexOf(value) !== -1) {
    return 'warn';
  }
  return 'idle';
}
// TODO: Re-enable Computer Use menus after fixing the MCP server connection
// between the desktop app and the web UI.
var COMPUTER_USE_MENUS_ENABLED = false;
/**
 * Builds the HTML for one permission row: title, detail, a status badge and,
 * unless the permission is granted or not applicable, an "Open settings"
 * button carrying the permission key.
 *
 * @param {string} key - Permission key placed in data-cc-computer-permission.
 * @param {string} label - Row title.
 * @param {string} detail - Row description.
 * @param {string} status - Permission status string.
 * @returns {string} HTML markup for the row.
 */
function renderComputerPermissionRow(key, label, detail, status) {
  var heading = '<div><div class="cc-permission-title">' + CC.esc(label) + '</div><div class="cc-permission-detail">' + CC.esc(detail) + '</div></div>';
  var badge = '<span class="badge ' + permissionTone(status) + '">' + CC.esc(permissionLabel(status)) + '</span>';
  var satisfied = status === 'granted' || status === 'not_applicable';
  var action = satisfied
    ? ''
    : '<button class="btn sm" data-cc-action="computer-permission" data-cc-computer-permission="' + CC.esc(key) + '">Open settings</button>';
  return [
    '<div class="cc-permission-row">',
    heading,
    '<div class="cc-permission-actions">' + badge + action + '</div>',
    '</div>',
  ].join('');
}
/**
 * Builds the permissions part of the Computer Use section: a note, followed by
 * the Accessibility and Screen Recording rows when
 * `state.computerUsePermissions.supported` is truthy.
 *
 * @param {{computerUsePermissions?: object}} state - Desktop state.
 * @returns {string} HTML markup.
 */
function renderComputerPermissions(state) {
  var permissions = state.computerUsePermissions || {};
  var fallbackNote = permissions.supported
    ? 'Grant the required OS permissions before approving agent control.'
    : 'No additional OS permission setup is required from CloudCLI on this platform.';
  var note = '<div class="cc-note">' + CC.esc(permissions.message || fallbackNote) + '</div>';
  if (!permissions.supported) {
    return note;
  }
  var accessibilityRow = renderComputerPermissionRow('accessibility', 'Accessibility', 'Allows CloudCLI to click, type, and use accessibility actions.', permissions.accessibility);
  var screenRow = renderComputerPermissionRow('screen', 'Screen Recording', 'Allows CloudCLI to capture screenshots for agent observation.', permissions.screenRecording);
  return note + accessibilityRow + screenRow;
}
/**
 * Builds the "COMPUTER USE" settings section: the enable toggle, a status row
 * with a refresh button and, when Computer Use is enabled, the OS permission
 * rows plus the consent-mode radio group.
 *
 * @param {object} state - Desktop state.
 * @returns {*} Whatever CC.renderSection returns for the assembled body.
 */
CC.buildComputerUseSection = function (state) {
  var settings = state.computerUse || {};
  var status = computerUseStatus(state);
  var parts = [
    '<div class="cc-surface">',
    '<label class="cc-toggle"><input type="checkbox" data-cc-computer-enabled="true"' + (settings.enabled ? ' checked' : '') + '><span><b>Enable Computer Use</b><br>Let CloudCLI use the computer. Agents cannot act until you approve a session.</span></label>',
    '<div class="cc-row2"><span class="badge ' + CC.esc(status.tone) + '">' + CC.esc(status.label) + '</span><span class="cc-meta">' + CC.esc(status.detail) + '</span><button class="btn sm" data-cc-action="refresh-environments">' + CC.icon('refresh', 14) + 'Refresh / relink</button></div>',
  ];
  if (settings.enabled) {
    parts.push('<div class="cc-permissions">' + renderComputerPermissions(state) + '</div>');
    // Any consent mode other than 'auto' selects the "ask" option.
    var askOption = CC.renderRadioOption('computer-access-mode', 'ask', settings.consentMode !== 'auto', 'Ask before each session', 'Agents can request control, but you approve every session.');
    var autoOption = CC.renderRadioOption('computer-access-mode', 'auto', settings.consentMode === 'auto', 'Unattended access', 'Trusted agents can use this computer without a local approval prompt.');
    parts.push('<div class="cc-choice-group">' + askOption + autoOption + '</div>');
  }
  parts.push('</div>');
  return CC.renderSection('COMPUTER USE', 'Control how agents can use this computer', parts.join(''));
};
CC.renderLocalSettings = function () {
var state = CC.state || {};
var sections = [
@@ -612,13 +483,9 @@ window.__MOCK_STATE__ = {
};
CC.renderDesktopSettings = function () {
var state = CC.state || {};
var sections = [
CC.buildThemeSection(state),
CC.buildThemeSection(CC.state || {}),
];
if (COMPUTER_USE_MENUS_ENABLED) {
sections.push(CC.buildComputerUseSection(state));
}
CC.renderSheet('Desktop Settings', 'Manage the desktop app appearance.', sections);
};
@@ -681,15 +548,6 @@ window.__MOCK_STATE__ = {
CC.act('set-theme-mode', { value: theme.value });
return;
}
var computerMode = event.target.closest('[name="computer-access-mode"]');
if (computerMode) {
CC.act('set-computer-mode', { value: computerMode.value });
return;
}
var computerEnabled = event.target.closest('[data-cc-computer-enabled]');
if (computerEnabled) {
CC.act('set-computer-enabled', { value: computerEnabled.checked });
}
});
document.addEventListener('keydown', function (event) {

View File

@@ -1,10 +1,9 @@
import { app, BrowserWindow, clipboard, dialog, ipcMain, session, shell, systemPreferences } from 'electron';
import { app, BrowserWindow, clipboard, dialog, ipcMain, session, shell } from 'electron';
import { spawn } from 'node:child_process';
import path from 'node:path';
import { fileURLToPath } from 'node:url';
import { CloudController } from './cloud.js';
import { ComputerAgentController } from './computerAgent.js';
import { DesktopWindowManager } from './desktopWindow.js';
import { DesktopNotificationsController } from './desktopNotifications.js';
import { LocalServerController } from './localServer.js';
@@ -30,7 +29,6 @@ let activeTarget = { kind: 'launcher', name: APP_NAME, url: null };
let desktopWindow = null;
let localServer = null;
let cloud = null;
let computerAgent = null;
let desktopNotifications = null;
let isQuitting = false;
let isRefreshingCloud = false;
@@ -63,10 +61,6 @@ function getSettingsPath() {
return path.join(app.getPath('userData'), 'desktop-settings.json');
}
function getComputerUseSettingsPath() {
return path.join(app.getPath('userData'), 'computer-use-settings.json');
}
function getDesktopNotificationsSettingsPath() {
return path.join(app.getPath('userData'), 'desktop-notifications-settings.json');
}
@@ -78,23 +72,6 @@ function getRunningEnvironmentUrls() {
.filter(Boolean);
}
/**
 * Asks the local user whether an agent may control this computer for one
 * session.
 *
 * "Deny" is both the default and the cancel button, so pressing Enter or
 * dismissing the dialog never grants control by accident.
 *
 * @param {string} [sessionId] - Session identifier shown in the dialog when present.
 * @returns {Promise<boolean>} True only when the user chose "Allow this session".
 */
async function promptComputerUseConsent(sessionId) {
  const detailLines = [
    'A cloud agent is requesting control of your mouse, keyboard, and screen for this session.',
    'Approval lasts for this session only. You can stop it any time from the Computer panel.',
  ];
  if (sessionId) {
    // Blank separator line, then the session identifier.
    detailLines.push('', `Session: ${sessionId}`);
  }
  const { response } = await dialog.showMessageBox(desktopWindow?.getMainWindow() || undefined, {
    type: 'warning',
    buttons: ['Allow this session', 'Deny'],
    defaultId: 1,
    cancelId: 1,
    title: 'Computer Use request',
    message: 'An agent wants to control this computer',
    detail: detailLines.join('\n'),
  });
  return response === 0;
}
function getDisplayTargetName() {
return activeTarget?.name || APP_NAME;
}
@@ -151,66 +128,10 @@ function getDesktopState() {
tabs: tabs.getSerializableTabs(),
activeTabId: tabs.activeTabId,
environments: cloud.getEnvironments().map(serializeEnvironment),
computerUse: computerAgent?.getState() || { enabled: false, consentMode: 'ask', running: false, connectedCount: 0, targetCount: 0 },
desktopNotifications: desktopNotifications?.getState() || { enabled: false, supported: false, connectedCount: 0, targetCount: 0 },
computerUsePermissions: getComputerUsePermissions(),
};
}
/**
 * Reports the OS permissions relevant to Computer Use.
 *
 * On platforms other than macOS nothing is probed and both permissions are
 * reported as 'not_applicable'. On macOS each probe that throws is reported
 * as 'unknown'.
 *
 * @returns {{platform: string, supported: boolean, accessibility: string, screenRecording: string, message: string}}
 */
function getComputerUsePermissions() {
  const { platform } = process;
  if (platform !== 'darwin') {
    return {
      platform,
      supported: false,
      accessibility: 'not_applicable',
      screenRecording: 'not_applicable',
      message: 'No OS permission onboarding is required from CloudCLI on this platform.',
    };
  }

  // Runs one probe and degrades to 'unknown' if it throws.
  const probe = (read) => {
    try {
      return read();
    } catch {
      return 'unknown';
    }
  };
  const accessibility = probe(() => (systemPreferences.isTrustedAccessibilityClient(false) ? 'granted' : 'not_granted'));
  const screenRecording = probe(() => systemPreferences.getMediaAccessStatus('screen'));
  const allGranted = accessibility === 'granted' && screenRecording === 'granted';

  return {
    platform: 'darwin',
    supported: true,
    accessibility,
    screenRecording,
    message: allGranted
      ? 'macOS permissions are granted.'
      : 'macOS requires Accessibility and Screen Recording for Computer Use.',
  };
}
/**
 * Starts the macOS permission flow for the requested permission and returns
 * the desktop state. On other platforms it only returns the state.
 *
 * @param {'accessibility'|'screen'|'all'} permission - Permission to request.
 * @returns {Promise<object>} Result of getDesktopState().
 * @throws {Error} On macOS, when `permission` is not one of the known values.
 */
async function requestComputerUsePermission(permission) {
  if (process.platform === 'darwin') {
    const screenCaptureSettingsUrl = 'x-apple.systempreferences:com.apple.preference.security?Privacy_ScreenCapture';
    switch (permission) {
      case 'accessibility':
        systemPreferences.isTrustedAccessibilityClient(true);
        break;
      case 'screen':
        await shell.openExternal(screenCaptureSettingsUrl);
        break;
      case 'all':
        systemPreferences.isTrustedAccessibilityClient(true);
        await shell.openExternal(screenCaptureSettingsUrl);
        break;
      default:
        throw new Error(`Unknown Computer Use permission: ${permission}`);
    }
  }
  return getDesktopState();
}
async function openExternalUrl(url) {
if (String(url).startsWith(CALLBACK_PROTOCOL + "://")) {
await handleDeepLink(url);
@@ -316,8 +237,6 @@ function getDiagnosticsText() {
cloudEnvironmentCount: cloud.getEnvironments().length,
cloudRunningEnvironmentCount: getRunningEnvironmentUrls().length,
cloudAuthState: cloud.getAuthState(),
computerUse: computerAgent?.getState() || null,
computerUseSettingsPath: getComputerUseSettingsPath(),
cloudAccountPath: getStorePath(),
controlPlaneUrl: CLOUDCLI_CONTROL_PLANE_URL,
}, null, 2);
@@ -332,22 +251,6 @@ async function copyDiagnostics() {
});
}
/**
 * Opens the desktop settings view (when a desktop window exists) and returns
 * the desktop state afterwards.
 *
 * @returns {Promise<object>} Result of getDesktopState().
 */
async function showComputerAccess() {
  const opening = desktopWindow?.showDesktopSettings();
  await opening;
  return getDesktopState();
}
/**
 * Applies a partial Computer Use settings update and returns the new desktop
 * state.
 *
 * Only recognized fields are applied: `enabled` must be a boolean and
 * `consentMode` must be 'auto' or 'ask'. A field that is missing or invalid
 * keeps its current value, so toggling `enabled` alone no longer resets the
 * consent mode.
 *
 * @param {{enabled?: boolean, consentMode?: 'auto'|'ask'}} [settings] - Requested changes.
 * @returns {Promise<object>} Result of getDesktopState().
 */
async function updateComputerUse(settings) {
  const current = computerAgent?.getSettings() || { enabled: false, consentMode: 'ask' };
  const requestedMode = settings?.consentMode;
  // Normalize the stored mode too, so anything other than 'auto' means 'ask'.
  const currentMode = current.consentMode === 'auto' ? 'auto' : 'ask';
  const next = {
    enabled: typeof settings?.enabled === 'boolean' ? settings.enabled : current.enabled,
    consentMode: requestedMode === 'auto' || requestedMode === 'ask' ? requestedMode : currentMode,
  };
  await computerAgent?.saveSettings(next);
  syncDesktopState();
  return getDesktopState();
}
async function refreshCloudEnvironments({ showErrors = false } = {}) {
isRefreshingCloud = true;
syncDesktopState();
@@ -370,7 +273,6 @@ async function refreshCloudEnvironments({ showErrors = false } = {}) {
throw error;
} finally {
isRefreshingCloud = false;
void computerAgent?.sync().catch((error) => console.error('[ComputerAgent] sync failed:', error?.message || error));
void desktopNotifications?.sync().catch((error) => console.error('[DesktopNotifications] sync failed:', error?.message || error));
syncDesktopState();
}
@@ -852,16 +754,10 @@ function registerIpcHandlers() {
await desktopWindow.showLauncher();
return getDesktopState();
});
ipcMain.handle('cloudcli-desktop:show-computer-access', async () => {
await showComputerAccess();
return getDesktopState();
});
ipcMain.handle('cloudcli-desktop:update-computer-use', async (_event, settings) => updateComputerUse(settings));
ipcMain.handle('cloudcli-desktop:update-desktop-notifications', async (_event, settings) => {
await desktopNotifications?.saveSettings(settings);
return getDesktopState();
});
ipcMain.handle('cloudcli-desktop:request-computer-use-permission', async (_event, permission) => requestComputerUsePermission(permission));
ipcMain.handle('cloudcli-desktop:show-desktop-settings', async () => desktopWindow.showDesktopSettings());
ipcMain.handle('cloudcli-desktop:show-local-settings', async () => desktopWindow.showLocalSettings());
ipcMain.handle('cloudcli-desktop:close-settings-window', async () => {
@@ -899,7 +795,6 @@ function registerAppEvents() {
});
app.on('before-quit', () => {
computerAgent?.stop();
desktopNotifications?.stop();
});
@@ -951,7 +846,6 @@ async function createDesktopWindow() {
openCloudDashboard,
refreshCloudEnvironments: () => refreshCloudEnvironments({ showErrors: true }),
setActiveTarget,
showComputerAccess,
showEnvironmentPicker,
showError,
startEnvironment,
@@ -1017,15 +911,6 @@ async function bootstrap() {
callbackUrl: CALLBACK_URL,
onChange: syncDesktopState,
});
computerAgent = new ComputerAgentController({
appRoot: getAppRoot(),
settingsPath: getComputerUseSettingsPath(),
isPackaged: app.isPackaged,
getRunningEnvironmentUrls,
getApiKey: () => cloud.getAccount()?.apiKey || '',
promptConsent: promptComputerUseConsent,
onChange: syncDesktopState,
});
desktopNotifications = new DesktopNotificationsController({
settingsPath: getDesktopNotificationsSettingsPath(),
appVersion: app.getVersion(),
@@ -1042,7 +927,6 @@ async function bootstrap() {
await localServer.loadDesktopSettings();
await cloud.loadCloudAccount();
await computerAgent.loadSettings();
await desktopNotifications.loadSettings();
registerProtocolHandler();

View File

@@ -44,10 +44,7 @@ if (window.location.protocol === 'file:') {
refreshActiveTab: () => ipcRenderer.invoke('cloudcli-desktop:reload-active-tab'),
showEnvironmentPicker: () => ipcRenderer.invoke('cloudcli-desktop:show-environment-picker'),
showLauncher: () => ipcRenderer.invoke('cloudcli-desktop:show-launcher'),
showComputerAccess: () => ipcRenderer.invoke('cloudcli-desktop:show-computer-access'),
showLocalSettings: () => ipcRenderer.invoke('cloudcli-desktop:show-local-settings'),
updateComputerUse: (settings) => ipcRenderer.invoke('cloudcli-desktop:update-computer-use', settings),
requestComputerUsePermission: (permission) => ipcRenderer.invoke('cloudcli-desktop:request-computer-use-permission', permission),
showDesktopSettings: () => ipcRenderer.invoke('cloudcli-desktop:show-desktop-settings'),
closeSettingsWindow: () => ipcRenderer.invoke('cloudcli-desktop:close-settings-window'),
showActiveEnvironmentActionsMenu: () => ipcRenderer.invoke('cloudcli-desktop:show-active-environment-actions-menu'),

View File

@@ -4,7 +4,7 @@
<meta charset="UTF-8" />
<link rel="icon" type="image/svg+xml" href="/favicon.svg" />
<link rel="icon" type="image/png" href="/favicon.png" />
<meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0, minimum-scale=1.0, user-scalable=no, viewport-fit=cover" />
<meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0, minimum-scale=1.0, user-scalable=no, viewport-fit=cover, interactive-widget=resizes-content" />
<title>CloudCLI UI</title>
<!-- PWA Manifest -->

4
package-lock.json generated
View File

@@ -1,12 +1,12 @@
{
"name": "@cloudcli-ai/cloudcli",
"version": "1.34.0",
"version": "1.35.0",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "@cloudcli-ai/cloudcli",
"version": "1.34.0",
"version": "1.35.0",
"hasInstallScript": true,
"license": "AGPL-3.0-or-later",
"dependencies": {

View File

@@ -1,6 +1,6 @@
{
"name": "@cloudcli-ai/cloudcli",
"version": "1.34.0",
"version": "1.35.0",
"productName": "CloudCLI",
"description": "A web-based UI for Claude Code CLI",
"type": "module",
@@ -29,13 +29,10 @@
"scripts": {
"dev": "concurrently --kill-others \"npm run server:dev\" \"npm run client\"",
"server": "node dist-server/server/index.js",
"preserver:dev": "npm run build:semantics",
"server:dev": "tsx --tsconfig server/tsconfig.json server/index.js",
"preserver:dev-watch": "npm run build:semantics",
"server:dev-watch": "tsx watch --tsconfig server/tsconfig.json server/index.js",
"client": "vite",
"desktop": "electron electron/main.js",
"predesktop:dev": "npm run build:semantics",
"desktop:dev": "cross-env ELECTRON_DEV_URL=http://127.0.0.1:5173 electron electron/main.js",
"desktop:stage": "node scripts/release/prepare-desktop-app.js",
"desktop:pack": "npm run build && npm run desktop:stage && electron-builder --projectDir .desktop-build/desktop-app --dir",
@@ -43,12 +40,10 @@
"desktop:dist:win": "npm run build && npm run desktop:stage && electron-builder --projectDir .desktop-build/desktop-app --win nsis",
"server:bundle": "npm run build && node scripts/release/build-server-bundle.js",
"desktop:icon:mac": "node electron/scripts/generate-macos-icon.js",
"build": "npm run build:semantics && npm run build:client && npm run build:server",
"build": "npm run build:client && npm run build:server",
"build:client": "vite build",
"build:semantics": "node scripts/build-computer-semantics.mjs",
"prebuild:server": "node -e \"require('node:fs').rmSync('dist-server', { recursive: true, force: true })\"",
"build:server": "tsc -p server/tsconfig.json && tsc-alias -p server/tsconfig.json",
"postbuild:server": "node scripts/copy-computer-semantics-bin.mjs",
"preview": "vite preview",
"typecheck": "tsc --noEmit -p tsconfig.json && tsc --noEmit -p server/tsconfig.json",
"lint": "eslint src/ server/",
@@ -56,7 +51,7 @@
"start": "npm run build && npm run server",
"release": "./release.sh",
"prepublishOnly": "npm run build",
"postinstall": "node scripts/fix-node-pty.js && npm run build:semantics",
"postinstall": "node scripts/fix-node-pty.js",
"prepare": "husky",
"update:platform": "./update-platform.sh"
},

View File

@@ -524,7 +524,7 @@
<td><code>provider</code></td>
<td>string</td>
<td><span class="badge badge-optional">Optional</span></td>
<td><code>claude</code>, <code>cursor</code>, or <code>codex</code> (default: <code>claude</code>)</td>
<td><code>claude</code>, <code>cursor</code>, <code>codex</code>, <code>gemini</code>, <code>opencode</code>, or <code>hermes</code> (default: <code>claude</code>)</td>
</tr>
<tr>
<td><code>stream</code></td>
@@ -834,6 +834,7 @@ data: {"type":"done"}</code></pre>
{ id: 'gemini', name: 'Google' },
{ id: 'cursor', name: 'Cursor' },
{ id: 'opencode', name: 'OpenCode' },
{ id: 'hermes', name: 'Nous Research' },
];
async function populateModels() {

View File

@@ -1,133 +0,0 @@
#!/usr/bin/env node
import { spawn } from 'node:child_process';
import fs from 'node:fs/promises';
import path from 'node:path';
import { fileURLToPath } from 'node:url';
// Directory of this script, derived from the module URL.
const __dirname = path.dirname(fileURLToPath(import.meta.url));
// Parent of the script directory; every path below is resolved from here.
const rootDir = path.resolve(__dirname, '..');
// Target platform and architecture. The CLOUDCLI_SEMANTICS_* variables
// override the values of the running Node process.
const platform = process.env.CLOUDCLI_SEMANTICS_PLATFORM || process.platform;
const arch = process.env.CLOUDCLI_SEMANTICS_ARCH || process.arch;
const platformArch = `${platform}-${arch}`;
const semanticsRoot = path.join(rootDir, 'server', 'modules', 'computer-use', 'semantics');
// Built helpers are written to bin/<platform>-<arch> under the semantics root.
const outDir = path.join(semanticsRoot, 'bin', platformArch);
// When set to '1', a missing toolchain is an error instead of a skipped build.
const requireBuild = process.env.CLOUDCLI_SEMANTICS_BUILD_REQUIRED === '1';
/**
 * Runs a command and waits for it to finish.
 *
 * Output is inherited from this process by default, and on Windows the command
 * goes through a shell; both can be overridden via `options`.
 *
 * @param {string} command - Executable to run.
 * @param {string[]} args - Arguments passed to the executable.
 * @param {object} [options] - Extra child_process.spawn options.
 * @returns {Promise<void>} Resolves when the command exits with code 0; rejects
 *   when it cannot be started, exits non-zero, or is terminated by a signal.
 */
function run(command, args, options = {}) {
  return new Promise((resolve, reject) => {
    const child = spawn(command, args, {
      stdio: 'inherit',
      shell: process.platform === 'win32',
      ...options,
    });
    child.once('error', reject);
    child.once('exit', (code, signal) => {
      if (code === 0) {
        resolve();
        return;
      }
      const invocation = `${command} ${args.join(' ')}`;
      // `code` is null when the child was killed by a signal; name the signal
      // instead of reporting "exited with code null".
      const outcome = code === null && signal
        ? `was terminated by signal ${signal}`
        : `exited with code ${code}`;
      reject(new Error(`${invocation} ${outcome}`));
    });
  });
}
/**
 * Checks whether a command can be run by invoking it with `--version`.
 *
 * @param {string} command - Executable name or path.
 * @returns {Promise<boolean>} True when the probe exits with code 0; false when
 *   it cannot be started or exits with any other status.
 */
function commandExists(command) {
  const useShell = process.platform === 'win32';
  return new Promise((resolve) => {
    const probe = spawn(command, ['--version'], { stdio: 'ignore', shell: useShell });
    probe.once('exit', (exitCode) => {
      resolve(exitCode === 0);
    });
    probe.once('error', () => {
      resolve(false);
    });
  });
}
/**
 * Tells whether a path is accessible to this process.
 *
 * @param {string} filePath - Path to check.
 * @returns {Promise<boolean>} True when fs.access succeeds, false on any failure.
 */
async function pathExists(filePath) {
  return fs.access(filePath).then(
    () => true,
    () => false,
  );
}
/**
 * Decides whether a build output is at least as new as all of its inputs.
 *
 * @param {string} output - Path of the built artifact.
 * @param {string[]} inputs - Paths of the source files it is built from.
 * @returns {Promise<boolean>} False when the output is missing or any input has
 *   a later modification time; true otherwise.
 */
async function isUpToDate(output, inputs) {
  const outputPresent = await pathExists(output);
  if (!outputPresent) {
    return false;
  }
  const { mtimeMs: builtAt } = await fs.stat(output);
  for (const input of inputs) {
    const { mtimeMs: changedAt } = await fs.stat(input);
    if (changedAt > builtAt) {
      return false;
    }
  }
  return true;
}
/**
 * Verifies that a build tool is available.
 *
 * @param {string} command - Tool to look for.
 * @param {string} helpText - Hint appended to the "not found" message.
 * @returns {Promise<boolean>} True when the tool exists; false when it is
 *   missing and the build is optional (a skip notice is logged).
 * @throws {Error} When the tool is missing and `requireBuild` is set.
 */
async function ensureCommand(command, helpText) {
  const available = await commandExists(command);
  if (available) {
    return true;
  }
  const message = `${command} was not found. ${helpText}`;
  if (!requireBuild) {
    console.log(`Skipping semantic helper build: ${message}`);
    return false;
  }
  throw new Error(message);
}
// Script body: build the semantic helper for the selected platform.
// macOS compiles a Swift source with swiftc, Windows publishes a .NET project
// with dotnet, and any other platform only logs that it is unsupported.
if (platform === 'darwin') {
const source = path.join(semanticsRoot, 'helpers', 'macos', 'CloudCLISemantics.swift');
const output = path.join(outDir, 'CloudCLISemantics');
// ensureCommand throws when the build is required; otherwise a missing
// toolchain is a successful no-op.
if (!(await ensureCommand('swiftc', 'Install Xcode Command Line Tools to compile the macOS helper.'))) {
process.exit(0);
}
// Skip the compile when the existing binary is not older than its source.
if (await isUpToDate(output, [source])) {
console.log(`Semantic helper is up to date: ${path.relative(rootDir, output)}`);
process.exit(0);
}
await fs.mkdir(outDir, { recursive: true });
await run('swiftc', [
source,
'-o',
output,
'-framework',
'AppKit',
'-framework',
'ApplicationServices',
]);
// Set mode 0o755 on the compiled helper.
await fs.chmod(output, 0o755);
console.log(`Built ${path.relative(rootDir, output)}`);
} else if (platform === 'win32') {
const project = path.join(semanticsRoot, 'helpers', 'windows', 'CloudCLISemantics.csproj');
const source = path.join(semanticsRoot, 'helpers', 'windows', 'Program.cs');
const output = path.join(outDir, 'CloudCLISemantics.exe');
// Same optional-toolchain handling as the macOS branch.
if (!(await ensureCommand('dotnet', '.NET SDK is required to compile the Windows helper.'))) {
process.exit(0);
}
// Both the project file and the program source count as inputs.
if (await isUpToDate(output, [project, source])) {
console.log(`Semantic helper is up to date: ${path.relative(rootDir, output)}`);
process.exit(0);
}
await fs.mkdir(outDir, { recursive: true });
await run('dotnet', [
'publish',
project,
'-c',
'Release',
'-r',
// Any architecture other than arm64 is published for win-x64.
arch === 'arm64' ? 'win-arm64' : 'win-x64',
'--self-contained',
'false',
'-p:PublishSingleFile=true',
'-o',
outDir,
]);
console.log(`Built ${path.relative(rootDir, output)}`);
} else {
console.log(`Semantic helper build is not supported for ${platform}-${arch}.`);
}

View File

@@ -1,24 +0,0 @@
#!/usr/bin/env node
import fs from 'node:fs/promises';
import path from 'node:path';
import { fileURLToPath } from 'node:url';
const __dirname = path.dirname(fileURLToPath(import.meta.url));
const rootDir = path.resolve(__dirname, '..');
const sourceDir = path.join(rootDir, 'server', 'modules', 'computer-use', 'semantics', 'bin');
const targetDir = path.join(rootDir, 'dist-server', 'server', 'modules', 'computer-use', 'semantics', 'bin');
/**
 * Tells whether a path is accessible to this process.
 *
 * @param {string} filePath - Path to check.
 * @returns {Promise<boolean>} True when fs.access succeeds, false on any failure.
 */
async function pathExists(filePath) {
  let accessible = true;
  await fs.access(filePath).catch(() => {
    accessible = false;
  });
  return accessible;
}
// Script body: when helper binaries exist under the source tree, copy the
// whole bin directory to the matching location under dist-server.
// If sourceDir does not exist, nothing is copied and nothing is logged.
if (await pathExists(sourceDir)) {
// Create the parent of the target first; fs.cp then writes the bin directory itself.
await fs.mkdir(path.dirname(targetDir), { recursive: true });
await fs.cp(sourceDir, targetDir, { recursive: true });
console.log(`Copied Computer Use semantic helpers to ${path.relative(rootDir, targetDir)}`);
}

View File

@@ -113,12 +113,6 @@ await copyRequired('electron');
await copyRequired('dist');
await copyRequired('public');
// The desktop app still ships the standalone Computer Use desktop agent, but
// not the full local server. Local CloudCLI is downloaded on demand.
await copyRequired('dist-server/server/computer-use-agent.js');
await copyIfExists('dist-server/server/computer-use-agent.js.map');
await copyRequired('dist-server/server/modules/computer-use');
const copiedRuntimeDependencies = [];
if (await copyNodeModule('ws')) {
copiedRuntimeDependencies.push('ws');

View File

@@ -29,9 +29,14 @@ import {
import { sessionsService } from './modules/providers/services/sessions.service.js';
import { providerAuthService } from './modules/providers/services/provider-auth.service.js';
import { createCompleteMessage, createNormalizedMessage } from './shared/utils.js';
import {
getPendingApprovalsForSession,
registerApproval,
resolveToolApproval,
unregisterApproval,
} from './shared/tool-approval-registry.js';
const activeSessions = new Map();
const pendingToolApprovals = new Map();
// Sessions cancelled via abort-session. The abort handler already sent the
// terminal `complete` (aborted: true) to the client, so the run loop must not
// emit a second one when its generator winds down.
@@ -64,7 +69,7 @@ function waitForToolApproval(requestId, options = {}) {
let timeout;
const cleanup = () => {
pendingToolApprovals.delete(requestId);
unregisterApproval(requestId);
if (timeout) clearTimeout(timeout);
if (signal && abortHandler) {
signal.removeEventListener('abort', abortHandler);
@@ -96,21 +101,15 @@ function waitForToolApproval(requestId, options = {}) {
const resolver = (decision) => {
finalize(decision);
};
// Attach metadata for getPendingApprovalsForSession lookup
if (metadata) {
Object.assign(resolver, metadata);
}
pendingToolApprovals.set(requestId, resolver);
registerApproval(requestId, {
resolver,
sessionId: metadata?._sessionId ?? null,
provider: 'claude',
meta: metadata ?? {},
});
});
}
/**
 * Delivers a decision to the pending tool approval registered under
 * `requestId`. Does nothing when no approval is pending for that id.
 *
 * @param {string} requestId - Identifier of the approval request.
 * @param {*} decision - Value passed to the stored resolver.
 */
function resolveToolApproval(requestId, decision) {
  const pendingResolver = pendingToolApprovals.get(requestId);
  if (!pendingResolver) {
    return;
  }
  pendingResolver(decision);
}
// Match stored permission entries against a tool + input combo.
// This only supports exact tool names and the Bash(command:*) shorthand
// used by the UI; it intentionally does not implement full glob semantics,
@@ -846,28 +845,6 @@ function getActiveClaudeSDKSessions() {
return getAllSessions();
}
/**
 * Lists the tool approvals still waiting for a decision in one session.
 *
 * Each pending resolver carries its request metadata as underscore-prefixed
 * properties; entries whose `_sessionId` matches are returned.
 *
 * @param {string} sessionId - The session ID
 * @returns {Array} Pending permission request objects, in registration order
 */
function getPendingApprovalsForSession(sessionId) {
  return [...pendingToolApprovals.entries()]
    .filter(([, resolver]) => resolver._sessionId === sessionId)
    .map(([requestId, resolver]) => ({
      requestId,
      toolName: resolver._toolName || 'UnknownTool',
      input: resolver._input,
      context: resolver._context,
      sessionId,
      // Falls back to "now" when the request did not record a receive time.
      receivedAt: resolver._receivedAt || new Date(),
    }));
}
/**
* Reconnect a session's WebSocketWriter to a new raw WebSocket.
* Called when client reconnects (e.g. page refresh) while SDK is still running.

View File

@@ -1,279 +0,0 @@
#!/usr/bin/env node
/**
* CloudCLI Computer Use — Desktop Agent.
*
* Standalone executor for the cloud relay. The Electron desktop app spawns this
* process (via ELECTRON_RUN_AS_NODE) whenever Computer Use is enabled and the
* user has running cloud environments. It opens an outbound websocket to each
* environment's `/desktop-agent` endpoint and executes the `computer_*` actions
* the hosted server relays, returning a fresh screenshot each time.
*
* It is fully self-contained: it reuses the shared nut-js executor module and
* does NOT depend on the local CloudCLI server. Consent is enforced here (the
* controlled machine is the authority): in `ask` mode the agent asks the parent
* Electron process for a per-session decision before the first action runs.
*/
import readline from 'node:readline';
import { WebSocket } from 'ws';
import {
getRuntimeReadiness,
type Point,
type ClickButton,
type ScrollDirection,
} from './modules/computer-use/computer-executor.js';
import { runRawComputerAction } from './modules/computer-use/actions/raw-action-dispatcher.js';
import type { RawActionTarget, RawComputerAction } from './modules/computer-use/actions/raw-action-types.js';
import { computerSemanticsService } from './modules/computer-use/computer-semantics.service.js';
// Consent policy: 'auto' lets actions run without asking; 'ask' requests a per-session decision from the parent process.
type ConsentMode = 'ask' | 'auto';
// Loosely-typed shape of a JSON message received from the relay websocket; fields are validated where they are read.
type RelayMessage = {
kind?: string;
type?: string;
id?: string;
params?: Record<string, unknown>;
};
// Marker that prefixes every control line exchanged with the parent process over stdout/stdin.
const IPC_PREFIX = '@@CUAGENT@@';
// Reconnect back-off: starts at the base delay and doubles after each close, capped at the maximum.
const RECONNECT_BASE_MS = 2000;
const RECONNECT_MAX_MS = 30_000;
// Any value other than the literal 'auto' falls back to 'ask'.
const consentMode: ConsentMode = process.env.CLOUDCLI_COMPUTER_USE_CONSENT_MODE === 'auto' ? 'auto' : 'ask';
// Label sent in the `register` message after a connection opens.
const agentLabel = process.env.CLOUDCLI_DESKTOP_AGENT_LABEL || 'cloudcli-desktop';
// Sent as the X-API-Key header when non-empty; no header is sent otherwise.
const desktopAgentApiKey = process.env.CLOUDCLI_DESKTOP_AGENT_API_KEY || '';
/**
 * Reads the comma-separated relay URLs from the environment.
 *
 * CLOUDCLI_DESKTOP_AGENT_URLS wins over CLOUDCLI_DESKTOP_AGENT_URL; entries are
 * trimmed and blank entries are dropped.
 */
function parseTargets(): string[] {
  const {
    CLOUDCLI_DESKTOP_AGENT_URLS: urlList,
    CLOUDCLI_DESKTOP_AGENT_URL: singleUrl,
  } = process.env;
  const targets: string[] = [];
  for (const piece of (urlList || singleUrl || '').split(',')) {
    const candidate = piece.trim();
    if (candidate) {
      targets.push(candidate);
    }
  }
  return targets;
}
// --- Parent (Electron) IPC over stdout/stdin -------------------------------
/** Writes one control line to stdout: the IPC prefix, a space, then the message as JSON. */
function emitToParent(message: Record<string, unknown>): void {
  const serialized = JSON.stringify(message);
  process.stdout.write(`${IPC_PREFIX} ${serialized}\n`);
}
/** Decisions already made per session, plus the resolvers still waiting on the parent's answer. */
const sessionConsent = new Map<string, 'granted' | 'denied'>();
const pendingConsent = new Map<string, Array<(allow: boolean) => void>>();
const stdinReader = readline.createInterface({ input: process.stdin });

// Control lines from the parent arrive on stdin; anything without the IPC prefix is not for us.
stdinReader.on('line', (line) => {
  const controlLine = line.trim();
  if (!controlLine.startsWith(IPC_PREFIX)) {
    return;
  }
  try {
    const body = controlLine.slice(IPC_PREFIX.length).trim();
    const payload = JSON.parse(body) as Record<string, unknown>;
    const { type, sessionId } = payload;
    if (typeof sessionId !== 'string') {
      return;
    }
    if (type === 'consent-response') {
      // Record the decision first, then release everyone who was waiting on it.
      const allow = payload.allow === true;
      sessionConsent.set(sessionId, allow ? 'granted' : 'denied');
      const waiters = pendingConsent.get(sessionId) || [];
      pendingConsent.delete(sessionId);
      waiters.forEach((release) => {
        release(allow);
      });
    } else if (type === 'revoke-session') {
      sessionConsent.delete(sessionId);
    }
  } catch {
    // ignore malformed control lines
  }
});
/**
 * Resolves to whether actions may run for `sessionId`.
 *
 * 'auto' mode always allows. Otherwise a recorded decision is reused; with none
 * recorded, the parent process is asked and the promise settles when its
 * consent-response for this session arrives.
 */
async function ensureConsent(sessionId: string): Promise<boolean> {
  if (consentMode === 'auto') {
    return true;
  }
  switch (sessionConsent.get(sessionId)) {
    case 'granted':
      return true;
    case 'denied':
      return false;
    default:
      break;
  }
  // No decision yet: queue this caller and ask the parent (Electron) to prompt the user.
  return new Promise<boolean>((resolve) => {
    const queue = pendingConsent.get(sessionId);
    if (queue) {
      queue.push(resolve);
    } else {
      pendingConsent.set(sessionId, [resolve]);
    }
    emitToParent({ type: 'consent-request', sessionId });
  });
}
// --- Action execution ------------------------------------------------------
/** Returns `{ x, y }` when `value` is an object whose `x` and `y` are both numbers, otherwise `undefined`. */
function asPoint(value: unknown): Point | undefined {
  if (!value || typeof value !== 'object') {
    return undefined;
  }
  const { x, y } = value as Record<string, unknown>;
  return typeof x === 'number' && typeof y === 'number' ? { x, y } : undefined;
}
/**
 * Translates a relayed action (`type` plus loosely-typed `params`) into a RawComputerAction.
 *
 * Missing optional fields get defaults (left button, scroll down by 3); a missing
 * required point or an unknown action type throws.
 */
function rawActionFromRelay(type: string, params: Record<string, unknown>): RawComputerAction {
  const point = asPoint(params.point);

  if (type === 'screenshot') {
    return { type: 'screenshot' };
  }
  if (type === 'cursor_position') {
    return { type: 'cursor_position' };
  }
  if (type === 'mouse_move') {
    if (!point) {
      throw new Error('mouse_move requires a valid point.');
    }
    return { type: 'mouse_move', point };
  }
  if (type === 'click') {
    const button = (params.button as ClickButton) || 'left';
    const double = params.double === true;
    return { type: 'click', button, point, double };
  }
  if (type === 'drag') {
    const from = asPoint(params.from);
    const to = asPoint(params.to);
    if (!from || !to) {
      throw new Error('drag requires valid from and to points.');
    }
    const button = (params.button as ClickButton) || 'left';
    return { type: 'drag', from, to, button };
  }
  if (type === 'type') {
    return { type: 'type', text: String(params.text ?? '') };
  }
  if (type === 'key') {
    return { type: 'key', key: String(params.key ?? '') };
  }
  if (type === 'scroll') {
    const direction = (params.direction as ScrollDirection) || 'down';
    const amount = typeof params.amount === 'number' ? params.amount : 3;
    return { type: 'scroll', direction, amount, point };
  }
  if (type === 'wait') {
    const ms = typeof params.ms === 'number' ? params.ms : undefined;
    return { type: 'wait', ms };
  }
  throw new Error(`Unsupported computer action: ${type}`);
}
/**
 * Executes one relayed action and returns its result.
 *
 * `semantic_tool` is forwarded to computerSemanticsService; every other type is
 * converted with rawActionFromRelay and run through runRawComputerAction, which
 * first requires the runtime readiness flags to be set.
 */
async function runAction(type: string, params: Record<string, unknown>): Promise<Record<string, unknown>> {
  if (type !== 'semantic_tool') {
    const { nutInstalled, screenshotInstalled } = getRuntimeReadiness();
    if (!nutInstalled || !screenshotInstalled) {
      throw new Error('Computer Use runtime is not installed on the desktop agent.');
    }
    const target: RawActionTarget = {
      displaySize: (params.displaySize as RawActionTarget['displaySize']) ?? null,
    };
    const action = rawActionFromRelay(type, params);
    return await runRawComputerAction(action, target) as Record<string, unknown>;
  }

  const toolName = typeof params.toolName === 'string' ? params.toolName : '';
  if (!toolName) {
    throw new Error('semantic_tool requires toolName.');
  }
  const hasArgs = Boolean(params.arguments) && typeof params.arguments === 'object';
  const args = hasArgs ? params.arguments as Record<string, unknown> : {};
  const sessionId = typeof params.sessionId === 'string' ? params.sessionId : 'default';
  return await computerSemanticsService.callTool(toolName, { ...args, sessionId }) as Record<string, unknown>;
}
// --- Relay connection ------------------------------------------------------
/**
 * Maintains a websocket to one relay endpoint, reconnecting with exponential back-off.
 *
 * On open the agent registers itself; each `computer_relay` message is answered with a
 * `computer_relay_result` carrying either `result` or `error`. Reconnecting stops when
 * the server closes with code 1008 and a reason matching "computer use ... disabled".
 *
 * @param url - Relay websocket URL.
 */
function connect(url: string): void {
  let reconnectMs = RECONNECT_BASE_MS;
  let socket: WebSocket | null = null;

  // Replies go out on the current connection, but only while it is OPEN. An action can
  // finish after its connection dropped; `ws` throws from send() on a socket that is
  // still CONNECTING, and a throw inside the async message handler's catch would
  // surface as an unhandled rejection.
  const send = (payload: Record<string, unknown>): void => {
    const text = JSON.stringify(payload);
    const current = socket;
    if (current && current.readyState === WebSocket.OPEN) {
      current.send(text);
    }
  };

  const open = () => {
    const ws = new WebSocket(url, {
      headers: desktopAgentApiKey ? { 'X-API-Key': desktopAgentApiKey } : undefined,
    });
    socket = ws;

    ws.on('open', () => {
      reconnectMs = RECONNECT_BASE_MS;
      emitToParent({ type: 'connected', url });
      send({ kind: 'register', label: agentLabel, consentMode });
    });

    ws.on('message', async (raw) => {
      let message: RelayMessage;
      try {
        message = JSON.parse(String(raw)) as RelayMessage;
      } catch {
        // Not JSON: nothing to answer.
        return;
      }
      const kind = message.kind || message.type;
      if (kind !== 'computer_relay' || typeof message.id !== 'string') {
        return;
      }
      const id = message.id;
      const type = String(message.type || (message.params?.type as string) || '');
      const params = message.params || {};
      const sessionId = typeof params.sessionId === 'string' ? params.sessionId : 'default';

      if (type === 'stop_session') {
        // Forget the decision so the next session with this id must ask again.
        sessionConsent.delete(sessionId);
        send({ kind: 'computer_relay_result', id, result: { ok: true } });
        return;
      }

      try {
        const allowed = await ensureConsent(sessionId);
        if (!allowed) {
          send({ kind: 'computer_relay_result', id, error: 'The user denied desktop control for this session.' });
          return;
        }
        const result = await runAction(type, params);
        send({ kind: 'computer_relay_result', id, result });
      } catch (error) {
        send({
          kind: 'computer_relay_result',
          id,
          error: error instanceof Error ? error.message : 'Desktop agent action failed.',
        });
      }
    });

    const scheduleReconnect = (code?: number, reason?: Buffer) => {
      const reasonText = reason?.toString() || '';
      emitToParent({ type: 'disconnected', url, code, reason: reasonText });
      if (code === 1008 && /computer use.*disabled/i.test(reasonText)) {
        return;
      }
      setTimeout(open, reconnectMs);
      reconnectMs = Math.min(reconnectMs * 2, RECONNECT_MAX_MS);
    };

    ws.on('close', scheduleReconnect);
    ws.on('error', () => {
      // Close this connection (not a newer one); the resulting 'close' event schedules the retry.
      try { ws.close(); } catch { /* noop */ }
    });
  };

  open();
}
/** Entry point: reports the configured targets to the parent and opens one connection per target. */
function main(): void {
  const targets = parseTargets();
  if (targets.length > 0) {
    emitToParent({ type: 'starting', targets, consentMode });
    targets.forEach((url) => {
      connect(url);
    });
    return;
  }
  emitToParent({ type: 'error', message: 'No desktop-agent target URLs provided.' });
}

main();

View File

@@ -1,574 +0,0 @@
#!/usr/bin/env node
import './load-env.js';
// Shape this server expects for an incoming JSON-RPC 2.0 message; `id` and `params` are optional.
type JsonRpcRequest = {
jsonrpc: '2.0';
id?: string | number | null;
method: string;
params?: Record<string, unknown>;
};
// One entry of the tool catalogue returned for `tools/list`.
type ToolDefinition = {
name: string;
description: string;
inputSchema: Record<string, unknown>;
};
/** Returns `value` trimmed; throws `<name> is required.` unless it is a non-blank string. */
const readString = (value: unknown, name: string): string => {
  const text = typeof value === 'string' ? value.trim() : '';
  if (text === '') {
    throw new Error(`${name} is required.`);
  }
  return text;
};
/** Returns `value` trimmed when it is a non-blank string, otherwise `undefined`. */
const readOptionalString = (value: unknown): string | undefined => {
  if (typeof value !== 'string') {
    return undefined;
  }
  const text = value.trim();
  return text === '' ? undefined : text;
};
/** Returns `value` when it is a finite number, otherwise `undefined`. */
const readNumber = (value: unknown): number | undefined => {
  if (typeof value !== 'number') {
    return undefined;
  }
  return Number.isFinite(value) ? value : undefined;
};
/** Normalises a mouse button: 'right' and 'middle' pass through, anything else becomes 'left'. */
const readMouseButton = (value: unknown): 'left' | 'right' | 'middle' => {
  switch (value) {
    case 'right':
      return 'right';
    case 'middle':
      return 'middle';
    default:
      return 'left';
  }
};
// Base URL of the Computer Use HTTP API; a single trailing slash is removed so paths can be appended with '/'.
const apiUrl = (process.env.CLOUDCLI_COMPUTER_USE_API_URL || 'http://127.0.0.1:3001/api/computer-use-mcp').replace(/\/$/, '');
// Bearer token for the API; callComputerUseApi refuses to run while it is empty.
const apiToken = process.env.CLOUDCLI_COMPUTER_USE_MCP_TOKEN || '';
// Usage guidance returned to the client as `instructions` in the `initialize` response.
// The text is sent verbatim (after trimming), so edits here change what the client receives.
const computerUseInstructions = `
CloudCLI Computer Use lets you operate the user's real desktop through guarded sessions. Use it deliberately: observe first, act second, then verify.
Recommended app workflow:
1. If you do not know the target app name, call computer_list_apps.
2. Call computer_get_app_state for the target app before app-scoped actions. It returns a screenshot, accessibility elements, and a stateId.
3. Prefer semantic element actions using stateId + element_index from the latest computer_get_app_state result. Do not guess element indexes or reuse them after large UI changes without refreshing state.
4. Use x/y coordinates from the returned screenshot only when no suitable element_index is available.
5. After every action, inspect the returned screenshot/state before deciding the next action.
Use app-scoped tools when the target app is known: computer_list_apps, computer_get_app_state, computer_click_element, computer_perform_secondary_action, computer_set_value, computer_type_text, computer_press_key, computer_scroll_element, and computer_app_drag.
Use raw desktop tools only when you need full-screen coordinate control, cursor position, or current-focus input: computer_screenshot, computer_cursor_position, computer_mouse_move, computer_click, computer_drag, computer_type, computer_key, computer_scroll, computer_wait, and computer_close_session. Raw coordinates are screenshot pixels, so call computer_screenshot first when you need a coordinate frame.
Most tools can use or create the active agent session automatically when sessionId is omitted. In local mode, input actions require the user to grant control in the Computer tab before they work. In cloud mode, approval is handled by the linked CloudCLI desktop app.
If a tool reports missing permission, denied control, or no available desktop session, stop retrying and ask the user to fix access. For local mode, ask them to open CloudCLI Desktop, go to the Computer tab, enable Computer Use, grant the requested OS permissions, and allow the session. On macOS this usually means Accessibility and Screen Recording. For cloud mode, ask them to keep the linked CloudCLI Desktop app running and approve the cloud agent's Computer Use request there.
Ask before sending, deleting, purchasing, approving, uploading, publishing, changing account settings, or making other externally visible or destructive changes. Do not inspect unrelated private content unless the user explicitly asked for that task.
`.trim();
/**
 * POSTs a tool invocation to the Computer Use HTTP API and returns the `data` field of its reply.
 *
 * @param toolName - Tool name; URL-encoded into the request path.
 * @param input - JSON-serialisable arguments sent as the request body.
 * @returns The `data` payload of a successful response.
 * @throws Error when the token is missing, the HTTP status is not OK, the body is not a
 *   JSON object, or the body reports `success: false`. The message is the API's `error`
 *   field when present, otherwise a generic message carrying the HTTP status.
 */
async function callComputerUseApi(toolName: string, input: Record<string, unknown>) {
  if (!apiToken) {
    throw new Error('CLOUDCLI_COMPUTER_USE_MCP_TOKEN is not configured.');
  }
  const response = await fetch(`${apiUrl}/tools/${encodeURIComponent(toolName)}`, {
    method: 'POST',
    headers: {
      Authorization: `Bearer ${apiToken}`,
      'Content-Type': 'application/json',
    },
    body: JSON.stringify(input),
  });

  // Read the body as text first: an error page or empty body would make response.json()
  // throw a bare SyntaxError and hide the HTTP status from the caller.
  const rawBody = await response.text();
  let parsed: unknown = null;
  try {
    parsed = rawBody ? JSON.parse(rawBody) : null;
  } catch {
    parsed = null;
  }
  const data = parsed && typeof parsed === 'object'
    ? parsed as { success?: boolean; data?: unknown; error?: string }
    : null;

  if (!response.ok || !data || data.success === false) {
    throw new Error(data?.error || `Computer Use API request failed (${response.status})`);
  }
  return data.data;
}
/**
 * Looks for a string `screenshotDataUrl` on the result itself, then on its `session`
 * object, and returns the first one found (or null).
 */
function findScreenshot(value: unknown): string | null {
  if (!value || typeof value !== 'object') {
    return null;
  }
  const holders: unknown[] = [value, (value as Record<string, unknown>).session];
  for (const holder of holders) {
    if (!holder || typeof holder !== 'object') {
      continue;
    }
    const dataUrl = (holder as Record<string, unknown>).screenshotDataUrl;
    if (typeof dataUrl === 'string') {
      return dataUrl;
    }
  }
  return null;
}
/**
 * Returns a deep copy of `value` in which every string-valued `screenshotDataUrl`
 * property is replaced by a `screenshot: '[returned as image]'` placeholder, keeping
 * the JSON text block small. Arrays and nested objects are walked recursively.
 */
function stripScreenshot(value: unknown): unknown {
  if (Array.isArray(value)) {
    return value.map(stripScreenshot);
  }
  if (!value || typeof value !== 'object') {
    return value;
  }
  return Object.entries(value as Record<string, unknown>).reduce<Record<string, unknown>>(
    (cleaned, [key, val]) => {
      if (key === 'screenshotDataUrl' && typeof val === 'string') {
        cleaned.screenshot = '[returned as image]';
      } else {
        cleaned[key] = stripScreenshot(val);
      }
      return cleaned;
    },
    {},
  );
}
/**
 * Wraps an API result as MCP tool content.
 *
 * The first block is always the result as pretty-printed JSON with screenshot data URLs
 * stripped. When the result carries a base64 `data:image/...` URL, it is added as a
 * separate `image` block so vision-capable models receive the picture itself rather
 * than a long string.
 */
function toolResult(value: unknown) {
  const textBlock = { type: 'text', text: JSON.stringify(stripScreenshot(value), null, 2) };
  const content: Array<Record<string, unknown>> = [textBlock];

  const dataUrl = findScreenshot(value);
  if (dataUrl) {
    const parsed = /^data:(image\/[a-z]+);base64,(.+)$/i.exec(dataUrl);
    if (parsed) {
      const [, mimeType, data] = parsed;
      content.push({ type: 'image', data, mimeType });
    }
  }
  return { content };
}
// Input schema used by tools whose only argument is the optional sessionId.
const sessionIdSchema = {
type: 'object',
properties: {
sessionId: { type: 'string', description: 'Optional. Omit to use or create the active agent session automatically.' },
},
};
// The sessionId property definition on its own, shared by the other tool schemas.
const optionalSessionProperty = sessionIdSchema.properties.sessionId;
/** Builds a schema `properties` map that starts with the shared sessionId entry; `properties` may override it. */
const withOptionalSession = (properties: Record<string, unknown> = {}) => {
  const merged = { sessionId: optionalSessionProperty, ...properties };
  return merged;
};
/** Builds an API request body: the normalised optional sessionId from `args`, followed by the `extra` fields. */
const optionalSessionInput = (args: Record<string, unknown>, extra: Record<string, unknown> = {}) => {
  const sessionId = readOptionalString(args.sessionId);
  return { sessionId, ...extra };
};
// Schema fragment for the `stateId` argument shared by the element-targeted tools.
const stateIdProperty = {
type: 'string',
description: 'State id returned by the latest computer_get_app_state call for this app. Send it with element_index so the runtime can resolve the cached element.',
};
// Schema fragment for the `element_index` argument; declared as a string in the schema.
const elementIndexProperty = {
type: 'string',
description: 'Element index from the latest computer_get_app_state result for this app. Use with stateId when possible.',
};
// Tool catalogue returned verbatim for `tools/list`. App-scoped tools come first, followed by the
// raw full-desktop tools. Every name listed here also has a matching case in callTool.
const tools: ToolDefinition[] = [
{
name: 'computer_list_apps',
description: 'Discover app names, bundle identifiers, process names, and window titles that can be used as the app target for app-scoped Computer Use tools. Call this first when you are unsure which app string to pass to computer_get_app_state.',
inputSchema: { type: 'object', properties: withOptionalSession() },
},
{
name: 'computer_get_app_state',
description: 'Inspect a target app and return its current screenshot, accessibility elements, and stateId. Call this before element-targeted actions, after navigation, and whenever the UI may have changed enough that old element indexes could be stale.',
inputSchema: {
type: 'object',
properties: withOptionalSession({
app: { type: 'string', description: 'App name, process name, bundle identifier, or window title from computer_list_apps or the user request.' },
}),
required: ['app'],
},
},
{
name: 'computer_click_element',
description: 'Click a target inside an app. Prefer stateId + element_index from computer_get_app_state; use x/y screenshot coordinates only when the target is not represented in the accessibility elements.',
inputSchema: {
type: 'object',
properties: withOptionalSession({
app: { type: 'string', description: 'Target app name, process name, bundle identifier, or window title.' },
stateId: stateIdProperty,
element_index: elementIndexProperty,
x: { type: 'number', description: 'X coordinate in screenshot pixel coordinates from computer_get_app_state.' },
y: { type: 'number', description: 'Y coordinate in screenshot pixel coordinates from computer_get_app_state.' },
click_count: { type: 'integer', description: 'Number of clicks, usually 1. Defaults to 1 and is capped by the runtime.' },
mouse_button: { type: 'string', enum: ['left', 'right', 'middle'], description: 'Button for the click; omitted means left.' },
}),
required: ['app'],
},
},
{
name: 'computer_perform_secondary_action',
description: 'Open the secondary action for a target inside an app, typically a context menu. Prefer stateId + element_index; if native secondary actions are unavailable, the runtime falls back to a right-click at the resolved point.',
inputSchema: {
type: 'object',
properties: withOptionalSession({
app: { type: 'string', description: 'Target app name, process name, bundle identifier, or window title.' },
stateId: stateIdProperty,
element_index: elementIndexProperty,
x: { type: 'number', description: 'X coordinate in screenshot pixel coordinates from computer_get_app_state.' },
y: { type: 'number', description: 'Y coordinate in screenshot pixel coordinates from computer_get_app_state.' },
}),
required: ['app'],
},
},
{
name: 'computer_set_value',
description: 'Set the value of a specific editable element in an app. Prefer stateId + element_index for a settable accessibility element; coordinate fallback focuses the resolved point and replaces the current value, so do not call this unless the target is resolved.',
inputSchema: {
type: 'object',
properties: withOptionalSession({
app: { type: 'string', description: 'Target app name, process name, bundle identifier, or window title.' },
stateId: stateIdProperty,
element_index: elementIndexProperty,
x: { type: 'number', description: 'X coordinate in screenshot pixel coordinates from computer_get_app_state.' },
y: { type: 'number', description: 'Y coordinate in screenshot pixel coordinates from computer_get_app_state.' },
value: { type: 'string', description: 'Exact value to put into the target element.' },
}),
required: ['app', 'value'],
},
},
{
name: 'computer_type_text',
description: 'Type literal text into the target app using keyboard input. Use after you have focused the intended field with computer_click_element or verified the correct focus in computer_get_app_state.',
inputSchema: {
type: 'object',
properties: withOptionalSession({
app: { type: 'string', description: 'Target app name, process name, bundle identifier, or window title.' },
text: { type: 'string', description: 'Text to enter exactly as provided.' },
}),
required: ['app', 'text'],
},
},
{
name: 'computer_press_key',
description: 'Press a key or key combination in the target app. Use for navigation, shortcuts, and confirmation keys after verifying the intended app/focus.',
inputSchema: {
type: 'object',
properties: withOptionalSession({
app: { type: 'string', description: 'Target app name, process name, bundle identifier, or window title.' },
key: { type: 'string', description: 'Key or chord, using names such as Return, Escape, Tab, ctrl+s, cmd+a, Up, or Page_Down.' },
}),
required: ['app', 'key'],
},
},
{
name: 'computer_scroll_element',
description: 'Scroll a target area inside an app. Prefer stateId + element_index for scrollable elements; use x/y screenshot coordinates only when the scroll target is visible but not represented as an element.',
inputSchema: {
type: 'object',
properties: withOptionalSession({
app: { type: 'string', description: 'Target app name, process name, bundle identifier, or window title.' },
stateId: stateIdProperty,
element_index: elementIndexProperty,
x: { type: 'number', description: 'X coordinate in screenshot pixel coordinates from computer_get_app_state.' },
y: { type: 'number', description: 'Y coordinate in screenshot pixel coordinates from computer_get_app_state.' },
direction: { type: 'string', enum: ['up', 'down', 'left', 'right'], description: 'Direction to scroll the target.' },
pages: { type: 'number', description: 'How far to scroll, measured in page units. Fractional values are allowed; default is 1.' },
}),
required: ['app', 'direction'],
},
},
{
name: 'computer_app_drag',
description: 'Drag inside a target app from one screenshot coordinate to another. Use for sliders, selections, map/canvas gestures, or drag-and-drop when no semantic element action is available.',
inputSchema: {
type: 'object',
properties: withOptionalSession({
app: { type: 'string', description: 'Target app name, process name, bundle identifier, or window title.' },
from_x: { type: 'number', description: 'Start X coordinate in screenshot pixels.' },
from_y: { type: 'number', description: 'Start Y coordinate in screenshot pixels.' },
to_x: { type: 'number', description: 'End X coordinate in screenshot pixels.' },
to_y: { type: 'number', description: 'End Y coordinate in screenshot pixels.' },
}),
required: ['app', 'from_x', 'from_y', 'to_x', 'to_y'],
},
},
{
name: 'computer_screenshot',
description: 'Capture the full desktop screenshot and current display size. Use this before raw coordinate actions when an app-specific accessibility state is unavailable or the task spans multiple apps.',
inputSchema: sessionIdSchema,
},
{
name: 'computer_cursor_position',
description: 'Get the current mouse cursor position in desktop screenshot pixel coordinates. Useful after a raw action misses or when coordinating pointer-relative steps.',
inputSchema: sessionIdSchema,
},
{
name: 'computer_mouse_move',
description: 'Move the mouse cursor to an exact full-desktop screenshot coordinate. Call computer_screenshot first if you do not already have a current coordinate frame.',
inputSchema: {
type: 'object',
properties: {
sessionId: optionalSessionProperty,
x: { type: 'number', description: 'X coordinate in full-desktop screenshot pixels.' },
y: { type: 'number', description: 'Y coordinate in full-desktop screenshot pixels.' },
},
required: ['x', 'y'],
},
},
{
name: 'computer_click',
description: 'Raw desktop click at the current cursor or at optional full-desktop screenshot coordinates. Prefer computer_click_element when the target app and element are known.',
inputSchema: {
type: 'object',
properties: {
sessionId: optionalSessionProperty,
x: { type: 'number', description: 'Optional X coordinate in full-desktop screenshot pixels.' },
y: { type: 'number', description: 'Optional Y coordinate in full-desktop screenshot pixels.' },
mouseButton: { type: 'string', enum: ['left', 'right', 'middle'], description: 'Button for the click; omitted means left.' },
clickCount: { type: 'integer', description: 'How many times to click; omitted means 1.' },
},
},
},
{
name: 'computer_drag',
description: 'Raw desktop drag from start coordinates to end coordinates in full-desktop screenshot pixels. Prefer computer_app_drag for app-scoped drags when the target app is known.',
inputSchema: {
type: 'object',
properties: {
sessionId: optionalSessionProperty,
startX: { type: 'number', description: 'Start X coordinate in full-desktop screenshot pixels.' },
startY: { type: 'number', description: 'Start Y coordinate in full-desktop screenshot pixels.' },
endX: { type: 'number', description: 'End X coordinate in full-desktop screenshot pixels.' },
endY: { type: 'number', description: 'End Y coordinate in full-desktop screenshot pixels.' },
mouseButton: { type: 'string', enum: ['left', 'right', 'middle'], description: 'Button to hold during the drag; omitted means left.' },
},
required: ['startX', 'startY', 'endX', 'endY'],
},
},
{
name: 'computer_type',
description: 'Type literal text at the current desktop focus. This is not app-scoped; use only after verifying the intended field is focused.',
inputSchema: {
type: 'object',
properties: { sessionId: optionalSessionProperty, text: { type: 'string', description: 'Text to enter exactly as provided at current focus.' } },
required: ['text'],
},
},
{
name: 'computer_key',
description: 'Press a key or key chord at the current desktop focus. This is not app-scoped; use computer_press_key when the target app is known.',
inputSchema: {
type: 'object',
properties: { sessionId: optionalSessionProperty, key: { type: 'string', description: 'Key or chord, using names such as Return, Escape, Tab, ctrl+s, cmd+a, Up, or Page_Down.' } },
required: ['key'],
},
},
{
name: 'computer_scroll',
description: 'Raw desktop scroll at the current cursor or optional full-desktop screenshot coordinates. Prefer computer_scroll_element when the target app/element is known.',
inputSchema: {
type: 'object',
properties: {
sessionId: optionalSessionProperty,
direction: { type: 'string', enum: ['up', 'down', 'left', 'right'], description: 'Direction to scroll the desktop target.' },
amount: { type: 'number', description: 'Scroll amount in wheel/page-like units. Defaults are runtime-defined.' },
x: { type: 'number', description: 'Optional X coordinate in full-desktop screenshot pixels.' },
y: { type: 'number', description: 'Optional Y coordinate in full-desktop screenshot pixels.' },
},
required: ['direction'],
},
},
{
name: 'computer_wait',
description: 'Wait briefly, up to 10000 ms, then return an updated desktop screenshot. Use after actions that trigger loading, animation, or delayed UI changes.',
inputSchema: {
type: 'object',
properties: { sessionId: optionalSessionProperty, timeoutMs: { type: 'number', description: 'Milliseconds to wait. The runtime caps long waits.' } },
},
},
{
name: 'computer_close_session',
description: 'Stop the active auto-created Computer Use session, or the specified session, and revoke agent input control for that session.',
inputSchema: sessionIdSchema,
},
];
/**
 * Dispatches an MCP `tools/call` to the Computer Use API and wraps the reply as tool content.
 *
 * App-scoped tools forward `args` untouched. Raw desktop tools send a normalised body:
 * an optional sessionId plus only the fields that tool understands.
 *
 * @throws Error for an unknown tool name, or when a required string argument is missing.
 */
async function callTool(name: string, args: Record<string, unknown>) {
  const invoke = async (input: Record<string, unknown>) =>
    toolResult(await callComputerUseApi(name, input));
  // Accept the camelCase name from the schema as well as common snake_case/short spellings.
  const pickButton = () => readMouseButton(args.mouseButton ?? args.mouse_button ?? args.button);

  switch (name) {
    case 'computer_app_drag':
    case 'computer_click_element':
    case 'computer_get_app_state':
    case 'computer_list_apps':
    case 'computer_perform_secondary_action':
    case 'computer_press_key':
    case 'computer_scroll_element':
    case 'computer_set_value':
    case 'computer_type_text':
      return invoke(args);

    case 'computer_screenshot':
    case 'computer_cursor_position':
    case 'computer_close_session':
      return invoke(optionalSessionInput(args));

    case 'computer_mouse_move': {
      const x = readNumber(args.x);
      const y = readNumber(args.y);
      return invoke(optionalSessionInput(args, { x, y }));
    }

    case 'computer_click': {
      const x = readNumber(args.x);
      const y = readNumber(args.y);
      const mouseButton = pickButton();
      const clickCount = readNumber(args.clickCount ?? args.click_count);
      return invoke(optionalSessionInput(args, { x, y, mouseButton, clickCount }));
    }

    case 'computer_drag': {
      const startX = readNumber(args.startX);
      const startY = readNumber(args.startY);
      const endX = readNumber(args.endX);
      const endY = readNumber(args.endY);
      const mouseButton = pickButton();
      return invoke(optionalSessionInput(args, { startX, startY, endX, endY, mouseButton }));
    }

    case 'computer_type': {
      const text = readString(args.text, 'text');
      return invoke(optionalSessionInput(args, { text }));
    }

    case 'computer_key': {
      const key = readString(args.key, 'key');
      return invoke(optionalSessionInput(args, { key }));
    }

    case 'computer_scroll': {
      const direction = typeof args.direction === 'string' ? args.direction : 'up';
      const amount = readNumber(args.amount);
      const x = readNumber(args.x);
      const y = readNumber(args.y);
      return invoke(optionalSessionInput(args, { direction, amount, x, y }));
    }

    case 'computer_wait': {
      const timeoutMs = readNumber(args.timeoutMs);
      return invoke(optionalSessionInput(args, { timeoutMs }));
    }

    default:
      throw new Error(`Unknown tool: ${name}`);
  }
}
/**
 * Routes one JSON-RPC message to its handler and returns the value to send as `result`.
 *
 * Handles `initialize`, `tools/list` and `tools/call`. Methods under `notifications/`
 * are accepted and yield `undefined`; any other method throws.
 */
async function handleMessage(message: JsonRpcRequest) {
  const { method } = message;

  switch (method) {
    case 'initialize':
      return {
        protocolVersion: '2024-11-05',
        capabilities: { tools: {} },
        serverInfo: { name: 'cloudcli-computer-use', version: '1.0.0' },
        instructions: computerUseInstructions,
      };
    case 'tools/list':
      return { tools };
    case 'tools/call': {
      const params = message.params || {};
      const name = readString(params.name, 'name');
      const hasArguments = Boolean(params.arguments) && typeof params.arguments === 'object';
      const args = (hasArguments ? params.arguments : {}) as Record<string, unknown>;
      return callTool(name, args);
    }
    default:
      break;
  }

  if (method.startsWith('notifications/')) {
    return undefined;
  }
  throw new Error(`Unsupported method: ${method}`);
}
// How a message is delimited on the wire: a Content-Length header block, or one JSON document per line.
type MessageFraming = 'content-length' | 'line';

/** Serialises `message` and writes it to stdout using the given framing. */
function writeMessage(message: Record<string, unknown>, framing: MessageFraming) {
  const payload = JSON.stringify(message);
  // Content-Length counts UTF-8 bytes, not string characters.
  const frame = framing === 'line'
    ? `${payload}\n`
    : `Content-Length: ${Buffer.byteLength(payload, 'utf8')}\r\n\r\n${payload}`;
  process.stdout.write(frame);
}
/** Writes a JSON-RPC success response; does nothing when `id` is undefined. */
function sendResult(id: string | number | null | undefined, result: unknown, framing: MessageFraming) {
  if (id !== undefined) {
    writeMessage({ jsonrpc: '2.0', id, result }, framing);
  }
}
/** Writes a JSON-RPC error response with code -32000; does nothing when `id` is undefined. */
function sendError(id: string | number | null | undefined, error: unknown, framing: MessageFraming) {
  if (id !== undefined) {
    const message = error instanceof Error ? error.message : String(error);
    writeMessage({ jsonrpc: '2.0', id, error: { code: -32000, message } }, framing);
  }
}
// Bytes received on stdin that have not yet been split into complete messages.
let buffer = Buffer.alloc(0);

/**
 * Parses one raw JSON-RPC message, runs it, and writes the response with the same
 * framing the message arrived in. Runs asynchronously; the caller does not wait.
 *
 * @param rawMessage - JSON text of a single message.
 * @param framing - Framing to use for the reply.
 */
function handleRawMessage(rawMessage: string, framing: MessageFraming) {
  void (async () => {
    let request: JsonRpcRequest | null = null;
    try {
      request = JSON.parse(rawMessage) as JsonRpcRequest;
      const result = await handleMessage(request);
      sendResult(request.id, result, framing);
    } catch (error) {
      // Unparseable or non-object input has no id to echo, so the error goes out with
      // id null. A parsed message keeps its own id: when that is undefined the message
      // is a notification, and JSON-RPC forbids replying to it even when handling fails.
      const isObject = request !== null && typeof request === 'object' && !Array.isArray(request);
      const id = isObject ? (request as JsonRpcRequest).id : null;
      sendError(id, error, framing);
    }
  })();
}
/**
 * Locates the blank line that ends a header block.
 *
 * Accepts both CRLF (`\r\n\r\n`) and bare-LF (`\n\n`) separators and returns whichever
 * occurs first, so a CRLF sequence later in the buffer cannot swallow an earlier
 * LF-terminated header together with its body.
 *
 * @param input - Buffered bytes to scan.
 * @returns Offset and byte length of the separator, or null when none is present.
 */
function findHeaderEnd(input: Buffer): { index: number; length: number } | null {
  const crlf = input.indexOf('\r\n\r\n');
  const lf = input.indexOf('\n\n');
  if (crlf === -1 && lf === -1) {
    return null;
  }
  // The two separators start with different bytes, so their offsets can never be equal.
  if (lf === -1 || (crlf !== -1 && crlf < lf)) {
    return { index: crlf, length: 4 };
  }
  return { index: lf, length: 2 };
}
// Splits the stdin byte stream into JSON-RPC messages. Two framings are accepted:
// `Content-Length` header blocks, and newline-delimited JSON (one message per line).
process.stdin.on('data', (chunk) => {
  buffer = Buffer.concat([buffer, chunk]);

  // Removes the next newline-terminated line from the buffer and dispatches it when it
  // is not blank. Returns false when no complete line is buffered yet.
  const consumeLine = (): boolean => {
    const newline = buffer.indexOf('\n');
    if (newline === -1) {
      return false;
    }
    const rawLine = buffer.slice(0, newline).toString('utf8').trim();
    buffer = buffer.slice(newline + 1);
    if (rawLine) {
      handleRawMessage(rawLine, 'line');
    }
    return true;
  };

  while (true) {
    const headerEnd = findHeaderEnd(buffer);
    if (!headerEnd) {
      // A header block that has started but not finished: wait for more bytes.
      if (/^Content-Length:/i.test(buffer.toString('utf8', 0, Math.min(buffer.length, 32)))) {
        return;
      }
      if (!consumeLine()) {
        return;
      }
      continue;
    }

    const header = buffer.slice(0, headerEnd.index).toString('utf8');
    const lengthMatch = /Content-Length:\s*(\d+)/i.exec(header);
    if (!lengthMatch) {
      // A "header" that begins like JSON is really a line-delimited message followed
      // by a blank line; deliver it instead of discarding it with the separator.
      if (/^\s*[{[]/.test(header)) {
        consumeLine();
        continue;
      }
      // Header block without a length: skip it.
      buffer = buffer.slice(headerEnd.index + headerEnd.length);
      continue;
    }

    const length = Number.parseInt(lengthMatch[1], 10);
    const messageStart = headerEnd.index + headerEnd.length;
    const messageEnd = messageStart + length;
    if (buffer.length < messageEnd) {
      // Body not fully received yet.
      return;
    }
    const rawMessage = buffer.slice(messageStart, messageEnd).toString('utf8');
    buffer = buffer.slice(messageEnd);
    handleRawMessage(rawMessage, 'content-length');
  }
});

398
server/hermes-cli.js Normal file
View File

@@ -0,0 +1,398 @@
import crypto from 'node:crypto';
import { sessionsService } from './modules/providers/services/sessions.service.js';
import { providerAuthService } from './modules/providers/services/provider-auth.service.js';
import { providerModelsService } from './modules/providers/services/provider-models.service.js';
import { notifyRunFailed, notifyRunStopped } from './services/notification-orchestrator.js';
import { createCompleteMessage, createNormalizedMessage } from './shared/utils.js';
import {
clearApprovalsForSession,
getPendingApprovalsForSession,
registerApproval,
resolveToolApproval,
unregisterApproval,
} from './shared/tool-approval-registry.js';
import { hermesConnectionManager } from './hermes/acp-client.js';
// Provider id stamped on every message, approval and notification emitted by this module.
const PROVIDER = 'hermes';
// Sentinel model value: when the caller passes it, spawnHermes forwards no model at all.
const HERMES_CONFIGURED_MODEL = '__hermes_configured_model__';
// Runs currently in flight, keyed by session id (or by a temporary `pending-…` key
// until the session id is known).
const activeHermesSessions = new Map();
// Session ids whose run was aborted; the terminal `complete` is emitted by
// handleChatAbort, so the runtime must not also emit a "completed" one.
const abortedSessionIds = new Set();
/**
 * Generates a unique id: a UUID when `crypto.randomUUID` exists, otherwise
 * 16 random bytes rendered as hex.
 */
function createRequestId() {
  const hasNativeUuid = typeof crypto.randomUUID === 'function';
  return hasNativeUuid ? crypto.randomUUID() : crypto.randomBytes(16).toString('hex');
}
/**
 * Extracts a session id from a response object, trying the known spellings in
 * order: `sessionId`, `session_id`, `id`, then the same three under `session`.
 *
 * @returns The first truthy candidate, or `null` when `result` is not an
 *   object or none of the fields is set.
 */
function readSessionId(result) {
  const isObject = Boolean(result) && typeof result === 'object';
  if (!isObject) {
    return null;
  }
  const nested = result.session;
  const candidates = [
    result.sessionId,
    result.session_id,
    result.id,
    nested?.id,
    nested?.sessionId,
    nested?.session_id,
  ];
  for (const candidate of candidates) {
    if (candidate) {
      return candidate;
    }
  }
  return null;
}
/**
 * Extracts the stop reason from a prompt result, accepting `stopReason`,
 * `stop_reason` or `reason` (first truthy wins).
 *
 * @returns The stop reason, or `null` when `result` is not an object or has none.
 */
function readStopReason(result) {
  if (result && typeof result === 'object') {
    const { stopReason, stop_reason: snakeCase, reason } = result;
    return stopReason || snakeCase || reason || null;
  }
  return null;
}
/**
 * Builds the parameter object for a `session/prompt` request.
 *
 * The command is sent as a single text prompt item; `modelId` is only
 * included when a truthy `model` is supplied.
 */
function buildPromptParams(sessionId, command, model) {
  const base = {
    sessionId,
    prompt: [{ type: 'text', text: command }],
  };
  return model ? { ...base, modelId: model } : base;
}
/**
 * Picks the `optionId` of a permission option.
 *
 * Options are searched by `kind` first, in the order given by `kinds`. If no
 * kind yields a truthy `optionId`, they are searched again by `optionId` in
 * the order given by `fallbackOptionIds`.
 *
 * @returns The matching option id, or `null` when `options` is not an array
 *   or nothing matches.
 */
function findPermissionOption(options, kinds, fallbackOptionIds = []) {
  if (!Array.isArray(options)) {
    return null;
  }
  const firstIdWhere = (field, wantedValues) => {
    for (const wanted of wantedValues) {
      const optionId = options.find((candidate) => candidate?.[field] === wanted)?.optionId;
      if (optionId) {
        return optionId;
      }
    }
    return null;
  };
  return firstIdWhere('kind', kinds) ?? firstIdWhere('optionId', fallbackOptionIds);
}
/**
 * Translates a UI approval decision into a permission response of the form
 * `{ outcome: { outcome, optionId? } }`.
 *
 * - Missing or cancelled decision: `cancelled`.
 * - Allow: selects `allow_once`. With `rememberEntry` it prefers
 *   `allow_always` / `allow_session` and falls back to `allow_once` when the
 *   agent offers no persistent option, so an approval is never turned into a
 *   cancellation just because it cannot be remembered.
 * - Deny: selects the first available reject/deny option.
 * - No suitable option offered: `cancelled`.
 *
 * @param decision - `{ allow?, cancelled?, rememberEntry? }` from the approval UI.
 * @param options - Permission options offered by the agent.
 */
function createPermissionDecision(decision, options = []) {
  const cancelled = () => ({ outcome: { outcome: 'cancelled' } });
  const selected = (optionId) => ({ outcome: { outcome: 'selected', optionId } });

  if (!decision || decision.cancelled) {
    return cancelled();
  }

  if (decision.allow) {
    const findAllowOnce = () => findPermissionOption(options, ['allow_once'], ['allow_once']);
    const optionId = decision.rememberEntry
      ? findPermissionOption(options, ['allow_always', 'allow_session'], ['allow_always', 'allow_session'])
        ?? findAllowOnce()
      : findAllowOnce();
    return optionId ? selected(optionId) : cancelled();
  }

  const denyOptionId = findPermissionOption(
    options,
    ['reject_once', 'deny', 'reject_always'],
    ['deny', 'reject_once', 'reject_always'],
  );
  return denyOptionId ? selected(denyOptionId) : cancelled();
}
/**
 * Forwards a permission request to the client and waits for the answer.
 *
 * A `permission_request` message is sent over `ws`, then an approval is
 * registered under a fresh request id. The returned promise resolves with the
 * decision produced by `createPermissionDecision` once that approval's
 * resolver is invoked.
 *
 * @param ws - Connection used to send the request.
 * @param params - Raw permission request parameters.
 * @param capturedSessionId - Session the request belongs to.
 * @param sessionSummary - Stored as `sessionName` in the approval metadata.
 */
async function waitForPermission(ws, params, capturedSessionId, sessionSummary) {
  const requestId = createRequestId();
  const toolCall = params?.toolCall || params?.tool_call || {};

  // The tool name may arrive under several spellings; the first truthy one wins.
  const nameCandidates = [
    params?.toolName,
    params?.tool_name,
    params?.name,
    params?.tool?.name,
    toolCall.title,
  ];
  const toolName = nameCandidates.find(Boolean) || 'HermesTool';

  // The tool input: first candidate that is neither null nor undefined,
  // falling back to the tool call object itself.
  const inputCandidates = [
    params?.input,
    params?.arguments,
    params?.toolInput,
    params?.tool_input,
    toolCall.rawInput,
    toolCall.raw_input,
  ];
  const input = inputCandidates.find((candidate) => candidate != null) ?? toolCall;

  const requestMessage = createNormalizedMessage({
    kind: 'permission_request',
    requestId,
    toolName,
    input,
    sessionId: capturedSessionId,
    provider: PROVIDER,
  });
  ws.send(requestMessage);

  return new Promise((resolve) => {
    const meta = {
      toolName,
      input,
      context: params,
      sessionName: sessionSummary,
      receivedAt: new Date(),
    };
    const resolver = (decision) => {
      unregisterApproval(requestId);
      resolve(createPermissionDecision(decision, params?.options));
    };
    registerApproval(requestId, {
      sessionId: capturedSessionId,
      provider: PROVIDER,
      meta,
      resolver,
    });
  });
}
/**
 * Runs one Hermes prompt over the connection for the working directory and
 * streams the results to `ws`.
 *
 * Flow: resolve the model, obtain the connection, load (or create) the
 * session, send `session/prompt`, forward every `session/update` as
 * normalized messages, then emit a terminal `complete` and a notification.
 *
 * @param command - Prompt text to send.
 * @param options - `{ sessionId?, projectPath?, cwd?, model?, sessionSummary? }`.
 * @param ws - Connection that receives normalized messages.
 * @throws Re-throws the underlying failure after reporting it on `ws`,
 *   unless the run had been aborted.
 */
async function spawnHermes(command, options = {}, ws) {
  const { sessionId, projectPath, cwd, model, sessionSummary } = options;
  const workingDir = cwd || projectPath || process.cwd();
  // The sentinel means no explicit model is forwarded.
  const requestedModel = model === HERMES_CONFIGURED_MODEL ? undefined : model;
  let capturedSessionId = sessionId || null;
  let sessionCreatedSent = false;
  let completeSent = false;
  // Until the real session id is known the run is tracked under a temporary key.
  let activeKey = capturedSessionId || `pending-${createRequestId()}`;

  const notifyTerminalState = ({ error = null, stopReason = 'completed' } = {}) => {
    const finalSessionId = capturedSessionId || sessionId || activeKey;
    if (!error) {
      notifyRunStopped({
        userId: ws?.userId || null,
        provider: PROVIDER,
        sessionId: finalSessionId,
        sessionName: sessionSummary,
        stopReason,
      });
      return;
    }
    notifyRunFailed({
      userId: ws?.userId || null,
      provider: PROVIDER,
      sessionId: finalSessionId,
      sessionName: sessionSummary,
      error,
    });
  };

  // Re-keys the active-session entry once the real session id is known and
  // announces a newly created session to the client exactly once.
  const registerSession = (nextSessionId, connection) => {
    if (!nextSessionId || capturedSessionId === nextSessionId) {
      return;
    }
    if (activeHermesSessions.has(activeKey)) {
      activeHermesSessions.delete(activeKey);
    }
    activeKey = nextSessionId;
    capturedSessionId = nextSessionId;
    activeHermesSessions.set(activeKey, {
      connection,
      sessionId: capturedSessionId,
      status: 'active',
      aborted: false,
      ws,
      sessionSummary,
    });
    if (ws.setSessionId && typeof ws.setSessionId === 'function') {
      ws.setSessionId(capturedSessionId);
    }
    if (!sessionId && !sessionCreatedSent) {
      sessionCreatedSent = true;
      ws.send(createNormalizedMessage({
        kind: 'session_created',
        newSessionId: capturedSessionId,
        sessionId: capturedSessionId,
        provider: PROVIDER,
      }));
    }
  };

  try {
    const resolvedModel = await providerModelsService.resolveResumeModel(PROVIDER, sessionId, requestedModel);
    const connection = await hermesConnectionManager.getConnection(workingDir);
    activeHermesSessions.set(activeKey, {
      connection,
      sessionId: capturedSessionId,
      status: 'active',
      aborted: false,
      ws,
      sessionSummary,
    });

    const unregisterPermissionHandler = connection.registerRequestHandler('session/request_permission', (params) => {
      const permissionSessionId = params?.sessionId || params?.session_id || null;
      const active = permissionSessionId
        ? activeHermesSessions.get(permissionSessionId)
        : activeHermesSessions.get(activeKey);
      if (!active) {
        return { outcome: { outcome: 'cancelled' } };
      }
      return waitForPermission(
        active.ws,
        params,
        active.sessionId || permissionSessionId || capturedSessionId,
        active.sessionSummary || sessionSummary,
      );
    });

    const updateHandler = (params) => {
      const updateSessionId = params?.sessionId || params?.session_id || null;
      if (capturedSessionId && updateSessionId && updateSessionId !== capturedSessionId) {
        return;
      }
      // The connection is shared per working directory. While this run has no
      // session id yet, an update tagged with an id that is already tracked
      // belongs to another run and must not be adopted as this run's session.
      if (!capturedSessionId && updateSessionId && activeHermesSessions.has(updateSessionId)) {
        return;
      }
      registerSession(updateSessionId, connection);
      const normalized = sessionsService.normalizeMessage(PROVIDER, params, capturedSessionId || updateSessionId || null);
      for (const msg of normalized) {
        ws.send(msg);
      }
    };
    connection.on('session/update', updateHandler);

    try {
      let sessionResult;
      if (sessionId) {
        try {
          sessionResult = await connection.request('session/load', { sessionId, cwd: workingDir });
        } catch {
          // Loading failed; continue with the requested id and let the prompt decide.
          sessionResult = { sessionId };
        }
      } else {
        sessionResult = await connection.request('session/new', {
          cwd: workingDir,
        });
      }
      registerSession(readSessionId(sessionResult) || sessionId, connection);

      const promptResult = await connection.request('session/prompt', buildPromptParams(capturedSessionId, command, resolvedModel));
      const finalSessionId = capturedSessionId || readSessionId(promptResult) || sessionId || activeKey;
      const stopReason = readStopReason(promptResult) || 'completed';
      const active = activeHermesSessions.get(finalSessionId) || activeHermesSessions.get(activeKey);

      if (promptResult?.usage || promptResult?.tokenUsage || promptResult?.token_usage) {
        ws.send(createNormalizedMessage({
          kind: 'status',
          text: 'token_budget',
          tokenBudget: promptResult.usage || promptResult.tokenUsage || promptResult.token_usage,
          sessionId: finalSessionId,
          provider: PROVIDER,
        }));
      }

      const abortedById = abortedSessionIds.delete(finalSessionId);
      const abortedByKey = abortedSessionIds.delete(activeKey);
      const wasAborted = Boolean(active?.aborted || abortedById || abortedByKey);
      if (!completeSent && !wasAborted) {
        completeSent = true;
        ws.send(createCompleteMessage({ provider: PROVIDER, sessionId: finalSessionId, exitCode: 0 }));
      }
      activeHermesSessions.delete(finalSessionId);
      activeHermesSessions.delete(activeKey);
      clearApprovalsForSession(finalSessionId);
      notifyTerminalState({ stopReason: wasAborted ? 'aborted' : stopReason });
    } finally {
      connection.off('session/update', updateHandler);
      unregisterPermissionHandler();
    }
  } catch (error) {
    const finalSessionId = capturedSessionId || sessionId || activeKey;
    const abortedById = abortedSessionIds.delete(finalSessionId);
    const abortedByKey = abortedSessionIds.delete(activeKey);
    activeHermesSessions.delete(finalSessionId);
    activeHermesSessions.delete(activeKey);
    clearApprovalsForSession(finalSessionId);
    // A cancelled session/prompt rejects here; its aborted terminal `complete`
    // is sent by handleChatAbort, so don't surface the cancellation as an error.
    if (abortedById || abortedByKey) {
      return;
    }
    // If the install probe itself fails, report the original error rather
    // than replacing it with the probe's failure.
    let installed = true;
    try {
      installed = await providerAuthService.isProviderInstalled(PROVIDER);
    } catch {
      installed = true;
    }
    const errorContent = !installed
      ? 'Hermes ACP is not installed. Install Hermes and ensure hermes-acp is on PATH.'
      : error instanceof Error ? error.message : String(error);
    ws.send(createNormalizedMessage({
      kind: 'error',
      content: errorContent,
      sessionId: finalSessionId,
      provider: PROVIDER,
    }));
    if (!completeSent) {
      completeSent = true;
      ws.send(createCompleteMessage({ provider: PROVIDER, sessionId: finalSessionId, exitCode: 1 }));
    }
    notifyTerminalState({ error });
    throw error;
  }
}
/**
 * Aborts the active run registered under `providerSessionId`.
 *
 * Marks the run as aborted, cancels its pending tool approvals, sends a
 * `session/cancel` notification and removes the run from the active map.
 *
 * @returns `false` when no run is registered under that id, `true` otherwise.
 */
async function abortHermesSession(providerSessionId) {
  const entry = activeHermesSessions.get(providerSessionId);
  if (!entry) {
    return false;
  }

  entry.aborted = true;
  entry.status = 'aborted';

  // Record the abort under both ids so the run's completion path finds it.
  abortedSessionIds.add(providerSessionId);
  if (entry.sessionId) {
    abortedSessionIds.add(entry.sessionId);
  }

  const pendingApprovals = getPendingApprovalsForSession(entry.sessionId || providerSessionId);
  for (const { requestId } of pendingApprovals) {
    resolveToolApproval(requestId, { cancelled: true });
  }

  try {
    entry.connection.notify('session/cancel', { sessionId: entry.sessionId || providerSessionId });
  } catch {
    // If Hermes already finished, the caller still sees the run as aborted.
  }

  activeHermesSessions.delete(providerSessionId);
  return true;
}
/** Reports whether a run is currently registered under `sessionId`. */
function isHermesSessionActive(sessionId) {
  const isTracked = activeHermesSessions.has(sessionId);
  return isTracked;
}
/** Returns a new array with the keys of all currently registered runs. */
function getActiveHermesSessions() {
  return [...activeHermesSessions.keys()];
}
// Names exported from this module.
export {
spawnHermes,
abortHermesSession,
isHermesSessionActive,
getActiveHermesSessions,
createPermissionDecision,
};

279
server/hermes/acp-client.js Normal file
View File

@@ -0,0 +1,279 @@
import { EventEmitter } from 'node:events';
import { spawn } from 'node:child_process';
import crossSpawn from 'cross-spawn';
// Spawn implementation: cross-spawn on Windows, node:child_process spawn elsewhere.
const spawnFunction = process.platform === 'win32' ? crossSpawn : spawn;
/**
 * JSON-RPC 2.0 client that talks to a child process over stdio.
 *
 * Outgoing messages are written as one JSON document per line. Incoming data
 * may be newline-delimited JSON or `Content-Length` framed. Responses settle
 * the promise returned by `request()`, incoming requests are routed to
 * registered handlers, and notifications are re-emitted as events named after
 * their method (plus a generic `notification` event).
 *
 * Events: `stderr` (text), `close` ({ code, signal }), `notification`,
 * `error` (only emitted while at least one listener is attached), and one
 * event per notification method.
 */
class AcpClient extends EventEmitter {
  /**
   * @param {object} [options]
   * @param {string} [options.command] - Executable followed by whitespace-separated
   *   arguments. Defaults to `HERMES_CLI_PATH` or `hermes acp`.
   * @param {string} [options.cwd] - Working directory for the child process.
   * @param {object} [options.env] - Environment for the child process.
   */
  constructor({ command = process.env.HERMES_CLI_PATH || 'hermes acp', cwd = process.cwd(), env = process.env } = {}) {
    super();
    const commandParts = command.trim().split(/\s+/);
    this.command = commandParts.shift() || 'hermes';
    this.args = commandParts;
    this.cwd = cwd;
    this.env = env;
    this.process = null;
    this.nextId = 1;
    this.pending = new Map();
    // Kept as bytes: Content-Length counts bytes, and decoding per chunk would
    // corrupt multi-byte characters that straddle a chunk boundary.
    this.buffer = Buffer.alloc(0);
    this.requestHandlers = new Map();
    this.initialized = false;
    // In-flight handshake, shared by concurrent initialize() callers.
    this.initializing = null;
  }

  /** Spawns the child process and wires up its streams. No-op when already running. */
  start() {
    if (this.process) {
      return;
    }
    const child = spawnFunction(this.command, this.args, {
      cwd: this.cwd,
      stdio: ['pipe', 'pipe', 'pipe'],
      env: { ...this.env },
    });
    this.process = child;
    this.buffer = Buffer.alloc(0);
    child.stdout.on('data', (chunk) => this.handleData(chunk));
    child.stderr.on('data', (chunk) => {
      const text = chunk.toString();
      if (text.trim()) {
        this.emit('stderr', text);
      }
    });
    // Without a listener, a failed write to a dead child would raise an
    // unhandled 'error' event on the stream.
    child.stdin.on('error', (error) => this.rejectAll(error));
    child.on('error', (error) => this.rejectAll(error));
    child.on('close', (code, signal) => {
      this.rejectAll(new Error(`hermes-acp exited with code ${code ?? 'null'}${signal ? ` signal ${signal}` : ''}`));
      this.emit('close', { code, signal });
      // Only reset state that still belongs to this child; a listener may
      // already have started a replacement process.
      if (this.process === child) {
        this.process = null;
        this.initialized = false;
        this.initializing = null;
        this.buffer = Buffer.alloc(0);
      }
    });
  }

  /**
   * Performs the `initialize` handshake once. Concurrent callers share the
   * same in-flight handshake instead of each sending their own.
   */
  async initialize() {
    if (this.initialized) {
      return;
    }
    if (!this.initializing) {
      this.initializing = (async () => {
        this.start();
        await this.request('initialize', {
          protocolVersion: 1,
          clientInfo: {
            name: 'CloudCLI',
            version: '1.0.0',
          },
          capabilities: {
            fs: false,
            terminal: false,
            session: {
              requestPermission: true,
            },
          },
        });
        this.initialized = true;
        this.notify('initialized', {});
      })().finally(() => {
        this.initializing = null;
      });
    }
    await this.initializing;
  }

  /** Installs `handler` as the only handler for incoming requests of `method`. */
  onRequest(method, handler) {
    this.requestHandlers.set(method, handler);
  }

  /**
   * Adds `handler` to the set of handlers for `method`.
   *
   * @returns {() => void} Function that removes this handler again.
   */
  registerRequestHandler(method, handler) {
    const existing = this.requestHandlers.get(method);
    let handlers;
    if (existing instanceof Set) {
      handlers = existing;
    } else {
      handlers = new Set();
      // Keep a handler previously installed through onRequest().
      if (typeof existing === 'function') {
        handlers.add(existing);
      }
    }
    handlers.add(handler);
    this.requestHandlers.set(method, handlers);
    return () => {
      handlers.delete(handler);
      if (handlers.size === 0 && this.requestHandlers.get(method) === handlers) {
        this.requestHandlers.delete(method);
      }
    };
  }

  /**
   * Sends a request and resolves with its result, or rejects with its error
   * or when the process exits.
   */
  request(method, params) {
    this.start();
    const id = this.nextId;
    this.nextId += 1;
    const payload = { jsonrpc: '2.0', id, method, params };
    return new Promise((resolve, reject) => {
      this.pending.set(id, { resolve, reject });
      try {
        this.writeMessage(payload);
      } catch (error) {
        // Nothing was sent, so no response will ever arrive for this id.
        this.pending.delete(id);
        reject(error);
      }
    });
  }

  /** Sends a notification (a message without an id). */
  notify(method, params) {
    this.start();
    this.writeMessage({ jsonrpc: '2.0', method, params });
  }

  /**
   * Serializes `payload` as one JSON line on the child's stdin.
   *
   * @throws {Error} When the process or its stdin is unavailable.
   */
  writeMessage(payload) {
    if (!this.process || !this.process.stdin || this.process.stdin.destroyed) {
      throw new Error('hermes-acp process is not running');
    }
    const line = `${JSON.stringify(payload)}\n`;
    this.process.stdin.write(line);
  }

  /** Appends stdout data to the buffer and dispatches every complete message in it. */
  handleData(chunk) {
    const incoming = Buffer.isBuffer(chunk) ? chunk : Buffer.from(String(chunk), 'utf8');
    const carried = Buffer.isBuffer(this.buffer) ? this.buffer : Buffer.from(String(this.buffer ?? ''), 'utf8');
    this.buffer = Buffer.concat([carried, incoming]);
    const headerPrefix = 'Content-Length:';
    while (this.buffer.length > 0) {
      if (this.buffer.subarray(0, headerPrefix.length).toString('latin1') === headerPrefix) {
        const headerEnd = this.buffer.indexOf('\r\n\r\n');
        if (headerEnd === -1) {
          return;
        }
        const header = this.buffer.subarray(0, headerEnd).toString('utf8');
        const match = header.match(/Content-Length:\s*(\d+)/i);
        const messageStart = headerEnd + 4;
        if (!match) {
          this.buffer = this.buffer.subarray(messageStart);
          continue;
        }
        const messageEnd = messageStart + Number(match[1]);
        if (this.buffer.length < messageEnd) {
          return;
        }
        const raw = this.buffer.subarray(messageStart, messageEnd).toString('utf8');
        this.buffer = this.buffer.subarray(messageEnd);
        this.dispatchRaw(raw);
        continue;
      }
      const newlineIndex = this.buffer.indexOf(0x0a);
      if (newlineIndex === -1) {
        return;
      }
      const raw = this.buffer.subarray(0, newlineIndex).toString('utf8').trim();
      this.buffer = this.buffer.subarray(newlineIndex + 1);
      if (raw) {
        this.dispatchRaw(raw);
      }
    }
  }

  /**
   * Emits `error` only when someone listens: an unhandled `error` event on an
   * EventEmitter throws, which would otherwise escape from the stdout handler.
   */
  reportError(error) {
    if (this.listenerCount('error') > 0) {
      this.emit('error', error);
    }
  }

  /** Parses one raw message and dispatches it; malformed input is reported, not thrown. */
  dispatchRaw(raw) {
    let message;
    try {
      message = JSON.parse(raw);
    } catch (error) {
      this.reportError(error);
      return;
    }
    // Valid JSON that is not an object (null, numbers, strings) is not a message.
    if (message === null || typeof message !== 'object') {
      return;
    }
    this.dispatchMessage(message).catch((error) => this.reportError(error));
  }

  /** Routes a parsed message: response, incoming request, or notification. */
  async dispatchMessage(message) {
    if (Object.prototype.hasOwnProperty.call(message, 'id') && (message.result !== undefined || message.error !== undefined)) {
      const pending = this.pending.get(message.id);
      if (!pending) {
        return;
      }
      this.pending.delete(message.id);
      if (message.error) {
        pending.reject(new Error(message.error.message || JSON.stringify(message.error)));
      } else {
        pending.resolve(message.result);
      }
      return;
    }
    if (Object.prototype.hasOwnProperty.call(message, 'id') && message.method) {
      const handler = this.requestHandlers.get(message.method);
      if (!handler) {
        this.writeMessage({
          jsonrpc: '2.0',
          id: message.id,
          error: { code: -32601, message: `No handler for ${message.method}` },
        });
        return;
      }
      try {
        const result = handler instanceof Set
          ? await this.dispatchRequestHandlers(handler, message.params)
          : await handler(message.params);
        this.writeMessage({ jsonrpc: '2.0', id: message.id, result });
      } catch (error) {
        this.writeMessage({
          jsonrpc: '2.0',
          id: message.id,
          error: { code: -32000, message: error instanceof Error ? error.message : String(error) },
        });
      }
      return;
    }
    if (message.method) {
      this.emit(message.method, message.params);
      this.emit('notification', { method: message.method, params: message.params });
    }
  }

  /** Rejects every outstanding request with `error` and clears the pending map. */
  rejectAll(error) {
    for (const pending of this.pending.values()) {
      pending.reject(error);
    }
    this.pending.clear();
  }

  /**
   * Asks the handlers, most recently registered first, and returns the first
   * result whose outcome is not `cancelled`. If every handler cancels, the
   * last cancelled result is returned; with no handlers a cancelled result is
   * synthesised.
   */
  async dispatchRequestHandlers(handlers, params) {
    let fallbackResult = null;
    for (const handler of Array.from(handlers).reverse()) {
      const result = await handler(params);
      if (result?.outcome?.outcome !== 'cancelled') {
        return result;
      }
      fallbackResult = result;
    }
    return fallbackResult ?? { outcome: { outcome: 'cancelled' } };
  }

  /** Sends SIGTERM to the child process, if one is running. */
  close() {
    if (!this.process) {
      return;
    }
    this.process.kill('SIGTERM');
  }
}
/**
 * Caches one initialized AcpClient per working directory.
 */
class HermesConnectionManager {
  constructor() {
    // cwd -> AcpClient
    this.connections = new Map();
  }

  /**
   * Returns the initialized client for `cwd` (defaults to the current working
   * directory), creating it on first use.
   *
   * @param {string} [cwd]
   * @returns {Promise<AcpClient>}
   */
  async getConnection(cwd) {
    const key = cwd || process.cwd();
    let connection = this.connections.get(key);
    if (!connection) {
      const created = new AcpClient({ cwd: key });
      created.on('close', () => {
        // A close event can arrive after closeAll() and after a replacement
        // client was registered under the same key; only evict our own entry.
        if (this.connections.get(key) === created) {
          this.connections.delete(key);
        }
      });
      this.connections.set(key, created);
      connection = created;
    }
    await connection.initialize();
    return connection;
  }

  /** Closes every cached client and empties the cache. */
  closeAll() {
    for (const connection of this.connections.values()) {
      connection.close();
    }
    this.connections.clear();
  }
}
// Module-level manager instance, exported together with both classes.
const hermesConnectionManager = new HermesConnectionManager();
export {
AcpClient,
HermesConnectionManager,
hermesConnectionManager,
};

View File

@@ -41,6 +41,10 @@ import {
spawnOpenCode,
abortOpenCodeSession,
} from './opencode-cli.js';
import {
spawnHermes,
abortHermesSession,
} from './hermes-cli.js';
import sessionManager from './sessionManager.js';
import {
stripAnsiSequences,
@@ -66,9 +70,6 @@ import voiceRoutes from './voice-proxy.js';
import browserUseRoutes from './modules/browser-use/browser-use.routes.js';
import browserUseMcpRoutes from './modules/browser-use/browser-use-mcp.routes.js';
import { browserUseService } from './modules/browser-use/browser-use.service.js';
import computerUseRoutes from './modules/computer-use/computer-use.routes.js';
import computerUseMcpRoutes from './modules/computer-use/computer-use-mcp.routes.js';
import { computerUseService } from './modules/computer-use/computer-use.service.js';
import { startEnabledPluginServers, stopAllPlugins, getPluginPort } from './utils/plugin-process-manager.js';
import { initializeDatabase, projectsDb, sessionsDb } from './modules/database/index.js';
import { configureWebPush } from './services/vapid-keys.js';
@@ -121,6 +122,7 @@ const wss = createWebSocketServer(server, {
codex: queryCodex,
gemini: spawnGemini,
opencode: spawnOpenCode,
hermes: spawnHermes,
},
abortFns: {
claude: abortClaudeSDKSession,
@@ -128,6 +130,7 @@ const wss = createWebSocketServer(server, {
codex: abortCodexSession,
gemini: abortGeminiSession,
opencode: abortOpenCodeSession,
hermes: abortHermesSession,
},
resolveToolApproval,
getPendingApprovalsForSession,
@@ -223,12 +226,6 @@ app.use('/api/browser-use-mcp', browserUseMcpRoutes);
// Browser API Routes (protected)
app.use('/api/browser-use', authenticateToken, browserUseRoutes);
// Computer Use MCP bridge API (local token protected)
app.use('/api/computer-use-mcp', computerUseMcpRoutes);
// Computer Use API Routes (protected)
app.use('/api/computer-use', authenticateToken, computerUseRoutes);
// Unified provider MCP routes (protected)
app.use('/api/providers', authenticateToken, providerRoutes);
@@ -1785,11 +1782,6 @@ async function startServer() {
} catch (err) {
console.error('[Browser] Error stopping sessions during shutdown:', err?.message || err);
}
try {
await computerUseService.stopAllSessions();
} catch (err) {
console.error('[Computer Use] Error stopping sessions during shutdown:', err?.message || err);
}
try {
await stopAllPlugins();
} catch (err) {

View File

@@ -1,67 +0,0 @@
import {
captureScreenshot,
executor,
type ExecutorTarget,
} from '@/modules/computer-use/computer-executor.js';
import type { RawActionResult, RawComputerAction, RawActionTarget } from '@/modules/computer-use/actions/raw-action-types.js';
// Wait duration used when a `wait` action does not specify `ms`.
const DEFAULT_WAIT_MS = 1000;
// Upper bound applied to any requested wait duration.
const MAX_WAIT_MS = 10_000;
/**
 * Normalizes a requested wait duration: `undefined` becomes the default,
 * anything else is clamped to `[0, MAX_WAIT_MS]` and truncated to an integer.
 *
 * @throws {Error} When `ms` is not a finite number.
 */
function normalizeWaitMs(ms: number | undefined): number {
  if (ms === undefined) {
    return DEFAULT_WAIT_MS;
  }
  if (!Number.isFinite(ms)) {
    throw new Error('Computer Use wait duration must be a finite number.');
  }
  const capped = Math.min(ms, MAX_WAIT_MS);
  const nonNegative = Math.max(0, capped);
  return Math.trunc(nonNegative);
}
/**
 * Captures a screenshot and returns it with its size, falling back to the
 * target's display size when the captured size is unavailable.
 */
async function snapshot(target: RawActionTarget): Promise<RawActionResult> {
  const shot = await captureScreenshot();
  return {
    screenshotDataUrl: shot.dataUrl,
    displaySize: shot.size || target.displaySize,
  };
}
/**
 * Executes a single raw computer action and returns a fresh screenshot,
 * plus cursor information for the actions that involve a pointer position.
 *
 * @param action - The action to perform, discriminated by `type`.
 * @param target - Supplies the display size used for coordinate mapping.
 * @throws Error for an action type outside the known union.
 */
export async function runRawComputerAction(
action: RawComputerAction,
target: RawActionTarget,
): Promise<RawActionResult> {
const executorTarget: ExecutorTarget = {
displaySize: target.displaySize,
};
switch (action.type) {
case 'screenshot':
return snapshot(target);
case 'cursor_position': {
// The position is read before the screenshot is taken.
const position = await executor.cursorPosition(executorTarget);
return { ...(await snapshot(target)), position, cursor: position };
}
case 'mouse_move':
await executor.moveTo(executorTarget, action.point);
return { ...(await snapshot(target)), cursor: action.point };
case 'click':
await executor.click(executorTarget, action.button, action.point, action.double === true);
// A click without a point reports a null cursor.
return { ...(await snapshot(target)), cursor: action.point ?? null };
case 'drag':
await executor.drag(executorTarget, action.from, action.to, action.button ?? 'left');
return { ...(await snapshot(target)), cursor: action.to };
case 'type':
await executor.type(action.text);
return snapshot(target);
case 'key':
await executor.pressChord(action.key);
return snapshot(target);
case 'scroll':
// The scroll amount defaults to 3 when not given.
await executor.scroll(executorTarget, action.direction, action.amount ?? 3, action.point);
return { ...(await snapshot(target)), cursor: action.point ?? null };
case 'wait':
await new Promise((resolve) => setTimeout(resolve, normalizeWaitMs(action.ms)));
return snapshot(target);
default: {
// Compile-time exhaustiveness check; at runtime this reports the unexpected type.
const exhaustive: never = action;
throw new Error(`Unsupported computer action: ${(exhaustive as { type?: string }).type || 'unknown'}`);
}
}
}

View File

@@ -1,28 +0,0 @@
import type {
ClickButton,
DisplaySize,
Point,
ScrollDirection,
} from '@/modules/computer-use/computer-executor.js';
/** A single low-level action, discriminated by `type`. */
export type RawComputerAction =
| { type: 'screenshot' }
| { type: 'cursor_position' }
| { type: 'mouse_move'; point: Point }
| { type: 'click'; button: ClickButton; point?: Point; double?: boolean }
| { type: 'drag'; from: Point; to: Point; button?: ClickButton }
| { type: 'type'; text: string }
| { type: 'key'; key: string }
| { type: 'scroll'; direction: ScrollDirection; amount?: number; point?: Point }
| { type: 'wait'; ms?: number };
/** Context an action runs against; `displaySize` may be unknown (null). */
export type RawActionTarget = {
displaySize: DisplaySize | null;
};
/** Result of an action; every field is optional and may be null. */
export type RawActionResult = {
screenshotDataUrl?: string | null;
displaySize?: DisplaySize | null;
cursor?: Point | null;
position?: Point | null;
};

View File

@@ -1,242 +0,0 @@
import { createRequire } from 'node:module';
// CommonJS-style `require` for this ES module, used by getNut/getScreenshot
// to load optional packages inside try/catch.
const require = createRequire(import.meta.url);
/** A 2D coordinate. */
export type Point = { x: number; y: number };
/** Mouse button used for click and drag actions. */
export type ClickButton = 'left' | 'right' | 'middle';
/** Direction of a scroll action. */
export type ScrollDirection = 'up' | 'down' | 'left' | 'right';
/** Width and height of an image or screen. */
export type DisplaySize = { width: number; height: number };
/** Loaded runtime modules (or null) together with availability flags. */
export type RuntimeReadiness = {
nut: any | null;
screenshot: any | null;
nutInstalled: boolean;
screenshotInstalled: boolean;
};
/**
 * Coordinate space the executor reports/accepts. The screenshot pixel space is
 * the canonical space agents and users address; it is mapped to the nut-js
 * logical mouse space before any action runs.
 */
export type ExecutorTarget = {
displaySize: DisplaySize | null;
};
/** Loads the nut-js module, returning `null` when it cannot be required. */
export function getNut(): any | null {
  let loaded: any | null;
  try {
    loaded = require('@nut-tree-fork/nut-js');
  } catch {
    loaded = null;
  }
  return loaded;
}
/**
 * Loads the `screenshot-desktop` module, preferring its `default` export.
 * Returns `null` when the module cannot be required.
 */
export function getScreenshot(): any | null {
  let loaded: any;
  try {
    loaded = require('screenshot-desktop');
  } catch {
    return null;
  }
  return loaded?.default || loaded;
}
/**
 * Reports which runtime modules are available: nut-js counts as installed
 * when it loaded, the screenshot module when it resolved to a function.
 */
export function getRuntimeReadiness(): RuntimeReadiness {
  const nut = getNut();
  const screenshot = getScreenshot();
  const nutInstalled = Boolean(nut);
  const screenshotInstalled = typeof screenshot === 'function';
  return { nut, screenshot, nutInstalled, screenshotInstalled };
}
/**
 * Reads the pixel dimensions from a PNG/JPEG buffer header without decoding it.
 *
 * @param buffer - Encoded image bytes.
 * @returns The image size, or `null` when the format is not recognised or no
 *   frame header is found.
 */
export function readImageSize(buffer: Buffer): DisplaySize | null {
  // PNG: 8-byte signature, then IHDR chunk with width/height as big-endian uint32.
  if (buffer.length >= 24 && buffer[0] === 0x89 && buffer[1] === 0x50) {
    return { width: buffer.readUInt32BE(16), height: buffer.readUInt32BE(20) };
  }
  // JPEG: scan for a Start-Of-Frame marker (0xFFC0..0xFFCF, excluding C4/C8/CC).
  if (buffer.length >= 4 && buffer[0] === 0xff && buffer[1] === 0xd8) {
    let offset = 2;
    while (offset + 9 < buffer.length) {
      if (buffer[offset] !== 0xff) {
        offset += 1;
        continue;
      }
      const marker = buffer[offset + 1];
      if (marker === 0xff) {
        // Fill byte: any number of 0xFF bytes may precede a marker.
        offset += 1;
        continue;
      }
      if (marker >= 0xc0 && marker <= 0xcf && marker !== 0xc4 && marker !== 0xc8 && marker !== 0xcc) {
        return { height: buffer.readUInt16BE(offset + 5), width: buffer.readUInt16BE(offset + 7) };
      }
      if (marker === 0x01 || (marker >= 0xd0 && marker <= 0xd7)) {
        // TEM and RSTn are standalone markers with no length field.
        offset += 2;
        continue;
      }
      offset += 2 + buffer.readUInt16BE(offset + 2);
    }
  }
  return null;
}
/**
 * Captures a PNG screenshot and returns it as a data URL together with its
 * pixel size as read from the image header.
 *
 * @throws {Error} When the screenshot module is unavailable.
 */
export async function captureScreenshot(): Promise<{ dataUrl: string; size: DisplaySize | null }> {
  const screenshot = getScreenshot();
  if (typeof screenshot !== 'function') {
    throw new Error('Computer Use runtime is not available.');
  }
  const png: Buffer = await screenshot({ format: 'png' });
  const encoded = png.toString('base64');
  return {
    dataUrl: `data:image/png;base64,${encoded}`,
    size: readImageSize(png),
  };
}
/**
 * Returns the mouse coordinate space size (logical screen pixels).
 *
 * @throws {Error} When nut-js is unavailable.
 */
export async function getMouseSpaceSize(): Promise<DisplaySize> {
  const nut = getNut();
  if (nut) {
    const width = await nut.screen.width();
    const height = await nut.screen.height();
    return { width, height };
  }
  throw new Error('Computer Use runtime is not available.');
}
/**
 * Maps a point from screenshot/image space to the mouse coordinate space.
 * Without a known image size the point is returned unscaled (rounded).
 */
export async function toMouseSpace(target: ExecutorTarget, point: Point): Promise<Point> {
  const mouseSize = await getMouseSpaceSize();
  const image = target.displaySize || mouseSize;
  // A zero image dimension would divide by zero, so that axis is left unscaled.
  const ratio = (mouseExtent: number, imageExtent: number) => (imageExtent ? mouseExtent / imageExtent : 1);
  const x = Math.round(point.x * ratio(mouseSize.width, image.width));
  const y = Math.round(point.y * ratio(mouseSize.height, image.height));
  return { x, y };
}
/**
 * Maps a point from the mouse coordinate space back to screenshot/image space.
 * Without a known image size the point is returned unscaled (rounded).
 */
export function toImageSpace(target: ExecutorTarget, point: Point, mouseSize: DisplaySize): Point {
  const image = target.displaySize || mouseSize;
  // A zero mouse dimension would divide by zero, so that axis is left unscaled.
  const ratio = (imageExtent: number, mouseExtent: number) => (mouseExtent ? imageExtent / mouseExtent : 1);
  const x = Math.round(point.x * ratio(image.width, mouseSize.width));
  const y = Math.round(point.y * ratio(image.height, mouseSize.height));
  return { x, y };
}
/** Maps a button name to the matching nut-js `Button` value; anything but right/middle is LEFT. */
function nutButton(nut: any, button: ClickButton) {
  switch (button) {
    case 'right':
      return nut.Button.RIGHT;
    case 'middle':
      return nut.Button.MIDDLE;
    default:
      return nut.Button.LEFT;
  }
}
/**
 * Maps a key name (xdotool-style, as Anthropic's computer tool emits) to a nut-js Key.
 *
 * Resolution order: named aliases (case-insensitive), function keys F1–F24,
 * single digits (`Num0`–`Num9`), then any other single character by its
 * upper-cased name.
 *
 * @throws {Error} When the token does not resolve to a key defined on `nut.Key`.
 */
function nutKey(nut: any, token: string): any {
  const map: Record<string, string> = {
    return: 'Enter', enter: 'Enter', esc: 'Escape', escape: 'Escape', tab: 'Tab',
    space: 'Space', backspace: 'Backspace', delete: 'Delete', del: 'Delete', insert: 'Insert',
    up: 'Up', down: 'Down', left: 'Left', right: 'Right',
    home: 'Home', end: 'End', pageup: 'PageUp', page_up: 'PageUp', pagedown: 'PageDown', page_down: 'PageDown',
    ctrl: 'LeftControl', control: 'LeftControl', alt: 'LeftAlt', shift: 'LeftShift',
    meta: 'LeftSuper', super: 'LeftSuper', cmd: 'LeftSuper', win: 'LeftSuper',
    capslock: 'CapsLock',
  };
  const lower = token.toLowerCase();
  let keyName: string | undefined;
  // Own-property check so inherited names such as "constructor" are not treated as aliases.
  if (Object.prototype.hasOwnProperty.call(map, lower)) {
    keyName = map[lower];
  } else if (/^f([1-9]|1[0-9]|2[0-4])$/.test(lower)) {
    keyName = `F${lower.slice(1)}`;
  } else if (/^[0-9]$/.test(token)) {
    // Digits are resolved by name first: indexing the key table with a bare
    // digit could hit a numeric reverse-mapping entry instead of a key.
    keyName = `Num${token}`;
  } else if (token.length === 1) {
    keyName = token.toUpperCase();
  }
  const key = keyName === undefined ? undefined : nut.Key[keyName];
  if (key === undefined) {
    throw new Error(`Unsupported key: ${token}`);
  }
  return key;
}
/** Returns the configured nut-js module, or throws when the runtime is missing. */
async function requireConfiguredNut(): Promise<any> {
  const nut = await executor.configure();
  if (!nut) {
    throw new Error('Computer Use runtime is not available.');
  }
  return nut;
}

/**
 * The cross-platform OS executor. It is intentionally free of any server,
 * database, or session dependencies so it can run both inside the local server
 * process (OSS mode) and inside the standalone desktop agent (cloud relay).
 *
 * Every action throws "Computer Use runtime is not available." when nut-js
 * cannot be loaded. Points passed in are in screenshot/image space.
 */
export const executor = {
  /** Loads nut-js and applies the action delays; returns the module or `null`. */
  async configure() {
    const nut = getNut();
    if (nut) {
      // Make actions responsive; the agent loop already paces itself with screenshots.
      nut.mouse.config.autoDelayMs = 2;
      nut.keyboard.config.autoDelayMs = 2;
    }
    return nut;
  },

  /** Returns the current cursor position in image space. */
  async cursorPosition(target: ExecutorTarget): Promise<Point> {
    const nut = await requireConfiguredNut();
    const mouseSize = await getMouseSpaceSize();
    const pos = await nut.mouse.getPosition();
    return toImageSpace(target, { x: pos.x, y: pos.y }, mouseSize);
  },

  /** Moves the cursor to `point`. */
  async moveTo(target: ExecutorTarget, point: Point): Promise<void> {
    const nut = await requireConfiguredNut();
    const dest = await toMouseSpace(target, point);
    await nut.mouse.setPosition(new nut.Point(dest.x, dest.y));
  },

  /** Clicks (or double-clicks) `button`, first moving to `point` when given. */
  async click(target: ExecutorTarget, button: ClickButton, point?: Point, doubleClick = false): Promise<void> {
    const nut = await requireConfiguredNut();
    if (point) {
      await this.moveTo(target, point);
    }
    if (doubleClick) {
      await nut.mouse.doubleClick(nutButton(nut, button));
    } else {
      await nut.mouse.click(nutButton(nut, button));
    }
  },

  /** Presses `button` at `from`, moves to `to`, and releases it. */
  async drag(target: ExecutorTarget, from: Point, to: Point, button: ClickButton = 'left'): Promise<void> {
    const nut = await requireConfiguredNut();
    const start = await toMouseSpace(target, from);
    const end = await toMouseSpace(target, to);
    const mouseButton = nutButton(nut, button);
    await nut.mouse.setPosition(new nut.Point(start.x, start.y));
    await nut.mouse.pressButton(mouseButton);
    try {
      await nut.mouse.setPosition(new nut.Point(end.x, end.y));
    } finally {
      // Never leave the button held down if the move fails.
      await nut.mouse.releaseButton(mouseButton);
    }
  },

  /** Types `text` on the keyboard. */
  async type(text: string): Promise<void> {
    const nut = await requireConfiguredNut();
    await nut.keyboard.type(text);
  },

  /**
   * Presses a `+`-separated key chord: keys go down left to right and are
   * released in reverse order. An empty chord is a no-op.
   */
  async pressChord(chord: string): Promise<void> {
    const nut = await requireConfiguredNut();
    const tokens = chord.split('+').map((token) => token.trim()).filter(Boolean);
    if (tokens.length === 0) {
      return;
    }
    // Resolve every key up front so an unsupported token fails before anything is pressed.
    const keys = tokens.map((token) => nutKey(nut, token));
    const pressed: any[] = [];
    try {
      for (const key of keys) {
        await nut.keyboard.pressKey(key);
        pressed.push(key);
      }
    } finally {
      // Release whatever went down, even when a later press failed.
      for (const key of pressed.reverse()) {
        await nut.keyboard.releaseKey(key);
      }
    }
  },

  /** Scrolls at least one step in `direction`, first moving to `point` when given. */
  async scroll(target: ExecutorTarget, direction: ScrollDirection, amount: number, point?: Point): Promise<void> {
    const nut = await requireConfiguredNut();
    if (point) {
      await this.moveTo(target, point);
    }
    const steps = Math.max(1, Math.round(amount));
    if (direction === 'up') await nut.mouse.scrollUp(steps);
    else if (direction === 'down') await nut.mouse.scrollDown(steps);
    else if (direction === 'left') await nut.mouse.scrollLeft(steps);
    else await nut.mouse.scrollRight(steps);
  },
};

View File

@@ -1,460 +0,0 @@
import { execFile } from 'node:child_process';
import { promisify } from 'node:util';
import {
captureScreenshot,
executor,
type ClickButton,
type ExecutorTarget,
type Point,
type ScrollDirection,
} from '@/modules/computer-use/computer-executor.js';
import type { SemanticAdapter } from '@/modules/computer-use/semantics/adapters/semantic-adapter.js';
import { createMacOsSemanticAdapter } from '@/modules/computer-use/semantics/adapters/macos/macos-semantic-adapter.js';
import { createWindowsSemanticAdapter } from '@/modules/computer-use/semantics/adapters/windows/windows-semantic-adapter.js';
import { resolveSemanticHelper } from '@/modules/computer-use/semantics/helpers/semantic-helper-resolver.js';
import { semanticSessionStore } from '@/modules/computer-use/semantics/semantic-session-store.js';
import type { SemanticAppState, SemanticElement } from '@/modules/computer-use/semantics/semantic-types.js';
// Promise-returning wrapper around child_process.execFile.
const execFileAsync = promisify(execFile);
// NOTE(review): presumably caps how many elements an app-state snapshot returns — confirm at the usage site.
const MAX_APP_STATE_ELEMENTS = 250;
// Cached adapter: `undefined` means not resolved yet, `null` means resolved but unavailable.
let helperAdapter: SemanticAdapter | null | undefined;
/** Returns the trimmed string, or `''` when `value` is not a string. */
function readString(value: unknown): string {
  if (typeof value !== 'string') {
    return '';
  }
  return value.trim();
}
/**
 * Reads the mandatory `app` field from the input.
 *
 * @throws {Error} When `app` is missing, not a string, or blank.
 */
function requireApp(input: Record<string, unknown>): string {
  const app = readString(input.app);
  if (app) {
    return app;
  }
  throw new Error('app is required.');
}
/** Returns `value` when it is a finite number, otherwise `undefined`. */
function readNumber(value: unknown): number | undefined {
  if (typeof value === 'number' && Number.isFinite(value)) {
    return value;
  }
  return undefined;
}
/** Normalizes a mouse-button value; anything other than 'right' or 'middle' becomes 'left'. */
function readButton(value: unknown): ClickButton {
  if (value === 'right') {
    return 'right';
  }
  if (value === 'middle') {
    return 'middle';
  }
  return 'left';
}
/**
 * Reads a click count: defaults to 1 when no finite number is supplied,
 * otherwise truncates toward zero and clamps to the range 1..5.
 */
function readClickCount(value: unknown): number {
  const requested = readNumber(value);
  return requested === undefined
    ? 1
    : Math.min(5, Math.max(1, Math.trunc(requested)));
}
/** Normalizes a scroll direction; unrecognized values fall back to 'down'. */
function readDirection(value: unknown): ScrollDirection {
  if (value === 'up') {
    return 'up';
  }
  if (value === 'left') {
    return 'left';
  }
  if (value === 'right') {
    return 'right';
  }
  return 'down';
}
/** Returns the trimmed `sessionId` from the input, or 'default' when it is absent or blank. */
function readSessionId(input: Record<string, unknown>): string {
  const explicit = readString(input.sessionId);
  return explicit === '' ? 'default' : explicit;
}
/** Returns the rounded center point of an element's bounds, or `null` when it has no bounds. */
function centerOf(element: SemanticElement): Point | null {
  const { bounds } = element;
  if (!bounds) {
    return null;
  }
  const { x, y, width, height } = bounds;
  return {
    x: Math.round(x + width / 2),
    y: Math.round(y + height / 2),
  };
}
/** Looks up a previously captured element in the semantic session store. */
function getCachedElement(sessionId: string, app: string, index: string, stateId?: string): SemanticElement | null {
  const cached = semanticSessionStore.getElement(sessionId, app, index, stateId);
  return cached;
}
/**
 * Resolves the screen point a tool call refers to.
 * Explicit finite `x`/`y` win; otherwise the center of the cached element named by
 * `element_index` (optionally scoped by `stateId`) is used. Returns `undefined`
 * when neither source yields a point.
 */
function getPoint(input: Record<string, unknown>, sessionId: string, app: string): Point | undefined {
  const explicitX = readNumber(input.x);
  const explicitY = readNumber(input.y);
  if (explicitX !== undefined && explicitY !== undefined) {
    return { x: explicitX, y: explicitY };
  }

  const elementIndex = readString(input.element_index);
  if (elementIndex === '') {
    return undefined;
  }

  const stateId = readString(input.stateId) || undefined;
  const cached = getCachedElement(sessionId, app, elementIndex, stateId);
  if (!cached) {
    return undefined;
  }
  return centerOf(cached) ?? undefined;
}
/**
 * Lazily resolves the semantic helper adapter and memoizes the outcome.
 * Yields `null` on platforms other than macOS/Windows or when the helper
 * resolution reports it as unavailable.
 */
function getHelperAdapter(): SemanticAdapter | null {
  if (helperAdapter === undefined) {
    const { platform } = process;
    const supported = platform === 'darwin' || platform === 'win32';
    // The helper is only resolved on supported platforms (short-circuit).
    if (!supported || !resolveSemanticHelper().available) {
      helperAdapter = null;
    } else {
      helperAdapter = platform === 'darwin'
        ? createMacOsSemanticAdapter()
        : createWindowsSemanticAdapter();
    }
  }
  return helperAdapter;
}
/** Tells whether a helper failure message matches one of the patterns that permit falling back. */
function shouldFallbackFromHelper(error: unknown): boolean {
  const text = error instanceof Error ? error.message : String(error);
  const recoverable = /not implemented|unavailable|not found|does not exist|timed out|not running|exited with code|failed to start/i;
  return recoverable.test(text);
}
/**
 * Runs `operation` against the helper adapter and stores the resulting state.
 * Resolves to `null` when no adapter exists or the failure is one that allows
 * falling back; any other error is rethrown.
 */
async function withHelperState(
  sessionId: string,
  operation: (adapter: SemanticAdapter) => Promise<SemanticAppState>,
): Promise<SemanticAppState | null> {
  const adapter = getHelperAdapter();
  if (adapter === null) {
    return null;
  }
  try {
    const state = await operation(adapter);
    return semanticSessionStore.save(sessionId, state);
  } catch (error) {
    if (!shouldFallbackFromHelper(error)) {
      throw error;
    }
    const reason = error instanceof Error ? error.message : String(error);
    console.warn('[ComputerSemantics] Falling back from helper:', reason);
    return null;
  }
}
/**
 * Executes a command without a shell and resolves with its stdout.
 * @param timeout Milliseconds before the child is aborted (default 5000).
 */
async function run(command: string, args: string[], timeout = 5000): Promise<string> {
  const options = {
    timeout,
    windowsHide: true,
    maxBuffer: 1024 * 1024 * 4,
  };
  const result = await execFileAsync(command, args, options);
  return result.stdout;
}
/** Lists the names of non-background macOS application processes through System Events. */
async function listMacApps(): Promise<Array<Record<string, unknown>>> {
  const scriptLines = [
    'tell application "System Events"',
    'set appRows to {}',
    'repeat with p in (application processes whose background only is false)',
    'set end of appRows to (name of p as text)',
    'end repeat',
    'return appRows',
    'end tell',
  ];
  const output = await run('osascript', ['-e', scriptLines.join('\n')]);

  // The result is split on ", " to recover the individual names.
  const apps: Array<Record<string, unknown>> = [];
  for (const piece of output.split(', ')) {
    const name = piece.trim();
    if (name) {
      apps.push({ name, running: true });
    }
  }
  return apps;
}
/**
 * Lists Windows processes that own a titled main window, via PowerShell.
 *
 * ConvertTo-Json emits a bare object for a single match and nothing at all for
 * zero matches, so the output is normalized to an array before mapping.
 *
 * @returns One entry per process with `name`, `pid`, `windowTitle` and `running: true`.
 * @throws When PowerShell fails or prints something that is not valid JSON.
 */
async function listWindowsApps(): Promise<Array<Record<string, unknown>>> {
  const script = [
    'Get-Process | Where-Object { $_.MainWindowTitle } |',
    'Select-Object ProcessName, Id, MainWindowTitle | ConvertTo-Json -Depth 3',
  ].join(' ');
  const output = await run('powershell.exe', ['-NoProfile', '-Command', script]);
  // Trim first: whitespace-only stdout must be treated as "no rows" instead of
  // being handed to JSON.parse, which would throw.
  const json = output.trim();
  const parsed = json ? JSON.parse(json) : [];
  const rows = Array.isArray(parsed) ? parsed : [parsed];
  return rows
    // Skip null/primitive payloads so property access below cannot throw.
    .filter((row) => row !== null && typeof row === 'object')
    .map((row) => ({
      name: row.ProcessName,
      pid: row.Id,
      windowTitle: row.MainWindowTitle,
      running: true,
    }));
}
/**
 * Lists running Linux applications.
 *
 * Prefers `wmctrl -lx`, which reports one row per window. When that command
 * fails for any reason, falls back to the de-duplicated process names from
 * `ps`, capped at 200 entries.
 *
 * @returns Window rows (`windowId`, `desktop`, `host`, `className`,
 *   `windowTitle`) or process rows (`name`), each with `running: true`.
 */
async function listLinuxApps(): Promise<Array<Record<string, unknown>>> {
  try {
    const output = await run('wmctrl', ['-lx']);
    return output.split(/\r?\n/)
      .map((line) => line.trim())
      .filter(Boolean)
      .map((line) => {
        // `wmctrl -lx` columns are: window id, desktop, WM_CLASS, client host, title.
        // WM_CLASS precedes the host name, so it must be read from the third column.
        const [windowId, desktop, className, host, ...titleWords] = line.split(/\s+/);
        return {
          windowId,
          desktop,
          host,
          className,
          windowTitle: titleWords.join(' '),
          running: true,
        };
      });
  } catch {
    // Best-effort fallback: wmctrl is unavailable or failed, so report process names.
    const output = await run('ps', ['-eo', 'comm=']);
    return [...new Set(output.split(/\r?\n/).map((name) => name.trim()).filter(Boolean))]
      .slice(0, 200)
      .map((name) => ({ name, running: true }));
  }
}
/** Lists running applications using the strategy for the current platform (Linux strategy otherwise). */
async function listApps(): Promise<Array<Record<string, unknown>>> {
  switch (process.platform) {
    case 'darwin':
      return listMacApps();
    case 'win32':
      return listWindowsApps();
    default:
      return listLinuxApps();
  }
}
/**
 * Captures a flattened accessibility tree for a running macOS app.
 *
 * Builds an AppleScript that walks the UI elements of every window of the
 * process named `app` (through System Events), emitting one tab-separated row
 * per element: counter, role, title, value, "x,y,width,height". The script is
 * run with osascript (10000 ms timeout) and each row is parsed into a
 * SemanticElement.
 *
 * @param app Process name as known to System Events.
 * @returns Parsed elements; `bounds` is set only when all four numbers parsed.
 * @throws When osascript fails, including the script's own "App is not running" error.
 */
async function macAccessibilityTree(app: string): Promise<SemanticElement[]> {
// Escape backslashes and double quotes so the name can sit inside an AppleScript string literal.
const escapedApp = app.replace(/\\/g, '\\\\').replace(/"/g, '\\"');
// The walk starts at depth 0 with maxDepth 4 and stops collecting once the
// row count exceeds MAX_APP_STATE_ELEMENTS.
const script = `
on safeText(v)
try
return v as text
on error
return ""
end try
end safeText
on emitElement(e, depth, maxDepth, counter)
if depth > maxDepth then return {}
set rows to {}
try
set roleText to my safeText(role of e)
on error
set roleText to "element"
end try
try
set titleText to my safeText(title of e)
on error
set titleText to ""
end try
try
set valueText to my safeText(value of e)
on error
set valueText to ""
end try
try
set posValue to position of e
set sizeValue to size of e
set boundsText to ((item 1 of posValue) as text) & "," & ((item 2 of posValue) as text) & "," & ((item 1 of sizeValue) as text) & "," & ((item 2 of sizeValue) as text)
on error
set boundsText to ""
end try
set end of rows to ((counter as text) & tab & roleText & tab & titleText & tab & valueText & tab & boundsText)
if counter > ${MAX_APP_STATE_ELEMENTS} then return rows
try
repeat with childElement in UI elements of e
set childRows to my emitElement(childElement, depth + 1, maxDepth, counter + (count of rows))
set rows to rows & childRows
if (count of rows) > ${MAX_APP_STATE_ELEMENTS} then return rows
end repeat
end try
return rows
end emitElement
tell application "System Events"
if not (exists process "${escapedApp}") then error "App is not running: ${escapedApp}"
tell process "${escapedApp}"
set rows to {}
repeat with w in windows
set rows to rows & my emitElement(w, 0, 4, (count of rows) + 1)
if (count of rows) > ${MAX_APP_STATE_ELEMENTS} then exit repeat
end repeat
return rows
end tell
end tell
`;
const output = await run('osascript', ['-e', script], 10000);
// Rows are recovered by splitting on newlines or ", ".
// NOTE(review): a title or value that itself contains ", " or a newline would be
// split into separate rows here — confirm this is acceptable.
return output.split(/\r?\n|, /)
.map((line) => line.trim())
.filter(Boolean)
.map((line, index) => {
const [rawIndex, role, title, value, boundsText] = line.split('\t');
const boundsParts = (boundsText || '').split(',').map((part) => Number.parseFloat(part));
// Bounds are kept only when exactly four finite numbers were parsed.
const hasBounds = boundsParts.length === 4 && boundsParts.every(Number.isFinite);
return {
// Fall back to the 1-based row position when the row carries no counter.
index: rawIndex || String(index + 1),
role: role || 'element',
title: title || undefined,
value: value || undefined,
bounds: hasBounds
? { x: boundsParts[0], y: boundsParts[1], width: boundsParts[2], height: boundsParts[3] }
: undefined,
};
});
}
/**
 * Captures the accessibility tree for `app` where supported.
 * Never rejects: on macOS a capture failure becomes an empty list plus the
 * error text, and other platforms get an empty list with an explanatory message.
 */
async function getAccessibilityTree(app: string): Promise<{ elements: SemanticElement[]; message?: string }> {
  if (process.platform !== 'darwin') {
    return {
      elements: [],
      message: 'Native accessibility tree capture is not implemented for this platform yet.',
    };
  }
  try {
    const elements = await macAccessibilityTree(app);
    return { elements };
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    return { elements: [], message };
  }
}
/**
 * Produces and stores the current state of `app` for a session.
 * The helper adapter is tried first; otherwise the state is assembled from a
 * screenshot plus the accessibility tree.
 * @throws Error when `app` is empty.
 */
async function getAppState(sessionId: string, app: string): Promise<SemanticAppState> {
  if (!app) {
    throw new Error('app is required.');
  }

  const fromHelper = await withHelperState(sessionId, (adapter) => adapter.getAppState({ sessionId, app }));
  if (fromHelper) {
    return fromHelper;
  }

  const { dataUrl, size } = await captureScreenshot();
  const { elements, message } = await getAccessibilityTree(app);
  const state: SemanticAppState = {
    stateId: semanticSessionStore.createStateId(),
    app,
    platform: process.platform,
    screenshotDataUrl: dataUrl,
    displaySize: size,
    elements,
    accessibilityTree: elements,
    message,
  };
  return semanticSessionStore.save(sessionId, state);
}
/** Builds the executor target from the cached display size, taking a fresh screenshot only when none is cached. */
async function targetFor(sessionId: string, app: string, stateId?: string): Promise<ExecutorTarget> {
  const cached = semanticSessionStore.getState(sessionId, app, stateId);
  if (cached?.displaySize) {
    return { displaySize: cached.displaySize };
  }
  const { size } = await captureScreenshot();
  return { displaySize: size };
}
/** Returns `value` unchanged when it is a string, otherwise ''. Unlike readString it never trims. */
function readRawText(value: unknown): string {
  return typeof value === 'string' ? value : '';
}

/**
 * Entry point for semantic Computer Use tools.
 *
 * Every app-scoped tool first tries the helper adapter through
 * withHelperState; when that yields nothing, the action is carried out with the
 * raw executor and a fresh app state is captured and returned.
 */
export const computerSemanticsService = {
  /**
   * Runs one semantic tool.
   * @param name Tool name (e.g. 'list_apps', 'click', 'type_text').
   * @param input Raw tool arguments; `sessionId` defaults to 'default'.
   * @returns The app list for 'list_apps', otherwise the resulting app state.
   * @throws Error for unknown tools, a missing `app`, or missing coordinates.
   */
  async callTool(name: string, input: Record<string, unknown>): Promise<unknown> {
    const sessionId = readSessionId(input);
    switch (name) {
      case 'list_apps': {
        const adapter = getHelperAdapter();
        if (adapter) {
          try {
            return { apps: await adapter.listApps(), platform: process.platform };
          } catch (error) {
            if (!shouldFallbackFromHelper(error)) {
              throw error;
            }
            console.warn('[ComputerSemantics] Falling back from helper:', error instanceof Error ? error.message : String(error));
          }
        }
        return { apps: await listApps(), platform: process.platform };
      }
      case 'get_app_state':
        return getAppState(sessionId, readString(input.app));
      case 'click':
      case 'click_element': {
        const app = requireApp(input);
        const helperState = await withHelperState(sessionId, (adapter) => adapter.clickElement({ ...input, sessionId, app }));
        if (helperState) {
          return helperState;
        }
        const stateId = readString(input.stateId) || undefined;
        const point = getPoint(input, sessionId, app);
        if (!point) {
          throw new Error('click requires x/y or an element_index from computer_get_app_state.');
        }
        const target = await targetFor(sessionId, app, stateId);
        const button = readButton(input.mouse_button ?? input.mouseButton);
        const clickCount = readClickCount(input.click_count ?? input.clickCount);
        // Multi-clicks are issued as repeated single clicks.
        for (let index = 0; index < clickCount; index += 1) {
          await executor.click(target, button, point, false);
        }
        return getAppState(sessionId, app);
      }
      case 'drag': {
        const app = requireApp(input);
        const helperState = await withHelperState(sessionId, (adapter) => adapter.drag({ ...input, sessionId, app }));
        if (helperState) {
          return helperState;
        }
        const stateId = readString(input.stateId) || undefined;
        const fromX = readNumber(input.from_x);
        const fromY = readNumber(input.from_y);
        const toX = readNumber(input.to_x);
        const toY = readNumber(input.to_y);
        if (fromX === undefined || fromY === undefined || toX === undefined || toY === undefined) {
          throw new Error('drag requires from_x/from_y/to_x/to_y.');
        }
        await executor.drag(await targetFor(sessionId, app, stateId), { x: fromX, y: fromY }, { x: toX, y: toY }, readButton(input.mouse_button ?? input.mouseButton));
        return getAppState(sessionId, app);
      }
      case 'scroll':
      case 'scroll_element': {
        const app = requireApp(input);
        const helperState = await withHelperState(sessionId, (adapter) => adapter.scrollElement({ ...input, sessionId, app }));
        if (helperState) {
          return helperState;
        }
        const stateId = readString(input.stateId) || undefined;
        const point = getPoint(input, sessionId, app);
        if (!point) {
          throw new Error('scroll requires x/y or an element_index from computer_get_app_state.');
        }
        await executor.scroll(await targetFor(sessionId, app, stateId), readDirection(input.direction), readNumber(input.pages) ?? 1, point);
        return getAppState(sessionId, app);
      }
      case 'type_text': {
        const app = requireApp(input);
        const helperState = await withHelperState(sessionId, (adapter) => adapter.typeText({ ...input, sessionId, app }));
        if (helperState) {
          return helperState;
        }
        // Type the text verbatim: leading/trailing whitespace is part of what
        // the caller asked for, and the helper path receives it untrimmed too.
        await executor.type(readRawText(input.text));
        return getAppState(sessionId, app);
      }
      case 'press_key': {
        const app = requireApp(input);
        const helperState = await withHelperState(sessionId, (adapter) => adapter.pressKey({ ...input, sessionId, app }));
        if (helperState) {
          return helperState;
        }
        await executor.pressChord(readString(input.key));
        return getAppState(sessionId, app);
      }
      case 'set_value': {
        const app = requireApp(input);
        const helperState = await withHelperState(sessionId, (adapter) => adapter.setValue({ ...input, sessionId, app }));
        if (helperState) {
          return helperState;
        }
        const stateId = readString(input.stateId) || undefined;
        const point = getPoint(input, sessionId, app);
        if (!point) {
          throw new Error('set_value requires x/y or an element_index from computer_get_app_state.');
        }
        // Emulate "set value": focus the field, select everything, then type over it.
        await executor.click(await targetFor(sessionId, app, stateId), 'left', point, false);
        await executor.pressChord(process.platform === 'darwin' ? 'cmd+a' : 'ctrl+a');
        // The value is written verbatim so intentional whitespace survives.
        await executor.type(readRawText(input.value));
        return getAppState(sessionId, app);
      }
      case 'perform_secondary_action': {
        const app = requireApp(input);
        const helperState = await withHelperState(sessionId, (adapter) => adapter.performSecondaryAction({ ...input, sessionId, app }));
        if (helperState) {
          return helperState;
        }
        const stateId = readString(input.stateId) || undefined;
        const point = getPoint(input, sessionId, app);
        if (!point) {
          throw new Error('perform_secondary_action requires x/y or an element_index from computer_get_app_state.');
        }
        // The fallback secondary action is a right click at the resolved point.
        await executor.click(await targetFor(sessionId, app, stateId), 'right', point, false);
        return getAppState(sessionId, app);
      }
      default:
        throw new Error(`Unknown semantic Computer Use tool: ${name}`);
    }
  },
};

View File

@@ -1,141 +0,0 @@
import express from 'express';
import { computerUseService } from '@/modules/computer-use/computer-use.service.js';
import { semanticOperationForMcpTool } from '@/modules/computer-use/semantics/semantic-tool-dispatcher.js';
// Router for the Computer Use MCP tool endpoints; the middleware registered on
// it rejects requests that do not carry the expected MCP token.
const router = express.Router();
/**
 * Extracts the credential from an `Authorization: Bearer <token>` header value.
 * The scheme is matched case-insensitively and must be followed by a space or tab.
 * @returns The token, or `null` when the header is absent, malformed or empty.
 */
function readBearerToken(header: unknown): string | null {
  if (typeof header !== 'string') {
    return null;
  }
  const scheme = 'Bearer';
  const candidate = header.trim();
  const prefix = candidate.slice(0, scheme.length);
  if (prefix.toLowerCase() !== scheme.toLowerCase()) {
    return null;
  }
  const separator = candidate[scheme.length];
  const hasSeparator = separator === ' ' || separator === '\t';
  if (!hasSeparator) {
    return null;
  }
  const token = candidate.slice(scheme.length + 1).trimStart();
  return token || null;
}
/** Maps an arbitrary value to a mouse button, defaulting to 'left'. */
function toButton(value: unknown): 'left' | 'right' | 'middle' {
  if (value === 'right') {
    return 'right';
  }
  if (value === 'middle') {
    return 'middle';
  }
  return 'left';
}
/** Maps an arbitrary value to a scroll direction, defaulting to 'up'. */
function toScrollDirection(value: unknown): 'up' | 'down' | 'left' | 'right' {
  if (value === 'down') {
    return 'down';
  }
  if (value === 'left') {
    return 'left';
  }
  if (value === 'right') {
    return 'right';
  }
  return 'up';
}
/** Returns `{ x, y }` when both coordinates are numbers, otherwise `undefined`. */
function point(input: Record<string, unknown>): { x: number; y: number } | undefined {
  const { x, y } = input;
  if (typeof x !== 'number' || typeof y !== 'number') {
    return undefined;
  }
  return { x, y };
}
/**
 * Reads a mandatory finite number from `input[name]`.
 * @throws Error naming the field when it is missing or not a finite number.
 */
function requireNumber(input: Record<string, unknown>, name: string): number {
  const candidate = input[name];
  const isFiniteNumber = typeof candidate === 'number' && Number.isFinite(candidate);
  if (isFiniteNumber) {
    return candidate as number;
  }
  throw new Error(`${name} is required and must be a finite number.`);
}
/** Reads mandatory `x`/`y` coordinates; throws when either is not a finite number. */
function requirePoint(input: Record<string, unknown>): { x: number; y: number } {
  const x = requireNumber(input, 'x');
  const y = requireNumber(input, 'y');
  return { x, y };
}
/** Reads a mandatory point whose coordinates live under custom field names. */
function requireNamedPoint(input: Record<string, unknown>, xName: string, yName: string): { x: number; y: number } {
  const x = requireNumber(input, xName);
  const y = requireNumber(input, yName);
  return { x, y };
}
// Authenticates every MCP request: the token may arrive as a Bearer credential
// or in the `x-computer-use-mcp-token` header and must equal the service token.
router.use((req, res, next) => {
  const expected = computerUseService.getMcpToken();
  const bearer = readBearerToken(req.headers.authorization);
  const token = bearer || String(req.headers['x-computer-use-mcp-token'] || '');
  if (token && token === expected) {
    next();
    return;
  }
  res.status(401).json({ success: false, error: 'Invalid Computer Use MCP token.' });
});
// Executes the Computer Use MCP tool named by `:toolName`.
// Tools that semanticOperationForMcpTool resolves are forwarded to
// callSemanticTool; all others are dispatched by the switch below.
// Responses: 200 `{ success: true, data }`, 404 for an unknown tool name, and
// 400 with the error message for anything thrown while handling the call.
router.post('/tools/:toolName', async (req, res) => {
try {
// A missing or non-object body is treated as an empty argument map.
const input = (req.body && typeof req.body === 'object' ? req.body : {}) as Record<string, unknown>;
const sessionId = typeof input.sessionId === 'string' ? input.sessionId : undefined;
const toolName = req.params.toolName;
const semanticOperation = semanticOperationForMcpTool(toolName);
let result: unknown;
if (semanticOperation) {
result = await computerUseService.callSemanticTool(semanticOperation, input);
res.json({ success: true, data: result });
return;
}
switch (toolName) {
case 'computer_screenshot':
result = await computerUseService.agentScreenshot(sessionId);
break;
case 'computer_cursor_position':
result = await computerUseService.agentCursorPosition(sessionId);
break;
case 'computer_mouse_move':
result = await computerUseService.agentMouseMove(sessionId, requirePoint(input));
break;
case 'computer_click':
// The button and click count accept camelCase and snake_case spellings;
// the point is optional and the count defaults to 1.
result = await computerUseService.agentUnifiedClick(sessionId, {
button: toButton(input.mouseButton ?? input.mouse_button ?? input.button),
point: point(input),
clickCount: typeof input.clickCount === 'number'
? input.clickCount
: typeof input.click_count === 'number'
? input.click_count
: 1,
});
break;
case 'computer_drag': {
// Both endpoints are mandatory; requireNamedPoint throws when one is missing.
const from = requireNamedPoint(input, 'startX', 'startY');
const to = requireNamedPoint(input, 'endX', 'endY');
result = await computerUseService.agentDrag(sessionId, from, to, toButton(input.mouseButton ?? input.mouse_button ?? input.button));
break;
}
case 'computer_type':
result = await computerUseService.agentType(sessionId, String(input.text || ''));
break;
case 'computer_key':
result = await computerUseService.agentKey(sessionId, String(input.key || ''));
break;
case 'computer_scroll':
// Non-numeric amount/x/y are passed on as undefined.
result = await computerUseService.agentScroll(sessionId, {
direction: toScrollDirection(input.direction),
amount: typeof input.amount === 'number' ? input.amount : undefined,
x: typeof input.x === 'number' ? input.x : undefined,
y: typeof input.y === 'number' ? input.y : undefined,
});
break;
case 'computer_wait':
result = await computerUseService.agentWait(sessionId, typeof input.timeoutMs === 'number' ? input.timeoutMs : undefined);
break;
case 'computer_close_session':
result = await computerUseService.agentStopSession(sessionId);
break;
default:
res.status(404).json({ success: false, error: `Unknown Computer Use MCP tool "${toolName}".` });
return;
}
res.json({ success: true, data: result });
} catch (error) {
// Every failure, including validation errors from the require* helpers, is reported as 400.
res.status(400).json({
success: false,
error: error instanceof Error ? error.message : 'Computer Use MCP tool failed.',
});
}
});
export default router;

View File

@@ -1,211 +0,0 @@
import express from 'express';
import { computerUseService } from '@/modules/computer-use/computer-use.service.js';
import { AppError } from '@/shared/utils.js';
// Router for the user-facing Computer Use endpoints defined below.
const router = express.Router();
// Express request that may carry a `user` object with an optional id.
// NOTE(review): presumably populated by upstream auth middleware — confirm where it is mounted.
type AuthenticatedRequest = express.Request & {
user?: {
id?: string | number;
};
};
/**
 * Returns the authenticated user's id wrapped as an owner object.
 * @throws AppError (401, AUTHENTICATED_USER_REQUIRED) when the request has no usable user id.
 */
function requireUser(req: AuthenticatedRequest): { id: string | number } {
  const id = req.user?.id;
  const hasId = id !== undefined && id !== null && String(id).trim() !== '';
  if (hasId) {
    return { id };
  }
  throw new AppError('Authenticated user is required.', {
    code: 'AUTHENTICATED_USER_REQUIRED',
    statusCode: 401,
  });
}
/**
 * Picks the HTTP status for an error.
 * AppError instances supply their own status; other objects may supply an
 * integer `statusCode` (or, failing that, `status`) in the 400–599 range;
 * everything else maps to `fallbackStatusCode`.
 */
function getErrorStatusCode(error: unknown, fallbackStatusCode: number): number {
  if (error instanceof AppError) {
    return error.statusCode;
  }
  if (!error || typeof error !== 'object') {
    return fallbackStatusCode;
  }
  const candidate: unknown = 'statusCode' in error ? error.statusCode : 'status' in error ? error.status : undefined;
  if (typeof candidate !== 'number') {
    return fallbackStatusCode;
  }
  const isHttpError = Number.isInteger(candidate) && candidate >= 400 && candidate <= 599;
  return isHttpError ? candidate : fallbackStatusCode;
}
/** Collapses a route parameter to a single string: the first array entry, the string itself, or ''. */
function readParam(value: string | string[] | undefined): string {
  if (Array.isArray(value)) {
    return value[0] || '';
  }
  return value || '';
}
/** Maps an arbitrary value to a mouse button, defaulting to 'left'. */
function toButton(value: unknown): 'left' | 'right' | 'middle' {
  if (value === 'right') {
    return 'right';
  }
  if (value === 'middle') {
    return 'middle';
  }
  return 'left';
}
// Reports the Computer Use status; failures are returned as 500.
router.get('/status', async (_req, res) => {
  try {
    const status = await computerUseService.getStatus();
    res.json({ success: true, data: status });
  } catch (error) {
    const message = error instanceof Error ? error.message : 'Failed to load Computer Use status.';
    res.status(500).json({ success: false, error: message });
  }
});
// Returns the stored Computer Use settings to an authenticated user.
router.get('/settings', async (req: AuthenticatedRequest, res) => {
  try {
    requireUser(req);
    const settings = await computerUseService.getSettings();
    res.json({ success: true, data: { settings } });
  } catch (error) {
    const message = error instanceof Error ? error.message : 'Failed to load Computer Use settings.';
    res.status(getErrorStatusCode(error, 500)).json({ success: false, error: message });
  }
});
// Saves Computer Use settings from the request body (an absent body counts as `{}`).
router.put('/settings', async (req: AuthenticatedRequest, res) => {
  try {
    requireUser(req);
    const payload = req.body || {};
    const settings = await computerUseService.updateSettings(payload);
    res.json({ success: true, data: { settings } });
  } catch (error) {
    const message = error instanceof Error ? error.message : 'Failed to save Computer Use settings.';
    res.status(getErrorStatusCode(error, 400)).json({ success: false, error: message });
  }
});
// Installs the desktop control runtime. An unsuccessful install is reported as
// 500, with the install message echoed in `error`.
router.post('/runtime/install', async (req: AuthenticatedRequest, res) => {
  try {
    requireUser(req);
    const result = await computerUseService.installRuntime();
    const { success } = result;
    res.status(success ? 200 : 500).json({
      success,
      data: result,
      error: success ? undefined : result.message,
    });
  } catch (error) {
    const message = error instanceof Error ? error.message : 'Failed to install Computer Use runtime.';
    res.status(getErrorStatusCode(error, 500)).json({ success: false, error: message });
  }
});
// Lists the sessions returned by the service for the authenticated user.
router.get('/sessions', async (req: AuthenticatedRequest, res) => {
  try {
    const owner = requireUser(req);
    const sessions = await computerUseService.listSessions(owner);
    res.json({ success: true, data: { sessions } });
  } catch (error) {
    const message = error instanceof Error ? error.message : 'Failed to list Computer Use sessions.';
    res.status(getErrorStatusCode(error, 500)).json({ success: false, error: message });
  }
});
// Captures a screenshot for the given session on behalf of the authenticated user.
// Errors that carry their own HTTP status (e.g. the 401 raised by requireUser)
// keep it; all other failures are reported as 400.
router.post('/sessions/:sessionId/screenshot', async (req: AuthenticatedRequest, res) => {
  try {
    const user = requireUser(req);
    const session = await computerUseService.userScreenshot(user, readParam(req.params.sessionId));
    res.json({ success: true, data: { session } });
  } catch (error) {
    res.status(getErrorStatusCode(error, 400)).json({
      success: false,
      error: error instanceof Error ? error.message : 'Failed to capture the screen.',
    });
  }
});
// Performs a user click at body `x`/`y` in the given session.
// Body: `x`, `y` (coerced with Number), optional `button`, optional `double`
// (only the literal `true` enables it).
// Errors that carry their own HTTP status (e.g. the 401 raised by requireUser)
// keep it; all other failures are reported as 400.
router.post('/sessions/:sessionId/click', async (req: AuthenticatedRequest, res) => {
  try {
    // Authenticate before validating so anonymous callers get 401 rather than
    // feedback about their payload.
    const user = requireUser(req);
    const x = Number(req.body?.x);
    const y = Number(req.body?.y);
    if (!Number.isFinite(x) || !Number.isFinite(y)) {
      res.status(400).json({
        success: false,
        error: 'Valid numeric coordinates are required.',
      });
      return;
    }
    const session = await computerUseService.userClick(user, readParam(req.params.sessionId), {
      x,
      y,
      button: toButton(req.body?.button),
      double: req.body?.double === true,
    });
    res.json({ success: true, data: { session } });
  } catch (error) {
    res.status(getErrorStatusCode(error, 400)).json({
      success: false,
      error: error instanceof Error ? error.message : 'Failed to click.',
    });
  }
});
// Sends the body's `key` (stringified, '' when absent) to the given session.
// Errors that carry their own HTTP status (e.g. the 401 raised by requireUser)
// keep it; all other failures are reported as 400.
router.post('/sessions/:sessionId/press-key', async (req: AuthenticatedRequest, res) => {
  try {
    const user = requireUser(req);
    const key = String(req.body?.key || '');
    const session = await computerUseService.userPressKey(user, readParam(req.params.sessionId), key);
    res.json({ success: true, data: { session } });
  } catch (error) {
    res.status(getErrorStatusCode(error, 400)).json({
      success: false,
      error: error instanceof Error ? error.message : 'Failed to send key input.',
    });
  }
});
// Grants agent access to the given session on behalf of the authenticated user.
// Errors that carry their own HTTP status (e.g. the 401 raised by requireUser)
// keep it; all other failures are reported as 400.
router.post('/sessions/:sessionId/consent/grant', async (req: AuthenticatedRequest, res) => {
  try {
    const user = requireUser(req);
    const session = await computerUseService.grantAgentAccess(user, readParam(req.params.sessionId));
    res.json({ success: true, data: { session } });
  } catch (error) {
    res.status(getErrorStatusCode(error, 400)).json({
      success: false,
      error: error instanceof Error ? error.message : 'Failed to grant control.',
    });
  }
});
// Revokes agent access to the given session on behalf of the authenticated user.
// Errors that carry their own HTTP status (e.g. the 401 raised by requireUser)
// keep it; all other failures are reported as 400.
router.post('/sessions/:sessionId/consent/revoke', async (req: AuthenticatedRequest, res) => {
  try {
    const user = requireUser(req);
    const session = await computerUseService.revokeAgentAccess(user, readParam(req.params.sessionId));
    res.json({ success: true, data: { session } });
  } catch (error) {
    res.status(getErrorStatusCode(error, 400)).json({
      success: false,
      error: error instanceof Error ? error.message : 'Failed to revoke control.',
    });
  }
});
// Stops the given session on behalf of the authenticated user.
// Errors that carry their own HTTP status (e.g. the 401 raised by requireUser)
// keep it; all other failures are reported as 400.
router.post('/sessions/:sessionId/stop', async (req: AuthenticatedRequest, res) => {
  try {
    const user = requireUser(req);
    const result = await computerUseService.stopSession(user, readParam(req.params.sessionId));
    res.json({ success: true, data: result });
  } catch (error) {
    res.status(getErrorStatusCode(error, 400)).json({
      success: false,
      error: error instanceof Error ? error.message : 'Failed to stop Computer Use session.',
    });
  }
});
// Deletes the given session on behalf of the authenticated user.
// Errors that carry their own HTTP status (e.g. the 401 raised by requireUser)
// keep it; all other failures are reported as 400.
router.delete('/sessions/:sessionId', async (req: AuthenticatedRequest, res) => {
  try {
    const user = requireUser(req);
    const result = await computerUseService.deleteSession(user, readParam(req.params.sessionId));
    res.json({ success: true, data: result });
  } catch (error) {
    res.status(getErrorStatusCode(error, 400)).json({
      success: false,
      error: error instanceof Error ? error.message : 'Failed to delete Computer Use session.',
    });
  }
});
export default router;

View File

@@ -1,920 +0,0 @@
import { randomBytes, randomUUID } from 'node:crypto';
import { spawn } from 'node:child_process';
import fs from 'node:fs';
import path from 'node:path';
import { appConfigDb } from '@/modules/database/index.js';
import { providerMcpService } from '@/modules/providers/index.js';
import { getModuleDir } from '@/utils/runtime-paths.js';
import {
getRuntimeReadiness as getExecutorReadiness,
type Point,
type ClickButton,
type ScrollDirection,
} from '@/modules/computer-use/computer-executor.js';
import { runRawComputerAction } from '@/modules/computer-use/actions/raw-action-dispatcher.js';
import type { RawComputerAction } from '@/modules/computer-use/actions/raw-action-types.js';
import { desktopAgentRelay } from '@/modules/computer-use/desktop-agent-relay.service.js';
import { computerSemanticsService } from '@/modules/computer-use/computer-semantics.service.js';
import { semanticOperationNames } from '@/modules/computer-use/semantics/semantic-tool-dispatcher.js';
// Directory of this module, derived from import.meta.url.
const __dirname = getModuleDir(import.meta.url);
// True only when VITE_IS_PLATFORM is exactly 'true'; selects the 'cloud' runtime.
const IS_PLATFORM = process.env.VITE_IS_PLATFORM === 'true';
// Limits read from the environment with the defaults shown. Number.parseInt
// returns NaN for a non-numeric override, so consumers must tolerate that.
const MAX_SESSIONS_PER_OWNER = Number.parseInt(process.env.CLOUDCLI_COMPUTER_USE_MAX_SESSIONS_PER_OWNER || '1', 10);
// Inactivity window for 'ready' sessions, in milliseconds (default 30 minutes).
const SESSION_TTL_MS = Number.parseInt(process.env.CLOUDCLI_COMPUTER_USE_SESSION_TTL_MS || String(30 * 60 * 1000), 10);
// How long non-ready sessions are kept before deletion, in milliseconds (default 30 minutes).
const STOPPED_SESSION_RETENTION_MS = Number.parseInt(process.env.CLOUDCLI_COMPUTER_USE_STOPPED_SESSION_RETENTION_MS || String(30 * 60 * 1000), 10);
const MAX_STORED_SESSIONS = Number.parseInt(process.env.CLOUDCLI_COMPUTER_USE_MAX_STORED_SESSIONS || '100', 10);
// Keys under which settings and the MCP token are persisted in appConfigDb.
const COMPUTER_USE_SETTINGS_KEY = 'computer_use_settings';
const COMPUTER_USE_MCP_TOKEN_KEY = 'computer_use_mcp_token';
// Where Computer Use runs; derived from IS_PLATFORM by getRuntime().
type ComputerUseRuntime = 'cloud' | 'local';
type ComputerUseSessionStatus = 'ready' | 'stopped' | 'unavailable';
// Internal session record, held in the module-level `sessions` map.
type ComputerUseSession = {
id: string;
// Owner key; stripped from the public view by publicSession().
ownerId: string;
createdBy: 'user' | 'agent';
runtime: ComputerUseRuntime;
status: ComputerUseSessionStatus;
screenshotDataUrl: string | null;
// Timestamps are strings; `updatedAt` is read with Date.parse elsewhere in this module.
createdAt: string;
updatedAt: string;
lastAction: string | null;
message: string | null;
/** Per-session consent: agents may act only while this is true. */
agentAccessEnabled: boolean;
/** Size of the captured screenshot in pixels — the coordinate space agents/users use. */
displaySize: {
width: number;
height: number;
} | null;
// Last known pointer position and who moved it, or null.
cursor: {
x: number;
y: number;
actor: 'agent' | 'user';
} | null;
};
// Session shape exposed to clients: everything except `ownerId`.
type PublicComputerUseSession = Omit<ComputerUseSession, 'ownerId'>;
type ComputerUseOwner = {
id: string | number;
};
type ComputerUseSettings = {
enabled: boolean;
};
// Executor readiness extended with install progress (see getRuntimeReadiness).
type RuntimeReadiness = {
nut: any | null;
screenshot: any | null;
nutInstalled: boolean;
screenshotInstalled: boolean;
installInProgress: boolean;
installMessage: string | null;
};
// In-memory session registry keyed by session id.
const sessions = new Map<string, ComputerUseSession>();
// In-flight runtime install, shared by concurrent installRuntime() callers; null when idle.
let installPromise: Promise<{ success: boolean; message: string }> | null = null;
// Most recent install status text, surfaced through getRuntimeReadiness().
let lastInstallMessage: string | null = null;
// Settings used when nothing is stored or the stored value cannot be read.
const DEFAULT_SETTINGS: ComputerUseSettings = {
enabled: false,
};
// Owner id under which agent-created sessions are filed.
const AGENT_OWNER_ID = 'agent';
const MCP_SERVER_NAME = 'cloudcli-computer-use';
const MCP_PROVIDERS = ['claude', 'codex', 'cursor', 'gemini', 'opencode'];
/** Reports the runtime flavour: 'cloud' when IS_PLATFORM is set, otherwise 'local'. */
function getRuntime(): ComputerUseRuntime {
  if (IS_PLATFORM) {
    return 'cloud';
  }
  return 'local';
}
/**
 * Loads the persisted Computer Use settings.
 *
 * Falls back to the defaults when nothing is stored or the stored value cannot
 * be read or parsed (the failure is logged). A fresh object is returned on
 * every call so callers can never mutate the shared DEFAULT_SETTINGS.
 */
function readSettings(): ComputerUseSettings {
  try {
    const raw = appConfigDb.get(COMPUTER_USE_SETTINGS_KEY);
    if (!raw) {
      return { ...DEFAULT_SETTINGS };
    }
    const parsed = JSON.parse(raw) as Partial<ComputerUseSettings> | null;
    return {
      // Only a literal `true` enables the feature; anything else counts as disabled.
      enabled: parsed?.enabled === true,
    };
  } catch (error: any) {
    console.warn('[Computer Use] Failed to read settings:', error?.message || error);
    return { ...DEFAULT_SETTINGS };
  }
}
/** Persists the settings after coercing `enabled` to a strict boolean, and returns what was stored. */
function writeSettings(settings: ComputerUseSettings): ComputerUseSettings {
  const enabled = settings.enabled === true;
  const normalized = { enabled };
  const serialized = JSON.stringify(normalized);
  appConfigDb.set(COMPUTER_USE_SETTINGS_KEY, serialized);
  return normalized;
}
/** Returns the stored MCP token, generating and persisting a 32-byte hex token on first use. */
function getOrCreateMcpToken(): string {
  const stored = appConfigDb.get(COMPUTER_USE_MCP_TOKEN_KEY);
  if (!stored) {
    const fresh = randomBytes(32).toString('hex');
    appConfigDb.set(COMPUTER_USE_MCP_TOKEN_KEY, fresh);
    return fresh;
  }
  return stored;
}
/**
 * Chooses the setup hint to show, checking in order: feature disabled, cloud
 * runtime, missing runtime packages, then the last install message.
 */
function getSetupMessage(settings: ComputerUseSettings, readiness: RuntimeReadiness): string {
  if (!settings.enabled) {
    return 'Computer Use is disabled in settings.';
  }
  if (getRuntime() === 'cloud') {
    return 'Open CloudCLI Desktop on this computer, connect the same account, and enable Computer Use.';
  }
  const runtimeInstalled = readiness.nutInstalled && readiness.screenshotInstalled;
  if (!runtimeInstalled) {
    return 'Install the desktop control runtime to capture the screen and drive the mouse and keyboard.';
  }
  return readiness.installMessage || 'Computer Use runtime is not ready.';
}
/**
 * Resolves how to launch the Computer Use MCP server: the script two
 * directories above this module, run with the current Node binary, when that
 * file exists; otherwise the `cloudcli computer-use-mcp` command.
 */
function getMcpCommand(): { command: string; args: string[] } {
  const scriptPath = path.join(path.resolve(__dirname, '..', '..'), 'computer-use-mcp.js');
  if (!fs.existsSync(scriptPath)) {
    return {
      command: 'cloudcli',
      args: ['computer-use-mcp'],
    };
  }
  return {
    command: process.execPath,
    args: [scriptPath],
  };
}
/** Builds the loopback URL of the MCP API using SERVER_PORT, then PORT, then 3001. */
function getMcpApiUrl(): string {
  const { SERVER_PORT, PORT } = process.env;
  const port = SERVER_PORT || PORT || '3001';
  return `http://127.0.0.1:${port}/api/computer-use-mcp`;
}
/** Combines the executor's readiness report with this module's install progress. */
function getRuntimeReadiness(): RuntimeReadiness {
  const executorReadiness = getExecutorReadiness();
  const installInProgress = Boolean(installPromise);
  return {
    ...executorReadiness,
    installInProgress,
    installMessage: lastInstallMessage,
  };
}
/**
 * Runs a command to completion, capturing stdout and stderr.
 *
 * @param command Executable to run. On Windows, `.cmd`/`.bat` shims (such as
 *   `npm.cmd`) are launched through the shell because current Node.js releases
 *   refuse to spawn them directly (EINVAL).
 * @param args Arguments. They are NOT shell-escaped in the Windows shim case,
 *   so only trusted, metacharacter-free values may be passed.
 * @returns Resolves when the process exits with code 0.
 * @throws Rejects with the captured output (or a generic "exited with code"
 *   message) on a non-zero exit, and with the spawn error when the process
 *   cannot be started.
 */
function runCommand(command: string, args: string[]): Promise<void> {
  return new Promise((resolve, reject) => {
    const needsShell = process.platform === 'win32' && /\.(cmd|bat)$/i.test(command);
    const child = spawn(command, args, {
      cwd: process.cwd(),
      env: process.env,
      shell: needsShell,
      stdio: ['ignore', 'pipe', 'pipe'],
    });
    // stdout and stderr are interleaved into one buffer for the error message.
    const output: string[] = [];
    child.stdout.on('data', (chunk) => output.push(String(chunk)));
    child.stderr.on('data', (chunk) => output.push(String(chunk)));
    child.on('error', reject);
    child.on('close', (code) => {
      if (code === 0) {
        resolve();
        return;
      }
      reject(new Error(output.join('').trim() || `${command} ${args.join(' ')} exited with code ${code}`));
    });
  });
}
/**
 * Turns an install failure into a user-facing message. On Linux, failures that
 * mention the native screen/input libraries are replaced with package-install
 * guidance; otherwise the original message (or a generic one) is returned.
 */
function formatInstallError(error: unknown): string {
  const message = error instanceof Error ? error.message : String(error);
  const needsSystemPackages = process.platform === 'linux'
    && /libxtst|x11|xtst|libpng|imagemagick|scrot/i.test(message);
  if (!needsSystemPackages) {
    return message || 'Failed to install the Computer Use runtime.';
  }
  const guidance = [
    'Installing the desktop control runtime needs system packages.',
    'On Debian/Ubuntu run: sudo apt-get install -y libxtst-dev libpng-dev imagemagick',
    'then try again.',
  ];
  return guidance.join(' ');
}
/** True when running under Electron with ELECTRON_RUN_AS_NODE set to '1'. */
function isPackagedElectronNodeRuntime(): boolean {
  if (process.env.ELECTRON_RUN_AS_NODE !== '1') {
    return false;
  }
  return Boolean(process.versions.electron);
}
/**
 * Installs the desktop control runtime packages with npm.
 *
 * Install failures do not reject: they resolve to `{ success: false, message }`
 * and the message is also kept in `lastInstallMessage`.
 *
 * @returns The outcome and a human-readable status message.
 */
async function installRuntime(): Promise<{ success: boolean; message: string }> {
// Concurrent callers share the install that is already running.
if (installPromise) {
return installPromise;
}
const readiness = getExecutorReadiness();
// Nothing to do when both runtime packages are already reported as installed.
if (readiness.nutInstalled && readiness.screenshotInstalled) {
lastInstallMessage = 'Computer Use runtime is available.';
return { success: true, message: lastInstallMessage };
}
// No npm install is attempted inside an Electron run-as-node runtime.
if (isPackagedElectronNodeRuntime()) {
lastInstallMessage = 'Computer Use runtime was not bundled with this desktop build.';
return { success: false, message: lastInstallMessage };
}
const npmCommand = process.platform === 'win32' ? 'npm.cmd' : 'npm';
// installPromise is assigned before the first await so that later callers see it.
installPromise = (async () => {
try {
lastInstallMessage = 'Installing desktop control runtime…';
// --no-save / --no-package-lock leave package.json and the lockfile untouched.
await runCommand(npmCommand, [
'install',
'--no-save',
'--no-package-lock',
'@nut-tree-fork/nut-js',
'screenshot-desktop',
]);
lastInstallMessage = 'Computer Use runtime installed.';
return { success: true, message: lastInstallMessage };
} catch (error) {
lastInstallMessage = formatInstallError(error);
return { success: false, message: lastInstallMessage };
}
})();
try {
return await installPromise;
} finally {
// Always clear the in-flight marker so a later call can retry.
installPromise = null;
}
}
/**
 * Returns the owner's id as a string.
 *
 * @throws Error when the id is undefined, null, or blank after trimming.
 */
function getOwnerId(owner: ComputerUseOwner): string {
  const rawId = owner.id;
  const isMissing = rawId === undefined || rawId === null;
  if (isMissing || String(rawId).trim() === '') {
    throw new Error('Authenticated user is required.');
  }
  return String(rawId);
}
/** Returns a shallow copy of the session with the `ownerId` field removed. */
function publicSession(session: ComputerUseSession): PublicComputerUseSession {
  const { ownerId: _omittedOwnerId, ...exposed } = session;
  return exposed;
}
/** Lists every stored session whose `ownerId` equals the given id. */
function ownerSessions(ownerId: string): ComputerUseSession[] {
  const owned: ComputerUseSession[] = [];
  for (const candidate of sessions.values()) {
    if (candidate.ownerId === ownerId) {
      owned.push(candidate);
    }
  }
  return owned;
}
/**
 * A caller may access a session it owns, and any session owned by the agent
 * owner id (`AGENT_OWNER_ID`).
 */
function canAccessSession(ownerId: string, session: ComputerUseSession): boolean {
  if (session.ownerId === ownerId) {
    return true;
  }
  return session.ownerId === AGENT_OWNER_ID;
}
/**
 * Trims a session id and returns it, or null when the input is not a string
 * or is blank.
 */
function normalizeSessionId(sessionId?: string | null): string | null {
  const candidate = typeof sessionId === 'string' ? sessionId.trim() : '';
  return candidate === '' ? null : candidate;
}
/**
 * Returns the agent-owned session in 'ready' status with the latest
 * `updatedAt`, or null when there is none.
 */
function findActiveAgentSession(): ComputerUseSession | null {
  const readySessions = ownerSessions(AGENT_OWNER_ID).filter((session) => session.status === 'ready');
  // Newest first.
  readySessions.sort((left, right) => Date.parse(right.updatedAt) - Date.parse(left.updatedAt));
  return readySessions.length > 0 ? readySessions[0] : null;
}
/** Returns `value` when it is a finite number greater than zero, else `fallback`. */
function positiveDuration(value: number, fallback: number): number {
  if (!Number.isFinite(value)) {
    return fallback;
  }
  return value > 0 ? value : fallback;
}
/**
 * Housekeeping pass over the session store.
 *
 * 1. 'ready' sessions idle for longer than the session TTL are marked
 *    'stopped' and lose agent access.
 * 2. Sessions in any other status are deleted once idle for longer than the
 *    stopped-session retention.
 * 3. If the store still exceeds the maximum size, non-ready sessions are
 *    deleted oldest-first until it fits; 'ready' sessions are never removed
 *    by this step.
 *
 * Sessions whose `updatedAt` cannot be parsed are skipped by steps 1 and 2.
 *
 * @param now - Reference time in epoch milliseconds (defaults to the current time).
 */
async function expireStaleSessions(now = Date.now()): Promise<void> {
  const readyTtlMs = positiveDuration(SESSION_TTL_MS, 30 * 60 * 1000);
  const stoppedRetentionMs = positiveDuration(STOPPED_SESSION_RETENTION_MS, readyTtlMs);

  for (const [sessionId, session] of sessions.entries()) {
    const lastTouched = Date.parse(session.updatedAt);
    if (!Number.isFinite(lastTouched)) {
      continue;
    }
    const idleMs = now - lastTouched;

    if (session.status !== 'ready') {
      if (idleMs > stoppedRetentionMs) {
        sessions.delete(sessionId);
      }
      continue;
    }

    if (idleMs <= readyTtlMs) {
      continue;
    }
    session.status = 'stopped';
    session.agentAccessEnabled = false;
    session.updatedAt = new Date(now).toISOString();
    session.lastAction = 'expire';
    session.message = 'Computer Use session expired after inactivity.';
  }

  const storeLimit = positiveDuration(MAX_STORED_SESSIONS, 100);
  if (sessions.size <= storeLimit) {
    return;
  }

  const evictionQueue = [...sessions.values()]
    .filter((session) => session.status !== 'ready')
    .sort((left, right) => Date.parse(left.updatedAt) - Date.parse(right.updatedAt));
  let cursor = 0;
  while (cursor < evictionQueue.length && sessions.size > storeLimit) {
    sessions.delete(evictionQueue[cursor].id);
    cursor += 1;
  }
}
// --- Action layer: local executor (OSS) or cloud relay to the desktop agent --
//
// Every desktop interaction goes through `performAction` / `getCursorPosition`.
// In local mode it drives the in-process nut-js executor (computer-executor.ts);
// in cloud mode it forwards the action to the linked desktop agent over
// `desktopAgentRelay` and applies the returned screenshot. The local server
// itself never touches the OS in cloud mode.
/**
 * Shape the desktop agent returns for any relayed action.
 *
 * Every field is optional; `applyRelayResult` copies only the fields that are
 * present onto the session.
 */
type RelayResult = {
// Stored as the session screenshot when it is a string.
screenshotDataUrl?: string | null;
// Stored as the session display size when truthy.
displaySize?: { width: number; height: number } | null;
// Stored as the session cursor position when truthy.
cursor?: { x: number; y: number } | null;
// Returned by `getCursorPosition` when present.
position?: Point | null;
};
/**
 * Copies the fields present in an action result onto the session and bumps
 * `updatedAt`.
 *
 * Results reach this function through `as RelayResult` casts of untyped relay
 * and executor replies, so the value is validated at runtime: a reply that is
 * not an object (for example null or undefined) contributes no fields instead
 * of raising a TypeError. `updatedAt` is refreshed in every case.
 *
 * @param session - Session to update in place.
 * @param result - Reply from the executor or desktop agent.
 */
function applyRelayResult(session: ComputerUseSession, result: RelayResult): void {
  if (result !== null && typeof result === 'object') {
    if (typeof result.screenshotDataUrl === 'string') {
      session.screenshotDataUrl = result.screenshotDataUrl;
    }
    if (result.displaySize) {
      session.displaySize = result.displaySize;
    }
    if (result.cursor) {
      // Keep whoever last moved the cursor; default to the agent when unknown.
      session.cursor = { x: result.cursor.x, y: result.cursor.y, actor: session.cursor?.actor ?? 'agent' };
    }
  }
  session.updatedAt = new Date().toISOString();
}
/** Returns a shallow copy of the tool arguments without the `sessionId` key. */
function stripSessionArgs(args: Record<string, unknown>): Record<string, unknown> {
  const { sessionId: _droppedSessionId, ...remaining } = args;
  return remaining;
}
/**
 * Captures a screenshot and applies it to the session: relayed to the desktop
 * agent in cloud mode, taken by the in-process executor otherwise.
 */
async function refreshScreenshot(session: ComputerUseSession): Promise<void> {
  const capture = getRuntime() === 'cloud'
    ? ((await desktopAgentRelay.relay('screenshot', { sessionId: session.id })) as RelayResult)
    : await runRawComputerAction({ type: 'screenshot' }, session);
  applyRelayResult(session, capture);
}
/**
 * Runs one action and applies the returned result to the session.
 *
 * In cloud mode the action is relayed to the desktop agent under its own
 * `type`, with the session id and current display size attached. Otherwise it
 * is executed by the in-process executor.
 */
async function performAction(session: ComputerUseSession, action: RawComputerAction): Promise<void> {
  if (getRuntime() !== 'cloud') {
    applyRelayResult(session, await runRawComputerAction(action, session));
    return;
  }

  const relayParams = {
    ...action,
    sessionId: session.id,
    displaySize: session.displaySize,
  };
  const reply = (await desktopAgentRelay.relay(action.type, relayParams)) as RelayResult;
  applyRelayResult(session, reply);
}
/**
 * Reads the current cursor position in screenshot-pixel space.
 *
 * The query is relayed to the desktop agent in cloud mode and run by the
 * in-process executor otherwise; in both cases the reply is applied to the
 * session first. The reported `position` wins; failing that, the session's
 * cursor is used, and finally the origin.
 *
 * The fallback always returns a bare `{ x, y }` point. Returning the session
 * cursor object itself would leak its `actor` field into the position handed
 * back to callers.
 */
async function getCursorPosition(session: ComputerUseSession): Promise<Point> {
  const result = getRuntime() === 'cloud'
    ? ((await desktopAgentRelay.relay('cursor_position', {
      sessionId: session.id,
      displaySize: session.displaySize,
    })) as RelayResult)
    : await runRawComputerAction({ type: 'cursor_position' }, session);
  applyRelayResult(session, result);

  if (result?.position) {
    return result.position;
  }
  return session.cursor ? { x: session.cursor.x, y: session.cursor.y } : { x: 0, y: 0 };
}
/**
 * Throws unless the session status is 'ready'. The error carries the session's
 * own message when it has one, else a generic "not available" message.
 */
function assertReady(session: ComputerUseSession): void {
  if (session.status === 'ready') {
    return;
  }
  throw new Error(session.message || 'Computer Use session is not available.');
}
/**
 * Agent tools are usable when Computer Use is enabled in settings and, in
 * cloud mode, a desktop agent is connected.
 */
function agentToolsAvailable(): boolean {
  const { enabled } = readSettings();
  if (!enabled) {
    return false;
  }
  return getRuntime() === 'cloud' ? desktopAgentRelay.isConnected() : true;
}
/**
 * Throws when agent tools cannot be used, choosing the message by cause:
 * the feature is disabled in settings, or (cloud mode) no desktop is linked.
 */
function assertAgentToolsAvailable(): void {
  if (agentToolsAvailable()) {
    return;
  }

  if (!readSettings().enabled) {
    throw new Error('Computer Use agent tools are disabled.');
  }
  if (getRuntime() === 'cloud') {
    throw new Error(
      'No desktop is linked. Open CloudCLI Desktop on this computer, connect the same account, and enable Computer Use.'
    );
  }
  throw new Error('Computer Use agent tools are disabled.');
}
/**
 * Marks every stored session as stopped (regardless of its current status),
 * revokes agent access, and records the given action label and message.
 */
function stopSessions(lastAction: string, message: string): void {
  sessions.forEach((session) => {
    session.lastAction = lastAction;
    session.message = message;
    session.agentAccessEnabled = false;
    session.status = 'stopped';
    session.updatedAt = new Date().toISOString();
  });
}
export const computerUseService = {
/** Returns the stored Computer Use settings as read by `readSettings()`. */
async getSettings() {
  const settings = readSettings();
  return settings;
},
/**
 * Persists the `enabled` flag and applies its side effects.
 *
 * A non-boolean `settings.enabled` keeps the currently stored value. When the
 * saved result is enabled, the MCP server is registered; otherwise it is
 * unregistered, linked desktop agents are disconnected, and every session is
 * stopped.
 *
 * @returns The settings object returned by `writeSettings`.
 */
async updateSettings(settings: Partial<ComputerUseSettings>) {
  const stored = readSettings();
  let enabled = stored.enabled;
  if (typeof settings.enabled === 'boolean') {
    enabled = settings.enabled;
  }

  const saved = writeSettings({ enabled });
  if (!saved.enabled) {
    await this.unregisterAgentMcp();
    desktopAgentRelay.disconnectAll('Computer Use was disabled in this environment.');
    stopSessions('settings:disabled', 'Computer Use was disabled in settings.');
    return saved;
  }

  await this.registerAgentMcp();
  return saved;
},
/**
 * Builds the status snapshot reported to clients: the saved setting, runtime
 * kind, install readiness, desktop-agent connectivity, session count, and a
 * human-readable message.
 */
async getStatus() {
  const settings = readSettings();
  const readiness = getRuntimeReadiness();
  const isCloud = getRuntime() === 'cloud';
  const runtimeReady = readiness.nutInstalled && readiness.screenshotInstalled;
  const desktopAgentConnected = desktopAgentRelay.isConnected();

  // Cloud mode still respects the saved feature setting. When enabled, cloud
  // availability comes from a linked desktop agent because the hosted server
  // has no screen of its own.
  const backendReady = isCloud ? desktopAgentConnected : runtimeReady;
  const available = settings.enabled && backendReady;

  return {
    enabled: settings.enabled,
    runtime: getRuntime(),
    available,
    desktopAgentConnected,
    desktopAgentCount: desktopAgentRelay.connectedCount(),
    nutInstalled: readiness.nutInstalled,
    screenshotInstalled: readiness.screenshotInstalled,
    installInProgress: readiness.installInProgress,
    sessionCount: sessions.size,
    message: available ? 'Computer Use runtime is available.' : getSetupMessage(settings, readiness),
  };
},
/**
 * Registers the Computer Use MCP server (stdio transport, user scope) with all
 * providers, passing the MCP token and API URL through the server environment.
 *
 * @returns The server name, the command and args used, and the per-provider results.
 */
async registerAgentMcp() {
  const { command, args } = getMcpCommand();
  const env = {
    CLOUDCLI_COMPUTER_USE_MCP_TOKEN: getOrCreateMcpToken(),
    CLOUDCLI_COMPUTER_USE_API_URL: getMcpApiUrl(),
  };
  const results = await providerMcpService.addMcpServerToAllProviders({
    name: MCP_SERVER_NAME,
    scope: 'user',
    transport: 'stdio',
    command,
    args,
    env,
  });
  return { name: MCP_SERVER_NAME, command, args, results };
},
/** Returns the MCP token from `getOrCreateMcpToken()`. */
getMcpToken() {
  const token = getOrCreateMcpToken();
  return token;
},
/**
 * Removes the Computer Use MCP server (user scope) from every provider in
 * `MCP_PROVIDERS`, in parallel. A failure for one provider is captured in that
 * provider's result entry instead of failing the whole call.
 */
async unregisterAgentMcp() {
  const removals = MCP_PROVIDERS.map(async (provider) => {
    try {
      const outcome = await providerMcpService.removeProviderMcpServer(provider, {
        name: MCP_SERVER_NAME,
        scope: 'user',
      });
      return { provider, removed: outcome.removed };
    } catch (error) {
      const reason = error instanceof Error ? error.message : 'Unknown error';
      return { provider, removed: false, error: reason };
    }
  });
  const results = await Promise.all(removals);
  return { name: MCP_SERVER_NAME, results };
},
/** Runs the runtime install and returns its outcome together with a fresh status snapshot. */
async installRuntime() {
  const outcome = await installRuntime();
  const status = await this.getStatus();
  return { ...outcome, status };
},
/**
 * Lists the sessions the owner may access, in public form, after running the
 * stale-session cleanup.
 */
async listSessions(owner: ComputerUseOwner) {
  const ownerId = getOwnerId(owner);
  await expireStaleSessions();

  const visible: PublicComputerUseSession[] = [];
  for (const session of sessions.values()) {
    if (canAccessSession(ownerId, session)) {
      visible.push(publicSession(session));
    }
  }
  return visible;
},
/**
 * Creates and stores a new Computer Use session for `owner`.
 *
 * The session starts as 'unavailable' with agent access off. If the feature is
 * disabled or the runtime / desktop agent is not ready, it is stored in that
 * state with a setup message. Otherwise it becomes 'ready' and an initial
 * screenshot is captured; if that capture fails the session falls back to
 * 'unavailable' with the failure message.
 *
 * @param owner - Authenticated owner of the session.
 * @param options - `createdBy` marks who initiated the session (default 'user').
 * @returns The public view of the stored session.
 * @throws Error when the owner id is missing or the owner already has
 *   `MAX_SESSIONS_PER_OWNER` ready sessions.
 */
async createSession(owner: ComputerUseOwner, options?: { createdBy?: 'user' | 'agent' }) {
  const ownerId = getOwnerId(owner);
  await expireStaleSessions();

  const activeOwnerSessions = ownerSessions(ownerId).filter((item) => item.status === 'ready');
  if (activeOwnerSessions.length >= MAX_SESSIONS_PER_OWNER) {
    throw new Error(`Computer Use is limited to ${MAX_SESSIONS_PER_OWNER} active session(s).`);
  }

  const createdBy = options?.createdBy ?? 'user';
  const now = new Date().toISOString();
  const session: ComputerUseSession = {
    id: randomUUID(),
    ownerId,
    createdBy,
    runtime: getRuntime(),
    status: 'unavailable',
    screenshotDataUrl: null,
    createdAt: now,
    updatedAt: now,
    lastAction: 'create',
    // Consent is always OFF at creation — the user must explicitly grant control,
    // even for agent-initiated sessions controlling the full desktop.
    agentAccessEnabled: false,
    displaySize: null,
    message: null,
    cursor: null,
  };

  const settings = readSettings();
  const readiness = getRuntimeReadiness();
  const isCloud = getRuntime() === 'cloud';
  const runtimeReady = readiness.nutInstalled && readiness.screenshotInstalled;
  const ready = settings.enabled && (isCloud
    ? desktopAgentRelay.isConnected()
    : runtimeReady);
  if (!ready) {
    session.message = getSetupMessage(settings, readiness);
    sessions.set(session.id, session);
    return publicSession(session);
  }

  // In cloud mode the linked desktop agent is the consent authority and prompts
  // the user per its own consent mode, so the relay is allowed to act. In local
  // mode the user must still grant control from the panel.
  if (isCloud) {
    session.agentAccessEnabled = true;
  }
  session.status = 'ready';
  session.message = isCloud
    ? 'Computer Use session is ready on the linked desktop.'
    : 'Computer Use session is ready. Grant control to let agents act.';
  sessions.set(session.id, session);

  try {
    await refreshScreenshot(session);
  } catch (error) {
    session.status = 'unavailable';
    // Every other transition out of 'ready' revokes agent access; do the same
    // here so an unusable session never reports control as granted.
    session.agentAccessEnabled = false;
    session.message = error instanceof Error ? error.message : 'Failed to capture the screen.';
  }
  return publicSession(session);
},
/**
 * Turns on agent access for a session the owner may access.
 *
 * @throws Error when the session does not exist or is not accessible.
 */
async grantAgentAccess(owner: ComputerUseOwner, sessionId: string) {
  const ownerId = getOwnerId(owner);
  const target = sessions.get(sessionId);
  if (!target) {
    throw new Error('Computer Use session not found.');
  }
  if (!canAccessSession(ownerId, target)) {
    throw new Error('Computer Use session not found.');
  }

  target.lastAction = 'consent:grant';
  target.agentAccessEnabled = true;
  target.updatedAt = new Date().toISOString();
  return publicSession(target);
},
/**
 * Turns off agent access for a session the owner may access.
 *
 * @throws Error when the session does not exist or is not accessible.
 */
async revokeAgentAccess(owner: ComputerUseOwner, sessionId: string) {
  const ownerId = getOwnerId(owner);
  const target = sessions.get(sessionId);
  if (!target) {
    throw new Error('Computer Use session not found.');
  }
  if (!canAccessSession(ownerId, target)) {
    throw new Error('Computer Use session not found.');
  }

  target.lastAction = 'consent:revoke';
  target.agentAccessEnabled = false;
  target.updatedAt = new Date().toISOString();
  return publicSession(target);
},
/**
 * Stops a session and revokes agent access. In cloud mode with a connected
 * desktop agent, a `stop_session` message is also sent without waiting for or
 * reporting its outcome.
 *
 * @returns `{ stopped: false }` when the session is missing or inaccessible,
 *   otherwise `{ stopped: true, session }`.
 */
async stopSession(owner: ComputerUseOwner, sessionId: string) {
  const ownerId = getOwnerId(owner);
  const target = sessions.get(sessionId);
  if (!target || !canAccessSession(ownerId, target)) {
    return { stopped: false };
  }

  target.status = 'stopped';
  target.agentAccessEnabled = false;
  target.updatedAt = new Date().toISOString();
  target.lastAction = 'stop';
  target.message = 'Computer Use session stopped. Agent control is revoked.';

  const shouldNotifyAgent = getRuntime() === 'cloud' && desktopAgentRelay.isConnected();
  if (shouldNotifyAgent) {
    // Best-effort: tell the desktop agent to forget this session's consent.
    void desktopAgentRelay.relay('stop_session', { sessionId }).catch(() => undefined);
  }
  return { stopped: true, session: publicSession(target) };
},
/**
 * Removes a session from the store.
 *
 * @returns `{ deleted: false }` when the session is missing or inaccessible,
 *   otherwise `{ deleted: true, sessionId }`.
 */
async deleteSession(owner: ComputerUseOwner, sessionId: string) {
  const ownerId = getOwnerId(owner);
  const target = sessions.get(sessionId);
  const removable = Boolean(target) && canAccessSession(ownerId, target as ComputerUseSession);
  if (!removable) {
    return { deleted: false };
  }
  sessions.delete(sessionId);
  return { deleted: true, sessionId };
},
// --- User-initiated actions (from the panel) -------------------------------
/**
 * Refreshes the screenshot of a ready session at the user's request.
 *
 * @throws Error when the session is missing, inaccessible, or not ready.
 */
async userScreenshot(owner: ComputerUseOwner, sessionId: string) {
  const ownerId = getOwnerId(owner);
  const target = sessions.get(sessionId);
  if (!target) {
    throw new Error('Computer Use session not found.');
  }
  if (!canAccessSession(ownerId, target)) {
    throw new Error('Computer Use session not found.');
  }
  assertReady(target);

  await refreshScreenshot(target);
  target.lastAction = 'screenshot';
  return publicSession(target);
},
/**
 * Performs a user-initiated click (left button unless specified) at the given
 * point and records the cursor there with actor 'user'.
 *
 * @throws Error when the session is missing, inaccessible, or not ready.
 */
async userClick(owner: ComputerUseOwner, sessionId: string, input: { x: number; y: number; button?: ClickButton; double?: boolean }) {
  const ownerId = getOwnerId(owner);
  const target = sessions.get(sessionId);
  if (!target) {
    throw new Error('Computer Use session not found.');
  }
  if (!canAccessSession(ownerId, target)) {
    throw new Error('Computer Use session not found.');
  }
  assertReady(target);

  const { x, y } = input;
  await performAction(target, {
    type: 'click',
    button: input.button || 'left',
    point: { x, y },
    double: input.double === true,
  });

  target.cursor = { x, y, actor: 'user' };
  target.lastAction = input.double ? 'double_click' : 'click';
  return publicSession(target);
},
/**
 * Sends a user-initiated key press to a ready session.
 *
 * @throws Error when the session is missing, inaccessible, or not ready.
 */
async userPressKey(owner: ComputerUseOwner, sessionId: string, key: string) {
  const ownerId = getOwnerId(owner);
  const target = sessions.get(sessionId);
  if (!target) {
    throw new Error('Computer Use session not found.');
  }
  if (!canAccessSession(ownerId, target)) {
    throw new Error('Computer Use session not found.');
  }
  assertReady(target);

  await performAction(target, { type: 'key', key });
  target.lastAction = `key:${key}`;
  return publicSession(target);
},
// --- Agent-initiated actions (via MCP) ------------------------------------
/**
 * Returns the agent's current session, creating one when needed.
 *
 * After checking that agent tools are available and expiring stale sessions,
 * the most recently updated 'ready' agent-owned session is reused. Otherwise a
 * new session is created under the agent owner id; that session is returned
 * as stored, whatever status creation left it in.
 *
 * @throws Error when agent tools are unavailable or the created session is
 *   not found in the store.
 */
async getOrCreateAgentSession(): Promise<ComputerUseSession> {
  assertAgentToolsAvailable();
  await expireStaleSessions();

  const reusable = findActiveAgentSession();
  if (reusable) {
    return reusable;
  }

  const { id: createdId } = await this.createSession({ id: AGENT_OWNER_ID }, { createdBy: 'agent' });
  const stored = sessions.get(createdId);
  if (stored) {
    return stored;
  }
  throw new Error('Computer Use session could not be created.');
},
/**
 * Resolves a session the agent is allowed to act on.
 *
 * With a non-blank `sessionId` that session is looked up; otherwise the
 * agent's own session is reused or created. Outside cloud mode the session's
 * `agentAccessEnabled` flag must be set; in cloud mode that flag is not
 * checked here. The session must be 'ready' in both modes.
 *
 * Stale sessions are expired before an explicit lookup so that a session idle
 * past its TTL cannot keep being driven just because the caller remembers its
 * id (the no-id path already expires sessions in `getOrCreateAgentSession`).
 *
 * @throws Error when agent tools are unavailable, the session is not found,
 *   consent is missing (non-cloud), or the session is not ready.
 */
async getConsentedSession(sessionId?: string): Promise<ComputerUseSession> {
  assertAgentToolsAvailable();
  const normalizedSessionId = normalizeSessionId(sessionId);

  let session: ComputerUseSession | undefined;
  if (normalizedSessionId) {
    await expireStaleSessions();
    session = sessions.get(normalizedSessionId);
  } else {
    session = await this.getOrCreateAgentSession();
  }

  if (!session) {
    throw new Error('Computer Use session not found.');
  }
  if (getRuntime() !== 'cloud' && !session.agentAccessEnabled) {
    throw new Error(`Computer Use session ${session.id} is awaiting user consent. Ask the user to grant control in the Computer panel.`);
  }
  assertReady(session);
  return session;
},
/** Refreshes the screenshot of a consented session on behalf of the agent. */
async agentScreenshot(sessionId?: string) {
  const target = await this.getConsentedSession(sessionId);
  await refreshScreenshot(target);
  target.lastAction = 'screenshot';
  return publicSession(target);
},
/**
 * Reads the cursor position for the agent, records it on the session with
 * actor 'agent', and returns both the public session and the position.
 */
async agentCursorPosition(sessionId?: string) {
  const target = await this.getConsentedSession(sessionId);
  const position = await getCursorPosition(target);
  target.cursor = { ...position, actor: 'agent' };
  target.lastAction = 'cursor_position';
  return { session: publicSession(target), position };
},
/** Moves the mouse to `point` for the agent and records the cursor there. */
async agentMouseMove(sessionId: string | undefined, point: Point) {
  const target = await this.getConsentedSession(sessionId);
  await performAction(target, { type: 'mouse_move', point });
  target.lastAction = 'mouse_move';
  target.cursor = { ...point, actor: 'agent' };
  return publicSession(target);
},
/**
 * Clicks for the agent, optionally at a point and optionally several times.
 *
 * `clickCount` is truncated to an integer and clamped to 1..5; each click is
 * issued as a separate single-click action. The button defaults to 'left'.
 */
async agentUnifiedClick(sessionId: string | undefined, input: { button?: ClickButton; point?: Point; clickCount?: number }) {
  const target = await this.getConsentedSession(sessionId);
  const button = input.button || 'left';
  const requestedClicks = Math.trunc(input.clickCount || 1);
  const clickCount = Math.max(1, Math.min(requestedClicks, 5));

  let remaining = clickCount;
  while (remaining > 0) {
    await performAction(target, { type: 'click', button, point: input.point, double: false });
    remaining -= 1;
  }

  if (input.point) {
    target.cursor = { ...input.point, actor: 'agent' };
  }
  target.lastAction = clickCount > 1 ? `${button}_click:${clickCount}` : `${button}_click`;
  return publicSession(target);
},
/** Drags from `from` to `to` for the agent (left button by default) and records the cursor at `to`. */
async agentDrag(sessionId: string | undefined, from: Point, to: Point, button: ClickButton = 'left') {
  const target = await this.getConsentedSession(sessionId);
  await performAction(target, { type: 'drag', from, to, button });
  target.lastAction = `${button}_drag`;
  target.cursor = { ...to, actor: 'agent' };
  return publicSession(target);
},
/** Types `text` into a consented session on behalf of the agent. */
async agentType(sessionId: string | undefined, text: string) {
  const target = await this.getConsentedSession(sessionId);
  await performAction(target, { type: 'type', text });
  target.lastAction = 'type';
  return publicSession(target);
},
/** Presses `key` in a consented session on behalf of the agent. */
async agentKey(sessionId: string | undefined, key: string) {
  const target = await this.getConsentedSession(sessionId);
  await performAction(target, { type: 'key', key });
  target.lastAction = `key:${key}`;
  return publicSession(target);
},
/**
 * Scrolls for the agent. A target point is passed along (and recorded as the
 * cursor) only when both `x` and `y` are numbers.
 */
async agentScroll(sessionId: string | undefined, input: { direction: ScrollDirection; amount?: number; x?: number; y?: number }) {
  const target = await this.getConsentedSession(sessionId);
  const { direction, amount } = input;
  const hasPoint = typeof input.x === 'number' && typeof input.y === 'number';
  const point = hasPoint ? { x: input.x as number, y: input.y as number } : undefined;

  await performAction(target, { type: 'scroll', direction, amount, point });
  if (point) {
    target.cursor = { ...point, actor: 'agent' };
  }
  target.lastAction = `scroll:${direction}`;
  return publicSession(target);
},
/** Runs a 'wait' action (duration `timeoutMs`) in a consented session for the agent. */
async agentWait(sessionId?: string, timeoutMs?: number) {
  const target = await this.getConsentedSession(sessionId);
  await performAction(target, { type: 'wait', ms: timeoutMs });
  target.lastAction = 'wait';
  return publicSession(target);
},
/**
 * Stops a session on behalf of the agent owner. With a non-blank id that
 * session is stopped; otherwise the agent's most recent ready session is
 * stopped, if any.
 *
 * @throws Error when agent tools are unavailable.
 */
async agentStopSession(sessionId?: string) {
  assertAgentToolsAvailable();
  const agentOwner = { id: AGENT_OWNER_ID };

  const explicitId = normalizeSessionId(sessionId);
  if (explicitId) {
    return this.stopSession(agentOwner, explicitId);
  }

  await expireStaleSessions();
  const active = findActiveAgentSession();
  return active ? this.stopSession(agentOwner, active.id) : { stopped: false };
},
/**
 * Runs a semantic tool for the agent against a consented session.
 *
 * The caller's `sessionId` argument is replaced by the resolved session id.
 * In cloud mode the call is relayed to the desktop agent as 'semantic_tool';
 * otherwise it is handled by the local semantics service. The result is also
 * applied to the session as a relay result.
 *
 * @throws Error when `toolName` is not a known semantic operation.
 */
async callSemanticTool(toolName: string, args: Record<string, unknown>) {
  if (!semanticOperationNames.has(toolName)) {
    throw new Error(`Unsupported semantic Computer Use tool: ${toolName}`);
  }

  const requestedId = typeof args.sessionId === 'string' ? args.sessionId : undefined;
  const session = await this.getConsentedSession(normalizeSessionId(requestedId) ?? undefined);
  const toolArgs = { ...stripSessionArgs(args), sessionId: session.id };

  let semanticResult: unknown;
  if (getRuntime() === 'cloud') {
    semanticResult = await desktopAgentRelay.relay('semantic_tool', {
      sessionId: session.id,
      displaySize: session.displaySize,
      toolName,
      arguments: toolArgs,
    });
  } else {
    semanticResult = await computerSemanticsService.callTool(toolName, toolArgs);
  }

  applyRelayResult(session, semanticResult as RelayResult);
  session.lastAction = `semantic:${toolName}`;
  return { session: publicSession(session), result: semanticResult };
},
/**
 * Cloud only: when a desktop agent links to this hosted environment, expose
 * the computer_* MCP tools only if the user enabled Computer Use in settings.
 * A registration failure is logged as a warning and not rethrown.
 */
async onDesktopAgentConnected() {
  const shouldRegister = getRuntime() === 'cloud' && readSettings().enabled;
  if (!shouldRegister) {
    return;
  }
  try {
    await this.registerAgentMcp();
  } catch (error) {
    const detail = error instanceof Error ? error.message : error;
    console.warn('[Computer Use] Failed to register MCP for linked desktop agent:', detail);
  }
},
/**
 * Cloud only: tear down sessions when the last desktop agent disconnects.
 * Does nothing outside cloud mode or while an agent is still connected; only
 * sessions currently 'ready' are stopped.
 */
async onDesktopAgentDisconnected() {
  if (getRuntime() !== 'cloud') {
    return;
  }
  if (desktopAgentRelay.isConnected()) {
    return;
  }

  for (const session of sessions.values()) {
    if (session.status !== 'ready') {
      continue;
    }
    session.status = 'stopped';
    session.agentAccessEnabled = false;
    session.updatedAt = new Date().toISOString();
    session.lastAction = 'agent-disconnected';
    session.message = 'The linked desktop agent disconnected.';
  }
},
/** Stops every stored session, labelling the stop as a server shutdown. */
async stopAllSessions() {
  const shutdownMessage = 'Computer Use session stopped during server shutdown.';
  stopSessions('shutdown', shutdownMessage);
},
};
// Drive cloud MCP exposure + session teardown off desktop-agent connectivity.
// Connections are accepted only in cloud mode with Computer Use enabled.
desktopAgentRelay.setHooks({
  canAcceptConnection() {
    return getRuntime() === 'cloud' && readSettings().enabled;
  },
  onFirstConnect() {
    return computerUseService.onDesktopAgentConnected();
  },
  onLastDisconnect() {
    return computerUseService.onDesktopAgentDisconnected();
  },
});

// Mark all sessions stopped once, when the process is about to exit.
process.once('beforeExit', () => {
  void computerUseService.stopAllSessions();
});

View File

@@ -1,158 +0,0 @@
import { randomUUID } from 'node:crypto';
import type { WebSocket } from 'ws';
// How long a relayed call waits for the desktop agent's reply, taken from
// CLOUDCLI_COMPUTER_USE_RELAY_TIMEOUT_MS (default 60000 ms).
//
// The configured value is validated because Node coerces a timer delay that is
// NaN, below 1, or above 2147483647 to 1 ms — so a malformed setting would make
// every relay call time out almost immediately instead of using the default.
const DEFAULT_RELAY_TIMEOUT_MS = 60000;
const MAX_TIMER_DELAY_MS = 2147483647;
const configuredRelayTimeoutMs = Number.parseInt(
  process.env.CLOUDCLI_COMPUTER_USE_RELAY_TIMEOUT_MS || String(DEFAULT_RELAY_TIMEOUT_MS),
  10,
);
const RELAY_TIMEOUT_MS = Number.isFinite(configuredRelayTimeoutMs) && configuredRelayTimeoutMs > 0
  ? Math.min(configuredRelayTimeoutMs, MAX_TIMER_DELAY_MS)
  : DEFAULT_RELAY_TIMEOUT_MS;
// readyState value of an open WebSocket.
const WS_OPEN = 1;
/** Bookkeeping for one relayed call that is waiting for the agent's reply. */
type PendingRelay = {
// Settles the promise returned by `relay()` with the agent's result.
resolve: (value: unknown) => void;
// Fails the promise returned by `relay()`.
reject: (reason: Error) => void;
// Timeout timer; cleared when the call settles some other way.
timer: ReturnType<typeof setTimeout>;
};
/** A registered desktop agent connection. */
type ConnectedAgent = {
ws: WebSocket;
// Name used in log lines.
label: string;
// ISO timestamp taken when the agent was registered.
registeredAt: string;
};
/** Optional callbacks the relay invokes around agent connectivity changes. */
type RelayLifecycleHooks = {
// When present and returning false, `register()` rejects the connection.
canAcceptConnection?: () => boolean;
// Invoked by `register()` when no open agent existed before the new one.
onFirstConnect?: () => void | Promise<void>;
// Invoked when no open agent remains after a disconnect.
onLastDisconnect?: () => void | Promise<void>;
};
// Registered agents, keyed by their socket.
const agents = new Map<WebSocket, ConnectedAgent>();
// In-flight relay calls, keyed by call id.
const pending = new Map<string, PendingRelay>();
// Current lifecycle hooks; replaced wholesale by `setHooks()`.
let hooks: RelayLifecycleHooks = {};
/**
 * Fails every in-flight relay call with the given reason, cancels their
 * timeout timers, and empties the pending map.
 */
function rejectAllPending(reason: string): void {
  const abandoned = [...pending.values()];
  pending.clear();
  abandoned.forEach((call) => {
    clearTimeout(call.timer);
    call.reject(new Error(reason));
  });
}
/**
 * Returns the first registered agent (in registration order) whose socket is
 * open, or undefined when there is none.
 */
function pickAgent(): ConnectedAgent | undefined {
  return [...agents.values()].find((candidate) => candidate.ws.readyState === WS_OPEN);
}
/**
* Cloud-side registry of linked desktop agents and the request/response relay
* used to drive the user's real desktop. The hosted server never touches the OS
* itself — it only forwards `computer_*` actions to a connected desktop agent
* and awaits the screenshot it returns.
*/
export const desktopAgentRelay = {
/** Replaces the current lifecycle hooks with `next`. */
setHooks(next: RelayLifecycleHooks): void {
  const replacement = next;
  hooks = replacement;
},
/**
 * Registers a desktop agent socket.
 *
 * If the `canAcceptConnection` hook exists and returns false, the socket is
 * closed with code 1008 and false is returned. Otherwise the agent is stored,
 * a 'close' listener is attached to unregister it, and `onFirstConnect` fires
 * when no open agent existed beforehand. When the closing socket was the last
 * open agent, all pending calls are rejected and `onLastDisconnect` fires.
 *
 * @returns true when the agent was registered.
 */
register(ws: WebSocket, label = 'desktop-agent'): boolean {
  const accepting = !hooks.canAcceptConnection || hooks.canAcceptConnection();
  if (!accepting) {
    console.log(`[DesktopAgent] Rejected (${label}); Computer Use is disabled.`);
    try {
      ws.close(1008, 'Computer Use is disabled in this environment.');
    } catch {
      // ignore close failures
    }
    return false;
  }

  const isFirstAgent = pickAgent() === undefined;
  agents.set(ws, { ws, label, registeredAt: new Date().toISOString() });
  console.log(`[DesktopAgent] Registered (${label}); ${agents.size} connected.`);

  const handleClose = (): void => {
    const wasRegistered = agents.delete(ws);
    console.log(`[DesktopAgent] Disconnected (${label}); ${agents.size} remain.`);
    if (!wasRegistered || pickAgent() !== undefined) {
      return;
    }
    rejectAllPending('Desktop agent disconnected.');
    void hooks.onLastDisconnect?.();
  };
  ws.on('close', handleClose);

  if (isFirstAgent) {
    void hooks.onFirstConnect?.();
  }
  return true;
},
/**
 * Drops every registered agent: clears the registry, closes each socket with
 * code 1008 and the given reason, and rejects all pending calls. The
 * `onLastDisconnect` hook fires only if an open agent existed beforehand.
 */
disconnectAll(reason = 'Desktop agent disconnected.'): void {
  const hadOpenAgent = pickAgent() !== undefined;
  const sockets = Array.from(agents.keys());
  agents.clear();

  sockets.forEach((socket) => {
    try {
      socket.close(1008, reason);
    } catch {
      // ignore close failures
    }
  });

  rejectAllPending(reason);
  if (hadOpenAgent) {
    void hooks.onLastDisconnect?.();
  }
},
/**
 * Settles a pending relay call with the desktop agent's reply. Unknown ids are
 * ignored. A truthy `error` rejects the call; otherwise it resolves with
 * `result`.
 */
handleResult(id: string, result: unknown, error?: string): void {
  const call = pending.get(id);
  if (call === undefined) {
    return;
  }

  pending.delete(id);
  clearTimeout(call.timer);

  if (!error) {
    call.resolve(result);
    return;
  }
  call.reject(new Error(error));
},
/** True when at least one registered agent has an open socket. */
isConnected(): boolean {
  const openAgent = pickAgent();
  return openAgent !== undefined;
},
/** Number of registered agents whose socket is currently open. */
connectedCount(): number {
  return [...agents.values()].filter((agent) => agent.ws.readyState === WS_OPEN).length;
},
/**
 * Sends one `computer_relay` request to the first open agent and waits for the
 * matching `handleResult` call.
 *
 * The returned promise rejects when no agent is open, when sending fails, when
 * the agent reports an error, or when no reply arrives within
 * `RELAY_TIMEOUT_MS`.
 */
async relay(type: string, params: Record<string, unknown>): Promise<unknown> {
  const agent = pickAgent();
  if (agent === undefined) {
    throw new Error(
      'No desktop is linked. Open CloudCLI Desktop on this computer, connect the same account, and enable Computer Use.'
    );
  }

  const id = randomUUID();
  return new Promise<unknown>((resolve, reject) => {
    const onTimeout = (): void => {
      pending.delete(id);
      reject(new Error('Desktop agent did not respond in time.'));
    };
    const timer = setTimeout(onTimeout, RELAY_TIMEOUT_MS);
    pending.set(id, { resolve, reject, timer });

    try {
      const payload = JSON.stringify({ kind: 'computer_relay', id, type, params });
      agent.ws.send(payload);
    } catch (error) {
      // Sending failed: undo the bookkeeping before reporting the error.
      clearTimeout(timer);
      pending.delete(id);
      reject(error instanceof Error ? error : new Error('Failed to send to desktop agent.'));
    }
  });
},
};

View File

@@ -1,2 +0,0 @@
export { computerUseService } from '@/modules/computer-use/computer-use.service.js';
export { desktopAgentRelay } from '@/modules/computer-use/desktop-agent-relay.service.js';

View File

@@ -1,82 +0,0 @@
import { SemanticHelperProcess } from '@/modules/computer-use/semantics/helpers/semantic-helper-process.js';
import { resolveSemanticHelper } from '@/modules/computer-use/semantics/helpers/semantic-helper-resolver.js';
import type { SemanticAdapter, SemanticAdapterCapabilities } from '@/modules/computer-use/semantics/adapters/semantic-adapter.js';
import type { SemanticApp, SemanticAppState, SemanticToolInput } from '@/modules/computer-use/semantics/semantic-types.js';
/** Method names this adapter sends to the semantic helper process. */
type HelperMethod =
| 'list_apps'
| 'get_app_state'
| 'click_element'
| 'perform_secondary_action'
| 'set_value'
| 'type_text'
| 'press_key'
| 'scroll_element'
| 'drag';
/**
 * Semantic adapter that forwards every operation to a helper process resolved
 * for the given platform and architecture. The helper is created on the first
 * request and reused afterwards.
 */
export class HelperSemanticAdapter implements SemanticAdapter {
  private helper: SemanticHelperProcess | null = null;

  constructor(
    private readonly platform: NodeJS.Platform,
    private readonly arch: NodeJS.Architecture = process.arch,
  ) {}

  /** Reports every capability as supported for this adapter's platform. */
  capabilities(): SemanticAdapterCapabilities {
    const { platform } = this;
    return {
      platform,
      appDiscovery: true,
      accessibilityTree: true,
      nativeElementActions: true,
      nativeValueSetting: true,
      targetedInput: true,
    };
  }

  async listApps(): Promise<SemanticApp[]> {
    const apps = await this.request('list_apps', {});
    return apps as SemanticApp[];
  }

  async getAppState(input: SemanticToolInput): Promise<SemanticAppState> {
    return this.requestState('get_app_state', input);
  }

  async clickElement(input: SemanticToolInput): Promise<SemanticAppState> {
    return this.requestState('click_element', input);
  }

  async performSecondaryAction(input: SemanticToolInput): Promise<SemanticAppState> {
    return this.requestState('perform_secondary_action', input);
  }

  async setValue(input: SemanticToolInput): Promise<SemanticAppState> {
    return this.requestState('set_value', input);
  }

  async typeText(input: SemanticToolInput): Promise<SemanticAppState> {
    return this.requestState('type_text', input);
  }

  async pressKey(input: SemanticToolInput): Promise<SemanticAppState> {
    return this.requestState('press_key', input);
  }

  async scrollElement(input: SemanticToolInput): Promise<SemanticAppState> {
    return this.requestState('scroll_element', input);
  }

  async drag(input: SemanticToolInput): Promise<SemanticAppState> {
    return this.requestState('drag', input);
  }

  /** Sends a request whose reply is treated as an app state. */
  private async requestState(method: HelperMethod, input: SemanticToolInput): Promise<SemanticAppState> {
    const state = await this.request(method, input);
    return state as SemanticAppState;
  }

  /**
   * Forwards one request to the helper, starting it first if needed.
   *
   * @throws Error when no helper is available for this platform/architecture.
   */
  private async request(method: HelperMethod, params: Record<string, unknown>): Promise<unknown> {
    const helper = this.helper ?? this.startHelper();
    return helper.request(method, params);
  }

  /** Resolves the helper binary, creates the helper process wrapper, and caches it. */
  private startHelper(): SemanticHelperProcess {
    const resolution = resolveSemanticHelper(this.platform, this.arch);
    if (!resolution.available || !resolution.path) {
      throw new Error(resolution.reason || `Semantic helper is unavailable for ${this.platform}-${this.arch}.`);
    }
    const started = new SemanticHelperProcess(resolution.path);
    this.helper = started;
    return started;
  }
}

View File

@@ -1,5 +0,0 @@
import { HelperSemanticAdapter } from '@/modules/computer-use/semantics/adapters/helper-semantic-adapter.js';
/** Creates a helper-backed semantic adapter for the 'darwin' platform. */
export function createMacOsSemanticAdapter(): HelperSemanticAdapter {
  const adapter = new HelperSemanticAdapter('darwin');
  return adapter;
}

View File

@@ -1,23 +0,0 @@
import type { SemanticApp, SemanticAppState, SemanticToolInput } from '@/modules/computer-use/semantics/semantic-types.js';
/** Feature flags an adapter reports for its platform. */
export type SemanticAdapterCapabilities = {
platform: NodeJS.Platform;
appDiscovery: boolean;
accessibilityTree: boolean;
nativeElementActions: boolean;
nativeValueSetting: boolean;
targetedInput: boolean;
};
/**
 * Contract implemented by platform semantic adapters. `listApps` resolves to a
 * list of apps; every other operation takes a tool input and resolves to an
 * app state.
 */
export type SemanticAdapter = {
capabilities(): SemanticAdapterCapabilities;
listApps(): Promise<SemanticApp[]>;
getAppState(input: SemanticToolInput): Promise<SemanticAppState>;
clickElement(input: SemanticToolInput): Promise<SemanticAppState>;
performSecondaryAction(input: SemanticToolInput): Promise<SemanticAppState>;
setValue(input: SemanticToolInput): Promise<SemanticAppState>;
typeText(input: SemanticToolInput): Promise<SemanticAppState>;
pressKey(input: SemanticToolInput): Promise<SemanticAppState>;
scrollElement(input: SemanticToolInput): Promise<SemanticAppState>;
drag(input: SemanticToolInput): Promise<SemanticAppState>;
};

View File

@@ -1,5 +0,0 @@
import { HelperSemanticAdapter } from '@/modules/computer-use/semantics/adapters/helper-semantic-adapter.js';
/** Creates a helper-backed semantic adapter for the 'win32' platform. */
export function createWindowsSemanticAdapter(): HelperSemanticAdapter {
  const adapter = new HelperSemanticAdapter('win32');
  return adapter;
}

View File

@@ -1,467 +0,0 @@
import AppKit
import ApplicationServices
import Foundation
/// Loosely typed JSON object used for requests and responses.
typealias JSON = [String: Any]
/// Serializable description of one accessibility element.
struct ElementRecord {
// Identifier of the element within its captured state.
let index: String
let role: String
let title: String?
let value: String?
// Keys are "x", "y", "width", "height" (see `bounds(_:)`).
let bounds: [String: Double]?
// Accessibility action names supported by the element.
let actions: [String]
}
// Element records per state id.
var stateElements: [String: [ElementRecord]] = [:]
// Live accessibility elements per state id, then per element index
// (looked up by `cachedElement(_:)`).
var stateAxElements: [String: [String: AXUIElement]] = [:]
// State ids in eviction order: `pruneStoredStates()` removes from the front.
var stateOrder: [String] = []
// Maximum number of states kept before the oldest are evicted.
let maxStoredStates = 100
/// Writes `object` to stdout as a single line of JSON and flushes.
/// If the object cannot be encoded, a fixed error object is written instead.
func jsonLine(_ object: Any) {
    defer { fflush(stdout) }

    if JSONSerialization.isValidJSONObject(object),
       let encoded = try? JSONSerialization.data(withJSONObject: object),
       let line = String(data: encoded, encoding: .utf8) {
        print(line)
    } else {
        print("{\"error\":\"Failed to encode JSON\"}")
    }
}
/// Emits a success reply for request `id` (JSON null when the id is missing).
func respond(id: Any?, result: Any) {
    let reply: [String: Any] = [
        "id": id ?? NSNull(),
        "result": result,
    ]
    jsonLine(reply)
}
/// Emits an error reply for request `id` (JSON null when the id is missing).
func respondError(id: Any?, _ message: String) {
    let reply: [String: Any] = [
        "id": id ?? NSNull(),
        "error": message,
    ]
    jsonLine(reply)
}
/// Reads an accessibility attribute as a string.
/// Returns nil when the read fails or the value is not a string.
func stringAttr(_ element: AXUIElement, _ attr: CFString) -> String? {
    var raw: CFTypeRef?
    let status = AXUIElementCopyAttributeValue(element, attr, &raw)
    if status != .success {
        return nil
    }
    return raw as? String
}
/// Reads an accessibility attribute as a boolean.
/// Returns nil when the read fails or the value is not a boolean.
func boolAttr(_ element: AXUIElement, _ attr: CFString) -> Bool? {
    var raw: CFTypeRef?
    let status = AXUIElementCopyAttributeValue(element, attr, &raw)
    if status != .success {
        return nil
    }
    return raw as? Bool
}
/// Reads an accessibility attribute as a list of elements.
/// Returns an empty list when the read fails or the value has another type.
func arrayAttr(_ element: AXUIElement, _ attr: CFString) -> [AXUIElement] {
    var raw: CFTypeRef?
    let status = AXUIElementCopyAttributeValue(element, attr, &raw)
    if status != .success {
        return []
    }
    if let elements = raw as? [AXUIElement] {
        return elements
    }
    return []
}
/// Lists the accessibility action names the element supports.
/// Returns an empty list when the names cannot be read.
func actions(_ element: AXUIElement) -> [String] {
    var rawNames: CFArray?
    let status = AXUIElementCopyActionNames(element, &rawNames)
    if status != .success {
        return []
    }
    if let names = rawNames as? [String] {
        return names
    }
    return []
}
/// Reads the element's position and size attributes and returns them as
/// "x", "y", "width", "height". Returns nil when either attribute cannot be
/// read or is not an AXValue of the expected kind.
func bounds(_ element: AXUIElement) -> [String: Double]? {
    var positionRef: CFTypeRef?
    guard AXUIElementCopyAttributeValue(element, kAXPositionAttribute as CFString, &positionRef) == .success else {
        return nil
    }
    var sizeRef: CFTypeRef?
    guard AXUIElementCopyAttributeValue(element, kAXSizeAttribute as CFString, &sizeRef) == .success else {
        return nil
    }
    guard let rawPosition = positionRef, let rawSize = sizeRef else {
        return nil
    }

    // Confirm both values really are AXValues before the forced casts below.
    let axValueType = AXValueGetTypeID()
    guard CFGetTypeID(rawPosition) == axValueType, CFGetTypeID(rawSize) == axValueType else {
        return nil
    }

    var origin = CGPoint.zero
    var extent = CGSize.zero
    guard AXValueGetValue(rawPosition as! AXValue, .cgPoint, &origin),
          AXValueGetValue(rawSize as! AXValue, .cgSize, &extent)
    else {
        return nil
    }

    return [
        "x": Double(origin.x),
        "y": Double(origin.y),
        "width": Double(extent.width),
        "height": Double(extent.height),
    ]
}
/// Captures the role, title, value, bounds and actions of `element` into an `ElementRecord`.
/// The role defaults to "AXUnknown"; the title falls back to the element's description.
func record(_ element: AXUIElement, index: String) -> ElementRecord {
    let role = stringAttr(element, kAXRoleAttribute as CFString) ?? "AXUnknown"
    let title = stringAttr(element, kAXTitleAttribute as CFString)
        ?? stringAttr(element, kAXDescriptionAttribute as CFString)
    let value = stringAttr(element, kAXValueAttribute as CFString)
    let frame = bounds(element)
    let actionNames = actions(element)
    return ElementRecord(index: index, role: role, title: title, value: value, bounds: frame, actions: actionNames)
}
/// Looks up the cached AX element named by `stateId` + `element_index` in `params`.
/// Returns nil when either key is missing, is not a string, or is not cached.
func cachedElement(_ params: JSON) -> AXUIElement? {
    if let stateKey = params["stateId"] as? String,
       let elementKey = params["element_index"] as? String {
        return stateAxElements[stateKey]?[elementKey]
    }
    return nil
}
/// Converts `record` into a JSON dictionary. `index`, `role` and `actions` are
/// always present; `title`, `value` and `bounds` are included only when set.
func dictionary(_ record: ElementRecord) -> JSON {
    var json: JSON = [:]
    json["index"] = record.index
    json["role"] = record.role
    json["actions"] = record.actions
    if let title = record.title {
        json["title"] = title
    }
    if let value = record.value {
        json["value"] = value
    }
    if let frame = record.bounds {
        json["bounds"] = frame
    }
    return json
}
/// Evicts the oldest cached states until at most `maxStoredStates` remain.
func pruneStoredStates() {
    let overflow = stateOrder.count - maxStoredStates
    guard overflow > 0 else { return }
    for expired in stateOrder.prefix(overflow) {
        stateElements[expired] = nil
        stateAxElements[expired] = nil
    }
    stateOrder.removeFirst(overflow)
}
/// Finds a running, regular (Dock-visible) application matching `query`.
///
/// Matching is case-insensitive and tried in order: exact bundle identifier,
/// exact localized name, then localized name containing the query.
///
/// - Parameter query: Bundle identifier or (partial) app name. Surrounding whitespace is ignored.
/// - Throws: `NSError` code 400 when the query is blank, code 404 when nothing matches.
func resolveApp(_ query: String) throws -> NSRunningApplication {
    let normalized = query.trimmingCharacters(in: .whitespacesAndNewlines).lowercased()
    // A blank query must not fall through to the substring match below, where it
    // could select an arbitrary application.
    guard !normalized.isEmpty else {
        throw NSError(domain: "CloudCLISemantics", code: 400, userInfo: [NSLocalizedDescriptionKey: "app is required."])
    }
    let apps = NSWorkspace.shared.runningApplications.filter { app in
        app.activationPolicy == .regular
    }
    if let app = apps.first(where: { $0.bundleIdentifier?.lowercased() == normalized }) {
        return app
    }
    if let app = apps.first(where: { ($0.localizedName ?? "").lowercased() == normalized }) {
        return app
    }
    if let app = apps.first(where: { ($0.localizedName ?? "").lowercased().contains(normalized) }) {
        return app
    }
    throw NSError(domain: "CloudCLISemantics", code: 404, userInfo: [NSLocalizedDescriptionKey: "App is not running: \(query)"])
}
/// Describes every running application whose activation policy is `.regular`.
/// Each entry carries `id`, `name`, `bundleIdentifier`, `pid` and `running`.
func listApps() -> [[String: Any]] {
    var entries: [[String: Any]] = []
    for app in NSWorkspace.shared.runningApplications where app.activationPolicy == .regular {
        let entry: [String: Any] = [
            "id": app.bundleIdentifier ?? app.localizedName ?? "\(app.processIdentifier)",
            "name": app.localizedName ?? app.bundleIdentifier ?? "Unknown",
            "bundleIdentifier": app.bundleIdentifier ?? "",
            "pid": Int(app.processIdentifier),
            "running": true,
        ]
        entries.append(entry)
    }
    return entries
}
/// Depth-first walk of the accessibility tree rooted at `element`.
///
/// Appends one record per visited element (indexed from "1" in visit order) and
/// stores the matching AX handle in `axRecords`. Stops descending past `maxDepth`
/// and stops entirely once `limit` records have been collected.
func walk(_ element: AXUIElement, depth: Int, maxDepth: Int, records: inout [ElementRecord], axRecords: inout [String: AXUIElement], limit: Int) {
    guard depth <= maxDepth, records.count < limit else { return }

    let index = "\(records.count + 1)"
    records.append(record(element, index: index))
    axRecords[index] = element

    let children = arrayAttr(element, kAXChildrenAttribute as CFString)
    for child in children {
        walk(child, depth: depth + 1, maxDepth: maxDepth, records: &records, axRecords: &axRecords, limit: limit)
        if records.count >= limit {
            break
        }
    }
}
/// Captures a screenshot via `/usr/sbin/screencapture` into a temporary PNG and
/// returns it as a `data:image/png;base64,…` URL.
///
/// - Returns: The data URL, or nil when the capture fails, exits non-zero, or
///   produces an empty file.
func pngDataUrlForMainDisplay() -> String? {
    let fileURL = URL(fileURLWithPath: NSTemporaryDirectory()).appendingPathComponent("cloudcli-semantics-\(UUID().uuidString).png")
    // Remove the temp file on every exit path, including a non-zero exit status.
    defer { try? FileManager.default.removeItem(at: fileURL) }

    let process = Process()
    process.executableURL = URL(fileURLWithPath: "/usr/sbin/screencapture")
    process.arguments = ["-x", "-t", "png", fileURL.path]
    do {
        try process.run()
        process.waitUntilExit()
        guard process.terminationStatus == 0 else { return nil }
        let png = try Data(contentsOf: fileURL)
        return png.isEmpty ? nil : "data:image/png;base64,\(png.base64EncodedString())"
    } catch {
        return nil
    }
}
/// Captures the current UI state of the app named by `params["app"]`.
///
/// Walks the app's first window (or the application element when it has no
/// windows) up to depth 5 and 300 elements, caches the result under a fresh
/// state id, and returns the state together with a screenshot and display size.
///
/// - Throws: Whatever `resolveApp` throws when the app cannot be resolved.
func getAppState(_ params: JSON) throws -> JSON {
    let requestedApp = params["app"] as? String ?? ""
    let app = try resolveApp(requestedApp)
    let axApp = AXUIElementCreateApplication(app.processIdentifier)
    let root = arrayAttr(axApp, kAXWindowsAttribute as CFString).first ?? axApp

    var records: [ElementRecord] = []
    var axRecords: [String: AXUIElement] = [:]
    walk(root, depth: 0, maxDepth: 5, records: &records, axRecords: &axRecords, limit: 300)

    // Cache the snapshot so later element-based requests can refer back to it.
    let stateId = "state_\(UUID().uuidString)"
    stateElements[stateId] = records
    stateAxElements[stateId] = axRecords
    stateOrder.append(stateId)
    pruneStoredStates()

    let elements = records.map { dictionary($0) }
    let screenshot: Any = pngDataUrlForMainDisplay() ?? NSNull()
    let displaySize: [String: Int] = [
        "width": Int(CGDisplayPixelsWide(CGMainDisplayID())),
        "height": Int(CGDisplayPixelsHigh(CGMainDisplayID())),
    ]
    let treeText = elements
        .map { "\($0["index"] ?? "") \($0["role"] ?? "") \($0["title"] ?? "")" }
        .joined(separator: "\n")

    return [
        "stateId": stateId,
        "app": app.localizedName ?? app.bundleIdentifier ?? requestedApp,
        "platform": "darwin",
        "screenshotDataUrl": screenshot,
        "displaySize": displaySize,
        "elements": elements,
        "accessibilityTree": elements,
        "treeText": treeText,
    ]
}
/// Maps a button name ("right", "middle") to a `CGMouseButton`.
/// Anything else, including a non-string value, maps to `.left`.
func cgMouseButton(_ value: Any?) -> CGMouseButton {
    let name = value as? String
    if name == "right" {
        return .right
    }
    if name == "middle" {
        return .center
    }
    return .left
}
/// Returns the (button-down, button-up) event types for `button`.
func mouseEventTypes(_ button: CGMouseButton) -> (CGEventType, CGEventType) {
    if button == .right {
        return (.rightMouseDown, .rightMouseUp)
    }
    if button == .center {
        return (.otherMouseDown, .otherMouseUp)
    }
    return (.leftMouseDown, .leftMouseUp)
}
/// Posts `clickCount` synthetic clicks of `button` at `point`.
///
/// Each down/up pair is tagged with its ordinal in the click sequence
/// (1 for the first click, 2 for the second, …) so that applications
/// recognise double and triple clicks instead of unrelated single clicks.
///
/// - Parameters:
///   - point: Click location in global display coordinates.
///   - button: Mouse button to press.
///   - clickCount: Number of clicks; values below 1 are treated as 1.
/// - Throws: `NSError` code 500 when the event source cannot be created.
func postMouseClick(point: CGPoint, button: CGMouseButton, clickCount: Int = 1) throws {
    guard let source = CGEventSource(stateID: .combinedSessionState) else {
        throw NSError(domain: "CloudCLISemantics", code: 500, userInfo: [NSLocalizedDescriptionKey: "Failed to create CGEventSource"])
    }
    let eventTypes = mouseEventTypes(button)
    for clickIndex in 1...max(1, clickCount) {
        let down = CGEvent(mouseEventSource: source, mouseType: eventTypes.0, mouseCursorPosition: point, mouseButton: button)
        let up = CGEvent(mouseEventSource: source, mouseType: eventTypes.1, mouseCursorPosition: point, mouseButton: button)
        // Without the click-state field every pair is delivered as a fresh single click.
        down?.setIntegerValueField(.mouseEventClickState, value: Int64(clickIndex))
        up?.setIntegerValueField(.mouseEventClickState, value: Int64(clickIndex))
        down?.post(tap: .cghidEventTap)
        up?.post(tap: .cghidEventTap)
        usleep(80_000)
    }
}
/// Posts a synthetic drag of `button` from `from` to `to`: button down, a single
/// drag event at the destination, then button up, with short pauses in between.
///
/// The drag event type follows the button, so right and middle button drags are
/// no longer reported as left-button drags.
///
/// - Throws: `NSError` code 500 when the event source cannot be created.
func postDrag(from: CGPoint, to: CGPoint, button: CGMouseButton) throws {
    guard let source = CGEventSource(stateID: .combinedSessionState) else {
        throw NSError(domain: "CloudCLISemantics", code: 500, userInfo: [NSLocalizedDescriptionKey: "Failed to create CGEventSource"])
    }
    let eventTypes = mouseEventTypes(button)
    let dragType: CGEventType
    switch button {
    case .right: dragType = .rightMouseDragged
    case .center: dragType = .otherMouseDragged
    default: dragType = .leftMouseDragged
    }
    CGEvent(mouseEventSource: source, mouseType: eventTypes.0, mouseCursorPosition: from, mouseButton: button)?.post(tap: .cghidEventTap)
    usleep(80_000)
    CGEvent(mouseEventSource: source, mouseType: dragType, mouseCursorPosition: to, mouseButton: button)?.post(tap: .cghidEventTap)
    usleep(80_000)
    CGEvent(mouseEventSource: source, mouseType: eventTypes.1, mouseCursorPosition: to, mouseButton: button)?.post(tap: .cghidEventTap)
}
/// Runs `script` with `/usr/bin/osascript -e` and waits for it to finish.
///
/// - Throws: Any error from launching the process, or an `NSError` whose code is
///   the exit status and whose description is osascript's stderr output
///   ("AppleScript failed." when stderr is empty or not UTF-8).
func runAppleScript(_ script: String) throws {
    let process = Process()
    process.executableURL = URL(fileURLWithPath: "/usr/bin/osascript")
    process.arguments = ["-e", script]
    // Nothing reads stdout, so discard it rather than letting a pipe buffer fill up.
    process.standardOutput = FileHandle.nullDevice
    let stderr = Pipe()
    process.standardError = stderr
    try process.run()
    // Drain stderr before waiting: a child blocked on a full pipe would never exit.
    let data = stderr.fileHandleForReading.readDataToEndOfFile()
    process.waitUntilExit()
    if process.terminationStatus != 0 {
        let output = String(data: data, encoding: .utf8) ?? ""
        let message = output.isEmpty ? "AppleScript failed." : output
        throw NSError(domain: "CloudCLISemantics", code: Int(process.terminationStatus), userInfo: [NSLocalizedDescriptionKey: message])
    }
}
/// Escapes `value` for use inside a double-quoted AppleScript string literal.
/// Backslashes are doubled first so the backslashes added for quotes are not re-escaped.
func escapedAppleScriptString(_ value: String) -> String {
    let backslashesEscaped = value.replacingOccurrences(of: "\\", with: "\\\\")
    let quotesEscaped = backslashesEscaped.replacingOccurrences(of: "\"", with: "\\\"")
    return quotesEscaped
}
/// Resolves the screen point a request refers to.
///
/// Explicit `x` / `y` win; otherwise the centre of the cached element named by
/// `stateId` + `element_index` is used. Returns nil when neither is available.
func pointForElement(_ params: JSON) -> CGPoint? {
    if let x = params["x"] as? Double, let y = params["y"] as? Double {
        return CGPoint(x: x, y: y)
    }

    guard let stateKey = params["stateId"] as? String,
          let elementKey = params["element_index"] as? String
    else { return nil }

    guard let cached = stateElements[stateKey]?.first(where: { $0.index == elementKey }),
          let frame = cached.bounds
    else { return nil }

    guard let left = frame["x"], let top = frame["y"],
          let width = frame["width"], let height = frame["height"]
    else { return nil }

    return CGPoint(x: left + width / 2, y: top + height / 2)
}
/// Handles `click_element`.
///
/// A single left click on a cached element that supports the AX press action is
/// performed through accessibility; everything else is a synthetic mouse click at
/// the resolved point. Returns the refreshed app state.
///
/// - Throws: `NSError` code 400 when no target point can be resolved.
func click(_ params: JSON) throws -> JSON {
    let button = cgMouseButton(params["mouse_button"])
    let requestedClicks = params["click_count"] as? Int ?? 1

    if button == .left, requestedClicks == 1, let target = cachedElement(params) {
        let pressAction = kAXPressAction as String
        if actions(target).contains(pressAction),
           AXUIElementPerformAction(target, pressAction as CFString) == .success {
            return try getAppState(params)
        }
    }

    guard let point = pointForElement(params) else {
        throw NSError(domain: "CloudCLISemantics", code: 400, userInfo: [NSLocalizedDescriptionKey: "click_element requires x/y or stateId + element_index"])
    }
    try postMouseClick(point: point, button: button, clickCount: requestedClicks)
    return try getAppState(params)
}
/// Handles `perform_secondary_action`.
///
/// Uses the AX show-menu action when the cached element supports it, otherwise
/// right-clicks at the resolved point. Returns the refreshed app state.
///
/// - Throws: `NSError` code 400 when no target point can be resolved.
func performSecondaryAction(_ params: JSON) throws -> JSON {
    if let target = cachedElement(params) {
        let showMenu = kAXShowMenuAction as String
        if actions(target).contains(showMenu),
           AXUIElementPerformAction(target, showMenu as CFString) == .success {
            return try getAppState(params)
        }
    }

    guard let point = pointForElement(params) else {
        throw NSError(domain: "CloudCLISemantics", code: 400, userInfo: [NSLocalizedDescriptionKey: "perform_secondary_action requires x/y or stateId + element_index"])
    }
    try postMouseClick(point: point, button: .right)
    return try getAppState(params)
}
/// Handles `set_value`.
///
/// Tries to set the AX value attribute on the cached element. If that is not
/// possible it clicks the target, selects all with Cmd+A and types the value.
/// Returns the refreshed app state.
///
/// - Throws: `NSError` code 400 when `value` is missing or no target can be resolved.
func setValue(_ params: JSON) throws -> JSON {
    guard let newValue = params["value"] as? String else {
        throw NSError(domain: "CloudCLISemantics", code: 400, userInfo: [NSLocalizedDescriptionKey: "set_value requires value"])
    }

    if let target = cachedElement(params) {
        let status = AXUIElementSetAttributeValue(target, kAXValueAttribute as CFString, newValue as CFTypeRef)
        if status == .success {
            return try getAppState(params)
        }
    }

    guard let point = pointForElement(params) else {
        throw NSError(domain: "CloudCLISemantics", code: 400, userInfo: [NSLocalizedDescriptionKey: "set_value requires x/y or stateId + element_index"])
    }
    // Keyboard fallback: focus by clicking, replace the existing content.
    try postMouseClick(point: point, button: .left)
    try runAppleScript("tell application \"System Events\" to keystroke \"a\" using command down")
    let escaped = escapedAppleScriptString(newValue)
    try runAppleScript("tell application \"System Events\" to keystroke \"\(escaped)\"")
    return try getAppState(params)
}
/// Handles `type_text`: types `params["text"]` (empty when absent) via
/// System Events keystrokes and returns the refreshed app state.
func typeText(_ params: JSON) throws -> JSON {
    let text = params["text"] as? String ?? ""
    let escaped = escapedAppleScriptString(text)
    try runAppleScript("tell application \"System Events\" to keystroke \"\(escaped)\"")
    return try getAppState(params)
}
/// Builds the AppleScript ` using {…}` clause for the given modifier names.
/// Names are matched case-insensitively; unknown names are dropped. Returns an
/// empty string when no known modifier is present.
func appleScriptModifiers(_ parts: [String]) -> String {
    let clauseByName: [String: String] = [
        "cmd": "command down",
        "command": "command down",
        "meta": "command down",
        "ctrl": "control down",
        "control": "control down",
        "alt": "option down",
        "option": "option down",
        "shift": "shift down",
    ]
    let clauses = parts.compactMap { clauseByName[$0.lowercased()] }
    guard !clauses.isEmpty else { return "" }
    return " using {\(clauses.joined(separator: ", "))}"
}
/// Returns the key code used with System Events `key code` for a named key,
/// matched case-insensitively, or nil when the name is not in the table.
func appleScriptKeyCode(_ key: String) -> Int? {
    let codeByName: [String: Int] = [
        "return": 36,
        "enter": 36,
        "tab": 48,
        "space": 49,
        "delete": 51,
        "backspace": 51,
        "escape": 53,
        "esc": 53,
        "left": 123,
        "right": 124,
        "down": 125,
        "up": 126,
    ]
    return codeByName[key.lowercased()]
}
/// Handles `press_key`.
///
/// `params["key"]` is a "+"-separated chord such as "cmd+shift+a": the last
/// token is the key, the preceding tokens are modifiers. Named keys are sent as
/// key codes, anything else as a keystroke. Returns the refreshed app state.
func pressKey(_ params: JSON) throws -> JSON {
    let chord = params["key"] as? String ?? ""
    let tokens = chord
        .split(separator: "+")
        .map { String($0).trimmingCharacters(in: .whitespacesAndNewlines) }
        .filter { !$0.isEmpty }
    let key = tokens.last ?? chord
    let modifierClause = appleScriptModifiers(Array(tokens.dropLast()))

    guard let keyCode = appleScriptKeyCode(key) else {
        try runAppleScript("tell application \"System Events\" to keystroke \"\(escapedAppleScriptString(key))\"\(modifierClause)")
        return try getAppState(params)
    }
    try runAppleScript("tell application \"System Events\" to key code \(keyCode)\(modifierClause)")
    return try getAppState(params)
}
/// Handles `scroll_element`.
///
/// Moves the cursor to the resolved point and posts one line-based scroll wheel
/// event. `direction` is "up", "down", "left" or "right" (default "down"; any
/// other value scrolls nothing). `pages` scales the distance at 8 lines per
/// page, with a minimum of one line.
///
/// - Throws: `NSError` code 400 when no target point can be resolved.
func scrollElement(_ params: JSON) throws -> JSON {
    guard let point = pointForElement(params) else {
        throw NSError(domain: "CloudCLISemantics", code: 400, userInfo: [NSLocalizedDescriptionKey: "scroll_element requires x/y or stateId + element_index"])
    }
    CGWarpMouseCursorPosition(point)
    let direction = params["direction"] as? String ?? "down"
    let pages = params["pages"] as? Double ?? 1.0
    // Int32(_:) traps on non-finite or out-of-range doubles, so bound the value first.
    let requestedLines = pages.isFinite ? abs(pages) * 8.0 : 8.0
    let amount = Int32(min(max(1.0, requestedLines), 10_000.0))
    let vertical = direction == "up" ? amount : direction == "down" ? -amount : 0
    let horizontal = direction == "left" ? amount : direction == "right" ? -amount : 0
    CGEvent(scrollWheelEvent2Source: nil, units: .line, wheelCount: 2, wheel1: vertical, wheel2: horizontal, wheel3: 0)?.post(tap: .cghidEventTap)
    return try getAppState(params)
}
/// Handles `drag`: drags from (`from_x`, `from_y`) to (`to_x`, `to_y`) with the
/// button named by `mouse_button`, then returns the refreshed app state.
///
/// - Throws: `NSError` code 400 when any of the four coordinates is missing.
func drag(_ params: JSON) throws -> JSON {
    func coordinate(_ key: String) -> Double? {
        params[key] as? Double
    }

    guard let startX = coordinate("from_x"),
          let startY = coordinate("from_y"),
          let endX = coordinate("to_x"),
          let endY = coordinate("to_y")
    else {
        throw NSError(domain: "CloudCLISemantics", code: 400, userInfo: [NSLocalizedDescriptionKey: "drag requires from_x/from_y/to_x/to_y"])
    }

    let start = CGPoint(x: startX, y: startY)
    let end = CGPoint(x: endX, y: endY)
    try postDrag(from: start, to: end, button: cgMouseButton(params["mouse_button"]))
    return try getAppState(params)
}
/// Dispatches one decoded request to its handler and writes exactly one response.
///
/// Unknown methods and handler errors are reported as error responses carrying
/// the request id.
func handle(_ request: JSON) {
    let id = request["id"]
    let method = request["method"] as? String ?? ""
    let params = request["params"] as? JSON ?? [:]

    let result: Any
    do {
        switch method {
        case "list_apps": result = listApps()
        case "get_app_state": result = try getAppState(params)
        case "click_element": result = try click(params)
        case "perform_secondary_action": result = try performSecondaryAction(params)
        case "set_value": result = try setValue(params)
        case "type_text": result = try typeText(params)
        case "press_key": result = try pressKey(params)
        case "scroll_element": result = try scrollElement(params)
        case "drag": result = try drag(params)
        default:
            respondError(id: id, "Method is not implemented yet: \(method)")
            return
        }
    } catch {
        respondError(id: id, error.localizedDescription)
        return
    }
    respond(id: id, result: result)
}
// Request loop: one JSON object per stdin line, one JSON response per stdout line.
// Lines that are not a JSON object are answered with an id-less error.
while let line = readLine() {
    let decoded = line.data(using: .utf8).flatMap { try? JSONSerialization.jsonObject(with: $0) }
    if let request = decoded as? JSON {
        handle(request)
    } else {
        respondError(id: nil, "Invalid JSON request")
    }
}

View File

@@ -1,124 +0,0 @@
import { spawn, type ChildProcessWithoutNullStreams } from 'node:child_process';
import readline from 'node:readline';
/** Loose JSON object shape used for request params and parsed helper responses. */
type JsonRecord = Record<string, unknown>;
/** Bookkeeping for one in-flight request that is waiting for its response line. */
type PendingRequest = {
/** Settles the caller's promise with the helper's `result`. */
resolve: (value: unknown) => void;
/** Rejects the caller's promise (helper error, timeout, or helper shutdown). */
reject: (error: Error) => void;
/** Timeout timer; cleared once the request settles. */
timer: ReturnType<typeof setTimeout>;
};
/** Request timeout in milliseconds, parsed once from CLOUDCLI_SEMANTICS_HELPER_TIMEOUT_MS ("60000" when unset). */
const DEFAULT_TIMEOUT_MS = Number.parseInt(process.env.CLOUDCLI_SEMANTICS_HELPER_TIMEOUT_MS || '60000', 10);

/**
 * Returns the effective request timeout.
 *
 * @returns The configured timeout, or 60000 when the configured value is not a positive finite number.
 */
function timeoutMs(): number {
  if (!Number.isFinite(DEFAULT_TIMEOUT_MS) || DEFAULT_TIMEOUT_MS <= 0) {
    return 60000;
  }
  return DEFAULT_TIMEOUT_MS;
}
/**
 * Extracts a printable message from an arbitrary thrown value.
 *
 * @param error - The caught value.
 * @returns `error.message` for Error instances, otherwise `String(error)`.
 */
function errorMessage(error: unknown): string {
  if (error instanceof Error) {
    return error.message;
  }
  return String(error);
}
/**
 * Manages a long-lived helper child process that speaks line-delimited JSON:
 * one `{ id, method, params }` object per stdin line, one
 * `{ id, result | error }` object per stdout line.
 *
 * The helper is spawned lazily on the first request and respawned on demand
 * after it exits or is stopped.
 */
export class SemanticHelperProcess {
  private child: ChildProcessWithoutNullStreams | null = null;
  private reader: readline.Interface | null = null;
  private nextId = 1;
  private pending = new Map<number, PendingRequest>();

  /**
   * @param executablePath - Path of the helper executable to spawn.
   */
  constructor(private readonly executablePath: string) {}

  /**
   * Sends one request to the helper and resolves with its `result`.
   *
   * @param method - Helper method name.
   * @param params - JSON-serializable parameters.
   * @returns The `result` field of the matching response.
   * @throws When the helper is not running or `params` cannot be serialized.
   *   The returned promise rejects on a helper error response, a timeout, a
   *   failed write, or helper shutdown.
   */
  async request(method: string, params: JsonRecord): Promise<unknown> {
    this.ensureStarted();
    const child = this.child;
    if (!child?.stdin.writable) {
      throw new Error('Semantic helper process is not running.');
    }
    const id = this.nextId++;
    // Serialize before registering anything so a throw leaves no timer or map entry behind.
    const payload = `${JSON.stringify({ id, method, params })}\n`;
    return new Promise((resolve, reject) => {
      const timer = setTimeout(() => {
        this.pending.delete(id);
        reject(new Error(`Semantic helper request timed out: ${method}`));
      }, timeoutMs());
      this.pending.set(id, { resolve, reject, timer });
      child.stdin.write(payload, (error) => {
        if (!error) {
          return;
        }
        // The request never reached the helper; fail fast instead of waiting for the timeout.
        const entry = this.pending.get(id);
        if (!entry) {
          return;
        }
        clearTimeout(entry.timer);
        this.pending.delete(id);
        entry.reject(new Error(`Failed to send semantic helper request: ${error.message}`));
      });
    });
  }

  /** Stops the helper, if running, and rejects every in-flight request. */
  stop(): void {
    const child = this.child;
    this.child = null;
    this.reader?.close();
    this.reader = null;
    this.rejectAll('Semantic helper stopped.');
    if (child) {
      try { child.kill('SIGTERM'); } catch { /* noop */ }
    }
  }

  /** Spawns the helper and wires up its streams unless one is already running. */
  private ensureStarted(): void {
    if (this.child) {
      return;
    }
    const child = spawn(this.executablePath, [], {
      stdio: ['pipe', 'pipe', 'pipe'],
      windowsHide: true,
    });
    const reader = readline.createInterface({ input: child.stdout });
    this.child = child;
    this.reader = reader;

    reader.on('line', (line) => this.handleLine(line));
    child.stderr.on('data', (chunk) => {
      const text = String(chunk).trim();
      if (text) {
        console.error('[SemanticHelper]', text);
      }
    });
    // Without a listener, an EPIPE on stdin is an unhandled 'error' event and crashes the host.
    child.stdin.on('error', (error) => {
      console.error('[SemanticHelper] stdin error:', error.message);
    });

    // Events from a child that has already been replaced or stopped must not
    // tear down the current child or reject its requests.
    const release = (reason: string): void => {
      if (this.child !== child) {
        return;
      }
      this.child = null;
      this.reader = null;
      this.rejectAll(reason);
    };
    child.once('error', (error) => {
      release(`Failed to start semantic helper: ${error.message}`);
    });
    child.once('exit', (code) => {
      release(`Semantic helper exited with code ${code ?? 'null'}.`);
    });
  }

  /** Parses one response line and settles the matching pending request. */
  private handleLine(line: string): void {
    let message: JsonRecord;
    try {
      message = JSON.parse(line) as JsonRecord;
    } catch (error) {
      console.error('[SemanticHelper] Invalid JSON response:', errorMessage(error));
      return;
    }
    // Responses without a numeric id cannot be correlated with a request.
    const id = typeof message.id === 'number' ? message.id : null;
    if (id === null) {
      return;
    }
    const pending = this.pending.get(id);
    if (!pending) {
      return;
    }
    clearTimeout(pending.timer);
    this.pending.delete(id);
    if (message.error) {
      pending.reject(new Error(typeof message.error === 'string' ? message.error : 'Semantic helper request failed.'));
      return;
    }
    pending.resolve(message.result);
  }

  /** Rejects and forgets every in-flight request with `reason`. */
  private rejectAll(reason: string): void {
    const inFlight = [...this.pending.values()];
    this.pending.clear();
    for (const entry of inFlight) {
      clearTimeout(entry.timer);
      entry.reject(new Error(reason));
    }
  }
}

View File

@@ -1,97 +0,0 @@
import fs from 'node:fs';
import path from 'node:path';
import { fileURLToPath } from 'node:url';
const __dirname = path.dirname(fileURLToPath(import.meta.url));
/** Platforms for which a helper executable name is defined. */
export type SemanticHelperPlatform = 'darwin' | 'win32';
/** Outcome of looking for the helper executable on disk. */
export type SemanticHelperResolution = {
/** True when a helper executable was found. */
available: boolean;
/** Absolute path of the helper, or null when it was not found. */
path: string | null;
/** Which candidate location matched, or 'missing' when none did. */
source: 'bundled' | 'dev' | 'missing';
/** Platform the lookup was performed for. */
platform: NodeJS.Platform;
/** CPU architecture the lookup was performed for. */
arch: NodeJS.Architecture;
/** Human-readable explanation, set when the helper is unavailable. */
reason?: string;
};
/**
 * Returns the helper's executable file name for a platform.
 *
 * @param platform - Node platform identifier.
 * @returns The file name, or null when the platform has no helper.
 */
function helperExecutableName(platform: NodeJS.Platform): string | null {
  switch (platform) {
    case 'darwin':
      return 'CloudCLISemantics';
    case 'win32':
      return 'CloudCLISemantics.exe';
    default:
      return null;
  }
}
/**
 * Reports whether `filePath` is accessible.
 *
 * Probes for execute permission first and then for plain existence; the path
 * counts as present as soon as either probe succeeds.
 *
 * @param filePath - Path to probe.
 * @returns True when either access check passes.
 */
function pathExists(filePath: string): boolean {
  const probes = [fs.constants.X_OK, fs.constants.F_OK];
  for (const mode of probes) {
    try {
      fs.accessSync(filePath, mode);
      return true;
    } catch {
      // Fall through to the next, weaker probe.
    }
  }
  return false;
}
/**
 * Lists the locations to search for the helper, in priority order: the
 * `bin/<platform>-<arch>` directory next to this module's parent, then the same
 * layout under the current working directory's `server/modules/...` tree.
 *
 * @param platform - Node platform identifier.
 * @param arch - Node architecture identifier.
 * @returns Candidate paths, or an empty list when the platform has no helper.
 */
function candidatePaths(platform: NodeJS.Platform, arch: NodeJS.Architecture): Array<{ source: 'bundled' | 'dev'; path: string }> {
  const executable = helperExecutableName(platform);
  if (executable === null) {
    return [];
  }
  const platformArch = `${platform}-${arch}`;
  const bundledPath = path.resolve(__dirname, '..', 'bin', platformArch, executable);
  const devPath = path.resolve(
    process.cwd(),
    'server',
    'modules',
    'computer-use',
    'semantics',
    'bin',
    platformArch,
    executable,
  );
  return [
    { source: 'bundled', path: bundledPath },
    { source: 'dev', path: devPath },
  ];
}
/**
 * Locates the helper executable for the given platform and architecture.
 *
 * @param platform - Platform to resolve for; defaults to the current process.
 * @param arch - Architecture to resolve for; defaults to the current process.
 * @returns A resolution describing the first existing candidate, or an
 *   unavailable resolution with a `reason` when the platform has no helper or
 *   no candidate exists on disk.
 */
export function resolveSemanticHelper(
  platform: NodeJS.Platform = process.platform,
  arch: NodeJS.Architecture = process.arch,
): SemanticHelperResolution {
  const unavailable = (reason: string): SemanticHelperResolution => ({
    available: false,
    path: null,
    source: 'missing',
    platform,
    arch,
    reason,
  });

  const executable = helperExecutableName(platform);
  if (executable === null) {
    return unavailable(`Semantic Computer Use helper is not supported on ${platform}.`);
  }

  const match = candidatePaths(platform, arch).find((candidate) => pathExists(candidate.path));
  if (match) {
    return {
      available: true,
      path: match.path,
      source: match.source,
      platform,
      arch,
    };
  }

  return unavailable(`Bundled semantic helper was not found for ${platform}-${arch} (${executable}).`);
}

View File

@@ -1,11 +0,0 @@
<!-- Build definition for the Windows helper, a console executable named CloudCLISemantics. -->
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<OutputType>Exe</OutputType>
<!-- Windows-specific target framework; required for the desktop UI frameworks enabled below. -->
<TargetFramework>net8.0-windows</TargetFramework>
<Nullable>enable</Nullable>
<ImplicitUsings>enable</ImplicitUsings>
<!-- NOTE(review): presumably enabled for System.Windows.Forms (Screen, SendKeys); confirm against the helper source. -->
<UseWindowsForms>true</UseWindowsForms>
<!-- NOTE(review): presumably enabled for System.Windows.Automation and System.Windows.Rect; confirm against the helper source. -->
<UseWPF>true</UseWPF>
<AssemblyName>CloudCLISemantics</AssemblyName>
</PropertyGroup>
</Project>

View File

@@ -1,534 +0,0 @@
using System.Diagnostics;
using System.Drawing;
using System.Drawing.Imaging;
using System.IO;
using System.Runtime.InteropServices;
using System.Text.Json;
using System.Windows.Automation;
static class Program
{
// Upper bound on the number of captured states kept in the caches below.
private const int MaxStoredStates = 100;
// Captured element records, keyed by state id.
private static readonly Dictionary<string, List<ElementRecord>> StateElements = new();
// Live UI Automation elements, keyed by state id and then by element index.
private static readonly Dictionary<string, Dictionary<string, AutomationElement>> StateAutomationElements = new();
// State ids in insertion order; the oldest entries are evicted first.
private static readonly Queue<string> StateOrder = new();
/// <summary>
/// Request loop: reads one JSON request per stdin line, dispatches it by
/// <c>method</c>, and writes exactly one JSON response per request.
/// </summary>
/// <remarks>
/// A line that is not a JSON object produces an id-less "Invalid JSON request"
/// error. Handler failures and unknown or non-string methods produce an error
/// response that carries the request id.
/// </remarks>
public static void Main()
{
    string? line;
    while ((line = Console.ReadLine()) != null)
    {
        try
        {
            using var doc = JsonDocument.Parse(line);
            var root = doc.RootElement;
            var id = root.TryGetProperty("id", out var idValue) ? idValue.Clone() : default;

            // GetString() throws on non-string values; treat those as an unknown
            // method so the error response still carries the request id.
            var method = root.TryGetProperty("method", out var methodValue) && methodValue.ValueKind == JsonValueKind.String
                ? methodValue.GetString() ?? ""
                : "";

            JsonElement parameters;
            if (root.TryGetProperty("params", out var paramsValue) && paramsValue.ValueKind == JsonValueKind.Object)
            {
                parameters = paramsValue.Clone();
            }
            else
            {
                // Clone() detaches the element, so the placeholder document can be disposed right away.
                using var emptyDoc = JsonDocument.Parse("{}");
                parameters = emptyDoc.RootElement.Clone();
            }

            try
            {
                object result = method switch
                {
                    "list_apps" => ListApps(),
                    "get_app_state" => GetAppState(parameters),
                    "click_element" => ClickElement(parameters),
                    "perform_secondary_action" => PerformSecondaryAction(parameters),
                    "set_value" => SetValue(parameters),
                    "type_text" => TypeText(parameters),
                    "press_key" => PressKey(parameters),
                    "scroll_element" => ScrollElement(parameters),
                    "drag" => Drag(parameters),
                    _ => throw new InvalidOperationException($"Method is not implemented yet: {method}")
                };
                Write(new Dictionary<string, object?> { ["id"] = JsonValue(id), ["result"] = result });
            }
            catch (Exception ex)
            {
                Write(new Dictionary<string, object?> { ["id"] = JsonValue(id), ["error"] = ex.Message });
            }
        }
        catch (Exception ex)
        {
            Write(new Dictionary<string, object?> { ["id"] = null, ["error"] = $"Invalid JSON request: {ex.Message}" });
        }
    }
}
/// <summary>
/// Converts a scalar JSON element into a plain .NET value for echoing back in a response.
/// </summary>
/// <param name="element">Element to convert; may be the default (undefined) element.</param>
/// <returns>
/// A string, a <see cref="long"/> for integral numbers, a <see cref="double"/> for
/// other numbers, a bool, or null for null, undefined, object and array values.
/// </returns>
private static object? JsonValue(JsonElement element)
{
    switch (element.ValueKind)
    {
        case JsonValueKind.String:
            return element.GetString();
        case JsonValueKind.Number:
            // Box the long directly: a `cond ? long : double` expression would widen
            // it to double and lose precision above 2^53.
            if (element.TryGetInt64(out var integer))
            {
                return integer;
            }
            return element.GetDouble();
        case JsonValueKind.True:
            return true;
        case JsonValueKind.False:
            return false;
        default:
            return null;
    }
}
/// <summary>Serializes <paramref name="value"/> as one line of JSON on stdout and flushes.</summary>
private static void Write(object value)
{
    var json = JsonSerializer.Serialize(value);
    Console.WriteLine(json);
    Console.Out.Flush();
}
/// <summary>
/// Lists every process that owns a main window, ordered by process name.
/// </summary>
/// <returns>One dictionary per process with id, name, processName, pid, running and windowTitle.</returns>
private static List<Dictionary<string, object?>> ListApps()
{
    var windowed = Process.GetProcesses()
        .Where(process => process.MainWindowHandle != IntPtr.Zero)
        .OrderBy(process => process.ProcessName);

    var apps = new List<Dictionary<string, object?>>();
    foreach (var process in windowed)
    {
        apps.Add(new Dictionary<string, object?>
        {
            ["id"] = process.Id.ToString(),
            ["name"] = process.ProcessName,
            ["processName"] = process.ProcessName,
            ["pid"] = process.Id,
            ["running"] = true,
            ["windowTitle"] = process.MainWindowTitle
        });
    }
    return apps;
}
/// <summary>
/// Finds a process with a main window that matches <paramref name="query"/>.
/// </summary>
/// <remarks>
/// Matching ignores case and is tried in order: exact process name, exact
/// window title, then window title containing the query.
/// </remarks>
/// <exception cref="InvalidOperationException">The query is blank or nothing matches.</exception>
private static Process ResolveProcess(string query)
{
    var normalized = query.Trim();
    if (string.IsNullOrWhiteSpace(normalized))
    {
        throw new InvalidOperationException("app is required.");
    }

    var candidates = Process.GetProcesses()
        .Where(process => process.MainWindowHandle != IntPtr.Zero)
        .ToList();

    var matchers = new Func<Process, bool>[]
    {
        process => process.ProcessName.Equals(normalized, StringComparison.OrdinalIgnoreCase),
        process => process.MainWindowTitle.Equals(normalized, StringComparison.OrdinalIgnoreCase),
        process => process.MainWindowTitle.Contains(normalized, StringComparison.OrdinalIgnoreCase)
    };

    foreach (var matcher in matchers)
    {
        var match = candidates.FirstOrDefault(matcher);
        if (match != null)
        {
            return match;
        }
    }

    throw new InvalidOperationException($"App is not running: {query}");
}
/// <summary>
/// Captures the current UI state of the app named by the <c>app</c> parameter.
/// </summary>
/// <remarks>
/// Walks the app's main window up to depth 5 and 300 elements, caches the result
/// under a fresh state id, and returns it with a screenshot, the primary screen
/// size and the window's title and bounds.
/// </remarks>
/// <exception cref="InvalidOperationException">The app cannot be resolved or has no automation root.</exception>
private static Dictionary<string, object?> GetAppState(JsonElement parameters)
{
    var process = ResolveProcess(ReadString(parameters, "app"));
    var root = AutomationElement.FromHandle(process.MainWindowHandle);
    if (root == null)
    {
        throw new InvalidOperationException("No UI Automation root window is available.");
    }

    var records = new List<ElementRecord>();
    var automationElements = new Dictionary<string, AutomationElement>();
    Walk(root, records, automationElements, 0, 5, 300);

    // Cache the snapshot so later element-based requests can refer back to it.
    var stateId = $"state_{Guid.NewGuid()}";
    StateElements[stateId] = records;
    StateAutomationElements[stateId] = automationElements;
    StateOrder.Enqueue(stateId);
    PruneStoredStates();

    var elements = records.Select(record => record.ToDictionary()).ToList();
    var windowBounds = root.Current.BoundingRectangle;

    var state = new Dictionary<string, object?>();
    state["stateId"] = stateId;
    state["app"] = process.ProcessName;
    state["platform"] = "win32";
    state["screenshotDataUrl"] = CaptureScreen();
    state["displaySize"] = new Dictionary<string, object?>
    {
        ["width"] = (int)System.Windows.Forms.Screen.PrimaryScreen!.Bounds.Width,
        ["height"] = (int)System.Windows.Forms.Screen.PrimaryScreen!.Bounds.Height
    };
    state["window"] = new Dictionary<string, object?>
    {
        ["title"] = process.MainWindowTitle,
        ["bounds"] = BoundsDictionary(windowBounds)
    };
    state["elements"] = elements;
    state["accessibilityTree"] = elements;
    state["treeText"] = string.Join("\n", elements.Select(element => $"{element["index"]} {element["role"]} {element.GetValueOrDefault("title")}"));
    return state;
}
/// <summary>
/// Handles <c>click_element</c>.
/// </summary>
/// <remarks>
/// A single left click on a cached element is first attempted through the
/// Invoke pattern; otherwise a synthetic mouse click is sent to the resolved
/// point. Returns the refreshed app state.
/// </remarks>
/// <exception cref="InvalidOperationException">No target point can be resolved.</exception>
private static Dictionary<string, object?> ClickElement(JsonElement parameters)
{
    var button = ReadString(parameters, "mouse_button");
    var isLeftButton = button == "" || button == "left";

    if (isLeftButton && ReadInt(parameters, "click_count", 1) == 1)
    {
        var target = AutomationElementFor(parameters);
        if (target != null && TryInvoke(target))
        {
            return GetAppState(parameters);
        }
    }

    var point = PointFor(parameters);
    if (!point.HasValue)
    {
        throw new InvalidOperationException("click_element requires x/y or stateId + element_index.");
    }

    var location = point.Value;
    SendMouseClick(location.X, location.Y, ReadString(parameters, "mouse_button"), ReadInt(parameters, "click_count", 1));
    return GetAppState(parameters);
}
/// <summary>
/// Handles <c>perform_secondary_action</c>: right-clicks the resolved point and
/// returns the refreshed app state.
/// </summary>
/// <exception cref="InvalidOperationException">No target point can be resolved.</exception>
private static Dictionary<string, object?> PerformSecondaryAction(JsonElement parameters)
{
    var point = PointFor(parameters);
    if (!point.HasValue)
    {
        throw new InvalidOperationException("perform_secondary_action requires x/y or stateId + element_index.");
    }

    var location = point.Value;
    SendMouseClick(location.X, location.Y, "right", 1);
    return GetAppState(parameters);
}
/// <summary>
/// Handles <c>set_value</c>.
/// </summary>
/// <remarks>
/// Tries the element's Value pattern first. If the element has no writable
/// Value pattern, or setting it fails, the element is focused (through UI
/// Automation and/or a click at the resolved point) and the value is typed after
/// Ctrl+A. Returns the refreshed app state.
/// </remarks>
/// <exception cref="InvalidOperationException">
/// <c>value</c> is missing or not a string, no target can be resolved, or the
/// target cannot be focused.
/// </exception>
private static Dictionary<string, object?> SetValue(JsonElement parameters)
{
    // An absent value must be rejected: defaulting to "" would silently clear the field.
    if (!parameters.TryGetProperty("value", out var rawValue) || rawValue.ValueKind != JsonValueKind.String)
    {
        throw new InvalidOperationException("set_value requires value.");
    }
    var value = rawValue.GetString() ?? "";

    var element = AutomationElementFor(parameters);
    var focused = false;
    if (element != null)
    {
        if (TrySetValuePattern(element, value))
        {
            return GetAppState(parameters);
        }
        try
        {
            element.SetFocus();
            focused = true;
        }
        catch
        {
            // Fall through to coordinate focus below.
        }
    }

    var point = PointFor(parameters);
    if (point != null)
    {
        SendMouseClick(point.Value.X, point.Value.Y, "left", 1);
        focused = true;
    }
    else if (!focused && element == null)
    {
        throw new InvalidOperationException("set_value requires x/y or stateId + element_index.");
    }
    else if (!focused)
    {
        throw new InvalidOperationException("set_value could not focus the requested element.");
    }

    System.Windows.Forms.SendKeys.SendWait("^a");
    System.Windows.Forms.SendKeys.SendWait(EscapeSendKeys(value));
    return GetAppState(parameters);
}

/// <summary>
/// Sets <paramref name="value"/> through the element's Value pattern.
/// Returns false when the pattern is missing, read-only, or the call fails.
/// </summary>
private static bool TrySetValuePattern(AutomationElement element, string value)
{
    try
    {
        if (!element.TryGetCurrentPattern(ValuePattern.Pattern, out var pattern)) return false;
        var valuePattern = (ValuePattern)pattern;
        if (valuePattern.Current.IsReadOnly) return false;
        valuePattern.SetValue(value);
        return true;
    }
    catch (Exception)
    {
        // Best effort: the caller falls back to focusing the element and typing.
        return false;
    }
}
/// <summary>
/// Handles <c>type_text</c>: sends the <c>text</c> parameter as keystrokes and
/// returns the refreshed app state.
/// </summary>
private static Dictionary<string, object?> TypeText(JsonElement parameters)
{
    var keys = EscapeSendKeys(ReadString(parameters, "text"));
    System.Windows.Forms.SendKeys.SendWait(keys);
    return GetAppState(parameters);
}
/// <summary>
/// Handles <c>press_key</c>: converts the <c>key</c> parameter into a SendKeys
/// chord, sends it, and returns the refreshed app state.
/// </summary>
private static Dictionary<string, object?> PressKey(JsonElement parameters)
{
    var chord = ToSendKeysChord(ReadString(parameters, "key"));
    System.Windows.Forms.SendKeys.SendWait(chord);
    return GetAppState(parameters);
}
/// <summary>
/// Handles <c>scroll_element</c>.
/// </summary>
/// <remarks>
/// <c>direction</c> is "up", "down", "left" or "right" (default "down"; any
/// other value scrolls nothing). <c>pages</c> scales the distance, with a
/// minimum of 1 and a cap of 100. The element's Scroll pattern is used when it
/// can scroll along the requested axis; otherwise a mouse wheel event is sent at
/// the resolved point: the vertical wheel for up/down, the horizontal wheel for
/// left/right. Returns the refreshed app state.
/// </remarks>
/// <exception cref="InvalidOperationException">The wheel fallback is needed but no point can be resolved.</exception>
private static Dictionary<string, object?> ScrollElement(JsonElement parameters)
{
    var element = AutomationElementFor(parameters);
    var direction = ReadString(parameters, "direction");
    if (direction.Length == 0) direction = "down";

    var pages = ReadDouble(parameters, "pages", 1);
    // Bound the magnitude so the int conversions below cannot overflow.
    var magnitude = double.IsFinite(pages) ? Math.Clamp(Math.Abs(pages), 1, 100) : 1;

    var isVertical = direction == "up" || direction == "down";
    var isHorizontal = direction == "left" || direction == "right";
    var isBackward = direction == "up" || direction == "left";

    if (element != null && element.TryGetCurrentPattern(ScrollPattern.Pattern, out var scrollPatternValue))
    {
        var scrollPattern = (ScrollPattern)scrollPatternValue;
        var canScroll = (isVertical && scrollPattern.Current.VerticallyScrollable)
            || (isHorizontal && scrollPattern.Current.HorizontallyScrollable);
        if (canScroll)
        {
            var amount = isBackward ? ScrollAmount.LargeDecrement : ScrollAmount.LargeIncrement;
            var horizontal = isHorizontal ? amount : ScrollAmount.NoAmount;
            var vertical = isVertical ? amount : ScrollAmount.NoAmount;
            // One large increment per requested page.
            var steps = (int)Math.Round(magnitude);
            for (var i = 0; i < steps; i++)
            {
                scrollPattern.Scroll(horizontal, vertical);
            }
            return GetAppState(parameters);
        }
    }

    var point = PointFor(parameters);
    if (point == null)
    {
        throw new InvalidOperationException("scroll_element requires x/y or stateId + element_index.");
    }
    SetCursorPos(point.Value.X, point.Value.Y);

    // 120 is one wheel notch (WHEEL_DELTA).
    var wheel = (int)Math.Round(magnitude * 120);
    if (isVertical)
    {
        // MOUSEEVENTF_WHEEL: positive scrolls up, negative scrolls down.
        mouse_event(0x0800, 0, 0, unchecked((uint)(isBackward ? wheel : -wheel)), UIntPtr.Zero);
    }
    else if (isHorizontal)
    {
        // MOUSEEVENTF_HWHEEL: positive scrolls right, negative scrolls left.
        mouse_event(0x1000, 0, 0, unchecked((uint)(isBackward ? -wheel : wheel)), UIntPtr.Zero);
    }
    return GetAppState(parameters);
}
/// <summary>Evicts the oldest cached states until at most <c>MaxStoredStates</c> remain.</summary>
private static void PruneStoredStates()
{
    for (var overflow = StateOrder.Count - MaxStoredStates; overflow > 0; overflow--)
    {
        var expired = StateOrder.Dequeue();
        StateElements.Remove(expired);
        StateAutomationElements.Remove(expired);
    }
}
/// <summary>
/// Handles <c>drag</c>: presses a mouse button at (<c>from_x</c>, <c>from_y</c>),
/// moves the cursor to (<c>to_x</c>, <c>to_y</c>) and releases it.
/// </summary>
/// <remarks>
/// The button comes from <c>mouse_button</c> ("right", "middle"; anything else
/// is left), using the same mapping as mouse clicks. Returns the refreshed app state.
/// </remarks>
/// <exception cref="InvalidOperationException">Any of the four coordinates is missing.</exception>
private static Dictionary<string, object?> Drag(JsonElement parameters)
{
    var fromX = ReadDouble(parameters, "from_x", double.NaN);
    var fromY = ReadDouble(parameters, "from_y", double.NaN);
    var toX = ReadDouble(parameters, "to_x", double.NaN);
    var toY = ReadDouble(parameters, "to_y", double.NaN);
    if (double.IsNaN(fromX) || double.IsNaN(fromY) || double.IsNaN(toX) || double.IsNaN(toY))
    {
        throw new InvalidOperationException("drag requires from_x/from_y/to_x/to_y.");
    }

    // MOUSEEVENTF_* down/up flag pairs for the requested button.
    var (down, up) = ReadString(parameters, "mouse_button") switch
    {
        "right" => (0x0008u, 0x0010u),
        "middle" => (0x0020u, 0x0040u),
        _ => (0x0002u, 0x0004u)
    };

    SetCursorPos((int)Math.Round(fromX), (int)Math.Round(fromY));
    mouse_event(down, 0, 0, 0, UIntPtr.Zero);
    Thread.Sleep(80);
    SetCursorPos((int)Math.Round(toX), (int)Math.Round(toY));
    Thread.Sleep(80);
    mouse_event(up, 0, 0, 0, UIntPtr.Zero);
    return GetAppState(parameters);
}
/// <summary>
/// Depth-first walk of the automation tree rooted at <paramref name="element"/>.
/// </summary>
/// <remarks>
/// Adds one record per visited element (indexed from "1" in visit order) and
/// stores the matching automation element. Stops descending past
/// <paramref name="maxDepth"/> and stops entirely at <paramref name="limit"/> records.
/// </remarks>
private static void Walk(AutomationElement element, List<ElementRecord> records, Dictionary<string, AutomationElement> automationElements, int depth, int maxDepth, int limit)
{
    if (depth > maxDepth)
    {
        return;
    }
    if (records.Count >= limit)
    {
        return;
    }

    var index = (records.Count + 1).ToString();
    records.Add(ElementRecord.From(element, index));
    automationElements[index] = element;

    foreach (AutomationElement child in element.FindAll(TreeScope.Children, Condition.TrueCondition))
    {
        Walk(child, records, automationElements, depth + 1, maxDepth, limit);
        if (records.Count >= limit)
        {
            break;
        }
    }
}
/// <summary>
/// Reads a string property from a JSON object.
/// </summary>
/// <returns>The string value, or "" when the property is missing or not a string.</returns>
private static string ReadString(JsonElement element, string property)
{
    if (!element.TryGetProperty(property, out var value))
    {
        return "";
    }
    if (value.ValueKind != JsonValueKind.String)
    {
        return "";
    }
    return value.GetString() ?? "";
}
/// <summary>
/// Reads an integer property from a JSON object.
/// </summary>
/// <returns>
/// The value when the property is a JSON number that fits in an Int32;
/// otherwise <paramref name="defaultValue"/>.
/// </returns>
private static int ReadInt(JsonElement element, string property, int defaultValue)
{
    // TryGetInt32 throws on non-Number elements, so check the kind first.
    return element.TryGetProperty(property, out var value)
        && value.ValueKind == JsonValueKind.Number
        && value.TryGetInt32(out var number)
        ? number
        : defaultValue;
}
/// <summary>
/// Reads a floating-point property from a JSON object.
/// </summary>
/// <returns>
/// The value when the property is a JSON number; otherwise <paramref name="defaultValue"/>.
/// </returns>
private static double ReadDouble(JsonElement element, string property, double defaultValue)
{
    // TryGetDouble throws on non-Number elements, so check the kind first.
    return element.TryGetProperty(property, out var value)
        && value.ValueKind == JsonValueKind.Number
        && value.TryGetDouble(out var number)
        ? number
        : defaultValue;
}
/// <summary>
/// Looks up the cached automation element named by <c>stateId</c> + <c>element_index</c>.
/// </summary>
/// <returns>The cached element, or null when either key is blank or nothing is cached for them.</returns>
private static AutomationElement? AutomationElementFor(JsonElement parameters)
{
    var stateId = ReadString(parameters, "stateId");
    var elementIndex = ReadString(parameters, "element_index");
    if (string.IsNullOrWhiteSpace(stateId) || string.IsNullOrWhiteSpace(elementIndex))
    {
        return null;
    }
    if (!StateAutomationElements.TryGetValue(stateId, out var cached))
    {
        return null;
    }
    return cached.TryGetValue(elementIndex, out var match) ? match : null;
}
/// <summary>
/// Resolves the screen point a request refers to.
/// </summary>
/// <remarks>
/// Numeric <c>x</c> / <c>y</c> win. If either is absent or not a number, the
/// centre of the cached element named by <c>stateId</c> + <c>element_index</c>
/// is used instead.
/// </remarks>
/// <returns>The point, or null when neither source is available.</returns>
private static System.Drawing.Point? PointFor(JsonElement parameters)
{
    // TryGetDouble throws on non-Number elements (e.g. "x": null), so check the
    // kind first and fall back to the element lookup in that case.
    if (parameters.TryGetProperty("x", out var xValue) && parameters.TryGetProperty("y", out var yValue)
        && xValue.ValueKind == JsonValueKind.Number && yValue.ValueKind == JsonValueKind.Number
        && xValue.TryGetDouble(out var x) && yValue.TryGetDouble(out var y))
    {
        return new System.Drawing.Point((int)Math.Round(x), (int)Math.Round(y));
    }
    var stateId = ReadString(parameters, "stateId");
    var elementIndex = ReadString(parameters, "element_index");
    if (string.IsNullOrWhiteSpace(stateId) || string.IsNullOrWhiteSpace(elementIndex)) return null;
    if (!StateElements.TryGetValue(stateId, out var elements)) return null;
    var element = elements.FirstOrDefault(item => item.Index == elementIndex);
    if (element?.Bounds == null) return null;
    return new System.Drawing.Point(
        (int)Math.Round(element.Bounds.Value.Left + element.Bounds.Value.Width / 2),
        (int)Math.Round(element.Bounds.Value.Top + element.Bounds.Value.Height / 2)
    );
}
/// <summary>
/// Copies the primary screen into a bitmap and returns it as a PNG data URL.
/// </summary>
/// <returns>A string of the form "data:image/png;base64,...".</returns>
private static string CaptureScreen()
{
    var screenBounds = System.Windows.Forms.Screen.PrimaryScreen!.Bounds;

    using var screenshot = new Bitmap(screenBounds.Width, screenBounds.Height);
    using var canvas = Graphics.FromImage(screenshot);
    canvas.CopyFromScreen(screenBounds.Left, screenBounds.Top, 0, 0, screenBounds.Size);

    using var buffer = new MemoryStream();
    screenshot.Save(buffer, ImageFormat.Png);

    var encoded = Convert.ToBase64String(buffer.ToArray());
    return $"data:image/png;base64,{encoded}";
}
/// <summary>
/// Converts a rectangle into a dictionary with "x", "y", "width" and "height" entries.
/// </summary>
private static Dictionary<string, object?> BoundsDictionary(System.Windows.Rect rect)
{
    var bounds = new Dictionary<string, object?>();
    bounds["x"] = rect.X;
    bounds["y"] = rect.Y;
    bounds["width"] = rect.Width;
    bounds["height"] = rect.Height;
    return bounds;
}
// P/Invoke binding for user32!SetCursorPos; used by SendMouseClick to move the pointer to (x, y).
[DllImport("user32.dll")]
private static extern bool SetCursorPos(int x, int y);
// P/Invoke binding for user32!mouse_event; used by SendMouseClick to emit button down/up events
// at the current cursor position (dwFlags carries the button flag).
[DllImport("user32.dll")]
private static extern void mouse_event(uint dwFlags, uint dx, uint dy, uint dwData, UIntPtr dwExtraInfo);
/// <summary>
/// Moves the cursor to (x, y) and emits one or more clicks with the given button.
/// </summary>
/// <param name="x">Cursor X coordinate passed to SetCursorPos.</param>
/// <param name="y">Cursor Y coordinate passed to SetCursorPos.</param>
/// <param name="button">"right" or "middle"; any other value clicks the left button.</param>
/// <param name="clickCount">Number of clicks; values below 1 are treated as 1.</param>
private static void SendMouseClick(int x, int y, string button, int clickCount)
{
    // mouse_event button flags: (down, up) pairs per button.
    uint pressFlag;
    uint releaseFlag;
    switch (button)
    {
        case "right":
            pressFlag = 0x0008u;
            releaseFlag = 0x0010u;
            break;
        case "middle":
            pressFlag = 0x0020u;
            releaseFlag = 0x0040u;
            break;
        default:
            pressFlag = 0x0002u;
            releaseFlag = 0x0004u;
            break;
    }

    SetCursorPos(x, y);

    var remaining = Math.Max(1, clickCount);
    while (remaining > 0)
    {
        mouse_event(pressFlag, 0, 0, 0, UIntPtr.Zero);
        mouse_event(releaseFlag, 0, 0, 0, UIntPtr.Zero);
        // Short pause between clicks.
        Thread.Sleep(80);
        remaining--;
    }
}
/// <summary>
/// Attempts to activate an element through its UI Automation InvokePattern.
/// </summary>
/// <returns>
/// True when the pattern was available and Invoke() completed; false when the
/// pattern is not supported or any exception was raised.
/// </returns>
private static bool TryInvoke(AutomationElement element)
{
    try
    {
        if (element.TryGetCurrentPattern(InvokePattern.Pattern, out var rawPattern))
        {
            var invoker = (InvokePattern)rawPattern;
            invoker.Invoke();
            return true;
        }
        return false;
    }
    catch
    {
        // Best effort: callers only need to know the invoke did not happen.
        return false;
    }
}
/// <summary>
/// Escapes text so SendKeys types it literally.
/// </summary>
/// <remarks>
/// Each character that SendKeys treats specially ({ } + ^ % ~ ( ) [ ]) is
/// wrapped in braces. The text is escaped in a single pass: chaining
/// string.Replace calls would re-escape the closing brace that an earlier
/// replacement inserted (turning "{" into "{{{}}").
/// </remarks>
/// <param name="value">Literal text to type.</param>
/// <returns>The text with every special character written as "{c}".</returns>
private static string EscapeSendKeys(string value)
{
    const string special = "{}+^%~()[]";
    return string.Concat(value.Select(ch => special.Contains(ch) ? "{" + ch + "}" : ch.ToString()));
}
/// <summary>
/// Converts a key description such as "ctrl+shift+a" or "enter" into SendKeys syntax.
/// </summary>
/// <remarks>
/// Parts are separated by '+'. Every part except the last is a modifier:
/// ctrl/control and cmd/win/windows map to "^", alt to "%", shift to "+";
/// unknown modifiers are ignored. The last part is translated by SendKeyName.
/// A trailing '+' (as in "ctrl++" or a lone "+") names the plus key itself;
/// without this the split would drop it and the last modifier would be sent
/// as a key name.
/// </remarks>
/// <param name="key">Key or chord description.</param>
/// <returns>The SendKeys string for the chord.</returns>
private static string ToSendKeysChord(string key)
{
    var normalized = key.Trim();
    if (!normalized.Contains('+'))
    {
        return SendKeyName(normalized);
    }

    var parts = normalized.Split('+', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries);

    // When the description ends with '+', the final key is the plus key and
    // every remaining part is a modifier.
    var endsWithPlusKey = normalized.EndsWith('+');
    var modifierCount = endsWithPlusKey ? parts.Length : parts.Length - 1;
    var last = endsWithPlusKey ? "+" : parts.LastOrDefault() ?? "";

    var modifiers = "";
    foreach (var part in parts.Take(modifierCount))
    {
        modifiers += part.ToLowerInvariant() switch
        {
            "ctrl" or "control" => "^",
            "alt" => "%",
            "shift" => "+",
            "cmd" or "win" or "windows" => "^",
            _ => ""
        };
    }
    return modifiers + SendKeyName(last);
}
/// <summary>
/// Translates a single key name into its SendKeys token.
/// </summary>
/// <remarks>
/// Known names (case-insensitive) map to fixed tokens. Any other single
/// character is escaped via EscapeSendKeys; any other name is upper-cased
/// and wrapped in braces.
/// </remarks>
private static string SendKeyName(string key)
{
    switch (key.ToLowerInvariant())
    {
        case "return":
        case "enter":
            return "{ENTER}";
        case "escape":
        case "esc":
            return "{ESC}";
        case "tab":
            return "{TAB}";
        case "backspace":
            return "{BACKSPACE}";
        case "delete":
        case "del":
            return "{DELETE}";
        case "left":
            return "{LEFT}";
        case "right":
            return "{RIGHT}";
        case "up":
            return "{UP}";
        case "down":
            return "{DOWN}";
        case "space":
            return " ";
        default:
            if (key.Length == 1)
            {
                return EscapeSendKeys(key);
            }
            return "{" + key.ToUpperInvariant() + "}";
    }
}
/// <summary>
/// Snapshot of one automation element: its index in the walk, control type,
/// name, current value (if any), bounding rectangle and supported pattern names.
/// </summary>
private sealed record ElementRecord(
    string Index,
    string Role,
    string? Title,
    string? Value,
    System.Windows.Rect? Bounds,
    List<string> Actions)
{
    /// <summary>Builds a record from a live automation element.</summary>
    /// <param name="element">Element to snapshot.</param>
    /// <param name="index">Index string assigned to the element by the caller.</param>
    public static ElementRecord From(AutomationElement element, string index)
    {
        var supported = new List<string>();
        foreach (var pattern in element.GetSupportedPatterns())
        {
            supported.Add(pattern.ProgrammaticName);
        }

        // "ControlType.Button" -> "Button".
        var role = element.Current.ControlType.ProgrammaticName.Replace("ControlType.", "");
        var title = element.Current.Name;
        var currentValue = TryValue(element);
        var bounds = element.Current.BoundingRectangle;

        return new ElementRecord(index, role, title, currentValue, bounds, supported);
    }

    /// <summary>
    /// Serializes the record; "title", "value" and "bounds" are only included
    /// when they are non-empty / non-null.
    /// </summary>
    public Dictionary<string, object?> ToDictionary()
    {
        var result = new Dictionary<string, object?>();
        result["index"] = Index;
        result["role"] = Role;
        result["actions"] = Actions;

        if (!string.IsNullOrEmpty(Title))
        {
            result["title"] = Title;
        }
        if (!string.IsNullOrEmpty(Value))
        {
            result["value"] = Value;
        }
        if (Bounds != null)
        {
            result["bounds"] = BoundsDictionary(Bounds.Value);
        }
        return result;
    }

    /// <summary>
    /// Returns the element's ValuePattern value, or null when the pattern is
    /// unavailable or reading it throws.
    /// </summary>
    private static string? TryValue(AutomationElement element)
    {
        try
        {
            if (!element.TryGetCurrentPattern(ValuePattern.Pattern, out var rawPattern))
            {
                return null;
            }
            return ((ValuePattern)rawPattern).Current.Value;
        }
        catch
        {
            return null;
        }
    }
}
}

View File

@@ -1,87 +0,0 @@
import { randomUUID } from 'node:crypto';
import type { SemanticAppState, SemanticElement } from '@/modules/computer-use/semantics/semantic-types.js';
// State time-to-live in milliseconds, read once at module load from
// CLOUDCLI_COMPUTER_SEMANTIC_STATE_TTL_MS; falls back to 10 minutes when the variable is unset or empty.
// May be NaN or non-positive if the variable is malformed — expire() guards against that.
const DEFAULT_STATE_TTL_MS = Number.parseInt(process.env.CLOUDCLI_COMPUTER_SEMANTIC_STATE_TTL_MS || String(10 * 60 * 1000), 10);
// Internal store entry: a saved state plus the session and normalized app key
// it belongs to, and the Date.now() timestamp of when it was saved.
type StoredState = {
sessionId: string;
appKey: string;
state: SemanticAppState;
updatedAt: number;
};
/** Canonical lookup key for an app name: surrounding whitespace removed, lower-cased. */
function normalizeAppKey(app: string): string {
  const trimmed = app.trim();
  return trimmed.toLowerCase();
}
/**
 * In-memory store of semantic app states, keyed by state id, with a
 * per-session / per-app pointer to the most recently saved state.
 * Entries older than the configured TTL are dropped whenever a state is read.
 */
export class SemanticSessionStore {
  private states = new Map<string, StoredState>();
  private latestBySessionApp = new Map<string, string>();

  /** Generates a fresh state id of the form `state_<uuid>`. */
  createStateId(): string {
    return `state_${randomUUID()}`;
  }

  /**
   * Stores a state for a session and marks it as the latest for its app.
   * A state id is generated when the incoming state has none.
   * @returns The stored state (a copy carrying the final state id).
   */
  save(sessionId: string, state: SemanticAppState): SemanticAppState {
    const appKey = normalizeAppKey(state.app);
    const stateId = state.stateId || this.createStateId();
    const nextState = { ...state, stateId };

    const entry: StoredState = {
      sessionId,
      appKey,
      state: nextState,
      updatedAt: Date.now(),
    };
    this.states.set(stateId, entry);
    this.latestBySessionApp.set(this.latestKey(sessionId, appKey), stateId);
    return nextState;
  }

  /**
   * Returns a stored state after expiring stale entries.
   * With `stateId`, the entry must belong to the same session and app;
   * without it, the latest state saved for the session/app pair is returned.
   */
  getState(sessionId: string, app: string, stateId?: string): SemanticAppState | null {
    this.expire();
    const appKey = normalizeAppKey(app);

    if (!stateId) {
      const latestStateId = this.latestBySessionApp.get(this.latestKey(sessionId, appKey));
      if (!latestStateId) {
        return null;
      }
      return this.states.get(latestStateId)?.state || null;
    }

    const entry = this.states.get(stateId);
    if (!entry || entry.sessionId !== sessionId || entry.appKey !== appKey) {
      return null;
    }
    return entry.state;
  }

  /** Finds an element by index within the state resolved by `getState`. */
  getElement(sessionId: string, app: string, elementIndex: string, stateId?: string): SemanticElement | null {
    const state = this.getState(sessionId, app, stateId);
    const match = state?.elements.find((candidate) => candidate.index === elementIndex);
    return match || null;
  }

  /** Removes every state (and latest pointer) that belongs to the session. */
  clearSession(sessionId: string): void {
    for (const [storedId, stored] of this.states.entries()) {
      if (stored.sessionId !== sessionId) {
        continue;
      }
      this.states.delete(storedId);
      this.latestBySessionApp.delete(this.latestKey(stored.sessionId, stored.appKey));
    }
  }

  /**
   * Drops states whose age exceeds the TTL. The latest pointer is removed
   * only when it still refers to the expired state.
   */
  expire(now = Date.now()): void {
    // Fall back to 10 minutes when the configured TTL is unusable.
    const configuredIsValid = Number.isFinite(DEFAULT_STATE_TTL_MS) && DEFAULT_STATE_TTL_MS > 0;
    const ttl = configuredIsValid ? DEFAULT_STATE_TTL_MS : 10 * 60 * 1000;

    for (const [storedId, stored] of this.states.entries()) {
      if (now - stored.updatedAt <= ttl) {
        continue;
      }
      this.states.delete(storedId);
      const pointerKey = this.latestKey(stored.sessionId, stored.appKey);
      if (this.latestBySessionApp.get(pointerKey) === storedId) {
        this.latestBySessionApp.delete(pointerKey);
      }
    }
  }

  private latestKey(sessionId: string, appKey: string): string {
    return `${sessionId}:${appKey}`;
  }
}
// Module-level store instance exported for callers that import it directly.
export const semanticSessionStore = new SemanticSessionStore();

View File

@@ -1,17 +0,0 @@
/** Maps each MCP tool name to the semantic operation name it triggers. */
export const semanticMcpToolMap: Record<string, string> = {
  computer_app_drag: 'drag',
  computer_click_element: 'click_element',
  computer_get_app_state: 'get_app_state',
  computer_list_apps: 'list_apps',
  computer_perform_secondary_action: 'perform_secondary_action',
  computer_press_key: 'press_key',
  computer_scroll_element: 'scroll_element',
  computer_set_value: 'set_value',
  computer_type_text: 'type_text',
};

/** Set of every semantic operation name that appears in the tool map. */
export const semanticOperationNames = new Set(Object.values(semanticMcpToolMap));

/**
 * Resolves the semantic operation for an MCP tool name.
 *
 * @param toolName MCP tool name as received from the client.
 * @returns The operation name, or `null` when the tool is not mapped.
 */
export function semanticOperationForMcpTool(toolName: string): string | null {
  const operation: unknown = semanticMcpToolMap[toolName];
  // The map is a plain object, so names such as "constructor" or "toString"
  // resolve to inherited Object.prototype members; only real string entries count.
  return typeof operation === 'string' && operation ? operation : null;
}

View File

@@ -1,58 +0,0 @@
import type { DisplaySize, Point } from '@/modules/computer-use/computer-executor.js';
// Rectangle of an element, expressed as an origin (x, y) plus width and height.
// NOTE(review): units and coordinate origin are not visible here — presumably screen pixels; confirm.
export type SemanticBounds = {
x: number;
y: number;
width: number;
height: number;
};
// Description of one application as reported in an app listing.
// Only `name` and `running` are required; the remaining identifiers are optional.
export type SemanticApp = {
id?: string;
name: string;
bundleIdentifier?: string;
processName?: string;
pid?: number;
running: boolean;
windowTitle?: string;
};
// One UI element of an app state. `index` identifies the element within its
// state (it is what `element_index` in tool input refers to); `role` is its control type.
// All other fields are optional.
export type SemanticElement = {
index: string;
role: string;
title?: string;
value?: string;
description?: string;
enabled?: boolean;
focused?: boolean;
selected?: boolean;
bounds?: SemanticBounds;
actions?: string[];
settableValue?: boolean;
};
// Snapshot of an app: identifying `stateId`, the app name, the host platform,
// an optional screenshot (data URL) and display size, and the captured elements.
// NOTE(review): how `elements` and `accessibilityTree` differ is not visible here — confirm.
export type SemanticAppState = {
stateId: string;
app: string;
platform: NodeJS.Platform;
screenshotDataUrl: string | null;
displaySize: DisplaySize | null;
elements: SemanticElement[];
accessibilityTree: SemanticElement[];
treeText?: string;
message?: string;
};
// Free-form tool input: any keys are allowed, with these well-known optional
// string fields used to address a session, app, state and element.
export type SemanticToolInput = Record<string, unknown> & {
sessionId?: string;
app?: string;
stateId?: string;
element_index?: string;
};
// Result of a semantic tool call: either a full app state, or an app listing
// together with the host platform.
export type SemanticToolResult = SemanticAppState | {
apps: SemanticApp[];
platform: NodeJS.Platform;
};
// Alias of the executor's Point type, used for semantic action coordinates.
export type SemanticActionPoint = Point;

View File

@@ -0,0 +1,135 @@
import { readFile } from 'node:fs/promises';
import os from 'node:os';
import path from 'node:path';
import spawn from 'cross-spawn';
import type { IProviderAuth } from '@/shared/interfaces.js';
import type { ProviderAuthStatus } from '@/shared/types.js';
import { readObjectRecord, readOptionalString } from '@/shared/utils.js';
/**
 * Reports whether the Hermes CLI is installed and whether credentials for it
 * can be found in the environment or under `~/.hermes`.
 */
export class HermesProviderAuth implements IProviderAuth {
  /**
   * Runs `<cli> --version` (the CLI comes from HERMES_CLI_PATH, default
   * `hermes acp`) and reports whether it could be executed.
   */
  private checkInstalled(): boolean {
    const configured = process.env.HERMES_CLI_PATH || 'hermes acp';
    const tokens = configured.trim().split(/\s+/);
    const command = tokens[0];
    const args = tokens.slice(1);

    try {
      const result = spawn.sync(command || 'hermes', [...args, '--version'], { stdio: 'ignore', timeout: 5000 });
      if (result.error) {
        return false;
      }
      return result.status === 0 || result.status === null;
    } catch {
      return false;
    }
  }

  /** Builds the provider auth status from the install check and credential lookup. */
  async getStatus(): Promise<ProviderAuthStatus> {
    if (!this.checkInstalled()) {
      return {
        provider: 'hermes',
        installed: false,
        authenticated: false,
        email: null,
        method: null,
        error: 'Hermes ACP is not installed',
      };
    }

    const { authenticated, email, method } = await this.checkCredentials();
    return {
      provider: 'hermes',
      installed: true,
      authenticated,
      email,
      method,
      error: authenticated ? undefined : 'Hermes credentials were not found',
    };
  }

  /**
   * Looks for credentials in order: process environment, `~/.hermes/auth.json`,
   * `~/.hermes/.env`, then an `api_key` entry in `~/.hermes/config.yaml`.
   * Unreadable or malformed files are skipped.
   */
  private async checkCredentials(): Promise<{ authenticated: boolean; email: string | null; method: string | null }> {
    if (this.hasKnownProviderEnv(process.env)) {
      return { authenticated: true, email: 'API Key Auth', method: 'env' };
    }

    const hermesHome = path.join(os.homedir(), '.hermes');

    try {
      const rawAuth = await readFile(path.join(hermesHome, 'auth.json'), 'utf8');
      const authJson = readObjectRecord(JSON.parse(rawAuth));
      const credentialFields = ['apiKey', 'api_key', 'token', 'access_token', 'refresh_token'];
      const hasCredential = credentialFields.some((field) => Boolean(readOptionalString(authJson?.[field])));
      if (hasCredential) {
        return {
          authenticated: true,
          email: readOptionalString(authJson?.email) ?? 'Hermes Auth',
          method: 'credentials_file',
        };
      }
    } catch {
      // Missing or invalid auth.json: continue with the dotenv file.
    }

    try {
      const dotenv = await readFile(path.join(hermesHome, '.env'), 'utf8');
      const fileEnv = this.parseEnvFile(dotenv);
      if (this.hasKnownProviderEnv(fileEnv)) {
        return { authenticated: true, email: 'API Key Auth', method: 'env_file' };
      }
    } catch {
      // Missing .env: continue with config.yaml.
    }

    try {
      const yaml = await readFile(path.join(hermesHome, 'config.yaml'), 'utf8');
      const apiKeyLine = /^\s*api_key\s*:\s*["']?[^"'#\s]+/m;
      if (apiKeyLine.test(yaml)) {
        return { authenticated: true, email: 'Hermes Config', method: 'config_file' };
      }
    } catch {
      // Missing config.yaml: nothing left to check.
    }

    return { authenticated: false, email: null, method: null };
  }

  /**
   * Parses `KEY=value` lines. Blank lines, `#` comments, lines without a key
   * and entries with an empty value are ignored; one leading and one trailing
   * quote character are stripped from values.
   */
  private parseEnvFile(content: string): Record<string, string> {
    const entries: Record<string, string> = {};

    content.split(/\r?\n/).forEach((rawLine) => {
      const line = rawLine.trim();
      if (line === '' || line.startsWith('#')) {
        return;
      }
      const equalsAt = line.indexOf('=');
      if (equalsAt <= 0) {
        return;
      }
      const name = line.slice(0, equalsAt).trim();
      const unquoted = line.slice(equalsAt + 1).trim().replace(/^['"]|['"]$/g, '');
      if (name && unquoted) {
        entries[name] = unquoted;
      }
    });

    return entries;
  }

  /** True when any recognised provider key/token variable has a non-blank value. */
  private hasKnownProviderEnv(env: Record<string, string | undefined>): boolean {
    const knownKeys = [
      'HERMES_API_KEY',
      'NOUS_API_KEY',
      'OPENROUTER_API_KEY',
      'OPENAI_API_KEY',
      'ANTHROPIC_API_KEY',
      'GOOGLE_API_KEY',
      'GEMINI_API_KEY',
      'GLM_API_KEY',
      'KIMI_API_KEY',
      'MINIMAX_API_KEY',
      'MINIMAX_CN_API_KEY',
      'HF_TOKEN',
      'NVIDIA_API_KEY',
      'ARCEEAI_API_KEY',
      'OLLAMA_API_KEY',
      'KILOCODE_API_KEY',
      'GITHUB_TOKEN',
    ];
    for (const name of knownKeys) {
      if (env[name]?.trim()) {
        return true;
      }
    }
    return false;
  }
}

View File

@@ -0,0 +1,296 @@
import { mkdir, readFile, writeFile } from 'node:fs/promises';
import os from 'node:os';
import path from 'node:path';
import { McpProvider } from '@/modules/providers/shared/mcp/mcp.provider.js';
import type { McpScope, ProviderMcpServer, UpsertProviderMcpServerInput } from '@/shared/types.js';
import {
AppError,
readObjectRecord,
readOptionalString,
readStringArray,
readStringRecord,
} from '@/shared/utils.js';
/**
 * Renders a scalar for YAML output: numbers and booleans verbatim, `null` as
 * `null`, and everything else as a JSON-quoted string of `String(value)`.
 */
const yamlScalar = (value: unknown): string => {
  switch (typeof value) {
    case 'number':
    case 'boolean':
      return String(value);
    default:
      return value === null ? 'null' : JSON.stringify(String(value));
  }
};
/**
 * Parses a single-line YAML scalar into a JavaScript value.
 *
 * - empty input yields `''`; `null`, `true` and `false` map to their JS values
 * - quoted strings are unquoted (double-quoted ones via JSON escapes), and a
 *   trailing ` # comment` after the closing quote is ignored
 * - `[...]` / `{...}` are parsed as JSON when possible, otherwise kept as text
 * - anything else is returned as a string with an unquoted trailing comment removed
 *
 * @param value Raw text following `key:` or `- `.
 * @returns The parsed value; numbers are intentionally left as strings.
 */
const parseYamlScalar = (value: string): unknown => {
  const trimmed = value.trim();
  if (!trimmed) {
    return '';
  }
  if (trimmed === 'null') {
    return null;
  }
  if (trimmed === 'true') {
    return true;
  }
  if (trimmed === 'false') {
    return false;
  }

  const quote = trimmed[0];
  if (quote === '"' || quote === '\'') {
    // Locate the quote that closes the scalar, honouring `\x` escapes in
    // double-quoted strings and `''` escapes in single-quoted ones.
    let closing = -1;
    for (let i = 1; i < trimmed.length; i += 1) {
      const ch = trimmed[i];
      if (quote === '"' && ch === '\\') {
        i += 1;
        continue;
      }
      if (ch !== quote) {
        continue;
      }
      if (quote === '\'' && trimmed[i + 1] === '\'') {
        i += 1;
        continue;
      }
      closing = i;
      break;
    }

    // `"npx" # comment` must yield `npx`, not the text with its quotes intact.
    const hasTrailingComment = closing > 0 && /^\s+#/.test(trimmed.slice(closing + 1));
    const quoted = hasTrailingComment ? trimmed.slice(0, closing + 1) : trimmed;
    if (quoted.endsWith(quote)) {
      try {
        return JSON.parse(quoted);
      } catch {
        // Single-quoted (or otherwise non-JSON) text: just drop the quotes.
        return quoted.slice(1, -1);
      }
    }
  }

  if (
    (trimmed.startsWith('[') && trimmed.endsWith(']'))
    || (trimmed.startsWith('{') && trimmed.endsWith('}'))
  ) {
    try {
      return JSON.parse(trimmed);
    } catch {
      return trimmed;
    }
  }
  return trimmed.replace(/\s+#.*$/, '').trim();
};
/** Number of leading whitespace characters on a line. */
const getIndent = (line: string): number => {
  const leading = /^\s*/.exec(line);
  return leading ? leading[0].length : 0;
};
/**
 * Reads a block sequence of scalars (`- item` lines) that all sit at `indent`.
 * Blank lines are skipped; the first line with a different indent or without
 * a `- ` prefix ends the sequence.
 *
 * @returns The parsed items and the index of the first line not consumed.
 */
const parseYamlArray = (
  lines: string[],
  startIndex: number,
  indent: number,
): { value: unknown[]; nextIndex: number } => {
  const items: unknown[] = [];
  let cursor = startIndex;

  for (; cursor < lines.length; cursor += 1) {
    const current = lines[cursor];
    if (current.trim() === '') {
      continue;
    }
    const body = current.trimStart();
    if (getIndent(current) !== indent || !body.startsWith('- ')) {
      break;
    }
    items.push(parseYamlScalar(body.slice(2)));
  }

  return { value: items, nextIndex: cursor };
};
/**
 * Reads a block mapping starting at `startIndex`.
 *
 * `indent` is the minimum column at which the mapping's keys may appear. The
 * column actually used is taken from the first non-blank, non-comment line, so
 * files indented by 4 spaces (or any other width) are parsed instead of having
 * their nested content silently skipped. That matters because the parsed
 * result is later written back over the whole section.
 *
 * Values are scalars, block sequences of scalars, or nested mappings. Lines
 * deeper than the mapping's column that are not consumed as children, and
 * lines that do not look like `key: value`, are skipped.
 *
 * @param lines All lines of the document.
 * @param startIndex Index of the first line that may belong to the mapping.
 * @param indent Minimum indentation of the mapping's keys.
 * @returns The parsed mapping and the index of the first line not consumed.
 */
const parseYamlMap = (
  lines: string[],
  startIndex: number,
  indent: number,
): { value: Record<string, unknown>; nextIndex: number } => {
  const value: Record<string, unknown> = {};

  // Adopt the real indentation of the first content line when it is deeper
  // than the requested minimum.
  let mapIndent = indent;
  for (let probe = startIndex; probe < lines.length; probe += 1) {
    const text = lines[probe].trim();
    if (!text || text.startsWith('#')) {
      continue;
    }
    const probeIndent = getIndent(lines[probe]);
    if (probeIndent > indent) {
      mapIndent = probeIndent;
    }
    break;
  }

  let index = startIndex;
  while (index < lines.length) {
    const line = lines[index];
    if (!line.trim()) {
      index += 1;
      continue;
    }
    const currentIndent = getIndent(line);
    if (currentIndent < mapIndent) {
      // Dedent: the mapping has ended.
      break;
    }
    if (currentIndent > mapIndent) {
      index += 1;
      continue;
    }
    const match = line.slice(mapIndent).match(/^([^:#]+):(?:\s*(.*))?$/);
    if (!match) {
      index += 1;
      continue;
    }
    const key = match[1].trim();
    const raw = match[2]?.trim() ?? '';
    if (raw) {
      value[key] = parseYamlScalar(raw);
      index += 1;
      continue;
    }
    const nextLine = lines[index + 1];
    if (nextLine && getIndent(nextLine) > mapIndent && nextLine.trimStart().startsWith('- ')) {
      const parsed = parseYamlArray(lines, index + 1, getIndent(nextLine));
      value[key] = parsed.value;
      index = parsed.nextIndex;
      continue;
    }
    // Children only need to be deeper than this mapping; the nested call
    // picks up their actual column.
    const parsed = parseYamlMap(lines, index + 1, mapIndent + 1);
    value[key] = parsed.value;
    index = parsed.nextIndex;
  }
  return { value, nextIndex: index };
};
/**
 * Reads a config file as UTF-8 text.
 *
 * @returns The file content, or `''` when the file does not exist (ENOENT).
 * @throws Any other read error is rethrown unchanged.
 */
const readYamlConfig = async (filePath: string): Promise<string> => {
  return readFile(filePath, 'utf8').catch((error: unknown) => {
    if ((error as NodeJS.ErrnoException).code === 'ENOENT') {
      return '';
    }
    throw error;
  });
};
/**
 * Loads the `mcp_servers` mapping from a YAML config file.
 *
 * @returns The parsed mapping, or `{}` when the file is missing or has no
 *   top-level `mcp_servers:` header line.
 */
const readMcpServers = async (filePath: string): Promise<Record<string, unknown>> => {
  const lines = (await readYamlConfig(filePath)).split(/\r?\n/);
  const sectionHeader = /^mcp_servers\s*:\s*$/;

  for (let lineNumber = 0; lineNumber < lines.length; lineNumber += 1) {
    if (sectionHeader.test(lines[lineNumber])) {
      const section = parseYamlMap(lines, lineNumber + 1, 2);
      return readObjectRecord(section.value) ?? {};
    }
  }
  return {};
};
/**
 * Serializes a mapping into indented YAML lines.
 *
 * Arrays become block sequences of scalars, nested objects become nested
 * mappings, and everything else is rendered with `yamlScalar`. Entries whose
 * value is `undefined` are omitted.
 *
 * Empty arrays and empty mappings are written in flow style (`[]` / `{}`):
 * a bare `key:` with no children is a YAML null, which would change the type
 * seen by other YAML readers of the file.
 *
 * @param value Mapping to serialize.
 * @param indent Number of spaces to indent the mapping's keys.
 * @returns One string per output line, without trailing newlines.
 */
const serializeYamlMap = (value: Record<string, unknown>, indent = 0): string[] => {
  const lines: string[] = [];
  for (const [key, rawValue] of Object.entries(value)) {
    if (rawValue === undefined) {
      continue;
    }
    const prefix = `${' '.repeat(indent)}${key}:`;
    if (Array.isArray(rawValue)) {
      if (rawValue.length === 0) {
        lines.push(`${prefix} []`);
        continue;
      }
      lines.push(prefix);
      for (const item of rawValue) {
        lines.push(`${' '.repeat(indent + 2)}- ${yamlScalar(item)}`);
      }
      continue;
    }
    const nested = readObjectRecord(rawValue);
    if (nested) {
      const nestedLines = serializeYamlMap(nested, indent + 2);
      if (nestedLines.length === 0) {
        lines.push(`${prefix} {}`);
        continue;
      }
      lines.push(prefix);
      lines.push(...nestedLines);
      continue;
    }
    lines.push(`${prefix} ${yamlScalar(rawValue)}`);
  }
  return lines;
};
/**
 * Returns `content` with its `mcp_servers` section replaced by `servers`.
 *
 * When no top-level `mcp_servers:` header exists, the section is appended
 * after a blank line. Otherwise the section runs from the header up to the
 * next non-blank line at column 0. The result always ends with one newline.
 */
const replaceMcpServersSection = (content: string, servers: Record<string, unknown>): string => {
  const lines = content.split(/\r?\n/);
  const start = lines.findIndex((line) => /^mcp_servers\s*:\s*$/.test(line));
  const block = ['mcp_servers:', ...serializeYamlMap(servers, 2)];

  if (start < 0) {
    const existing = content.trimEnd();
    const lead = existing ? `${existing}\n\n` : '';
    return `${lead}${block.join('\n')}\n`;
  }

  // Walk forward until the next top-level (unindented, non-blank) line.
  let end = start + 1;
  while (end < lines.length && !(lines[end].trim() && getIndent(lines[end]) === 0)) {
    end += 1;
  }

  const updated = [...lines.slice(0, start), ...block, ...lines.slice(end)];
  return `${updated.join('\n').trimEnd()}\n`;
};
/**
 * Rewrites the `mcp_servers` section of the config file at `filePath`,
 * creating the parent directory first when needed.
 */
const writeMcpServers = async (filePath: string, servers: Record<string, unknown>): Promise<void> => {
  const existing = await readYamlConfig(filePath);
  const directory = path.dirname(filePath);
  await mkdir(directory, { recursive: true });
  const updated = replaceMcpServersSection(existing, servers);
  await writeFile(filePath, updated, 'utf8');
};
/**
 * MCP server configuration provider for Hermes. Servers live in the
 * `mcp_servers` section of `.hermes/config.yaml`, either in the user's home
 * directory (`user` scope) or in the workspace (`project` scope).
 */
export class HermesMcpProvider extends McpProvider {
  constructor() {
    super('hermes', ['user', 'project'], ['stdio', 'http']);
  }

  /** Reads the raw server mapping for the given scope. */
  protected async readScopedServers(scope: McpScope, workspacePath: string): Promise<Record<string, unknown>> {
    let baseDirectory = workspacePath;
    if (scope === 'user') {
      baseDirectory = os.homedir();
    }
    return readMcpServers(path.join(baseDirectory, '.hermes', 'config.yaml'));
  }

  /** Writes the raw server mapping for the given scope. */
  protected async writeScopedServers(
    scope: McpScope,
    workspacePath: string,
    servers: Record<string, unknown>,
  ): Promise<void> {
    let baseDirectory = workspacePath;
    if (scope === 'user') {
      baseDirectory = os.homedir();
    }
    await writeMcpServers(path.join(baseDirectory, '.hermes', 'config.yaml'), servers);
  }

  /**
   * Builds the config object stored for a server.
   * @throws AppError (400) when a stdio server has no command or a
   *   non-stdio server has no url.
   */
  protected buildServerConfig(input: UpsertProviderMcpServerInput): Record<string, unknown> {
    if (input.transport !== 'stdio') {
      if (!input.url?.trim()) {
        throw new AppError('url is required for http/sse MCP servers.', {
          code: 'MCP_URL_REQUIRED',
          statusCode: 400,
        });
      }
      return {
        type: input.transport,
        url: input.url,
        headers: input.headers ?? {},
      };
    }

    if (!input.command?.trim()) {
      throw new AppError('command is required for stdio MCP servers.', {
        code: 'MCP_COMMAND_REQUIRED',
        statusCode: 400,
      });
    }
    return {
      command: input.command,
      args: input.args ?? [],
      env: input.env ?? {},
      cwd: input.cwd,
    };
  }

  /**
   * Converts a stored config into a ProviderMcpServer. Entries with a string
   * `command` are stdio servers, entries with a string `url` are http
   * servers; anything else yields `null`.
   */
  protected normalizeServerConfig(scope: McpScope, name: string, rawConfig: unknown): ProviderMcpServer | null {
    const config = readObjectRecord(rawConfig);
    if (!config) {
      return null;
    }

    const { command, url } = config;
    if (typeof command === 'string') {
      return {
        provider: 'hermes',
        name,
        scope,
        transport: 'stdio',
        command,
        args: readStringArray(config.args),
        env: readStringRecord(config.env),
        cwd: readOptionalString(config.cwd),
      };
    }
    if (typeof url === 'string') {
      return {
        provider: 'hermes',
        name,
        scope,
        transport: 'http',
        url,
        headers: readStringRecord(config.headers),
      };
    }
    return null;
  }
}

View File

@@ -0,0 +1,152 @@
import { readFile } from 'node:fs/promises';
import os from 'node:os';
import path from 'node:path';
import type { IProviderModels } from '@/shared/interfaces.js';
import type {
ProviderChangeActiveModelInput,
ProviderCurrentActiveModel,
ProviderModelsDefinition,
ProviderSessionActiveModelChange,
} from '@/shared/types.js';
import {
buildDefaultProviderCurrentActiveModel,
readOptionalString,
writeProviderSessionActiveModelChange,
} from '@/shared/utils.js';
// Sentinel model value meaning "use whatever model Hermes itself is configured with".
export const HERMES_CONFIGURED_MODEL = '__hermes_configured_model__';
// Model list offered when no model can be read from the Hermes config:
// a single option (the sentinel above), which is also the default.
export const HERMES_FALLBACK_MODELS: ProviderModelsDefinition = {
OPTIONS: [
{
value: HERMES_CONFIGURED_MODEL,
label: 'Configured in Hermes',
description: 'Uses the provider and model selected with `hermes model`.',
},
],
DEFAULT: HERMES_CONFIGURED_MODEL,
};
// Location of the user's Hermes config file: ~/.hermes/config.yaml.
const HERMES_CONFIG_PATH = path.join(os.homedir(), '.hermes', 'config.yaml');
/** Backslash-escapes every regular-expression metacharacter in `value`. */
function escapeRegex(value: string): string {
  const metacharacters = /[.*+?^${}()|[\]\\]/g;
  const escaped = value.replace(metacharacters, '\\$&');
  return escaped;
}
/**
 * Cleans the raw text after `key:` into a plain scalar.
 *
 * Removes a trailing ` # comment` (after the closing quote for quoted values,
 * anywhere for unquoted ones), then strips one pair of matching surrounding
 * quotes.
 *
 * @param raw Text following the colon of a `key: value` line.
 * @returns The cleaned value, or `null` when nothing is left.
 */
function stripScalar(raw: string): string | null {
  let value = raw.trim();
  const quote = value[0];

  if (quote === '"' || quote === "'") {
    // A quoted value may still be followed by a comment (`"gpt" # note`);
    // cut it off so the closing quote is last and the quotes can be removed.
    const closing = value.indexOf(quote, 1);
    if (closing > 0 && /^\s+#/.test(value.slice(closing + 1))) {
      value = value.slice(0, closing + 1);
    }
  } else {
    // Drop an unquoted trailing comment.
    const comment = value.search(/\s#/);
    if (comment >= 0) {
      value = value.slice(0, comment).trim();
    }
  }

  if (
    (value.startsWith('"') && value.endsWith('"')) ||
    (value.startsWith("'") && value.endsWith("'"))
  ) {
    value = value.slice(1, -1);
  }
  return value.trim() || null;
}
/** Number of leading whitespace characters on a line. */
const indentOf = (line: string): number => {
  const leading = /^\s+/.exec(line);
  return leading ? leading[0].length : 0;
};
/**
 * Small indentation-aware lookup for the two shapes found in
 * ~/.hermes/config.yaml: a flat `key: value`, and a value nested one level
 * below a `section:` header. Blank lines and `#` comment lines are ignored.
 *
 * A line-by-line scan is used instead of one multi-line regex, because a
 * lookahead-based pattern can end a section too early and miss the value.
 *
 * @param content Full text of the YAML file.
 * @param pathParts `[key]` for a flat lookup or `[section, key]` for a nested one.
 * @returns The cleaned scalar of the first match, or `null` when not found.
 */
export function readYamlPath(content: string, pathParts: string[]): string | null {
  const meaningfulLines = content.split(/\r?\n/).filter((line) => {
    const text = line.trim();
    return text !== '' && !text.startsWith('#');
  });

  if (pathParts.length === 1) {
    const flatRe = new RegExp(`^\\s*${escapeRegex(pathParts[0])}\\s*:\\s*(.*)$`);
    for (const line of meaningfulLines) {
      const hit = flatRe.exec(line);
      if (hit) {
        return stripScalar(hit[1]);
      }
    }
    return null;
  }

  const [section, key] = pathParts;
  const sectionRe = new RegExp(`^(\\s*)${escapeRegex(section)}\\s*:\\s*$`);
  const keyRe = new RegExp(`^\\s*${escapeRegex(key)}\\s*:\\s*(.*)$`);

  // Indentation of the currently open section header, or null when outside it.
  let sectionIndent: number | null = null;
  for (const line of meaningfulLines) {
    const insideSection = sectionIndent !== null && indentOf(line) > sectionIndent;
    if (insideSection) {
      const hit = keyRe.exec(line);
      if (hit) {
        return stripScalar(hit[1]);
      }
      continue;
    }
    // Either no section is open yet, or this line dedented out of it:
    // check whether the line itself (re)opens the section.
    const opened = sectionRe.exec(line);
    sectionIndent = opened ? opened[1].length : null;
  }
  return null;
}
/**
 * Model catalogue for the Hermes provider. The only concrete model it knows
 * about is the one read from the Hermes config file; the "configured in
 * Hermes" sentinel option is always offered.
 */
export class HermesProviderModels implements IProviderModels {
  /**
   * Lists selectable models: the configured model (when readable) first and
   * as default, followed by the fallback options.
   */
  async getSupportedModels(): Promise<ProviderModelsDefinition> {
    const configured = await this.readConfiguredModel();
    if (configured) {
      return {
        OPTIONS: [
          { value: configured, label: configured },
          ...HERMES_FALLBACK_MODELS.OPTIONS,
        ],
        DEFAULT: configured,
      };
    }
    return HERMES_FALLBACK_MODELS;
  }

  /** Returns the configured model, or the default derived from the supported models. */
  async getCurrentActiveModel(): Promise<ProviderCurrentActiveModel> {
    const configured = await this.readConfiguredModel();
    return configured
      ? { model: configured }
      : buildDefaultProviderCurrentActiveModel(await this.getSupportedModels());
  }

  /**
   * Records a per-session model change. Selecting the sentinel option is
   * reported as supported but unchanged, with no model override.
   */
  async changeActiveModel(input: ProviderChangeActiveModelInput): Promise<ProviderSessionActiveModelChange> {
    if (input.model !== HERMES_CONFIGURED_MODEL) {
      return writeProviderSessionActiveModelChange('hermes', input);
    }
    return {
      provider: 'hermes',
      sessionId: input.sessionId,
      supported: true,
      changed: false,
      model: null,
    };
  }

  /**
   * Reads `model.default`, then a flat `model`, from the Hermes config file.
   * Any failure (including a missing file) yields `null`.
   */
  private async readConfiguredModel(): Promise<string | null> {
    try {
      const content = await readFile(HERMES_CONFIG_PATH, 'utf8');
      const nested = readOptionalString(readYamlPath(content, ['model', 'default']));
      return nested ?? readOptionalString(readYamlPath(content, ['model'])) ?? null;
    } catch {
      return null;
    }
  }
}

View File

@@ -0,0 +1,110 @@
import fsSync from 'node:fs';
import os from 'node:os';
import path from 'node:path';
import Database from 'better-sqlite3';
import { sessionsDb } from '@/modules/database/index.js';
import type { IProviderSessionSynchronizer } from '@/shared/interfaces.js';
import { normalizeSessionName } from '@/shared/utils.js';
// Columns selected from the `sessions` table of the Hermes state database.
// `started_at` / `ended_at` are treated as Unix timestamps in seconds by unixSecondsToIso.
type HermesSessionRow = {
id: string;
cwd: string | null;
title: string | null;
started_at: number | null;
ended_at: number | null;
message_count: number | null;
};
// Location of the Hermes state database: ~/.hermes/state.db.
const HERMES_DB_PATH = path.join(os.homedir(), '.hermes', 'state.db');
/**
 * Converts a Unix timestamp in seconds to an ISO-8601 string.
 * Missing, zero or non-finite values yield the current time instead.
 */
function unixSecondsToIso(value: number | null | undefined): string {
  const usable = Boolean(value) && Number.isFinite(value);
  const date = usable ? new Date((value as number) * 1000) : new Date();
  return date.toISOString();
}
/**
 * Opens the Hermes state database read-only.
 * @returns The database handle, or `null` when the file does not exist.
 */
function openHermesDatabase(): Database.Database | null {
  const exists = fsSync.existsSync(HERMES_DB_PATH);
  return exists
    ? new Database(HERMES_DB_PATH, { readonly: true, fileMustExist: true })
    : null;
}
/**
 * Copies session rows from the Hermes state database into the application's
 * sessions table.
 */
export class HermesSessionSynchronizer implements IProviderSessionSynchronizer {
  private readonly provider = 'hermes' as const;

  /**
   * Upserts Hermes sessions, oldest first.
   * @param since When given, only sessions whose end (or start) time is at or
   *   after this date are considered.
   * @returns Number of rows that were upserted; 0 when the database is missing.
   */
  async synchronize(since?: Date): Promise<number> {
    const db = openHermesDatabase();
    if (!db) {
      return 0;
    }

    try {
      let rows: HermesSessionRow[];
      if (since) {
        const sinceSeconds = Math.floor(since.getTime() / 1000);
        rows = db.prepare(`
SELECT id, cwd, title, started_at, ended_at, message_count
FROM sessions
WHERE COALESCE(ended_at, started_at) >= ?
ORDER BY COALESCE(ended_at, started_at) ASC
`).all(sinceSeconds) as HermesSessionRow[];
      } else {
        rows = db.prepare(`
SELECT id, cwd, title, started_at, ended_at, message_count
FROM sessions
ORDER BY COALESCE(ended_at, started_at) ASC
`).all() as HermesSessionRow[];
      }

      return rows.reduce((count, row) => (this.upsertRow(row) ? count + 1 : count), 0);
    } finally {
      db.close();
    }
  }

  /**
   * Handles a change notification for a file. Only the Hermes database path
   * is accepted; the most recent session is then upserted.
   * @returns The id of the upserted session, or `null` when nothing was synced.
   */
  async synchronizeFile(filePath: string): Promise<string | null> {
    const isHermesDatabase = path.resolve(filePath) === HERMES_DB_PATH;
    if (!isHermesDatabase) {
      return null;
    }

    const db = openHermesDatabase();
    if (!db) {
      return null;
    }

    try {
      const latest = db.prepare(`
SELECT id, cwd, title, started_at, ended_at, message_count
FROM sessions
ORDER BY COALESCE(ended_at, started_at) DESC
LIMIT 1
`).get() as HermesSessionRow | undefined;

      if (latest && this.upsertRow(latest)) {
        return latest.id;
      }
      return null;
    } finally {
      db.close();
    }
  }

  /**
   * Writes one row into the sessions table.
   * @returns `false` (nothing written) when the row has no id or no cwd.
   */
  private upsertRow(row: HermesSessionRow): boolean {
    const { id, cwd } = row;
    if (!id || !cwd) {
      return false;
    }

    const name = normalizeSessionName(row.title ?? undefined, 'Untitled Hermes Session');
    const startedAt = unixSecondsToIso(row.started_at);
    const updatedAt = unixSecondsToIso(row.ended_at ?? row.started_at);
    sessionsDb.createSession(id, this.provider, cwd, name, startedAt, updatedAt, HERMES_DB_PATH);
    return true;
  }
}

View File

@@ -0,0 +1,307 @@
import fsSync from 'node:fs';
import os from 'node:os';
import path from 'node:path';
import Database from 'better-sqlite3';
import { sessionsDb } from '@/modules/database/index.js';
import type { IProviderSessions } from '@/shared/interfaces.js';
import type { AnyRecord, FetchHistoryOptions, FetchHistoryResult, NormalizedMessage } from '@/shared/types.js';
import {
createNormalizedMessage,
generateMessageId,
normalizeProviderTimestamp,
readObjectRecord,
readOptionalString,
sliceTailPage,
} from '@/shared/utils.js';
// Provider identifier stamped on every normalized message produced in this module.
const PROVIDER = 'hermes';
// Location of the Hermes state database: ~/.hermes/state.db.
const HERMES_DB_PATH = path.join(os.homedir(), '.hermes', 'state.db');
// Columns selected from the `messages` table of the Hermes state database.
// `tool_calls` is parsed as a JSON array and `timestamp` is multiplied by 1000
// (i.e. treated as Unix seconds) by readHermesHistoryFromDatabase.
type HermesMessageRow = {
id: number;
role: string;
content: string | null;
tool_call_id: string | null;
tool_calls: string | null;
tool_name: string | null;
timestamp: number;
reasoning: string | null;
reasoning_content: string | null;
finish_reason: string | null;
};
/**
 * Renders an arbitrary value as display text.
 *
 * @param value Value to render.
 * @returns `''` for null/undefined, the string itself for strings, pretty
 *   printed JSON (2-space indent) for JSON-serializable values, and
 *   `String(value)` for anything JSON cannot represent.
 */
function formatContent(value: unknown): string {
  if (value === undefined || value === null) {
    return '';
  }
  if (typeof value === 'string') {
    return value;
  }
  try {
    // JSON.stringify returns undefined (not a string) for functions and
    // symbols; fall back so the declared string return type always holds.
    return JSON.stringify(value, null, 2) ?? String(value);
  } catch {
    // Cyclic structures and BigInt make JSON.stringify throw.
    return String(value);
  }
}
/**
 * Finds the event type of a raw Hermes update. The first of `type`, `kind`,
 * `sessionUpdate`, `session_update`, `update`, `event` that reads as a string
 * wins; `''` is returned when none does.
 */
function readUpdateType(raw: AnyRecord): string {
  const typeFields = ['type', 'kind', 'sessionUpdate', 'session_update', 'update', 'event'];
  for (const field of typeFields) {
    const candidate = readOptionalString(raw[field]);
    if (candidate !== undefined && candidate !== null) {
      return candidate;
    }
  }
  return '';
}
/**
 * Session id carried by the event (`sessionId`, then `session_id`), falling
 * back to the caller-supplied `sessionId`.
 */
function readEventSessionId(raw: AnyRecord, sessionId: string | null): string | null {
  const camelCase = readOptionalString(raw.sessionId);
  if (camelCase !== undefined && camelCase !== null) {
    return camelCase;
  }
  const snakeCase = readOptionalString(raw.session_id);
  if (snakeCase !== undefined && snakeCase !== null) {
    return snakeCase;
  }
  return sessionId;
}
/**
 * Converts one raw Hermes event into zero or more normalized messages.
 *
 * The event type (see readUpdateType) selects the mapping: text chunks, full
 * messages, thinking, tool use, tool result, plan and error. An empty array is
 * returned when the input is not an object record, when the type is not
 * recognized, or when a text-like event has only blank content.
 *
 * @param rawMessage Raw event; may wrap the actual update in an `update` object.
 * @param sessionId Session id used when the event does not carry its own.
 * @param history When true, assistant text is emitted as complete `text`
 *   messages with a role instead of `stream_delta` messages.
 */
function normalizeHermesEvent(rawMessage: unknown, sessionId: string | null, history = false): NormalizedMessage[] {
const envelope = readObjectRecord(rawMessage);
if (!envelope) {
return [];
}
// When the payload is nested under `update`, lift it and carry the envelope's session id along.
const nestedUpdate = readObjectRecord(envelope.update);
const raw = nestedUpdate ? { ...nestedUpdate, sessionId: envelope.sessionId ?? envelope.session_id ?? sessionId } : envelope;
const type = readUpdateType(raw);
const eventSessionId = readEventSessionId(raw, sessionId);
const timestamp = normalizeProviderTimestamp(raw.timestamp ?? raw.time ?? raw.createdAt ?? raw.created_at);
// Prefer an id from the event; otherwise generate one for this message.
const baseId = readOptionalString(raw.id) ?? readOptionalString(raw.messageId) ?? readOptionalString(raw.message_id) ?? generateMessageId(PROVIDER);
// Incremental assistant text.
if (['agent_message_chunk', 'assistant_message_chunk', 'message_delta', 'text_delta', 'text'].includes(type)) {
// NOTE(review): only string-valued content/text/delta are read here; structured content blocks would be dropped — confirm the upstream shape.
const content = readOptionalString(raw.content)
?? readOptionalString(raw.text)
?? readOptionalString(raw.delta)
?? readOptionalString(readObjectRecord(raw.message)?.content)
?? '';
if (!content.trim()) {
return [];
}
return [createNormalizedMessage({
id: baseId,
sessionId: eventSessionId,
timestamp,
provider: PROVIDER,
kind: history ? 'text' : 'stream_delta',
role: history ? 'assistant' : undefined,
content,
})];
}
// Complete message; anything whose role is not exactly 'user' is treated as assistant.
if (['agent_message', 'assistant_message', 'message'].includes(type)) {
const role = readOptionalString(raw.role) === 'user' ? 'user' : 'assistant';
const content = readOptionalString(raw.content)
?? readOptionalString(raw.text)
?? readOptionalString(readObjectRecord(raw.message)?.content)
?? '';
if (!content.trim()) {
return [];
}
// Live assistant messages go out as stream deltas without a role; user and history messages are plain text.
return [createNormalizedMessage({
id: baseId,
sessionId: eventSessionId,
timestamp,
provider: PROVIDER,
kind: history ? 'text' : role === 'assistant' ? 'stream_delta' : 'text',
role: history || role === 'user' ? role : undefined,
content,
})];
}
// Reasoning / thinking text.
if (['agent_thought_chunk', 'thought_delta', 'thinking', 'reasoning'].includes(type)) {
const content = readOptionalString(raw.content) ?? readOptionalString(raw.text) ?? readOptionalString(raw.delta) ?? '';
if (!content.trim()) {
return [];
}
return [createNormalizedMessage({
id: baseId,
sessionId: eventSessionId,
timestamp,
provider: PROVIDER,
kind: 'thinking',
content,
})];
}
// Start of a tool invocation.
if (['tool_call', 'tool_use', 'tool_call_start'].includes(type)) {
const tool = readObjectRecord(raw.tool);
// Fall back to the message id when the event carries no tool call id.
const toolId = readOptionalString(raw.toolCallId) ?? readOptionalString(raw.tool_call_id) ?? readOptionalString(raw.toolId) ?? baseId;
return [createNormalizedMessage({
id: baseId,
sessionId: eventSessionId,
timestamp,
provider: PROVIDER,
kind: 'tool_use',
toolName: readOptionalString(raw.toolName)
?? readOptionalString(raw.tool_name)
?? readOptionalString(raw.title)
?? readOptionalString(raw.name)
?? readOptionalString(tool?.name)
?? 'Tool',
toolInput: raw.rawInput ?? raw.raw_input ?? raw.input ?? raw.arguments ?? raw.params ?? tool?.input ?? {},
toolId,
})];
}
// Tool output.
// NOTE(review): every 'tool_call_update' is mapped to a tool_result, including any that may only report progress — confirm intended.
if (['tool_call_update', 'tool_result', 'tool_call_result', 'tool_call_done'].includes(type)) {
return [createNormalizedMessage({
id: baseId,
sessionId: eventSessionId,
timestamp,
provider: PROVIDER,
kind: 'tool_result',
toolId: readOptionalString(raw.toolCallId) ?? readOptionalString(raw.tool_call_id) ?? readOptionalString(raw.toolId) ?? '',
content: formatContent(raw.output ?? raw.result ?? raw.content ?? raw.delta ?? ''),
isError: Boolean(raw.error) || raw.status === 'error',
})];
}
// Plans are surfaced as a status message whose summary holds the plan text.
if (type === 'plan') {
const content = readOptionalString(raw.content) ?? readOptionalString(raw.text) ?? formatContent(raw.plan);
if (!content.trim()) {
return [];
}
return [createNormalizedMessage({
id: baseId,
sessionId: eventSessionId,
timestamp,
provider: PROVIDER,
kind: 'status',
text: 'plan',
summary: content,
})];
}
if (type === 'error') {
return [createNormalizedMessage({
id: baseId,
sessionId: eventSessionId,
timestamp,
provider: PROVIDER,
kind: 'error',
content: readOptionalString(raw.error) ?? readOptionalString(raw.message) ?? 'Unknown Hermes error',
})];
}
// Unrecognized event types are ignored.
return [];
}
/**
 * Parses a JSON string that is expected to hold an array.
 * @returns The parsed array, or `[]` for empty input, invalid JSON, or JSON
 *   that is not an array.
 */
function parseJsonArray(value: string | null): unknown[] {
  if (!value) {
    return [];
  }

  let parsed: unknown;
  try {
    parsed = JSON.parse(value);
  } catch {
    return [];
  }
  return Array.isArray(parsed) ? parsed : [];
}
/**
 * Loads the active messages of a session from the Hermes state database and
 * converts them into normalized messages, ordered by timestamp then row id.
 *
 * For each row the output order is: thinking (if any reasoning text), one
 * tool_use per entry in `tool_calls`, then either a tool_result (role 'tool')
 * or a text message (non-blank content).
 *
 * @param sessionId Hermes session id to load.
 * @returns The normalized messages; empty when the database file is missing.
 */
function readHermesHistoryFromDatabase(sessionId: string): NormalizedMessage[] {
const normalized: NormalizedMessage[] = [];
if (!fsSync.existsSync(HERMES_DB_PATH)) {
return normalized;
}
const db = new Database(HERMES_DB_PATH, { readonly: true, fileMustExist: true });
try {
const rows = db.prepare(`
SELECT id, role, content, tool_call_id, tool_calls, tool_name, timestamp, reasoning, reasoning_content, finish_reason
FROM messages
WHERE session_id = ? AND active = 1
ORDER BY timestamp ASC, id ASC
`).all(sessionId) as HermesMessageRow[];
for (const row of rows) {
// NOTE(review): assumes `timestamp` is Unix seconds; a non-numeric value would make toISOString throw — confirm the column type.
const timestamp = new Date(row.timestamp * 1000).toISOString();
const baseId = `hermes-${sessionId}-${row.id}`;
// Prefer reasoning_content over reasoning when both are present.
const reasoning = row.reasoning_content || row.reasoning;
if (reasoning?.trim()) {
normalized.push(createNormalizedMessage({
id: `${baseId}-thinking`,
sessionId,
timestamp,
provider: PROVIDER,
kind: 'thinking',
content: reasoning,
}));
}
for (const toolCall of parseJsonArray(row.tool_calls)) {
const call = readObjectRecord(toolCall);
// Tool calls may nest name/arguments under `function`, or carry them directly.
const fn = readObjectRecord(call?.function);
normalized.push(createNormalizedMessage({
// Without a call id, the current output length keeps the message id unique.
id: `${baseId}-tool-${readOptionalString(call?.id) ?? normalized.length}`,
sessionId,
timestamp,
provider: PROVIDER,
kind: 'tool_use',
toolName: readOptionalString(fn?.name) ?? readOptionalString(call?.name) ?? 'Tool',
toolInput: fn?.arguments ?? call?.arguments ?? {},
toolId: readOptionalString(call?.id) ?? `${baseId}-tool`,
}));
}
// Tool rows only produce a result message; their content is never emitted as text.
if (row.role === 'tool') {
normalized.push(createNormalizedMessage({
id: `${baseId}-result`,
sessionId,
timestamp,
provider: PROVIDER,
kind: 'tool_result',
toolId: row.tool_call_id ?? '',
content: row.content ?? '',
isError: row.finish_reason === 'error',
}));
continue;
}
if (row.content?.trim()) {
normalized.push(createNormalizedMessage({
id: baseId,
sessionId,
timestamp,
provider: PROVIDER,
kind: 'text',
// Any role other than 'user' is shown as assistant.
role: row.role === 'user' ? 'user' : 'assistant',
content: row.content,
}));
}
}
} finally {
// Always release the database handle, even when a row fails to convert.
db.close();
}
return normalized;
}
/**
 * Session adapter for Hermes: normalizes live events and pages through the
 * transcript read from the Hermes database.
 */
export class HermesSessionsProvider implements IProviderSessions {
  /** Converts one raw Hermes event into zero or more normalized messages. */
  normalizeMessage(rawMessage: unknown, sessionId: string | null): NormalizedMessage[] {
    return normalizeHermesEvent(rawMessage, sessionId);
  }

  /**
   * Returns one page of a session's history.
   *
   * The id is first looked up in the app's sessions table (as a session id,
   * then as a provider session id); when a row is found its
   * `provider_session_id` is used to read the Hermes transcript, otherwise the
   * given id is used as-is.
   *
   * @param sessionId - Session id or provider session id.
   * @param options - Optional `limit` (`null` or omitted leaves it unset) and
   *   `offset` (defaults to 0); negative values are raised to 0.
   * @returns The page produced by `sliceTailPage`, plus the total message count
   *   and the effective offset and limit.
   */
  async fetchHistory(sessionId: string, options: FetchHistoryOptions = {}): Promise<FetchHistoryResult> {
    const { limit: requestedLimit = null, offset: requestedOffset = 0 } = options;

    const sessionRow = sessionsDb.getSessionById(sessionId) ?? sessionsDb.getSessionByProviderSessionId(sessionId);
    const hermesSessionId = sessionRow?.provider_session_id ?? sessionId;
    const history = readHermesHistoryFromDatabase(hermesSessionId);

    const offset = Math.max(0, requestedOffset);
    const limit = requestedLimit === null ? null : Math.max(0, requestedLimit);
    const { page, hasMore } = sliceTailPage(history, limit, offset);

    return {
      messages: page,
      total: history.length,
      hasMore,
      offset,
      limit,
    };
  }
}

View File

@@ -0,0 +1,181 @@
import os from 'node:os';
import path from 'node:path';
import { execFile } from 'node:child_process';
import { promisify } from 'node:util';
import { SkillsProvider } from '@/modules/providers/shared/skills/skills.provider.js';
import type {
ProviderSkillRegistryActionResult,
ProviderSkillRegistryInstallInput,
ProviderSkillRegistrySearchOptions,
ProviderSkillRegistrySearchResult,
ProviderSkillSource,
} from '@/shared/types.js';
import { AppError, addUniqueProviderSkillSource, readObjectRecord, readOptionalString } from '@/shared/utils.js';
// Promise-returning form of `execFile`, so Hermes CLI invocations can be awaited.
const execFileAsync = promisify(execFile);
// Executable used for Hermes CLI calls: the first whitespace-separated token of
// HERMES_COMMAND_PATH, else of HERMES_CLI_PATH, else the bare `hermes` command.
// NOTE(review): because only the first token is kept, a configured path that
// itself contains spaces would be truncated — confirm whether that must be supported.
const HERMES_COMMAND =
(process.env.HERMES_COMMAND_PATH || process.env.HERMES_CLI_PATH || 'hermes').trim().split(/\s+/)[0] || 'hermes';
// Passed as the `timeout` option (milliseconds) of every Hermes skills command.
const HERMES_SKILLS_TIMEOUT_MS = 45_000;
// Passed as the `maxBuffer` option of every Hermes skills command (8 * 1024 * 1024 bytes).
const HERMES_SKILLS_MAX_BUFFER = 1024 * 1024 * 8;
/**
 * Converts one entry of the parsed Hermes skill-search output into a registry
 * search result.
 *
 * @param value - Untrusted entry taken from the CLI's JSON output.
 * @returns The normalized result, or `null` when the entry is not an object
 *   record or no `name` / `identifier` string can be read from it.
 */
function normalizeSearchResult(value: unknown): ProviderSkillRegistrySearchResult | null {
  const entry = readObjectRecord(value);
  if (!entry) {
    return null;
  }

  const name = readOptionalString(entry.name);
  const identifier = readOptionalString(entry.identifier);
  if (!(name && identifier)) {
    return null;
  }

  const source = readOptionalString(entry.source) ?? undefined;
  // The trust level is accepted under either its snake_case or camelCase key.
  const trustLevel = readOptionalString(entry.trust_level) ?? readOptionalString(entry.trustLevel) ?? undefined;
  const description = readOptionalString(entry.description) ?? undefined;

  return { name, identifier, source, trustLevel, description };
}
/**
 * Skills adapter for Hermes.
 *
 * Skill discovery reads `.hermes/skills` under the workspace and under the
 * user's home directory. Registry operations (search, install, uninstall,
 * check, update, audit) are delegated to the Hermes CLI via `hermes skills …`.
 */
export class HermesSkillsProvider extends SkillsProvider {
  constructor() {
    super('hermes');
  }

  /**
   * Searches the Hermes skill registry.
   *
   * @param query - Search text. A blank query returns an empty list without
   *   running the CLI.
   * @param options - Optional `source` filter and `limit`; a finite, non-zero
   *   limit is floored and clamped to 1–50 before being passed to the CLI.
   * @returns Entries of the CLI's JSON array that normalize successfully; an
   *   empty list when the output is valid JSON but not an array.
   * @throws AppError 400 when the query starts with "-"; AppError 502 when the
   *   command fails or its output is not valid JSON.
   */
  async searchRegistry(
    query: string,
    options: ProviderSkillRegistrySearchOptions = {},
  ): Promise<ProviderSkillRegistrySearchResult[]> {
    const normalizedQuery = query.trim();
    if (!normalizedQuery) {
      return [];
    }
    this.assertNotCliOption(normalizedQuery, 'query');

    const args = ['skills', 'search', normalizedQuery, '--json'];
    const source = options.source?.trim();
    if (source) {
      args.push('--source', source);
    }
    if (options.limit && Number.isFinite(options.limit)) {
      args.push('--limit', String(Math.max(1, Math.min(Math.floor(options.limit), 50))));
    }

    const result = await this.runHermes(args);
    try {
      const parsed = JSON.parse(result.stdout);
      return Array.isArray(parsed)
        ? parsed.map(normalizeSearchResult).filter((entry): entry is ProviderSkillRegistrySearchResult => Boolean(entry))
        : [];
    } catch (error) {
      throw new AppError('Hermes returned invalid skill search JSON.', {
        code: 'HERMES_SKILL_SEARCH_PARSE_FAILED',
        statusCode: 502,
        details: error instanceof Error ? error.message : String(error),
      });
    }
  }

  /**
   * Installs a registry skill with `hermes skills install <identifier> --yes`.
   *
   * @param input - `identifier` is required; `category` and `name` are passed
   *   through as `--category` / `--name` when non-blank; `force` adds `--force`.
   * @returns The CLI's captured stdout/stderr.
   * @throws AppError 400 when the identifier is blank or starts with "-";
   *   AppError 502 when the command fails.
   */
  async installRegistrySkill(input: ProviderSkillRegistryInstallInput): Promise<ProviderSkillRegistryActionResult> {
    const identifier = input.identifier.trim();
    if (!identifier) {
      throw new AppError('identifier is required.', {
        code: 'HERMES_SKILL_IDENTIFIER_REQUIRED',
        statusCode: 400,
      });
    }
    this.assertNotCliOption(identifier, 'identifier');

    const args = ['skills', 'install', identifier, '--yes'];
    if (input.category?.trim()) {
      args.push('--category', input.category.trim());
    }
    if (input.name?.trim()) {
      args.push('--name', input.name.trim());
    }
    if (input.force) {
      args.push('--force');
    }
    return this.runHermes(args);
  }

  /**
   * Uninstalls a skill with `hermes skills uninstall <name>`.
   *
   * @throws AppError 400 when the name is blank or starts with "-";
   *   AppError 502 when the command fails.
   */
  async uninstallRegistrySkill(name: string): Promise<ProviderSkillRegistryActionResult> {
    const normalizedName = name.trim();
    if (!normalizedName) {
      throw new AppError('name is required.', {
        code: 'HERMES_SKILL_NAME_REQUIRED',
        statusCode: 400,
      });
    }
    this.assertNotCliOption(normalizedName, 'name');
    return this.runHermes(['skills', 'uninstall', normalizedName]);
  }

  /** Runs `hermes skills check` and returns its captured output. */
  async checkRegistryUpdates(): Promise<ProviderSkillRegistryActionResult> {
    return this.runHermes(['skills', 'check']);
  }

  /** Runs `hermes skills update` and returns its captured output. */
  async updateRegistrySkills(): Promise<ProviderSkillRegistryActionResult> {
    return this.runHermes(['skills', 'update']);
  }

  /** Runs `hermes skills audit` and returns its captured output. */
  async auditRegistrySkills(): Promise<ProviderSkillRegistryActionResult> {
    return this.runHermes(['skills', 'audit']);
  }

  /**
   * Lists the directories scanned for skills: the workspace's
   * `.hermes/skills` (scope `repo`) followed by the home directory's
   * `.hermes/skills` (scope `user`). Entries are added through
   * `addUniqueProviderSkillSource` together with a set of already-seen roots.
   */
  protected async getSkillSources(workspacePath: string): Promise<ProviderSkillSource[]> {
    const sources: ProviderSkillSource[] = [];
    const seenRootDirs = new Set<string>();
    addUniqueProviderSkillSource(sources, seenRootDirs, {
      scope: 'repo',
      rootDir: path.join(workspacePath, '.hermes', 'skills'),
      commandPrefix: '/',
      recursive: true,
    });
    addUniqueProviderSkillSource(sources, seenRootDirs, {
      scope: 'user',
      rootDir: path.join(os.homedir(), '.hermes', 'skills'),
      commandPrefix: '/',
      recursive: true,
    });
    return sources;
  }

  /** Returns the user-scoped skill source located under `~/.hermes/skills`. */
  protected async getGlobalSkillSource(): Promise<ProviderSkillSource> {
    return {
      scope: 'user',
      rootDir: path.join(os.homedir(), '.hermes', 'skills'),
      commandPrefix: '/',
      recursive: true,
    };
  }

  /**
   * Rejects a user-supplied positional value that begins with "-".
   *
   * `execFile` prevents shell interpretation, but the value still lands in the
   * CLI's argument list, where a leading dash would make the CLI parse it as
   * an option instead of data.
   *
   * @throws AppError 400 (`HERMES_SKILL_ARGUMENT_INVALID`).
   */
  private assertNotCliOption(value: string, fieldName: string): void {
    if (value.startsWith('-')) {
      throw new AppError(`${fieldName} must not start with "-".`, {
        code: 'HERMES_SKILL_ARGUMENT_INVALID',
        statusCode: 400,
      });
    }
  }

  /**
   * Executes the Hermes CLI with the given arguments (no shell involved).
   *
   * @returns `{ ok: true, stdout, stderr }` when the command succeeds.
   * @throws AppError 502 (`HERMES_SKILL_COMMAND_FAILED`) when `execFile`
   *   rejects; the message prefers the command's stderr, and the details carry
   *   the error's `code`, stdout and stderr.
   */
  private async runHermes(args: string[]): Promise<ProviderSkillRegistryActionResult> {
    try {
      const { stdout, stderr } = await execFileAsync(HERMES_COMMAND, args, {
        timeout: HERMES_SKILLS_TIMEOUT_MS,
        maxBuffer: HERMES_SKILLS_MAX_BUFFER,
        env: process.env,
      });
      return { ok: true, stdout, stderr };
    } catch (error) {
      const maybeError = error as Error & {
        stdout?: string;
        stderr?: string;
        code?: number | string;
      };
      throw new AppError(maybeError.stderr || maybeError.message || 'Hermes skill command failed.', {
        code: 'HERMES_SKILL_COMMAND_FAILED',
        statusCode: 502,
        details: {
          exitCode: maybeError.code,
          stdout: maybeError.stdout,
          stderr: maybeError.stderr,
        },
      });
    }
  }
}

View File

@@ -0,0 +1,27 @@
import { HermesProviderAuth } from '@/modules/providers/list/hermes/hermes-auth.provider.js';
import { HermesMcpProvider } from '@/modules/providers/list/hermes/hermes-mcp.provider.js';
import { HermesProviderModels } from '@/modules/providers/list/hermes/hermes-models.provider.js';
import { HermesSessionSynchronizer } from '@/modules/providers/list/hermes/hermes-session-synchronizer.provider.js';
import { HermesSessionsProvider } from '@/modules/providers/list/hermes/hermes-sessions.provider.js';
import { HermesSkillsProvider } from '@/modules/providers/list/hermes/hermes-skills.provider.js';
import { AbstractProvider } from '@/modules/providers/shared/base/abstract.provider.js';
import type {
IProviderAuth,
IProviderModels,
IProviderSessionSynchronizer,
IProviderSkills,
IProviderSessions,
} from '@/shared/interfaces.js';
/**
 * Provider entry for Hermes. Passes the id 'hermes' to the base class and
 * exposes one Hermes-specific implementation per capability: models, MCP,
 * auth, skills, sessions and session synchronization.
 */
export class HermesProvider extends AbstractProvider {
// Each capability instance is created once, when the provider is constructed.
readonly models: IProviderModels = new HermesProviderModels();
// Unlike its siblings, `mcp` has no interface annotation, so its type is the concrete class.
readonly mcp = new HermesMcpProvider();
readonly auth: IProviderAuth = new HermesProviderAuth();
readonly skills: IProviderSkills = new HermesSkillsProvider();
readonly sessions: IProviderSessions = new HermesSessionsProvider();
readonly sessionSynchronizer: IProviderSessionSynchronizer = new HermesSessionSynchronizer();
constructor() {
super('hermes');
}
}

View File

@@ -2,6 +2,7 @@ import { ClaudeProvider } from '@/modules/providers/list/claude/claude.provider.
import { CodexProvider } from '@/modules/providers/list/codex/codex.provider.js';
import { CursorProvider } from '@/modules/providers/list/cursor/cursor.provider.js';
import { GeminiProvider } from '@/modules/providers/list/gemini/gemini.provider.js';
import { HermesProvider } from '@/modules/providers/list/hermes/hermes.provider.js';
import { OpenCodeProvider } from '@/modules/providers/list/opencode/opencode.provider.js';
import type { IProvider } from '@/shared/interfaces.js';
import type { LLMProvider } from '@/shared/types.js';
@@ -13,6 +14,7 @@ const providers: Record<LLMProvider, IProvider> = {
cursor: new CursorProvider(),
gemini: new GeminiProvider(),
opencode: new OpenCodeProvider(),
hermes: new HermesProvider(),
};
/**

View File

@@ -279,6 +279,48 @@ const parseProviderSkillCreatePayload = (payload: unknown): ProviderSkillCreateI
return { entries };
};
/**
 * Parses the optional `limit` query parameter of a skill registry search.
 *
 * @param value - Raw query value.
 * @returns 10 when no value is supplied; otherwise the integer clamped to the
 *   range 1–50.
 * @throws AppError 400 (`INVALID_QUERY_PARAMETER`) when the value is not a
 *   plain base-10 integer.
 */
const parseSkillRegistryLimit = (value: unknown): number => {
  const raw = readOptionalQueryString(value);
  if (!raw) {
    return 10;
  }

  // `Number.parseInt` alone accepts any numeric prefix ("10abc" -> 10,
  // "1.5" -> 1), so the whole value must look like an integer before it is
  // converted.
  const isIntegerLiteral = /^[+-]?\d+$/.test(raw.trim());
  const parsed = isIntegerLiteral ? Number.parseInt(raw, 10) : Number.NaN;
  if (Number.isNaN(parsed)) {
    throw new AppError('limit must be a valid integer.', {
      code: 'INVALID_QUERY_PARAMETER',
      statusCode: 400,
    });
  }

  return Math.max(1, Math.min(parsed, 50));
};
/**
 * Validates the request body of a registry skill install.
 *
 * @param payload - Parsed request body.
 * @returns The install input: a required `identifier`, optional `category` and
 *   `name`, and `force`, which is true only when the body value is exactly `true`.
 * @throws AppError 400 when the body is not an object (`INVALID_REQUEST_BODY`)
 *   or no identifier can be read from it (`SKILL_IDENTIFIER_REQUIRED`).
 */
const parseSkillRegistryInstallPayload = (payload: unknown) => {
  const isObjectLike = Boolean(payload) && typeof payload === 'object';
  if (!isObjectLike) {
    throw new AppError('Request body must be an object.', {
      code: 'INVALID_REQUEST_BODY',
      statusCode: 400,
    });
  }

  const fields = payload as Record<string, unknown>;
  const identifier = readOptionalQueryString(fields.identifier);
  if (!identifier) {
    throw new AppError('identifier is required.', {
      code: 'SKILL_IDENTIFIER_REQUIRED',
      statusCode: 400,
    });
  }

  const category = readOptionalQueryString(fields.category);
  const name = readOptionalQueryString(fields.name);
  const force = fields.force === true;
  return { identifier, category, name, force };
};
const parseProvider = (value: unknown): LLMProvider => {
const normalized = normalizeProviderParam(value);
if (
@@ -287,6 +329,7 @@ const parseProvider = (value: unknown): LLMProvider => {
|| normalized === 'cursor'
|| normalized === 'gemini'
|| normalized === 'opencode'
|| normalized === 'hermes'
) {
return normalized;
}
@@ -441,6 +484,77 @@ router.delete(
}),
);
// ----------------- Skill registry routes -----------------
// Every handler parses the provider from the path and delegates to
// `providerSkillsService`.

// Search the registry: `query` is required, `source` and `limit` are optional.
router.get(
  '/:provider/skills/registry/search',
  asyncHandler(async (req: Request, res: Response) => {
    const provider = parseProvider(req.params.provider);
    const searchText = readOptionalQueryString(req.query.query);
    if (!searchText) {
      throw new AppError('query is required.', {
        code: 'SKILL_SEARCH_QUERY_REQUIRED',
        statusCode: 400,
      });
    }

    const searchOptions = {
      source: readOptionalQueryString(req.query.source),
      limit: parseSkillRegistryLimit(req.query.limit),
    };
    const results = await providerSkillsService.searchSkillRegistry(provider, searchText, searchOptions);
    res.json(createApiSuccessResponse({ provider, results }));
  }),
);

// Install a registry skill described by the request body; responds with 201.
router.post(
  '/:provider/skills/registry/install',
  asyncHandler(async (req: Request, res: Response) => {
    const provider = parseProvider(req.params.provider);
    const installInput = parseSkillRegistryInstallPayload(req.body);
    const result = await providerSkillsService.installRegistrySkill(provider, installInput);
    res.status(201).json(createApiSuccessResponse({ provider, result }));
  }),
);

// Check installed registry skills for updates.
router.post(
  '/:provider/skills/registry/check',
  asyncHandler(async (req: Request, res: Response) => {
    const provider = parseProvider(req.params.provider);
    const result = await providerSkillsService.checkRegistryUpdates(provider);
    const body = createApiSuccessResponse({ provider, result });
    res.json(body);
  }),
);

// Update installed registry skills.
router.post(
  '/:provider/skills/registry/update',
  asyncHandler(async (req: Request, res: Response) => {
    const provider = parseProvider(req.params.provider);
    const result = await providerSkillsService.updateRegistrySkills(provider);
    const body = createApiSuccessResponse({ provider, result });
    res.json(body);
  }),
);

// Audit installed registry skills.
router.post(
  '/:provider/skills/registry/audit',
  asyncHandler(async (req: Request, res: Response) => {
    const provider = parseProvider(req.params.provider);
    const result = await providerSkillsService.auditRegistrySkills(provider);
    const body = createApiSuccessResponse({ provider, result });
    res.json(body);
  }),
);

// Uninstall the registry skill named by the `:name` path segment.
router.delete(
  '/:provider/skills/registry/:name',
  asyncHandler(async (req: Request, res: Response) => {
    const provider = parseProvider(req.params.provider);
    const skillName = readPathParam(req.params.name, 'name');
    const result = await providerSkillsService.uninstallRegistrySkill(provider, skillName);
    res.json(createApiSuccessResponse({ provider, result }));
  }),
);
// ----------------- MCP routes -----------------
router.get(
'/:provider/mcp/servers',

View File

@@ -75,6 +75,15 @@ const PROVIDER_CAPABILITIES: Record<LLMProvider, ProviderCapabilities> = {
supportsPermissionRequests: false,
supportsTokenUsage: true,
},
hermes: {
provider: 'hermes',
permissionModes: ['default'],
defaultPermissionMode: 'default',
supportsImages: false,
supportsAbort: true,
supportsPermissionRequests: true,
supportsTokenUsage: false,
},
};
/**

View File

@@ -23,6 +23,7 @@ export const sessionSynchronizerService = {
cursor: 0,
gemini: 0,
opencode: 0,
hermes: 0,
};
const failures: string[] = [];

View File

@@ -39,6 +39,10 @@ const PROVIDER_WATCH_PATHS: Array<{ provider: LLMProvider; rootPath: string }> =
provider: 'opencode',
rootPath: path.join(os.homedir(), '.local', 'share', 'opencode'),
},
{
provider: 'hermes',
rootPath: path.join(os.homedir(), '.hermes'),
},
];
const WATCHER_IGNORED_PATTERNS = [
@@ -81,6 +85,10 @@ function isWatcherTargetFile(provider: LLMProvider, filePath: string): boolean {
return path.basename(filePath) === 'opencode.db';
}
if (provider === 'hermes') {
return path.basename(filePath) === 'state.db';
}
if (provider === 'gemini') {
return filePath.endsWith('.json') || filePath.endsWith('.jsonl');
}

View File

@@ -4,7 +4,29 @@ import type {
ProviderSkillCreateInput,
ProviderSkillListOptions,
ProviderSkillRemoveInput,
ProviderSkillRegistryActionResult,
ProviderSkillRegistryInstallInput,
ProviderSkillRegistrySearchOptions,
ProviderSkillRegistrySearchResult,
} from '@/shared/types.js';
import { AppError } from '@/shared/utils.js';
/** Resolves the named provider through the registry and returns its skills adapter. */
const getProviderSkills = (providerName: string) => {
  const provider = providerRegistry.resolveProvider(providerName);
  return provider.skills;
};
/**
 * Looks up a method on a provider's skills adapter.
 *
 * The method is returned unbound, so callers must invoke it with the adapter
 * as `this`.
 *
 * @param providerName - Provider whose skills adapter is inspected.
 * @param methodName - Name of the adapter member to fetch.
 * @returns The member, when it is a function.
 * @throws AppError 400 (`PROVIDER_SKILL_REGISTRY_UNSUPPORTED`) when the
 *   adapter has no function under that name.
 */
const requireSkillRegistryMethod = <TMethod extends keyof ReturnType<typeof getProviderSkills>>(
  providerName: string,
  methodName: TMethod,
): NonNullable<ReturnType<typeof getProviderSkills>[TMethod]> => {
  const candidate = getProviderSkills(providerName)[methodName];
  if (typeof candidate === 'function') {
    return candidate as NonNullable<ReturnType<typeof getProviderSkills>[TMethod]>;
  }

  throw new AppError(`${providerName} does not support skill registry operations.`, {
    code: 'PROVIDER_SKILL_REGISTRY_UNSUPPORTED',
    statusCode: 400,
  });
};
export const providerSkillsService = {
/**
@@ -14,8 +36,7 @@ export const providerSkillsService = {
providerName: string,
options?: ProviderSkillListOptions,
): Promise<ProviderSkill[]> {
const provider = providerRegistry.resolveProvider(providerName);
return provider.skills.listSkills(options);
return getProviderSkills(providerName).listSkills(options);
},
/**
@@ -25,8 +46,44 @@ export const providerSkillsService = {
providerName: string,
input: ProviderSkillCreateInput,
): Promise<ProviderSkill[]> {
const provider = providerRegistry.resolveProvider(providerName);
return provider.skills.addSkills(input);
return getProviderSkills(providerName).addSkills(input);
},
async searchSkillRegistry(
providerName: string,
query: string,
options?: ProviderSkillRegistrySearchOptions,
): Promise<ProviderSkillRegistrySearchResult[]> {
const searchRegistry = requireSkillRegistryMethod(providerName, 'searchRegistry');
return searchRegistry.call(getProviderSkills(providerName), query, options);
},
async installRegistrySkill(
providerName: string,
input: ProviderSkillRegistryInstallInput,
): Promise<ProviderSkillRegistryActionResult> {
const installRegistrySkill = requireSkillRegistryMethod(providerName, 'installRegistrySkill');
return installRegistrySkill.call(getProviderSkills(providerName), input);
},
async uninstallRegistrySkill(providerName: string, name: string): Promise<ProviderSkillRegistryActionResult> {
const uninstallRegistrySkill = requireSkillRegistryMethod(providerName, 'uninstallRegistrySkill');
return uninstallRegistrySkill.call(getProviderSkills(providerName), name);
},
async checkRegistryUpdates(providerName: string): Promise<ProviderSkillRegistryActionResult> {
const checkRegistryUpdates = requireSkillRegistryMethod(providerName, 'checkRegistryUpdates');
return checkRegistryUpdates.call(getProviderSkills(providerName));
},
async updateRegistrySkills(providerName: string): Promise<ProviderSkillRegistryActionResult> {
const updateRegistrySkills = requireSkillRegistryMethod(providerName, 'updateRegistrySkills');
return updateRegistrySkills.call(getProviderSkills(providerName));
},
async auditRegistrySkills(providerName: string): Promise<ProviderSkillRegistryActionResult> {
const auditRegistrySkills = requireSkillRegistryMethod(providerName, 'auditRegistrySkills');
return auditRegistrySkills.call(getProviderSkills(providerName));
},
async removeProviderSkill(

View File

@@ -341,7 +341,7 @@ test('providerMcpService global adder writes to all providers and rejects unsupp
workspacePath,
});
assert.equal(globalResult.length, 5);
assert.equal(globalResult.length, 6);
assert.ok(globalResult.every((entry) => entry.created === true));
const claudeProject = await readJson(path.join(workspacePath, '.mcp.json'));
@@ -356,6 +356,11 @@ test('providerMcpService global adder writes to all providers and rejects unsupp
const opencodeProject = await readJson(path.join(workspacePath, 'opencode.json'));
assert.ok((opencodeProject.mcp as Record<string, unknown>)['global-http']);
const hermesProject = await fs.readFile(path.join(workspacePath, '.hermes', 'config.yaml'), 'utf8');
assert.match(hermesProject, /^mcp_servers:\n/m);
assert.match(hermesProject, /^\s+global-http:\n/m);
assert.match(hermesProject, /^\s+url: "https:\/\/global\.example\.com\/mcp"\n/m);
const cursorProject = await readJson(path.join(workspacePath, '.cursor', 'mcp.json'));
assert.ok((cursorProject.mcpServers as Record<string, unknown>)['global-http']);
@@ -377,4 +382,3 @@ test('providerMcpService global adder writes to all providers and rejects unsupp
await fs.rm(tempRoot, { recursive: true, force: true });
}
});

View File

@@ -1,51 +0,0 @@
import type { WebSocket } from 'ws';
import { desktopAgentRelay } from '@/modules/computer-use/index.js';
import type { AuthenticatedWebSocketRequest } from '@/shared/types.js';
import { parseIncomingJsonObject } from '@/shared/utils.js';
/**
 * Serves the `/desktop-agent` websocket, the inbound side of the cloud
 * Computer Use relay.
 *
 * A linked desktop app connects here and sends a `register` frame to become
 * the executor for this environment; only the first `register` frame on a
 * connection is honored. Afterwards the agent reports action outcomes as
 * `computer_relay_result` frames, which are handed to `desktopAgentRelay`
 * keyed by their `id`. The frame type is read from `kind`, falling back to
 * `type`.
 */
export function handleDesktopAgentConnection(
  ws: WebSocket,
  request: AuthenticatedWebSocketRequest
): void {
  let hasRegistered = false;

  ws.on('message', (rawMessage) => {
    const frame = parseIncomingJsonObject(rawMessage);
    if (!frame) {
      return;
    }

    let kind = '';
    if (typeof frame.kind === 'string') {
      kind = frame.kind;
    } else if (typeof frame.type === 'string') {
      kind = frame.type;
    }

    if (kind === 'register' && !hasRegistered) {
      // Label preference: explicit non-blank label, then the authenticated
      // username, then a generic fallback.
      let label = 'desktop-agent';
      const requestedLabel = typeof frame.label === 'string' ? frame.label.trim() : '';
      if (requestedLabel) {
        label = requestedLabel;
      } else if (request.user?.username) {
        label = `desktop:${request.user.username}`;
      }

      hasRegistered = true;
      console.log('[INFO] Desktop agent websocket registered:', label);
      desktopAgentRelay.register(ws, label);
      return;
    }

    if (kind === 'computer_relay_result' && typeof frame.id === 'string') {
      const payload = frame as Record<string, unknown>;
      const result = payload.result;
      // Only string errors are forwarded; anything else is reported as no error.
      const errorText = typeof payload.error === 'string' ? payload.error : undefined;
      desktopAgentRelay.handleResult(frame.id, result, errorText);
    }
  });

  ws.on('close', () => {
    console.log('[INFO] Desktop agent websocket disconnected');
  });
}

View File

@@ -6,7 +6,6 @@ import { handleChatConnection } from '@/modules/websocket/services/chat-websocke
import { verifyWebSocketClient } from '@/modules/websocket/services/websocket-auth.service.js';
import { handlePluginWsProxy } from '@/modules/websocket/services/plugin-websocket-proxy.service.js';
import { handleShellConnection } from '@/modules/websocket/services/shell-websocket.service.js';
import { handleDesktopAgentConnection } from '@/modules/websocket/services/desktop-agent-websocket.service.js';
import { handleDesktopNotificationsConnection } from '@/modules/notifications/index.js';
import type { AuthenticatedWebSocketRequest } from '@/shared/types.js';
@@ -65,11 +64,6 @@ export function createWebSocketServer(
return;
}
if (pathname === '/desktop-agent') {
handleDesktopAgentConnection(ws, incomingRequest);
return;
}
if (pathname === '/desktop-notifications') {
handleDesktopNotificationsConnection(ws, incomingRequest);
return;

View File

@@ -10,12 +10,14 @@ import { spawnCursor } from '../cursor-cli.js';
import { queryCodex } from '../openai-codex.js';
import { spawnGemini } from '../gemini-cli.js';
import { spawnOpenCode } from '../opencode-cli.js';
import { spawnHermes } from '../hermes-cli.js';
import { Octokit } from '@octokit/rest';
import { providerModelsService } from '../modules/providers/services/provider-models.service.js';
import { IS_PLATFORM } from '../constants/config.js';
import { normalizeProjectPath } from '../shared/utils.js';
const router = express.Router();
const HERMES_CONFIGURED_MODEL = '__hermes_configured_model__';
/**
* Middleware to authenticate agent API requests.
@@ -636,7 +638,7 @@ class ResponseCollector {
* - Source for auto-generated branch names (if createBranch=true and no branchName)
* - Fallback for PR title if no commits are made
*
* @param {string} provider - (Optional) AI provider to use. Options: 'claude' | 'cursor' | 'codex' | 'gemini' | 'opencode'
* @param {string} provider - (Optional) AI provider to use. Options: 'claude' | 'cursor' | 'codex' | 'gemini' | 'opencode' | 'hermes'
* Default: 'claude'
*
* @param {boolean} stream - (Optional) Enable Server-Sent Events (SSE) streaming for real-time updates.
@@ -754,7 +756,7 @@ class ResponseCollector {
* Input Validations (400 Bad Request):
* - Either githubUrl OR projectPath must be provided (not neither)
* - message must be non-empty string
* - provider must be 'claude', 'cursor', 'codex', 'gemini', or 'opencode'
* - provider must be 'claude', 'cursor', 'codex', 'gemini', 'opencode', or 'hermes'
* - createBranch/createPR requires githubUrl OR projectPath (not neither)
* - branchName must pass Git naming rules (if provided)
*
@@ -862,8 +864,8 @@ router.post('/', validateExternalApiKey, async (req, res) => {
return res.status(400).json({ error: 'message is required' });
}
if (!['claude', 'cursor', 'codex', 'gemini', 'opencode'].includes(provider)) {
return res.status(400).json({ error: 'provider must be "claude", "cursor", "codex", "gemini", or "opencode"' });
if (!['claude', 'cursor', 'codex', 'gemini', 'opencode', 'hermes'].includes(provider)) {
return res.status(400).json({ error: 'provider must be "claude", "cursor", "codex", "gemini", "opencode", or "hermes"' });
}
// Validate GitHub branch/PR creation requirements
@@ -944,6 +946,7 @@ router.post('/', validateExternalApiKey, async (req, res) => {
const codexModels = (await providerModelsService.getProviderModels('codex')).models;
const geminiModels = (await providerModelsService.getProviderModels('gemini')).models;
const opencodeModels = (await providerModelsService.getProviderModels('opencode')).models;
const hermesModels = (await providerModelsService.getProviderModels('hermes')).models;
// Start the appropriate session
if (provider === 'claude') {
@@ -996,6 +999,15 @@ router.post('/', validateExternalApiKey, async (req, res) => {
sessionId: sessionId || null,
model: model || opencodeModels.DEFAULT
}, writer);
} else if (provider === 'hermes') {
console.log('Starting Hermes ACP session');
await spawnHermes(message.trim(), {
projectPath: finalProjectPath,
cwd: finalProjectPath,
sessionId: sessionId || null,
model: model || (hermesModels.DEFAULT === HERMES_CONFIGURED_MODEL ? undefined : hermesModels.DEFAULT)
}, writer);
}
// Handle GitHub branch and PR creation after successful agent completion

View File

@@ -15,7 +15,7 @@ const APP_ROOT = findAppRoot(__dirname);
const router = express.Router();
const MODEL_PROVIDERS = ["claude", "cursor", "codex", "gemini", "opencode"];
const MODEL_PROVIDERS = ["claude", "cursor", "codex", "gemini", "opencode", "hermes"];
const MODEL_PROVIDER_LABELS = {
claude: "Claude",
@@ -23,6 +23,7 @@ const MODEL_PROVIDER_LABELS = {
codex: "Codex",
gemini: "Gemini",
opencode: "OpenCode",
hermes: "Hermes",
};
const readModelProvider = (value) => {

View File

@@ -5,6 +5,10 @@ import type {
McpScope,
NormalizedMessage,
ProviderSkill,
ProviderSkillRegistryActionResult,
ProviderSkillRegistryInstallInput,
ProviderSkillRegistrySearchOptions,
ProviderSkillRegistrySearchResult,
ProviderSkillListOptions,
ProviderAuthStatus,
ProviderChangeActiveModelInput,
@@ -116,6 +120,21 @@ export interface IProviderSkills {
removeSkill(
input: ProviderSkillRemoveInput,
): Promise<{ removed: boolean; provider: LLMProvider; directoryName: string }>;
searchRegistry?(
query: string,
options?: ProviderSkillRegistrySearchOptions,
): Promise<ProviderSkillRegistrySearchResult[]>;
installRegistrySkill?(input: ProviderSkillRegistryInstallInput): Promise<ProviderSkillRegistryActionResult>;
uninstallRegistrySkill?(name: string): Promise<ProviderSkillRegistryActionResult>;
checkRegistryUpdates?(): Promise<ProviderSkillRegistryActionResult>;
updateRegistrySkills?(): Promise<ProviderSkillRegistryActionResult>;
auditRegistrySkills?(): Promise<ProviderSkillRegistryActionResult>;
}
// ---------------------------

View File

@@ -0,0 +1,83 @@
// requestId -> { resolver, sessionId, provider, meta, receivedAt }
const pendingApprovals = new Map();

// Approvals older than this (30 minutes) are dropped by the sweep.
const APPROVAL_MAX_AGE_MS = 30 * 60 * 1000;

/**
 * Converts a stored `receivedAt` value to epoch milliseconds.
 *
 * `registerApproval` stores whatever the caller supplied in
 * `meta.receivedAt` / `meta._receivedAt`, so the value may be a Date, a
 * numeric timestamp or a date string.
 *
 * @param {unknown} receivedAt
 * @returns {number} Epoch milliseconds, or 0 when the value cannot be interpreted.
 */
function toEpochMs(receivedAt) {
  let ms = Number.NaN;
  if (receivedAt instanceof Date) {
    ms = receivedAt.getTime();
  } else if (typeof receivedAt === 'number') {
    ms = receivedAt;
  } else if (typeof receivedAt === 'string') {
    ms = Date.parse(receivedAt);
  }
  return Number.isFinite(ms) ? ms : 0;
}

/**
 * Drops approvals whose run died without resolving them (WS disconnect,
 * process crash) so their captured payloads/closures don't accumulate
 * unbounded.
 *
 * Entries are removed without invoking their resolver. Entries whose
 * timestamp cannot be interpreted (or is 0) are kept.
 *
 * @param {number} [now=Date.now()] Reference time in epoch milliseconds.
 */
function sweepExpiredApprovals(now = Date.now()) {
  for (const [requestId, entry] of pendingApprovals) {
    const receivedAt = toEpochMs(entry.receivedAt);
    if (receivedAt && now - receivedAt > APPROVAL_MAX_AGE_MS) {
      pendingApprovals.delete(requestId);
    }
  }
}
/**
 * Removes every pending approval registered for the given session.
 * Resolvers of the removed approvals are not invoked.
 *
 * @param {string} sessionId Session whose approvals are dropped; a falsy value is a no-op.
 */
function clearApprovalsForSession(sessionId) {
  if (sessionId) {
    const matchingIds = [];
    pendingApprovals.forEach((entry, requestId) => {
      if (entry.sessionId === sessionId) {
        matchingIds.push(requestId);
      }
    });
    matchingIds.forEach((requestId) => pendingApprovals.delete(requestId));
  }
}
/**
 * Records a pending tool approval so it can be listed and resolved later.
 *
 * Nothing is stored when `requestId` is falsy or `resolver` is not a
 * function. Expired approvals are swept before the new one is added.
 *
 * @param {string} requestId Key under which the approval is stored.
 * @param {object} [details]
 * @param {Function} details.resolver Invoked with the decision when the approval is resolved.
 * @param {string|null} [details.sessionId=null] Session the approval belongs to.
 * @param {string|null} [details.provider=null] Provider that requested the approval.
 * @param {object} [details.meta={}] Request metadata; `receivedAt` / `_receivedAt`
 *   supply the stored timestamp, which otherwise defaults to the current time.
 */
function registerApproval(requestId, { resolver, sessionId = null, provider = null, meta = {} } = {}) {
  const isRegistrable = Boolean(requestId) && typeof resolver === 'function';
  if (isRegistrable) {
    sweepExpiredApprovals();
    const receivedAt = meta.receivedAt || meta._receivedAt || new Date();
    const entry = { resolver, sessionId, provider, meta, receivedAt };
    pendingApprovals.set(requestId, entry);
  }
}
/**
 * Forgets a pending approval without invoking its resolver.
 * Unknown request ids are ignored.
 *
 * @param {string} requestId Key the approval was registered under.
 */
function unregisterApproval(requestId) {
  if (pendingApprovals.has(requestId)) {
    pendingApprovals.delete(requestId);
  }
}
/**
 * Delivers a decision to the resolver of a pending approval.
 *
 * The approval is removed from the pending set before its resolver runs, so
 * it is settled at most once and is no longer reported as pending afterwards.
 * A later `unregisterApproval` for the same id is a harmless no-op.
 *
 * @param {string} requestId Key the approval was registered under.
 * @param {unknown} decision Value passed to the stored resolver.
 * @returns {boolean} `true` when a pending approval was found and resolved,
 *   `false` when the id is unknown or was already resolved.
 */
function resolveToolApproval(requestId, decision) {
  const entry = pendingApprovals.get(requestId);
  if (!entry) {
    return false;
  }

  // Remove first: a duplicate decision must not invoke the resolver again.
  pendingApprovals.delete(requestId);
  entry.resolver(decision);
  return true;
}
/**
 * Lists the pending approvals registered for a session, in registration order.
 *
 * Tool name, input and context are read from each entry's `meta`, accepting
 * both the plain and the underscore-prefixed key; a missing tool name becomes
 * 'UnknownTool'. Resolvers are not included in the result.
 *
 * @param {string} sessionId Session to list approvals for.
 * @returns {Array<object>} One descriptor per matching approval.
 */
function getPendingApprovalsForSession(sessionId) {
  return Array.from(pendingApprovals.entries())
    .filter(([, entry]) => entry.sessionId === sessionId)
    .map(([requestId, { meta, provider, receivedAt }]) => ({
      requestId,
      toolName: meta.toolName || meta._toolName || 'UnknownTool',
      input: meta.input ?? meta._input,
      context: meta.context ?? meta._context,
      sessionId,
      provider,
      receivedAt,
    }));
}
export {
registerApproval,
unregisterApproval,
resolveToolApproval,
getPendingApprovalsForSession,
clearApprovalsForSession,
};

View File

@@ -65,7 +65,7 @@ export type AuthenticatedWebSocketRequest = IncomingMessage & {
* Use this as the source of truth whenever a function or payload needs to identify
* a specific LLM integration.
*/
export type LLMProvider = 'claude' | 'codex' | 'gemini' | 'cursor' | 'opencode';
export type LLMProvider = 'claude' | 'codex' | 'gemini' | 'cursor' | 'opencode' | 'hermes';
/**
* One selectable model row in a provider model catalog.
@@ -365,6 +365,32 @@ export type ProviderSkillRemoveInput = {
directoryName: string;
};
export type ProviderSkillRegistrySearchOptions = {
source?: string;
limit?: number;
};
export type ProviderSkillRegistrySearchResult = {
name: string;
identifier: string;
source?: string;
trustLevel?: string;
description?: string;
};
export type ProviderSkillRegistryInstallInput = {
identifier: string;
category?: string;
name?: string;
force?: boolean;
};
export type ProviderSkillRegistryActionResult = {
ok: boolean;
stdout: string;
stderr: string;
};
/**
* Normalized skill record returned by provider skill adapters.
*

View File

@@ -39,6 +39,7 @@ interface UseChatComposerStateArgs {
codexModel: string;
geminiModel: string;
opencodeModel: string;
hermesModel: string;
isLoading: boolean;
canAbortSession: boolean;
tokenBudget: Record<string, unknown> | null;
@@ -173,6 +174,7 @@ export function useChatComposerState({
codexModel,
geminiModel,
opencodeModel,
hermesModel,
isLoading,
canAbortSession,
tokenBudget,
@@ -336,6 +338,8 @@ export function useChatComposerState({
? geminiModel
: provider === 'opencode'
? opencodeModel
: provider === 'hermes'
? (hermesModel === '__hermes_configured_model__' ? undefined : hermesModel)
: claudeModel,
tokenUsage: tokenBudget,
};
@@ -391,6 +395,7 @@ export function useChatComposerState({
cursorModel,
geminiModel,
opencodeModel,
hermesModel,
handleBuiltInCommand,
handleCustomCommand,
input,
@@ -703,6 +708,8 @@ export function useChatComposerState({
? 'gemini-settings'
: provider === 'opencode'
? 'opencode-settings'
: provider === 'hermes'
? 'hermes-settings'
: 'claude-settings';
const savedSettings = safeLocalStorage.getItem(settingsKey);
if (savedSettings) {
@@ -729,6 +736,8 @@ export function useChatComposerState({
? geminiModel
: provider === 'opencode'
? opencodeModel
: provider === 'hermes'
? (hermesModel === '__hermes_configured_model__' ? undefined : hermesModel)
: claudeModel;
// One message shape for every provider. The backend resolves the
@@ -774,6 +783,7 @@ export function useChatComposerState({
executeCommand,
geminiModel,
opencodeModel,
hermesModel,
isLoading,
onSessionProcessing,
onSessionEstablished,

View File

@@ -207,6 +207,15 @@ export function normalizedToChatMessages(messages: NormalizedMessage[]): ChatMes
break;
}
// A result with a toolId but no matching tool_use in the loaded set is
// almost always a tool_use/tool_result pair split across a pagination
// boundary (older page not loaded yet). Rendering its raw content here
// produces an unstyled dump that "fixes itself" once the older page
// loads; skip it and let it attach to its tool_use when that arrives.
if (msg.toolId) {
break;
}
const content = formatToolResultContent(msg.content || '');
if (!content.trim()) {
break;

View File

@@ -15,6 +15,7 @@ const FALLBACK_DEFAULT_MODEL: Record<LLMProvider, string> = {
codex: 'gpt-5.4',
gemini: 'gemini-3.1-pro-preview',
opencode: 'anthropic/claude-sonnet-4-5',
hermes: '__hermes_configured_model__',
};
/**
@@ -29,6 +30,7 @@ const FALLBACK_PERMISSION_MODES: Record<LLMProvider, PermissionMode[]> = {
codex: ['default', 'acceptEdits', 'bypassPermissions'],
gemini: ['default', 'acceptEdits', 'bypassPermissions', 'plan'],
opencode: ['default'],
hermes: ['default'],
};
type ProviderCapabilities = {
@@ -93,6 +95,9 @@ export function useChatProviderState({ selectedSession, selectedProject }: UseCh
const [opencodeModel, setOpenCodeModel] = useState<string>(() => {
return localStorage.getItem('opencode-model') || FALLBACK_DEFAULT_MODEL.opencode;
});
const [hermesModel, setHermesModel] = useState<string>(() => {
return localStorage.getItem('hermes-model') || FALLBACK_DEFAULT_MODEL.hermes;
});
/**
* Backend-owned capability matrix keyed by provider. Drives the permission
@@ -141,12 +146,20 @@ export function useChatProviderState({ selectedSession, selectedProject }: UseCh
return;
}
setOpenCodeModel(model);
localStorage.setItem('opencode-model', model);
if (targetProvider === 'opencode') {
setOpenCodeModel(model);
localStorage.setItem('opencode-model', model);
return;
}
if (targetProvider === 'hermes') {
setHermesModel(model);
localStorage.setItem('hermes-model', model);
}
}, []);
const loadProviderModels = useCallback(async (options: { bypassCache?: boolean } = {}) => {
const providers: LLMProvider[] = ['claude', 'cursor', 'codex', 'gemini', 'opencode'];
const providers: LLMProvider[] = ['claude', 'cursor', 'codex', 'gemini', 'opencode', 'hermes'];
const requestId = providerModelsRequestIdRef.current + 1;
providerModelsRequestIdRef.current = requestId;
const isHardRefresh = options.bypassCache === true;
@@ -324,6 +337,19 @@ export function useChatProviderState({ selectedSession, selectedProject }: UseCh
}
}, [providerModelCatalog.opencode, opencodeModel]);
useEffect(() => {
const hermes = providerModelCatalog.hermes;
if (hermes) {
const next = pickStoredOrCurrent('hermes-model', hermesModel, hermes);
if (next !== hermesModel) {
setHermesModel(next);
}
if (localStorage.getItem('hermes-model') !== next) {
localStorage.setItem('hermes-model', next);
}
}
}, [providerModelCatalog.hermes, hermesModel]);
useEffect(() => {
if (!selectedSession?.id) {
return;
@@ -434,6 +460,8 @@ export function useChatProviderState({ selectedSession, selectedProject }: UseCh
setGeminiModel,
opencodeModel,
setOpenCodeModel,
hermesModel,
setHermesModel,
permissionMode,
setPermissionMode,
pendingPermissionRequests,

View File

@@ -4,7 +4,7 @@ import type { Project } from '../../../types/app';
import type { SubagentChildTool } from '../types/types';
import { getToolConfig } from './configs/toolConfigs';
import { OneLineDisplay, CollapsibleDisplay, ToolDiffViewer, MarkdownContent, FileListContent, TodoListContent, TaskListContent, TextContent, QuestionAnswerContent, SubagentContainer } from './components';
import { OneLineDisplay, BashCommandDisplay, CollapsibleDisplay, ToolDiffViewer, MarkdownContent, FileListContent, TodoListContent, TaskListContent, TextContent, QuestionAnswerContent, SubagentContainer } from './components';
import { PlanDisplay } from './components/PlanDisplay';
import { ToolStatusBadge } from './components/ToolStatusBadge';
import type { ToolStatus } from './components/ToolStatusBadge';
@@ -125,6 +125,39 @@ export const ToolRenderer: React.FC<ToolRendererProps> = memo(({
if (!displayConfig) return null;
// Bash renders as a Codex-style command row: the command on a single line with
// a chevron that expands to show the output inline. The combined view lives on
// the input render; the separate result section is suppressed in MessageComponent.
if (toolName === 'Bash' && mode === 'input') {
const command = typeof parsedData === 'object' && parsedData !== null && 'command' in parsedData
? String(parsedData.command || '')
: typeof toolInput === 'string'
? toolInput
: typeof rawToolInput === 'string'
? rawToolInput
: '';
const description = typeof parsedData === 'object' && parsedData !== null && 'description' in parsedData
? String(parsedData.description || '')
: undefined;
const output = typeof toolResult?.content === 'string'
? toolResult.content
: toolResult?.content != null
? String(toolResult.content)
: '';
return (
<BashCommandDisplay
command={command}
description={description}
output={output}
isError={Boolean(toolResult?.isError)}
status={toolStatus !== 'completed' ? toolStatus : undefined}
// Commands stay collapsed by default (even consecutive ones); only
// failures auto-expand so they remain visible.
defaultOpen={false}
/>
);
}
if (displayConfig.type === 'one-line') {
const value = displayConfig.getValue?.(parsedData) || '';
const secondary = displayConfig.getSecondary?.(parsedData);

View File

@@ -0,0 +1,156 @@
import React, { useEffect, useRef, useState } from 'react';
import { ChevronRight, Copy, Check } from 'lucide-react';
import { cn } from '../../../../lib/utils';
import { copyTextToClipboard } from '../../../../utils/clipboard';
import { ToolStatusBadge } from './ToolStatusBadge';
import type { ToolStatus } from './ToolStatusBadge';
/** Props accepted by the BashCommandDisplay command row. */
interface BashCommandDisplayProps {
/** Command text shown in the row header; also the text placed on the clipboard by the copy button. */
command: string;
/** Optional caption, shown under the header while collapsed and above the output while expanded. */
description?: string;
/** Combined stdout/stderr from the tool result (empty while running). */
output?: string;
/** Switches the border and output text to the red error styling, and auto-expands the row once output exists. */
isError?: boolean;
/** 'running' renders a spinner in the header; any other provided status renders a ToolStatusBadge. */
status?: ToolStatus;
/** When true, the row expands automatically the first time there is output to show. */
defaultOpen?: boolean;
}
/**
 * Codex-in-VSCode style command row: a compact, single-line command with a
 * chevron on the left. When the command produced output, the row becomes a
 * dropdown that expands to reveal the output inline. Theme-integrated surfaces
 * keep it clean in both light and dark mode; consecutive commands stack tightly
 * into a clean list.
 *
 * Behavior notes:
 * - The row is only interactive (button role, focusable, toggles on click /
 *   Enter / Space) when there is non-empty output after trailing whitespace
 *   has been stripped.
 * - The row auto-expands at most once, when output is present and either
 *   `defaultOpen` or `isError` is set. After that the user owns the toggle.
 * - The copy button copies `command` (not the output) and shows a checkmark
 *   for two seconds after a successful copy.
 */
export const BashCommandDisplay: React.FC<BashCommandDisplayProps> = ({
  command,
  description,
  output,
  isError = false,
  status,
  defaultOpen = false,
}) => {
  const trimmedOutput = (output || '').replace(/\s+$/, '');
  const hasOutput = trimmedOutput.length > 0;
  const outputLineCount = hasOutput ? trimmedOutput.split('\n').length : 0;
  const isRunning = status === 'running';

  // Auto-open intent: only meaningful once there is something to show.
  const shouldAutoOpen = hasOutput && (defaultOpen || isError);

  // Seed the state from the auto-open intent so a row that mounts with output
  // already available (e.g. a failed command in a loaded session) renders
  // expanded on the first paint instead of flashing collapsed and reopening
  // from an effect.
  const [open, setOpen] = useState(shouldAutoOpen);
  const [copied, setCopied] = useState(false);

  // Output (and errors) often arrive after this component first mounts, so apply
  // the auto-open intent once when there is finally something to show. After that
  // the user is in control of the toggle.
  const autoAppliedRef = useRef(shouldAutoOpen);
  useEffect(() => {
    if (!autoAppliedRef.current && shouldAutoOpen) {
      autoAppliedRef.current = true;
      setOpen(true);
    }
  }, [shouldAutoOpen]);

  // Handle of the pending "copied" reset timer, so repeated clicks restart the
  // two-second window instead of stacking timers, and so the timer can be
  // cleared when the row unmounts.
  const copiedTimerRef = useRef<ReturnType<typeof setTimeout> | null>(null);
  useEffect(
    () => () => {
      if (copiedTimerRef.current !== null) {
        clearTimeout(copiedTimerRef.current);
        copiedTimerRef.current = null;
      }
    },
    [],
  );

  const toggle = () => {
    if (hasOutput) {
      setOpen((prev) => !prev);
    }
  };

  const handleCopy = async (event: React.MouseEvent) => {
    // Keep the click from also toggling the surrounding row.
    event.stopPropagation();
    const didCopy = await copyTextToClipboard(command);
    if (!didCopy) return;
    setCopied(true);
    if (copiedTimerRef.current !== null) {
      clearTimeout(copiedTimerRef.current);
    }
    copiedTimerRef.current = setTimeout(() => {
      copiedTimerRef.current = null;
      setCopied(false);
    }, 2000);
  };

  return (
    <div
      className={cn(
        'group/cmd overflow-hidden rounded-lg border bg-muted/40 backdrop-blur-sm transition-all duration-200',
        isError ? 'border-red-500/30' : 'border-border/60',
        hasOutput && !open && 'hover:border-border hover:bg-muted/60',
        open && 'bg-muted/50 shadow-sm',
      )}
    >
      {/* Command header — clickable when there is output to expand */}
      <div
        role={hasOutput ? 'button' : undefined}
        tabIndex={hasOutput ? 0 : undefined}
        aria-expanded={hasOutput ? open : undefined}
        onClick={toggle}
        onKeyDown={(event) => {
          if (hasOutput && (event.key === 'Enter' || event.key === ' ')) {
            event.preventDefault();
            toggle();
          }
        }}
        className={cn(
          'flex items-center gap-2 px-2.5 py-1.5 outline-none',
          hasOutput && 'cursor-pointer focus-visible:ring-1 focus-visible:ring-ring',
        )}
      >
        <ChevronRight
          className={cn(
            'h-3.5 w-3.5 flex-shrink-0 text-muted-foreground/70 transition-transform duration-200',
            open && 'rotate-90',
            !hasOutput && 'opacity-0',
          )}
        />
        <span className="flex-shrink-0 select-none font-mono text-xs font-semibold text-emerald-500 dark:text-emerald-400">
          $
        </span>
        <code
          className={cn(
            'min-w-0 flex-1 font-mono text-xs text-foreground',
            open ? 'whitespace-pre-wrap break-all' : 'truncate',
          )}
        >
          {command}
        </code>
        {isRunning && (
          <span className="h-2.5 w-2.5 flex-shrink-0 animate-spin rounded-full border-[1.5px] border-muted-foreground/30 border-t-emerald-400" />
        )}
        {status && status !== 'running' && <ToolStatusBadge status={status} className="flex-shrink-0" />}
        {!open && hasOutput && !isRunning && (
          <span className="flex-shrink-0 text-[10px] tabular-nums text-muted-foreground/70 transition-opacity group-hover/cmd:opacity-0">
            {outputLineCount} {outputLineCount === 1 ? 'line' : 'lines'}
          </span>
        )}
        {/* type="button" so the control never acts as a form submit button. */}
        <button
          type="button"
          onClick={handleCopy}
          // Stop key events here so Enter/Space on the copy button does not
          // also toggle the surrounding row.
          onKeyDown={(event) => event.stopPropagation()}
          className="flex-shrink-0 rounded p-0.5 text-muted-foreground/60 opacity-0 transition-all hover:bg-foreground/10 hover:text-foreground focus:opacity-100 group-hover/cmd:opacity-100"
          title="Copy command"
          aria-label="Copy command"
        >
          {copied ? <Check className="h-3.5 w-3.5 text-emerald-500" /> : <Copy className="h-3.5 w-3.5" />}
        </button>
      </div>
      {description && !open && (
        <div className="truncate px-2.5 pb-1.5 pl-[2.4rem] text-[11px] italic text-muted-foreground/70">
          {description}
        </div>
      )}
      {/* Expanded output */}
      {open && hasOutput && (
        <div className="settings-content-enter border-t border-border/50 bg-background/50">
          {description && (
            <div className="px-3 pt-2 text-[11px] italic text-muted-foreground/70">{description}</div>
          )}
          <pre
            className={cn(
              'max-h-80 overflow-auto whitespace-pre-wrap break-all px-3 py-2 font-mono text-xs leading-relaxed',
              isError ? 'text-red-600 dark:text-red-400' : 'text-muted-foreground',
            )}
          >
            {trimmedOutput}
          </pre>
        </div>
      )}
    </div>
  );
};

View File

@@ -0,0 +1,77 @@
import test from 'node:test';
import assert from 'node:assert/strict';
import React from 'react';
import { renderToStaticMarkup } from 'react-dom/server';
import { QuestionAnswerContent } from './QuestionAnswerContent';
// Regression coverage for the chat-interface crash where an AskUserQuestion
// payload loaded from a session transcript arrives with a non-array `questions`
// or a question missing its `options` array. Rendering must degrade gracefully
// instead of throwing "TypeError: e.map is not a function".
// Every case below renders the component to static markup with a (possibly
// malformed) props object, so the render call lives in one place.
const renderQuestionAnswer = (props: React.ComponentProps<typeof QuestionAnswerContent>): string =>
  renderToStaticMarkup(React.createElement(QuestionAnswerContent, props));

test('renders without throwing when questions is a non-array value', () => {
  // Malformed: object instead of an array
  const questions = { 0: { question: 'q?', options: [{ label: 'a' }] } } as never;
  assert.doesNotThrow(() => {
    renderQuestionAnswer({ questions, answers: {} });
  });
});

test('renders without throwing when a question is missing options[]', () => {
  const questionWithoutOptions = { question: 'Pick one?', header: 'H' } as never;
  assert.doesNotThrow(() => {
    renderQuestionAnswer({
      questions: [questionWithoutOptions],
      answers: { 'Pick one?': 'X' },
    });
  });
});

test('renders without throwing when options[] contains malformed entries', () => {
  // Only the last option is a valid `{ label }` object.
  const questionWithBadOptions = { question: 'Pick one?', options: [null, 'oops', { label: 'A' }] } as never;
  assert.doesNotThrow(() => {
    renderQuestionAnswer({
      questions: [questionWithBadOptions],
      answers: { 'Pick one?': 'A, Custom' },
    });
  });
});

test('renders without throwing when a questions entry is null/non-object', () => {
  const questions = [null, 'oops', { question: 'Ok?', options: [{ label: 'A' }] }] as never;
  assert.doesNotThrow(() => {
    renderQuestionAnswer({ questions, answers: {} });
  });
});

test('renders without throwing when an answer is a non-string value', () => {
  // Malformed: answer is an object instead of the expected string
  const answers = { 'Pick one?': { unexpected: true } } as never;
  assert.doesNotThrow(() => {
    renderQuestionAnswer({
      questions: [{ question: 'Pick one?', options: [{ label: 'A' }] }],
      answers,
    });
  });
});

test('still renders a well-formed question + answer', () => {
  const markup = renderQuestionAnswer({
    questions: [{ question: 'Pick one?', header: 'H', options: [{ label: 'A' }, { label: 'B' }] }],
    answers: { 'Pick one?': 'A' },
  });
  assert.ok(markup.includes('Pick one?'));
});

View File

@@ -15,7 +15,11 @@ export const QuestionAnswerContent: React.FC<QuestionAnswerContentProps> = ({
}) => {
const [expandedIdx, setExpandedIdx] = useState<number | null>(null);
if (!questions || questions.length === 0) {
// Tool inputs are runtime data loaded from session transcripts and may be
// malformed (e.g. `questions` arriving as a non-array). Guard with
// Array.isArray so a single bad payload can't crash the whole chat view
// with "e.map is not a function".
if (!Array.isArray(questions) || questions.length === 0) {
return null;
}
@@ -24,11 +28,23 @@ export const QuestionAnswerContent: React.FC<QuestionAnswerContentProps> = ({
return (
<div className={`space-y-2 ${className}`}>
{questions.map((q, idx) => {
{questions.map((rawQuestion, idx) => {
// Entries come from session transcripts and may be malformed; skip
// anything that isn't a proper question object with a string prompt.
if (!rawQuestion || typeof rawQuestion !== 'object' || typeof rawQuestion.question !== 'string') {
return null;
}
const q = rawQuestion;
const answer = answers?.[q.question];
const answerLabels = answer ? answer.split(', ') : [];
// `answer` may be a non-string (or absent) in malformed payloads.
const answerLabels = typeof answer === 'string' ? answer.split(', ') : [];
const skipped = !answer;
const isExpanded = expandedIdx === idx;
// `options` is typed as an array but comes from untrusted runtime data;
// keep only valid entries so `.some`/`.map` below never throw.
const options = Array.isArray(q.options)
? q.options.filter((opt) => opt && typeof opt === 'object' && typeof opt.label === 'string')
: [];
return (
<div
@@ -74,7 +90,7 @@ export const QuestionAnswerContent: React.FC<QuestionAnswerContentProps> = ({
{!isExpanded && answerLabels.length > 0 && (
<div className="mt-1.5 flex flex-wrap gap-1">
{answerLabels.map((lbl) => {
const isCustom = !q.options.some(o => o.label === lbl);
const isCustom = !options.some(o => o.label === lbl);
return (
<span
key={lbl}
@@ -110,7 +126,7 @@ export const QuestionAnswerContent: React.FC<QuestionAnswerContentProps> = ({
{isExpanded && (
<div className="border-t border-gray-100 px-3 pb-2.5 pt-0.5 dark:border-gray-700/40">
<div className="ml-6.5 space-y-1">
{q.options.map((opt) => {
{options.map((opt) => {
const wasSelected = answerLabels.includes(opt.label);
return (
<div
@@ -148,7 +164,7 @@ export const QuestionAnswerContent: React.FC<QuestionAnswerContentProps> = ({
);
})}
{answerLabels.filter(lbl => !q.options.some(o => o.label === lbl)).map(lbl => (
{answerLabels.filter(lbl => !options.some(o => o.label === lbl)).map(lbl => (
<div
key={lbl}
className="flex items-start gap-2 rounded-lg border border-blue-200/60 bg-blue-50/80 px-2.5 py-1.5 text-[12px] dark:border-blue-800/40 dark:bg-blue-900/20"

View File

@@ -1,6 +1,7 @@
export { CollapsibleSection } from './CollapsibleSection';
export { ToolDiffViewer } from './ToolDiffViewer';
export { OneLineDisplay } from './OneLineDisplay';
export { BashCommandDisplay } from './BashCommandDisplay';
export { CollapsibleDisplay } from './CollapsibleDisplay';
export { SubagentContainer } from './SubagentContainer';
export * from './ContentRenderers';

View File

@@ -75,6 +75,8 @@ function ChatInterface({
setGeminiModel,
opencodeModel,
setOpenCodeModel,
hermesModel,
setHermesModel,
permissionMode,
pendingPermissionRequests,
setPendingPermissionRequests,
@@ -201,6 +203,7 @@ function ChatInterface({
codexModel,
geminiModel,
opencodeModel,
hermesModel,
isLoading: isProcessing,
canAbortSession,
tokenBudget,
@@ -293,7 +296,9 @@ function ChatInterface({
? t('messageTypes.gemini')
: provider === 'opencode'
? t('messageTypes.opencode', { defaultValue: 'OpenCode' })
: t('messageTypes.claude');
: provider === 'hermes'
? t('messageTypes.hermes', { defaultValue: 'Hermes' })
: t('messageTypes.claude');
return (
<div className="flex h-full items-center justify-center">
@@ -334,6 +339,8 @@ function ChatInterface({
setGeminiModel={setGeminiModel}
opencodeModel={opencodeModel}
setOpenCodeModel={setOpenCodeModel}
hermesModel={hermesModel}
setHermesModel={setHermesModel}
providerModelCatalog={providerModelCatalog}
providerModelsLoading={providerModelsLoading}
tasksEnabled={tasksEnabled}
@@ -425,7 +432,9 @@ function ChatInterface({
? t('messageTypes.gemini')
: provider === 'opencode'
? t('messageTypes.opencode', { defaultValue: 'OpenCode' })
: t('messageTypes.claude'),
: provider === 'hermes'
? t('messageTypes.hermes', { defaultValue: 'Hermes' })
: t('messageTypes.claude'),
})}
isTextareaExpanded={isTextareaExpanded}
sendByCtrlEnter={sendByCtrlEnter}

View File

@@ -39,6 +39,8 @@ interface ChatMessagesPaneProps {
setGeminiModel: (model: string) => void;
opencodeModel: string;
setOpenCodeModel: (model: string) => void;
hermesModel: string;
setHermesModel: (model: string) => void;
providerModelCatalog: Partial<Record<LLMProvider, ProviderModelsDefinition>>;
providerModelsLoading: boolean;
tasksEnabled: boolean;
@@ -89,6 +91,8 @@ function ChatMessagesPane({
setGeminiModel,
opencodeModel,
setOpenCodeModel,
hermesModel,
setHermesModel,
providerModelCatalog,
providerModelsLoading,
tasksEnabled,
@@ -177,6 +181,8 @@ function ChatMessagesPane({
setGeminiModel={setGeminiModel}
opencodeModel={opencodeModel}
setOpenCodeModel={setOpenCodeModel}
hermesModel={hermesModel}
setHermesModel={setHermesModel}
providerModelCatalog={providerModelCatalog}
providerModelsLoading={providerModelsLoading}
tasksEnabled={tasksEnabled}

View File

@@ -2,9 +2,7 @@ import { useMemo, useState } from 'react';
import {
Activity,
BadgeCheck,
Check,
CircleHelp,
Clipboard,
Coins,
Cpu,
Gauge,
@@ -59,25 +57,13 @@ type ModelOption = {
description?: string;
};
const formatUpdatedAt = (value?: string) => {
if (!value) {
return 'Not cached yet';
}
const parsed = new Date(value);
if (Number.isNaN(parsed.getTime())) {
return 'Not cached yet';
}
return parsed.toLocaleString();
};
const PROVIDER_LABELS: Record<string, string> = {
claude: 'Claude',
cursor: 'Cursor',
codex: 'Codex',
gemini: 'Gemini',
opencode: 'OpenCode',
hermes: 'Hermes',
};
const FALLBACK_COMMANDS: CommandEntry[] = [
@@ -246,7 +232,6 @@ function HelpContent({ data }: { data: HelpCommandData }) {
function ModelsContent({
data,
providerModelCatalog,
providerModelCacheCatalog,
providerModelsRefreshing,
onHardRefreshProviderModels,
currentSessionId,
@@ -254,14 +239,12 @@ function ModelsContent({
}: {
data: ModelCommandData;
providerModelCatalog: Partial<Record<LLMProvider, ProviderModelsDefinition>>;
providerModelCacheCatalog: Partial<Record<LLMProvider, ProviderModelsCacheInfo>>;
providerModelsRefreshing: boolean;
onHardRefreshProviderModels: () => void;
currentSessionId: string | null;
onSelectProviderModel: CommandResultModalProps['onSelectProviderModel'];
}) {
const [query, setQuery] = useState('');
const [copiedModel, setCopiedModel] = useState<string | null>(null);
const [changingModel, setChangingModel] = useState<string | null>(null);
const [pendingSessionModel, setPendingSessionModel] = useState<string | null>(null);
const [selectionNotice, setSelectionNotice] = useState<string | null>(null);
@@ -269,7 +252,6 @@ function ModelsContent({
const currentModel = data?.current?.model || 'Unknown';
const providerLabel = data?.current?.providerLabel || getProviderLabel(currentProvider);
const liveDefinition = providerModelCatalog[currentProvider];
const currentCache = providerModelCacheCatalog[currentProvider] ?? data?.cache;
const availableOptions = useMemo<ModelOption[]>(() => {
if (liveDefinition?.OPTIONS && liveDefinition.OPTIONS.length > 0) {
return liveDefinition.OPTIONS;
@@ -282,7 +264,6 @@ function ModelsContent({
const availableModels = Array.isArray(data?.availableModels) ? data.availableModels : [];
return availableModels.map((model) => ({ value: model, label: model }));
}, [data, liveDefinition]);
const defaultModel = liveDefinition?.DEFAULT || data?.defaultModel || currentModel;
const filteredOptions = useMemo(() => {
const normalized = query.trim().toLowerCase();
@@ -296,18 +277,8 @@ function ModelsContent({
});
}, [availableOptions, query]);
const activeOption = availableOptions.find((option) => option.value === currentModel);
const hasConcreteSessionId = typeof currentSessionId === 'string' && currentSessionId.trim().length > 0;
const copyModel = (model: string) => {
if (typeof navigator !== 'undefined' && navigator.clipboard) {
void navigator.clipboard.writeText(model).catch(() => undefined);
}
setCopiedModel(model);
window.setTimeout(() => {
setCopiedModel((current) => (current === model ? null : current));
}, 1300);
};
const showSearch = availableOptions.length > 6;
const handleSelectModel = async (model: string) => {
setChangingModel(model);
@@ -330,162 +301,106 @@ function ModelsContent({
};
return (
<div className="flex h-full min-h-0 flex-col gap-2.5">
<div className="rounded-2xl border border-border/70 bg-muted/20 p-2.5">
<div className="grid gap-2.5 lg:grid-cols-[minmax(0,1.55fr)_minmax(12rem,0.7fr)_minmax(15rem,0.9fr)] lg:items-start">
<div className="min-w-0">
<div className="flex flex-wrap items-center gap-2">
<Badge variant="secondary" className="rounded-lg border border-primary/20 bg-primary/10 px-2.5 py-1 text-[10px] font-semibold uppercase tracking-[0.18em] text-primary">
{providerLabel}
</Badge>
<Badge variant="secondary" className="rounded-lg px-2.5 py-1 text-[10px] font-semibold uppercase tracking-[0.18em] text-foreground">
{availableOptions.length} models
</Badge>
</div>
<div className="mt-2 rounded-xl border border-primary/15 bg-primary/[0.06] px-3 py-2">
<p className="text-[11px] font-bold uppercase tracking-[0.2em] text-primary">Active Model</p>
<p className="mt-1 break-all font-mono text-[0.98rem] font-semibold leading-5 text-foreground sm:text-[1.05rem]">
{currentModel}
</p>
{activeOption?.label && activeOption.label !== currentModel && (
<p className="mt-1 text-[11px] font-medium text-foreground/85">{activeOption.label}</p>
)}
{activeOption?.description && (
<p className="mt-0.5 line-clamp-1 text-[11px] text-muted-foreground">{activeOption.description}</p>
)}
{pendingSessionModel && pendingSessionModel !== currentModel && (
<p className="mt-1 text-[10px] font-semibold uppercase tracking-[0.16em] text-primary">
Next response: {pendingSessionModel}
</p>
)}
</div>
</div>
<div className="grid gap-2 sm:grid-cols-2 lg:grid-cols-1">
<div className="rounded-xl border border-border/60 bg-background/55 px-2.5 py-1.5">
<p className="text-[10px] font-bold uppercase tracking-[0.18em] text-foreground/80">Default</p>
<p className="mt-1 break-all font-mono text-[11px] font-medium text-foreground">{defaultModel}</p>
</div>
<div className="rounded-xl border border-border/60 bg-background/55 px-2.5 py-1.5">
<p className="text-[10px] font-bold uppercase tracking-[0.18em] text-foreground/80">Updated</p>
<p className="mt-1 text-[11px] font-medium text-foreground">{formatUpdatedAt(currentCache?.updatedAt)}</p>
</div>
</div>
<div className="rounded-xl border border-border/60 bg-background/55 p-2.5">
<div className="flex flex-wrap items-center gap-1.5">
<p className="text-[10px] font-bold uppercase tracking-[0.18em] text-foreground/80">
Catalog Refresh
</p>
<Badge variant="secondary" className="rounded-md px-1.5 py-0 text-[9px] uppercase tracking-[0.14em]">
All providers
</Badge>
</div>
<p className="mt-1.5 text-[11px] leading-4 text-muted-foreground">
Model lists are cached for 3 days. Refresh after CLI, auth, or config changes,
or when a new model is missing.
</p>
<Button
type="button"
variant="outline"
size="sm"
onClick={onHardRefreshProviderModels}
disabled={providerModelsRefreshing}
className="mt-2 h-8 w-full rounded-xl px-3"
>
<RefreshCw className={providerModelsRefreshing ? 'animate-spin' : ''} />
{providerModelsRefreshing ? 'Refreshing catalogs...' : 'Refresh from providers'}
</Button>
</div>
</div>
<div className="mt-2 border-t border-border/50 pt-1.5 text-[11px] text-muted-foreground">
{hasConcreteSessionId
? 'Selecting a model stores a session override and applies it on the next response for this session.'
: 'Selecting a model updates the default model used for new turns in this provider.'}
{selectionNotice && <span className="ml-2 text-foreground">{selectionNotice}</span>}
<div className="flex h-full min-h-0 flex-col gap-3">
{/* Compact context bar: active model + refresh, no clutter */}
<div className="flex items-center justify-between gap-3 rounded-2xl border border-border/70 bg-muted/20 px-3.5 py-2.5">
<div className="min-w-0">
<p className="text-[10px] font-semibold uppercase tracking-[0.18em] text-muted-foreground">
Active model · {providerLabel}
</p>
<p className="mt-0.5 flex flex-wrap items-center gap-x-2 gap-y-0.5">
<span className="break-all font-mono text-sm font-semibold text-foreground">{currentModel}</span>
{pendingSessionModel && pendingSessionModel !== currentModel && (
<span className="text-[11px] font-semibold uppercase tracking-[0.14em] text-emerald-500 dark:text-emerald-400">
{pendingSessionModel} next
</span>
)}
</p>
</div>
<Button
type="button"
variant="ghost"
size="icon"
onClick={onHardRefreshProviderModels}
disabled={providerModelsRefreshing}
title="Refresh model list from providers"
aria-label="Refresh model list from providers"
className="h-9 w-9 shrink-0 rounded-xl text-muted-foreground hover:text-foreground"
>
<RefreshCw className={`h-4 w-4 ${providerModelsRefreshing ? 'animate-spin' : ''}`} />
</Button>
</div>
<div className="flex min-h-0 flex-1 flex-col rounded-3xl border border-border/70 bg-muted/15 p-3 sm:p-4">
<div className="mb-2.5 grid gap-2 sm:grid-cols-[1fr_auto] sm:items-center">
<div className="min-w-0">
<SearchField value={query} onChange={setQuery} placeholder={`Search ${providerLabel} models...`} />
</div>
<Badge variant="secondary" className="h-9 justify-center rounded-xl px-3 font-mono text-xs">
{filteredOptions.length} shown
</Badge>
</div>
{showSearch && (
<SearchField value={query} onChange={setQuery} placeholder={`Search ${providerLabel} models...`} />
)}
{filteredOptions.length > 0 ? (
<div className="scrollbar-thin min-h-0 flex-1 overflow-y-auto pr-1">
<div className="grid gap-2 md:grid-cols-2">
{filteredOptions.map((option, index) => {
const isCurrent = option.value === currentModel;
const wasCopied = copiedModel === option.value;
const isPendingSelection = option.value === pendingSessionModel;
const isChanging = option.value === changingModel;
return (
<div
key={option.value}
className={`settings-content-enter group flex min-h-[4.5rem] items-start gap-3 rounded-2xl border p-3 shadow-sm transition-all duration-200 hover:-translate-y-0.5 hover:shadow-md ${
isCurrent
? 'border-primary/45 bg-primary/10'
: isPendingSelection
? 'border-emerald-500/35 bg-emerald-500/10'
: 'border-border/70 bg-background/80 hover:border-primary/30 hover:bg-background'
}`}
style={{ animationDelay: `${Math.min(index * 14, 180)}ms` }}
>
<button
type="button"
onClick={() => handleSelectModel(option.value)}
disabled={Boolean(changingModel)}
className="min-w-0 flex-1 text-left focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring"
aria-label={`Use model ${option.value}`}
>
<span className="flex items-center gap-2">
<span className="break-all font-mono text-sm font-semibold text-foreground">{option.value}</span>
{isCurrent && <BadgeCheck className="h-4 w-4 shrink-0 text-primary" />}
</span>
{option.label && option.label !== option.value && (
<span className="mt-1 block text-xs text-muted-foreground">{option.label}</span>
)}
{option.description && (
<span className="mt-1 block text-xs leading-5 text-muted-foreground">{option.description}</span>
)}
{isCurrent && <span className="mt-2 block text-[11px] font-semibold uppercase tracking-[0.16em] text-primary">Current selection</span>}
{isPendingSelection && !isCurrent && (
<span className="mt-2 block text-[11px] font-semibold uppercase tracking-[0.16em] text-emerald-400">
Next response selection
</span>
)}
{isChanging && (
<span className="mt-2 block text-[11px] font-semibold uppercase tracking-[0.16em] text-primary">
Applying...
</span>
)}
</button>
<button
type="button"
onClick={() => copyModel(option.value)}
className="rounded-lg border border-border/70 bg-muted/30 p-2 text-muted-foreground transition-colors group-hover:text-primary"
aria-label={`Copy model id ${option.value}`}
>
{wasCopied ? <Check className="h-4 w-4" /> : <Clipboard className="h-4 w-4" />}
</button>
</div>
);
})}
</div>
{filteredOptions.length > 0 ? (
<div className="scrollbar-thin -mr-1 min-h-0 flex-1 overflow-y-auto pr-1">
<div className="grid gap-2 md:grid-cols-2">
{filteredOptions.map((option, index) => {
const isCurrent = option.value === currentModel;
const isPendingSelection = option.value === pendingSessionModel;
const isChanging = option.value === changingModel;
return (
<button
key={option.value}
type="button"
onClick={() => handleSelectModel(option.value)}
disabled={Boolean(changingModel)}
aria-label={`Select model ${option.value}`}
className={`settings-content-enter group flex min-h-[4rem] flex-col rounded-2xl border p-3 text-left shadow-sm transition-all duration-200 hover:-translate-y-0.5 hover:shadow-md focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring disabled:cursor-default disabled:opacity-60 ${
isCurrent
? 'border-primary/45 bg-primary/10'
: isPendingSelection
? 'border-emerald-500/35 bg-emerald-500/10'
: 'border-border/70 bg-background/80 hover:border-primary/30 hover:bg-background'
}`}
style={{ animationDelay: `${Math.min(index * 14, 180)}ms` }}
>
<span className="flex items-center justify-between gap-2">
<span className="break-all font-mono text-sm font-semibold text-foreground">{option.value}</span>
{isCurrent ? (
<BadgeCheck className="h-4 w-4 shrink-0 text-primary" />
) : isChanging ? (
<RefreshCw className="h-4 w-4 shrink-0 animate-spin text-primary" />
) : null}
</span>
{option.label && option.label !== option.value && (
<span className="mt-1 text-xs font-medium text-foreground/85">{option.label}</span>
)}
{option.description && (
<span className="mt-1 text-xs leading-5 text-muted-foreground">{option.description}</span>
)}
{isCurrent && (
<span className="mt-2 text-[11px] font-semibold uppercase tracking-[0.16em] text-primary">Current selection</span>
)}
{isPendingSelection && !isCurrent && (
<span className="mt-2 text-[11px] font-semibold uppercase tracking-[0.16em] text-emerald-500 dark:text-emerald-400">
Applies next response
</span>
)}
</button>
);
})}
</div>
</div>
) : (
<div className="rounded-2xl border border-dashed border-border bg-background/60 px-4 py-10 text-center text-sm text-muted-foreground">
No models match that search.
</div>
)}
{/* Single quiet line of guidance / feedback */}
<p className="shrink-0 text-[11px] leading-4 text-muted-foreground">
{selectionNotice ? (
<span className="text-foreground">{selectionNotice}</span>
) : hasConcreteSessionId ? (
'Your choice applies to this session on the next response.'
) : (
<div className="rounded-2xl border border-dashed border-border bg-background/60 px-4 py-10 text-center text-sm text-muted-foreground">
No models match that search.
</div>
'Your choice becomes the default model for new turns.'
)}
</div>
</p>
</div>
);
}
@@ -606,7 +521,6 @@ export default function CommandResultModal({
payload,
onClose,
providerModelCatalog,
providerModelCacheCatalog,
providerModelsRefreshing,
onHardRefreshProviderModels,
currentSessionId,
@@ -624,9 +538,9 @@ export default function CommandResultModal({
icon: CircleHelp,
},
models: {
eyebrow: 'Model inventory',
title: 'Available Models',
subtitle: 'Browse, search, and copy model IDs for the active provider.',
eyebrow: 'Model selection',
title: 'Choose a Model',
subtitle: 'Pick the model this provider should use.',
icon: Cpu,
},
cost: {
@@ -700,7 +614,6 @@ export default function CommandResultModal({
<ModelsContent
data={payload.data as ModelCommandData}
providerModelCatalog={providerModelCatalog}
providerModelCacheCatalog={providerModelCacheCatalog}
providerModelsRefreshing={providerModelsRefreshing}
onHardRefreshProviderModels={onHardRefreshProviderModels}
currentSessionId={currentSessionId}

View File

@@ -8,12 +8,48 @@ import { oneDark } from 'react-syntax-highlighter/dist/esm/styles/prism';
import { useTranslation } from 'react-i18next';
import { normalizeInlineCodeFences } from '../../utils/chatFormatting';
import { copyTextToClipboard } from '../../../../utils/clipboard';
import { usePaletteOps } from '../../../../contexts/PaletteOpsContext';
type MarkdownProps = {
children: React.ReactNode;
className?: string;
};
// Decides whether a link keeps ordinary browser navigation. Web URLs,
// mailto/tel/data URIs and in-page anchors do; any other href is handled as a
// reference to a file in the workspace.
const isExternalHref = (href?: string): boolean => {
  if (!href) {
    return false;
  }
  if (href.startsWith('#')) {
    return true;
  }
  return /^(https?:|mailto:|tel:|data:)/i.test(href);
};
// Removes a trailing `:line` or `:line:col` position marker so only the path
// remains (e.g. `src/foo.ts:130` becomes `src/foo.ts`).
const stripLineSuffix = (value: string): string => {
  const positionSuffix = /:\d+(?::\d+)?$/;
  return value.replace(positionSuffix, '');
};
// Type guard: true when `value` (trimmed, with any `:line[:col]` suffix
// removed) contains a path separator or ends in a `.ext`-style extension.
// Empty values and a bare `#` are rejected.
const looksLikeFilePath = (value?: string): value is string => {
  const candidate = value ? stripLineSuffix(value.trim()) : '';
  if (candidate === '' || candidate === '#') {
    return false;
  }
  const hasSeparator = /[\\/]/.test(candidate);
  return hasSeparator || /\.[a-z0-9]+$/i.test(candidate);
};
// Flattens React children into their plain text so that a reference written
// only as link text (e.g. `[src/foo.ts]()` with an empty href) can still be
// resolved. Strings and numbers are stringified, arrays are concatenated,
// elements are descended into via their `children` prop, and anything else
// contributes an empty string.
const childrenToText = (children: React.ReactNode): string => {
  const primitiveKind = typeof children;
  if (primitiveKind === 'string' || primitiveKind === 'number') {
    return String(children);
  }
  if (Array.isArray(children)) {
    return children.map((child) => childrenToText(child)).join('');
  }
  if (!React.isValidElement(children)) {
    return '';
  }
  const { children: nested } = children.props as { children?: React.ReactNode };
  return childrenToText(nested);
};
type CodeBlockProps = {
node?: any;
inline?: boolean;
@@ -123,11 +159,6 @@ const markdownComponents = {
{children}
</blockquote>
),
a: ({ href, children }: { href?: string; children?: React.ReactNode }) => (
<a href={href} className="text-blue-600 hover:underline dark:text-blue-400" target="_blank" rel="noopener noreferrer">
{children}
</a>
),
p: ({ children }: { children?: React.ReactNode }) => <div className="mb-2 last:mb-0">{children}</div>,
table: ({ children }: { children?: React.ReactNode }) => (
<div className="my-2 overflow-x-auto">
@@ -147,10 +178,50 @@ export function Markdown({ children, className }: MarkdownProps) {
const content = normalizeInlineCodeFences(String(children ?? ''));
const remarkPlugins = useMemo(() => [remarkGfm, remarkMath], []);
const rehypePlugins = useMemo(() => [rehypeKatex], []);
const { openFileInEditor } = usePaletteOps();
const components = useMemo(
() => ({
...markdownComponents,
a: ({ href, children: linkChildren }: { href?: string; children?: React.ReactNode }) => {
// Prefer the href when it is a real path; otherwise fall back to the
// link text, since models often emit `[src/foo.ts]()` with an empty href.
const linkText = childrenToText(linkChildren);
const fileRef = looksLikeFilePath(href) ? href : looksLikeFilePath(linkText) ? linkText : undefined;
if (fileRef && !isExternalHref(href)) {
return (
<a
href={href || fileRef}
className="cursor-pointer text-blue-600 hover:underline dark:text-blue-400"
onClick={(event) => {
event.preventDefault();
openFileInEditor(stripLineSuffix(fileRef));
}}
>
{linkChildren}
</a>
);
}
return (
<a
href={href}
className="text-blue-600 hover:underline dark:text-blue-400"
target="_blank"
rel="noopener noreferrer"
>
{linkChildren}
</a>
);
},
}),
[openFileInEditor],
);
return (
<div className={className}>
<ReactMarkdown remarkPlugins={remarkPlugins} rehypePlugins={rehypePlugins} components={markdownComponents as any}>
<ReactMarkdown remarkPlugins={remarkPlugins} rehypePlugins={rehypePlugins} components={components as any}>
{content}
</ReactMarkdown>
</div>

View File

@@ -183,6 +183,8 @@ const MessageComponent = memo(({ message, prevMessage, createDiff, onFileOpen, a
? t('messageTypes.gemini')
: provider === 'opencode'
? t('messageTypes.opencode', { defaultValue: 'OpenCode' })
: provider === 'hermes'
? t('messageTypes.hermes', { defaultValue: 'Hermes' })
: t('messageTypes.claude'))}
</div>
</div>
@@ -218,8 +220,8 @@ const MessageComponent = memo(({ message, prevMessage, createDiff, onFileOpen, a
/>
)}
{/* Tool Result Section */}
{message.toolResult && !shouldHideToolResult(message.toolName || 'UnknownTool', message.toolResult) && (
{/* Tool Result Section — Bash renders its output inside the command row above. */}
{message.toolResult && message.toolName !== 'Bash' && !shouldHideToolResult(message.toolName || 'UnknownTool', message.toolResult) && (
message.toolResult.isError ? (
// Error results - red error box with content
<div
@@ -430,4 +432,3 @@ const MessageComponent = memo(({ message, prevMessage, createDiff, onFileOpen, a
});
export default MessageComponent;

View File

@@ -29,6 +29,7 @@ const PROVIDER_META: { id: LLMProvider; name: string }[] = [
{ id: "gemini", name: "Google" },
{ id: "cursor", name: "Cursor" },
{ id: "opencode", name: "OpenCode" },
{ id: "hermes", name: "Hermes" },
];
const MOD_KEY =
@@ -50,6 +51,8 @@ type ProviderSelectionEmptyStateProps = {
setGeminiModel: (model: string) => void;
opencodeModel: string;
setOpenCodeModel: (model: string) => void;
hermesModel: string;
setHermesModel: (model: string) => void;
providerModelCatalog: Partial<Record<LLMProvider, ProviderModelsDefinition>>;
providerModelsLoading: boolean;
tasksEnabled: boolean;
@@ -79,11 +82,13 @@ function getCurrentModel(
co: string,
g: string,
o: string,
h: string,
) {
if (p === "claude") return c;
if (p === "codex") return co;
if (p === "gemini") return g;
if (p === "opencode") return o;
if (p === "hermes") return h;
return cu;
}
@@ -92,6 +97,7 @@ function getProviderDisplayName(p: LLMProvider) {
if (p === "cursor") return "Cursor";
if (p === "codex") return "Codex";
if (p === "opencode") return "OpenCode";
if (p === "hermes") return "Hermes";
return "Gemini";
}
@@ -111,6 +117,8 @@ export default function ProviderSelectionEmptyState({
setGeminiModel,
opencodeModel,
setOpenCodeModel,
hermesModel,
setHermesModel,
providerModelCatalog,
providerModelsLoading,
tasksEnabled,
@@ -140,6 +148,7 @@ export default function ProviderSelectionEmptyState({
codexModel,
geminiModel,
opencodeModel,
hermesModel,
);
const currentModelLabel = useMemo(() => {
@@ -164,12 +173,15 @@ export default function ProviderSelectionEmptyState({
} else if (providerId === "opencode") {
setOpenCodeModel(modelValue);
localStorage.setItem("opencode-model", modelValue);
} else if (providerId === "hermes") {
setHermesModel(modelValue);
localStorage.setItem("hermes-model", modelValue);
} else {
setCursorModel(modelValue);
localStorage.setItem("cursor-model", modelValue);
}
},
[setClaudeModel, setCursorModel, setCodexModel, setGeminiModel, setOpenCodeModel],
[setClaudeModel, setCursorModel, setCodexModel, setGeminiModel, setOpenCodeModel, setHermesModel],
);
const handleModelSelect = useCallback(
@@ -319,6 +331,10 @@ export default function ProviderSelectionEmptyState({
model: opencodeModel,
defaultValue: "Ready with OpenCode {{model}}",
}),
hermes: t("providerSelection.readyPrompt.hermes", {
model: hermesModel,
defaultValue: "Ready with Hermes {{model}}",
}),
}[provider]
}
</p>

View File

@@ -2,6 +2,7 @@ import { useCallback, useEffect, useState } from 'react';
import { api } from '../../../utils/api';
import type { CodeEditorFile } from '../types/types';
import { isBinaryFile } from '../utils/binaryFile';
import { getPreviewKind } from '../utils/previewableFile';
type UseCodeEditorDocumentParams = {
file: CodeEditorFile;
@@ -23,6 +24,9 @@ export const useCodeEditorDocument = ({ file, projectPath }: UseCodeEditorDocume
const [saveSuccess, setSaveSuccess] = useState(false);
const [saveError, setSaveError] = useState<string | null>(null);
const [isBinary, setIsBinary] = useState(false);
// Some binaries (images, PDFs, audio, video) can be rendered natively, so the
// editor shows an inline preview instead of the generic binary placeholder.
const previewKind = getPreviewKind(file.name);
// `fileProjectId` is the DB primary key passed down from the editor sidebar;
// the fallback to `projectPath` preserves older callers that didn't yet
// propagate the identifier.
@@ -38,8 +42,19 @@ export const useCodeEditorDocument = ({ file, projectPath }: UseCodeEditorDocume
setLoading(true);
setIsBinary(false);
// Natively previewable media (image/pdf/audio/video) is rendered by
// CodeEditorMediaPreview, so there is nothing to read as text here.
// Clear any buffer left over from a previously opened text file so a
// stray save can't write stale content over the binary file.
if (getPreviewKind(file.name)) {
setContent('');
setLoading(false);
return;
}
// Check if file is binary by extension
if (isBinaryFile(file.name)) {
setContent('');
setIsBinary(true);
setLoading(false);
return;
@@ -76,6 +91,12 @@ export const useCodeEditorDocument = ({ file, projectPath }: UseCodeEditorDocume
}, [file.diffInfo, file.name, fileDiffNewString, fileDiffOldString, fileName, filePath, fileProjectId]);
const handleSave = useCallback(async () => {
// Preview-only and binary files have no editable text buffer; never write
// them back (e.g. via Cmd/Ctrl+S) or we'd corrupt the file on disk.
if (previewKind || isBinaryFile(fileName)) {
return;
}
setSaving(true);
setSaveError(null);
@@ -109,7 +130,7 @@ export const useCodeEditorDocument = ({ file, projectPath }: UseCodeEditorDocume
} finally {
setSaving(false);
}
}, [content, filePath, fileProjectId]);
}, [content, filePath, fileProjectId, previewKind, fileName]);
const handleDownload = useCallback(() => {
const blob = new Blob([content], { type: 'text/plain' });
@@ -134,6 +155,8 @@ export const useCodeEditorDocument = ({ file, projectPath }: UseCodeEditorDocume
saveSuccess,
saveError,
isBinary,
previewKind,
fileProjectId,
handleSave,
handleDownload,
};

View File

@@ -0,0 +1,63 @@
// Some binary files can't be edited as text, but the browser can still render
// them natively (images, PDFs, audio, video). For those we show an inline
// preview instead of the generic "binary file" placeholder. Anything not listed
// here (zip, exe, avi, mkv, fonts, ...) falls through to the binary message.
export type PreviewKind = 'image' | 'pdf' | 'video' | 'audio';

// Single source of truth: every extension the browser can preview, mapped to the
// MIME type we apply when the server response has a missing/generic Content-Type.
// The preview kind is derived from the MIME type so the two never drift apart.
// Formats browsers generally can't play (avi, mkv, flv, wmv) are intentionally
// absent and keep the binary fallback.
const EXTENSION_MIME: Record<string, string> = {
  // Images
  png: 'image/png',
  jpg: 'image/jpeg',
  jpeg: 'image/jpeg',
  gif: 'image/gif',
  svg: 'image/svg+xml',
  webp: 'image/webp',
  ico: 'image/x-icon',
  bmp: 'image/bmp',
  avif: 'image/avif',
  apng: 'image/apng',
  // PDF
  pdf: 'application/pdf',
  // Video
  mp4: 'video/mp4',
  webm: 'video/webm',
  ogv: 'video/ogg',
  mov: 'video/quicktime',
  m4v: 'video/x-m4v',
  // Audio
  mp3: 'audio/mpeg',
  wav: 'audio/wav',
  m4a: 'audio/mp4',
  aac: 'audio/aac',
  flac: 'audio/flac',
  opus: 'audio/opus',
  oga: 'audio/ogg',
  ogg: 'audio/ogg',
  weba: 'audio/webm',
};

// Lowercased text after the last dot, or '' when the name contains no dot.
// A dotless name such as `pdf` or `mp4` therefore has no extension instead of
// being mistaken for one.
const extensionOf = (filename: string): string => {
  const dotIndex = filename.lastIndexOf('.');
  return dotIndex === -1 ? '' : filename.slice(dotIndex + 1).toLowerCase();
};

// Looks up the MIME type for a filename using own properties only. A plain
// `EXTENSION_MIME[ext]` would also resolve inherited keys such as `constructor`
// or `__proto__` and hand back a non-string value.
const mimeForFilename = (filename: string): string | undefined => {
  const extension = extensionOf(filename);
  return Object.prototype.hasOwnProperty.call(EXTENSION_MIME, extension)
    ? EXTENSION_MIME[extension]
    : undefined;
};

// Maps a MIME type to the preview kind used to pick the renderer; null when the
// type is not one of the previewable families.
const kindForMime = (mime: string): PreviewKind | null => {
  if (mime === 'application/pdf') return 'pdf';
  if (mime.startsWith('image/')) return 'image';
  if (mime.startsWith('video/')) return 'video';
  if (mime.startsWith('audio/')) return 'audio';
  return null;
};

// Preview kind for a filename based on its extension, or null when the file is
// not natively previewable.
export const getPreviewKind = (filename: string): PreviewKind | null => {
  const mime = mimeForFilename(filename);
  return mime ? kindForMime(mime) : null;
};

// MIME type to fall back to when the server returns no/generic Content-Type.
// Returns undefined for non-previewable extensions.
export const getPreviewMimeType = (filename: string): string | undefined =>
  mimeForFilename(filename);

View File

@@ -1,8 +1,9 @@
import { EditorView } from '@codemirror/view';
import { unifiedMergeView } from '@codemirror/merge';
import type { Extension } from '@codemirror/state';
import { useMemo, useState } from 'react';
import { useCallback, useMemo, useState } from 'react';
import { useTranslation } from 'react-i18next';
import { usePaletteOps } from '../../../contexts/PaletteOpsContext';
import { useCodeEditorDocument } from '../hooks/useCodeEditorDocument';
import { useCodeEditorSettings } from '../hooks/useCodeEditorSettings';
@@ -11,11 +12,13 @@ import type { CodeEditorFile } from '../types/types';
import { createMinimapExtension, createScrollToFirstChunkExtension, getLanguageExtensions } from '../utils/editorExtensions';
import { getEditorStyles } from '../utils/editorStyles';
import { createEditorToolbarPanelExtension } from '../utils/editorToolbarPanel';
import CodeEditorFooter from './subcomponents/CodeEditorFooter';
import CodeEditorHeader from './subcomponents/CodeEditorHeader';
import CodeEditorLoadingState from './subcomponents/CodeEditorLoadingState';
import CodeEditorSurface from './subcomponents/CodeEditorSurface';
import CodeEditorBinaryFile from './subcomponents/CodeEditorBinaryFile';
import CodeEditorMediaPreview from './subcomponents/CodeEditorMediaPreview';
type CodeEditorProps = {
file: CodeEditorFile;
@@ -58,6 +61,8 @@ export default function CodeEditor({
saveSuccess,
saveError,
isBinary,
previewKind,
fileProjectId,
handleSave,
handleDownload,
} = useCodeEditorDocument({
@@ -70,6 +75,29 @@ export default function CodeEditor({
return extension === 'md' || extension === 'markdown';
}, [file.name]);
// Whether the open file's extension is `html` or `htm` (case-insensitive).
const isHtmlPreviewFile = useMemo(
  () => ['html', 'htm'].includes(file.name.split('.').pop()?.toLowerCase() ?? ''),
  [file.name],
);
// Opens a blank tab and renders the current editor content inside a sandboxed
// <iframe> via `srcdoc`. The sandbox token list does not include
// `allow-same-origin`. Does nothing when the browser refuses to open the tab.
const openHtmlPreview = useCallback(() => {
  const popup = window.open('', '_blank');
  if (!popup) {
    return;
  }

  // Sever the back-reference from the new tab to this window.
  popup.opener = null;

  const { document: popupDocument } = popup;
  popupDocument.title = file.name;
  popupDocument.body.style.margin = '0';

  const frame = popupDocument.createElement('iframe');
  frame.title = file.name;
  frame.sandbox.add('allow-forms', 'allow-modals', 'allow-popups', 'allow-scripts');
  frame.style.cssText = 'position:fixed;inset:0;width:100%;height:100%;border:0;background:white';
  frame.srcdoc = content;

  popupDocument.body.appendChild(frame);
}, [content, file.name]);
const minimapExtension = useMemo(
() => (
createMinimapExtension({
@@ -162,6 +190,30 @@ export default function CodeEditor({
);
}
// Natively previewable media (image/pdf/audio/video) is rendered inline
// instead of showing the generic "cannot be displayed" placeholder.
if (previewKind) {
return (
<CodeEditorMediaPreview
file={file}
kind={previewKind}
projectId={fileProjectId}
isSidebar={isSidebar}
isFullscreen={isFullscreen}
onClose={onClose}
onToggleFullscreen={() => setIsFullscreen((previous) => !previous)}
labels={{
loading: t('filePreview.loading', 'Loading preview...'),
error: t('filePreview.error', 'Unable to display this file.'),
openInNewTab: t('filePreview.openInNewTab', 'Open in new tab'),
fullscreen: t('actions.fullscreen', 'Fullscreen'),
exitFullscreen: t('actions.exitFullscreen', 'Exit fullscreen'),
close: t('actions.close', 'Close'),
}}
/>
);
}
// Binary file display
if (isBinary) {
return (
@@ -197,10 +249,12 @@ export default function CodeEditor({
isSidebar={isSidebar}
isFullscreen={isFullscreen}
isMarkdownFile={isMarkdownFile}
isHtmlPreviewFile={isHtmlPreviewFile}
markdownPreview={markdownPreview}
saving={saving}
saveSuccess={saveSuccess}
onToggleMarkdownPreview={() => setMarkdownPreview((previous) => !previous)}
onOpenHtmlPreview={openHtmlPreview}
onOpenSettings={() => paletteOps.openSettings('appearance')}
onDownload={handleDownload}
onSave={handleSave}
@@ -210,6 +264,7 @@ export default function CodeEditor({
showingChanges: t('header.showingChanges'),
editMarkdown: t('actions.editMarkdown'),
previewMarkdown: t('actions.previewMarkdown'),
previewHtml: t('actions.previewHtml', 'Open HTML preview in new tab'),
settings: t('toolbar.settings'),
download: t('actions.download'),
save: t('actions.save'),

View File

@@ -1,4 +1,5 @@
import { Code2, Download, Eye, Maximize2, Minimize2, Save, Settings as SettingsIcon, X } from 'lucide-react';
import type { CodeEditorFile } from '../../types/types';
type CodeEditorHeaderProps = {
@@ -6,10 +7,12 @@ type CodeEditorHeaderProps = {
isSidebar: boolean;
isFullscreen: boolean;
isMarkdownFile: boolean;
isHtmlPreviewFile: boolean;
markdownPreview: boolean;
saving: boolean;
saveSuccess: boolean;
onToggleMarkdownPreview: () => void;
onOpenHtmlPreview: () => void;
onOpenSettings: () => void;
onDownload: () => void;
onSave: () => void;
@@ -19,6 +22,7 @@ type CodeEditorHeaderProps = {
showingChanges: string;
editMarkdown: string;
previewMarkdown: string;
previewHtml: string;
settings: string;
download: string;
save: string;
@@ -35,10 +39,12 @@ export default function CodeEditorHeader({
isSidebar,
isFullscreen,
isMarkdownFile,
isHtmlPreviewFile,
markdownPreview,
saving,
saveSuccess,
onToggleMarkdownPreview,
onOpenHtmlPreview,
onOpenSettings,
onDownload,
onSave,
@@ -82,6 +88,17 @@ export default function CodeEditorHeader({
</button>
)}
{isHtmlPreviewFile && (
<button
type="button"
onClick={onOpenHtmlPreview}
className="flex items-center justify-center rounded-md p-1.5 text-gray-600 hover:bg-gray-100 hover:text-gray-900 dark:text-gray-400 dark:hover:bg-gray-800 dark:hover:text-white"
title={labels.previewHtml}
>
<Eye className="h-4 w-4" />
</button>
)}
<button
type="button"
onClick={onOpenSettings}

View File

@@ -0,0 +1,289 @@
import { useEffect, useState } from 'react';
import { authenticatedFetch } from '../../../../utils/api';
import type { CodeEditorFile } from '../../types/types';
import { getPreviewMimeType, type PreviewKind } from '../../utils/previewableFile';
// Props for CodeEditorMediaPreview, the inline viewer for image/pdf/video/audio files.
type CodeEditorMediaPreviewProps = {
// File being previewed; `path` is used for the content request and `name` for
// the title and the extension-based MIME fallback.
file: CodeEditorFile;
// Selects which element renders the media (<img>, <iframe>, <video> or <audio>).
kind: PreviewKind;
// DB projectId used to build the raw-content URL; falls back to projectPath
// for older callers, mirroring useCodeEditorDocument.
projectId?: string;
// When true the preview fills its parent and the fullscreen toggle is hidden;
// otherwise it renders as a fixed overlay.
isSidebar: boolean;
// Only affects the overlay (non-sidebar) layout and the toggle button's label.
isFullscreen: boolean;
onClose: () => void;
onToggleFullscreen: () => void;
// User-facing strings supplied by the caller; `error` is also shown when no
// projectId is available or the content request fails.
labels: {
loading: string;
error: string;
openInNewTab: string;
fullscreen: string;
exitFullscreen: string;
close: string;
};
};
// Reject a "PDF" whose bytes aren't actually a PDF before handing it to the
// same-origin iframe, so a mislabeled HTML/SVG file can't run in the app origin.
// Only this many leading bytes are inspected for the header marker.
const PDF_HEADER_SCAN_BYTES = 1024;
// Resolves to true when the "%PDF-" marker occurs anywhere within the first
// PDF_HEADER_SCAN_BYTES bytes of `blob`.
const looksLikePdf = async (blob: Blob): Promise<boolean> => {
const header = await blob.slice(0, PDF_HEADER_SCAN_BYTES).arrayBuffer();
// The marker is searched for within the scanned prefix rather than required at
// byte 0, so a header preceded by a few other bytes is still accepted.
// latin1 is a single-byte encoding, so every byte decodes to exactly one
// character and the ASCII marker can be matched directly.
return new TextDecoder('latin1').decode(header).includes('%PDF-');
};
// Inline preview for natively renderable media (image, PDF, video, audio).
// The file bytes are fetched with the authenticated client, wrapped in a blob
// URL, and rendered with the element matching `kind`. In sidebar mode the
// preview fills its parent; otherwise it is shown as a fixed overlay that can
// be toggled to fullscreen.
export default function CodeEditorMediaPreview({
  file,
  kind,
  projectId,
  isSidebar,
  isFullscreen,
  onClose,
  onToggleFullscreen,
  labels,
}: CodeEditorMediaPreviewProps) {
  const [url, setUrl] = useState<string | null>(null);
  const [error, setError] = useState<string | null>(null);
  const [loading, setLoading] = useState(true);
  // Identifies which file the current `url` was loaded for. Rendering is gated on
  // this so a blob from a previously-opened file can never show under the new
  // file (the editor reuses this component instance across files).
  const [loadedKey, setLoadedKey] = useState<string | null>(null);
  const sourceKey = `${projectId ?? ''}:${file.path}:${kind}`;

  useEffect(() => {
    if (!projectId) {
      setUrl(null);
      setLoadedKey(null);
      setError(labels.error);
      setLoading(false);
      return;
    }

    let objectUrl: string | null = null;
    const controller = new AbortController();

    const loadMedia = async () => {
      try {
        setLoading(true);
        setError(null);
        setUrl(null);

        // The content endpoint requires the auth header, so we fetch the bytes
        // ourselves and hand the media element a blob URL instead of a bare src.
        // Fetching a blob (rather than streaming) also lets <video>/<audio> seek.
        const contentUrl = `/api/projects/${projectId}/files/content?path=${encodeURIComponent(file.path)}`;
        const response = await authenticatedFetch(contentUrl, { signal: controller.signal });
        if (!response.ok) {
          throw new Error(`Request failed with status ${response.status}`);
        }

        const blob = await response.blob();

        // Pick the MIME type to expose to the browser. Preserve a valid
        // Content-Type from the server, but supply an extension-specific
        // default when it is missing or generic (application/octet-stream),
        // otherwise formats like webm/ogg/flac/svg won't render.
        const fallbackMime = getPreviewMimeType(file.name);
        const isGenericType = !blob.type || blob.type === 'application/octet-stream';
        const isMislabeledVideo = kind === 'video' && Boolean(fallbackMime) && !blob.type.startsWith('video/');
        let outType = isGenericType || isMislabeledVideo ? (fallbackMime ?? blob.type) : blob.type;

        if (kind === 'pdf') {
          // The PDF renders in a same-origin <iframe>, so verify the bytes are
          // really a PDF and pin the type to application/pdf. That forces the
          // browser's PDF handler and prevents a mislabeled HTML/SVG file from
          // executing scripts in the app's origin.
          if (!(await looksLikePdf(blob))) {
            throw new Error('File is not a valid PDF');
          }
          outType = 'application/pdf';
        }

        const typed = outType && outType !== blob.type ? new Blob([blob], { type: outType }) : blob;
        objectUrl = URL.createObjectURL(typed);

        // The cleanup may have already run (deps changed during an await), in
        // which case it revoked nothing because objectUrl was still null. Don't
        // publish a URL the cleanup will never revoke — drop it ourselves.
        if (controller.signal.aborted) {
          URL.revokeObjectURL(objectUrl);
          objectUrl = null;
          return;
        }

        setUrl(objectUrl);
        setLoadedKey(sourceKey);
      } catch (loadError: unknown) {
        // Any failure of a superseded request is irrelevant to the file now
        // being shown, whatever error type the abort surfaced as.
        if (controller.signal.aborted || (loadError instanceof Error && loadError.name === 'AbortError')) {
          return;
        }
        console.error('Error loading preview:', loadError);
        setError(labels.error);
      } finally {
        // A superseded (aborted) request must not touch `loading`: the effect
        // for the next file has already set it to true, and clearing it here
        // would briefly show the error state while that load is still in flight.
        if (!controller.signal.aborted) {
          setLoading(false);
        }
      }
    };

    loadMedia();

    return () => {
      controller.abort();
      if (objectUrl) {
        URL.revokeObjectURL(objectUrl);
      }
    };
  }, [file.path, file.name, projectId, kind, sourceKey, labels.error]);

  // Only expose the blob once it matches the file currently being shown, so a
  // stale URL from the previous file is never rendered during a switch.
  const currentUrl = url && loadedKey === sourceKey ? url : null;

  // SVGs render safely inline via <img> (scripts don't execute there), but the
  // open-in-new-tab link is a top-level navigation. A blob URL inherits the
  // app's origin, so a user-controlled SVG with an embedded <script> would run
  // as same-origin script. Withhold the new-tab action for SVGs.
  const isSvg = getPreviewMimeType(file.name) === 'image/svg+xml';
  const canOpenInNewTab = Boolean(currentUrl) && !isSvg;

  // Renders the element matching `kind`; null until a URL for the current file exists.
  const renderMedia = () => {
    if (!currentUrl) return null;

    switch (kind) {
      case 'image':
        return (
          <img
            src={currentUrl}
            alt={file.name}
            className="max-h-full max-w-full object-contain"
          />
        );
      case 'pdf':
        // Not sandboxed on purpose: the browser's built-in PDF viewer refuses to
        // load inside a sandboxed frame (any `sandbox` value yields a broken
        // viewer). Script execution is instead prevented upstream by validating
        // the PDF magic bytes and pinning the blob's MIME type to application/pdf.
        return <iframe src={currentUrl} title={file.name} className="h-full w-full border-0 bg-white" />;
      case 'video':
        return (
          <video src={currentUrl} controls className="max-h-full max-w-full" autoPlay={false}>
            {labels.error}
          </video>
        );
      case 'audio':
        return (
          <div className="flex w-full max-w-xl flex-col items-center gap-4 px-6">
            <p className="max-w-full truncate text-sm text-muted-foreground">{file.name}</p>
            <audio src={currentUrl} controls className="w-full">
              {labels.error}
            </audio>
          </div>
        );
      default:
        return null;
    }
  };

  // Exactly one of: loading text, the media element, or the error message.
  const previewBody = (
    <div className="relative flex h-full w-full flex-col items-center justify-center bg-muted/30 p-2">
      {loading && (
        <div className="text-sm text-muted-foreground">{labels.loading}</div>
      )}
      {!loading && currentUrl && renderMedia()}
      {!loading && !currentUrl && (
        <div className="flex flex-col items-center gap-3 p-8 text-center text-muted-foreground">
          <p className="text-sm">{error || labels.error}</p>
          <p className="break-all text-xs">{file.path}</p>
        </div>
      )}
    </div>
  );

  const headerActions = (
    <div className="flex shrink-0 items-center gap-0.5">
      {canOpenInNewTab && currentUrl && (
        <a
          href={currentUrl}
          target="_blank"
          rel="noopener noreferrer"
          className="flex items-center justify-center rounded-md p-1.5 text-gray-600 hover:bg-gray-100 hover:text-gray-900 dark:text-gray-400 dark:hover:bg-gray-800 dark:hover:text-white"
          aria-label={labels.openInNewTab}
          title={labels.openInNewTab}
        >
          <svg aria-hidden="true" className="h-4 w-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
            <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M10 6H6a2 2 0 00-2 2v10a2 2 0 002 2h10a2 2 0 002-2v-4M14 4h6m0 0v6m0-6L10 14" />
          </svg>
        </a>
      )}
      {!isSidebar && (
        <button
          type="button"
          onClick={onToggleFullscreen}
          className="flex items-center justify-center rounded-md p-1.5 text-gray-600 hover:bg-gray-100 hover:text-gray-900 dark:text-gray-400 dark:hover:bg-gray-800 dark:hover:text-white"
          aria-label={isFullscreen ? labels.exitFullscreen : labels.fullscreen}
          title={isFullscreen ? labels.exitFullscreen : labels.fullscreen}
        >
          {isFullscreen ? (
            <svg aria-hidden="true" className="h-4 w-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
              <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M9 9V4.5M9 9H4.5M9 9L3.5 3.5M9 15v4.5M9 15H4.5M9 15l-5.5 5.5M15 9h4.5M15 9V4.5M15 9l5.5-5.5M15 15h4.5M15 15v4.5m0-4.5l5.5 5.5" />
            </svg>
          ) : (
            <svg aria-hidden="true" className="h-4 w-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
              <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M4 8V4m0 0h4M4 4l5 5m11-1V4m0 0h-4m4 0l-5 5M4 16v4m0 0h4m-4 0l5-5m11 5l-5-5m5 5v-4m0 4h-4" />
            </svg>
          )}
        </button>
      )}
      <button
        type="button"
        onClick={onClose}
        className="flex items-center justify-center rounded-md p-1.5 text-gray-600 hover:bg-gray-100 hover:text-gray-900 dark:text-gray-400 dark:hover:bg-gray-800 dark:hover:text-white"
        aria-label={labels.close}
        title={labels.close}
      >
        <svg aria-hidden="true" className="h-4 w-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
          <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M6 18L18 6M6 6l12 12" />
        </svg>
      </button>
    </div>
  );

  const header = (
    <div className="flex flex-shrink-0 items-center justify-between border-b border-border px-3 py-1.5">
      <div className="flex min-w-0 flex-1 items-center gap-2">
        <h3 className="truncate text-sm font-medium text-gray-900 dark:text-white">{file.name}</h3>
      </div>
      {headerActions}
    </div>
  );

  if (isSidebar) {
    return (
      <div className="flex h-full w-full flex-col bg-background">
        {header}
        {previewBody}
      </div>
    );
  }

  const containerClassName = isFullscreen
    ? 'fixed inset-0 z-[9999] bg-background flex flex-col'
    : 'fixed inset-0 z-[9999] md:bg-black/50 md:flex md:items-center md:justify-center md:p-4';
  const innerClassName = isFullscreen
    ? 'bg-background flex flex-col w-full h-full'
    : 'bg-background shadow-2xl flex flex-col w-full h-full md:rounded-lg md:shadow-2xl md:w-full md:max-w-6xl md:h-[80vh] md:max-h-[80vh]';

  return (
    <div className={containerClassName}>
      <div className={innerClassName}>
        {header}
        {previewBody}
      </div>
    </div>
  );
}

View File

@@ -1 +0,0 @@
export { default as ComputerUsePanel } from './view/ComputerUsePanel';

View File

@@ -1,537 +0,0 @@
import { useCallback, useEffect, useMemo, useRef, useState, type KeyboardEvent, type MouseEvent } from 'react';
import { Bot, Camera, Download, Expand, Loader2, MonitorCog, RefreshCw, Settings, ShieldCheck, Square, Trash2, X } from 'lucide-react';
import { cn } from '../../../lib/utils';
import { Badge, Button } from '../../../shared/view/ui';
import { authenticatedFetch } from '../../../utils/api';
import type { SettingsMainTab } from '../../settings/types/types';
// Status payload for the Computer Use feature; ComputerUsePanel reads it from
// the `data` field of the `/api/computer-use/status` response.
type ComputerUseStatus = {
// When false the runtime label is 'Disabled' regardless of the other fields.
enabled: boolean;
// For 'cloud' the label/tone are derived from the desktop agent fields below;
// otherwise from `available` and `installInProgress` (see getRuntimeLabel).
runtime: 'cloud' | 'local';
available: boolean;
desktopAgentConnected?: boolean;
// When absent, getRuntimeLabel treats `desktopAgentConnected` as a count of 1 or 0.
desktopAgentCount?: number;
nutInstalled: boolean;
screenshotInstalled: boolean;
installInProgress: boolean;
sessionCount: number;
message: string;
};
// One Computer Use session, as listed under `data.sessions` in the
// `/api/computer-use/sessions` response.
type ComputerUseSession = {
// Used to build the per-session endpoints (screenshot, stop, consent, delete).
id: string;
// The panel only polls for updates while the selected session is 'ready'.
status: 'ready' | 'stopped' | 'unavailable';
screenshotDataUrl: string | null;
createdAt: string;
updatedAt: string;
lastAction: string | null;
message: string | null;
agentAccessEnabled: boolean;
createdBy: 'user' | 'agent';
displaySize: {
width: number;
height: number;
} | null;
cursor: {
x: number;
y: number;
actor: 'agent' | 'user';
} | null;
};
// Props accepted by ComputerUsePanel.
type ComputerUsePanelProps = {
// Status/session loading and polling only run while this is true.
isVisible: boolean;
// Optional callback taking an optional settings tab.
onShowSettings?: (tab?: SettingsMainTab) => void;
};
// Parses a JSON API response and returns it typed as `T`.
// Throws an Error when the HTTP status is not ok or the body carries
// `success: false`; the message is the body's `error` or `details` when present,
// otherwise `Request failed (<status>)`.
async function readJson<T>(response: Response): Promise<T> {
  let data: any;
  try {
    data = await response.json();
  } catch (parseError) {
    // Error responses are not always JSON (e.g. an HTML gateway page). Report
    // the HTTP status instead of a raw JSON parse error in that case.
    if (!response.ok) {
      throw new Error(`Request failed (${response.status})`);
    }
    throw parseError;
  }
  // `data` may legitimately be null (a JSON `null` body), hence the optional chaining.
  if (!response.ok || data?.success === false) {
    throw new Error(data?.error || data?.details || `Request failed (${response.status})`);
  }
  return data as T;
}
// Picks the Tailwind class set for the runtime status badge.
// Disabled or missing status is muted. A cloud runtime is highlighted when a
// desktop agent is connected and amber otherwise. Any other runtime is
// highlighted when available or installing, and neutral otherwise.
function getRuntimeTone(status: ComputerUseStatus | null, installing: boolean): string {
  const highlighted = 'border-primary/30 bg-primary/5 text-foreground';

  if (!status || !status.enabled) {
    return 'border-border bg-muted text-muted-foreground';
  }

  if (status.runtime === 'cloud') {
    if (status.desktopAgentConnected) {
      return highlighted;
    }
    return 'border-amber-500/30 bg-amber-500/10 text-amber-700 dark:text-amber-300';
  }

  const readyOrInstalling = status.available || status.installInProgress || installing;
  return readyOrInstalling ? highlighted : 'border-border bg-background text-muted-foreground';
}
// Human-readable label for the runtime status badge.
// A cloud runtime reports how many desktops are linked, using
// `desktopAgentCount` when present and otherwise 1 or 0 from
// `desktopAgentConnected`. Any other runtime reports ready, installing or
// setup-required.
function getRuntimeLabel(status: ComputerUseStatus | null, installing: boolean): string {
  if (!status || !status.enabled) {
    return 'Disabled';
  }

  if (status.runtime !== 'cloud') {
    if (status.available) {
      return 'Ready';
    }
    return status.installInProgress || installing ? 'Installing' : 'Setup required';
  }

  const linked = status.desktopAgentCount ?? (status.desktopAgentConnected ? 1 : 0);
  if (linked > 1) {
    return `${linked} desktops linked`;
  }
  return linked === 1 ? 'Desktop linked' : 'Desktop not linked';
}
/**
 * Panel for monitoring and interacting with Computer Use desktop sessions.
 *
 * Loads the runtime status and the session list from the
 * `/api/computer-use/*` endpoints, lets the user pick a session, and shows
 * that session's latest screenshot. Clicks on the screenshot and key presses
 * on the focused viewer are forwarded to the selected session. The toolbar
 * offers screenshot, grant/revoke control (non-cloud runtime only), full
 * screen, stop and delete actions.
 *
 * @param isVisible - When false, the initial load and the polling effect are skipped.
 * @param onShowSettings - Optional callback; when provided, a settings button
 *   is rendered that invokes it with 'computer'.
 */
export default function ComputerUsePanel({ isVisible, onShowSettings }: ComputerUsePanelProps) {
const [status, setStatus] = useState<ComputerUseStatus | null>(null);
const [sessions, setSessions] = useState<ComputerUseSession[]>([]);
const [selectedSessionId, setSelectedSessionId] = useState<string | null>(null);
// isRefreshing tracks status/session reloads; isBusy tracks user-triggered
// actions run through runAction; isInstalling tracks the runtime install call.
const [isRefreshing, setIsRefreshing] = useState(false);
const [isBusy, setIsBusy] = useState(false);
const [isInstalling, setIsInstalling] = useState(false);
const [isFullscreen, setIsFullscreen] = useState(false);
const [error, setError] = useState<string | null>(null);
// Points at the focusable viewer surface so clicks can move keyboard focus to it.
const viewerRef = useRef<HTMLDivElement | null>(null);
// Resolve the selected session; falls back to the first session (or null)
// when the selected id is not present in the current list.
const selectedSession = useMemo(
() => sessions.find((session) => session.id === selectedSessionId) || sessions[0] || null,
[selectedSessionId, sessions],
);
// Fetch status and sessions in parallel and store both. The current selection
// is kept if it still exists, otherwise the first session (or null) is
// selected. The error banner is cleared only on success; failures propagate
// to the caller, and isRefreshing is always reset.
const refresh = useCallback(async () => {
setIsRefreshing(true);
try {
const [statusResponse, sessionsResponse] = await Promise.all([
authenticatedFetch('/api/computer-use/status'),
authenticatedFetch('/api/computer-use/sessions'),
]);
const statusData = await readJson<{ data: ComputerUseStatus }>(statusResponse);
const sessionsData = await readJson<{ data: { sessions: ComputerUseSession[] } }>(sessionsResponse);
setStatus(statusData.data);
setSessions(sessionsData.data.sessions);
setSelectedSessionId((current) => (
current && sessionsData.data.sessions.some((session) => session.id === current)
? current
: sessionsData.data.sessions[0]?.id || null
));
setError(null);
} finally {
setIsRefreshing(false);
}
}, []);
// Load data whenever the panel becomes visible; failures go to the error banner.
useEffect(() => {
if (!isVisible) return;
void refresh().catch((err) => setError(err instanceof Error ? err.message : 'Failed to load Computer Use'));
}, [isVisible, refresh]);
// Manual refresh handler for the toolbar button.
const handleRefresh = useCallback(() => {
void refresh().catch((err) => setError(err instanceof Error ? err.message : 'Failed to refresh Computer Use'));
}, [refresh]);
// Poll while an active session exists so agent-driven changes show up live.
// Polling only runs while the panel is visible and the selected session's
// status is 'ready'; it fires every 1500 ms and ignores refresh failures.
useEffect(() => {
if (!isVisible || !selectedSession || selectedSession.status !== 'ready') return;
const timer = window.setInterval(() => {
void refresh().catch(() => undefined);
}, 1500);
return () => window.clearInterval(timer);
}, [isVisible, selectedSession, refresh]);
// Shared wrapper for user-triggered actions: clears the previous error, runs
// the action, then refreshes. Any thrown error is shown in the error banner
// instead of propagating, and isBusy is always reset.
const runAction = useCallback(async (action: () => Promise<void>) => {
setIsBusy(true);
setError(null);
try {
await action();
await refresh();
} catch (err) {
setError(err instanceof Error ? err.message : 'Computer Use action failed');
} finally {
setIsBusy(false);
}
}, [refresh]);
// Session actions below are no-ops when no session is selected. Each one
// issues a single request against the selected session and passes the
// response to readJson.
const captureScreenshot = () => runAction(async () => {
if (!selectedSession) return;
const response = await authenticatedFetch(`/api/computer-use/sessions/${selectedSession.id}/screenshot`, { method: 'POST' });
await readJson(response);
});
const stopSession = () => runAction(async () => {
if (!selectedSession) return;
const response = await authenticatedFetch(`/api/computer-use/sessions/${selectedSession.id}/stop`, { method: 'POST' });
await readJson(response);
});
// Deleting also closes the fullscreen overlay.
const deleteSession = () => runAction(async () => {
if (!selectedSession) return;
const response = await authenticatedFetch(`/api/computer-use/sessions/${selectedSession.id}`, { method: 'DELETE' });
await readJson(response);
setIsFullscreen(false);
});
const grantControl = () => runAction(async () => {
if (!selectedSession) return;
const response = await authenticatedFetch(`/api/computer-use/sessions/${selectedSession.id}/consent/grant`, { method: 'POST' });
await readJson(response);
});
const revokeControl = () => runAction(async () => {
if (!selectedSession) return;
const response = await authenticatedFetch(`/api/computer-use/sessions/${selectedSession.id}/consent/revoke`, { method: 'POST' });
await readJson(response);
});
// Requests a runtime install. isInstalling is reset as soon as the install
// request settles, i.e. before runAction performs its follow-up refresh.
const installRuntime = () => runAction(async () => {
setIsInstalling(true);
try {
const response = await authenticatedFetch('/api/computer-use/runtime/install', { method: 'POST' });
await readJson(response);
} finally {
setIsInstalling(false);
}
});
// Forward a click on the screenshot to the session. Ignored unless the
// selected session is 'ready' and reports a displaySize. The click position
// is scaled from the rendered image bounds to display coordinates, and
// event.detail === 2 is sent as a double click.
const clickViewer = useCallback((event: MouseEvent<HTMLImageElement>) => {
if (!selectedSession || selectedSession.status !== 'ready' || !selectedSession.displaySize) {
return;
}
// Focus the viewer so subsequent key presses reach its onKeyDown handler.
viewerRef.current?.focus();
const bounds = event.currentTarget.getBoundingClientRect();
const scaleX = selectedSession.displaySize.width / bounds.width;
const scaleY = selectedSession.displaySize.height / bounds.height;
const x = Math.round((event.clientX - bounds.left) * scaleX);
const y = Math.round((event.clientY - bounds.top) * scaleY);
void runAction(async () => {
const response = await authenticatedFetch(`/api/computer-use/sessions/${selectedSession.id}/click`, {
method: 'POST',
body: JSON.stringify({ x, y, double: event.detail === 2 }),
});
await readJson(response);
});
}, [runAction, selectedSession]);
// Build the key string sent to the server, e.g. 'ctrl+alt+x'. A space key is
// always sent as 'Space' (held modifiers are not included in that case).
// 'shift' is only added for multi-character key names — presumably because
// event.key already reflects shift for single printable characters.
const keyForEvent = useCallback((event: KeyboardEvent<HTMLDivElement>) => {
if (event.key === ' ') return 'Space';
const parts: string[] = [];
if (event.ctrlKey) parts.push('ctrl');
if (event.altKey) parts.push('alt');
if (event.shiftKey && event.key.length > 1) parts.push('shift');
if (event.metaKey) parts.push('meta');
parts.push(event.key);
return parts.join('+');
}, []);
// Forward a key press on the viewer to the session. Ignored unless the
// selected session is 'ready'; bare modifier and CapsLock presses are skipped.
// preventDefault is called for every forwarded key.
const pressViewerKey = useCallback((event: KeyboardEvent<HTMLDivElement>) => {
if (!selectedSession || selectedSession.status !== 'ready') {
return;
}
const ignoredKeys = new Set(['Shift', 'Control', 'Alt', 'Meta', 'CapsLock']);
if (ignoredKeys.has(event.key)) {
return;
}
event.preventDefault();
const key = keyForEvent(event);
void runAction(async () => {
const response = await authenticatedFetch(`/api/computer-use/sessions/${selectedSession.id}/press-key`, {
method: 'POST',
body: JSON.stringify({ key }),
});
await readJson(response);
});
}, [keyForEvent, runAction, selectedSession]);
// True when the enabled local runtime is missing either of its two components.
const needsRuntime = Boolean(status?.enabled && status.runtime === 'local' && (!status.nutInstalled || !status.screenshotInstalled));
const isCloud = status?.runtime === 'cloud';
// Same fallback as getRuntimeLabel: explicit count, else 0/1 from the connected flag.
const desktopAgentCount = status?.desktopAgentCount ?? (status?.desktopAgentConnected ? 1 : 0);
const runtimeLabel = getRuntimeLabel(status, isInstalling);
// Cursor overlay position as percentages of the display size, or null when
// the session reports no cursor or no display size.
const cursorStyle = selectedSession?.cursor && selectedSession.displaySize
? {
left: `${(selectedSession.cursor.x / selectedSession.displaySize.width) * 100}%`,
top: `${(selectedSession.cursor.y / selectedSession.displaySize.height) * 100}%`,
}
: null;
// Renders the viewer surface (screenshot with cursor overlay, or a placeholder
// message). Used for both the inline view and the fullscreen overlay; both
// renderings attach the same viewerRef. The surface is only tabbable while
// the selected session is 'ready'.
const renderSurface = (fullscreen = false) => (
<div
ref={viewerRef}
tabIndex={selectedSession?.status === 'ready' ? 0 : -1}
onKeyDown={pressViewerKey}
className={`flex min-h-[360px] flex-1 items-center justify-center bg-neutral-950 outline-none ${fullscreen ? 'min-h-[80vh]' : ''}`}
>
{selectedSession?.screenshotDataUrl ? (
<div className="relative inline-block max-h-full">
<img
src={selectedSession.screenshotDataUrl}
alt="Desktop screenshot"
className={fullscreen ? 'block max-h-[80vh] w-auto max-w-full object-contain' : 'block max-h-[70vh] w-auto max-w-full object-contain'}
onClick={clickViewer}
/>
{cursorStyle && (
<div
className="pointer-events-none absolute h-5 w-5 -translate-x-1/2 -translate-y-1/2 rounded-full border-2 border-white/90 bg-sky-500/80 shadow-[0_0_0_6px_rgba(14,165,233,0.18)]"
style={cursorStyle}
>
<div className="absolute left-1/2 top-1/2 h-2 w-2 -translate-x-1/2 -translate-y-1/2 rounded-full bg-white" />
</div>
)}
</div>
) : (
<div className="max-w-md px-6 text-center">
<MonitorCog className="mx-auto h-10 w-10 text-neutral-500" />
<div className="mt-3 text-sm font-medium text-neutral-100">
{selectedSession?.message || 'No active Computer Use session.'}
</div>
<p className="mt-2 text-xs leading-relaxed text-neutral-400">
{isCloud
? 'Agents create sessions automatically. Keep the CloudCLI desktop app connected to approve control requests.'
: 'Agents create sessions automatically. Enable Computer Use and install the local runtime if needed.'}
</p>
</div>
)}
</div>
);
// Layout: header (title, runtime badge, settings/refresh buttons), a sidebar
// (cloud link status, runtime install card, safety note, session list), the
// main area (action toolbar, error banner, viewer), and an optional
// fullscreen overlay.
return (
<div className="flex h-full min-h-0 flex-col bg-background">
<div className="flex flex-wrap items-center justify-between gap-3 border-b border-border/60 px-4 py-3">
<div className="min-w-0">
<div className="flex items-center gap-2">
<MonitorCog className="h-4 w-4 text-primary" />
<h3 className="text-sm font-semibold text-foreground">Computer Use</h3>
<Badge variant="outline" className={cn('text-[10px]', getRuntimeTone(status, isInstalling))}>
{runtimeLabel}
</Badge>
</div>
<p className="mt-0.5 text-xs text-muted-foreground">
{isCloud
? 'Monitor cloud agent desktop sessions and linked desktops.'
: 'Monitor local desktop sessions and grant control only when an agent needs it.'}
</p>
</div>
<div className="flex items-center gap-1.5">
{onShowSettings && (
<Button
variant="ghost"
size="sm"
className="h-7 w-7 p-0"
onClick={() => onShowSettings('computer')}
title="Open Computer Use settings"
aria-label="Open Computer Use settings"
>
<Settings className="h-3.5 w-3.5" />
</Button>
)}
<Button
variant="ghost"
size="sm"
className="h-7 w-7 p-0"
onClick={handleRefresh}
disabled={isRefreshing || isBusy}
title="Refresh Computer Use"
aria-label="Refresh Computer Use"
>
<RefreshCw className={cn('h-3.5 w-3.5', isRefreshing && 'animate-spin')} />
</Button>
</div>
</div>
<div className="grid min-h-0 flex-1 grid-cols-1 lg:grid-cols-[300px_minmax(0,1fr)]">
<aside className="border-b border-border/60 p-3 lg:border-b-0 lg:border-r">
{isCloud && (
<div className="rounded-lg border border-border/70 bg-card/40 p-3">
<div className="flex items-start justify-between gap-3">
<div className="min-w-0">
<div className="text-xs font-medium uppercase tracking-wide text-muted-foreground">Cloud desktop access</div>
<div className="mt-1 text-sm font-medium text-foreground">{runtimeLabel}</div>
</div>
<Badge variant="outline" className={cn('shrink-0 text-[10px]', getRuntimeTone(status, isInstalling))}>
{desktopAgentCount > 0 ? `${desktopAgentCount} linked` : 'Not linked'}
</Badge>
</div>
<p className="mt-2 text-xs leading-relaxed text-muted-foreground">
{desktopAgentCount > 1
? 'More than one CloudCLI Desktop app is linked. Agents will use one available desktop.'
: desktopAgentCount === 1
? 'CloudCLI Desktop is connected. Approval prompts appear on that computer.'
: 'Open CloudCLI Desktop on the computer you want agents to use, connect the same account, and enable Computer Use.'}
</p>
</div>
)}
{needsRuntime && (
<div className={cn('rounded-lg border border-border/70 bg-card/40 p-3', isCloud && 'mt-3')}>
<div className="text-xs font-medium uppercase tracking-wide text-muted-foreground">Desktop runtime required</div>
<p className="mt-2 text-xs leading-relaxed text-muted-foreground">
{status?.message || 'Install the desktop control runtime to enable Computer Use.'}
</p>
<div className="mt-3 flex flex-wrap gap-2 text-xs text-muted-foreground">
<span className="rounded-md border border-border px-2 py-1">
Control lib: {status?.nutInstalled ? 'installed' : 'missing'}
</span>
<span className="rounded-md border border-border px-2 py-1">
Screen capture: {status?.screenshotInstalled ? 'installed' : 'missing'}
</span>
</div>
<Button
type="button"
size="sm"
className="mt-3 w-full"
onClick={installRuntime}
disabled={isBusy || isInstalling || status?.installInProgress}
>
{isInstalling || status?.installInProgress ? (
<Loader2 className="h-4 w-4 animate-spin" />
) : (
<Download className="h-4 w-4" />
)}
{isInstalling || status?.installInProgress ? 'Installing…' : 'Install Runtime'}
</Button>
</div>
)}
<div className="mt-3 space-y-2">
<div className="rounded-lg border border-border/70 bg-muted/30 p-3 text-xs leading-relaxed text-muted-foreground">
<div className="flex items-center gap-1.5 font-medium text-foreground">
<ShieldCheck className="h-3.5 w-3.5" />
Safety
</div>
{isCloud ? (
<p className="mt-1.5">
Agents create sessions automatically through MCP. The CloudCLI desktop app asks for approval on this
computer, and <span className="font-medium text-foreground">Stop</span> ends the session and clears access.
</p>
) : (
<p className="mt-1.5">
Agents create sessions automatically through MCP but cannot act until you grant control here. Use
<span className="font-medium text-foreground"> Grant Control </span>
to allow agent actions, and
<span className="font-medium text-foreground"> Stop </span>
to revoke instantly.
</p>
)}
</div>
{sessions.map((session) => (
<button
key={session.id}
type="button"
onClick={() => setSelectedSessionId(session.id)}
className={`w-full rounded-lg border px-3 py-2 text-left text-sm transition-colors ${selectedSession?.id === session.id
? 'border-primary/50 bg-primary/10 text-foreground'
: 'border-border/60 bg-card/30 text-muted-foreground hover:bg-muted/50'
}`}
>
<div className="flex items-center justify-between gap-2">
<span className="truncate font-medium">
{session.createdBy === 'agent' ? 'Agent session' : 'Desktop session'}
</span>
<Badge variant="outline" className="text-[10px]">{session.status}</Badge>
</div>
<div className="mt-1 flex flex-wrap gap-1">
{session.agentAccessEnabled ? (
<span className="rounded border border-emerald-500/30 px-1.5 py-0.5 text-[10px] text-emerald-600 dark:text-emerald-300">
control granted
</span>
) : (
<span className="rounded border border-amber-500/30 px-1.5 py-0.5 text-[10px] text-amber-600 dark:text-amber-300">
awaiting consent
</span>
)}
</div>
<div className="mt-1 truncate text-xs">{session.lastAction || session.message || session.id}</div>
</button>
))}
{sessions.length === 0 && (
<div className="rounded-lg border border-dashed border-border/70 px-3 py-8 text-center text-xs text-muted-foreground">
Agents will create sessions automatically when they need desktop access.
</div>
)}
</div>
</aside>
<main className="flex min-h-0 flex-col">
<div className="flex flex-wrap items-center gap-2 border-b border-border/60 px-3 py-2">
<Button variant="outline" size="sm" onClick={captureScreenshot} disabled={isBusy || !selectedSession || selectedSession.status !== 'ready'}>
<Camera className="h-4 w-4" />
Screenshot
</Button>
{!isCloud && selectedSession?.agentAccessEnabled ? (
<Button variant="outline" size="sm" onClick={revokeControl} disabled={isBusy || !selectedSession}>
<X className="h-4 w-4" />
Revoke Control
</Button>
) : !isCloud ? (
<Button
variant="outline"
size="sm"
onClick={grantControl}
disabled={isBusy || !selectedSession || selectedSession.status !== 'ready' || !status?.enabled}
>
<Bot className="h-4 w-4" />
Grant Control
</Button>
) : null}
<Button variant="outline" size="sm" onClick={() => setIsFullscreen(true)} disabled={!selectedSession?.screenshotDataUrl}>
<Expand className="h-4 w-4" />
Full Screen
</Button>
<Button variant="outline" size="sm" onClick={stopSession} disabled={isBusy || !selectedSession || selectedSession.status !== 'ready'}>
<Square className="h-4 w-4" />
Stop
</Button>
<Button variant="outline" size="sm" onClick={deleteSession} disabled={isBusy || !selectedSession}>
<Trash2 className="h-4 w-4" />
Delete
</Button>
</div>
{error && (
<div className="border-b border-red-200 bg-red-50 px-4 py-2 text-sm text-red-700 dark:border-red-900/50 dark:bg-red-950/30 dark:text-red-200">
{error}
</div>
)}
<div className="min-h-0 flex-1 overflow-auto bg-muted/20 p-4">
<div className="mx-auto flex min-h-[420px] max-w-6xl flex-col overflow-hidden rounded-lg border border-border bg-background shadow-sm">
<div className="flex items-center gap-2 border-b border-border/60 px-3 py-2 text-xs text-muted-foreground">
<MonitorCog className="h-3.5 w-3.5" />
<span className="truncate">
{selectedSession?.displaySize
? `${selectedSession.displaySize.width}×${selectedSession.displaySize.height}`
: 'No screen captured'}
</span>
{selectedSession?.agentAccessEnabled && (
<span className="ml-auto inline-flex items-center gap-1 rounded border border-emerald-500/30 px-2 py-0.5 text-emerald-600 dark:text-emerald-300">
<Bot className="h-3.5 w-3.5" />
{isCloud ? 'Desktop-approved session' : 'Agent control active'}
</span>
)}
</div>
{renderSurface()}
</div>
<p className="mx-auto mt-2 max-w-6xl text-center text-xs text-muted-foreground">
{selectedSession
? 'Click the screenshot to click the real desktop. Focus the view and type to send keystrokes.'
: 'Computer Use sessions appear here after an agent requests desktop access.'}
</p>
</div>
</main>
</div>
{isFullscreen && selectedSession && (
<div className="fixed inset-0 z-50 bg-black/90 p-6">
<div className="flex h-full flex-col rounded-lg border border-white/10 bg-black">
<div className="flex items-center justify-between border-b border-white/10 px-4 py-3 text-sm text-white/80">
<div className="min-w-0 truncate">Desktop session</div>
<Button variant="outline" size="sm" onClick={() => setIsFullscreen(false)}>
<X className="h-4 w-4" />
Close
</Button>
</div>
{renderSurface(true)}
</div>
</div>
)}
</div>
);
}

View File

@@ -0,0 +1,16 @@
/** Props accepted by the Hermes logo component. */
type HermesLogoProps = {
  /** CSS classes applied to the root <svg>; sizes the icon. */
  className?: string;
};

/**
 * Hermes provider logo: a rounded green tile with a white "H" glyph and a
 * light-green accent above it.
 *
 * @param props.className - Classes for the root <svg>; defaults to 'w-5 h-5'.
 */
export default function HermesLogo(props: HermesLogoProps) {
  const { className = 'w-5 h-5' } = props;

  return (
    <svg className={className} viewBox="0 0 24 24" role="img" aria-label="Hermes">
      <rect width="24" height="24" rx="6" fill="#047857" />
      <path d="M6.2 6.5h2.4v4.3h6.8V6.5h2.4v11h-2.4v-4.6H8.6v4.6H6.2v-11Z" fill="white" />
      <path d="M9.3 4.7h5.4l-1.2 1.2h-3L9.3 4.7Z" fill="#A7F3D0" />
    </svg>
  );
}

View File

@@ -3,6 +3,7 @@ import ClaudeLogo from './ClaudeLogo';
import CodexLogo from './CodexLogo';
import CursorLogo from './CursorLogo';
import GeminiLogo from './GeminiLogo';
import HermesLogo from './HermesLogo';
import OpenCodeLogo from './OpenCodeLogo';
type SessionProviderLogoProps = {
@@ -30,5 +31,9 @@ export default function SessionProviderLogo({
return <OpenCodeLogo className={className} />;
}
if (provider === 'hermes') {
return <HermesLogo className={className} />;
}
return <ClaudeLogo className={className} />;
}

View File

@@ -66,7 +66,6 @@ export type MainContentHeaderProps = {
selectedSession: ProjectSession | null;
shouldShowTasksTab: boolean;
shouldShowBrowserTab: boolean;
shouldShowComputerTab: boolean;
isMobile: boolean;
onMenuClick: () => void;
};

View File

@@ -6,13 +6,12 @@ import StandaloneShell from '../../standalone-shell/view/StandaloneShell';
import GitPanel from '../../git-panel/view/GitPanel';
import PluginTabContent from '../../plugins/view/PluginTabContent';
import { BrowserUsePanel } from '../../browser-use';
import { ComputerUsePanel } from '../../computer-use';
import type { MainContentProps } from '../types/types';
import { useTaskMaster } from '../../../contexts/TaskMasterContext';
import { usePaletteOpsRegister } from '../../../contexts/PaletteOpsContext';
import { useTasksSettings } from '../../../contexts/TasksSettingsContext';
import { useUiPreferences } from '../../../hooks/useUiPreferences';
import { COMPUTER_USE_MENUS_ENABLED } from '../../../constants/featureFlags';
import { useFileOpenResolver } from '../../../hooks/useFileOpenResolver';
import { authenticatedFetch } from '../../../utils/api';
import { useEditorSidebar } from '../../code-editor/hooks/useEditorSidebar';
import EditorSidebar from '../../code-editor/view/EditorSidebar';
@@ -60,11 +59,9 @@ function MainContent({
const { currentProject, setCurrentProject } = useTaskMaster() as TaskMasterContextValue;
const { tasksEnabled, isTaskMasterInstalled } = useTasksSettings() as TasksSettingsContextValue;
const [browserUseEnabled, setBrowserUseEnabled] = useState(false);
const [computerUseEnabled, setComputerUseEnabled] = useState<boolean | undefined>(undefined);
const shouldShowTasksTab = Boolean(tasksEnabled && isTaskMasterInstalled);
const shouldShowBrowserTab = browserUseEnabled;
const shouldShowComputerTab = COMPUTER_USE_MENUS_ENABLED && computerUseEnabled === true;
const {
editingFile,
@@ -81,6 +78,10 @@ function MainContent({
isMobile,
});
// Resolves bare/partial file references (e.g. links inside chat messages) to
// real project files before opening them in the in-app editor.
const resolvedFileOpen = useFileOpenResolver(selectedProject, handleFileOpen);
useEffect(() => {
// Identify projects by DB `projectId`; the TaskMaster context uses the
// same identifier to key its internal maps.
@@ -120,65 +121,15 @@ function MainContent({
}
}, [shouldShowBrowserTab, activeTab, setActiveTab]);
const loadComputerUseSettings = useCallback(async () => {
try {
const [settingsResponse, statusResponse] = await Promise.allSettled([
authenticatedFetch('/api/computer-use/settings'),
authenticatedFetch('/api/computer-use/status'),
]);
const settingsRes = settingsResponse.status === 'fulfilled' ? settingsResponse.value : null;
const statusRes = statusResponse.status === 'fulfilled' ? statusResponse.value : null;
const readJson = async (response: Response | null) => {
if (!response) return null;
try {
return await response.json();
} catch {
return null;
}
};
const settingsData = await readJson(settingsRes);
const statusData = await readJson(statusRes);
const runtime = statusData?.data?.runtime;
const settingsUsable = Boolean(settingsRes?.ok && settingsData?.success !== false);
const statusUsable = Boolean(statusRes?.ok && statusData?.success !== false);
const settingsEnabled = Boolean(
settingsUsable &&
settingsData?.data?.settings?.enabled
);
const cloudEnabled = Boolean(
statusUsable &&
runtime === 'cloud' &&
statusData?.data?.enabled
);
if (runtime === 'cloud') {
setComputerUseEnabled(cloudEnabled);
} else if (settingsUsable) {
setComputerUseEnabled(settingsEnabled);
} else if (statusUsable) {
setComputerUseEnabled(Boolean(statusData?.data?.enabled));
}
} catch {
// Keep the current tab availability on transient status/settings failures.
}
}, []);
useEffect(() => {
void loadComputerUseSettings();
window.addEventListener('computerUseSettingsChanged', loadComputerUseSettings);
return () => window.removeEventListener('computerUseSettingsChanged', loadComputerUseSettings);
}, [loadComputerUseSettings]);
useEffect(() => {
if (!shouldShowComputerTab && activeTab === 'computer') {
setActiveTab('chat');
}
}, [shouldShowComputerTab, activeTab, setActiveTab]);
usePaletteOpsRegister({
openFile: (filePath: string) => {
setActiveTab('files');
handleFileOpen(filePath);
},
// Opens the editor side panel in place, keeping the current tab (e.g. chat).
openFileInEditor: (filePath: string) => {
resolvedFileOpen(filePath);
},
});
if (isLoading) {
@@ -198,7 +149,6 @@ function MainContent({
selectedSession={selectedSession}
shouldShowTasksTab={shouldShowTasksTab}
shouldShowBrowserTab={shouldShowBrowserTab}
shouldShowComputerTab={shouldShowComputerTab}
isMobile={isMobile}
onMenuClick={onMenuClick}
/>
@@ -263,12 +213,6 @@ function MainContent({
</div>
)}
{shouldShowComputerTab && activeTab === 'computer' && (
<div className="h-full overflow-hidden">
<ComputerUsePanel isVisible={activeTab === 'computer'} onShowSettings={onShowSettings} />
</div>
)}
{activeTab.startsWith('plugin:') && (
<div className="h-full overflow-hidden">
<PluginTabContent

View File

@@ -11,7 +11,6 @@ export default function MainContentHeader({
selectedSession,
shouldShowTasksTab,
shouldShowBrowserTab,
shouldShowComputerTab,
isMobile,
onMenuClick,
}: MainContentHeaderProps) {
@@ -62,7 +61,6 @@ export default function MainContentHeader({
setActiveTab={setActiveTab}
shouldShowTasksTab={shouldShowTasksTab}
shouldShowBrowserTab={shouldShowBrowserTab}
shouldShowComputerTab={shouldShowComputerTab}
/>
</div>
{canScrollRight && (

View File

@@ -1,4 +1,4 @@
import { MessageSquare, Terminal, Folder, GitBranch, ClipboardCheck, MonitorCog, MonitorPlay, type LucideIcon } from 'lucide-react';
import { MessageSquare, Terminal, Folder, GitBranch, ClipboardCheck, MonitorPlay, type LucideIcon } from 'lucide-react';
import type { Dispatch, SetStateAction } from 'react';
import { useTranslation } from 'react-i18next';
@@ -12,7 +12,6 @@ type MainContentTabSwitcherProps = {
setActiveTab: Dispatch<SetStateAction<AppTab>>;
shouldShowTasksTab: boolean;
shouldShowBrowserTab: boolean;
shouldShowComputerTab: boolean;
};
type BuiltInTab = {
@@ -46,13 +45,6 @@ const BROWSER_TAB: BuiltInTab = {
icon: MonitorPlay,
};
const COMPUTER_TAB: BuiltInTab = {
kind: 'builtin',
id: 'computer',
labelKey: 'tabs.computer',
icon: MonitorCog,
};
const TASKS_TAB: BuiltInTab = {
kind: 'builtin',
id: 'tasks',
@@ -65,7 +57,6 @@ export default function MainContentTabSwitcher({
setActiveTab,
shouldShowTasksTab,
shouldShowBrowserTab,
shouldShowComputerTab,
}: MainContentTabSwitcherProps) {
const { t } = useTranslation();
const { plugins } = usePlugins();
@@ -73,7 +64,6 @@ export default function MainContentTabSwitcher({
const builtInTabs: BuiltInTab[] = [
...BASE_TABS,
...(shouldShowBrowserTab ? [BROWSER_TAB] : []),
...(shouldShowComputerTab ? [COMPUTER_TAB] : []),
...(shouldShowTasksTab ? [TASKS_TAB] : []),
];

View File

@@ -32,10 +32,6 @@ function getTabTitle(activeTab: AppTab, shouldShowTasksTab: boolean, t: (key: st
return t('tabs.browser');
}
if (activeTab === 'computer') {
return t('tabs.computer');
}
return 'Project';
}

View File

@@ -6,6 +6,7 @@ export const MCP_PROVIDER_NAMES: Record<McpProvider, string> = {
codex: 'Codex',
gemini: 'Gemini',
opencode: 'OpenCode',
hermes: 'Hermes',
};
export const MCP_SUPPORTED_SCOPES: Record<McpProvider, McpScope[]> = {
@@ -14,6 +15,7 @@ export const MCP_SUPPORTED_SCOPES: Record<McpProvider, McpScope[]> = {
codex: ['user', 'project'],
gemini: ['user', 'project'],
opencode: ['user', 'project'],
hermes: ['user', 'project'],
};
export const MCP_SUPPORTED_TRANSPORTS: Record<McpProvider, McpTransport[]> = {
@@ -22,6 +24,7 @@ export const MCP_SUPPORTED_TRANSPORTS: Record<McpProvider, McpTransport[]> = {
codex: ['stdio', 'http'],
gemini: ['stdio', 'http', 'sse'],
opencode: ['stdio', 'http'],
hermes: ['stdio', 'http'],
};
export const MCP_GLOBAL_SUPPORTED_SCOPES: McpScope[] = ['user', 'project'];
@@ -34,6 +37,7 @@ export const MCP_PROVIDER_BUTTON_CLASSES: Record<McpProvider, string> = {
codex: 'bg-gray-800 text-white hover:bg-gray-900 dark:bg-gray-700 dark:hover:bg-gray-600',
gemini: 'bg-blue-600 text-white hover:bg-blue-700',
opencode: 'bg-zinc-900 text-white hover:bg-zinc-800 dark:bg-zinc-700 dark:hover:bg-zinc-600',
hermes: 'bg-emerald-700 text-white hover:bg-emerald-800 dark:bg-emerald-600 dark:hover:bg-emerald-700',
};
export const MCP_SUPPORTS_WORKING_DIRECTORY: Record<McpProvider, boolean> = {
@@ -42,6 +46,7 @@ export const MCP_SUPPORTS_WORKING_DIRECTORY: Record<McpProvider, boolean> = {
codex: true,
gemini: true,
opencode: false,
hermes: false,
};
export const DEFAULT_MCP_FORM: McpFormState = {

View File

@@ -10,7 +10,7 @@ export type ProviderAuthStatus = {
export type ProviderAuthStatusMap = Record<LLMProvider, ProviderAuthStatus>;
export const CLI_PROVIDERS: LLMProvider[] = ['claude', 'cursor', 'codex', 'gemini', 'opencode'];
export const CLI_PROVIDERS: LLMProvider[] = ['claude', 'cursor', 'codex', 'gemini', 'opencode', 'hermes'];
export const PROVIDER_AUTH_STATUS_ENDPOINTS: Record<LLMProvider, string> = {
claude: '/api/providers/claude/auth/status',
@@ -18,6 +18,7 @@ export const PROVIDER_AUTH_STATUS_ENDPOINTS: Record<LLMProvider, string> = {
codex: '/api/providers/codex/auth/status',
gemini: '/api/providers/gemini/auth/status',
opencode: '/api/providers/opencode/auth/status',
hermes: '/api/providers/hermes/auth/status',
};
export const createInitialProviderAuthStatusMap = (loading = true): ProviderAuthStatusMap => ({
@@ -26,4 +27,5 @@ export const createInitialProviderAuthStatusMap = (loading = true): ProviderAuth
codex: { authenticated: false, email: null, method: null, error: null, loading },
gemini: { authenticated: false, email: null, method: null, error: null, loading },
opencode: { authenticated: false, email: null, method: null, error: null, loading },
hermes: { authenticated: false, email: null, method: null, error: null, loading },
});

View File

@@ -9,6 +9,7 @@ type ProviderLoginModalProps = {
provider?: LLMProvider;
onComplete?: (exitCode: number) => void;
customCommand?: string;
customTitle?: string;
isAuthenticated?: boolean;
};
@@ -41,6 +42,10 @@ const getProviderCommand = ({
return 'opencode auth login';
}
if (provider === 'hermes') {
return 'hermes model';
}
return 'gemini status';
};
@@ -49,6 +54,7 @@ const getProviderTitle = (provider: LLMProvider) => {
if (provider === 'cursor') return 'Cursor CLI Login';
if (provider === 'codex') return 'Codex CLI Login';
if (provider === 'opencode') return 'OpenCode CLI Login';
if (provider === 'hermes') return 'Hermes Agent Setup';
return 'Gemini CLI Configuration';
};
@@ -58,6 +64,7 @@ export default function ProviderLoginModal({
provider = 'claude',
onComplete,
customCommand,
customTitle,
isAuthenticated = false,
}: ProviderLoginModalProps) {
if (!isOpen) {
@@ -65,7 +72,7 @@ export default function ProviderLoginModal({
}
const command = getProviderCommand({ provider, customCommand, isAuthenticated });
const title = getProviderTitle(provider);
const title = customTitle || getProviderTitle(provider);
const handleComplete = (exitCode: number) => {
onComplete?.(exitCode);

View File

@@ -39,7 +39,7 @@ export const SETTINGS_MAIN_TABS: SettingsMainTabMeta[] = [
{ id: 'about', label: 'About', keywords: 'about version info', icon: Info },
];
export const AGENT_PROVIDERS: AgentProvider[] = ['claude', 'cursor', 'codex', 'gemini', 'opencode'];
export const AGENT_PROVIDERS: AgentProvider[] = ['claude', 'cursor', 'codex', 'gemini', 'opencode', 'hermes'];
export const AGENT_CATEGORIES: AgentCategory[] = ['account', 'permissions', 'mcp'];
export const DEFAULT_PROJECT_SORT_ORDER: ProjectSortOrder = 'name';

View File

@@ -1,7 +1,6 @@
import { useCallback, useEffect, useRef, useState } from 'react';
import { useTheme } from '../../../contexts/ThemeContext';
import { COMPUTER_USE_MENUS_ENABLED } from '../../../constants/featureFlags';
import { authenticatedFetch } from '../../../utils/api';
import { setNotificationSoundEnabled } from '../../../utils/notificationSound';
import { useProviderAuthStatus } from '../../provider-auth/hooks/useProviderAuthStatus';
@@ -55,11 +54,11 @@ type NotificationPreferencesResponse = {
type ActiveLoginProvider = AgentProvider | '';
const KNOWN_MAIN_TABS: SettingsMainTab[] = ['agents', 'appearance', 'git', 'api', 'tasks', 'browser', 'computer', 'notifications', 'plugins', 'about'];
const KNOWN_MAIN_TABS: SettingsMainTab[] = ['agents', 'appearance', 'git', 'api', 'tasks', 'browser', 'notifications', 'plugins', 'about'];
const normalizeMainTab = (tab: string): SettingsMainTab => {
// Keep backwards compatibility with older callers that still pass "tools".
if (tab === 'tools' || (tab === 'computer' && !COMPUTER_USE_MENUS_ENABLED)) {
if (tab === 'tools') {
return 'agents';
}
@@ -165,6 +164,8 @@ export function useSettingsController({ isOpen, initialTab }: UseSettingsControl
const [showLoginModal, setShowLoginModal] = useState(false);
const [loginProvider, setLoginProvider] = useState<ActiveLoginProvider>('');
const [loginCommand, setLoginCommand] = useState<string | undefined>(undefined);
const [loginTitle, setLoginTitle] = useState<string | undefined>(undefined);
const {
providerAuthStatus,
checkProviderAuthStatus,
@@ -232,8 +233,10 @@ export function useSettingsController({ isOpen, initialTab }: UseSettingsControl
}
}, []);
const openLoginForProvider = useCallback((provider: AgentProvider) => {
const openLoginForProvider = useCallback((provider: AgentProvider, customCommand?: string, customTitle?: string) => {
setLoginProvider(provider);
setLoginCommand(customCommand);
setLoginTitle(customTitle);
setShowLoginModal(true);
}, []);
@@ -418,6 +421,8 @@ export function useSettingsController({ isOpen, initialTab }: UseSettingsControl
showLoginModal,
setShowLoginModal,
loginProvider,
loginCommand,
loginTitle,
handleLoginComplete,
};
}

View File

@@ -3,7 +3,7 @@ import type { Dispatch, SetStateAction } from 'react';
import type { LLMProvider } from '../../../types/app';
import type { ProviderAuthStatus } from '../../provider-auth/types';
export type SettingsMainTab = 'agents' | 'appearance' | 'git' | 'api' | 'voice' | 'tasks' | 'browser' | 'computer' | 'notifications' | 'plugins' | 'about';
export type SettingsMainTab = 'agents' | 'appearance' | 'git' | 'api' | 'voice' | 'tasks' | 'browser' | 'notifications' | 'plugins' | 'about';
export type AgentProvider = LLMProvider;
export type AgentCategory = 'account' | 'permissions' | 'mcp' | 'skills';
export type ProjectSortOrder = 'name' | 'date';

View File

@@ -11,7 +11,6 @@ import CredentialsSettingsTab from '../view/tabs/api-settings/CredentialsSetting
import VoiceSettingsTab from '../view/tabs/VoiceSettingsTab';
import GitSettingsTab from '../view/tabs/git-settings/GitSettingsTab';
import BrowserUseSettingsTab from '../view/tabs/browser-use-settings/BrowserUseSettingsTab';
import ComputerUseSettingsTab from '../view/tabs/computer-use-settings/ComputerUseSettingsTab';
import NotificationsSettingsTab from '../view/tabs/NotificationsSettingsTab';
import TasksSettingsTab from '../view/tabs/tasks-settings/TasksSettingsTab';
import PluginSettingsTab from '../../plugins/view/PluginSettingsTab';
@@ -59,6 +58,8 @@ function Settings({ isOpen, onClose, projects = [], initialTab = 'agents' }: Set
showLoginModal,
setShowLoginModal,
loginProvider,
loginCommand,
loginTitle,
handleLoginComplete,
} = useSettingsController({
isOpen,
@@ -199,8 +200,6 @@ function Settings({ isOpen, onClose, projects = [], initialTab = 'agents' }: Set
{activeTab === 'browser' && <BrowserUseSettingsTab />}
{activeTab === 'computer' && <ComputerUseSettingsTab />}
{activeTab === 'notifications' && (
<NotificationsSettingsTab
notificationPreferences={notificationPreferences}
@@ -235,6 +234,8 @@ function Settings({ isOpen, onClose, projects = [], initialTab = 'agents' }: Set
onClose={() => setShowLoginModal(false)}
provider={loginProvider || 'claude'}
onComplete={handleLoginComplete}
customCommand={loginCommand}
customTitle={loginTitle}
isAuthenticated={isAuthenticated}
/>

Some files were not shown because too many files have changed in this diff Show More