From 278fe4f7b16a09d6d1d70c98434a713f77d816b2 Mon Sep 17 00:00:00 2001 From: Simos Mikelatos Date: Fri, 19 Jun 2026 12:46:40 +0000 Subject: [PATCH] Fix semantic review issues and release action runtime --- .../workflows/desktop-macos-branch-build.yml | 2 +- .github/workflows/desktop-macos-release.yml | 4 +-- .../desktop-windows-branch-build.yml | 2 +- .../computer-semantics.service.ts | 6 ++-- .../semantics/helpers/windows/Program.cs | 16 ++++++++- .../semantics/semantic-session-store.ts | 8 +++-- .../semantics/semantic-tool-dispatcher.ts | 4 +-- .../main-content/view/MainContent.tsx | 34 ++++++++++++++----- 8 files changed, 56 insertions(+), 20 deletions(-) diff --git a/.github/workflows/desktop-macos-branch-build.yml b/.github/workflows/desktop-macos-branch-build.yml index e4403f93..1c4647ec 100644 --- a/.github/workflows/desktop-macos-branch-build.yml +++ b/.github/workflows/desktop-macos-branch-build.yml @@ -76,7 +76,7 @@ jobs: test -n "$(find release/local-server -maxdepth 1 -name 'cloudcli-local-server-*.tar.gz.sha256' -print -quit)" - name: Publish branch server bundle - uses: softprops/action-gh-release@3bb12739c298aeb8a4eeaf626c5b8d85266b0e65 # v2 + uses: softprops/action-gh-release@b4309332981a82ec1c5618f44dd2e27cc8bfbfda # v3.0.0 with: tag_name: ${{ steps.artifact.outputs.server_bundle_tag }} name: CloudCLI Internal Local Runtime (${{ github.ref_name }}) diff --git a/.github/workflows/desktop-macos-release.yml b/.github/workflows/desktop-macos-release.yml index 87e06350..2a9694f0 100644 --- a/.github/workflows/desktop-macos-release.yml +++ b/.github/workflows/desktop-macos-release.yml @@ -99,7 +99,7 @@ jobs: test -n "$(find release/local-server -maxdepth 1 -name 'cloudcli-local-server-*.tar.gz.sha256' -print -quit)" - name: Publish local server runtime assets - uses: softprops/action-gh-release@3bb12739c298aeb8a4eeaf626c5b8d85266b0e65 # v2 + uses: softprops/action-gh-release@b4309332981a82ec1c5618f44dd2e27cc8bfbfda # v3.0.0 with: tag_name: ${{ steps.release.outputs.server_bundle_tag }} target_commitish: ${{ github.sha }} @@ -122,7 +122,7 @@ jobs: cat release/SHASUMS256.txt - name: Publish GitHub release assets - uses: softprops/action-gh-release@3bb12739c298aeb8a4eeaf626c5b8d85266b0e65 # v2 + uses: softprops/action-gh-release@b4309332981a82ec1c5618f44dd2e27cc8bfbfda # v3.0.0 with: tag_name: ${{ steps.release.outputs.tag }} target_commitish: ${{ github.sha }} diff --git a/.github/workflows/desktop-windows-branch-build.yml b/.github/workflows/desktop-windows-branch-build.yml index d51611f4..53aad84a 100644 --- a/.github/workflows/desktop-windows-branch-build.yml +++ b/.github/workflows/desktop-windows-branch-build.yml @@ -61,7 +61,7 @@ jobs: test -n "$(find release/local-server -maxdepth 1 -name 'cloudcli-local-server-*.tar.gz.sha256' -print -quit)" - name: Publish branch server bundle - uses: softprops/action-gh-release@3bb12739c298aeb8a4eeaf626c5b8d85266b0e65 # v2 + uses: softprops/action-gh-release@b4309332981a82ec1c5618f44dd2e27cc8bfbfda # v3.0.0 with: tag_name: ${{ steps.artifact.outputs.server_bundle_tag }} name: CloudCLI Internal Local Runtime (${{ github.ref_name }}) diff --git a/server/modules/computer-use/computer-semantics.service.ts b/server/modules/computer-use/computer-semantics.service.ts index bb18b509..d6a8eb05 100644 --- a/server/modules/computer-use/computer-semantics.service.ts +++ b/server/modules/computer-use/computer-semantics.service.ts @@ -345,7 +345,8 @@ export const computerSemanticsService = { } case 'get_app_state': return getAppState(sessionId, readString(input.app)); - case 'click': { + case 'click': + case 'click_element': { const app = readString(input.app); const helperState = await withHelperState(sessionId, (adapter) => adapter.clickElement({ ...input, sessionId, app })); if (helperState) { @@ -381,7 +382,8 @@ export const computerSemanticsService = { await executor.drag(await targetFor(sessionId, app, stateId), { x: fromX, y: fromY }, { x: toX, y: toY }, readButton(input.mouse_button ?? input.mouseButton)); return getAppState(sessionId, app); } - case 'scroll': { + case 'scroll': + case 'scroll_element': { const app = readString(input.app); const helperState = await withHelperState(sessionId, (adapter) => adapter.scrollElement({ ...input, sessionId, app })); if (helperState) { diff --git a/server/modules/computer-use/semantics/helpers/windows/Program.cs b/server/modules/computer-use/semantics/helpers/windows/Program.cs index 1594d8c2..da75a25c 100644 --- a/server/modules/computer-use/semantics/helpers/windows/Program.cs +++ b/server/modules/computer-use/semantics/helpers/windows/Program.cs @@ -8,8 +8,10 @@ using System.Windows.Automation; static class Program { + private const int MaxStoredStates = 100; private static readonly Dictionary> StateElements = new(); private static readonly Dictionary> StateAutomationElements = new(); + private static readonly Queue StateOrder = new(); public static void Main() { @@ -121,6 +123,8 @@ static class Program var stateId = $"state_{Guid.NewGuid()}"; StateElements[stateId] = records; StateAutomationElements[stateId] = automationElements; + StateOrder.Enqueue(stateId); + PruneStoredStates(); var elements = records.Select(record => record.ToDictionary()).ToList(); var bounds = root.Current.BoundingRectangle; @@ -258,11 +262,21 @@ static class Program } SetCursorPos(point.Value.X, point.Value.Y); var wheel = (int)Math.Round(Math.Max(1, pages) * 120); - if (direction == "up") wheel = -wheel; + if (direction == "down") wheel = -wheel; mouse_event(0x0800, 0, 0, unchecked((uint)wheel), UIntPtr.Zero); return GetAppState(parameters); } + private static void PruneStoredStates() + { + while (StateOrder.Count > MaxStoredStates) + { + var evicted = StateOrder.Dequeue(); + StateElements.Remove(evicted); + StateAutomationElements.Remove(evicted); + } + } + private static Dictionary Drag(JsonElement parameters) { var fromX = ReadDouble(parameters, "from_x", double.NaN); diff --git a/server/modules/computer-use/semantics/semantic-session-store.ts b/server/modules/computer-use/semantics/semantic-session-store.ts index f1c6849b..bb16ee03 100644 --- a/server/modules/computer-use/semantics/semantic-session-store.ts +++ b/server/modules/computer-use/semantics/semantic-session-store.ts @@ -43,7 +43,8 @@ export class SemanticSessionStore { this.expire(); if (stateId) { const entry = this.states.get(stateId); - return entry && entry.sessionId === sessionId ? entry.state : null; + const appKey = normalizeAppKey(app); + return entry && entry.sessionId === sessionId && entry.appKey === appKey ? entry.state : null; } const latestStateId = this.latestBySessionApp.get(this.latestKey(sessionId, normalizeAppKey(app))); return latestStateId ? this.states.get(latestStateId)?.state || null : null; @@ -70,7 +71,10 @@ export class SemanticSessionStore { for (const [stateId, entry] of this.states.entries()) { if (now - entry.updatedAt > ttl) { this.states.delete(stateId); - this.latestBySessionApp.delete(this.latestKey(entry.sessionId, entry.appKey)); + const key = this.latestKey(entry.sessionId, entry.appKey); + if (this.latestBySessionApp.get(key) === stateId) { + this.latestBySessionApp.delete(key); + } } } } diff --git a/server/modules/computer-use/semantics/semantic-tool-dispatcher.ts b/server/modules/computer-use/semantics/semantic-tool-dispatcher.ts index a879a5f2..59482383 100644 --- a/server/modules/computer-use/semantics/semantic-tool-dispatcher.ts +++ b/server/modules/computer-use/semantics/semantic-tool-dispatcher.ts @@ -1,11 +1,11 @@ export const semanticMcpToolMap: Record = { computer_app_drag: 'drag', - computer_click_element: 'click', + computer_click_element: 'click_element', computer_get_app_state: 'get_app_state', computer_list_apps: 'list_apps', computer_perform_secondary_action: 'perform_secondary_action', computer_press_key: 'press_key', - computer_scroll_element: 'scroll', + computer_scroll_element: 'scroll_element', computer_set_value: 'set_value', computer_type_text: 'type_text', }; diff --git a/src/components/main-content/view/MainContent.tsx b/src/components/main-content/view/MainContent.tsx index bd69e999..3cf0dbc9 100644 --- a/src/components/main-content/view/MainContent.tsx +++ b/src/components/main-content/view/MainContent.tsx @@ -121,27 +121,43 @@ function MainContent({ const loadComputerUseSettings = useCallback(async () => { try { - const [settingsResponse, statusResponse] = await Promise.all([ + const [settingsResponse, statusResponse] = await Promise.allSettled([ authenticatedFetch('/api/computer-use/settings'), authenticatedFetch('/api/computer-use/status'), ]); - const settingsData = await settingsResponse.json(); - const statusData = await statusResponse.json(); + const settingsRes = settingsResponse.status === 'fulfilled' ? settingsResponse.value : null; + const statusRes = statusResponse.status === 'fulfilled' ? statusResponse.value : null; + const readJson = async (response: Response | null) => { + if (!response) return null; + try { + return await response.json(); + } catch { + return null; + } + }; + const settingsData = await readJson(settingsRes); + const statusData = await readJson(statusRes); const runtime = statusData?.data?.runtime; + const settingsUsable = Boolean(settingsRes?.ok && settingsData?.success !== false); + const statusUsable = Boolean(statusRes?.ok && statusData?.success !== false); const settingsEnabled = Boolean( - settingsResponse.ok && - settingsData?.success !== false && + settingsUsable && settingsData?.data?.settings?.enabled ); const cloudEnabled = Boolean( - statusResponse.ok && - statusData?.success !== false && + statusUsable && runtime === 'cloud' && statusData?.data?.enabled ); - setComputerUseEnabled(runtime === 'cloud' ? cloudEnabled : settingsEnabled); + if (runtime === 'cloud') { + setComputerUseEnabled(cloudEnabled); + } else if (settingsUsable) { + setComputerUseEnabled(settingsEnabled); + } else if (statusUsable) { + setComputerUseEnabled(Boolean(statusData?.data?.enabled)); + } } catch { - setComputerUseEnabled(false); + // Keep the current tab availability on transient status/settings failures. } }, []);