From d1930fecdb088b2fc301ed30f7007c9124ec7eaf Mon Sep 17 00:00:00 2001 From: Simos Mikelatos Date: Fri, 19 Jun 2026 12:17:32 +0000 Subject: [PATCH] fix: build semantic helpers on macos and windows --- .../helpers/macos/CloudCLISemantics.swift | 22 ++++++++++++++----- .../helpers/windows/CloudCLISemantics.csproj | 1 + 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/server/modules/computer-use/semantics/helpers/macos/CloudCLISemantics.swift b/server/modules/computer-use/semantics/helpers/macos/CloudCLISemantics.swift index a7b3a5f0..9c91e624 100644 --- a/server/modules/computer-use/semantics/helpers/macos/CloudCLISemantics.swift +++ b/server/modules/computer-use/semantics/helpers/macos/CloudCLISemantics.swift @@ -159,10 +159,22 @@ func walk(_ element: AXUIElement, depth: Int, maxDepth: Int, records: inout [Ele } func pngDataUrlForMainDisplay() -> String? { - guard let image = CGDisplayCreateImage(CGMainDisplayID()) else { return nil } - let bitmap = NSBitmapImageRep(cgImage: image) - guard let png = bitmap.representation(using: .png, properties: [:]) else { return nil } - return "data:image/png;base64,\(png.base64EncodedString())" + let fileURL = URL(fileURLWithPath: NSTemporaryDirectory()).appendingPathComponent("cloudcli-semantics-\(UUID().uuidString).png") + let process = Process() + process.executableURL = URL(fileURLWithPath: "/usr/sbin/screencapture") + process.arguments = ["-x", "-t", "png", fileURL.path] + + do { + try process.run() + process.waitUntilExit() + guard process.terminationStatus == 0 else { return nil } + let png = try Data(contentsOf: fileURL) + try? FileManager.default.removeItem(at: fileURL) + return png.isEmpty ? nil : "data:image/png;base64,\(png.base64EncodedString())" + } catch { + try? FileManager.default.removeItem(at: fileURL) + return nil + } } func getAppState(_ params: JSON) throws -> JSON { @@ -376,7 +388,7 @@ func scrollElement(_ params: JSON) throws -> JSON { let amount = Int32(max(1.0, abs(pages) * 8.0)) let vertical = direction == "up" ? amount : direction == "down" ? -amount : 0 let horizontal = direction == "left" ? amount : direction == "right" ? -amount : 0 - CGEvent(scrollWheelEvent2Source: nil, units: .line, wheelCount: 2, wheel1: vertical, wheel2: horizontal)?.post(tap: .cghidEventTap) + CGEvent(scrollWheelEvent2Source: nil, units: .line, wheelCount: 2, wheel1: vertical, wheel2: horizontal, wheel3: 0)?.post(tap: .cghidEventTap) return try getAppState(params) } diff --git a/server/modules/computer-use/semantics/helpers/windows/CloudCLISemantics.csproj b/server/modules/computer-use/semantics/helpers/windows/CloudCLISemantics.csproj index 323f1159..8ea71167 100644 --- a/server/modules/computer-use/semantics/helpers/windows/CloudCLISemantics.csproj +++ b/server/modules/computer-use/semantics/helpers/windows/CloudCLISemantics.csproj @@ -5,6 +5,7 @@ enable enable true + true CloudCLISemantics