mirror of
https://github.com/siteboon/claudecodeui.git
synced 2026-06-19 23:42:03 +08:00
438 lines
17 KiB
Swift
438 lines
17 KiB
Swift
import AppKit
|
|
import ApplicationServices
|
|
import Foundation
|
|
|
|
typealias JSON = [String: Any]
|
|
|
|
struct ElementRecord {
|
|
let index: String
|
|
let role: String
|
|
let title: String?
|
|
let value: String?
|
|
let bounds: [String: Double]?
|
|
let actions: [String]
|
|
}
|
|
|
|
var stateElements: [String: [ElementRecord]] = [:]
|
|
var stateAxElements: [String: [String: AXUIElement]] = [:]
|
|
|
|
func jsonLine(_ object: Any) {
|
|
guard JSONSerialization.isValidJSONObject(object),
|
|
let data = try? JSONSerialization.data(withJSONObject: object),
|
|
let text = String(data: data, encoding: .utf8)
|
|
else {
|
|
print("{\"error\":\"Failed to encode JSON\"}")
|
|
fflush(stdout)
|
|
return
|
|
}
|
|
print(text)
|
|
fflush(stdout)
|
|
}
|
|
|
|
func respond(id: Any?, result: Any) {
|
|
jsonLine(["id": id ?? NSNull(), "result": result])
|
|
}
|
|
|
|
func respondError(id: Any?, _ message: String) {
|
|
jsonLine(["id": id ?? NSNull(), "error": message])
|
|
}
|
|
|
|
func stringAttr(_ element: AXUIElement, _ attr: CFString) -> String? {
|
|
var value: CFTypeRef?
|
|
guard AXUIElementCopyAttributeValue(element, attr, &value) == .success else { return nil }
|
|
return value as? String
|
|
}
|
|
|
|
func boolAttr(_ element: AXUIElement, _ attr: CFString) -> Bool? {
|
|
var value: CFTypeRef?
|
|
guard AXUIElementCopyAttributeValue(element, attr, &value) == .success else { return nil }
|
|
return value as? Bool
|
|
}
|
|
|
|
func arrayAttr(_ element: AXUIElement, _ attr: CFString) -> [AXUIElement] {
|
|
var value: CFTypeRef?
|
|
guard AXUIElementCopyAttributeValue(element, attr, &value) == .success else { return [] }
|
|
return value as? [AXUIElement] ?? []
|
|
}
|
|
|
|
func actions(_ element: AXUIElement) -> [String] {
|
|
var names: CFArray?
|
|
guard AXUIElementCopyActionNames(element, &names) == .success else { return [] }
|
|
return names as? [String] ?? []
|
|
}
|
|
|
|
func bounds(_ element: AXUIElement) -> [String: Double]? {
|
|
var positionRef: CFTypeRef?
|
|
var sizeRef: CFTypeRef?
|
|
guard AXUIElementCopyAttributeValue(element, kAXPositionAttribute as CFString, &positionRef) == .success,
|
|
AXUIElementCopyAttributeValue(element, kAXSizeAttribute as CFString, &sizeRef) == .success,
|
|
let positionValue = positionRef,
|
|
let sizeValue = sizeRef
|
|
else { return nil }
|
|
|
|
var point = CGPoint.zero
|
|
var size = CGSize.zero
|
|
guard AXValueGetValue(positionValue as! AXValue, .cgPoint, &point),
|
|
AXValueGetValue(sizeValue as! AXValue, .cgSize, &size)
|
|
else { return nil }
|
|
|
|
return [
|
|
"x": Double(point.x),
|
|
"y": Double(point.y),
|
|
"width": Double(size.width),
|
|
"height": Double(size.height),
|
|
]
|
|
}
|
|
|
|
func record(_ element: AXUIElement, index: String) -> ElementRecord {
|
|
ElementRecord(
|
|
index: index,
|
|
role: stringAttr(element, kAXRoleAttribute as CFString) ?? "AXUnknown",
|
|
title: stringAttr(element, kAXTitleAttribute as CFString) ?? stringAttr(element, kAXDescriptionAttribute as CFString),
|
|
value: stringAttr(element, kAXValueAttribute as CFString),
|
|
bounds: bounds(element),
|
|
actions: actions(element)
|
|
)
|
|
}
|
|
|
|
func cachedElement(_ params: JSON) -> AXUIElement? {
|
|
guard let stateId = params["stateId"] as? String,
|
|
let elementIndex = params["element_index"] as? String
|
|
else {
|
|
return nil
|
|
}
|
|
return stateAxElements[stateId]?[elementIndex]
|
|
}
|
|
|
|
func dictionary(_ record: ElementRecord) -> JSON {
|
|
var output: JSON = [
|
|
"index": record.index,
|
|
"role": record.role,
|
|
"actions": record.actions,
|
|
]
|
|
if let title = record.title { output["title"] = title }
|
|
if let value = record.value { output["value"] = value }
|
|
if let bounds = record.bounds { output["bounds"] = bounds }
|
|
return output
|
|
}
|
|
|
|
func resolveApp(_ query: String) throws -> NSRunningApplication {
|
|
let normalized = query.lowercased()
|
|
let apps = NSWorkspace.shared.runningApplications.filter { app in
|
|
app.activationPolicy == .regular
|
|
}
|
|
if let app = apps.first(where: { $0.bundleIdentifier?.lowercased() == normalized }) {
|
|
return app
|
|
}
|
|
if let app = apps.first(where: { ($0.localizedName ?? "").lowercased() == normalized }) {
|
|
return app
|
|
}
|
|
if let app = apps.first(where: { ($0.localizedName ?? "").lowercased().contains(normalized) }) {
|
|
return app
|
|
}
|
|
throw NSError(domain: "CloudCLISemantics", code: 404, userInfo: [NSLocalizedDescriptionKey: "App is not running: \(query)"])
|
|
}
|
|
|
|
func listApps() -> [[String: Any]] {
|
|
NSWorkspace.shared.runningApplications
|
|
.filter { $0.activationPolicy == .regular }
|
|
.map { app in
|
|
[
|
|
"id": app.bundleIdentifier ?? app.localizedName ?? "\(app.processIdentifier)",
|
|
"name": app.localizedName ?? app.bundleIdentifier ?? "Unknown",
|
|
"bundleIdentifier": app.bundleIdentifier ?? "",
|
|
"pid": Int(app.processIdentifier),
|
|
"running": true,
|
|
]
|
|
}
|
|
}
|
|
|
|
func walk(_ element: AXUIElement, depth: Int, maxDepth: Int, records: inout [ElementRecord], axRecords: inout [String: AXUIElement], limit: Int) {
|
|
if depth > maxDepth || records.count >= limit { return }
|
|
let index = "\(records.count + 1)"
|
|
records.append(record(element, index: index))
|
|
axRecords[index] = element
|
|
for child in arrayAttr(element, kAXChildrenAttribute as CFString) {
|
|
walk(child, depth: depth + 1, maxDepth: maxDepth, records: &records, axRecords: &axRecords, limit: limit)
|
|
if records.count >= limit { return }
|
|
}
|
|
}
|
|
|
|
func pngDataUrlForMainDisplay() -> String? {
|
|
guard let image = CGDisplayCreateImage(CGMainDisplayID()) else { return nil }
|
|
let bitmap = NSBitmapImageRep(cgImage: image)
|
|
guard let png = bitmap.representation(using: .png, properties: [:]) else { return nil }
|
|
return "data:image/png;base64,\(png.base64EncodedString())"
|
|
}
|
|
|
|
func getAppState(_ params: JSON) throws -> JSON {
|
|
let appName = params["app"] as? String ?? ""
|
|
let app = try resolveApp(appName)
|
|
let axApp = AXUIElementCreateApplication(app.processIdentifier)
|
|
let windows = arrayAttr(axApp, kAXWindowsAttribute as CFString)
|
|
let root = windows.first ?? axApp
|
|
var records: [ElementRecord] = []
|
|
var axRecords: [String: AXUIElement] = [:]
|
|
walk(root, depth: 0, maxDepth: 5, records: &records, axRecords: &axRecords, limit: 300)
|
|
let stateId = "state_\(UUID().uuidString)"
|
|
stateElements[stateId] = records
|
|
stateAxElements[stateId] = axRecords
|
|
|
|
let elements = records.map(dictionary)
|
|
return [
|
|
"stateId": stateId,
|
|
"app": app.localizedName ?? app.bundleIdentifier ?? appName,
|
|
"platform": "darwin",
|
|
"screenshotDataUrl": pngDataUrlForMainDisplay() ?? NSNull(),
|
|
"displaySize": [
|
|
"width": Int(CGDisplayPixelsWide(CGMainDisplayID())),
|
|
"height": Int(CGDisplayPixelsHigh(CGMainDisplayID())),
|
|
],
|
|
"elements": elements,
|
|
"accessibilityTree": elements,
|
|
"treeText": elements.map { "\($0["index"] ?? "") \($0["role"] ?? "") \($0["title"] ?? "")" }.joined(separator: "\n"),
|
|
]
|
|
}
|
|
|
|
func cgMouseButton(_ value: Any?) -> CGMouseButton {
|
|
guard let button = value as? String else { return .left }
|
|
switch button {
|
|
case "right": return .right
|
|
case "middle": return .center
|
|
default: return .left
|
|
}
|
|
}
|
|
|
|
func mouseEventTypes(_ button: CGMouseButton) -> (CGEventType, CGEventType) {
|
|
switch button {
|
|
case .right: return (.rightMouseDown, .rightMouseUp)
|
|
case .center: return (.otherMouseDown, .otherMouseUp)
|
|
default: return (.leftMouseDown, .leftMouseUp)
|
|
}
|
|
}
|
|
|
|
func postMouseClick(point: CGPoint, button: CGMouseButton, clickCount: Int = 1) throws {
|
|
guard let source = CGEventSource(stateID: .combinedSessionState) else {
|
|
throw NSError(domain: "CloudCLISemantics", code: 500, userInfo: [NSLocalizedDescriptionKey: "Failed to create CGEventSource"])
|
|
}
|
|
let eventTypes = mouseEventTypes(button)
|
|
for _ in 0..<max(1, clickCount) {
|
|
let down = CGEvent(mouseEventSource: source, mouseType: eventTypes.0, mouseCursorPosition: point, mouseButton: button)
|
|
let up = CGEvent(mouseEventSource: source, mouseType: eventTypes.1, mouseCursorPosition: point, mouseButton: button)
|
|
down?.post(tap: .cghidEventTap)
|
|
up?.post(tap: .cghidEventTap)
|
|
usleep(80_000)
|
|
}
|
|
}
|
|
|
|
func postDrag(from: CGPoint, to: CGPoint, button: CGMouseButton) throws {
|
|
guard let source = CGEventSource(stateID: .combinedSessionState) else {
|
|
throw NSError(domain: "CloudCLISemantics", code: 500, userInfo: [NSLocalizedDescriptionKey: "Failed to create CGEventSource"])
|
|
}
|
|
let eventTypes = mouseEventTypes(button)
|
|
CGEvent(mouseEventSource: source, mouseType: eventTypes.0, mouseCursorPosition: from, mouseButton: button)?.post(tap: .cghidEventTap)
|
|
usleep(80_000)
|
|
CGEvent(mouseEventSource: source, mouseType: .leftMouseDragged, mouseCursorPosition: to, mouseButton: button)?.post(tap: .cghidEventTap)
|
|
usleep(80_000)
|
|
CGEvent(mouseEventSource: source, mouseType: eventTypes.1, mouseCursorPosition: to, mouseButton: button)?.post(tap: .cghidEventTap)
|
|
}
|
|
|
|
func runAppleScript(_ script: String) throws {
|
|
let process = Process()
|
|
process.executableURL = URL(fileURLWithPath: "/usr/bin/osascript")
|
|
process.arguments = ["-e", script]
|
|
process.standardOutput = Pipe()
|
|
let stderr = Pipe()
|
|
process.standardError = stderr
|
|
try process.run()
|
|
process.waitUntilExit()
|
|
if process.terminationStatus != 0 {
|
|
let data = stderr.fileHandleForReading.readDataToEndOfFile()
|
|
let message = String(data: data, encoding: .utf8) ?? "AppleScript failed."
|
|
throw NSError(domain: "CloudCLISemantics", code: Int(process.terminationStatus), userInfo: [NSLocalizedDescriptionKey: message])
|
|
}
|
|
}
|
|
|
|
func escapedAppleScriptString(_ value: String) -> String {
|
|
value.replacingOccurrences(of: "\\", with: "\\\\").replacingOccurrences(of: "\"", with: "\\\"")
|
|
}
|
|
|
|
func pointForElement(_ params: JSON) -> CGPoint? {
|
|
if let x = params["x"] as? Double, let y = params["y"] as? Double {
|
|
return CGPoint(x: x, y: y)
|
|
}
|
|
guard let stateId = params["stateId"] as? String,
|
|
let elementIndex = params["element_index"] as? String,
|
|
let element = stateElements[stateId]?.first(where: { $0.index == elementIndex }),
|
|
let b = element.bounds,
|
|
let x = b["x"], let y = b["y"], let width = b["width"], let height = b["height"]
|
|
else {
|
|
return nil
|
|
}
|
|
return CGPoint(x: x + width / 2, y: y + height / 2)
|
|
}
|
|
|
|
func click(_ params: JSON) throws -> JSON {
|
|
if let element = cachedElement(params),
|
|
cgMouseButton(params["mouse_button"]) == .left,
|
|
(params["click_count"] as? Int ?? 1) == 1,
|
|
actions(element).contains(kAXPressAction as String),
|
|
AXUIElementPerformAction(element, kAXPressAction as CFString) == .success {
|
|
return try getAppState(params)
|
|
}
|
|
|
|
guard let point = pointForElement(params) else {
|
|
throw NSError(domain: "CloudCLISemantics", code: 400, userInfo: [NSLocalizedDescriptionKey: "click_element requires x/y or stateId + element_index"])
|
|
}
|
|
let clickCount = params["click_count"] as? Int ?? 1
|
|
try postMouseClick(point: point, button: cgMouseButton(params["mouse_button"]), clickCount: clickCount)
|
|
return try getAppState(params)
|
|
}
|
|
|
|
func performSecondaryAction(_ params: JSON) throws -> JSON {
|
|
if let element = cachedElement(params),
|
|
actions(element).contains(kAXShowMenuAction as String),
|
|
AXUIElementPerformAction(element, kAXShowMenuAction as CFString) == .success {
|
|
return try getAppState(params)
|
|
}
|
|
guard let point = pointForElement(params) else {
|
|
throw NSError(domain: "CloudCLISemantics", code: 400, userInfo: [NSLocalizedDescriptionKey: "perform_secondary_action requires x/y or stateId + element_index"])
|
|
}
|
|
try postMouseClick(point: point, button: .right)
|
|
return try getAppState(params)
|
|
}
|
|
|
|
func setValue(_ params: JSON) throws -> JSON {
|
|
guard let value = params["value"] as? String else {
|
|
throw NSError(domain: "CloudCLISemantics", code: 400, userInfo: [NSLocalizedDescriptionKey: "set_value requires value"])
|
|
}
|
|
if let element = cachedElement(params),
|
|
AXUIElementSetAttributeValue(element, kAXValueAttribute as CFString, value as CFTypeRef) == .success {
|
|
return try getAppState(params)
|
|
}
|
|
guard let point = pointForElement(params) else {
|
|
throw NSError(domain: "CloudCLISemantics", code: 400, userInfo: [NSLocalizedDescriptionKey: "set_value requires x/y or stateId + element_index"])
|
|
}
|
|
try postMouseClick(point: point, button: .left)
|
|
try runAppleScript("tell application \"System Events\" to keystroke \"a\" using command down")
|
|
try runAppleScript("tell application \"System Events\" to keystroke \"\(escapedAppleScriptString(value))\"")
|
|
return try getAppState(params)
|
|
}
|
|
|
|
func typeText(_ params: JSON) throws -> JSON {
|
|
let text = params["text"] as? String ?? ""
|
|
try runAppleScript("tell application \"System Events\" to keystroke \"\(escapedAppleScriptString(text))\"")
|
|
return try getAppState(params)
|
|
}
|
|
|
|
func appleScriptModifiers(_ parts: [String]) -> String {
|
|
let modifiers = parts.compactMap { part -> String? in
|
|
switch part.lowercased() {
|
|
case "cmd", "command", "meta": return "command down"
|
|
case "ctrl", "control": return "control down"
|
|
case "alt", "option": return "option down"
|
|
case "shift": return "shift down"
|
|
default: return nil
|
|
}
|
|
}
|
|
return modifiers.isEmpty ? "" : " using {\(modifiers.joined(separator: ", "))}"
|
|
}
|
|
|
|
func appleScriptKeyCode(_ key: String) -> Int? {
|
|
switch key.lowercased() {
|
|
case "return", "enter": return 36
|
|
case "tab": return 48
|
|
case "space": return 49
|
|
case "delete", "backspace": return 51
|
|
case "escape", "esc": return 53
|
|
case "left": return 123
|
|
case "right": return 124
|
|
case "down": return 125
|
|
case "up": return 126
|
|
default: return nil
|
|
}
|
|
}
|
|
|
|
func pressKey(_ params: JSON) throws -> JSON {
|
|
let raw = params["key"] as? String ?? ""
|
|
let parts = raw.split(separator: "+").map { String($0).trimmingCharacters(in: .whitespacesAndNewlines) }.filter { !$0.isEmpty }
|
|
let key = parts.last ?? raw
|
|
let modifiers = appleScriptModifiers(Array(parts.dropLast()))
|
|
if let keyCode = appleScriptKeyCode(key) {
|
|
try runAppleScript("tell application \"System Events\" to key code \(keyCode)\(modifiers)")
|
|
} else {
|
|
try runAppleScript("tell application \"System Events\" to keystroke \"\(escapedAppleScriptString(key))\"\(modifiers)")
|
|
}
|
|
return try getAppState(params)
|
|
}
|
|
|
|
func scrollElement(_ params: JSON) throws -> JSON {
|
|
guard let point = pointForElement(params) else {
|
|
throw NSError(domain: "CloudCLISemantics", code: 400, userInfo: [NSLocalizedDescriptionKey: "scroll_element requires x/y or stateId + element_index"])
|
|
}
|
|
CGWarpMouseCursorPosition(point)
|
|
let direction = params["direction"] as? String ?? "down"
|
|
let pages = params["pages"] as? Double ?? 1.0
|
|
let amount = Int32(max(1.0, abs(pages) * 8.0))
|
|
let vertical = direction == "up" ? amount : direction == "down" ? -amount : 0
|
|
let horizontal = direction == "left" ? amount : direction == "right" ? -amount : 0
|
|
CGEvent(scrollWheelEvent2Source: nil, units: .line, wheelCount: 2, wheel1: vertical, wheel2: horizontal)?.post(tap: .cghidEventTap)
|
|
return try getAppState(params)
|
|
}
|
|
|
|
func drag(_ params: JSON) throws -> JSON {
|
|
guard let fromX = params["from_x"] as? Double,
|
|
let fromY = params["from_y"] as? Double,
|
|
let toX = params["to_x"] as? Double,
|
|
let toY = params["to_y"] as? Double
|
|
else {
|
|
throw NSError(domain: "CloudCLISemantics", code: 400, userInfo: [NSLocalizedDescriptionKey: "drag requires from_x/from_y/to_x/to_y"])
|
|
}
|
|
try postDrag(from: CGPoint(x: fromX, y: fromY), to: CGPoint(x: toX, y: toY), button: cgMouseButton(params["mouse_button"]))
|
|
return try getAppState(params)
|
|
}
|
|
|
|
func handle(_ request: JSON) {
|
|
let id = request["id"]
|
|
let method = request["method"] as? String ?? ""
|
|
let params = request["params"] as? JSON ?? [:]
|
|
|
|
do {
|
|
switch method {
|
|
case "list_apps":
|
|
respond(id: id, result: listApps())
|
|
case "get_app_state":
|
|
respond(id: id, result: try getAppState(params))
|
|
case "click_element":
|
|
respond(id: id, result: try click(params))
|
|
case "perform_secondary_action":
|
|
respond(id: id, result: try performSecondaryAction(params))
|
|
case "set_value":
|
|
respond(id: id, result: try setValue(params))
|
|
case "type_text":
|
|
respond(id: id, result: try typeText(params))
|
|
case "press_key":
|
|
respond(id: id, result: try pressKey(params))
|
|
case "scroll_element":
|
|
respond(id: id, result: try scrollElement(params))
|
|
case "drag":
|
|
respond(id: id, result: try drag(params))
|
|
default:
|
|
respondError(id: id, "Method is not implemented yet: \(method)")
|
|
}
|
|
} catch {
|
|
respondError(id: id, error.localizedDescription)
|
|
}
|
|
}
|
|
|
|
while let line = readLine() {
|
|
guard let data = line.data(using: .utf8),
|
|
let object = try? JSONSerialization.jsonObject(with: data),
|
|
let request = object as? JSON
|
|
else {
|
|
respondError(id: nil, "Invalid JSON request")
|
|
continue
|
|
}
|
|
handle(request)
|
|
}
|