From 8354cb65fd3322245f0f619c46d8f264553e85e2 Mon Sep 17 00:00:00 2001 From: Haileyesus Date: Mon, 6 Apr 2026 19:36:28 +0300 Subject: [PATCH] feat(backend): setup mcp, image upload, and skills --- docs/backend/llm-unifier-helper-2.md | 456 ++++++++++ .../llm-unifier-helper-2-backend-testing.md | 56 ++ package-lock.json | 21 +- server/src/modules/llm/assets.service.ts | 85 ++ .../modules/llm/llm-unifier.images.test.ts | 211 +++++ .../src/modules/llm/llm-unifier.mcp.test.ts | 351 ++++++++ .../modules/llm/llm-unifier.skills.test.ts | 207 +++++ server/src/modules/llm/llm.routes.ts | 337 +++++++- server/src/modules/llm/llm.service.ts | 30 + server/src/modules/llm/mcp.service.ts | 817 ++++++++++++++++++ .../llm/providers/base-cli.provider.ts | 11 + .../modules/llm/providers/claude.provider.ts | 87 +- .../modules/llm/providers/codex.provider.ts | 43 +- .../modules/llm/providers/cursor.provider.ts | 3 +- .../modules/llm/providers/gemini.provider.ts | 3 +- .../llm/providers/provider.interface.ts | 1 + server/src/modules/llm/skills.service.ts | 396 +++++++++ 17 files changed, 3091 insertions(+), 24 deletions(-) create mode 100644 docs/backend/llm-unifier-helper-2.md create mode 100644 docs/testing/llm-unifier-helper-2-backend-testing.md create mode 100644 server/src/modules/llm/assets.service.ts create mode 100644 server/src/modules/llm/llm-unifier.images.test.ts create mode 100644 server/src/modules/llm/llm-unifier.mcp.test.ts create mode 100644 server/src/modules/llm/llm-unifier.skills.test.ts create mode 100644 server/src/modules/llm/mcp.service.ts create mode 100644 server/src/modules/llm/skills.service.ts diff --git a/docs/backend/llm-unifier-helper-2.md b/docs/backend/llm-unifier-helper-2.md new file mode 100644 index 00000000..513b3044 --- /dev/null +++ b/docs/backend/llm-unifier-helper-2.md @@ -0,0 +1,456 @@ +# How each provider supports image uploading + +Universally: First, we should upload the images in `.cloudcli/assets` folder. 
Then, it should just reference that path later on. + +## Claude +- When clicking send, attach the images in the content list with the type of 'image'. +- https://platform.claude.com/docs/en/api/messages#message_param +```js + const imageBytes = await fs.readFile(imagePath); + const sdkPrompt = (async function*: AsyncIterable () { + yield { + type: 'user', + message: { + role: 'user', + content: [ + { type: 'text', text: prompt }, + { + type: 'image', + source: { + type: 'base64', + media_type: 'image/jpeg', + data: imageBytes.toString('base64'), + }, + }, + ], + }, + parent_tool_use_id: null, + timestamp: new Date().toISOString(), + }; + })(); // automatically executed because of the `()` in the end. +``` + +### Some useful types +```ts +export interface MessageParam { + content: string | Array; + + role: 'user' | 'assistant'; // when we send the message for prompting, the role will be 'user' +} + +/** + * Regular text content. + */ +export type ContentBlockParam = + | TextBlockParam + | ImageBlockParam + | DocumentBlockParam + | SearchResultBlockParam + | ThinkingBlockParam + | RedactedThinkingBlockParam + | ToolUseBlockParam + | ToolResultBlockParam + | ServerToolUseBlockParam + | WebSearchToolResultBlockParam; + + +export interface TextBlockParam { + text: string; + type: 'text'; +} + +export interface ImageBlockParam { + source: Base64ImageSource | URLImageSource; // I'll be using only base 64 for now. 
+ type: 'image'; +} + +export interface Base64ImageSource { + data: string; + media_type: 'image/jpeg' | 'image/png' | 'image/gif' | 'image/webp'; + type: 'base64'; +} +``` + +### Explanations about async generators and yield +To understand why `async function*` is used, it helps to stop thinking of functions as "machines that run and finish" and start thinking of them as **"factories that stay open."** +```ts +async function* getTaskStatus(): AsyncIterable { + yield "Checking permissions..."; + await new Promise(r => setTimeout(r, 500)); // Simulate work + + yield "Searching database..."; + await new Promise(r => setTimeout(r, 500)); + + yield "Formatting prompt..."; +} + +// CONSUMPTION +async function run() { + const statusGenerator = getTaskStatus(); + + for await (const status of statusGenerator) { + console.log(`Current Status: ${status}`); + } + + console.log("Done!"); +} +``` + +## Codex +```ts +const streamed = await thread.runStreamed([ {type: "text", text: "Describe this image:"}, {type: "local_image", path: "scripts/pic.jpg"} +``` +- Don't add the above query lines for codex. We can directly use the `sdk`. + +## Gemini and Cursor +- Just add the path to the end of the prompt when clicking send for paths including images. For e.g. +``` + + + +---- IGNORE THE QUERY LINES. Just use the attached list of an array of paths for images below and use it with the above prompt. + +["scripts\pic.jpg", "", ...] +``` + + + +# MCP servers (how to add/remove one and run it) + +**What is the Model Context Protocol (MCP)?** +Think of MCP as the USB-C cable for AI. +- Historically, if you wanted an AI model to read your GitHub repository, query your database, or search your company's Notion workspace, developers had to write custom, one-off integrations for every single AI tool. +- Created by Anthropic as an open-source standard, the Model Context Protocol fixes this. 
It is a universal language that allows AI applications (the "clients") to securely connect to external data sources and tools (the "servers") using a single, unified protocol. + +**What is an MCP Server?** +- If MCP is the USB-C cable, an **MCP Server** is the hard drive or webcam you are plugging in. +- It is a lightweight program that acts as a secure bridge between your specific data and the AI. When the AI needs context—like checking the current state of a file or executing a search—it asks the MCP server. The server translates the AI's request, securely fetches the data or performs the action, and hands the result back to the AI. + + **Different transport mechanisms for MCP servers** +1. `stdio` - This is the default and most common transport for local development. When using `stdio`, the AI client directly launches the MCP server as a background "child process" on your machine. The client and server then talk to each other locally by writing to and reading from standard input (`stdin`) and standard output (`stdout`). + - **Clear Example:** A local **File System Server**. You want the AI to read your local `package.json` file. The AI client spawns the file system server via `stdio`. Because the server is running locally on your hardware, it inherently has access to your files without needing complex authentication. It reads the file and prints the contents back to the AI. +2. `https` (Streamable HTTP) - Streamable HTTP replaces older remote methods. It uses a single HTTP or HTTPS endpoint for bidirectional communication. The client sends standard `POST` requests, and the server can respond instantly or keep the connection open to stream data back. It behaves exactly like a modern web API. Because it runs over HTTP, it supports standard web security features like OAuth, Bearer tokens, and CORS. + - **Clear Example:** A **Cloud Database Server**. 
If you work on a team and want everyone's AI to be able to query a shared staging database, you would deploy an MCP server to the cloud. Your AI connects to `https://api.yourcompany.com/mcp` using Streamable HTTP and passes an API key in the headers to securely run queries. +3. `sse` (Server sent events) - SSE is the legacy transport mechanism for remote servers. While still widely supported, it is actively being phased out in favor of Streamable HTTP because it is slightly more cumbersome to build and maintain. + - **How it works:** Unlike Streamable HTTP which uses a single unified endpoint, SSE requires _two_ distinct network connections. The client connects to an SSE endpoint (via an HTTP `GET` request) strictly to listen for incoming messages from the server, and uses a separate HTTP `POST` endpoint to send messages to the server. + +- **Clear Example:** An older **Slack Integration Server**. The AI client connects to the server's SSE stream to listen for real-time incoming messages from a Slack channel. When the AI wants to reply, it sends a payload to a separate `/message` POST endpoint. + +**Frontend coordination** +- When listing the MCP servers for a provider, go to the appropriate files where the configuration is stored to fetch all of them. When listing, the User/Local/Project MCPs should be grouped separately. +- To add/remove an MCP server, go to the appropriate file and add/remove it there keeping in mind whether it is configured as User/Local/Project. +- To update the server, go to the appropriate file and update it from there. +- There should also be one big mcp adder that supports `http` and `stdio` only. When it's added from there, the server will automatically be added to every provider. + +## Claude +Supports all 3 transports. +### `stdio` +- We can have arguments and env variables input when executing the command. +- `args` and `env` are optional. 
+```json +{ + "mcpServers": { + "local-weather": { + "type": "stdio", + "command": "/path/to/weather-cli", + "args": ["--api-key", "abc123"], + "env": { + "CACHE_DIR": "/tmp" + } + } + } +} +``` + +### `http` +- We don't pass `env` inputs for now. It's supported but we will add it only later. +- `headers` is optional. +```json +{ + "mcpServers": { + "weather-api": { + "type": "http", + "url": "https://api.weather.com/mcp", + "headers": { + "Authorization": "Bearer token" + } + } + } +} +``` + +### `sse` +- similar with `http` format. +```json +{ + "mcpServers": { + "private-api": { + "type": "sse", + "url": "https://api.company.com/sse", + "headers": { + "X-API-Key": "your-key-here" + } + } + } +} +``` + +### Support for different modes (Local, user, project) + +#### Local +- stored in `~/.claude.json` under the project’s path. +#### User +- stored in `~/.claude.json` under the main object with the key `"mcpServers" +#### Project specific +- add it in the `.mcp.json` file in the project root directory. + +## Codex + +### Configuration (Only `stdio` and `http` are supported.) + +#### `stdio` +- `command` (required): The command that starts the server. +- `args` (optional): Arguments to pass to the server. +- `env` (optional): Environment variables to set for the server. +- `env_vars` (optional): Environment variables to allow and forward. +- `cwd` (optional): Working directory to start the server from. + +```toml +[mcp_servers.my_stdio] +command = "npx" +args = ["-y", "@upstash/context7-mcp"] + +[mcp_servers.my_stdio.env] +API_KEY = "your-key" +``` + +With forwarded host env vars. +```toml +[mcp_servers.my_stdio] +command = "python" +args = ["server.py"] +env_vars = ["API_KEY", "DEBUG"] +cwd = "/path/to/project" +``` +#### `http` +- `url` (required): The server address. +- `bearer_token_env_var` (optional): Environment variable name for a bearer token to send in `Authorization`. +- `http_headers` (optional): Map of header names to static values. 
+- `env_http_headers` (optional): Map of header names to environment variable names (values pulled from the environment). +```toml +[mcp_servers.my_http] +url = "https://example.com/mcp" +bearer_token_env_var = "MY_API_TOKEN" +http_headers = { "X-Custom-Header" = "custom-value" } +env_http_headers = { "X-Api-Key" = "MY_API_KEY_ENV" } +``` + +### Support for different modes (user, project) +#### User +- add it to the global `~/.codex/config.toml` file. + +#### Project specific +- add it in `.codex/config.toml` file in the project's root directory. + +## Gemini +Supports all 3 transports. +### `stdio` +- We can have arguments and env variables as inputs when executing the command. +- `args` and `env` are optional. +- No `type` attribute like Claude for `stdio`. If there is no type, we can infer that it must be `stdio` since the rest have it. +```json + +{ + "mcpServers": { + "serverName": { + "command": "path/to/server", + "args": ["--arg1", "value1"], + "env": { + "API_KEY": "$MY_API_TOKEN" + }, + "cwd": "./server-directory" + } + } +} +``` + +### `http` +- We don't pass `env` inputs. Notice the type is set here like Claude. +- `headers` is optional. +- EXACTLY same as Claude `http`. +```json +{ + "mcpServers": { + "weather-api": { + "type": "http", + "url": "https://api.weather.com/mcp", + "headers": { + "Authorization": "Bearer token" + } + } + } +} +``` + +### `sse` +- similar with `http` format. +- EXACT with Claude `sse` format. +```json +{ + "mcpServers": { + "private-api": { + "type": "sse", + "url": "https://api.company.com/sse", + "headers": { + "X-API-Key": "your-key-here" + } + } + } +} +``` + +### Support for different modes (user, project) + +#### User +- stored in `~/.gemini/settings.json`. + +#### Project specific +- add it in the `.gemini/settings.json` file in the project root directory. + + + +## Cursor + +Supports all 3 transports. There is no `type` attribute for all 3. 
Here are the structures: + +#### `stdio` +```json +{ + "mcpServers": { + "server-name": { + "command": "npx", + "args": ["-y", "mcp-server"], + "env": { + "API_KEY": "value" + } + } + } +} +``` + +#### `http` / `sse` +```json +// MCP server using HTTP or SSE - runs on a server +{ + "mcpServers": { + "server-name": { + "url": "http://localhost:3000/mcp", + "headers": { + "API_KEY": "value" + } + } + } +} +``` + + +### Support for different modes (user, project) + +#### User +- stored in `~/.cursor/mcp.json`. + +#### Project specific +- add it in the `.cursor/mcp.json` file in the project root directory. + + + + +# Skills management (ONLY Fetching support needed for now) +## Claude +- To get user skills, fetch all `~/.claude/skills//SKILL.md`. +- To get project skills, fetch from `.claude/skills//SKILL.md`. +- To get plugin skills: + - Find all the enabled plugins in `~/.claude/settings.json`. + ```json + { + "apiKeyHelper": "...", + "enabledPlugins": { + "example-skills@anthropic-agent-skills": true + }, + ... + } + ``` + - Then go to `~/.claude/plugins/installed_plugins.json` file to find where the plugin is installed. + ```json + { + "version": 2, + "plugins": { + "example-skills@anthropic-agent-skills": [ + { + "scope": "user", + "installPath": "C:\\Users\\OMEN6\\.claude\\plugins\\cache\\anthropic-agent-skills\\example-skills\\3d5951151859", + "version": "3d5951151859", + "installedAt": "2026-03-03T12:52:08.024Z", + "lastUpdated": "2026-03-03T12:52:08.024Z", + "gitCommitSha": "3d59511518591fa82e6cfcf0438d68dd5dad3e76" + } + ] + } + } + ``` + - Then go the `installPath` directory. If there is a `skills` folder there, go to each of the skills in `/skills//SKILL.md`. + +Then, parse the name and description of the skills from the md for every `SKILL.md`. + +- The command for invoking skills is `/` . + +- Whenever a skill is from a plugin, doing `/skill-name` should automatically be updated with `/plugin-name:skill-name`. 
This is because plugin skills use a `plugin-name:skill-name` namespace, so they cannot conflict with other levels. + +I have attached the first initial contents of a sample `SKILL.md` file below. + +```md +--- + +name: mcp-builder + +description: Guide for creating high-quality MCP (Model Context Protocol) servers that enable LLMs to interact with external services through well-designed tools. Use when building MCP servers to integrate external APIs or services, whether in Python (FastMCP) or Node/TypeScript (MCP SDK). + +license: Complete terms in LICENSE.txt + +--- +``` +## Codex + + +Codex reads skills from repository, user, admin, and system locations. + + +| Skill Scope | Location | Suggested use | +| ----------- | ------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `REPO` | `$CWD/.agents/skills`
Current working directory: where you launch Codex. | If you’re in a repository or code environment, teams can check in skills relevant to a working folder. For example, skills only relevant to a microservice or a module. | | `REPO` | `$CWD/../.agents/skills`
A folder above CWD when you launch Codex inside a Git repository. | If you’re in a repository with nested folders, organizations can check in skills relevant to a shared area in a parent folder. | +| `REPO` | `$REPO_ROOT/.agents/skills`
The topmost root folder when you launch Codex inside a Git repository. | If you’re in a repository with nested folders, organizations can check in skills relevant to everyone using the repository. These serve as root skills available to any subfolder in the repository. | +| `USER` | `$HOME/.agents/skills`
Any skills checked into the user’s personal folder. | Use to curate skills relevant to a user that apply to any repository the user may work in. | +| `ADMIN` | `/etc/codex/skills`
Any skills checked into the machine or container in a shared, system location. | Use for SDK scripts, automation, and for checking in default admin skills available to each user on the machine. | +| `SYSTEM` | `~/.codex/skills/.system` | Useful skills relevant to a broad audience such as the skill-creator and plan skills. Available to everyone when they start Codex. | + +Then, parse the name and description of the skills from the md for every `SKILL.md`. + +- The command for invoking skills is `$` +## Gemini +- Gets all skills from `~/.gemini/skills`, `~/.agents/skills`, `.gemini/skills`, `.agents/skills` +- command for invoking skills is same as Claude. + + +## Cursor +[Skill directories](https://cursor.com/docs/skills?utm_source=chatgpt.com#skill-directories) +Skills are automatically loaded from these locations: + +|Location|Scope| +|---|---| +|`.agents/skills/`|Project-level| +|`.cursor/skills/`|Project-level| +|`~/.cursor/skills/`|User-level (global)| +Then, parse the name and description of the skills from the md for every `SKILL.md`. + +- command for invoking skills is same as Claude. diff --git a/docs/testing/llm-unifier-helper-2-backend-testing.md b/docs/testing/llm-unifier-helper-2-backend-testing.md new file mode 100644 index 00000000..56e15647 --- /dev/null +++ b/docs/testing/llm-unifier-helper-2-backend-testing.md @@ -0,0 +1,56 @@ +# LLM Unifier Helper-2 Backend Testing Report + +Date: 2026-04-06 + +## Scope +This report validates every backend functionality listed in: +- `docs/backend/llm-unifier-helper-2.md` + +All test cases include inline comments that describe which helper-2 requirement they cover. 
+ +## Test Files +- `server/src/modules/llm/llm-unifier.providers.test.ts` +- `server/src/modules/llm/llm-unifier.sessions.test.ts` +- `server/src/modules/llm/llm-unifier.images.test.ts` +- `server/src/modules/llm/llm-unifier.mcp.test.ts` +- `server/src/modules/llm/llm-unifier.skills.test.ts` + +## package.json Scripts +- `test:server` now includes the full unifier suite. +- Added `test:server:llm-unifier-2` for running only helper-2 unifier coverage. + +## Commands Used +```powershell +npm run typecheck:server +npm run test:server:llm-unifier-2 +npm run test:server +``` + +## Results +- `typecheck:server`: pass +- `test:server:llm-unifier-2`: pass (`30/30`) +- `test:server`: pass (`30/30`) + +## Requirement Coverage Matrix +| Helper-2 requirement | Test coverage | +| --- | --- | +| Universal image upload into `.cloudcli/assets` | `llmAssetsService stores uploaded images in .cloudcli/assets` | +| Image upload validation for supported image mime types | `llmAssetsService rejects unsupported image mime types` | +| Claude image prompt as content blocks with base64 images | `claude provider builds async prompt payload with base64 image blocks` | +| Codex image prompt via `local_image` entries | `codex provider sends local_image prompt items when image paths are provided` | +| Gemini/Cursor image handling by appending image path array to prompt | `gemini and cursor providers append image path arrays to prompts` | +| Start payload imagePaths validation | `llmService rejects invalid imagePaths payloads before provider execution` | +| MCP list grouped by User/Local/Project | `llmMcpService handles claude MCP scopes/transports with file-backed persistence` | +| MCP add/remove/update behavior backed by provider config files | `llmMcpService handles claude MCP scopes/transports with file-backed persistence`, `llmMcpService handles codex MCP TOML config and capability validation`, `llmMcpService handles gemini and cursor MCP JSON config formats` | +| Claude MCP transports: 
stdio/http/sse and scopes: user/local/project | `llmMcpService handles claude MCP scopes/transports with file-backed persistence` | +| Codex MCP transports: stdio/http and scopes: user/project | `llmMcpService handles codex MCP TOML config and capability validation` | +| Gemini MCP transports: stdio/http/sse and scopes: user/project | `llmMcpService handles gemini and cursor MCP JSON config formats` | +| Cursor MCP transports: stdio/http/sse and scopes: user/project | `llmMcpService handles gemini and cursor MCP JSON config formats` | +| Global MCP adder supports only `http` and `stdio` and applies to all providers | `llmMcpService global adder writes to all providers and rejects unsupported transports` | +| MCP run/connectivity checks (stdio and http) | `llmMcpService runProviderServer probes stdio and http MCP servers` | +| Claude skills fetch (user/project/plugin) and plugin namespacing | `llmSkillsService lists claude user/project/plugin skills with proper invocation names` | +| Codex skills fetch (repo/user/admin/system path model; tested repo/user/system paths) and `$` invocation | `llmSkillsService lists codex skills from repo/user/system locations with dollar invocation` | +| Gemini skills fetch from documented directories and `/` invocation | `llmSkillsService lists gemini skills from documented directories` | +| Cursor skills fetch from documented directories and `/` invocation | `llmSkillsService lists cursor skills from documented directories` | +| Existing unifier provider/session baseline behaviors remain passing | `llm-unifier.providers.test.ts`, `llm-unifier.sessions.test.ts` full suite | + diff --git a/package-lock.json b/package-lock.json index 36e0c89f..e0686416 100644 --- a/package-lock.json +++ b/package-lock.json @@ -24,6 +24,7 @@ "@openai/codex-sdk": "^0.101.0", "@replit/codemirror-minimap": "^0.5.2", "@tailwindcss/typography": "^0.5.16", + "@types/multer": "^2.1.0", "@uiw/react-codemirror": "^4.23.13", "@xterm/addon-clipboard": "^0.1.0", 
"@xterm/addon-fit": "^0.10.0", @@ -3766,7 +3767,6 @@ "version": "1.19.6", "resolved": "https://registry.npmjs.org/@types/body-parser/-/body-parser-1.19.6.tgz", "integrity": "sha512-HLFeCYgz89uk22N5Qg3dvGvsv46B8GLvKKo1zKG4NybA8U2DiEO3w9lqGg29t/tfLRJpJ6iQxnVw4OnB7MoM9g==", - "dev": true, "license": "MIT", "dependencies": { "@types/connect": "*", @@ -3777,7 +3777,6 @@ "version": "3.4.38", "resolved": "https://registry.npmjs.org/@types/connect/-/connect-3.4.38.tgz", "integrity": "sha512-K6uROf1LD88uDQqJCktA4yzL1YYAK6NgfsI0v/mTgyPKWsX1CnJ0XPSDhViejru1GcRkLWb8RlzFYJRqGUbaug==", - "dev": true, "license": "MIT", "dependencies": { "@types/node": "*" @@ -3831,7 +3830,6 @@ "version": "5.0.6", "resolved": "https://registry.npmjs.org/@types/express/-/express-5.0.6.tgz", "integrity": "sha512-sKYVuV7Sv9fbPIt/442koC7+IIwK5olP1KWeD88e/idgoJqDm3JV/YUiPwkoKK92ylff2MGxSz1CSjsXelx0YA==", - "dev": true, "license": "MIT", "dependencies": { "@types/body-parser": "*", @@ -3843,7 +3841,6 @@ "version": "5.1.1", "resolved": "https://registry.npmjs.org/@types/express-serve-static-core/-/express-serve-static-core-5.1.1.tgz", "integrity": "sha512-v4zIMr/cX7/d2BpAEX3KNKL/JrT1s43s96lLvvdTmza1oEvDudCqK9aF/djc/SWgy8Yh0h30TZx5VpzqFCxk5A==", - "dev": true, "license": "MIT", "dependencies": { "@types/node": "*", @@ -3865,7 +3862,6 @@ "version": "2.0.5", "resolved": "https://registry.npmjs.org/@types/http-errors/-/http-errors-2.0.5.tgz", "integrity": "sha512-r8Tayk8HJnX0FztbZN7oVqGccWgw98T/0neJphO91KkmOzug1KkofZURD4UaD5uH8AqcFLfdPErnBod0u71/qg==", - "dev": true, "license": "MIT" }, "node_modules/@types/json-schema": { @@ -3907,11 +3903,19 @@ "integrity": "sha512-GsCCIZDE/p3i96vtEqx+7dBUGXrc7zeSK3wwPHIaRThS+9OhWIXRqzs4d6k1SVU8g91DrNRWxWUGhp5KXQb2VA==", "license": "MIT" }, + "node_modules/@types/multer": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/@types/multer/-/multer-2.1.0.tgz", + "integrity": 
"sha512-zYZb0+nJhOHtPpGDb3vqPjwpdeGlGC157VpkqNQL+UU2qwoacoQ7MpsAmUptI/0Oa127X32JzWDqQVEXp2RcIA==", + "license": "MIT", + "dependencies": { + "@types/express": "*" + } + }, "node_modules/@types/node": { "version": "22.19.7", "resolved": "https://registry.npmjs.org/@types/node/-/node-22.19.7.tgz", "integrity": "sha512-MciR4AKGHWl7xwxkBa6xUGxQJ4VBOmPTF7sL+iGzuahOFaO0jHCsuEfS80pan1ef4gWId1oWOweIhrDEYLuaOw==", - "dev": true, "license": "MIT", "dependencies": { "undici-types": "~6.21.0" @@ -3941,14 +3945,12 @@ "version": "6.15.0", "resolved": "https://registry.npmjs.org/@types/qs/-/qs-6.15.0.tgz", "integrity": "sha512-JawvT8iBVWpzTrz3EGw9BTQFg3BQNmwERdKE22vlTxawwtbyUSlMppvZYKLZzB5zgACXdXxbD3m1bXaMqP/9ow==", - "dev": true, "license": "MIT" }, "node_modules/@types/range-parser": { "version": "1.2.7", "resolved": "https://registry.npmjs.org/@types/range-parser/-/range-parser-1.2.7.tgz", "integrity": "sha512-hKormJbkJqzQGhziax5PItDUTMAM9uE2XXQmM37dyd4hVM+5aVl7oVxMVUiVQn2oCQFN/LKCZdvSM0pFRqbSmQ==", - "dev": true, "license": "MIT" }, "node_modules/@types/react": { @@ -3975,7 +3977,6 @@ "version": "1.2.1", "resolved": "https://registry.npmjs.org/@types/send/-/send-1.2.1.tgz", "integrity": "sha512-arsCikDvlU99zl1g69TcAB3mzZPpxgw0UQnaHeC1Nwb015xp8bknZv5rIfri9xTOcMuaVgvabfIRA7PSZVuZIQ==", - "dev": true, "license": "MIT", "dependencies": { "@types/node": "*" @@ -3985,7 +3986,6 @@ "version": "2.2.0", "resolved": "https://registry.npmjs.org/@types/serve-static/-/serve-static-2.2.0.tgz", "integrity": "sha512-8mam4H1NHLtu7nmtalF7eyBH14QyOASmcxHhSfEoRyr0nP/YdoesEtU+uSRvMe96TW/HPTtkoKqQLl53N7UXMQ==", - "dev": true, "license": "MIT", "dependencies": { "@types/http-errors": "*", @@ -17584,7 +17584,6 @@ "version": "6.21.0", "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.21.0.tgz", "integrity": "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==", - "dev": true, "license": "MIT" }, "node_modules/unified": { diff --git 
a/server/src/modules/llm/assets.service.ts b/server/src/modules/llm/assets.service.ts new file mode 100644 index 00000000..8ddefc45 --- /dev/null +++ b/server/src/modules/llm/assets.service.ts @@ -0,0 +1,85 @@ +import { randomUUID } from 'node:crypto'; +import { mkdir, writeFile } from 'node:fs/promises'; +import path from 'node:path'; + +import { AppError } from '@/shared/utils/app-error.js'; + +const SUPPORTED_IMAGE_MIME_TYPES = new Set([ + 'image/jpeg', + 'image/png', + 'image/gif', + 'image/webp', +]); + +const MIME_TO_EXTENSION: Record = { + 'image/jpeg': '.jpg', + 'image/png': '.png', + 'image/gif': '.gif', + 'image/webp': '.webp', +}; + +type UploadedImage = { + originalname: string; + mimetype: string; + size: number; + buffer: Buffer; +}; + +export type StoredImageAsset = { + originalName: string; + storedName: string; + absolutePath: string; + relativePath: string; + mimeType: string; + size: number; +}; + +/** + * Persists uploaded images in `.cloudcli/assets` and returns resolved paths for provider calls. + */ +export const llmAssetsService = { + async storeUploadedImages( + images: UploadedImage[], + options?: { + workspacePath?: string; + }, + ): Promise { + if (!images.length) { + throw new AppError('At least one image file is required.', { + code: 'IMAGE_REQUIRED', + statusCode: 400, + }); + } + + const workspaceRoot = path.resolve(options?.workspacePath ?? process.cwd()); + const assetsDirectory = path.join(workspaceRoot, '.cloudcli', 'assets'); + await mkdir(assetsDirectory, { recursive: true }); + + const storedAssets: StoredImageAsset[] = []; + for (const image of images) { + if (!SUPPORTED_IMAGE_MIME_TYPES.has(image.mimetype)) { + throw new AppError(`Unsupported image type "${image.mimetype}".`, { + code: 'UNSUPPORTED_IMAGE_TYPE', + statusCode: 400, + }); + } + + const extension = (MIME_TO_EXTENSION[image.mimetype] ?? 
path.extname(image.originalname)) || '.img'; + const storedName = `${Date.now()}-${randomUUID()}${extension}`; + const absolutePath = path.join(assetsDirectory, storedName); + + await writeFile(absolutePath, image.buffer); + + storedAssets.push({ + originalName: image.originalname, + storedName, + absolutePath, + relativePath: path.relative(workspaceRoot, absolutePath).replace(/\\/g, '/'), + mimeType: image.mimetype, + size: image.size, + }); + } + + return storedAssets; + }, +}; diff --git a/server/src/modules/llm/llm-unifier.images.test.ts b/server/src/modules/llm/llm-unifier.images.test.ts new file mode 100644 index 00000000..e92c55c2 --- /dev/null +++ b/server/src/modules/llm/llm-unifier.images.test.ts @@ -0,0 +1,211 @@ +import assert from 'node:assert/strict'; +import fs from 'node:fs/promises'; +import os from 'node:os'; +import path from 'node:path'; +import test from 'node:test'; + +import { AppError } from '../../shared/utils/app-error.js'; +import { llmAssetsService } from './assets.service.js'; +import { ClaudeProvider } from './providers/claude.provider.js'; +import { CodexProvider } from './providers/codex.provider.js'; +import { CursorProvider } from './providers/cursor.provider.js'; +import { GeminiProvider } from './providers/gemini.provider.js'; +import { llmService } from './llm.service.js'; + +const asyncEvents = async function* (events: unknown[]) { + for (const event of events) { + yield event; + } +}; + +/** + * This test covers the universal image-upload flow: store uploads under `.cloudcli/assets`. 
+ */ +test('llmAssetsService stores uploaded images in .cloudcli/assets', { concurrency: false }, async () => { + const workspaceRoot = await fs.mkdtemp(path.join(os.tmpdir(), 'llm-assets-')); + try { + const images = await llmAssetsService.storeUploadedImages( + [ + { + originalname: 'photo.jpg', + mimetype: 'image/jpeg', + size: 3, + buffer: Buffer.from([0x01, 0x02, 0x03]), + }, + { + originalname: 'diagram.png', + mimetype: 'image/png', + size: 4, + buffer: Buffer.from([0x11, 0x12, 0x13, 0x14]), + }, + ], + { workspacePath: workspaceRoot }, + ); + + assert.equal(images.length, 2); + assert.ok(images[0]?.relativePath.startsWith('.cloudcli/assets/')); + assert.ok(images[1]?.relativePath.startsWith('.cloudcli/assets/')); + await fs.access(images[0]!.absolutePath); + await fs.access(images[1]!.absolutePath); + } finally { + await fs.rm(workspaceRoot, { recursive: true, force: true }); + } +}); + +/** + * This test covers upload validation: unsupported mime types are rejected. + */ +test('llmAssetsService rejects unsupported image mime types', async () => { + await assert.rejects( + llmAssetsService.storeUploadedImages([ + { + originalname: 'file.bmp', + mimetype: 'image/bmp', + size: 4, + buffer: Buffer.from([0x10, 0x20, 0x30, 0x40]), + }, + ]), + (error: unknown) => + error instanceof AppError && + error.code === 'UNSUPPORTED_IMAGE_TYPE' && + error.statusCode === 400, + ); +}); + +/** + * This test covers Claude image input support: prompt becomes async iterable with text + base64 image blocks. 
+ */ +test('claude provider builds async prompt payload with base64 image blocks', { concurrency: false }, async () => { + const workspaceRoot = await fs.mkdtemp(path.join(os.tmpdir(), 'llm-claude-img-')); + const imagePath = path.join(workspaceRoot, 'sample.jpg'); + const imageBytes = Buffer.from([0xaa, 0xbb, 0xcc]); + await fs.writeFile(imagePath, imageBytes); + + try { + const provider = new ClaudeProvider() as any; + const promptPayload = await provider.buildPromptInput( + 'describe this', + [imagePath], + workspaceRoot, + ); + + assert.equal(typeof promptPayload[Symbol.asyncIterator], 'function'); + const iterator = promptPayload[Symbol.asyncIterator](); + const first = await iterator.next(); + assert.equal(first.done, false); + + const message = first.value as { + type: string; + message: { + role: string; + content: Array>; + }; + }; + + assert.equal(message.type, 'user'); + assert.equal(message.message.role, 'user'); + assert.equal(message.message.content[0]?.type, 'text'); + assert.equal(message.message.content[0]?.text, 'describe this'); + assert.equal(message.message.content[1]?.type, 'image'); + const imageBlock = message.message.content[1] as { + source: { + type: string; + media_type: string; + data: string; + }; + }; + assert.equal(imageBlock.source.type, 'base64'); + assert.equal(imageBlock.source.media_type, 'image/jpeg'); + assert.equal(imageBlock.source.data, imageBytes.toString('base64')); + } finally { + await fs.rm(workspaceRoot, { recursive: true, force: true }); + } +}); + +/** + * This test covers Codex image input support: runStreamed receives text + local_image items. 
+ */
+test('codex provider sends local_image prompt items when image paths are provided', async () => {
+  const provider = new CodexProvider() as any;
+  let capturedPrompt: unknown;
+
+  provider.loadCodexSdkModule = async () => ({
+    Codex: class {
+      startThread() {
+        return {
+          async runStreamed(prompt: unknown) {
+            capturedPrompt = prompt;
+            return { events: asyncEvents([]) };
+          },
+        };
+      }
+
+      resumeThread() {
+        return {
+          async runStreamed(prompt: unknown) {
+            capturedPrompt = prompt;
+            return { events: asyncEvents([]) };
+          },
+        };
+      }
+    },
+  });
+
+  await provider.createSdkExecution({
+    prompt: 'analyze this image',
+    sessionId: 'codex-image-1',
+    isResume: false,
+    imagePaths: ['assets/a.png'],
+    workspacePath: '/tmp/workspace',
+  });
+
+  assert.ok(Array.isArray(capturedPrompt));
+  const promptItems = capturedPrompt as Array<Record<string, unknown>>;
+  assert.equal(promptItems[0]?.type, 'text');
+  assert.equal(promptItems[0]?.text, 'analyze this image');
+  assert.equal(promptItems[1]?.type, 'local_image');
+  assert.equal(promptItems[1]?.path, path.resolve('/tmp/workspace', 'assets/a.png'));
+});
+
+/**
+ * This test covers Gemini/Cursor image handling: image paths are appended to the prompt payload.
+ */ +test('gemini and cursor providers append image path arrays to prompts', () => { + const geminiProvider = new GeminiProvider() as any; + const cursorProvider = new CursorProvider() as any; + + const geminiInvocation = geminiProvider.createCliInvocation({ + prompt: 'summarize', + sessionId: 'g-1', + isResume: false, + imagePaths: ['scripts/pic.jpg'], + }); + + const cursorInvocation = cursorProvider.createCliInvocation({ + prompt: 'summarize', + sessionId: 'c-1', + isResume: false, + imagePaths: ['scripts/pic.jpg'], + }); + + const geminiPrompt = geminiInvocation.args[1]; + const cursorPrompt = cursorInvocation.args[cursorInvocation.args.length - 1]; + assert.ok(typeof geminiPrompt === 'string' && geminiPrompt.includes('["scripts/pic.jpg"]')); + assert.ok(typeof cursorPrompt === 'string' && cursorPrompt.includes('["scripts/pic.jpg"]')); +}); + +/** + * This test covers API payload validation: imagePaths must be an array of strings. + */ +test('llmService rejects invalid imagePaths payloads before provider execution', async () => { + await assert.rejects( + llmService.startSession('cursor', { + prompt: 'hello', + imagePaths: [1, 2, 3], + }), + (error: unknown) => + error instanceof AppError && + error.code === 'INVALID_IMAGE_PATHS' && + error.statusCode === 400, + ); +}); diff --git a/server/src/modules/llm/llm-unifier.mcp.test.ts b/server/src/modules/llm/llm-unifier.mcp.test.ts new file mode 100644 index 00000000..ba491c8c --- /dev/null +++ b/server/src/modules/llm/llm-unifier.mcp.test.ts @@ -0,0 +1,351 @@ +import assert from 'node:assert/strict'; +import fs from 'node:fs/promises'; +import http from 'node:http'; +import os from 'node:os'; +import path from 'node:path'; +import test from 'node:test'; + +import TOML from '@iarna/toml'; + +import { AppError } from '../../shared/utils/app-error.js'; +import { llmMcpService } from './mcp.service.js'; + +const patchHomeDir = (nextHomeDir: string) => { + const original = os.homedir; + (os as any).homedir = () => 
nextHomeDir;
+  return () => {
+    (os as any).homedir = original;
+  };
+};
+
+const readJson = async (filePath: string): Promise<Record<string, unknown>> => {
+  const content = await fs.readFile(filePath, 'utf8');
+  return JSON.parse(content) as Record<string, unknown>;
+};
+
+/**
+ * This test covers Claude MCP support for all scopes (user/local/project) and all transports (stdio/http/sse),
+ * including add, update/list, and remove operations.
+ */
+test('llmMcpService handles claude MCP scopes/transports with file-backed persistence', { concurrency: false }, async () => {
+  const tempRoot = await fs.mkdtemp(path.join(os.tmpdir(), 'llm-mcp-claude-'));
+  const workspacePath = path.join(tempRoot, 'workspace');
+  await fs.mkdir(workspacePath, { recursive: true });
+
+  const restoreHomeDir = patchHomeDir(tempRoot);
+  try {
+    await llmMcpService.upsertProviderServer('claude', {
+      name: 'claude-user-stdio',
+      scope: 'user',
+      transport: 'stdio',
+      command: 'npx',
+      args: ['-y', 'my-server'],
+      env: { API_KEY: 'secret' },
+    });
+
+    await llmMcpService.upsertProviderServer('claude', {
+      name: 'claude-local-http',
+      scope: 'local',
+      transport: 'http',
+      url: 'https://example.com/mcp',
+      headers: { Authorization: 'Bearer token' },
+      workspacePath,
+    });
+
+    await llmMcpService.upsertProviderServer('claude', {
+      name: 'claude-project-sse',
+      scope: 'project',
+      transport: 'sse',
+      url: 'https://example.com/sse',
+      headers: { 'X-API-Key': 'abc' },
+      workspacePath,
+    });
+
+    const grouped = await llmMcpService.listProviderServers('claude', { workspacePath });
+    assert.ok(grouped.user.some((server) => server.name === 'claude-user-stdio' && server.transport === 'stdio'));
+    assert.ok(grouped.local.some((server) => server.name === 'claude-local-http' && server.transport === 'http'));
+    assert.ok(grouped.project.some((server) => server.name === 'claude-project-sse' && server.transport === 'sse'));
+
+    // update behavior is the same upsert route with same name
+    await llmMcpService.upsertProviderServer('claude', {
+      name:
'claude-project-sse',
+      scope: 'project',
+      transport: 'sse',
+      url: 'https://example.com/sse-updated',
+      headers: { 'X-API-Key': 'updated' },
+      workspacePath,
+    });
+
+    const projectConfig = await readJson(path.join(workspacePath, '.mcp.json'));
+    const projectServers = projectConfig.mcpServers as Record<string, unknown>;
+    const projectServer = projectServers['claude-project-sse'] as Record<string, unknown>;
+    assert.equal(projectServer.url, 'https://example.com/sse-updated');
+
+    const removeResult = await llmMcpService.removeProviderServer('claude', {
+      name: 'claude-local-http',
+      scope: 'local',
+      workspacePath,
+    });
+    assert.equal(removeResult.removed, true);
+  } finally {
+    restoreHomeDir();
+    await fs.rm(tempRoot, { recursive: true, force: true });
+  }
+});
+
+/**
+ * This test covers Codex MCP support for user/project scopes, stdio/http formats,
+ * and validation for unsupported scope/transport combinations.
+ */
+test('llmMcpService handles codex MCP TOML config and capability validation', { concurrency: false }, async () => {
+  const tempRoot = await fs.mkdtemp(path.join(os.tmpdir(), 'llm-mcp-codex-'));
+  const workspacePath = path.join(tempRoot, 'workspace');
+  await fs.mkdir(workspacePath, { recursive: true });
+
+  const restoreHomeDir = patchHomeDir(tempRoot);
+  try {
+    await llmMcpService.upsertProviderServer('codex', {
+      name: 'codex-user-stdio',
+      scope: 'user',
+      transport: 'stdio',
+      command: 'python',
+      args: ['server.py'],
+      env: { API_KEY: 'x' },
+      envVars: ['API_KEY'],
+      cwd: '/tmp',
+    });
+
+    await llmMcpService.upsertProviderServer('codex', {
+      name: 'codex-project-http',
+      scope: 'project',
+      transport: 'http',
+      url: 'https://codex.example.com/mcp',
+      headers: { 'X-Custom-Header': 'value' },
+      envHttpHeaders: { 'X-API-Key': 'MY_API_KEY_ENV' },
+      bearerTokenEnvVar: 'MY_API_TOKEN',
+      workspacePath,
+    });
+
+    const userTomlPath = path.join(tempRoot, '.codex', 'config.toml');
+    const userConfig = TOML.parse(await fs.readFile(userTomlPath, 'utf8')) as Record<string, unknown>;
+    const
userServers = userConfig.mcp_servers as Record; + const userStdio = userServers['codex-user-stdio'] as Record; + assert.equal(userStdio.command, 'python'); + + const projectTomlPath = path.join(workspacePath, '.codex', 'config.toml'); + const projectConfig = TOML.parse(await fs.readFile(projectTomlPath, 'utf8')) as Record; + const projectServers = projectConfig.mcp_servers as Record; + const projectHttp = projectServers['codex-project-http'] as Record; + assert.equal(projectHttp.url, 'https://codex.example.com/mcp'); + + await assert.rejects( + llmMcpService.upsertProviderServer('codex', { + name: 'codex-local', + scope: 'local', + transport: 'stdio', + command: 'node', + }), + (error: unknown) => + error instanceof AppError && + error.code === 'MCP_SCOPE_NOT_SUPPORTED' && + error.statusCode === 400, + ); + + await assert.rejects( + llmMcpService.upsertProviderServer('codex', { + name: 'codex-sse', + scope: 'project', + transport: 'sse', + url: 'https://example.com/sse', + workspacePath, + }), + (error: unknown) => + error instanceof AppError && + error.code === 'MCP_TRANSPORT_NOT_SUPPORTED' && + error.statusCode === 400, + ); + } finally { + restoreHomeDir(); + await fs.rm(tempRoot, { recursive: true, force: true }); + } +}); + +/** + * This test covers Gemini/Cursor MCP JSON formats and user/project scope persistence. 
+ */ +test('llmMcpService handles gemini and cursor MCP JSON config formats', { concurrency: false }, async () => { + const tempRoot = await fs.mkdtemp(path.join(os.tmpdir(), 'llm-mcp-gc-')); + const workspacePath = path.join(tempRoot, 'workspace'); + await fs.mkdir(workspacePath, { recursive: true }); + + const restoreHomeDir = patchHomeDir(tempRoot); + try { + await llmMcpService.upsertProviderServer('gemini', { + name: 'gemini-stdio', + scope: 'user', + transport: 'stdio', + command: 'node', + args: ['server.js'], + env: { TOKEN: '$TOKEN' }, + cwd: './server', + }); + + await llmMcpService.upsertProviderServer('gemini', { + name: 'gemini-http', + scope: 'project', + transport: 'http', + url: 'https://gemini.example.com/mcp', + headers: { Authorization: 'Bearer token' }, + workspacePath, + }); + + await llmMcpService.upsertProviderServer('cursor', { + name: 'cursor-stdio', + scope: 'project', + transport: 'stdio', + command: 'npx', + args: ['-y', 'mcp-server'], + env: { API_KEY: 'value' }, + workspacePath, + }); + + await llmMcpService.upsertProviderServer('cursor', { + name: 'cursor-http', + scope: 'user', + transport: 'http', + url: 'http://localhost:3333/mcp', + headers: { API_KEY: 'value' }, + }); + + const geminiUserConfig = await readJson(path.join(tempRoot, '.gemini', 'settings.json')); + const geminiUserServer = (geminiUserConfig.mcpServers as Record)['gemini-stdio'] as Record; + assert.equal(geminiUserServer.command, 'node'); + assert.equal(geminiUserServer.type, undefined); + + const geminiProjectConfig = await readJson(path.join(workspacePath, '.gemini', 'settings.json')); + const geminiProjectServer = (geminiProjectConfig.mcpServers as Record)['gemini-http'] as Record; + assert.equal(geminiProjectServer.type, 'http'); + + const cursorUserConfig = await readJson(path.join(tempRoot, '.cursor', 'mcp.json')); + const cursorHttpServer = (cursorUserConfig.mcpServers as Record)['cursor-http'] as Record; + assert.equal(cursorHttpServer.url, 
'http://localhost:3333/mcp');
+    assert.equal(cursorHttpServer.type, undefined);
+  } finally {
+    restoreHomeDir();
+    await fs.rm(tempRoot, { recursive: true, force: true });
+  }
+});
+
+/**
+ * This test covers the global MCP adder requirement: only http/stdio are allowed and
+ * one payload is written to all providers.
+ */
+test('llmMcpService global adder writes to all providers and rejects unsupported transports', { concurrency: false }, async () => {
+  const tempRoot = await fs.mkdtemp(path.join(os.tmpdir(), 'llm-mcp-global-'));
+  const workspacePath = path.join(tempRoot, 'workspace');
+  await fs.mkdir(workspacePath, { recursive: true });
+
+  const restoreHomeDir = patchHomeDir(tempRoot);
+  try {
+    const globalResult = await llmMcpService.addServerToAllProviders({
+      name: 'global-http',
+      scope: 'project',
+      transport: 'http',
+      url: 'https://global.example.com/mcp',
+      workspacePath,
+    });
+
+    assert.equal(globalResult.length, 4);
+    assert.ok(globalResult.every((entry) => entry.created === true));
+
+    const claudeProject = await readJson(path.join(workspacePath, '.mcp.json'));
+    assert.ok((claudeProject.mcpServers as Record<string, unknown>)['global-http']);
+
+    const codexProject = TOML.parse(await fs.readFile(path.join(workspacePath, '.codex', 'config.toml'), 'utf8')) as Record<string, unknown>;
+    assert.ok((codexProject.mcp_servers as Record<string, unknown>)['global-http']);
+
+    const geminiProject = await readJson(path.join(workspacePath, '.gemini', 'settings.json'));
+    assert.ok((geminiProject.mcpServers as Record<string, unknown>)['global-http']);
+
+    const cursorProject = await readJson(path.join(workspacePath, '.cursor', 'mcp.json'));
+    assert.ok((cursorProject.mcpServers as Record<string, unknown>)['global-http']);
+
+    await assert.rejects(
+      llmMcpService.addServerToAllProviders({
+        name: 'global-sse',
+        scope: 'project',
+        transport: 'sse',
+        url: 'https://example.com/sse',
+        workspacePath,
+      }),
+      (error: unknown) =>
+        error instanceof AppError &&
+        error.code === 'INVALID_GLOBAL_MCP_TRANSPORT' &&
+        error.statusCode === 400,
+    );
+
} finally { + restoreHomeDir(); + await fs.rm(tempRoot, { recursive: true, force: true }); + } +}); + +/** + * This test covers "run" behavior for both stdio and http MCP servers. + */ +test('llmMcpService runProviderServer probes stdio and http MCP servers', { concurrency: false }, async () => { + const tempRoot = await fs.mkdtemp(path.join(os.tmpdir(), 'llm-mcp-run-')); + const workspacePath = path.join(tempRoot, 'workspace'); + await fs.mkdir(workspacePath, { recursive: true }); + + const restoreHomeDir = patchHomeDir(tempRoot); + const server = http.createServer((_req, res) => { + res.statusCode = 200; + res.end('ok'); + }); + + try { + await new Promise((resolve) => server.listen(0, '127.0.0.1', () => resolve())); + const address = server.address(); + assert.ok(address && typeof address === 'object'); + const url = `http://127.0.0.1:${address.port}/mcp`; + + await llmMcpService.upsertProviderServer('gemini', { + name: 'probe-http', + scope: 'project', + transport: 'http', + url, + workspacePath, + }); + + await llmMcpService.upsertProviderServer('cursor', { + name: 'probe-stdio', + scope: 'project', + transport: 'stdio', + command: process.execPath, + args: ['-e', 'process.exit(0)'], + workspacePath, + }); + + const httpProbe = await llmMcpService.runProviderServer({ + provider: 'gemini', + name: 'probe-http', + scope: 'project', + workspacePath, + }); + assert.equal(httpProbe.reachable, true); + assert.equal(httpProbe.transport, 'http'); + + const stdioProbe = await llmMcpService.runProviderServer({ + provider: 'cursor', + name: 'probe-stdio', + scope: 'project', + workspacePath, + }); + assert.equal(stdioProbe.reachable, true); + assert.equal(stdioProbe.transport, 'stdio'); + } finally { + server.close(); + restoreHomeDir(); + await fs.rm(tempRoot, { recursive: true, force: true }); + } +}); diff --git a/server/src/modules/llm/llm-unifier.skills.test.ts b/server/src/modules/llm/llm-unifier.skills.test.ts new file mode 100644 index 00000000..b1f3e653 --- 
/dev/null +++ b/server/src/modules/llm/llm-unifier.skills.test.ts @@ -0,0 +1,207 @@ +import assert from 'node:assert/strict'; +import fs from 'node:fs/promises'; +import os from 'node:os'; +import path from 'node:path'; +import test from 'node:test'; + +import { llmSkillsService } from './skills.service.js'; + +const patchHomeDir = (nextHomeDir: string) => { + const original = os.homedir; + (os as any).homedir = () => nextHomeDir; + return () => { + (os as any).homedir = original; + }; +}; + +const createSkill = async ( + rootSkillsDirectory: string, + directoryName: string, + metadata: { + name: string; + description: string; + }, +) => { + const skillDirectory = path.join(rootSkillsDirectory, directoryName); + await fs.mkdir(skillDirectory, { recursive: true }); + await fs.writeFile( + path.join(skillDirectory, 'SKILL.md'), + `---\nname: ${metadata.name}\ndescription: ${metadata.description}\n---\n\n# ${metadata.name}\n`, + 'utf8', + ); +}; + +/** + * This test covers Claude skills fetching from user/project/plugin locations and plugin namespace invocation. 
+ */ +test('llmSkillsService lists claude user/project/plugin skills with proper invocation names', { concurrency: false }, async () => { + const tempRoot = await fs.mkdtemp(path.join(os.tmpdir(), 'llm-skills-claude-')); + const workspacePath = path.join(tempRoot, 'workspace'); + const pluginInstallPath = path.join(tempRoot, 'plugin-install'); + await fs.mkdir(workspacePath, { recursive: true }); + + const restoreHomeDir = patchHomeDir(tempRoot); + try { + await createSkill(path.join(tempRoot, '.claude', 'skills'), 'user-helper', { + name: 'user-helper', + description: 'User skill description', + }); + await createSkill(path.join(workspacePath, '.claude', 'skills'), 'project-helper', { + name: 'project-helper', + description: 'Project skill description', + }); + await createSkill(path.join(pluginInstallPath, 'skills'), 'plugin-helper', { + name: 'plugin-helper', + description: 'Plugin skill description', + }); + + await fs.mkdir(path.join(tempRoot, '.claude', 'plugins'), { recursive: true }); + await fs.writeFile( + path.join(tempRoot, '.claude', 'settings.json'), + JSON.stringify({ + enabledPlugins: { + 'example-skills@anthropic-agent-skills': true, + }, + }), + 'utf8', + ); + await fs.writeFile( + path.join(tempRoot, '.claude', 'plugins', 'installed_plugins.json'), + JSON.stringify({ + version: 2, + plugins: { + 'example-skills@anthropic-agent-skills': [ + { + installPath: pluginInstallPath, + }, + ], + }, + }), + 'utf8', + ); + + const skills = await llmSkillsService.listProviderSkills('claude', { workspacePath }); + assert.ok(skills.some((skill) => skill.scope === 'user' && skill.invocation === '/user-helper')); + assert.ok(skills.some((skill) => skill.scope === 'project' && skill.invocation === '/project-helper')); + assert.ok(skills.some((skill) => skill.scope === 'plugin' && skill.invocation === '/example-skills:plugin-helper')); + } finally { + restoreHomeDir(); + await fs.rm(tempRoot, { recursive: true, force: true }); + } +}); + +/** + * This test covers 
Codex skills discovery across repo/user/system locations and `$` invocation prefix. + */ +test('llmSkillsService lists codex skills from repo/user/system locations with dollar invocation', { concurrency: false }, async () => { + const tempRoot = await fs.mkdtemp(path.join(os.tmpdir(), 'llm-skills-codex-')); + const repoRoot = path.join(tempRoot, 'repo'); + const workspacePath = path.join(repoRoot, 'packages', 'app'); + await fs.mkdir(workspacePath, { recursive: true }); + await fs.mkdir(path.join(repoRoot, '.git'), { recursive: true }); + + const restoreHomeDir = patchHomeDir(tempRoot); + try { + await createSkill(path.join(workspacePath, '.agents', 'skills'), 'cwd-skill', { + name: 'cwd-skill', + description: 'cwd skill', + }); + await createSkill(path.join(workspacePath, '..', '.agents', 'skills'), 'parent-skill', { + name: 'parent-skill', + description: 'parent skill', + }); + await createSkill(path.join(repoRoot, '.agents', 'skills'), 'repo-root-skill', { + name: 'repo-root-skill', + description: 'repo root skill', + }); + await createSkill(path.join(tempRoot, '.agents', 'skills'), 'user-skill', { + name: 'user-skill', + description: 'user skill', + }); + await createSkill(path.join(tempRoot, '.codex', 'skills', '.system'), 'system-skill', { + name: 'system-skill', + description: 'system skill', + }); + + const skills = await llmSkillsService.listProviderSkills('codex', { workspacePath }); + assert.ok(skills.some((skill) => skill.name === 'cwd-skill' && skill.invocation === '$cwd-skill')); + assert.ok(skills.some((skill) => skill.name === 'parent-skill' && skill.invocation === '$parent-skill')); + assert.ok(skills.some((skill) => skill.name === 'repo-root-skill' && skill.invocation === '$repo-root-skill')); + assert.ok(skills.some((skill) => skill.name === 'user-skill' && skill.invocation === '$user-skill')); + assert.ok(skills.some((skill) => skill.name === 'system-skill' && skill.invocation === '$system-skill')); + } finally { + restoreHomeDir(); + await 
fs.rm(tempRoot, { recursive: true, force: true }); + } +}); + +/** + * This test covers Gemini skill fetch locations and slash-based invocation format. + */ +test('llmSkillsService lists gemini skills from documented directories', { concurrency: false }, async () => { + const tempRoot = await fs.mkdtemp(path.join(os.tmpdir(), 'llm-skills-gemini-')); + const workspacePath = path.join(tempRoot, 'workspace'); + await fs.mkdir(workspacePath, { recursive: true }); + + const restoreHomeDir = patchHomeDir(tempRoot); + try { + await createSkill(path.join(tempRoot, '.gemini', 'skills'), 'home-gemini', { + name: 'home-gemini', + description: 'home gemini skill', + }); + await createSkill(path.join(tempRoot, '.agents', 'skills'), 'home-agents', { + name: 'home-agents', + description: 'home agents skill', + }); + await createSkill(path.join(workspacePath, '.gemini', 'skills'), 'project-gemini', { + name: 'project-gemini', + description: 'project gemini skill', + }); + await createSkill(path.join(workspacePath, '.agents', 'skills'), 'project-agents', { + name: 'project-agents', + description: 'project agents skill', + }); + + const skills = await llmSkillsService.listProviderSkills('gemini', { workspacePath }); + assert.ok(skills.some((skill) => skill.invocation === '/home-gemini')); + assert.ok(skills.some((skill) => skill.invocation === '/home-agents')); + assert.ok(skills.some((skill) => skill.invocation === '/project-gemini')); + assert.ok(skills.some((skill) => skill.invocation === '/project-agents')); + } finally { + restoreHomeDir(); + await fs.rm(tempRoot, { recursive: true, force: true }); + } +}); + +/** + * This test covers Cursor skill fetch locations and slash-based invocation format. 
+ */ +test('llmSkillsService lists cursor skills from documented directories', { concurrency: false }, async () => { + const tempRoot = await fs.mkdtemp(path.join(os.tmpdir(), 'llm-skills-cursor-')); + const workspacePath = path.join(tempRoot, 'workspace'); + await fs.mkdir(workspacePath, { recursive: true }); + + const restoreHomeDir = patchHomeDir(tempRoot); + try { + await createSkill(path.join(workspacePath, '.agents', 'skills'), 'project-agents', { + name: 'project-agents', + description: 'project agents skill', + }); + await createSkill(path.join(workspacePath, '.cursor', 'skills'), 'project-cursor', { + name: 'project-cursor', + description: 'project cursor skill', + }); + await createSkill(path.join(tempRoot, '.cursor', 'skills'), 'user-cursor', { + name: 'user-cursor', + description: 'user cursor skill', + }); + + const skills = await llmSkillsService.listProviderSkills('cursor', { workspacePath }); + assert.ok(skills.some((skill) => skill.invocation === '/project-agents')); + assert.ok(skills.some((skill) => skill.invocation === '/project-cursor')); + assert.ok(skills.some((skill) => skill.invocation === '/user-cursor')); + } finally { + restoreHomeDir(); + await fs.rm(tempRoot, { recursive: true, force: true }); + } +}); diff --git a/server/src/modules/llm/llm.routes.ts b/server/src/modules/llm/llm.routes.ts index 4f229da5..9242dd16 100644 --- a/server/src/modules/llm/llm.routes.ts +++ b/server/src/modules/llm/llm.routes.ts @@ -1,13 +1,27 @@ import express, { type NextFunction, type Request, type Response } from 'express'; +import multer from 'multer'; +import path from 'node:path'; import { asyncHandler } from '@/shared/http/async-handler.js'; import { AppError } from '@/shared/utils/app-error.js'; import { createApiErrorResponse, createApiSuccessResponse } from '@/shared/http/api-response.js'; import { llmService } from '@/modules/llm/llm.service.js'; import { llmSessionsService } from '@/modules/llm/sessions.service.js'; +import { llmAssetsService } 
from '@/modules/llm/assets.service.js'; +import type { McpScope, McpTransport, UpsertMcpServerInput } from '@/modules/llm/mcp.service.js'; +import { llmMcpService } from '@/modules/llm/mcp.service.js'; +import { llmSkillsService } from '@/modules/llm/skills.service.js'; +import type { LLMProvider } from '@/shared/types/app.js'; import { logger } from '@/shared/utils/logger.js'; const router = express.Router(); +const upload = multer({ + storage: multer.memoryStorage(), + limits: { + files: 10, + fileSize: 20 * 1024 * 1024, + }, +}); /** * Safely reads an Express path parameter that may arrive as string or string[]. @@ -68,6 +82,139 @@ const parseRenamePayload = (payload: unknown): { summary: string } => { return { summary }; }; +/** + * Reads optional query values and trims surrounding whitespace. + */ +const readOptionalQueryString = (value: unknown): string | undefined => { + if (typeof value !== 'string') { + return undefined; + } + + const normalized = value.trim(); + return normalized.length > 0 ? normalized : undefined; +}; + +/** + * Validates MCP scope query/body values. + */ +const parseMcpScope = (value: unknown): McpScope | undefined => { + if (value === undefined) { + return undefined; + } + + const normalized = readOptionalQueryString(value); + if (!normalized) { + return undefined; + } + + if (normalized === 'user' || normalized === 'local' || normalized === 'project') { + return normalized; + } + + throw new AppError(`Unsupported MCP scope "${normalized}".`, { + code: 'INVALID_MCP_SCOPE', + statusCode: 400, + }); +}; + +/** + * Validates MCP transport query/body values. 
+ */
+const parseMcpTransport = (value: unknown): McpTransport => {
+  const normalized = readOptionalQueryString(value);
+  if (!normalized) {
+    throw new AppError('transport is required.', {
+      code: 'MCP_TRANSPORT_REQUIRED',
+      statusCode: 400,
+    });
+  }
+
+  if (normalized === 'stdio' || normalized === 'http' || normalized === 'sse') {
+    return normalized;
+  }
+
+  throw new AppError(`Unsupported MCP transport "${normalized}".`, {
+    code: 'INVALID_MCP_TRANSPORT',
+    statusCode: 400,
+  });
+};
+
+/**
+ * Parses and validates MCP upsert payload.
+ */
+const parseMcpUpsertPayload = (payload: unknown): UpsertMcpServerInput => {
+  if (!payload || typeof payload !== 'object') {
+    throw new AppError('Request body must be an object.', {
+      code: 'INVALID_REQUEST_BODY',
+      statusCode: 400,
+    });
+  }
+
+  const body = payload as Record<string, unknown>;
+  const name = readOptionalQueryString(body.name);
+  if (!name) {
+    throw new AppError('name is required.', {
+      code: 'MCP_NAME_REQUIRED',
+      statusCode: 400,
+    });
+  }
+
+  const transport = parseMcpTransport(body.transport);
+  const scope = parseMcpScope(body.scope);
+  const workspacePath = readOptionalQueryString(body.workspacePath);
+
+  return {
+    name,
+    transport,
+    scope,
+    workspacePath,
+    command: readOptionalQueryString(body.command),
+    args: Array.isArray(body.args) ? body.args.filter((entry): entry is string => typeof entry === 'string') : undefined,
+    env: typeof body.env === 'object' && body.env !== null
+      ? Object.fromEntries(
+          Object.entries(body.env as Record<string, unknown>).filter(
+            (entry): entry is [string, string] => typeof entry[1] === 'string',
+          ),
+        )
+      : undefined,
+    cwd: readOptionalQueryString(body.cwd),
+    url: readOptionalQueryString(body.url),
+    headers: typeof body.headers === 'object' && body.headers !== null
+      ? Object.fromEntries(
+          Object.entries(body.headers as Record<string, unknown>).filter(
+            (entry): entry is [string, string] => typeof entry[1] === 'string',
+          ),
+        )
+      : undefined,
+    envVars: Array.isArray(body.envVars)
+      ?
body.envVars.filter((entry): entry is string => typeof entry === 'string') + : undefined, + bearerTokenEnvVar: readOptionalQueryString(body.bearerTokenEnvVar), + envHttpHeaders: typeof body.envHttpHeaders === 'object' && body.envHttpHeaders !== null + ? Object.fromEntries( + Object.entries(body.envHttpHeaders as Record).filter( + (entry): entry is [string, string] => typeof entry[1] === 'string', + ), + ) + : undefined, + }; +}; + +/** + * Converts any provider route parameter into the strongly typed provider union. + */ +const parseProvider = (value: unknown): LLMProvider => { + const normalized = normalizeProviderParam(value); + if (normalized === 'claude' || normalized === 'codex' || normalized === 'cursor' || normalized === 'gemini') { + return normalized; + } + + throw new AppError(`Unsupported provider "${normalized}".`, { + code: 'UNSUPPORTED_PROVIDER', + statusCode: 400, + }); +}; + router.get( '/providers', asyncHandler(async (_req: Request, res: Response) => { @@ -78,7 +225,7 @@ router.get( router.get( '/providers/:provider/models', asyncHandler(async (req: Request, res: Response) => { - const provider = normalizeProviderParam(req.params.provider); + const provider = parseProvider(req.params.provider); const models = await llmService.listModels(provider); res.json(createApiSuccessResponse({ provider, models })); }), @@ -87,7 +234,7 @@ router.get( router.get( '/providers/:provider/sessions', asyncHandler(async (req: Request, res: Response) => { - const provider = normalizeProviderParam(req.params.provider); + const provider = parseProvider(req.params.provider); const sessions = llmService.listSessions(provider); res.json(createApiSuccessResponse({ provider, sessions })); }), @@ -96,7 +243,7 @@ router.get( router.get( '/providers/:provider/sessions/:sessionId', asyncHandler(async (req: Request, res: Response) => { - const provider = normalizeProviderParam(req.params.provider); + const provider = parseProvider(req.params.provider); const sessionId = 
readPathParam(req.params.sessionId, 'sessionId'); const session = llmService.getSession(provider, sessionId); if (!session) { @@ -113,7 +260,7 @@ router.get( router.post( '/providers/:provider/sessions/start', asyncHandler(async (req: Request, res: Response) => { - const provider = normalizeProviderParam(req.params.provider); + const provider = parseProvider(req.params.provider); const snapshot = await llmService.startSession(provider, req.body); const waitForCompletion = parseWaitForCompletion(req); @@ -135,7 +282,7 @@ router.post( router.post( '/providers/:provider/sessions/:sessionId/resume', asyncHandler(async (req: Request, res: Response) => { - const provider = normalizeProviderParam(req.params.provider); + const provider = parseProvider(req.params.provider); const sessionId = readPathParam(req.params.sessionId, 'sessionId'); const snapshot = await llmService.resumeSession(provider, sessionId, req.body); @@ -154,7 +301,7 @@ router.post( router.post( '/providers/:provider/sessions/:sessionId/stop', asyncHandler(async (req: Request, res: Response) => { - const provider = normalizeProviderParam(req.params.provider); + const provider = parseProvider(req.params.provider); const sessionId = readPathParam(req.params.sessionId, 'sessionId'); const stopped = await llmService.stopSession(provider, sessionId); res.json(createApiSuccessResponse({ provider, sessionId, stopped })); @@ -164,7 +311,7 @@ router.post( router.patch( '/providers/:provider/sessions/:sessionId/model', asyncHandler(async (req: Request, res: Response) => { - const provider = normalizeProviderParam(req.params.provider); + const provider = parseProvider(req.params.provider); const sessionId = readPathParam(req.params.sessionId, 'sessionId'); const model = typeof req.body?.model === 'string' ? 
req.body.model.trim() : ''; if (!model) { @@ -188,7 +335,7 @@ router.patch( router.patch( '/providers/:provider/sessions/:sessionId/thinking', asyncHandler(async (req: Request, res: Response) => { - const provider = normalizeProviderParam(req.params.provider); + const provider = parseProvider(req.params.provider); const sessionId = readPathParam(req.params.sessionId, 'sessionId'); const thinkingMode = typeof req.body?.thinkingMode === 'string' ? req.body.thinkingMode.trim() : ''; @@ -211,6 +358,180 @@ router.patch( }), ); +/** + * Uploads one or more images into `.cloudcli/assets` so providers can reuse file paths. + */ +router.post( + '/assets/images', + upload.array('images', 10), + asyncHandler(async (req: Request, res: Response) => { + const workspacePath = readOptionalQueryString((req.body as Record | undefined)?.workspacePath); + const filesValue = (req as Request & { files?: unknown }).files; + const files = Array.isArray(filesValue) ? filesValue as Array<{ + originalname: string; + mimetype: string; + size: number; + buffer: Buffer; + }> : []; + const images = await llmAssetsService.storeUploadedImages(files, { workspacePath }); + res.status(201).json(createApiSuccessResponse({ images })); + }), +); + +/** + * Lists MCP servers for one provider grouped by user/local/project scopes. + */ +router.get( + '/providers/:provider/mcp/servers', + asyncHandler(async (req: Request, res: Response) => { + const provider = parseProvider(req.params.provider); + const workspacePath = readOptionalQueryString(req.query.workspacePath); + const scope = parseMcpScope(req.query.scope); + + if (scope) { + const servers = await llmMcpService.listProviderServersForScope( + provider, + scope, + path.resolve(workspacePath ?? 
process.cwd()), + ); + res.json(createApiSuccessResponse({ provider, scope, servers })); + return; + } + + const groupedServers = await llmMcpService.listProviderServers(provider, { workspacePath }); + res.json(createApiSuccessResponse({ provider, scopes: groupedServers })); + }), +); + +/** + * Adds one MCP server for one provider and scope. + */ +router.post( + '/providers/:provider/mcp/servers', + asyncHandler(async (req: Request, res: Response) => { + const provider = parseProvider(req.params.provider); + const payload = parseMcpUpsertPayload(req.body); + const server = await llmMcpService.upsertProviderServer(provider, payload); + res.status(201).json(createApiSuccessResponse({ server })); + }), +); + +/** + * Updates one provider MCP server definition. + */ +router.put( + '/providers/:provider/mcp/servers/:name', + asyncHandler(async (req: Request, res: Response) => { + const provider = parseProvider(req.params.provider); + const payload = parseMcpUpsertPayload({ + ...((req.body && typeof req.body === 'object') ? req.body as Record : {}), + name: readPathParam(req.params.name, 'name'), + }); + const server = await llmMcpService.upsertProviderServer(provider, payload); + res.json(createApiSuccessResponse({ server })); + }), +); + +/** + * Removes one provider MCP server from its configured scope. + */ +router.delete( + '/providers/:provider/mcp/servers/:name', + asyncHandler(async (req: Request, res: Response) => { + const provider = parseProvider(req.params.provider); + const scope = parseMcpScope(req.query.scope); + const workspacePath = readOptionalQueryString(req.query.workspacePath); + const result = await llmMcpService.removeProviderServer(provider, { + name: readPathParam(req.params.name, 'name'), + scope, + workspacePath, + }); + res.json(createApiSuccessResponse(result)); + }), +); + +/** + * Executes a lightweight startup/connectivity probe for one provider MCP server. 
+ */ +router.post( + '/providers/:provider/mcp/servers/:name/run', + asyncHandler(async (req: Request, res: Response) => { + const provider = parseProvider(req.params.provider); + const body = (req.body as Record | undefined) ?? {}; + const scope = parseMcpScope(body.scope ?? req.query.scope); + const workspacePath = readOptionalQueryString(body.workspacePath ?? req.query.workspacePath); + const result = await llmMcpService.runProviderServer({ + provider, + name: readPathParam(req.params.name, 'name'), + scope, + workspacePath, + }); + res.json(createApiSuccessResponse(result)); + }), +); + +/** + * Adds one HTTP/stdio MCP server to every provider. + */ +router.post( + '/mcp/servers/global', + asyncHandler(async (req: Request, res: Response) => { + const payload = parseMcpUpsertPayload(req.body); + if (payload.scope === 'local') { + throw new AppError('Global MCP add supports only "user" or "project" scopes.', { + code: 'INVALID_GLOBAL_MCP_SCOPE', + statusCode: 400, + }); + } + const results = await llmMcpService.addServerToAllProviders({ + ...payload, + scope: payload.scope === 'user' ? 'user' : 'project', + }); + res.status(201).json(createApiSuccessResponse({ results })); + }), +); + +/** + * Lists provider-specific skills from all documented skill directories. + */ +router.get( + '/providers/:provider/skills', + asyncHandler(async (req: Request, res: Response) => { + const provider = parseProvider(req.params.provider); + const workspacePath = readOptionalQueryString(req.query.workspacePath); + const skills = await llmSkillsService.listProviderSkills(provider, { workspacePath }); + res.json(createApiSuccessResponse({ provider, skills })); + }), +); + +/** + * Lists skills for one provider or for all providers in a single response. 
+ */ +router.get( + '/skills', + asyncHandler(async (req: Request, res: Response) => { + const providerQuery = readOptionalQueryString(req.query.provider); + const workspacePath = readOptionalQueryString(req.query.workspacePath); + if (providerQuery) { + const provider = parseProvider(providerQuery); + const skills = await llmSkillsService.listProviderSkills(provider, { workspacePath }); + res.json(createApiSuccessResponse({ provider, skills })); + return; + } + + const providers: LLMProvider[] = ['claude', 'codex', 'cursor', 'gemini']; + const byProvider = Object.fromEntries( + await Promise.all( + providers.map(async (provider) => ([ + provider, + await llmSkillsService.listProviderSkills(provider, { workspacePath }), + ])), + ), + ); + res.json(createApiSuccessResponse({ providers: byProvider })); + }), +); + router.get( '/sessions/:sessionId/history', asyncHandler(async (req: Request, res: Response) => { diff --git a/server/src/modules/llm/llm.service.ts b/server/src/modules/llm/llm.service.ts index fe4a0632..ce9a5d7c 100644 --- a/server/src/modules/llm/llm.service.ts +++ b/server/src/modules/llm/llm.service.ts @@ -20,6 +20,35 @@ const normalizeOptionalString = (value: unknown): string | undefined => { return normalized.length > 0 ? normalized : undefined; }; +/** + * Validates and normalizes optional image path arrays. + */ +const normalizeImagePaths = (value: unknown): string[] | undefined => { + if (value === undefined) { + return undefined; + } + + if (!Array.isArray(value)) { + throw new AppError('imagePaths must be an array of strings.', { + code: 'INVALID_IMAGE_PATHS', + statusCode: 400, + }); + } + + const normalizedPaths = value + .map((entry) => (typeof entry === 'string' ? 
entry.trim() : '')) + .filter((entry) => entry.length > 0); + + if (normalizedPaths.length !== value.length) { + throw new AppError('imagePaths must contain non-empty strings only.', { + code: 'INVALID_IMAGE_PATHS', + statusCode: 400, + }); + } + + return normalizedPaths; +}; + /** * Validates and normalizes runtime permission mode. */ @@ -145,6 +174,7 @@ function parseStartPayload(payload: unknown): StartSessionInput { sessionId: normalizeOptionalString(body.sessionId), model: normalizeOptionalString(body.model), thinkingMode: normalizeOptionalString(body.thinkingMode), + imagePaths: normalizeImagePaths(body.imagePaths), runtimePermissionMode: normalizePermissionMode(body.runtimePermissionMode), allowYolo: body.allowYolo === true, }; diff --git a/server/src/modules/llm/mcp.service.ts b/server/src/modules/llm/mcp.service.ts new file mode 100644 index 00000000..e546e55c --- /dev/null +++ b/server/src/modules/llm/mcp.service.ts @@ -0,0 +1,817 @@ +import { mkdir, readFile, writeFile } from 'node:fs/promises'; +import os from 'node:os'; +import path from 'node:path'; +import { once } from 'node:events'; + +import spawn from 'cross-spawn'; +import TOML from '@iarna/toml'; + +import type { LLMProvider } from '@/shared/types/app.js'; +import { AppError } from '@/shared/utils/app-error.js'; + +export type McpScope = 'user' | 'local' | 'project'; +export type McpTransport = 'stdio' | 'http' | 'sse'; + +export type UnifiedMcpServer = { + provider: LLMProvider; + name: string; + scope: McpScope; + transport: McpTransport; + command?: string; + args?: string[]; + env?: Record; + cwd?: string; + url?: string; + headers?: Record; + envVars?: string[]; + bearerTokenEnvVar?: string; + envHttpHeaders?: Record; +}; + +export type UpsertMcpServerInput = { + name: string; + scope?: McpScope; + transport: McpTransport; + workspacePath?: string; + command?: string; + args?: string[]; + env?: Record; + cwd?: string; + url?: string; + headers?: Record; + envVars?: string[]; + 
bearerTokenEnvVar?: string; + envHttpHeaders?: Record; +}; + +const PROVIDER_CAPABILITIES: Record = { + claude: { scopes: ['user', 'local', 'project'], transports: ['stdio', 'http', 'sse'] }, + codex: { scopes: ['user', 'project'], transports: ['stdio', 'http'] }, + cursor: { scopes: ['user', 'project'], transports: ['stdio', 'http', 'sse'] }, + gemini: { scopes: ['user', 'project'], transports: ['stdio', 'http', 'sse'] }, +}; + +const PROVIDERS: LLMProvider[] = ['claude', 'codex', 'cursor', 'gemini']; + +/** + * Unified MCP configuration service backed by provider-native config files. + */ +export const llmMcpService = { + /** + * Lists MCP servers for one provider grouped by user/local/project scopes. + */ + async listProviderServers( + provider: LLMProvider, + options?: { workspacePath?: string }, + ): Promise> { + const workspacePath = resolveWorkspacePath(options?.workspacePath); + const grouped: Record = { + user: [], + local: [], + project: [], + }; + + const capability = PROVIDER_CAPABILITIES[provider]; + for (const scope of capability.scopes) { + const servers = await this.listProviderServersForScope(provider, scope, workspacePath); + grouped[scope] = servers; + } + + return grouped; + }, + + /** + * Writes one MCP server definition into the provider's config file for the selected scope. + */ + async upsertProviderServer(provider: LLMProvider, input: UpsertMcpServerInput): Promise { + validateProviderScopeAndTransport(provider, input.scope ?? 'project', input.transport); + const scope = input.scope ?? 
'project'; + const workspacePath = resolveWorkspacePath(input.workspacePath); + const normalizedName = normalizeServerName(input.name); + const scopedServers = await readScopedProviderServers(provider, scope, workspacePath); + scopedServers[normalizedName] = buildProviderServerConfig(provider, input); + await writeScopedProviderServers(provider, scope, workspacePath, scopedServers); + + return { + provider, + name: normalizedName, + scope, + transport: input.transport, + command: input.command, + args: input.args, + env: input.env, + cwd: input.cwd, + url: input.url, + headers: input.headers, + envVars: input.envVars, + bearerTokenEnvVar: input.bearerTokenEnvVar, + envHttpHeaders: input.envHttpHeaders, + }; + }, + + /** + * Removes one MCP server definition from the provider's config file. + */ + async removeProviderServer( + provider: LLMProvider, + input: { name: string; scope?: McpScope; workspacePath?: string }, + ): Promise<{ removed: boolean; provider: LLMProvider; name: string; scope: McpScope }> { + const scope = input.scope ?? 'project'; + validateProviderScopeAndTransport(provider, scope, 'stdio'); + const workspacePath = resolveWorkspacePath(input.workspacePath); + const normalizedName = normalizeServerName(input.name); + const scopedServers = await readScopedProviderServers(provider, scope, workspacePath); + const removed = Object.prototype.hasOwnProperty.call(scopedServers, normalizedName); + if (removed) { + delete scopedServers[normalizedName]; + await writeScopedProviderServers(provider, scope, workspacePath, scopedServers); + } + + return { removed, provider, name: normalizedName, scope }; + }, + + /** + * Adds one MCP server to all providers using the same input shape. 
+ */ + async addServerToAllProviders( + input: Omit & { scope?: Exclude }, + ): Promise> { + if (input.transport !== 'stdio' && input.transport !== 'http') { + throw new AppError('Global MCP add supports only "stdio" and "http".', { + code: 'INVALID_GLOBAL_MCP_TRANSPORT', + statusCode: 400, + }); + } + + const scope = input.scope ?? 'project'; + const results: Array<{ provider: LLMProvider; created: boolean; error?: string }> = []; + for (const provider of PROVIDERS) { + try { + await this.upsertProviderServer(provider, { ...input, scope }); + results.push({ provider, created: true }); + } catch (error) { + results.push({ + provider, + created: false, + error: error instanceof Error ? error.message : 'Unknown error', + }); + } + } + + return results; + }, + + /** + * Performs a lightweight startup/connectivity check for one configured MCP server. + */ + async runProviderServer(input: { + provider: LLMProvider; + name: string; + scope?: McpScope; + workspacePath?: string; + }): Promise<{ + provider: LLMProvider; + name: string; + scope: McpScope; + transport: McpTransport; + reachable: boolean; + statusCode?: number; + error?: string; + }> { + const scope = input.scope ?? 
'project'; + const workspacePath = resolveWorkspacePath(input.workspacePath); + const normalizedName = normalizeServerName(input.name); + const scopedServers = await readScopedProviderServers(input.provider, scope, workspacePath); + const rawConfig = scopedServers[normalizedName]; + if (!rawConfig || typeof rawConfig !== 'object') { + throw new AppError(`MCP server "${normalizedName}" was not found.`, { + code: 'MCP_SERVER_NOT_FOUND', + statusCode: 404, + }); + } + + const normalized = normalizeServerConfig(input.provider, scope, normalizedName, rawConfig); + if (!normalized) { + throw new AppError(`MCP server "${normalizedName}" has an invalid configuration.`, { + code: 'MCP_SERVER_INVALID_CONFIG', + statusCode: 400, + }); + } + + if (normalized.transport === 'stdio') { + const result = await runStdioServerProbe(normalized, workspacePath); + return { + provider: input.provider, + name: normalizedName, + scope, + transport: normalized.transport, + reachable: result.reachable, + error: result.error, + }; + } + + const result = await runHttpServerProbe(normalized.url ?? ''); + return { + provider: input.provider, + name: normalizedName, + scope, + transport: normalized.transport, + reachable: result.reachable, + statusCode: result.statusCode, + error: result.error, + }; + }, + + /** + * Reads and normalizes one provider scope into unified MCP server records. + */ + async listProviderServersForScope( + provider: LLMProvider, + scope: McpScope, + workspacePath: string, + ): Promise { + if (!PROVIDER_CAPABILITIES[provider].scopes.includes(scope)) { + return []; + } + + const scopedServers = await readScopedProviderServers(provider, scope, workspacePath); + return Object.entries(scopedServers) + .map(([name, rawConfig]) => normalizeServerConfig(provider, scope, name, rawConfig)) + .filter((entry): entry is UnifiedMcpServer => entry !== null); + }, +}; + +/** + * Resolves workspace paths once so all scope loaders read from a consistent absolute root. 
+ */ +function resolveWorkspacePath(workspacePath?: string): string { + return path.resolve(workspacePath ?? process.cwd()); +} + +/** + * Restricts MCP server names to non-empty trimmed strings. + */ +function normalizeServerName(name: string): string { + const normalized = name.trim(); + if (!normalized) { + throw new AppError('MCP server name is required.', { + code: 'MCP_SERVER_NAME_REQUIRED', + statusCode: 400, + }); + } + + return normalized; +} + +/** + * Applies provider capability checks before read/write operations. + */ +function validateProviderScopeAndTransport( + provider: LLMProvider, + scope: McpScope, + transport: McpTransport, +): void { + const capability = PROVIDER_CAPABILITIES[provider]; + if (!capability.scopes.includes(scope)) { + throw new AppError(`Provider "${provider}" does not support "${scope}" MCP scope.`, { + code: 'MCP_SCOPE_NOT_SUPPORTED', + statusCode: 400, + }); + } + + if (!capability.transports.includes(transport)) { + throw new AppError(`Provider "${provider}" does not support "${transport}" MCP transport.`, { + code: 'MCP_TRANSPORT_NOT_SUPPORTED', + statusCode: 400, + }); + } +} + +/** + * Loads one scope's raw server map from a provider-native config file. + */ +async function readScopedProviderServers( + provider: LLMProvider, + scope: McpScope, + workspacePath: string, +): Promise> { + switch (provider) { + case 'claude': + return readClaudeScopedServers(scope, workspacePath); + case 'codex': + return readCodexScopedServers(scope, workspacePath); + case 'cursor': + return readCursorScopedServers(scope, workspacePath); + case 'gemini': + return readGeminiScopedServers(scope, workspacePath); + default: + return {}; + } +} + +/** + * Persists one scope's raw server map back to provider-native config files. 
+ */ +async function writeScopedProviderServers( + provider: LLMProvider, + scope: McpScope, + workspacePath: string, + servers: Record, +): Promise { + switch (provider) { + case 'claude': + await writeClaudeScopedServers(scope, workspacePath, servers); + return; + case 'codex': + await writeCodexScopedServers(scope, workspacePath, servers); + return; + case 'cursor': + await writeCursorScopedServers(scope, workspacePath, servers); + return; + case 'gemini': + await writeGeminiScopedServers(scope, workspacePath, servers); + return; + default: + return; + } +} + +/** + * Creates one provider-native server config object from unified input payload. + */ +function buildProviderServerConfig(provider: LLMProvider, input: UpsertMcpServerInput): Record { + const scope = input.scope ?? 'project'; + validateProviderScopeAndTransport(provider, scope, input.transport); + + if (input.transport === 'stdio') { + if (!input.command?.trim()) { + throw new AppError('command is required for stdio MCP servers.', { + code: 'MCP_COMMAND_REQUIRED', + statusCode: 400, + }); + } + + if (provider === 'claude') { + return { + type: 'stdio', + command: input.command, + args: input.args ?? [], + env: input.env ?? {}, + }; + } + + if (provider === 'codex') { + return { + command: input.command, + args: input.args ?? [], + env: input.env ?? {}, + env_vars: input.envVars ?? [], + cwd: input.cwd, + }; + } + + return { + command: input.command, + args: input.args ?? [], + env: input.env ?? {}, + cwd: input.cwd, + }; + } + + if (!input.url?.trim()) { + throw new AppError('url is required for http/sse MCP servers.', { + code: 'MCP_URL_REQUIRED', + statusCode: 400, + }); + } + + if (provider === 'codex') { + return { + url: input.url, + bearer_token_env_var: input.bearerTokenEnvVar, + http_headers: input.headers ?? {}, + env_http_headers: input.envHttpHeaders ?? {}, + }; + } + + if (provider === 'cursor') { + return { + url: input.url, + headers: input.headers ?? 
{}, + }; + } + + return { + type: input.transport, + url: input.url, + headers: input.headers ?? {}, + }; +} + +/** + * Maps one provider-native server object into the unified response shape. + */ +function normalizeServerConfig( + provider: LLMProvider, + scope: McpScope, + name: string, + rawConfig: unknown, +): UnifiedMcpServer | null { + if (!rawConfig || typeof rawConfig !== 'object') { + return null; + } + + const config = rawConfig as Record; + if (typeof config.command === 'string') { + const transport: McpTransport = 'stdio'; + return { + provider, + name, + scope, + transport, + command: config.command, + args: readStringArray(config.args), + env: readStringRecord(config.env), + cwd: readOptionalString(config.cwd), + envVars: readStringArray(config.env_vars), + }; + } + + if (typeof config.url === 'string') { + let transport: McpTransport = 'http'; + if (provider === 'claude' || provider === 'gemini') { + const typeValue = readOptionalString(config.type); + if (typeValue === 'sse') { + transport = 'sse'; + } + } + + return { + provider, + name, + scope, + transport, + url: config.url, + headers: readStringRecord(config.headers) ?? readStringRecord(config.http_headers), + bearerTokenEnvVar: readOptionalString(config.bearer_token_env_var), + envHttpHeaders: readStringRecord(config.env_http_headers), + }; + } + + return null; +} + +/** + * Reads Claude MCP servers from ~/.claude.json and project .mcp.json files. + */ +async function readClaudeScopedServers(scope: McpScope, workspacePath: string): Promise> { + if (scope === 'project') { + const filePath = path.join(workspacePath, '.mcp.json'); + const config = await readJsonConfig(filePath); + return readObjectRecord(config.mcpServers) ?? {}; + } + + const filePath = path.join(os.homedir(), '.claude.json'); + const config = await readJsonConfig(filePath); + if (scope === 'user') { + return readObjectRecord(config.mcpServers) ?? 
{}; + } + + if (scope === 'local') { + const projects = readObjectRecord(config.projects) ?? {}; + const projectConfig = readObjectRecord(projects[workspacePath]) ?? {}; + return readObjectRecord(projectConfig.mcpServers) ?? {}; + } + + return {}; +} + +/** + * Persists Claude MCP servers back to ~/.claude.json or .mcp.json depending on scope. + */ +async function writeClaudeScopedServers( + scope: McpScope, + workspacePath: string, + servers: Record, +): Promise { + if (scope === 'project') { + const filePath = path.join(workspacePath, '.mcp.json'); + const config = await readJsonConfig(filePath); + config.mcpServers = servers; + await writeJsonConfig(filePath, config); + return; + } + + const filePath = path.join(os.homedir(), '.claude.json'); + const config = await readJsonConfig(filePath); + if (scope === 'user') { + config.mcpServers = servers; + await writeJsonConfig(filePath, config); + return; + } + + const projects = readObjectRecord(config.projects) ?? {}; + const projectConfig = readObjectRecord(projects[workspacePath]) ?? {}; + projectConfig.mcpServers = servers; + projects[workspacePath] = projectConfig; + config.projects = projects; + await writeJsonConfig(filePath, config); +} + +/** + * Reads Codex MCP servers from config.toml user or project scopes. + */ +async function readCodexScopedServers(scope: McpScope, workspacePath: string): Promise> { + if (scope === 'local') { + throw new AppError('Codex does not support local MCP scope.', { + code: 'MCP_SCOPE_NOT_SUPPORTED', + statusCode: 400, + }); + } + + const filePath = scope === 'user' + ? path.join(os.homedir(), '.codex', 'config.toml') + : path.join(workspacePath, '.codex', 'config.toml'); + const config = await readTomlConfig(filePath); + return readObjectRecord(config.mcp_servers) ?? {}; +} + +/** + * Persists Codex MCP servers to config.toml user/project scopes. 
+ */ +async function writeCodexScopedServers( + scope: McpScope, + workspacePath: string, + servers: Record, +): Promise { + if (scope === 'local') { + throw new AppError('Codex does not support local MCP scope.', { + code: 'MCP_SCOPE_NOT_SUPPORTED', + statusCode: 400, + }); + } + + const filePath = scope === 'user' + ? path.join(os.homedir(), '.codex', 'config.toml') + : path.join(workspacePath, '.codex', 'config.toml'); + const config = await readTomlConfig(filePath); + config.mcp_servers = servers; + await writeTomlConfig(filePath, config); +} + +/** + * Reads Gemini MCP servers from settings.json user/project scopes. + */ +async function readGeminiScopedServers(scope: McpScope, workspacePath: string): Promise> { + if (scope === 'local') { + throw new AppError('Gemini does not support local MCP scope.', { + code: 'MCP_SCOPE_NOT_SUPPORTED', + statusCode: 400, + }); + } + + const filePath = scope === 'user' + ? path.join(os.homedir(), '.gemini', 'settings.json') + : path.join(workspacePath, '.gemini', 'settings.json'); + const config = await readJsonConfig(filePath); + return readObjectRecord(config.mcpServers) ?? {}; +} + +/** + * Persists Gemini MCP servers to settings.json user/project scopes. + */ +async function writeGeminiScopedServers( + scope: McpScope, + workspacePath: string, + servers: Record, +): Promise { + if (scope === 'local') { + throw new AppError('Gemini does not support local MCP scope.', { + code: 'MCP_SCOPE_NOT_SUPPORTED', + statusCode: 400, + }); + } + + const filePath = scope === 'user' + ? path.join(os.homedir(), '.gemini', 'settings.json') + : path.join(workspacePath, '.gemini', 'settings.json'); + const config = await readJsonConfig(filePath); + config.mcpServers = servers; + await writeJsonConfig(filePath, config); +} + +/** + * Reads Cursor MCP servers from mcp.json user/project scopes. 
+ */ +async function readCursorScopedServers(scope: McpScope, workspacePath: string): Promise> { + if (scope === 'local') { + throw new AppError('Cursor does not support local MCP scope.', { + code: 'MCP_SCOPE_NOT_SUPPORTED', + statusCode: 400, + }); + } + + const filePath = scope === 'user' + ? path.join(os.homedir(), '.cursor', 'mcp.json') + : path.join(workspacePath, '.cursor', 'mcp.json'); + const config = await readJsonConfig(filePath); + return readObjectRecord(config.mcpServers) ?? {}; +} + +/** + * Persists Cursor MCP servers to mcp.json user/project scopes. + */ +async function writeCursorScopedServers( + scope: McpScope, + workspacePath: string, + servers: Record, +): Promise { + if (scope === 'local') { + throw new AppError('Cursor does not support local MCP scope.', { + code: 'MCP_SCOPE_NOT_SUPPORTED', + statusCode: 400, + }); + } + + const filePath = scope === 'user' + ? path.join(os.homedir(), '.cursor', 'mcp.json') + : path.join(workspacePath, '.cursor', 'mcp.json'); + const config = await readJsonConfig(filePath); + config.mcpServers = servers; + await writeJsonConfig(filePath, config); +} + +/** + * Runs a short stdio process startup probe. + */ +async function runStdioServerProbe( + server: UnifiedMcpServer, + workspacePath: string, +): Promise<{ reachable: boolean; error?: string }> { + if (!server.command) { + return { reachable: false, error: 'Missing stdio command.' }; + } + + try { + const child = spawn(server.command, server.args ?? [], { + cwd: server.cwd ? path.resolve(workspacePath, server.cwd) : workspacePath, + env: { + ...process.env, + ...(server.env ?? 
{}), + }, + stdio: ['ignore', 'pipe', 'pipe'], + }); + + const timeout = setTimeout(() => { + if (!child.killed && child.exitCode === null) { + child.kill('SIGTERM'); + } + }, 1_500); + + const errorPromise = once(child, 'error').then(([error]) => { + throw error; + }); + const closePromise = once(child, 'close'); + await Promise.race([closePromise, errorPromise]); + clearTimeout(timeout); + + if (typeof child.exitCode === 'number' && child.exitCode !== 0) { + return { + reachable: false, + error: `Process exited with code ${child.exitCode}.`, + }; + } + + return { reachable: true }; + } catch (error) { + return { + reachable: false, + error: error instanceof Error ? error.message : 'Failed to start stdio process', + }; + } +} + +/** + * Runs a lightweight HTTP/SSE reachability probe. + */ +async function runHttpServerProbe(url: string): Promise<{ reachable: boolean; statusCode?: number; error?: string }> { + const controller = new AbortController(); + const timeout = setTimeout(() => controller.abort(), 3_000); + try { + const response = await fetch(url, { method: 'GET', signal: controller.signal }); + clearTimeout(timeout); + return { + reachable: true, + statusCode: response.status, + }; + } catch (error) { + clearTimeout(timeout); + return { + reachable: false, + error: error instanceof Error ? error.message : 'Network probe failed', + }; + } +} + +/** + * Safely reads a JSON config file and returns an empty object when missing. + */ +async function readJsonConfig(filePath: string): Promise> { + try { + const content = await readFile(filePath, 'utf8'); + const parsed = JSON.parse(content) as Record; + return readObjectRecord(parsed) ?? {}; + } catch (error) { + const code = (error as NodeJS.ErrnoException).code; + if (code === 'ENOENT') { + return {}; + } + throw error; + } +} + +/** + * Writes one JSON config with stable formatting. 
+ */ +async function writeJsonConfig(filePath: string, data: Record): Promise { + await mkdir(path.dirname(filePath), { recursive: true }); + await writeFile(filePath, `${JSON.stringify(data, null, 2)}\n`, 'utf8'); +} + +/** + * Safely reads a TOML config and returns an empty object when missing. + */ +async function readTomlConfig(filePath: string): Promise> { + try { + const content = await readFile(filePath, 'utf8'); + const parsed = TOML.parse(content) as Record; + return readObjectRecord(parsed) ?? {}; + } catch (error) { + const code = (error as NodeJS.ErrnoException).code; + if (code === 'ENOENT') { + return {}; + } + throw error; + } +} + +/** + * Writes one TOML config file. + */ +async function writeTomlConfig(filePath: string, data: Record): Promise { + await mkdir(path.dirname(filePath), { recursive: true }); + const toml = TOML.stringify(data as any); + await writeFile(filePath, toml, 'utf8'); +} + +/** + * Reads plain object records. + */ +function readObjectRecord(value: unknown): Record | null { + if (!value || typeof value !== 'object' || Array.isArray(value)) { + return null; + } + return value as Record; +} + +/** + * Reads optional strings. + */ +function readOptionalString(value: unknown): string | undefined { + if (typeof value !== 'string') { + return undefined; + } + + const normalized = value.trim(); + return normalized.length ? normalized : undefined; +} + +/** + * Reads optional string arrays. + */ +function readStringArray(value: unknown): string[] | undefined { + if (!Array.isArray(value)) { + return undefined; + } + + return value.filter((entry): entry is string => typeof entry === 'string'); +} + +/** + * Reads optional string maps. 
+ */ +function readStringRecord(value: unknown): Record | undefined { + const record = readObjectRecord(value); + if (!record) { + return undefined; + } + + const normalized: Record = {}; + for (const [key, entry] of Object.entries(record)) { + if (typeof entry === 'string') { + normalized[key] = entry; + } + } + + return Object.keys(normalized).length > 0 ? normalized : undefined; +} diff --git a/server/src/modules/llm/providers/base-cli.provider.ts b/server/src/modules/llm/providers/base-cli.provider.ts index 7129a4ec..85f94ec5 100644 --- a/server/src/modules/llm/providers/base-cli.provider.ts +++ b/server/src/modules/llm/providers/base-cli.provider.ts @@ -63,6 +63,17 @@ export abstract class BaseCliProvider extends AbstractProvider { */ protected abstract createCliInvocation(input: CreateCliInvocationInput): CliInvocation; + /** + * Appends uploaded image paths to prompt text for CLI providers that only accept string prompts. + */ + protected appendImagePathsToPrompt(prompt: string, imagePaths?: string[]): string { + if (!imagePaths || imagePaths.length === 0) { + return prompt; + } + + return `${prompt}\n\n${JSON.stringify(imagePaths)}`; + } + /** * Maps one stdout/stderr line into either JSON or plain-text event shapes. 
*/ diff --git a/server/src/modules/llm/providers/claude.provider.ts b/server/src/modules/llm/providers/claude.provider.ts index 23457ea4..af9155c3 100644 --- a/server/src/modules/llm/providers/claude.provider.ts +++ b/server/src/modules/llm/providers/claude.provider.ts @@ -4,6 +4,8 @@ import { type ModelInfo, type Options, } from '@anthropic-ai/claude-agent-sdk'; +import path from 'node:path'; +import { readFile } from 'node:fs/promises'; import { BaseSdkProvider } from '@/modules/llm/providers/base-sdk.provider.js'; import type { @@ -19,6 +21,36 @@ type ClaudeExecutionInput = StartSessionInput & { }; const CLAUDE_THINKING_LEVELS = new Set(['low', 'medium', 'high', 'max']); +const SUPPORTED_CLAUDE_IMAGE_TYPES = new Map([ + ['.jpg', 'image/jpeg'], + ['.jpeg', 'image/jpeg'], + ['.png', 'image/png'], + ['.gif', 'image/gif'], + ['.webp', 'image/webp'], +]); + +type ClaudeUserPromptMessage = { + type: 'user'; + message: { + role: 'user'; + content: Array< + | { + type: 'text'; + text: string; + } + | { + type: 'image'; + source: { + type: 'base64'; + media_type: 'image/jpeg' | 'image/png' | 'image/gif' | 'image/webp'; + data: string; + }; + } + >; + }; + parent_tool_use_id: null; + timestamp: string; +}; /** * Claude SDK provider implementation. @@ -74,8 +106,9 @@ export class ClaudeProvider extends BaseSdkProvider { options.sessionId = input.sessionId; } + const promptInput = await this.buildPromptInput(input.prompt, input.imagePaths, input.workspacePath); const queryInstance = query({ - prompt: input.prompt, + prompt: promptInput as any, options, }); @@ -91,6 +124,58 @@ export class ClaudeProvider extends BaseSdkProvider { }; } + /** + * Builds a Claude prompt payload. When images are present, this returns an async iterable user message. 
+ */ + private async buildPromptInput( + prompt: string, + imagePaths?: string[], + workspacePath?: string, + ): Promise> { + if (!imagePaths || imagePaths.length === 0) { + return prompt; + } + + const content: ClaudeUserPromptMessage['message']['content'] = [ + { type: 'text', text: prompt }, + ]; + + for (const imagePath of imagePaths) { + const resolvedPath = path.isAbsolute(imagePath) + ? imagePath + : path.resolve(workspacePath ?? process.cwd(), imagePath); + const extension = path.extname(resolvedPath).toLowerCase(); + const mediaType = SUPPORTED_CLAUDE_IMAGE_TYPES.get(extension); + if (!mediaType) { + continue; + } + + const imageBytes = await readFile(resolvedPath); + content.push({ + type: 'image', + source: { + type: 'base64', + media_type: mediaType, + data: imageBytes.toString('base64'), + }, + }); + } + + const sdkPrompt = (async function* (): AsyncIterable { + yield { + type: 'user', + message: { + role: 'user', + content, + }, + parent_tool_use_id: null, + timestamp: new Date().toISOString(), + }; + })(); + + return sdkPrompt; + } + /** * Produces compact event metadata for frontend stream rendering. 
*/ diff --git a/server/src/modules/llm/providers/codex.provider.ts b/server/src/modules/llm/providers/codex.provider.ts index 19e577d1..bbef1aef 100644 --- a/server/src/modules/llm/providers/codex.provider.ts +++ b/server/src/modules/llm/providers/codex.provider.ts @@ -29,7 +29,18 @@ type CodexSdkClient = { type CodexThread = { runStreamed: ( - prompt: string, + prompt: + | string + | Array< + | { + type: 'text'; + text: string; + } + | { + type: 'local_image'; + path: string; + } + >, options?: { signal?: AbortSignal; }, @@ -114,7 +125,8 @@ export class CodexProvider extends BaseSdkProvider { : client.startThread(threadOptions); const abortController = new AbortController(); - const streamedTurn = await thread.runStreamed(input.prompt, { + const promptInput = this.buildPromptInput(input.prompt, input.imagePaths, input.workspacePath); + const streamedTurn = await thread.runStreamed(promptInput, { signal: abortController.signal, }); @@ -127,6 +139,33 @@ export class CodexProvider extends BaseSdkProvider { }; } + /** + * Builds Codex prompt items. Images are sent as `local_image` entries for SDK-native image support. + */ + private buildPromptInput( + prompt: string, + imagePaths?: string[], + workspacePath?: string, + ): string | Array<{ type: 'text'; text: string } | { type: 'local_image'; path: string }> { + if (!imagePaths || imagePaths.length === 0) { + return prompt; + } + + const resolvedImagePaths = imagePaths.map((imagePath) => ( + path.isAbsolute(imagePath) + ? imagePath + : path.resolve(workspacePath ?? process.cwd(), imagePath) + )); + + return [ + { type: 'text', text: prompt }, + ...resolvedImagePaths.map((resolvedPath) => ({ + type: 'local_image' as const, + path: resolvedPath, + })), + ]; + } + /** * Normalizes Codex stream events into the shared event shape. 
*/ diff --git a/server/src/modules/llm/providers/cursor.provider.ts b/server/src/modules/llm/providers/cursor.provider.ts index a19da697..7d931277 100644 --- a/server/src/modules/llm/providers/cursor.provider.ts +++ b/server/src/modules/llm/providers/cursor.provider.ts @@ -40,6 +40,7 @@ export class CursorProvider extends BaseCliProvider { args: string[]; cwd?: string; } { + const promptWithImagePaths = this.appendImagePathsToPrompt(input.prompt, input.imagePaths); const args = ['--print', '--trust', '--output-format', 'stream-json']; if (input.allowYolo) { @@ -54,7 +55,7 @@ export class CursorProvider extends BaseCliProvider { args.push('--resume', input.sessionId); } - args.push(input.prompt); + args.push(promptWithImagePaths); return { command: 'cursor-agent', diff --git a/server/src/modules/llm/providers/gemini.provider.ts b/server/src/modules/llm/providers/gemini.provider.ts index f01b1036..20d30a61 100644 --- a/server/src/modules/llm/providers/gemini.provider.ts +++ b/server/src/modules/llm/providers/gemini.provider.ts @@ -47,7 +47,8 @@ export class GeminiProvider extends BaseCliProvider { args: string[]; cwd?: string; } { - const args = ['--prompt', input.prompt, '--output-format', 'stream-json']; + const promptWithImagePaths = this.appendImagePathsToPrompt(input.prompt, input.imagePaths); + const args = ['--prompt', promptWithImagePaths, '--output-format', 'stream-json']; if (input.model) { args.push('--model', input.model); diff --git a/server/src/modules/llm/providers/provider.interface.ts b/server/src/modules/llm/providers/provider.interface.ts index 70da652c..1df52cf0 100644 --- a/server/src/modules/llm/providers/provider.interface.ts +++ b/server/src/modules/llm/providers/provider.interface.ts @@ -49,6 +49,7 @@ export type StartSessionInput = { sessionId?: string; model?: string; thinkingMode?: string; + imagePaths?: string[]; runtimePermissionMode?: RuntimePermissionMode; allowYolo?: boolean; }; diff --git a/server/src/modules/llm/skills.service.ts 
b/server/src/modules/llm/skills.service.ts
new file mode 100644
index 00000000..b53e4930
--- /dev/null
+++ b/server/src/modules/llm/skills.service.ts
@@ -0,0 +1,396 @@
+import { access, readFile, readdir } from 'node:fs/promises';
+import os from 'node:os';
+import path from 'node:path';
+
+import type { LLMProvider } from '@/shared/types/app.js';
+
+export type SkillScope = 'user' | 'project' | 'plugin' | 'repo' | 'admin' | 'system';
+
+export type UnifiedSkill = {
+  provider: LLMProvider;
+  scope: SkillScope;
+  name: string;
+  description?: string;
+  invocation: string;
+  filePath: string;
+  pluginName?: string;
+};
+
+/**
+ * Unified provider skills loader used by the refactor LLM module.
+ */
+export const llmSkillsService = {
+  /**
+   * Lists all available skills for one provider from provider-specific skill directories.
+   */
+  async listProviderSkills(
+    provider: LLMProvider,
+    options?: { workspacePath?: string },
+  ): Promise<UnifiedSkill[]> {
+    const workspacePath = path.resolve(options?.workspacePath ?? process.cwd());
+    switch (provider) {
+      case 'claude':
+        return listClaudeSkills(workspacePath);
+      case 'codex':
+        return listCodexSkills(workspacePath);
+      case 'cursor':
+        return listCursorSkills(workspacePath);
+      case 'gemini':
+        return listGeminiSkills(workspacePath);
+      default:
+        return [];
+    }
+  },
+};
+
+/**
+ * Reads Claude user/project skills and plugin skills with plugin namespace commands.
+ */
+async function listClaudeSkills(workspacePath: string): Promise<UnifiedSkill[]> {
+  const home = os.homedir();
+  const skills: UnifiedSkill[] = [];
+
+  skills.push(
+    ...(await listSkillsFromDirectory({
+      provider: 'claude',
+      scope: 'user',
+      skillsDirectory: path.join(home, '.claude', 'skills'),
+      invocationPrefix: '/',
+    })),
+  );
+
+  skills.push(
+    ...(await listSkillsFromDirectory({
+      provider: 'claude',
+      scope: 'project',
+      skillsDirectory: path.join(workspacePath, '.claude', 'skills'),
+      invocationPrefix: '/',
+    })),
+  );
+
+  const enabledPlugins = await readClaudeEnabledPlugins();
+  if (!enabledPlugins.length) {
+    return skills;
+  }
+
+  const installedPluginIndex = await readClaudeInstalledPluginIndex();
+  for (const pluginId of enabledPlugins) {
+    const pluginInstalls = installedPluginIndex[pluginId];
+    if (!Array.isArray(pluginInstalls)) {
+      continue;
+    }
+
+    const pluginNamespace = pluginId.split('@')[0] ?? pluginId;
+    for (const install of pluginInstalls) {
+      if (!install || typeof install !== 'object') {
+        continue;
+      }
+      const installPath = typeof (install as Record<string, unknown>).installPath === 'string'
+        ? (install as Record<string, unknown>).installPath as string
+        : '';
+      if (!installPath) {
+        continue;
+      }
+
+      const pluginSkills = await listSkillsFromDirectory({
+        provider: 'claude',
+        scope: 'plugin',
+        skillsDirectory: path.join(installPath, 'skills'),
+        invocationPrefix: '/',
+        pluginName: pluginNamespace,
+      });
+
+      for (const skill of pluginSkills) {
+        skill.invocation = `/${pluginNamespace}:${skill.name}`;
+        skill.pluginName = pluginNamespace;
+      }
+
+      skills.push(...pluginSkills);
+    }
+  }
+
+  return deduplicateSkills(skills);
+}
+
+/**
+ * Reads Codex skills from repo/user/admin/system locations.
+ */
+async function listCodexSkills(workspacePath: string): Promise<UnifiedSkill[]> {
+  const home = os.homedir();
+  const repoRoot = await findGitRepoRoot(workspacePath);
+  const candidateDirectories: Array<{ scope: SkillScope; directory: string }> = [
+    { scope: 'repo', directory: path.join(workspacePath, '.agents', 'skills') },
+    { scope: 'repo', directory: path.join(workspacePath, '..', '.agents', 'skills') },
+    { scope: 'user', directory: path.join(home, '.agents', 'skills') },
+    { scope: 'admin', directory: path.join(path.sep, 'etc', 'codex', 'skills') },
+    { scope: 'system', directory: path.join(home, '.codex', 'skills', '.system') },
+  ];
+  if (repoRoot) {
+    candidateDirectories.push({ scope: 'repo', directory: path.join(repoRoot, '.agents', 'skills') });
+  }
+
+  const skills: UnifiedSkill[] = [];
+  for (const candidate of deduplicateDirectories(candidateDirectories)) {
+    const loadedSkills = await listSkillsFromDirectory({
+      provider: 'codex',
+      scope: candidate.scope,
+      skillsDirectory: candidate.directory,
+      invocationPrefix: '$',
+    });
+    skills.push(...loadedSkills);
+  }
+
+  return deduplicateSkills(skills);
+}
+
+/**
+ * Reads Gemini user/project skill directories.
+ */
+async function listGeminiSkills(workspacePath: string): Promise<UnifiedSkill[]> {
+  const home = os.homedir();
+  const candidateDirectories: Array<{ scope: SkillScope; directory: string }> = [
+    { scope: 'user', directory: path.join(home, '.gemini', 'skills') },
+    { scope: 'user', directory: path.join(home, '.agents', 'skills') },
+    { scope: 'project', directory: path.join(workspacePath, '.gemini', 'skills') },
+    { scope: 'project', directory: path.join(workspacePath, '.agents', 'skills') },
+  ];
+
+  const skills: UnifiedSkill[] = [];
+  for (const candidate of deduplicateDirectories(candidateDirectories)) {
+    const loadedSkills = await listSkillsFromDirectory({
+      provider: 'gemini',
+      scope: candidate.scope,
+      skillsDirectory: candidate.directory,
+      invocationPrefix: '/',
+    });
+    skills.push(...loadedSkills);
+  }
+
+  return deduplicateSkills(skills);
+}
+
+/**
+ * Reads Cursor user/project skill directories.
+ */
+async function listCursorSkills(workspacePath: string): Promise<UnifiedSkill[]> {
+  const home = os.homedir();
+  const candidateDirectories: Array<{ scope: SkillScope; directory: string }> = [
+    { scope: 'project', directory: path.join(workspacePath, '.agents', 'skills') },
+    { scope: 'project', directory: path.join(workspacePath, '.cursor', 'skills') },
+    { scope: 'user', directory: path.join(home, '.cursor', 'skills') },
+  ];
+
+  const skills: UnifiedSkill[] = [];
+  for (const candidate of deduplicateDirectories(candidateDirectories)) {
+    const loadedSkills = await listSkillsFromDirectory({
+      provider: 'cursor',
+      scope: candidate.scope,
+      skillsDirectory: candidate.directory,
+      invocationPrefix: '/',
+    });
+    skills.push(...loadedSkills);
+  }
+
+  return deduplicateSkills(skills);
+}
+
+/**
+ * Reads SKILL.md files from a `<skillsDirectory>/<skillName>/SKILL.md` directory layout.
+ */
+async function listSkillsFromDirectory(input: {
+  provider: LLMProvider;
+  scope: SkillScope;
+  skillsDirectory: string;
+  invocationPrefix: '/' | '$';
+  pluginName?: string;
+}): Promise<UnifiedSkill[]> {
+  if (!(await pathExists(input.skillsDirectory))) {
+    return [];
+  }
+
+  const entries = await readdir(input.skillsDirectory, { withFileTypes: true });
+  const skills: UnifiedSkill[] = [];
+  for (const entry of entries) {
+    if (!entry.isDirectory()) {
+      continue;
+    }
+
+    const skillDirectory = path.join(input.skillsDirectory, entry.name);
+    const skillFilePath = path.join(skillDirectory, 'SKILL.md');
+    if (!(await pathExists(skillFilePath))) {
+      continue;
+    }
+
+    const skillMarkdown = await readFile(skillFilePath, 'utf8');
+    const metadata = parseSkillFrontmatter(skillMarkdown);
+    const skillName = metadata.name ?? entry.name;
+    const invocation = `${input.invocationPrefix}${skillName}`;
+    skills.push({
+      provider: input.provider,
+      scope: input.scope,
+      name: skillName,
+      description: metadata.description,
+      invocation,
+      filePath: skillFilePath,
+      pluginName: input.pluginName,
+    });
+  }
+
+  return skills;
+}
+
+/**
+ * Parses frontmatter metadata from SKILL.md files.
+ */
+function parseSkillFrontmatter(content: string): { name?: string; description?: string } {
+  if (!content.startsWith('---')) {
+    return {};
+  }
+
+  const closingDelimiterIndex = content.indexOf('\n---', 3);
+  if (closingDelimiterIndex < 0) {
+    return {};
+  }
+
+  const frontmatter = content.slice(3, closingDelimiterIndex).trim();
+  const metadata: { name?: string; description?: string } = {};
+  for (const line of frontmatter.split(/\r?\n/)) {
+    const separatorIndex = line.indexOf(':');
+    if (separatorIndex <= 0) {
+      continue;
+    }
+
+    const key = line.slice(0, separatorIndex).trim();
+    const rawValue = line.slice(separatorIndex + 1).trim();
+    const value = rawValue.replace(/^["']|["']$/g, '');
+    if (key === 'name') {
+      metadata.name = value;
+    } else if (key === 'description') {
+      metadata.description = value;
+    }
+  }
+
+  return metadata;
+}
+
+/**
+ * Reads Claude enabled plugin map from ~/.claude/settings.json.
+ */
+async function readClaudeEnabledPlugins(): Promise<string[]> {
+  const settingsPath = path.join(os.homedir(), '.claude', 'settings.json');
+  try {
+    const settingsContent = await readFile(settingsPath, 'utf8');
+    const settings = JSON.parse(settingsContent) as Record<string, unknown>;
+    const enabledPlugins = settings.enabledPlugins;
+    if (!enabledPlugins || typeof enabledPlugins !== 'object' || Array.isArray(enabledPlugins)) {
+      return [];
+    }
+
+    const enabledRecords = enabledPlugins as Record<string, unknown>;
+    return Object.entries(enabledRecords)
+      .filter(([, enabled]) => enabled === true)
+      .map(([pluginId]) => pluginId);
+  } catch (error) {
+    if ((error as NodeJS.ErrnoException).code === 'ENOENT') {
+      return [];
+    }
+    throw error;
+  }
+}
+
+/**
+ * Reads Claude installed plugin index from ~/.claude/plugins/installed_plugins.json.
+ */
+async function readClaudeInstalledPluginIndex(): Promise<Record<string, unknown[]>> {
+  const pluginIndexPath = path.join(os.homedir(), '.claude', 'plugins', 'installed_plugins.json');
+  try {
+    const indexContent = await readFile(pluginIndexPath, 'utf8');
+    const index = JSON.parse(indexContent) as Record<string, unknown>;
+    const plugins = index.plugins;
+    if (!plugins || typeof plugins !== 'object' || Array.isArray(plugins)) {
+      return {};
+    }
+
+    const normalized: Record<string, unknown[]> = {};
+    for (const [pluginId, entries] of Object.entries(plugins as Record<string, unknown>)) {
+      normalized[pluginId] = Array.isArray(entries) ? entries : [];
+    }
+
+    return normalized;
+  } catch (error) {
+    if ((error as NodeJS.ErrnoException).code === 'ENOENT') {
+      return {};
+    }
+    throw error;
+  }
+}
+
+/**
+ * Finds the closest git root by walking up from the current workspace path.
+ */
+async function findGitRepoRoot(startPath: string): Promise<string | null> {
+  let currentPath = path.resolve(startPath);
+  while (true) {
+    const gitPath = path.join(currentPath, '.git');
+    if (await pathExists(gitPath)) {
+      return currentPath;
+    }
+
+    const parentPath = path.dirname(currentPath);
+    if (parentPath === currentPath) {
+      return null;
+    }
+
+    currentPath = parentPath;
+  }
+}
+
+/**
+ * Deduplicates directory candidates by absolute path.
+ */
+function deduplicateDirectories(
+  entries: Array<{ scope: SkillScope; directory: string }>,
+): Array<{ scope: SkillScope; directory: string }> {
+  const seen = new Set<string>();
+  const deduplicated: Array<{ scope: SkillScope; directory: string }> = [];
+  for (const entry of entries) {
+    const normalizedDirectory = path.resolve(entry.directory);
+    if (seen.has(normalizedDirectory)) {
+      continue;
+    }
+    seen.add(normalizedDirectory);
+    deduplicated.push({ scope: entry.scope, directory: normalizedDirectory });
+  }
+
+  return deduplicated;
+}
+
+/**
+ * Deduplicates skills by provider + invocation command.
+ */
+function deduplicateSkills(skills: UnifiedSkill[]): UnifiedSkill[] {
+  const seen = new Set<string>();
+  const deduplicated: UnifiedSkill[] = [];
+  for (const skill of skills) {
+    const key = `${skill.provider}:${skill.invocation}`;
+    if (seen.has(key)) {
+      continue;
+    }
+    seen.add(key);
+    deduplicated.push(skill);
+  }
+
+  return deduplicated;
+}
+
+/**
+ * Tests whether a path exists.
+ */
+async function pathExists(targetPath: string): Promise<boolean> {
+  try {
+    await access(targetPath);
+    return true;
+  } catch {
+    return false;
+  }
+}