Compare commits

..

1 Commits

Author SHA1 Message Date
Haileyesus
c02ead51d9 docs: add nginx subpath deployment template
Users deploying behind a reverse proxy need a config they can adapt.

The template documents each proxy block and centralizes upstream/subpath values.

It also notes that Nginx location matchers still require literal subpath edits.
2026-06-02 20:39:01 +03:00
11 changed files with 236 additions and 88 deletions

View File

@@ -0,0 +1,218 @@
# CloudCLI UI Nginx subpath deployment template.
#
# Purpose:
# Serve CloudCLI UI from a path prefix such as:
# http://localhost/ai/
# https://example.com/ai/
#
# CloudCLI itself still runs at the root of its own HTTP server, for example:
# http://127.0.0.1:3001/
#
# Nginx receives public requests under /ai, strips that prefix, and forwards the
# remaining path to CloudCLI. For example:
# /ai/ -> /
# /ai/session/abc -> /session/abc
# /ai/assets/index.js -> /assets/index.js
#
# Important Nginx limitation:
# Nginx does not allow variables in `location` matchers or `rewrite` regexes.
# The configurable variables below are still useful for proxy/filter values,
# but if you change /ai to a different subpath, also update every line marked:
# [SUBPATH LITERAL]
#
# To use a different subpath, replace these literal matchers:
# location = /ai
# location ^~ /ai/
# rewrite ^/ai(?<cloudcli_path>/.*)$ ...
#
# Recommended deployment shape:
# CloudCLI is the only app using /ai, while root paths /api, /ws, and /shell
# are also proxied because the current frontend still calls those endpoints
# with root-relative URLs.
worker_processes 1;
events {
# Maximum simultaneous connections handled by each worker process.
# The default is enough for local testing and small self-hosted deployments.
worker_connections 1024;
}
http {
# WebSocket requests include an Upgrade header. Normal HTTP requests do not.
# This map gives us the right Connection header for both cases:
# Upgrade present -> "upgrade"
# Upgrade absent -> "close"
map $http_upgrade $connection_upgrade {
default upgrade;
'' close;
}
server {
# For HTTPS deployments, replace this with `listen 443 ssl http2;` and
# add ssl_certificate / ssl_certificate_key lines.
listen 80 default_server;
# Use your real hostname in production, for example:
# server_name cloudcli.example.com;
server_name localhost 127.0.0.1;
# ---- User settings -------------------------------------------------
#
# Public path prefix where users access CloudCLI.
# Do not add a trailing slash.
#
# This variable can be used in redirects and response rewrites. It
# cannot be used in `location` matchers, so update the [SUBPATH LITERAL]
# lines too if you change it.
set $cloudcli_subpath /ai;
# Private upstream URL where the CloudCLI server is listening.
# For a default local server this is usually http://127.0.0.1:3001.
set $cloudcli_upstream http://127.0.0.1:3001;
# Allow larger file uploads through the code editor/project file APIs.
client_max_body_size 100m;
# Redirect /ai to /ai/ so relative browser URL resolution is stable.
# [SUBPATH LITERAL] Change `/ai` if you change $cloudcli_subpath.
location = /ai {
return 301 $cloudcli_subpath/;
}
# Main prefixed CloudCLI UI route.
#
# [SUBPATH LITERAL] Change `/ai/` and the `^/ai` rewrite if you change
# $cloudcli_subpath.
location ^~ /ai/ {
# Strip the public subpath before proxying. CloudCLI expects to see
# root paths such as /, /session/:id, /assets/..., /manifest.json.
rewrite ^/ai(?<cloudcli_path>/.*)$ $cloudcli_path break;
# Forward the rewritten request to the private CloudCLI server.
proxy_pass $cloudcli_upstream;
# Use HTTP/1.1 so WebSocket upgrade requests can pass through if a
# browser reaches a socket endpoint under the subpath.
proxy_http_version 1.1;
# Preserve useful request metadata for logs and future app support.
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_set_header X-Forwarded-Prefix $cloudcli_subpath;
# WebSocket upgrade headers. Harmless for normal HTTP requests.
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection $connection_upgrade;
# Long-running agent and terminal sessions can stay open for a long
# time, so avoid closing idle proxied connections too aggressively.
proxy_read_timeout 3600s;
proxy_send_timeout 3600s;
# Disable gzip from the upstream response so sub_filter can inspect
# and rewrite HTML/JSON/JS response bodies.
proxy_set_header Accept-Encoding "";
# Rewrite browser-visible root-relative URLs so the runtime can
# discover that the app is mounted under the subpath.
#
# Examples:
# href="/manifest.json" -> href="/ai/manifest.json"
# src="/assets/app.js" -> src="/ai/assets/app.js"
#
# These rewrites are important for React Router basename detection.
sub_filter_once off;
sub_filter_types
application/json
application/manifest+json
application/javascript
text/javascript;
sub_filter 'href="/' 'href="$cloudcli_subpath/';
sub_filter 'src="/' 'src="$cloudcli_subpath/';
# The production HTML and JS register the service worker at /sw.js.
# Rewrite that registration so the worker is served from /ai/sw.js.
sub_filter "register('/sw.js')" "register('$cloudcli_subpath/sw.js')";
sub_filter 'register("/sw.js")' 'register("$cloudcli_subpath/sw.js")';
# The manifest and service worker contain root-relative paths too.
# Rewriting them keeps PWA metadata and cached manifest requests
# under the same public subpath.
sub_filter '"start_url": "/"' '"start_url": "$cloudcli_subpath/"';
sub_filter '"scope": "/"' '"scope": "$cloudcli_subpath/"';
sub_filter '"src": "/' '"src": "$cloudcli_subpath/';
sub_filter "'/manifest.json'" "'$cloudcli_subpath/manifest.json'";
sub_filter '"/manifest.json"' '"$cloudcli_subpath/manifest.json"';
}
# Root API proxy.
#
# The current CloudCLI frontend calls APIs with root-relative URLs such
# as /api/auth/login. Keep this location unless the frontend becomes
# fully prefix-aware for API requests.
location ^~ /api/ {
proxy_pass $cloudcli_upstream;
proxy_http_version 1.1;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_set_header X-Forwarded-Prefix $cloudcli_subpath;
proxy_read_timeout 3600s;
proxy_send_timeout 3600s;
}
# Main app WebSocket proxy.
#
# The frontend opens /ws for realtime chat/session/task updates.
location /ws {
proxy_pass $cloudcli_upstream;
proxy_http_version 1.1;
proxy_set_header Host $host;
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection $connection_upgrade;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_set_header X-Forwarded-Prefix $cloudcli_subpath;
proxy_read_timeout 3600s;
proxy_send_timeout 3600s;
}
# Shell WebSocket proxy.
#
# The browser terminal uses /shell. It requires the same WebSocket
# upgrade handling as /ws.
location /shell {
proxy_pass $cloudcli_upstream;
proxy_http_version 1.1;
proxy_set_header Host $host;
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection $connection_upgrade;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_set_header X-Forwarded-Prefix $cloudcli_subpath;
proxy_read_timeout 3600s;
proxy_send_timeout 3600s;
}
# Optional health endpoint proxy used by the frontend version checker.
location = /health {
proxy_pass $cloudcli_upstream;
proxy_set_header Host $host;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_set_header X-Forwarded-Prefix $cloudcli_subpath;
}
}
}

View File

@@ -11,7 +11,7 @@ export const CLAUDE_MODELS = {
{
value: "default",
label: "Default (recommended)",
description: "Use the default model (currently Opus 4.8 (1M context)) · $5/$25 per Mtok",
description: "Use the default model (currently Opus 4.7 (1M context)) · $5/$25 per Mtok",
},
{
value: "sonnet",

View File

@@ -304,11 +304,7 @@ function extractTokenBudget(sdkMessage) {
const messageUsage = sdkMessage.message?.usage || sdkMessage.usage;
if (messageUsage && typeof messageUsage === 'object') {
const directInputTokens = readNumber(messageUsage.input_tokens ?? messageUsage.inputTokens);
const cacheCreationTokens = readNumber(messageUsage.cache_creation_input_tokens ?? messageUsage.cacheCreationInputTokens ?? messageUsage.cacheCreationTokens);
const cacheReadTokens = readNumber(messageUsage.cache_read_input_tokens ?? messageUsage.cacheReadInputTokens ?? messageUsage.cacheReadTokens);
const cacheTokens = cacheCreationTokens + cacheReadTokens;
const inputTokens = directInputTokens + cacheTokens;
const inputTokens = readNumber(messageUsage.input_tokens ?? messageUsage.inputTokens);
const outputTokens = readNumber(messageUsage.output_tokens ?? messageUsage.outputTokens);
const totalUsed = inputTokens + outputTokens;
const contextWindow = parseInt(process.env.CONTEXT_WINDOW, 10) || 160000;
@@ -318,9 +314,6 @@ function extractTokenBudget(sdkMessage) {
total: contextWindow,
inputTokens,
outputTokens,
cacheReadTokens,
cacheCreationTokens,
cacheTokens,
breakdown: {
input: inputTokens,
output: outputTokens,

View File

@@ -87,11 +87,6 @@ const installMode = fs.existsSync(path.join(APP_ROOT, '.git')) ? 'git' : 'npm';
console.log('SERVER_PORT from env:', process.env.SERVER_PORT);
function readUsageNumber(value) {
const parsed = Number(value);
return Number.isFinite(parsed) ? parsed : 0;
}
const app = express();
const server = http.createServer(app);
@@ -1391,8 +1386,6 @@ app.get('/api/projects/:projectId/sessions/:sessionId/token-usage', authenticate
const contextWindow = Number.isFinite(parsedContextWindow) ? parsedContextWindow : 160000;
let inputTokens = 0;
let outputTokens = 0;
let cacheReadTokens = 0;
let cacheCreationTokens = 0;
// Find the latest assistant message with usage data (scan from end)
for (let i = lines.length - 1; i >= 0; i--) {
@@ -1404,11 +1397,8 @@ app.get('/api/projects/:projectId/sessions/:sessionId/token-usage', authenticate
const usage = entry.message.usage;
// Use token counts from latest assistant message only
const directInputTokens = readUsageNumber(usage.input_tokens ?? usage.inputTokens);
cacheReadTokens = readUsageNumber(usage.cache_read_input_tokens ?? usage.cacheReadInputTokens ?? usage.cacheReadTokens);
cacheCreationTokens = readUsageNumber(usage.cache_creation_input_tokens ?? usage.cacheCreationInputTokens ?? usage.cacheCreationTokens);
inputTokens = directInputTokens + cacheReadTokens + cacheCreationTokens;
outputTokens = readUsageNumber(usage.output_tokens ?? usage.outputTokens);
inputTokens = usage.input_tokens || 0;
outputTokens = usage.output_tokens || 0;
break; // Stop after finding the latest assistant message
}
@@ -1419,16 +1409,12 @@ app.get('/api/projects/:projectId/sessions/:sessionId/token-usage', authenticate
}
const totalUsed = inputTokens + outputTokens;
const cacheTokens = cacheReadTokens + cacheCreationTokens;
res.json({
used: totalUsed,
total: contextWindow,
inputTokens,
outputTokens,
cacheReadTokens,
cacheCreationTokens,
cacheTokens,
breakdown: {
input: inputTokens,
output: outputTokens

View File

@@ -31,24 +31,6 @@ export function createWebSocketServer(
});
wss.on('connection', (ws, request) => {
// Keep WebSocket alive across reverse-proxy idle timeouts (Cloudflare ~100s,
// AWS ALB 60s, nginx 60s, etc.). Without app-level pings these connections
// are silently torn down even when the UI is active, causing repeated
// reconnect cycles. ws library heartbeat is opt-in.
const HEARTBEAT_INTERVAL_MS = 30_000;
const heartbeat = setInterval(() => {
if (ws.readyState === ws.OPEN) {
try {
ws.ping();
} catch {
// socket may have been closed concurrently — interval will be cleared below
}
}
}, HEARTBEAT_INTERVAL_MS);
const stopHeartbeat = () => clearInterval(heartbeat);
ws.on('close', stopHeartbeat);
ws.on('error', stopHeartbeat);
const incomingRequest = request as AuthenticatedWebSocketRequest;
const url = incomingRequest.url ?? '/';
const pathname = new URL(url, 'http://localhost').pathname;

View File

@@ -592,14 +592,12 @@ class ResponseCollector {
}
}
const inputTokens = totalInput + totalCacheRead + totalCacheCreation;
return {
inputTokens,
inputTokens: totalInput,
outputTokens: totalOutput,
cacheReadTokens: totalCacheRead,
cacheCreationTokens: totalCacheCreation,
totalTokens: inputTokens + totalOutput
totalTokens: totalInput + totalOutput + totalCacheRead + totalCacheCreation
};
}
}

View File

@@ -268,35 +268,16 @@ Custom commands can be created in:
tokenUsage.contextWindow ??
0,
) || 0;
const normalizedInputValue =
tokenUsage.inputTokens ??
tokenUsage.input ??
tokenUsage.cumulativeInputTokens ??
tokenUsage.breakdown?.input ??
tokenUsage.promptTokens;
const directInputTokens =
const inputTokensRaw =
Number(
normalizedInputValue ??
tokenUsage.inputTokens ??
tokenUsage.input ??
tokenUsage.input_tokens ??
0
) || 0;
const cacheReadTokens =
Number(
tokenUsage.cacheReadTokens ??
tokenUsage.cache_read_input_tokens ??
tokenUsage.cacheReadInputTokens ??
tokenUsage.cumulativeInputTokens ??
tokenUsage.breakdown?.input ??
tokenUsage.promptTokens ??
0,
) || 0;
const cacheCreationTokens =
Number(
tokenUsage.cacheCreationTokens ??
tokenUsage.cache_creation_input_tokens ??
tokenUsage.cacheCreationInputTokens ??
0,
) || 0;
const inputTokens = normalizedInputValue == null
? directInputTokens + cacheReadTokens + cacheCreationTokens
: directInputTokens;
const outputTokens =
Number(
tokenUsage.outputTokens ??
@@ -307,9 +288,8 @@ Custom commands can be created in:
tokenUsage.completionTokens ??
0,
) || 0;
const computedUsed = inputTokens + outputTokens;
const hasTokenBreakdown = computedUsed > 0;
const used = Math.max(reportedUsed, computedUsed);
const hasTokenBreakdown = inputTokensRaw > 0 || outputTokens > 0;
const used = reportedUsed || inputTokensRaw + outputTokens;
return {
type: "builtin",
@@ -322,7 +302,7 @@ Custom commands can be created in:
...(hasTokenBreakdown
? {
tokenBreakdown: {
input: inputTokens,
input: inputTokensRaw,
output: outputTokens,
},
}

View File

@@ -295,7 +295,6 @@ export default function ChatComposer({
<PromptInputTextarea
ref={textareaRef}
dir="auto"
value={input}
onChange={onInputChange}
onClick={onTextareaClick}

View File

@@ -120,7 +120,7 @@ const MessageComponent = memo(({ message, prevMessage, createDiff, onFileOpen, o
/* User message bubble on the right */
<div className="flex w-full items-end space-x-0 sm:w-auto sm:max-w-[85%] sm:space-x-3 md:max-w-md lg:max-w-lg xl:max-w-xl">
<div className="group flex-1 rounded-2xl rounded-br-md bg-blue-600 px-3 py-2 text-white shadow-sm sm:flex-initial sm:px-4">
<div dir="auto" className="whitespace-pre-wrap break-words text-sm">
<div className="whitespace-pre-wrap break-words text-sm">
{message.content}
</div>
{message.images && message.images.length > 0 && (
@@ -405,7 +405,7 @@ const MessageComponent = memo(({ message, prevMessage, createDiff, onFileOpen, o
</ReasoningContent>
</Reasoning>
) : (
<div dir="auto" className="text-sm text-gray-700 dark:text-gray-300">
<div className="text-sm text-gray-700 dark:text-gray-300">
{/* Reasoning accordion */}
{showThinking && message.reasoning && (
<Reasoning className="mb-3" defaultOpen={false}>

View File

@@ -36,12 +36,8 @@ const useWebSocketProviderState = (): WebSocketContextType => {
const { token } = useAuth();
useEffect(() => {
// The cleanup below sets unmountedRef = true. Without this reset, every
// re-run of the effect (e.g. on token refresh) would short-circuit connect()
// at its unmounted guard and leave the socket permanently disconnected.
unmountedRef.current = false;
connect();
return () => {
unmountedRef.current = true;
if (reconnectTimeoutRef.current) {

View File

@@ -37,10 +37,6 @@ export default defineConfig(({ mode }) => {
'/shell': {
target: `ws://${proxyHost}:${serverPort}`,
ws: true
},
'/plugin-ws': {
target: `ws://${proxyHost}:${serverPort}`,
ws: true
}
}
},