Compare commits

..

6 Commits

Author SHA1 Message Date
Haileyesus
ed9cdf0114 fix: include Claude cache tokens in usage 2026-06-05 21:38:05 +03:00
Simos Mikelatos
c667b6a179 Update model version in OPTIONS description 2026-06-04 23:43:48 +02:00
Reza Moghaddam
fa9eaf5573 feat(chat): auto-detect text direction for RTL languages (#729)
Add dir="auto" to chat message content and composer textarea so
Persian and Arabic text automatically renders right-to-left
while English and other LTR text remains unaffected.

Co-authored-by: Haile <118998054+blackmammoth@users.noreply.github.com>
2026-06-04 22:24:07 +03:00
Vojtech
2edfef2e3f fix(websocket): add 30s server-side heartbeat to prevent proxy idle disconnects (#770)
The WebSocket gateway never sent ping frames, so any reverse proxy with
an idle timeout (Cloudflare Tunnel ~100s, AWS ALB 60s, nginx 60s, etc.)
would silently tear down /shell, /ws and /plugin-ws/* connections after
the idle window. The UI reconnects automatically but users see a
"Connecting to shell" toast every 1–3 minutes during normal use and any
in-flight PTY/chat traffic can race the reconnect.

Schedule a 30s ws.ping() per connection at the gateway level, cleared on
close/error. ping/pong counts as protocol activity for all proxies that
implement WebSocket correctly, so this single change covers every
deployment topology without per-proxy tuning.

Fixes #769

Co-authored-by: Haile <118998054+blackmammoth@users.noreply.github.com>
2026-06-04 22:07:59 +03:00
ehsanmim
96b16b42e4 fix(vite): proxy /plugin-ws WebSocket requests to the backend in dev (#757)
Plugin WebSocket connections (e.g. the official Terminal plugin) hang
in `npm run dev` because Vite proxies /api, /ws, and /shell but not
/plugin-ws/*. Production is unaffected because the same Express server
serves both the frontend and the WS gateway.

Co-authored-by: Haile <118998054+blackmammoth@users.noreply.github.com>
2026-06-04 20:57:24 +03:00
Peter Buchegger
f082cdc63b fix(websocket): reset unmountedRef on each effect re-run so token refresh reconnects (#721)
The effect cleanup sets unmountedRef.current = true to prevent reconnects after
the provider unmounts. Without an inverse reset at the start of the effect,
re-running the effect (e.g. when the auth token rotates) leaves the ref true,
and connect() short-circuits at its unmounted guard. The socket then stays
permanently disconnected for the lifetime of the provider.

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
Co-authored-by: Haile <118998054+blackmammoth@users.noreply.github.com>
2026-06-04 20:50:02 +03:00
12 changed files with 89 additions and 22 deletions

View File

@@ -5,9 +5,6 @@
<link rel="icon" type="image/svg+xml" href="/favicon.svg" />
<link rel="icon" type="image/png" href="/favicon.png" />
<meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0, minimum-scale=1.0, user-scalable=no, viewport-fit=cover" />
<link rel="preconnect" href="https://fonts.googleapis.com" />
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin />
<link href="https://fonts.googleapis.com/css2?family=Montserrat:wght@400;500;600;700;800&display=swap" rel="stylesheet" />
<title>CloudCLI UI</title>
<!-- PWA Manifest -->

View File

@@ -11,7 +11,7 @@ export const CLAUDE_MODELS = {
{
value: "default",
label: "Default (recommended)",
description: "Use the default model (currently Opus 4.7 (1M context)) · $5/$25 per Mtok",
description: "Use the default model (currently Opus 4.8 (1M context)) · $5/$25 per Mtok",
},
{
value: "sonnet",

View File

@@ -304,7 +304,11 @@ function extractTokenBudget(sdkMessage) {
const messageUsage = sdkMessage.message?.usage || sdkMessage.usage;
if (messageUsage && typeof messageUsage === 'object') {
const inputTokens = readNumber(messageUsage.input_tokens ?? messageUsage.inputTokens);
const directInputTokens = readNumber(messageUsage.input_tokens ?? messageUsage.inputTokens);
const cacheCreationTokens = readNumber(messageUsage.cache_creation_input_tokens ?? messageUsage.cacheCreationInputTokens ?? messageUsage.cacheCreationTokens);
const cacheReadTokens = readNumber(messageUsage.cache_read_input_tokens ?? messageUsage.cacheReadInputTokens ?? messageUsage.cacheReadTokens);
const cacheTokens = cacheCreationTokens + cacheReadTokens;
const inputTokens = directInputTokens + cacheTokens;
const outputTokens = readNumber(messageUsage.output_tokens ?? messageUsage.outputTokens);
const totalUsed = inputTokens + outputTokens;
const contextWindow = parseInt(process.env.CONTEXT_WINDOW, 10) || 160000;
@@ -314,6 +318,9 @@ function extractTokenBudget(sdkMessage) {
total: contextWindow,
inputTokens,
outputTokens,
cacheReadTokens,
cacheCreationTokens,
cacheTokens,
breakdown: {
input: inputTokens,
output: outputTokens,

View File

@@ -87,6 +87,11 @@ const installMode = fs.existsSync(path.join(APP_ROOT, '.git')) ? 'git' : 'npm';
console.log('SERVER_PORT from env:', process.env.SERVER_PORT);
/**
 * Coerce a raw usage field into a finite number.
 *
 * The value is passed through `Number()`; anything that does not yield a
 * finite result (NaN, ±Infinity) is reported as 0.
 */
function readUsageNumber(value) {
  const numeric = Number(value);
  if (!Number.isFinite(numeric)) {
    return 0;
  }
  return numeric;
}
const app = express();
const server = http.createServer(app);
@@ -1386,6 +1391,8 @@ app.get('/api/projects/:projectId/sessions/:sessionId/token-usage', authenticate
const contextWindow = Number.isFinite(parsedContextWindow) ? parsedContextWindow : 160000;
let inputTokens = 0;
let outputTokens = 0;
let cacheReadTokens = 0;
let cacheCreationTokens = 0;
// Find the latest assistant message with usage data (scan from end)
for (let i = lines.length - 1; i >= 0; i--) {
@@ -1397,8 +1404,11 @@ app.get('/api/projects/:projectId/sessions/:sessionId/token-usage', authenticate
const usage = entry.message.usage;
// Use token counts from latest assistant message only
inputTokens = usage.input_tokens || 0;
outputTokens = usage.output_tokens || 0;
const directInputTokens = readUsageNumber(usage.input_tokens ?? usage.inputTokens);
cacheReadTokens = readUsageNumber(usage.cache_read_input_tokens ?? usage.cacheReadInputTokens ?? usage.cacheReadTokens);
cacheCreationTokens = readUsageNumber(usage.cache_creation_input_tokens ?? usage.cacheCreationInputTokens ?? usage.cacheCreationTokens);
inputTokens = directInputTokens + cacheReadTokens + cacheCreationTokens;
outputTokens = readUsageNumber(usage.output_tokens ?? usage.outputTokens);
break; // Stop after finding the latest assistant message
}
@@ -1409,12 +1419,16 @@ app.get('/api/projects/:projectId/sessions/:sessionId/token-usage', authenticate
}
const totalUsed = inputTokens + outputTokens;
const cacheTokens = cacheReadTokens + cacheCreationTokens;
res.json({
used: totalUsed,
total: contextWindow,
inputTokens,
outputTokens,
cacheReadTokens,
cacheCreationTokens,
cacheTokens,
breakdown: {
input: inputTokens,
output: outputTokens

View File

@@ -31,6 +31,24 @@ export function createWebSocketServer(
});
wss.on('connection', (ws, request) => {
// Keep WebSocket alive across reverse-proxy idle timeouts (Cloudflare ~100s,
// AWS ALB 60s, nginx 60s, etc.). Without app-level pings these connections
// are silently torn down even when the UI is active, causing repeated
// reconnect cycles. The ws library's heartbeat is opt-in, so pings are scheduled here.
const HEARTBEAT_INTERVAL_MS = 30_000;
const heartbeat = setInterval(() => {
if (ws.readyState === ws.OPEN) {
try {
ws.ping();
} catch {
// socket may have been closed concurrently — interval will be cleared below
}
}
}, HEARTBEAT_INTERVAL_MS);
const stopHeartbeat = () => clearInterval(heartbeat);
ws.on('close', stopHeartbeat);
ws.on('error', stopHeartbeat);
const incomingRequest = request as AuthenticatedWebSocketRequest;
const url = incomingRequest.url ?? '/';
const pathname = new URL(url, 'http://localhost').pathname;

View File

@@ -592,12 +592,14 @@ class ResponseCollector {
}
}
const inputTokens = totalInput + totalCacheRead + totalCacheCreation;
return {
inputTokens: totalInput,
inputTokens,
outputTokens: totalOutput,
cacheReadTokens: totalCacheRead,
cacheCreationTokens: totalCacheCreation,
totalTokens: totalInput + totalOutput + totalCacheRead + totalCacheCreation
totalTokens: inputTokens + totalOutput
};
}
}

View File

@@ -268,16 +268,35 @@ Custom commands can be created in:
tokenUsage.contextWindow ??
0,
) || 0;
const inputTokensRaw =
const normalizedInputValue =
tokenUsage.inputTokens ??
tokenUsage.input ??
tokenUsage.cumulativeInputTokens ??
tokenUsage.breakdown?.input ??
tokenUsage.promptTokens;
const directInputTokens =
Number(
tokenUsage.inputTokens ??
tokenUsage.input ??
normalizedInputValue ??
tokenUsage.input_tokens ??
tokenUsage.cumulativeInputTokens ??
tokenUsage.breakdown?.input ??
tokenUsage.promptTokens ??
0
) || 0;
const cacheReadTokens =
Number(
tokenUsage.cacheReadTokens ??
tokenUsage.cache_read_input_tokens ??
tokenUsage.cacheReadInputTokens ??
0,
) || 0;
const cacheCreationTokens =
Number(
tokenUsage.cacheCreationTokens ??
tokenUsage.cache_creation_input_tokens ??
tokenUsage.cacheCreationInputTokens ??
0,
) || 0;
const inputTokens = normalizedInputValue == null
? directInputTokens + cacheReadTokens + cacheCreationTokens
: directInputTokens;
const outputTokens =
Number(
tokenUsage.outputTokens ??
@@ -288,8 +307,9 @@ Custom commands can be created in:
tokenUsage.completionTokens ??
0,
) || 0;
const hasTokenBreakdown = inputTokensRaw > 0 || outputTokens > 0;
const used = reportedUsed || inputTokensRaw + outputTokens;
const computedUsed = inputTokens + outputTokens;
const hasTokenBreakdown = computedUsed > 0;
const used = Math.max(reportedUsed, computedUsed);
return {
type: "builtin",
@@ -302,7 +322,7 @@ Custom commands can be created in:
...(hasTokenBreakdown
? {
tokenBreakdown: {
input: inputTokensRaw,
input: inputTokens,
output: outputTokens,
},
}

View File

@@ -295,6 +295,7 @@ export default function ChatComposer({
<PromptInputTextarea
ref={textareaRef}
dir="auto"
value={input}
onChange={onInputChange}
onClick={onTextareaClick}

View File

@@ -120,7 +120,7 @@ const MessageComponent = memo(({ message, prevMessage, createDiff, onFileOpen, o
/* User message bubble on the right */
<div className="flex w-full items-end space-x-0 sm:w-auto sm:max-w-[85%] sm:space-x-3 md:max-w-md lg:max-w-lg xl:max-w-xl">
<div className="group flex-1 rounded-2xl rounded-br-md bg-blue-600 px-3 py-2 text-white shadow-sm sm:flex-initial sm:px-4">
<div className="whitespace-pre-wrap break-words text-sm">
<div dir="auto" className="whitespace-pre-wrap break-words text-sm">
{message.content}
</div>
{message.images && message.images.length > 0 && (
@@ -405,7 +405,7 @@ const MessageComponent = memo(({ message, prevMessage, createDiff, onFileOpen, o
</ReasoningContent>
</Reasoning>
) : (
<div className="text-sm text-gray-700 dark:text-gray-300">
<div dir="auto" className="text-sm text-gray-700 dark:text-gray-300">
{/* Reasoning accordion */}
{showThinking && message.reasoning && (
<Reasoning className="mb-3" defaultOpen={false}>

View File

@@ -36,8 +36,12 @@ const useWebSocketProviderState = (): WebSocketContextType => {
const { token } = useAuth();
useEffect(() => {
// The cleanup below sets unmountedRef.current = true. Without this reset, every
// re-run of the effect (e.g. on token refresh) would short-circuit connect()
// at its unmounted guard and leave the socket permanently disconnected.
unmountedRef.current = false;
connect();
return () => {
unmountedRef.current = true;
if (reconnectTimeoutRef.current) {

View File

@@ -128,7 +128,7 @@
body {
@apply bg-background text-foreground;
font-family: "Montserrat", -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif;
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif;
margin: 0;
padding: 0;
}

View File

@@ -37,6 +37,10 @@ export default defineConfig(({ mode }) => {
'/shell': {
target: `ws://${proxyHost}:${serverPort}`,
ws: true
},
'/plugin-ws': {
target: `ws://${proxyHost}:${serverPort}`,
ws: true
}
}
},