Merge branch 'main' into fix/shell-user-npm-path-priority

fix(shell): prioritize user npm binaries
Interactive shells could resolve bundled or system CLIs before user-installed npm binaries. Move existing user npm global directories to the front of PATH while preserving all other entries.
2026-06-26 13:35:49 +08:00 · 2026-06-24 11:10:47 +02:00 · 2026-06-22 15:27:13 +03:00
27 changed files with 27 additions and 1170 deletions
--- a/server/index.js
+++ b/server/index.js
@@ -61,7 +61,6 @@ import userRoutes from './routes/user.js';
 import geminiRoutes from './routes/gemini.js';
 import pluginsRoutes from './routes/plugins.js';
 import providerRoutes from './modules/providers/provider.routes.js';
-import voiceRoutes from './voice-proxy.js';
 import browserUseRoutes from './modules/browser-use/browser-use.routes.js';
 import browserUseMcpRoutes from './modules/browser-use/browser-use-mcp.routes.js';
 import { browserUseService } from './modules/browser-use/browser-use.service.js';
@@ -223,8 +222,6 @@ app.use('/api/providers', authenticateToken, providerRoutes);
 // Agent API Routes (uses API key authentication)
 app.use('/api/agent', agentRoutes);

-app.use('/api/voice', authenticateToken, voiceRoutes);
-
 // Serve public files (like api-docs.html)
 app.use(express.static(path.join(APP_ROOT, 'public')));

--- a/server/voice-proxy.js
+++ b/server/voice-proxy.js
@@ -1,224 +0,0 @@
-// Optional voice proxy — forwards STT/TTS to an OpenAI-compatible audio backend.
-//
-// The backend is whatever the user points at: OpenAI, Groq, or a local server
-// (LocalAI / Speaches / Kokoro-FastAPI / openedai-speech / etc.). It must expose the
-// standard OpenAI audio endpoints:
-//     POST {base}/audio/transcriptions   (multipart 'file' + 'model')      -> { text }
-//     POST {base}/audio/speech           ({ model, voice, input })         -> audio bytes
-//
-// Config is resolved per-request from headers (set by the client's voice settings),
-// falling back to server env defaults. Mounted at /api/voice behind authenticateToken.
-import { Readable } from 'node:stream';
-
-import express from 'express';
-
-const ENV = {
-  baseUrl: (process.env.VOICE_API_BASE_URL || '').replace(/\/$/, ''),
-  apiKey: process.env.VOICE_API_KEY || '',
-  sttModel: process.env.VOICE_STT_MODEL || 'whisper-1',
-  ttsModel: process.env.VOICE_TTS_MODEL || 'tts-1',
-  ttsVoice: process.env.VOICE_TTS_VOICE || 'alloy',
-};
-
-/**
- * Resolve the voice backend config for a request. Client headers (set from the
- * user's in-app voice settings) take precedence over the server env defaults.
- * @param {import('express').Request} req
- * @returns {{baseUrl: string, apiKey: string, sttModel: string, ttsModel: string, ttsVoice: string, ttsFormat: string}}
- */
-function resolveConfig(req) {
-  const h = req.headers;
-  return {
-    // Security: do not allow clients to control the outbound backend host.
-    // Always use the server-side configured base URL.
-    baseUrl: ENV.baseUrl,
-    apiKey: String(h['x-voice-api-key'] || '') || ENV.apiKey,
-    sttModel: String(h['x-voice-stt-model'] || '') || ENV.sttModel,
-    ttsModel: String(h['x-voice-tts-model'] || '') || ENV.ttsModel,
-    ttsVoice: String(h['x-voice-tts-voice'] || '') || ENV.ttsVoice,
-    ttsFormat: String(h['x-voice-tts-format'] || '').trim(),
-  };
-}
-
-const router = express.Router();
-
-// Generous by default — local TTS can synthesize long messages at ~real-time on CPU.
-// Guard against a non-numeric/zero override that would make setTimeout fire immediately.
-const DEFAULT_VOICE_TIMEOUT_MS = 300000;
-const _parsedTimeout = Number(process.env.VOICE_TIMEOUT_MS);
-const VOICE_TIMEOUT_MS = Number.isFinite(_parsedTimeout) && _parsedTimeout > 0
-  ? _parsedTimeout
-  : DEFAULT_VOICE_TIMEOUT_MS;
-
-/**
- * fetch() with an AbortController timeout so a stalled backend can't hold the
- * request open indefinitely. Aborts after VOICE_TIMEOUT_MS.
- * @param {string} url
- * @param {RequestInit} [options]
- * @returns {Promise<Response>}
- */
-async function fetchWithTimeout(url, options = {}) {
-  const parsed = new URL(url);
-  if (!['http:', 'https:'].includes(parsed.protocol) || !isAllowedBackendUrl(parsed.origin)) {
-    throw new Error('Blocked outbound voice backend URL');
-  }
-  const controller = new AbortController();
-  const timer = setTimeout(() => controller.abort(), VOICE_TIMEOUT_MS);
-  try {
-    return await fetch(parsed.toString(), { redirect: 'manual', ...options, signal: controller.signal });
-  } finally {
-    clearTimeout(timer);
-  }
-}
-
-/**
- * Turn a backend fetch failure into a clear, actionable client response:
- * 504 on timeout (AbortError), 502 otherwise.
- * @param {import('express').Response} res
- * @param {Error} e
- */
-function backendError(res, e) {
-  if (e && e.name === 'AbortError') {
-    return res.status(504).json({
-      error: `Voice backend timed out after ${Math.round(VOICE_TIMEOUT_MS / 1000)}s. Check your voice backend.`,
-    });
-  }
-  return res.status(502).json({ error: `Voice backend unreachable: ${e.message}` });
-}
-
-/**
- * SSRF guard for the user-configurable backend URL: allow http/https only and
- * block the link-local / cloud-metadata range (169.254.x). localhost and private
- * ranges are allowed on purpose so users can point at a local voice server
- * (LocalAI, Speaches, Kokoro-FastAPI, etc.).
- * @param {string} raw
- * @returns {boolean}
- */
-function isAllowedBackendUrl(raw) {
-  let u;
-  try {
-    u = new URL(raw);
-  } catch {
-    return false;
-  }
-  if (u.protocol !== 'http:' && u.protocol !== 'https:') return false;
-  if (u.hostname === '169.254.169.254' || u.hostname.startsWith('169.254.')) return false;
-  return true;
-}
-
-/**
- * Relay an upstream (backend) error to the client without making an upstream
- * 401/403 look like the user's own app login failed.
- * @param {import('express').Response} res
- * @param {number} status
- * @param {string} [text]
- */
-function upstreamError(res, status, text) {
-  if (status === 401 || status === 403) {
-    return res.status(502).json({ error: 'Voice backend rejected the request (check the API key).' });
-  }
-  return res.status(status).json({ error: text || 'voice backend error' });
-}
-
-let _upload = null;
-/**
- * Lazily build a memory-storage multer instance (25 MB cap) for audio uploads,
- * so multer is only imported when the voice feature is actually used.
- * @returns {Promise<import('multer').Multer>}
- */
-async function getUpload() {
-  if (!_upload) {
-    const multer = (await import('multer')).default;
-    _upload = multer({ storage: multer.memoryStorage(), limits: { fileSize: 25 * 1024 * 1024 } });
-  }
-  return _upload;
-}
-
-/**
- * Build the Authorization header for the backend, or an empty object when no
- * key is configured (e.g. a local server that needs none).
- * @param {string} apiKey
- * @returns {Record<string, string>}
- */
-function authHeader(apiKey) {
-  return apiKey ? { Authorization: `Bearer ${apiKey}` } : {};
-}
-
-/**
- * GET /api/voice/health -> { configured } (true when a backend base URL is set).
- */
-router.get('/health', (req, res) => {
-  res.json({ configured: Boolean(resolveConfig(req).baseUrl) });
-});
-
-/**
- * POST /api/voice/transcribe (multipart 'audio') -> { text }.
- * Forwards the uploaded audio to the backend's /audio/transcriptions endpoint.
- */
-router.post('/transcribe', async (req, res) => {
-  const cfg = resolveConfig(req);
-  if (!cfg.baseUrl) return res.status(503).json({ error: 'No voice backend configured' });
-  if (!isAllowedBackendUrl(cfg.baseUrl)) return res.status(400).json({ error: 'Invalid voice backend URL.' });
-  const upload = await getUpload();
-  upload.single('audio')(req, res, async (err) => {
-    if (err) return res.status(400).json({ error: err.message });
-    if (!req.file) return res.status(400).json({ error: 'No audio uploaded' });
-    try {
-      const fd = new FormData();
-      fd.append(
-        'file',
-        new Blob([req.file.buffer], { type: req.file.mimetype || 'audio/webm' }),
-        req.file.originalname || 'recording.webm',
-      );
-      fd.append('model', cfg.sttModel);
-      const r = await fetchWithTimeout(`${cfg.baseUrl}/audio/transcriptions`, {
-        method: 'POST',
-        headers: authHeader(cfg.apiKey),
-        body: fd,
-      });
-      const text = await r.text();
-      if (!r.ok) return upstreamError(res, r.status, text);
-      let data;
-      try { data = JSON.parse(text); } catch { data = { text }; }
-      res.json({ text: data.text ?? '' });
-    } catch (e) {
-      backendError(res, e);
-    }
-  });
-});
-
-/**
- * POST /api/voice/tts { text } -> audio bytes.
- * Forwards the text to the backend's /audio/speech endpoint and streams the audio back.
- */
-router.post('/tts', async (req, res) => {
-  const cfg = resolveConfig(req);
-  if (!cfg.baseUrl) return res.status(503).json({ error: 'No voice backend configured' });
-  if (!isAllowedBackendUrl(cfg.baseUrl)) return res.status(400).json({ error: 'Invalid voice backend URL.' });
-  const text = req.body?.text;
-  if (typeof text !== 'string' || !text.trim()) return res.status(400).json({ error: 'text required' });
-  try {
-    const r = await fetchWithTimeout(`${cfg.baseUrl}/audio/speech`, {
-      method: 'POST',
-      headers: { 'Content-Type': 'application/json', ...authHeader(cfg.apiKey) },
-      body: JSON.stringify({
-        model: cfg.ttsModel,
-        voice: cfg.ttsVoice,
-        input: text,
-        ...(cfg.ttsFormat ? { response_format: cfg.ttsFormat } : {}),
-      }),
-    });
-    if (!r.ok) {
-      const errText = await r.text().catch(() => 'tts failed');
-      return upstreamError(res, r.status, errText);
-    }
-    res.setHeader('Content-Type', r.headers.get('content-type') || 'audio/mpeg');
-    res.setHeader('Cache-Control', 'no-store');
-    if (!r.body) return res.end();
-    Readable.fromWeb(r.body).on('error', (error) => res.destroy(error)).pipe(res);
-  } catch (e) {
-    backendError(res, e);
-  }
-});
-
-export default router;
--- a/src/components/chat/hooks/useChatComposerState.ts
+++ b/src/components/chat/hooks/useChatComposerState.ts
@@ -775,17 +775,6 @@ export function useChatComposerState({
    handleSubmitRef.current = handleSubmit;
  }, [handleSubmit]);

-  // A voice transcript either fills the input (to edit before sending) or, when the
-  // user tapped "stop and send", is submitted straight away. Mirror the value into
-  // inputValueRef synchronously so handleSubmit reads the new text, not the stale state.
-  const handleVoiceTranscript = useCallback((text: string, send?: boolean) => {
-    const base = inputValueRef.current.trim();
-    const next = base ? `${base} ${text}` : text;
-    setInput(next);
-    inputValueRef.current = next;
-    if (send) handleSubmitRef.current?.(createFakeSubmitEvent());
-  }, [setInput]);
-
  useEffect(() => {
    inputValueRef.current = input;
  }, [input]);
@@ -1024,7 +1013,6 @@ export function useChatComposerState({
    isDragActive,
    openImagePicker: open,
    handleSubmit,
-    handleVoiceTranscript,
    handleInputChange,
    handleKeyDown,
    handlePaste,
--- a/src/components/chat/hooks/useChatProviderState.ts
+++ b/src/components/chat/hooks/useChatProviderState.ts
@@ -114,6 +114,7 @@ export function useChatProviderState({ selectedSession, selectedProject }: UseCh
  const [providerModelsLoading, setProviderModelsLoading] = useState(true);
  const [providerModelsRefreshing, setProviderModelsRefreshing] = useState(false);

+  const lastProviderRef = useRef(provider);
  const providerModelsRequestIdRef = useRef(0);

  const setStoredProviderModel = useCallback((targetProvider: LLMProvider, model: string) => {
@@ -343,8 +344,14 @@ export function useChatProviderState({ selectedSession, selectedProject }: UseCh
    localStorage.setItem('selected-provider', selectedSession.__provider);
  }, [provider, selectedSession]);

-  // Permission prompts belong to a session, not to the transient provider
-  // selection that is synchronized after navigation.
+  useEffect(() => {
+    if (lastProviderRef.current === provider) {
+      return;
+    }
+    setPendingPermissionRequests([]);
+    lastProviderRef.current = provider;
+  }, [provider]);
+
  useEffect(() => {
    setPendingPermissionRequests((previous) =>
      previous.filter((request) => !request.sessionId || request.sessionId === selectedSession?.id),
--- a/src/components/chat/hooks/useChatRealtimeHandlers.ts
+++ b/src/components/chat/hooks/useChatRealtimeHandlers.ts
@@ -1,29 +1,20 @@
-import { useEffect, useRef } from 'react';
+import { useEffect } from 'react';
 import type { Dispatch, MutableRefObject, SetStateAction } from 'react';

 import type { ServerEvent } from '../../../contexts/WebSocketContext';
 import { showCompletionTitleIndicator } from '../../../utils/pageTitleNotification';
-import { playChatCompletionSound, playNotificationSound } from '../../../utils/notificationSound';
+import { playChatCompletionSound } from '../../../utils/notificationSound';
 import type { MarkSessionIdle, MarkSessionProcessing } from '../../../hooks/useSessionProtection';
 import type { PendingPermissionRequest } from '../types/types';
 import type { ProjectSession, LLMProvider } from '../../../types/app';
 import type { SessionStore, NormalizedMessage } from '../../../stores/useSessionStore';

-const isActionablePermissionRequest = (request: { toolName?: unknown } | null | undefined): boolean => {
-  return request?.toolName !== 'ExitPlanMode' && request?.toolName !== 'exit_plan_mode';
-};
-
-const hasActionablePermissionRequests = (requests: Array<{ toolName?: unknown }> | null | undefined): boolean => {
-  return Array.isArray(requests) && requests.some((request) => isActionablePermissionRequest(request));
-};
-
 interface UseChatRealtimeHandlersArgs {
  subscribe: (listener: (event: ServerEvent) => void) => () => void;
  provider: LLMProvider;
  selectedSession: ProjectSession | null;
  currentSessionId: string | null;
  setTokenBudget: (budget: Record<string, unknown> | null) => void;
-  pendingPermissionRequests: PendingPermissionRequest[];
  setPendingPermissionRequests: Dispatch<SetStateAction<PendingPermissionRequest[]>>;
  streamTimerRef: MutableRefObject<number | null>;
  accumulatedStreamRef: MutableRefObject<string>;
@@ -61,7 +52,6 @@ export function useChatRealtimeHandlers({
  selectedSession,
  currentSessionId,
  setTokenBudget,
-  pendingPermissionRequests,
  setPendingPermissionRequests,
  streamTimerRef,
  accumulatedStreamRef,
@@ -72,29 +62,13 @@ export function useChatRealtimeHandlers({
  onWebSocketReconnect,
  sessionStore,
 }: UseChatRealtimeHandlersArgs) {
-  // Session switches can send `chat.subscribe` before this effect has a chance
-  // to rebind the websocket listener. Read the visible session id from a ref
-  // so a fast `chat_subscribed` ack is matched against the current view, not
-  // the previous render's closed-over selection.
-  const activeViewSessionIdRef = useRef<string | null>(selectedSession?.id || currentSessionId || null);
-  activeViewSessionIdRef.current = selectedSession?.id || currentSessionId || null;
-
-  // Keep the latest pending-permission snapshot available to the websocket
-  // listener so back-to-back permission events can dedupe and re-arm the
-  // notification sound before React finishes a rerender.
-  const pendingPermissionRequestsRef = useRef(pendingPermissionRequests);
-
-  useEffect(() => {
-    pendingPermissionRequestsRef.current = pendingPermissionRequests;
-  }, [pendingPermissionRequests]);
-
  useEffect(() => {
    const handleEvent = (msg: ServerEvent) => {
      if (!msg.kind) {
        return;
      }

-      const activeViewSessionId = activeViewSessionIdRef.current;
+      const activeViewSessionId = selectedSession?.id || currentSessionId || null;
      const sid = (typeof msg.sessionId === 'string' && msg.sessionId) || activeViewSessionId;

      // Record replay progress for every sequenced live event.
@@ -127,16 +101,7 @@ export function useChatRealtimeHandlers({

          const isViewedSession = sid === activeViewSessionId;
          if (isViewedSession && Array.isArray(msg.pendingPermissions)) {
-            const nextPendingPermissionRequests = msg.pendingPermissions as PendingPermissionRequest[];
-            const hadActionablePermissionRequests = hasActionablePermissionRequests(pendingPermissionRequestsRef.current);
-            const hasPendingActionablePermissionRequests = hasActionablePermissionRequests(nextPendingPermissionRequests);
-
-            pendingPermissionRequestsRef.current = nextPendingPermissionRequests;
-            setPendingPermissionRequests(nextPendingPermissionRequests);
-
-            if (hasPendingActionablePermissionRequests && !hadActionablePermissionRequests) {
-              void playNotificationSound();
-            }
+            setPendingPermissionRequests(msg.pendingPermissions as PendingPermissionRequest[]);
          }
          return;
        }
@@ -238,7 +203,6 @@ export function useChatRealtimeHandlers({
          // hides it immediately and atomically.
          onSessionIdle?.(sid);
          if (sid === activeViewSessionId) {
-            pendingPermissionRequestsRef.current = [];
            setPendingPermissionRequests([]);
          }

@@ -270,14 +234,10 @@ export function useChatRealtimeHandlers({

        case 'permission_request': {
          if (!msg.requestId) break;
-          if (isActionablePermissionRequest({ toolName: msg.toolName })) {
-            void playNotificationSound();
-          }
-
          if (sid === activeViewSessionId) {
-            const previousPendingPermissionRequests = pendingPermissionRequestsRef.current;
-            if (!previousPendingPermissionRequests.some((request) => request.requestId === msg.requestId)) {
-              const nextPendingPermissionRequests = [...previousPendingPermissionRequests, {
+            setPendingPermissionRequests((prev) => {
+              if (prev.some((r: PendingPermissionRequest) => r.requestId === msg.requestId)) return prev;
+              return [...prev, {
                requestId: msg.requestId as string,
                toolName: (msg.toolName as string) || 'UnknownTool',
                input: msg.input,
@@ -285,10 +245,7 @@ export function useChatRealtimeHandlers({
                sessionId: sid || null,
                receivedAt: new Date(),
              }];
-
-              pendingPermissionRequestsRef.current = nextPendingPermissionRequests;
-              setPendingPermissionRequests(nextPendingPermissionRequests);
-            }
+            });
          }
          if (sid) {
            onSessionProcessing?.(sid);
@@ -298,12 +255,7 @@ export function useChatRealtimeHandlers({

        case 'permission_cancelled': {
          if (msg.requestId && sid === activeViewSessionId) {
-            const nextPendingPermissionRequests = pendingPermissionRequestsRef.current.filter(
-              (request: PendingPermissionRequest) => request.requestId !== msg.requestId,
-            );
-
-            pendingPermissionRequestsRef.current = nextPendingPermissionRequests;
-            setPendingPermissionRequests(nextPendingPermissionRequests);
+            setPendingPermissionRequests((prev) => prev.filter((r: PendingPermissionRequest) => r.requestId !== msg.requestId));
          }
          break;
        }
@@ -334,7 +286,6 @@ export function useChatRealtimeHandlers({
    selectedSession,
    currentSessionId,
    setTokenBudget,
-    pendingPermissionRequests,
    setPendingPermissionRequests,
    streamTimerRef,
    accumulatedStreamRef,
--- a/src/components/chat/hooks/useTts.ts
+++ b/src/components/chat/hooks/useTts.ts
@@ -1,33 +0,0 @@
-import { useCallback, useEffect, useState } from 'react';
-import { voicePlayer, voiceId, type VoiceSnapshot } from '../../../lib/voicePlayer';
-
-export type TtsState = VoiceSnapshot['state'];
-
-/**
- * Thin adapter over the app-level voicePlayer. Playback lives outside React (see
- * lib/voicePlayer), so switching chats or re-rendering a message no longer cuts the
- * audio off. This hook just reflects the player's state for one message and forwards taps.
- */
-export function useTts(getText: () => string) {
-  const content = getText();
-  const id = voiceId(content);
-
-  const [snap, setSnap] = useState<VoiceSnapshot>(() => voicePlayer.getSnapshot(id));
-
-  useEffect(() => {
-    const update = () =>
-      setSnap((prev) => {
-        const next = voicePlayer.getSnapshot(id);
-        return prev.state === next.state && prev.error === next.error ? prev : next;
-      });
-    update();
-    return voicePlayer.subscribe(update);
-  }, [id]);
-
-  const toggle = useCallback(() => {
-    voicePlayer.unlock(); // synchronous, within the click gesture (iOS)
-    voicePlayer.toggle(content);
-  }, [content]);
-
-  return { state: snap.state, toggle, error: snap.error };
-}
--- a/src/components/chat/hooks/useVoiceAvailable.ts
+++ b/src/components/chat/hooks/useVoiceAvailable.ts
@@ -1,85 +0,0 @@
-import { useEffect, useState } from 'react';
-
-import { authenticatedFetch } from '../../../utils/api';
-import { readVoiceConfig, VOICE_CONFIG_SYNC_EVENT } from '../../../hooks/useVoiceConfig';
-
-// Voice UI is gated on the `voiceEnabled` UI preference (toggled in Quick Settings /
-// the Settings modal) and a configured voice backend.
-const STORAGE_KEY = 'uiPreferences';
-const SYNC_EVENT = 'ui-preferences:sync';
-let healthRequest: Promise<boolean> | null = null;
-
-function checkVoiceHealth(): Promise<boolean> {
-  if (healthRequest) return healthRequest;
-  const request = authenticatedFetch('/api/voice/health')
-    .then(async (response) => {
-      if (!response.ok) throw new Error(`Voice health check failed (${response.status})`);
-      const data = await response.json();
-      return data?.configured === true;
-    })
-    .finally(() => {
-      healthRequest = null;
-    });
-  healthRequest = request;
-  return request;
-}
-
-function readVoiceEnabled(): boolean {
-  try {
-    const raw = localStorage.getItem(STORAGE_KEY);
-    if (!raw) return false;
-    const parsed = JSON.parse(raw);
-    return parsed?.voiceEnabled === true || parsed?.voiceEnabled === 'true';
-  } catch {
-    return false;
-  }
-}
-
-export function useVoiceAvailable(): boolean {
-  const [enabled, setEnabled] = useState<boolean>(() =>
-    typeof window === 'undefined' ? false : readVoiceEnabled(),
-  );
-  const [available, setAvailable] = useState(false);
-
-  useEffect(() => {
-    const update = () => setEnabled(readVoiceEnabled());
-    window.addEventListener('storage', update);
-    window.addEventListener(SYNC_EVENT, update as EventListener);
-    return () => {
-      window.removeEventListener('storage', update);
-      window.removeEventListener(SYNC_EVENT, update as EventListener);
-    };
-  }, []);
-
-  useEffect(() => {
-    let active = true;
-    let requestId = 0;
-
-    const check = async () => {
-      if (!enabled) {
-        setAvailable(false);
-        return;
-      }
-      if (readVoiceConfig().baseUrl.trim()) {
-        setAvailable(true);
-        return;
-      }
-      const id = ++requestId;
-      try {
-        const result = await checkVoiceHealth();
-        if (active && id === requestId) setAvailable(result);
-      } catch {
-        if (active && id === requestId) setAvailable(false);
-      }
-    };
-
-    void check();
-    window.addEventListener(VOICE_CONFIG_SYNC_EVENT, check);
-    return () => {
-      active = false;
-      window.removeEventListener(VOICE_CONFIG_SYNC_EVENT, check);
-    };
-  }, [enabled]);
-
-  return enabled && available;
-}
--- a/src/components/chat/hooks/useVoiceInput.ts
+++ b/src/components/chat/hooks/useVoiceInput.ts
@@ -1,149 +0,0 @@
-import { useCallback, useEffect, useRef, useState } from 'react';
-
-import { transcribeVoice } from '../../../lib/voiceApi';
-
-// Mobile-safe recording: iOS Safari 18.4+ supports webm/opus; older iOS needs mp4.
-const MIME_CANDIDATES = [
-  'audio/webm;codecs=opus',
-  'audio/webm',
-  'audio/mp4',
-  'audio/ogg;codecs=opus',
-  'audio/ogg',
-];
-
-function pickMime(): string {
-  for (const t of MIME_CANDIDATES) {
-    try {
-      if (typeof MediaRecorder !== 'undefined' && MediaRecorder.isTypeSupported(t)) return t;
-    } catch {
-      /* isTypeSupported can throw on some iOS versions */
-    }
-  }
-  return '';
-}
-
-export type VoiceInputState = 'idle' | 'recording' | 'transcribing';
-
-/**
- * Push-to-talk dictation. Records the mic, uploads to /api/voice/transcribe
- * (an OpenAI-compatible speech-to-text backend via the Express proxy), and
- * returns the transcript through onTranscript.
- */
-export function useVoiceInput(
-  onTranscript: (text: string, send?: boolean) => void,
-  onError?: (msg: string) => void,
-) {
-  const [state, setState] = useState<VoiceInputState>('idle');
-  const recorderRef = useRef<MediaRecorder | null>(null);
-  const chunksRef = useRef<Blob[]>([]);
-  const streamRef = useRef<MediaStream | null>(null);
-  const cancelledRef = useRef(false);
-  const startingRef = useRef(false);
-  // Whether the in-progress stop should auto-send the transcript (vs just fill the box).
-  const sendRef = useRef(false);
-
-  const stopTracks = () => {
-    streamRef.current?.getTracks().forEach((t) => t.stop());
-    streamRef.current = null;
-  };
-
-  // Stop the mic if the component unmounts mid-recording.
-  useEffect(() => {
-    cancelledRef.current = false;
-    return () => {
-      cancelledRef.current = true;
-      startingRef.current = false;
-      streamRef.current?.getTracks().forEach((t) => t.stop());
-      streamRef.current = null;
-      recorderRef.current = null;
-    };
-  }, []);
-
-  const start = useCallback(async () => {
-    if (startingRef.current || (recorderRef.current && recorderRef.current.state !== 'inactive')) return;
-    startingRef.current = true;
-    try {
-      const stream = await navigator.mediaDevices.getUserMedia({
-        audio: { echoCancellation: true, noiseSuppression: true },
-      });
-      if (cancelledRef.current) {
-        stream.getTracks().forEach((t) => t.stop());
-        return;
-      }
-      streamRef.current = stream;
-      const mimeType = pickMime();
-      const rec = mimeType ? new MediaRecorder(stream, { mimeType }) : new MediaRecorder(stream);
-      recorderRef.current = rec;
-      chunksRef.current = [];
-
-      rec.ondataavailable = (e) => {
-        if (e.data.size > 0) chunksRef.current.push(e.data);
-      };
-
-      rec.onstop = async () => {
-        stopTracks();
-        if (cancelledRef.current) return;
-        // Capture and clear the send intent for this stop before any async work.
-        const shouldSend = sendRef.current;
-        sendRef.current = false;
-        const type = rec.mimeType || 'audio/webm';
-        const blob = new Blob(chunksRef.current, { type });
-        if (blob.size < 800) {
-          setState('idle');
-          onError?.('Recording too short');
-          return;
-        }
-        setState('transcribing');
-        try {
-          const ext = type.includes('mp4') ? 'm4a' : type.includes('ogg') ? 'ogg' : 'webm';
-          const res = await transcribeVoice(blob, `recording.${ext}`);
-          if (!res.ok) throw new Error(`transcribe ${res.status}`);
-          const data = await res.json();
-          if (cancelledRef.current) return;
-          const text = String(data?.text || '').trim();
-          if (text) onTranscript(text, shouldSend);
-          else onError?.('No speech detected');
-        } catch (e) {
-          if (!cancelledRef.current) {
-            onError?.(`Transcription failed: ${e instanceof Error ? e.message : String(e)}`);
-          }
-        } finally {
-          if (!cancelledRef.current) setState('idle');
-        }
-      };
-
-      rec.start();
-      setState('recording');
-    } catch (e) {
-      recorderRef.current = null;
-      stopTracks();
-      if (cancelledRef.current) return;
-      const err = e as { name?: string; message?: string };
-      let msg = `Mic error: ${err?.message || e}`;
-      if (err?.name === 'NotAllowedError') msg = 'Microphone access denied.';
-      else if (err?.name === 'NotFoundError') msg = 'No microphone found.';
-      onError?.(msg);
-      setState('idle');
-    } finally {
-      startingRef.current = false;
-    }
-  }, [onTranscript, onError]);
-
-  // Stop recording. Pass { send: true } to auto-send the transcript once it's ready.
-  // Guard on the recorder's own state (not React state) so a double tap, or the mic
-  // and Send buttons both firing, can't call stop() on an already-inactive recorder.
-  const stop = useCallback((opts?: { send?: boolean }) => {
-    const rec = recorderRef.current;
-    if (rec && rec.state !== 'inactive') {
-      sendRef.current = opts?.send ?? false;
-      rec.stop();
-    }
-  }, []);
-
-  const toggle = useCallback(() => {
-    if (state === 'recording') stop();
-    else if (state === 'idle') start();
-  }, [state, start, stop]);
-
-  return { state, toggle, stop };
-}
--- a/src/components/chat/view/ChatInterface.tsx
+++ b/src/components/chat/view/ChatInterface.tsx
@@ -173,7 +173,6 @@ function ChatInterface({
    isDragActive,
    openImagePicker,
    handleSubmit,
-    handleVoiceTranscript,
    handleInputChange,
    handleKeyDown,
    handlePaste,
@@ -240,7 +239,6 @@ function ChatInterface({
    selectedSession,
    currentSessionId,
    setTokenBudget,
-    pendingPermissionRequests,
    setPendingPermissionRequests,
    streamTimerRef,
    accumulatedStreamRef,
@@ -407,7 +405,6 @@ function ChatInterface({
          renderInputWithMentions={renderInputWithMentions}
          textareaRef={textareaRef}
          input={input}
-          onVoiceTranscript={handleVoiceTranscript}
          onInputChange={handleInputChange}
          onTextareaClick={handleTextareaClick}
          onTextareaKeyDown={handleKeyDown}
--- a/src/components/chat/view/subcomponents/ChatComposer.tsx
+++ b/src/components/chat/view/subcomponents/ChatComposer.tsx
@@ -1,5 +1,4 @@
 import { useTranslation } from 'react-i18next';
-import { useCallback, useEffect, useRef, useState } from 'react';
 import type {
  ChangeEvent,
  ClipboardEvent,
@@ -10,10 +9,8 @@ import type {
  RefObject,
  TouchEvent,
 } from 'react';
-import { ImageIcon, MessageSquareIcon, XIcon, ArrowDownIcon, Loader2 } from 'lucide-react';
+import { ImageIcon, MessageSquareIcon, XIcon, ArrowDownIcon } from 'lucide-react';

-import { useVoiceInput } from '../../hooks/useVoiceInput';
-import { useVoiceAvailable } from '../../hooks/useVoiceAvailable';
 import type { SessionActivity } from '../../../../hooks/useSessionProtection';
 import type { PendingPermissionRequest, PermissionMode } from '../../types/types';
 import {
@@ -30,7 +27,6 @@ import {
 import CommandMenu from './CommandMenu';
 import ActivityIndicator from './ActivityIndicator';
 import ImageAttachment from './ImageAttachment';
-import VoiceInputButton from './VoiceInputButton';
 import PermissionRequestsBanner from './PermissionRequestsBanner';
 import TokenUsageSummary from './TokenUsageSummary';

@@ -93,7 +89,6 @@ interface ChatComposerProps {
  renderInputWithMentions: (text: string) => ReactNode;
  textareaRef: RefObject<HTMLTextAreaElement>;
  input: string;
-  onVoiceTranscript?: (text: string, send?: boolean) => void;
  onInputChange: (event: ChangeEvent<HTMLTextAreaElement>) => void;
  onTextareaClick: (event: MouseEvent<HTMLTextAreaElement>) => void;
  onTextareaKeyDown: (event: KeyboardEvent<HTMLTextAreaElement>) => void;
@@ -147,7 +142,6 @@ export default function ChatComposer({
  renderInputWithMentions,
  textareaRef,
  input,
-  onVoiceTranscript,
  onInputChange,
  onTextareaClick,
  onTextareaKeyDown,
@@ -160,28 +154,6 @@ export default function ChatComposer({
  sendByCtrlEnter,
 }: ChatComposerProps) {
  const { t } = useTranslation('chat');
-
-  // Voice state is hosted here (not in the mic button) so the main Send button can stop
-  // recording and send the transcript in one tap, the way the mic button drops it in the box.
-  const voiceAvailable = useVoiceAvailable();
-  const [voiceError, setVoiceError] = useState<string | null>(null);
-  const voiceErrorTimer = useRef<ReturnType<typeof setTimeout> | null>(null);
-  const handleVoiceError = useCallback((msg: string) => {
-    setVoiceError(msg);
-    if (voiceErrorTimer.current) clearTimeout(voiceErrorTimer.current);
-    voiceErrorTimer.current = setTimeout(() => setVoiceError(null), 4000);
-  }, []);
-  useEffect(() => () => {
-    if (voiceErrorTimer.current) clearTimeout(voiceErrorTimer.current);
-  }, []);
-  const noopTranscript = useCallback(() => {}, []);
-  const { state: voiceState, toggle: voiceToggle, stop: voiceStop } = useVoiceInput(
-    onVoiceTranscript ?? noopTranscript,
-    handleVoiceError,
-  );
-  const isRecording = voiceState === 'recording';
-  const isTranscribing = voiceState === 'transcribing';
-
  const textareaRect = textareaRef.current?.getBoundingClientRect();
  const commandMenuPosition = {
    top: textareaRect ? Math.max(16, textareaRect.top - 316) : 0,
@@ -337,10 +309,6 @@ export default function ChatComposer({
              <ImageIcon />
            </PromptInputButton>

-            {onVoiceTranscript && voiceAvailable && (
-              <VoiceInputButton state={voiceState} onToggle={voiceToggle} errorMsg={voiceError} />
-            )}
-
            <button
              type="button"
              onClick={onModeSwitch}
@@ -419,21 +387,10 @@ export default function ChatComposer({
              {sendByCtrlEnter ? t('input.hintText.ctrlEnter') : t('input.hintText.enter')}
            </div>
            <PromptInputSubmit
-              onClick={
-                isLoading
-                  ? onAbortSession
-                  : isRecording
-                    ? (e: MouseEvent<HTMLButtonElement>) => {
-                        e.preventDefault();
-                        voiceStop({ send: true });
-                      }
-                    : undefined
-              }
-              disabled={isLoading ? false : isRecording ? false : isTranscribing ? true : !input.trim()}
+              onClick={isLoading ? onAbortSession : undefined}
+              disabled={!isLoading && !input.trim()}
              className="h-10 w-10 sm:h-10 sm:w-10"
-            >
-              {isTranscribing ? <Loader2 className="h-4 w-4 animate-spin" /> : undefined}
-            </PromptInputSubmit>
+            />
          </div>
        </PromptInputFooter>
      </PromptInput>
--- a/src/components/chat/view/subcomponents/MessageComponent.tsx
+++ b/src/components/chat/view/subcomponents/MessageComponent.tsx
@@ -15,7 +15,6 @@ import { Reasoning, ReasoningTrigger, ReasoningContent } from '../../../../share

 import { Markdown } from './Markdown';
 import MessageCopyControl from './MessageCopyControl';
-import MessageSpeakControl from './MessageSpeakControl';

 type DiffLine = {
  type: string;
@@ -416,9 +415,6 @@ const MessageComponent = memo(({ message, prevMessage, createDiff, onFileOpen, a
                {shouldShowAssistantCopyControl && (
                  <MessageCopyControl content={assistantCopyContent} messageType="assistant" />
                )}
-                {shouldShowAssistantCopyControl && (
-                  <MessageSpeakControl content={assistantCopyContent} />
-                )}
                {!isGrouped && <span>{formattedTime}</span>}
              </div>
            )}
--- a/src/components/chat/view/subcomponents/MessageSpeakControl.tsx
+++ b/src/components/chat/view/subcomponents/MessageSpeakControl.tsx
@@ -1,44 +0,0 @@
-import { Volume2, Loader2, Square } from 'lucide-react';
-import { useTranslation } from 'react-i18next';
-import { useTts } from '../../hooks/useTts';
-import { useVoiceAvailable } from '../../hooks/useVoiceAvailable';
-
-// Tap-to-speak button beside the copy control on assistant messages.
-// Renders nothing unless the optional voice feature is enabled.
-const MessageSpeakControl = ({ content }: { content: string }) => {
-  const { t } = useTranslation('chat');
-  const available = useVoiceAvailable();
-  const { state, toggle, error } = useTts(() => content);
-
-  if (!available) return null;
-
-  const title =
-    state === 'playing' ? t('voice.stopSpeaking') : state === 'loading' ? t('voice.loading') : t('voice.speak');
-
-  return (
-    <span className="relative inline-flex">
-      {error && (
-        <span className="absolute bottom-full left-1/2 z-10 mb-1 max-w-[240px] -translate-x-1/2 whitespace-normal rounded bg-red-600 px-2 py-1 text-center text-xs text-white shadow-lg">
-          {error}
-        </span>
-      )}
-      <button
-        type="button"
-        onClick={toggle}
-        title={title}
-        aria-label={title}
-        className="inline-flex items-center gap-1 rounded px-1 py-0.5 text-gray-400 transition-colors hover:text-gray-600 dark:text-gray-500 dark:hover:text-gray-300"
-      >
-        {state === 'playing' ? (
-          <Square className="h-3.5 w-3.5" />
-        ) : state === 'loading' ? (
-          <Loader2 className="h-3.5 w-3.5 animate-spin" />
-        ) : (
-          <Volume2 className="h-3.5 w-3.5" />
-        )}
-      </button>
-    </span>
-  );
-};
-
-export default MessageSpeakControl;
--- a/src/components/chat/view/subcomponents/VoiceInputButton.tsx
+++ b/src/components/chat/view/subcomponents/VoiceInputButton.tsx
@@ -1,46 +0,0 @@
-import { useTranslation } from 'react-i18next';
-import { Mic, Square, Loader2 } from 'lucide-react';
-
-import { PromptInputButton } from '../../../../shared/view/ui';
-import type { VoiceInputState } from '../../hooks/useVoiceInput';
-
-type Props = {
-  state: VoiceInputState;
-  onToggle: () => void;
-  errorMsg?: string | null;
-};
-
-// Push-to-talk mic button (presentational). Recording state and the stop-and-send action
-// are owned by the composer so the main Send button can drive them too. This button just
-// starts recording and, while recording, stops and drops the transcript into the input box.
-export default function VoiceInputButton({ state, onToggle, errorMsg }: Props) {
-  const { t } = useTranslation('chat');
-
-  const icon =
-    state === 'recording' ? (
-      <Square className="text-red-500" />
-    ) : state === 'transcribing' ? (
-      <Loader2 className="animate-spin" />
-    ) : (
-      <Mic />
-    );
-
-  return (
-    <span className="relative inline-flex">
-      {errorMsg && (
-        <span className="absolute bottom-full left-1/2 mb-1 -translate-x-1/2 whitespace-nowrap rounded bg-red-600 px-2 py-1 text-xs text-white shadow-lg">
-          {errorMsg}
-        </span>
-      )}
-      <PromptInputButton
-        tooltip={{ content: state === 'recording' ? t('voice.stopRecording') : t('voice.input') }}
-        onClick={(e: { preventDefault: () => void }) => {
-          e.preventDefault();
-          onToggle();
-        }}
-      >
-        {icon}
-      </PromptInputButton>
-    </span>
-  );
-}
--- a/src/components/quick-settings-panel/constants.ts
+++ b/src/components/quick-settings-panel/constants.ts
@@ -4,7 +4,6 @@ import {
  Eye,
  Languages,
  Maximize2,
-  Mic,
 } from 'lucide-react';
 import type { PreferenceToggleItem } from './types';

@@ -55,9 +54,4 @@ export const INPUT_SETTING_TOGGLES: PreferenceToggleItem[] = [
    labelKey: 'quickSettings.sendByCtrlEnter',
    icon: Languages,
  },
-  {
-    key: 'voiceEnabled',
-    labelKey: 'quickSettings.voiceEnabled',
-    icon: Mic,
-  },
 ];
--- a/src/components/quick-settings-panel/types.ts
+++ b/src/components/quick-settings-panel/types.ts
@@ -6,8 +6,7 @@ export type PreferenceToggleKey =
  | 'showRawParameters'
  | 'showThinking'
  | 'autoScrollToBottom'
-  | 'sendByCtrlEnter'
-  | 'voiceEnabled';
+  | 'sendByCtrlEnter';

 export type QuickSettingsPreferences = Record<PreferenceToggleKey, boolean>;

--- a/src/components/quick-settings-panel/view/QuickSettingsPanelView.tsx
+++ b/src/components/quick-settings-panel/view/QuickSettingsPanelView.tsx
@@ -27,14 +27,12 @@ export default function QuickSettingsPanelView() {
    showThinking: preferences.showThinking,
    autoScrollToBottom: preferences.autoScrollToBottom,
    sendByCtrlEnter: preferences.sendByCtrlEnter,
-    voiceEnabled: preferences.voiceEnabled,
  }), [
    preferences.autoExpandTools,
    preferences.autoScrollToBottom,
    preferences.sendByCtrlEnter,
    preferences.showRawParameters,
    preferences.showThinking,
-    preferences.voiceEnabled,
  ]);

  const handlePreferenceChange = useCallback(
--- a/src/components/settings/types/types.ts
+++ b/src/components/settings/types/types.ts
@@ -3,7 +3,7 @@ import type { Dispatch, SetStateAction } from 'react';
 import type { LLMProvider } from '../../../types/app';
 import type { ProviderAuthStatus } from '../../provider-auth/types';

-export type SettingsMainTab = 'agents' | 'appearance' | 'git' | 'api' | 'voice' | 'tasks' | 'browser' | 'notifications' | 'plugins' | 'about';
+export type SettingsMainTab = 'agents' | 'appearance' | 'git' | 'api' | 'tasks' | 'browser' | 'notifications' | 'plugins' | 'about';
 export type AgentProvider = LLMProvider;
 export type AgentCategory = 'account' | 'permissions' | 'mcp' | 'skills';
 export type ProjectSortOrder = 'name' | 'date';
--- a/src/components/settings/view/Settings.tsx
+++ b/src/components/settings/view/Settings.tsx
@@ -7,7 +7,6 @@ import SettingsSidebar from '../view/SettingsSidebar';
 import AgentsSettingsTab from '../view/tabs/agents-settings/AgentsSettingsTab';
 import AppearanceSettingsTab from '../view/tabs/AppearanceSettingsTab';
 import CredentialsSettingsTab from '../view/tabs/api-settings/CredentialsSettingsTab';
-import VoiceSettingsTab from '../view/tabs/VoiceSettingsTab';
 import GitSettingsTab from '../view/tabs/git-settings/GitSettingsTab';
 import BrowserUseSettingsTab from '../view/tabs/browser-use-settings/BrowserUseSettingsTab';
 import NotificationsSettingsTab from '../view/tabs/NotificationsSettingsTab';
@@ -158,8 +157,6 @@ function Settings({ isOpen, onClose, projects = [], initialTab = 'agents' }: Set

              {activeTab === 'api' && <CredentialsSettingsTab />}

-              {activeTab === 'voice' && <VoiceSettingsTab />}
-
              {activeTab === 'plugins' && <PluginSettingsTab />}

              {activeTab === 'about' && <AboutTab />}
--- a/src/components/settings/view/SettingsSidebar.tsx
+++ b/src/components/settings/view/SettingsSidebar.tsx
@@ -1,6 +1,5 @@
-import { Bell, Bot, GitBranch, Info, Key, ListChecks, Mic, MonitorPlay, Palette, Puzzle } from 'lucide-react';
+import { Bell, Bot, GitBranch, Info, Key, ListChecks, MonitorPlay, Palette, Puzzle } from 'lucide-react';
 import { useTranslation } from 'react-i18next';
-
 import { cn } from '../../../lib/utils';
 import { PillBar, Pill } from '../../../shared/view/ui';
 import type { SettingsMainTab } from '../types/types';
@@ -21,7 +20,6 @@ const NAV_ITEMS: NavItem[] = [
  { id: 'appearance', labelKey: 'mainTabs.appearance', icon: Palette },
  { id: 'git', labelKey: 'mainTabs.git', icon: GitBranch },
  { id: 'api', labelKey: 'mainTabs.apiTokens', icon: Key },
-  { id: 'voice', labelKey: 'mainTabs.voice', icon: Mic },
  { id: 'tasks', labelKey: 'mainTabs.tasks', icon: ListChecks },
  { id: 'browser', labelKey: 'mainTabs.browser', icon: MonitorPlay },
  { id: 'plugins', labelKey: 'mainTabs.plugins', icon: Puzzle },
--- a/src/components/settings/view/tabs/VoiceSettingsTab.tsx
+++ b/src/components/settings/view/tabs/VoiceSettingsTab.tsx
@@ -1,88 +0,0 @@
-import type { InputHTMLAttributes } from 'react';
-import { useTranslation } from 'react-i18next';
-import SettingsSection from '../SettingsSection';
-import SettingsToggle from '../SettingsToggle';
-import { useUiPreferences } from '../../../../hooks/useUiPreferences';
-import { useVoiceConfig } from '../../../../hooks/useVoiceConfig';
-
-const inputClass =
-  'w-full rounded-md border border-border bg-background px-3 py-2 text-sm text-foreground placeholder:text-muted-foreground focus:outline-none focus:ring-2 focus:ring-ring';
-
-function Field({ label, ...props }: { label: string } & InputHTMLAttributes<HTMLInputElement>) {
-  return (
-    <label className="block space-y-1">
-      <span className="text-sm font-medium text-foreground">{label}</span>
-      <input className={inputClass} {...props} />
-    </label>
-  );
-}
-
-export default function VoiceSettingsTab() {
-  const { t } = useTranslation('settings');
-  const { preferences, setPreference } = useUiPreferences();
-  const { config, update } = useVoiceConfig();
-
-  return (
-    <div className="space-y-8">
-      <SettingsSection title={t('voiceSettings.title')} description={t('voiceSettings.description')}>
-        <div className="flex items-center justify-between rounded-lg border border-border p-3">
-          <div className="pr-3">
-            <div className="text-sm font-medium text-foreground">{t('voiceSettings.enable')}</div>
-            <div className="text-xs text-muted-foreground">{t('voiceSettings.enableDescription')}</div>
-          </div>
-          <SettingsToggle
-            checked={preferences.voiceEnabled}
-            onChange={(v) => setPreference('voiceEnabled', v)}
-            ariaLabel={t('voiceSettings.enable')}
-          />
-        </div>
-      </SettingsSection>
-
-      <SettingsSection title={t('voiceSettings.backendTitle')} description={t('voiceSettings.backendDescription')}>
-        <div className="space-y-4">
-          <Field
-            label={t('voiceSettings.baseUrl')}
-            placeholder="https://api.openai.com/v1"
-            value={config.baseUrl}
-            onChange={(e) => update({ baseUrl: e.target.value })}
-          />
-          <Field
-            label={t('voiceSettings.apiKey')}
-            type="password"
-            autoComplete="off"
-            placeholder="sk-…"
-            value={config.apiKey}
-            onChange={(e) => update({ apiKey: e.target.value })}
-          />
-          <div className="grid grid-cols-1 gap-4 sm:grid-cols-4">
-            <Field
-              label={t('voiceSettings.sttModel')}
-              placeholder="whisper-1"
-              value={config.sttModel}
-              onChange={(e) => update({ sttModel: e.target.value })}
-            />
-            <Field
-              label={t('voiceSettings.ttsModel')}
-              placeholder="tts-1"
-              value={config.ttsModel}
-              onChange={(e) => update({ ttsModel: e.target.value })}
-            />
-            <Field
-              label={t('voiceSettings.voice')}
-              placeholder="alloy"
-              value={config.ttsVoice}
-              onChange={(e) => update({ ttsVoice: e.target.value })}
-            />
-            <Field
-              label={t('voiceSettings.format')}
-              placeholder="mp3"
-              value={config.ttsFormat}
-              onChange={(e) => update({ ttsFormat: e.target.value })}
-            />
-          </div>
-          <p className="text-xs text-muted-foreground">{t('voiceSettings.note')}</p>
-        </div>
-      </SettingsSection>
-    </div>
-  );
-}
--- a/src/hooks/useUiPreferences.ts
+++ b/src/hooks/useUiPreferences.ts
@@ -7,7 +7,6 @@ type UiPreferences = {
  autoScrollToBottom: boolean;
  sendByCtrlEnter: boolean;
  sidebarVisible: boolean;
-  voiceEnabled: boolean;
 };

 type UiPreferenceKey = keyof UiPreferences;
@@ -40,7 +39,6 @@ const DEFAULTS: UiPreferences = {
  autoScrollToBottom: true,
  sendByCtrlEnter: false,
  sidebarVisible: true,
-  voiceEnabled: false,
 };

 const PREFERENCE_KEYS = Object.keys(DEFAULTS) as UiPreferenceKey[];
--- a/src/hooks/useVoiceConfig.ts
+++ b/src/hooks/useVoiceConfig.ts
@@ -1,68 +0,0 @@
-import { useState } from 'react';
-
-export type VoiceConfig = {
-  baseUrl: string;
-  apiKey: string;
-  sttModel: string;
-  ttsModel: string;
-  ttsVoice: string;
-  ttsFormat: string;
-};
-
-const STORAGE_KEY = 'voiceConfig';
-export const VOICE_CONFIG_SYNC_EVENT = 'voice-config:sync';
-const DEFAULTS: VoiceConfig = { baseUrl: '', apiKey: '', sttModel: '', ttsModel: '', ttsVoice: '', ttsFormat: '' };
-
-export function readVoiceConfig(): VoiceConfig {
-  try {
-    const raw = localStorage.getItem(STORAGE_KEY);
-    if (!raw) return { ...DEFAULTS };
-    const parsed = JSON.parse(raw);
-    if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) return { ...DEFAULTS };
-    const config = { ...DEFAULTS };
-    for (const key of Object.keys(DEFAULTS) as (keyof VoiceConfig)[]) {
-      if (typeof parsed[key] === 'string') config[key] = parsed[key];
-    }
-    return config;
-  } catch {
-    return { ...DEFAULTS };
-  }
-}
-
-// Headers the voice proxy reads to target a per-user OpenAI-compatible backend.
-// Empty fields are omitted so the server's env defaults apply.
-export function voiceConfigHeaders(): Record<string, string> {
-  if (typeof window === 'undefined') return {};
-  const c = readVoiceConfig();
-  const h: Record<string, string> = {};
-  if (c.apiKey) h['x-voice-api-key'] = c.apiKey;
-  if (c.sttModel) h['x-voice-stt-model'] = c.sttModel;
-  if (c.ttsModel) h['x-voice-tts-model'] = c.ttsModel;
-  if (c.ttsVoice) h['x-voice-tts-voice'] = c.ttsVoice;
-  if (c.ttsFormat.trim()) h['x-voice-tts-format'] = c.ttsFormat.trim();
-  return h;
-}
-
-export function useVoiceConfig() {
-  const [config, setConfig] = useState<VoiceConfig>(() =>
-    typeof window === 'undefined' ? { ...DEFAULTS } : readVoiceConfig(),
-  );
-
-  const update = (patch: Partial<VoiceConfig>) => {
-    setConfig((prev) => {
-      const next = { ...prev, ...patch };
-      try {
-        const stored: Partial<VoiceConfig> = { ...next };
-        if (next.ttsFormat.trim()) stored.ttsFormat = next.ttsFormat.trim();
-        else delete stored.ttsFormat;
-        localStorage.setItem(STORAGE_KEY, JSON.stringify(stored));
-        window.dispatchEvent(new Event(VOICE_CONFIG_SYNC_EVENT));
-      } catch {
-        /* ignore persistence errors */
-      }
-      return next;
-    });
-  };
-
-  return { config, update };
-}
--- a/src/i18n/locales/en/chat.json
+++ b/src/i18n/locales/en/chat.json
@@ -122,14 +122,6 @@
      }
    }
  },
-  "voice": {
-    "input": "Voice input",
-    "stopRecording": "Stop recording",
-    "transcribing": "Transcribing…",
-    "speak": "Read aloud",
-    "stopSpeaking": "Stop",
-    "loading": "Loading…"
-  },
  "input": {
    "placeholder": "Type / for commands, @ for files, or ask {{provider}} anything...",
    "placeholderDefault": "Type your message...",
--- a/src/i18n/locales/en/settings.json
+++ b/src/i18n/locales/en/settings.json
@@ -50,21 +50,6 @@
    "resetToDefaults": "Reset to Defaults",
    "cancelChanges": "Cancel Changes"
  },
-  "voiceSettings": {
-    "title": "Voice",
-    "description": "Speech-to-text input and read-aloud, via an OpenAI-compatible audio backend.",
-    "enable": "Enable voice",
-    "enableDescription": "Show the mic button and the read-aloud button on messages.",
-    "backendTitle": "Backend",
-    "backendDescription": "Point at OpenAI, Groq, or a local server (LocalAI, Speaches, Kokoro-FastAPI). Leave blank to use the server default.",
-    "baseUrl": "Base URL",
-    "apiKey": "API key",
-    "sttModel": "Speech-to-text model",
-    "ttsModel": "Text-to-speech model",
-    "voice": "Voice",
-    "format": "Audio format",
-    "note": "A custom base URL is called directly by your browser and must allow browser CORS requests. Leave it blank to use the server-configured backend."
-  },
  "quickSettings": {
    "title": "Quick Settings",
    "sections": {
@@ -79,7 +64,6 @@
    "showThinking": "Show thinking",
    "autoScrollToBottom": "Auto-scroll to bottom",
    "sendByCtrlEnter": "Send by Ctrl+Enter",
-    "voiceEnabled": "Voice (mic + read aloud)",
    "sendByCtrlEnterDescription": "When enabled, pressing Ctrl+Enter will send the message instead of just Enter. This is useful for IME users to avoid accidental sends.",
    "dragHandle": {
      "dragging": "Dragging handle",
@@ -110,7 +94,6 @@
    "appearance": "Appearance",
    "git": "Git",
    "apiTokens": "API & Tokens",
-    "voice": "Voice",
    "tasks": "Tasks",
    "browser": "Browser",
    "notifications": "Notifications",
@@ -131,7 +114,7 @@
    },
    "sound": {
      "title": "Sound",
-      "description": "Play a short tone when a chat run finishes or needs tool approval.",
+      "description": "Play a short tone when a chat run finishes.",
      "enabled": "Enabled",
      "test": "Test sound"
    },
--- a/src/lib/voiceApi.ts
+++ b/src/lib/voiceApi.ts
@@ -1,60 +0,0 @@
-import { authenticatedFetch } from '../utils/api';
-import { readVoiceConfig, voiceConfigHeaders } from '../hooks/useVoiceConfig';
-
-function directUrl(baseUrl: string, path: string): string {
-  return `${baseUrl.replace(/\/$/, '')}${path}`;
-}
-
-export function voiceConfigSignature(): string {
-  return JSON.stringify(readVoiceConfig());
-}
-
-export function transcribeVoice(blob: Blob, filename: string): Promise<Response> {
-  const config = readVoiceConfig();
-  const body = new FormData();
-
-  if (config.baseUrl.trim()) {
-    body.append('file', blob, filename);
-    body.append('model', config.sttModel || 'whisper-1');
-    return fetch(directUrl(config.baseUrl.trim(), '/audio/transcriptions'), {
-      method: 'POST',
-      headers: config.apiKey ? { Authorization: `Bearer ${config.apiKey}` } : {},
-      body,
-    });
-  }
-
-  body.append('audio', blob, filename);
-  return authenticatedFetch('/api/voice/transcribe', {
-    method: 'POST',
-    headers: voiceConfigHeaders(),
-    body,
-  });
-}
-
-export function synthesizeVoice(text: string, signal: AbortSignal): Promise<Response> {
-  const config = readVoiceConfig();
-
-  if (config.baseUrl.trim()) {
-    return fetch(directUrl(config.baseUrl.trim(), '/audio/speech'), {
-      method: 'POST',
-      headers: {
-        'Content-Type': 'application/json',
-        ...(config.apiKey ? { Authorization: `Bearer ${config.apiKey}` } : {}),
-      },
-      body: JSON.stringify({
-        model: config.ttsModel || 'tts-1',
-        voice: config.ttsVoice || 'alloy',
-        input: text,
-        ...(config.ttsFormat.trim() ? { response_format: config.ttsFormat.trim() } : {}),
-      }),
-      signal,
-    });
-  }
-
-  return authenticatedFetch('/api/voice/tts', {
-    method: 'POST',
-    body: JSON.stringify({ text }),
-    headers: voiceConfigHeaders(),
-    signal,
-  });
-}
--- a/src/lib/voicePlayer.ts
+++ b/src/lib/voicePlayer.ts
@@ -1,196 +0,0 @@
-import { synthesizeVoice, voiceConfigSignature } from './voiceApi';
-
-// A single app-level audio player for read-aloud. It owns one <audio> element, lives
-// outside the React tree, and caches generated audio by content. Because playback is not
-// tied to a component, switching chats or re-rendering a message can't revoke the blob URL
-// out from under it (the cause of mid-play cutoffs). v1 plays one message at a time
-// (a new play replaces the current one); the design leaves room for a queue later.
-
-export type VoicePlayState = 'idle' | 'loading' | 'playing';
-
-export type VoiceSnapshot = { state: VoicePlayState; error: string | null };
-
-const IDLE: VoiceSnapshot = { state: 'idle', error: null };
-const CACHE_MAX = 24;
-const CLIENT_TIMEOUT_MS = 330000; // backstop; the server proxy already times out at 5 min
-
-// Stable id / cache key from the text and voice settings that affect its audio (djb2).
-export function voiceId(content: string, signature = voiceConfigSignature()): string {
-  const input = JSON.stringify([content, signature]);
-  let h = 5381;
-  for (let i = 0; i < input.length; i++) h = (((h << 5) + h) + input.charCodeAt(i)) | 0;
-  return (h >>> 0).toString(36);
-}
-
-class VoicePlayer {
-  private audio: HTMLAudioElement | null = null;
-  private unlocked = false;
-  private cache = new Map<string, string>(); // id -> blob URL (insertion order = LRU)
-  private currentId: string | null = null;
-  private state: VoicePlayState = 'idle';
-  private errorId: string | null = null;
-  private errorMsg: string | null = null;
-  private token = 0; // bumps to ignore stale in-flight results
-  private activeController: AbortController | null = null; // aborts the in-flight TTS fetch
-  private errorTimer: ReturnType<typeof setTimeout> | null = null;
-  private listeners = new Set<() => void>();
-
-  subscribe(listener: () => void): () => void {
-    this.listeners.add(listener);
-    return () => {
-      this.listeners.delete(listener);
-    };
-  }
-
-  private emit() {
-    this.listeners.forEach((l) => l());
-  }
-
-  getSnapshot(id: string): VoiceSnapshot {
-    const state = this.currentId === id ? this.state : 'idle';
-    const error = this.errorId === id ? this.errorMsg : null;
-    if (state === 'idle' && error === null) return IDLE;
-    return { state, error };
-  }
-
-  private ensureAudio(): HTMLAudioElement {
-    if (!this.audio) {
-      const audio = new Audio();
-      audio.addEventListener('ended', () => this.onEnded());
-      audio.addEventListener('error', () => {
-        // Only meaningful while we believe we're playing.
-        if (this.state === 'playing') this.onEnded();
-      });
-      this.audio = audio;
-    }
-    return this.audio;
-  }
-
-  // Call synchronously from the click handler so iOS grants the (reused) element playback.
-  unlock() {
-    if (this.unlocked) return;
-    const audio = this.ensureAudio();
-    try {
-      const p = audio.play();
-      if (p && typeof p.catch === 'function') p.catch(() => {});
-      audio.pause();
-    } catch {
-      /* priming attempt; ignore */
-    }
-    this.unlocked = true;
-  }
-
-  toggle(content: string) {
-    const id = voiceId(content);
-    if (this.currentId === id && (this.state === 'playing' || this.state === 'loading')) {
-      this.stop();
-      return;
-    }
-    void this.play(id, content);
-  }
-
-  stop() {
-    this.token++; // ignore any stale in-flight result
-    this.abortActive(); // and actually cancel the network request
-    if (this.audio) this.audio.pause();
-    this.state = 'idle';
-    this.currentId = null;
-    this.emit();
-  }
-
-  private abortActive() {
-    if (this.activeController) {
-      this.activeController.abort();
-      this.activeController = null;
-    }
-  }
-
-  private onEnded() {
-    this.state = 'idle';
-    this.currentId = null;
-    this.emit();
-    // (queue auto-advance would hook in here)
-  }
-
-  private setError(id: string, msg: string) {
-    this.state = 'idle';
-    this.currentId = id;
-    this.errorId = id;
-    this.errorMsg = msg;
-    this.emit();
-    if (this.errorTimer) clearTimeout(this.errorTimer);
-    this.errorTimer = setTimeout(() => {
-      if (this.errorId === id) {
-        this.errorId = null;
-        this.errorMsg = null;
-        if (this.currentId === id) this.currentId = null;
-        this.emit();
-      }
-    }, 6000);
-  }
-
-  private async play(id: string, content: string) {
-    const audio = this.ensureAudio();
-    audio.pause();
-    this.currentId = id;
-    this.errorId = null;
-    this.errorMsg = null;
-    this.state = 'loading';
-    this.emit();
-
-    const myToken = ++this.token;
-    this.abortActive(); // cancel any request this play supersedes
-
-    try {
-      let url = this.cache.get(id);
-      if (!url) {
-        const controller = new AbortController();
-        this.activeController = controller;
-        const timer = setTimeout(() => controller.abort(), CLIENT_TIMEOUT_MS);
-        const res = await synthesizeVoice(content, controller.signal).finally(() => {
-          clearTimeout(timer);
-          if (this.activeController === controller) this.activeController = null;
-        });
-        if (myToken !== this.token) return; // superseded by another play/stop
-        if (!res.ok) {
-          let msg = `Read-aloud failed (${res.status})`;
-          try {
-            const j = await res.json();
-            if (j?.error) msg = String(j.error);
-          } catch {
-            /* non-JSON error body */
-          }
-          throw new Error(msg);
-        }
-        const blob = await res.blob();
-        if (myToken !== this.token) return;
-        url = URL.createObjectURL(blob);
-        this.cacheSet(id, url);
-      }
-      if (myToken !== this.token) return;
-      audio.src = url;
-      audio.load();
-      await audio.play();
-      if (myToken !== this.token) return;
-      this.state = 'playing';
-      this.emit();
-    } catch (e) {
-      if (myToken !== this.token) return;
-      const aborted = e instanceof Error && e.name === 'AbortError';
-      this.setError(id, aborted ? 'Read-aloud timed out.' : e instanceof Error ? e.message : 'Read-aloud failed');
-    }
-  }
-
-  private cacheSet(id: string, url: string) {
-    this.cache.set(id, url);
-    while (this.cache.size > CACHE_MAX) {
-      const oldest = this.cache.keys().next().value as string | undefined;
-      if (oldest === undefined) break;
-      const oldUrl = this.cache.get(oldest);
-      this.cache.delete(oldest);
-      if (oldUrl && oldUrl !== this.audio?.src) URL.revokeObjectURL(oldUrl);
-    }
-  }
-}
-
-export const voicePlayer = new VoicePlayer();
--- a/src/utils/notificationSound.ts
+++ b/src/utils/notificationSound.ts
@@ -58,7 +58,7 @@ const playTone = (
  oscillator.stop(startsAt + duration + 0.02);
 };

-export const playNotificationSound = async ({ force = false } = {}): Promise<void> => {
+export const playChatCompletionSound = async ({ force = false } = {}): Promise<void> => {
  if (!force && !isNotificationSoundEnabled()) {
    return;
  }
@@ -81,5 +81,3 @@ export const playNotificationSound = async ({ force = false } = {}): Promise<voi
    console.warn('Unable to play notification sound:', error);
  }
 };
-
-export const playChatCompletionSound = (options = {}): Promise<void> => playNotificationSound(options);
Author	SHA1	Message	Date
Simos Mikelatos	80ce5b8313	Merge branch 'main' into fix/shell-user-npm-path-priority	2026-06-24 11:10:47 +02:00
Haileyesus	9a33426eed	fix(shell): prioritize user npm binaries Interactive shells could resolve bundled or system CLIs before user-installed npm binaries. Move existing user npm global directories to the front of PATH while preserving all other entries.	2026-06-22 15:27:13 +03:00