Merge branch 'main' into feat/pending-tool-request-sound

fix(chat): notify for background permission requests
Permission audio was guarded by the visible-session check, so tool approvals in background sessions were silent. Keep banner state session-scoped while playing the existing sound globally.
2026-06-26 05:15:48 +08:00 · 2026-06-25 15:55:36 +03:00 · 2026-06-25 15:54:05 +03:00 · 2026-06-24 22:20:34 +03:00 · 2026-06-23 14:06:55 +03:00
24 changed files with 7 additions and 1105 deletions
--- a/server/index.js
+++ b/server/index.js
@@ -61,7 +61,6 @@ import userRoutes from './routes/user.js';
 import geminiRoutes from './routes/gemini.js';
 import pluginsRoutes from './routes/plugins.js';
 import providerRoutes from './modules/providers/provider.routes.js';
-import voiceRoutes from './voice-proxy.js';
 import browserUseRoutes from './modules/browser-use/browser-use.routes.js';
 import browserUseMcpRoutes from './modules/browser-use/browser-use-mcp.routes.js';
 import { browserUseService } from './modules/browser-use/browser-use.service.js';
@@ -223,8 +222,6 @@ app.use('/api/providers', authenticateToken, providerRoutes);
 // Agent API Routes (uses API key authentication)
 app.use('/api/agent', agentRoutes);

-app.use('/api/voice', authenticateToken, voiceRoutes);
-
 // Serve public files (like api-docs.html)
 app.use(express.static(path.join(APP_ROOT, 'public')));

--- a/server/voice-proxy.js
+++ b/server/voice-proxy.js
@@ -1,224 +0,0 @@
-// Optional voice proxy — forwards STT/TTS to an OpenAI-compatible audio backend.
-//
-// The backend is whatever the user points at: OpenAI, Groq, or a local server
-// (LocalAI / Speaches / Kokoro-FastAPI / openedai-speech / etc.). It must expose the
-// standard OpenAI audio endpoints:
-//     POST {base}/audio/transcriptions   (multipart 'file' + 'model')      -> { text }
-//     POST {base}/audio/speech           ({ model, voice, input })         -> audio bytes
-//
-// Config is resolved per-request from headers (set by the client's voice settings),
-// falling back to server env defaults. Mounted at /api/voice behind authenticateToken.
-import { Readable } from 'node:stream';
-
-import express from 'express';
-
-const ENV = {
-  baseUrl: (process.env.VOICE_API_BASE_URL || '').replace(/\/$/, ''),
-  apiKey: process.env.VOICE_API_KEY || '',
-  sttModel: process.env.VOICE_STT_MODEL || 'whisper-1',
-  ttsModel: process.env.VOICE_TTS_MODEL || 'tts-1',
-  ttsVoice: process.env.VOICE_TTS_VOICE || 'alloy',
-};
-
-/**
- * Resolve the voice backend config for a request. Client headers (set from the
- * user's in-app voice settings) take precedence over the server env defaults.
- * @param {import('express').Request} req
- * @returns {{baseUrl: string, apiKey: string, sttModel: string, ttsModel: string, ttsVoice: string, ttsFormat: string}}
- */
-function resolveConfig(req) {
-  const h = req.headers;
-  return {
-    // Security: do not allow clients to control the outbound backend host.
-    // Always use the server-side configured base URL.
-    baseUrl: ENV.baseUrl,
-    apiKey: String(h['x-voice-api-key'] || '') || ENV.apiKey,
-    sttModel: String(h['x-voice-stt-model'] || '') || ENV.sttModel,
-    ttsModel: String(h['x-voice-tts-model'] || '') || ENV.ttsModel,
-    ttsVoice: String(h['x-voice-tts-voice'] || '') || ENV.ttsVoice,
-    ttsFormat: String(h['x-voice-tts-format'] || '').trim(),
-  };
-}
-
-const router = express.Router();
-
-// Generous by default — local TTS can synthesize long messages at ~real-time on CPU.
-// Guard against a non-numeric/zero override that would make setTimeout fire immediately.
-const DEFAULT_VOICE_TIMEOUT_MS = 300000;
-const _parsedTimeout = Number(process.env.VOICE_TIMEOUT_MS);
-const VOICE_TIMEOUT_MS = Number.isFinite(_parsedTimeout) && _parsedTimeout > 0
-  ? _parsedTimeout
-  : DEFAULT_VOICE_TIMEOUT_MS;
-
-/**
- * fetch() with an AbortController timeout so a stalled backend can't hold the
- * request open indefinitely. Aborts after VOICE_TIMEOUT_MS.
- * @param {string} url
- * @param {RequestInit} [options]
- * @returns {Promise<Response>}
- */
-async function fetchWithTimeout(url, options = {}) {
-  const parsed = new URL(url);
-  if (!['http:', 'https:'].includes(parsed.protocol) || !isAllowedBackendUrl(parsed.origin)) {
-    throw new Error('Blocked outbound voice backend URL');
-  }
-  const controller = new AbortController();
-  const timer = setTimeout(() => controller.abort(), VOICE_TIMEOUT_MS);
-  try {
-    return await fetch(parsed.toString(), { redirect: 'manual', ...options, signal: controller.signal });
-  } finally {
-    clearTimeout(timer);
-  }
-}
-
-/**
- * Turn a backend fetch failure into a clear, actionable client response:
- * 504 on timeout (AbortError), 502 otherwise.
- * @param {import('express').Response} res
- * @param {Error} e
- */
-function backendError(res, e) {
-  if (e && e.name === 'AbortError') {
-    return res.status(504).json({
-      error: `Voice backend timed out after ${Math.round(VOICE_TIMEOUT_MS / 1000)}s. Check your voice backend.`,
-    });
-  }
-  return res.status(502).json({ error: `Voice backend unreachable: ${e.message}` });
-}
-
-/**
- * SSRF guard for the user-configurable backend URL: allow http/https only and
- * block the link-local / cloud-metadata range (169.254.x). localhost and private
- * ranges are allowed on purpose so users can point at a local voice server
- * (LocalAI, Speaches, Kokoro-FastAPI, etc.).
- * @param {string} raw
- * @returns {boolean}
- */
-function isAllowedBackendUrl(raw) {
-  let u;
-  try {
-    u = new URL(raw);
-  } catch {
-    return false;
-  }
-  if (u.protocol !== 'http:' && u.protocol !== 'https:') return false;
-  if (u.hostname === '169.254.169.254' || u.hostname.startsWith('169.254.')) return false;
-  return true;
-}
-
-/**
- * Relay an upstream (backend) error to the client without making an upstream
- * 401/403 look like the user's own app login failed.
- * @param {import('express').Response} res
- * @param {number} status
- * @param {string} [text]
- */
-function upstreamError(res, status, text) {
-  if (status === 401 || status === 403) {
-    return res.status(502).json({ error: 'Voice backend rejected the request (check the API key).' });
-  }
-  return res.status(status).json({ error: text || 'voice backend error' });
-}
-
-let _upload = null;
-/**
- * Lazily build a memory-storage multer instance (25 MB cap) for audio uploads,
- * so multer is only imported when the voice feature is actually used.
- * @returns {Promise<import('multer').Multer>}
- */
-async function getUpload() {
-  if (!_upload) {
-    const multer = (await import('multer')).default;
-    _upload = multer({ storage: multer.memoryStorage(), limits: { fileSize: 25 * 1024 * 1024 } });
-  }
-  return _upload;
-}
-
-/**
- * Build the Authorization header for the backend, or an empty object when no
- * key is configured (e.g. a local server that needs none).
- * @param {string} apiKey
- * @returns {Record<string, string>}
- */
-function authHeader(apiKey) {
-  return apiKey ? { Authorization: `Bearer ${apiKey}` } : {};
-}
-
-/**
- * GET /api/voice/health -> { configured } (true when a backend base URL is set).
- */
-router.get('/health', (req, res) => {
-  res.json({ configured: Boolean(resolveConfig(req).baseUrl) });
-});
-
-/**
- * POST /api/voice/transcribe (multipart 'audio') -> { text }.
- * Forwards the uploaded audio to the backend's /audio/transcriptions endpoint.
- */
-router.post('/transcribe', async (req, res) => {
-  const cfg = resolveConfig(req);
-  if (!cfg.baseUrl) return res.status(503).json({ error: 'No voice backend configured' });
-  if (!isAllowedBackendUrl(cfg.baseUrl)) return res.status(400).json({ error: 'Invalid voice backend URL.' });
-  const upload = await getUpload();
-  upload.single('audio')(req, res, async (err) => {
-    if (err) return res.status(400).json({ error: err.message });
-    if (!req.file) return res.status(400).json({ error: 'No audio uploaded' });
-    try {
-      const fd = new FormData();
-      fd.append(
-        'file',
-        new Blob([req.file.buffer], { type: req.file.mimetype || 'audio/webm' }),
-        req.file.originalname || 'recording.webm',
-      );
-      fd.append('model', cfg.sttModel);
-      const r = await fetchWithTimeout(`${cfg.baseUrl}/audio/transcriptions`, {
-        method: 'POST',
-        headers: authHeader(cfg.apiKey),
-        body: fd,
-      });
-      const text = await r.text();
-      if (!r.ok) return upstreamError(res, r.status, text);
-      let data;
-      try { data = JSON.parse(text); } catch { data = { text }; }
-      res.json({ text: data.text ?? '' });
-    } catch (e) {
-      backendError(res, e);
-    }
-  });
-});
-
-/**
- * POST /api/voice/tts { text } -> audio bytes.
- * Forwards the text to the backend's /audio/speech endpoint and streams the audio back.
- */
-router.post('/tts', async (req, res) => {
-  const cfg = resolveConfig(req);
-  if (!cfg.baseUrl) return res.status(503).json({ error: 'No voice backend configured' });
-  if (!isAllowedBackendUrl(cfg.baseUrl)) return res.status(400).json({ error: 'Invalid voice backend URL.' });
-  const text = req.body?.text;
-  if (typeof text !== 'string' || !text.trim()) return res.status(400).json({ error: 'text required' });
-  try {
-    const r = await fetchWithTimeout(`${cfg.baseUrl}/audio/speech`, {
-      method: 'POST',
-      headers: { 'Content-Type': 'application/json', ...authHeader(cfg.apiKey) },
-      body: JSON.stringify({
-        model: cfg.ttsModel,
-        voice: cfg.ttsVoice,
-        input: text,
-        ...(cfg.ttsFormat ? { response_format: cfg.ttsFormat } : {}),
-      }),
-    });
-    if (!r.ok) {
-      const errText = await r.text().catch(() => 'tts failed');
-      return upstreamError(res, r.status, errText);
-    }
-    res.setHeader('Content-Type', r.headers.get('content-type') || 'audio/mpeg');
-    res.setHeader('Cache-Control', 'no-store');
-    if (!r.body) return res.end();
-    Readable.fromWeb(r.body).on('error', (error) => res.destroy(error)).pipe(res);
-  } catch (e) {
-    backendError(res, e);
-  }
-});
-
-export default router;
--- a/src/components/chat/hooks/useChatComposerState.ts
+++ b/src/components/chat/hooks/useChatComposerState.ts
@@ -775,17 +775,6 @@ export function useChatComposerState({
    handleSubmitRef.current = handleSubmit;
  }, [handleSubmit]);

-  // A voice transcript either fills the input (to edit before sending) or, when the
-  // user tapped "stop and send", is submitted straight away. Mirror the value into
-  // inputValueRef synchronously so handleSubmit reads the new text, not the stale state.
-  const handleVoiceTranscript = useCallback((text: string, send?: boolean) => {
-    const base = inputValueRef.current.trim();
-    const next = base ? `${base} ${text}` : text;
-    setInput(next);
-    inputValueRef.current = next;
-    if (send) handleSubmitRef.current?.(createFakeSubmitEvent());
-  }, [setInput]);
-
  useEffect(() => {
    inputValueRef.current = input;
  }, [input]);
@@ -1024,7 +1013,6 @@ export function useChatComposerState({
    isDragActive,
    openImagePicker: open,
    handleSubmit,
-    handleVoiceTranscript,
    handleInputChange,
    handleKeyDown,
    handlePaste,
--- a/src/components/chat/hooks/useTts.ts
+++ b/src/components/chat/hooks/useTts.ts
@@ -1,33 +0,0 @@
-import { useCallback, useEffect, useState } from 'react';
-import { voicePlayer, voiceId, type VoiceSnapshot } from '../../../lib/voicePlayer';
-
-export type TtsState = VoiceSnapshot['state'];
-
-/**
- * Thin adapter over the app-level voicePlayer. Playback lives outside React (see
- * lib/voicePlayer), so switching chats or re-rendering a message no longer cuts the
- * audio off. This hook just reflects the player's state for one message and forwards taps.
- */
-export function useTts(getText: () => string) {
-  const content = getText();
-  const id = voiceId(content);
-
-  const [snap, setSnap] = useState<VoiceSnapshot>(() => voicePlayer.getSnapshot(id));
-
-  useEffect(() => {
-    const update = () =>
-      setSnap((prev) => {
-        const next = voicePlayer.getSnapshot(id);
-        return prev.state === next.state && prev.error === next.error ? prev : next;
-      });
-    update();
-    return voicePlayer.subscribe(update);
-  }, [id]);
-
-  const toggle = useCallback(() => {
-    voicePlayer.unlock(); // synchronous, within the click gesture (iOS)
-    voicePlayer.toggle(content);
-  }, [content]);
-
-  return { state: snap.state, toggle, error: snap.error };
-}
--- a/src/components/chat/hooks/useVoiceAvailable.ts
+++ b/src/components/chat/hooks/useVoiceAvailable.ts
@@ -1,85 +0,0 @@
-import { useEffect, useState } from 'react';
-
-import { authenticatedFetch } from '../../../utils/api';
-import { readVoiceConfig, VOICE_CONFIG_SYNC_EVENT } from '../../../hooks/useVoiceConfig';
-
-// Voice UI is gated on the `voiceEnabled` UI preference (toggled in Quick Settings /
-// the Settings modal) and a configured voice backend.
-const STORAGE_KEY = 'uiPreferences';
-const SYNC_EVENT = 'ui-preferences:sync';
-let healthRequest: Promise<boolean> | null = null;
-
-function checkVoiceHealth(): Promise<boolean> {
-  if (healthRequest) return healthRequest;
-  const request = authenticatedFetch('/api/voice/health')
-    .then(async (response) => {
-      if (!response.ok) throw new Error(`Voice health check failed (${response.status})`);
-      const data = await response.json();
-      return data?.configured === true;
-    })
-    .finally(() => {
-      healthRequest = null;
-    });
-  healthRequest = request;
-  return request;
-}
-
-function readVoiceEnabled(): boolean {
-  try {
-    const raw = localStorage.getItem(STORAGE_KEY);
-    if (!raw) return false;
-    const parsed = JSON.parse(raw);
-    return parsed?.voiceEnabled === true || parsed?.voiceEnabled === 'true';
-  } catch {
-    return false;
-  }
-}
-
-export function useVoiceAvailable(): boolean {
-  const [enabled, setEnabled] = useState<boolean>(() =>
-    typeof window === 'undefined' ? false : readVoiceEnabled(),
-  );
-  const [available, setAvailable] = useState(false);
-
-  useEffect(() => {
-    const update = () => setEnabled(readVoiceEnabled());
-    window.addEventListener('storage', update);
-    window.addEventListener(SYNC_EVENT, update as EventListener);
-    return () => {
-      window.removeEventListener('storage', update);
-      window.removeEventListener(SYNC_EVENT, update as EventListener);
-    };
-  }, []);
-
-  useEffect(() => {
-    let active = true;
-    let requestId = 0;
-
-    const check = async () => {
-      if (!enabled) {
-        setAvailable(false);
-        return;
-      }
-      if (readVoiceConfig().baseUrl.trim()) {
-        setAvailable(true);
-        return;
-      }
-      const id = ++requestId;
-      try {
-        const result = await checkVoiceHealth();
-        if (active && id === requestId) setAvailable(result);
-      } catch {
-        if (active && id === requestId) setAvailable(false);
-      }
-    };
-
-    void check();
-    window.addEventListener(VOICE_CONFIG_SYNC_EVENT, check);
-    return () => {
-      active = false;
-      window.removeEventListener(VOICE_CONFIG_SYNC_EVENT, check);
-    };
-  }, [enabled]);
-
-  return enabled && available;
-}
--- a/src/components/chat/hooks/useVoiceInput.ts
+++ b/src/components/chat/hooks/useVoiceInput.ts
@@ -1,149 +0,0 @@
-import { useCallback, useEffect, useRef, useState } from 'react';
-
-import { transcribeVoice } from '../../../lib/voiceApi';
-
-// Mobile-safe recording: iOS Safari 18.4+ supports webm/opus; older iOS needs mp4.
-const MIME_CANDIDATES = [
-  'audio/webm;codecs=opus',
-  'audio/webm',
-  'audio/mp4',
-  'audio/ogg;codecs=opus',
-  'audio/ogg',
-];
-
-function pickMime(): string {
-  for (const t of MIME_CANDIDATES) {
-    try {
-      if (typeof MediaRecorder !== 'undefined' && MediaRecorder.isTypeSupported(t)) return t;
-    } catch {
-      /* isTypeSupported can throw on some iOS versions */
-    }
-  }
-  return '';
-}
-
-export type VoiceInputState = 'idle' | 'recording' | 'transcribing';
-
-/**
- * Push-to-talk dictation. Records the mic, uploads to /api/voice/transcribe
- * (an OpenAI-compatible speech-to-text backend via the Express proxy), and
- * returns the transcript through onTranscript.
- */
-export function useVoiceInput(
-  onTranscript: (text: string, send?: boolean) => void,
-  onError?: (msg: string) => void,
-) {
-  const [state, setState] = useState<VoiceInputState>('idle');
-  const recorderRef = useRef<MediaRecorder | null>(null);
-  const chunksRef = useRef<Blob[]>([]);
-  const streamRef = useRef<MediaStream | null>(null);
-  const cancelledRef = useRef(false);
-  const startingRef = useRef(false);
-  // Whether the in-progress stop should auto-send the transcript (vs just fill the box).
-  const sendRef = useRef(false);
-
-  const stopTracks = () => {
-    streamRef.current?.getTracks().forEach((t) => t.stop());
-    streamRef.current = null;
-  };
-
-  // Stop the mic if the component unmounts mid-recording.
-  useEffect(() => {
-    cancelledRef.current = false;
-    return () => {
-      cancelledRef.current = true;
-      startingRef.current = false;
-      streamRef.current?.getTracks().forEach((t) => t.stop());
-      streamRef.current = null;
-      recorderRef.current = null;
-    };
-  }, []);
-
-  const start = useCallback(async () => {
-    if (startingRef.current || (recorderRef.current && recorderRef.current.state !== 'inactive')) return;
-    startingRef.current = true;
-    try {
-      const stream = await navigator.mediaDevices.getUserMedia({
-        audio: { echoCancellation: true, noiseSuppression: true },
-      });
-      if (cancelledRef.current) {
-        stream.getTracks().forEach((t) => t.stop());
-        return;
-      }
-      streamRef.current = stream;
-      const mimeType = pickMime();
-      const rec = mimeType ? new MediaRecorder(stream, { mimeType }) : new MediaRecorder(stream);
-      recorderRef.current = rec;
-      chunksRef.current = [];
-
-      rec.ondataavailable = (e) => {
-        if (e.data.size > 0) chunksRef.current.push(e.data);
-      };
-
-      rec.onstop = async () => {
-        stopTracks();
-        if (cancelledRef.current) return;
-        // Capture and clear the send intent for this stop before any async work.
-        const shouldSend = sendRef.current;
-        sendRef.current = false;
-        const type = rec.mimeType || 'audio/webm';
-        const blob = new Blob(chunksRef.current, { type });
-        if (blob.size < 800) {
-          setState('idle');
-          onError?.('Recording too short');
-          return;
-        }
-        setState('transcribing');
-        try {
-          const ext = type.includes('mp4') ? 'm4a' : type.includes('ogg') ? 'ogg' : 'webm';
-          const res = await transcribeVoice(blob, `recording.${ext}`);
-          if (!res.ok) throw new Error(`transcribe ${res.status}`);
-          const data = await res.json();
-          if (cancelledRef.current) return;
-          const text = String(data?.text || '').trim();
-          if (text) onTranscript(text, shouldSend);
-          else onError?.('No speech detected');
-        } catch (e) {
-          if (!cancelledRef.current) {
-            onError?.(`Transcription failed: ${e instanceof Error ? e.message : String(e)}`);
-          }
-        } finally {
-          if (!cancelledRef.current) setState('idle');
-        }
-      };
-
-      rec.start();
-      setState('recording');
-    } catch (e) {
-      recorderRef.current = null;
-      stopTracks();
-      if (cancelledRef.current) return;
-      const err = e as { name?: string; message?: string };
-      let msg = `Mic error: ${err?.message || e}`;
-      if (err?.name === 'NotAllowedError') msg = 'Microphone access denied.';
-      else if (err?.name === 'NotFoundError') msg = 'No microphone found.';
-      onError?.(msg);
-      setState('idle');
-    } finally {
-      startingRef.current = false;
-    }
-  }, [onTranscript, onError]);
-
-  // Stop recording. Pass { send: true } to auto-send the transcript once it's ready.
-  // Guard on the recorder's own state (not React state) so a double tap, or the mic
-  // and Send buttons both firing, can't call stop() on an already-inactive recorder.
-  const stop = useCallback((opts?: { send?: boolean }) => {
-    const rec = recorderRef.current;
-    if (rec && rec.state !== 'inactive') {
-      sendRef.current = opts?.send ?? false;
-      rec.stop();
-    }
-  }, []);
-
-  const toggle = useCallback(() => {
-    if (state === 'recording') stop();
-    else if (state === 'idle') start();
-  }, [state, start, stop]);
-
-  return { state, toggle, stop };
-}
--- a/src/components/chat/view/ChatInterface.tsx
+++ b/src/components/chat/view/ChatInterface.tsx
@@ -173,7 +173,6 @@ function ChatInterface({
    isDragActive,
    openImagePicker,
    handleSubmit,
-    handleVoiceTranscript,
    handleInputChange,
    handleKeyDown,
    handlePaste,
@@ -407,7 +406,6 @@ function ChatInterface({
          renderInputWithMentions={renderInputWithMentions}
          textareaRef={textareaRef}
          input={input}
-          onVoiceTranscript={handleVoiceTranscript}
          onInputChange={handleInputChange}
          onTextareaClick={handleTextareaClick}
          onTextareaKeyDown={handleKeyDown}
--- a/src/components/chat/view/subcomponents/ChatComposer.tsx
+++ b/src/components/chat/view/subcomponents/ChatComposer.tsx
@@ -1,5 +1,4 @@
 import { useTranslation } from 'react-i18next';
-import { useCallback, useEffect, useRef, useState } from 'react';
 import type {
  ChangeEvent,
  ClipboardEvent,
@@ -10,10 +9,8 @@ import type {
  RefObject,
  TouchEvent,
 } from 'react';
-import { ImageIcon, MessageSquareIcon, XIcon, ArrowDownIcon, Loader2 } from 'lucide-react';
+import { ImageIcon, MessageSquareIcon, XIcon, ArrowDownIcon } from 'lucide-react';

-import { useVoiceInput } from '../../hooks/useVoiceInput';
-import { useVoiceAvailable } from '../../hooks/useVoiceAvailable';
 import type { SessionActivity } from '../../../../hooks/useSessionProtection';
 import type { PendingPermissionRequest, PermissionMode } from '../../types/types';
 import {
@@ -30,7 +27,6 @@ import {
 import CommandMenu from './CommandMenu';
 import ActivityIndicator from './ActivityIndicator';
 import ImageAttachment from './ImageAttachment';
-import VoiceInputButton from './VoiceInputButton';
 import PermissionRequestsBanner from './PermissionRequestsBanner';
 import TokenUsageSummary from './TokenUsageSummary';

@@ -93,7 +89,6 @@ interface ChatComposerProps {
  renderInputWithMentions: (text: string) => ReactNode;
  textareaRef: RefObject<HTMLTextAreaElement>;
  input: string;
-  onVoiceTranscript?: (text: string, send?: boolean) => void;
  onInputChange: (event: ChangeEvent<HTMLTextAreaElement>) => void;
  onTextareaClick: (event: MouseEvent<HTMLTextAreaElement>) => void;
  onTextareaKeyDown: (event: KeyboardEvent<HTMLTextAreaElement>) => void;
@@ -147,7 +142,6 @@ export default function ChatComposer({
  renderInputWithMentions,
  textareaRef,
  input,
-  onVoiceTranscript,
  onInputChange,
  onTextareaClick,
  onTextareaKeyDown,
@@ -160,28 +154,6 @@ export default function ChatComposer({
  sendByCtrlEnter,
 }: ChatComposerProps) {
  const { t } = useTranslation('chat');
-
-  // Voice state is hosted here (not in the mic button) so the main Send button can stop
-  // recording and send the transcript in one tap, the way the mic button drops it in the box.
-  const voiceAvailable = useVoiceAvailable();
-  const [voiceError, setVoiceError] = useState<string | null>(null);
-  const voiceErrorTimer = useRef<ReturnType<typeof setTimeout> | null>(null);
-  const handleVoiceError = useCallback((msg: string) => {
-    setVoiceError(msg);
-    if (voiceErrorTimer.current) clearTimeout(voiceErrorTimer.current);
-    voiceErrorTimer.current = setTimeout(() => setVoiceError(null), 4000);
-  }, []);
-  useEffect(() => () => {
-    if (voiceErrorTimer.current) clearTimeout(voiceErrorTimer.current);
-  }, []);
-  const noopTranscript = useCallback(() => {}, []);
-  const { state: voiceState, toggle: voiceToggle, stop: voiceStop } = useVoiceInput(
-    onVoiceTranscript ?? noopTranscript,
-    handleVoiceError,
-  );
-  const isRecording = voiceState === 'recording';
-  const isTranscribing = voiceState === 'transcribing';
-
  const textareaRect = textareaRef.current?.getBoundingClientRect();
  const commandMenuPosition = {
    top: textareaRect ? Math.max(16, textareaRect.top - 316) : 0,
@@ -337,10 +309,6 @@ export default function ChatComposer({
              <ImageIcon />
            </PromptInputButton>

-            {onVoiceTranscript && voiceAvailable && (
-              <VoiceInputButton state={voiceState} onToggle={voiceToggle} errorMsg={voiceError} />
-            )}
-
            <button
              type="button"
              onClick={onModeSwitch}
@@ -419,21 +387,10 @@ export default function ChatComposer({
              {sendByCtrlEnter ? t('input.hintText.ctrlEnter') : t('input.hintText.enter')}
            </div>
            <PromptInputSubmit
-              onClick={
-                isLoading
-                  ? onAbortSession
-                  : isRecording
-                    ? (e: MouseEvent<HTMLButtonElement>) => {
-                        e.preventDefault();
-                        voiceStop({ send: true });
-                      }
-                    : undefined
-              }
-              disabled={isLoading ? false : isRecording ? false : isTranscribing ? true : !input.trim()}
+              onClick={isLoading ? onAbortSession : undefined}
+              disabled={!isLoading && !input.trim()}
              className="h-10 w-10 sm:h-10 sm:w-10"
-            >
-              {isTranscribing ? <Loader2 className="h-4 w-4 animate-spin" /> : undefined}
-            </PromptInputSubmit>
+            />
          </div>
        </PromptInputFooter>
      </PromptInput>
--- a/src/components/chat/view/subcomponents/MessageComponent.tsx
+++ b/src/components/chat/view/subcomponents/MessageComponent.tsx
@@ -15,7 +15,6 @@ import { Reasoning, ReasoningTrigger, ReasoningContent } from '../../../../share

 import { Markdown } from './Markdown';
 import MessageCopyControl from './MessageCopyControl';
-import MessageSpeakControl from './MessageSpeakControl';

 type DiffLine = {
  type: string;
@@ -416,9 +415,6 @@ const MessageComponent = memo(({ message, prevMessage, createDiff, onFileOpen, a
                {shouldShowAssistantCopyControl && (
                  <MessageCopyControl content={assistantCopyContent} messageType="assistant" />
                )}
-                {shouldShowAssistantCopyControl && (
-                  <MessageSpeakControl content={assistantCopyContent} />
-                )}
                {!isGrouped && <span>{formattedTime}</span>}
              </div>
            )}
--- a/src/components/chat/view/subcomponents/MessageSpeakControl.tsx
+++ b/src/components/chat/view/subcomponents/MessageSpeakControl.tsx
@@ -1,44 +0,0 @@
-import { Volume2, Loader2, Square } from 'lucide-react';
-import { useTranslation } from 'react-i18next';
-import { useTts } from '../../hooks/useTts';
-import { useVoiceAvailable } from '../../hooks/useVoiceAvailable';
-
-// Tap-to-speak button beside the copy control on assistant messages.
-// Renders nothing unless the optional voice feature is enabled.
-const MessageSpeakControl = ({ content }: { content: string }) => {
-  const { t } = useTranslation('chat');
-  const available = useVoiceAvailable();
-  const { state, toggle, error } = useTts(() => content);
-
-  if (!available) return null;
-
-  const title =
-    state === 'playing' ? t('voice.stopSpeaking') : state === 'loading' ? t('voice.loading') : t('voice.speak');
-
-  return (
-    <span className="relative inline-flex">
-      {error && (
-        <span className="absolute bottom-full left-1/2 z-10 mb-1 max-w-[240px] -translate-x-1/2 whitespace-normal rounded bg-red-600 px-2 py-1 text-center text-xs text-white shadow-lg">
-          {error}
-        </span>
-      )}
-      <button
-        type="button"
-        onClick={toggle}
-        title={title}
-        aria-label={title}
-        className="inline-flex items-center gap-1 rounded px-1 py-0.5 text-gray-400 transition-colors hover:text-gray-600 dark:text-gray-500 dark:hover:text-gray-300"
-      >
-        {state === 'playing' ? (
-          <Square className="h-3.5 w-3.5" />
-        ) : state === 'loading' ? (
-          <Loader2 className="h-3.5 w-3.5 animate-spin" />
-        ) : (
-          <Volume2 className="h-3.5 w-3.5" />
-        )}
-      </button>
-    </span>
-  );
-};
-
-export default MessageSpeakControl;
--- a/src/components/chat/view/subcomponents/VoiceInputButton.tsx
+++ b/src/components/chat/view/subcomponents/VoiceInputButton.tsx
@@ -1,46 +0,0 @@
-import { useTranslation } from 'react-i18next';
-import { Mic, Square, Loader2 } from 'lucide-react';
-
-import { PromptInputButton } from '../../../../shared/view/ui';
-import type { VoiceInputState } from '../../hooks/useVoiceInput';
-
-type Props = {
-  state: VoiceInputState;
-  onToggle: () => void;
-  errorMsg?: string | null;
-};
-
-// Push-to-talk mic button (presentational). Recording state and the stop-and-send action
-// are owned by the composer so the main Send button can drive them too. This button just
-// starts recording and, while recording, stops and drops the transcript into the input box.
-export default function VoiceInputButton({ state, onToggle, errorMsg }: Props) {
-  const { t } = useTranslation('chat');
-
-  const icon =
-    state === 'recording' ? (
-      <Square className="text-red-500" />
-    ) : state === 'transcribing' ? (
-      <Loader2 className="animate-spin" />
-    ) : (
-      <Mic />
-    );
-
-  return (
-    <span className="relative inline-flex">
-      {errorMsg && (
-        <span className="absolute bottom-full left-1/2 mb-1 -translate-x-1/2 whitespace-nowrap rounded bg-red-600 px-2 py-1 text-xs text-white shadow-lg">
-          {errorMsg}
-        </span>
-      )}
-      <PromptInputButton
-        tooltip={{ content: state === 'recording' ? t('voice.stopRecording') : t('voice.input') }}
-        onClick={(e: { preventDefault: () => void }) => {
-          e.preventDefault();
-          onToggle();
-        }}
-      >
-        {icon}
-      </PromptInputButton>
-    </span>
-  );
-}
--- a/src/components/quick-settings-panel/constants.ts
+++ b/src/components/quick-settings-panel/constants.ts
@@ -4,7 +4,6 @@ import {
  Eye,
  Languages,
  Maximize2,
-  Mic,
 } from 'lucide-react';
 import type { PreferenceToggleItem } from './types';

@@ -55,9 +54,4 @@ export const INPUT_SETTING_TOGGLES: PreferenceToggleItem[] = [
    labelKey: 'quickSettings.sendByCtrlEnter',
    icon: Languages,
  },
-  {
-    key: 'voiceEnabled',
-    labelKey: 'quickSettings.voiceEnabled',
-    icon: Mic,
-  },
 ];
--- a/src/components/quick-settings-panel/types.ts
+++ b/src/components/quick-settings-panel/types.ts
@@ -6,8 +6,7 @@ export type PreferenceToggleKey =
  | 'showRawParameters'
  | 'showThinking'
  | 'autoScrollToBottom'
-  | 'sendByCtrlEnter'
-  | 'voiceEnabled';
+  | 'sendByCtrlEnter';

 export type QuickSettingsPreferences = Record<PreferenceToggleKey, boolean>;

--- a/src/components/quick-settings-panel/view/QuickSettingsPanelView.tsx
+++ b/src/components/quick-settings-panel/view/QuickSettingsPanelView.tsx
@@ -27,14 +27,12 @@ export default function QuickSettingsPanelView() {
    showThinking: preferences.showThinking,
    autoScrollToBottom: preferences.autoScrollToBottom,
    sendByCtrlEnter: preferences.sendByCtrlEnter,
-    voiceEnabled: preferences.voiceEnabled,
  }), [
    preferences.autoExpandTools,
    preferences.autoScrollToBottom,
    preferences.sendByCtrlEnter,
    preferences.showRawParameters,
    preferences.showThinking,
-    preferences.voiceEnabled,
  ]);

  const handlePreferenceChange = useCallback(
--- a/src/components/settings/types/types.ts
+++ b/src/components/settings/types/types.ts
@@ -3,7 +3,7 @@ import type { Dispatch, SetStateAction } from 'react';
 import type { LLMProvider } from '../../../types/app';
 import type { ProviderAuthStatus } from '../../provider-auth/types';

-export type SettingsMainTab = 'agents' | 'appearance' | 'git' | 'api' | 'voice' | 'tasks' | 'browser' | 'notifications' | 'plugins' | 'about';
+export type SettingsMainTab = 'agents' | 'appearance' | 'git' | 'api' | 'tasks' | 'browser' | 'notifications' | 'plugins' | 'about';
 export type AgentProvider = LLMProvider;
 export type AgentCategory = 'account' | 'permissions' | 'mcp' | 'skills';
 export type ProjectSortOrder = 'name' | 'date';
--- a/src/components/settings/view/Settings.tsx
+++ b/src/components/settings/view/Settings.tsx
@@ -7,7 +7,6 @@ import SettingsSidebar from '../view/SettingsSidebar';
 import AgentsSettingsTab from '../view/tabs/agents-settings/AgentsSettingsTab';
 import AppearanceSettingsTab from '../view/tabs/AppearanceSettingsTab';
 import CredentialsSettingsTab from '../view/tabs/api-settings/CredentialsSettingsTab';
-import VoiceSettingsTab from '../view/tabs/VoiceSettingsTab';
 import GitSettingsTab from '../view/tabs/git-settings/GitSettingsTab';
 import BrowserUseSettingsTab from '../view/tabs/browser-use-settings/BrowserUseSettingsTab';
 import NotificationsSettingsTab from '../view/tabs/NotificationsSettingsTab';
@@ -158,8 +157,6 @@ function Settings({ isOpen, onClose, projects = [], initialTab = 'agents' }: Set

              {activeTab === 'api' && <CredentialsSettingsTab />}

-              {activeTab === 'voice' && <VoiceSettingsTab />}
-
              {activeTab === 'plugins' && <PluginSettingsTab />}

              {activeTab === 'about' && <AboutTab />}
--- a/src/components/settings/view/SettingsSidebar.tsx
+++ b/src/components/settings/view/SettingsSidebar.tsx
@@ -1,6 +1,5 @@
-import { Bell, Bot, GitBranch, Info, Key, ListChecks, Mic, MonitorPlay, Palette, Puzzle } from 'lucide-react';
+import { Bell, Bot, GitBranch, Info, Key, ListChecks, MonitorPlay, Palette, Puzzle } from 'lucide-react';
 import { useTranslation } from 'react-i18next';
-
 import { cn } from '../../../lib/utils';
 import { PillBar, Pill } from '../../../shared/view/ui';
 import type { SettingsMainTab } from '../types/types';
@@ -21,7 +20,6 @@ const NAV_ITEMS: NavItem[] = [
  { id: 'appearance', labelKey: 'mainTabs.appearance', icon: Palette },
  { id: 'git', labelKey: 'mainTabs.git', icon: GitBranch },
  { id: 'api', labelKey: 'mainTabs.apiTokens', icon: Key },
-  { id: 'voice', labelKey: 'mainTabs.voice', icon: Mic },
  { id: 'tasks', labelKey: 'mainTabs.tasks', icon: ListChecks },
  { id: 'browser', labelKey: 'mainTabs.browser', icon: MonitorPlay },
  { id: 'plugins', labelKey: 'mainTabs.plugins', icon: Puzzle },
--- a/src/components/settings/view/tabs/VoiceSettingsTab.tsx
+++ b/src/components/settings/view/tabs/VoiceSettingsTab.tsx
@@ -1,88 +0,0 @@
-import type { InputHTMLAttributes } from 'react';
-import { useTranslation } from 'react-i18next';
-import SettingsSection from '../SettingsSection';
-import SettingsToggle from '../SettingsToggle';
-import { useUiPreferences } from '../../../../hooks/useUiPreferences';
-import { useVoiceConfig } from '../../../../hooks/useVoiceConfig';
-
-const inputClass =
-  'w-full rounded-md border border-border bg-background px-3 py-2 text-sm text-foreground placeholder:text-muted-foreground focus:outline-none focus:ring-2 focus:ring-ring';
-
-function Field({ label, ...props }: { label: string } & InputHTMLAttributes<HTMLInputElement>) {
-  return (
-    <label className="block space-y-1">
-      <span className="text-sm font-medium text-foreground">{label}</span>
-      <input className={inputClass} {...props} />
-    </label>
-  );
-}
-
-export default function VoiceSettingsTab() {
-  const { t } = useTranslation('settings');
-  const { preferences, setPreference } = useUiPreferences();
-  const { config, update } = useVoiceConfig();
-
-  return (
-    <div className="space-y-8">
-      <SettingsSection title={t('voiceSettings.title')} description={t('voiceSettings.description')}>
-        <div className="flex items-center justify-between rounded-lg border border-border p-3">
-          <div className="pr-3">
-            <div className="text-sm font-medium text-foreground">{t('voiceSettings.enable')}</div>
-            <div className="text-xs text-muted-foreground">{t('voiceSettings.enableDescription')}</div>
-          </div>
-          <SettingsToggle
-            checked={preferences.voiceEnabled}
-            onChange={(v) => setPreference('voiceEnabled', v)}
-            ariaLabel={t('voiceSettings.enable')}
-          />
-        </div>
-      </SettingsSection>
-
-      <SettingsSection title={t('voiceSettings.backendTitle')} description={t('voiceSettings.backendDescription')}>
-        <div className="space-y-4">
-          <Field
-            label={t('voiceSettings.baseUrl')}
-            placeholder="https://api.openai.com/v1"
-            value={config.baseUrl}
-            onChange={(e) => update({ baseUrl: e.target.value })}
-          />
-          <Field
-            label={t('voiceSettings.apiKey')}
-            type="password"
-            autoComplete="off"
-            placeholder="sk-…"
-            value={config.apiKey}
-            onChange={(e) => update({ apiKey: e.target.value })}
-          />
-          <div className="grid grid-cols-1 gap-4 sm:grid-cols-4">
-            <Field
-              label={t('voiceSettings.sttModel')}
-              placeholder="whisper-1"
-              value={config.sttModel}
-              onChange={(e) => update({ sttModel: e.target.value })}
-            />
-            <Field
-              label={t('voiceSettings.ttsModel')}
-              placeholder="tts-1"
-              value={config.ttsModel}
-              onChange={(e) => update({ ttsModel: e.target.value })}
-            />
-            <Field
-              label={t('voiceSettings.voice')}
-              placeholder="alloy"
-              value={config.ttsVoice}
-              onChange={(e) => update({ ttsVoice: e.target.value })}
-            />
-            <Field
-              label={t('voiceSettings.format')}
-              placeholder="mp3"
-              value={config.ttsFormat}
-              onChange={(e) => update({ ttsFormat: e.target.value })}
-            />
-          </div>
-          <p className="text-xs text-muted-foreground">{t('voiceSettings.note')}</p>
-        </div>
-      </SettingsSection>
-    </div>
-  );
-}
--- a/src/hooks/useUiPreferences.ts
+++ b/src/hooks/useUiPreferences.ts
@@ -7,7 +7,6 @@ type UiPreferences = {
  autoScrollToBottom: boolean;
  sendByCtrlEnter: boolean;
  sidebarVisible: boolean;
-  voiceEnabled: boolean;
 };

 type UiPreferenceKey = keyof UiPreferences;
@@ -40,7 +39,6 @@ const DEFAULTS: UiPreferences = {
  autoScrollToBottom: true,
  sendByCtrlEnter: false,
  sidebarVisible: true,
-  voiceEnabled: false,
 };

 const PREFERENCE_KEYS = Object.keys(DEFAULTS) as UiPreferenceKey[];
--- a/src/hooks/useVoiceConfig.ts
+++ b/src/hooks/useVoiceConfig.ts
@@ -1,68 +0,0 @@
-import { useState } from 'react';
-
-export type VoiceConfig = {
-  baseUrl: string;
-  apiKey: string;
-  sttModel: string;
-  ttsModel: string;
-  ttsVoice: string;
-  ttsFormat: string;
-};
-
-const STORAGE_KEY = 'voiceConfig';
-export const VOICE_CONFIG_SYNC_EVENT = 'voice-config:sync';
-const DEFAULTS: VoiceConfig = { baseUrl: '', apiKey: '', sttModel: '', ttsModel: '', ttsVoice: '', ttsFormat: '' };
-
-export function readVoiceConfig(): VoiceConfig {
-  try {
-    const raw = localStorage.getItem(STORAGE_KEY);
-    if (!raw) return { ...DEFAULTS };
-    const parsed = JSON.parse(raw);
-    if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) return { ...DEFAULTS };
-    const config = { ...DEFAULTS };
-    for (const key of Object.keys(DEFAULTS) as (keyof VoiceConfig)[]) {
-      if (typeof parsed[key] === 'string') config[key] = parsed[key];
-    }
-    return config;
-  } catch {
-    return { ...DEFAULTS };
-  }
-}
-
-// Headers the voice proxy reads to target a per-user OpenAI-compatible backend.
-// Empty fields are omitted so the server's env defaults apply.
-export function voiceConfigHeaders(): Record<string, string> {
-  if (typeof window === 'undefined') return {};
-  const c = readVoiceConfig();
-  const h: Record<string, string> = {};
-  if (c.apiKey) h['x-voice-api-key'] = c.apiKey;
-  if (c.sttModel) h['x-voice-stt-model'] = c.sttModel;
-  if (c.ttsModel) h['x-voice-tts-model'] = c.ttsModel;
-  if (c.ttsVoice) h['x-voice-tts-voice'] = c.ttsVoice;
-  if (c.ttsFormat.trim()) h['x-voice-tts-format'] = c.ttsFormat.trim();
-  return h;
-}
-
-export function useVoiceConfig() {
-  const [config, setConfig] = useState<VoiceConfig>(() =>
-    typeof window === 'undefined' ? { ...DEFAULTS } : readVoiceConfig(),
-  );
-
-  const update = (patch: Partial<VoiceConfig>) => {
-    setConfig((prev) => {
-      const next = { ...prev, ...patch };
-      try {
-        const stored: Partial<VoiceConfig> = { ...next };
-        if (next.ttsFormat.trim()) stored.ttsFormat = next.ttsFormat.trim();
-        else delete stored.ttsFormat;
-        localStorage.setItem(STORAGE_KEY, JSON.stringify(stored));
-        window.dispatchEvent(new Event(VOICE_CONFIG_SYNC_EVENT));
-      } catch {
-        /* ignore persistence errors */
-      }
-      return next;
-    });
-  };
-
-  return { config, update };
-}
--- a/src/i18n/locales/en/chat.json
+++ b/src/i18n/locales/en/chat.json
@@ -122,14 +122,6 @@
      }
    }
  },
-  "voice": {
-    "input": "Voice input",
-    "stopRecording": "Stop recording",
-    "transcribing": "Transcribing…",
-    "speak": "Read aloud",
-    "stopSpeaking": "Stop",
-    "loading": "Loading…"
-  },
  "input": {
    "placeholder": "Type / for commands, @ for files, or ask {{provider}} anything...",
    "placeholderDefault": "Type your message...",
--- a/src/i18n/locales/en/settings.json
+++ b/src/i18n/locales/en/settings.json
@@ -50,21 +50,6 @@
    "resetToDefaults": "Reset to Defaults",
    "cancelChanges": "Cancel Changes"
  },
-  "voiceSettings": {
-    "title": "Voice",
-    "description": "Speech-to-text input and read-aloud, via an OpenAI-compatible audio backend.",
-    "enable": "Enable voice",
-    "enableDescription": "Show the mic button and the read-aloud button on messages.",
-    "backendTitle": "Backend",
-    "backendDescription": "Point at OpenAI, Groq, or a local server (LocalAI, Speaches, Kokoro-FastAPI). Leave blank to use the server default.",
-    "baseUrl": "Base URL",
-    "apiKey": "API key",
-    "sttModel": "Speech-to-text model",
-    "ttsModel": "Text-to-speech model",
-    "voice": "Voice",
-    "format": "Audio format",
-    "note": "A custom base URL is called directly by your browser and must allow browser CORS requests. Leave it blank to use the server-configured backend."
-  },
  "quickSettings": {
    "title": "Quick Settings",
    "sections": {
@@ -79,7 +64,6 @@
    "showThinking": "Show thinking",
    "autoScrollToBottom": "Auto-scroll to bottom",
    "sendByCtrlEnter": "Send by Ctrl+Enter",
-    "voiceEnabled": "Voice (mic + read aloud)",
    "sendByCtrlEnterDescription": "When enabled, pressing Ctrl+Enter will send the message instead of just Enter. This is useful for IME users to avoid accidental sends.",
    "dragHandle": {
      "dragging": "Dragging handle",
@@ -110,7 +94,6 @@
    "appearance": "Appearance",
    "git": "Git",
    "apiTokens": "API & Tokens",
-    "voice": "Voice",
    "tasks": "Tasks",
    "browser": "Browser",
    "notifications": "Notifications",
--- a/src/lib/voiceApi.ts
+++ b/src/lib/voiceApi.ts
@@ -1,60 +0,0 @@
-import { authenticatedFetch } from '../utils/api';
-import { readVoiceConfig, voiceConfigHeaders } from '../hooks/useVoiceConfig';
-
-function directUrl(baseUrl: string, path: string): string {
-  return `${baseUrl.replace(/\/$/, '')}${path}`;
-}
-
-export function voiceConfigSignature(): string {
-  return JSON.stringify(readVoiceConfig());
-}
-
-export function transcribeVoice(blob: Blob, filename: string): Promise<Response> {
-  const config = readVoiceConfig();
-  const body = new FormData();
-
-  if (config.baseUrl.trim()) {
-    body.append('file', blob, filename);
-    body.append('model', config.sttModel || 'whisper-1');
-    return fetch(directUrl(config.baseUrl.trim(), '/audio/transcriptions'), {
-      method: 'POST',
-      headers: config.apiKey ? { Authorization: `Bearer ${config.apiKey}` } : {},
-      body,
-    });
-  }
-
-  body.append('audio', blob, filename);
-  return authenticatedFetch('/api/voice/transcribe', {
-    method: 'POST',
-    headers: voiceConfigHeaders(),
-    body,
-  });
-}
-
-export function synthesizeVoice(text: string, signal: AbortSignal): Promise<Response> {
-  const config = readVoiceConfig();
-
-  if (config.baseUrl.trim()) {
-    return fetch(directUrl(config.baseUrl.trim(), '/audio/speech'), {
-      method: 'POST',
-      headers: {
-        'Content-Type': 'application/json',
-        ...(config.apiKey ? { Authorization: `Bearer ${config.apiKey}` } : {}),
-      },
-      body: JSON.stringify({
-        model: config.ttsModel || 'tts-1',
-        voice: config.ttsVoice || 'alloy',
-        input: text,
-        ...(config.ttsFormat.trim() ? { response_format: config.ttsFormat.trim() } : {}),
-      }),
-      signal,
-    });
-  }
-
-  return authenticatedFetch('/api/voice/tts', {
-    method: 'POST',
-    body: JSON.stringify({ text }),
-    headers: voiceConfigHeaders(),
-    signal,
-  });
-}
--- a/src/lib/voicePlayer.ts
+++ b/src/lib/voicePlayer.ts
@@ -1,196 +0,0 @@
-import { synthesizeVoice, voiceConfigSignature } from './voiceApi';
-
-// A single app-level audio player for read-aloud. It owns one <audio> element, lives
-// outside the React tree, and caches generated audio by content. Because playback is not
-// tied to a component, switching chats or re-rendering a message can't revoke the blob URL
-// out from under it (the cause of mid-play cutoffs). v1 plays one message at a time
-// (a new play replaces the current one); the design leaves room for a queue later.
-
-export type VoicePlayState = 'idle' | 'loading' | 'playing';
-
-export type VoiceSnapshot = { state: VoicePlayState; error: string | null };
-
-const IDLE: VoiceSnapshot = { state: 'idle', error: null };
-const CACHE_MAX = 24;
-const CLIENT_TIMEOUT_MS = 330000; // backstop; the server proxy already times out at 5 min
-
-// Stable id / cache key from the text and voice settings that affect its audio (djb2).
-export function voiceId(content: string, signature = voiceConfigSignature()): string {
-  const input = JSON.stringify([content, signature]);
-  let h = 5381;
-  for (let i = 0; i < input.length; i++) h = (((h << 5) + h) + input.charCodeAt(i)) | 0;
-  return (h >>> 0).toString(36);
-}
-
-class VoicePlayer {
-  private audio: HTMLAudioElement | null = null;
-  private unlocked = false;
-  private cache = new Map<string, string>(); // id -> blob URL (insertion order = LRU)
-  private currentId: string | null = null;
-  private state: VoicePlayState = 'idle';
-  private errorId: string | null = null;
-  private errorMsg: string | null = null;
-  private token = 0; // bumps to ignore stale in-flight results
-  private activeController: AbortController | null = null; // aborts the in-flight TTS fetch
-  private errorTimer: ReturnType<typeof setTimeout> | null = null;
-  private listeners = new Set<() => void>();
-
-  subscribe(listener: () => void): () => void {
-    this.listeners.add(listener);
-    return () => {
-      this.listeners.delete(listener);
-    };
-  }
-
-  private emit() {
-    this.listeners.forEach((l) => l());
-  }
-
-  getSnapshot(id: string): VoiceSnapshot {
-    const state = this.currentId === id ? this.state : 'idle';
-    const error = this.errorId === id ? this.errorMsg : null;
-    if (state === 'idle' && error === null) return IDLE;
-    return { state, error };
-  }
-
-  private ensureAudio(): HTMLAudioElement {
-    if (!this.audio) {
-      const audio = new Audio();
-      audio.addEventListener('ended', () => this.onEnded());
-      audio.addEventListener('error', () => {
-        // Only meaningful while we believe we're playing.
-        if (this.state === 'playing') this.onEnded();
-      });
-      this.audio = audio;
-    }
-    return this.audio;
-  }
-
-  // Call synchronously from the click handler so iOS grants the (reused) element playback.
-  unlock() {
-    if (this.unlocked) return;
-    const audio = this.ensureAudio();
-    try {
-      const p = audio.play();
-      if (p && typeof p.catch === 'function') p.catch(() => {});
-      audio.pause();
-    } catch {
-      /* priming attempt; ignore */
-    }
-    this.unlocked = true;
-  }
-
-  toggle(content: string) {
-    const id = voiceId(content);
-    if (this.currentId === id && (this.state === 'playing' || this.state === 'loading')) {
-      this.stop();
-      return;
-    }
-    void this.play(id, content);
-  }
-
-  stop() {
-    this.token++; // ignore any stale in-flight result
-    this.abortActive(); // and actually cancel the network request
-    if (this.audio) this.audio.pause();
-    this.state = 'idle';
-    this.currentId = null;
-    this.emit();
-  }
-
-  private abortActive() {
-    if (this.activeController) {
-      this.activeController.abort();
-      this.activeController = null;
-    }
-  }
-
-  private onEnded() {
-    this.state = 'idle';
-    this.currentId = null;
-    this.emit();
-    // (queue auto-advance would hook in here)
-  }
-
-  private setError(id: string, msg: string) {
-    this.state = 'idle';
-    this.currentId = id;
-    this.errorId = id;
-    this.errorMsg = msg;
-    this.emit();
-    if (this.errorTimer) clearTimeout(this.errorTimer);
-    this.errorTimer = setTimeout(() => {
-      if (this.errorId === id) {
-        this.errorId = null;
-        this.errorMsg = null;
-        if (this.currentId === id) this.currentId = null;
-        this.emit();
-      }
-    }, 6000);
-  }
-
-  private async play(id: string, content: string) {
-    const audio = this.ensureAudio();
-    audio.pause();
-    this.currentId = id;
-    this.errorId = null;
-    this.errorMsg = null;
-    this.state = 'loading';
-    this.emit();
-
-    const myToken = ++this.token;
-    this.abortActive(); // cancel any request this play supersedes
-
-    try {
-      let url = this.cache.get(id);
-      if (!url) {
-        const controller = new AbortController();
-        this.activeController = controller;
-        const timer = setTimeout(() => controller.abort(), CLIENT_TIMEOUT_MS);
-        const res = await synthesizeVoice(content, controller.signal).finally(() => {
-          clearTimeout(timer);
-          if (this.activeController === controller) this.activeController = null;
-        });
-        if (myToken !== this.token) return; // superseded by another play/stop
-        if (!res.ok) {
-          let msg = `Read-aloud failed (${res.status})`;
-          try {
-            const j = await res.json();
-            if (j?.error) msg = String(j.error);
-          } catch {
-            /* non-JSON error body */
-          }
-          throw new Error(msg);
-        }
-        const blob = await res.blob();
-        if (myToken !== this.token) return;
-        url = URL.createObjectURL(blob);
-        this.cacheSet(id, url);
-      }
-      if (myToken !== this.token) return;
-      audio.src = url;
-      audio.load();
-      await audio.play();
-      if (myToken !== this.token) return;
-      this.state = 'playing';
-      this.emit();
-    } catch (e) {
-      if (myToken !== this.token) return;
-      const aborted = e instanceof Error && e.name === 'AbortError';
-      this.setError(id, aborted ? 'Read-aloud timed out.' : e instanceof Error ? e.message : 'Read-aloud failed');
-    }
-  }
-
-  private cacheSet(id: string, url: string) {
-    this.cache.set(id, url);
-    while (this.cache.size > CACHE_MAX) {
-      const oldest = this.cache.keys().next().value as string | undefined;
-      if (oldest === undefined) break;
-      const oldUrl = this.cache.get(oldest);
-      this.cache.delete(oldest);
-      if (oldUrl && oldUrl !== this.audio?.src) URL.revokeObjectURL(oldUrl);
-    }
-  }
-}
-
-export const voicePlayer = new VoicePlayer();
Author	SHA1	Message	Date
Haile	f8298e5df5	Merge branch 'main' into feat/pending-tool-request-sound	2026-06-25 15:55:36 +03:00
Haileyesus	43d3269b38	fix(chat): notify for background permission requests Permission audio was guarded by the visible session check. Tool approvals in background sessions were therefore silent. Keep banner state session-scoped while playing the existing sound globally.	2026-06-25 15:54:05 +03:00
Haileyesus	16be1d0f7b	fix(chat): preserve rehydrated permission prompts Session navigation restores pending approvals through chat.subscribe. Provider synchronization could clear that restored state afterward. This made banner visibility depend on response timing. Keep cleanup session-scoped and match acknowledgments against the current view.	2026-06-24 22:20:34 +03:00
Haileyesus	63a4869325	feat: play sound for pending tool requests Reuse the existing notification tone when a chat run pauses for actionable tool approval. Track pending permission state inside the realtime handler so the sound plays when approval first becomes pending, including subscribe recovery, without replaying for inline plan prompts or duplicate websocket events.	2026-06-23 14:06:55 +03:00