refactor(voice): provider-agnostic backend and in-app config

Switches the voice proxy to the OpenAI audio API (/v1/audio/transcriptions and /v1/audio/speech) so it works with OpenAI, Groq, or a local server. Adds a Settings -> Voice tab (base URL, API key, models, voice) plus a Quick Settings toggle, and removes the bundled Python sidecar. Review fixes: stop mic tracks on unmount; clear the global TTS stop handler and revoke leaked blob URLs; add fetch timeouts in the proxy; surface mic errors in the button; trim transcripts before appending them; and drop the repo-wide wav ignore.
2026-06-25 12:16:00 +08:00 · 2026-06-09 10:05:06 +01:00
parent d05585e1f4
commit 711936d279
21 changed files with 367 additions and 365 deletions
--- a/src/components/chat/hooks/useVoiceInput.ts
+++ b/src/components/chat/hooks/useVoiceInput.ts
@@ -1,5 +1,6 @@
-import { useCallback, useRef, useState } from 'react';
+import { useCallback, useEffect, useRef, useState } from 'react';
 import { authenticatedFetch } from '../../../utils/api';
+import { voiceConfigHeaders } from '../../../hooks/useVoiceConfig';

 // Mobile-safe recording: iOS Safari 18.4+ supports webm/opus; older iOS needs mp4.
 const MIME_CANDIDATES = [
@@ -39,6 +40,15 @@ export function useVoiceInput(onTranscript: (text: string) => void, onError?: (m
    streamRef.current = null;
  };

+  // On unmount, stop every track of the active mic stream and clear the stream/recorder refs.
+  useEffect(() => {
+    return () => {
+      streamRef.current?.getTracks().forEach((t) => t.stop());
+      streamRef.current = null;
+      recorderRef.current = null;
+    };
+  }, []);
+
  const start = useCallback(async () => {
    try {
      const stream = await navigator.mediaDevices.getUserMedia({
@@ -68,7 +78,11 @@ export function useVoiceInput(onTranscript: (text: string) => void, onError?: (m
          const ext = type.includes('mp4') ? 'm4a' : type.includes('ogg') ? 'ogg' : 'webm';
          const fd = new FormData();
          fd.append('audio', blob, `recording.${ext}`);
-          const res = await authenticatedFetch('/api/voice/transcribe', { method: 'POST', body: fd });
+          const res = await authenticatedFetch('/api/voice/transcribe', {
+            method: 'POST',
+            body: fd,
+            headers: voiceConfigHeaders(),
+          });
          if (!res.ok) throw new Error(`transcribe ${res.status}`);
          const data = await res.json();
          const text = String(data?.text || '').trim();