fix(voice): harden recording and backend behavior

Redirects could bypass the backend URL guard, and TTS playback waited for the audio to buffer fully before starting. Recordings could overlap, or finish after the component had already been torn down. The voice controls also ignored whether a voice backend was actually configured. Explicit formats and config-aware cache keys prevent stale audio after a settings change.
2026-06-26 13:35:49 +08:00 · 2026-06-25 16:35:30 +03:00
parent b0a49120cc
commit af16d8ebdc
5 changed files with 108 additions and 22 deletions
--- a/src/components/chat/hooks/useVoiceAvailable.ts
+++ b/src/components/chat/hooks/useVoiceAvailable.ts
@@ -1,11 +1,39 @@
 import { useEffect, useState } from 'react';

+import { authenticatedFetch } from '../../../utils/api';
+import { VOICE_CONFIG_SYNC_EVENT, voiceConfigHeaders } from '../../../hooks/useVoiceConfig';
+
 // Voice UI is gated on the `voiceEnabled` UI preference (toggled in Quick Settings /
-// the Settings modal). This is a lightweight read-only view of that preference so the
-// mic/speak controls can hide themselves, kept in sync via the same events
-// useUiPreferences emits. No server probe.
+// the Settings modal) and a configured voice backend.
 const STORAGE_KEY = 'uiPreferences';
 const SYNC_EVENT = 'ui-preferences:sync';
+const healthCache = new Map<string, boolean>();
+const healthRequests = new Map<string, Promise<boolean>>();
+
+function checkVoiceHealth(): Promise<boolean> {
+  const baseUrl = voiceConfigHeaders()['x-voice-base-url'];
+  const signature = baseUrl || '';
+  if (healthCache.has(signature)) return Promise.resolve(healthCache.get(signature) ?? false);
+  const pending = healthRequests.get(signature);
+  if (pending) return pending;
+  const request = authenticatedFetch('/api/voice/health', {
+    headers: baseUrl ? { 'x-voice-base-url': baseUrl } : {},
+  })
+    .then(async (response) => {
+      if (!response.ok) throw new Error(`Voice health check failed (${response.status})`);
+      const data = await response.json();
+      return data?.configured === true;
+    })
+    .then((available) => {
+      healthCache.set(signature, available);
+      return available;
+    })
+    .finally(() => {
+      healthRequests.delete(signature);
+    });
+  healthRequests.set(signature, request);
+  return request;
+}

 function readVoiceEnabled(): boolean {
  try {
@@ -22,6 +50,7 @@ export function useVoiceAvailable(): boolean {
  const [enabled, setEnabled] = useState<boolean>(() =>
    typeof window === 'undefined' ? false : readVoiceEnabled(),
  );
+  const [available, setAvailable] = useState(false);

  useEffect(() => {
    const update = () => setEnabled(readVoiceEnabled());
@@ -33,5 +62,31 @@ export function useVoiceAvailable(): boolean {
    };
  }, []);

-  return enabled;
+  useEffect(() => {
+    let active = true;
+    let requestId = 0;
+
+    const check = async () => {
+      if (!enabled) {
+        setAvailable(false);
+        return;
+      }
+      const id = ++requestId;
+      try {
+        const result = await checkVoiceHealth();
+        if (active && id === requestId) setAvailable(result);
+      } catch {
+        if (active && id === requestId) setAvailable(false);
+      }
+    };
+
+    void check();
+    window.addEventListener(VOICE_CONFIG_SYNC_EVENT, check);
+    return () => {
+      active = false;
+      window.removeEventListener(VOICE_CONFIG_SYNC_EVENT, check);
+    };
+  }, [enabled]);
+
+  return enabled && available;
 }
--- a/src/components/chat/hooks/useVoiceInput.ts
+++ b/src/components/chat/hooks/useVoiceInput.ts
@@ -1,4 +1,5 @@
 import { useCallback, useEffect, useRef, useState } from 'react';
+
 import { authenticatedFetch } from '../../../utils/api';
 import { voiceConfigHeaders } from '../../../hooks/useVoiceConfig';

@@ -37,6 +38,8 @@ export function useVoiceInput(
  const recorderRef = useRef<MediaRecorder | null>(null);
  const chunksRef = useRef<Blob[]>([]);
  const streamRef = useRef<MediaStream | null>(null);
+  const cancelledRef = useRef(false);
+  const startingRef = useRef(false);
  // Whether the in-progress stop should auto-send the transcript (vs just fill the box).
  const sendRef = useRef(false);

@@ -47,7 +50,10 @@ export function useVoiceInput(

  // Stop the mic if the component unmounts mid-recording.
  useEffect(() => {
+    cancelledRef.current = false;
    return () => {
+      cancelledRef.current = true;
+      startingRef.current = false;
      streamRef.current?.getTracks().forEach((t) => t.stop());
      streamRef.current = null;
      recorderRef.current = null;
@@ -55,10 +61,17 @@ export function useVoiceInput(
  }, []);

  const start = useCallback(async () => {
+    if (startingRef.current || (recorderRef.current && recorderRef.current.state !== 'inactive')) return;
+    startingRef.current = true;
+    let recordingCancelled = false;
    try {
      const stream = await navigator.mediaDevices.getUserMedia({
        audio: { echoCancellation: true, noiseSuppression: true },
      });
+      if (cancelledRef.current) {
+        stream.getTracks().forEach((t) => t.stop());
+        return;
+      }
      streamRef.current = stream;
      const mimeType = pickMime();
      const rec = mimeType ? new MediaRecorder(stream, { mimeType }) : new MediaRecorder(stream);
@@ -71,6 +84,7 @@ export function useVoiceInput(

      rec.onstop = async () => {
        stopTracks();
+        if (recordingCancelled || cancelledRef.current) return;
        // Capture and clear the send intent for this stop before any async work.
        const shouldSend = sendRef.current;
        sendRef.current = false;
@@ -93,25 +107,34 @@ export function useVoiceInput(
          });
          if (!res.ok) throw new Error(`transcribe ${res.status}`);
          const data = await res.json();
+          if (recordingCancelled || cancelledRef.current) return;
          const text = String(data?.text || '').trim();
          if (text) onTranscript(text, shouldSend);
          else onError?.('No speech detected');
        } catch (e) {
-          onError?.(`Transcription failed: ${e instanceof Error ? e.message : String(e)}`);
+          if (!recordingCancelled && !cancelledRef.current) {
+            onError?.(`Transcription failed: ${e instanceof Error ? e.message : String(e)}`);
+          }
        } finally {
-          setState('idle');
+          if (!recordingCancelled && !cancelledRef.current) setState('idle');
        }
      };

      rec.start();
      setState('recording');
    } catch (e) {
+      recordingCancelled = true;
+      recorderRef.current = null;
+      stopTracks();
+      if (cancelledRef.current) return;
      const err = e as { name?: string; message?: string };
      let msg = `Mic error: ${err?.message || e}`;
      if (err?.name === 'NotAllowedError') msg = 'Microphone access denied.';
      else if (err?.name === 'NotFoundError') msg = 'No microphone found.';
      onError?.(msg);
      setState('idle');
+    } finally {
+      startingRef.current = false;
    }
  }, [onTranscript, onError]);