From 32a6405537f0cd97f9330a67aad44bfb3abc9c82 Mon Sep 17 00:00:00 2001 From: newsbubbles Date: Tue, 9 Jun 2026 12:13:06 +0100 Subject: [PATCH] fix(voice): relax backend timeout and surface timeout errors Bumps the proxy timeout to 5 minutes (VOICE_TIMEOUT_MS) since local TTS can synthesize long messages at roughly real-time, and returns a clear timed-out message (504) instead of failing silently. The read-aloud button now shows backend errors. --- server/voice-proxy.js | 17 +++++++-- src/components/chat/hooks/useTts.ts | 34 +++++++++++++---- .../subcomponents/MessageSpeakControl.tsx | 37 +++++++++++-------- 3 files changed, 63 insertions(+), 25 deletions(-) diff --git a/server/voice-proxy.js b/server/voice-proxy.js index 770a91de..0dd158c8 100644 --- a/server/voice-proxy.js +++ b/server/voice-proxy.js @@ -33,7 +33,8 @@ function resolveConfig(req) { const router = express.Router(); -const VOICE_TIMEOUT_MS = Number(process.env.VOICE_TIMEOUT_MS || 60000); +// Generous by default — local TTS can synthesize long messages at ~real-time on CPU. +const VOICE_TIMEOUT_MS = Number(process.env.VOICE_TIMEOUT_MS || 300000); async function fetchWithTimeout(url, options = {}) { const controller = new AbortController(); const timer = setTimeout(() => controller.abort(), VOICE_TIMEOUT_MS); @@ -44,6 +45,16 @@ async function fetchWithTimeout(url, options = {}) { } } +// Turn backend failures into a clear, actionable message for the client. +function backendError(res, e) { + if (e && e.name === 'AbortError') { + return res.status(504).json({ + error: `Voice backend timed out after ${Math.round(VOICE_TIMEOUT_MS / 1000)}s. Check your sidecar or API.`, + }); + } + return res.status(502).json({ error: `Voice backend unreachable: ${e.message}` }); +} + let _upload = null; async function getUpload() { if (!_upload) { @@ -89,7 +100,7 @@ router.post('/transcribe', async (req, res) => { try { data = JSON.parse(text); } catch { data = { text }; } res.json({ text: data.text ?? 
'' }); } catch (e) { - res.status(502).json({ error: `voice backend unreachable: ${e.message}` }); + backendError(res, e); } }); }); @@ -119,7 +130,7 @@ router.post('/tts', async (req, res) => { res.setHeader('Cache-Control', 'no-store'); res.send(Buffer.from(await r.arrayBuffer())); } catch (e) { - res.status(502).json({ error: `voice backend unreachable: ${e.message}` }); + backendError(res, e); } }); diff --git a/src/components/chat/hooks/useTts.ts b/src/components/chat/hooks/useTts.ts index 4ceb3887..25020d10 100644 --- a/src/components/chat/hooks/useTts.ts +++ b/src/components/chat/hooks/useTts.ts @@ -8,14 +8,16 @@ let stopActive: (() => void) | null = null; export type TtsState = 'idle' | 'loading' | 'playing'; /** - * Tap-to-speak for a single message. Sends raw markdown to /api/voice/tts - * (Kokoro sidecar via the Express proxy; cleaning happens server-side), - * plays the returned audio. Manual-gesture only (v1) to satisfy iOS autoplay. + * Tap-to-speak for a single message. Sends raw markdown to /api/voice/tts and plays + * the returned audio. Manual-gesture only (v1) to satisfy iOS autoplay. Exposes the + * last error (e.g. a backend timeout) so the control can surface it. */ export function useTts(getText: () => string) { const [state, setState] = useState<TtsState>('idle'); + const [error, setError] = useState<string | null>(null); const audioRef = useRef<HTMLAudioElement | null>(null); const urlRef = useRef<string | null>(null); + const errorTimer = useRef<ReturnType<typeof setTimeout> | null>(null); const reset = useCallback(() => { if (audioRef.current) { @@ -37,10 +39,17 @@ export function useTts(getText: () => string) { if (stopActive) stopActive = null; }, [reset]); + const showError = useCallback((msg: string) => { + setError(msg); + if (errorTimer.current) clearTimeout(errorTimer.current); + errorTimer.current = setTimeout(() => setError(null), 6000); + }, []); + // Cleanup on unmount: drop the global stop handler if it points at us, then reset. 
useEffect( () => () => { if (stopActive === stop) stopActive = null; + if (errorTimer.current) clearTimeout(errorTimer.current); reset(); }, [reset, stop], @@ -50,6 +59,7 @@ export function useTts(getText: () => string) { if (stopActive) stopActive(); const text = getText(); if (!text || !text.trim()) return; + setError(null); // Create + "unlock" the audio element synchronously inside the click gesture, // so iOS Safari lets us play it after the async fetch resolves. @@ -72,7 +82,16 @@ export function useTts(getText: () => string) { body: JSON.stringify({ text }), headers: voiceConfigHeaders(), }); - if (!res.ok) throw new Error(`tts ${res.status}`); + if (!res.ok) { + let msg = `Read-aloud failed (${res.status})`; + try { + const j = await res.json(); + if (j?.error) msg = String(j.error); + } catch { + /* non-JSON error body */ + } + throw new Error(msg); + } const blob = await res.blob(); const url = URL.createObjectURL(blob); if (audioRef.current !== audio) { @@ -84,16 +103,17 @@ export function useTts(getText: () => string) { audio.load(); await audio.play(); setState('playing'); - } catch { + } catch (e) { reset(); setState('idle'); + showError(e instanceof Error ? 
e.message : 'Read-aloud failed'); } - }, [getText, reset, stop]); + }, [getText, reset, stop, showError]); const toggle = useCallback(() => { if (state === 'playing' || state === 'loading') stop(); else play(); }, [state, play, stop]); - return { state, toggle }; + return { state, toggle, error }; } diff --git a/src/components/chat/view/subcomponents/MessageSpeakControl.tsx b/src/components/chat/view/subcomponents/MessageSpeakControl.tsx index e2e86cbc..01a90dfa 100644 --- a/src/components/chat/view/subcomponents/MessageSpeakControl.tsx +++ b/src/components/chat/view/subcomponents/MessageSpeakControl.tsx @@ -8,7 +8,7 @@ import { useVoiceAvailable } from '../../hooks/useVoiceAvailable'; const MessageSpeakControl = ({ content }: { content: string }) => { const { t } = useTranslation('chat'); const available = useVoiceAvailable(); - const { state, toggle } = useTts(() => content); + const { state, toggle, error } = useTts(() => content); if (!available) return null; @@ -16,21 +16,28 @@ const MessageSpeakControl = ({ content }: { content: string }) => { state === 'playing' ? t('voice.stopSpeaking') : state === 'loading' ? t('voice.loading') : t('voice.speak'); return ( - + + ); };