diff --git a/server/voice-proxy.js b/server/voice-proxy.js index 0dd158c8..2645b261 100644 --- a/server/voice-proxy.js +++ b/server/voice-proxy.js @@ -55,6 +55,29 @@ function backendError(res, e) { return res.status(502).json({ error: `Voice backend unreachable: ${e.message}` }); } +// SSRF guard for the user-configurable backend URL: http/https only, and block the +// link-local / cloud-metadata range. localhost/private are allowed on purpose so users +// can run a local voice server (LocalAI, Speaches, etc.). +function isAllowedBackendUrl(raw) { + let u; + try { + u = new URL(raw); + } catch { + return false; + } + if (u.protocol !== 'http:' && u.protocol !== 'https:') return false; + if (u.hostname === '169.254.169.254' || u.hostname.startsWith('169.254.')) return false; + return true; +} + +// Don't surface an upstream 401/403 as if the user's own app login failed. +function upstreamError(res, status, text) { + if (status === 401 || status === 403) { + return res.status(502).json({ error: 'Voice backend rejected the request (check the API key).' }); + } + return res.status(status).json({ error: text || 'voice backend error' }); +} + let _upload = null; async function getUpload() { if (!_upload) { @@ -77,6 +100,7 @@ router.get('/health', (req, res) => { router.post('/transcribe', async (req, res) => { const cfg = resolveConfig(req); if (!cfg.baseUrl) return res.status(503).json({ error: 'No voice backend configured' }); + if (!isAllowedBackendUrl(cfg.baseUrl)) return res.status(400).json({ error: 'Invalid voice backend URL.' }); const upload = await getUpload(); upload.single('audio')(req, res, async (err) => { if (err) return res.status(400).json({ error: err.message }); @@ -95,7 +119,7 @@ router.post('/transcribe', async (req, res) => { body: fd, }); const text = await r.text(); - if (!r.ok) return res.status(r.status).json({ error: text || 'transcription failed' }); + if (!r.ok) return upstreamError(res, r.status, text); let data; try { data = JSON.parse(text); } catch { data = { text }; } res.json({ text: data.text ?? '' }); @@ -109,6 +133,7 @@ router.post('/transcribe', async (req, res) => { router.post('/tts', async (req, res) => { const cfg = resolveConfig(req); if (!cfg.baseUrl) return res.status(503).json({ error: 'No voice backend configured' }); + if (!isAllowedBackendUrl(cfg.baseUrl)) return res.status(400).json({ error: 'Invalid voice backend URL.' }); const text = req.body?.text; if (!text || !text.trim()) return res.status(400).json({ error: 'text required' }); try { @@ -124,7 +149,7 @@ router.post('/tts', async (req, res) => { }); if (!r.ok) { const errText = await r.text().catch(() => 'tts failed'); - return res.status(r.status).json({ error: errText }); + return upstreamError(res, r.status, errText); } res.setHeader('Content-Type', r.headers.get('content-type') || 'audio/mpeg'); res.setHeader('Cache-Control', 'no-store'); diff --git a/src/lib/voicePlayer.ts b/src/lib/voicePlayer.ts index f6b0015c..7a16a194 100644 --- a/src/lib/voicePlayer.ts +++ b/src/lib/voicePlayer.ts @@ -13,6 +13,7 @@ export type VoiceSnapshot = { state: VoicePlayState; error: string | null }; const IDLE: VoiceSnapshot = { state: 'idle', error: null }; const CACHE_MAX = 24; +const CLIENT_TIMEOUT_MS = 330000; // backstop; the server proxy already times out at 5 min // Stable id / cache key from a message's text (djb2). export function voiceId(content: string): string { @@ -133,11 +134,14 @@ class VoicePlayer { try { let url = this.cache.get(id); if (!url) { + const controller = new AbortController(); + const timer = setTimeout(() => controller.abort(), CLIENT_TIMEOUT_MS); const res = await authenticatedFetch('/api/voice/tts', { method: 'POST', body: JSON.stringify({ text: content }), headers: voiceConfigHeaders(), - }); + signal: controller.signal, + }).finally(() => clearTimeout(timer)); if (myToken !== this.token) return; // superseded by another play/stop if (!res.ok) { let msg = `Read-aloud failed (${res.status})`; @@ -163,7 +167,8 @@ class VoicePlayer { this.emit(); } catch (e) { if (myToken !== this.token) return; - this.setError(id, e instanceof Error ? e.message : 'Read-aloud failed'); + const aborted = e instanceof Error && e.name === 'AbortError'; + this.setError(id, aborted ? 'Read-aloud timed out.' : e instanceof Error ? e.message : 'Read-aloud failed'); } }