mirror of
https://github.com/siteboon/claudecodeui.git
synced 2026-06-26 05:15:48 +08:00
fix(voice): address review (SSRF guard, auth mapping, client timeout)
Validates the user-supplied backend URL (http/https only, blocks the link-local metadata range) to prevent SSRF; remaps upstream 401/403 so a bad voice API key isn't read as the app's own auth failing; adds a client-side AbortController timeout on the read-aloud request so the button can't sit in loading if a request stalls.
This commit is contained in:
@@ -55,6 +55,29 @@ function backendError(res, e) {
|
|||||||
return res.status(502).json({ error: `Voice backend unreachable: ${e.message}` });
|
return res.status(502).json({ error: `Voice backend unreachable: ${e.message}` });
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SSRF guard for the user-configurable backend URL: http/https only, and block the
// link-local / cloud-metadata range in every literal spelling the URL parser can
// produce (IPv4, IPv4-mapped IPv6, IPv6 link-local). localhost/private are allowed on
// purpose so users can run a local voice server (LocalAI, Speaches, etc.).
//
// Returns true when `raw` parses as an http(s) URL whose literal host is not a
// link-local / metadata address; false otherwise, including when `raw` is not a URL.
// NOTE: only the literal host is inspected. A DNS name that resolves to a blocked
// address is not caught here.
function isAllowedBackendUrl(raw) {
  let u;
  try {
    u = new URL(raw);
  } catch {
    return false;
  }
  if (u.protocol !== 'http:' && u.protocol !== 'https:') return false;

  const host = u.hostname.toLowerCase();

  // IPv4 link-local 169.254.0.0/16, which contains the 169.254.169.254 metadata IP.
  if (host.startsWith('169.254.')) return false;

  // IPv6 literals are reported by URL with surrounding brackets.
  if (host.startsWith('[') && host.endsWith(']')) {
    const v6 = host.slice(1, -1);
    // IPv4-mapped (::ffff:a.b.c.d) and IPv4-compatible (::a.b.c.d) spellings of
    // 169.254.x.x. The URL serializer renders the embedded IPv4 as hex, so
    // 169.254 becomes the 16-bit piece "a9fe".
    if (/^::(ffff:)?a9fe:[0-9a-f]{1,4}$/.test(v6)) return false;
    // IPv6 link-local, fe80::/10 (first piece fe80 through febf).
    if (/^fe[89ab][0-9a-f]:/.test(v6)) return false;
    // AWS instance-metadata IPv6 endpoint.
    if (v6 === 'fd00:ec2::254') return false;
  }

  return true;
}
|
||||||
|
|
||||||
|
// Don't surface an upstream 401/403 as if the user's own app login failed.
|
||||||
|
function upstreamError(res, status, text) {
|
||||||
|
if (status === 401 || status === 403) {
|
||||||
|
return res.status(502).json({ error: 'Voice backend rejected the request (check the API key).' });
|
||||||
|
}
|
||||||
|
return res.status(status).json({ error: text || 'voice backend error' });
|
||||||
|
}
|
||||||
|
|
||||||
let _upload = null;
|
let _upload = null;
|
||||||
async function getUpload() {
|
async function getUpload() {
|
||||||
if (!_upload) {
|
if (!_upload) {
|
||||||
@@ -77,6 +100,7 @@ router.get('/health', (req, res) => {
|
|||||||
router.post('/transcribe', async (req, res) => {
|
router.post('/transcribe', async (req, res) => {
|
||||||
const cfg = resolveConfig(req);
|
const cfg = resolveConfig(req);
|
||||||
if (!cfg.baseUrl) return res.status(503).json({ error: 'No voice backend configured' });
|
if (!cfg.baseUrl) return res.status(503).json({ error: 'No voice backend configured' });
|
||||||
|
if (!isAllowedBackendUrl(cfg.baseUrl)) return res.status(400).json({ error: 'Invalid voice backend URL.' });
|
||||||
const upload = await getUpload();
|
const upload = await getUpload();
|
||||||
upload.single('audio')(req, res, async (err) => {
|
upload.single('audio')(req, res, async (err) => {
|
||||||
if (err) return res.status(400).json({ error: err.message });
|
if (err) return res.status(400).json({ error: err.message });
|
||||||
@@ -95,7 +119,7 @@ router.post('/transcribe', async (req, res) => {
|
|||||||
body: fd,
|
body: fd,
|
||||||
});
|
});
|
||||||
const text = await r.text();
|
const text = await r.text();
|
||||||
if (!r.ok) return res.status(r.status).json({ error: text || 'transcription failed' });
|
if (!r.ok) return upstreamError(res, r.status, text);
|
||||||
let data;
|
let data;
|
||||||
try { data = JSON.parse(text); } catch { data = { text }; }
|
try { data = JSON.parse(text); } catch { data = { text }; }
|
||||||
res.json({ text: data.text ?? '' });
|
res.json({ text: data.text ?? '' });
|
||||||
@@ -109,6 +133,7 @@ router.post('/transcribe', async (req, res) => {
|
|||||||
router.post('/tts', async (req, res) => {
|
router.post('/tts', async (req, res) => {
|
||||||
const cfg = resolveConfig(req);
|
const cfg = resolveConfig(req);
|
||||||
if (!cfg.baseUrl) return res.status(503).json({ error: 'No voice backend configured' });
|
if (!cfg.baseUrl) return res.status(503).json({ error: 'No voice backend configured' });
|
||||||
|
if (!isAllowedBackendUrl(cfg.baseUrl)) return res.status(400).json({ error: 'Invalid voice backend URL.' });
|
||||||
const text = req.body?.text;
|
const text = req.body?.text;
|
||||||
if (!text || !text.trim()) return res.status(400).json({ error: 'text required' });
|
if (!text || !text.trim()) return res.status(400).json({ error: 'text required' });
|
||||||
try {
|
try {
|
||||||
@@ -124,7 +149,7 @@ router.post('/tts', async (req, res) => {
|
|||||||
});
|
});
|
||||||
if (!r.ok) {
|
if (!r.ok) {
|
||||||
const errText = await r.text().catch(() => 'tts failed');
|
const errText = await r.text().catch(() => 'tts failed');
|
||||||
return res.status(r.status).json({ error: errText });
|
return upstreamError(res, r.status, errText);
|
||||||
}
|
}
|
||||||
res.setHeader('Content-Type', r.headers.get('content-type') || 'audio/mpeg');
|
res.setHeader('Content-Type', r.headers.get('content-type') || 'audio/mpeg');
|
||||||
res.setHeader('Cache-Control', 'no-store');
|
res.setHeader('Cache-Control', 'no-store');
|
||||||
|
|||||||
@@ -13,6 +13,7 @@ export type VoiceSnapshot = { state: VoicePlayState; error: string | null };
|
|||||||
|
|
||||||
const IDLE: VoiceSnapshot = { state: 'idle', error: null };
|
const IDLE: VoiceSnapshot = { state: 'idle', error: null };
|
||||||
const CACHE_MAX = 24;
|
const CACHE_MAX = 24;
|
||||||
|
const CLIENT_TIMEOUT_MS = 330000; // backstop; the server proxy already times out at 5 min
|
||||||
|
|
||||||
// Stable id / cache key from a message's text (djb2).
|
// Stable id / cache key from a message's text (djb2).
|
||||||
export function voiceId(content: string): string {
|
export function voiceId(content: string): string {
|
||||||
@@ -133,11 +134,14 @@ class VoicePlayer {
|
|||||||
try {
|
try {
|
||||||
let url = this.cache.get(id);
|
let url = this.cache.get(id);
|
||||||
if (!url) {
|
if (!url) {
|
||||||
|
const controller = new AbortController();
|
||||||
|
const timer = setTimeout(() => controller.abort(), CLIENT_TIMEOUT_MS);
|
||||||
const res = await authenticatedFetch('/api/voice/tts', {
|
const res = await authenticatedFetch('/api/voice/tts', {
|
||||||
method: 'POST',
|
method: 'POST',
|
||||||
body: JSON.stringify({ text: content }),
|
body: JSON.stringify({ text: content }),
|
||||||
headers: voiceConfigHeaders(),
|
headers: voiceConfigHeaders(),
|
||||||
});
|
signal: controller.signal,
|
||||||
|
}).finally(() => clearTimeout(timer));
|
||||||
if (myToken !== this.token) return; // superseded by another play/stop
|
if (myToken !== this.token) return; // superseded by another play/stop
|
||||||
if (!res.ok) {
|
if (!res.ok) {
|
||||||
let msg = `Read-aloud failed (${res.status})`;
|
let msg = `Read-aloud failed (${res.status})`;
|
||||||
@@ -163,7 +167,8 @@ class VoicePlayer {
|
|||||||
this.emit();
|
this.emit();
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
if (myToken !== this.token) return;
|
if (myToken !== this.token) return;
|
||||||
this.setError(id, e instanceof Error ? e.message : 'Read-aloud failed');
|
const aborted = e instanceof Error && e.name === 'AbortError';
|
||||||
|
this.setError(id, aborted ? 'Read-aloud timed out.' : e instanceof Error ? e.message : 'Read-aloud failed');
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user