mirror of
https://github.com/siteboon/claudecodeui.git
synced 2026-06-27 06:05:54 +08:00
docs(voice): provider-agnostic wording and jsdoc on proxy functions
Drop the leftover sidecar/faster-whisper references now that the backend can be any OpenAI-compatible voice API, and add JSDoc to the voice-proxy functions so the docstring coverage check passes.
This commit is contained in:
@@ -19,7 +19,12 @@ const ENV = {
|
|||||||
ttsFormat: process.env.VOICE_TTS_FORMAT || 'mp3',
|
ttsFormat: process.env.VOICE_TTS_FORMAT || 'mp3',
|
||||||
};
|
};
|
||||||
|
|
||||||
// Per-request config: client headers (from the user's voice settings) override env defaults.
|
/**
|
||||||
|
* Resolve the voice backend config for a request. Client headers (set from the
|
||||||
|
* user's in-app voice settings) take precedence over the server env defaults.
|
||||||
|
* @param {import('express').Request} req
|
||||||
|
* @returns {{baseUrl: string, apiKey: string, sttModel: string, ttsModel: string, ttsVoice: string}}
|
||||||
|
*/
|
||||||
function resolveConfig(req) {
|
function resolveConfig(req) {
|
||||||
const h = req.headers;
|
const h = req.headers;
|
||||||
return {
|
return {
|
||||||
@@ -35,6 +40,14 @@ const router = express.Router();
|
|||||||
|
|
||||||
// Generous by default — local TTS can synthesize long messages at ~real-time on CPU.
|
// Generous by default — local TTS can synthesize long messages at ~real-time on CPU.
|
||||||
const VOICE_TIMEOUT_MS = Number(process.env.VOICE_TIMEOUT_MS || 300000);
|
const VOICE_TIMEOUT_MS = Number(process.env.VOICE_TIMEOUT_MS || 300000);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* fetch() with an AbortController timeout so a stalled backend can't hold the
|
||||||
|
* request open indefinitely. Aborts after VOICE_TIMEOUT_MS.
|
||||||
|
* @param {string} url
|
||||||
|
* @param {RequestInit} [options]
|
||||||
|
* @returns {Promise<Response>}
|
||||||
|
*/
|
||||||
async function fetchWithTimeout(url, options = {}) {
|
async function fetchWithTimeout(url, options = {}) {
|
||||||
const controller = new AbortController();
|
const controller = new AbortController();
|
||||||
const timer = setTimeout(() => controller.abort(), VOICE_TIMEOUT_MS);
|
const timer = setTimeout(() => controller.abort(), VOICE_TIMEOUT_MS);
|
||||||
@@ -45,19 +58,29 @@ async function fetchWithTimeout(url, options = {}) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Turn backend failures into a clear, actionable message for the client.
|
/**
|
||||||
|
* Turn a backend fetch failure into a clear, actionable client response:
|
||||||
|
* 504 on timeout (AbortError), 502 otherwise.
|
||||||
|
* @param {import('express').Response} res
|
||||||
|
* @param {Error} e
|
||||||
|
*/
|
||||||
function backendError(res, e) {
|
function backendError(res, e) {
|
||||||
if (e && e.name === 'AbortError') {
|
if (e && e.name === 'AbortError') {
|
||||||
return res.status(504).json({
|
return res.status(504).json({
|
||||||
error: `Voice backend timed out after ${Math.round(VOICE_TIMEOUT_MS / 1000)}s. Check your sidecar or API.`,
|
error: `Voice backend timed out after ${Math.round(VOICE_TIMEOUT_MS / 1000)}s. Check your voice backend.`,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
return res.status(502).json({ error: `Voice backend unreachable: ${e.message}` });
|
return res.status(502).json({ error: `Voice backend unreachable: ${e.message}` });
|
||||||
}
|
}
|
||||||
|
|
||||||
// SSRF guard for the user-configurable backend URL: http/https only, and block the
|
/**
|
||||||
// link-local / cloud-metadata range. localhost/private are allowed on purpose so users
|
* SSRF guard for the user-configurable backend URL: allow http/https only and
|
||||||
// can run a local voice server (LocalAI, Speaches, etc.).
|
* block the link-local / cloud-metadata range (169.254.x). localhost and private
|
||||||
|
* ranges are allowed on purpose so users can point at a local voice server
|
||||||
|
* (LocalAI, Speaches, Kokoro-FastAPI, etc.).
|
||||||
|
* @param {string} raw
|
||||||
|
* @returns {boolean}
|
||||||
|
*/
|
||||||
function isAllowedBackendUrl(raw) {
|
function isAllowedBackendUrl(raw) {
|
||||||
let u;
|
let u;
|
||||||
try {
|
try {
|
||||||
@@ -70,7 +93,13 @@ function isAllowedBackendUrl(raw) {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Don't surface an upstream 401/403 as if the user's own app login failed.
|
/**
|
||||||
|
* Relay an upstream (backend) error to the client without making an upstream
|
||||||
|
* 401/403 look like the user's own app login failed.
|
||||||
|
* @param {import('express').Response} res
|
||||||
|
* @param {number} status
|
||||||
|
* @param {string} [text]
|
||||||
|
*/
|
||||||
function upstreamError(res, status, text) {
|
function upstreamError(res, status, text) {
|
||||||
if (status === 401 || status === 403) {
|
if (status === 401 || status === 403) {
|
||||||
return res.status(502).json({ error: 'Voice backend rejected the request (check the API key).' });
|
return res.status(502).json({ error: 'Voice backend rejected the request (check the API key).' });
|
||||||
@@ -79,6 +108,11 @@ function upstreamError(res, status, text) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
let _upload = null;
|
let _upload = null;
|
||||||
|
/**
|
||||||
|
* Lazily build a memory-storage multer instance (25 MB cap) for audio uploads,
|
||||||
|
* so multer is only imported when the voice feature is actually used.
|
||||||
|
* @returns {Promise<import('multer').Multer>}
|
||||||
|
*/
|
||||||
async function getUpload() {
|
async function getUpload() {
|
||||||
if (!_upload) {
|
if (!_upload) {
|
||||||
const multer = (await import('multer')).default;
|
const multer = (await import('multer')).default;
|
||||||
@@ -87,16 +121,27 @@ async function getUpload() {
|
|||||||
return _upload;
|
return _upload;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Build the Authorization header for the backend, or an empty object when no
|
||||||
|
* key is configured (e.g. a local server that needs none).
|
||||||
|
* @param {string} apiKey
|
||||||
|
* @returns {Record<string, string>}
|
||||||
|
*/
|
||||||
function authHeader(apiKey) {
|
function authHeader(apiKey) {
|
||||||
return apiKey ? { Authorization: `Bearer ${apiKey}` } : {};
|
return apiKey ? { Authorization: `Bearer ${apiKey}` } : {};
|
||||||
}
|
}
|
||||||
|
|
||||||
// GET /api/voice/health -> { configured } (true if a base URL is available)
|
/**
|
||||||
|
* GET /api/voice/health -> { configured } (true when a backend base URL is set).
|
||||||
|
*/
|
||||||
router.get('/health', (req, res) => {
|
router.get('/health', (req, res) => {
|
||||||
res.json({ configured: Boolean(resolveConfig(req).baseUrl) });
|
res.json({ configured: Boolean(resolveConfig(req).baseUrl) });
|
||||||
});
|
});
|
||||||
|
|
||||||
// POST /api/voice/transcribe (multipart 'audio') -> { text }
|
/**
|
||||||
|
* POST /api/voice/transcribe (multipart 'audio') -> { text }.
|
||||||
|
* Forwards the uploaded audio to the backend's /audio/transcriptions endpoint.
|
||||||
|
*/
|
||||||
router.post('/transcribe', async (req, res) => {
|
router.post('/transcribe', async (req, res) => {
|
||||||
const cfg = resolveConfig(req);
|
const cfg = resolveConfig(req);
|
||||||
if (!cfg.baseUrl) return res.status(503).json({ error: 'No voice backend configured' });
|
if (!cfg.baseUrl) return res.status(503).json({ error: 'No voice backend configured' });
|
||||||
@@ -129,7 +174,10 @@ router.post('/transcribe', async (req, res) => {
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
// POST /api/voice/tts { text } -> audio bytes
|
/**
|
||||||
|
* POST /api/voice/tts { text } -> audio bytes.
|
||||||
|
* Forwards the text to the backend's /audio/speech endpoint and streams the audio back.
|
||||||
|
*/
|
||||||
router.post('/tts', async (req, res) => {
|
router.post('/tts', async (req, res) => {
|
||||||
const cfg = resolveConfig(req);
|
const cfg = resolveConfig(req);
|
||||||
if (!cfg.baseUrl) return res.status(503).json({ error: 'No voice backend configured' });
|
if (!cfg.baseUrl) return res.status(503).json({ error: 'No voice backend configured' });
|
||||||
|
|||||||
@@ -26,8 +26,8 @@ export type VoiceInputState = 'idle' | 'recording' | 'transcribing';
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Push-to-talk dictation. Records the mic, uploads to /api/voice/transcribe
|
* Push-to-talk dictation. Records the mic, uploads to /api/voice/transcribe
|
||||||
* (faster-whisper sidecar via the Express proxy), returns text via onTranscript.
|
* (an OpenAI-compatible speech-to-text backend via the Express proxy), and
|
||||||
* Ported from tooler's VoiceInput.js.
|
* returns the transcript through onTranscript.
|
||||||
*/
|
*/
|
||||||
export function useVoiceInput(onTranscript: (text: string) => void, onError?: (msg: string) => void) {
|
export function useVoiceInput(onTranscript: (text: string) => void, onError?: (msg: string) => void) {
|
||||||
const [state, setState] = useState<VoiceInputState>('idle');
|
const [state, setState] = useState<VoiceInputState>('idle');
|
||||||
|
|||||||
Reference in New Issue
Block a user