mirror of
https://github.com/siteboon/claudecodeui.git
synced 2026-06-25 04:13:51 +08:00
Adds a push-to-talk mic button in the composer and a read-aloud button on assistant messages. Both are opt-in and hidden unless a voice backend is configured via VOICE_SIDECAR_URL. The auth-gated /api/voice proxy forwards to a configurable backend exposing /transcribe and /tts (provider-agnostic); the frontend probes /api/voice/health and hides the controls when disabled. Adds i18n keys and docs/voice.md. Includes a local, no-API-key reference backend in voice-sidecar/ (faster-whisper for STT, Kokoro-82M for TTS, both CPU-capable).
88 lines
3.4 KiB
JavaScript
88 lines
3.4 KiB
JavaScript
// Optional voice proxy — forwards speech-to-text / text-to-speech to a configurable backend.
//
// Opt-in: voice is DISABLED unless VOICE_SIDECAR_URL is set. When set, it must point at a
// backend (any implementation) exposing:
//   POST /transcribe  (multipart field 'audio')  -> { text }
//   POST /tts         (form field 'text')        -> audio bytes (audio/*)
// A reference backend (local faster-whisper + Kokoro) ships in /voice-sidecar, but any
// service implementing the two endpoints works (e.g. a cloud transcription + TTS gateway).
//
// Mounted at /api/voice behind authenticateToken, so it inherits the app's auth. The backend
// should bind to localhost and is never exposed directly.
|
|
import express from 'express';
|
|
|
|
/**
 * Normalizes the configured voice backend base URL.
 *
 * Surrounding whitespace is dropped and every trailing slash is removed, so that
 * `${base}/transcribe` never contains a doubled slash and a whitespace-only
 * value counts as "not configured".
 *
 * @param {string | undefined | null} raw - Raw value, e.g. from the environment.
 * @returns {string} The cleaned base URL, or '' when nothing usable was given.
 */
function normalizeVoiceBaseUrl(raw) {
  return String(raw ?? '').trim().replace(/\/+$/, '');
}

// Base URL of the voice backend; empty string means the feature is off.
const VOICE_SIDECAR_URL = normalizeVoiceBaseUrl(process.env.VOICE_SIDECAR_URL);

// Voice is opt-in: enabled only when a non-empty backend URL is configured.
const VOICE_ENABLED = Boolean(VOICE_SIDECAR_URL);
|
|
|
|
// Router that carries the voice endpoints registered below; exported as this module's default.
const router = express.Router();
|
|
|
|
// Lazy multer (memory storage) for the audio upload — matches index.js's pattern.
// Holds the multer instance once created; null until the first call to getUpload().
let _upload = null;

/**
 * Returns the multer instance used for audio uploads, creating it on first use.
 *
 * multer is loaded with a dynamic import so the dependency is only pulled in
 * when an upload is actually handled. Files are kept in memory
 * (multer.memoryStorage) and capped at 25MB per file.
 *
 * @returns {Promise<object>} The cached multer instance.
 */
async function getUpload() {
  if (!_upload) {
    const multer = (await import('multer')).default;
    _upload = multer({
      storage: multer.memoryStorage(),
      limits: { fileSize: 25 * 1024 * 1024 }, // 25MB — short dictation clips
    });
  }
  return _upload;
}
|
|
|
|
/**
 * Guard used by the voice routes: rejects the request when voice is not configured.
 *
 * When VOICE_ENABLED is false this sends a 503 JSON error on `res` and returns
 * false, so the caller must stop handling the request. Otherwise nothing is
 * written to `res`.
 *
 * @param {object} res - Express response; `status()` and `json()` are used.
 * @returns {boolean} true when voice is enabled, false after the 503 was sent.
 */
function ensureEnabled(res) {
  if (VOICE_ENABLED) return true;

  res.status(503).json({ error: 'Voice is not configured. Set VOICE_SIDECAR_URL to enable it.' });
  return false;
}
|
|
|
|
// GET /api/voice/health -> { enabled }  (frontend hides the voice UI when disabled)
// Always answers with JSON, also when voice is off: it reports the flag instead of
// going through ensureEnabled(), so the probe itself never yields a 503.
router.get('/health', (_req, res) => {
  res.json({ enabled: VOICE_ENABLED });
});
|
|
|
|
// POST /api/voice/transcribe  (multipart 'audio') -> { text }
//
// Accepts one uploaded file in the multipart field 'audio', re-posts it to
// `${VOICE_SIDECAR_URL}/transcribe` and relays the backend's JSON reply and status.
// Responses produced here:
//   503 - voice not configured (via ensureEnabled)
//   500 - the upload middleware could not be loaded
//   400 - multer rejected the upload, or no 'audio' file was sent
//   502 - backend unreachable, or it answered 2xx with a non-JSON body
router.post('/transcribe', async (req, res) => {
  if (!ensureEnabled(res)) return;

  let upload;
  try {
    upload = await getUpload();
  } catch (e) {
    // Answer explicitly: otherwise a failed multer import would reject this async
    // handler and the client would be left without a response.
    res.status(500).json({ error: `voice upload unavailable: ${e.message}` });
    return;
  }

  upload.single('audio')(req, res, async (err) => {
    if (err) return res.status(400).json({ error: err.message });
    if (!req.file) return res.status(400).json({ error: 'No audio uploaded' });

    try {
      const fd = new FormData();
      fd.append(
        'audio',
        new Blob([req.file.buffer], { type: req.file.mimetype || 'audio/webm' }),
        req.file.originalname || 'recording.webm',
      );

      const r = await fetch(`${VOICE_SIDECAR_URL}/transcribe`, { method: 'POST', body: fd });

      let data;
      try {
        data = await r.json();
      } catch {
        // A body that is not JSON must not be relayed as a success: keep the
        // backend's own error status, but turn a 2xx into 502 Bad Gateway.
        return res
          .status(r.ok ? 502 : r.status)
          .json({ error: 'bad voice backend response' });
      }

      res.status(r.status).json(data);
    } catch (e) {
      res.status(502).json({ error: `voice backend unreachable: ${e.message}` });
    }
  });
});
|
|
|
|
// POST /api/voice/tts  { text } -> audio bytes
//
// Reads `text` from the parsed request body, posts it as form field 'text' to
// `${VOICE_SIDECAR_URL}/tts` and returns the backend's audio bytes.
// Responses produced here:
//   503 - voice not configured (via ensureEnabled)
//   400 - `text` missing, not a string, or only whitespace
//   backend status - backend answered with a non-2xx status (its body becomes `error`)
//   502 - backend unreachable
router.post('/tts', async (req, res) => {
  if (!ensureEnabled(res)) return;

  const text = req.body?.text;
  // The typeof check matters: a JSON number/object/array would otherwise throw on
  // `.trim()` before the try block below and leave the request unanswered.
  if (typeof text !== 'string' || !text.trim()) {
    return res.status(400).json({ error: 'text required' });
  }

  try {
    const fd = new FormData();
    fd.append('text', text);

    const r = await fetch(`${VOICE_SIDECAR_URL}/tts`, { method: 'POST', body: fd });
    if (!r.ok) {
      const errText = await r.text().catch(() => 'tts failed');
      // Fall back to a generic message when the backend sent an empty error body.
      return res.status(r.status).json({ error: errText || 'tts failed' });
    }

    res.setHeader('Content-Type', r.headers.get('content-type') || 'audio/wav');
    res.setHeader('Cache-Control', 'no-store');
    res.send(Buffer.from(await r.arrayBuffer()));
  } catch (e) {
    res.status(502).json({ error: `voice backend unreachable: ${e.message}` });
  }
});
|
|
|
|
export default router;
|