mirror of
https://github.com/siteboon/claudecodeui.git
synced 2026-06-26 21:55:50 +08:00
Compare commits
17 Commits
fix/mobile
...
fix/voice-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a1c48e5b1d | ||
|
|
0e6373305b | ||
|
|
43c0cca96e | ||
|
|
af16d8ebdc | ||
|
|
b0a49120cc | ||
|
|
8cbfac6ab1 | ||
|
|
9919851be7 | ||
|
|
66b0766013 | ||
|
|
95a75aac47 | ||
|
|
952ddb9eb7 | ||
|
|
7f8ae7023d | ||
|
|
1203760ba8 | ||
|
|
f285715e31 | ||
|
|
cb3ad16139 | ||
|
|
32a6405537 | ||
|
|
711936d279 | ||
|
|
d05585e1f4 |
@@ -61,6 +61,7 @@ import userRoutes from './routes/user.js';
|
||||
import geminiRoutes from './routes/gemini.js';
|
||||
import pluginsRoutes from './routes/plugins.js';
|
||||
import providerRoutes from './modules/providers/provider.routes.js';
|
||||
import voiceRoutes from './voice-proxy.js';
|
||||
import browserUseRoutes from './modules/browser-use/browser-use.routes.js';
|
||||
import browserUseMcpRoutes from './modules/browser-use/browser-use-mcp.routes.js';
|
||||
import { browserUseService } from './modules/browser-use/browser-use.service.js';
|
||||
@@ -222,6 +223,8 @@ app.use('/api/providers', authenticateToken, providerRoutes);
|
||||
// Agent API Routes (uses API key authentication)
|
||||
app.use('/api/agent', agentRoutes);
|
||||
|
||||
app.use('/api/voice', authenticateToken, voiceRoutes);
|
||||
|
||||
// Serve public files (like api-docs.html)
|
||||
app.use(express.static(path.join(APP_ROOT, 'public')));
|
||||
|
||||
|
||||
224
server/voice-proxy.js
Normal file
224
server/voice-proxy.js
Normal file
@@ -0,0 +1,224 @@
|
||||
// Optional voice proxy — forwards STT/TTS to an OpenAI-compatible audio backend.
|
||||
//
|
||||
// The backend is whatever the user points at: OpenAI, Groq, or a local server
|
||||
// (LocalAI / Speaches / Kokoro-FastAPI / openedai-speech / etc.). It must expose the
|
||||
// standard OpenAI audio endpoints:
|
||||
// POST {base}/audio/transcriptions (multipart 'file' + 'model') -> { text }
|
||||
// POST {base}/audio/speech ({ model, voice, input }) -> audio bytes
|
||||
//
|
||||
// Config is resolved per-request from headers (set by the client's voice settings),
|
||||
// falling back to server env defaults. Mounted at /api/voice behind authenticateToken.
|
||||
import { Readable } from 'node:stream';
|
||||
|
||||
import express from 'express';
|
||||
|
||||
// Server-side defaults for the voice backend, read from the environment once at module load.
const ENV = (() => {
  const {
    VOICE_API_BASE_URL,
    VOICE_API_KEY,
    VOICE_STT_MODEL,
    VOICE_TTS_MODEL,
    VOICE_TTS_VOICE,
  } = process.env;
  return {
    // One trailing slash is stripped so paths can be appended as `${baseUrl}/...`.
    baseUrl: (VOICE_API_BASE_URL || '').replace(/\/$/, ''),
    apiKey: VOICE_API_KEY || '',
    sttModel: VOICE_STT_MODEL || 'whisper-1',
    ttsModel: VOICE_TTS_MODEL || 'tts-1',
    ttsVoice: VOICE_TTS_VOICE || 'alloy',
  };
})();
|
||||
|
||||
/**
 * Build the effective voice backend settings for one request.
 *
 * The API key, STT model, TTS model and TTS voice come from the matching
 * `x-voice-*` request header when it is non-empty, otherwise from the server
 * env defaults. The base URL always comes from the server env. The TTS
 * response format has no env default: it is the trimmed header value, or ''
 * when the header is absent.
 * @param {import('express').Request} req
 * @returns {{baseUrl: string, apiKey: string, sttModel: string, ttsModel: string, ttsVoice: string, ttsFormat: string}}
 */
function resolveConfig(req) {
  const headers = req.headers;
  // Missing headers become '' so the `||` fallbacks below pick the env default.
  const fromHeader = (name) => String(headers[name] || '');
  return {
    // Security: do not allow clients to control the outbound backend host.
    // Always use the server-side configured base URL.
    baseUrl: ENV.baseUrl,
    apiKey: fromHeader('x-voice-api-key') || ENV.apiKey,
    sttModel: fromHeader('x-voice-stt-model') || ENV.sttModel,
    ttsModel: fromHeader('x-voice-tts-model') || ENV.ttsModel,
    ttsVoice: fromHeader('x-voice-tts-voice') || ENV.ttsVoice,
    ttsFormat: fromHeader('x-voice-tts-format').trim(),
  };
}
|
||||
|
||||
const router = express.Router();
|
||||
|
||||
// Backend request timeout. The default is generous because local TTS can synthesize
// long messages at roughly real-time speed on CPU.
const DEFAULT_VOICE_TIMEOUT_MS = 300000;
const _parsedTimeout = Number(process.env.VOICE_TIMEOUT_MS);
// A missing, non-numeric, zero or negative override falls back to the default, since
// such a value would make setTimeout fire immediately.
const VOICE_TIMEOUT_MS = !Number.isFinite(_parsedTimeout) || _parsedTimeout <= 0
  ? DEFAULT_VOICE_TIMEOUT_MS
  : _parsedTimeout;
|
||||
|
||||
/**
 * fetch() guarded by a URL check and an AbortController timeout.
 *
 * Rejects with an Error when the URL is not http/https or fails
 * isAllowedBackendUrl. Otherwise the request is aborted if fetch() has not
 * settled within VOICE_TIMEOUT_MS; the timer is cleared as soon as fetch()
 * settles. Redirects default to 'manual' unless the caller overrides
 * `redirect`; the caller cannot override `signal`.
 * @param {string} url
 * @param {RequestInit} [options]
 * @returns {Promise<Response>}
 */
async function fetchWithTimeout(url, options = {}) {
  const target = new URL(url);
  const isHttp = target.protocol === 'http:' || target.protocol === 'https:';
  if (!isHttp || !isAllowedBackendUrl(target.origin)) {
    throw new Error('Blocked outbound voice backend URL');
  }

  const abort = new AbortController();
  const timeoutId = setTimeout(() => abort.abort(), VOICE_TIMEOUT_MS);
  try {
    const init = { redirect: 'manual', ...options, signal: abort.signal };
    return await fetch(target.toString(), init);
  } finally {
    clearTimeout(timeoutId);
  }
}
|
||||
|
||||
/**
 * Report a failed backend fetch to the client as JSON:
 * 504 when the error is an AbortError (the timeout fired), 502 for anything else.
 * @param {import('express').Response} res
 * @param {Error} e
 */
function backendError(res, e) {
  const timedOut = Boolean(e) && e.name === 'AbortError';
  if (!timedOut) {
    return res.status(502).json({ error: `Voice backend unreachable: ${e.message}` });
  }
  return res.status(504).json({
    error: `Voice backend timed out after ${Math.round(VOICE_TIMEOUT_MS / 1000)}s. Check your voice backend.`,
  });
}
|
||||
|
||||
/**
 * SSRF guard for the configured backend URL: allow http/https only and block
 * the link-local / cloud-metadata addresses. localhost and private ranges are
 * allowed on purpose so users can point at a local voice server
 * (LocalAI, Speaches, Kokoro-FastAPI, etc.).
 *
 * Blocked hosts:
 *   - IPv4 link-local 169.254.0.0/16 (includes the 169.254.169.254 metadata IP)
 *   - the same range written as IPv4-mapped IPv6 (::ffff:169.254.x.x)
 *   - IPv6 link-local fe80::/10
 *   - the AWS IPv6 instance-metadata address fd00:ec2::254
 *
 * The checks run on `URL.hostname`, i.e. after the URL parser has normalized
 * the host (decimal/hex IPv4 spellings become dotted quads, IPv6 is lowercased,
 * compressed and wrapped in brackets).
 * @param {string} raw
 * @returns {boolean} true when the URL parses, is http(s) and is not a blocked host
 */
function isAllowedBackendUrl(raw) {
  let u;
  try {
    u = new URL(raw);
  } catch {
    return false;
  }
  if (u.protocol !== 'http:' && u.protocol !== 'https:') return false;

  const host = u.hostname.toLowerCase();
  // IPv4 link-local / metadata range.
  if (host.startsWith('169.254.')) return false;
  // IPv4-mapped IPv6 form of 169.254.x.x; the parser serializes it as ::ffff:a9fe:xxxx.
  if (host.startsWith('[::ffff:a9fe:')) return false;
  // IPv6 link-local fe80::/10 (first hextet fe80 through febf).
  if (/^\[fe[89ab][0-9a-f]:/.test(host)) return false;
  // AWS IPv6 instance-metadata endpoint.
  if (host === '[fd00:ec2::254]') return false;
  return true;
}
|
||||
|
||||
/**
|
||||
* Relay an upstream (backend) error to the client without making an upstream
|
||||
* 401/403 look like the user's own app login failed.
|
||||
* @param {import('express').Response} res
|
||||
* @param {number} status
|
||||
* @param {string} [text]
|
||||
*/
|
||||
function upstreamError(res, status, text) {
|
||||
if (status === 401 || status === 403) {
|
||||
return res.status(502).json({ error: 'Voice backend rejected the request (check the API key).' });
|
||||
}
|
||||
return res.status(status).json({ error: text || 'voice backend error' });
|
||||
}
|
||||
|
||||
// Cached multer instance; stays null until the first call to getUpload().
let _upload = null;
/**
 * Return the shared multer instance for audio uploads, creating it on first
 * use. multer is loaded with a dynamic import, so it is only imported once an
 * upload is actually handled. Files are kept in memory and capped at 25 MB.
 * @returns {Promise<import('multer').Multer>}
 */
async function getUpload() {
  if (_upload) return _upload;
  const { default: multer } = await import('multer');
  _upload = multer({ storage: multer.memoryStorage(), limits: { fileSize: 25 * 1024 * 1024 } });
  return _upload;
}
|
||||
|
||||
/**
 * Headers that authenticate a backend request: a Bearer Authorization header
 * when a key is given, or no headers at all when the key is empty (e.g. a
 * local server that needs none).
 * @param {string} apiKey
 * @returns {Record<string, string>}
 */
function authHeader(apiKey) {
  if (!apiKey) return {};
  return { Authorization: `Bearer ${apiKey}` };
}
|
||||
|
||||
/**
 * GET /api/voice/health -> { configured }.
 * `configured` is true when the resolved config has a non-empty backend base URL.
 */
router.get('/health', (req, res) => {
  const { baseUrl } = resolveConfig(req);
  res.json({ configured: Boolean(baseUrl) });
});
|
||||
|
||||
/**
 * POST /api/voice/transcribe (multipart 'audio') -> { text }.
 * Forwards the uploaded audio to the backend's /audio/transcriptions endpoint.
 *
 * Responses:
 *   503 when no backend base URL is configured
 *   400 when the base URL fails isAllowedBackendUrl, the upload is rejected
 *       by multer, or no 'audio' file was sent
 *   413 when the upload exceeds multer's file size limit
 *   500 when the upload handler itself cannot be loaded
 *   upstream failures are reported through upstreamError / backendError
 */
router.post('/transcribe', async (req, res) => {
  const cfg = resolveConfig(req);
  if (!cfg.baseUrl) return res.status(503).json({ error: 'No voice backend configured' });
  if (!isAllowedBackendUrl(cfg.baseUrl)) return res.status(400).json({ error: 'Invalid voice backend URL.' });

  // getUpload() performs a dynamic import; without this guard a failed import would
  // reject this async handler with nothing to answer the request.
  let upload;
  try {
    upload = await getUpload();
  } catch (e) {
    return res.status(500).json({ error: `Voice upload handler unavailable: ${e?.message ?? e}` });
  }

  upload.single('audio')(req, res, async (err) => {
    if (err) {
      // multer reports an oversized file with code LIMIT_FILE_SIZE.
      const status = err.code === 'LIMIT_FILE_SIZE' ? 413 : 400;
      return res.status(status).json({ error: err.message });
    }
    if (!req.file) return res.status(400).json({ error: 'No audio uploaded' });
    try {
      const fd = new FormData();
      fd.append(
        'file',
        new Blob([req.file.buffer], { type: req.file.mimetype || 'audio/webm' }),
        req.file.originalname || 'recording.webm',
      );
      fd.append('model', cfg.sttModel);
      const r = await fetchWithTimeout(`${cfg.baseUrl}/audio/transcriptions`, {
        method: 'POST',
        headers: authHeader(cfg.apiKey),
        body: fd,
      });
      const text = await r.text();
      if (!r.ok) return upstreamError(res, r.status, text);
      // A body that is not JSON is treated as the transcript itself.
      let data;
      try { data = JSON.parse(text); } catch { data = { text }; }
      // `?.` keeps a JSON `null` body from throwing and being reported as "unreachable".
      res.json({ text: data?.text ?? '' });
    } catch (e) {
      backendError(res, e);
    }
  });
});
|
||||
|
||||
/**
 * POST /api/voice/tts { text } -> audio bytes.
 * Forwards the text to the backend's /audio/speech endpoint and streams the audio back.
 *
 * Responses:
 *   503 when no backend base URL is configured
 *   400 when the base URL fails isAllowedBackendUrl or `text` is missing/blank
 *   upstream failures are reported through upstreamError / backendError
 * On success the backend's Content-Type is relayed (default 'audio/mpeg') and
 * caching is disabled.
 */
router.post('/tts', async (req, res) => {
  const cfg = resolveConfig(req);
  if (!cfg.baseUrl) return res.status(503).json({ error: 'No voice backend configured' });
  if (!isAllowedBackendUrl(cfg.baseUrl)) return res.status(400).json({ error: 'Invalid voice backend URL.' });
  const text = req.body?.text;
  if (typeof text !== 'string' || !text.trim()) return res.status(400).json({ error: 'text required' });
  try {
    const r = await fetchWithTimeout(`${cfg.baseUrl}/audio/speech`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json', ...authHeader(cfg.apiKey) },
      body: JSON.stringify({
        model: cfg.ttsModel,
        voice: cfg.ttsVoice,
        input: text,
        // Only sent when the client asked for a specific format.
        ...(cfg.ttsFormat ? { response_format: cfg.ttsFormat } : {}),
      }),
    });
    if (!r.ok) {
      const errText = await r.text().catch(() => 'tts failed');
      return upstreamError(res, r.status, errText);
    }
    res.setHeader('Content-Type', r.headers.get('content-type') || 'audio/mpeg');
    res.setHeader('Cache-Control', 'no-store');
    if (!r.body) return res.end();

    const upstream = Readable.fromWeb(r.body);
    // A failure while reading the backend body tears down the client response.
    upstream.on('error', (error) => res.destroy(error));
    // pipe() does not destroy its source when the destination closes, so a client that
    // disconnects mid-stream would leave the backend response open. Release it here.
    res.on('close', () => {
      if (!upstream.destroyed) upstream.destroy();
    });
    upstream.pipe(res);
  } catch (e) {
    backendError(res, e);
  }
});
|
||||
|
||||
export default router;
|
||||
@@ -775,6 +775,17 @@ export function useChatComposerState({
|
||||
handleSubmitRef.current = handleSubmit;
|
||||
}, [handleSubmit]);
|
||||
|
||||
// Voice transcripts are appended to whatever is already typed (separated by one space).
// The combined text is written to React state and, synchronously, to inputValueRef so
// that a submit triggered right away reads the new text rather than the stale state.
// With `send` set (the user tapped "stop and send") the message is submitted at once;
// otherwise it stays in the input for editing.
const handleVoiceTranscript = useCallback((text: string, send?: boolean) => {
  const existing = inputValueRef.current.trim();
  const combined = existing ? `${existing} ${text}` : text;
  setInput(combined);
  inputValueRef.current = combined;
  if (!send) return;
  handleSubmitRef.current?.(createFakeSubmitEvent());
}, [setInput]);
|
||||
|
||||
useEffect(() => {
|
||||
inputValueRef.current = input;
|
||||
}, [input]);
|
||||
@@ -1013,6 +1024,7 @@ export function useChatComposerState({
|
||||
isDragActive,
|
||||
openImagePicker: open,
|
||||
handleSubmit,
|
||||
handleVoiceTranscript,
|
||||
handleInputChange,
|
||||
handleKeyDown,
|
||||
handlePaste,
|
||||
|
||||
33
src/components/chat/hooks/useTts.ts
Normal file
33
src/components/chat/hooks/useTts.ts
Normal file
@@ -0,0 +1,33 @@
|
||||
import { useCallback, useEffect, useState } from 'react';
|
||||
import { voicePlayer, voiceId, type VoiceSnapshot } from '../../../lib/voicePlayer';
|
||||
|
||||
export type TtsState = VoiceSnapshot['state'];
|
||||
|
||||
/**
 * React adapter over the shared `voicePlayer` from lib/voicePlayer.
 *
 * The hook derives a voice id from the message text, mirrors the player's
 * snapshot for that id into component state, and exposes a `toggle` that
 * forwards taps to the player. The component re-renders only when the
 * snapshot's `state` or `error` actually changes.
 */
export function useTts(getText: () => string) {
  const content = getText();
  const id = voiceId(content);

  const [snap, setSnap] = useState<VoiceSnapshot>(() => voicePlayer.getSnapshot(id));

  useEffect(() => {
    const sync = () => {
      setSnap((current) => {
        const latest = voicePlayer.getSnapshot(id);
        const unchanged = current.state === latest.state && current.error === latest.error;
        // Returning the previous object lets React skip the re-render.
        return unchanged ? current : latest;
      });
    };
    // Pick up the snapshot for the current id right away, then follow player updates.
    sync();
    return voicePlayer.subscribe(sync);
  }, [id]);

  const toggle = useCallback(() => {
    voicePlayer.unlock(); // synchronous, within the click gesture (iOS)
    voicePlayer.toggle(content);
  }, [content]);

  return { state: snap.state, toggle, error: snap.error };
}
|
||||
85
src/components/chat/hooks/useVoiceAvailable.ts
Normal file
85
src/components/chat/hooks/useVoiceAvailable.ts
Normal file
@@ -0,0 +1,85 @@
|
||||
import { useEffect, useState } from 'react';
|
||||
|
||||
import { authenticatedFetch } from '../../../utils/api';
|
||||
import { readVoiceConfig, VOICE_CONFIG_SYNC_EVENT } from '../../../hooks/useVoiceConfig';
|
||||
|
||||
// Voice UI is gated on the `voiceEnabled` UI preference (toggled in Quick Settings /
|
||||
// the Settings modal) and a configured voice backend.
|
||||
const STORAGE_KEY = 'uiPreferences';
|
||||
const SYNC_EVENT = 'ui-preferences:sync';
|
||||
// The health request currently in flight, shared by all callers; null when none is pending.
let healthRequest: Promise<boolean> | null = null;

/**
 * Ask the server whether a voice backend is configured (GET /api/voice/health).
 * Concurrent callers share one in-flight request; the shared promise is
 * dropped once it settles, so a later call issues a fresh request.
 * Resolves to true only when the response body has `configured === true`;
 * rejects when the response status is not ok.
 */
function checkVoiceHealth(): Promise<boolean> {
  if (healthRequest) return healthRequest;

  healthRequest = authenticatedFetch('/api/voice/health')
    .then(async (response) => {
      if (response.ok) {
        const payload = await response.json();
        return payload?.configured === true;
      }
      throw new Error(`Voice health check failed (${response.status})`);
    })
    .finally(() => {
      healthRequest = null;
    });
  return healthRequest;
}
|
||||
|
||||
/**
 * Read the `voiceEnabled` flag from the UI preferences stored in localStorage.
 * Accepts both the boolean `true` and the string 'true'. Returns false when
 * nothing is stored or when reading/parsing the stored value throws.
 */
function readVoiceEnabled(): boolean {
  try {
    const stored = localStorage.getItem(STORAGE_KEY);
    if (!stored) return false;
    const flag = JSON.parse(stored)?.voiceEnabled;
    return flag === true || flag === 'true';
  } catch {
    return false;
  }
}
|
||||
|
||||
/**
 * Whether the voice UI should be shown: the `voiceEnabled` preference is on AND a
 * voice backend is considered available.
 *
 * The preference is re-read on `storage` events and on the preferences sync event.
 * Availability is re-evaluated whenever the preference changes or the voice config
 * sync event fires.
 */
export function useVoiceAvailable(): boolean {
  const [enabled, setEnabled] = useState<boolean>(() =>
    typeof window === 'undefined' ? false : readVoiceEnabled(),
  );
  const [available, setAvailable] = useState(false);

  // Keep `enabled` in step with the stored preference.
  useEffect(() => {
    const refresh = () => setEnabled(readVoiceEnabled());
    window.addEventListener('storage', refresh);
    window.addEventListener(SYNC_EVENT, refresh as EventListener);
    return () => {
      window.removeEventListener('storage', refresh);
      window.removeEventListener(SYNC_EVENT, refresh as EventListener);
    };
  }, []);

  useEffect(() => {
    let active = true;
    let requestId = 0;

    const check = async () => {
      if (!enabled) {
        setAvailable(false);
        return;
      }
      // A locally saved base URL counts as available without asking the server.
      // NOTE(review): this shortcut never consults /api/voice/health; confirm the server
      // actually honours a client-side base URL, otherwise this can report a false positive.
      if (readVoiceConfig().baseUrl.trim()) {
        setAvailable(true);
        return;
      }
      // Only the most recent check started by this effect may publish its result.
      const id = ++requestId;
      const isCurrent = () => active && id === requestId;
      let result = false;
      try {
        result = await checkVoiceHealth();
      } catch {
        result = false;
      }
      if (isCurrent()) setAvailable(result);
    };

    void check();
    window.addEventListener(VOICE_CONFIG_SYNC_EVENT, check);
    return () => {
      active = false;
      window.removeEventListener(VOICE_CONFIG_SYNC_EVENT, check);
    };
  }, [enabled]);

  return enabled && available;
}
|
||||
149
src/components/chat/hooks/useVoiceInput.ts
Normal file
149
src/components/chat/hooks/useVoiceInput.ts
Normal file
@@ -0,0 +1,149 @@
|
||||
import { useCallback, useEffect, useRef, useState } from 'react';
|
||||
|
||||
import { transcribeVoice } from '../../../lib/voiceApi';
|
||||
|
||||
// Recording formats in order of preference.
// Mobile-safe recording: iOS Safari 18.4+ supports webm/opus; older iOS needs mp4.
const MIME_CANDIDATES = [
  'audio/webm;codecs=opus',
  'audio/webm',
  'audio/mp4',
  'audio/ogg;codecs=opus',
  'audio/ogg',
];

/**
 * First candidate MIME type that MediaRecorder reports as supported, or '' when
 * MediaRecorder is unavailable or none of the candidates is supported.
 */
function pickMime(): string {
  if (typeof MediaRecorder === 'undefined') return '';
  const isSupported = (mime: string): boolean => {
    try {
      return MediaRecorder.isTypeSupported(mime);
    } catch {
      /* isTypeSupported can throw on some iOS versions */
      return false;
    }
  };
  return MIME_CANDIDATES.find(isSupported) ?? '';
}
|
||||
|
||||
export type VoiceInputState = 'idle' | 'recording' | 'transcribing';
|
||||
|
||||
/**
 * Push-to-talk dictation hook.
 *
 * Records the microphone with MediaRecorder, uploads the recording through
 * `transcribeVoice`, and hands the resulting text to `onTranscript`. Problems
 * (mic errors, too-short recordings, failed or empty transcriptions) are
 * reported through `onError`.
 *
 * Returns the current `state` ('idle' | 'recording' | 'transcribing'), a
 * `toggle` that starts or stops recording, and `stop`, which accepts
 * `{ send: true }` to pass `send = true` on to `onTranscript`.
 */
export function useVoiceInput(
  onTranscript: (text: string, send?: boolean) => void,
  onError?: (msg: string) => void,
) {
  const [state, setState] = useState<VoiceInputState>('idle');
  const recorderRef = useRef<MediaRecorder | null>(null);
  const chunksRef = useRef<Blob[]>([]);
  const streamRef = useRef<MediaStream | null>(null);
  // Set on unmount; async callbacks check it before touching state or calling back.
  const cancelledRef = useRef(false);
  // True while start() is awaiting mic access, so overlapping starts are ignored.
  const startingRef = useRef(false);
  // Whether the in-progress stop should auto-send the transcript (vs just fill the box).
  const sendRef = useRef(false);

  // Release the microphone held by the current stream, if any.
  const stopTracks = () => {
    const stream = streamRef.current;
    if (stream) {
      for (const track of stream.getTracks()) track.stop();
    }
    streamRef.current = null;
  };

  // Stop the mic if the component unmounts mid-recording.
  useEffect(() => {
    cancelledRef.current = false;
    return () => {
      cancelledRef.current = true;
      startingRef.current = false;
      streamRef.current?.getTracks().forEach((track) => track.stop());
      streamRef.current = null;
      recorderRef.current = null;
    };
  }, []);

  const start = useCallback(async () => {
    const existing = recorderRef.current;
    const alreadyRecording = existing ? existing.state !== 'inactive' : false;
    if (startingRef.current || alreadyRecording) return;
    startingRef.current = true;
    try {
      const stream = await navigator.mediaDevices.getUserMedia({
        audio: { echoCancellation: true, noiseSuppression: true },
      });
      // Unmounted while the permission prompt was open: release the mic and bail out.
      if (cancelledRef.current) {
        stream.getTracks().forEach((track) => track.stop());
        return;
      }
      streamRef.current = stream;
      const mimeType = pickMime();
      const recorder = mimeType ? new MediaRecorder(stream, { mimeType }) : new MediaRecorder(stream);
      recorderRef.current = recorder;
      chunksRef.current = [];

      recorder.ondataavailable = (event) => {
        if (event.data.size > 0) chunksRef.current.push(event.data);
      };

      recorder.onstop = async () => {
        stopTracks();
        if (cancelledRef.current) return;
        // Capture and clear the send intent for this stop before any async work.
        const shouldSend = sendRef.current;
        sendRef.current = false;
        const type = recorder.mimeType || 'audio/webm';
        const blob = new Blob(chunksRef.current, { type });
        if (blob.size < 800) {
          setState('idle');
          onError?.('Recording too short');
          return;
        }
        setState('transcribing');
        try {
          // File extension matching the recorded container.
          let ext = 'webm';
          if (type.includes('mp4')) ext = 'm4a';
          else if (type.includes('ogg')) ext = 'ogg';
          const res = await transcribeVoice(blob, `recording.${ext}`);
          if (!res.ok) throw new Error(`transcribe ${res.status}`);
          const data = await res.json();
          if (cancelledRef.current) return;
          const text = String(data?.text || '').trim();
          if (text) {
            onTranscript(text, shouldSend);
          } else {
            onError?.('No speech detected');
          }
        } catch (e) {
          if (!cancelledRef.current) {
            onError?.(`Transcription failed: ${e instanceof Error ? e.message : String(e)}`);
          }
        } finally {
          if (!cancelledRef.current) setState('idle');
        }
      };

      recorder.start();
      setState('recording');
    } catch (e) {
      recorderRef.current = null;
      stopTracks();
      if (cancelledRef.current) return;
      const err = e as { name?: string; message?: string };
      let msg = `Mic error: ${err?.message || e}`;
      switch (err?.name) {
        case 'NotAllowedError':
          msg = 'Microphone access denied.';
          break;
        case 'NotFoundError':
          msg = 'No microphone found.';
          break;
        default:
          break;
      }
      onError?.(msg);
      setState('idle');
    } finally {
      startingRef.current = false;
    }
  }, [onTranscript, onError]);

  // Stop recording. Pass { send: true } to auto-send the transcript once it's ready.
  // Guard on the recorder's own state (not React state) so a double tap, or the mic
  // and Send buttons both firing, can't call stop() on an already-inactive recorder.
  const stop = useCallback((opts?: { send?: boolean }) => {
    const recorder = recorderRef.current;
    if (!recorder || recorder.state === 'inactive') return;
    sendRef.current = opts?.send ?? false;
    recorder.stop();
  }, []);

  // Start when idle, stop when recording; taps while transcribing are ignored.
  const toggle = useCallback(() => {
    if (state === 'recording') {
      stop();
    } else if (state === 'idle') {
      start();
    }
  }, [state, start, stop]);

  return { state, toggle, stop };
}
|
||||
@@ -173,6 +173,7 @@ function ChatInterface({
|
||||
isDragActive,
|
||||
openImagePicker,
|
||||
handleSubmit,
|
||||
handleVoiceTranscript,
|
||||
handleInputChange,
|
||||
handleKeyDown,
|
||||
handlePaste,
|
||||
@@ -406,6 +407,7 @@ function ChatInterface({
|
||||
renderInputWithMentions={renderInputWithMentions}
|
||||
textareaRef={textareaRef}
|
||||
input={input}
|
||||
onVoiceTranscript={handleVoiceTranscript}
|
||||
onInputChange={handleInputChange}
|
||||
onTextareaClick={handleTextareaClick}
|
||||
onTextareaKeyDown={handleKeyDown}
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import { useTranslation } from 'react-i18next';
|
||||
import { useCallback, useEffect, useRef, useState } from 'react';
|
||||
import type {
|
||||
ChangeEvent,
|
||||
ClipboardEvent,
|
||||
@@ -9,8 +10,10 @@ import type {
|
||||
RefObject,
|
||||
TouchEvent,
|
||||
} from 'react';
|
||||
import { ImageIcon, MessageSquareIcon, XIcon, ArrowDownIcon } from 'lucide-react';
|
||||
import { ImageIcon, MessageSquareIcon, XIcon, ArrowDownIcon, Loader2 } from 'lucide-react';
|
||||
|
||||
import { useVoiceInput } from '../../hooks/useVoiceInput';
|
||||
import { useVoiceAvailable } from '../../hooks/useVoiceAvailable';
|
||||
import type { SessionActivity } from '../../../../hooks/useSessionProtection';
|
||||
import type { PendingPermissionRequest, PermissionMode } from '../../types/types';
|
||||
import {
|
||||
@@ -27,6 +30,7 @@ import {
|
||||
import CommandMenu from './CommandMenu';
|
||||
import ActivityIndicator from './ActivityIndicator';
|
||||
import ImageAttachment from './ImageAttachment';
|
||||
import VoiceInputButton from './VoiceInputButton';
|
||||
import PermissionRequestsBanner from './PermissionRequestsBanner';
|
||||
import TokenUsageSummary from './TokenUsageSummary';
|
||||
|
||||
@@ -89,6 +93,7 @@ interface ChatComposerProps {
|
||||
renderInputWithMentions: (text: string) => ReactNode;
|
||||
textareaRef: RefObject<HTMLTextAreaElement>;
|
||||
input: string;
|
||||
onVoiceTranscript?: (text: string, send?: boolean) => void;
|
||||
onInputChange: (event: ChangeEvent<HTMLTextAreaElement>) => void;
|
||||
onTextareaClick: (event: MouseEvent<HTMLTextAreaElement>) => void;
|
||||
onTextareaKeyDown: (event: KeyboardEvent<HTMLTextAreaElement>) => void;
|
||||
@@ -142,6 +147,7 @@ export default function ChatComposer({
|
||||
renderInputWithMentions,
|
||||
textareaRef,
|
||||
input,
|
||||
onVoiceTranscript,
|
||||
onInputChange,
|
||||
onTextareaClick,
|
||||
onTextareaKeyDown,
|
||||
@@ -154,6 +160,28 @@ export default function ChatComposer({
|
||||
sendByCtrlEnter,
|
||||
}: ChatComposerProps) {
|
||||
const { t } = useTranslation('chat');
|
||||
|
||||
// Voice state is hosted here (not in the mic button) so the main Send button can stop
|
||||
// recording and send the transcript in one tap, the way the mic button drops it in the box.
|
||||
const voiceAvailable = useVoiceAvailable();
|
||||
const [voiceError, setVoiceError] = useState<string | null>(null);
|
||||
const voiceErrorTimer = useRef<ReturnType<typeof setTimeout> | null>(null);
|
||||
const handleVoiceError = useCallback((msg: string) => {
|
||||
setVoiceError(msg);
|
||||
if (voiceErrorTimer.current) clearTimeout(voiceErrorTimer.current);
|
||||
voiceErrorTimer.current = setTimeout(() => setVoiceError(null), 4000);
|
||||
}, []);
|
||||
useEffect(() => () => {
|
||||
if (voiceErrorTimer.current) clearTimeout(voiceErrorTimer.current);
|
||||
}, []);
|
||||
const noopTranscript = useCallback(() => {}, []);
|
||||
const { state: voiceState, toggle: voiceToggle, stop: voiceStop } = useVoiceInput(
|
||||
onVoiceTranscript ?? noopTranscript,
|
||||
handleVoiceError,
|
||||
);
|
||||
const isRecording = voiceState === 'recording';
|
||||
const isTranscribing = voiceState === 'transcribing';
|
||||
|
||||
const textareaRect = textareaRef.current?.getBoundingClientRect();
|
||||
const commandMenuPosition = {
|
||||
top: textareaRect ? Math.max(16, textareaRect.top - 316) : 0,
|
||||
@@ -309,6 +337,10 @@ export default function ChatComposer({
|
||||
<ImageIcon />
|
||||
</PromptInputButton>
|
||||
|
||||
{onVoiceTranscript && voiceAvailable && (
|
||||
<VoiceInputButton state={voiceState} onToggle={voiceToggle} errorMsg={voiceError} />
|
||||
)}
|
||||
|
||||
<button
|
||||
type="button"
|
||||
onClick={onModeSwitch}
|
||||
@@ -387,10 +419,21 @@ export default function ChatComposer({
|
||||
{sendByCtrlEnter ? t('input.hintText.ctrlEnter') : t('input.hintText.enter')}
|
||||
</div>
|
||||
<PromptInputSubmit
|
||||
onClick={isLoading ? onAbortSession : undefined}
|
||||
disabled={!isLoading && !input.trim()}
|
||||
onClick={
|
||||
isLoading
|
||||
? onAbortSession
|
||||
: isRecording
|
||||
? (e: MouseEvent<HTMLButtonElement>) => {
|
||||
e.preventDefault();
|
||||
voiceStop({ send: true });
|
||||
}
|
||||
: undefined
|
||||
}
|
||||
disabled={isLoading ? false : isRecording ? false : isTranscribing ? true : !input.trim()}
|
||||
className="h-10 w-10 sm:h-10 sm:w-10"
|
||||
/>
|
||||
>
|
||||
{isTranscribing ? <Loader2 className="h-4 w-4 animate-spin" /> : undefined}
|
||||
</PromptInputSubmit>
|
||||
</div>
|
||||
</PromptInputFooter>
|
||||
</PromptInput>
|
||||
|
||||
@@ -15,6 +15,7 @@ import { Reasoning, ReasoningTrigger, ReasoningContent } from '../../../../share
|
||||
|
||||
import { Markdown } from './Markdown';
|
||||
import MessageCopyControl from './MessageCopyControl';
|
||||
import MessageSpeakControl from './MessageSpeakControl';
|
||||
|
||||
type DiffLine = {
|
||||
type: string;
|
||||
@@ -415,6 +416,9 @@ const MessageComponent = memo(({ message, prevMessage, createDiff, onFileOpen, a
|
||||
{shouldShowAssistantCopyControl && (
|
||||
<MessageCopyControl content={assistantCopyContent} messageType="assistant" />
|
||||
)}
|
||||
{shouldShowAssistantCopyControl && (
|
||||
<MessageSpeakControl content={assistantCopyContent} />
|
||||
)}
|
||||
{!isGrouped && <span>{formattedTime}</span>}
|
||||
</div>
|
||||
)}
|
||||
|
||||
@@ -0,0 +1,44 @@
|
||||
import { Volume2, Loader2, Square } from 'lucide-react';
|
||||
import { useTranslation } from 'react-i18next';
|
||||
import { useTts } from '../../hooks/useTts';
|
||||
import { useVoiceAvailable } from '../../hooks/useVoiceAvailable';
|
||||
|
||||
// Speak/stop button shown next to the copy control on assistant messages.
// Renders nothing when useVoiceAvailable() reports the voice feature as unavailable.
// Any playback error from useTts is shown in a small bubble above the button.
const MessageSpeakControl = ({ content }: { content: string }) => {
  const { t } = useTranslation('chat');
  const available = useVoiceAvailable();
  const { state, toggle, error } = useTts(() => content);

  // Both hooks run before this early return so the hook order stays stable.
  if (!available) return null;

  const isPlaying = state === 'playing';
  const isLoading = state === 'loading';

  const title = isPlaying
    ? t('voice.stopSpeaking')
    : isLoading
      ? t('voice.loading')
      : t('voice.speak');

  const renderIcon = () => {
    if (isPlaying) return <Square className="h-3.5 w-3.5" />;
    if (isLoading) return <Loader2 className="h-3.5 w-3.5 animate-spin" />;
    return <Volume2 className="h-3.5 w-3.5" />;
  };

  return (
    <span className="relative inline-flex">
      {error && (
        <span className="absolute bottom-full left-1/2 z-10 mb-1 max-w-[240px] -translate-x-1/2 whitespace-normal rounded bg-red-600 px-2 py-1 text-center text-xs text-white shadow-lg">
          {error}
        </span>
      )}
      <button
        type="button"
        onClick={toggle}
        title={title}
        aria-label={title}
        className="inline-flex items-center gap-1 rounded px-1 py-0.5 text-gray-400 transition-colors hover:text-gray-600 dark:text-gray-500 dark:hover:text-gray-300"
      >
        {renderIcon()}
      </button>
    </span>
  );
};
|
||||
|
||||
export default MessageSpeakControl;
|
||||
46
src/components/chat/view/subcomponents/VoiceInputButton.tsx
Normal file
46
src/components/chat/view/subcomponents/VoiceInputButton.tsx
Normal file
@@ -0,0 +1,46 @@
|
||||
import { useTranslation } from 'react-i18next';
|
||||
import { Mic, Square, Loader2 } from 'lucide-react';
|
||||
|
||||
import { PromptInputButton } from '../../../../shared/view/ui';
|
||||
import type { VoiceInputState } from '../../hooks/useVoiceInput';
|
||||
|
||||
type Props = {
|
||||
state: VoiceInputState;
|
||||
onToggle: () => void;
|
||||
errorMsg?: string | null;
|
||||
};
|
||||
|
||||
// Presentational push-to-talk mic button. It holds no recording state of its own: the
// current `state` and the `onToggle` action are passed in by the parent. The icon follows
// the state (mic when idle, red square while recording, spinner while transcribing), and
// `errorMsg`, when set, is shown in a bubble above the button.
export default function VoiceInputButton({ state, onToggle, errorMsg }: Props) {
  const { t } = useTranslation('chat');
  const recording = state === 'recording';

  const renderIcon = () => {
    switch (state) {
      case 'recording':
        return <Square className="text-red-500" />;
      case 'transcribing':
        return <Loader2 className="animate-spin" />;
      default:
        return <Mic />;
    }
  };

  // preventDefault runs before the toggle on every click.
  const handleClick = (e: { preventDefault: () => void }) => {
    e.preventDefault();
    onToggle();
  };

  return (
    <span className="relative inline-flex">
      {errorMsg && (
        <span className="absolute bottom-full left-1/2 mb-1 -translate-x-1/2 whitespace-nowrap rounded bg-red-600 px-2 py-1 text-xs text-white shadow-lg">
          {errorMsg}
        </span>
      )}
      <PromptInputButton
        tooltip={{ content: recording ? t('voice.stopRecording') : t('voice.input') }}
        onClick={handleClick}
      >
        {renderIcon()}
      </PromptInputButton>
    </span>
  );
}
|
||||
@@ -4,6 +4,7 @@ import {
|
||||
Eye,
|
||||
Languages,
|
||||
Maximize2,
|
||||
Mic,
|
||||
} from 'lucide-react';
|
||||
import type { PreferenceToggleItem } from './types';
|
||||
|
||||
@@ -54,4 +55,9 @@ export const INPUT_SETTING_TOGGLES: PreferenceToggleItem[] = [
|
||||
labelKey: 'quickSettings.sendByCtrlEnter',
|
||||
icon: Languages,
|
||||
},
|
||||
{
|
||||
key: 'voiceEnabled',
|
||||
labelKey: 'quickSettings.voiceEnabled',
|
||||
icon: Mic,
|
||||
},
|
||||
];
|
||||
|
||||
@@ -6,7 +6,8 @@ export type PreferenceToggleKey =
|
||||
| 'showRawParameters'
|
||||
| 'showThinking'
|
||||
| 'autoScrollToBottom'
|
||||
| 'sendByCtrlEnter';
|
||||
| 'sendByCtrlEnter'
|
||||
| 'voiceEnabled';
|
||||
|
||||
export type QuickSettingsPreferences = Record<PreferenceToggleKey, boolean>;
|
||||
|
||||
|
||||
@@ -27,12 +27,14 @@ export default function QuickSettingsPanelView() {
|
||||
showThinking: preferences.showThinking,
|
||||
autoScrollToBottom: preferences.autoScrollToBottom,
|
||||
sendByCtrlEnter: preferences.sendByCtrlEnter,
|
||||
voiceEnabled: preferences.voiceEnabled,
|
||||
}), [
|
||||
preferences.autoExpandTools,
|
||||
preferences.autoScrollToBottom,
|
||||
preferences.sendByCtrlEnter,
|
||||
preferences.showRawParameters,
|
||||
preferences.showThinking,
|
||||
preferences.voiceEnabled,
|
||||
]);
|
||||
|
||||
const handlePreferenceChange = useCallback(
|
||||
|
||||
@@ -3,7 +3,7 @@ import type { Dispatch, SetStateAction } from 'react';
|
||||
import type { LLMProvider } from '../../../types/app';
|
||||
import type { ProviderAuthStatus } from '../../provider-auth/types';
|
||||
|
||||
export type SettingsMainTab = 'agents' | 'appearance' | 'git' | 'api' | 'tasks' | 'browser' | 'notifications' | 'plugins' | 'about';
|
||||
export type SettingsMainTab = 'agents' | 'appearance' | 'git' | 'api' | 'voice' | 'tasks' | 'browser' | 'notifications' | 'plugins' | 'about';
|
||||
export type AgentProvider = LLMProvider;
|
||||
export type AgentCategory = 'account' | 'permissions' | 'mcp' | 'skills';
|
||||
export type ProjectSortOrder = 'name' | 'date';
|
||||
|
||||
@@ -7,6 +7,7 @@ import SettingsSidebar from '../view/SettingsSidebar';
|
||||
import AgentsSettingsTab from '../view/tabs/agents-settings/AgentsSettingsTab';
|
||||
import AppearanceSettingsTab from '../view/tabs/AppearanceSettingsTab';
|
||||
import CredentialsSettingsTab from '../view/tabs/api-settings/CredentialsSettingsTab';
|
||||
import VoiceSettingsTab from '../view/tabs/VoiceSettingsTab';
|
||||
import GitSettingsTab from '../view/tabs/git-settings/GitSettingsTab';
|
||||
import BrowserUseSettingsTab from '../view/tabs/browser-use-settings/BrowserUseSettingsTab';
|
||||
import NotificationsSettingsTab from '../view/tabs/NotificationsSettingsTab';
|
||||
@@ -157,6 +158,8 @@ function Settings({ isOpen, onClose, projects = [], initialTab = 'agents' }: Set
|
||||
|
||||
{activeTab === 'api' && <CredentialsSettingsTab />}
|
||||
|
||||
{activeTab === 'voice' && <VoiceSettingsTab />}
|
||||
|
||||
{activeTab === 'plugins' && <PluginSettingsTab />}
|
||||
|
||||
{activeTab === 'about' && <AboutTab />}
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import { Bell, Bot, GitBranch, Info, Key, ListChecks, MonitorPlay, Palette, Puzzle } from 'lucide-react';
|
||||
import { Bell, Bot, GitBranch, Info, Key, ListChecks, Mic, MonitorPlay, Palette, Puzzle } from 'lucide-react';
|
||||
import { useTranslation } from 'react-i18next';
|
||||
|
||||
import { cn } from '../../../lib/utils';
|
||||
import { PillBar, Pill } from '../../../shared/view/ui';
|
||||
import type { SettingsMainTab } from '../types/types';
|
||||
@@ -20,6 +21,7 @@ const NAV_ITEMS: NavItem[] = [
|
||||
{ id: 'appearance', labelKey: 'mainTabs.appearance', icon: Palette },
|
||||
{ id: 'git', labelKey: 'mainTabs.git', icon: GitBranch },
|
||||
{ id: 'api', labelKey: 'mainTabs.apiTokens', icon: Key },
|
||||
{ id: 'voice', labelKey: 'mainTabs.voice', icon: Mic },
|
||||
{ id: 'tasks', labelKey: 'mainTabs.tasks', icon: ListChecks },
|
||||
{ id: 'browser', labelKey: 'mainTabs.browser', icon: MonitorPlay },
|
||||
{ id: 'plugins', labelKey: 'mainTabs.plugins', icon: Puzzle },
|
||||
|
||||
88
src/components/settings/view/tabs/VoiceSettingsTab.tsx
Normal file
88
src/components/settings/view/tabs/VoiceSettingsTab.tsx
Normal file
@@ -0,0 +1,88 @@
|
||||
import type { InputHTMLAttributes } from 'react';
|
||||
import { useTranslation } from 'react-i18next';
|
||||
import SettingsSection from '../SettingsSection';
|
||||
import SettingsToggle from '../SettingsToggle';
|
||||
import { useUiPreferences } from '../../../../hooks/useUiPreferences';
|
||||
import { useVoiceConfig } from '../../../../hooks/useVoiceConfig';
|
||||
|
||||
// Shared Tailwind classes for every text input rendered by this tab.
const inputClass =
  'w-full rounded-md border border-border bg-background px-3 py-2 text-sm text-foreground placeholder:text-muted-foreground focus:outline-none focus:ring-2 focus:ring-ring';

/**
 * A text input wrapped in a <label> so the caption is associated with the control.
 *
 * `label` is rendered as the caption; every other prop is forwarded to the <input>.
 * Forwarded props are spread after `className`, so a caller-supplied `className`
 * replaces the default styling.
 */
function Field(props: { label: string } & InputHTMLAttributes<HTMLInputElement>) {
  const { label, ...inputProps } = props;

  return (
    <label className="block space-y-1">
      <span className="text-sm font-medium text-foreground">{label}</span>
      <input className={inputClass} {...inputProps} />
    </label>
  );
}
|
||||
|
||||
/**
 * Settings tab for the voice feature.
 *
 * Renders two sections:
 *  - a toggle bound to the `voiceEnabled` UI preference;
 *  - text fields for the voice backend (base URL, API key, STT model, TTS model,
 *    voice, audio format). Each change event calls `update` with a one-field patch.
 */
export default function VoiceSettingsTab() {
  const { t } = useTranslation('settings');
  const { preferences, setPreference } = useUiPreferences();
  const { config, update } = useVoiceConfig();

  return (
    <div className="space-y-8">
      <SettingsSection title={t('voiceSettings.title')} description={t('voiceSettings.description')}>
        <div className="flex items-center justify-between rounded-lg border border-border p-3">
          <div className="pr-3">
            <div className="text-sm font-medium text-foreground">{t('voiceSettings.enable')}</div>
            <div className="text-xs text-muted-foreground">{t('voiceSettings.enableDescription')}</div>
          </div>
          <SettingsToggle
            checked={preferences.voiceEnabled}
            onChange={(v) => setPreference('voiceEnabled', v)}
            ariaLabel={t('voiceSettings.enable')}
          />
        </div>
      </SettingsSection>

      <SettingsSection title={t('voiceSettings.backendTitle')} description={t('voiceSettings.backendDescription')}>
        <div className="space-y-4">
          <Field
            label={t('voiceSettings.baseUrl')}
            placeholder="https://api.openai.com/v1"
            value={config.baseUrl}
            onChange={(e) => update({ baseUrl: e.target.value })}
          />
          {/*
            Browsers ignore autoComplete="off" on password inputs and may fill in a saved
            login password. "new-password" is the documented way to opt out of that
            autofill, which matters because this field holds an API key, not a login.
          */}
          <Field
            label={t('voiceSettings.apiKey')}
            type="password"
            autoComplete="new-password"
            placeholder="sk-…"
            value={config.apiKey}
            onChange={(e) => update({ apiKey: e.target.value })}
          />
          <div className="grid grid-cols-1 gap-4 sm:grid-cols-4">
            <Field
              label={t('voiceSettings.sttModel')}
              placeholder="whisper-1"
              value={config.sttModel}
              onChange={(e) => update({ sttModel: e.target.value })}
            />
            <Field
              label={t('voiceSettings.ttsModel')}
              placeholder="tts-1"
              value={config.ttsModel}
              onChange={(e) => update({ ttsModel: e.target.value })}
            />
            <Field
              label={t('voiceSettings.voice')}
              placeholder="alloy"
              value={config.ttsVoice}
              onChange={(e) => update({ ttsVoice: e.target.value })}
            />
            <Field
              label={t('voiceSettings.format')}
              placeholder="mp3"
              value={config.ttsFormat}
              onChange={(e) => update({ ttsFormat: e.target.value })}
            />
          </div>
          <p className="text-xs text-muted-foreground">{t('voiceSettings.note')}</p>
        </div>
      </SettingsSection>
    </div>
  );
}
|
||||
@@ -1,5 +1,6 @@
|
||||
import type { ITerminalOptions } from '@xterm/xterm';
|
||||
|
||||
export const CODEX_DEVICE_AUTH_URL = 'https://auth.openai.com/codex/device';
|
||||
export const SHELL_RESTART_DELAY_MS = 200;
|
||||
export const TERMINAL_INIT_DELAY_MS = 100;
|
||||
export const TERMINAL_RESIZE_DELAY_MS = 50;
|
||||
|
||||
@@ -24,6 +24,7 @@ type UseShellConnectionOptions = {
|
||||
autoConnect: boolean;
|
||||
closeSocket: () => void;
|
||||
clearTerminalScreen: () => void;
|
||||
setAuthUrl: (nextAuthUrl: string) => void;
|
||||
onOutputRef?: MutableRefObject<(() => void) | null>;
|
||||
};
|
||||
|
||||
@@ -48,6 +49,7 @@ export function useShellConnection({
|
||||
autoConnect,
|
||||
closeSocket,
|
||||
clearTerminalScreen,
|
||||
setAuthUrl,
|
||||
onOutputRef,
|
||||
}: UseShellConnectionOptions): UseShellConnectionResult {
|
||||
const [isConnected, setIsConnected] = useState(false);
|
||||
@@ -98,8 +100,14 @@ export function useShellConnection({
|
||||
return;
|
||||
}
|
||||
|
||||
if (message.type === 'auth_url' || message.type === 'url_open') {
|
||||
const nextAuthUrl = typeof message.url === 'string' ? message.url : '';
|
||||
if (nextAuthUrl) {
|
||||
setAuthUrl(nextAuthUrl);
|
||||
}
|
||||
}
|
||||
},
|
||||
[handleProcessCompletion, onOutputRef, terminalRef],
|
||||
[handleProcessCompletion, onOutputRef, setAuthUrl, terminalRef],
|
||||
);
|
||||
|
||||
const connectWebSocket = useCallback(
|
||||
@@ -125,6 +133,7 @@ export function useShellConnection({
|
||||
setIsConnected(true);
|
||||
setIsConnecting(false);
|
||||
connectingRef.current = false;
|
||||
setAuthUrl('');
|
||||
|
||||
window.setTimeout(() => {
|
||||
const currentTerminal = terminalRef.current;
|
||||
@@ -187,6 +196,7 @@ export function useShellConnection({
|
||||
isPlainShellRef,
|
||||
selectedProjectRef,
|
||||
selectedSessionRef,
|
||||
setAuthUrl,
|
||||
terminalRef,
|
||||
wsRef,
|
||||
],
|
||||
@@ -215,7 +225,8 @@ export function useShellConnection({
|
||||
setIsConnecting(false);
|
||||
connectingRef.current = false;
|
||||
forceRestartOnInitRef.current = false;
|
||||
}, [clearTerminalScreen, closeSocket]);
|
||||
setAuthUrl('');
|
||||
}, [clearTerminalScreen, closeSocket, setAuthUrl]);
|
||||
|
||||
useEffect(() => {
|
||||
if (
|
||||
|
||||
@@ -1,9 +1,8 @@
|
||||
import { useCallback, useEffect, useRef } from 'react';
|
||||
import { useCallback, useEffect, useRef, useState } from 'react';
|
||||
import type { FitAddon } from '@xterm/addon-fit';
|
||||
import type { Terminal } from '@xterm/xterm';
|
||||
|
||||
import type { UseShellRuntimeOptions, UseShellRuntimeResult } from '../types/types';
|
||||
|
||||
import { copyTextToClipboard } from '../../../utils/clipboard';
|
||||
import { useShellConnection } from './useShellConnection';
|
||||
import { useShellTerminal } from './useShellTerminal';
|
||||
|
||||
@@ -23,11 +22,15 @@ export function useShellRuntime({
|
||||
const fitAddonRef = useRef<FitAddon | null>(null);
|
||||
const wsRef = useRef<WebSocket | null>(null);
|
||||
|
||||
const [authUrl, setAuthUrl] = useState('');
|
||||
const [authUrlVersion, setAuthUrlVersion] = useState(0);
|
||||
|
||||
const selectedProjectRef = useRef(selectedProject);
|
||||
const selectedSessionRef = useRef(selectedSession);
|
||||
const initialCommandRef = useRef(initialCommand);
|
||||
const isPlainShellRef = useRef(isPlainShell);
|
||||
const onProcessCompleteRef = useRef(onProcessComplete);
|
||||
const authUrlRef = useRef('');
|
||||
const lastSessionIdRef = useRef<string | null>(selectedSession?.id ?? null);
|
||||
|
||||
// Keep mutable values in refs so websocket handlers always read current data.
|
||||
@@ -39,6 +42,12 @@ export function useShellRuntime({
|
||||
onProcessCompleteRef.current = onProcessComplete;
|
||||
}, [selectedProject, selectedSession, initialCommand, isPlainShell, onProcessComplete]);
|
||||
|
||||
// Single entry point for changing the auth URL: keeps the ref (read synchronously by
// handlers) and the state (drives rendering) in step, and bumps the version counter on
// every call, including calls that pass the same URL again.
const setCurrentAuthUrl = useCallback((url: string) => {
  authUrlRef.current = url;
  setAuthUrl(url);
  setAuthUrlVersion((version) => version + 1);
}, []);
|
||||
|
||||
const closeSocket = useCallback(() => {
|
||||
const activeSocket = wsRef.current;
|
||||
if (!activeSocket) {
|
||||
@@ -55,6 +64,32 @@ export function useShellRuntime({
|
||||
wsRef.current = null;
|
||||
}, []);
|
||||
|
||||
// Opens the auth URL (defaults to the current ref value) in a new tab.
// Returns true when window.open produced a window, false for an empty URL or when
// no window was returned.
const openAuthUrlInBrowser = useCallback((url = authUrlRef.current) => {
  const openedWindow = url ? window.open(url, '_blank') : null;
  if (!openedWindow) {
    return false;
  }

  try {
    // Detach the new tab from this page.
    openedWindow.opener = null;
  } catch {
    // Clearing opener can throw under cross-origin restrictions; the tab is still open.
  }

  return true;
}, []);
|
||||
|
||||
// Copies the auth URL (defaults to the current ref value) to the clipboard.
// Resolves to false without touching the clipboard when the URL is empty; otherwise
// resolves to whatever copyTextToClipboard reports.
const copyAuthUrlToClipboard = useCallback(
  async (url = authUrlRef.current) => (url ? copyTextToClipboard(url) : false),
  [],
);
|
||||
|
||||
const { isInitialized, clearTerminalScreen, disposeTerminal } = useShellTerminal({
|
||||
terminalContainerRef,
|
||||
terminalRef,
|
||||
@@ -63,6 +98,10 @@ export function useShellRuntime({
|
||||
selectedProject,
|
||||
minimal,
|
||||
isRestarting,
|
||||
initialCommandRef,
|
||||
isPlainShellRef,
|
||||
authUrlRef,
|
||||
copyAuthUrlToClipboard,
|
||||
closeSocket,
|
||||
});
|
||||
|
||||
@@ -79,6 +118,7 @@ export function useShellRuntime({
|
||||
autoConnect,
|
||||
closeSocket,
|
||||
clearTerminalScreen,
|
||||
setAuthUrl: setCurrentAuthUrl,
|
||||
onOutputRef,
|
||||
});
|
||||
|
||||
@@ -116,7 +156,11 @@ export function useShellRuntime({
|
||||
isConnected,
|
||||
isInitialized,
|
||||
isConnecting,
|
||||
authUrl,
|
||||
authUrlVersion,
|
||||
connectToShell,
|
||||
disconnectFromShell,
|
||||
openAuthUrlInBrowser,
|
||||
copyAuthUrlToClipboard,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -4,18 +4,15 @@ import { FitAddon } from '@xterm/addon-fit';
|
||||
import { WebLinksAddon } from '@xterm/addon-web-links';
|
||||
import { WebglAddon } from '@xterm/addon-webgl';
|
||||
import { Terminal } from '@xterm/xterm';
|
||||
|
||||
import type { Project } from '../../../types/app';
|
||||
import { copyTextToClipboard } from '../../../utils/clipboard';
|
||||
import {
|
||||
CODEX_DEVICE_AUTH_URL,
|
||||
TERMINAL_INIT_DELAY_MS,
|
||||
TERMINAL_OPTIONS,
|
||||
TERMINAL_RESIZE_DELAY_MS,
|
||||
} from '../constants/constants';
|
||||
import {
|
||||
installMobileTerminalSelection,
|
||||
type MobileTerminalSelectionManager,
|
||||
} from '../utils/mobileTerminalSelection';
|
||||
import { copyTextToClipboard } from '../../../utils/clipboard';
|
||||
import { isCodexLoginCommand } from '../utils/auth';
|
||||
import { sendSocketMessage } from '../utils/socket';
|
||||
import { ensureXtermFocusStyles } from '../utils/terminalStyles';
|
||||
|
||||
@@ -27,6 +24,10 @@ type UseShellTerminalOptions = {
|
||||
selectedProject: Project | null | undefined;
|
||||
minimal: boolean;
|
||||
isRestarting: boolean;
|
||||
initialCommandRef: MutableRefObject<string | null | undefined>;
|
||||
isPlainShellRef: MutableRefObject<boolean>;
|
||||
authUrlRef: MutableRefObject<string>;
|
||||
copyAuthUrlToClipboard: (url?: string) => Promise<boolean>;
|
||||
closeSocket: () => void;
|
||||
};
|
||||
|
||||
@@ -44,11 +45,14 @@ export function useShellTerminal({
|
||||
selectedProject,
|
||||
minimal,
|
||||
isRestarting,
|
||||
initialCommandRef,
|
||||
isPlainShellRef,
|
||||
authUrlRef,
|
||||
copyAuthUrlToClipboard,
|
||||
closeSocket,
|
||||
}: UseShellTerminalOptions): UseShellTerminalResult {
|
||||
const [isInitialized, setIsInitialized] = useState(false);
|
||||
const resizeTimeoutRef = useRef<number | null>(null);
|
||||
const mobileSelectionRef = useRef<MobileTerminalSelectionManager | null>(null);
|
||||
const selectedProjectKey = selectedProject?.fullPath || selectedProject?.path || '';
|
||||
const hasSelectedProject = Boolean(selectedProject);
|
||||
|
||||
@@ -66,11 +70,6 @@ export function useShellTerminal({
|
||||
}, [terminalRef]);
|
||||
|
||||
const disposeTerminal = useCallback(() => {
|
||||
if (mobileSelectionRef.current) {
|
||||
mobileSelectionRef.current.dispose();
|
||||
mobileSelectionRef.current = null;
|
||||
}
|
||||
|
||||
if (terminalRef.current) {
|
||||
terminalRef.current.dispose();
|
||||
terminalRef.current = null;
|
||||
@@ -81,8 +80,7 @@ export function useShellTerminal({
|
||||
}, [fitAddonRef, terminalRef]);
|
||||
|
||||
useEffect(() => {
|
||||
const terminalContainer = terminalContainerRef.current;
|
||||
if (!terminalContainer || !hasSelectedProject || isRestarting || terminalRef.current) {
|
||||
if (!terminalContainerRef.current || !hasSelectedProject || isRestarting || terminalRef.current) {
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -104,11 +102,7 @@ export function useShellTerminal({
|
||||
console.warn('[Shell] WebGL renderer unavailable, using Canvas fallback');
|
||||
}
|
||||
|
||||
nextTerminal.open(terminalContainer);
|
||||
mobileSelectionRef.current = installMobileTerminalSelection(
|
||||
nextTerminal,
|
||||
terminalContainer,
|
||||
);
|
||||
nextTerminal.open(terminalContainerRef.current);
|
||||
|
||||
const copyTerminalSelection = async () => {
|
||||
const selection = nextTerminal.getSelection();
|
||||
@@ -139,9 +133,29 @@ export function useShellTerminal({
|
||||
void copyTextToClipboard(selection);
|
||||
};
|
||||
|
||||
terminalContainer.addEventListener('copy', handleTerminalCopy);
|
||||
terminalContainerRef.current.addEventListener('copy', handleTerminalCopy);
|
||||
|
||||
nextTerminal.attachCustomKeyEventHandler((event) => {
|
||||
const activeAuthUrl = isCodexLoginCommand(initialCommandRef.current)
|
||||
? CODEX_DEVICE_AUTH_URL
|
||||
: authUrlRef.current;
|
||||
|
||||
if (
|
||||
event.type === 'keydown' &&
|
||||
minimal &&
|
||||
isPlainShellRef.current &&
|
||||
activeAuthUrl &&
|
||||
!event.ctrlKey &&
|
||||
!event.metaKey &&
|
||||
!event.altKey &&
|
||||
event.key?.toLowerCase() === 'c'
|
||||
) {
|
||||
event.preventDefault();
|
||||
event.stopPropagation();
|
||||
void copyAuthUrlToClipboard(activeAuthUrl);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (
|
||||
event.type === 'keydown' &&
|
||||
(event.ctrlKey || event.metaKey) &&
|
||||
@@ -226,10 +240,10 @@ export function useShellTerminal({
|
||||
}, TERMINAL_RESIZE_DELAY_MS);
|
||||
});
|
||||
|
||||
resizeObserver.observe(terminalContainer);
|
||||
resizeObserver.observe(terminalContainerRef.current);
|
||||
|
||||
return () => {
|
||||
terminalContainer.removeEventListener('copy', handleTerminalCopy);
|
||||
terminalContainerRef.current?.removeEventListener('copy', handleTerminalCopy);
|
||||
resizeObserver.disconnect();
|
||||
if (resizeTimeoutRef.current !== null) {
|
||||
window.clearTimeout(resizeTimeoutRef.current);
|
||||
@@ -240,12 +254,16 @@ export function useShellTerminal({
|
||||
disposeTerminal();
|
||||
};
|
||||
}, [
|
||||
authUrlRef,
|
||||
closeSocket,
|
||||
copyAuthUrlToClipboard,
|
||||
disposeTerminal,
|
||||
fitAddonRef,
|
||||
initialCommandRef,
|
||||
isPlainShellRef,
|
||||
isRestarting,
|
||||
hasSelectedProject,
|
||||
minimal,
|
||||
hasSelectedProject,
|
||||
selectedProjectKey,
|
||||
terminalContainerRef,
|
||||
terminalRef,
|
||||
|
||||
@@ -4,6 +4,8 @@ import type { Terminal } from '@xterm/xterm';
|
||||
|
||||
import type { Project, ProjectSession } from '../../../types/app';
|
||||
|
||||
export type AuthCopyStatus = 'idle' | 'copied' | 'failed';
|
||||
|
||||
export type ShellInitMessage = {
|
||||
type: 'init';
|
||||
projectPath: string;
|
||||
@@ -52,6 +54,7 @@ export type ShellSharedRefs = {
|
||||
wsRef: MutableRefObject<WebSocket | null>;
|
||||
terminalRef: MutableRefObject<Terminal | null>;
|
||||
fitAddonRef: MutableRefObject<FitAddon | null>;
|
||||
authUrlRef: MutableRefObject<string>;
|
||||
selectedProjectRef: MutableRefObject<Project | null | undefined>;
|
||||
selectedSessionRef: MutableRefObject<ProjectSession | null | undefined>;
|
||||
initialCommandRef: MutableRefObject<string | null | undefined>;
|
||||
@@ -66,6 +69,10 @@ export type UseShellRuntimeResult = {
|
||||
isConnected: boolean;
|
||||
isInitialized: boolean;
|
||||
isConnecting: boolean;
|
||||
authUrl: string;
|
||||
authUrlVersion: number;
|
||||
connectToShell: (options?: { forceRestart?: boolean }) => void;
|
||||
disconnectFromShell: (options?: { suppressAutoConnect?: boolean }) => void;
|
||||
openAuthUrlInBrowser: (url?: string) => boolean;
|
||||
copyAuthUrlToClipboard: (url?: string) => Promise<boolean>;
|
||||
};
|
||||
|
||||
@@ -1,4 +1,17 @@
|
||||
import type { ProjectSession } from '../../../types/app';
|
||||
import { CODEX_DEVICE_AUTH_URL } from '../constants/constants';
|
||||
|
||||
/**
 * Reports whether a shell command contains `codex login`.
 *
 * The match is case-insensitive, allows any amount of whitespace between the two
 * words, and requires word boundaries on both sides. Non-string input yields false.
 */
export function isCodexLoginCommand(command: string | null | undefined): boolean {
  if (typeof command !== 'string') {
    return false;
  }

  const codexLoginPattern = /\bcodex\s+login\b/i;
  return codexLoginPattern.test(command);
}
|
||||
|
||||
/**
 * Picks the auth URL to show for a shell session.
 *
 * A `codex login` command always maps to the fixed Codex device-auth URL; for any
 * other command the supplied `authUrl` is returned unchanged.
 */
export function resolveAuthUrlForDisplay(command: string | null | undefined, authUrl: string): string {
  return isCodexLoginCommand(command) ? CODEX_DEVICE_AUTH_URL : authUrl;
}
|
||||
|
||||
export function getSessionDisplayName(session: ProjectSession | null | undefined): string | null {
|
||||
if (!session) {
|
||||
@@ -8,4 +21,4 @@ export function getSessionDisplayName(session: ProjectSession | null | undefined
|
||||
return session.__provider === 'cursor'
|
||||
? session.name || 'Untitled Session'
|
||||
: session.summary || 'New Session';
|
||||
}
|
||||
}
|
||||
@@ -1,637 +0,0 @@
|
||||
import type { IDisposable, Terminal } from '@xterm/xterm';
|
||||
|
||||
// A cell position in the terminal grid. Values of this shape come from
// touchToTerminalCoords and are passed as (col, row) to terminal.select.
type TerminalCoords = {
|
||||
col: number;
|
||||
row: number;
|
||||
};
|
||||
|
||||
// The clientX/clientY pair of a single touch point, as produced by toTouchCoords.
type TouchCoords = {
|
||||
clientX: number;
|
||||
clientY: number;
|
||||
};
|
||||
|
||||
// Width and height of one terminal cell; starts at 0/0 on the selection core.
// NOTE(review): presumably CSS pixels read from the render service — confirm in updateCellDimensions.
type CellDimensions = {
|
||||
width: number;
|
||||
height: number;
|
||||
};
|
||||
|
||||
// Identifies one of the two selection handles; also stored in dataset.handleType
// and used as the CSS class suffix when a handle element is created.
type DragHandle = 'start' | 'end';
|
||||
|
||||
// Terminal widened with an optional path to per-cell CSS dimensions. Every level is
// optional, so readers must null-check the whole chain.
// NOTE(review): the underscore-prefixed fields look like xterm internals rather than public API — confirm.
type TerminalWithRenderService = Terminal & {
|
||||
_core?: {
|
||||
_renderService?: {
|
||||
dimensions?: {
|
||||
css?: {
|
||||
cell?: {
|
||||
width?: number;
|
||||
height?: number;
|
||||
};
|
||||
|
||||
};
|
||||
|
||||
};
|
||||
|
||||
};
|
||||
|
||||
};
|
||||
|
||||
};
|
||||
|
||||
|
||||
// Public surface of the mobile selection helper; implemented by ShellMobileSelectionCore.
export type MobileTerminalSelectionManager = {
|
||||
dispose: () => void;
|
||||
updateHandles: () => void;
|
||||
};
|
||||
|
||||
// Delay (ms) passed to the touchstart timer; when it fires, a selection starts at the touch point.
const LONG_PRESS_MS = 600;
|
||||
// Finger travel (px) beyond which the pending long-press timer is cancelled and a
// pending clear-on-tap is marked as moved.
const MOVE_THRESHOLD_PX = 8;
|
||||
// Width and height (px) of each round selection handle.
const HANDLE_SIZE_PX = 22;
|
||||
// Subtracted from the touch's clientY while a handle is dragged, so the selection
// point sits above the reported touch position.
const FINGER_OFFSET_PX = 40;
|
||||
|
||||
/**
 * Detects whether the current environment looks touch-capable.
 *
 * Returns false when `window` or `navigator` is not defined. Otherwise returns true
 * as soon as one of these holds, checked in order: `navigator.maxTouchPoints > 0`,
 * `ontouchstart` exists on `window`, or the `(pointer: coarse)` media query matches.
 */
function isTouchSelectionEnvironment(): boolean {
  const hasBrowserGlobals = typeof window !== 'undefined' && typeof navigator !== 'undefined';
  if (!hasBrowserGlobals) {
    return false;
  }

  if (navigator.maxTouchPoints > 0) {
    return true;
  }

  if ('ontouchstart' in window) {
    return true;
  }

  // matchMedia may be missing; optional call then yields undefined, which is not `true`.
  return window.matchMedia?.('(pointer: coarse)').matches === true;
}
|
||||
|
||||
/**
 * Restricts `value` to the range [min, max].
 *
 * The upper bound is applied first and the lower bound second, so when `min > max`
 * the result is `min`. NaN input propagates as NaN.
 */
function clamp(value: number, min: number, max: number): number {
  const cappedAtMax = Math.min(max, value);
  return Math.max(min, cappedAtMax);
}
|
||||
|
||||
/**
 * Straight-line distance between two touch points, computed from their
 * clientX/clientY differences.
 */
function getDistance(start: TouchCoords, end: TouchCoords): number {
  const deltaX = end.clientX - start.clientX;
  const deltaY = end.clientY - start.clientY;
  return Math.hypot(deltaX, deltaY);
}
|
||||
|
||||
class ShellMobileSelectionCore implements MobileTerminalSelectionManager {
|
||||
private readonly terminal: Terminal;
|
||||
private readonly terminalContent: HTMLElement;
|
||||
private readonly overlay: HTMLDivElement;
|
||||
private readonly startHandle: HTMLDivElement;
|
||||
private readonly endHandle: HTMLDivElement;
|
||||
private readonly disposables: IDisposable[] = [];
|
||||
private readonly originalPosition: string;
|
||||
|
||||
private didSetPosition = false;
|
||||
private isDestroyed = false;
|
||||
private isSelecting = false;
|
||||
private isHandleDragging = false;
|
||||
private dragHandle: DragHandle | null = null;
|
||||
private selectionStart: TerminalCoords | null = null;
|
||||
private selectionEnd: TerminalCoords | null = null;
|
||||
private touchStart: TouchCoords | null = null;
|
||||
private pendingClearTouch: { point: TouchCoords; moved: boolean } | null = null;
|
||||
private tapHoldTimeout: number | null = null;
|
||||
private cellDimensions: CellDimensions = { width: 0, height: 0 };
|
||||
|
||||
constructor(terminal: Terminal, terminalContent: HTMLElement) {
|
||||
this.terminal = terminal;
|
||||
this.terminalContent = terminalContent;
|
||||
this.originalPosition = terminalContent.style.position;
|
||||
|
||||
if (window.getComputedStyle(terminalContent).position === 'static') {
|
||||
terminalContent.style.position = 'relative';
|
||||
this.didSetPosition = true;
|
||||
}
|
||||
|
||||
this.overlay = this.createSelectionOverlay();
|
||||
this.startHandle = this.createHandle('start');
|
||||
this.endHandle = this.createHandle('end');
|
||||
this.overlay.append(this.startHandle, this.endHandle);
|
||||
this.terminalContent.appendChild(this.overlay);
|
||||
|
||||
this.attachEventListeners();
|
||||
this.updateCellDimensions();
|
||||
}
|
||||
|
||||
private createSelectionOverlay(): HTMLDivElement {
|
||||
const overlay = document.createElement('div');
|
||||
overlay.className = 'shell-mobile-selection-overlay';
|
||||
overlay.style.position = 'absolute';
|
||||
overlay.style.inset = '0';
|
||||
overlay.style.overflow = 'hidden';
|
||||
overlay.style.pointerEvents = 'none';
|
||||
overlay.style.zIndex = '30';
|
||||
return overlay;
|
||||
}
|
||||
|
||||
private createHandle(type: DragHandle): HTMLDivElement {
|
||||
const handle = document.createElement('div');
|
||||
handle.className = `shell-mobile-selection-handle shell-mobile-selection-handle-${type}`;
|
||||
handle.dataset.handleType = type;
|
||||
handle.style.position = 'absolute';
|
||||
handle.style.width = `${HANDLE_SIZE_PX}px`;
|
||||
handle.style.height = `${HANDLE_SIZE_PX}px`;
|
||||
handle.style.borderRadius = '50%';
|
||||
handle.style.background = '#3b82f6';
|
||||
handle.style.border = '2px solid #fff';
|
||||
handle.style.boxShadow = '0 2px 8px rgba(0,0,0,0.3)';
|
||||
handle.style.display = 'none';
|
||||
handle.style.pointerEvents = 'auto';
|
||||
handle.style.touchAction = 'none';
|
||||
handle.style.zIndex = '31';
|
||||
return handle;
|
||||
}
|
||||
|
||||
private attachEventListeners(): void {
|
||||
if (!this.terminal.element) {
|
||||
return;
|
||||
}
|
||||
|
||||
this.terminal.element.addEventListener('touchstart', this.onTerminalTouchStart, {
|
||||
passive: false,
|
||||
});
|
||||
this.terminal.element.addEventListener('touchmove', this.onTerminalTouchMove, {
|
||||
passive: false,
|
||||
});
|
||||
this.terminal.element.addEventListener('touchend', this.onTerminalTouchEnd, {
|
||||
passive: false,
|
||||
});
|
||||
this.terminal.element.addEventListener('touchcancel', this.onTerminalTouchCancel, {
|
||||
passive: false,
|
||||
});
|
||||
|
||||
this.startHandle.addEventListener('touchstart', this.onHandleTouchStart, { passive: false });
|
||||
this.startHandle.addEventListener('touchmove', this.onHandleTouchMove, { passive: false });
|
||||
this.startHandle.addEventListener('touchend', this.onHandleTouchEnd, { passive: false });
|
||||
this.startHandle.addEventListener('touchcancel', this.onHandleTouchEnd, { passive: false });
|
||||
|
||||
this.endHandle.addEventListener('touchstart', this.onHandleTouchStart, { passive: false });
|
||||
this.endHandle.addEventListener('touchmove', this.onHandleTouchMove, { passive: false });
|
||||
this.endHandle.addEventListener('touchend', this.onHandleTouchEnd, { passive: false });
|
||||
this.endHandle.addEventListener('touchcancel', this.onHandleTouchEnd, { passive: false });
|
||||
|
||||
document.addEventListener('touchstart', this.onDocumentTouchStart, { passive: true });
|
||||
|
||||
this.disposables.push(
|
||||
this.terminal.onSelectionChange(this.onSelectionChange),
|
||||
this.terminal.onResize(this.onTerminalResize),
|
||||
this.terminal.onScroll(this.onTerminalScroll),
|
||||
);
|
||||
}
|
||||
|
||||
private onTerminalTouchStart = (event: TouchEvent): void => {
|
||||
if (event.touches.length !== 1) {
|
||||
this.clearTapHoldTimeout();
|
||||
return;
|
||||
}
|
||||
|
||||
const touch = this.toTouchCoords(event.touches[0]);
|
||||
this.touchStart = touch;
|
||||
|
||||
if (this.isSelecting) {
|
||||
this.pendingClearTouch = { point: touch, moved: false };
|
||||
return;
|
||||
}
|
||||
|
||||
this.clearTapHoldTimeout();
|
||||
this.tapHoldTimeout = window.setTimeout(() => {
|
||||
this.tapHoldTimeout = null;
|
||||
this.startSelection(touch);
|
||||
}, LONG_PRESS_MS);
|
||||
};
|
||||
|
||||
private onTerminalTouchMove = (event: TouchEvent): void => {
|
||||
if (event.touches.length !== 1) {
|
||||
this.clearTapHoldTimeout();
|
||||
return;
|
||||
}
|
||||
|
||||
const touch = this.toTouchCoords(event.touches[0]);
|
||||
const touchStart = this.touchStart;
|
||||
|
||||
if (this.pendingClearTouch) {
|
||||
this.pendingClearTouch.moved =
|
||||
this.pendingClearTouch.moved ||
|
||||
getDistance(this.pendingClearTouch.point, touch) > MOVE_THRESHOLD_PX;
|
||||
return;
|
||||
}
|
||||
|
||||
if (!touchStart) {
|
||||
return;
|
||||
}
|
||||
|
||||
const moved = getDistance(touchStart, touch) > MOVE_THRESHOLD_PX;
|
||||
if (moved) {
|
||||
this.clearTapHoldTimeout();
|
||||
}
|
||||
|
||||
if (this.isSelecting && !this.isHandleDragging) {
|
||||
event.preventDefault();
|
||||
this.extendSelection(touch);
|
||||
}
|
||||
};
|
||||
|
||||
private onTerminalTouchEnd = (): void => {
|
||||
this.clearTapHoldTimeout();
|
||||
this.touchStart = null;
|
||||
|
||||
if (!this.pendingClearTouch) {
|
||||
return;
|
||||
}
|
||||
|
||||
const shouldClear = this.isSelecting && !this.pendingClearTouch.moved && !this.isHandleDragging;
|
||||
this.pendingClearTouch = null;
|
||||
|
||||
if (shouldClear) {
|
||||
this.clearSelection();
|
||||
}
|
||||
};
|
||||
|
||||
private onTerminalTouchCancel = (): void => {
|
||||
this.clearTapHoldTimeout();
|
||||
this.touchStart = null;
|
||||
this.pendingClearTouch = null;
|
||||
};
|
||||
|
||||
private onHandleTouchStart = (event: TouchEvent): void => {
|
||||
event.preventDefault();
|
||||
event.stopPropagation();
|
||||
|
||||
if (event.touches.length !== 1) {
|
||||
return;
|
||||
}
|
||||
|
||||
const target = event.currentTarget as HTMLElement;
|
||||
this.dragHandle = target.dataset.handleType === 'start' ? 'start' : 'end';
|
||||
this.isHandleDragging = true;
|
||||
this.pendingClearTouch = null;
|
||||
};
|
||||
|
||||
private onHandleTouchMove = (event: TouchEvent): void => {
|
||||
if (!this.isHandleDragging || !this.dragHandle || event.touches.length !== 1) {
|
||||
return;
|
||||
}
|
||||
|
||||
event.preventDefault();
|
||||
event.stopPropagation();
|
||||
|
||||
const touch = this.toTouchCoords(event.touches[0]);
|
||||
const adjustedTouch = {
|
||||
clientX: touch.clientX,
|
||||
clientY: touch.clientY - FINGER_OFFSET_PX,
|
||||
};
|
||||
const coords = this.touchToTerminalCoords(adjustedTouch);
|
||||
if (!coords) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (this.dragHandle === 'start') {
|
||||
this.selectionStart = coords;
|
||||
} else {
|
||||
this.selectionEnd = coords;
|
||||
}
|
||||
|
||||
this.swapHandlesIfNeeded();
|
||||
this.updateSelection();
|
||||
};
|
||||
|
||||
private onHandleTouchEnd = (event: TouchEvent): void => {
|
||||
if (!this.isHandleDragging) {
|
||||
return;
|
||||
}
|
||||
|
||||
event.preventDefault();
|
||||
event.stopPropagation();
|
||||
this.isHandleDragging = false;
|
||||
this.dragHandle = null;
|
||||
};
|
||||
|
||||
private onSelectionChange = (): void => {
|
||||
if (!this.isSelecting) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (!this.terminal.hasSelection()) {
|
||||
this.resetSelectionState();
|
||||
return;
|
||||
}
|
||||
|
||||
this.updateHandles();
|
||||
};
|
||||
|
||||
private onTerminalResize = (): void => {
|
||||
this.updateCellDimensions();
|
||||
this.updateHandles();
|
||||
};
|
||||
|
||||
private onTerminalScroll = (): void => {
|
||||
this.updateHandles();
|
||||
};
|
||||
|
||||
private onDocumentTouchStart = (event: TouchEvent): void => {
|
||||
if (!this.isSelecting || !event.target) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (this.terminalContent.contains(event.target as Node)) {
|
||||
return;
|
||||
}
|
||||
|
||||
this.clearSelection();
|
||||
};
|
||||
|
||||
/**
 * Begins a selection at the touched cell.
 *
 * If the cell is part of a word, the whole word is selected; otherwise the selection
 * starts and ends on the touched cell. Does nothing when the touch does not map to a cell.
 */
private startSelection(touch: TouchCoords): void {
  const anchor = this.touchToTerminalCoords(touch);
  if (!anchor) {
    return;
  }

  const word = this.getWordBoundsAt(anchor);
  this.selectionStart = word ? word.start : anchor;
  this.selectionEnd = word ? word.end : anchor;
  this.isSelecting = true;

  this.updateSelection();
  this.showHandles();
}
|
||||
|
||||
/** Moves the end of the selection to the touched cell; ignores touches outside the grid. */
private extendSelection(touch: TouchCoords): void {
  const target = this.touchToTerminalCoords(touch);
  if (target) {
    this.selectionEnd = target;
    this.updateSelection();
  }
}
|
||||
|
||||
/**
 * Pushes the current start/end endpoints into the terminal selection and repositions
 * the handles. No-op when either endpoint is missing or the computed length is not positive.
 */
private updateSelection(): void {
  if (!this.selectionStart || !this.selectionEnd) {
    return;
  }

  const ordered = this.getOrderedSelection();
  const cellCount = this.calculateSelectionLength(ordered.start, ordered.end);
  if (cellCount <= 0) {
    return;
  }

  this.terminal.select(ordered.start.col, ordered.start.row, cellCount);
  this.updateHandles();
}
|
||||
|
||||
/**
 * Number of cells from `start` to `end`, both inclusive, counting each full row between
 * them as `terminal.cols` cells.
 */
private calculateSelectionLength(start: TerminalCoords, end: TerminalCoords): number {
  const rowSpan = end.row - start.row;
  // Same-row selections never need the column count.
  const wrappedCells = rowSpan === 0 ? 0 : rowSpan * this.terminal.cols;
  return wrappedCells - start.col + end.col + 1;
}
|
||||
|
||||
/**
 * Returns the two selection endpoints in reading order (earlier row first, then earlier
 * column).
 *
 * @throws Error when either endpoint is unset.
 */
private getOrderedSelection(): { start: TerminalCoords; end: TerminalCoords } {
  const { selectionStart: first, selectionEnd: second } = this;
  if (!first || !second) {
    throw new Error('Cannot order empty terminal selection');
  }

  const alreadyOrdered =
    first.row < second.row || (first.row === second.row && first.col <= second.col);
  return alreadyOrdered ? { start: first, end: second } : { start: second, end: first };
}
|
||||
|
||||
/**
 * After a drag moved one endpoint past the other, stores the endpoints back in reading
 * order and flips which handle is considered "being dragged" so the drag continues
 * seamlessly. No-op when nothing is selected, no handle is dragged, or order is intact.
 */
private swapHandlesIfNeeded(): void {
  if (!this.selectionStart || !this.selectionEnd || !this.dragHandle) {
    return;
  }

  const ordered = this.getOrderedSelection();
  const orderUnchanged =
    ordered.start === this.selectionStart && ordered.end === this.selectionEnd;
  if (orderUnchanged) {
    return;
  }

  this.selectionStart = ordered.start;
  this.selectionEnd = ordered.end;
  this.dragHandle = this.dragHandle === 'start' ? 'end' : 'start';
}
|
||||
|
||||
/** Makes both handles visible, then positions them for the current selection. */
private showHandles(): void {
  for (const handle of [this.startHandle, this.endHandle]) {
    handle.style.display = 'block';
  }
  this.updateHandles();
}
|
||||
|
||||
/** Hides both selection handles. */
private hideHandles(): void {
  for (const handle of [this.startHandle, this.endHandle]) {
    handle.style.display = 'none';
  }
}
|
||||
|
||||
/**
 * Positions the two selection handles just below their selection endpoints.
 *
 * Both handles are hidden when no selection is active. A handle whose endpoint cannot be
 * mapped to pixels (e.g. its row is scrolled out of the viewport) is hidden individually.
 */
updateHandles(): void {
  if (!this.isSelecting || !this.selectionStart || !this.selectionEnd) {
    this.hideHandles();
    return;
  }

  const { start, end } = this.getOrderedSelection();
  const startPixels = this.terminalCoordsToPixels(start);
  const endPixels = this.terminalCoordsToPixels(end);

  // Cell dimensions are read only after both conversions above, which refresh them.
  const place = (
    handle: HTMLElement,
    origin: { x: number; y: number } | null,
    cellOffsetX: number,
  ): void => {
    if (!origin) {
      handle.style.display = 'none';
      return;
    }
    handle.style.display = 'block';
    handle.style.left = `${origin.x + cellOffsetX - HANDLE_SIZE_PX / 2}px`;
    handle.style.top = `${origin.y + this.cellDimensions.height + 4}px`;
  };

  // Start handle is centred on the cell's left edge, end handle on the cell's right edge.
  place(this.startHandle, startPixels, 0);
  place(this.endHandle, endPixels, this.cellDimensions.width);
}
|
||||
|
||||
/** Clears the terminal's selection and then resets this manager's selection state. */
private clearSelection(): void {
  this.terminal.clearSelection();
  this.resetSelectionState();
}
|
||||
|
||||
/**
 * Returns the manager to its idle state: clears the selection/drag flags and endpoints,
 * forgets any pending touch bookkeeping, hides the handles and cancels the tap-hold timer.
 * Does not touch the terminal's own selection.
 */
private resetSelectionState(): void {
  // Flags
  this.isSelecting = false;
  this.isHandleDragging = false;

  // Endpoints and touch bookkeeping
  this.dragHandle = null;
  this.selectionStart = null;
  this.selectionEnd = null;
  this.pendingClearTouch = null;
  this.touchStart = null;

  this.hideHandles();
  this.clearTapHoldTimeout();
}
|
||||
|
||||
/**
 * Converts a viewport touch position into terminal buffer coordinates.
 *
 * @param touch Touch position in client (viewport) pixels.
 * @returns The cell under the touch, with `row` expressed as a buffer row (viewport row
 *   plus the current scroll offset), or `null` when the touch is outside the terminal
 *   screen element, the grid is empty, or the cell size cannot be determined.
 */
private touchToTerminalCoords(touch: TouchCoords): TerminalCoords | null {
  const screenElement = this.getTerminalScreenElement();
  if (!screenElement) {
    return null;
  }

  const rect = screenElement.getBoundingClientRect();
  const x = touch.clientX - rect.left;
  const y = touch.clientY - rect.top;
  if (x < 0 || y < 0 || x > rect.width || y > rect.height) {
    return null;
  }

  this.updateCellDimensions();
  const { width, height } = this.cellDimensions;
  const { cols, rows } = this.terminal;
  if (!width || !height || cols < 1 || rows < 1) {
    return null;
  }

  // The bounds check above is inclusive, so a touch exactly on the right/bottom edge
  // (or inside padding beyond the last cell) would otherwise map one cell past the grid.
  // Clamp both axes to the visible grid before applying the scroll offset.
  const col = clamp(Math.floor(x / width), 0, cols - 1);
  const visibleRow = clamp(Math.floor(y / height), 0, rows - 1);

  return {
    col,
    row: visibleRow + this.terminal.buffer.active.viewportY,
  };
}
|
||||
|
||||
/**
 * Converts buffer coordinates to the pixel position of the cell's top-left corner,
 * relative to the terminal content container.
 *
 * @returns `null` when there is no screen element or the row is outside the viewport.
 */
private terminalCoordsToPixels(coords: TerminalCoords): { x: number; y: number } | null {
  const screen = this.getTerminalScreenElement();
  if (!screen) {
    return null;
  }

  this.updateCellDimensions();

  const rowInViewport = coords.row - this.terminal.buffer.active.viewportY;
  const isOffscreen = rowInViewport < 0 || rowInViewport >= this.terminal.rows;
  if (isOffscreen) {
    return null;
  }

  const { left: screenLeft, top: screenTop } = screen.getBoundingClientRect();
  const { left: originLeft, top: originTop } = this.terminalContent.getBoundingClientRect();
  const { width: cellWidth, height: cellHeight } = this.cellDimensions;

  return {
    x: screenLeft - originLeft + coords.col * cellWidth,
    y: screenTop - originTop + rowInViewport * cellHeight,
  };
}
|
||||
|
||||
/**
 * Refreshes `cellDimensions`.
 *
 * Prefers the CSS cell size exposed by the terminal's internal render service; when that
 * is unavailable, divides the screen element's bounding box by the column/row counts.
 * Falls back to 0×0 when neither source yields a size.
 */
private updateCellDimensions(): void {
  const core = (this.terminal as TerminalWithRenderService)._core;
  const measuredCell = core?._renderService?.dimensions?.css?.cell;
  if (measuredCell?.width && measuredCell.height) {
    this.cellDimensions = { width: measuredCell.width, height: measuredCell.height };
    return;
  }

  const bounds = this.getTerminalScreenElement()?.getBoundingClientRect();
  const { cols, rows } = this.terminal;
  if (!bounds || !cols || !rows) {
    this.cellDimensions = { width: 0, height: 0 };
    return;
  }

  this.cellDimensions = {
    width: bounds.width / cols,
    height: bounds.height / rows,
  };
}
|
||||
|
||||
/**
 * Finds the whitespace-delimited word that contains the given cell.
 *
 * @returns The first and last column of the word on that row, or `null` when the row
 *   does not exist, is empty, the column is past the line text, or the cell is whitespace.
 *
 * NOTE(review): columns are used directly as string indices into the line text; this
 * presumably assumes one character per cell — confirm behaviour with wide characters.
 */
private getWordBoundsAt(coords: TerminalCoords): {
  start: TerminalCoords;
  end: TerminalCoords;
} | null {
  const { row, col } = coords;
  const text = this.terminal.buffer.active.getLine(row)?.translateToString(false);
  const whitespace = /\s/;

  if (!text || col >= text.length || whitespace.test(text[col])) {
    return null;
  }

  // Walk left until the previous character is whitespace (or the line starts).
  let firstCol = col;
  while (firstCol > 0 && !whitespace.test(text[firstCol - 1])) {
    firstCol -= 1;
  }

  // Walk right until the next character is whitespace (or the line ends).
  const lastIndex = text.length - 1;
  let lastCol = col;
  while (lastCol < lastIndex && !whitespace.test(text[lastCol + 1])) {
    lastCol += 1;
  }

  return {
    start: { row, col: firstCol },
    end: { row, col: lastCol },
  };
}
|
||||
|
||||
/**
 * Returns the element used for coordinate mapping: the `.xterm-screen` child of the
 * terminal element when present, otherwise the terminal element itself, otherwise `null`.
 */
private getTerminalScreenElement(): HTMLElement | null {
  const root = this.terminal.element;
  const screen = root?.querySelector<HTMLElement>('.xterm-screen');
  return screen ?? root ?? null;
}
|
||||
|
||||
/** Copies the client coordinates out of a DOM `Touch` into a plain object. */
private toTouchCoords(touch: Touch): TouchCoords {
  const { clientX, clientY } = touch;
  return { clientX, clientY };
}
|
||||
|
||||
/** Cancels the pending tap-hold timer, if one is scheduled. */
private clearTapHoldTimeout(): void {
  if (this.tapHoldTimeout !== null) {
    window.clearTimeout(this.tapHoldTimeout);
    this.tapHoldTimeout = null;
  }
}
|
||||
|
||||
/**
 * Tears the manager down: cancels the tap-hold timer, detaches every touch listener from
 * the terminal element, both handles and the document, disposes registered disposables,
 * removes the overlay, and restores the container's original CSS `position` if this
 * manager changed it. Subsequent calls are no-ops.
 */
dispose(): void {
  if (this.isDestroyed) {
    return;
  }
  this.isDestroyed = true;
  this.clearTapHoldTimeout();

  const terminalElement = this.terminal.element;
  terminalElement?.removeEventListener('touchstart', this.onTerminalTouchStart);
  terminalElement?.removeEventListener('touchmove', this.onTerminalTouchMove);
  terminalElement?.removeEventListener('touchend', this.onTerminalTouchEnd);
  terminalElement?.removeEventListener('touchcancel', this.onTerminalTouchCancel);

  // Both handles share the same handlers; touchcancel is treated like touchend.
  for (const handle of [this.startHandle, this.endHandle]) {
    handle.removeEventListener('touchstart', this.onHandleTouchStart);
    handle.removeEventListener('touchmove', this.onHandleTouchMove);
    handle.removeEventListener('touchend', this.onHandleTouchEnd);
    handle.removeEventListener('touchcancel', this.onHandleTouchEnd);
  }

  document.removeEventListener('touchstart', this.onDocumentTouchStart);
  this.disposables.forEach((disposable) => disposable.dispose());
  this.disposables.length = 0;

  this.overlay.remove();

  if (this.didSetPosition) {
    this.terminalContent.style.position = this.originalPosition;
  }
}
|
||||
}
|
||||
|
||||
/**
 * Installs touch-based text selection on a terminal.
 *
 * @param terminal The terminal instance; it must already have a DOM element.
 * @param terminalContent The container element passed through to the selection manager.
 * @returns The selection manager, or `null` when the environment is not a touch-selection
 *   environment or the terminal has no element yet.
 */
export function installMobileTerminalSelection(
  terminal: Terminal,
  terminalContent: HTMLElement,
): MobileTerminalSelectionManager | null {
  const canInstall = isTouchSelectionEnvironment() && Boolean(terminal.element);
  return canInstall ? new ShellMobileSelectionCore(terminal, terminalContent) : null;
}
|
||||
@@ -59,8 +59,12 @@ export default function Shell({
|
||||
isConnected,
|
||||
isInitialized,
|
||||
isConnecting,
|
||||
authUrl,
|
||||
authUrlVersion,
|
||||
connectToShell,
|
||||
disconnectFromShell,
|
||||
openAuthUrlInBrowser,
|
||||
copyAuthUrlToClipboard,
|
||||
} = useShellRuntime({
|
||||
selectedProject,
|
||||
selectedSession,
|
||||
@@ -239,7 +243,15 @@ export default function Shell({
|
||||
if (minimal) {
|
||||
return (
|
||||
<>
|
||||
<ShellMinimalView terminalContainerRef={terminalContainerRef} />
|
||||
<ShellMinimalView
|
||||
terminalContainerRef={terminalContainerRef}
|
||||
authUrl={authUrl}
|
||||
authUrlVersion={authUrlVersion}
|
||||
initialCommand={initialCommand}
|
||||
isConnected={isConnected}
|
||||
openAuthUrlInBrowser={openAuthUrlInBrowser}
|
||||
copyAuthUrlToClipboard={copyAuthUrlToClipboard}
|
||||
/>
|
||||
<TerminalShortcutsPanel
|
||||
wsRef={wsRef}
|
||||
terminalRef={terminalRef}
|
||||
|
||||
@@ -1,12 +1,45 @@
|
||||
import { useEffect, useMemo, useState } from 'react';
|
||||
import type { RefObject } from 'react';
|
||||
import type { AuthCopyStatus } from '../../types/types';
|
||||
import { resolveAuthUrlForDisplay } from '../../utils/auth';
|
||||
|
||||
type ShellMinimalViewProps = {
|
||||
terminalContainerRef: RefObject<HTMLDivElement>;
|
||||
authUrl: string;
|
||||
authUrlVersion: number;
|
||||
initialCommand: string | null | undefined;
|
||||
isConnected: boolean;
|
||||
openAuthUrlInBrowser: (url: string) => boolean;
|
||||
copyAuthUrlToClipboard: (url: string) => Promise<boolean>;
|
||||
};
|
||||
|
||||
export default function ShellMinimalView({
|
||||
terminalContainerRef,
|
||||
authUrl,
|
||||
authUrlVersion,
|
||||
initialCommand,
|
||||
isConnected,
|
||||
openAuthUrlInBrowser,
|
||||
copyAuthUrlToClipboard,
|
||||
}: ShellMinimalViewProps) {
|
||||
const [authUrlCopyStatus, setAuthUrlCopyStatus] = useState<AuthCopyStatus>('idle');
|
||||
const [isAuthPanelHidden, setIsAuthPanelHidden] = useState(false);
|
||||
|
||||
const displayAuthUrl = useMemo(
|
||||
() => resolveAuthUrlForDisplay(initialCommand, authUrl),
|
||||
[authUrl, initialCommand],
|
||||
);
|
||||
|
||||
// Keep auth panel UI state local to minimal mode and reset it when connection/url changes.
|
||||
useEffect(() => {
|
||||
setAuthUrlCopyStatus('idle');
|
||||
setIsAuthPanelHidden(false);
|
||||
}, [authUrlVersion, displayAuthUrl, isConnected]);
|
||||
|
||||
const hasAuthUrl = Boolean(displayAuthUrl);
|
||||
const showMobileAuthPanel = hasAuthUrl && !isAuthPanelHidden;
|
||||
const showMobileAuthPanelToggle = hasAuthUrl && isAuthPanelHidden;
|
||||
|
||||
return (
|
||||
<div className="relative h-full w-full bg-gray-900">
|
||||
<div
|
||||
@@ -14,6 +47,67 @@ export default function ShellMinimalView({
|
||||
className="h-full w-full focus:outline-none"
|
||||
style={{ outline: 'none' }}
|
||||
/>
|
||||
|
||||
{showMobileAuthPanel && (
|
||||
<div className="absolute inset-x-0 bottom-14 z-20 border-t border-gray-700/80 bg-gray-900/95 p-3 backdrop-blur-sm md:hidden">
|
||||
<div className="flex flex-col gap-2">
|
||||
<div className="flex items-center justify-between gap-2">
|
||||
<p className="text-xs text-gray-300">Open or copy the login URL:</p>
|
||||
<button
|
||||
type="button"
|
||||
onClick={() => setIsAuthPanelHidden(true)}
|
||||
className="rounded bg-gray-700 px-2 py-1 text-[10px] font-medium uppercase tracking-wide text-gray-100 hover:bg-gray-600"
|
||||
>
|
||||
Hide
|
||||
</button>
|
||||
</div>
|
||||
|
||||
<input
|
||||
type="text"
|
||||
value={displayAuthUrl}
|
||||
readOnly
|
||||
onClick={(event) => event.currentTarget.select()}
|
||||
className="w-full rounded border border-gray-600 bg-gray-800 px-2 py-1 text-xs text-gray-100 focus:outline-none focus:ring-1 focus:ring-blue-500"
|
||||
aria-label="Authentication URL"
|
||||
/>
|
||||
|
||||
<div className="flex items-center gap-2">
|
||||
<button
|
||||
type="button"
|
||||
onClick={() => {
|
||||
openAuthUrlInBrowser(displayAuthUrl);
|
||||
}}
|
||||
className="flex-1 rounded bg-blue-600 px-3 py-2 text-xs font-medium text-white hover:bg-blue-700"
|
||||
>
|
||||
Open URL
|
||||
</button>
|
||||
|
||||
<button
|
||||
type="button"
|
||||
onClick={async () => {
|
||||
const copied = await copyAuthUrlToClipboard(displayAuthUrl);
|
||||
setAuthUrlCopyStatus(copied ? 'copied' : 'failed');
|
||||
}}
|
||||
className="flex-1 rounded bg-gray-700 px-3 py-2 text-xs font-medium text-white hover:bg-gray-600"
|
||||
>
|
||||
{authUrlCopyStatus === 'copied' ? 'Copied' : 'Copy URL'}
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{showMobileAuthPanelToggle && (
|
||||
<div className="absolute bottom-14 right-3 z-20 md:hidden">
|
||||
<button
|
||||
type="button"
|
||||
onClick={() => setIsAuthPanelHidden(false)}
|
||||
className="rounded bg-gray-800/95 px-3 py-2 text-xs font-medium text-gray-100 shadow-lg backdrop-blur-sm hover:bg-gray-700"
|
||||
>
|
||||
Show login URL
|
||||
</button>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
@@ -7,6 +7,7 @@ type UiPreferences = {
|
||||
autoScrollToBottom: boolean;
|
||||
sendByCtrlEnter: boolean;
|
||||
sidebarVisible: boolean;
|
||||
voiceEnabled: boolean;
|
||||
};
|
||||
|
||||
type UiPreferenceKey = keyof UiPreferences;
|
||||
@@ -39,6 +40,7 @@ const DEFAULTS: UiPreferences = {
|
||||
autoScrollToBottom: true,
|
||||
sendByCtrlEnter: false,
|
||||
sidebarVisible: true,
|
||||
voiceEnabled: false,
|
||||
};
|
||||
|
||||
const PREFERENCE_KEYS = Object.keys(DEFAULTS) as UiPreferenceKey[];
|
||||
|
||||
68
src/hooks/useVoiceConfig.ts
Normal file
68
src/hooks/useVoiceConfig.ts
Normal file
@@ -0,0 +1,68 @@
|
||||
import { useState } from 'react';
|
||||
|
||||
/**
 * Per-user voice backend settings persisted in `localStorage`.
 * Every field is a string; an empty string means "not set".
 */
export type VoiceConfig = {
  /** Base URL of a custom audio backend. */
  baseUrl: string;
  /** API key for the audio backend. */
  apiKey: string;
  /** Speech-to-text model name. */
  sttModel: string;
  /** Text-to-speech model name. */
  ttsModel: string;
  /** Text-to-speech voice name. */
  ttsVoice: string;
  /** Requested audio format for synthesized speech. */
  ttsFormat: string;
};
|
||||
|
||||
const STORAGE_KEY = 'voiceConfig';
|
||||
export const VOICE_CONFIG_SYNC_EVENT = 'voice-config:sync';
|
||||
const DEFAULTS: VoiceConfig = { baseUrl: '', apiKey: '', sttModel: '', ttsModel: '', ttsVoice: '', ttsFormat: '' };
|
||||
|
||||
/**
 * Reads the voice configuration from `localStorage`.
 *
 * Only string-valued known fields are taken from the stored JSON; everything else falls
 * back to the defaults. Missing, unparsable, or non-object stored values (and any storage
 * access error) yield a fresh copy of the defaults.
 */
export function readVoiceConfig(): VoiceConfig {
  const freshDefaults = (): VoiceConfig => ({ ...DEFAULTS });

  try {
    const serialized = localStorage.getItem(STORAGE_KEY);
    if (!serialized) {
      return freshDefaults();
    }

    const stored = JSON.parse(serialized);
    const isPlainObject = Boolean(stored) && typeof stored === 'object' && !Array.isArray(stored);
    if (!isPlainObject) {
      return freshDefaults();
    }

    const merged = freshDefaults();
    (Object.keys(DEFAULTS) as (keyof VoiceConfig)[]).forEach((field) => {
      const value = stored[field];
      if (typeof value === 'string') {
        merged[field] = value;
      }
    });
    return merged;
  } catch {
    return freshDefaults();
  }
}
|
||||
|
||||
/**
 * Builds the `x-voice-*` request headers that carry the per-user voice settings to the
 * voice proxy. Empty fields are omitted so the server's defaults apply; the audio format
 * is trimmed before being sent. Returns no headers outside a browser (`window` undefined).
 */
export function voiceConfigHeaders(): Record<string, string> {
  const headers: Record<string, string> = {};
  if (typeof window === 'undefined') {
    return headers;
  }

  const config = readVoiceConfig();
  const candidates: [string, string][] = [
    ['x-voice-api-key', config.apiKey],
    ['x-voice-stt-model', config.sttModel],
    ['x-voice-tts-model', config.ttsModel],
    ['x-voice-tts-voice', config.ttsVoice],
    ['x-voice-tts-format', config.ttsFormat.trim()],
  ];

  for (const [name, value] of candidates) {
    if (value) {
      headers[name] = value;
    }
  }
  return headers;
}
|
||||
|
||||
/**
 * React hook exposing the voice configuration and an `update` function.
 *
 * @returns `config` — the current settings (initialised from `localStorage` in the
 *   browser, defaults elsewhere) and `update(patch)` — merges `patch` into the state,
 *   persists the result and dispatches `VOICE_CONFIG_SYNC_EVENT` on `window`.
 */
export function useVoiceConfig() {
  const [config, setConfig] = useState<VoiceConfig>(() =>
    typeof window === 'undefined' ? { ...DEFAULTS } : readVoiceConfig(),
  );

  const update = (patch: Partial<VoiceConfig>) => {
    // Keep the state updater pure: React may invoke it more than once (e.g. StrictMode),
    // so persistence and event dispatch must not happen inside it.
    setConfig((prev) => ({ ...prev, ...patch }));

    try {
      // Persist by merging the patch onto what is currently stored, which mirrors the
      // state (same patches applied in the same order) without relying on a stale closure.
      const stored: Partial<VoiceConfig> = { ...readVoiceConfig(), ...patch };
      const format = stored.ttsFormat?.trim();
      if (format) {
        stored.ttsFormat = format;
      } else {
        // A blank format is stored as "absent" so the default applies on the next read.
        delete stored.ttsFormat;
      }
      localStorage.setItem(STORAGE_KEY, JSON.stringify(stored));
      window.dispatchEvent(new Event(VOICE_CONFIG_SYNC_EVENT));
    } catch {
      /* ignore persistence errors */
    }
  };

  return { config, update };
}
|
||||
@@ -122,6 +122,14 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"voice": {
|
||||
"input": "Voice input",
|
||||
"stopRecording": "Stop recording",
|
||||
"transcribing": "Transcribing…",
|
||||
"speak": "Read aloud",
|
||||
"stopSpeaking": "Stop",
|
||||
"loading": "Loading…"
|
||||
},
|
||||
"input": {
|
||||
"placeholder": "Type / for commands, @ for files, or ask {{provider}} anything...",
|
||||
"placeholderDefault": "Type your message...",
|
||||
|
||||
@@ -50,6 +50,21 @@
|
||||
"resetToDefaults": "Reset to Defaults",
|
||||
"cancelChanges": "Cancel Changes"
|
||||
},
|
||||
"voiceSettings": {
|
||||
"title": "Voice",
|
||||
"description": "Speech-to-text input and read-aloud, via an OpenAI-compatible audio backend.",
|
||||
"enable": "Enable voice",
|
||||
"enableDescription": "Show the mic button and the read-aloud button on messages.",
|
||||
"backendTitle": "Backend",
|
||||
"backendDescription": "Point at OpenAI, Groq, or a local server (LocalAI, Speaches, Kokoro-FastAPI). Leave blank to use the server default.",
|
||||
"baseUrl": "Base URL",
|
||||
"apiKey": "API key",
|
||||
"sttModel": "Speech-to-text model",
|
||||
"ttsModel": "Text-to-speech model",
|
||||
"voice": "Voice",
|
||||
"format": "Audio format",
|
||||
"note": "A custom base URL is called directly by your browser and must allow browser CORS requests. Leave it blank to use the server-configured backend."
|
||||
},
|
||||
"quickSettings": {
|
||||
"title": "Quick Settings",
|
||||
"sections": {
|
||||
@@ -64,6 +79,7 @@
|
||||
"showThinking": "Show thinking",
|
||||
"autoScrollToBottom": "Auto-scroll to bottom",
|
||||
"sendByCtrlEnter": "Send by Ctrl+Enter",
|
||||
"voiceEnabled": "Voice (mic + read aloud)",
|
||||
"sendByCtrlEnterDescription": "When enabled, pressing Ctrl+Enter will send the message instead of just Enter. This is useful for IME users to avoid accidental sends.",
|
||||
"dragHandle": {
|
||||
"dragging": "Dragging handle",
|
||||
@@ -94,6 +110,7 @@
|
||||
"appearance": "Appearance",
|
||||
"git": "Git",
|
||||
"apiTokens": "API & Tokens",
|
||||
"voice": "Voice",
|
||||
"tasks": "Tasks",
|
||||
"browser": "Browser",
|
||||
"notifications": "Notifications",
|
||||
|
||||
60
src/lib/voiceApi.ts
Normal file
60
src/lib/voiceApi.ts
Normal file
@@ -0,0 +1,60 @@
|
||||
import { authenticatedFetch } from '../utils/api';
|
||||
import { readVoiceConfig, voiceConfigHeaders } from '../hooks/useVoiceConfig';
|
||||
|
||||
/**
 * Joins a user-supplied backend base URL with an endpoint path.
 *
 * @param baseUrl Base URL as typed by the user; any number of trailing slashes is ignored.
 * @param path Endpoint path beginning with `/` (e.g. `/audio/speech`).
 * @returns `baseUrl` without trailing slashes, followed by `path`.
 */
function directUrl(baseUrl: string, path: string): string {
  // Strip every trailing slash, not just one, so "…/v1//" does not yield "…/v1//audio/…".
  return `${baseUrl.replace(/\/+$/, '')}${path}`;
}
|
||||
|
||||
/** Serialises the current voice configuration to a JSON string. */
export function voiceConfigSignature(): string {
  const current = readVoiceConfig();
  return JSON.stringify(current);
}
|
||||
|
||||
/**
 * Sends recorded audio for transcription.
 *
 * With a custom base URL configured, posts multipart `file` + `model` straight to
 * `{base}/audio/transcriptions` (bearer auth when an API key is set). Otherwise posts the
 * audio as multipart `audio` to the app's `/api/voice/transcribe` endpoint together with
 * the per-user voice headers.
 *
 * @param blob Recorded audio.
 * @param filename File name attached to the multipart part.
 * @returns The raw fetch response.
 */
export function transcribeVoice(blob: Blob, filename: string): Promise<Response> {
  const { baseUrl, apiKey, sttModel } = readVoiceConfig();
  const customBase = baseUrl.trim();
  const form = new FormData();

  if (!customBase) {
    form.append('audio', blob, filename);
    return authenticatedFetch('/api/voice/transcribe', {
      method: 'POST',
      headers: voiceConfigHeaders(),
      body: form,
    });
  }

  form.append('file', blob, filename);
  form.append('model', sttModel || 'whisper-1');

  const headers: Record<string, string> = {};
  if (apiKey) {
    headers.Authorization = `Bearer ${apiKey}`;
  }

  return fetch(directUrl(customBase, '/audio/transcriptions'), {
    method: 'POST',
    headers,
    body: form,
  });
}
|
||||
|
||||
/**
 * Requests synthesized speech for `text`.
 *
 * With a custom base URL configured, posts `{ model, voice, input[, response_format] }`
 * as JSON straight to `{base}/audio/speech` (bearer auth when an API key is set).
 * Otherwise posts `{ text }` to the app's `/api/voice/tts` endpoint together with the
 * per-user voice headers.
 *
 * @param text Text to read aloud.
 * @param signal Abort signal forwarded to the underlying fetch.
 * @returns The raw fetch response.
 */
export function synthesizeVoice(text: string, signal: AbortSignal): Promise<Response> {
  const { baseUrl, apiKey, ttsModel, ttsVoice, ttsFormat } = readVoiceConfig();
  const customBase = baseUrl.trim();

  if (!customBase) {
    return authenticatedFetch('/api/voice/tts', {
      method: 'POST',
      body: JSON.stringify({ text }),
      headers: voiceConfigHeaders(),
      signal,
    });
  }

  const headers: Record<string, string> = { 'Content-Type': 'application/json' };
  if (apiKey) {
    headers.Authorization = `Bearer ${apiKey}`;
  }

  const payload: Record<string, string> = {
    model: ttsModel || 'tts-1',
    voice: ttsVoice || 'alloy',
    input: text,
  };
  const format = ttsFormat.trim();
  if (format) {
    payload.response_format = format;
  }

  return fetch(directUrl(customBase, '/audio/speech'), {
    method: 'POST',
    headers,
    body: JSON.stringify(payload),
    signal,
  });
}
|
||||
196
src/lib/voicePlayer.ts
Normal file
196
src/lib/voicePlayer.ts
Normal file
@@ -0,0 +1,196 @@
|
||||
import { synthesizeVoice, voiceConfigSignature } from './voiceApi';
|
||||
|
||||
// A single app-level audio player for read-aloud. It owns one <audio> element, lives
|
||||
// outside the React tree, and caches generated audio by content. Because playback is not
|
||||
// tied to a component, switching chats or re-rendering a message can't revoke the blob URL
|
||||
// out from under it (the cause of mid-play cutoffs). v1 plays one message at a time
|
||||
// (a new play replaces the current one); the design leaves room for a queue later.
|
||||
|
||||
export type VoicePlayState = 'idle' | 'loading' | 'playing';
|
||||
|
||||
export type VoiceSnapshot = { state: VoicePlayState; error: string | null };
|
||||
|
||||
const IDLE: VoiceSnapshot = { state: 'idle', error: null };
|
||||
const CACHE_MAX = 24;
|
||||
const CLIENT_TIMEOUT_MS = 330000; // backstop; the server proxy already times out at 5 min
|
||||
|
||||
/**
 * Derives a stable id / cache key for a piece of text under a given voice configuration.
 *
 * The id is the djb2 hash (32-bit, rendered in base 36) of the JSON encoding of
 * `[content, signature]`, so the same text under different voice settings gets a
 * different id.
 *
 * @param content Text to be read aloud.
 * @param signature Voice-settings signature; defaults to the current configuration's.
 */
export function voiceId(content: string, signature = voiceConfigSignature()): string {
  const keyMaterial = JSON.stringify([content, signature]);

  let hash = 5381;
  let index = 0;
  while (index < keyMaterial.length) {
    // hash * 33 + code unit, wrapped to a signed 32-bit integer.
    hash = (hash * 33 + keyMaterial.charCodeAt(index)) | 0;
    index += 1;
  }

  // Reinterpret as unsigned before formatting so the id never carries a minus sign.
  return (hash >>> 0).toString(36);
}
|
||||
|
||||
/**
 * Read-aloud player that owns a single reusable `<audio>` element and a bounded cache of
 * generated audio (blob URLs keyed by voice id). Only one message plays at a time: a new
 * play supersedes the current one. Listeners registered via `subscribe` are notified on
 * every state change and read per-message state through `getSnapshot`.
 */
class VoicePlayer {
  private audio: HTMLAudioElement | null = null;
  private unlocked = false;
  // id -> blob URL. Iteration order is least- to most-recently used: entries are
  // re-inserted on every hit, so the first key is always the eviction candidate.
  private cache = new Map<string, string>();
  private currentId: string | null = null;
  private state: VoicePlayState = 'idle';
  private errorId: string | null = null;
  private errorMsg: string | null = null;
  private token = 0; // bumps to ignore stale in-flight results
  private activeController: AbortController | null = null; // aborts the in-flight TTS fetch
  private errorTimer: ReturnType<typeof setTimeout> | null = null;
  private listeners = new Set<() => void>();

  /** Registers a change listener and returns the function that unregisters it. */
  subscribe(listener: () => void): () => void {
    this.listeners.add(listener);
    return () => {
      this.listeners.delete(listener);
    };
  }

  /** Notifies every subscribed listener. */
  private emit() {
    this.listeners.forEach((l) => l());
  }

  /**
   * State and error for one message id. Returns the shared `IDLE` object whenever the
   * message is idle without an error, so callers get a referentially stable value.
   */
  getSnapshot(id: string): VoiceSnapshot {
    const state = this.currentId === id ? this.state : 'idle';
    const error = this.errorId === id ? this.errorMsg : null;
    if (state === 'idle' && error === null) return IDLE;
    return { state, error };
  }

  /** Lazily creates the shared audio element and wires its `ended` / `error` events. */
  private ensureAudio(): HTMLAudioElement {
    if (!this.audio) {
      const audio = new Audio();
      audio.addEventListener('ended', () => this.onEnded());
      audio.addEventListener('error', () => {
        // Only meaningful while we believe we're playing.
        if (this.state === 'playing') this.onEnded();
      });
      this.audio = audio;
    }
    return this.audio;
  }

  // Call synchronously from the click handler so iOS grants the (reused) element playback.
  unlock() {
    if (this.unlocked) return;
    const audio = this.ensureAudio();
    try {
      const p = audio.play();
      if (p && typeof p.catch === 'function') p.catch(() => {});
      audio.pause();
    } catch {
      /* priming attempt; ignore */
    }
    this.unlocked = true;
  }

  /** Starts reading `content` aloud, or stops it if it is already loading/playing. */
  toggle(content: string) {
    const id = voiceId(content);
    if (this.currentId === id && (this.state === 'playing' || this.state === 'loading')) {
      this.stop();
      return;
    }
    void this.play(id, content);
  }

  /** Stops playback, cancels any in-flight synthesis request and returns to idle. */
  stop() {
    this.token++; // ignore any stale in-flight result
    this.abortActive(); // and actually cancel the network request
    if (this.audio) this.audio.pause();
    this.state = 'idle';
    this.currentId = null;
    this.emit();
  }

  /** Aborts the in-flight synthesis request, if any. */
  private abortActive() {
    if (this.activeController) {
      this.activeController.abort();
      this.activeController = null;
    }
  }

  /** Playback finished (or failed while playing): return to idle. */
  private onEnded() {
    this.state = 'idle';
    this.currentId = null;
    this.emit();
    // (queue auto-advance would hook in here)
  }

  /** Records an error for `id` and clears it automatically after six seconds. */
  private setError(id: string, msg: string) {
    this.state = 'idle';
    this.currentId = id;
    this.errorId = id;
    this.errorMsg = msg;
    this.emit();
    if (this.errorTimer) clearTimeout(this.errorTimer);
    this.errorTimer = setTimeout(() => {
      // Skip if a newer error (or a new play) has replaced this one in the meantime.
      if (this.errorId === id) {
        this.errorId = null;
        this.errorMsg = null;
        if (this.currentId === id) this.currentId = null;
        this.emit();
      }
    }, 6000);
  }

  /**
   * Loads (from cache or by synthesizing) and plays the audio for `content`.
   * Every await is followed by a token check so a superseding play/stop wins silently.
   */
  private async play(id: string, content: string) {
    const audio = this.ensureAudio();
    audio.pause();
    this.currentId = id;
    this.errorId = null;
    this.errorMsg = null;
    this.state = 'loading';
    this.emit();

    const myToken = ++this.token;
    this.abortActive(); // cancel any request this play supersedes

    try {
      let url = this.cacheGet(id);
      if (!url) {
        const controller = new AbortController();
        this.activeController = controller;
        const timer = setTimeout(() => controller.abort(), CLIENT_TIMEOUT_MS);
        const res = await synthesizeVoice(content, controller.signal).finally(() => {
          clearTimeout(timer);
          if (this.activeController === controller) this.activeController = null;
        });
        if (myToken !== this.token) return; // superseded by another play/stop
        if (!res.ok) {
          let msg = `Read-aloud failed (${res.status})`;
          try {
            const j = await res.json();
            if (j?.error) msg = String(j.error);
          } catch {
            /* non-JSON error body */
          }
          throw new Error(msg);
        }
        const blob = await res.blob();
        if (myToken !== this.token) return;
        url = URL.createObjectURL(blob);
        this.cacheSet(id, url);
      }
      if (myToken !== this.token) return;
      audio.src = url;
      audio.load();
      await audio.play();
      if (myToken !== this.token) return;
      this.state = 'playing';
      this.emit();
    } catch (e) {
      if (myToken !== this.token) return;
      // An abort that still owns the token can only come from the client-side timeout;
      // user-initiated aborts bump the token and return above.
      const aborted = e instanceof Error && e.name === 'AbortError';
      this.setError(id, aborted ? 'Read-aloud timed out.' : e instanceof Error ? e.message : 'Read-aloud failed');
    }
  }

  /** Cache lookup that marks the entry as most recently used. */
  private cacheGet(id: string): string | undefined {
    const url = this.cache.get(id);
    if (url !== undefined) {
      // Re-insert so Map iteration order reflects recency, not first insertion.
      this.cache.delete(id);
      this.cache.set(id, url);
    }
    return url;
  }

  /**
   * Stores a blob URL as the most recently used entry and evicts least-recently-used
   * entries beyond `CACHE_MAX`, revoking their blob URLs. The entry just added and the
   * URL currently loaded in the audio element are never evicted, so a URL is only ever
   * dropped from the cache together with being revoked.
   */
  private cacheSet(id: string, url: string) {
    this.cache.delete(id);
    this.cache.set(id, url);
    if (this.cache.size <= CACHE_MAX) return;

    const inUse = this.audio?.src;
    for (const [oldId, oldUrl] of this.cache) {
      if (this.cache.size <= CACHE_MAX) break;
      if (oldId === id || oldUrl === inUse) continue;
      this.cache.delete(oldId);
      URL.revokeObjectURL(oldUrl);
    }
  }
}
|
||||
|
||||
export const voicePlayer = new VoicePlayer();
|
||||
Reference in New Issue
Block a user