mirror of
https://github.com/siteboon/claudecodeui.git
synced 2026-06-26 21:55:50 +08:00
fix(voice): play read-aloud through an app-level player to stop cutoffs
Read-aloud now runs in a single module-level player outside the React tree instead of per-message component state. Switching chats or re-rendering a message no longer revokes the blob URL mid-play (the 'Invalid URI' cutoff). Adds content-keyed caching so re-listening doesn't regenerate, and reuses one audio element (also unlocks iOS once).
This commit is contained in:
@@ -1,119 +1,33 @@
|
|||||||
import { useCallback, useEffect, useRef, useState } from 'react';
|
import { useCallback, useEffect, useState } from 'react';
|
||||||
import { authenticatedFetch } from '../../../utils/api';
|
import { voicePlayer, voiceId, type VoiceSnapshot } from '../../../lib/voicePlayer';
|
||||||
import { voiceConfigHeaders } from '../../../hooks/useVoiceConfig';
|
|
||||||
|
|
||||||
// Only one message speaks at a time across the whole app.
|
export type TtsState = VoiceSnapshot['state'];
|
||||||
let stopActive: (() => void) | null = null;
|
|
||||||
|
|
||||||
export type TtsState = 'idle' | 'loading' | 'playing';
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Tap-to-speak for a single message. Sends raw markdown to /api/voice/tts and plays
|
* Thin adapter over the app-level voicePlayer. Playback lives outside React (see
|
||||||
* the returned audio. Manual-gesture only (v1) to satisfy iOS autoplay. Exposes the
|
* lib/voicePlayer), so switching chats or re-rendering a message no longer cuts the
|
||||||
* last error (e.g. a backend timeout) so the control can surface it.
|
* audio off. This hook just reflects the player's state for one message and forwards taps.
|
||||||
*/
|
*/
|
||||||
export function useTts(getText: () => string) {
|
export function useTts(getText: () => string) {
|
||||||
const [state, setState] = useState<TtsState>('idle');
|
const content = getText();
|
||||||
const [error, setError] = useState<string | null>(null);
|
const id = voiceId(content);
|
||||||
const audioRef = useRef<HTMLAudioElement | null>(null);
|
|
||||||
const urlRef = useRef<string | null>(null);
|
|
||||||
const errorTimer = useRef<ReturnType<typeof setTimeout> | null>(null);
|
|
||||||
|
|
||||||
const reset = useCallback(() => {
|
const [snap, setSnap] = useState<VoiceSnapshot>(() => voicePlayer.getSnapshot(id));
|
||||||
if (audioRef.current) {
|
|
||||||
audioRef.current.onended = null;
|
|
||||||
audioRef.current.onerror = null;
|
|
||||||
audioRef.current.pause();
|
|
||||||
audioRef.current.src = '';
|
|
||||||
audioRef.current = null;
|
|
||||||
}
|
|
||||||
if (urlRef.current) {
|
|
||||||
URL.revokeObjectURL(urlRef.current);
|
|
||||||
urlRef.current = null;
|
|
||||||
}
|
|
||||||
}, []);
|
|
||||||
|
|
||||||
const stop = useCallback(() => {
|
useEffect(() => {
|
||||||
reset();
|
const update = () =>
|
||||||
setState('idle');
|
setSnap((prev) => {
|
||||||
if (stopActive) stopActive = null;
|
const next = voicePlayer.getSnapshot(id);
|
||||||
}, [reset]);
|
return prev.state === next.state && prev.error === next.error ? prev : next;
|
||||||
|
|
||||||
const showError = useCallback((msg: string) => {
|
|
||||||
setError(msg);
|
|
||||||
if (errorTimer.current) clearTimeout(errorTimer.current);
|
|
||||||
errorTimer.current = setTimeout(() => setError(null), 6000);
|
|
||||||
}, []);
|
|
||||||
|
|
||||||
// Cleanup on unmount: drop the global stop handler if it points at us, then reset.
|
|
||||||
useEffect(
|
|
||||||
() => () => {
|
|
||||||
if (stopActive === stop) stopActive = null;
|
|
||||||
if (errorTimer.current) clearTimeout(errorTimer.current);
|
|
||||||
reset();
|
|
||||||
},
|
|
||||||
[reset, stop],
|
|
||||||
);
|
|
||||||
|
|
||||||
const play = useCallback(async () => {
|
|
||||||
if (stopActive) stopActive();
|
|
||||||
const text = getText();
|
|
||||||
if (!text || !text.trim()) return;
|
|
||||||
setError(null);
|
|
||||||
|
|
||||||
// Create + "unlock" the audio element synchronously inside the click gesture,
|
|
||||||
// so iOS Safari lets us play it after the async fetch resolves.
|
|
||||||
const audio = new Audio();
|
|
||||||
audioRef.current = audio;
|
|
||||||
audio.onended = () => stop();
|
|
||||||
audio.onerror = () => stop();
|
|
||||||
try {
|
|
||||||
audio.play().catch(() => {});
|
|
||||||
audio.pause();
|
|
||||||
} catch {
|
|
||||||
/* unlock attempt; ignore */
|
|
||||||
}
|
|
||||||
stopActive = stop;
|
|
||||||
setState('loading');
|
|
||||||
|
|
||||||
try {
|
|
||||||
const res = await authenticatedFetch('/api/voice/tts', {
|
|
||||||
method: 'POST',
|
|
||||||
body: JSON.stringify({ text }),
|
|
||||||
headers: voiceConfigHeaders(),
|
|
||||||
});
|
});
|
||||||
if (!res.ok) {
|
update();
|
||||||
let msg = `Read-aloud failed (${res.status})`;
|
return voicePlayer.subscribe(update);
|
||||||
try {
|
}, [id]);
|
||||||
const j = await res.json();
|
|
||||||
if (j?.error) msg = String(j.error);
|
|
||||||
} catch {
|
|
||||||
/* non-JSON error body */
|
|
||||||
}
|
|
||||||
throw new Error(msg);
|
|
||||||
}
|
|
||||||
const blob = await res.blob();
|
|
||||||
const url = URL.createObjectURL(blob);
|
|
||||||
if (audioRef.current !== audio) {
|
|
||||||
URL.revokeObjectURL(url); // stopped while loading; don't leak the blob URL
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
urlRef.current = url;
|
|
||||||
audio.src = url;
|
|
||||||
audio.load();
|
|
||||||
await audio.play();
|
|
||||||
setState('playing');
|
|
||||||
} catch (e) {
|
|
||||||
reset();
|
|
||||||
setState('idle');
|
|
||||||
showError(e instanceof Error ? e.message : 'Read-aloud failed');
|
|
||||||
}
|
|
||||||
}, [getText, reset, stop, showError]);
|
|
||||||
|
|
||||||
const toggle = useCallback(() => {
|
const toggle = useCallback(() => {
|
||||||
if (state === 'playing' || state === 'loading') stop();
|
voicePlayer.unlock(); // synchronous, within the click gesture (iOS)
|
||||||
else play();
|
voicePlayer.toggle(content);
|
||||||
}, [state, play, stop]);
|
}, [content]);
|
||||||
|
|
||||||
return { state, toggle, error };
|
return { state: snap.state, toggle, error: snap.error };
|
||||||
}
|
}
|
||||||
|
|||||||
182
src/lib/voicePlayer.ts
Normal file
182
src/lib/voicePlayer.ts
Normal file
@@ -0,0 +1,182 @@
|
|||||||
|
import { authenticatedFetch } from '../utils/api';
|
||||||
|
import { voiceConfigHeaders } from '../hooks/useVoiceConfig';
|
||||||
|
|
||||||
|
// A single app-level audio player for read-aloud. It owns one <audio> element, lives
|
||||||
|
// outside the React tree, and caches generated audio by content. Because playback is not
|
||||||
|
// tied to a component, switching chats or re-rendering a message can't revoke the blob URL
|
||||||
|
// out from under it (the cause of mid-play cutoffs). v1 plays one message at a time
|
||||||
|
// (a new play replaces the current one); the design leaves room for a queue later.
|
||||||
|
|
||||||
|
/** Playback phase the player can report for a message. */
export type VoicePlayState = 'idle' | 'loading' | 'playing';

/** Per-message view of the player: its playback phase plus the last error text, if any. */
export type VoiceSnapshot = {
  state: VoicePlayState;
  error: string | null;
};

// Shared "nothing happening" snapshot, so idle lookups can hand back one stable object.
const IDLE: VoiceSnapshot = { state: 'idle', error: null };

// Upper bound on the number of generated-audio blob URLs kept in the cache.
const CACHE_MAX = 24;
|
||||||
|
|
||||||
|
// Stable id / cache key from a message's text (djb2).
|
||||||
|
/**
 * Derives a stable id / cache key from a message's text with the djb2 string hash.
 *
 * @param content The message text to hash (hashed per UTF-16 code unit).
 * @returns The hash as an unsigned 32-bit integer rendered in base 36.
 */
export function voiceId(content: string): string {
  let hash = 5381;
  let index = 0;
  while (index < content.length) {
    // djb2 step: hash * 33 + code unit, wrapped to a signed 32-bit integer.
    hash = (hash * 33 + content.charCodeAt(index)) | 0;
    index += 1;
  }
  // Reinterpret as unsigned so the id never carries a minus sign.
  return (hash >>> 0).toString(36);
}
|
||||||
|
|
||||||
|
/**
 * App-level read-aloud player. Owns a single reused audio element, tracks which message
 * (by id) is loading or playing, caches generated audio as blob URLs keyed by id, and
 * notifies subscribers whenever its state changes. One message plays at a time: starting
 * a new one replaces the current one.
 */
class VoicePlayer {
  // The single reused audio element; created on first use by ensureAudio().
  private audio: HTMLAudioElement | null = null;
  // Set once unlock() has primed the element.
  private unlocked = false;
  // id -> blob URL. Map iteration order is kept as least-recently-used first.
  private cache = new Map<string, string>();
  // Id of the message the current state refers to, or null when nothing is active.
  private currentId: string | null = null;
  private state: VoicePlayState = 'idle';
  // Id and text of the most recent failure; cleared by a timer or by the next play().
  private errorId: string | null = null;
  private errorMsg: string | null = null;
  private token = 0; // bumps to cancel in-flight fetches
  private errorTimer: ReturnType<typeof setTimeout> | null = null;
  private listeners = new Set<() => void>();

  /**
   * Registers a callback that runs after every state change.
   *
   * @param listener Called with no arguments; read the new state via getSnapshot().
   * @returns A function that removes the listener again.
   */
  subscribe(listener: () => void): () => void {
    this.listeners.add(listener);
    return () => {
      this.listeners.delete(listener);
    };
  }

  /** Notifies every subscriber that something changed. */
  private emit() {
    this.listeners.forEach((l) => l());
  }

  /**
   * Reports the player's state as seen by one message.
   *
   * @param id The message id (see voiceId).
   * @returns The live state if `id` is the active message (otherwise 'idle'), plus the
   *   error text if the last failure belongs to `id`. The shared IDLE object is returned
   *   when there is nothing to report.
   */
  getSnapshot(id: string): VoiceSnapshot {
    const state = this.currentId === id ? this.state : 'idle';
    const error = this.errorId === id ? this.errorMsg : null;
    if (state === 'idle' && error === null) return IDLE;
    return { state, error };
  }

  /** Returns the shared audio element, creating it and wiring its listeners on first use. */
  private ensureAudio(): HTMLAudioElement {
    if (!this.audio) {
      const audio = new Audio();
      audio.addEventListener('ended', () => this.onEnded());
      audio.addEventListener('error', () => {
        // Only meaningful while we believe we're playing.
        if (this.state === 'playing') this.onEnded();
      });
      this.audio = audio;
    }
    return this.audio;
  }

  /**
   * Primes the audio element with a play()/pause() pair. Only the first call does work.
   * Call synchronously from the click handler so iOS grants the (reused) element playback.
   */
  unlock() {
    if (this.unlocked) return;
    const audio = this.ensureAudio();
    try {
      const p = audio.play();
      if (p && typeof p.catch === 'function') p.catch(() => {});
      audio.pause();
    } catch {
      /* priming attempt; ignore */
    }
    this.unlocked = true;
  }

  /**
   * Handles a tap on a message's read-aloud control: stops the message if it is the one
   * currently loading or playing, otherwise starts it.
   *
   * @param content The raw message text to speak. Blank or whitespace-only text is
   *   ignored so that no empty request is sent to the TTS endpoint.
   */
  toggle(content: string) {
    const id = voiceId(content);
    if (this.currentId === id && (this.state === 'playing' || this.state === 'loading')) {
      this.stop();
      return;
    }
    // Nothing to read aloud; leave whatever is currently playing untouched.
    if (!content.trim()) return;
    void this.play(id, content);
  }

  /** Pauses playback, cancels any request still in flight, and returns to idle. */
  stop() {
    this.token++; // cancel any in-flight fetch
    if (this.audio) this.audio.pause();
    this.state = 'idle';
    this.currentId = null;
    this.emit();
  }

  /** Resets to idle once the element finishes (or errors out while playing). */
  private onEnded() {
    this.state = 'idle';
    this.currentId = null;
    this.emit();
    // (queue auto-advance would hook in here)
  }

  /**
   * Records a failure for `id`, notifies subscribers, and schedules the error to clear
   * itself after 6 seconds unless a newer error has replaced it by then.
   */
  private setError(id: string, msg: string) {
    this.state = 'idle';
    this.currentId = id;
    this.errorId = id;
    this.errorMsg = msg;
    this.emit();
    if (this.errorTimer) clearTimeout(this.errorTimer);
    this.errorTimer = setTimeout(() => {
      if (this.errorId === id) {
        this.errorId = null;
        this.errorMsg = null;
        if (this.currentId === id) this.currentId = null;
        this.emit();
      }
    }, 6000);
  }

  /**
   * Starts reading `content` aloud, replacing whatever was active. Uses the cached blob
   * URL for `id` when there is one; otherwise requests audio from /api/voice/tts and
   * caches the result. Every await is followed by a token check so that a later play()
   * or stop() silently cancels this run.
   *
   * NOTE(review): the cache is keyed by text only. If voiceConfigHeaders() selects the
   * voice, a cached clip would presumably keep the old voice after a settings change;
   * confirm whether the key should include the voice config.
   */
  private async play(id: string, content: string) {
    const audio = this.ensureAudio();
    audio.pause();
    this.currentId = id;
    this.errorId = null;
    this.errorMsg = null;
    this.state = 'loading';
    this.emit();

    const myToken = ++this.token;

    try {
      let url = this.cache.get(id);
      if (url) {
        // Cache hit: re-insert so this entry becomes the most recently used one.
        this.cache.delete(id);
        this.cache.set(id, url);
      } else {
        const res = await authenticatedFetch('/api/voice/tts', {
          method: 'POST',
          body: JSON.stringify({ text: content }),
          headers: voiceConfigHeaders(),
        });
        if (myToken !== this.token) return; // superseded by another play/stop
        if (!res.ok) {
          let msg = `Read-aloud failed (${res.status})`;
          try {
            const j = await res.json();
            if (j?.error) msg = String(j.error);
          } catch {
            /* non-JSON error body */
          }
          throw new Error(msg);
        }
        const blob = await res.blob();
        if (myToken !== this.token) return;
        url = URL.createObjectURL(blob);
        this.cacheSet(id, url);
      }
      if (myToken !== this.token) return;
      audio.src = url;
      audio.load();
      await audio.play();
      if (myToken !== this.token) return;
      this.state = 'playing';
      this.emit();
    } catch (e) {
      if (myToken !== this.token) return;
      this.setError(id, e instanceof Error ? e.message : 'Read-aloud failed');
    }
  }

  /**
   * Stores a freshly generated blob URL and evicts least-recently-used entries until the
   * cache is back within CACHE_MAX. The URL currently loaded in the audio element is
   * skipped rather than dropped: it stays cached, so it is still revoked by a later
   * eviction instead of being orphaned without a revoke.
   */
  private cacheSet(id: string, url: string) {
    this.cache.set(id, url);
    const inUse = this.audio?.src;
    // Deleting entries while iterating a Map is safe; iteration runs oldest first.
    for (const [key, cachedUrl] of this.cache) {
      if (this.cache.size <= CACHE_MAX) break;
      if (cachedUrl === inUse) continue;
      this.cache.delete(key);
      URL.revokeObjectURL(cachedUrl);
    }
  }
}
|
||||||
|
|
||||||
|
// The one module-level VoicePlayer instance that importers share.
export const voicePlayer = new VoicePlayer();
|
||||||
Reference in New Issue
Block a user