feat(voice): send transcript with the main send button while recording

While dictating, the main send button stops recording, transcribes, and sends
in one tap, matching the codex-style flow. The mic button still stops recording
and drops the transcript into the input box for editing before sending. Voice
recording state is lifted into the composer so both buttons share it, and the
send button stays enabled (not grayed out) while recording. Also fix a
pre-existing type error: the quick-settings preferences map was missing voiceEnabled.
This commit is contained in:
newsbubbles
2026-06-13 13:09:14 +01:00
parent 7f8ae7023d
commit 952ddb9eb7
6 changed files with 88 additions and 40 deletions

View File

@@ -798,6 +798,17 @@ export function useChatComposerState({
handleSubmitRef.current = handleSubmit;
}, [handleSubmit]);
// Receives a finished voice transcript. The text is appended to whatever is already
// typed (separated by a single space) and placed in the input for editing. When `send`
// is truthy the message is submitted immediately instead. inputValueRef is updated
// right here, synchronously, so that handleSubmit sees the merged text rather than the
// not-yet-re-rendered input state.
const handleVoiceTranscript = useCallback((text: string, send?: boolean) => {
  const existing = inputValueRef.current.trim();
  const merged = existing ? `${existing} ${text}` : text;

  setInput(merged);
  inputValueRef.current = merged;

  // Fill-only mode: leave the transcript in the box for the user to edit.
  if (!send) return;

  handleSubmitRef.current?.(createFakeSubmitEvent());
}, [setInput]);
useEffect(() => {
inputValueRef.current = input;
}, [input]);
@@ -1050,6 +1061,7 @@ export function useChatComposerState({
isDragActive,
openImagePicker: open,
handleSubmit,
handleVoiceTranscript,
handleInputChange,
handleKeyDown,
handlePaste,

View File

@@ -29,11 +29,16 @@ export type VoiceInputState = 'idle' | 'recording' | 'transcribing';
* (an OpenAI-compatible speech-to-text backend via the Express proxy), and
* returns the transcript through onTranscript.
*/
export function useVoiceInput(onTranscript: (text: string) => void, onError?: (msg: string) => void) {
export function useVoiceInput(
onTranscript: (text: string, send?: boolean) => void,
onError?: (msg: string) => void,
) {
const [state, setState] = useState<VoiceInputState>('idle');
const recorderRef = useRef<MediaRecorder | null>(null);
const chunksRef = useRef<Blob[]>([]);
const streamRef = useRef<MediaStream | null>(null);
// Whether the in-progress stop should auto-send the transcript (vs just fill the box).
const sendRef = useRef(false);
const stopTracks = () => {
streamRef.current?.getTracks().forEach((t) => t.stop());
@@ -66,6 +71,9 @@ export function useVoiceInput(onTranscript: (text: string) => void, onError?: (m
rec.onstop = async () => {
stopTracks();
// Capture and clear the send intent for this stop before any async work.
const shouldSend = sendRef.current;
sendRef.current = false;
const type = rec.mimeType || 'audio/webm';
const blob = new Blob(chunksRef.current, { type });
if (blob.size < 800) {
@@ -86,7 +94,7 @@ export function useVoiceInput(onTranscript: (text: string) => void, onError?: (m
if (!res.ok) throw new Error(`transcribe ${res.status}`);
const data = await res.json();
const text = String(data?.text || '').trim();
if (text) onTranscript(text);
if (text) onTranscript(text, shouldSend);
else onError?.('No speech detected');
} catch (e) {
onError?.(`Transcription failed: ${e instanceof Error ? e.message : String(e)}`);
@@ -107,8 +115,12 @@ export function useVoiceInput(onTranscript: (text: string) => void, onError?: (m
}
}, [onTranscript, onError]);
const stop = useCallback(() => {
if (recorderRef.current && state === 'recording') recorderRef.current.stop();
// Ends the active recording. The optional `send` flag is stored in sendRef before the
// recorder is stopped so the onstop handler can tell "stop and send" apart from a plain
// stop (which only fills the input box). Does nothing unless a recorder exists and the
// hook is currently in the 'recording' state.
const stop = useCallback((opts?: { send?: boolean }) => {
  const recorder = recorderRef.current;
  if (!recorder || state !== 'recording') return;

  sendRef.current = opts?.send ?? false;
  recorder.stop();
}, [state]);
const toggle = useCallback(() => {
@@ -116,5 +128,5 @@ export function useVoiceInput(onTranscript: (text: string) => void, onError?: (m
else if (state === 'idle') start();
}, [state, start, stop]);
return { state, toggle };
return { state, toggle, stop };
}

View File

@@ -164,6 +164,7 @@ function ChatInterface({
isDragActive,
openImagePicker,
handleSubmit,
handleVoiceTranscript,
handleInputChange,
handleKeyDown,
handlePaste,
@@ -404,7 +405,7 @@ function ChatInterface({
renderInputWithMentions={renderInputWithMentions}
textareaRef={textareaRef}
input={input}
onVoiceTranscript={(text) => setInput(input.trim() ? `${input.trim()} ${text}` : text)}
onVoiceTranscript={handleVoiceTranscript}
onInputChange={handleInputChange}
onTextareaClick={handleTextareaClick}
onTextareaKeyDown={handleKeyDown}

View File

@@ -1,4 +1,5 @@
import { useTranslation } from 'react-i18next';
import { useCallback, useEffect, useRef, useState } from 'react';
import type {
ChangeEvent,
ClipboardEvent,
@@ -9,8 +10,10 @@ import type {
RefObject,
TouchEvent,
} from 'react';
import { ImageIcon, MessageSquareIcon, XIcon, ArrowDownIcon } from 'lucide-react';
import { ImageIcon, MessageSquareIcon, XIcon, ArrowDownIcon, Loader2 } from 'lucide-react';
import { useVoiceInput } from '../../hooks/useVoiceInput';
import { useVoiceAvailable } from '../../hooks/useVoiceAvailable';
import type { PendingPermissionRequest, PermissionMode, Provider } from '../../types/types';
import {
PromptInput,
@@ -90,7 +93,7 @@ interface ChatComposerProps {
renderInputWithMentions: (text: string) => ReactNode;
textareaRef: RefObject<HTMLTextAreaElement>;
input: string;
onVoiceTranscript?: (text: string) => void;
onVoiceTranscript?: (text: string, send?: boolean) => void;
onInputChange: (event: ChangeEvent<HTMLTextAreaElement>) => void;
onTextareaClick: (event: MouseEvent<HTMLTextAreaElement>) => void;
onTextareaKeyDown: (event: KeyboardEvent<HTMLTextAreaElement>) => void;
@@ -158,6 +161,28 @@ export default function ChatComposer({
sendByCtrlEnter,
}: ChatComposerProps) {
const { t } = useTranslation('chat');
// Voice state is hosted here (not in the mic button) so the main Send button can stop
// recording and send the transcript in one tap, the way the mic button drops it in the box.
const voiceAvailable = useVoiceAvailable();
const [voiceError, setVoiceError] = useState<string | null>(null);
const voiceErrorTimer = useRef<ReturnType<typeof setTimeout> | null>(null);
// Shows a voice error message and schedules it to be cleared after 4 seconds.
// A newer error cancels the pending clear so the countdown restarts for the latest message.
const handleVoiceError = useCallback((msg: string) => {
  setVoiceError(msg);

  const pending = voiceErrorTimer.current;
  if (pending) clearTimeout(pending);

  voiceErrorTimer.current = setTimeout(() => {
    setVoiceError(null);
  }, 4000);
}, []);
useEffect(() => () => {
if (voiceErrorTimer.current) clearTimeout(voiceErrorTimer.current);
}, []);
const noopTranscript = useCallback(() => {}, []);
const { state: voiceState, toggle: voiceToggle, stop: voiceStop } = useVoiceInput(
onVoiceTranscript ?? noopTranscript,
handleVoiceError,
);
const isRecording = voiceState === 'recording';
const isTranscribing = voiceState === 'transcribing';
const textareaRect = textareaRef.current?.getBoundingClientRect();
const commandMenuPosition = {
top: textareaRect ? Math.max(16, textareaRect.top - 316) : 0,
@@ -318,7 +343,9 @@ export default function ChatComposer({
<ImageIcon />
</PromptInputButton>
{onVoiceTranscript && <VoiceInputButton onTranscript={onVoiceTranscript} />}
{onVoiceTranscript && voiceAvailable && (
<VoiceInputButton state={voiceState} onToggle={voiceToggle} errorMsg={voiceError} />
)}
<button
type="button"
@@ -398,10 +425,21 @@ export default function ChatComposer({
{sendByCtrlEnter ? t('input.hintText.ctrlEnter') : t('input.hintText.enter')}
</div>
<PromptInputSubmit
onClick={isLoading ? onAbortSession : undefined}
disabled={!isLoading && !input.trim()}
onClick={
isLoading
? onAbortSession
: isRecording
? (e: MouseEvent<HTMLButtonElement>) => {
e.preventDefault();
voiceStop({ send: true });
}
: undefined
}
disabled={isLoading ? false : isRecording ? false : isTranscribing ? true : !input.trim()}
className="h-10 w-10 sm:h-10 sm:w-10"
/>
>
{isTranscribing ? <Loader2 className="h-4 w-4 animate-spin" /> : undefined}
</PromptInputSubmit>
</div>
</PromptInputFooter>
</PromptInput>

View File

@@ -1,37 +1,20 @@
import { useEffect, useRef, useState } from 'react';
import { Mic, Square, Loader2 } from 'lucide-react';
import { useTranslation } from 'react-i18next';
import { useVoiceInput } from '../../hooks/useVoiceInput';
import { useVoiceAvailable } from '../../hooks/useVoiceAvailable';
import { Mic, Square, Loader2 } from 'lucide-react';
import { PromptInputButton } from '../../../../shared/view/ui';
import type { VoiceInputState } from '../../hooks/useVoiceInput';
type Props = {
onTranscript: (text: string) => void;
onError?: (msg: string) => void;
state: VoiceInputState;
onToggle: () => void;
errorMsg?: string | null;
};
// Push-to-talk mic button. Renders nothing unless the optional voice feature is enabled.
// Surfaces transcription errors itself (transiently) so they aren't silently swallowed.
export default function VoiceInputButton({ onTranscript, onError }: Props) {
// Push-to-talk mic button (presentational). Recording state and the stop-and-send action
// are owned by the composer so the main Send button can drive them too. This button just
// starts recording and, while recording, stops and drops the transcript into the input box.
export default function VoiceInputButton({ state, onToggle, errorMsg }: Props) {
const { t } = useTranslation('chat');
const available = useVoiceAvailable();
const [errorMsg, setErrorMsg] = useState<string | null>(null);
const errorTimer = useRef<ReturnType<typeof setTimeout> | null>(null);
const handleError = (msg: string) => {
onError?.(msg);
setErrorMsg(msg);
if (errorTimer.current) clearTimeout(errorTimer.current);
errorTimer.current = setTimeout(() => setErrorMsg(null), 4000);
};
const { state, toggle } = useVoiceInput(onTranscript, handleError);
useEffect(() => () => {
if (errorTimer.current) clearTimeout(errorTimer.current);
}, []);
if (!available) return null;
const icon =
state === 'recording' ? (
@@ -53,7 +36,7 @@ export default function VoiceInputButton({ onTranscript, onError }: Props) {
tooltip={{ content: state === 'recording' ? t('voice.stopRecording') : t('voice.input') }}
onClick={(e: { preventDefault: () => void }) => {
e.preventDefault();
toggle();
onToggle();
}}
>
{icon}

View File

@@ -27,12 +27,14 @@ export default function QuickSettingsPanelView() {
showThinking: preferences.showThinking,
autoScrollToBottom: preferences.autoScrollToBottom,
sendByCtrlEnter: preferences.sendByCtrlEnter,
voiceEnabled: preferences.voiceEnabled,
}), [
preferences.autoExpandTools,
preferences.autoScrollToBottom,
preferences.sendByCtrlEnter,
preferences.showRawParameters,
preferences.showThinking,
preferences.voiceEnabled,
]);
const handlePreferenceChange = useCallback(