From 952ddb9eb7b9f708b48f9b09c11c30ffda3e0464 Mon Sep 17 00:00:00 2001 From: newsbubbles Date: Sat, 13 Jun 2026 13:09:14 +0100 Subject: [PATCH] feat(voice): send transcript with the main send button while recording while dictating, the main send button stops recording, transcribes, and sends in one tap, matching the codex-style flow. the mic button still stops and drops the transcript into the input box to edit before sending. voice recording state is lifted into the composer so both buttons share it, and the send button is enabled (not grayed) while recording. also fix a pre-existing type error: the quick-settings preferences map was missing voiceEnabled. --- .../chat/hooks/useChatComposerState.ts | 12 +++++ src/components/chat/hooks/useVoiceInput.ts | 22 ++++++-- src/components/chat/view/ChatInterface.tsx | 3 +- .../chat/view/subcomponents/ChatComposer.tsx | 50 ++++++++++++++++--- .../view/subcomponents/VoiceInputButton.tsx | 39 ++++----------- .../view/QuickSettingsPanelView.tsx | 2 + 6 files changed, 88 insertions(+), 40 deletions(-) diff --git a/src/components/chat/hooks/useChatComposerState.ts b/src/components/chat/hooks/useChatComposerState.ts index dca2b2f8..93334312 100644 --- a/src/components/chat/hooks/useChatComposerState.ts +++ b/src/components/chat/hooks/useChatComposerState.ts @@ -798,6 +798,17 @@ export function useChatComposerState({ handleSubmitRef.current = handleSubmit; }, [handleSubmit]); + // A voice transcript either fills the input (to edit before sending) or, when the + // user tapped "stop and send", is submitted straight away. Mirror the value into + // inputValueRef synchronously so handleSubmit reads the new text, not the stale state. + const handleVoiceTranscript = useCallback((text: string, send?: boolean) => { + const base = inputValueRef.current.trim(); + const next = base ? `${base} ${text}` : text; + setInput(next); + inputValueRef.current = next; + if (send) handleSubmitRef.current?.(createFakeSubmitEvent()); + }, [setInput]); + useEffect(() => { inputValueRef.current = input; }, [input]); @@ -1050,6 +1061,7 @@ export function useChatComposerState({ isDragActive, openImagePicker: open, handleSubmit, + handleVoiceTranscript, handleInputChange, handleKeyDown, handlePaste, diff --git a/src/components/chat/hooks/useVoiceInput.ts b/src/components/chat/hooks/useVoiceInput.ts index f119dc07..ed6e8d43 100644 --- a/src/components/chat/hooks/useVoiceInput.ts +++ b/src/components/chat/hooks/useVoiceInput.ts @@ -29,11 +29,16 @@ export type VoiceInputState = 'idle' | 'recording' | 'transcribing'; * (an OpenAI-compatible speech-to-text backend via the Express proxy), and * returns the transcript through onTranscript. */ -export function useVoiceInput(onTranscript: (text: string) => void, onError?: (msg: string) => void) { +export function useVoiceInput( + onTranscript: (text: string, send?: boolean) => void, + onError?: (msg: string) => void, +) { const [state, setState] = useState('idle'); const recorderRef = useRef(null); const chunksRef = useRef([]); const streamRef = useRef(null); + // Whether the in-progress stop should auto-send the transcript (vs just fill the box). + const sendRef = useRef(false); const stopTracks = () => { streamRef.current?.getTracks().forEach((t) => t.stop()); @@ -66,6 +71,9 @@ export function useVoiceInput(onTranscript: (text: string) => void, onError?: (m rec.onstop = async () => { stopTracks(); + // Capture and clear the send intent for this stop before any async work. + const shouldSend = sendRef.current; + sendRef.current = false; const type = rec.mimeType || 'audio/webm'; const blob = new Blob(chunksRef.current, { type }); if (blob.size < 800) { @@ -86,7 +94,7 @@ export function useVoiceInput(onTranscript: (text: string) => void, onError?: (m if (!res.ok) throw new Error(`transcribe ${res.status}`); const data = await res.json(); const text = String(data?.text || '').trim(); - if (text) onTranscript(text); + if (text) onTranscript(text, shouldSend); else onError?.('No speech detected'); } catch (e) { onError?.(`Transcription failed: ${e instanceof Error ? e.message : String(e)}`); @@ -107,8 +115,12 @@ export function useVoiceInput(onTranscript: (text: string) => void, onError?: (m } }, [onTranscript, onError]); - const stop = useCallback(() => { - if (recorderRef.current && state === 'recording') recorderRef.current.stop(); + // Stop recording. Pass { send: true } to auto-send the transcript once it's ready. + const stop = useCallback((opts?: { send?: boolean }) => { + if (recorderRef.current && state === 'recording') { + sendRef.current = opts?.send ?? false; + recorderRef.current.stop(); + } }, [state]); const toggle = useCallback(() => { @@ -116,5 +128,5 @@ export function useVoiceInput(onTranscript: (text: string) => void, onError?: (m else if (state === 'idle') start(); }, [state, start, stop]); - return { state, toggle }; + return { state, toggle, stop }; } diff --git a/src/components/chat/view/ChatInterface.tsx b/src/components/chat/view/ChatInterface.tsx index 18996b71..3b427f64 100644 --- a/src/components/chat/view/ChatInterface.tsx +++ b/src/components/chat/view/ChatInterface.tsx @@ -164,6 +164,7 @@ function ChatInterface({ isDragActive, openImagePicker, handleSubmit, + handleVoiceTranscript, handleInputChange, handleKeyDown, handlePaste, @@ -404,7 +405,7 @@ function ChatInterface({ renderInputWithMentions={renderInputWithMentions} textareaRef={textareaRef} input={input} - onVoiceTranscript={(text) => setInput(input.trim() ? `${input.trim()} ${text}` : text)} + onVoiceTranscript={handleVoiceTranscript} onInputChange={handleInputChange} onTextareaClick={handleTextareaClick} onTextareaKeyDown={handleKeyDown} diff --git a/src/components/chat/view/subcomponents/ChatComposer.tsx b/src/components/chat/view/subcomponents/ChatComposer.tsx index ada0bca0..56977177 100644 --- a/src/components/chat/view/subcomponents/ChatComposer.tsx +++ b/src/components/chat/view/subcomponents/ChatComposer.tsx @@ -1,4 +1,5 @@ import { useTranslation } from 'react-i18next'; +import { useCallback, useEffect, useRef, useState } from 'react'; import type { ChangeEvent, ClipboardEvent, @@ -9,8 +10,10 @@ import type { RefObject, TouchEvent, } from 'react'; -import { ImageIcon, MessageSquareIcon, XIcon, ArrowDownIcon } from 'lucide-react'; +import { ImageIcon, MessageSquareIcon, XIcon, ArrowDownIcon, Loader2 } from 'lucide-react'; +import { useVoiceInput } from '../../hooks/useVoiceInput'; +import { useVoiceAvailable } from '../../hooks/useVoiceAvailable'; import type { PendingPermissionRequest, PermissionMode, Provider } from '../../types/types'; import { PromptInput, @@ -90,7 +93,7 @@ interface ChatComposerProps { renderInputWithMentions: (text: string) => ReactNode; textareaRef: RefObject; input: string; - onVoiceTranscript?: (text: string) => void; + onVoiceTranscript?: (text: string, send?: boolean) => void; onInputChange: (event: ChangeEvent) => void; onTextareaClick: (event: MouseEvent) => void; onTextareaKeyDown: (event: KeyboardEvent) => void; @@ -158,6 +161,28 @@ export default function ChatComposer({ sendByCtrlEnter, }: ChatComposerProps) { const { t } = useTranslation('chat'); + + // Voice state is hosted here (not in the mic button) so the main Send button can stop + // recording and send the transcript in one tap, the way the mic button drops it in the box. + const voiceAvailable = useVoiceAvailable(); + const [voiceError, setVoiceError] = useState(null); + const voiceErrorTimer = useRef | null>(null); + const handleVoiceError = useCallback((msg: string) => { + setVoiceError(msg); + if (voiceErrorTimer.current) clearTimeout(voiceErrorTimer.current); + voiceErrorTimer.current = setTimeout(() => setVoiceError(null), 4000); + }, []); + useEffect(() => () => { + if (voiceErrorTimer.current) clearTimeout(voiceErrorTimer.current); + }, []); + const noopTranscript = useCallback(() => {}, []); + const { state: voiceState, toggle: voiceToggle, stop: voiceStop } = useVoiceInput( + onVoiceTranscript ?? noopTranscript, + handleVoiceError, + ); + const isRecording = voiceState === 'recording'; + const isTranscribing = voiceState === 'transcribing'; + const textareaRect = textareaRef.current?.getBoundingClientRect(); const commandMenuPosition = { top: textareaRect ? Math.max(16, textareaRect.top - 316) : 0, @@ -318,7 +343,9 @@ export default function ChatComposer({ - {onVoiceTranscript && } + {onVoiceTranscript && voiceAvailable && ( + + )}