diff --git a/src/components/chat/hooks/useChatComposerState.ts b/src/components/chat/hooks/useChatComposerState.ts index dca2b2f8..93334312 100644 --- a/src/components/chat/hooks/useChatComposerState.ts +++ b/src/components/chat/hooks/useChatComposerState.ts @@ -798,6 +798,17 @@ export function useChatComposerState({ handleSubmitRef.current = handleSubmit; }, [handleSubmit]); + // A voice transcript either fills the input (to edit before sending) or, when the + // user tapped "stop and send", is submitted straight away. Mirror the value into + // inputValueRef synchronously so handleSubmit reads the new text, not the stale state. + const handleVoiceTranscript = useCallback((text: string, send?: boolean) => { + const base = inputValueRef.current.trim(); + const next = base ? `${base} ${text}` : text; + setInput(next); + inputValueRef.current = next; + if (send) handleSubmitRef.current?.(createFakeSubmitEvent()); + }, [setInput]); + useEffect(() => { inputValueRef.current = input; }, [input]); @@ -1050,6 +1061,7 @@ export function useChatComposerState({ isDragActive, openImagePicker: open, handleSubmit, + handleVoiceTranscript, handleInputChange, handleKeyDown, handlePaste, diff --git a/src/components/chat/hooks/useVoiceInput.ts b/src/components/chat/hooks/useVoiceInput.ts index f119dc07..ed6e8d43 100644 --- a/src/components/chat/hooks/useVoiceInput.ts +++ b/src/components/chat/hooks/useVoiceInput.ts @@ -29,11 +29,16 @@ export type VoiceInputState = 'idle' | 'recording' | 'transcribing'; * (an OpenAI-compatible speech-to-text backend via the Express proxy), and * returns the transcript through onTranscript. */ -export function useVoiceInput(onTranscript: (text: string) => void, onError?: (msg: string) => void) { +export function useVoiceInput( + onTranscript: (text: string, send?: boolean) => void, + onError?: (msg: string) => void, +) { const [state, setState] = useState('idle'); const recorderRef = useRef(null); const chunksRef = useRef([]); const streamRef = useRef(null); + // Whether the in-progress stop should auto-send the transcript (vs just fill the box). + const sendRef = useRef(false); const stopTracks = () => { streamRef.current?.getTracks().forEach((t) => t.stop()); @@ -66,6 +71,9 @@ export function useVoiceInput(onTranscript: (text: string) => void, onError?: (m rec.onstop = async () => { stopTracks(); + // Capture and clear the send intent for this stop before any async work. + const shouldSend = sendRef.current; + sendRef.current = false; const type = rec.mimeType || 'audio/webm'; const blob = new Blob(chunksRef.current, { type }); if (blob.size < 800) { @@ -86,7 +94,7 @@ export function useVoiceInput(onTranscript: (text: string) => void, onError?: (m if (!res.ok) throw new Error(`transcribe ${res.status}`); const data = await res.json(); const text = String(data?.text || '').trim(); - if (text) onTranscript(text); + if (text) onTranscript(text, shouldSend); else onError?.('No speech detected'); } catch (e) { onError?.(`Transcription failed: ${e instanceof Error ? e.message : String(e)}`); @@ -107,8 +115,12 @@ export function useVoiceInput(onTranscript: (text: string) => void, onError?: (m } }, [onTranscript, onError]); - const stop = useCallback(() => { - if (recorderRef.current && state === 'recording') recorderRef.current.stop(); + // Stop recording. Pass { send: true } to auto-send the transcript once it's ready. + const stop = useCallback((opts?: { send?: boolean }) => { + if (recorderRef.current && state === 'recording') { + sendRef.current = opts?.send ?? false; + recorderRef.current.stop(); + } }, [state]); const toggle = useCallback(() => { @@ -116,5 +128,5 @@ export function useVoiceInput(onTranscript: (text: string) => void, onError?: (m else if (state === 'idle') start(); }, [state, start, stop]); - return { state, toggle }; + return { state, toggle, stop }; } diff --git a/src/components/chat/view/ChatInterface.tsx b/src/components/chat/view/ChatInterface.tsx index 18996b71..3b427f64 100644 --- a/src/components/chat/view/ChatInterface.tsx +++ b/src/components/chat/view/ChatInterface.tsx @@ -164,6 +164,7 @@ function ChatInterface({ isDragActive, openImagePicker, handleSubmit, + handleVoiceTranscript, handleInputChange, handleKeyDown, handlePaste, @@ -404,7 +405,7 @@ function ChatInterface({ renderInputWithMentions={renderInputWithMentions} textareaRef={textareaRef} input={input} - onVoiceTranscript={(text) => setInput(input.trim() ? `${input.trim()} ${text}` : text)} + onVoiceTranscript={handleVoiceTranscript} onInputChange={handleInputChange} onTextareaClick={handleTextareaClick} onTextareaKeyDown={handleKeyDown} diff --git a/src/components/chat/view/subcomponents/ChatComposer.tsx b/src/components/chat/view/subcomponents/ChatComposer.tsx index ada0bca0..56977177 100644 --- a/src/components/chat/view/subcomponents/ChatComposer.tsx +++ b/src/components/chat/view/subcomponents/ChatComposer.tsx @@ -1,4 +1,5 @@ import { useTranslation } from 'react-i18next'; +import { useCallback, useEffect, useRef, useState } from 'react'; import type { ChangeEvent, ClipboardEvent, @@ -9,8 +10,10 @@ import type { RefObject, TouchEvent, } from 'react'; -import { ImageIcon, MessageSquareIcon, XIcon, ArrowDownIcon } from 'lucide-react'; +import { ImageIcon, MessageSquareIcon, XIcon, ArrowDownIcon, Loader2 } from 'lucide-react'; +import { useVoiceInput } from '../../hooks/useVoiceInput'; +import { useVoiceAvailable } from '../../hooks/useVoiceAvailable'; import type { PendingPermissionRequest, PermissionMode, Provider } from '../../types/types'; import { PromptInput, @@ -90,7 +93,7 @@ interface ChatComposerProps { renderInputWithMentions: (text: string) => ReactNode; textareaRef: RefObject; input: string; - onVoiceTranscript?: (text: string) => void; + onVoiceTranscript?: (text: string, send?: boolean) => void; onInputChange: (event: ChangeEvent) => void; onTextareaClick: (event: MouseEvent) => void; onTextareaKeyDown: (event: KeyboardEvent) => void; @@ -158,6 +161,28 @@ export default function ChatComposer({ sendByCtrlEnter, }: ChatComposerProps) { const { t } = useTranslation('chat'); + + // Voice state is hosted here (not in the mic button) so the main Send button can stop + // recording and send the transcript in one tap, the way the mic button drops it in the box. + const voiceAvailable = useVoiceAvailable(); + const [voiceError, setVoiceError] = useState(null); + const voiceErrorTimer = useRef | null>(null); + const handleVoiceError = useCallback((msg: string) => { + setVoiceError(msg); + if (voiceErrorTimer.current) clearTimeout(voiceErrorTimer.current); + voiceErrorTimer.current = setTimeout(() => setVoiceError(null), 4000); + }, []); + useEffect(() => () => { + if (voiceErrorTimer.current) clearTimeout(voiceErrorTimer.current); + }, []); + const noopTranscript = useCallback(() => {}, []); + const { state: voiceState, toggle: voiceToggle, stop: voiceStop } = useVoiceInput( + onVoiceTranscript ?? noopTranscript, + handleVoiceError, + ); + const isRecording = voiceState === 'recording'; + const isTranscribing = voiceState === 'transcribing'; + const textareaRect = textareaRef.current?.getBoundingClientRect(); const commandMenuPosition = { top: textareaRect ? Math.max(16, textareaRect.top - 316) : 0, @@ -318,7 +343,9 @@ export default function ChatComposer({ - {onVoiceTranscript && } + {onVoiceTranscript && voiceAvailable && ( + + )}