refactor(whisper): make mic button a feature based component

This commit is contained in:
Haileyesus
2026-02-20 09:39:14 +03:00
parent e47b2702fe
commit 1e3eba930a
10 changed files with 424 additions and 320 deletions

View File

@@ -1,272 +0,0 @@
import React, { useState, useEffect, useRef } from 'react';
import { Mic, Loader2, Brain } from 'lucide-react';
import { transcribeWithWhisper } from '../utils/whisper';
export function MicButton({ onTranscript, className = '' }) {
const [state, setState] = useState('idle'); // idle, recording, transcribing, processing
const [error, setError] = useState(null);
const [isSupported, setIsSupported] = useState(true);
const mediaRecorderRef = useRef(null);
const streamRef = useRef(null);
const chunksRef = useRef([]);
const lastTapRef = useRef(0);
// Check microphone support on mount
useEffect(() => {
const checkSupport = () => {
if (!navigator.mediaDevices || !navigator.mediaDevices.getUserMedia) {
setIsSupported(false);
setError('Microphone not supported. Please use HTTPS or a modern browser.');
return;
}
// Additional check for secure context
if (location.protocol !== 'https:' && location.hostname !== 'localhost') {
setIsSupported(false);
setError('Microphone requires HTTPS. Please use a secure connection.');
return;
}
setIsSupported(true);
setError(null);
};
checkSupport();
}, []);
// Start recording
const startRecording = async () => {
try {
console.log('Starting recording...');
setError(null);
chunksRef.current = [];
// Check if getUserMedia is available
if (!navigator.mediaDevices || !navigator.mediaDevices.getUserMedia) {
throw new Error('Microphone access not available. Please use HTTPS or a supported browser.');
}
const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
streamRef.current = stream;
const mimeType = MediaRecorder.isTypeSupported('audio/webm') ? 'audio/webm' : 'audio/mp4';
const recorder = new MediaRecorder(stream, { mimeType });
mediaRecorderRef.current = recorder;
recorder.ondataavailable = (e) => {
if (e.data.size > 0) {
chunksRef.current.push(e.data);
}
};
recorder.onstop = async () => {
console.log('Recording stopped, creating blob...');
const blob = new Blob(chunksRef.current, { type: mimeType });
// Clean up stream
if (streamRef.current) {
streamRef.current.getTracks().forEach(track => track.stop());
streamRef.current = null;
}
// Start transcribing
setState('transcribing');
// Check if we're in an enhancement mode
const whisperMode = window.localStorage.getItem('whisperMode') || 'default';
const isEnhancementMode = whisperMode === 'prompt' || whisperMode === 'vibe' || whisperMode === 'instructions' || whisperMode === 'architect';
// Set up a timer to switch to processing state for enhancement modes
let processingTimer;
if (isEnhancementMode) {
processingTimer = setTimeout(() => {
setState('processing');
}, 2000); // Switch to processing after 2 seconds
}
try {
const text = await transcribeWithWhisper(blob);
if (text && onTranscript) {
onTranscript(text);
}
} catch (err) {
console.error('Transcription error:', err);
setError(err.message);
} finally {
if (processingTimer) {
clearTimeout(processingTimer);
}
setState('idle');
}
};
recorder.start();
setState('recording');
console.log('Recording started successfully');
} catch (err) {
console.error('Failed to start recording:', err);
// Provide specific error messages based on error type
let errorMessage = 'Microphone access failed';
if (err.name === 'NotAllowedError') {
errorMessage = 'Microphone access denied. Please allow microphone permissions.';
} else if (err.name === 'NotFoundError') {
errorMessage = 'No microphone found. Please check your audio devices.';
} else if (err.name === 'NotSupportedError') {
errorMessage = 'Microphone not supported by this browser.';
} else if (err.name === 'NotReadableError') {
errorMessage = 'Microphone is being used by another application.';
} else if (err.message.includes('HTTPS')) {
errorMessage = err.message;
}
setError(errorMessage);
setState('idle');
}
};
// Stop recording
const stopRecording = () => {
console.log('Stopping recording...');
if (mediaRecorderRef.current && mediaRecorderRef.current.state === 'recording') {
mediaRecorderRef.current.stop();
// Don't set state here - let the onstop handler do it
} else {
// If recorder isn't in recording state, force cleanup
console.log('Recorder not in recording state, forcing cleanup');
if (streamRef.current) {
streamRef.current.getTracks().forEach(track => track.stop());
streamRef.current = null;
}
setState('idle');
}
};
// Handle button click
const handleClick = (e) => {
// Prevent double firing on mobile
if (e) {
e.preventDefault();
e.stopPropagation();
}
// Don't proceed if microphone is not supported
if (!isSupported) {
return;
}
// Debounce for mobile double-tap issue
const now = Date.now();
if (now - lastTapRef.current < 300) {
console.log('Ignoring rapid tap');
return;
}
lastTapRef.current = now;
console.log('Button clicked, current state:', state);
if (state === 'idle') {
startRecording();
} else if (state === 'recording') {
stopRecording();
}
// Do nothing if transcribing or processing
};
// Clean up on unmount
useEffect(() => {
return () => {
if (streamRef.current) {
streamRef.current.getTracks().forEach(track => track.stop());
}
};
}, []);
// Button appearance based on state
const getButtonAppearance = () => {
if (!isSupported) {
return {
icon: <Mic className="w-5 h-5" />,
className: 'bg-gray-400 cursor-not-allowed',
disabled: true
};
}
switch (state) {
case 'recording':
return {
icon: <Mic className="w-5 h-5 text-white" />,
className: 'bg-red-500 hover:bg-red-600 animate-pulse',
disabled: false
};
case 'transcribing':
return {
icon: <Loader2 className="w-5 h-5 animate-spin" />,
className: 'bg-blue-500 hover:bg-blue-600',
disabled: true
};
case 'processing':
return {
icon: <Brain className="w-5 h-5 animate-pulse" />,
className: 'bg-purple-500 hover:bg-purple-600',
disabled: true
};
default: // idle
return {
icon: <Mic className="w-5 h-5" />,
className: 'bg-gray-700 hover:bg-gray-600',
disabled: false
};
}
};
const { icon, className: buttonClass, disabled } = getButtonAppearance();
return (
<div className="relative">
<button
type="button"
style={{
backgroundColor: state === 'recording' ? '#ef4444' :
state === 'transcribing' ? '#3b82f6' :
state === 'processing' ? '#a855f7' :
'#374151'
}}
className={`
flex items-center justify-center
w-12 h-12 rounded-full
text-white transition-all duration-200
focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-blue-500
dark:ring-offset-gray-800
touch-action-manipulation
${disabled ? 'cursor-not-allowed opacity-75' : 'cursor-pointer'}
${state === 'recording' ? 'animate-pulse' : ''}
hover:opacity-90
${className}
`}
onClick={handleClick}
disabled={disabled}
>
{icon}
</button>
{error && (
<div className="absolute top-full mt-2 left-1/2 transform -translate-x-1/2
bg-red-500 text-white text-xs px-2 py-1 rounded whitespace-nowrap z-10
animate-fade-in">
{error}
</div>
)}
{state === 'recording' && (
<div className="absolute -inset-1 rounded-full border-2 border-red-500 animate-ping pointer-events-none" />
)}
{state === 'processing' && (
<div className="absolute -inset-1 rounded-full border-2 border-purple-500 animate-ping pointer-events-none" />
)}
</div>
);
}

View File

@@ -1,6 +1,6 @@
import CommandMenu from './CommandMenu';
import ClaudeStatus from './ClaudeStatus';
import { MicButton } from '../../../MicButton.jsx';
import MicButton from '../../../mic-button/view/MicButton';
import ImageAttachment from './ImageAttachment';
import PermissionRequestsBanner from './PermissionRequestsBanner';
import ChatInputControls from './ChatInputControls';

View File

@@ -1,16 +1,8 @@
import { Check, ChevronDown, GitCommit, RefreshCw, Sparkles } from 'lucide-react';
import { useState } from 'react';
import { MicButton } from '../../../MicButton.jsx';
import MicButton from '../../../mic-button/view/MicButton';
import type { ConfirmationRequest } from '../../types/types';
type MicButtonProps = {
onTranscript?: (transcript: string) => void;
className?: string;
mode?: string;
};
const MicButtonComponent = MicButton as unknown as (props: MicButtonProps) => JSX.Element;
type CommitComposerProps = {
isMobile: boolean;
selectedFileCount: number;
@@ -148,7 +140,7 @@ export default function CommitComposer({
)}
</button>
<div style={{ display: 'none' }}>
<MicButtonComponent
<MicButton
onTranscript={(transcript) => setCommitMessage(transcript)}
mode="default"
className="p-1.5"

View File

@@ -0,0 +1,45 @@
import type { MicButtonState } from '../types/types';
export const MIC_BUTTON_STATES = {
IDLE: 'idle',
RECORDING: 'recording',
TRANSCRIBING: 'transcribing',
PROCESSING: 'processing',
} as const;
export const MIC_TAP_DEBOUNCE_MS = 300;
export const PROCESSING_STATE_DELAY_MS = 2000;
export const DEFAULT_WHISPER_MODE = 'default';
// Modes that use post-transcription enhancement on the backend.
export const ENHANCEMENT_WHISPER_MODES = new Set([
'prompt',
'vibe',
'instructions',
'architect',
]);
export const BUTTON_BACKGROUND_BY_STATE: Record<MicButtonState, string> = {
idle: '#374151',
recording: '#ef4444',
transcribing: '#3b82f6',
processing: '#a855f7',
};
export const MIC_ERROR_BY_NAME = {
NotAllowedError: 'Microphone access denied. Please allow microphone permissions.',
NotFoundError: 'No microphone found. Please check your audio devices.',
NotSupportedError: 'Microphone not supported by this browser.',
NotReadableError: 'Microphone is being used by another application.',
} as const;
export const MIC_NOT_AVAILABLE_ERROR =
'Microphone access not available. Please use HTTPS or a supported browser.';
export const MIC_NOT_SUPPORTED_ERROR =
'Microphone not supported. Please use HTTPS or a modern browser.';
export const MIC_SECURE_CONTEXT_ERROR =
'Microphone requires HTTPS. Please use a secure connection.';

View File

@@ -0,0 +1,52 @@
import { api } from '../../../utils/api';
type WhisperStatus = 'transcribing';
type WhisperResponse = {
text?: string;
error?: string;
};
export async function transcribeWithWhisper(
audioBlob: Blob,
onStatusChange?: (status: WhisperStatus) => void,
): Promise<string> {
const formData = new FormData();
const fileName = `recording_${Date.now()}.webm`;
const file = new File([audioBlob], fileName, { type: audioBlob.type });
formData.append('audio', file);
const whisperMode = window.localStorage.getItem('whisperMode') || 'default';
formData.append('mode', whisperMode);
try {
// Keep existing status callback behavior.
if (onStatusChange) {
onStatusChange('transcribing');
}
const response = (await api.transcribe(formData)) as Response;
if (!response.ok) {
const errorData = (await response.json().catch(() => ({}))) as WhisperResponse;
throw new Error(
errorData.error ||
`Transcription error: ${response.status} ${response.statusText}`,
);
}
const data = (await response.json()) as WhisperResponse;
return data.text || '';
} catch (error) {
if (
error instanceof Error
&& error.name === 'TypeError'
&& error.message.includes('fetch')
) {
throw new Error('Cannot connect to server. Please ensure the backend is running.');
}
throw error;
}
}

View File

@@ -0,0 +1,204 @@
import { useEffect, useRef, useState } from 'react';
import type { MouseEvent } from 'react';
import { transcribeWithWhisper } from '../data/whisper';
import {
DEFAULT_WHISPER_MODE,
ENHANCEMENT_WHISPER_MODES,
MIC_BUTTON_STATES,
MIC_ERROR_BY_NAME,
MIC_NOT_AVAILABLE_ERROR,
MIC_NOT_SUPPORTED_ERROR,
MIC_SECURE_CONTEXT_ERROR,
MIC_TAP_DEBOUNCE_MS,
PROCESSING_STATE_DELAY_MS,
} from '../constants/constants';
import type { MicButtonState } from '../types/types';
type UseMicButtonControllerArgs = {
onTranscript?: (transcript: string) => void;
};
type UseMicButtonControllerResult = {
state: MicButtonState;
error: string | null;
isSupported: boolean;
handleButtonClick: (event?: MouseEvent<HTMLButtonElement>) => void;
};
const getRecordingErrorMessage = (error: unknown): string => {
if (error instanceof Error && error.message.includes('HTTPS')) {
return error.message;
}
if (error instanceof DOMException) {
return MIC_ERROR_BY_NAME[error.name as keyof typeof MIC_ERROR_BY_NAME] || 'Microphone access failed';
}
return 'Microphone access failed';
};
const getRecorderMimeType = (): string => (
MediaRecorder.isTypeSupported('audio/webm') ? 'audio/webm' : 'audio/mp4'
);
export function useMicButtonController({
onTranscript,
}: UseMicButtonControllerArgs): UseMicButtonControllerResult {
const [state, setState] = useState<MicButtonState>(MIC_BUTTON_STATES.IDLE);
const [error, setError] = useState<string | null>(null);
const [isSupported, setIsSupported] = useState(true);
const mediaRecorderRef = useRef<MediaRecorder | null>(null);
const streamRef = useRef<MediaStream | null>(null);
const chunksRef = useRef<BlobPart[]>([]);
const lastTapRef = useRef(0);
const processingTimerRef = useRef<number | null>(null);
const clearProcessingTimer = (): void => {
if (processingTimerRef.current !== null) {
window.clearTimeout(processingTimerRef.current);
processingTimerRef.current = null;
}
};
const stopStreamTracks = (): void => {
if (!streamRef.current) {
return;
}
streamRef.current.getTracks().forEach((track) => track.stop());
streamRef.current = null;
};
const handleStopRecording = async (mimeType: string): Promise<void> => {
const audioBlob = new Blob(chunksRef.current, { type: mimeType });
// Release the microphone immediately once recording ends.
stopStreamTracks();
setState(MIC_BUTTON_STATES.TRANSCRIBING);
const whisperMode = window.localStorage.getItem('whisperMode') || DEFAULT_WHISPER_MODE;
const shouldShowProcessingState = ENHANCEMENT_WHISPER_MODES.has(whisperMode);
if (shouldShowProcessingState) {
processingTimerRef.current = window.setTimeout(() => {
setState(MIC_BUTTON_STATES.PROCESSING);
}, PROCESSING_STATE_DELAY_MS);
}
try {
const transcript = await transcribeWithWhisper(audioBlob);
if (transcript && onTranscript) {
onTranscript(transcript);
}
} catch (transcriptionError) {
const message = transcriptionError instanceof Error ? transcriptionError.message : 'Transcription error';
setError(message);
} finally {
clearProcessingTimer();
setState(MIC_BUTTON_STATES.IDLE);
}
};
const startRecording = async (): Promise<void> => {
try {
setError(null);
chunksRef.current = [];
if (!navigator.mediaDevices || !navigator.mediaDevices.getUserMedia) {
throw new Error(MIC_NOT_AVAILABLE_ERROR);
}
const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
streamRef.current = stream;
const mimeType = getRecorderMimeType();
const recorder = new MediaRecorder(stream, { mimeType });
mediaRecorderRef.current = recorder;
recorder.ondataavailable = (event: BlobEvent) => {
if (event.data.size > 0) {
chunksRef.current.push(event.data);
}
};
recorder.onstop = () => {
void handleStopRecording(mimeType);
};
recorder.start();
setState(MIC_BUTTON_STATES.RECORDING);
} catch (recordingError) {
stopStreamTracks();
setError(getRecordingErrorMessage(recordingError));
setState(MIC_BUTTON_STATES.IDLE);
}
};
const stopRecording = (): void => {
if (mediaRecorderRef.current && mediaRecorderRef.current.state === 'recording') {
mediaRecorderRef.current.stop();
return;
}
stopStreamTracks();
setState(MIC_BUTTON_STATES.IDLE);
};
const handleButtonClick = (event?: MouseEvent<HTMLButtonElement>): void => {
if (event) {
event.preventDefault();
event.stopPropagation();
}
if (!isSupported) {
return;
}
// Mobile tap handling can trigger duplicate click events in quick succession.
const now = Date.now();
if (now - lastTapRef.current < MIC_TAP_DEBOUNCE_MS) {
return;
}
lastTapRef.current = now;
if (state === MIC_BUTTON_STATES.IDLE) {
void startRecording();
return;
}
if (state === MIC_BUTTON_STATES.RECORDING) {
stopRecording();
}
};
useEffect(() => {
// getUserMedia needs both browser support and a secure context.
if (!navigator.mediaDevices || !navigator.mediaDevices.getUserMedia) {
setIsSupported(false);
setError(MIC_NOT_SUPPORTED_ERROR);
return;
}
if (location.protocol !== 'https:' && location.hostname !== 'localhost') {
setIsSupported(false);
setError(MIC_SECURE_CONTEXT_ERROR);
return;
}
setIsSupported(true);
setError(null);
}, []);
useEffect(() => () => {
clearProcessingTimer();
stopStreamTracks();
}, []);
return {
state,
error,
isSupported,
handleButtonClick,
};
}

View File

@@ -0,0 +1,2 @@
export type MicButtonState = 'idle' | 'recording' | 'transcribing' | 'processing';

View File

@@ -0,0 +1,32 @@
import { useMicButtonController } from '../hooks/useMicButtonController';
import MicButtonView from './MicButtonView';
type MicButtonProps = {
onTranscript?: (transcript: string) => void;
className?: string;
mode?: string;
};
export default function MicButton({
onTranscript,
className = '',
mode: _mode,
}: MicButtonProps) {
const { state, error, isSupported, handleButtonClick } = useMicButtonController({
onTranscript,
});
// Keep `mode` in the public props for backwards compatibility.
void _mode;
return (
<MicButtonView
state={state}
error={error}
isSupported={isSupported}
className={className}
onButtonClick={handleButtonClick}
/>
);
}

View File

@@ -0,0 +1,86 @@
import { Brain, Loader2, Mic } from 'lucide-react';
import type { MouseEvent, ReactElement } from 'react';
import { BUTTON_BACKGROUND_BY_STATE, MIC_BUTTON_STATES } from '../constants/constants';
import type { MicButtonState } from '../types/types';
type MicButtonViewProps = {
state: MicButtonState;
error: string | null;
isSupported: boolean;
className: string;
onButtonClick: (event?: MouseEvent<HTMLButtonElement>) => void;
};
const getButtonIcon = (state: MicButtonState, isSupported: boolean): ReactElement => {
if (!isSupported) {
return <Mic className="w-5 h-5" />;
}
if (state === MIC_BUTTON_STATES.TRANSCRIBING) {
return <Loader2 className="w-5 h-5 animate-spin" />;
}
if (state === MIC_BUTTON_STATES.PROCESSING) {
return <Brain className="w-5 h-5 animate-pulse" />;
}
if (state === MIC_BUTTON_STATES.RECORDING) {
return <Mic className="w-5 h-5 text-white" />;
}
return <Mic className="w-5 h-5" />;
};
export default function MicButtonView({
state,
error,
isSupported,
className,
onButtonClick,
}: MicButtonViewProps) {
const isDisabled = !isSupported || state === MIC_BUTTON_STATES.TRANSCRIBING || state === MIC_BUTTON_STATES.PROCESSING;
const icon = getButtonIcon(state, isSupported);
return (
<div className="relative">
<button
type="button"
style={{ backgroundColor: BUTTON_BACKGROUND_BY_STATE[state] }}
className={`
flex items-center justify-center
w-12 h-12 rounded-full
text-white transition-all duration-200
focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-blue-500
dark:ring-offset-gray-800
touch-action-manipulation
${isDisabled ? 'cursor-not-allowed opacity-75' : 'cursor-pointer'}
${state === MIC_BUTTON_STATES.RECORDING ? 'animate-pulse' : ''}
hover:opacity-90
${className}
`}
onClick={onButtonClick}
disabled={isDisabled}
>
{icon}
</button>
{error && (
<div
className="absolute top-full mt-2 left-1/2 transform -translate-x-1/2
bg-red-500 text-white text-xs px-2 py-1 rounded whitespace-nowrap z-10
animate-fade-in"
>
{error}
</div>
)}
{state === MIC_BUTTON_STATES.RECORDING && (
<div className="absolute -inset-1 rounded-full border-2 border-red-500 animate-ping pointer-events-none" />
)}
{state === MIC_BUTTON_STATES.PROCESSING && (
<div className="absolute -inset-1 rounded-full border-2 border-purple-500 animate-ping pointer-events-none" />
)}
</div>
);
}

View File

@@ -1,37 +0,0 @@
import { api } from './api';
export async function transcribeWithWhisper(audioBlob, onStatusChange) {
const formData = new FormData();
const fileName = `recording_${Date.now()}.webm`;
const file = new File([audioBlob], fileName, { type: audioBlob.type });
formData.append('audio', file);
const whisperMode = window.localStorage.getItem('whisperMode') || 'default';
formData.append('mode', whisperMode);
try {
// Start with transcribing state
if (onStatusChange) {
onStatusChange('transcribing');
}
const response = await api.transcribe(formData);
if (!response.ok) {
const errorData = await response.json().catch(() => ({}));
throw new Error(
errorData.error ||
`Transcription error: ${response.status} ${response.statusText}`
);
}
const data = await response.json();
return data.text || '';
} catch (error) {
if (error.name === 'TypeError' && error.message.includes('fetch')) {
throw new Error('Cannot connect to server. Please ensure the backend is running.');
}
throw error;
}
}