diff --git a/server/voice-proxy.js b/server/voice-proxy.js
index 149459fb..1ea4a6d8 100644
--- a/server/voice-proxy.js
+++ b/server/voice-proxy.js
@@ -29,7 +29,9 @@ const ENV = {
 function resolveConfig(req) {
   const h = req.headers;
   return {
-    baseUrl: (String(h['x-voice-base-url'] || '') || ENV.baseUrl).replace(/\/$/, ''),
+    // Security: never let clients choose the outbound backend host; always use the
+    // server-configured base URL. Keep stripping one trailing slash, as before.
+    baseUrl: String(ENV.baseUrl || '').replace(/\/$/, ''),
     apiKey: String(h['x-voice-api-key'] || '') || ENV.apiKey,
     sttModel: String(h['x-voice-stt-model'] || '') || ENV.sttModel,
     ttsModel: String(h['x-voice-tts-model'] || '') || ENV.ttsModel,
diff --git a/src/components/chat/hooks/useVoiceAvailable.ts b/src/components/chat/hooks/useVoiceAvailable.ts
index 0adccd0d..9ee92c48 100644
--- a/src/components/chat/hooks/useVoiceAvailable.ts
+++ b/src/components/chat/hooks/useVoiceAvailable.ts
@@ -1,31 +1,26 @@
 import { useEffect, useState } from 'react';
 
 import { authenticatedFetch } from '../../../utils/api';
-import { VOICE_CONFIG_SYNC_EVENT, voiceConfigHeaders } from '../../../hooks/useVoiceConfig';
+import { readVoiceConfig, VOICE_CONFIG_SYNC_EVENT } from '../../../hooks/useVoiceConfig';
 
 // Voice UI is gated on the `voiceEnabled` UI preference (toggled in Quick Settings /
 // the Settings modal) and a configured voice backend.
 const STORAGE_KEY = 'uiPreferences';
 const SYNC_EVENT = 'ui-preferences:sync';
-const healthRequests = new Map<string, Promise<boolean>>();
+let healthRequest: Promise<boolean> | null = null;
 
 function checkVoiceHealth(): Promise<boolean> {
-  const baseUrl = voiceConfigHeaders()['x-voice-base-url'];
-  const signature = baseUrl || '';
-  const pending = healthRequests.get(signature);
-  if (pending) return pending;
-  const request = authenticatedFetch('/api/voice/health', {
-    headers: baseUrl ? { 'x-voice-base-url': baseUrl } : {},
-  })
+  if (healthRequest) return healthRequest; // a check is already in flight: share its promise
+  const request = authenticatedFetch('/api/voice/health') // no voice-config headers are sent
     .then(async (response) => {
       if (!response.ok) throw new Error(`Voice health check failed (${response.status})`);
       const data = await response.json();
       return data?.configured === true;
     })
     .finally(() => {
-      healthRequests.delete(signature);
+      healthRequest = null; // settled either way: the next call issues a fresh request
     });
-  healthRequests.set(signature, request);
+  healthRequest = request; // remember the pending check so concurrent callers reuse it
   return request;
 }
 
@@ -65,6 +60,10 @@ export function useVoiceAvailable(): boolean {
         setAvailable(false);
         return;
       }
+      if (readVoiceConfig().baseUrl.trim()) {
+        setAvailable(true);
+        return;
+      }
       const id = ++requestId;
       try {
         const result = await checkVoiceHealth();
diff --git a/src/components/chat/hooks/useVoiceInput.ts b/src/components/chat/hooks/useVoiceInput.ts
index 400612a0..6fcadd56 100644
--- a/src/components/chat/hooks/useVoiceInput.ts
+++ b/src/components/chat/hooks/useVoiceInput.ts
@@ -1,7 +1,6 @@
 import { useCallback, useEffect, useRef, useState } from 'react';
 
-import { authenticatedFetch } from '../../../utils/api';
-import { voiceConfigHeaders } from '../../../hooks/useVoiceConfig';
+import { transcribeVoice } from '../../../lib/voiceApi';
 
 // Mobile-safe recording: iOS Safari 18.4+ supports webm/opus; older iOS needs mp4.
 const MIME_CANDIDATES = [
@@ -97,13 +96,7 @@ export function useVoiceInput(
         setState('transcribing');
         try {
           const ext = type.includes('mp4') ? 'm4a' : type.includes('ogg') ? 'ogg' : 'webm';
-          const fd = new FormData();
-          fd.append('audio', blob, `recording.${ext}`);
-          const res = await authenticatedFetch('/api/voice/transcribe', {
-            method: 'POST',
-            body: fd,
-            headers: voiceConfigHeaders(),
-          });
+          const res = await transcribeVoice(blob, `recording.${ext}`);
           if (!res.ok) throw new Error(`transcribe ${res.status}`);
           const data = await res.json();
           if (cancelledRef.current) return;
diff --git a/src/hooks/useVoiceConfig.ts b/src/hooks/useVoiceConfig.ts
index c9141f45..303b6467 100644
--- a/src/hooks/useVoiceConfig.ts
+++ b/src/hooks/useVoiceConfig.ts
@@ -13,7 +13,7 @@ const STORAGE_KEY = 'voiceConfig';
 export const VOICE_CONFIG_SYNC_EVENT = 'voice-config:sync';
 const DEFAULTS: VoiceConfig = { baseUrl: '', apiKey: '', sttModel: '', ttsModel: '', ttsVoice: '', ttsFormat: '' };
 
-function read(): VoiceConfig {
+export function readVoiceConfig(): VoiceConfig {
   try {
     const raw = localStorage.getItem(STORAGE_KEY);
     if (!raw) return { ...DEFAULTS };
@@ -33,9 +33,8 @@ function read(): VoiceConfig {
 // Empty fields are omitted so the server's env defaults apply.
 export function voiceConfigHeaders(): Record<string, string> {
   if (typeof window === 'undefined') return {};
-  const c = read();
+  const c = readVoiceConfig();
   const h: Record<string, string> = {};
-  if (c.baseUrl) h['x-voice-base-url'] = c.baseUrl;
   if (c.apiKey) h['x-voice-api-key'] = c.apiKey;
   if (c.sttModel) h['x-voice-stt-model'] = c.sttModel;
   if (c.ttsModel) h['x-voice-tts-model'] = c.ttsModel;
@@ -46,7 +45,7 @@ export function voiceConfigHeaders(): Record<string, string> {
 
 export function useVoiceConfig() {
   const [config, setConfig] = useState<VoiceConfig>(() =>
-    typeof window === 'undefined' ? { ...DEFAULTS } : read(),
+    typeof window === 'undefined' ? { ...DEFAULTS } : readVoiceConfig(),
   );
 
   const update = (patch: Partial<VoiceConfig>) => {
diff --git a/src/i18n/locales/en/settings.json b/src/i18n/locales/en/settings.json
index 11df5929..2d9772b1 100644
--- a/src/i18n/locales/en/settings.json
+++ b/src/i18n/locales/en/settings.json
@@ -63,7 +63,7 @@
     "ttsModel": "Text-to-speech model",
     "voice": "Voice",
     "format": "Audio format",
-    "note": "The shown defaults work with OpenAI once you add a key. For other providers, set the base URL, model names, and audio format to match."
+    "note": "A custom base URL is called directly by your browser and must allow browser CORS requests. Leave it blank to use the server-configured backend."
   },
   "quickSettings": {
     "title": "Quick Settings",
diff --git a/src/lib/voiceApi.ts b/src/lib/voiceApi.ts
new file mode 100644
index 00000000..3f9549b4
--- /dev/null
+++ b/src/lib/voiceApi.ts
@@ -0,0 +1,96 @@
+import { authenticatedFetch } from '../utils/api';
+import { readVoiceConfig, voiceConfigHeaders } from '../hooks/useVoiceConfig';
+
+// Client-side voice requests. When a custom base URL is configured, the browser calls
+// that backend's `/audio/transcriptions` and `/audio/speech` endpoints directly;
+// otherwise requests go through the server proxy under `/api/voice`.
+
+/** Joins `path` onto `baseUrl`, dropping surrounding whitespace and trailing slashes. */
+function directUrl(baseUrl: string, path: string): string {
+  return `${baseUrl.trim().replace(/\/+$/, '')}${path}`;
+}
+
+/** Bearer auth header for direct calls; empty when the key is blank or whitespace. */
+function directAuthHeaders(apiKey: string): Record<string, string> {
+  const key = apiKey.trim();
+  return key ? { Authorization: `Bearer ${key}` } : {};
+}
+
+/** Serializes the stored voice config to a JSON string, usable as a cache-key part. */
+export function voiceConfigSignature(): string {
+  return JSON.stringify(readVoiceConfig());
+}
+
+/**
+ * Sends recorded audio for transcription.
+ *
+ * With a custom base URL, posts multipart `file` + `model` straight from the browser to
+ * `<baseUrl>/audio/transcriptions`. Otherwise posts multipart `audio` to the server
+ * proxy at `/api/voice/transcribe` together with the voice-config headers.
+ *
+ * @param blob - Recorded audio to transcribe.
+ * @param filename - File name attached to the multipart part.
+ * @returns The unprocessed fetch response.
+ */
+export function transcribeVoice(blob: Blob, filename: string): Promise<Response> {
+  const config = readVoiceConfig();
+  const baseUrl = config.baseUrl.trim();
+  const body = new FormData();
+
+  if (baseUrl) {
+    body.append('file', blob, filename);
+    // Trim so a whitespace-only setting falls back to the default model.
+    body.append('model', config.sttModel.trim() || 'whisper-1');
+    return fetch(directUrl(baseUrl, '/audio/transcriptions'), {
+      method: 'POST',
+      headers: directAuthHeaders(config.apiKey),
+      body,
+    });
+  }
+
+  body.append('audio', blob, filename);
+  return authenticatedFetch('/api/voice/transcribe', {
+    method: 'POST',
+    headers: voiceConfigHeaders(),
+    body,
+  });
+}
+
+/**
+ * Requests synthesized speech for `text`.
+ *
+ * With a custom base URL, posts JSON (`model`, `voice`, `input`, optional
+ * `response_format`) straight from the browser to `<baseUrl>/audio/speech`. Otherwise
+ * posts `{ text }` to the server proxy at `/api/voice/tts` with the voice-config headers.
+ *
+ * @param text - Text to speak.
+ * @param signal - Abort signal passed to the underlying request.
+ * @returns The unprocessed fetch response.
+ */
+export function synthesizeVoice(text: string, signal: AbortSignal): Promise<Response> {
+  const config = readVoiceConfig();
+  const baseUrl = config.baseUrl.trim();
+
+  if (baseUrl) {
+    const format = config.ttsFormat.trim();
+    return fetch(directUrl(baseUrl, '/audio/speech'), {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json', ...directAuthHeaders(config.apiKey) },
+      body: JSON.stringify({
+        // Trimmed like `ttsFormat`, so whitespace-only settings use the defaults.
+        model: config.ttsModel.trim() || 'tts-1',
+        voice: config.ttsVoice.trim() || 'alloy',
+        input: text,
+        ...(format ? { response_format: format } : {}),
+      }),
+      signal,
+    });
+  }
+
+  return authenticatedFetch('/api/voice/tts', {
+    method: 'POST',
+    body: JSON.stringify({ text }),
+    headers: voiceConfigHeaders(),
+    signal,
+  });
+}
diff --git a/src/lib/voicePlayer.ts b/src/lib/voicePlayer.ts
index b09f5170..4c239c29 100644
--- a/src/lib/voicePlayer.ts
+++ b/src/lib/voicePlayer.ts
@@ -1,5 +1,4 @@
-import { authenticatedFetch } from '../utils/api';
-import { voiceConfigHeaders } from '../hooks/useVoiceConfig';
+import { synthesizeVoice, voiceConfigSignature } from './voiceApi';
 
 // A single app-level audio player for read-aloud. It owns one <audio> element, lives
 // outside the React tree, and caches generated audio by content. Because playback is not
@@ -16,8 +15,8 @@ const CACHE_MAX = 24;
 const CLIENT_TIMEOUT_MS = 330000; // backstop; the server proxy already times out at 5 min
 
 // Stable id / cache key from the text and voice settings that affect its audio (djb2).
-export function voiceId(content: string, headers = voiceConfigHeaders()): string {
-  const input = JSON.stringify([content, Object.entries(headers).sort(([a], [b]) => a.localeCompare(b))]);
+export function voiceId(content: string, signature = voiceConfigSignature()): string {
+  const input = JSON.stringify([content, signature]);
   let h = 5381;
   for (let i = 0; i < input.length; i++) h = (((h << 5) + h) + input.charCodeAt(i)) | 0;
   return (h >>> 0).toString(36);
@@ -82,13 +81,12 @@ class VoicePlayer {
   }
 
   toggle(content: string) {
-    const headers = voiceConfigHeaders();
-    const id = voiceId(content, headers);
+    const id = voiceId(content); // voiceId defaults to the current voice-config signature
     if (this.currentId === id && (this.state === 'playing' || this.state === 'loading')) {
       this.stop();
       return;
     }
-    void this.play(id, content, headers);
+    void this.play(id, content); // not awaited; the returned promise is deliberately discarded
   }
 
   stop() {
@@ -131,7 +129,7 @@ class VoicePlayer {
     }, 6000);
   }
 
-  private async play(id: string, content: string, headers: Record<string, string>) {
+  private async play(id: string, content: string) {
     const audio = this.ensureAudio();
     audio.pause();
     this.currentId = id;
@@ -149,12 +147,7 @@ class VoicePlayer {
         const controller = new AbortController();
         this.activeController = controller;
         const timer = setTimeout(() => controller.abort(), CLIENT_TIMEOUT_MS);
-        const res = await authenticatedFetch('/api/voice/tts', {
-          method: 'POST',
-          body: JSON.stringify({ text: content }),
-          headers,
-          signal: controller.signal,
-        }).finally(() => {
+        const res = await synthesizeVoice(content, controller.signal).finally(() => {
           clearTimeout(timer);
           if (this.activeController === controller) this.activeController = null;
         });