Spaces:

abc1181
/

chatterbox

Sleeping

App Files Files Community

chatterbox / app.py

abc1181

Update app.py

535dc07 verified about 1 month ago

raw

history blame contribute delete

54 kB

	from fastapi import FastAPI, Request, UploadFile, File, Form
	from fastapi.responses import StreamingResponse, JSONResponse, HTMLResponse, FileResponse
	import torch
	import torchaudio
	import io
	import uvicorn
	import os
	import base64
	import json
	import subprocess
	import tempfile
	import shutil
	import httpx
	from pathlib import Path
	from chatterbox.tts import ChatterboxTTS

	app = FastAPI()

	# ============================================================
	# STARTUP
	# ============================================================

	print("Loading Chatterbox Multilingual...")
	# Run on the GPU when torch reports CUDA as available, otherwise on the CPU.
	if torch.cuda.is_available():
	    device = "cuda"
	else:
	    device = "cpu"

	# Log in to the Hugging Face Hub when a token is configured
	# (set HF token for faster downloads).
	hf_token = os.getenv("HF_TOKEN")
	if hf_token:
	    from huggingface_hub import login

	    login(token=hf_token)

	# The model is loaded once at import time and shared through this global.
	model = ChatterboxTTS.from_pretrained(device=device)
	print(f"✅ Chatterbox loaded on {device}")

	# Local locations of the default voice sample, the voices folder and the
	# JSON file holding voice metadata. The folder is created if missing.
	VOICE_SAMPLE_PATH = "voice_sample.wav"
	VOICES_DIR = Path("voices")
	VOICES_DIR.mkdir(exist_ok=True)
	VOICES_META = VOICES_DIR / "meta.json"

	# Configuration read from the environment; each value is None when unset.
	HF_TOKEN = os.getenv("HF_TOKEN")
	HF_REPO_ID = os.getenv("HF_REPO_ID")  # e.g. abc1181/livekit-tts-chatterbox
	GROQ_KEY = os.getenv("GROQ_API_KEY")

	# Path of the default voice sample when it exists on disk, otherwise None.
	if os.path.exists(VOICE_SAMPLE_PATH):
	    voice_sample = VOICE_SAMPLE_PATH
	else:
	    voice_sample = None

	def has_devanagari(text: str) -> bool:
	    """Return True if *text* has at least one character in U+0900..U+097F.

	    An empty string yields False.
	    """
	    for ch in text:
	        if '\u0900' <= ch <= '\u097F':
	            return True
	    return False

	def get_language(text: str) -> str:
	    """Return the language code for *text*.

	    Returns:
	        "hi" when ``has_devanagari(text)`` is true, otherwise "en".
	    """
	    if has_devanagari(text):
	        return "hi"
	    return "en"

	def load_voices_meta() -> dict:
	    """Read the voice metadata stored in the ``VOICES_META`` JSON file.

	    Returns:
	        The parsed JSON content, or an empty dict when the file does not
	        exist.
	    """
	    if not VOICES_META.exists():
	        return {}
	    raw_json = VOICES_META.read_text()
	    return json.loads(raw_json)

	def save_voices_meta(meta: dict):
	    """Persist *meta* as indented JSON in the ``VOICES_META`` file.

	    The JSON is first written to a sibling ``<name>.tmp`` file and then moved
	    over ``VOICES_META`` with ``os.replace``. Writing in place would truncate
	    the existing file before the new content is fully on disk, so an
	    interruption mid-write could leave JSON that ``json.loads`` rejects.

	    Args:
	        meta: JSON-serialisable mapping to store.

	    Raises:
	        TypeError: If *meta* contains values ``json.dumps`` cannot serialise
	            (raised before anything is written).
	        OSError: If the temporary file cannot be written or renamed.
	    """
	    # Serialise first so a bad payload never touches the disk.
	    payload = json.dumps(meta, indent=2)
	    # Same directory as the target, so the rename stays on one filesystem.
	    # A leftover .tmp file from a failed attempt is overwritten next time.
	    tmp_path = VOICES_META.with_name(VOICES_META.name + ".tmp")
	    tmp_path.write_text(payload)
	    os.replace(tmp_path, VOICES_META)

	def push_to_hf(local_path: str, repo_path: str):
	    """Upload a local file into the ``voices/`` folder of the HF Space repo.

	    Args:
	        local_path: Path of the file on disk to upload.
	        repo_path: Destination path below ``voices/`` inside the repo.

	    Returns:
	        True when the upload call completed without raising. False when
	        ``HF_TOKEN`` or ``HF_REPO_ID`` is not set, or when the import or the
	        upload raised an exception (the error is printed, not re-raised).
	    """
	    if not (HF_TOKEN and HF_REPO_ID):
	        return False

	    try:
	        from huggingface_hub import HfApi

	        hub = HfApi()
	        upload_args = {
	            "path_or_fileobj": local_path,
	            "path_in_repo": f"voices/{repo_path}",
	            "repo_id": HF_REPO_ID,
	            "repo_type": "space",
	            "token": HF_TOKEN,
	        }
	        hub.upload_file(**upload_args)
	    except Exception as exc:
	        # Best-effort push: report the failure and signal it via the result.
	        print(f"HF push failed: {exc}")
	        return False
	    return True

	def pull_from_hf():
	    """Download voice samples that are missing locally from the HF Space repo.

	    Does nothing when ``HF_TOKEN`` or ``HF_REPO_ID`` is unset. Only repo files
	    whose path starts with ``voices/`` and ends with ``.wav`` are considered,
	    and a file is fetched only when no file with the same base name exists in
	    ``VOICES_DIR``. Downloads use ``local_dir="."``. Any exception is printed
	    and ends the pull; nothing is raised to the caller.
	    """
	    if not (HF_TOKEN and HF_REPO_ID):
	        return

	    try:
	        from huggingface_hub import HfApi

	        hub = HfApi()
	        repo_files = hub.list_repo_files(
	            repo_id=HF_REPO_ID, repo_type="space", token=HF_TOKEN
	        )
	        # Lazy filter: each path is checked only as the loop reaches it.
	        wav_paths = (
	            path for path in repo_files
	            if path.startswith("voices/") and path.endswith(".wav")
	        )
	        for remote_path in wav_paths:
	            local_copy = VOICES_DIR / Path(remote_path).name
	            if local_copy.exists():
	                continue
	            hub.hf_hub_download(
	                repo_id=HF_REPO_ID,
	                filename=remote_path,
	                repo_type="space",
	                token=HF_TOKEN,
	                local_dir=".",
	            )
	    except Exception as exc:
	        print(f"HF pull failed: {exc}")


	# Pull voices from HF on startup
	pull_from_hf()

	# ============================================================
	# UI
	# ============================================================

	@app.get("/", response_class=HTMLResponse)
	async def ui():
	return HTMLResponse(content="""
	<!DOCTYPE html>
	<html lang="en">
	<head>
	<meta charset="UTF-8"/>
	<meta name="viewport" content="width=device-width, initial-scale=1.0"/>
	<title>Cortana TTS Studio</title>
	<style>
	* { margin:0; padding:0; box-sizing:border-box; }
	body {
	background:#080808;
	color:#fff;
	font-family:'Inter',-apple-system,sans-serif;
	min-height:100vh;
	}
	.sidebar {
	position:fixed;
	left:0; top:0; bottom:0;
	width:220px;
	background:#0d0d0d;
	border-right:1px solid #1a1a1a;
	display:flex;
	flex-direction:column;
	padding:24px 0;
	z-index:100;
	}
	.logo {
	padding:0 20px 24px;
	border-bottom:1px solid #1a1a1a;
	}
	.logo h1 {
	font-size:1.1rem;
	font-weight:700;
	letter-spacing:0.2em;
	text-transform:uppercase;
	background:linear-gradient(135deg,#c0c0c0,#fff);
	-webkit-background-clip:text;
	-webkit-text-fill-color:transparent;
	}
	.logo p { font-size:0.7rem; color:#444; margin-top:4px; }
	.nav { padding:16px 12px; flex:1; }
	.nav-item {
	display:flex;
	align-items:center;
	gap:10px;
	padding:10px 12px;
	border-radius:8px;
	cursor:pointer;
	font-size:0.85rem;
	color:#555;
	transition:all 0.2s;
	margin-bottom:2px;
	}
	.nav-item:hover { background:#151515; color:#888; }
	.nav-item.active { background:#151515; color:#fff; }
	.nav-icon { font-size:1rem; width:20px; text-align:center; }
	.main {
	margin-left:220px;
	padding:32px;
	min-height:100vh;
	}
	.page { display:none; }
	.page.active { display:block; }
	.page-title {
	font-size:1.4rem;
	font-weight:700;
	margin-bottom:8px;
	}
	.page-sub {
	color:#444;
	font-size:0.85rem;
	margin-bottom:28px;
	}
	.card {
	background:#111;
	border:1px solid #1a1a1a;
	border-radius:14px;
	padding:22px;
	margin-bottom:20px;
	}
	.card-title {
	font-size:0.7rem;
	font-weight:600;
	letter-spacing:0.15em;
	text-transform:uppercase;
	color:#444;
	margin-bottom:16px;
	display:flex;
	align-items:center;
	gap:8px;
	}
	.card-title::before {
	content:'';
	display:block;
	width:3px; height:12px;
	background:#c0c0c0;
	border-radius:2px;
	}
	textarea {
	width:100%;
	background:#0a0a0a;
	border:1px solid #1f1f1f;
	border-radius:10px;
	color:#fff;
	font-size:0.95rem;
	padding:14px;
	resize:vertical;
	min-height:130px;
	outline:none;
	font-family:inherit;
	line-height:1.6;
	transition:border-color 0.2s;
	}
	textarea:focus { border-color:#333; }
	textarea::placeholder { color:#2a2a2a; }
	input[type="text"] {
	width:100%;
	background:#0a0a0a;
	border:1px solid #1f1f1f;
	border-radius:8px;
	color:#fff;
	font-size:0.9rem;
	padding:10px 14px;
	outline:none;
	font-family:inherit;
	transition:border-color 0.2s;
	}
	input[type="text"]:focus { border-color:#333; }
	input[type="text"]::placeholder { color:#2a2a2a; }
	.grid2 { display:grid; grid-template-columns:1fr 1fr; gap:16px; }
	.grid3 { display:grid; grid-template-columns:1fr 1fr 1fr; gap:16px; }
	.control-label {
	font-size:0.72rem;
	color:#444;
	text-transform:uppercase;
	letter-spacing:0.1em;
	margin-bottom:8px;
	display:block;
	}
	.slider-row {
	display:flex;
	align-items:center;
	gap:12px;
	}
	input[type="range"] {
	flex:1;
	-webkit-appearance:none;
	height:4px;
	background:#1f1f1f;
	border-radius:2px;
	outline:none;
	}
	input[type="range"]::-webkit-slider-thumb {
	-webkit-appearance:none;
	width:14px; height:14px;
	border-radius:50%;
	background:#c0c0c0;
	cursor:pointer;
	}
	.slider-val { font-size:0.8rem; color:#666; width:32px; text-align:right; }
	.btn {
	background:#151515;
	border:1px solid #222;
	border-radius:8px;
	color:#fff;
	font-size:0.9rem;
	font-weight:600;
	padding:11px 20px;
	cursor:pointer;
	transition:all 0.2s;
	display:inline-flex;
	align-items:center;
	gap:8px;
	}
	.btn:hover { border-color:#444; }
	.btn:disabled { opacity:0.3; cursor:not-allowed; }
	.btn-primary {
	width:100%;
	justify-content:center;
	padding:13px;
	margin-top:14px;
	font-size:0.95rem;
	}
	.btn-primary:hover { box-shadow:0 0 20px rgba(192,192,192,0.08); }
	.btn-sm { padding:7px 14px; font-size:0.8rem; }
	.btn-danger { border-color:#ef444433; color:#ef4444; }
	.btn-danger:hover { border-color:#ef4444; background:rgba(239,68,68,0.1); }
	.spinner {
	width:16px; height:16px;
	border:2px solid #333;
	border-top-color:#c0c0c0;
	border-radius:50%;
	animation:spin 0.8s linear infinite;
	display:none;
	}
	.btn.loading .spinner { display:block; }
	.btn.loading .btn-label { display:none; }
	@keyframes spin { to { transform:rotate(360deg); } }
	.audio-player {
	display:none;
	background:#0a0a0a;
	border:1px solid #1a1a1a;
	border-radius:10px;
	padding:14px;
	margin-top:14px;
	}
	.audio-player.visible { display:block; }
	.audio-player audio { width:100%; filter:invert(0.9); }
	.dl-btn {
	display:inline-flex;
	align-items:center;
	gap:6px;
	margin-top:10px;
	background:#151515;
	border:1px solid #222;
	border-radius:7px;
	color:#666;
	font-size:0.78rem;
	padding:7px 12px;
	cursor:pointer;
	text-decoration:none;
	transition:all 0.2s;
	}
	.dl-btn:hover { color:#fff; border-color:#444; }
	.lang-row {
	display:flex;
	align-items:center;
	gap:10px;
	margin-top:8px;
	}
	.lang-badge {
	background:#151515;
	border:1px solid #222;
	border-radius:999px;
	padding:3px 12px;
	font-size:0.75rem;
	color:#666;
	}
	.upload-area {
	border:1px dashed #1f1f1f;
	border-radius:10px;
	padding:22px;
	text-align:center;
	cursor:pointer;
	transition:all 0.2s;
	background:#0a0a0a;
	position:relative;
	}
	.upload-area:hover { border-color:#333; }
	.upload-area.has-file { border-color:#22c55e; border-style:solid; }
	.upload-area input[type="file"] {
	position:absolute; inset:0;
	opacity:0; cursor:pointer;
	width:100%; height:100%;
	}
	.upload-icon { font-size:1.5rem; margin-bottom:6px; }
	.upload-label { font-size:0.85rem; color:#444; }
	.upload-sub { font-size:0.75rem; color:#2a2a2a; margin-top:4px; }
	.file-ok { font-size:0.8rem; color:#22c55e; margin-top:6px; }
	.history-list { display:flex; flex-direction:column; gap:8px; }
	.hist-item {
	background:#0a0a0a;
	border:1px solid #151515;
	border-radius:9px;
	padding:11px 14px;
	display:flex;
	align-items:center;
	gap:12px;
	}
	.hist-text { flex:1; font-size:0.82rem; color:#555; white-space:nowrap; overflow:hidden; text-overflow:ellipsis; }
	.hist-lang {
	font-size:0.7rem; color:#333;
	background:#111; border:1px solid #1a1a1a;
	border-radius:999px; padding:2px 8px;
	}
	.hist-play {
	width:30px; height:30px;
	border-radius:50%;
	background:#151515; border:1px solid #222;
	color:#666; font-size:0.75rem;
	cursor:pointer; display:flex; align-items:center; justify-content:center;
	transition:all 0.2s; flex-shrink:0;
	}
	.hist-play:hover { border-color:#c0c0c0; color:#fff; }
	.voice-grid {
	display:grid;
	grid-template-columns:repeat(auto-fill,minmax(180px,1fr));
	gap:12px;
	}
	.voice-card {
	background:#0a0a0a;
	border:1px solid #1a1a1a;
	border-radius:10px;
	padding:14px;
	cursor:pointer;
	transition:all 0.2s;
	position:relative;
	}
	.voice-card:hover { border-color:#333; }
	.voice-card.selected { border-color:#c0c0c0; }
	.voice-avatar {
	width:44px; height:44px;
	border-radius:50%;
	background:#1a1a1a;
	display:flex; align-items:center; justify-content:center;
	font-size:1.2rem;
	margin-bottom:10px;
	}
	.voice-name { font-size:0.85rem; font-weight:600; color:#ccc; }
	.voice-lang { font-size:0.72rem; color:#444; margin-top:3px; }
	.voice-actions {
	display:flex; gap:6px; margin-top:10px;
	}
	.empty-state {
	text-align:center;
	padding:40px 20px;
	color:#2a2a2a;
	font-size:0.85rem;
	}
	.empty-icon { font-size:2rem; margin-bottom:10px; }
	.tag {
	display:inline-block;
	background:#151515; border:1px solid #1f1f1f;
	border-radius:6px; padding:4px 10px;
	font-size:0.75rem; color:#555;
	margin:3px;
	}
	.progress-bar {
	height:4px; background:#1a1a1a;
	border-radius:2px; overflow:hidden;
	margin-top:10px; display:none;
	}
	.progress-bar.visible { display:block; }
	.progress-fill {
	height:100%; background:#c0c0c0;
	border-radius:2px;
	transition:width 0.3s;
	}
	.status-log {
	background:#0a0a0a; border:1px solid #1a1a1a;
	border-radius:8px; padding:12px;
	font-family:monospace; font-size:0.78rem;
	color:#444; max-height:120px;
	overflow-y:auto; margin-top:12px;
	display:none;
	}
	.status-log.visible { display:block; }
	.status-line { margin-bottom:4px; }
	.status-line.ok { color:#22c55e; }
	.status-line.err { color:#ef4444; }
	.status-line.info { color:#666; }
	.error-msg {
	background:rgba(239,68,68,0.08);
	border:1px solid rgba(239,68,68,0.15);
	border-radius:7px; padding:9px 13px;
	font-size:0.82rem; color:#ef4444;
	display:none; margin-top:10px;
	}
	.error-msg.visible { display:block; }
	.divider { height:1px; background:#141414; margin:16px 0; }
	.row { display:flex; align-items:center; gap:12px; }
	select {
	background:#0a0a0a; border:1px solid #1f1f1f;
	border-radius:8px; color:#888;
	padding:9px 12px; outline:none;
	font-family:inherit; font-size:0.85rem;
	cursor:pointer; width:100%;
	}
	select:focus { border-color:#333; }
	::-webkit-scrollbar { width:4px; }
	::-webkit-scrollbar-track { background:#0a0a0a; }
	::-webkit-scrollbar-thumb { background:#1f1f1f; border-radius:2px; }
	.voice-design-result {
	background:#0a0a0a; border:1px solid #1a1a1a;
	border-radius:10px; padding:16px;
	margin-top:14px; display:none;
	}
	.voice-design-result.visible { display:block; }
	.param-row {
	display:flex; justify-content:space-between;
	align-items:center; margin-bottom:8px;
	}
	.param-key { font-size:0.78rem; color:#444; }
	.param-val { font-size:0.78rem; color:#888; font-family:monospace; }
	</style>
	</head>
	<body>

	<!-- SIDEBAR -->
	<div class="sidebar">
	<div class="logo">
	<h1>Cortana TTS</h1>
	<p>Studio</p>
	</div>
	<div class="nav">
	<div class="nav-item active" onclick="showPage('tts')">
	<span class="nav-icon">▶</span> Text to Speech
	</div>
	<div class="nav-item" onclick="showPage('library')">
	<span class="nav-icon">🎙</span> Voice Library
	</div>
	<div class="nav-item" onclick="showPage('design')">
	<span class="nav-icon">✨</span> Voice Design
	</div>
	<div class="nav-item" onclick="showPage('dubbing')">
	<span class="nav-icon">🎬</span> Dubbing
	</div>
	<div class="nav-item" onclick="showPage('api')">
	<span class="nav-icon">⚡</span> API
	</div>
	</div>
	</div>

	<!-- MAIN -->
	<div class="main">

	<!-- PAGE: TTS -->
	<div class="page active" id="page-tts">
	<div class="page-title">Text to Speech</div>
	<div class="page-sub">Generate natural speech in English, Hindi or Hinglish</div>

	<div class="card">
	<div class="card-title">Text Input</div>
	<textarea id="ttsText"
	placeholder="Type in English, Hindi or Hinglish... नमस्ते, मैं कोर्टाना हूं। Arey yaar, kya scene hai?"></textarea>
	<div class="lang-row">
	Detected: <span class="lang-badge" id="langBadge">English</span>
	<span style="color:#2a2a2a;font-size:0.75rem;">Ctrl+Enter to generate</span>
	</div>
	</div>

	<div class="card">
	<div class="card-title">Voice</div>
	<div class="grid2">
	<div>
	<span class="control-label">Active Voice</span>
	<select id="voiceSelect" onchange="voiceSelectChanged()">
	<option value="default">Default Voice</option>
	</select>
	</div>
	<div>
	<span class="control-label">Upload One-time Sample</span>
	<div class="upload-area" id="ttsUpload" style="padding:12px;">
	<input type="file" id="ttsVoiceFile" accept=".wav,.mp3" onchange="ttsFileSelected()"/>
	<div style="font-size:0.82rem;color:#333;">Drop WAV/MP3 here</div>
	<div class="file-ok" id="ttsFileName"></div>
	</div>
	</div>
	</div>
	</div>

	<div class="card">
	<div class="card-title">Parameters</div>
	<div class="grid2">
	<div>
	<span class="control-label">Emotion / Expressiveness</span>
	<div class="slider-row">
	<input type="range" id="ttsEmotion" min="0" max="1" step="0.05" value="0.5"/>
	<span class="slider-val" id="ttsEmotionVal">0.5</span>
	</div>
	</div>
	<div>
	<span class="control-label">Speed</span>
	<div class="slider-row">
	<input type="range" id="ttsSpeed" min="0.5" max="2.0" step="0.1" value="1.0"/>
	<span class="slider-val" id="ttsSpeedVal">1.0</span>
	</div>
	</div>
	</div>
	<button class="btn btn-primary" id="ttsBtn" onclick="generateTTS()">
	<div class="spinner"></div>
	<span class="btn-label">▶ Generate Speech</span>
	</button>
	<div class="error-msg" id="ttsError"></div>
	<div class="audio-player" id="ttsPlayer">
	<audio id="ttsAudio" controls></audio><br/>
	<a class="dl-btn" id="ttsDL" download="cortana.mp3">↓ Download MP3</a>
	<button class="btn btn-sm" style="margin-left:8px;margin-top:10px;"
	onclick="saveToHistory()">+ History</button>
	</div>
	</div>

	<div class="card">
	<div class="card-title">Generation History</div>
	<div class="history-list" id="histList">
	<div class="empty-state">
	<div class="empty-icon">🎵</div>
	Your generations appear here
	</div>
	</div>
	</div>
	</div>

	<!-- PAGE: VOICE LIBRARY -->
	<div class="page" id="page-library">
	<div class="page-title">Voice Library</div>
	<div class="page-sub">Save, manage and reuse cloned voices permanently</div>

	<div class="card">
	<div class="card-title">Add New Voice</div>
	<div class="grid2">
	<div>
	<span class="control-label">Voice Name</span>
	<input type="text" id="newVoiceName" placeholder="e.g. Cortana English, Raj Hindi..."/>
	</div>
	<div>
	<span class="control-label">Language Tag</span>
	<select id="newVoiceLang">
	<option value="en">English</option>
	<option value="hi">Hindi</option>
	<option value="both">English + Hindi</option>
	</select>
	</div>
	</div>
	<div style="margin-top:14px;">
	<span class="control-label">Voice Sample (5–30 seconds, clean audio)</span>
	<div class="upload-area" id="libUploadArea">
	<input type="file" id="libVoiceFile" accept=".wav,.mp3" onchange="libFileSelected()"/>
	<div class="upload-icon">🎙</div>
	<div class="upload-label">Upload WAV or MP3</div>
	<div class="upload-sub">No background music — clear speech only</div>
	<div class="file-ok" id="libFileName"></div>
	</div>
	</div>
	<button class="btn btn-primary" id="saveVoiceBtn" onclick="saveVoice()">
	<div class="spinner"></div>
	<span class="btn-label">💾 Save to Library</span>
	</button>
	<div class="error-msg" id="libError"></div>
	</div>

	<div class="card">
	<div class="card-title">Saved Voices</div>
	<div class="voice-grid" id="voiceGrid">
	<div class="empty-state" style="grid-column:1/-1;">
	<div class="empty-icon">🎙</div>
	No voices saved yet — add one above
	</div>
	</div>
	</div>
	</div>

	<!-- PAGE: VOICE DESIGN -->
	<div class="page" id="page-design">
	<div class="page-title">Voice Design</div>
	<div class="page-sub">Describe a voice in plain words — AI generates the parameters</div>

	<div class="card">
	<div class="card-title">Describe Your Voice</div>
	<textarea id="designPrompt" style="min-height:80px;"
	placeholder="e.g. Young Indian woman, warm and friendly, speaks at a medium pace with a hint of excitement e.g. Deep mature male voice, calm and authoritative, slightly slow e.g. Energetic young man, fast paced, very expressive"></textarea>

	<div style="margin-top:14px;">
	<span class="control-label">Preview Text</span>
	<textarea id="designPreviewText" style="min-height:60px;"
	placeholder="Hello, I am Cortana. How can I assist you today?"></textarea>
	</div>

	<button class="btn btn-primary" id="designBtn" onclick="designVoice()">
	<div class="spinner"></div>
	<span class="btn-label">✨ Design Voice</span>
	</button>
	<div class="error-msg" id="designError"></div>

	<div class="voice-design-result" id="designResult">
	<div class="card-title">Generated Parameters</div>
	<div id="designParams"></div>
	<div class="divider"></div>
	<div class="audio-player visible" style="margin-top:0;">
	<audio id="designAudio" controls></audio><br/>
	<a class="dl-btn" id="designDL" download="designed_voice.mp3">↓ Download</a>
	<button class="btn btn-sm" style="margin-left:8px;margin-top:10px;"
	onclick="saveDesignedVoice()">💾 Save to Library</button>
	</div>
	</div>
	</div>

	<div class="card">
	<div class="card-title">Example Prompts</div>
	<div>
	<span class="tag" onclick="setDesignPrompt(this)">Young Indian woman, warm and friendly</span>
	<span class="tag" onclick="setDesignPrompt(this)">Deep mature male, calm and authoritative</span>
	<span class="tag" onclick="setDesignPrompt(this)">Energetic teen, very expressive and fast</span>
	<span class="tag" onclick="setDesignPrompt(this)">Professional newsreader, neutral accent</span>
	<span class="tag" onclick="setDesignPrompt(this)">Soft spoken elderly woman, slow and gentle</span>
	<span class="tag" onclick="setDesignPrompt(this)">Excited sports commentator, loud and fast</span>
	</div>
	</div>
	</div>

	<!-- PAGE: DUBBING -->
	<div class="page" id="page-dubbing">
	<div class="page-title">Dubbing</div>
	<div class="page-sub">Translate and re-voice any video or audio file</div>

	<div class="card" style="border-color:#ef444422;">
	<div style="font-size:0.82rem;color:#ef4444;margin-bottom:4px;">⚠️ Free CPU Warning</div>
	<div style="font-size:0.78rem;color:#555;">
	Dubbing on free CPU takes 10–20 minutes per minute of video.
	Start with a short clip to test. Upgrade to GPU for faster processing.
	</div>
	</div>

	<div class="card">
	<div class="card-title">Upload Media</div>
	<div class="upload-area" id="dubUploadArea">
	<input type="file" id="dubFile" accept="video/,audio/" onchange="dubFileSelected()"/>
	<div class="upload-icon">🎬</div>
	<div class="upload-label">Upload video or audio file</div>
	<div class="upload-sub">MP4, MKV, AVI, MP3, WAV — max 100MB</div>
	<div class="file-ok" id="dubFileName"></div>
	</div>

	<div class="grid2" style="margin-top:16px;">
	<div>
	<span class="control-label">Source Language</span>
	<select id="dubSrcLang">
	<option value="auto">Auto Detect</option>
	<option value="en">English</option>
	<option value="hi">Hindi</option>
	<option value="es">Spanish</option>
	<option value="fr">French</option>
	<option value="de">German</option>
	<option value="ja">Japanese</option>
	<option value="zh">Chinese</option>
	</select>
	</div>
	<div>
	<span class="control-label">Target Language</span>
	<select id="dubTgtLang">
	<option value="en">English</option>
	<option value="hi">Hindi</option>
	<option value="es">Spanish</option>
	<option value="fr">French</option>
	<option value="de">German</option>
	<option value="ja">Japanese</option>
	<option value="zh">Chinese</option>
	</select>
	</div>
	</div>

	<div style="margin-top:14px;">
	<span class="control-label">Dubbing Voice (optional)</span>
	<select id="dubVoiceSelect">
	<option value="default">Default Voice</option>
	</select>
	</div>

	<button class="btn btn-primary" id="dubBtn" onclick="startDubbing()">
	<div class="spinner"></div>
	<span class="btn-label">🎬 Start Dubbing</span>
	</button>
	<div class="error-msg" id="dubError"></div>

	<div class="progress-bar" id="dubProgress">
	<div class="progress-fill" id="dubProgressFill" style="width:0%"></div>
	</div>

	<div class="status-log" id="dubLog"></div>

	<div class="audio-player" id="dubPlayer">
	<audio id="dubAudio" controls></audio><br/>
	<a class="dl-btn" id="dubDL" download="dubbed.mp3">↓ Download Dubbed Audio</a>
	</div>
	</div>
	</div>

	<!-- PAGE: API -->
	<div class="page" id="page-api">
	<div class="page-title">API Reference</div>
	<div class="page-sub">OpenAI-compatible endpoints — drop-in replacement</div>

	<div class="card">
	<div class="card-title">Text to Speech</div>
	<div style="background:#0a0a0a;border:1px solid #1a1a1a;border-radius:8px;padding:14px;font-family:monospace;font-size:0.78rem;color:#555;line-height:1.7;">
	POST /v1/audio/speech<br/><br/>
	{<br/>
	"input": "Hello I am Cortana",<br/>
	"emotion": 0.5,         // 0.0 neutral → 1.0 expressive<br/>
	"speed": 1.0           // 0.5x to 2.0x<br/>
	}<br/><br/>
	Returns: audio/mpeg stream
	</div>
	<button class="btn btn-sm" style="margin-top:10px;" onclick="copyText(0)">Copy</button>
	</div>

	<div class="card">
	<div class="card-title">Voice Cloning (on-the-fly)</div>
	<div style="background:#0a0a0a;border:1px solid #1a1a1a;border-radius:8px;padding:14px;font-family:monospace;font-size:0.78rem;color:#555;line-height:1.7;">
	POST /v1/audio/speech/clone<br/><br/>
	{<br/>
	"input": "Hello I am Cortana",<br/>
	"voice_b64": "base64_encoded_wav",<br/>
	"emotion": 0.5<br/>
	}<br/><br/>
	Returns: audio/mpeg stream
	</div>
	<button class="btn btn-sm" style="margin-top:10px;" onclick="copyText(1)">Copy</button>
	</div>

	<div class="card">
	<div class="card-title">List Voices</div>
	<div style="background:#0a0a0a;border:1px solid #1a1a1a;border-radius:8px;padding:14px;font-family:monospace;font-size:0.78rem;color:#555;line-height:1.7;">
	GET /v1/voices<br/><br/>
	Returns: { "voices": [ { "id": "...", "name": "...", "lang": "..." } ] }
	</div>
	</div>

	<div class="card">
	<div class="card-title">CURL Example</div>
	<div style="background:#0a0a0a;border:1px solid #1a1a1a;border-radius:8px;padding:14px;font-family:monospace;font-size:0.78rem;color:#555;line-height:1.7;" id="curlExample">
	curl -X POST "https://YOUR_SPACE.hf.space/v1/audio/speech" \<br/>
	-H "Authorization: Bearer YOUR_HF_TOKEN" \<br/>
	-H "Content-Type: application/json" \<br/>
	-d '{"input": "Hello I am Cortana", "emotion": 0.5}' \<br/>
	--output speech.mp3
	</div>
	</div>
	</div>

	</div>

	<script>
	const historyItems = [];
	let selectedVoiceId = 'default';
	let designedParams = null;

	// ============ NAVIGATION ============
	function showPage(id) {
	document.querySelectorAll('.page').forEach(p => p.classList.remove('active'));
	document.querySelectorAll('.nav-item').forEach(n => n.classList.remove('active'));
	document.getElementById('page-' + id).classList.add('active');
	event.currentTarget.classList.add('active');
	if (id === 'library') loadVoiceLibrary();
	}

	// ============ LANGUAGE DETECTION ============
	document.getElementById('ttsText').addEventListener('input', function() {
	const text = this.value;
	const badge = document.getElementById('langBadge');
	if (/[\u0900-\u097F]/.test(text)) {
	badge.textContent = 'Hindi';
	badge.style.color = '#f97316';
	} else if (/\b(hai\|hoon\|kya\|nahi\|aur\|toh\|yaar\|arey\|bhi)\b/i.test(text)) {
	badge.textContent = 'Hinglish';
	badge.style.color = '#a855f7';
	} else {
	badge.textContent = 'English';
	badge.style.color = '#666';
	}
	});

	// ============ SLIDERS ============
	document.getElementById('ttsEmotion').addEventListener('input', function() {
	document.getElementById('ttsEmotionVal').textContent = this.value;
	});
	document.getElementById('ttsSpeed').addEventListener('input', function() {
	document.getElementById('ttsSpeedVal').textContent = this.value;
	});

	// ============ TTS ============
	function ttsFileSelected() {
	const f = document.getElementById('ttsVoiceFile').files[0];
	if (f) document.getElementById('ttsFileName').textContent = '✅ ' + f.name;
	}

	async function generateTTS() {
	const text = document.getElementById('ttsText').value.trim();
	const emotion = document.getElementById('ttsEmotion').value;
	const speed = document.getElementById('ttsSpeed').value;
	const voiceFile = document.getElementById('ttsVoiceFile').files[0];
	const btn = document.getElementById('ttsBtn');
	const err = document.getElementById('ttsError');

	if (!text) { showError(err, 'Please enter text first.'); return; }
	err.classList.remove('visible');
	setLoading(btn, true);

	try {
	let res;
	if (voiceFile) {
	const b64 = await toB64(voiceFile);
	res = await fetch('/v1/audio/speech/clone', {
	method:'POST',
	headers:{'Content-Type':'application/json'},
	body:JSON.stringify({ input:text, emotion:parseFloat(emotion), speed:parseFloat(speed), voice_b64:b64 })
	});
	} else if (selectedVoiceId !== 'default') {
	res = await fetch('/v1/audio/speech/voice/' + selectedVoiceId, {
	method:'POST',
	headers:{'Content-Type':'application/json'},
	body:JSON.stringify({ input:text, emotion:parseFloat(emotion), speed:parseFloat(speed) })
	});
	} else {
	res = await fetch('/v1/audio/speech', {
	method:'POST',
	headers:{'Content-Type':'application/json'},
	body:JSON.stringify({ input:text, emotion:parseFloat(emotion), speed:parseFloat(speed) })
	});
	}
	if (!res.ok) { const e = await res.json(); throw new Error(e.error); }
	const blob = await res.blob();
	const url = URL.createObjectURL(blob);
	setAudio('ttsAudio', 'ttsDL', url);
	document.getElementById('ttsPlayer').classList.add('visible');
	document.getElementById('ttsAudio').play();
	// auto add to history
	historyItems.unshift({ text, url, lang: /[\u0900-\u097F]/.test(text) ? 'Hindi' : 'English' });
	renderHistory();
	} catch(e) {
	showError(err, e.message);
	} finally {
	setLoading(btn, false);
	}
	}

	function renderHistory() {
	const list = document.getElementById('histList');
	if (!historyItems.length) return;
	list.innerHTML = historyItems.slice(0,10).map((h,i) => `
	<div class="hist-item">
	<div class="hist-text">${h.text}</div>
	<div class="hist-lang">${h.lang}</div>
	<button class="hist-play" onclick="new Audio('${h.url}').play()">▶</button>
	</div>
	`).join('');
	}

	document.getElementById('ttsText').addEventListener('keydown', e => {
	if (e.ctrlKey && e.key === 'Enter') generateTTS();
	});

	// ============ VOICE LIBRARY ============
	function libFileSelected() {
	const f = document.getElementById('libVoiceFile').files[0];
	if (f) {
	document.getElementById('libFileName').textContent = '✅ ' + f.name;
	document.getElementById('libUploadArea').classList.add('has-file');
	}
	}

	async function saveVoice() {
	const name = document.getElementById('newVoiceName').value.trim();
	const lang = document.getElementById('newVoiceLang').value;
	const file = document.getElementById('libVoiceFile').files[0];
	const btn = document.getElementById('saveVoiceBtn');
	const err = document.getElementById('libError');

	if (!name) { showError(err, 'Please enter a voice name.'); return; }
	if (!file) { showError(err, 'Please upload a voice sample.'); return; }
	err.classList.remove('visible');
	setLoading(btn, true);

	try {
	const b64 = await toB64(file);
	const res = await fetch('/v1/voices', {
	method:'POST',
	headers:{'Content-Type':'application/json'},
	body:JSON.stringify({ name, lang, voice_b64:b64, filename:file.name })
	});
	if (!res.ok) { const e = await res.json(); throw new Error(e.error); }
	document.getElementById('newVoiceName').value = '';
	document.getElementById('libVoiceFile').value = '';
	document.getElementById('libFileName').textContent = '';
	document.getElementById('libUploadArea').classList.remove('has-file');
	await loadVoiceLibrary();
	} catch(e) {
	showError(err, e.message);
	} finally {
	setLoading(btn, false);
	}
	}

	async function loadVoiceLibrary() {
	const res = await fetch('/v1/voices');
	const data = await res.json();
	const grid = document.getElementById('voiceGrid');
	const voices = data.voices \|\| [];

	// Update dropdowns
	updateVoiceDropdowns(voices);

	if (!voices.length) {
	grid.innerHTML = '<div class="empty-state" style="grid-column:1/-1;"><div class="empty-icon">🎙</div>No voices saved yet</div>';
	return;
	}

	grid.innerHTML = voices.map(v => `
	<div class="voice-card ${selectedVoiceId === v.id ? 'selected' : ''}" onclick="selectVoice('${v.id}')">
	<div class="voice-avatar">🎙</div>
	<div class="voice-name">${v.name}</div>
	<div class="voice-lang">${v.lang.toUpperCase()}</div>
	<div class="voice-actions">
	<button class="btn btn-sm" onclick="event.stopPropagation();previewVoice('${v.id}')">▶</button>
	<button class="btn btn-sm btn-danger" onclick="event.stopPropagation();deleteVoice('${v.id}')">✕</button>
	</div>
	</div>
	`).join('');
	}

	function updateVoiceDropdowns(voices) {
	const opts = '<option value="default">Default Voice</option>' +
	voices.map(v => `<option value="${v.id}">${v.name}</option>`).join('');
	document.getElementById('voiceSelect').innerHTML = opts;
	document.getElementById('dubVoiceSelect').innerHTML = opts;
	}

	function voiceSelectChanged() {
	selectedVoiceId = document.getElementById('voiceSelect').value;
	}

	function selectVoice(id) {
	selectedVoiceId = id;
	document.getElementById('voiceSelect').value = id;
	loadVoiceLibrary();
	}

	async function previewVoice(id) {
	const res = await fetch('/v1/audio/speech/voice/' + id, {
	method:'POST',
	headers:{'Content-Type':'application/json'},
	body:JSON.stringify({ input:'Hello, I am Cortana. How can I assist you?', emotion:0.5 })
	});
	if (!res.ok) return;
	const blob = await res.blob();
	new Audio(URL.createObjectURL(blob)).play();
	}

	async function deleteVoice(id) {
	if (!confirm('Delete this voice?')) return;
	await fetch('/v1/voices/' + id, { method:'DELETE' });
	await loadVoiceLibrary();
	}

	// ============ VOICE DESIGN ============
	function setDesignPrompt(el) {
	document.getElementById('designPrompt').value = el.textContent;
	}

	async function designVoice() {
	const prompt = document.getElementById('designPrompt').value.trim();
	const preview = document.getElementById('designPreviewText').value.trim() \|\|
	'Hello, I am Cortana. How can I assist you today?';
	const btn = document.getElementById('designBtn');
	const err = document.getElementById('designError');

	if (!prompt) { showError(err, 'Please describe the voice first.'); return; }
	err.classList.remove('visible');
	setLoading(btn, true);

	try {
	const res = await fetch('/v1/voices/design', {
	method:'POST',
	headers:{'Content-Type':'application/json'},
	body:JSON.stringify({ prompt, preview_text:preview })
	});
	if (!res.ok) { const e = await res.json(); throw new Error(e.error); }

	const data = await res.json();
	designedParams = data.params;

	// Show params
	document.getElementById('designParams').innerHTML = Object.entries(data.params).map(([k,v]) => `
	<div class="param-row">
	<span class="param-key">${k}</span>
	<span class="param-val">${v}</span>
	</div>
	`).join('');

	// Generate preview audio
	const audioRes = await fetch('/v1/audio/speech', {
	method:'POST',
	headers:{'Content-Type':'application/json'},
	body:JSON.stringify({ input:preview, ...data.params })
	});
	const blob = await audioRes.blob();
	const url = URL.createObjectURL(blob);
	setAudio('designAudio', 'designDL', url);
	document.getElementById('designResult').classList.add('visible');
	document.getElementById('designAudio').play();
	} catch(e) {
	showError(err, e.message);
	} finally {
	setLoading(btn, false);
	}
	}

	async function saveDesignedVoice() {
	const name = prompt('Enter a name for this voice:');
	if (!name \|\| !designedParams) return;
	await fetch('/v1/voices/design/save', {
	method:'POST',
	headers:{'Content-Type':'application/json'},
	body:JSON.stringify({ name, params:designedParams })
	});
	alert('Voice saved to library!');
	}

	// ============ DUBBING ============
	function dubFileSelected() {
	const f = document.getElementById('dubFile').files[0];
	if (f) {
	document.getElementById('dubFileName').textContent = '✅ ' + f.name;
	document.getElementById('dubUploadArea').classList.add('has-file');
	}
	}

	async function startDubbing() {
	const file = document.getElementById('dubFile').files[0];
	const srcLang = document.getElementById('dubSrcLang').value;
	const tgtLang = document.getElementById('dubTgtLang').value;
	const voiceId = document.getElementById('dubVoiceSelect').value;
	const btn = document.getElementById('dubBtn');
	const err = document.getElementById('dubError');
	const log = document.getElementById('dubLog');
	const prog = document.getElementById('dubProgress');
	const fill = document.getElementById('dubProgressFill');

	if (!file) { showError(err, 'Please upload a video or audio file.'); return; }
	err.classList.remove('visible');
	setLoading(btn, true);
	log.classList.add('visible');
	prog.classList.add('visible');
	log.innerHTML = '<div class="status-line info">Starting dubbing pipeline...</div>';
	fill.style.width = '5%';

	try {
	const b64 = await toB64(file);
	fill.style.width = '10%';
	addLog(log, 'Uploading file...', 'info');

	const res = await fetch('/v1/dubbing', {
	method:'POST',
	headers:{'Content-Type':'application/json'},
	body:JSON.stringify({
	file_b64: b64,
	filename: file.name,
	src_lang: srcLang,
	tgt_lang: tgtLang,
	voice_id: voiceId
	})
	});

	fill.style.width = '50%';
	addLog(log, 'Processing...', 'info');

	if (!res.ok) { const e = await res.json(); throw new Error(e.error); }

	fill.style.width = '90%';
	addLog(log, 'Finalizing audio...', 'info');

	const blob = await res.blob();
	const url = URL.createObjectURL(blob);
	setAudio('dubAudio', 'dubDL', url);
	document.getElementById('dubPlayer').classList.add('visible');
	fill.style.width = '100%';
	addLog(log, '✅ Dubbing complete!', 'ok');
	} catch(e) {
	showError(err, e.message);
	addLog(log, '❌ ' + e.message, 'err');
	} finally {
	setLoading(btn, false);
	}
	}

	// ============ HELPERS ============
	function toB64(file) {
	return new Promise(res => {
	const r = new FileReader();
	r.onload = e => res(e.target.result.split(',')[1]);
	r.readAsDataURL(file);
	});
	}

	function setAudio(audioId, dlId, url) {
	document.getElementById(audioId).src = url;
	document.getElementById(dlId).href = url;
	}

	function showError(el, msg) {
	el.textContent = '⚠ ' + msg;
	el.classList.add('visible');
	}

	function setLoading(btn, state) {
	btn.disabled = state;
	btn.classList.toggle('loading', state);
	}

	function addLog(el, msg, type) {
	el.innerHTML += `<div class="status-line ${type}">${msg}</div>`;
	el.scrollTop = el.scrollHeight;
	}

	function copyText(idx) {
	const boxes = document.querySelectorAll('#page-api [style*="monospace"]');
	navigator.clipboard.writeText(boxes[idx].innerText);
	}

	// Load voice library on start
	loadVoiceLibrary();
	</script>
	</body>
	</html>
	""")


	# ============================================================
	# API ENDPOINTS
	# ============================================================

	@app.get("/v1")
	async def v1_root():
	    """Liveness probe: report a static status/service payload."""
	    payload = {
	        "status": "ok",
	        "service": "chatterbox-multilingual-tts",
	    }
	    return payload


	@app.post("/v1/audio/speech")
	async def tts(request: Request):
	    """Synthesize speech for the request's text using the default voice sample.

	    JSON body fields read here:
	      - ``input``: text to speak (required, non-empty).
	      - ``emotion``: converted to float and passed to the model as
	        ``exaggeration`` (defaults to 0.5).

	    Returns an ``audio/mpeg`` stream on success, a 400 JSON error when no
	    text is given, and a 500 JSON error carrying ``str(exception)`` for any
	    other failure.
	    """
	    try:
	        payload = await request.json()
	        text = payload.get("input", "")
	        exaggeration = float(payload.get("emotion", 0.5))
	        if text:
	            language = get_language(text)
	            waveform = model.generate(
	                text,
	                audio_prompt_path=voice_sample,
	                exaggeration=exaggeration,
	                language=language,
	            )
	            # Encode to MP3 in memory and rewind so the response streams from byte 0.
	            buffer = io.BytesIO()
	            torchaudio.save(buffer, waveform, model.sr, format="mp3")
	            buffer.seek(0)
	            return StreamingResponse(buffer, media_type="audio/mpeg")
	        return JSONResponse({"error": "No input text"}, status_code=400)
	    except Exception as exc:
	        return JSONResponse({"error": str(exc)}, status_code=500)


	@app.post("/v1/audio/speech/clone")
	async def tts_clone(request: Request):
	    """Synthesize speech using a voice sample supplied inline with the request.

	    JSON body fields read here:
	      - ``input``: text to speak (required, non-empty).
	      - ``voice_b64``: base64-encoded audio used as the voice prompt; when
	        empty the default ``voice_sample`` is used instead.
	      - ``emotion``: converted to float and passed to the model as
	        ``exaggeration`` (defaults to 0.5).

	    Returns an ``audio/mpeg`` stream on success, a 400 JSON error when no
	    text is given, and a 500 JSON error carrying ``str(exception)`` for any
	    other failure.
	    """
	    temp_path = None
	    try:
	        data = await request.json()
	        text = data.get("input", "")
	        voice_b64 = data.get("voice_b64", "")
	        emotion = float(data.get("emotion", 0.5))
	        if not text:
	            return JSONResponse({"error": "No input text"}, status_code=400)
	        lang = get_language(text)
	        if voice_b64:
	            voice_bytes = base64.b64decode(voice_b64)
	            # A unique file per request: a shared fixed path would let
	            # concurrent requests overwrite each other's voice prompt.
	            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
	                f.write(voice_bytes)
	                temp_path = f.name
	            prompt_path = temp_path
	        else:
	            prompt_path = voice_sample
	        wav = model.generate(text, audio_prompt_path=prompt_path, exaggeration=emotion, language=lang)
	        # The MP3 is fully buffered in memory, so the temp file can be
	        # removed before the response body is streamed.
	        out = io.BytesIO()
	        torchaudio.save(out, wav, model.sr, format="mp3")
	        out.seek(0)
	        return StreamingResponse(out, media_type="audio/mpeg")
	    except Exception as e:
	        return JSONResponse({"error": str(e)}, status_code=500)
	    finally:
	        if temp_path is not None:
	            try:
	                os.unlink(temp_path)
	            except OSError:
	                # Best-effort cleanup; a leftover temp file must not fail the request.
	                pass


	@app.post("/v1/audio/speech/voice/{voice_id}")
	async def tts_with_voice(voice_id: str, request: Request):
	    """Synthesize speech using a voice from the saved voice library.

	    Path parameter ``voice_id`` is looked up in the voices metadata. When the
	    id is unknown, or the entry has no audio file (designed voices are stored
	    with ``"filename": None``), the default ``voice_sample`` is used.

	    JSON body fields read here:
	      - ``input``: text to speak (required, non-empty).
	      - ``emotion``: converted to float and passed to the model as
	        ``exaggeration`` (defaults to 0.5).

	    Returns an ``audio/mpeg`` stream on success, a 400 JSON error when no
	    text is given, and a 500 JSON error carrying ``str(exception)`` for any
	    other failure.
	    """
	    try:
	        data = await request.json()
	        text = data.get("input", "")
	        emotion = float(data.get("emotion", 0.5))
	        if not text:
	            return JSONResponse({"error": "No input text"}, status_code=400)
	        meta = load_voices_meta()
	        voice_info = meta.get(voice_id)
	        # Guard on the filename itself: ``VOICES_DIR / None`` raises TypeError
	        # for library entries that carry parameters but no audio sample.
	        if voice_info and voice_info.get("filename"):
	            prompt_path = str(VOICES_DIR / voice_info["filename"])
	        else:
	            prompt_path = voice_sample
	        lang = get_language(text)
	        wav = model.generate(text, audio_prompt_path=prompt_path, exaggeration=emotion, language=lang)
	        out = io.BytesIO()
	        torchaudio.save(out, wav, model.sr, format="mp3")
	        out.seek(0)
	        return StreamingResponse(out, media_type="audio/mpeg")
	    except Exception as e:
	        return JSONResponse({"error": str(e)}, status_code=500)


	# ============================================================
	# VOICE LIBRARY ENDPOINTS
	# ============================================================

	@app.get("/v1/voices")
	async def list_voices():
	    """List the voice library as ``{"voices": [{"id", "name", "lang"}, ...]}``."""
	    listing = []
	    for voice_id, entry in load_voices_meta().items():
	        listing.append({"id": voice_id, "name": entry["name"], "lang": entry["lang"]})
	    return {"voices": listing}


	@app.post("/v1/voices")
	async def add_voice(request: Request):
	    """Add an uploaded voice sample to the voice library.

	    JSON body fields read here:
	      - ``name``: display name (required; surrounding whitespace stripped).
	      - ``lang``: language tag stored with the voice (defaults to ``"en"``).
	      - ``voice_b64``: base64-encoded audio sample (required).

	    The sample is written to ``VOICES_DIR`` as ``<voice_id>.wav``, recorded
	    in the metadata file, and both files are handed to ``push_to_hf``.

	    Returns ``{"id", "name", "lang"}`` on success, a 400 JSON error when the
	    name or sample is missing or the sample is not valid base64, and a 500
	    JSON error carrying ``str(exception)`` for any other failure.
	    """
	    try:
	        data = await request.json()
	        name = str(data.get("name") or "").strip()
	        lang = data.get("lang", "en")
	        voice_b64 = data.get("voice_b64", "")
	        if not name or not voice_b64:
	            return JSONResponse({"error": "Name and voice sample required"}, status_code=400)

	        # Decode before touching the disk so malformed input is reported as
	        # a client error and leaves nothing behind.
	        try:
	            voice_bytes = base64.b64decode(voice_b64)
	        except (ValueError, TypeError):
	            return JSONResponse({"error": "Invalid base64 voice sample"}, status_code=400)

	        import uuid
	        voice_id = str(uuid.uuid4())[:8]
	        # The stored filename is derived from the generated id, never from
	        # client-supplied text.
	        safe_name = f"{voice_id}.wav"
	        local_path = str(VOICES_DIR / safe_name)

	        with open(local_path, "wb") as f:
	            f.write(voice_bytes)

	        meta = load_voices_meta()
	        meta[voice_id] = {"name": name, "lang": lang, "filename": safe_name}
	        save_voices_meta(meta)

	        # Push to HF repo
	        push_to_hf(local_path, safe_name)
	        push_to_hf(str(VOICES_META), "meta.json")

	        return {"id": voice_id, "name": name, "lang": lang}
	    except Exception as e:
	        return JSONResponse({"error": str(e)}, status_code=500)


	@app.delete("/v1/voices/{voice_id}")
	async def delete_voice(voice_id: str):
	    """Remove a voice from the library.

	    When ``voice_id`` exists in the metadata, its audio file (if the entry
	    has one and it is present on disk) is deleted, the entry is removed, the
	    metadata is saved and handed to ``push_to_hf``. Unknown ids are a no-op.

	    Returns ``{"deleted": voice_id}`` in both cases, or a 500 JSON error
	    carrying ``str(exception)`` on failure.
	    """
	    try:
	        meta = load_voices_meta()
	        if voice_id in meta:
	            # Designed voices are stored with ``"filename": None``; building a
	            # path from None raises TypeError and would make them undeletable.
	            filename = meta[voice_id].get("filename")
	            if filename:
	                wav_path = VOICES_DIR / filename
	                if wav_path.exists():
	                    wav_path.unlink()
	            del meta[voice_id]
	            save_voices_meta(meta)
	            push_to_hf(str(VOICES_META), "meta.json")
	        return {"deleted": voice_id}
	    except Exception as e:
	        return JSONResponse({"error": str(e)}, status_code=500)


	# ============================================================
	# VOICE DESIGN ENDPOINTS
	# ============================================================

	@app.post("/v1/voices/design")
	async def design_voice(request: Request):
	    """Map a free-text voice description to TTS parameters via the Groq LLM.

	    JSON body fields read here:
	      - ``prompt``: the voice description (required, non-blank).
	      - ``preview_text``: echoed back unchanged in the response.

	    Returns ``{"params": <dict parsed from the LLM reply>, "preview_text"}``
	    on success, a 400 JSON error for a blank prompt, and a 500 JSON error
	    when ``GROQ_API_KEY`` is unset, the Groq call fails, or the reply is not
	    a JSON object.
	    """
	    try:
	        data = await request.json()
	        prompt = str(data.get("prompt") or "").strip()
	        preview_text = data.get("preview_text", "Hello, I am Cortana.")

	        if not prompt:
	            return JSONResponse({"error": "No voice description provided"}, status_code=400)
	        if not GROQ_KEY:
	            return JSONResponse({"error": "GROQ_API_KEY not set in secrets"}, status_code=500)

	        # Ask Groq LLM to map description to Chatterbox parameters
	        async with httpx.AsyncClient(timeout=15.0) as client:
	            res = await client.post(
	                "https://api.groq.com/openai/v1/chat/completions",
	                headers={
	                    "Authorization": f"Bearer {GROQ_KEY}",
	                    "Content-Type": "application/json"
	                },
	                json={
	                    "model": "llama-3.1-8b-instant",
	                    "messages": [
	                        {
	                            "role": "system",
	                            "content": """You are a voice parameter mapper for a TTS system.
	Given a voice description, output ONLY a JSON object with these exact fields:
	- emotion: float 0.0 to 1.0 (0=neutral/calm, 1=very expressive/excited)
	- speed: float 0.5 to 2.0 (0.5=very slow, 1.0=normal, 2.0=very fast)
	- description: one sentence summarizing the voice

	Examples:
	"calm elderly woman" -> {"emotion":0.2,"speed":0.8,"description":"Soft calm elderly female voice"}
	"excited sports commentator" -> {"emotion":0.95,"speed":1.6,"description":"Energetic fast sports commentator"}
	"professional newsreader" -> {"emotion":0.3,"speed":1.0,"description":"Neutral professional news voice"}

	Output ONLY the JSON. No explanation. No markdown."""
	                        },
	                        {
	                            "role": "user",
	                            "content": prompt
	                        }
	                    ],
	                    "max_tokens": 100,
	                    "temperature": 0.3
	                }
	            )
	            # Surface HTTP failures (bad key, rate limit, ...) as themselves
	            # rather than as a KeyError on the missing "choices" field.
	            res.raise_for_status()
	            result = res.json()
	            raw = result["choices"][0]["message"]["content"].strip()

	        # The model is told to emit bare JSON but may still wrap it in
	        # markdown fences or prose; keep only the outermost {...} span.
	        start, end = raw.find("{"), raw.rfind("}")
	        if start != -1 and end > start:
	            raw = raw[start:end + 1]
	        params = json.loads(raw)
	        if not isinstance(params, dict):
	            raise ValueError("Voice designer returned a non-object JSON value")

	        return {"params": params, "preview_text": preview_text}
	    except Exception as e:
	        return JSONResponse({"error": str(e)}, status_code=500)


	@app.post("/v1/voices/design/save")
	async def save_designed_voice(request: Request):
	    """Store a designed (parameter-only) voice in the voice library.

	    JSON body fields read here: ``name`` (defaults to ``"Designed Voice"``)
	    and ``params`` (defaults to an empty dict). The entry is saved with no
	    audio file (``"filename": None``) and flagged ``"designed": True``; the
	    metadata file is then handed to ``push_to_hf``.

	    Returns ``{"id", "name"}`` on success, or a 500 JSON error carrying
	    ``str(exception)`` on failure.
	    """
	    try:
	        body = await request.json()
	        voice_name = body.get("name", "Designed Voice")
	        voice_params = body.get("params", {})

	        import uuid
	        new_id = str(uuid.uuid4())[:8]

	        entry = {
	            "name": voice_name,
	            "lang": "en",
	            "filename": None,
	            "params": voice_params,
	            "designed": True,
	        }
	        library = load_voices_meta()
	        library[new_id] = entry
	        save_voices_meta(library)
	        push_to_hf(str(VOICES_META), "meta.json")
	        return {"id": new_id, "name": voice_name}
	    except Exception as exc:
	        return JSONResponse({"error": str(exc)}, status_code=500)


	# ============================================================
	# DUBBING ENDPOINT
	# ============================================================

	@app.post("/v1/dubbing")
	async def dub_video(request: Request):
	    """Dub an uploaded audio/video file into another language.

	    Pipeline: decode the upload into a temp dir, extract 16 kHz mono WAV
	    with ffmpeg, transcribe it through Groq's transcription endpoint,
	    translate the transcript through Groq's chat endpoint, then synthesize
	    the translation with the TTS model.

	    JSON body fields read here:
	      - ``file_b64``: base64-encoded media file (required).
	      - ``filename``: original file name; only its final path component is
	        used (defaults to ``"input.mp4"``).
	      - ``src_lang``: source language code, or ``"auto"`` to send no
	        language hint to the transcriber.
	      - ``tgt_lang``: target language code (defaults to ``"en"``).
	      - ``voice_id``: library voice to speak with; unknown ids and voices
	        without an audio file use the default ``voice_sample``.

	    Returns an ``audio/mpeg`` stream on success, a 400 JSON error when no
	    file is given, and a 500 JSON error otherwise. The temp dir is always
	    removed.
	    """
	    try:
	        data = await request.json()
	        file_b64 = data.get("file_b64", "")
	        filename = data.get("filename", "input.mp4")
	        src_lang = data.get("src_lang", "auto")
	        tgt_lang = data.get("tgt_lang", "en")
	        voice_id = data.get("voice_id", "default")

	        if not file_b64:
	            return JSONResponse({"error": "No file provided"}, status_code=400)
	        if not GROQ_KEY:
	            return JSONResponse({"error": "GROQ_API_KEY not set"}, status_code=500)

	        # ``filename`` is client-controlled: keep only its last component so
	        # "../.." or an absolute path cannot write outside the temp dir.
	        safe_name = os.path.basename(str(filename))
	        if safe_name in ("", ".", ".."):
	            safe_name = "input.mp4"

	        tmpdir = tempfile.mkdtemp()
	        try:
	            # Step 1 — Save uploaded file
	            # The prefix keeps an upload named "audio.wav" from colliding
	            # with the ffmpeg output path below.
	            input_path = os.path.join(tmpdir, "input_" + safe_name)
	            with open(input_path, "wb") as f:
	                f.write(base64.b64decode(file_b64))

	            # Step 2 — Extract audio as WAV
	            audio_path = os.path.join(tmpdir, "audio.wav")
	            subprocess.run([
	                "ffmpeg", "-i", input_path,
	                "-ar", "16000", "-ac", "1",
	                "-y", audio_path
	            ], check=True, capture_output=True)

	            # Step 3 — Transcribe with Whisper via Groq
	            # Only send a language hint when the caller chose one.
	            transcribe_form = {
	                "model": "whisper-large-v3",
	                "response_format": "verbose_json"
	            }
	            if src_lang and src_lang != "auto":
	                transcribe_form["language"] = src_lang

	            async with httpx.AsyncClient(timeout=120.0) as client:
	                with open(audio_path, "rb") as af:
	                    trans_res = await client.post(
	                        "https://api.groq.com/openai/v1/audio/transcriptions",
	                        headers={"Authorization": f"Bearer {GROQ_KEY}"},
	                        # The payload is the extracted WAV, so name it as
	                        # such rather than after the original upload.
	                        files={"file": ("audio.wav", af, "audio/wav")},
	                        data=transcribe_form
	                    )
	                trans_res.raise_for_status()
	                transcript_data = trans_res.json()
	                full_text = transcript_data.get("text", "")

	                if not full_text:
	                    return JSONResponse({"error": "Could not transcribe audio"}, status_code=500)

	                # Step 4 — Translate via Groq LLM
	                lang_names = {
	                    "en": "English", "hi": "Hindi", "es": "Spanish",
	                    "fr": "French", "de": "German", "ja": "Japanese", "zh": "Chinese"
	                }
	                tgt_name = lang_names.get(tgt_lang, tgt_lang)

	                trans_response = await client.post(
	                    "https://api.groq.com/openai/v1/chat/completions",
	                    headers={
	                        "Authorization": f"Bearer {GROQ_KEY}",
	                        "Content-Type": "application/json"
	                    },
	                    json={
	                        "model": "llama-3.3-70b-versatile",
	                        "messages": [
	                            {
	                                "role": "system",
	                                "content": f"Translate the following text to {tgt_name}. Output ONLY the translated text. No explanations."
	                            },
	                            {"role": "user", "content": full_text}
	                        ],
	                        "max_tokens": 2000
	                    }
	                )
	                trans_response.raise_for_status()
	                translated_text = trans_response.json()["choices"][0]["message"]["content"].strip()

	            # Step 5 — Synthesize translated text with Chatterbox
	            meta = load_voices_meta()
	            voice_info = meta.get(voice_id)
	            if voice_info and voice_info.get("filename"):
	                prompt_path = str(VOICES_DIR / voice_info["filename"])
	            else:
	                prompt_path = voice_sample

	            lang_code = get_language(translated_text)
	            emotion = 0.5
	            if voice_info and voice_info.get("params"):
	                emotion = float(voice_info["params"].get("emotion", 0.5))

	            wav = model.generate(
	                translated_text,
	                audio_prompt_path=prompt_path,
	                exaggeration=emotion,
	                language=lang_code
	            )

	            # Step 6 — Return dubbed audio
	            # The MP3 is fully buffered in memory, so removing the temp dir
	            # in ``finally`` does not affect the streamed response.
	            out = io.BytesIO()
	            torchaudio.save(out, wav, model.sr, format="mp3")
	            out.seek(0)
	            return StreamingResponse(out, media_type="audio/mpeg")

	        finally:
	            shutil.rmtree(tmpdir, ignore_errors=True)

	    except Exception as e:
	        return JSONResponse({"error": str(e)}, status_code=500)


	if __name__ == "__main__":
	    # Start the ASGI server only when this file is executed as a script.
	    uvicorn.run(
	        app,
	        host="0.0.0.0",
	        port=7860,
	    )