EngrMuhammadBilal committed on
Commit
6bb76af
·
verified ·
1 Parent(s): 4c677d7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +84 -99
app.py CHANGED
@@ -1,129 +1,114 @@
1
  import io
2
  import os
3
- import tempfile
4
  from datetime import datetime
5
-
6
- import numpy as np
7
- import soundfile as sf
8
  import streamlit as st
9
- from TTS.api import TTS
10
 
11
- st.set_page_config(page_title="Urdu Voice Cloner (XTTS v2)", page_icon="🗣️", layout="centered")
12
 
13
- st.title("🗣️ Urdu Text → Your Voice")
14
- st.caption("Upload a short sample of your voice (WAV), type Urdu text, and get audio in your own voice. Runs on CPU.")
15
 
16
- # Cache model once
17
@st.cache_resource(show_spinner=True)
def load_tts():
    """Build the XTTS v2 model wrapper.

    Wrapped in ``st.cache_resource`` so the model is cached rather than
    rebuilt on every call.
    """
    model_id = "tts_models/multilingual/multi-dataset/xtts_v2"
    return TTS(model_name=model_id)
 
20
 
21
- tts = load_tts()
22
 
 
23
  with st.sidebar:
24
  st.header("Options")
25
- similarity = st.slider("Similarity boost", 0.0, 1.0, 0.75, 0.05)
26
- stability = st.slider("Stability", 0.0, 1.0, 0.60, 0.05)
27
- style = st.slider("Style (expressiveness)", 0.0, 1.0, 0.35, 0.05)
28
- normalize = st.checkbox("Normalize loudness", True)
29
- seed = st.number_input("Random seed", value=42, step=1)
30
- base_name = st.text_input("Output filename (no extension)", "urdu_voice_clone")
31
-
32
def simple_trim_silence(wave: np.ndarray, threshold: float = 1e-4, pad: int = 0) -> np.ndarray:
    """Trim leading and trailing near-silence from an audio signal.

    Input with more than one dimension is first reduced by averaging over
    axis 1 (down-mix to a single channel).

    Args:
        wave: Audio samples.
        threshold: Absolute amplitude a sample must exceed to count as sound.
        pad: Extra samples to keep on each side of the detected region.

    Returns:
        The trimmed signal. If no sample exceeds ``threshold`` the (possibly
        down-mixed) input is returned untrimmed.
    """
    if wave.ndim > 1:
        wave = wave.mean(axis=1)
    loud = np.flatnonzero(np.abs(wave) > threshold)
    if loud.size == 0:
        return wave
    start = max(int(loud[0]) - pad, 0)
    # +1: slice ends are exclusive, so without it the last audible sample
    # would be cut off (and a single-sample signal would come back empty).
    end = min(int(loud[-1]) + 1 + pad, wave.shape[0])
    return wave[start:end]
41
-
42
def normalize_peak(wave: np.ndarray, peak: float = 0.98) -> np.ndarray:
    """Scale a signal so its largest absolute sample equals ``peak``.

    Args:
        wave: Audio samples.
        peak: Target absolute peak value.

    Returns:
        The rescaled samples as ``float32``. An empty input is returned
        as an empty ``float32`` array.
    """
    if wave.size == 0:
        # np.max raises ValueError on an empty array; nothing to scale anyway.
        return wave.astype(np.float32)
    # The small epsilon keeps an all-zero signal from dividing by zero.
    m = np.max(np.abs(wave)) + 1e-9
    return (peak * wave / m).astype(np.float32)
45
-
46
def wav_bytes_from_array(y: np.ndarray, sr: int) -> bytes:
    """Encode samples as a WAV file held in memory and return its bytes.

    Args:
        y: Audio samples to write.
        sr: Sample rate passed to the writer.

    Returns:
        The complete WAV file contents.
    """
    with io.BytesIO() as sink:
        sf.write(sink, y, sr, format="WAV")
        # getvalue() yields the whole buffer regardless of the write cursor.
        return sink.getvalue()
51
 
52
- # Inputs
53
- ref_file = st.file_uploader("Upload your voice sample (WAV only)", type=["wav"])
54
- default_text = "یہ میری آواز کی مثال ہے۔ آپ یہاں اپنا متن لکھیں اور آڈیو حاصل کریں۔"
55
- text = st.text_area("Urdu text", value=default_text, height=180, placeholder="یہاں اردو میں ٹیکسٹ لکھیں یا پیسٹ کریں…")
56
 
57
  col1, col2 = st.columns(2)
58
  with col1:
59
- run_btn = st.button("🎙️ Generate", use_container_width=True)
60
  with col2:
61
- clear_btn = st.button("🧹 Clear", use_container_width=True)
62
 
63
- if clear_btn:
64
  st.session_state.pop("audio_bytes", None)
65
  st.experimental_rerun()
66
 
67
# Generate: validate inputs, clone the uploaded voice, store the result in session state.
if run_btn:
    if not text.strip():
        st.warning("براہ کرم اردو متن درج کریں۔")
    elif ref_file is None:
        st.warning("براہ کرم اپنی آواز کی WAV فائل اپلوڈ کریں۔")
    else:
        # Tracked outside the try so the finally clause can always clean them up.
        ref_path = None
        out_wav_path = None
        try:
            # Save the upload to disk; the synthesizer is given a file path.
            with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_ref:
                tmp_ref.write(ref_file.read())
                ref_path = tmp_ref.name

            # Optional: quick silence trim. Deliberately best effort — if the
            # sample cannot be read or rewritten it is used untouched.
            try:
                y_ref, sr_ref = sf.read(ref_path, dtype="float32", always_2d=False)
                y_ref = simple_trim_silence(y_ref)
                sf.write(ref_path, y_ref, sr_ref)
            except Exception:
                pass

            st.info("Cloning voice and synthesizing Urdu… (first run can take a bit on CPU)")

            # Reserve an output path and close the handle immediately so the
            # synthesizer can write to it.
            with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_out:
                out_wav_path = tmp_out.name

            # NOTE(review): speaker_similarity / stability / style / seed and the
            # "ur" language code are passed through as in the original; confirm
            # the installed TTS version actually accepts them.
            tts.tts_to_file(
                text=text.strip(),
                file_path=out_wav_path,
                speaker_wav=ref_path,
                language="ur",
                speaker_similarity=float(similarity),
                stability=float(stability),
                style=float(style),
                split_sentences=True,
                seed=int(seed),
            )

            y, sr = sf.read(out_wav_path, dtype="float32", always_2d=False)
            if normalize:
                y = normalize_peak(y)

            st.session_state["audio_bytes"] = wav_bytes_from_array(y, sr)
            st.success("آڈیو تیار ہے۔")
        except Exception as e:
            # Top-level UI boundary: surface any failure to the user.
            st.error(f"کچھ مسئلہ آیا: {e}")
        finally:
            # Remove temp files on success AND on failure (previously they
            # were leaked whenever synthesis raised).
            for leftover in (ref_path, out_wav_path):
                if leftover:
                    try:
                        os.remove(leftover)
                    except OSError:
                        pass
119
 
 
120
  if "audio_bytes" in st.session_state:
 
121
  st.markdown("### ▶️ Preview")
122
- st.audio(st.session_state["audio_bytes"], format="audio/wav")
123
-
124
- ts = datetime.now().strftime("%Y%m%d_%H%M%S")
125
- fname = f"{(base_name or 'urdu_voice_clone').strip()}_{ts}.wav"
126
- st.download_button("⬇️ Download WAV", data=st.session_state["audio_bytes"], file_name=fname, mime="audio/wav", use_container_width=True)
 
 
 
 
127
 
128
  st.markdown("---")
129
- st.caption("Tips: upload a clean 10–30s WAV sample with low noise. For faster synthesis, upgrade the Space to a GPU.")
 
 
 
 
1
  import io
2
  import os
3
+ from pathlib import Path
4
  from datetime import datetime
 
 
 
5
  import streamlit as st
6
+ from openai import OpenAI
7
 
8
st.set_page_config(page_title="Urdu TTS - OpenAI", page_icon="🔊", layout="centered")

st.title("🔊 Urdu Text → Speech (OpenAI)")
st.caption("Type Urdu text and generate natural Urdu speech with OpenAI TTS. If you have a custom OpenAI voice ID, you can use it.")

# ---- API key ----
# SECURITY: look the key up by its *name*. The key value itself must never
# appear in source; a key that was ever committed has to be revoked and
# replaced in the OpenAI dashboard.
API_KEY = os.getenv("OPENAI_API_KEY")
if not API_KEY:
    try:
        API_KEY = st.secrets.get("OPENAI_API_KEY")
    except FileNotFoundError:
        # NOTE(review): st.secrets appears to raise FileNotFoundError (or a
        # subclass) when no secrets file exists — confirm for the pinned
        # Streamlit version. Falling through shows the friendly error below.
        API_KEY = None
if not API_KEY:
    st.error("Missing OPENAI_API_KEY. In your Space go to Settings → Secrets and add it.")
    st.stop()

client = OpenAI(api_key=API_KEY)
20
 
21
# ---- Sidebar options ----
# NOTE(review): the diff view lost indentation; out_name and fmt are assumed
# to live inside the sidebar alongside the other options — confirm.
with st.sidebar:
    st.header("Options")
    st.caption("Pick a built-in voice or provide your own custom voice ID if you have access.")
    # Built-in voice names documented for the OpenAI speech endpoint. The
    # previous list contained names the API does not list ("aria", "cove",
    # "luna"), which fail at request time.
    voice_presets = [
        "alloy",
        "ash",
        "ballad",
        "coral",
        "echo",
        "fable",
        "nova",
        "onyx",
        "sage",
        "shimmer",
        "verse",
    ]
    use_custom = st.checkbox("Use custom OpenAI voice ID", False)
    custom_voice = ""
    # Always bound, so later code can reference it whichever branch ran.
    preset = voice_presets[0]
    if use_custom:
        custom_voice = st.text_input("Custom voice_id", value="", help="Requires Voice access in your OpenAI account")
    else:
        preset = st.selectbox("Built-in voice", options=voice_presets, index=0)
    out_name = st.text_input("Output filename (no extension)", "urdu_tts")
    # The first entry is only a UI label; it is mapped to plain MP3 downstream.
    fmt = st.selectbox("Audio format", options=["mp3_44100_128", "wav"], index=0)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
 
35
# ---- Text input and action buttons ----
sample = "یہ ایک سادہ مثال ہے۔ یہاں اپنا متن لکھیں اور آڈیو حاصل کریں۔"
text = st.text_area("Urdu text", value=sample, height=200, placeholder="یہاں اردو میں ٹیکسٹ لکھیں یا پیسٹ کریں…")

col1, col2 = st.columns(2)
with col1:
    make_audio = st.button("🎙️ Generate", use_container_width=True)
with col2:
    clear = st.button("🧹 Clear", use_container_width=True)

if clear:
    # Drop both the audio and its recorded extension so no stale state survives.
    st.session_state.pop("audio_bytes", None)
    st.session_state.pop("ext", None)
    # st.experimental_rerun was removed from recent Streamlit releases;
    # prefer st.rerun and fall back only on older versions.
    rerun = getattr(st, "rerun", None) or st.experimental_rerun
    rerun()
47
 
48
def tts_openai(urdu_text: str, voice: str, output_format: str) -> "tuple[bytes, str]":
    """Synthesize speech with OpenAI TTS (gpt-4o-mini-tts).

    Args:
        urdu_text: Text to speak.
        voice: Voice name or ID, passed through to the API unchanged.
        output_format: Value from the UI dropdown. ``"wav"`` selects WAV;
            any other value selects MP3.

    Returns:
        ``(audio_bytes, ext)`` where ``ext`` is ``"wav"`` or ``"mp3"``.

    Raises:
        Any exception raised by the OpenAI client is propagated to the caller.
    """
    model = "gpt-4o-mini-tts"
    # The API format name and the file extension are the same string.
    ext = "wav" if output_format == "wav" else "mp3"

    # The SDK keyword is ``response_format`` (``format`` is not a parameter
    # of speech.create). The body is read straight into memory, which avoids
    # the earlier timestamp-named /tmp file that could collide between
    # sessions and was left behind on errors.
    with client.audio.speech.with_streaming_response.create(
        model=model,
        voice=voice,
        input=urdu_text,
        response_format=ext,
    ) as resp:
        data = resp.read()

    return data, ext
78
+
79
# Generate: validate the text, resolve the voice, synthesize, and stash the result.
if make_audio:
    cleaned_text = text.strip()
    if not cleaned_text:
        st.warning("براہ کرم اردو متن درج کریں")
    else:
        try:
            # Custom mode uses the typed ID (None when blank); otherwise the preset.
            if use_custom:
                voice_to_use = custom_voice.strip() or None
            else:
                voice_to_use = preset

            if voice_to_use:
                st.info("Generating speech with OpenAI TTS…")
                audio_bytes, ext = tts_openai(cleaned_text, voice_to_use, fmt)
                st.session_state["audio_bytes"] = audio_bytes
                st.session_state["ext"] = ext
                st.success("آڈیو تیار ہے")
            else:
                st.warning("Custom voice ID is empty. Either uncheck custom voice or provide a valid voice_id.")
        except Exception as e:
            # UI boundary: report any failure instead of crashing the script run.
            st.error(f"کچھ مسئلہ آیا: {e}")
95
 
96
# ---- Preview and download ----
# Shown whenever a previous run left audio in the session state.
if "audio_bytes" in st.session_state:
    ext = st.session_state.get("ext", "mp3")
    # One MIME value shared by the player and the download button.
    mime = "audio/mpeg" if ext == "mp3" else "audio/wav"
    st.markdown("### ▶️ Preview")
    st.audio(st.session_state["audio_bytes"], format=mime)
    # Strip BEFORE applying the fallback so a whitespace-only name does not
    # produce a file called "_<timestamp>.<ext>".
    stem = (out_name or "").strip() or "urdu_tts"
    fname = f"{stem}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.{ext}"
    st.download_button(
        "⬇️ Download",
        data=st.session_state["audio_bytes"],
        file_name=fname,
        mime=mime,
        use_container_width=True,
    )
109
 
110
# Footer: divider followed by usage notes.
st.markdown("---")
footer_note = (
    "Notes: Built-in voices are not your personal voice. For an exact match you need OpenAI Voice access with a custom voice_id. "
    "Urdu is supported by gpt-4o-mini-tts. If the audio sounds too fast or slow, try the WAV format then adjust speed in an editor."
)
st.caption(footer_note)