Spaces:

sadimanna
/

tts

Paused

App Files Files Community

sadimanna committed on Dec 29, 2025

Commit

1168147

1 Parent(s): e41a14b

added files

Browse files

Files changed (3) hide show

Dockerfile +34 -0
app.py +204 -0
requirements.txt +9 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,34 @@

+FROM python:3.10-slim
+# Prevent Python from buffering stdout/stderr so log output appears immediately
+ENV PYTHONUNBUFFERED=1
+# Install system packages, then delete the apt package lists in the same layer (NOTE(review): ffmpeg/libsndfile1 presumably serve the audio libraries)
+RUN apt-get update && apt-get install -y \
+    git \
+    ffmpeg \
+    libsndfile1 \
+    && rm -rf /var/lib/apt/lists/*
+# Set working directory; the COPY and CMD instructions below are relative to /app
+WORKDIR /app
+# Copy only requirements.txt first (NOTE(review): presumably so the pip layer is reused when only app.py changes)
+COPY requirements.txt .
+# Install Python dependencies without keeping pip's download cache
+RUN pip install --no-cache-dir -r requirements.txt
+# Copy application code (app.py is the only source file copied into the image)
+COPY app.py .
+# Expose Streamlit port (must match STREAMLIT_SERVER_PORT below)
+EXPOSE 7860
+# Streamlit configuration for HF Spaces: headless mode, port 7860, address 0.0.0.0
+ENV STREAMLIT_SERVER_HEADLESS=true
+ENV STREAMLIT_SERVER_PORT=7860
+ENV STREAMLIT_SERVER_ADDRESS=0.0.0.0
+# Run Streamlit with app.py as the entry script
+CMD ["streamlit", "run", "app.py"]

app.py ADDED Viewed

	@@ -0,0 +1,204 @@

+import streamlit as st
+import torch
+import numpy as np
+import tempfile
+from transformers import VitsModel, AutoTokenizer
+from scipy.io.wavfile import write
+import librosa
+from scipy.signal import butter, lfilter
+#============================================
+# Voice Presets
+#============================================
+VOICE_PRESETS = {
+    "Custom (Manual)": None,   # special case
+    "Neutral": {
+        "pitch": 0,
+        "speed": 1.0,
+        "effect": None
+    },
+    "Deep": {
+        "pitch": -4,
+        "speed": 0.9,
+        "effect": "bass"
+    },
+    "Child-like": {
+        "pitch": 5,
+        "speed": 1.15,
+        "effect": None
+    },
+    "Robotic": {
+        "pitch": 0,
+        "speed": 1.0,
+        "effect": "robotic"
+    }
+}
+#============================================
+# Audio Post-Processing Functions
+#============================================
+def apply_pitch_speed(audio, sr, pitch=0, speed=1.0):
+    if speed != 1.0:
+        audio = librosa.effects.time_stretch(audio, rate=speed)
+    if pitch != 0:
+        audio = librosa.effects.pitch_shift(audio, sr=sr, n_steps=pitch)
+    return audio
+def bass_boost(audio, sr, gain=1.5, cutoff=200):
+    b, a = butter(2, cutoff / (sr / 2), btype="low")
+    low = lfilter(b, a, audio)
+    return audio + gain * low
+def robotic_effect(audio, sr, freq=30):
+    t = np.arange(len(audio)) / sr
+    modulator = np.sin(2 * np.pi * freq * t)
+    return audio * modulator
+# ------------------------
+# Page config
+# ------------------------
+# This is the first Streamlit call in the script.
+# NOTE(review): Streamlit expects set_page_config before other st.* commands;
+# keep it ahead of the title/markdown calls below.
+st.set_page_config(
+    page_title="MMS-TTS English",
+    layout="centered"
+)
+# Page heading and a short description of what the app does.
+st.title("🔊 MMS-TTS English (Speed & Pitch Control)")
+st.markdown(
+    """
+Generate English speech using **facebook/mms-tts-eng**
+Post-process audio to control **speed** and **pitch**.
+"""
+)
+# ------------------------
+# Load model (cached)
+# ------------------------
+@st.cache_resource
+def load_model():
+    model = VitsModel.from_pretrained("facebook/mms-tts-eng")
+    tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-eng")
+    model.eval()
+    return model, tokenizer
+# Obtain the model and tokenizer from the st.cache_resource-wrapped loader.
+model, tokenizer = load_model()
+# ------------------------
+# UI Controls
+# ------------------------
+st.caption(
+    "Select a preset for fixed voice styles, or choose Custom (Manual) to control pitch and speed yourself."
+)
+# The preset names come from VOICE_PRESETS; index=0 preselects its first key.
+preset_name = st.selectbox(
+    "Voice Preset",
+    options=list(VOICE_PRESETS.keys()),
+    index=0
+)
+preset = VOICE_PRESETS[preset_name]
+# A value of None marks the "Custom (Manual)" entry, which has no fixed settings.
+is_custom = preset is None
+text = st.text_area(
+    "Input Text",
+    height=150,
+    placeholder="Enter English text here..."
+)
+# Both sliders are always created but are disabled unless the custom preset
+# is selected; their values are only read in the custom branch below.
+speed = st.slider(
+    "Speech Speed",
+    min_value=0.5,
+    max_value=1.5,
+    value=1.0,
+    step=0.05,
+    disabled=not is_custom
+)
+pitch = st.slider(
+    "Pitch Shift (semitones)",
+    min_value=-6,
+    max_value=6,
+    value=0,
+    step=1,
+    disabled=not is_custom
+)
+if not is_custom:
+    # Fixed preset: show the settings that will be applied.
+    st.info(
+        f"Preset selected: **{preset_name}**\n\n"
+        f"- Pitch: {preset['pitch']} semitones\n"
+        f"- Speed: {preset['speed']}x\n"
+        f"- Effect: {preset['effect'] if preset['effect'] else 'None'}"
+    )
+else:
+    # Custom mode: build a preset-shaped dict from the slider values so the
+    # generation code can read "pitch"/"speed"/"effect" in both cases.
+    # No effect is applied in custom mode.
+    preset = {
+        "pitch": pitch,
+        "speed": speed,
+        "effect": None
+    }
+#=------------------------
+# Generate Button
+# ------------------------
+generate = st.button("🎙️ Generate Audio")
+# ------------------------
+# Generation
+# ------------------------
+if generate:
+    if not text.strip():
+        st.warning("Please enter text.")
+    else:
+        with st.spinner("Generating speech..."):
+            inputs = tokenizer(text, return_tensors="pt")
+            with torch.no_grad():
+                waveform = model(**inputs).waveform
+            audio = waveform.squeeze().cpu().numpy()
+            sr = model.config.sampling_rate
+            # Apply pitch + speed
+            audio = apply_pitch_speed(
+                audio,
+                sr,
+                pitch=preset["pitch"],
+                speed=preset["speed"]
+            )
+            # Apply effect
+            if preset["effect"] == "bass":
+                audio = bass_boost(audio, sr)
+            elif preset["effect"] == "robotic":
+                audio = robotic_effect(audio, sr)
+            # Normalize
+            audio = audio / np.max(np.abs(audio))
+            audio_int16 = np.int16(audio * 32767)
+            with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
+                write(tmp.name, sr, audio_int16)
+                output_path = tmp.name
+        st.success("Audio generated successfully!")
+        st.audio(output_path, format="audio/wav")
+        with open(output_path, "rb") as f:
+            st.download_button(
+                "⬇️ Download WAV",
+                data=f,
+                file_name="mms_tts_output.wav",
+                mime="audio/wav"
+            )
+# ------------------------

requirements.txt ADDED Viewed

	@@ -0,0 +1,9 @@

+torch
+transformers>=4.33
+accelerate
+streamlit
+scipy
+soundfile
+librosa
+numpy
+pydub