Spaces:

gzsol
/

lab2

Sleeping

App Files Community

zsolnai committed on Dec 1, 2025

Commit

4036a2f

1 Parent(s): d6fb39f

Redo logic for error

Browse files

Files changed (1) hide show

app.py +16 -11

app.py CHANGED Viewed

@@ -5,34 +5,39 @@ import numpy as np
 import soundfile as sf
 import torch
 # --- STT Setup (using Hugging Face's transformers pipeline for Whisper) ---
 from transformers import pipeline
-# Using a small, English-only Whisper model for fast inference on Spaces
 STT_MODEL_NAME = "openai/whisper-tiny.en"
-device = 0 if torch.cuda.is_available() else -1
-stt_pipe = pipeline("automatic-speech-recognition", model=STT_MODEL_NAME, device=device)
 # --- TTS Setup (using coqui-ai/TTS) ---
 from TTS.api import TTS
-# Using a standard, high-quality English Tacotron2 model
 TTS_MODEL_NAME = "tts_models/en/ljspeech/tacotron2-DDC"
 OUTPUT_WAV_FILE = "output.wav"
 tts_model = TTS(model_name=TTS_MODEL_NAME, progress_bar=False)
-tts_model.to(device)
 def speech_to_text(audio_file_path):
-    """Performs Speech-to-Text using the Whisper model."""
     if audio_file_path is None:
         return "Please upload an audio file or record your voice."
     try:
-        # The pipeline can typically handle the file path directly
-        # and manages necessary preprocessing (resampling, loading, etc.)
         result = stt_pipe(audio_file_path)
         return result["text"]
     except Exception as e:
@@ -40,7 +45,7 @@ def speech_to_text(audio_file_path):
 def text_to_speech(text):
-    """Performs Text-to-Speech using the Coqui TTS model."""
     if not text:
         return None, "Please enter text for synthesis."
@@ -50,13 +55,13 @@ def text_to_speech(text):
             text=text,
             file_path=OUTPUT_WAV_FILE,
         )
-        # Return the file path and a success message
         return OUTPUT_WAV_FILE, "Speech synthesis complete."
     except Exception as e:
         return None, f"Error during TTS: {e}"
 # --- Gradio Interface ---
 with gr.Blocks(css="#status {font-weight: bold;}") as demo:
     gr.Markdown("# 🗣️ STT & TTS App (Whisper & TTS Libraries)")

 import soundfile as sf
 import torch
+# --- Device Setup (Only for the Whisper Pipeline) ---
+# Use the device index (0) if CUDA is available, otherwise use CPU (-1)
+device_for_stt = 0 if torch.cuda.is_available() else -1
 # --- STT Setup (using Hugging Face's transformers pipeline for Whisper) ---
 from transformers import pipeline
 STT_MODEL_NAME = "openai/whisper-tiny.en"
+# Pass the device index to the pipeline initialization
+stt_pipe = pipeline(
+    "automatic-speech-recognition", model=STT_MODEL_NAME, device=device_for_stt
+)
 # --- TTS Setup (using coqui-ai/TTS) ---
 from TTS.api import TTS
 TTS_MODEL_NAME = "tts_models/en/ljspeech/tacotron2-DDC"
 OUTPUT_WAV_FILE = "output.wav"
+# *** CRITICAL FIX HERE ***
+# Initialize the TTS object without explicit device movement.
+# The removed `tts_model.to(device)` call was given the pipeline-style integer
+# index (0 or -1), which is presumably what triggered the error.
+# Relying on the library's own device management is safer than forcing
+# `to(device)`. NOTE(review): whether the library then uses CUDA automatically
+# when it is available is an assumption; confirm, since it may stay on the CPU.
 tts_model = TTS(model_name=TTS_MODEL_NAME, progress_bar=False)
 def speech_to_text(audio_file_path):
+    # ... (function body remains the same)
     if audio_file_path is None:
         return "Please upload an audio file or record your voice."
     try:
         result = stt_pipe(audio_file_path)
         return result["text"]
     except Exception as e:
 def text_to_speech(text):
+    # ... (function body remains the same)
     if not text:
         return None, "Please enter text for synthesis."
             text=text,
             file_path=OUTPUT_WAV_FILE,
         )
         return OUTPUT_WAV_FILE, "Speech synthesis complete."
     except Exception as e:
         return None, f"Error during TTS: {e}"
 # --- Gradio Interface ---
+# (The Gradio interface block remains the same)
 with gr.Blocks(css="#status {font-weight: bold;}") as demo:
     gr.Markdown("# 🗣️ STT & TTS App (Whisper & TTS Libraries)")