Spaces:

awacke1
/

TTS-STT-Blocks

Build error

App Files Files Community

awacke1 committed on Oct 3, 2024

Commit

c09ad18

verified ·

1 Parent(s): 5b90064

Update app.py

Browse files

Files changed (1) hide show

app.py +45 -132

app.py CHANGED Viewed

@@ -1,53 +1,14 @@
-import streamlit as st
-import datetime
-from transformers import pipeline
-import gradio as gr
-import tempfile
-from typing import Optional
-import numpy as np
 from TTS.utils.manage import ModelManager
 from TTS.utils.synthesizer import Synthesizer
-import os
-import csv
-import huggingface_hub
-from huggingface_hub import Repository, hf_hub_download, upload_file
-from datetime import datetime
-# 🌟 Setup dataset repo 🌟
-# Created new dataset as awacke1/MindfulStory.csv
-DATASET_REPO_URL = "https://huggingface.co/datasets/awacke1/MindfulStory.csv"
-DATASET_REPO_ID = "awacke1/MindfulStory.csv"
-DATA_FILENAME = "MindfulStory.csv"
-DATA_FILE = os.path.join("data", DATA_FILENAME)
-HF_TOKEN = os.environ.get("HF_TOKEN")
-# 😅 Oops! Try downloading the dataset (We hope it works!)
-try:
-    hf_hub_download(
-        repo_id=DATASET_REPO_ID,
-        filename=DATA_FILENAME,
-        cache_dir="data",
-        force_filename=DATA_FILENAME
-    )
-except:
-    print("😬 File not found, we’ll act like it’s not a problem...")
-# 🧠 AI Memory: Because forgetting is for humans 🤖
-def AIMemory(name: str, message: str):
-    if name and message:
-        with open(DATA_FILE, "a") as csvfile:
-            writer = csv.DictWriter(csvfile, fieldnames=["name", "message", "time"])
-            writer.writerow({"name": name, "message": message, "time": str(datetime.now())})
-        commit_url = repo.push_to_hub()
-    return {"name": name, "message": message, "time": str(datetime.now())}
-# 🌍 Repository setup! Let’s clone like pros 👨‍💻
-repo = Repository(local_dir="data", clone_from=DATASET_REPO_URL, use_auth_token=HF_TOKEN)
-# 🗣️ Set up Speech Recognition
-asr = pipeline("automatic-speech-recognition", "facebook/wav2vec2-base-960h")
-# 🎤 Set up TTS Models. Let’s find that sweet robotic voice!
 MODEL_NAMES = [
     "en/ljspeech/tacotron2-DDC",
     "en/ljspeech/glow-tts",
@@ -58,97 +19,49 @@ MODEL_NAMES = [
     "de/thorsten/tacotron2-DCA",
 ]
-# 🛠️ Use Model Manager to load vocoders (Fancy tech magic here)
-MODELS = {}
-manager = ModelManager()
 for MODEL_NAME in MODEL_NAMES:
     print(f"🚀 Downloading {MODEL_NAME}... because waiting is fun!")
-    model_path, config_path, model_item = manager.download_model(f"tts_models/{MODEL_NAME}")
-    vocoder_name: Optional[str] = model_item["default_vocoder"]
-    vocoder_path = None
-    vocoder_config_path = None
-    if vocoder_name is not None:
-        vocoder_path, vocoder_config_path, _ = manager.download_model(vocoder_name)
-    synthesizer = Synthesizer(
-        model_path, config_path, None, vocoder_path, vocoder_config_path,
-    )
-    MODELS[MODEL_NAME] = synthesizer
-# 🧙‍♂️ Transcribe function: Turning audio into text with a sprinkle of magic!
-def transcribe(audio):
-    text = asr(audio)["text"]
-    return text
-# 📊 Text classifier (because we love labeling things, right?)
-classifier = pipeline("text-classification")
-# 🎤 Speech to Text: Give me your voice, I’ll give you text!
-def speech_to_text(speech):
-    text = asr(speech)["text"]
-    return text
-# 😎 Sentiment Analysis (because even robots care about feelings 💔)
-def text_to_sentiment(text):
-    sentiment = classifier(text)[0]["label"]
-    return sentiment
-# 📦 Saving it for later: Store this priceless info!
-def upsert(text):
-    date_time = str(datetime.datetime.today())
-    doc_ref = db.collection('Text2SpeechSentimentSave').document(date_time)
-    doc_ref.set({
-        u'firefield': 'Recognize Speech',
-        u'first': 'https://huggingface.co/spaces/awacke1/TTS-STT-Blocks/',
-        u'last': text,
-        u'born': date_time,
-    })
-    saved = select('TTS-STT', date_time)
-    return saved
-# 🔍 Retrieve all records: Gotta catch ‘em all!
-def selectall(text):
-    docs = db.collection('Text2SpeechSentimentSave').stream()
-    doclist = ''
-    for doc in docs:
-        r = (f'{doc.id} => {doc.to_dict()}')
-        doclist += r
-    return doclist
-# 🗣️ Text to Speech (Because speaking is fun, but robots do it better)
 def tts(text: str, model_name: str):
     print(text, model_name)
     synthesizer = MODELS.get(model_name, None)
     if synthesizer is None:
-        raise NameError("😬 Oops! Model not found.")
-    wavs = synthesizer.tts(text)
-    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
-        synthesizer.save_wav(wavs, fp)
-        return fp.name
-# 🎛️ Gradio UI with Emoji and Fun Comments 🎉
-demo = gr.Blocks()
-with demo:
-    # 🎤 Microphone input to capture your golden voice 🎤
-    audio_file = gr.Audio(source="microphone", type="filepath")
-    # 📜 Textbox to display transcribed text 📜
-    text = gr.Textbox(label="Speech to Text")
-    # 🎙️ Radio input to choose the best Text to Speech model 🎙️
-    TTSchoice = gr.Radio(label="Pick a Text to Speech Model", choices=MODEL_NAMES)
-    # 🔊 Audio player to play back the robot’s voice 🔊
-    audio = gr.Audio(label="Output", interactive=False)
-    # 🎉 Buttons for all your needs 🎉
-    b1 = gr.Button("🎤 Recognize Speech")
-    b5 = gr.Button("🔊 Read It Back Aloud")
-    # 🖱️ Click buttons to perform actions! 🖱️
-    b1.click(speech_to_text, inputs=audio_file, outputs=text)
-    b5.click(tts, inputs=[text, TTSchoice], outputs=audio)
-demo.launch(share=True)

+import os
+import torch
 from TTS.utils.manage import ModelManager
 from TTS.utils.synthesizer import Synthesizer
+import tempfile
+from typing import Optional
+# 🛠️ Use Model Manager to load vocoders
+MODELS = {}
+manager = ModelManager()
 MODEL_NAMES = [
     "en/ljspeech/tacotron2-DDC",
     "en/ljspeech/glow-tts",
     "de/thorsten/tacotron2-DCA",
 ]
 for MODEL_NAME in MODEL_NAMES:
     print(f"🚀 Downloading {MODEL_NAME}... because waiting is fun!")
+    try:
+        model_path, config_path, model_item = manager.download_model(f"tts_models/{MODEL_NAME}")
+        vocoder_name: Optional[str] = model_item["default_vocoder"]
+        vocoder_path = None
+        vocoder_config_path = None
+        if vocoder_name is not None:
+            vocoder_path, vocoder_config_path, _ = manager.download_model(vocoder_name)
+        # 🧙‍♂️ Load the synthesizer with vocoder and safe loading of weights
+        synthesizer = Synthesizer(
+            model_path,
+            config_path,
+            None,
+            vocoder_path,
+            vocoder_config_path,
+            use_cuda=False  # Make sure you're not forcing CUDA unless needed
+        )
+        MODELS[MODEL_NAME] = synthesizer
+    except Exception as e:
+        print(f"😬 Failed to load model {MODEL_NAME}: {str(e)}")
+        continue
+# 🗣️ Text to Speech (because speaking is fun, but robots do it better)
 def tts(text: str, model_name: str):
     print(text, model_name)
     synthesizer = MODELS.get(model_name, None)
     if synthesizer is None:
+        raise NameError("Model not found, check if it's loaded properly!")
+    try:
+        wavs = synthesizer.tts(text)
+        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
+            synthesizer.save_wav(wavs, fp)
+            return fp.name
+    except Exception as e:
+        print(f"😬 Error generating speech: {str(e)}")
+        return None