Spaces:

awacke1
/

TTS-STT-Blocks

Build error

App Files Files Community

awacke1 committed on Oct 3, 2024

Commit

5b90064

verified ·

1 Parent(s): 89707c8

Update app.py

Browse files

Files changed (1) hide show

app.py +51 -57

app.py CHANGED Viewed

@@ -2,40 +2,37 @@ import streamlit as st
 import datetime
 from transformers import pipeline
 import gradio as gr
 import tempfile
 from typing import Optional
 import numpy as np
 from TTS.utils.manage import ModelManager
 from TTS.utils.synthesizer import Synthesizer
-# PersistDataset -----
 import os
 import csv
-import gradio as gr
-from gradio import inputs, outputs
 import huggingface_hub
 from huggingface_hub import Repository, hf_hub_download, upload_file
 from datetime import datetime
-# created new dataset as awacke1/MindfulStory.csv
 DATASET_REPO_URL = "https://huggingface.co/datasets/awacke1/MindfulStory.csv"
 DATASET_REPO_ID = "awacke1/MindfulStory.csv"
 DATA_FILENAME = "MindfulStory.csv"
 DATA_FILE = os.path.join("data", DATA_FILENAME)
 HF_TOKEN = os.environ.get("HF_TOKEN")
-# Download dataset repo using hub download
 try:
     hf_hub_download(
         repo_id=DATASET_REPO_ID,
         filename=DATA_FILENAME,
-        cache_dir=DATA_DIRNAME,
         force_filename=DATA_FILENAME
     )
 except:
-    print("file not found")
 def AIMemory(name: str, message: str):
     if name and message:
         with open(DATA_FILE, "a") as csvfile:
@@ -44,16 +41,13 @@ def AIMemory(name: str, message: str):
         commit_url = repo.push_to_hub()
     return {"name": name, "message": message, "time": str(datetime.now())}
-with open('Mindfulness.txt', 'r') as file:
-        context = file.read()
-# Set up cloned dataset from repo for operations
-repo = Repository( local_dir="data", clone_from=DATASET_REPO_URL, use_auth_token=HF_TOKEN)
-# set up ASR
 asr = pipeline("automatic-speech-recognition", "facebook/wav2vec2-base-960h")
-# set up TTS
 MODEL_NAMES = [
     "en/ljspeech/tacotron2-DDC",
     "en/ljspeech/glow-tts",
@@ -64,11 +58,11 @@ MODEL_NAMES = [
     "de/thorsten/tacotron2-DCA",
 ]
-# Use Model Manager to load vocoders
 MODELS = {}
 manager = ModelManager()
 for MODEL_NAME in MODEL_NAMES:
-    print(f"downloading {MODEL_NAME}")
     model_path, config_path, model_item = manager.download_model(f"tts_models/{MODEL_NAME}")
     vocoder_name: Optional[str] = model_item["default_vocoder"]
     vocoder_path = None
@@ -81,80 +75,80 @@ for MODEL_NAME in MODEL_NAMES:
     )
     MODELS[MODEL_NAME] = synthesizer
-# transcribe
 def transcribe(audio):
     text = asr(audio)["text"]
     return text
-#text classifier
 classifier = pipeline("text-classification")
 def speech_to_text(speech):
     text = asr(speech)["text"]
-    #rMem = AIMemory("STT", text)
     return text
 def text_to_sentiment(text):
     sentiment = classifier(text)[0]["label"]
-    #rMem = AIMemory(text, sentiment)
     return sentiment
 def upsert(text):
-    date_time =str(datetime.datetime.today())
     doc_ref = db.collection('Text2SpeechSentimentSave').document(date_time)
-    doc_ref.set({u'firefield': 'Recognize Speech', u'first': 'https://huggingface.co/spaces/awacke1/TTS-STT-Blocks/', u'last': text, u'born': date_time,})
     saved = select('TTS-STT', date_time)
     return saved
-def select(collection, document):
-    doc_ref = db.collection(collection).document(document)
-    doc = doc_ref.get()
-    docid = ("The id is: ", doc.id)
-    contents = ("The contents are: ", doc.to_dict())
-    return contents
 def selectall(text):
     docs = db.collection('Text2SpeechSentimentSave').stream()
-    doclist=''
     for doc in docs:
-        r=(f'{doc.id} => {doc.to_dict()}')
         doclist += r
     return doclist
 def tts(text: str, model_name: str):
     print(text, model_name)
     synthesizer = MODELS.get(model_name, None)
     if synthesizer is None:
-        raise NameError("model not found")
     wavs = synthesizer.tts(text)
     with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
         synthesizer.save_wav(wavs, fp)
-        #rMem = AIMemory("TTS", text + model_name)
         return fp.name
 demo = gr.Blocks()
 with demo:
-    audio_file = gr.inputs.Audio(source="microphone", type="filepath")
     text = gr.Textbox(label="Speech to Text")
-    #label = gr.Label()
-    #saved = gr.Textbox(label="Saved")
-    #savedAll = gr.Textbox(label="SavedAll")
-    TTSchoice = gr.inputs.Radio( label="Pick a Text to Speech Model", choices=MODEL_NAMES,   )
     audio = gr.Audio(label="Output", interactive=False)
-    b1 = gr.Button("Recognize Speech")
-    #b2 = gr.Button("Classify Sentiment")
-    #b3 = gr.Button("Save Speech to Text")
-    #b4 = gr.Button("Retrieve All")
-    b5 = gr.Button("Read It Back Aloud")
     b1.click(speech_to_text, inputs=audio_file, outputs=text)
-    #b2.click(text_to_sentiment, inputs=text, outputs=label)
-    #b3.click(upsert, inputs=text, outputs=saved)
-    #b4.click(selectall, inputs=text, outputs=savedAll)
-    b5.click(tts,  inputs=[text,TTSchoice], outputs=audio)
-demo.launch(share=True)

 import datetime
 from transformers import pipeline
 import gradio as gr
 import tempfile
 from typing import Optional
 import numpy as np
 from TTS.utils.manage import ModelManager
 from TTS.utils.synthesizer import Synthesizer
 import os
 import csv
 import huggingface_hub
 from huggingface_hub import Repository, hf_hub_download, upload_file
 from datetime import datetime
+# 🌟 Setup dataset repo 🌟
+# Created new dataset as awacke1/MindfulStory.csv
 DATASET_REPO_URL = "https://huggingface.co/datasets/awacke1/MindfulStory.csv"
 DATASET_REPO_ID = "awacke1/MindfulStory.csv"
 DATA_FILENAME = "MindfulStory.csv"
 DATA_FILE = os.path.join("data", DATA_FILENAME)
 HF_TOKEN = os.environ.get("HF_TOKEN")
+# 😅 Oops! Try downloading the dataset (We hope it works!)
 # Best-effort prefetch of the CSV from the dataset repo. A failure is
 # reported on stdout and start-up continues.
 # NOTE(review): this writes into "data", the same directory the Repository
 # clone uses as local_dir -- confirm the two do not conflict. Also confirm
 # that `force_filename` is still accepted by the installed huggingface_hub
 # and whether `repo_type="dataset"` is needed for this repo id.
 try:
     hf_hub_download(
         repo_id=DATASET_REPO_ID,
         filename=DATA_FILENAME,
         cache_dir="data",
         force_filename=DATA_FILENAME
     )
 except Exception as err:
     # `Exception` instead of a bare `except:` so KeyboardInterrupt and
     # SystemExit still propagate. The actual error is printed as well,
     # because not every failure here is a missing file.
     print("😬 File not found, we’ll act like it’s not a problem...")
     print(f"hf_hub_download failed: {err!r}")
+# 🧠 AI Memory: Because forgetting is for humans 🤖
 def AIMemory(name: str, message: str):
     if name and message:
         with open(DATA_FILE, "a") as csvfile:
         commit_url = repo.push_to_hub()
     return {"name": name, "message": message, "time": str(datetime.now())}
+# 🌍 Repository setup! Let’s clone like pros 👨‍💻
+repo = Repository(local_dir="data", clone_from=DATASET_REPO_URL, use_auth_token=HF_TOKEN)
+# 🗣️ Set up Speech Recognition
 asr = pipeline("automatic-speech-recognition", "facebook/wav2vec2-base-960h")
+# 🎤 Set up TTS Models. Let’s find that sweet robotic voice!
 MODEL_NAMES = [
     "en/ljspeech/tacotron2-DDC",
     "en/ljspeech/glow-tts",
     "de/thorsten/tacotron2-DCA",
 ]
+# 🛠️ Use ModelManager to download each TTS model, read its default vocoder, and register a synthesizer in MODELS
 MODELS = {}
 manager = ModelManager()
 for MODEL_NAME in MODEL_NAMES:
+    print(f"🚀 Downloading {MODEL_NAME}... because waiting is fun!")
     model_path, config_path, model_item = manager.download_model(f"tts_models/{MODEL_NAME}")
     vocoder_name: Optional[str] = model_item["default_vocoder"]
     vocoder_path = None
     )
     MODELS[MODEL_NAME] = synthesizer
+# 🧙‍♂️ Transcribe function: Turning audio into text with a sprinkle of magic!
 def transcribe(audio):
     """Run the module-level ``asr`` pipeline on ``audio`` and return its text.

     The pipeline result is indexed with ``"text"`` and that value is
     returned unchanged.
     """
     return asr(audio)["text"]
+# 📊 Text classifier (because we love labeling things, right?)
 classifier = pipeline("text-classification")
+# 🎤 Speech to Text: Give me your voice, I’ll give you text!
 def speech_to_text(speech):
     """Return the ``"text"`` field of ``asr(speech)``.

     ``speech`` is handed to the module-level ``asr`` pipeline as-is.
     """
     recognized = asr(speech)
     return recognized["text"]
+# 😎 Sentiment Analysis (because even robots care about feelings 💔)
 def text_to_sentiment(text):
     """Classify ``text`` and return the ``"label"`` of the first result.

     Uses the module-level ``classifier`` pipeline; only element ``[0]`` of
     its output is consulted.
     """
     first_result = classifier(text)[0]
     return first_result["label"]
+# 📦 Saving it for later: Store this priceless info!
 def upsert(text):
     """Store ``text`` as a new timestamped document and return what was saved.

     A document keyed by the current timestamp string is written to the
     ``Text2SpeechSentimentSave`` collection and then read back through the
     same reference.

     Args:
         text: The recognized text to persist (stored under ``'last'``).

     Returns:
         A tuple ``("The contents are: ", <stored dict>)``.

     NOTE(review): ``db`` is not created anywhere in the visible code; it is
     presumably a Firestore-style client -- confirm it is defined before this
     function is wired to the UI.
     """
     # Local import under a private alias: the module does both
     # `import datetime` and, later, `from datetime import datetime`, so the
     # bare name `datetime` is the class and `datetime.datetime.today()`
     # raises AttributeError.
     from datetime import datetime as _datetime

     date_time = str(_datetime.today())
     doc_ref = db.collection('Text2SpeechSentimentSave').document(date_time)
     doc_ref.set({
         u'firefield': 'Recognize Speech',
         u'first': 'https://huggingface.co/spaces/awacke1/TTS-STT-Blocks/',
         u'last': text,
         u'born': date_time,
     })
     # The `select` helper this function used to call no longer exists, and
     # it was pointed at a different collection ('TTS-STT') than the one
     # written above. Read the document back through `doc_ref` instead.
     saved = ("The contents are: ", doc_ref.get().to_dict())
     return saved
+# 🔍 Retrieve all records: Gotta catch ‘em all!
 def selectall(text):
     """Return every document in ``Text2SpeechSentimentSave`` as one string.

     Each document is rendered as ``"<id> => <dict>"`` and the pieces are
     concatenated with no separator. ``text`` is accepted but not used.

     NOTE(review): relies on a module-level ``db`` that is not defined in the
     visible code -- confirm where it comes from.
     """
     records = db.collection('Text2SpeechSentimentSave').stream()
     return ''.join(f'{record.id} => {record.to_dict()}' for record in records)
+# 🗣️ Text to Speech (Because speaking is fun, but robots do it better)
 def tts(text: str, model_name: str):
     """Synthesize ``text`` with the chosen model and return a WAV file path.

     Args:
         text: The text to speak.
         model_name: Key into the module-level ``MODELS`` mapping.

     Returns:
         The name of a temporary ``.wav`` file. It is created with
         ``delete=False``, so it remains on disk after this call returns.

     Raises:
         NameError: If ``MODELS`` has no synthesizer for ``model_name``.
     """
     print(text, model_name)
     chosen = MODELS.get(model_name)
     if chosen is None:
         raise NameError("😬 Oops! Model not found.")
     audio_samples = chosen.tts(text)
     wav_file = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
     with wav_file:
         chosen.save_wav(audio_samples, wav_file)
     return wav_file.name
+# 🎛️ Gradio UI with Emoji and Fun Comments 🎉
 demo = gr.Blocks()
 with demo:
+    # Microphone recorder; type="filepath" is requested so the handler gets a file path.
+    # NOTE(review): newer Gradio releases take `sources=[...]` instead of `source=` -- confirm against the pinned version.
+    audio_file = gr.Audio(source="microphone", type="filepath")
+    # Shows the output of speech_to_text; also the text input for tts.
     text = gr.Textbox(label="Speech to Text")
+    # Model picker; its choices are the entries of MODEL_NAMES.
+    TTSchoice = gr.Radio(label="Pick a Text to Speech Model", choices=MODEL_NAMES)
+    # Non-interactive player that receives the value returned by tts.
     audio = gr.Audio(label="Output", interactive=False)
+    # One button per action.
+    b1 = gr.Button("🎤 Recognize Speech")
+    b5 = gr.Button("🔊 Read It Back Aloud")
+    # Wiring: b1 sends the recording to speech_to_text; b5 sends the text and chosen model to tts.
     b1.click(speech_to_text, inputs=audio_file, outputs=text)
+    b5.click(tts, inputs=[text, TTSchoice], outputs=audio)
+demo.launch(share=True)