Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -2,40 +2,37 @@ import streamlit as st
|
|
| 2 |
import datetime
|
| 3 |
from transformers import pipeline
|
| 4 |
import gradio as gr
|
| 5 |
-
|
| 6 |
import tempfile
|
| 7 |
from typing import Optional
|
| 8 |
import numpy as np
|
| 9 |
from TTS.utils.manage import ModelManager
|
| 10 |
from TTS.utils.synthesizer import Synthesizer
|
| 11 |
-
|
| 12 |
-
# PersistDataset -----
|
| 13 |
import os
|
| 14 |
import csv
|
| 15 |
-
import gradio as gr
|
| 16 |
-
from gradio import inputs, outputs
|
| 17 |
import huggingface_hub
|
| 18 |
from huggingface_hub import Repository, hf_hub_download, upload_file
|
| 19 |
from datetime import datetime
|
| 20 |
|
| 21 |
-
#
|
|
|
|
| 22 |
DATASET_REPO_URL = "https://huggingface.co/datasets/awacke1/MindfulStory.csv"
|
| 23 |
DATASET_REPO_ID = "awacke1/MindfulStory.csv"
|
| 24 |
DATA_FILENAME = "MindfulStory.csv"
|
| 25 |
DATA_FILE = os.path.join("data", DATA_FILENAME)
|
| 26 |
HF_TOKEN = os.environ.get("HF_TOKEN")
|
| 27 |
|
| 28 |
-
#
|
| 29 |
try:
|
| 30 |
hf_hub_download(
|
| 31 |
repo_id=DATASET_REPO_ID,
|
| 32 |
filename=DATA_FILENAME,
|
| 33 |
-
cache_dir=
|
| 34 |
force_filename=DATA_FILENAME
|
| 35 |
)
|
| 36 |
except:
|
| 37 |
-
print("
|
| 38 |
-
|
|
|
|
| 39 |
def AIMemory(name: str, message: str):
|
| 40 |
if name and message:
|
| 41 |
with open(DATA_FILE, "a") as csvfile:
|
|
@@ -44,16 +41,13 @@ def AIMemory(name: str, message: str):
|
|
| 44 |
commit_url = repo.push_to_hub()
|
| 45 |
return {"name": name, "message": message, "time": str(datetime.now())}
|
| 46 |
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
# Set up cloned dataset from repo for operations
|
| 51 |
-
repo = Repository( local_dir="data", clone_from=DATASET_REPO_URL, use_auth_token=HF_TOKEN)
|
| 52 |
|
| 53 |
-
#
|
| 54 |
asr = pipeline("automatic-speech-recognition", "facebook/wav2vec2-base-960h")
|
| 55 |
|
| 56 |
-
#
|
| 57 |
MODEL_NAMES = [
|
| 58 |
"en/ljspeech/tacotron2-DDC",
|
| 59 |
"en/ljspeech/glow-tts",
|
|
@@ -64,11 +58,11 @@ MODEL_NAMES = [
|
|
| 64 |
"de/thorsten/tacotron2-DCA",
|
| 65 |
]
|
| 66 |
|
| 67 |
-
# Use Model Manager to load vocoders
|
| 68 |
MODELS = {}
|
| 69 |
manager = ModelManager()
|
| 70 |
for MODEL_NAME in MODEL_NAMES:
|
| 71 |
-
print(f"
|
| 72 |
model_path, config_path, model_item = manager.download_model(f"tts_models/{MODEL_NAME}")
|
| 73 |
vocoder_name: Optional[str] = model_item["default_vocoder"]
|
| 74 |
vocoder_path = None
|
|
@@ -81,80 +75,80 @@ for MODEL_NAME in MODEL_NAMES:
|
|
| 81 |
)
|
| 82 |
MODELS[MODEL_NAME] = synthesizer
|
| 83 |
|
| 84 |
-
#
|
| 85 |
def transcribe(audio):
|
| 86 |
text = asr(audio)["text"]
|
| 87 |
return text
|
| 88 |
|
| 89 |
-
#
|
| 90 |
classifier = pipeline("text-classification")
|
| 91 |
|
| 92 |
-
|
| 93 |
def speech_to_text(speech):
|
| 94 |
text = asr(speech)["text"]
|
| 95 |
-
#rMem = AIMemory("STT", text)
|
| 96 |
return text
|
| 97 |
|
|
|
|
| 98 |
def text_to_sentiment(text):
|
| 99 |
sentiment = classifier(text)[0]["label"]
|
| 100 |
-
#rMem = AIMemory(text, sentiment)
|
| 101 |
return sentiment
|
| 102 |
|
|
|
|
| 103 |
def upsert(text):
|
| 104 |
-
date_time =str(datetime.datetime.today())
|
| 105 |
doc_ref = db.collection('Text2SpeechSentimentSave').document(date_time)
|
| 106 |
-
doc_ref.set({
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
saved = select('TTS-STT', date_time)
|
| 108 |
return saved
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
doc_ref = db.collection(collection).document(document)
|
| 112 |
-
doc = doc_ref.get()
|
| 113 |
-
docid = ("The id is: ", doc.id)
|
| 114 |
-
contents = ("The contents are: ", doc.to_dict())
|
| 115 |
-
return contents
|
| 116 |
-
|
| 117 |
def selectall(text):
|
| 118 |
docs = db.collection('Text2SpeechSentimentSave').stream()
|
| 119 |
-
doclist=''
|
| 120 |
for doc in docs:
|
| 121 |
-
r=(f'{doc.id} => {doc.to_dict()}')
|
| 122 |
doclist += r
|
| 123 |
return doclist
|
| 124 |
-
|
|
|
|
| 125 |
def tts(text: str, model_name: str):
|
| 126 |
print(text, model_name)
|
| 127 |
synthesizer = MODELS.get(model_name, None)
|
| 128 |
if synthesizer is None:
|
| 129 |
-
raise NameError("
|
|
|
|
| 130 |
wavs = synthesizer.tts(text)
|
| 131 |
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
|
| 132 |
synthesizer.save_wav(wavs, fp)
|
| 133 |
-
|
| 134 |
-
#rMem = AIMemory("TTS", text + model_name)
|
| 135 |
-
|
| 136 |
return fp.name
|
| 137 |
|
|
|
|
| 138 |
demo = gr.Blocks()
|
|
|
|
| 139 |
with demo:
|
| 140 |
-
|
|
|
|
|
|
|
|
|
|
| 141 |
text = gr.Textbox(label="Speech to Text")
|
| 142 |
-
|
| 143 |
-
#
|
| 144 |
-
|
| 145 |
-
|
|
|
|
| 146 |
audio = gr.Audio(label="Output", interactive=False)
|
| 147 |
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
b1.click(speech_to_text, inputs=audio_file, outputs=text)
|
| 155 |
-
|
| 156 |
-
#b3.click(upsert, inputs=text, outputs=saved)
|
| 157 |
-
#b4.click(selectall, inputs=text, outputs=savedAll)
|
| 158 |
-
b5.click(tts, inputs=[text,TTSchoice], outputs=audio)
|
| 159 |
|
| 160 |
-
demo.launch(share=True)
|
|
|
|
| 2 |
import datetime
|
| 3 |
from transformers import pipeline
|
| 4 |
import gradio as gr
|
|
|
|
| 5 |
import tempfile
|
| 6 |
from typing import Optional
|
| 7 |
import numpy as np
|
| 8 |
from TTS.utils.manage import ModelManager
|
| 9 |
from TTS.utils.synthesizer import Synthesizer
|
|
|
|
|
|
|
| 10 |
import os
|
| 11 |
import csv
|
|
|
|
|
|
|
| 12 |
import huggingface_hub
|
| 13 |
from huggingface_hub import Repository, hf_hub_download, upload_file
|
| 14 |
from datetime import datetime
|
| 15 |
|
| 16 |
+
# Set up dataset repo
|
| 17 |
+
# Created new dataset as awacke1/MindfulStory.csv
|
| 18 |
DATASET_REPO_URL = "https://huggingface.co/datasets/awacke1/MindfulStory.csv"
|
| 19 |
DATASET_REPO_ID = "awacke1/MindfulStory.csv"
|
| 20 |
DATA_FILENAME = "MindfulStory.csv"
|
| 21 |
DATA_FILE = os.path.join("data", DATA_FILENAME)
|
| 22 |
HF_TOKEN = os.environ.get("HF_TOKEN")
|
| 23 |
|
| 24 |
+
# Oops! Try downloading the dataset (We hope it works!)
|
| 25 |
try:
|
| 26 |
hf_hub_download(
|
| 27 |
repo_id=DATASET_REPO_ID,
|
| 28 |
filename=DATA_FILENAME,
|
| 29 |
+
cache_dir="data",
|
| 30 |
force_filename=DATA_FILENAME
|
| 31 |
)
|
| 32 |
except:
|
| 33 |
+
print("π¬ File not found, weβll act like itβs not a problem...")
|
| 34 |
+
|
| 35 |
+
# AI Memory: Because forgetting is for humans
|
| 36 |
def AIMemory(name: str, message: str):
|
| 37 |
if name and message:
|
| 38 |
with open(DATA_FILE, "a") as csvfile:
|
|
|
|
| 41 |
commit_url = repo.push_to_hub()
|
| 42 |
return {"name": name, "message": message, "time": str(datetime.now())}
|
| 43 |
|
| 44 |
+
# Repository setup! Let's clone like pros
|
| 45 |
+
repo = Repository(local_dir="data", clone_from=DATASET_REPO_URL, use_auth_token=HF_TOKEN)
|
|
|
|
|
|
|
|
|
|
| 46 |
|
| 47 |
+
# Set up Speech Recognition
|
| 48 |
asr = pipeline("automatic-speech-recognition", "facebook/wav2vec2-base-960h")
|
| 49 |
|
| 50 |
+
# Set up TTS Models. Let's find that sweet robotic voice!
|
| 51 |
MODEL_NAMES = [
|
| 52 |
"en/ljspeech/tacotron2-DDC",
|
| 53 |
"en/ljspeech/glow-tts",
|
|
|
|
| 58 |
"de/thorsten/tacotron2-DCA",
|
| 59 |
]
|
| 60 |
|
| 61 |
+
# Use Model Manager to load vocoders (Fancy tech magic here)
|
| 62 |
MODELS = {}
|
| 63 |
manager = ModelManager()
|
| 64 |
for MODEL_NAME in MODEL_NAMES:
|
| 65 |
+
print(f"π Downloading {MODEL_NAME}... because waiting is fun!")
|
| 66 |
model_path, config_path, model_item = manager.download_model(f"tts_models/{MODEL_NAME}")
|
| 67 |
vocoder_name: Optional[str] = model_item["default_vocoder"]
|
| 68 |
vocoder_path = None
|
|
|
|
| 75 |
)
|
| 76 |
MODELS[MODEL_NAME] = synthesizer
|
| 77 |
|
| 78 |
+
# Transcribe function: Turning audio into text with a sprinkle of magic!
|
| 79 |
def transcribe(audio):
|
| 80 |
text = asr(audio)["text"]
|
| 81 |
return text
|
| 82 |
|
| 83 |
+
# Text classifier (because we love labeling things, right?)
|
| 84 |
classifier = pipeline("text-classification")
|
| 85 |
|
| 86 |
+
# Speech to Text: Give me your voice, I'll give you text!
|
| 87 |
def speech_to_text(speech):
|
| 88 |
text = asr(speech)["text"]
|
|
|
|
| 89 |
return text
|
| 90 |
|
| 91 |
+
# Sentiment Analysis (because even robots care about feelings)
|
| 92 |
def text_to_sentiment(text):
|
| 93 |
sentiment = classifier(text)[0]["label"]
|
|
|
|
| 94 |
return sentiment
|
| 95 |
|
| 96 |
+
# Saving it for later: Store this priceless info!
|
| 97 |
def upsert(text):
|
| 98 |
+
date_time = str(datetime.datetime.today())
|
| 99 |
doc_ref = db.collection('Text2SpeechSentimentSave').document(date_time)
|
| 100 |
+
doc_ref.set({
|
| 101 |
+
u'firefield': 'Recognize Speech',
|
| 102 |
+
u'first': 'https://huggingface.co/spaces/awacke1/TTS-STT-Blocks/',
|
| 103 |
+
u'last': text,
|
| 104 |
+
u'born': date_time,
|
| 105 |
+
})
|
| 106 |
saved = select('TTS-STT', date_time)
|
| 107 |
return saved
|
| 108 |
+
|
| 109 |
+
# Retrieve all records: Gotta catch 'em all!
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 110 |
def selectall(text):
|
| 111 |
docs = db.collection('Text2SpeechSentimentSave').stream()
|
| 112 |
+
doclist = ''
|
| 113 |
for doc in docs:
|
| 114 |
+
r = (f'{doc.id} => {doc.to_dict()}')
|
| 115 |
doclist += r
|
| 116 |
return doclist
|
| 117 |
+
|
| 118 |
+
# Text to Speech (Because speaking is fun, but robots do it better)
|
| 119 |
def tts(text: str, model_name: str):
|
| 120 |
print(text, model_name)
|
| 121 |
synthesizer = MODELS.get(model_name, None)
|
| 122 |
if synthesizer is None:
|
| 123 |
+
raise NameError("π¬ Oops! Model not found.")
|
| 124 |
+
|
| 125 |
wavs = synthesizer.tts(text)
|
| 126 |
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
|
| 127 |
synthesizer.save_wav(wavs, fp)
|
|
|
|
|
|
|
|
|
|
| 128 |
return fp.name
|
| 129 |
|
| 130 |
+
# Gradio UI with Emoji and Fun Comments
|
| 131 |
demo = gr.Blocks()
|
| 132 |
+
|
| 133 |
with demo:
|
| 134 |
+
# Microphone input to capture your golden voice
|
| 135 |
+
audio_file = gr.Audio(source="microphone", type="filepath")
|
| 136 |
+
|
| 137 |
+
# Textbox to display transcribed text
|
| 138 |
text = gr.Textbox(label="Speech to Text")
|
| 139 |
+
|
| 140 |
+
# Radio input to choose the best Text to Speech model
|
| 141 |
+
TTSchoice = gr.Radio(label="Pick a Text to Speech Model", choices=MODEL_NAMES)
|
| 142 |
+
|
| 143 |
+
# Audio player to play back the robot's voice
|
| 144 |
audio = gr.Audio(label="Output", interactive=False)
|
| 145 |
|
| 146 |
+
# Buttons for all your needs
|
| 147 |
+
b1 = gr.Button("π€ Recognize Speech")
|
| 148 |
+
b5 = gr.Button("π Read It Back Aloud")
|
| 149 |
+
|
| 150 |
+
# Click buttons to perform actions!
|
|
|
|
| 151 |
b1.click(speech_to_text, inputs=audio_file, outputs=text)
|
| 152 |
+
b5.click(tts, inputs=[text, TTSchoice], outputs=audio)
|
|
|
|
|
|
|
|
|
|
| 153 |
|
| 154 |
+
demo.launch(share=True)
|