Spaces:
Runtime error
Runtime error
Commit ·
f08cb6e
1
Parent(s): d343553
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,12 +1,57 @@
|
|
|
|
|
| 1 |
from transformers import pipeline
|
| 2 |
-
|
| 3 |
-
|
|
|
|
| 4 |
|
| 5 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
-
|
| 8 |
-
|
| 9 |
|
| 10 |
-
|
|
|
|
| 11 |
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
from transformers import pipeline
|
| 3 |
+
from espnet2.bin.tts_inference import Text2Speech
|
| 4 |
+
import warnings
|
| 5 |
+
warnings.filterwarnings('ignore')
|
| 6 |
|
| 7 |
+
def generateTextAndAudio(inputText, numGen):
    """Continue ``inputText`` with the text generator and synthesize speech for it.

    Relies on the module-level ``textGenerator`` (text-generation pipeline)
    and ``audioGenerator`` (ESPnet ``Text2Speech``) objects.

    Args:
        inputText: Prompt text supplied by the user.
        numGen: Value passed to the generator as ``max_length``. It comes
            from a UI slider, so it may arrive as a float.

    Returns:
        A tuple ``(generated_text, (sample_rate, waveform))`` where
        ``waveform`` is the synthesized audio converted with ``.numpy()``.
    """
    # --- Generating the Text ---
    # The slider may hand us a float (e.g. 30.0); `max_length` must be an integer.
    maxLength = int(numGen)
    # With the provided text from user, generate more text up to `maxLength` tokens/sub-words
    textOutput = textGenerator(inputText, max_length=maxLength)
    # The output of the text generator is a list of dictionaries, grab the first dictionary
    # then get the generated text from the dictionary using the `generated_text` key
    genText = textOutput[0]['generated_text']

    print("-"*75)
    print("Input Text:", inputText)
    print("Generated Text:", genText)
    print("-"*75)

    # --- Generating the Audio ---
    # With the newly generated text, generate some speech
    audioOutput = audioGenerator(genText)
    # Get the wav data
    genAudio = audioOutput['wav']

    # Use the sampling rate reported by the TTS model instead of a hard-coded
    # constant: a mismatched rate makes playback too fast/slow and shifts pitch.
    # NOTE(review): LJSpeech models are presumably 22.05 kHz, not 24 kHz -- confirm.
    # The previous hard-coded 24000 is kept only as a fallback when the model
    # does not expose a rate.
    sampleRate = getattr(audioGenerator, "fs", None) or 24000

    # Return two things
    # 1) Generated Text
    # 2) The sampling rate, and the Generated Audio (wav) as numpy (instead of tensor)
    return genText, (sampleRate, genAudio.numpy())
|
| 31 |
+
|
| 32 |
+
# Main
# Load both models once at start-up so every request reuses them.
textGenerator = pipeline('text-generation', model='gpt2')
audioGenerator = Text2Speech.from_pretrained("espnet/kan-bayashi_ljspeech_joint_finetune_conformer_fastspeech2_hifigan")

# --- Input components ---
input1_textbox = gr.Textbox(label="Input text")
# The initial slider position is set with `value`; the top-level `gr.Slider`
# component does not take `default` (that was the old `gr.inputs.Slider` keyword).
input2_slider = gr.Slider(minimum=1, maximum=100, step=1, value=30, label="Number of words to generate")

# --- Output components ---
output1_textbox = gr.Textbox(label="Generated Text")
output2_Audio = gr.Audio(label="Generated Audio")

# --- Page text ---
title = "Generate Text and its Audio!"
description = "Provide the text, and how many subwords to generate"

# Each example row is [input text, slider value], matching the order of `inputs` below.
examples = [
    ["I won a", 50],
    ["My name is", 30],
    ["I have", 60],
]
article = "<p style='text-align: center'><img src='https://visitor-badge.glitch.me/badge?page_id=lilyf_generate_text_and_audio' alt='visitor badge'></p>"

# Build the interface first so `iface` refers to the Interface object itself
# (not to the return value of `launch`), then start the server.
iface = gr.Interface(
    fn=generateTextAndAudio,
    inputs=[input1_textbox, input2_slider],
    outputs=[output1_textbox, output2_Audio],
    title=title,
    description=description,
    examples=examples,
    article=article,
)
iface.launch(debug=True)
|