Spaces:
Runtime error
Runtime error
Tried to combine the best of both worlds to make a half-decent Gradio interface. Now I will try to manage their library dependencies.
c764bfb
| import gradio as gr | |
| import pdfminer | |
| from pdfminer.high_level import extract_text | |
| import logging | |
| from typing import cast | |
| import gradio as gr | |
| from balacoon_tts import TTS | |
| from huggingface_hub import hf_hub_download, list_repo_files | |
| # Module-level TTS engine shared by the UI callbacks; stays None until set_model() loads the selected model. | |
| tts = None | |
def read_pdf(file):
    """Extract the plain text of an uploaded PDF.

    Args:
        file: The uploaded PDF. Either an object exposing the on-disk
            location as ``.name``, a plain path string, or ``None``
            when nothing has been uploaded.

    Returns:
        The text produced by pdfminer's ``extract_text``, or an empty
        string when no file was supplied.
    """
    if file is None:
        # An empty or cleared upload has no ``.name``; return no text
        # instead of failing with AttributeError.
        return ""
    # Fall back to the value itself so a bare path string works as well.
    pdf_path = getattr(file, "name", file)
    return extract_text(pdf_path)
| # iface = gr.Interface( | |
| # read_pdf, | |
| # gr.inputs.File(), | |
| # # gr.outputs.Textbox() | |
| # ) | |
| # iface.launch() | |
def main():
    """Build the PDF-to-speech Gradio page and launch it.

    The page offers a PDF upload whose extracted text fills an editable
    textbox, a model dropdown listing the ``*_cpu.addon`` files of the
    ``balacoon/tts`` hub repository, a speaker dropdown that is filled
    once a model has been loaded, and a "Generate" button that
    synthesizes the text into the audio player.
    """
    logging.basicConfig(level=logging.INFO)

    with gr.Blocks() as demo:
        gr.Markdown(
            """
            <h1 align="center">PDF TO SPEECH CONVERTER</h1>
            1. insert a pdf
            2. Select the model to synthesize with
            3. Select speaker
            4. Hit "Generate" and listen to the result!
            When you select model for the first time,
            it will take a little time to download it.
            this project is designed to take the love
            of reading without the hassle of looking over.
            if you want an audio book , you now got it .
            """
        )
        with gr.Row(variant="panel"):
            # Components only describe the page; an uploaded file exists
            # solely inside event handlers. Extraction is therefore wired
            # to the upload's change event instead of being called while
            # the layout is built.
            pdf_file = gr.File(label="PDF")
            text = gr.Textbox(label="Text", lines=8)

        def load_pdf_text(uploaded):
            """
            gets the uploaded file from `pdf_file` and returns its text
            for the `text` Textbox; empty when the upload was cleared.
            """
            if uploaded is None:
                return ""
            return read_pdf(uploaded)

        pdf_file.change(load_pdf_text, inputs=pdf_file, outputs=text)

        with gr.Row():
            with gr.Column(variant="panel"):
                repo_files = list_repo_files(repo_id="balacoon/tts")
                model_files = [x for x in repo_files if x.endswith("_cpu.addon")]
                model_name = gr.Dropdown(
                    label="Model",
                    choices=model_files,
                )
            with gr.Column(variant="panel"):
                speaker = gr.Dropdown(label="Speaker", choices=[])

        def set_model(model_name_str: str):
            """
            gets value from `model_name`, loads model,
            re-initializes tts object, gets list of
            speakers that model supports and set them to `speaker`
            """
            global tts
            if not model_name_str:
                # The selection was cleared: drop the loaded model and
                # empty the speaker list instead of asking the hub for
                # a file without a name.
                tts = None
                return gr.Dropdown.update(choices=[], value=None)
            model_path = hf_hub_download(
                repo_id="balacoon/tts", filename=model_name_str
            )
            tts = TTS(model_path)
            speakers = tts.get_speakers()
            # Pre-select the last speaker; guard against a model that
            # reports none, where indexing would raise IndexError.
            value = speakers[-1] if speakers else None
            return gr.Dropdown.update(
                choices=speakers, value=value, visible=True
            )

        model_name.change(set_model, inputs=model_name, outputs=speaker)

        with gr.Row(variant="panel"):
            generate = gr.Button("Generate")
        with gr.Row(variant="panel"):
            audio = gr.Audio()

        def synthesize_audio(text_str: str, speaker_str: str = ""):
            """
            gets utterance to synthesize from `text` Textbox
            and speaker name from `speaker` dropdown list.
            speaker name might be empty for single-speaker models.
            Synthesizes the waveform and updates `audio` with it.
            Returns None (no audio) when there is no text or no model
            has been loaded yet.
            """
            if not text_str:
                logging.info("text or speaker are not provided")
                return None
            if tts is None:
                # "Generate" was pressed before any model was selected.
                logging.warning("no model is selected, nothing to synthesize with")
                return None
            engine = cast(TTS, tts)
            # Only the first 1024 characters are synthesized per request.
            # An unset dropdown yields None, so fall back to the empty
            # speaker name that this callback uses as its default.
            samples = engine.synthesize(text_str[:1024], speaker_str or "")
            return gr.Audio.update(value=(engine.get_sampling_rate(), samples))

        generate.click(synthesize_audio, inputs=[text, speaker], outputs=audio)

    demo.launch()
| # Build and launch the demo only when this file is executed as a script, not when it is imported. | |
| if __name__ == "__main__": | |
| main() | |