Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| from pdfminer.high_level import extract_text | |
| import logging | |
| from typing import cast | |
| from balacoon_tts import TTS | |
| from huggingface_hub import hf_hub_download, list_repo_files | |
# Global TTS engine shared across the module. It starts as None and is
# replaced by set_model() with a TTS instance built from the selected model.
tts = None
def read_pdf(file):
    """
    Returns the text that pdfminer extracts from the uploaded PDF.

    `file` is the upload object handed over by the UI; only its `name`
    attribute (the path on disk) is used. The file is opened in binary
    mode and closed again before returning.
    """
    with open(file.name, "rb") as pdf_stream:
        return extract_text(pdf_stream)
def set_model(model_name_str):
    """
    Downloads the model file `model_name_str` from the `balacoon/tts` repo,
    re-initializes the global `tts` object from it, and returns the list of
    speakers that the model supports.

    The speakers are returned rather than written into a UI component: the
    `speaker` dropdown is a local variable of `main()`, so assigning to it
    from here raised a NameError.
    """
    global tts
    model_path = hf_hub_download(repo_id="balacoon/tts", filename=model_name_str)
    tts = TTS(model_path)
    return tts.get_speakers()


def main():
    """
    Builds the PDF-to-speech Gradio interface and launches it.

    The available models are the `*_cpu.addon` files of the `balacoon/tts`
    repo. A model is loaded lazily, on the first synthesis request that
    needs it, and reloaded only when a different model is selected.
    """
    logging.basicConfig(level=logging.INFO)
    repo_files = list_repo_files(repo_id="balacoon/tts")
    model_files = [x for x in repo_files if x.endswith("_cpu.addon")]

    # NOTE(review): `gr.inputs` / `gr.outputs` and the `layout` / `debug`
    # keyword arguments below are kept exactly as they were; whether they are
    # accepted depends on the installed Gradio version -- confirm against the
    # version pinned for this app.
    model_name_dropdown = gr.inputs.Dropdown(label="Model", choices=model_files)
    # The speaker list is only known once a model is loaded, so the dropdown
    # starts empty; synthesize_audio() falls back to a default speaker.
    speaker = gr.inputs.Dropdown(label="Speaker", choices=[])
    file_input = gr.inputs.File(label="Select a PDF File", type="file")
    audio = gr.outputs.Audio(label="Generated Audio", type="numpy")

    # Name of the model currently held in the global `tts`, so that repeated
    # requests with the same model do not trigger a new download/load.
    loaded_model_name = None

    def synthesize_audio(file, model_name_str, speaker_str):
        """
        Gets the selected PDF `file`, model name from `model_name`,
        and speaker name from `speaker`. Synthesizes the audio waveform
        from the first 1024 characters of text extracted from the PDF.

        Returns a `(sampling_rate, samples)` tuple, or None when there is
        nothing to synthesize (no file, no model, or no extractable text).
        """
        nonlocal loaded_model_name

        if file is None or file.name == "":
            logging.info("No file selected.")
            return None
        if not model_name_str:
            logging.info("No model selected.")
            return None

        # Slicing already handles texts shorter than the limit.
        text_str = read_pdf(file)[:1024]
        if not text_str.strip():
            logging.info("No text could be extracted from the PDF.")
            return None

        # Previously nothing ever loaded a model, so `tts` was still None
        # here and the call to synthesize() failed.
        if tts is None or model_name_str != loaded_model_name:
            speakers = set_model(model_name_str)
            loaded_model_name = model_name_str
        else:
            speakers = tts.get_speakers()

        # Use the model's last speaker when the requested one is missing or
        # does not belong to the loaded model.
        if speakers and speaker_str not in speakers:
            speaker_str = speakers[-1]

        engine = cast(TTS, tts)
        samples = engine.synthesize(text_str, speaker_str)
        return (engine.get_sampling_rate(), samples)

    iface = gr.Interface(
        fn=synthesize_audio,
        inputs=[file_input, model_name_dropdown, speaker],
        outputs=audio,
        title="PDF TO SPEECH CONVERTER",
        layout="rows",
        debug=True,
    )
    iface.launch()
# Start the app only when this file is run as a script.
if __name__ == "__main__":
    main()