Spaces:

Sambhavnoobcoder
/

PDF-text-extractor_sd_1

Runtime error

App Files Files Community

Sambhavnoobcoder committed on Jun 18, 2023

Commit

c764bfb

1 Parent(s): 214cc79

Tried to combine the best of both worlds to make a half-decent Gradio interface. Next, I will try to manage their library dependencies.

Browse files

Files changed (1) hide show

app.py +99 -6

app.py CHANGED Viewed

@@ -1,14 +1,107 @@
 import gradio as gr
 import pdfminer
 from pdfminer.high_level import extract_text
 def read_pdf(file):
     """
     extracts the text of an uploaded PDF.
     `file` is the upload object handed over by gradio; its `.name`
     attribute is passed to pdfminer's `extract_text`.
     returns the extracted text, or an empty string when nothing was uploaded.
     """
     # gradio hands over None when the file input is empty; `None.name` would raise
     if file is None:
         return ""
     return extract_text(file.name)
-iface = gr.Interface(
-    read_pdf,
-    gr.inputs.File(),
-    # gr.outputs.Textbox()
-)
-iface.launch()

 import gradio as gr
 import pdfminer
 from pdfminer.high_level import extract_text
+import logging
+from typing import cast
+import gradio as gr
+from balacoon_tts import TTS
+from huggingface_hub import hf_hub_download, list_repo_files
+# global TTS engine; stays None until a model is selected in the UI and loaded by `set_model`
+tts = None
 def read_pdf(file):
     """
     extracts the text of an uploaded PDF.
     `file` is the upload object handed over by gradio; its `.name`
     attribute is passed to pdfminer's `extract_text`.
     returns the extracted text, or an empty string when nothing was uploaded.
     """
     # gradio hands over None when the file input is empty; `None.name` would raise
     if file is None:
         return ""
     return extract_text(file.name)
+# iface = gr.Interface(
+#     read_pdf,
+#     gr.inputs.File(),
+#     # gr.outputs.Textbox()
+# )
+# iface.launch()
+def main():
+    logging.basicConfig(level=logging.INFO)
+    with gr.Blocks() as demo:
+        gr.Markdown(
+            """
+            <h1 align="center">PDF TO SPEECH CONVERTER</h1>
+            1. insert a pdf
+            2. Select the model to synthesize with
+            3. Select speaker
+            4. Hit "Generate" and listen to the result!
+            When you select model for the first time,
+            it will take a little time to download it.
+            this project is designed to take the love
+            of reading without the hassle of looking over.
+            if you want an audio book , you now got it .
+            """
+        )
+        with gr.Row(variant="panel"):
+            f=gr.inputs.File()
+            text = read_pdf(f)
+        with gr.Row():
+            with gr.Column(variant="panel"):
+                repo_files = list_repo_files(repo_id="balacoon/tts")
+                model_files = [x for x in repo_files if x.endswith("_cpu.addon")]
+                model_name = gr.Dropdown(
+                    label="Model",
+                    choices=model_files,
+                )
+            with gr.Column(variant="panel"):
+                speaker = gr.Dropdown(label="Speaker", choices=[])
+            def set_model(model_name_str: str):
+                """
+                gets value from `model_name`, loads model,
+                re-initializes tts object, gets list of
+                speakers that model supports and set them to `speaker`
+                """
+                model_path = hf_hub_download(
+                    repo_id="balacoon/tts", filename=model_name_str
+                )
+                global tts
+                tts = TTS(model_path)
+                speakers = tts.get_speakers()
+                value = speakers[-1]
+                return gr.Dropdown.update(
+                    choices=speakers, value=value, visible=True
+                )
+            model_name.change(set_model, inputs=model_name, outputs=speaker)
+        with gr.Row(variant="panel"):
+            generate = gr.Button("Generate")
+        with gr.Row(variant="panel"):
+            audio = gr.Audio()
+        def synthesize_audio(text_str: str, speaker_str: str = ""):
+            """
+            gets utterance to synthesize from `text` Textbox
+            and speaker name from `speaker` dropdown list.
+            speaker name might be empty for single-speaker models.
+            Synthesizes the waveform and updates `audio` with it.
+            """
+            if not text_str:
+                logging.info("text or speaker are not provided")
+                return None
+            global tts
+            if len(text_str) > 1024:
+                text_str = text_str[:1024]
+            samples = cast(TTS, tts).synthesize(text_str, speaker_str)
+            return gr.Audio.update(value=(cast(TTS, tts).get_sampling_rate(), samples))
+        generate.click(synthesize_audio, inputs=[text, speaker], outputs=audio)
+    demo.launch()
+# build and launch the demo only when this file is executed as a script, not when imported
+if __name__ == "__main__":
+    main()