Spaces:
Running
Running
| import gradio as gr | |
| from transformers import pipeline | |
| from helper import load_image_from_url, render_results_in_image | |
| from helper import summarize_predictions_natural_language | |
# Object-detection pipeline, built once at import time and shared by every
# request handled by the app.
od_pipe = pipeline(
    task="object-detection",
    model="facebook/detr-resnet-50",
)

# Text-to-speech pipeline: currently disabled.
# tts_pipe = pipeline("text-to-speech",
#                     model="kakao-enterprise/vits-ljs")
def get_pipeline_prediction(pil_image):
    """Detect objects in an image and describe them in text.

    Runs the module-level ``od_pipe`` object-detection pipeline on
    ``pil_image`` and passes the raw detections to the two ``helper``
    functions: ``summarize_predictions_natural_language`` builds the text
    and ``render_results_in_image`` produces the annotated image.

    Args:
        pil_image: Image to analyse. The Gradio input component is declared
            with ``type="pil"``, so this is presumably a PIL image. Gradio
            passes ``None`` when the form is submitted without an image.

    Returns:
        A ``(processed_image, text)`` tuple, in the order of the two Gradio
        output components. When ``pil_image`` is ``None`` the model is not
        run and ``(None, <hint message>)`` is returned instead.
    """
    # Guard against an empty submission: feeding None to the pipeline would
    # raise and surface as an opaque error in the UI.
    if pil_image is None:
        return None, "No image provided. Please upload an image."

    pipeline_output = od_pipe(pil_image)
    text = summarize_predictions_natural_language(pipeline_output)
    processed_image = render_results_in_image(pil_image, pipeline_output)

    # NOTE: speech narration of `text` is disabled. Re-enabling it means
    # running `tts_pipe(text)` and returning a third value,
    # `(gen_audio["sampling_rate"], gen_audio["audio"][0])`, together with a
    # matching gr.Audio output component.
    return processed_image, text
# Gradio UI: one image goes in; the annotated image and a text summary of
# the detections come out.
_input_image = gr.Image(label="Input image", type="pil")
_output_components = [
    gr.Image(label="Output image with predicted instances", type="pil"),
    gr.Textbox(label="Prediction Summary"),
    # gr.Audio(label="Generated Speech") is disabled along with the TTS step.
]

demo = gr.Interface(
    fn=get_pipeline_prediction,
    inputs=_input_image,
    outputs=_output_components,
)

# Start serving the interface.
demo.launch()
| #text = itt_pipe(input) | |
| #tts_pipe = pipeline("text-to-speech", | |
| # model="kakao-enterprise/vits-ljs") | |
| #narrated_text = tts_pipe(text[0]['generated_text']) | |
| #def launch(text): | |
| # out = tts_pipe(text) | |
| # audio = IPythonAudio(out["audio"][0], | |
| # rate=out["sampling_rate"]) | |
| # return audio | |
| #iface = gr.Interface(launch, | |
| # inputs=gr.Image(type='pil'), | |
| # outputs="text") | |
| #iface.launch() | |