Spaces:
Build error
Build error
| import os | |
| from PIL import Image, ImageDraw, ImageFont | |
| import gradio as gr | |
| from helper import load_image_from_url, render_results_in_image | |
| from helper import summarize_predictions_natural_language | |
| from transformers import pipeline | |
from transformers.utils import logging
from helper import ignore_warnings

# Quiet the console before any model is loaded: restrict transformers'
# logger to errors, then apply the project's own warning suppression.
logging.set_verbosity_error()
ignore_warnings()
# Both pipelines are built once at import time; the request handler below
# reuses these module-level objects on every call.
od_pipe = pipeline(task="object-detection", model="facebook/detr-resnet-50")
tts_pipe = pipeline(task="text-to-speech", model="kakao-enterprise/vits-ljs")
def get_pipeline_prediction(pil_image):
    """Detect objects in an image and narrate a summary of what was found.

    Args:
        pil_image: The picture to analyse. The Gradio input component that
            feeds this callback is declared with ``type="pil"``.

    Returns:
        A 2-tuple ``(processed_image, (sampling_rate, waveform))``:
        the result of ``render_results_in_image`` for the detections, and
        the synthesized narration in the ``(rate, samples)`` form passed to
        the ``gr.Audio(type="numpy")`` output.
    """
    pipeline_output = od_pipe(pil_image)
    processed_image = render_results_in_image(pil_image, pipeline_output)

    # Turn the raw detections into a sentence and log it for traceability.
    text = summarize_predictions_natural_language(pipeline_output)
    print(text)

    narrated_text = tts_pipe(text)
    waveform = narrated_text["audio"]
    # The original code always took element 0, i.e. it expected a leading
    # batch axis. Keep doing that for multi-dimensional (or non-array)
    # results, but leave an already 1-D waveform intact instead of reducing
    # it to a single sample.
    # NOTE(review): assumes the pipeline yields (1, samples) or (samples,)
    # for a single input string -- confirm against the installed version.
    if getattr(waveform, "ndim", 2) > 1:
        waveform = waveform[0]

    return processed_image, (narrated_text["sampling_rate"], waveform)
# Input: one uploaded picture, handed to the callback as a PIL image.
input_image = gr.Image(label="Input image", type="pil")

# Outputs, in the same order as the callback's return tuple: the annotated
# picture first, then the spoken summary (set to autoplay).
output_components = [
    gr.Image(label="Output image with predicted instances", type="pil"),
    gr.Audio(label="Narration", type="numpy", autoplay=True),
]

demo = gr.Interface(
    fn=get_pipeline_prediction,
    inputs=input_image,
    outputs=output_components,
)

# Serve on every network interface at port 7860.
demo.launch(server_name="0.0.0.0", server_port=7860)