"""Gradio demo: object detection on an uploaded image with DETR.

Builds a ``gr.Interface`` that runs the ``facebook/detr-resnet-50``
object-detection pipeline on an uploaded image and shows an annotated image
together with a text summary of the detections.
"""

import gradio as gr
from transformers import pipeline

from helper import (
    ignore_warnings,
    render_results_in_image,
    summarize_predictions_natural_language,
)

# Suppress non-critical warnings
ignore_warnings()

# Load DETR object detection pipeline with lower threshold
pipe = pipeline(
    "object-detection",
    model="facebook/detr-resnet-50",
    threshold=0.3,
)


def get_pipeline_prediction(pil_image):
    """Run object detection on an image and build the two UI outputs.

    Args:
        pil_image: The uploaded image as delivered by the ``gr.Image``
            input (configured with ``type="pil"``). May be ``None`` when
            the form is submitted without an image.

    Returns:
        A ``(image, summary)`` tuple matching the interface's two outputs:
        the image to display and the summary text. When no image was
        supplied the image is ``None``; when nothing is detected the input
        image is returned unchanged.
    """
    # Gradio hands the callback None if the user submits an empty image
    # field; bail out early instead of letting the pipeline raise.
    if pil_image is None:
        return None, "Please upload an image."

    pipeline_output = pipe(pil_image)

    # An empty result means nothing passed the detection threshold, so there
    # is nothing to render or summarize.
    if not pipeline_output:
        return pil_image, "No objects detected."

    # Both helpers come from the local `helper` module; presumably the first
    # draws the detections onto the image and the second describes them in
    # prose -- confirm against helper.py.
    processed_image = render_results_in_image(pil_image, pipeline_output)
    summary = summarize_predictions_natural_language(pipeline_output)
    return processed_image, summary


# Gradio interface
iface = gr.Interface(
    fn=get_pipeline_prediction,
    inputs=gr.Image(label="Upload an Image", type="pil"),
    outputs=[
        gr.Image(label="Image with Detected Objects", type="pil"),
        gr.Textbox(label="Summary of Detected Objects"),
    ],
    examples=[
        ["examples/kittens.jpeg"],
        ["examples/beach.jpeg"],
    ],
    title="Object Detection with DETR",
    description="Upload an image to detect objects. Bounding boxes will be drawn and a natural language summary will be provided.",
    flagging_mode="never",
)

if __name__ == "__main__":
    iface.launch()