import html

import gradio as gr
from PIL import ImageDraw, ImageFont

from src.utils import run_pipeline
|
|
# Outline/caption colours for detections; draw_boxes indexes this list
# modulo its length, so the order here is the order colours are handed out.
COLORS = [
    "#FF6B6B",
    "#4ECDC4",
    "#45B7D1",
    "#96CEB4",
    "#FFEAA7",
    "#DDA0DD",
    "#98D8C8",
    "#F7DC6F",
]
|
|
def draw_boxes(image, boxes):
    """Return a copy of *image* with one labelled rectangle per detection.

    Args:
        image: Image object that provides ``copy()`` and is accepted by
            ``ImageDraw.Draw``. It is never modified; drawing happens on
            the copy.
        boxes: Iterable of detection dicts. Each dict is read for
            ``"label"``, ``"score"`` (formatted as a percentage) and
            ``"box"``, itself a dict with ``"xmin"``, ``"ymin"``,
            ``"xmax"`` and ``"ymax"``. ``None`` or an empty iterable
            yields an unannotated copy.

    Returns:
        The annotated copy of *image*.
    """
    caption_height = 18  # height in pixels of the filled strip behind the caption
    char_width = 7  # approximate width per character assumed for the caption text

    img = image.copy()
    draw = ImageDraw.Draw(img)
    label_color = {}
    for det in boxes or ():
        label = det["label"]
        if label not in label_color:
            # Pick the next palette entry by the number of distinct labels
            # seen so far. Indexing by detection position could hand two
            # different labels the same colour while leaving others unused.
            label_color[label] = COLORS[len(label_color) % len(COLORS)]
        color = label_color[label]

        b = det["box"]
        draw.rectangle(
            [b["xmin"], b["ymin"], b["xmax"], b["ymax"]],
            outline=color,
            width=3,
        )

        text = f"{label} {det['score']:.0%}"
        # The caption normally sits just above the box. When that would fall
        # off the top of the image, move it inside the box so it stays visible.
        caption_top = b["ymin"] - caption_height
        if caption_top < 0:
            caption_top = b["ymin"]
        draw.rectangle(
            [
                b["xmin"],
                caption_top,
                b["xmin"] + len(text) * char_width,
                caption_top + caption_height,
            ],
            fill=color,
        )
        draw.text((b["xmin"] + 2, caption_top + 2), text, fill="white")
    return img
|
|
def make_chips(labels):
    """Render *labels* as a row of HTML "chip" spans.

    Args:
        labels: Collection of labels. They are emitted in ``sorted()``
            order. ``None`` or an empty collection produces a
            "No objects detected" placeholder paragraph instead.

    Returns:
        An HTML string: either the placeholder ``<p>`` or a ``<div>``
        wrapping one ``<span>`` per label.
    """
    if not labels:
        return "<p style='color:#888'>No objects detected</p>"

    chip_style = (
        "background:#2d2d2d;border:1px solid #555;color:#e0e0e0;"
        "padding:4px 10px;border-radius:20px;margin:3px;display:inline-block;"
        "font-size:13px"
    )
    # Labels are inserted into markup, so escape them: a label containing
    # '<' or '&' must show up as text rather than be parsed as HTML.
    chips = "".join(
        f"<span style='{chip_style}'>{html.escape(str(label))}</span>"
        for label in sorted(labels)
    )
    return f"<div style='line-height:2'>{chips}</div>"
|
|
def process(image, question):
    """Run the pipeline on *image* and *question* and shape the UI outputs.

    Args:
        image: The uploaded image, or ``None`` when nothing was provided.
        question: The question text forwarded to ``run_pipeline``.

    Returns:
        A 4-tuple ``(annotated_image, chips_html, caption, answer)``. When
        *image* is ``None`` the tuple holds ``None``, an error paragraph and
        two empty strings, and the pipeline is not invoked.
    """
    # Guard: without an image there is nothing to analyse.
    if image is None:
        return None, "<p style='color:#f66'>No image provided</p>", "", ""

    caption, labels, boxes, answer = run_pipeline(image, question)
    # Tuple elements evaluate left to right: boxes are drawn before chips are built.
    return draw_boxes(image, boxes), make_chips(labels), caption, answer
|
|
# Stylesheet passed to gr.Blocks(css=...). Built from adjacent literals, one
# rule per line, so the value starts and ends with a newline.
CSS = (
    "\n"
    # Page background and base text.
    "body, .gradio-container { background:#1a1a2e !important; color:#e0e0e0 !important; font-family:'Segoe UI',sans-serif; }\n"
    # Primary button and its hover state.
    ".gr-button-primary { background:linear-gradient(135deg,#667eea,#764ba2) !important; border:none !important; color:#fff !important; font-weight:600 !important; }\n"
    ".gr-button-primary:hover { opacity:.9 !important; transform:translateY(-1px); }\n"
    # Container panels.
    ".gr-box, .gr-form, .gr-panel { background:#16213e !important; border:1px solid #2d2d5e !important; border-radius:12px !important; }\n"
    # Field labels and text inputs.
    "label { color:#a0a8c0 !important; font-size:12px !important; text-transform:uppercase; letter-spacing:.5px; }\n"
    "textarea, input[type=text] { background:#0f3460 !important; color:#e0e0e0 !important; border:1px solid #2d2d5e !important; border-radius:8px !important; }\n"
    # Card style for the .output-card class.
    ".output-card { background:#16213e; border:1px solid #2d2d5e; border-radius:12px; padding:16px; margin-top:8px; }\n"
)
|
|
# Build the UI: a header, an input/preview row, a results row, and the click
# handler that routes the inputs through process() into the four outputs.
with gr.Blocks(css=CSS, theme=gr.themes.Base()) as demo:
    # Page header: gradient title followed by a one-line feature summary.
    header_html = (
        "<h1 style='text-align:center;background:linear-gradient(135deg,#667eea,#764ba2);"
        "-webkit-background-clip:text;-webkit-text-fill-color:transparent;margin-bottom:4px'>"
        "🧠 Visual Reasoning Engine</h1>"
        "<p style='text-align:center;color:#888;margin-top:0'>Object detection · Captioning · Visual Q&A</p>"
    )
    gr.Markdown(header_html)

    # Top row: upload + question + button on the left, annotated image on the right.
    with gr.Row(equal_height=True):
        with gr.Column(scale=1):
            image_input = gr.Image(label="📷 Upload Image", type="pil")
            question_input = gr.Textbox(
                lines=2,
                label="❓ Ask a question",
                placeholder="What is happening in this image?",
            )
            submit_btn = gr.Button("▶ Run Analysis", variant="primary")

        with gr.Column(scale=1):
            annotated_output = gr.Image(type="pil", label="🔍 Detected Objects")

    # Bottom row: label chips, caption and answer, one column each.
    with gr.Row():
        with gr.Column():
            gr.Markdown(
                "<p style='color:#a0a8c0;font-size:12px;text-transform:uppercase;letter-spacing:.5px'>🧱 Detected Objects</p>"
            )
            objects_output = gr.HTML()
        with gr.Column():
            caption_output = gr.Textbox(interactive=False, label="🧾 Caption")
        with gr.Column():
            answer_output = gr.Textbox(interactive=False, label="🧠 Reasoned Answer")

    # Output order must match the tuple returned by process().
    pipeline_inputs = [image_input, question_input]
    pipeline_outputs = [
        annotated_output,
        objects_output,
        caption_output,
        answer_output,
    ]
    submit_btn.click(fn=process, inputs=pipeline_inputs, outputs=pipeline_outputs)
|
|
if __name__ == "__main__":
    # Start the server only when this file is executed as a script, so that
    # importing the module (tests, tooling) builds `demo` without launching it.
    demo.launch()
|
|