| | """ |
| | app.py — ClearPath: Real-Time Scene Description for Visually-Impaired People |
| | Pipeline: Upload Image → ViT-GPT2 Caption → Regex Safety Classifier → SAFE / DANGEROUS |
| | """ |
| |
|
| | import gradio as gr |
| | import numpy as np |
| | import logging |
| | import time |
| | import cv2 |
| |
|
| | from PIL import Image |
| | from scene_captioner import SceneCaptioner |
| | from safety_classifier import SafetyClassifier, ClassificationResult |
| |
|
# Configure logging once at import time: INFO level, with timestamp and
# level name in front of every message.
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
logger = logging.getLogger(__name__)

# Build the two pipeline stages once, at import time, so every request
# handler in this module shares the same instances.
logger.info("🚀 Starting ClearPath — loading captioner …")
captioner = SceneCaptioner()
classifier = SafetyClassifier()
# NOTE(review): `_backend` is a private attribute of SceneCaptioner; it is
# read here (and in the result banners) only for display. Consider exposing
# it publicly — confirm against scene_captioner.py.
logger.info(f"✅ Pipeline ready — captioner backend: {captioner._backend}")

# Module-level record of completed analyses, newest first: `analyse` inserts
# each new entry at index 0. Being a module global, the list is shared by
# every caller of this module.
history_log: list[dict] = []
| |
|
| | |
| |
|
def _danger_banner(hazard_str: str, token_str: str, elapsed: float, backend: str) -> str:
    """Return the red "DANGER DETECTED" banner; string arguments must already be HTML-escaped."""
    return f"""
    <div style="
        background:rgba(239,68,68,0.12);
        border:2px solid rgba(239,68,68,0.45);
        border-radius:14px; padding:1.1rem 1.4rem;
        display:flex; align-items:flex-start; gap:1rem;
        animation: fadeIn .3s ease;
    ">
        <span style="font-size:2.5rem; line-height:1;">⚠️</span>
        <div>
            <div style="font-weight:800; font-size:1.15rem; color:#fca5a5;
                        letter-spacing:.04em; margin-bottom:.3rem;">
                DANGER DETECTED
            </div>
            <div style="font-size:.85rem; color:#f87171; margin-bottom:.25rem;">
                <strong>Categories:</strong> {hazard_str}
            </div>
            <div style="font-size:.75rem; color:#94a3b8; font-family:monospace;">
                <strong>Matched tokens:</strong> {token_str}
            </div>
            <div style="font-size:.7rem; color:#64748b; margin-top:.3rem;">
                ⏱ Analysed in {elapsed}s | Backend: {backend}
            </div>
        </div>
    </div>"""


def _safe_banner(elapsed: float, backend: str) -> str:
    """Return the green "SAFE ENVIRONMENT" banner; `backend` must already be HTML-escaped."""
    return f"""
    <div style="
        background:rgba(34,197,94,0.1);
        border:2px solid rgba(34,197,94,0.4);
        border-radius:14px; padding:1.1rem 1.4rem;
        display:flex; align-items:flex-start; gap:1rem;
    ">
        <span style="font-size:2.5rem; line-height:1;">✅</span>
        <div>
            <div style="font-weight:800; font-size:1.15rem; color:#86efac;
                        letter-spacing:.04em; margin-bottom:.3rem;">
                SAFE ENVIRONMENT
            </div>
            <div style="font-size:.85rem; color:#4ade80;">
                No hazards detected by the 16-category regex engine.
            </div>
            <div style="font-size:.7rem; color:#64748b; margin-top:.3rem;">
                ⏱ Analysed in {elapsed}s | Backend: {backend}
            </div>
        </div>
    </div>"""


def analyse(image: np.ndarray):
    """
    Run the caption → safety-classification pipeline on a single image.

    Steps:
        1. Convert the numpy array to an RGB PIL image.
        2. ``captioner.describe()`` produces the caption string.
        3. ``classifier.classify()`` labels the caption SAFE / DANGEROUS.
        4. Record the run in ``history_log`` and build the result banner.

    Args:
        image: Image array handed over by the Gradio image component, or
            ``None`` when nothing has been provided yet.

    Returns:
        A ``(banner_html, caption, history_markdown)`` tuple for the UI.

    If the image cannot be converted or captioned, the failure is logged with
    its traceback and an error banner is returned *without* running the
    classifier: classifying a placeholder sentence would present a scene
    that was never analysed as SAFE. Failed runs are not added to the history.
    """
    # Function-scope import keeps this block self-contained.
    from html import escape

    if image is None:
        return (
            _info_html("⬆️ Please upload an image first.", "#6366f1"),
            "",
            _build_history_md(),
        )

    # perf_counter is monotonic, so the reported duration cannot be skewed
    # by wall-clock adjustments.
    started = time.perf_counter()

    try:
        pil = Image.fromarray(image).convert("RGB")
        caption = captioner.describe(pil)
    except Exception as exc:
        logger.exception("Caption error: %s", exc)
        return (
            _info_html(
                "⚠️ Could not analyse this image — safety status unknown. Please try again.",
                "#ef4444",
            ),
            "Unable to generate caption for this image.",
            _build_history_md(),
        )

    result = classifier.classify(caption)
    elapsed = round(time.perf_counter() - started, 2)

    # Everything interpolated into the banner is derived from model output or
    # from another module, so escape it before it reaches the HTML component.
    backend = escape(str(captioner._backend))
    if result.label == "DANGEROUS":
        banner_html = _danger_banner(
            escape(" | ".join(result.hazards)),
            escape(", ".join(result.matches[:8])),  # show at most eight tokens
            elapsed,
            backend,
        )
    else:
        banner_html = _safe_banner(elapsed, backend)

    # Newest entry first, matching the order the history table is rendered in.
    history_log.insert(0, {
        "time": time.strftime("%H:%M:%S"),
        "label": result.label,
        "hazards": ", ".join(result.hazards) if result.hazards else "—",
        "caption": caption,
    })

    return banner_html, caption, _build_history_md()
| |
|
| |
|
| | def _info_html(msg: str, color: str) -> str: |
| | return ( |
| | f'<div style="background:rgba(99,102,241,.08);border:1px solid {color}33;' |
| | f'border-radius:12px;padding:1rem 1.25rem;color:#94a3b8;font-size:.9rem;">' |
| | f'{msg}</div>' |
| | ) |
| |
|
| |
|
| | def _build_history_md() -> str: |
| | if not history_log: |
| | return "_No analyses yet — upload an image above._" |
| | rows = ["| Time | Result | Hazards | Caption |", |
| | "|------|--------|---------|---------|"] |
| | for h in history_log[:10]: |
| | short = (h["caption"][:70] + "…") if len(h["caption"]) > 70 else h["caption"] |
| | icon = "⚠️" if h["label"] == "DANGEROUS" else "✅" |
| | rows.append(f"| `{h['time']}` | {icon} **{h['label']}** | {h['hazards']} | {short} |") |
| | return "\n".join(rows) |
| |
|
| |
|
| | |
# Custom stylesheet passed to `gr.Blocks(css=CSS)` in `build_ui`: dark theme,
# header and pipeline bar, panel/textbox/button overrides, the history table
# (`.history-box`) and the `fadeIn` keyframes referenced by the danger banner.
# NOTE(review): selectors such as `.gr-block`, `.gr-box`, `.gr-textbox` and
# `.gr-button-primary` depend on the class names emitted by the installed
# Gradio version — confirm they still match.
CSS = """
@import url('https://fonts.googleapis.com/css2?family=DM+Sans:wght@400;600;800&family=JetBrains+Mono:wght@500;700&display=swap');

body, .gradio-container {
    background: #0a0a10 !important;
    color: #e2e8f0 !important;
    font-family: 'DM Sans', sans-serif !important;
}
.gradio-container { max-width: 1100px !important; margin: 0 auto !important; }

gradio-app { background: #0a0a10 !important; }

/* Header */
.app-header {
    text-align: center;
    padding: 2rem 1rem 1.25rem;
    border-bottom: 1px solid rgba(99,102,241,.2);
    margin-bottom: 1.25rem;
    background: linear-gradient(180deg,rgba(99,102,241,.07) 0%,transparent 100%);
}
.app-title {
    font-size: 2.5rem; font-weight: 800; letter-spacing: -.03em; margin: 0;
    background: linear-gradient(135deg,#a5b4fc,#e879f9);
    -webkit-background-clip: text; -webkit-text-fill-color: transparent;
}
.app-sub { color: #64748b; font-size: .9rem; margin-top: .4rem; }

/* Pipeline bar */
.pipe-bar {
    display: flex; align-items: center; justify-content: center;
    flex-wrap: wrap; gap: .4rem;
    padding: .75rem; margin-bottom: 1.25rem;
    background: rgba(99,102,241,.04);
    border: 1px solid rgba(99,102,241,.15); border-radius: 12px;
    font-family: 'JetBrains Mono', monospace; font-size: .75rem;
}
.pipe-node {
    background: rgba(99,102,241,.14); border: 1px solid rgba(99,102,241,.3);
    color: #a5b4fc; padding: .25rem .75rem; border-radius: 7px; font-weight: 700;
}
.pipe-arrow { color: #334155; font-size: .9rem; }

/* Panels */
.gr-block, .gr-box, .panel {
    background: #13131e !important;
    border: 1px solid rgba(99,102,241,.2) !important;
    border-radius: 14px !important;
}

/* Upload widget */
.gr-image { border-radius: 12px !important; }

/* Caption textbox */
.gr-textbox textarea {
    background: rgba(255,255,255,.03) !important;
    border: 1px solid rgba(99,102,241,.2) !important;
    border-radius: 10px !important;
    color: #e2e8f0 !important;
    font-family: 'DM Sans', sans-serif !important;
    font-size: .95rem !important;
    line-height: 1.75 !important;
}

/* Buttons */
.gr-button-primary, button[variant=primary] {
    background: linear-gradient(135deg,#6366f1,#8b5cf6) !important;
    border: none !important; border-radius: 10px !important;
    color: white !important; font-weight: 700 !important;
    font-family: 'DM Sans', sans-serif !important;
    font-size: .95rem !important;
    transition: opacity .2s !important;
}
.gr-button-primary:hover { opacity: .85 !important; }

/* History table */
.history-box table { width: 100%; border-collapse: collapse; font-size: .8rem; }
.history-box th {
    background: rgba(99,102,241,.1); color: #a5b4fc;
    padding: .4rem .65rem; text-align: left;
    border-bottom: 1px solid rgba(99,102,241,.2);
}
.history-box td {
    padding: .4rem .65rem; color: #64748b;
    border-bottom: 1px solid rgba(255,255,255,.04);
    vertical-align: top;
}

/* Tabs */
.tab-nav button {
    font-family: 'DM Sans', sans-serif !important;
    font-weight: 600 !important; color: #64748b !important;
}
.tab-nav button.selected { color: #a5b4fc !important; }

@keyframes fadeIn { from {opacity:0;transform:translateY(-6px)} to {opacity:1;transform:translateY(0)} }
"""
| |
|
| | |
| |
|
def build_ui():
    """
    Assemble the ClearPath Gradio interface.

    The layout has three tabs (image upload, webcam snapshot, video sampling)
    followed by a collapsible history table that is refreshed after every
    image or webcam analysis.

    Returns:
        The configured ``gr.Blocks`` application, ready for ``.launch()``.
    """
    with gr.Blocks(css=CSS, title="ClearPath — Scene Description") as demo:

        # Header and pipeline overview.
        gr.HTML("""
        <div class="app-header">
            <h1 class="app-title">👁 ClearPath</h1>
            <p class="app-sub">Real-Time Scene Description for Visually-Impaired People</p>
        </div>
        <div class="pipe-bar">
            <span class="pipe-node">📥 Image Input</span>
            <span class="pipe-arrow">→</span>
            <span class="pipe-node">🧠 ViT-GPT2 / BLIP Captioning</span>
            <span class="pipe-arrow">→</span>
            <span class="pipe-node">🔍 Regex Safety Classifier</span>
            <span class="pipe-arrow">→</span>
            <span class="pipe-node">🏷️ SAFE / DANGEROUS</span>
        </div>
        """)

        with gr.Tabs():

            # Tab 1: analyse an uploaded image.
            with gr.TabItem("📁 Upload Image"):
                with gr.Row():
                    with gr.Column(scale=1):
                        img_input = gr.Image(
                            label="Upload or drag an image",
                            type="numpy",
                            height=300,
                        )
                        analyse_btn = gr.Button(
                            "🔍 Analyse Scene",
                            variant="primary",
                            size="lg",
                        )

                    with gr.Column(scale=1):
                        result_banner = gr.HTML(
                            value='<div style="background:rgba(99,102,241,.06);border:1px solid rgba(99,102,241,.2);'
                                  'border-radius:12px;padding:1.25rem;color:#475569;text-align:center;">'
                                  '⬆️ Upload an image and click <strong>Analyse Scene</strong></div>'
                        )
                        caption_out = gr.Textbox(
                            label="🔊 Scene Description (generated caption)",
                            lines=5,
                            interactive=False,
                            placeholder="The AI-generated scene description will appear here…",
                        )

            # Tab 2: analyse a webcam snapshot.
            with gr.TabItem("📷 Webcam"):
                with gr.Row():
                    with gr.Column(scale=1):
                        cam_input = gr.Image(
                            label="Webcam — capture a snapshot",
                            sources=["webcam"],
                            type="numpy",
                            height=300,
                        )
                        cam_btn = gr.Button(
                            "📸 Capture & Analyse",
                            variant="primary",
                            size="lg",
                        )
                    with gr.Column(scale=1):
                        cam_banner = gr.HTML(
                            value='<div style="background:rgba(99,102,241,.06);border:1px solid rgba(99,102,241,.2);'
                                  'border-radius:12px;padding:1.25rem;color:#475569;text-align:center;">'
                                  '📷 Point your camera and click <strong>Capture & Analyse</strong></div>'
                        )
                        cam_caption = gr.Textbox(
                            label="🔊 Scene Description",
                            lines=5,
                            interactive=False,
                        )

            # Tab 3: sample frames from an uploaded video.
            with gr.TabItem("🎬 Video"):
                gr.Markdown("Upload a video — ClearPath samples one frame every N seconds.")
                with gr.Row():
                    vid_input = gr.Video(label="Upload Video")
                    interval = gr.Slider(1, 10, value=3, step=1, label="Interval (seconds)")
                vid_btn = gr.Button("▶ Analyse Video", variant="primary")
                vid_out = gr.Dataframe(
                    headers=["Frame", "Time (s)", "Label", "Hazards", "Caption"],
                    datatype=["number", "number", "str", "str", "str"],
                    visible=False,
                )

                def analyse_video(path, secs):
                    """
                    Caption and classify one frame every *secs* seconds of the video at *path*.

                    Returns a ``gr.update`` that fills and reveals the results
                    table, or hides it when no video was provided. A frame
                    whose captioning/classification raises is reported as an
                    ``ERROR`` row instead of aborting the whole run.
                    """
                    if path is None:
                        return gr.update(visible=False)

                    rows = []
                    cap = cv2.VideoCapture(path)
                    try:
                        # Fall back to 25 fps when the container reports none (0).
                        fps = cap.get(cv2.CAP_PROP_FPS) or 25
                        step = max(1, int(fps * secs))
                        idx = 0
                        # grab() advances to the next frame without decoding it;
                        # only the sampled frames pay the decode cost in retrieve().
                        while cap.grab():
                            if idx % step == 0:
                                ok, frame = cap.retrieve()
                                if not ok:
                                    break
                                pil = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
                                try:
                                    cap_text = captioner.describe(pil)
                                    res = classifier.classify(cap_text)
                                except Exception as e:
                                    logger.exception("Video frame %d could not be analysed", idx)
                                    cap_text, res = str(e), ClassificationResult("ERROR", [], [])
                                rows.append([
                                    len(rows) + 1,
                                    round(idx / fps, 1),
                                    res.label,
                                    ", ".join(res.hazards) or "—",
                                    cap_text,
                                ])
                            idx += 1
                    finally:
                        # Always free the capture handle, even if a frame blows up.
                        cap.release()
                    return gr.update(value=rows, visible=True)

                vid_btn.click(fn=analyse_video, inputs=[vid_input, interval], outputs=[vid_out])

        # Collapsible history of previous image/webcam analyses.
        with gr.Accordion("📋 Analysis History", open=False):
            history_out = gr.Markdown(
                "_No analyses yet._",
                elem_classes=["history-box"],
            )

        # Each button is wired exactly once, after `history_out` exists.
        # Gradio runs every listener attached to an event, so a second
        # registration would run the captioner twice per click and add two
        # history rows. `analyse` already returns the refreshed history
        # Markdown as its third value, so no wrapper is needed.
        analyse_btn.click(
            fn=analyse,
            inputs=[img_input],
            outputs=[result_banner, caption_out, history_out],
        )
        cam_btn.click(
            fn=analyse,
            inputs=[cam_input],
            outputs=[cam_banner, cam_caption, history_out],
        )

    return demo
| |
|
| |
|
# Script entry point: build the interface and start the Gradio server.
if __name__ == "__main__":
    demo = build_ui()
    # server_name="0.0.0.0" makes the server listen on all network
    # interfaces (not just localhost); it is served on port 7860.
    demo.launch(server_name="0.0.0.0", server_port=7860)