A7med-Ame3 commited on
Commit
bf990aa
·
verified ·
1 Parent(s): f2f0e94

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +247 -204
app.py CHANGED
@@ -1,213 +1,260 @@
1
  """
2
- ╔══════════════════════════════════════════════════════════════════╗
3
- ║ ClearPath Real-Time Scene Description System ║
4
- ║ For Visually-Impaired People ║
5
- ║ ║
6
- ║ Pipeline: Input → Qwen2-VL Captioning → Regex Classifier ║
7
- ║ → SAFE / DANGEROUS + TTS Output ║
8
- ╚══════════════════════════════════════════════════════════════════╝
9
  """
10
 
11
  import gradio as gr
12
- from scene_captioner import SceneCaptioner
13
- from safety_classifier import SafetyClassifier, ClassificationResult
14
- from tts_engine import TTSEngine
15
- import cv2
16
  import numpy as np
17
- from PIL import Image
18
- import time
19
  import logging
 
 
 
 
 
 
20
 
21
  logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
22
  logger = logging.getLogger(__name__)
23
 
24
- # ── Global singletons (loaded once) ──────────────────────────────────────────
 
25
  captioner = SceneCaptioner()
26
  classifier = SafetyClassifier()
27
- tts = TTSEngine()
28
 
29
  history_log: list[dict] = []
30
 
31
- # ── Core pipeline ─────────────────────────────────────────────────────────────
32
-
33
- def run_pipeline(image: Image.Image) -> tuple[str, ClassificationResult]:
34
- """Full pipeline: caption → classify."""
35
- if image is None:
36
- raise ValueError("No image provided.")
37
- logger.info("▶ Running captioning model …")
38
- caption = captioner.describe(image)
39
- logger.info(f" Caption: {caption}")
40
- logger.info("▶ Running safety classifier …")
41
- result = classifier.classify(caption)
42
- logger.info(f" Classification: {result.label} | Hazards: {result.hazards}")
43
- return caption, result
44
-
45
 
46
- def process_image(image: np.ndarray | None):
47
- """Gradio callback for image upload / webcam snapshot."""
 
 
 
 
 
 
48
  if image is None:
49
  return (
50
- gr.update(value="⚠️ No image provided.", visible=True),
51
- gr.update(value="", visible=False),
52
- gr.update(visible=False),
53
- _build_history_markdown(),
54
  )
55
 
56
- pil_image = Image.fromarray(image).convert("RGB")
 
57
 
 
58
  try:
59
- caption, result = run_pipeline(pil_image)
60
  except Exception as exc:
61
- logger.error(f"Pipeline error: {exc}")
62
- return (
63
- gr.update(value=f"❌ Error: {exc}", visible=True),
64
- gr.update(value="", visible=False),
65
- gr.update(visible=False),
66
- _build_history_markdown(),
67
- )
68
 
69
- # ── TTS ───────────────────────────────────────────────────────────────────
70
- prefix = "⚠️ DANGER DETECTED! " if result.label == "DANGEROUS" else "Safe environment. "
71
- tts.speak_async(prefix + caption)
72
 
73
- # ── Build HTML banner ─────────────────────────────────────────────────────
74
  if result.label == "DANGEROUS":
75
- banner_html = (
76
- '<div style="background:rgba(239,68,68,0.12);border:1px solid rgba(239,68,68,0.4);'
77
- 'border-radius:12px;padding:1rem 1.25rem;display:flex;align-items:flex-start;gap:.85rem;">'
78
- '<span style="font-size:2rem;">⚠️</span>'
79
- '<div><strong style="color:#fca5a5;font-size:1.05rem;letter-spacing:.04em;">DANGER DETECTED</strong>'
80
- f'<br><span style="color:#f87171;font-size:.82rem;">Hazards: {", ".join(result.hazards)}</span>'
81
- f'<br><span style="color:#94a3b8;font-size:.75rem;font-family:monospace;">Tokens: {", ".join(result.matches)}</span>'
82
- '</div></div>'
83
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
  else:
85
- banner_html = (
86
- '<div style="background:rgba(34,197,94,0.1);border:1px solid rgba(34,197,94,0.35);'
87
- 'border-radius:12px;padding:1rem 1.25rem;display:flex;align-items:flex-start;gap:.85rem;">'
88
- '<span style="font-size:2rem;">✅</span>'
89
- '<div><strong style="color:#86efac;font-size:1.05rem;letter-spacing:.04em;">SAFE ENVIRONMENT</strong>'
90
- '<br><span style="color:#4ade80;font-size:.82rem;">No hazards detected by the regex engine.</span>'
91
- '</div></div>'
92
- )
93
-
94
- # ── History ───────────────────────────────────────────────────────────────
 
 
 
 
 
 
 
 
 
 
 
 
 
95
  history_log.insert(0, {
96
  "time" : time.strftime("%H:%M:%S"),
97
- "caption": caption,
98
  "label" : result.label,
99
  "hazards": ", ".join(result.hazards) if result.hazards else "—",
 
100
  })
101
 
 
 
 
 
102
  return (
103
- gr.update(value=banner_html, visible=True),
104
- gr.update(value=caption, visible=True),
105
- gr.update(visible=True),
106
- _build_history_markdown(),
107
  )
108
 
109
 
110
- def _build_history_markdown() -> str:
111
  if not history_log:
112
- return "_No analyses yet._"
113
- rows = ["| Time | Label | Hazards | Caption |",
114
- "|------|-------|---------|---------|"]
115
- for h in history_log[:8]:
116
- short = h["caption"][:65] + "…" if len(h["caption"]) > 65 else h["caption"]
117
- emoji = "⚠️" if h["label"] == "DANGEROUS" else "✅"
118
- rows.append(f"| {h['time']} | {emoji} {h['label']} | {h['hazards']} | {short} |")
119
  return "\n".join(rows)
120
 
121
 
122
  # ── Custom CSS ────────────────────────────────────────────────────────────────
123
  CSS = """
124
  @import url('https://fonts.googleapis.com/css2?family=DM+Sans:wght@400;600;800&family=JetBrains+Mono:wght@500;700&display=swap');
125
- :root {
126
- --bg:#0a0a10; --surface:#13131e; --border:rgba(99,102,241,.22);
127
- --accent:#6366f1; --text:#e2e8f0; --muted:#64748b;
128
- }
129
  body, .gradio-container {
130
- background: var(--bg) !important;
131
- color: var(--text) !important;
132
  font-family: 'DM Sans', sans-serif !important;
133
  }
134
- .gradio-container { max-width: 1180px !important; margin: 0 auto !important; }
 
 
135
 
 
136
  .app-header {
137
- text-align: center; padding: 2rem 1rem 1.25rem;
138
- background: linear-gradient(180deg, rgba(99,102,241,0.08) 0%, transparent 100%);
139
- border-bottom: 1px solid var(--border); margin-bottom: 1.5rem;
 
 
140
  }
141
  .app-title {
142
- font-size: 2.4rem; font-weight: 800; letter-spacing: -.03em;
143
- background: linear-gradient(135deg, #a5b4fc, #e879f9);
144
  -webkit-background-clip: text; -webkit-text-fill-color: transparent;
145
- margin: 0;
146
  }
147
- .app-subtitle { color: var(--muted); font-size: .92rem; margin-top: .4rem; }
148
 
149
- .pipeline-bar {
 
150
  display: flex; align-items: center; justify-content: center;
151
- gap: .4rem; flex-wrap: wrap; padding: .8rem 1rem;
152
- background: rgba(99,102,241,.04); border-bottom: 1px solid var(--border);
 
 
153
  font-family: 'JetBrains Mono', monospace; font-size: .75rem;
154
- margin-bottom: 1.5rem;
155
  }
156
  .pipe-node {
157
- padding: .28rem .75rem; border-radius: 7px;
158
- background: rgba(99,102,241,.12); border: 1px solid var(--border);
159
- color: #a5b4fc; font-weight: 700;
160
  }
161
- .pipe-arrow { color: #334155; }
162
 
163
- .panel {
164
- background: var(--surface) !important;
165
- border: 1px solid var(--border) !important;
 
166
  border-radius: 14px !important;
167
  }
168
 
169
- .caption-output textarea {
 
 
 
 
170
  background: rgba(255,255,255,.03) !important;
171
- border: 1px solid var(--border) !important;
172
- border-radius: 10px !important; color: var(--text) !important;
173
- font-size: .95rem !important; line-height: 1.75 !important;
174
  font-family: 'DM Sans', sans-serif !important;
 
 
175
  }
176
 
177
- button.primary-btn {
178
- background: linear-gradient(135deg, #6366f1, #8b5cf6) !important;
179
- border: none !important; color: white !important;
180
- font-weight: 700 !important; border-radius: 10px !important;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
181
  }
182
- button.primary-btn:hover { opacity: .85 !important; }
183
 
184
- .history-md table { width: 100%; border-collapse: collapse; font-size: .8rem; }
185
- .history-md th { background: rgba(99,102,241,.1); color: #a5b4fc; padding: .4rem .6rem; border-bottom: 1px solid var(--border); }
186
- .history-md td { padding: .4rem .6rem; border-bottom: 1px solid rgba(255,255,255,.04); color: var(--muted); }
 
 
 
187
 
188
- .tab-nav button { font-family: 'DM Sans', sans-serif !important; font-weight: 600 !important; }
189
  """
190
 
191
- # ── Gradio UI ─────────────────────────────────────────────────────────────────
192
 
193
- def build_ui() -> gr.Blocks:
194
  with gr.Blocks(css=CSS, title="ClearPath — Scene Description") as demo:
195
 
 
196
  gr.HTML("""
197
  <div class="app-header">
198
  <h1 class="app-title">👁 ClearPath</h1>
199
- <p class="app-subtitle">Real-Time Scene Description for Visually-Impaired People</p>
200
  </div>
201
- <div class="pipeline-bar">
202
- <span class="pipe-node">📥 Input</span>
203
  <span class="pipe-arrow">→</span>
204
- <span class="pipe-node">🧠 Qwen2-VL</span>
205
  <span class="pipe-arrow">→</span>
206
- <span class="pipe-node">🔍 Regex Classifier</span>
207
  <span class="pipe-arrow">→</span>
208
  <span class="pipe-node">🏷️ SAFE / DANGEROUS</span>
209
- <span class="pipe-arrow">→</span>
210
- <span class="pipe-node">🔊 TTS</span>
211
  </div>
212
  """)
213
 
@@ -217,135 +264,131 @@ def build_ui() -> gr.Blocks:
217
  with gr.TabItem("📁 Upload Image"):
218
  with gr.Row():
219
  with gr.Column(scale=1):
220
- upload_input = gr.Image(
221
- label="Upload or drag an image here",
222
  type="numpy",
223
- elem_classes=["panel"],
224
  )
225
- upload_btn = gr.Button(
226
- "🔍 Describe Scene",
227
  variant="primary",
228
- elem_classes=["primary-btn"],
229
  )
 
230
  with gr.Column(scale=1):
231
- upload_banner = gr.HTML(visible=False)
232
- upload_caption = gr.Textbox(
233
- label="🔊 Scene Description",
 
 
 
 
234
  lines=5,
235
  interactive=False,
236
- placeholder="Scene description will appear here after analysis…",
237
- elem_classes=["caption-output"],
238
- visible=False,
239
  )
240
- upload_speak_btn = gr.Button(
241
- "▶ Read Aloud Again",
242
- visible=False,
243
- elem_classes=["primary-btn"],
244
- )
245
-
246
- history_md_upload = gr.Markdown("", elem_classes=["history-md"])
247
 
248
- upload_btn.click(
249
- fn=process_image,
250
- inputs=[upload_input],
251
- outputs=[upload_banner, upload_caption, upload_speak_btn, history_md_upload],
252
- )
253
- upload_speak_btn.click(
254
- fn=lambda cap: tts.speak_async(cap),
255
- inputs=[upload_caption],
256
  )
257
 
258
  # ── Tab 2: Webcam ─────────────────────────────────────────────────
259
  with gr.TabItem("📷 Webcam"):
260
  with gr.Row():
261
  with gr.Column(scale=1):
262
- webcam_input = gr.Image(
263
- label="Webcam — click the camera button to capture",
264
  sources=["webcam"],
265
  type="numpy",
266
- elem_classes=["panel"],
267
  )
268
  cam_btn = gr.Button(
269
- "📸 Capture & Describe",
270
  variant="primary",
271
- elem_classes=["primary-btn"],
272
  )
273
  with gr.Column(scale=1):
274
- cam_banner = gr.HTML(visible=False)
 
 
 
 
275
  cam_caption = gr.Textbox(
276
  label="🔊 Scene Description",
277
  lines=5,
278
  interactive=False,
279
- placeholder="Point your camera at a scene and click Capture…",
280
- elem_classes=["caption-output"],
281
- visible=False,
282
- )
283
- cam_speak_btn = gr.Button(
284
- "▶ Read Aloud Again",
285
- visible=False,
286
- elem_classes=["primary-btn"],
287
  )
288
 
289
- history_md_cam = gr.Markdown("", elem_classes=["history-md"])
290
-
291
  cam_btn.click(
292
- fn=process_image,
293
- inputs=[webcam_input],
294
- outputs=[cam_banner, cam_caption, cam_speak_btn, history_md_cam],
295
- )
296
- cam_speak_btn.click(
297
- fn=lambda cap: tts.speak_async(cap),
298
- inputs=[cam_caption],
299
  )
300
 
301
  # ── Tab 3: Video ──────────────────────────────────────────────────
302
  with gr.TabItem("🎬 Video"):
303
- gr.Markdown("Upload a video — ClearPath extracts one frame every N seconds and describes each.")
304
  with gr.Row():
305
- video_input = gr.Video(label="Upload Video")
306
- interval_input = gr.Slider(1, 10, value=3, step=1, label="Capture interval (seconds)")
307
- video_btn = gr.Button("▶ Analyse Video", variant="primary", elem_classes=["primary-btn"])
308
-
309
- video_captions = gr.Dataframe(
310
- headers=["Frame #", "Time (s)", "Label", "Hazards", "Caption"],
311
  datatype=["number", "number", "str", "str", "str"],
312
  visible=False,
313
  )
314
 
315
- def process_video(video_path, interval):
316
- if video_path is None:
317
  return gr.update(visible=False)
318
- cap = cv2.VideoCapture(video_path)
319
- fps = cap.get(cv2.CAP_PROP_FPS) or 25
320
- step = max(1, int(fps * interval))
321
- rows, idx, sample_no = [], 0, 0
322
  while True:
323
  ret, frame = cap.read()
324
  if not ret:
325
  break
326
  if idx % step == 0:
327
- rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
328
- pil = Image.fromarray(rgb)
329
  try:
330
- caption, result = run_pipeline(pil)
 
331
  except Exception as e:
332
- caption, result = str(e), ClassificationResult("ERROR", [], [])
333
- rows.append([sample_no + 1, round(idx / fps, 1),
334
- result.label, ", ".join(result.hazards) or "—", caption])
335
- sample_no += 1
336
  idx += 1
337
  cap.release()
338
  return gr.update(value=rows, visible=True)
339
 
340
- video_btn.click(
341
- fn=process_video,
342
- inputs=[video_input, interval_input],
343
- outputs=[video_captions],
344
- )
345
 
346
- # ── Analysis History ──────────────────────────────────────────────────
347
  with gr.Accordion("📋 Analysis History", open=False):
348
- gr.Markdown("Recent analyses appear here after each run.", elem_classes=["history-md"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
349
 
350
  return demo
351
 
 
1
  """
2
+ app.py — ClearPath: Real-Time Scene Description for Visually-Impaired People
3
+ Pipeline: Upload Image → ViT-GPT2 Caption → Regex Safety Classifier → SAFE / DANGEROUS
 
 
 
 
 
4
  """
5
 
6
  import gradio as gr
 
 
 
 
7
  import numpy as np
 
 
8
  import logging
9
+ import time
10
+ import cv2
11
+
12
+ from PIL import Image
13
+ from scene_captioner import SceneCaptioner
14
+ from safety_classifier import SafetyClassifier, ClassificationResult
15
 
16
  logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
17
  logger = logging.getLogger(__name__)
18
 
19
+ # ── Load pipeline once at startup ─────────────────────────────────────────────
20
+ logger.info("🚀 Starting ClearPath — loading captioner …")
21
  captioner = SceneCaptioner()
22
  classifier = SafetyClassifier()
23
+ logger.info(f"✅ Pipeline ready — captioner backend: {captioner._backend}")
24
 
25
  history_log: list[dict] = []
26
 
27
+ # ── Core pipeline function ────────────────────────────────────────────────────
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
+ def analyse(image: np.ndarray):
30
+ """
31
+ Main pipeline:
32
+ 1. Convert numpy array → PIL Image
33
+ 2. SceneCaptioner.describe() → caption string
34
+ 3. SafetyClassifier.classify() → SAFE / DANGEROUS
35
+ 4. Return results to Gradio UI
36
+ """
37
  if image is None:
38
  return (
39
+ _info_html("⬆️ Please upload an image first.", "#6366f1"),
40
+ "",
41
+ _build_history_md(),
 
42
  )
43
 
44
+ t0 = time.time()
45
+ pil = Image.fromarray(image).convert("RGB")
46
 
47
+ # ── Step 2: Caption ───────────────────────────────────────────────────────
48
  try:
49
+ caption = captioner.describe(pil)
50
  except Exception as exc:
51
+ logger.error(f"Caption error: {exc}")
52
+ caption = "Unable to generate caption for this image."
 
 
 
 
 
53
 
54
+ # ── Step 3: Classify ──────────────────────────────────────────────────────
55
+ result = classifier.classify(caption)
56
+ elapsed = round(time.time() - t0, 2)
57
 
58
+ # ── Build banner HTML ─────────────────────────────────────────────────────
59
  if result.label == "DANGEROUS":
60
+ hazard_str = " &nbsp;|&nbsp; ".join(result.hazards)
61
+ token_str = ", ".join(result.matches[:8])
62
+ banner_html = f"""
63
+ <div style="
64
+ background:rgba(239,68,68,0.12);
65
+ border:2px solid rgba(239,68,68,0.45);
66
+ border-radius:14px; padding:1.1rem 1.4rem;
67
+ display:flex; align-items:flex-start; gap:1rem;
68
+ animation: fadeIn .3s ease;
69
+ ">
70
+ <span style="font-size:2.5rem; line-height:1;">⚠️</span>
71
+ <div>
72
+ <div style="font-weight:800; font-size:1.15rem; color:#fca5a5;
73
+ letter-spacing:.04em; margin-bottom:.3rem;">
74
+ DANGER DETECTED
75
+ </div>
76
+ <div style="font-size:.85rem; color:#f87171; margin-bottom:.25rem;">
77
+ <strong>Categories:</strong> {hazard_str}
78
+ </div>
79
+ <div style="font-size:.75rem; color:#94a3b8; font-family:monospace;">
80
+ <strong>Matched tokens:</strong> {token_str}
81
+ </div>
82
+ <div style="font-size:.7rem; color:#64748b; margin-top:.3rem;">
83
+ ⏱ Analysed in {elapsed}s &nbsp;|&nbsp; Backend: {captioner._backend}
84
+ </div>
85
+ </div>
86
+ </div>"""
87
  else:
88
+ banner_html = f"""
89
+ <div style="
90
+ background:rgba(34,197,94,0.1);
91
+ border:2px solid rgba(34,197,94,0.4);
92
+ border-radius:14px; padding:1.1rem 1.4rem;
93
+ display:flex; align-items:flex-start; gap:1rem;
94
+ ">
95
+ <span style="font-size:2.5rem; line-height:1;">✅</span>
96
+ <div>
97
+ <div style="font-weight:800; font-size:1.15rem; color:#86efac;
98
+ letter-spacing:.04em; margin-bottom:.3rem;">
99
+ SAFE ENVIRONMENT
100
+ </div>
101
+ <div style="font-size:.85rem; color:#4ade80;">
102
+ No hazards detected by the 16-category regex engine.
103
+ </div>
104
+ <div style="font-size:.7rem; color:#64748b; margin-top:.3rem;">
105
+ ⏱ Analysed in {elapsed}s &nbsp;|&nbsp; Backend: {captioner._backend}
106
+ </div>
107
+ </div>
108
+ </div>"""
109
+
110
+ # ── Log to history ────────────────────────────────────────────────────────
111
  history_log.insert(0, {
112
  "time" : time.strftime("%H:%M:%S"),
 
113
  "label" : result.label,
114
  "hazards": ", ".join(result.hazards) if result.hazards else "—",
115
+ "caption": caption,
116
  })
117
 
118
+ return banner_html, caption, _build_history_md()
119
+
120
+
121
+ def _info_html(msg: str, color: str) -> str:
122
  return (
123
+ f'<div style="background:rgba(99,102,241,.08);border:1px solid {color}33;'
124
+ f'border-radius:12px;padding:1rem 1.25rem;color:#94a3b8;font-size:.9rem;">'
125
+ f'{msg}</div>'
 
126
  )
127
 
128
 
129
+ def _build_history_md() -> str:
130
  if not history_log:
131
+ return "_No analyses yet — upload an image above._"
132
+ rows = ["| Time | Result | Hazards | Caption |",
133
+ "|------|--------|---------|---------|"]
134
+ for h in history_log[:10]:
135
+ short = (h["caption"][:70] + "…") if len(h["caption"]) > 70 else h["caption"]
136
+ icon = "⚠️" if h["label"] == "DANGEROUS" else "✅"
137
+ rows.append(f"| `{h['time']}` | {icon} **{h['label']}** | {h['hazards']} | {short} |")
138
  return "\n".join(rows)
139
 
140
 
141
  # ── Custom CSS ────────────────────────────────────────────────────────────────
142
  CSS = """
143
  @import url('https://fonts.googleapis.com/css2?family=DM+Sans:wght@400;600;800&family=JetBrains+Mono:wght@500;700&display=swap');
144
+
 
 
 
145
  body, .gradio-container {
146
+ background: #0a0a10 !important;
147
+ color: #e2e8f0 !important;
148
  font-family: 'DM Sans', sans-serif !important;
149
  }
150
+ .gradio-container { max-width: 1100px !important; margin: 0 auto !important; }
151
+
152
+ gradio-app { background: #0a0a10 !important; }
153
 
154
+ /* Header */
155
  .app-header {
156
+ text-align: center;
157
+ padding: 2rem 1rem 1.25rem;
158
+ border-bottom: 1px solid rgba(99,102,241,.2);
159
+ margin-bottom: 1.25rem;
160
+ background: linear-gradient(180deg,rgba(99,102,241,.07) 0%,transparent 100%);
161
  }
162
  .app-title {
163
+ font-size: 2.5rem; font-weight: 800; letter-spacing: -.03em; margin: 0;
164
+ background: linear-gradient(135deg,#a5b4fc,#e879f9);
165
  -webkit-background-clip: text; -webkit-text-fill-color: transparent;
 
166
  }
167
+ .app-sub { color: #64748b; font-size: .9rem; margin-top: .4rem; }
168
 
169
+ /* Pipeline bar */
170
+ .pipe-bar {
171
  display: flex; align-items: center; justify-content: center;
172
+ flex-wrap: wrap; gap: .4rem;
173
+ padding: .75rem; margin-bottom: 1.25rem;
174
+ background: rgba(99,102,241,.04);
175
+ border: 1px solid rgba(99,102,241,.15); border-radius: 12px;
176
  font-family: 'JetBrains Mono', monospace; font-size: .75rem;
 
177
  }
178
  .pipe-node {
179
+ background: rgba(99,102,241,.14); border: 1px solid rgba(99,102,241,.3);
180
+ color: #a5b4fc; padding: .25rem .75rem; border-radius: 7px; font-weight: 700;
 
181
  }
182
+ .pipe-arrow { color: #334155; font-size: .9rem; }
183
 
184
+ /* Panels */
185
+ .gr-block, .gr-box, .panel {
186
+ background: #13131e !important;
187
+ border: 1px solid rgba(99,102,241,.2) !important;
188
  border-radius: 14px !important;
189
  }
190
 
191
+ /* Upload widget */
192
+ .gr-image { border-radius: 12px !important; }
193
+
194
+ /* Caption textbox */
195
+ .gr-textbox textarea {
196
  background: rgba(255,255,255,.03) !important;
197
+ border: 1px solid rgba(99,102,241,.2) !important;
198
+ border-radius: 10px !important;
199
+ color: #e2e8f0 !important;
200
  font-family: 'DM Sans', sans-serif !important;
201
+ font-size: .95rem !important;
202
+ line-height: 1.75 !important;
203
  }
204
 
205
+ /* Buttons */
206
+ .gr-button-primary, button[variant=primary] {
207
+ background: linear-gradient(135deg,#6366f1,#8b5cf6) !important;
208
+ border: none !important; border-radius: 10px !important;
209
+ color: white !important; font-weight: 700 !important;
210
+ font-family: 'DM Sans', sans-serif !important;
211
+ font-size: .95rem !important;
212
+ transition: opacity .2s !important;
213
+ }
214
+ .gr-button-primary:hover { opacity: .85 !important; }
215
+
216
+ /* History table */
217
+ .history-box table { width: 100%; border-collapse: collapse; font-size: .8rem; }
218
+ .history-box th {
219
+ background: rgba(99,102,241,.1); color: #a5b4fc;
220
+ padding: .4rem .65rem; text-align: left;
221
+ border-bottom: 1px solid rgba(99,102,241,.2);
222
+ }
223
+ .history-box td {
224
+ padding: .4rem .65rem; color: #64748b;
225
+ border-bottom: 1px solid rgba(255,255,255,.04);
226
+ vertical-align: top;
227
  }
 
228
 
229
+ /* Tabs */
230
+ .tab-nav button {
231
+ font-family: 'DM Sans', sans-serif !important;
232
+ font-weight: 600 !important; color: #64748b !important;
233
+ }
234
+ .tab-nav button.selected { color: #a5b4fc !important; }
235
 
236
+ @keyframes fadeIn { from {opacity:0;transform:translateY(-6px)} to {opacity:1;transform:translateY(0)} }
237
  """
238
 
239
+ # ── Build Gradio UI ───────────────────────────────────────────────────────────
240
 
241
+ def build_ui():
242
  with gr.Blocks(css=CSS, title="ClearPath — Scene Description") as demo:
243
 
244
+ # ── Header ────────────────────────────────────────────────────────────
245
  gr.HTML("""
246
  <div class="app-header">
247
  <h1 class="app-title">👁 ClearPath</h1>
248
+ <p class="app-sub">Real-Time Scene Description for Visually-Impaired People</p>
249
  </div>
250
+ <div class="pipe-bar">
251
+ <span class="pipe-node">📥 Image Input</span>
252
  <span class="pipe-arrow">→</span>
253
+ <span class="pipe-node">🧠 ViT-GPT2 / BLIP Captioning</span>
254
  <span class="pipe-arrow">→</span>
255
+ <span class="pipe-node">🔍 Regex Safety Classifier</span>
256
  <span class="pipe-arrow">→</span>
257
  <span class="pipe-node">🏷️ SAFE / DANGEROUS</span>
 
 
258
  </div>
259
  """)
260
 
 
264
  with gr.TabItem("📁 Upload Image"):
265
  with gr.Row():
266
  with gr.Column(scale=1):
267
+ img_input = gr.Image(
268
+ label="Upload or drag an image",
269
  type="numpy",
270
+ height=300,
271
  )
272
+ analyse_btn = gr.Button(
273
+ "🔍 Analyse Scene",
274
  variant="primary",
275
+ size="lg",
276
  )
277
+
278
  with gr.Column(scale=1):
279
+ result_banner = gr.HTML(
280
+ value='<div style="background:rgba(99,102,241,.06);border:1px solid rgba(99,102,241,.2);'
281
+ 'border-radius:12px;padding:1.25rem;color:#475569;text-align:center;">'
282
+ '⬆️ Upload an image and click <strong>Analyse Scene</strong></div>'
283
+ )
284
+ caption_out = gr.Textbox(
285
+ label="🔊 Scene Description (generated caption)",
286
  lines=5,
287
  interactive=False,
288
+ placeholder="The AI-generated scene description will appear here…",
 
 
289
  )
 
 
 
 
 
 
 
290
 
291
+ analyse_btn.click(
292
+ fn=analyse,
293
+ inputs=[img_input],
294
+ outputs=[result_banner, caption_out, gr.State()],
 
 
 
 
295
  )
296
 
297
  # ── Tab 2: Webcam ─────────────────────────────────────────────────
298
  with gr.TabItem("📷 Webcam"):
299
  with gr.Row():
300
  with gr.Column(scale=1):
301
+ cam_input = gr.Image(
302
+ label="Webcam — capture a snapshot",
303
  sources=["webcam"],
304
  type="numpy",
305
+ height=300,
306
  )
307
  cam_btn = gr.Button(
308
+ "📸 Capture & Analyse",
309
  variant="primary",
310
+ size="lg",
311
  )
312
  with gr.Column(scale=1):
313
+ cam_banner = gr.HTML(
314
+ value='<div style="background:rgba(99,102,241,.06);border:1px solid rgba(99,102,241,.2);'
315
+ 'border-radius:12px;padding:1.25rem;color:#475569;text-align:center;">'
316
+ '📷 Point your camera and click <strong>Capture & Analyse</strong></div>'
317
+ )
318
  cam_caption = gr.Textbox(
319
  label="🔊 Scene Description",
320
  lines=5,
321
  interactive=False,
 
 
 
 
 
 
 
 
322
  )
323
 
 
 
324
  cam_btn.click(
325
+ fn=analyse,
326
+ inputs=[cam_input],
327
+ outputs=[cam_banner, cam_caption, gr.State()],
 
 
 
 
328
  )
329
 
330
  # ── Tab 3: Video ──────────────────────────────────────────────────
331
  with gr.TabItem("🎬 Video"):
332
+ gr.Markdown("Upload a video — ClearPath samples one frame every N seconds.")
333
  with gr.Row():
334
+ vid_input = gr.Video(label="Upload Video")
335
+ interval = gr.Slider(1, 10, value=3, step=1, label="Interval (seconds)")
336
+ vid_btn = gr.Button("▶ Analyse Video", variant="primary")
337
+ vid_out = gr.Dataframe(
338
+ headers=["Frame", "Time (s)", "Label", "Hazards", "Caption"],
 
339
  datatype=["number", "number", "str", "str", "str"],
340
  visible=False,
341
  )
342
 
343
+ def analyse_video(path, secs):
344
+ if path is None:
345
  return gr.update(visible=False)
346
+ cap = cv2.VideoCapture(path)
347
+ fps = cap.get(cv2.CAP_PROP_FPS) or 25
348
+ step = max(1, int(fps * secs))
349
+ rows, idx, n = [], 0, 0
350
  while True:
351
  ret, frame = cap.read()
352
  if not ret:
353
  break
354
  if idx % step == 0:
355
+ pil = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
 
356
  try:
357
+ cap_text = captioner.describe(pil)
358
+ res = classifier.classify(cap_text)
359
  except Exception as e:
360
+ cap_text, res = str(e), ClassificationResult("ERROR", [], [])
361
+ rows.append([n + 1, round(idx / fps, 1),
362
+ res.label, ", ".join(res.hazards) or "—", cap_text])
363
+ n += 1
364
  idx += 1
365
  cap.release()
366
  return gr.update(value=rows, visible=True)
367
 
368
+ vid_btn.click(fn=analyse_video, inputs=[vid_input, interval], outputs=[vid_out])
 
 
 
 
369
 
370
+ # ── History ───────────────────────────────────────────────────────────
371
  with gr.Accordion("📋 Analysis History", open=False):
372
+ history_out = gr.Markdown(
373
+ "_No analyses yet._",
374
+ elem_classes=["history-box"],
375
+ )
376
+
377
+ # Wire history refresh on every analyse
378
+ def analyse_with_history(image):
379
+ banner, caption, _ = analyse(image)
380
+ return banner, caption, _build_history_md()
381
+
382
+ analyse_btn.click(
383
+ fn=analyse_with_history,
384
+ inputs=[img_input],
385
+ outputs=[result_banner, caption_out, history_out],
386
+ )
387
+ cam_btn.click(
388
+ fn=analyse_with_history,
389
+ inputs=[cam_input],
390
+ outputs=[cam_banner, cam_caption, history_out],
391
+ )
392
 
393
  return demo
394