Spaces:
Sleeping
Sleeping
Kesheratmex
committed on
Commit
·
a7f88f6
1
Parent(s):
f939cfe
**Add compact prompt builder summarizing video stats and detections**
Browse files
app.py
CHANGED
|
@@ -321,53 +321,85 @@ def _load_gptoss_wrapper():
|
|
| 321 |
return None
|
| 322 |
|
| 323 |
def _build_prompt(frames):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 324 |
lines = []
|
| 325 |
lines.append("You are an expert inspection assistant for wind turbine blade images/videos.")
|
| 326 |
-
lines.append("
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 327 |
lines.append("- Summary of main findings")
|
| 328 |
lines.append("- Suggested severity (low/medium/high) when appropriate")
|
| 329 |
lines.append("- Recommended next steps for inspection/repair")
|
| 330 |
lines.append("")
|
| 331 |
-
|
| 332 |
-
|
| 333 |
-
|
| 334 |
-
|
| 335 |
-
|
| 336 |
-
|
| 337 |
-
|
| 338 |
-
|
|
|
|
|
|
|
|
|
|
| 339 |
det_texts = []
|
| 340 |
for d in dets:
|
| 341 |
conf = d.get("confidence")
|
| 342 |
conf_s = f"{conf:.2f}" if isinstance(conf, float) else str(conf)
|
| 343 |
det_texts.append(f"{d.get('label')}({conf_s})")
|
| 344 |
-
|
| 345 |
-
|
| 346 |
-
if visual:
|
| 347 |
-
vdesc = visual.get("description", "")
|
| 348 |
-
b = visual.get("brightness")
|
| 349 |
-
c = visual.get("contrast")
|
| 350 |
-
bl = visual.get("blur")
|
| 351 |
-
drgb = visual.get("dominant_rgb")
|
| 352 |
-
objc = visual.get("object_count")
|
| 353 |
-
avg_area = visual.get("avg_bbox_area")
|
| 354 |
metric_parts = []
|
| 355 |
-
if
|
| 356 |
-
metric_parts.append(f"
|
| 357 |
-
if
|
| 358 |
-
metric_parts.append(f"contrast={
|
| 359 |
-
if
|
| 360 |
-
metric_parts.append(f"blur_var={
|
| 361 |
-
if
|
| 362 |
-
metric_parts.append(f"dominant_rgb={
|
| 363 |
-
if objc is not None:
|
| 364 |
-
metric_parts.append(f"object_count={objc}")
|
| 365 |
-
if avg_area is not None:
|
| 366 |
-
metric_parts.append(f"avg_bbox_area={avg_area:.1f}")
|
| 367 |
metrics = "; ".join(metric_parts)
|
| 368 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 369 |
lines.append("")
|
| 370 |
-
lines.append("Produce the report in plain text, 6-10 short paragraphs. Also include
|
| 371 |
return "\n".join(lines)
|
| 372 |
|
| 373 |
@GPU_DECORATOR
|
|
|
|
| 321 |
return None
|
| 322 |
|
| 323 |
def _format_prompt_number(value, spec):
    """Format *value* with format-spec *spec*, falling back to ``str(value)``.

    Used for confidences and visual metrics, which may arrive as plain
    floats, other numeric scalar types, strings or ``None``. Anything that
    does not support the numeric format spec is rendered verbatim so a
    single odd value cannot abort prompt construction.
    """
    try:
        return format(value, spec)
    except (TypeError, ValueError):
        return str(value)


def _build_prompt(frames):
    """Build a compact, size-bounded text prompt summarizing a whole video.

    The prompt contains:
    - video-level totals (frame count, total detections, counts per class)
    - one short line per frame that has detections (frame index, detections
      with confidence, and any available brightness/contrast/blur/dominant
      colour metrics), limited to ``MAX_PROMPT_FRAMES`` entries
    - a note on how many frames with detections were left out, if any

    Args:
        frames: iterable of per-frame dicts. Keys read here are
            ``"frame_index"``, ``"detections"`` (list of dicts with
            ``"label"`` and ``"confidence"``) and ``"visual"`` (dict of
            metrics). ``None`` or an empty iterable yields a prompt that
            reports zero frames. A ``"detections"`` value of ``None`` is
            treated as "no detections".

    Environment:
        MAX_PROMPT_FRAMES: maximum number of frame lines to include
            (default 200). Non-integer or negative values fall back to the
            default instead of producing a negative slice.

    Returns:
        The prompt as a single newline-joined string.
    """
    default_max_frames = 200
    try:
        max_prompt_frames = int(os.getenv("MAX_PROMPT_FRAMES", str(default_max_frames)))
    except ValueError:
        max_prompt_frames = default_max_frames
    if max_prompt_frames < 0:
        # A negative limit would slice from the end and silently drop frames.
        max_prompt_frames = default_max_frames

    frames = list(frames or [])
    total_frames = len(frames)

    # Single pass: total detections, per-label counts, frames worth listing.
    total_detections = 0
    counts = {}
    frames_with_dets = []
    for frame in frames:
        dets = frame.get("detections") or []
        if not dets:
            continue
        frames_with_dets.append(frame)
        total_detections += len(dets)
        for det in dets:
            label = det.get("label")
            counts[label] = counts.get(label, 0) + 1

    lines = [
        "You are an expert inspection assistant for wind turbine blade images/videos.",
        f"This video contains {total_frames} frames and {total_detections} total detections.",
    ]
    if counts:
        lines.append(
            "Total detections by class: "
            + ", ".join(f"{label}({count})" for label, count in counts.items())
        )
    else:
        lines.append("No detections were found in analyzed frames.")

    lines.extend(
        [
            "",
            "Instructions: Based on the aggregate information and the selected frame summaries below, produce a concise inspection report that includes:",
            "- Summary of main findings",
            "- Suggested severity (low/medium/high) when appropriate",
            "- Recommended next steps for inspection/repair",
            "",
        ]
    )

    # Include up to max_prompt_frames frames that have detections.
    include_list = frames_with_dets[:max_prompt_frames]
    omitted = len(frames_with_dets) - len(include_list)

    lines.append(
        f"Included frame summaries (showing frames with detections, up to {max_prompt_frames} entries):"
    )
    if not frames_with_dets:
        lines.append(
            "No frames with detections to list (video may be clear or detections are below threshold)."
        )
    else:
        numeric_metrics = (
            ("brightness", "bright"),
            ("contrast", "contrast"),
            ("blur", "blur_var"),
        )
        for frame in include_list:
            fid = frame.get("frame_index")
            det_texts = [
                f"{det.get('label')}({_format_prompt_number(det.get('confidence'), '.2f')})"
                for det in (frame.get("detections") or [])
            ]

            # Compact visual metrics (if present).
            visual = frame.get("visual") or {}
            metric_parts = [
                f"{tag}={_format_prompt_number(visual[key], '.0f')}"
                for key, tag in numeric_metrics
                if visual.get(key) is not None
            ]
            if visual.get("dominant_rgb"):
                metric_parts.append(f"dominant_rgb={visual['dominant_rgb']}")

            summary = f"Frame {fid}: " + ", ".join(det_texts)
            if metric_parts:
                summary += f" [{'; '.join(metric_parts)}]"
            lines.append(summary)

        if omitted > 0:
            # Tell the model the list is partial so it does not assume the
            # listed frames are the only ones with findings.
            lines.append(
                f"({omitted} additional frames with detections were omitted to keep the prompt bounded.)"
            )

    lines.extend(
        [
            "",
            "NOTE: Full per-frame visual descriptions and images are attached in the generated report files. If you need a fully exhaustive token-by-token per-frame prompt, set FULL_FRAME_REPORT and increase MAX_PROMPT_FRAMES (may exceed model token limits).",
            "",
            "Produce the report in plain text, 6-10 short paragraphs. Also include 1-2 short sentences summarizing why the listed frames are noteworthy (e.g., what the detection likely means).",
        ]
    )
    return "\n".join(lines)
|
| 404 |
|
| 405 |
@GPU_DECORATOR
|