Kesheratmex committed on
Commit
a7f88f6
·
1 Parent(s): f939cfe

**Add compact prompt builder summarizing video stats and detections**

Browse files
Files changed (1) hide show
  1. app.py +65 -33
app.py CHANGED
@@ -321,53 +321,85 @@ def _load_gptoss_wrapper():
321
  return None
322
 
323
  def _build_prompt(frames):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
324
  lines = []
325
  lines.append("You are an expert inspection assistant for wind turbine blade images/videos.")
326
- lines.append("Given per-frame detections (label, confidence, bbox) and a short visual description of each frame, write a concise inspection report with:")
 
 
 
 
 
 
 
327
  lines.append("- Summary of main findings")
328
  lines.append("- Suggested severity (low/medium/high) when appropriate")
329
  lines.append("- Recommended next steps for inspection/repair")
330
  lines.append("")
331
- lines.append("Frame detections and visual summaries follow:")
332
- for f in frames:
333
- fid = f.get("frame_index")
334
- dets = f.get("detections", [])
335
- visual = f.get("visual") or {}
336
- if not dets:
337
- lines.append(f"Frame {fid}: No detections")
338
- else:
 
 
 
339
  det_texts = []
340
  for d in dets:
341
  conf = d.get("confidence")
342
  conf_s = f"{conf:.2f}" if isinstance(conf, float) else str(conf)
343
  det_texts.append(f"{d.get('label')}({conf_s})")
344
- lines.append(f"Frame {fid}: " + ", ".join(det_texts))
345
- # Add visual description details (short natural language + key metrics)
346
- if visual:
347
- vdesc = visual.get("description", "")
348
- b = visual.get("brightness")
349
- c = visual.get("contrast")
350
- bl = visual.get("blur")
351
- drgb = visual.get("dominant_rgb")
352
- objc = visual.get("object_count")
353
- avg_area = visual.get("avg_bbox_area")
354
  metric_parts = []
355
- if b is not None:
356
- metric_parts.append(f"brightness={b:.1f}")
357
- if c is not None:
358
- metric_parts.append(f"contrast={c:.1f}")
359
- if bl is not None:
360
- metric_parts.append(f"blur_var={bl:.1f}")
361
- if drgb:
362
- metric_parts.append(f"dominant_rgb={drgb}")
363
- if objc is not None:
364
- metric_parts.append(f"object_count={objc}")
365
- if avg_area is not None:
366
- metric_parts.append(f"avg_bbox_area={avg_area:.1f}")
367
  metrics = "; ".join(metric_parts)
368
- lines.append(f"Visual summary: {vdesc} ({metrics})")
 
 
 
 
 
 
369
  lines.append("")
370
- lines.append("Produce the report in plain text, 6-10 short paragraphs. Also include a short descriptive sentence per frame describing what the image shows visually.")
371
  return "\n".join(lines)
372
 
373
  @GPU_DECORATOR
 
321
  return None
322
 
def _prompt_confidence_text(conf):
    """Return a detection confidence as short text for the prompt.

    Numeric values (including ints and non-``float`` numeric scalars that
    ``float()`` accepts) are rendered with two decimals. ``None``, booleans and
    anything that cannot be converted to a float are rendered with ``str()``.
    """
    # bool is an int subclass; keep True/False (and None) verbatim rather than
    # turning them into misleading "1.00"/"0.00" scores.
    if conf is None or isinstance(conf, bool):
        return str(conf)
    try:
        return f"{float(conf):.2f}"
    except (TypeError, ValueError):
        return str(conf)


def _prompt_visual_metrics(visual):
    """Return the compact ``key=value; ...`` metrics text for one frame.

    Only metrics present in ``visual`` are included; the result is an empty
    string when none are available.
    """
    parts = [
        f"{shown}={visual[key]:.0f}"
        for key, shown in (
            ("brightness", "bright"),
            ("contrast", "contrast"),
            ("blur", "blur_var"),
        )
        if visual.get(key) is not None
    ]
    if visual.get("dominant_rgb"):
        parts.append(f"dominant_rgb={visual['dominant_rgb']}")
    return "; ".join(parts)


def _build_prompt(frames):
    """Build a compact, size-bounded text prompt summarizing a whole video.

    The prompt contains:
    - video-level totals (frame count, total detections, counts per label)
    - one line per frame that has detections (frame index, ``label(conf)``
      list and, when present, compact visual metrics), limited to
      ``MAX_PROMPT_FRAMES`` entries
    - a line stating how many frames with detections were left out when the
      limit truncates the list

    Args:
        frames: iterable of per-frame dicts. The keys read here are
            ``frame_index``, ``detections`` (list of dicts with ``label`` and
            ``confidence``) and ``visual`` (dict with optional ``brightness``,
            ``contrast``, ``blur``, ``dominant_rgb``). ``None`` is treated as
            an empty video.

    Returns:
        The prompt as a single newline-joined string.

    Environment:
        MAX_PROMPT_FRAMES: maximum number of frame lines (default 200).
            Unparseable values fall back to 200; negative values are treated
            as 0 so they cannot turn into a from-the-end slice.
    """
    try:
        max_prompt_frames = int(os.getenv("MAX_PROMPT_FRAMES", "200"))
    except ValueError:
        max_prompt_frames = 200
    # A negative cap would make the slice below drop frames from the end
    # instead of limiting the list.
    max_prompt_frames = max(max_prompt_frames, 0)

    frames = list(frames or [])
    total_frames = len(frames)

    # Single pass: per-label counts, grand total, and the frames worth listing.
    counts = {}
    frames_with_dets = []
    total_detections = 0
    for frame in frames:
        # "detections" may be missing or explicitly None.
        dets = frame.get("detections") or []
        if not dets:
            continue
        frames_with_dets.append(frame)
        total_detections += len(dets)
        for det in dets:
            label = det.get("label")
            counts[label] = counts.get(label, 0) + 1

    lines = [
        "You are an expert inspection assistant for wind turbine blade images/videos.",
        f"This video contains {total_frames} frames and {total_detections} total detections.",
    ]
    if counts:
        lines.append("Total detections by class: " + ", ".join(f"{k}({v})" for k, v in counts.items()))
    else:
        lines.append("No detections were found in analyzed frames.")

    lines.extend([
        "",
        "Instructions: Based on the aggregate information and the selected frame summaries below, produce a concise inspection report that includes:",
        "- Summary of main findings",
        "- Suggested severity (low/medium/high) when appropriate",
        "- Recommended next steps for inspection/repair",
        "",
    ])

    # Include up to max_prompt_frames frames that have detections.
    include_list = frames_with_dets[:max_prompt_frames]
    omitted = len(frames_with_dets) - len(include_list)

    lines.append(f"Included frame summaries (showing frames with detections, up to {max_prompt_frames} entries):")
    if not frames_with_dets:
        lines.append("No frames with detections to list (video may be clear or detections are below threshold).")
    for frame in include_list:
        det_text = ", ".join(
            f"{det.get('label')}({_prompt_confidence_text(det.get('confidence'))})"
            for det in frame.get("detections") or []
        )
        entry = f"Frame {frame.get('frame_index')}: " + det_text
        metrics = _prompt_visual_metrics(frame.get("visual") or {})
        if metrics:
            entry += f" [{metrics}]"
        lines.append(entry)
    if omitted > 0:
        # Make truncation explicit so the model does not assume the list is complete.
        lines.append(f"({omitted} more frame(s) with detections omitted to keep the prompt bounded.)")

    # NOTE(review): FULL_FRAME_REPORT is only mentioned in the text below; this
    # function does not read it. Confirm it is handled elsewhere.
    lines.extend([
        "",
        "NOTE: Full per-frame visual descriptions and images are attached in the generated report files. If you need a fully exhaustive token-by-token per-frame prompt, set FULL_FRAME_REPORT and increase MAX_PROMPT_FRAMES (may exceed model token limits).",
        "",
        "Produce the report in plain text, 6-10 short paragraphs. Also include 1-2 short sentences summarizing why the listed frames are noteworthy (e.g., what the detection likely means).",
    ])
    return "\n".join(lines)
404
 
405
  @GPU_DECORATOR