Spaces:

Kesherat
/

blade-inspection-demo

Sleeping

App Files Files Community

Kesheratmex committed on Aug 11

Commit

a7f88f6

1 Parent(s): f939cfe

Add compact prompt builder summarizing video stats and detections

Browse files

Files changed (1) hide show

app.py +65 -33

app.py CHANGED Viewed

@@ -321,53 +321,85 @@ def _load_gptoss_wrapper():
         return None
 def _build_prompt(frames):
     """Build the per-frame inspection prompt sent to the language model.

     Every frame contributes one detection line and, when the frame carries
     visual information, one "Visual summary" line.

     Args:
         frames: Iterable of per-frame dicts. The keys read here are
             ``frame_index``, ``detections`` (list of dicts with ``label`` and
             ``confidence``) and ``visual`` (dict with optional
             ``description``, ``brightness``, ``contrast``, ``blur``,
             ``dominant_rgb``, ``object_count`` and ``avg_bbox_area``).
             ``None`` is treated as no frames.

     Returns:
         The prompt as a single newline-joined string.
     """

     def _fmt(value, spec):
         # Apply the fixed-point spec when the value supports it; otherwise
         # fall back to str() so one odd value cannot abort prompt building.
         try:
             return format(value, spec)
         except (TypeError, ValueError):
             return str(value)

     lines = [
         "You are an expert inspection assistant for wind turbine blade images/videos.",
         "Given per-frame detections (label, confidence, bbox) and a short visual description of each frame, write a concise inspection report with:",
         "- Summary of main findings",
         "- Suggested severity (low/medium/high) when appropriate",
         "- Recommended next steps for inspection/repair",
         "",
         "Frame detections and visual summaries follow:",
     ]
     for frame in frames or []:
         fid = frame.get("frame_index")
         # `or []` also covers an explicit None stored under the key.
         dets = frame.get("detections") or []
         visual = frame.get("visual") or {}
         if dets:
             det_texts = [
                 f"{d.get('label')}({_fmt(d.get('confidence'), '.2f')})"
                 for d in dets
             ]
             lines.append(f"Frame {fid}: " + ", ".join(det_texts))
         else:
             lines.append(f"Frame {fid}: No detections")
         if not visual:
             continue
         # Short natural-language description plus key numeric metrics.
         vdesc = visual.get("description") or ""
         metric_parts = []
         for key, label in (
             ("brightness", "brightness"),
             ("contrast", "contrast"),
             ("blur", "blur_var"),
         ):
             if visual.get(key) is not None:
                 metric_parts.append(f"{label}={_fmt(visual[key], '.1f')}")
         if visual.get("dominant_rgb"):
             metric_parts.append(f"dominant_rgb={visual['dominant_rgb']}")
         if visual.get("object_count") is not None:
             metric_parts.append(f"object_count={visual['object_count']}")
         if visual.get("avg_bbox_area") is not None:
             metric_parts.append(
                 f"avg_bbox_area={_fmt(visual['avg_bbox_area'], '.1f')}"
             )
         metrics = "; ".join(metric_parts)
         if metrics:
             lines.append(f"Visual summary: {vdesc} ({metrics})")
         elif vdesc:
             # No metrics available: avoid emitting a dangling " ()".
             lines.append(f"Visual summary: {vdesc}")
     lines.append("")
     lines.append("Produce the report in plain text, 6-10 short paragraphs. Also include a short descriptive sentence per frame describing what the image shows visually.")
     return "\n".join(lines)
 @GPU_DECORATOR

         return None
 def _build_prompt(frames):
     """Build a compact, size-bounded prompt summarizing a whole video.

     The prompt contains:
       - video-level totals (frame count, detection count, counts per class)
       - one line per frame that has detections (frame index, detections and
         compact visual metrics), capped at ``MAX_PROMPT_FRAMES`` entries
       - the report-writing instructions for the model

     Args:
         frames: Iterable of per-frame dicts. The keys read here are
             ``frame_index``, ``detections`` (list of dicts with ``label`` and
             ``confidence``) and ``visual`` (dict with optional ``brightness``,
             ``contrast``, ``blur`` and ``dominant_rgb``). ``None`` is treated
             as an empty video.

     Returns:
         The prompt as a single newline-joined string.

     Environment:
         MAX_PROMPT_FRAMES: maximum number of frame lines (default 200).
             Unparsable values fall back to the default; negative values are
             treated as 0 so they cannot act as a negative slice index.
     """

     def _fmt(value, spec):
         # Apply the fixed-point spec when the value supports it; otherwise
         # fall back to str() so one odd value cannot abort prompt building.
         try:
             return format(value, spec)
         except (TypeError, ValueError):
             return str(value)

     # Config (env var). int() on a string can only fail with ValueError.
     try:
         max_prompt_frames = int(os.getenv("MAX_PROMPT_FRAMES", "200"))
     except ValueError:
         max_prompt_frames = 200
     # A negative cap would slice from the end of the list instead of limiting it.
     max_prompt_frames = max(max_prompt_frames, 0)

     frames = list(frames or [])
     total_frames = len(frames)

     # Aggregate counts per label and collect frames with detections.
     counts = {}
     frames_with_dets = []
     total_detections = 0
     for frame in frames:
         # `or []` also covers an explicit None stored under the key.
         dets = frame.get("detections") or []
         if not dets:
             continue
         frames_with_dets.append(frame)
         total_detections += len(dets)
         for det in dets:
             label = det.get("label")
             counts[label] = counts.get(label, 0) + 1

     lines = []
     lines.append("You are an expert inspection assistant for wind turbine blade images/videos.")
     lines.append(f"This video contains {total_frames} frames and {total_detections} total detections.")
     if counts:
         lines.append("Total detections by class: " + ", ".join(f"{k}({v})" for k, v in counts.items()))
     else:
         lines.append("No detections were found in analyzed frames.")
     lines.append("")
     lines.append("Instructions: Based on the aggregate information and the selected frame summaries below, produce a concise inspection report that includes:")
     lines.append("- Summary of main findings")
     lines.append("- Suggested severity (low/medium/high) when appropriate")
     lines.append("- Recommended next steps for inspection/repair")
     lines.append("")

     # Include up to max_prompt_frames frames that have detections.
     include_list = frames_with_dets[:max_prompt_frames]
     lines.append(f"Included frame summaries (showing frames with detections, up to {max_prompt_frames} entries):")
     if not frames_with_dets:
         lines.append("No frames with detections to list (video may be clear or detections are below threshold).")
     for frame in include_list:
         fid = frame.get("frame_index")
         det_texts = [
             f"{d.get('label')}({_fmt(d.get('confidence'), '.2f')})"
             for d in frame.get("detections") or []
         ]
         # Compact visual metrics (if present).
         visual = frame.get("visual") or {}
         metric_parts = []
         for key, label in (
             ("brightness", "bright"),
             ("contrast", "contrast"),
             ("blur", "blur_var"),
         ):
             if visual.get(key) is not None:
                 metric_parts.append(f"{label}={_fmt(visual[key], '.0f')}")
         if visual.get("dominant_rgb"):
             metric_parts.append(f"dominant_rgb={visual['dominant_rgb']}")
         metrics = "; ".join(metric_parts)
         entry = f"Frame {fid}: " + ", ".join(det_texts)
         if metrics:
             entry += f"  [{metrics}]"
         lines.append(entry)
     omitted = len(frames_with_dets) - len(include_list)
     if omitted > 0:
         # Tell the model the list is truncated so it does not treat it as complete.
         lines.append(f"(+{omitted} more frame(s) with detections omitted to keep the prompt compact)")
     lines.append("")
     lines.append("NOTE: Full per-frame visual descriptions and images are attached in the generated report files. If you need a fully exhaustive token-by-token per-frame prompt, set FULL_FRAME_REPORT and increase MAX_PROMPT_FRAMES (may exceed model token limits).")
     lines.append("")
     lines.append("Produce the report in plain text, 6-10 short paragraphs. Also include 1-2 short sentences summarizing why the listed frames are noteworthy (e.g., what the detection likely means).")
     return "\n".join(lines)
 @GPU_DECORATOR

**Add compact prompt builder summarizing video stats and detections**

Add compact prompt builder summarizing video stats and detections