Spaces:

Kesherat
/

blade-inspection-demo

Sleeping

App Files Files Community

Kesheratmex committed on Aug 11

Commit

011a229

1 Parent(s): f9d898d

feat(multimodal): add Generar analisis fuerte demo and GPT-OSS wrapper

Browse files

Files changed (1) hide show

blade-inspection-demo/app.py +283 -0

blade-inspection-demo/app.py ADDED Viewed

	@@ -0,0 +1,283 @@

+import os
+import tempfile
+import json
+import shutil
+import cv2
+from typing import List, Dict, Any
+import gradio as gr
+# Local GPT-OSS wrapper (created previously)
+from gptoss_wrapper import GPTOSSWrapper
# Try to import ReportLab for PDF generation. When it is missing, a minimal
# single-font PDF is assembled by hand so the report is still a valid PDF.
try:
    from reportlab.lib.pagesizes import A4
    from reportlab.pdfgen import canvas
    REPORTLAB_AVAILABLE = True
except ImportError:
    REPORTLAB_AVAILABLE = False


def _wrap_lines(text: str, width: int) -> List[str]:
    """Wrap *text* to at most *width* characters per line, keeping blank lines."""
    import textwrap  # local import: not part of the file's import block

    wrapped: List[str] = []
    for raw in (text or "").splitlines():
        # textwrap.wrap("") returns [], which would drop paragraph breaks.
        wrapped.extend(textwrap.wrap(raw, width=width) or [""])
    return wrapped


def _detection_rows(frames: List[Dict[str, Any]]) -> List[Any]:
    """Flatten *frames* into ``(is_detail, text)`` rows.

    A frame header row has ``is_detail=False``; the rows listing its
    detections (or "No detections") have ``is_detail=True``.
    """
    rows: List[Any] = []
    for frame in frames or []:
        rows.append((False, f"Frame {frame.get('frame_index')}:"))
        dets = frame.get("detections") or []
        if not dets:
            rows.append((True, "No detections"))
            continue
        for d in dets:
            rows.append(
                (True, f"- {d.get('label')} | conf={d.get('confidence')} | bbox={d.get('bbox')}")
            )
    return rows


def _draw_reportlab_pdf(path: str, title: str, narrative_lines: List[str], rows: List[Any]) -> None:
    """Render the report with ReportLab, starting a new page when space runs out."""
    c = canvas.Canvas(path, pagesize=A4)
    _, height = A4
    margin = 40
    y = height - margin

    def draw(text: str, font: str, size: int, step: int, indent: int = 0) -> None:
        nonlocal y
        if y < margin + 50:
            c.showPage()
            y = height - margin
        # The font is set on every call because it must be re-applied after showPage().
        c.setFont(font, size)
        c.drawString(margin + indent, y, text)
        y -= step

    draw(title, "Helvetica-Bold", 16, 30)
    for line in narrative_lines:
        draw(line, "Helvetica", 11, 16)
    y -= 10
    draw("Per-frame detections:", "Helvetica-Bold", 12, 18)
    for is_detail, text in rows:
        if is_detail:
            draw(text, "Helvetica", 10, 12, indent=12)
        else:
            draw(text, "Helvetica", 10, 14)
    c.save()


def _pdf_escape(text: str) -> bytes:
    """Encode *text* for use inside a PDF literal string ``( ... )``."""
    raw = text.encode("cp1252", errors="replace")
    return raw.replace(b"\\", b"\\\\").replace(b"(", b"\\(").replace(b")", b"\\)")


def _write_minimal_pdf(path: str, lines: List[str]) -> None:
    """Write *lines* as a plain Helvetica PDF using only the standard library."""
    page_width, page_height = 595, 842  # A4 rounded to whole points
    margin, font_size, leading = 40, 10, 14
    per_page = (page_height - 2 * margin) // leading
    pages = [lines[i:i + per_page] for i in range(0, len(lines), per_page)] or [[]]

    # Object numbers: 1 catalog, 2 page tree, 3 font, then (page, contents) pairs.
    page_ids = [4 + 2 * i for i in range(len(pages))]
    kids = " ".join(f"{pid} 0 R" for pid in page_ids)
    objects = [
        b"<< /Type /Catalog /Pages 2 0 R >>",
        f"<< /Type /Pages /Kids [{kids}] /Count {len(pages)} >>".encode("ascii"),
        b"<< /Type /Font /Subtype /Type1 /BaseFont /Helvetica /Encoding /WinAnsiEncoding >>",
    ]
    for pid, page_lines in zip(page_ids, pages):
        stream = b"BT\n/F1 %d Tf\n%d TL\n%d %d Td\n" % (
            font_size, leading, margin, page_height - margin)
        stream += b"".join(b"(" + _pdf_escape(t) + b") Tj\nT*\n" for t in page_lines)
        stream += b"ET"
        objects.append(
            (f"<< /Type /Page /Parent 2 0 R /MediaBox [0 0 {page_width} {page_height}] "
             f"/Resources << /Font << /F1 3 0 R >> >> /Contents {pid + 1} 0 R >>").encode("ascii")
        )
        objects.append(b"<< /Length %d >>\nstream\n" % len(stream) + stream + b"\nendstream")

    out = bytearray(b"%PDF-1.4\n")
    offsets = []
    for number, body in enumerate(objects, start=1):
        offsets.append(len(out))
        out += b"%d 0 obj\n" % number + body + b"\nendobj\n"
    xref_at = len(out)
    # Each cross-reference entry must be exactly 20 bytes long.
    out += b"xref\n0 %d\n0000000000 65535 f \n" % (len(objects) + 1)
    out += b"".join(b"%010d 00000 n \n" % off for off in offsets)
    out += b"trailer\n<< /Size %d /Root 1 0 R >>\nstartxref\n%d\n%%%%EOF\n" % (
        len(objects) + 1, xref_at)
    with open(path, "wb") as fh:
        fh.write(out)


# Simple helper: write a PDF with the narrative and per-frame detections
def _write_pdf(path: str, title: str, narrative: str, frames: List[Dict[str, Any]]):
    """Write the inspection report to *path* as a PDF.

    Args:
        path: Destination file; it is created or overwritten.
        title: Heading drawn at the top of the first page.
        narrative: Report text. ``None`` or an empty string yields an empty
            narrative section instead of raising.
        frames: Per-frame dicts read via the ``frame_index`` and
            ``detections`` keys; each detection is read via ``label``,
            ``confidence`` and ``bbox``.

    ReportLab is used when it could be imported. Otherwise a minimal PDF is
    built by hand, with characters outside cp1252 replaced by ``?``.
    """
    # 90 characters is a rough fit for the printable width at these font
    # sizes; it is an approximation, not a measured string width.
    narrative_lines = _wrap_lines(narrative, 90)
    rows = _detection_rows(frames)
    if REPORTLAB_AVAILABLE:
        _draw_reportlab_pdf(path, title, narrative_lines, rows)
        return
    lines = [title, ""] + narrative_lines + ["", "Per-frame detections:"]
    lines += [("    " + text) if is_detail else text for is_detail, text in rows]
    _write_minimal_pdf(path, lines)
# Build a compact prompt for the GPT model from per-frame detections
def _build_prompt(frames: List[Dict[str, Any]]) -> str:
    """Return the text prompt asking the model for an inspection report.

    Args:
        frames: Per-frame dicts read via the ``frame_index`` and
            ``detections`` keys; each detection is read via ``label`` and
            ``confidence``. A missing or ``None`` detection list is treated
            as "No detections".

    Returns:
        Newline-joined instructions followed by one line per frame.
    """
    import numbers  # local import: not part of the file's import block

    def describe(det: Dict[str, Any]) -> str:
        conf = det.get("confidence")
        # Any real number gets two decimals; bool is excluded because it is
        # an int subclass and would otherwise print as 1.00 / 0.00.
        numeric = isinstance(conf, numbers.Real) and not isinstance(conf, bool)
        conf_text = f"{conf:.2f}" if numeric else str(conf)
        return f"{det.get('label')}({conf_text})"

    lines = [
        "You are an expert inspection assistant for wind turbine blade images/videos.",
        # Only label and confidence are listed below, so the prompt must not
        # promise bounding boxes it does not contain.
        "Given per-frame detections (label, confidence), write a concise inspection report with:",
        "- Summary of main findings",
        "- Suggested severity (low/medium/high) when appropriate",
        "- Recommended next steps for inspection/repair",
        "",
        "Frame detections follow:",
    ]
    for frame in frames or []:
        fid = frame.get("frame_index")
        dets = frame.get("detections") or []
        if dets:
            lines.append(f"Frame {fid}: " + ", ".join(describe(d) for d in dets))
        else:
            lines.append(f"Frame {fid}: No detections")
    lines.append("")
    lines.append("Produce the report in plain text, 6-10 short paragraphs.")
    return "\n".join(lines)
# Minimal (safe) detector synthesizer:
# If YOLO model exists at repo root (../best2.pt), we try to perform simple detection on up to N frames.
# Otherwise we synthesize a small example so the GPT step can be exercised in the Space without heavy deps.
_VIDEO_EXTENSIONS = (".mp4", ".mov", ".avi", ".mkv")


def _is_video(media_path: str) -> bool:
    """Return True when the file extension is one of the known video types."""
    return os.path.splitext(media_path)[1].lower() in _VIDEO_EXTENSIONS


def _boxes_to_detections(model: Any, results: Any) -> List[Dict[str, Any]]:
    """Convert the first prediction result into plain detection dicts.

    Each field is parsed independently so that one unreadable field falls
    back to a placeholder instead of discarding the whole detection.
    """
    dets: List[Dict[str, Any]] = []
    if not results:
        return dets
    for box in results[0].boxes:
        try:
            label = model.names[int(box.cls[0])]
        except Exception:
            label = "object"
        try:
            x1, y1, x2, y2 = map(int, box.xyxy[0])
        except Exception:
            x1 = y1 = x2 = y2 = 0
        try:
            confv = float(box.conf[0])
        except Exception:
            confv = None
        dets.append({"label": label, "confidence": confv, "bbox": [x1, y1, x2, y2]})
    return dets


def _detect_with_yolo(media_path: str, model_path: str, max_frames: int) -> List[Dict[str, Any]]:
    """Run the YOLO model on an image, or on the first *max_frames* video frames."""
    from ultralytics import YOLO  # optional dependency, imported only when needed

    model = YOLO(model_path)
    if not _is_video(media_path):
        results = model.predict(source=media_path, conf=0.25, iou=0.45)
        return [{"frame_index": 0, "detections": _boxes_to_detections(model, results)}]

    frames: List[Dict[str, Any]] = []
    cap = cv2.VideoCapture(media_path)
    try:
        # A private scratch directory avoids name clashes between concurrent
        # requests and removes the frame images when done.
        with tempfile.TemporaryDirectory() as scratch:
            for idx in range(max_frames):
                ok, frame = cap.read()
                if not ok:
                    break
                frame_path = os.path.join(scratch, f"frame_{idx}.jpg")
                cv2.imwrite(frame_path, frame)
                results = model.predict(source=frame_path, conf=0.25, iou=0.45)
                frames.append({"frame_index": idx, "detections": _boxes_to_detections(model, results)})
    finally:
        cap.release()
    return frames


def _synthetic_detections(media_path: str, max_frames: int) -> List[Dict[str, Any]]:
    """Return canned demo detections shaped like real detector output."""
    if not _is_video(media_path):
        return [{"frame_index": 0,
                 "detections": [{"label": "crack", "confidence": 0.78, "bbox": [100, 50, 260, 210]}]}]
    canned = [
        [{"label": "crack", "confidence": 0.87, "bbox": [120, 80, 300, 220]},
         {"label": "erosion", "confidence": 0.62, "bbox": [400, 200, 520, 330]}],
        [{"label": "crack", "confidence": 0.81, "bbox": [125, 85, 305, 225]}],
    ]
    return [
        {"frame_index": i, "detections": canned[i] if i < len(canned) else []}
        for i in range(max_frames)
    ]


def extract_detections_from_media(media_path: str, max_frames: int = 3) -> List[Dict[str, Any]]:
    """Return per-frame detections for an image or video file.

    Args:
        media_path: Path to the media file. The extension decides whether it
            is treated as a video (.mp4, .mov, .avi, .mkv) or a single image.
        max_frames: Upper bound on the number of video frames processed.

    Returns:
        A list of ``{"frame_index": int, "detections": [...]}`` dicts, where
        each detection has ``label``, ``confidence`` and ``bbox`` keys.

    When ``../best2.pt`` exists next to this file's parent directory, YOLO is
    tried first. If the weights are missing or anything in the YOLO path
    fails, the result consists only of synthetic demo detections; partial
    real results are never mixed with synthetic ones.
    """
    root_model_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "best2.pt"))
    if os.path.exists(root_model_path):
        try:
            return _detect_with_yolo(media_path, root_model_path, max_frames)
        except Exception:
            # Best-effort demo: record why detection failed, then fall back.
            import logging
            logging.getLogger(__name__).exception(
                "YOLO detection failed; using synthetic demo detections")
    return _synthetic_detections(media_path, max_frames)
# Main action triggered by the Gradio button
def generar_analisis_fuerte(media: str):
    """Generate the inspection report files for one uploaded image or video.

    Args:
        media: Filesystem path of the uploaded media.

    Returns:
        A dict with the keys ``status``, ``report_pdf``, ``report_md`` and
        ``report_json``. On success ``status`` is ``"done"`` and the other
        values are file paths inside a fresh temporary directory, which is
        kept so the files can be downloaded. If ``media`` is empty, or if any
        step fails, the three paths are ``None`` and ``status`` says why; on
        failure the temporary directory is removed.
    """
    if not media:
        return {"status": "No media provided", "report_pdf": None, "report_md": None, "report_json": None}
    tmpdir = tempfile.mkdtemp()
    try:
        frames = extract_detections_from_media(media)
        prompt = _build_prompt(frames)
        try:
            # Building the wrapper is inside the try so a construction
            # failure also gets the fallback narrative.
            narrative = GPTOSSWrapper(model="gpt-oss-120").generate(prompt)
        except Exception as e:
            # Simple fallback narrative constructed from the detections.
            counts = {}
            for frame in frames:
                for det in frame.get("detections") or []:
                    label = det.get("label")
                    counts[label] = counts.get(label, 0) + 1
            if counts:
                summary = "Detected classes: " + ", ".join(f"{k}({v})" for k, v in counts.items())
            else:
                summary = "No detections"
            narrative = f"(GPT call failed) {e}\n\nFallback narrative:\n" + summary
        # The writers below need text, so normalise whatever was returned.
        if narrative is None:
            narrative = ""
        elif not isinstance(narrative, str):
            narrative = str(narrative)

        # Write Markdown
        report_md = os.path.join(tmpdir, "report.md")
        with open(report_md, "w", encoding="utf-8") as md:
            md.write("# Informe de inspección (Generar analisis fuerte)\n\n")
            md.write(narrative or "Sin narrativa disponible.\n\n")
            md.write("\n\n## Per-frame detections\n\n")
            for frame in frames:
                md.write(f"- Frame {frame.get('frame_index')}: ")
                dets = frame.get("detections") or []
                if not dets:
                    md.write("No detections\n")
                else:
                    md.write("; ".join(
                        f"{d.get('label')}({d.get('confidence')}) bbox={d.get('bbox')}" for d in dets
                    ) + "\n")

        # Write JSON
        report_json = os.path.join(tmpdir, "report.json")
        with open(report_json, "w", encoding="utf-8") as jf:
            json.dump({"narrative": narrative, "frames": frames}, jf, indent=2, ensure_ascii=False)

        # Write PDF
        report_pdf = os.path.join(tmpdir, "report.pdf")
        _write_pdf(report_pdf, "Informe de inspección - Generar analisis fuerte", narrative, frames)
    except Exception as e:
        # Nothing usable was produced, so do not leave the directory behind.
        shutil.rmtree(tmpdir, ignore_errors=True)
        return {"status": f"error: {e}", "report_pdf": None, "report_md": None, "report_json": None}
    return {
        "status": "done",
        "report_pdf": report_pdf,
        "report_md": report_md,
        "report_json": report_json,
    }
# Gradio UI
def _resolve_upload_path(file_obj):
    """Return the path carried by the value the file input passed to the callback.

    Accepts an object with a ``name`` attribute, a dict with a ``name`` key,
    or a plain path, which is returned unchanged.
    NOTE(review): which of these shapes the installed Gradio version actually
    sends has not been verified here - confirm against the deployed version.
    """
    if isinstance(file_obj, dict):
        return file_obj.get("name")
    return getattr(file_obj, "name", file_obj)


def _on_click(file_obj):
    """Button callback: run the analysis and return one value per output.

    Always returns a 4-tuple ``(status, pdf_path, md_path, json_path)`` so it
    lines up with the four output components wired in ``btn.click``.
    """
    if file_obj is None:
        return "No file provided", None, None, None
    res = generar_analisis_fuerte(_resolve_upload_path(file_obj))
    # Empty strings are mapped to None so the file outputs stay empty.
    return (
        res.get("status"),
        res.get("report_pdf") or None,
        res.get("report_md") or None,
        res.get("report_json") or None,
    )


with gr.Blocks(title="Generador de análisis fuerte") as demo:
    gr.Markdown("## Generar análisis multimodal (GPT-OSS 120)\n\nSube una imagen o vídeo y pulsa **Generar analisis fuerte** para producir un PDF con el informe AI.")
    with gr.Row():
        media = gr.File(label="Sube imagen o vídeo (archivo)")
    btn = gr.Button("Generar analisis fuerte")
    status = gr.Textbox(label="Estado", interactive=False)
    pdf_out = gr.File(label="Reporte PDF")
    md_out = gr.File(label="Reporte Markdown")
    json_out = gr.File(label="Reporte JSON")
    btn.click(fn=_on_click, inputs=[media], outputs=[status, pdf_out, md_out, json_out])

if __name__ == "__main__":
    demo.launch()