"""
app.py — Agentic AI Video Analysis System (Gradio Edition)

Gradio entry point for Hugging Face Spaces.
Uses GROQ_API_KEY from HF Space Settings → Secrets.
All file writes go to /tmp/ (HF filesystem is read-only elsewhere).
"""

import html
import json
import os
import shutil
import uuid
from typing import Any, Dict, List

import cv2
import gradio as gr

try:
    from dotenv import load_dotenv

    load_dotenv()
except ImportError:
    # python-dotenv is optional; without it only the real environment is used.
    pass

from ai_summarizer import get_summary
from agent_workflow import run_agent
from frame_extractor import extract_frames, get_frame_stats
from object_detector import load_detector, detect_objects_in_frames, summarize_detections
from video_input import load_video, release_video

# Root directory for all per-run artifacts (frames, annotated frames, reports).
TMP = "/tmp/vas"
os.makedirs(TMP, exist_ok=True)

# True when a Groq key is configured; drives the status banner and dropdown choices.
HAS_GROQ = bool(os.getenv("GROQ_API_KEY"))

# FPS assumed when the video metadata does not provide a usable value.
DEFAULT_FPS = 25.0

# Number of frames shown in each gallery.
_GALLERY_PREVIEW_COUNT = 6

# Lazily created detector shared by every request (see get_model).
_MODEL = None


def get_model():
    """Return the shared detector, calling ``load_detector()`` on first use only."""
    global _MODEL
    if _MODEL is None:
        _MODEL = load_detector()
    return _MODEL


def safe_html(text: str) -> str:
    """Escape ``&``, ``<`` and ``>`` in *text* so it can be embedded in HTML.

    The argument is converted with ``str()`` first, so non-string values are
    accepted. Quotes are left untouched.
    """
    return html.escape(str(text), quote=False)


def build_status_html() -> str:
    """Return the banner text describing whether a Groq API key was found."""
    if HAS_GROQ:
        return """
Groq API key detected. LLaMA-3 summarization and agentic analysis are enabled.
"""
    return """
⚠️ No Groq API key found. The app will run in mock mode. Core analysis still works, and AI text falls back safely to rule-based output.
"""


def summarize_frame_table(all_detections: List[Dict[str, Any]]) -> List[List[str]]:
    """Build one table row per frame: frame index, object count, object list.

    Each detection dict is read through its ``"frame_index"`` and ``"objects"``
    keys; each object through ``"class"`` and ``"confidence"``. Frames with no
    objects get the placeholder text ``"No objects above threshold"``.
    """

    def describe(objects: List[Dict[str, Any]]) -> str:
        if not objects:
            return "No objects above threshold"
        return ", ".join(
            f"{obj['class']} ({obj['confidence']:.1%})" for obj in objects
        )

    return [
        [str(det["frame_index"]), str(len(det["objects"])), describe(det["objects"])]
        for det in all_detections
    ]


def make_report_text(
    uploaded_name: str,
    meta: Dict[str, Any],
    duration_sec: float,
    detection_summary: Dict[str, int],
    ai_summary: str,
    agent_report: Dict[str, Any],
) -> str:
    """Render the plain-text analysis report.

    Args:
        uploaded_name: File name shown in the report header.
        meta: Video metadata; ``"width"`` and ``"height"`` are required,
            ``"fps"`` falls back to ``DEFAULT_FPS`` when missing or falsy so
            the report agrees with the value ``analyze_video`` displays.
        duration_sec: Video duration in seconds.
        detection_summary: Mapping of class name to detection count.
        ai_summary: Summary text inserted verbatim.
        agent_report: Dict optionally holding ``"insights"``, ``"risk_flags"``
            and ``"recommended_actions"`` lists.

    Returns:
        The complete report as a single string. Empty sections read ``None``.
    """
    fps = float(meta.get("fps") or DEFAULT_FPS)

    detection_lines = "\n".join(f" {k}: {v}" for k, v in detection_summary.items()) or " None"
    insights = "\n".join(f" • {x}" for x in agent_report.get("insights", [])) or " None"
    risks = "\n".join(f" 🚩 {x}" for x in agent_report.get("risk_flags", [])) or " None"
    actions = "\n".join(f" → {x}" for x in agent_report.get("recommended_actions", [])) or " None"

    return (
        f"VIDEO ANALYSIS REPORT\n{'=' * 50}\n"
        f"File: {uploaded_name}\n"
        f"Duration: {duration_sec}s @ {fps:.1f} FPS\n"
        f"Resolution: {meta['width']}x{meta['height']}\n\n"
        f"DETECTION SUMMARY\n{'-' * 30}\n{detection_lines}\n\n"
        f"AI SUMMARY\n{'-' * 30}\n{ai_summary}\n\n"
        f"KEY INSIGHTS\n{'-' * 30}\n{insights}\n\n"
        f"RISK FLAGS\n{'-' * 30}\n{risks}\n\n"
        f"RECOMMENDED ACTIONS\n{'-' * 30}\n{actions}\n"
    )


def _run_analysis(
    input_path,
    run_dir,
    frame_interval,
    max_frames,
    confidence,
    ai_provider,
    agent_mode,
    progress,
):
    """Run the full pipeline inside *run_dir* and return the 12 UI outputs.

    Private helper of ``analyze_video``; the caller owns *run_dir* and removes
    it if this function raises.
    """
    frame_dir = os.path.join(run_dir, "frames")
    annotated_dir = os.path.join(run_dir, "annotated")
    os.makedirs(frame_dir, exist_ok=True)
    os.makedirs(annotated_dir, exist_ok=True)

    # Work on a private copy so the run does not depend on the upload's temp file.
    uploaded_name = os.path.basename(input_path)
    stable_video_path = os.path.join(run_dir, uploaded_name)
    shutil.copy2(input_path, stable_video_path)

    progress(0.05, desc="Loading video")
    try:
        cap, meta = load_video(stable_video_path)
    except Exception as e:
        raise gr.Error(f"Could not load video: {e}") from e

    try:
        fps = float(meta.get("fps") or DEFAULT_FPS)
        total_frames = int(meta.get("total_frames") or 0)
        # max(fps, 1) guards against division by a zero/fractional frame rate.
        duration_sec = round(total_frames / max(fps, 1), 1)

        progress(0.22, desc="Extracting frames")
        paths, arrays = extract_frames(
            cap,
            output_dir=frame_dir,
            frame_interval=int(frame_interval),
            max_frames=int(max_frames),
        )
    finally:
        # Always hand the capture back, even when extraction fails.
        release_video(cap)

    if not arrays:
        raise gr.Error(
            "No frames could be extracted from the uploaded video. "
            "Please try another file or lower the frame interval."
        )

    progress(0.48, desc="Running YOLOv8 detection")
    model = get_model()
    all_detections = detect_objects_in_frames(
        model,
        arrays,
        paths,
        confidence_threshold=float(confidence),
        output_dir=annotated_dir,
    )
    detection_summary = summarize_detections(all_detections)

    progress(0.70, desc="Generating summary")
    ai_summary = get_summary(detection_summary, meta, provider=ai_provider)

    progress(0.84, desc="Running agentic analysis")
    agent_report = run_agent(detection_summary, meta, ai_summary, mode=agent_mode)

    frame_stats = get_frame_stats(arrays)
    total_obj = int(sum(detection_summary.values()))
    # Pick the class with the highest count instead of relying on dict order.
    top_cls = (
        max(detection_summary, key=detection_summary.get)
        if detection_summary
        else "none"
    )

    metrics_html = f"""
FPS
{fps:.1f}
Resolution
{meta['width']} × {meta['height']}
Duration
{duration_sec}s
Total detections
{total_obj}
Top class
{safe_html(top_cls)}
"""

    # Frames are converted BGR -> RGB before being handed to the gallery.
    preview_gallery = [
        cv2.cvtColor(arr, cv2.COLOR_BGR2RGB)
        for arr in arrays[:_GALLERY_PREVIEW_COUNT]
    ]
    annotated_gallery = [
        det["annotated_path"] for det in all_detections[:_GALLERY_PREVIEW_COUNT]
    ]

    summary_rows = [[k, int(v)] for k, v in detection_summary.items()]
    if not summary_rows:
        summary_rows = [["No objects detected", 0]]

    frame_detail_rows = summarize_frame_table(all_detections)

    insights_md = "\n".join(f"- {x}" for x in agent_report.get("insights", [])) or "- None"
    risks_md = "\n".join(f"- {x}" for x in agent_report.get("risk_flags", [])) or "- None"
    actions_md = "\n".join(f"- {x}" for x in agent_report.get("recommended_actions", [])) or "- None"

    full_report = {
        "video_file": uploaded_name,
        "video_metadata": meta,
        "frame_stats": frame_stats,
        "settings": {
            "frame_interval": int(frame_interval),
            "max_frames": int(max_frames),
            "confidence": float(confidence),
            "ai_provider": ai_provider,
            "agent_mode": agent_mode,
        },
        "detection_summary": detection_summary,
        "all_detections": all_detections,
        "ai_summary": ai_summary,
        "agent_report": agent_report,
    }

    json_path = os.path.join(run_dir, "video_analysis_report.json")
    txt_path = os.path.join(run_dir, "video_analysis_summary.txt")

    with open(json_path, "w", encoding="utf-8") as f:
        json.dump(full_report, f, indent=2)

    with open(txt_path, "w", encoding="utf-8") as f:
        f.write(
            make_report_text(
                uploaded_name,
                meta,
                duration_sec,
                detection_summary,
                ai_summary,
                agent_report,
            )
        )

    progress(1.0, desc="Done")

    return (
        metrics_html,
        preview_gallery,
        annotated_gallery,
        summary_rows,
        frame_detail_rows,
        ai_summary,
        insights_md,
        risks_md,
        actions_md,
        full_report,
        json_path,
        txt_path,
    )


def analyze_video(
    video_file,
    frame_interval,
    max_frames,
    confidence,
    ai_provider,
    agent_mode,
    progress=gr.Progress(track_tqdm=False),
):
    """Analyze an uploaded video and return the values for every output widget.

    Args:
        video_file: Path string, or an object exposing the path as ``.name``.
        frame_interval: Passed to ``extract_frames`` as ``frame_interval``.
        max_frames: Passed to ``extract_frames`` as ``max_frames``.
        confidence: Passed to the detector as ``confidence_threshold``.
        ai_provider: Passed to ``get_summary`` as ``provider``.
        agent_mode: Passed to ``run_agent`` as ``mode``.
        progress: Gradio progress callback.

    Returns:
        A 12-tuple: metrics text, preview frames, annotated frame paths,
        per-class rows, per-frame rows, AI summary, insights markdown, risk
        markdown, actions markdown, full report dict, JSON report path and
        text report path.

    Raises:
        gr.Error: No file was uploaded, the video could not be loaded, or no
            frames could be extracted.
    """
    if video_file is None:
        raise gr.Error("Please upload a video file first.")

    input_path = video_file if isinstance(video_file, str) else video_file.name

    run_id = str(uuid.uuid4())[:8]
    run_dir = os.path.join(TMP, run_id)

    try:
        return _run_analysis(
            input_path,
            run_dir,
            frame_interval,
            max_frames,
            confidence,
            ai_provider,
            agent_mode,
            progress,
        )
    except Exception:
        # A failed run has nothing to download; do not leave its files behind.
        shutil.rmtree(run_dir, ignore_errors=True)
        raise


# Adjacent literals keep the stylesheet text unchanged while staying readable.
custom_css = (
    " .gradio-container {max-width: 1280px !important;}"
    " .hero {"
    " background: linear-gradient(135deg, #1a1f3a 0%, #0d1117 100%);"
    " border: 1px solid #2d3250;"
    " border-radius: 14px;"
    " padding: 22px 24px;"
    " margin-bottom: 14px;"
    " }"
    " .badge {"
    " display:inline-block;"
    " margin-left:10px;"
    " padding:4px 12px;"
    " border-radius:999px;"
    " background:#1a3a1a;"
    " border:1px solid #166534;"
    " color:#4ade80;"
    " font-size:12px;"
    " \nfont-weight:600;"
    " }"
    " "
)


with gr.Blocks(theme=gr.themes.Soft(), css=custom_css, title="AI Video Analyzer — Gradio") as demo:
    gr.HTML(
        """

🎬 Agentic AI Video Analysis System Gradio Edition

YOLOv8 object detection · Groq LLaMA-3 summarization · Agentic insights

"""
    )

    gr.HTML(build_status_html())

    with gr.Row():
        # Left column: upload and analysis settings.
        with gr.Column(scale=1):
            video_input = gr.File(
                label="Upload Video",
                file_types=[".mp4", ".avi", ".mov", ".mkv"],
                type="filepath",
            )
            frame_interval = gr.Slider(
                minimum=5,
                maximum=120,
                value=30,
                step=5,
                label="Extract 1 frame every N frames",
            )
            max_frames = gr.Slider(
                minimum=5,
                maximum=50,
                value=15,
                step=5,
                label="Max frames to analyze",
            )
            confidence = gr.Slider(
                minimum=0.10,
                maximum=0.90,
                value=0.40,
                step=0.05,
                label="Confidence threshold",
            )

            # Without a key only the mock backends are offered.
            ai_options = ["groq", "mock"] if HAS_GROQ else ["mock"]
            agent_options = ["groq", "mock"] if HAS_GROQ else ["mock"]

            ai_provider = gr.Dropdown(
                choices=ai_options,
                value=ai_options[0],
                label="Summarization model",
            )
            agent_mode = gr.Dropdown(
                choices=agent_options,
                value=agent_options[0],
                label="Agentic workflow",
            )
            analyze_btn = gr.Button("🚀 Analyze Video", variant="primary")

        # Right column: results.
        with gr.Column(scale=2):
            metrics_html = gr.HTML(label="Video Metrics")

            with gr.Tab("Extracted Frames"):
                preview_gallery = gr.Gallery(label="Extracted Frames", columns=3, height="auto")

            with gr.Tab("Annotated Frames"):
                annotated_gallery = gr.Gallery(label="Annotated Frames", columns=3, height="auto")

            with gr.Tab("Detection Summary"):
                summary_table = gr.Dataframe(
                    headers=["Class", "Count"],
                    datatype=["str", "number"],
                    interactive=False,
                    label="Detections by Class",
                )
                frame_table = gr.Dataframe(
                    headers=["Frame Index", "Object Count", "Detected Objects"],
                    datatype=["str", "str", "str"],
                    interactive=False,
                    label="Per-frame Detection Details",
                )

            with gr.Tab("AI Summary"):
                ai_summary_out = gr.Textbox(label="AI Summary", lines=8)

            with gr.Tab("Agent Report"):
                insights_out = gr.Markdown(label="Key Insights")
                risks_out = gr.Markdown(label="Risk Flags")
                actions_out = gr.Markdown(label="Recommended Actions")

            with gr.Tab("Downloads"):
                report_json_view = gr.JSON(label="Full Report Preview")
                json_file = gr.File(label="Download JSON Report")
                txt_file = gr.File(label="Download Text Summary")

    # Output order must match the tuple returned by analyze_video.
    analyze_btn.click(
        fn=analyze_video,
        inputs=[
            video_input,
            frame_interval,
            max_frames,
            confidence,
            ai_provider,
            agent_mode,
        ],
        outputs=[
            metrics_html,
            preview_gallery,
            annotated_gallery,
            summary_table,
            frame_table,
            ai_summary_out,
            insights_out,
            risks_out,
            actions_out,
            report_json_view,
            json_file,
            txt_file,
        ],
    )


if __name__ == "__main__":
    demo.queue()
    demo.launch(server_name="0.0.0.0", server_port=int(os.getenv("PORT", 7860)))