# AmirAziz1221's picture
# Update app.py
# db8b945 verified
"""
app.py — Agentic AI Video Analysis System (Gradio Edition)
Gradio entry point for Hugging Face Spaces.
Uses GROQ_API_KEY from HF Space Settings → Secrets.
All file writes go to /tmp/ (HF filesystem is read-only elsewhere).
"""
import json
import os
import shutil
import uuid
from typing import Any, Dict, List
import cv2
import gradio as gr
try:
from dotenv import load_dotenv
load_dotenv()
except ImportError:
pass
from ai_summarizer import get_summary
from agent_workflow import run_agent
from frame_extractor import extract_frames, get_frame_stats
from object_detector import load_detector, detect_objects_in_frames, summarize_detections
from video_input import load_video, release_video
# Root directory for all per-run artifacts; created eagerly at import time so
# later writes can assume it exists.
TMP = "/tmp/vas"
os.makedirs(TMP, exist_ok=True)

# True only when GROQ_API_KEY is set to a non-empty value in the environment.
HAS_GROQ = bool(os.environ.get("GROQ_API_KEY"))

# Cache slot for the detector; filled on first use by get_model().
_MODEL = None
def get_model():
    """Return the shared detector, loading it on the first call.

    The instance produced by ``load_detector()`` is stored in the module-level
    ``_MODEL`` and reused by every later call.
    """
    global _MODEL
    if _MODEL is not None:
        return _MODEL
    _MODEL = load_detector()
    return _MODEL
def safe_html(text: str) -> str:
    """Escape *text* so it can be interpolated safely into HTML markup.

    The value is coerced with ``str()`` first, so non-string inputs are
    accepted. ``&``, ``<`` and ``>`` are replaced with their entities, and so
    are double and single quotes, which keeps the result safe inside quoted
    attribute values as well as in element content.

    Args:
        text: Value to escape.

    Returns:
        The escaped string.
    """
    # Function-scope import keeps this helper self-contained.
    import html

    return html.escape(str(text), quote=True)
def build_status_html() -> str:
    """Return the status banner markup for the current Groq configuration.

    Reads the module-level ``HAS_GROQ`` flag: when it is false the banner
    announces mock mode, otherwise it confirms that the API key was detected.
    """
    if not HAS_GROQ:
        return (
            "\n"
            '<div style="padding:12px 14px;border-radius:10px;background:#1f1400;border-left:4px solid #f59e0b;">\n'
            "⚠️ <b>No Groq API key found.</b> The app will run in <b>mock mode</b>.\n"
            "Core analysis still works, and AI text falls back safely to rule-based output.\n"
            "</div>\n"
        )
    return (
        "\n"
        '<div style="padding:12px 14px;border-radius:10px;background:#0a1c0f;border-left:4px solid #22c55e;">\n'
        "✅ <b>Groq API key detected.</b> LLaMA-3 summarization and agentic analysis are enabled.\n"
        "</div>\n"
    )
def summarize_frame_table(all_detections: List[Dict[str, Any]]) -> List[List[str]]:
    """Build one table row per frame from the per-frame detection records.

    Each record is read through its ``"frame_index"`` and ``"objects"`` keys;
    every object is read through ``"class"`` and ``"confidence"``.

    Returns:
        A list of ``[frame index, object count, description]`` rows, all
        strings. The description lists each object as ``class (confidence%)``
        or a fixed placeholder when the frame has no objects.
    """

    def to_row(entry: Dict[str, Any]) -> List[str]:
        found = entry["objects"]
        if found:
            label = ", ".join(
                f"{item['class']} ({item['confidence']:.1%})" for item in found
            )
        else:
            label = "No objects above threshold"
        return [str(entry["frame_index"]), str(len(found)), label]

    return [to_row(entry) for entry in all_detections]
def make_report_text(
    uploaded_name: str,
    meta: Dict[str, Any],
    duration_sec: float,
    detection_summary: Dict[str, int],
    ai_summary: str,
    agent_report: Dict[str, Any],
) -> str:
    """Render the plain-text analysis report.

    Args:
        uploaded_name: File name shown in the report header.
        meta: Video metadata. ``"width"`` and ``"height"`` are required;
            ``"fps"`` may be missing, ``None`` or 0, in which case 25.0 is
            reported (the same fallback ``analyze_video`` applies).
        duration_sec: Video duration in seconds, printed as given.
        detection_summary: Mapping of class name to detection count.
        ai_summary: Free-text summary inserted verbatim.
        agent_report: Mapping that may carry ``"insights"``, ``"risk_flags"``
            and ``"recommended_actions"`` lists; missing or ``None`` entries
            are rendered as ``None``.

    Returns:
        The complete report as a single string.

    Raises:
        KeyError: If ``meta`` has no ``"width"`` or ``"height"`` entry.
    """
    # A missing/None/zero fps must not crash report writing after the whole
    # analysis has already succeeded.
    fps = float(meta.get("fps") or 25.0)

    def agent_items(key: str) -> List[Any]:
        # Treat an absent key and an explicit None the same way.
        return agent_report.get(key) or []

    detection_lines = "\n".join(f" {k}: {v}" for k, v in detection_summary.items()) or " None"
    insights = "\n".join(f" • {x}" for x in agent_items("insights")) or " None"
    risks = "\n".join(f" 🚩 {x}" for x in agent_items("risk_flags")) or " None"
    actions = "\n".join(f" → {x}" for x in agent_items("recommended_actions")) or " None"

    return (
        f"VIDEO ANALYSIS REPORT\n{'=' * 50}\n"
        f"File: {uploaded_name}\n"
        f"Duration: {duration_sec}s @ {fps:.1f} FPS\n"
        f"Resolution: {meta['width']}x{meta['height']}\n\n"
        f"DETECTION SUMMARY\n{'-' * 30}\n{detection_lines}\n\n"
        f"AI SUMMARY\n{'-' * 30}\n{ai_summary}\n\n"
        f"KEY INSIGHTS\n{'-' * 30}\n{insights}\n\n"
        f"RISK FLAGS\n{'-' * 30}\n{risks}\n\n"
        f"RECOMMENDED ACTIONS\n{'-' * 30}\n{actions}\n"
    )
def analyze_video(
    video_file,
    frame_interval,
    max_frames,
    confidence,
    ai_provider,
    agent_mode,
    progress=gr.Progress(track_tqdm=False),
):
    """Run the full analysis pipeline on an uploaded video and build the UI outputs.

    The upload is copied into a fresh run directory under ``TMP``, frames are
    extracted and passed to the detector, the detections are summarised, the
    summarizer and agent are invoked, and a JSON report plus a text summary
    are written into the run directory.

    Args:
        video_file: Path string, or an object exposing the path as ``.name``;
            ``None`` when nothing was uploaded.
        frame_interval: Sampling step passed to ``extract_frames`` (cast to int).
        max_frames: Upper bound on extracted frames (cast to int).
        confidence: Detection confidence threshold (cast to float).
        ai_provider: Provider name forwarded to ``get_summary``.
        agent_mode: Mode name forwarded to ``run_agent``.
        progress: Gradio progress tracker used for status updates.

    Returns:
        A 12-tuple: metrics HTML, up to six extracted frames converted from
        BGR to RGB, up to six annotated-frame paths, per-class summary rows,
        per-frame detail rows, the AI summary, three Markdown bullet lists
        (insights, risk flags, recommended actions), the full report dict,
        and the paths of the JSON and text report files.

    Raises:
        gr.Error: If no file was supplied, the video cannot be loaded, or no
            frames could be extracted.
    """
    if video_file is None:
        raise gr.Error("Please upload a video file first.")

    input_path = video_file if isinstance(video_file, str) else video_file.name
    run_id = str(uuid.uuid4())[:8]
    run_dir = os.path.join(TMP, run_id)
    frame_dir = os.path.join(run_dir, "frames")
    annotated_dir = os.path.join(run_dir, "annotated")

    try:
        os.makedirs(frame_dir, exist_ok=True)
        os.makedirs(annotated_dir, exist_ok=True)

        # Work on a private copy inside the run directory.
        # NOTE(review): presumably so the file outlives the upload's temporary
        # location — confirm.
        uploaded_name = os.path.basename(input_path)
        stable_video_path = os.path.join(run_dir, uploaded_name)
        shutil.copy2(input_path, stable_video_path)

        progress(0.05, desc="Loading video")
        try:
            cap, meta = load_video(stable_video_path)
        except Exception as e:
            # Chain the cause so the original traceback stays in the logs.
            raise gr.Error(f"Could not load video: {e}") from e

        try:
            # Missing/None/zero fps falls back to 25.0; max(fps, 1) guards the
            # division for fractional rates.
            fps = float(meta.get("fps") or 25.0)
            total_frames = int(meta.get("total_frames") or 0)
            duration_sec = round(total_frames / max(fps, 1), 1)

            progress(0.22, desc="Extracting frames")
            paths, arrays = extract_frames(
                cap,
                output_dir=frame_dir,
                frame_interval=int(frame_interval),
                max_frames=int(max_frames),
            )
        finally:
            # Always release the capture handle, even if extraction fails.
            release_video(cap)

        if not arrays:
            raise gr.Error(
                "No frames could be extracted from the uploaded video. "
                "Please try another file or lower the frame interval."
            )

        progress(0.48, desc="Running YOLOv8 detection")
        model = get_model()
        all_detections = detect_objects_in_frames(
            model,
            arrays,
            paths,
            confidence_threshold=float(confidence),
            output_dir=annotated_dir,
        )
        detection_summary = summarize_detections(all_detections)

        progress(0.70, desc="Generating summary")
        ai_summary = get_summary(detection_summary, meta, provider=ai_provider)

        progress(0.84, desc="Running agentic analysis")
        agent_report = run_agent(detection_summary, meta, ai_summary, mode=agent_mode)

        frame_stats = get_frame_stats(arrays)
        total_obj = int(sum(detection_summary.values()))
        # Pick the class with the highest count instead of relying on the
        # summary dict happening to be ordered by count.
        top_cls = (
            max(detection_summary, key=detection_summary.get)
            if detection_summary
            else "none"
        )

        metrics_html = f"""
<div style="display:grid;grid-template-columns:repeat(5,minmax(120px,1fr));gap:12px;margin:8px 0 14px 0;">
<div style="background:#1e2130;padding:12px;border-radius:12px;border:1px solid #2d3250;"><b>FPS</b><br>{fps:.1f}</div>
<div style="background:#1e2130;padding:12px;border-radius:12px;border:1px solid #2d3250;"><b>Resolution</b><br>{meta['width']} × {meta['height']}</div>
<div style="background:#1e2130;padding:12px;border-radius:12px;border:1px solid #2d3250;"><b>Duration</b><br>{duration_sec}s</div>
<div style="background:#1e2130;padding:12px;border-radius:12px;border:1px solid #2d3250;"><b>Total detections</b><br>{total_obj}</div>
<div style="background:#1e2130;padding:12px;border-radius:12px;border:1px solid #2d3250;"><b>Top class</b><br>{safe_html(top_cls)}</div>
</div>
"""

        # Only the first six frames are shown in each gallery.
        preview_gallery = [cv2.cvtColor(arr, cv2.COLOR_BGR2RGB) for arr in arrays[:6]]
        annotated_gallery = [det["annotated_path"] for det in all_detections[:6]]

        summary_rows = [[k, int(v)] for k, v in detection_summary.items()]
        if not summary_rows:
            summary_rows = [["No objects detected", 0]]
        frame_detail_rows = summarize_frame_table(all_detections)

        insights_md = "\n".join(f"- {x}" for x in agent_report.get("insights", [])) or "- None"
        risks_md = "\n".join(f"- {x}" for x in agent_report.get("risk_flags", [])) or "- None"
        actions_md = "\n".join(f"- {x}" for x in agent_report.get("recommended_actions", [])) or "- None"

        full_report = {
            "video_file": uploaded_name,
            "video_metadata": meta,
            "frame_stats": frame_stats,
            "settings": {
                "frame_interval": int(frame_interval),
                "max_frames": int(max_frames),
                "confidence": float(confidence),
                "ai_provider": ai_provider,
                "agent_mode": agent_mode,
            },
            "detection_summary": detection_summary,
            "all_detections": all_detections,
            "ai_summary": ai_summary,
            "agent_report": agent_report,
        }

        json_path = os.path.join(run_dir, "video_analysis_report.json")
        txt_path = os.path.join(run_dir, "video_analysis_summary.txt")
        with open(json_path, "w", encoding="utf-8") as f:
            json.dump(full_report, f, indent=2)
        with open(txt_path, "w", encoding="utf-8") as f:
            f.write(
                make_report_text(
                    uploaded_name,
                    meta,
                    duration_sec,
                    detection_summary,
                    ai_summary,
                    agent_report,
                )
            )

        progress(1.0, desc="Done")
        return (
            metrics_html,
            preview_gallery,
            annotated_gallery,
            summary_rows,
            frame_detail_rows,
            ai_summary,
            insights_md,
            risks_md,
            actions_md,
            full_report,
            json_path,
            txt_path,
        )
    except Exception:
        # A failed run produces nothing the UI will serve, so drop its
        # directory (including the copied video) instead of leaking it.
        shutil.rmtree(run_dir, ignore_errors=True)
        raise
# Stylesheet handed to gr.Blocks(css=...): caps the container width and styles
# the "hero" header panel and its pill-shaped "badge".
custom_css = """
.gradio-container {max-width: 1280px !important;}
.hero {
background: linear-gradient(135deg, #1a1f3a 0%, #0d1117 100%);
border: 1px solid #2d3250;
border-radius: 14px;
padding: 22px 24px;
margin-bottom: 14px;
}
.badge {
display:inline-block;
margin-left:10px;
padding:4px 12px;
border-radius:999px;
background:#1a3a1a;
border:1px solid #166534;
color:#4ade80;
font-size:12px;
font-weight:600;
}
"""
# Assemble the UI: header and status banner first, then a row with a controls
# column followed by a results column, and finally the button wiring.
with gr.Blocks(theme=gr.themes.Soft(), css=custom_css, title="AI Video Analyzer — Gradio") as demo:
    gr.HTML(
        """
<div class="hero">
<h1 style="margin:0;">🎬 Agentic AI Video Analysis System <span class="badge">Gradio Edition</span></h1>
<p style="margin:8px 0 0 0;color:#cbd5e1;">
YOLOv8 object detection · Groq LLaMA-3 summarization · Agentic insights
</p>
</div>
"""
    )
    # Banner reflecting whether GROQ_API_KEY was found at import time.
    gr.HTML(build_status_html())
    with gr.Row():
        # Controls column: upload, sampling/detection settings, run button.
        with gr.Column(scale=1):
            video_input = gr.File(
                label="Upload Video",
                file_types=[".mp4", ".avi", ".mov", ".mkv"],
                type="filepath",
            )
            frame_interval = gr.Slider(
                minimum=5,
                maximum=120,
                value=30,
                step=5,
                label="Extract 1 frame every N frames",
            )
            max_frames = gr.Slider(
                minimum=5,
                maximum=50,
                value=15,
                step=5,
                label="Max frames to analyze",
            )
            confidence = gr.Slider(
                minimum=0.10,
                maximum=0.90,
                value=0.40,
                step=0.05,
                label="Confidence threshold",
            )
            # Without an API key only the "mock" backend is offered.
            ai_options = ["groq", "mock"] if HAS_GROQ else ["mock"]
            agent_options = ["groq", "mock"] if HAS_GROQ else ["mock"]
            ai_provider = gr.Dropdown(
                choices=ai_options,
                value=ai_options[0],
                label="Summarization model",
            )
            agent_mode = gr.Dropdown(
                choices=agent_options,
                value=agent_options[0],
                label="Agentic workflow",
            )
            analyze_btn = gr.Button("🚀 Analyze Video", variant="primary")
        # Results column: metrics strip above one tab per result view.
        with gr.Column(scale=2):
            metrics_html = gr.HTML(label="Video Metrics")
            with gr.Tab("Extracted Frames"):
                preview_gallery = gr.Gallery(label="Extracted Frames", columns=3, height="auto")
            with gr.Tab("Annotated Frames"):
                annotated_gallery = gr.Gallery(label="Annotated Frames", columns=3, height="auto")
            with gr.Tab("Detection Summary"):
                summary_table = gr.Dataframe(
                    headers=["Class", "Count"],
                    datatype=["str", "number"],
                    interactive=False,
                    label="Detections by Class",
                )
                frame_table = gr.Dataframe(
                    headers=["Frame Index", "Object Count", "Detected Objects"],
                    datatype=["str", "str", "str"],
                    interactive=False,
                    label="Per-frame Detection Details",
                )
            with gr.Tab("AI Summary"):
                ai_summary_out = gr.Textbox(label="AI Summary", lines=8)
            with gr.Tab("Agent Report"):
                insights_out = gr.Markdown(label="Key Insights")
                risks_out = gr.Markdown(label="Risk Flags")
                actions_out = gr.Markdown(label="Recommended Actions")
            with gr.Tab("Downloads"):
                report_json_view = gr.JSON(label="Full Report Preview")
                json_file = gr.File(label="Download JSON Report")
                txt_file = gr.File(label="Download Text Summary")
    # The outputs list must stay in the same order as the tuple returned by
    # analyze_video (12 values).
    analyze_btn.click(
        fn=analyze_video,
        inputs=[
            video_input,
            frame_interval,
            max_frames,
            confidence,
            ai_provider,
            agent_mode,
        ],
        outputs=[
            metrics_html,
            preview_gallery,
            annotated_gallery,
            summary_table,
            frame_table,
            ai_summary_out,
            insights_out,
            risks_out,
            actions_out,
            report_json_view,
            json_file,
            txt_file,
        ],
    )
if __name__ == "__main__":
    # Script entry point: enable the request queue, then serve on all
    # interfaces at $PORT (7860 when the variable is unset).
    demo.queue()
    listen_port = int(os.getenv("PORT", 7860))
    demo.launch(server_name="0.0.0.0", server_port=listen_port)