bjj-analysis / main.py
samiee2213's picture
Update main.py
fa73ac6 verified
from __future__ import annotations
import os
import time
import shutil
import uuid
import json
import asyncio
import base64
import re
import traceback
from typing import List, Optional, Dict, Any
from fastapi import FastAPI, UploadFile, File, BackgroundTasks, HTTPException, Form
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel, ConfigDict
import warnings
# Suppress warnings
warnings.filterwarnings('ignore', category=FutureWarning)
# CrewAI imports
from crewai import Agent, Task, Crew, Process
from crewai.llm import LLM
# Gemini imports
import google.generativeai as genai
from google.generativeai.types import HarmCategory, HarmBlockThreshold
# OpenCV
import cv2
import numpy as np
# Configuration
GEMINI_API_KEY = os.getenv("GOOGLE_API_KEY")
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
if not GEMINI_API_KEY:
raise ValueError("GOOGLE_API_KEY environment variable required")
if not GROQ_API_KEY:
raise ValueError("GROQ_API_KEY environment variable required")
genai.configure(api_key=GEMINI_API_KEY)
app = FastAPI(title="BJJ AI Coach - Hybrid Agentic")
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
# --- MODELS ---
class TimestampedEvent(BaseModel):
time: str
title: str
description: str
category: Optional[str] = "GENERAL"
frame_image: Optional[str] = None
frame_timestamp: Optional[str] = None
model_config = ConfigDict(extra="allow")
class Drill(BaseModel):
name: str
focus_area: str
reason: str
duration: Optional[str] = "15 min/day"
frequency: Optional[str] = "5x/week"
class DetailedSkillBreakdown(BaseModel):
offense: int
defense: int
guard: int
passing: int
standup: int
class PerformanceGrades(BaseModel):
defense_grade: str
offense_grade: str
control_grade: str
class AnalysisResult(BaseModel):
overall_score: int
performance_label: str
performance_grades: PerformanceGrades
skill_breakdown: DetailedSkillBreakdown
strengths: List[str]
weaknesses: List[str]
missed_opportunities: List[TimestampedEvent]
key_moments: List[TimestampedEvent]
coach_notes: str
recommended_drills: List[Drill]
db_storage = {}
# --- UTILITIES ---
def parse_time_to_seconds(time_str: str) -> Optional[int]:
if not time_str:
return None
match = re.search(r"(\d{1,2}):(\d{2})", time_str)
if not match:
return None
mm, ss = match.groups()
return int(mm) * 60 + int(ss)
def find_closest_frame(target_time_sec: int, frames: list) -> dict:
return min(frames, key=lambda f: abs(f["second"] - target_time_sec))
def attach_frames_to_events(events: List[dict], frames: list):
for event in events:
try:
event_time_sec = parse_time_to_seconds(event.get("time"))
if event_time_sec is None:
continue
closest = find_closest_frame(event_time_sec, frames)
event["frame_timestamp"] = closest["timestamp"]
event["frame_image"] = base64.b64encode(closest["bytes"]).decode("utf-8")
except:
event["frame_image"] = None
def extract_json_from_text(text: str) -> Dict:
"""Robust JSON extraction"""
text = text.strip()
# Direct parse
try:
return json.loads(text)
except:
pass
# Remove markdown
if "```json" in text or "```" in text:
try:
if "```json" in text:
text = text.split("```json")[1].split("```")[0]
else:
text = text.split("```")[1].split("```")[0]
return json.loads(text.strip())
except:
pass
# Find boundaries
try:
start_idx = text.find('{')
if start_idx == -1:
raise ValueError("No opening brace")
brace_count = 0
end_idx = -1
for i in range(start_idx, len(text)):
if text[i] == '{':
brace_count += 1
elif text[i] == '}':
brace_count -= 1
if brace_count == 0:
end_idx = i
break
if end_idx != -1:
json_str = text[start_idx:end_idx+1]
return json.loads(json_str)
# Truncation repair
json_str = text[start_idx:]
open_braces = json_str.count('{')
close_braces = json_str.count('}')
open_brackets = json_str.count('[')
close_brackets = json_str.count(']')
if open_brackets > close_brackets:
json_str += ']' * (open_brackets - close_brackets)
if open_braces > close_braces:
json_str += '}' * (open_braces - close_braces)
return json.loads(json_str)
except:
pass
raise ValueError("Could not extract JSON")
def is_generic(text: str) -> bool:
"""Check if feedback is too generic"""
patterns = [r'^More \w+$', r'^Improve \w+$', r'^Work \w+$', r'^Better \w+$']
for p in patterns:
if re.match(p, text.strip(), re.IGNORECASE):
return True
if not re.search(r'\d{1,2}:\d{2}', text):
return True
if len(text) < 20:
return True
return False
# --- FRAME EXTRACTION ---
def extract_frames(video_path: str) -> tuple:
"""Extract frames with weighted distribution (40% from end)"""
try:
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
raise Exception("Cannot open video")
fps = cap.get(cv2.CAP_PROP_FPS)
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
duration = total_frames / fps if fps > 0 else 0
if duration <= 30:
total_to_extract = 14
elif duration <= 60:
total_to_extract = 16
else:
total_to_extract = 18
print(f"Extracting {total_to_extract} frames from {duration:.1f}s video")
# Weighted: 25% start, 35% middle, 40% end
start_frames = max(4, int(total_to_extract * 0.25))
end_frames = max(6, int(total_to_extract * 0.40))
middle_frames = total_to_extract - start_frames - end_frames
start_section_end = int(total_frames * 0.20)
end_section_start = int(total_frames * 0.80)
frames = []
# Extract START
start_interval = max(1, start_section_end // start_frames)
for i in range(0, start_section_end, start_interval):
if len([f for f in frames if f["second"] < duration * 0.20]) >= start_frames:
break
frame = get_frame(cap, i, fps)
if frame:
frames.append(frame)
# Extract MIDDLE
middle_section_frames = end_section_start - start_section_end
middle_interval = max(1, middle_section_frames // middle_frames)
for i in range(start_section_end, end_section_start, middle_interval):
if len([f for f in frames if duration * 0.20 <= f["second"] < duration * 0.80]) >= middle_frames:
break
frame = get_frame(cap, i, fps)
if frame:
frames.append(frame)
# Extract END
end_section_frames = total_frames - end_section_start
end_interval = max(1, end_section_frames // end_frames)
for i in range(end_section_start, total_frames, end_interval):
if len([f for f in frames if f["second"] >= duration * 0.80]) >= end_frames:
break
frame = get_frame(cap, i, fps)
if frame:
frames.append(frame)
# Always add last frame
last = get_frame(cap, total_frames - 1, fps)
if last and last not in frames:
frames.append(last)
cap.release()
frames.sort(key=lambda f: f["second"])
metadata = {
"duration": round(duration, 2),
"fps": round(fps, 2),
"frames_extracted": len(frames),
"distribution": {"start": start_frames, "middle": middle_frames, "end": end_frames}
}
print(f"Extracted {len(frames)} frames")
return frames, metadata
except Exception as e:
if 'cap' in locals():
cap.release()
raise Exception(f"Frame extraction failed: {str(e)}")
def get_frame(cap: cv2.VideoCapture, frame_idx: int, fps: float) -> Optional[dict]:
try:
cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
ret, frame = cap.read()
if not ret:
return None
h, w = frame.shape[:2]
target_h = 720
target_w = int(w * (target_h / h))
resized = cv2.resize(frame, (target_w, target_h))
_, buffer = cv2.imencode('.jpg', resized, [cv2.IMWRITE_JPEG_QUALITY, 85])
timestamp_sec = frame_idx / fps
timestamp_str = f"{int(timestamp_sec // 60):02d}:{int(timestamp_sec % 60):02d}"
return {
"bytes": buffer.tobytes(),
"timestamp": timestamp_str,
"second": round(timestamp_sec, 2),
"frame_idx": frame_idx
}
except:
return None
# --- STEP 1: GEMINI VISION EXTRACTION ---
async def extract_frame_observations(frames: List[Dict], user_desc: str, opp_desc: str, duration: float) -> str:
"""Use Gemini to analyze frames and extract observations"""
print("STEP 1: Gemini Vision - Frame Analysis")
try:
# Build frame list
frame_list = "\n".join([
f"Frame {i+1} at {f['timestamp']} ({f['second']}s)"
for i, f in enumerate(frames)
])
prompt = f"""
You are an expert Brazilian Jiu-Jitsu (BJJ) video analyst performing STRICT FRAME-BY-FRAME PERCEPTION.
YOUR ROLE IS LIMITED TO OBSERVATION.
You do NOT judge performance, assign scores, or give coaching advice.
PRIMARY RULES (NON-NEGOTIABLE):
- You MUST rely ONLY on what is visibly observable in each frame.
- You are FORBIDDEN from assuming intent, pain, referee actions, or outcomes.
- If evidence is unclear or partially visible, you MUST say:
"Insufficient visual evidence to confirm."
- If video or frames are not related to BJJ, say "You must provide a video of Brazilian-Jiu-Jitsu only.".
====================
VIDEO CONTEXT
====================
- Duration: {duration}s
- Total Frames: {len(frames)}
- Athlete Being Analyzed (User): {user_desc}
- Opponent: {opp_desc}
====================
REFERENCE KNOWLEDGE (VOCABULARY ONLY)
====================
Use the following terms ONLY if clearly supported by visual evidence.
POSITIONS:
Standing, Clinch,
Closed Guard,
Open Guard (Butterfly, De La Riva, Spider, X-Guard),
Half Guard (Top / Bottom, Knee Shield, Deep Half),
Side Control (Standard, Kesa Gatame, Reverse Kesa),
North-South,
Mount (Low, High, S-Mount),
Back Control (with hooks or body triangle),
Turtle (Top / Bottom).
IMPORTANT POSITION RULE:
- "Full Mount" requires:
BOTH knees on the mat,
hips square over opponent’s torso,
opponent flat on back,
and NO leg entanglement.
- If ANY condition is missing, DO NOT label as mount.
Use "Top control (not mount)" or "Transitional position".
ATTACKS & THREATS:
Chokes (RNC, Guillotine, Triangle, Arm Triangle, D'Arce, Anaconda,
Ezekiel, Collar chokes),
Joint Locks (Armbar, Kimura, Americana, Omoplata, Wrist locks),
Leg Locks (Straight Ankle, Kneebar, Heel Hook, Toe Hold, Calf Slicer).
CONTROL INDICATORS (SUPPORTING ONLY, NOT DECISIVE):
- Hip or head control
- Chest-to-chest pressure
- Hooks or body triangle
- Limb isolation
- Flattening opponent
- Opponent forced into defensive posture
====================
SUBMISSION CONFIRMATION (STRICT)
====================
A submission may ONLY be marked if at least ONE is explicitly visible:
- Tapping (hand, foot, or body)
- Match stoppage during a locked submission
- Footage ends immediately during an unmistakably locked submission
Pattern cues alone (leg entanglement, arching, neck control)
are NEVER sufficient.
If unclear → classify as "submission attempt" or "no submission".
====================
FRAME-BY-FRAME TASK
====================
For EACH frame, report exactly:
1. POSITION:
The clearest dominant or transitional position
(use conservative labels when unsure).
2. ADVANTAGE:
User / Opponent / Neutral
(based ONLY on visible control).
3. ACTION TYPE (SELECT ONE):
OFFENSE | DEFENSE | GUARD | PASSING | STANDUP | NONE
4. THREATS:
None / Submission Attempt (name it) / Positional Advance.
5. TECHNICAL DETAILS:
Observable grips, pressure, transitions, defenses, or escapes.
Do NOT speculate.
ACTION TYPE DEFINITIONS:
- OFFENSE: Initiated submission attempts or attack chains
- DEFENSE: Escaping, framing, or defending submissions
- GUARD: Bottom-position control, sweeps, or attacks
- PASSING: Clearing legs and advancing past guard
- STANDUP: Takedowns or clinch exchanges
- NONE: Static control or transitions without active skill use
STRICT OUTPUT FORMAT:
Frame X (MM:SS):
[Position] - [Advantage] - [Action Type] - [Threats] - [Technical Details]
====================
CRITICAL FINAL FRAMES (LAST 6–7 ONLY)
====================
Analyze carefully:
- Is a submission CLEARLY locked?
- Is tapping EXPLICITLY visible?
- Does the footage end during control?
DECISION RULE:
- Without tapping or stoppage → NO submission.
====================
FINAL SUMMARY (FACTUAL ONLY)
====================
Provide a short factual summary:
1. OUTCOME VERDICT:
- Submission: YES / NO
- Winner: User / Opponent / NONE
- Technique: <name or NONE>
- Time: MM:SS or NONE
- Confidence: HIGH / MEDIUM / LOW
- Evidence: Brief quote or paraphrase from frames
2. POSITIONAL OVERVIEW:
- Which positions were clearly established?
- Who held visible positional control overall?
FINAL CHECK (MANDATORY):
- No submission without explicit evidence
- No "full mount" unless criteria are met
- No techniques not visible in frames
- No coaching, scoring, or evaluation language
"""
# Prepare content
content = []
for f in frames:
content.append({
"mime_type": "image/jpeg",
"data": base64.b64encode(f["bytes"]).decode("utf-8")
})
content.append(prompt)
# Call Gemini
start = time.time()
model = genai.GenerativeModel(
model_name="gemini-2.5-flash",
generation_config={
"temperature": 0.2,
"max_output_tokens": 8000
}
)
response = await asyncio.get_event_loop().run_in_executor(
None,
lambda: model.generate_content(
content,
safety_settings={
HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
}
)
)
elapsed = time.time() - start
print(f"Gemini vision: {elapsed:.2f}s")
# Get text
try:
observations = response.text
except:
observations = response.candidates[0].content.parts[0].text
return observations
except Exception as e:
print(f"Vision extraction failed: {e}")
return f"Error analyzing frames: {str(e)}"
# --- STEP 2: CREWAI AGENTS ---
def create_analysis_crew(observations: str, user_desc: str, opp_desc: str, duration: float):
"""Create CrewAI agents for analysis and formatting"""
# Groq LLM for fast text processing
llm = LLM(
model="groq/llama-3.3-70b-versatile",
api_key=GROQ_API_KEY,
temperature=0.2
)
# Agent 1: Technical Analyst
analyst = Agent(
role="BJJ Technical Analyst",
goal=f"Analyze frame observations for {user_desc} to detect submissions, score performance, and identify strengths/weaknesses",
backstory="""
You are a BJJ black belt coach acting as an EVIDENCE-BASED TECHNICAL AUDITOR.
IMPORTANT SCOPE LIMIT:
- You do NOT analyze video or frames directly.
- You ONLY analyze the OBSERVATIONS provided by the vision model.
- The OBSERVATIONS are the single source of truth.
OUTCOME AUTHORITY RULE:
- You MUST accept the Outcome Verdict stated in the OBSERVATIONS.
- You are NOT allowed to override or reinterpret submission decisions.
- If the verdict confidence is MEDIUM or LOW, treat the match as having NO submission.
POSITION AUTHORITY RULE:
- You MUST respect position labels used in OBSERVATIONS.
- You may summarize positional trends but MUST NOT relabel positions.
ALLOWED ACTIONS:
- Aggregate frame-level facts into performance insights
- Score performance based on observed evidence
- Identify strengths, weaknesses, and missed opportunities
- Provide coaching feedback grounded in timestamps
FORBIDDEN ACTIONS:
- Do NOT infer intent, pain, or referee behavior
- Do NOT upgrade control into a submission
- Do NOT introduce techniques not present in OBSERVATIONS
- Do NOT repeat the same issue using different wording
LANGUAGE & SCORING CONSTRAINTS:
- Every claim must reference a timestamp
- Generic phrases are forbidden
- Every score must be justified by at least one timestamp
- If user was submitted: Defense ≤40
- If user finished opponent: Offense ≥80
-NEVER mention frame numbers, frame ranges, or frame indices in the final output.
"""
,
verbose=True,
allow_delegation=False,
llm=llm,
memory=True
)
# Agent 2: JSON Formatter
formatter = Agent(
role="Data Structure Specialist",
goal="Convert analysis into valid JSON matching exact schema requirements",
backstory="""You transform technical analysis into structured JSON. You ensure:
- Exactly 3 strengths and 3 weaknesses
- All feedback includes timestamps (MM:SS format)
- No generic phrases like "More aggression"
- Scores reflect actual match outcome
- JSON is valid (no trailing commas, proper syntax)
""",
verbose=True,
allow_delegation=False,
llm=llm
)
# Task 1: Analysis
analysis_task = Task(
description=f"""
Analyze the OBSERVATIONS produced by a frame-by-frame BJJ vision system.
IMPORTANT:
- Do NOT re-detect submissions or re-label positions.
- Your role is to evaluate performance quality based on OBSERVATIONS only.
====================
OBSERVATIONS (AUTHORITATIVE)
====================
{observations}
VIDEO INFO:
- Duration: {duration}s
- User: {user_desc}
- Opponent: {opp_desc}
====================
REQUIRED OUTPUT
====================
1. OUTCOME SUMMARY:
- Restate the outcome exactly as supported by OBSERVATIONS.
- Do NOT modify submission status or technique.
2. SKILL SCORING (0–100, REALISTIC):
**OVERALL SCORE PHILOSOPHY:**
- Recreational rolls (no finish): 60-75 range
- User achieves submission: 75-88 range
- User gets submitted: 45-60 range
- Close competitive match: 65-75 range
- Dominant performance (no finish): 70-80 range
⚔️ OFFENSE (Submission attempts & attack chains):
- Finished opponent: 80-92
- Multiple strong attempts: 70-80
- Some attempts visible: 60-70
- Positional pressure only: 50-60
- Minimal attacking: 40-50
- Purely defensive: 30-40
🛡️ DEFENSE (Escapes, survival, defending attacks):
- Got submitted: 35-45 (defensive failure evident)
- Never seriously threatened: 70-80
- Defended some attacks: 60-70
- Struggled but survived: 50-60
- Heavy defensive pressure: 40-50
🔒 GUARD (Bottom position effectiveness):
- Active sweeps/attacks: 70-85
- Controlled from bottom: 60-70
- Some guard work: 50-60
- Guard passed easily: 35-45
- Minimal guard play: 25-35
🚶 PASSING (Clearing legs, advancing):
- Multiple passes: 75-85
- Successful pass(es): 65-75
- Attempted passing: 55-65
- Pressure but no pass: 45-55
- Minimal passing work: 35-45
🧍 STANDUP (Takedowns/clinch):
- Successful takedown(s): 70-85
- Strong attempts: 60-70
- Some standup: 50-60
- No standup engagement: 0
**OUTCOME ADJUSTMENT:**
- Submission finish: +10-15 to overall, offense 80+, defense 70+
- Got submitted: -10-15 to overall, defense 35-45, offense capped at 65
- Dominant position control: +5-10 to overall
- Lost position badly: -5-10 to overall
- Close match: neutral (65-70 base)
Each score MUST reference at least one timestamp.
3. STRENGTHS (EXACTLY 3):
- Timestamped, technical, non-repetitive
- If submission occurred, Strength #1 MUST be the finish
4. WEAKNESSES (EXACTLY 3):
- Timestamped, distinct technical issues
- If user was submitted, Weakness #1 MUST be the failure
5. MISSED OPPORTUNITIES (2–3):
- Must be visible in OBSERVATIONS
- Positional or submission-chain only
6. COACH NOTES (150–250 words):
- Technical, honest, evidence-based
- No speculation
7. DRILLS (EXACTLY 3):
- Each drill maps directly to a weakness
- Include timestamp justification
FINAL CHECK:
- No contradiction of OBSERVATIONS
- No new techniques
- Scores align with demonstrated actions
"""
,
agent=analyst,
expected_output="Detailed technical analysis with submission detection"
)
# Task 2: JSON Formatting
formatting_task = Task(
description="""Convert the analysis into this EXACT JSON structure. NO markdown wrapping.
{{
"overall_score": <int 0-100>,
"performance_label": "EXCELLENT|STRONG|SOLID|DEVELOPING|NEEDS IMPROVEMENT",
"performance_grades": {{
"defense_grade": "<A+|A|B+|B|C+|C|D+|D>",
"offense_grade": "<letter>",
"control_grade": "<letter>"
}},
"skill_breakdown": {{
"offense": <int>,
"defense": <int>,
"guard": <int>,
"passing": <int>,
"standup": <int>
}},
"strengths": [
"At 0:XX - Specific observation (min 25 chars)",
"At 0:XX - Another specific observation",
"At 0:XX - Third specific observation"
],
"weaknesses": [
"At 0:XX - Specific weakness (min 25 chars)",
"At 0:XX - Another weakness",
"At 0:XX - Third weakness"
],
"missed_opportunities": [
{{"time": "MM:SS", "title": "Brief", "description": "Detail", "category": "SUBMISSION|POSITION|SWEEP"}}
],
"key_moments": [
{{"time": "MM:SS", "title": "Event", "description": "What happened", "category": "SUBMISSION|TRANSITION|DEFENSE"}}
],
"coach_notes": "Paragraph 150-250 words",
"recommended_drills": [
{{"name": "Drill 1", "focus_area": "Area", "reason": "Why at timestamp", "duration": "15 min/day", "frequency": "5x/week"}},
{{"name": "Drill 2", "focus_area": "Area", "reason": "Why", "duration": "10 min/day", "frequency": "4x/week"}},
{{"name": "Drill 3", "focus_area": "Area", "reason": "Why", "duration": "12 min/day", "frequency": "3x/week"}}
]
}}
VALIDATION:
- All timestamps in MM:SS format
- No trailing commas
- Exactly 3 strengths, 3 weaknesses, 3 drills
- All feedback includes timestamps
- No generic phrases
""",
agent=formatter,
expected_output="Valid JSON only"
)
# Create crew
crew = Crew(
agents=[analyst, formatter],
tasks=[analysis_task, formatting_task],
process=Process.sequential,
verbose=True
)
return crew
# --- HYBRID ANALYSIS ---
async def hybrid_agentic_analysis(
frames: List[Dict],
metadata: Dict,
user_desc: str,
opp_desc: str,
activity_type: str,
analysis_id: str = None
) -> AnalysisResult:
"""Hybrid: Gemini vision + CrewAI agents + Python validation"""
print("\n" + "="*70)
print("HYBRID AGENTIC ANALYSIS")
print("="*70)
try:
if analysis_id:
db_storage[analysis_id]["progress"] = 30
# STEP 1: Gemini Vision
observations = await extract_frame_observations(
frames, user_desc, opp_desc, metadata["duration"]
)
if analysis_id:
db_storage[analysis_id]["progress"] = 60
# STEP 2: CrewAI Agents
print("\nSTEP 2: CrewAI Agents - Analysis & Formatting")
crew = create_analysis_crew(observations, user_desc, opp_desc, metadata["duration"])
crew_start = time.time()
result = await asyncio.get_event_loop().run_in_executor(
None,
crew.kickoff
)
crew_time = time.time() - crew_start
print(f"CrewAI: {crew_time:.2f}s")
if analysis_id:
db_storage[analysis_id]["progress"] = 90
# STEP 3: Parse & Validate
print("\nSTEP 3: Python Validation")
result_text = str(result)
# Clean markdown
if "```json" in result_text:
result_text = result_text.split("```json")[1].split("```")[0].strip()
elif "```" in result_text:
result_text = result_text.split("```")[1].split("```")[0].strip()
data = extract_json_from_text(result_text)
data = validate_and_filter(data, frames)
# Attach frames
attach_frames_to_events(data.get("missed_opportunities", []), frames)
attach_frames_to_events(data.get("key_moments", []), frames)
if analysis_id:
db_storage[analysis_id]["progress"] = 100
print("Analysis complete")
print("="*70 + "\n")
return AnalysisResult(**data)
except Exception as e:
print(f"Hybrid analysis failed: {e}")
traceback.print_exc()
fallback = make_fallback(frames)
if analysis_id:
db_storage[analysis_id]["used_fallback"] = True
return AnalysisResult(**fallback)
def validate_and_filter(data: Dict, frames: List[Dict]) -> Dict:
"""Python-level validation and generic filtering"""
# Validate scores
if "overall_score" not in data:
data["overall_score"] = 65
data["overall_score"] = max(0, min(100, data["overall_score"]))
if "performance_label" not in data:
score = data["overall_score"]
if score >= 85:
data["performance_label"] = "EXCELLENT PERFORMANCE"
elif score >= 75:
data["performance_label"] = "STRONG PERFORMANCE"
elif score >= 60:
data["performance_label"] = "SOLID PERFORMANCE"
else:
data["performance_label"] = "DEVELOPING PERFORMANCE"
if "performance_grades" not in data:
data["performance_grades"] = {"defense_grade": "C+", "offense_grade": "C", "control_grade": "C+"}
if "skill_breakdown" not in data:
base = data["overall_score"]
data["skill_breakdown"] = {
"offense": max(0, min(100, base - 5)),
"defense": max(0, min(100, base + 3)),
"guard": max(0, min(100, base - 2)),
"passing": max(0, min(100, base - 10)),
"standup": max(0, min(100, base - 13))
}
# Filter generic feedback
for field in ["strengths", "weaknesses"]:
if field in data and data[field]:
filtered = [item for item in data[field] if not is_generic(item)]
if len(filtered) >= 3:
data[field] = filtered[:3]
else:
data[field] = make_specific(field, frames, filtered)
else:
data[field] = make_specific(field, frames, [])
# Validate other fields
if "missed_opportunities" not in data or not data["missed_opportunities"]:
data["missed_opportunities"] = [{
"time": frames[len(frames)//2]["timestamp"],
"title": "Position",
"description": "Review",
"category": "POSITION"
}]
if "key_moments" not in data or not data["key_moments"]:
data["key_moments"] = [{
"time": frames[-3]["timestamp"],
"title": "Exchange",
"description": "Work",
"category": "TRANSITION"
}]
if "coach_notes" not in data or len(data["coach_notes"]) < 50:
data["coach_notes"] = "Focus on techniques. Review timestamps for improvement."
if "recommended_drills" not in data or len(data["recommended_drills"]) < 3:
data["recommended_drills"] = [
{"name": "Control", "focus_area": "General", "reason": "Improve awareness", "duration": "15 min/day", "frequency": "5x/week"},
{"name": "Guard", "focus_area": "Defense", "reason": "Strengthen defense", "duration": "10 min/day", "frequency": "4x/week"},
{"name": "Flow", "focus_area": "Movement", "reason": "Improve transitions", "duration": "12 min/day", "frequency": "3x/week"}
]
return data
def make_specific(field: str, frames: List[Dict], existing: List[str]) -> List[str]:
feedback = existing.copy()
start = frames[len(frames) // 6]
mid = frames[len(frames) // 2]
end = frames[-2] if len(frames) > 1 else frames[-1]
if field == "strengths":
templates = [
f"At {start['timestamp']} - Maintained structure in opening",
f"At {mid['timestamp']} - Showed awareness during exchange",
f"At {end['timestamp']} - Demonstrated control"
]
else:
templates = [
f"At {start['timestamp']} - Could improve positioning",
f"At {mid['timestamp']} - Slow to recognize opportunity",
f"At {end['timestamp']} - Room to improve execution"
]
for t in templates:
if len(feedback) < 3:
feedback.append(t)
return feedback[:3]
def make_fallback(frames: List[Dict]) -> Dict:
mid = frames[len(frames)//2]["timestamp"]
end = frames[-2]["timestamp"] if len(frames) > 1 else frames[-1]["timestamp"]
return {
"overall_score": 65,
"performance_label": "SOLID PERFORMANCE",
"performance_grades": {"defense_grade": "C+", "offense_grade": "C", "control_grade": "C+"},
"skill_breakdown": {"offense": 60, "defense": 68, "guard": 63, "passing": 55, "standup": 52},
"strengths": [
f"At 0:10 - Maintained structure",
f"At {mid} - Showed awareness",
f"At {end} - Demonstrated control"
],
"weaknesses": [
f"At 0:15 - Could improve positioning",
f"At {mid} - Slow to recognize opportunity",
f"At {end} - Room to improve execution"
],
"missed_opportunities": [{"time": mid, "title": "Position", "description": "Review", "category": "POSITION"}],
"key_moments": [{"time": end, "title": "Exchange", "description": "Work", "category": "TRANSITION"}],
"coach_notes": "Focus on techniques. Review timestamps.",
"recommended_drills": [
{"name": "Control", "focus_area": "General", "reason": "Improve", "duration": "15 min/day", "frequency": "5x/week"},
{"name": "Guard", "focus_area": "Defense", "reason": "Strengthen", "duration": "10 min/day", "frequency": "4x/week"},
{"name": "Flow", "focus_area": "Movement", "reason": "Improve", "duration": "12 min/day", "frequency": "3x/week"}
]
}
# --- API ---
@app.post("/analyze-complete")
async def analyze_complete(
file: UploadFile = File(...),
user_description: str = Form(...),
opponent_description: str = Form(...),
activity_type: str = Form("Brazilian Jiu-Jitsu")
):
start_time = time.time()
file_path = None
try:
file_name = f"{uuid.uuid4()}_{file.filename}"
file_path = f"temp_videos/{file_name}"
os.makedirs("temp_videos", exist_ok=True)
with open(file_path, "wb") as buffer:
shutil.copyfileobj(file.file, buffer)
analysis_id = str(uuid.uuid4())
db_storage[analysis_id] = {"status": "processing", "progress": 0}
# Extract frames
frames, metadata = await asyncio.get_event_loop().run_in_executor(
None, extract_frames, file_path
)
# Hybrid analysis
result = await hybrid_agentic_analysis(
frames, metadata,
user_description.strip(), opponent_description.strip(),
activity_type, analysis_id
)
total_time = time.time() - start_time
return {
"status": "completed",
"data": result.model_dump(),
"processing_time": f"{total_time:.2f}s",
"used_fallback": db_storage[analysis_id].get("used_fallback", False),
"method": "hybrid_agentic"
}
except Exception as e:
print(f"Error: {e}")
try:
frames_fb, _ = await asyncio.get_event_loop().run_in_executor(None, extract_frames, file_path)
fallback = make_fallback(frames_fb)
except:
fallback = make_fallback([{"timestamp": "00:30", "second": 30}])
return {
"status": "completed_with_fallback",
"data": fallback,
"error": str(e),
"used_fallback": True
}
finally:
if file_path:
try:
os.remove(file_path)
except:
pass
@app.get("/health")
async def health_check():
return {"status": "healthy", "version": "26.0.0-hybrid-agentic"}
@app.get("/")
async def root():
return {
"message": "BJJ AI Coach - Hybrid Agentic",
"version": "26.0.0",
"architecture": "Gemini Vision + CrewAI Agents + Python Validation",
"agents": {
"gemini": "Frame-by-frame vision analysis",
"analyst_agent": "Technical analysis + submission detection (Groq)",
"formatter_agent": "JSON structure + validation (Groq)",
"python": "Generic filtering + frame attachment"
},
"benefits": [
"Gemini's vision for accurate frame analysis",
"Groq's speed for text processing (3-5x faster)",
"Multi-agent review for quality",
"Python guardrails against generic feedback"
]
}
if __name__ == "__main__":
import uvicorn
port = int(os.environ.get("PORT", 7860))
uvicorn.run(app, host="0.0.0.0", port=port)