# bjj-agentic / old.py — BJJ AI Coach analysis service
from __future__ import annotations
import os
import time
import shutil
import uuid
import json
import asyncio
import base64
import re
import traceback
from typing import List, Optional, Dict, Any
from fastapi import FastAPI, UploadFile, File, BackgroundTasks, HTTPException, Form
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel, ConfigDict
import warnings
# Suppress warnings
warnings.filterwarnings('ignore', category=FutureWarning)
# CrewAI imports
from crewai import Agent, Task, Crew, Process
from crewai.llm import LLM
# Gemini imports
import google.generativeai as genai
from google.generativeai.types import HarmCategory, HarmBlockThreshold
# OpenCV
import cv2
import numpy as np
# Configuration — both API keys are hard requirements; fail fast at import
# time so a misconfigured deployment never starts serving requests.
GEMINI_API_KEY = os.getenv("GOOGLE_API_KEY")
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
if not GEMINI_API_KEY:
    raise ValueError("GOOGLE_API_KEY environment variable required")
if not GROQ_API_KEY:
    raise ValueError("GROQ_API_KEY environment variable required")
genai.configure(api_key=GEMINI_API_KEY)
app = FastAPI(title="BJJ AI Coach")
# NOTE(review): wildcard origins combined with allow_credentials=True is very
# permissive — confirm this is intended for production deployments.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# --- MODELS ---
class TimestampedEvent(BaseModel):
    """A single timeline event (key moment or missed opportunity)."""
    time: str  # MM:SS position within the video
    title: str
    description: str
    category: Optional[str] = "GENERAL"
    frame_image: Optional[str] = None  # base64-encoded JPEG of the nearest frame
    frame_timestamp: Optional[str] = None  # MM:SS of the attached frame
    # Tolerate extra keys that the LLM formatter may emit.
    model_config = ConfigDict(extra="allow")
class Drill(BaseModel):
    """A recommended training drill tied to an observed weakness."""
    name: str
    focus_area: str
    reason: str
    duration: Optional[str] = "15 min/day"
    frequency: Optional[str] = "5x/week"
class DetailedSkillBreakdown(BaseModel):
    """Per-phase skill scores, each 0-100."""
    offense: int
    defense: int
    guard: int
    passing: int
    standup: int
class PerformanceGrades(BaseModel):
    """Letter grades (A+ … D) for the three headline categories."""
    defense_grade: str
    offense_grade: str
    control_grade: str
class AnalysisResult(BaseModel):
    """Full structured output returned to the client for one analysis job."""
    overall_score: int
    performance_label: str
    performance_grades: PerformanceGrades
    skill_breakdown: DetailedSkillBreakdown
    strengths: List[str]
    weaknesses: List[str]
    missed_opportunities: List[TimestampedEvent]
    key_moments: List[TimestampedEvent]
    coach_notes: str
    recommended_drills: List[Drill]
# In-memory job store keyed by analysis id — not persistent across restarts.
db_storage = {}
# --- UTILITIES ---
def parse_time_to_seconds(time_str: str) -> Optional[int]:
    """Convert the first MM:SS token found in *time_str* to total seconds.

    Returns None for empty/None input or when no MM:SS token is present.
    """
    if not time_str:
        return None
    found = re.search(r"(\d{1,2}):(\d{2})", time_str)
    if found is None:
        return None
    minutes, seconds = (int(part) for part in found.groups())
    return minutes * 60 + seconds
def find_closest_frame(target_time_sec: int, frames: list) -> dict:
    """Return the frame dict whose "second" value is nearest to target_time_sec.

    Ties go to the earliest matching frame (same behaviour as min()).
    """
    best = frames[0]
    best_gap = abs(best["second"] - target_time_sec)
    for candidate in frames[1:]:
        gap = abs(candidate["second"] - target_time_sec)
        if gap < best_gap:
            best, best_gap = candidate, gap
    return best
def attach_frames_to_events(events: List[dict], frames: list):
    """Attach the closest extracted frame (timestamp + base64 JPEG) to each event.

    Mutates each event dict in place. Events without a parseable "time" are
    skipped; any per-event failure marks that event with frame_image=None
    instead of aborting the whole batch.
    """
    for event in events:
        try:
            event_time_sec = parse_time_to_seconds(event.get("time"))
            if event_time_sec is None:
                continue
            closest = find_closest_frame(event_time_sec, frames)
            encoded = base64.b64encode(closest["bytes"]).decode("utf-8")
            # Assign only after both lookups succeeded, so a mid-way failure
            # cannot leave a frame_timestamp without its matching image.
            event["frame_timestamp"] = closest["timestamp"]
            event["frame_image"] = encoded
        except Exception:
            # Narrowed from a bare `except:` (which also swallowed
            # KeyboardInterrupt/SystemExit); still best-effort per event.
            event["frame_image"] = None
def extract_json_from_text(text: str) -> Dict:
    """Robust JSON extraction from LLM output.

    Tries, in order: (1) the raw text, (2) a ```json / ``` fenced block,
    (3) the first balanced {...} object, and (4) a truncated object repaired
    by appending the missing closing brackets/braces.

    Raises:
        ValueError: when no strategy yields parseable JSON.
    """
    text = text.strip()
    # 1) Whole string is already JSON.
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        pass
    # 2) Markdown code fence.
    if "```json" in text or "```" in text:
        try:
            if "```json" in text:
                text = text.split("```json")[1].split("```")[0]
            else:
                text = text.split("```")[1].split("```")[0]
            return json.loads(text.strip())
        except (json.JSONDecodeError, IndexError):
            pass
    # 3) First balanced object, or 4) repair of a truncated one.
    try:
        start_idx = text.find('{')
        if start_idx == -1:
            raise ValueError("No opening brace")
        brace_count = 0
        end_idx = -1
        for i in range(start_idx, len(text)):
            if text[i] == '{':
                brace_count += 1
            elif text[i] == '}':
                brace_count -= 1
                if brace_count == 0:
                    end_idx = i
                    break
        if end_idx != -1:
            return json.loads(text[start_idx:end_idx + 1])
        # Truncated model output: close any dangling brackets, then braces.
        json_str = text[start_idx:]
        json_str += ']' * max(0, json_str.count('[') - json_str.count(']'))
        json_str += '}' * max(0, json_str.count('{') - json_str.count('}'))
        return json.loads(json_str)
    except (json.JSONDecodeError, ValueError):
        # Narrowed from bare `except:` clauses in the original.
        pass
    raise ValueError("Could not extract JSON")
def is_generic(text: str) -> bool:
    """Heuristic check for feedback that is too vague to be useful.

    Flags short two-word platitudes ("More aggression"), any feedback with
    no MM:SS timestamp, and anything under 20 characters.
    """
    stripped = text.strip()
    vague_templates = (r'^More \w+$', r'^Improve \w+$', r'^Work \w+$', r'^Better \w+$')
    if any(re.match(pattern, stripped, re.IGNORECASE) for pattern in vague_templates):
        return True
    lacks_timestamp = re.search(r'\d{1,2}:\d{2}', text) is None
    return lacks_timestamp or len(text) < 20
def calculate_feedback_count(duration: float) -> Dict[str, int]:
    """
    Calculate feedback counts based on video duration (client-specified scaling).
    Client's Requirements:
    - ≤15s: 1 strength, 1 weakness
    - 15-45s: 2 strengths, 2 weaknesses
    - 45-90s: 3 strengths, 3 weaknesses
    - 90-180s: 4 strengths, 4 weaknesses
    - 180-360s: 5 strengths, 5 weaknesses
    Returns dict with counts for: strengths, weaknesses, opportunities, moments
    """
    # (upper bound inclusive, (strengths, weaknesses, opportunities, moments))
    tiers = (
        (15, (1, 1, 1, 1)),
        (45, (2, 2, 2, 2)),
        (90, (3, 3, 2, 3)),    # up to 1:30
        (180, (4, 4, 3, 4)),   # up to 3:00
    )
    chosen = (5, 5, 4, 5)  # default: 3-6 minutes (up to 360s)
    for upper_bound, counts in tiers:
        if duration <= upper_bound:
            chosen = counts
            break
    return dict(zip(("strengths", "weaknesses", "opportunities", "moments"), chosen))
# --- ENHANCED DENSE FRAME EXTRACTION ---
def extract_dense_consecutive_frames(video_path: str) -> tuple:
    """
    OPTIMIZED: Extract frames for MAXIMUM ACCURACY in 50-60s total processing
    Strategy - Balanced for speed + accuracy:
    - 10-15s video: 15 frames (~1.0s intervals) → Gemini ~30s
    - 15-30s video: 20 frames (~1.2s intervals) → Gemini ~40s
    - 30-60s video: 30 frames (~1.8s intervals) → Gemini ~50s
    - 60-90s video: 40 frames (~2.0s intervals) → Gemini ~60s
    Distribution (submission-focused):
    - START (0-20%): 20% of frames
    - MIDDLE (20-70%): 30% of frames
    - END (70-100%): 50% of frames (DENSEST for submission detection)

    Returns:
        (frames, metadata): frames is a list of get_frame() dicts sorted by
        timestamp; metadata summarises duration, fps, counts and distribution.

    Raises:
        ValueError: duration outside the 5s-6min window (user-facing message,
            propagated unchanged so the API layer can show it verbatim).
        RuntimeError: any other extraction failure (original cause chained).
    """
    cap = None
    try:
        cap = cv2.VideoCapture(video_path)
        if not cap.isOpened():
            raise RuntimeError("Cannot open video")
        fps = cap.get(cv2.CAP_PROP_FPS)
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        duration = total_frames / fps if fps > 0 else 0
        # Validate video duration
        if duration < 5:
            raise ValueError("Video too short (< 5 seconds). Please upload a longer clip (10-90 seconds recommended).")
        if duration > 360:  # 6 minutes max
            raise ValueError("Video too long (> 6 minutes). Please upload a shorter clip (15s-6min) for optimal analysis.")
        # OPTIMIZED FRAME COUNTS - Balanced for 50-60s Gemini processing
        if duration <= 15:
            total_to_extract = 15  # ~1.0s intervals → ~30s Gemini
        elif duration <= 30:
            total_to_extract = 20  # ~1.5s intervals → ~40s Gemini
        elif duration <= 45:
            total_to_extract = 25  # ~2.0s intervals → ~50s Gemini
        elif duration <= 60:
            total_to_extract = 25  # ~2.25s intervals → ~60s Gemini
        else:
            total_to_extract = 35  # ~2.7s intervals → ~65s Gemini (max)
        print(f"📹 OPTIMIZED EXTRACTION: {total_to_extract} frames from {duration:.1f}s video")
        print(f" Target: 1 frame every {duration/total_to_extract:.1f}s (Gemini: ~{total_to_extract * 1.5:.0f}s)")
        # SUBMISSION-FOCUSED distribution: 20% start, 30% middle, 50% end
        start_frames = max(3, int(total_to_extract * 0.20))
        middle_frames = max(6, int(total_to_extract * 0.30))
        end_frames = total_to_extract - start_frames - middle_frames
        print(f" Distribution (submission-focused): START={start_frames}, MIDDLE={middle_frames}, END={end_frames}")
        # Define sections by frame index (20% / 50% / 30% of the timeline)
        start_section_end = int(total_frames * 0.20)
        middle_section_start = start_section_end
        middle_section_end = int(total_frames * 0.70)
        end_section_start = middle_section_end
        frames = []
        # Extract START section (0-20%) - Overview
        start_interval = max(1, start_section_end // start_frames)
        for i in range(0, start_section_end, start_interval):
            if len([f for f in frames if f["second"] < duration * 0.20]) >= start_frames:
                break
            frame = get_frame(cap, i, fps)
            if frame:
                frames.append(frame)
        # Extract MIDDLE section (20-70%) - Standard coverage
        middle_section_frames = middle_section_end - middle_section_start
        middle_interval = max(1, middle_section_frames // middle_frames)
        for i in range(middle_section_start, middle_section_end, middle_interval):
            if len([f for f in frames if duration * 0.20 <= f["second"] < duration * 0.70]) >= middle_frames:
                break
            frame = get_frame(cap, i, fps)
            if frame:
                frames.append(frame)
        # Extract END section (70-100%) - DENSEST for submissions (50% of all frames!)
        end_section_frames = total_frames - end_section_start
        end_interval = max(1, end_section_frames // end_frames)
        print(f" END section (50% of frames): 1 frame every {end_interval/fps:.2f}s for submission detection")
        for i in range(end_section_start, total_frames, end_interval):
            if len([f for f in frames if f["second"] >= duration * 0.70]) >= end_frames:
                break
            frame = get_frame(cap, i, fps)
            if frame:
                frames.append(frame)
        # CRITICAL: Always add final 2 frames for tap detection
        for offset in [2, 1]:
            final_frame_idx = total_frames - offset
            if final_frame_idx > 0:
                frame = get_frame(cap, final_frame_idx, fps)
                if frame:
                    # De-duplicate against frames already collected above.
                    if not any(f["frame_idx"] == frame["frame_idx"] for f in frames):
                        frames.append(frame)
        frames.sort(key=lambda f: f["second"])
        # Calculate stats
        intervals = [frames[i]["second"] - frames[i - 1]["second"] for i in range(1, len(frames))]
        avg_interval = sum(intervals) / len(intervals) if intervals else 0
        metadata = {
            "duration": round(duration, 2),
            "fps": round(fps, 2),
            "frames_extracted": len(frames),
            "avg_frame_interval": round(avg_interval, 2),
            "estimated_gemini_time": round(len(frames) * 1.5, 1),  # ~1.5s per frame
            "distribution": {
                "start": len([f for f in frames if f["second"] < duration * 0.20]),
                "middle": len([f for f in frames if duration * 0.20 <= f["second"] < duration * 0.70]),
                "end": len([f for f in frames if f["second"] >= duration * 0.70])
            }
        }
        print(f"✅ Extracted {len(frames)} frames (avg interval: {avg_interval:.2f}s)")
        print(f" Estimated Gemini time: ~{metadata['estimated_gemini_time']:.0f}s")
        print(f" Actual distribution: START={metadata['distribution']['start']}, "
              f"MIDDLE={metadata['distribution']['middle']}, "
              f"END={metadata['distribution']['end']} (50% in final 30%!)")
        return frames, metadata
    except ValueError:
        # Duration-validation errors carry user-facing messages; the original
        # re-wrapped them into a generic Exception, obscuring the message type.
        raise
    except Exception as e:
        raise RuntimeError(f"Frame extraction failed: {e}") from e
    finally:
        # Release the capture handle on every path (the original relied on a
        # `'cap' in locals()` check inside the exception handler).
        if cap is not None:
            cap.release()
def get_frame(cap: cv2.VideoCapture, frame_idx: int, fps: float) -> Optional[dict]:
    """Seek to frame_idx, decode, downscale to 720p height, and JPEG-encode.

    Returns a dict with keys "bytes" (JPEG bytes), "timestamp" (MM:SS string),
    "second" (float) and "frame_idx", or None when the frame cannot be read
    or encoded.
    """
    try:
        cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
        ret, frame = cap.read()
        if not ret:
            return None
        h, w = frame.shape[:2]
        # Normalise height to 720px while preserving aspect ratio.
        target_h = 720
        target_w = int(w * (target_h / h))
        resized = cv2.resize(frame, (target_w, target_h))
        _, buffer = cv2.imencode('.jpg', resized, [cv2.IMWRITE_JPEG_QUALITY, 85])
        timestamp_sec = frame_idx / fps
        timestamp_str = f"{int(timestamp_sec // 60):02d}:{int(timestamp_sec % 60):02d}"
        return {
            "bytes": buffer.tobytes(),
            "timestamp": timestamp_str,
            "second": round(timestamp_sec, 2),
            "frame_idx": frame_idx
        }
    except Exception:
        # Narrowed from a bare `except:` — still best-effort (None on any
        # decode/resize failure) but no longer swallows KeyboardInterrupt.
        return None
# --- ENHANCED GEMINI VISION WITH CONSECUTIVE CONTEXT ---
async def extract_frame_observations(frames: List[Dict], user_desc: str, opp_desc: str, duration: float, metadata: Dict) -> str:
    """Use Gemini to analyze DENSE CONSECUTIVE frames.

    Sends every extracted frame (base64-encoded JPEG) plus a large analysis
    prompt to Gemini 2.5 Flash and returns the model's free-text observations.
    On failure, returns an error string instead of raising — the caller treats
    the return value as observation text either way.
    """
    print("STEP 1: Gemini Vision - Dense Consecutive Frame Analysis")
    try:
        # Build detailed frame list with time gaps
        frame_details = []
        for i, f in enumerate(frames):
            if i > 0:
                time_gap = f["second"] - frames[i-1]["second"]
                # Flag gaps > 2s so the model can spot discontinuities.
                gap_indicator = f" [+{time_gap:.1f}s]" if time_gap > 2 else ""
            else:
                gap_indicator = ""
            frame_details.append(f"Frame {i+1} @ {f['timestamp']} ({f['second']:.1f}s){gap_indicator}")
        frame_list = "\n".join(frame_details)
        avg_interval = metadata.get("avg_frame_interval", 2.0)
        print(user_desc, opp_desc)
        # The prompt below is runtime text sent to Gemini verbatim — including
        # its spelling quirks — and is kept byte-for-byte unchanged.
        prompt = f"""
You are an expert BJJ analyst performing CONSECUTIVE FRAME ANALYSIS on {len(frames)} frames from a {duration}s match.
USER: {user_desc} | OPPONENT: {opp_desc}
Identify the two main grapplers on the basis of user and opponent description above.
IMP: Ignore background people, only focus on 2 athletes described across all frames.
Average gap: {avg_interval:.1f}s between frames
FRAMES WITH TIME GAPS:
{frame_list}
====================================================================================
CRITICAL: CONSECUTIVE FRAME CONTEXT
====================================================================================
You have {len(frames)} CONSECUTIVE frames with small time gaps. This allows you to:
- SEE COMPLETE SEQUENCES develop (setup → execution → finish)
- Connect frames showing how techniques develop.
- IDENTIFY PATTERNS in technique development
- UNDERSTAND CONTEXT of each position change
Your analysis MUST span the ENTIRE {duration}s video!
REQUIRED DISTRIBUTION:
- Early (0-{int(duration*0.2)}s): ~20% of key moments
- Middle ({int(duration*0.2)}-{int(duration*0.7)}s): ~30% of key moments
- Late ({int(duration*0.7)}-{int(duration)}s): ~50% of key moments
- Focus on covering consecutive span of action also in all the sections
CONSECUTIVE ANALYSIS RULES:
1. Frames < 2s apart = CONTINUOUS ACTION
→ Describe HOW the action PROGRESSED from previous frame
→ Example: "Continues Frame 5's wrist control, now rotating hips underneath..."
2. Frames > 3s apart = NEW SEQUENCE
→ Note the gap: "NEW SEQUENCE [3.2s gap] - position changed to..."
3. Track Multi-Frame Developments:
→ Frame 8: "User establishes wrist control"
→ Frame 9: "Continues wrist control from Frame 8, now rotating hips"
→ Frame 10: "Continues Frame 8-9 sequence, arm now fully extended"
4. Always Reference Previous Frames:
→ "Continues the sweep attempt from Frame 12..."
→ "Builds on the guard pass started in Frames 15-16..."
====================================================================================
STEP 0: CONTENT VERIFICATION
====================================================================================
Is this BJJ/grappling? (gi/no-gi, wrestling, judo newaza, submission grappling)
If NO → {{"content_verification": "FAILED", "reason": "[what you see]"}}
====================================================================================
CORE PRINCIPLES
====================================================================================
MUST DO:
- Describe ONLY what's visible
- Say "Unclear" when uncertain
- ALWAYS reference previous frames when action continues
- Track how positions DEVELOP across consecutive frames
FORBIDDEN:
- NO assumptions about pain, intent, or gaps between frames
- NO speculation beyond visible evidence
====================================================================================
BJJ REFERENCE VOCABULARY
====================================================================================
Use actual BJJ Techniques names
POSITIONS:
- Standing: Both athletes upright
- Clinch: Standing with upper body control
- Closed Guard: Legs locked around opponent's waist
- Open Guard: Legs not locked but controlling (Butterfly, De La Riva, Spider, X-Guard)
- Half Guard: One leg trapped between opponent's legs
- Side Control: Chest across chest, perpendicular, opponent flat
- North-South: Head-to-head position
- Mount: ONLY if ALL 4 criteria met (both knees down, hips square, opponent flat, NO leg entanglement)
→ If ANY missing: say "Top pressure" or "Transitional position"
- Back Control: Behind opponent with hooks or body triangle
- Turtle: On hands and knees
- Scramble: Both moving, position unclear
COMMON SUBMISSIONS:
Chokes: Rear Naked Choke (RNC), Guillotine, Triangle, Arm Triangle, D'Arce, Anaconda, Ezekiel
Joint Locks: Armbar, Kimura, Americana, Omoplata, Wrist locks
Leg Locks: Straight Ankle Lock, Kneebar, Heel Hook, Toe Hold, Calf Slicer
SWEEPS & TECHNIQUES:
Scissor Sweep, Flower Sweep, Hip Bump, Butterfly Sweep, X-Guard Sweep
Technical Standup, Elbow Escape (Shrimp), Bridge & Roll
====================================================================================
SUBMISSION CONFIRMATION (STRICT)
====================================================================================
With consecutive frames, track COMPLETE submission sequences:
CONFIRMED ONLY IF:
- Lock visible in 2+ consecutive frames AND
- EXPLICIT tap (hand slapping mat/body 2+ times) OR match stops during lock
Example Progression:
Frame 18: "Ankle isolated, beginning extension"
Frame 19: "Continues Frame 18 - extension increasing, back arching"
Frame 20: "Continues Frame 18-19 - full extension, grimacing visible"
Frame 21: "Hand slapping mat 2x - TAP CONFIRMED"
NOT SUFFICIENT:
- Position alone without tap
- "Appears painful" without tap
- Hand moves once
====================================================================================
DETAILED FRAME-BY-FRAME ANALYSIS (EMPHASIZE PROGRESSION)
====================================================================================
For EACH frame:
Frame X (MM:SS) [+X.Xs from previous]:
POSITION: [Specific name or "Transitional"]
ADVANTAGE: User / Opponent / Neutral
ACTION TYPE: OFFENSE / DEFENSE / GUARD / PASSING / STANDUP / NONE
- OFFENSE = Submission attempts or attack chains (NOT just holding)
- DEFENSE = Escaping, framing, defending (NOT just being on bottom)
- GUARD = Bottom with legs controlling (NOT just being on back)
- PASSING = Actively clearing legs (NOT just being on top)
- STANDUP = Takedown attempts or clinch
- NONE = Static control or unclear
WHAT'S HAPPENING (DETAILED):
[Describe body positions, grips, pressure points, movement direction,use BJJ technique used]
Be specific: "User's right hand controls opponent's left wrist at 90° angle, left hand framing chest..."
ATHLETE POSITIONS:
User: [Upper/lower body, grips, hip placement, head position, what attempting]
Opponent: [Position, posture, reactions, defensive/offensive actions]
THREATS: None / [Specific submission or positional advance]
CONSECUTIVE CONTEXT (CRITICAL):
If < 2s from previous:
→ "CONTINUES [action] from Frame X - progression: [what changed]"
→ "Builds on Frame X's [position], now [new development]"
If > 3s gap:
→ "NEW SEQUENCE - [describe new situation]"
FRAME-TO-FRAME CHANGES:
[What specifically CHANGED from previous frame: grips, weight, limb positions, pressure]
Describe HOW things developed, not just static positions.
Be specific about what is happenening in depth position and bjj voacbulary relevant to frame and accurate
====================================================================================
EXAMPLE (Follow This Pattern)
====================================================================================
Frame 12 (00:28) [+1.2s]:
POSITION: Half Guard (User on bottom)
ADVANTAGE: Neutral
ACTION TYPE: GUARD
WHAT'S HAPPENING: User securing half guard with right leg hooking opponent's left leg. Left arm framing against chest, right hand controlling wrist. Opponent driving forward.
ATHLETE POSITIONS:
User: Bottom half guard, active knee shield with left leg, maintaining frame distance
Opponent: Top pressure, attempting to flatten, right hand posting
THREATS: Opponent attempting guard pass
STRENGTH/WEAKNESS OF USER: [if applicable]
CONSECUTIVE CONTEXT: NEW SEQUENCE after scramble in previous frames
FRAME-TO-FRAME CHANGES: Stabilized into half guard from scramble
Frame 13 (00:29) [+1.0s]:
POSITION: Half Guard (User on bottom)
ADVANTAGE: Slightly favors User
ACTION TYPE: GUARD
WHAT'S HAPPENING: User secured underhook with right arm. Left leg knee shield more active. Beginning to turn into opponent.
ATHLETE POSITIONS:
User: Underhook secured, knee shield elevated, hips turning underneath
Opponent: Pressure reduced, posting with both hands
STRENGTH/WEAKNESS OF USER: [if applicable]
THREATS: User developing sweep opportunity
CONSECUTIVE CONTEXT: CONTINUES half guard from Frame 12 - PROGRESSION: secured underhook, beginning sweep mechanics
FRAME-TO-FRAME CHANGES: Right arm moved from wrist control to underhook; hips rotated 15-20 degrees
====================================================================================
FINAL SUMMARY
====================================================================================
OUTCOME:
- Submission: YES/NO (only if tap visible)
- Winner: User / Opponent / NONE
- Technique: [Name] or NONE
- Evidence: "Frames X-Y show [progression]: Frame X (setup) → Frame Y (control) → Frame Z (finish)"
- Confidence: HIGH/MEDIUM/LOW
POSITIONAL FLOW:
Narrate match progression chronologically:
- How positions developed across consecutive frames
- Key transitions and turning points
- Which sequences led to advantages/disadvantages
KEY MULTI-FRAME SEQUENCES (2-4):
Format: "Frames X-Y: [Sequence Name]"
- Frame X: [Initial state]
- Frame Y: [Development]
- Frame Z: [Culmination]
- Impact: [Effect on match]
SUBMISSION SEQUENCES (if any):
If submission occurred, describe COMPLETE development:
- Setup phase (Frames X-Y): [How lock initiated]
- Control phase (Frames Y-Z): [How position tightened]
- Finish phase (Frame Z): [How tap occurred]
====================================================================================
CRITICAL REMINDERS
====================================================================================
- Your advantage: {len(frames)} consecutive frames = see COMPLETE sequences
- - ALWAYS use specific BJJ technique names
- Distinguish attacking positions (side control, mount) from defensive (turtle, bottom)
- Always connect frames: "Continues from Frame X..." or "Builds on Frame X..."
- Track progressions: Describe HOW things developed, not just static positions
- Reference sequences: "Frames X-Y show [technique] developing..."
- Time gaps matter: Note when gaps > 3s indicate new sequences
- Be detailed: Specific grips, angles, pressure points, momentum
- "Unclear" better than guessing: Conservative analysis prevents wrong diagnosis
Think like a slow-motion replay analyst - you can see every step of technique development.
"""
        # Prepare content: all frame images first, then the text prompt.
        content = []
        for f in frames:
            content.append({
                "mime_type": "image/jpeg",
                "data": base64.b64encode(f["bytes"]).decode("utf-8")
            })
        content.append(prompt)
        # Call Gemini
        start = time.time()
        model = genai.GenerativeModel(
            model_name="gemini-2.5-flash",
            generation_config={
                "temperature": 0.2,
                "max_output_tokens": 12000  # Increased for more frames
            }
        )
        # Run the blocking SDK call in a thread so the event loop stays free.
        response = await asyncio.get_event_loop().run_in_executor(
            None,
            lambda: model.generate_content(
                content,
                safety_settings={
                    HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
                    HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
                    HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
                    HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
                }
            )
        )
        elapsed = time.time() - start
        print(f"✅ Gemini vision completed: {elapsed:.2f}s ({len(frames)} frames analyzed)")
        # response.text raises when the response was blocked or multipart;
        # fall back to reading the first candidate's parts directly.
        # NOTE(review): bare except kept as-is; consider narrowing to Exception.
        try:
            observations = response.text
        except:
            observations = response.candidates[0].content.parts[0].text
        # Log first 500 chars for debugging
        print(f"📄 Observations preview: {observations[:500]}...")
        return observations
    except Exception as e:
        # Error text is returned (not raised) so downstream treats it as
        # observation content; callers check it for failure markers.
        print(f"❌ Vision extraction failed: {e}")
        traceback.print_exc()
        return f"Error analyzing frames: {str(e)}"
# --- CREWAI AGENTS (UPDATED FOR DENSE FRAMES) ---
def create_analysis_crew(observations: str, user_desc: str, opp_desc: str, duration: float, num_frames: int):
    """Create CrewAI agents with awareness of dense consecutive frame analysis.

    Builds a two-agent sequential crew — an analyst that turns the raw frame
    observations into coaching analysis, and a formatter that emits the final
    JSON — and returns the assembled Crew (not yet kicked off).

    Fixes vs. the previous revision:
    - formatter backstory and formatting-task description are now f-strings,
      so `{feedback_counts[...]}` placeholders and `{{ }}` JSON-brace escapes
      render correctly instead of reaching the agent as raw template text;
    - WEAKNESSES / KEY MOMENTS counts use their own keys (were 'strengths');
    - MISSED OPPORTUNITIES count is injected;
    - missing `[` added after "missed_opportunities" in the JSON template;
    - removed an unused genai.GenerativeModel local.
    """
    feedback_counts = calculate_feedback_count(duration)
    # Single Groq-hosted LLM shared by both agents.
    llm = LLM(
        model="groq/llama-3.3-70b-versatile",
        api_key=GROQ_API_KEY,
        temperature=0.2
    )
    analyst = Agent(
        role="BJJ Technical Analyst",
        goal=f"Analyze {num_frames} consecutive frame observations for {user_desc} to detect submissions, score performance, and identify patterns",
        backstory=f"""
You are a BJJ black belt coach analyzing DENSE CONSECUTIVE FRAME observations.
CONTEXT AWARENESS:
- You received observations from {num_frames} frames (high density sampling)
- Frames are CONSECUTIVE with small time gaps (avg 1-2 seconds)
- This allows you to see COMPLETE action sequences, not just snapshots
CRITICAL RULES:
1. OUTCOME AUTHORITY: Accept submission verdicts from observations - do NOT override
2. SEQUENCE AWARENESS: Look for multi-frame progressions described in observations
3. POSITION AUTHORITY: Respect position labels used in observations
4. TIMESTAMP PRECISION: Every claim must reference specific timestamps
5. NO GENERICS: "More aggression" and similar phrases are FORBIDDEN
6. TIMESTAMP DISTRIBUTION: Spread feedback across ENTIRE {duration}s video
- Strength 1: From early section (0-{int(duration*0.2)}s)
- Strength 2: From middle section ({int(duration*0.2)}-{int(duration*0.7)}s)
- Strength 3: From late section ({int(duration*0.7)}-{int(duration)}s)
- Same pattern for weaknesses, opportunities, and key moments
SCORING GUIDELINES:
- If user was submitted: Defense ≤40, Overall ≤60
- If user finished opponent: Offense ≥80, Overall ≥80
- Score based on demonstrated actions, not potential
STRENGTHS/WEAKNESSES:
- Must be SPECIFIC with timestamps
- Minimum 25 characters with context
- If submission occurred, it MUST be #1 in relevant category
- Each item must be distinct (no repetition with different wording)
DENSE FRAME ADVANTAGE:
- Use the sequential context to identify setup patterns
- Reference frame progressions (e.g., "Frames 25-28 showed grip sequence leading to...")
- Distinguish between isolated mistakes vs systematic issues
""",
        verbose=True,
        allow_delegation=False,
        llm=llm,
        memory=True
    )
    formatter = Agent(
        role="Data Structure Specialist",
        goal="Convert analysis into valid JSON matching exact schema requirements",
        # f-string so the dynamic counts actually interpolate (was a plain string).
        backstory=f"""You transform technical analysis into structured JSON.
REQUIREMENTS:
- Exactly {feedback_counts['strengths']} strengths and {feedback_counts['weaknesses']} weaknesses (dynamic based on {duration}s video)
- All feedback includes timestamps (MM:SS format)
- No generic phrases like "More aggression" or "Improve timing"
- Scores reflect actual match outcome
- JSON is valid (no trailing commas, proper syntax)
- Each strength/weakness minimum 25 characters
VALIDATION CHECKS:
- All timestamps in MM:SS format? ✓
- No trailing commas? ✓
- Expected number of items in each category? ✓
- All feedback includes timestamps? ✓
- No generic phrases? ✓
""",
        verbose=True,
        allow_delegation=False,
        llm=llm,
        memory=True
    )
    analysis_task = Task(
        description=f"""
Analyze CONSECUTIVE frame observations from BJJ match.
OBSERVATIONS (from {num_frames} frames):
{observations}
VIDEO INFO:
- Duration: {duration}s
- Frames analyzed: {num_frames} (consecutive with ~1-2s intervals)
- User: {user_desc}
- Opponent: {opp_desc}
- Imp: Do not hallucinate any info beyond observations, refer it as the only truth.
REQUIRED OUTPUT:
1. OUTCOME SUMMARY:
- Restate outcome exactly as in observations, no truth beyond observations.
- Note frame sequences if submission occurred
2. SKILL SCORING (0-100, evidence-based):
⚔️ OFFENSE (0-100):
Measures: Submission attempts, attack chains, offensive pressure
- Achieved submission: 82-94
- Multiple dangerous attempts: 72-82
- Some offensive work: 62-72
- Limited attacking: 52-62
- Minimal offense: 42-52
- No offensive actions: 30-42
🛡️ DEFENSE (0-100):
Measures: Escapes, survival under pressure, defending submissions
- Got submitted: 38-48 (clear defensive gap)
- Under heavy pressure but survived: 52-62
- Some defensive challenges: 62-72
- Solid defense, few threats: 72-82
- Never seriously threatened: 75-85
🔒 GUARD (0-100):
Measures: Bottom position control and attacks
- Active sweeps/submissions from guard: 72-85
- Controlled well from bottom: 62-72
- Some guard retention: 52-62
- Guard passed multiple times: 38-48
- Minimal guard engagement: 28-38
🚶 PASSING (0-100):
Measures: Ability to clear legs and advance past guard
- Multiple successful passes: 75-88
- One or more passes: 68-78
- Strong passing pressure: 58-68
- Attempted but unsuccessful: 48-58
- Minimal passing work: 38-48
🧍 STANDUP (0-100):
Measures: Takedowns and clinch exchanges
- Successful takedown(s): 72-88
- Strong attempts: 62-72
- Some standup work: 52-62
- Brief standup only: 42-52
- No standup engagement: 0
**OVERALL SCORE CALCULATION:**
1. Start with base score from positional flow
2. Apply outcome modifier:
- Submission achieved: +12-18 points
- Got submitted: -12-18 points
- Dominant positions: +6-10 points
- Lost positions badly: -6-10 points
3. Ensure final score reflects match reality
4. Range check: 45-58 (submitted), 60-72 (typical), 72-88 (strong/dominant)
5. KEY PRINCIPLE: If a phase isn't in the video, it doesn't affect the score negatively! no penalising
Below all should be coach-like, specific, and reference timestamps
Most relevant/imp observations should be used for strengths, weaknesses, opportunities, and key moments and timestamps/frame analysis should be distributed evenly throughout the video.
3. STRENGTHS (EXACTLY {feedback_counts['strengths']}):
- Format: "At MM:SS - [Specific TECHNIQUE used + WHAT it accomplished]"
- They should be the most important positive actions observed by user
- Example style: "You defended opponent's heel hook by maintaining forward pressure" or "You initiated scramble to pass guard using leg drag"
- If submission: #1 MUST be the finish
- NO vague phrases like "showed awareness" or "maintained position" - be SPECIFIC about the action and result
4. WEAKNESSES (EXACTLY {feedback_counts['weaknesses']}):
- Format: "At MM:SS - [Specific MISTAKE + CONSEQUENCE that resulted]"
- They should be the most critical negative actions observed by user
- Example style: "You didn't have enough top pressure allowing opponent to escape to turtle" or "Weak submission attempt caused you to lose control"
- If submitted: #1 MUST be the defensive failure
- CRITICAL: Check timestamps don't contradict strengths (don't say they failed at what they succeeded at)
5. MISSED OPPORTUNITIES (EXACTLY {feedback_counts['opportunities']}):
- List specific, frame-visible technical opportunities with exact timestamps.
- Only include actions clearly observable in the footage.
Good examples:
* “Darce choke opening from top side control at 00:24”
* “Single-leg available when opponent posted hand at 00:15”
* “Guard pass opportunity during leg reposition at 00:31”
* “Side control escape by framing under chin at 00:42”
Avoid generic or subjective feedback/ non-observable claims/missing timestamps
6. KEY MOMENTS (EXACTLY {feedback_counts['moments']}):
- Highlight the BEST moments from BOTH athletes (not biased to user only)
- Include significant actions from BOTH user AND opponent (takedowns, passes, submissions, escapes, sweeps)
- Think like a highlight reel: What were the most important/impressive moments in the match?
- Examples: "User took down opponent with double leg", "Opponent quickly recovered guard", "Opponent submitted user with armbar"
7. COACH NOTES (150-250 words):
Write like a REAL gym coach talking after watching the roll - conversational, direct, accurate.
CRITICAL:
- Start with what you SAW: "Nice work on...", "I noticed...", "That pass at..." and address the user as "You".
- Be ACCURATE: Only mention what actually happened (no standup if none occurred, don't say "struggled" if they succeeded)
- Use BJJ slang naturally: "That knee slice was tight", "Hunt for the underhook", "Stay heavy on top"
- Avoid AI words: "demonstrated", "showcased", "exhibited", "positional awareness"
- Give 2-3 specific things to work on with timestamps
- End with encouragement or next steps
8. DRILLS (EXACTLY 3):
- Each addresses a specific weakness
- Include timestamp justification
""",
        agent=analyst,
        expected_output="Detailed technical analysis with submission detection and sequential awareness"
    )
    formatting_task = Task(
        # f-string so the {{ }} escapes collapse to real JSON braces in the prompt.
        description=f"""Convert the analysis into this EXACT JSON structure. NO markdown wrapping.
{{
"overall_score": <int 0-100>,
"performance_label": "EXCELLENT|STRONG|SOLID|DEVELOPING|NEEDS IMPROVEMENT",
"performance_grades": {{
"defense_grade": "<A+|A|B+|B|C+|C|D+|D>",
"offense_grade": "<letter>",
"control_grade": "<letter>"
}},
"skill_breakdown": {{
"offense": <int>,
"defense": <int>,
"guard": <int>,
"passing": <int>,
"standup": <int>
}},
"strengths": [
"At 0:XX - Specific observation with context (min 25 chars)",
"At 0:XX - Another specific observation",
"At 0:XX - Third specific observation"
],
"weaknesses": [
"At 0:XX - Specific weakness with context (min 25 chars)",
"At 0:XX - Another weakness",
"At 0:XX - Third weakness"
],
"missed_opportunities": [
{{"time": "MM:SS", "title": "Brief", "description": "Detail", "category": "SUBMISSION|POSITION|SWEEP"}}
],
"key_moments": [
{{"time": "MM:SS", "title": "Event", "description": "What happened", "category": "SUBMISSION|TRANSITION|DEFENSE"}}
],
"coach_notes": "Paragraph 150-250 words",
"recommended_drills": [
{{"name": "Drill 1", "focus_area": "Area", "reason": "Why (reference timestamp)", "duration": "15 min/day", "frequency": "5x/week"}},
{{"name": "Drill 2", "focus_area": "Area", "reason": "Why", "duration": "10 min/day", "frequency": "4x/week"}},
{{"name": "Drill 3", "focus_area": "Area", "reason": "Why", "duration": "12 min/day", "frequency": "3x/week"}}
]
}}
VALIDATION CHECKS:
- All timestamps in MM:SS format ✓
- No trailing commas ✓
- All feedback includes timestamps ✓
- No generic phrases ✓
- Valid JSON syntax ✓
""",
        agent=formatter,
        expected_output="Valid JSON only"
    )
    crew = Crew(
        agents=[analyst, formatter],
        tasks=[analysis_task, formatting_task],
        process=Process.sequential,
        verbose=True
    )
    return crew
# --- HYBRID ANALYSIS ---
def _content_rejection_result(strengths: List[str], weaknesses: List[str], coach_notes: str) -> AnalysisResult:
    """Build the zeroed-out AnalysisResult returned when content verification fails.

    Keeps the normal response shape (so clients can render it unchanged) while
    carrying explanatory text instead of real analysis.
    """
    return AnalysisResult(
        overall_score=0,
        performance_label="CONTENT VERIFICATION FAILED",
        performance_grades={"defense_grade": "N/A", "offense_grade": "N/A", "control_grade": "N/A"},
        skill_breakdown={"offense": 0, "defense": 0, "guard": 0, "passing": 0, "standup": 0},
        strengths=strengths,
        weaknesses=weaknesses,
        missed_opportunities=[],
        key_moments=[],
        coach_notes=coach_notes,
        recommended_drills=[],
    )


async def hybrid_agentic_analysis(
    frames: List[Dict],
    metadata: Dict,
    user_desc: str,
    opp_desc: str,
    activity_type: str,
    analysis_id: Optional[str] = None
) -> AnalysisResult:
    """Hybrid pipeline: Gemini vision + CrewAI agents + Python validation.

    Steps:
      1. `extract_frame_observations` turns dense frames into text observations
         (may embed a JSON rejection message for non-grappling content).
      2. A CrewAI crew analyses and formats the observations as JSON.
      3. Python-side parsing/validation repairs and enriches the result.

    Progress (30/60/90/100) is written into ``db_storage[analysis_id]`` when an
    id is supplied. Any failure falls back to `make_fallback`.
    """
    print("\n" + "="*70)
    print("HYBRID AGENTIC ANALYSIS (Dense Consecutive Frames)")
    print("="*70)
    try:
        if analysis_id:
            db_storage[analysis_id]["progress"] = 30
        # STEP 1: Gemini Vision with dense frames
        observations = await extract_frame_observations(
            frames, user_desc, opp_desc, metadata["duration"], metadata
        )
        # Content verification failure is signalled as substrings in the raw
        # observation text rather than a structured flag.
        if "content_verification" in observations and "FAILED" in observations:
            print("❌ Content verification failed - not BJJ/grappling content")
            # Try to parse the rejection message for a specific reason/action.
            try:
                rejection_data = json.loads(observations)
                reason = rejection_data.get("reason", "Video does not appear to contain BJJ or grappling content.")
                suggested = rejection_data.get("suggested_action", "Please upload a BJJ or grappling video.")
                if analysis_id:
                    db_storage[analysis_id]["status"] = "rejected"
                    db_storage[analysis_id]["rejection_reason"] = reason
                return _content_rejection_result(
                    strengths=[
                        "This video does not appear to contain BJJ or grappling content.",
                        "Please upload footage showing ground grappling, submissions, or takedowns.",
                        "Acceptable: BJJ (gi/no-gi), wrestling, judo newaza, submission grappling."
                    ],
                    weaknesses=[
                        f"Content detected: {reason}",
                        "This system is designed specifically for grappling analysis.",
                        f"Action needed: {suggested}"
                    ],
                    coach_notes=f"⚠️ CONTENT VERIFICATION FAILED\n\n{reason}\n\n{suggested}\n\nThis AI system is specifically trained for Brazilian Jiu-Jitsu and grappling analysis. It cannot analyze striking-based martial arts, non-combat sports, or general videos. Please upload a video showing:\n\n• Ground grappling or submissions\n• Takedowns or clinch work\n• BJJ, wrestling, judo, or submission grappling\n\nFor best results, ensure the video clearly shows both athletes engaged in grappling exchanges.",
                )
            # BUG FIX: was a bare `except:` which also swallowed
            # SystemExit/KeyboardInterrupt; narrowed to Exception.
            except Exception:
                # Fallback if the rejection payload cannot be parsed.
                if analysis_id:
                    db_storage[analysis_id]["status"] = "rejected"
                    db_storage[analysis_id]["rejection_reason"] = "Video content verification failed"
                return _content_rejection_result(
                    strengths=[
                        "Video does not appear to contain BJJ or grappling content.",
                        "Please upload footage of ground grappling or submissions.",
                        "This system is designed for grappling analysis only."
                    ],
                    weaknesses=[
                        "Upload a video showing BJJ, wrestling, or submission grappling.",
                        "Ensure both athletes are visible and engaged in grappling.",
                        "Videos should show ground work, takedowns, or submissions."
                    ],
                    coach_notes="⚠️ CONTENT VERIFICATION FAILED\n\nThis video does not appear to contain Brazilian Jiu-Jitsu or grappling content. This AI system is specifically designed for analyzing ground grappling, submissions, and takedowns.\n\nPlease upload a video showing:\n• BJJ (gi or no-gi)\n• Wrestling\n• Judo (newaza)\n• Submission grappling\n• MMA grappling exchanges\n\nFor optimal results, ensure the video clearly shows both athletes engaged in grappling.",
                )
        if analysis_id:
            db_storage[analysis_id]["progress"] = 60
        # STEP 2: CrewAI Agents (blocking kickoff runs in a thread executor)
        print("\nSTEP 2: CrewAI Agents - Analysis & Formatting")
        crew = create_analysis_crew(observations, user_desc, opp_desc, metadata["duration"], len(frames))
        crew_start = time.time()
        result = await asyncio.get_event_loop().run_in_executor(
            None,
            crew.kickoff
        )
        crew_time = time.time() - crew_start
        print(f"✅ CrewAI completed: {crew_time:.2f}s")
        if analysis_id:
            db_storage[analysis_id]["progress"] = 90
        # STEP 3: Parse & Validate — strip optional markdown fences, then
        # repair/enrich the JSON on the Python side.
        print("\nSTEP 3: Python Validation")
        result_text = str(result)
        if "```json" in result_text:
            result_text = result_text.split("```json")[1].split("```")[0].strip()
        elif "```" in result_text:
            result_text = result_text.split("```")[1].split("```")[0].strip()
        data = extract_json_from_text(result_text)
        data = validate_and_filter(data, frames, metadata["duration"])
        attach_frames_to_events(data.get("missed_opportunities", []), frames)
        attach_frames_to_events(data.get("key_moments", []), frames)
        if analysis_id:
            db_storage[analysis_id]["progress"] = 100
        print("✅ Analysis complete")
        print("="*70 + "\n")
        return AnalysisResult(**data)
    except Exception as e:
        # Any stage failing yields a canned-but-valid result instead of a 500.
        print(f"❌ Hybrid analysis failed: {e}")
        traceback.print_exc()
        fallback = make_fallback(frames)
        if analysis_id:
            db_storage[analysis_id]["used_fallback"] = True
        return AnalysisResult(**fallback)
def validate_and_filter(data: Dict, frames: List[Dict], duration: float) -> Dict:
    """Python-level validation and generic filtering of the LLM's JSON output.

    Fills in any missing fields with sensible defaults, clamps the score to
    0-100, and guarantees strengths/weaknesses reference at least one timestamp
    from the final 30% of the video.

    Args:
        data: Parsed (possibly incomplete) analysis dict; mutated in place.
        frames: Sampled frames with "timestamp" (MM:SS) and "second" keys.
        duration: Video length in seconds.

    Returns:
        The same dict, repaired.
    """
    if "overall_score" not in data:
        data["overall_score"] = 65
    data["overall_score"] = max(0, min(100, data["overall_score"]))
    if "performance_label" not in data:
        score = data["overall_score"]
        if score >= 85:
            data["performance_label"] = "EXCELLENT PERFORMANCE"
        elif score >= 75:
            data["performance_label"] = "STRONG PERFORMANCE"
        elif score >= 60:
            data["performance_label"] = "SOLID PERFORMANCE"
        else:
            data["performance_label"] = "DEVELOPING PERFORMANCE"
    if "performance_grades" not in data:
        data["performance_grades"] = {"defense_grade": "C+", "offense_grade": "C", "control_grade": "C+"}
    if "skill_breakdown" not in data:
        # Derive per-skill scores from the overall score with fixed offsets.
        base = data["overall_score"]
        data["skill_breakdown"] = {
            "offense": max(0, min(100, base - 5)),
            "defense": max(0, min(100, base + 3)),
            "guard": max(0, min(100, base - 2)),
            "passing": max(0, min(100, base - 10)),
            "standup": max(0, min(100, base - 13))
        }
    feedback_counts = calculate_feedback_count(duration)
    # Ensure feedback covers the late section (final 30%) of the video.
    late_section_start = duration * 0.7
    for field in ["strengths", "weaknesses"]:
        # BUG FIX: original did `data[field]` and raised KeyError when the
        # model omitted the field entirely.
        items = data.setdefault(field, [])
        has_late_timestamp = False
        for item in items:
            timestamp_match = re.search(r'(\d{1,2}):(\d{2})', item)
            if timestamp_match:
                mm, ss = timestamp_match.groups()
                time_in_seconds = int(mm) * 60 + int(ss)
                if time_in_seconds >= late_section_start:
                    has_late_timestamp = True
                    break
        # If no late timestamps, FORCE add one
        if not has_late_timestamp:
            late_frames = [f for f in frames if f["second"] >= late_section_start]
            if late_frames:
                # Pick a frame near (but not at) the end. BUG FIX: original
                # indexed [-3] unconditionally and crashed with <3 late frames.
                late_frame = late_frames[-3] if len(late_frames) >= 3 else late_frames[-1]
                if field == "strengths":
                    late_item = f"At {late_frame['timestamp']} - Maintained control and pressure in final phase"
                else:
                    late_item = f"At {late_frame['timestamp']} - Could increase urgency in final moments"
                # BUG FIX: original did items[-1] = ... which raised
                # IndexError on an empty list; append instead in that case.
                if items:
                    items[-1] = late_item  # Replace last item to keep count stable
                else:
                    items.append(late_item)
                print(f"⚠️ FORCED late timestamp: {late_frame['timestamp']}")
    if "missed_opportunities" not in data or not data["missed_opportunities"]:
        # BUG FIX: guard against an empty frame list before mid-frame indexing.
        mid_time = frames[len(frames)//2]["timestamp"] if frames else "00:30"
        data["missed_opportunities"] = [{
            "time": mid_time,
            "title": "Position",
            "description": "Review sequence for improvement opportunities",
            "category": "POSITION"
        }]
    # Top up key moments to the dynamic count for this video length.
    if "key_moments" not in data or len(data["key_moments"]) < feedback_counts["moments"]:
        default_moments = []
        for i in range(feedback_counts["moments"]):
            if frames:
                # Evenly spaced indices; always < len(frames) for i < moments.
                frame_idx = len(frames) // (feedback_counts["moments"] + 1) * (i + 1)
                ts = frames[frame_idx]["timestamp"]
            else:
                ts = "00:30"  # BUG FIX: original crashed on empty frames
            default_moments.append({
                "time": ts,
                "title": "Exchange",
                "description": "Significant moment in match flow",
                "category": "TRANSITION"
            })
        data["key_moments"] = default_moments
    if "coach_notes" not in data or len(data["coach_notes"]) < 50:
        data["coach_notes"] = "Focus on maintaining consistent technique throughout sequences. Review timestamped moments for detailed improvement areas."
    if "recommended_drills" not in data or len(data["recommended_drills"]) < 3:
        data["recommended_drills"] = [
            {"name": "Position Control Sequences", "focus_area": "General", "reason": "Improve sequential awareness", "duration": "15 min/day", "frequency": "5x/week"},
            {"name": "Guard Retention Drills", "focus_area": "Defense", "reason": "Strengthen defensive sequences", "duration": "10 min/day", "frequency": "4x/week"},
            {"name": "Transition Flow Training", "focus_area": "Movement", "reason": "Improve position transitions", "duration": "12 min/day", "frequency": "3x/week"}
        ]
    return data
def make_specific(field: str, frames: List[Dict], existing: List[str], count: int, duration: float) -> List[str]:
    """Generate specific feedback distributed across the entire video duration.

    Args:
        field: "strengths" gets positive phrasing; anything else gets weakness phrasing.
        frames: Sampled frames; each needs "timestamp" (MM:SS) and "second" keys.
        existing: Feedback already produced; preserved and topped up to `count`.
        count: Total items to return (3-7 based on video length).
        duration: Video length in seconds, used to split early/middle/late sections.

    Returns:
        List of exactly min(count, available) feedback strings.
    """
    feedback = existing.copy()
    timestamps_needed = count - len(feedback)
    # BUG FIX: original indexed frames[0] via the section helper and crashed
    # when the frame list was empty.
    if timestamps_needed <= 0 or not frames:
        return feedback[:count]
    # Split the remaining quota roughly 20/30/50 across early/middle/late.
    # BUG FIX: original forced early/middle to at least 1 each, which made
    # late_count negative (and over-allocated) when fewer than 3 items were
    # needed; clamping keeps the three counts summing to timestamps_needed.
    early_count = min(timestamps_needed, max(1, int(timestamps_needed * 0.2)))
    middle_count = min(timestamps_needed - early_count, max(1, int(timestamps_needed * 0.3)))
    late_count = timestamps_needed - early_count - middle_count
    early_frames = [f for f in frames if f["second"] < duration * 0.2]
    middle_frames = [f for f in frames if duration * 0.2 <= f["second"] < duration * 0.7]
    late_frames = [f for f in frames if f["second"] >= duration * 0.7]

    def get_frame_from_section(section_frames, index, section_count):
        # Evenly spaced pick within the section; falls back to the first frame
        # overall when the section happens to be empty.
        if not section_frames:
            return frames[0]
        frame_idx = len(section_frames) // (section_count + 1) * (index + 1)
        return section_frames[min(frame_idx, len(section_frames) - 1)]

    if field == "strengths":
        templates = (
            "At {ts} - Maintained good structural positioning during opening sequence",
            "At {ts} - Demonstrated positional awareness during mid-match exchange",
            "At {ts} - Showed consistent control in final phase of match",
        )
    else:
        templates = (
            "At {ts} - Could improve initial positioning strategy and grip selection",
            "At {ts} - Slow to recognize transitional opportunity during position change",
            "At {ts} - Room to improve execution and pressure application in final sequence",
        )
    sections = (
        (early_frames, early_count, templates[0]),
        (middle_frames, middle_count, templates[1]),
        (late_frames, late_count, templates[2]),
    )
    for section_frames, section_count, template in sections:
        for i in range(section_count):
            frame = get_frame_from_section(section_frames, i, section_count)
            feedback.append(template.format(ts=frame["timestamp"]))
    return feedback[:count]
def make_fallback(frames: List[Dict]) -> Dict:
    """Return a neutral, hard-coded analysis payload for when AI analysis fails.

    Timestamps are anchored to the supplied frames when possible so the canned
    feedback still points at real moments in the video; otherwise fixed
    placeholders (00:30 / 00:45) are used.
    """
    if frames:
        mid = frames[len(frames) // 2]["timestamp"]
    else:
        mid = "00:30"
    if len(frames) > 2:
        end = frames[-3]["timestamp"]
    else:
        end = "00:45"
    payload: Dict = {
        "overall_score": 65,
        "performance_label": "SOLID PERFORMANCE",
        "performance_grades": {"defense_grade": "C+", "offense_grade": "C", "control_grade": "C+"},
        "skill_breakdown": {"offense": 60, "defense": 68, "guard": 63, "passing": 55, "standup": 52},
    }
    payload["strengths"] = [
        "At 0:10 - Maintained structural integrity during opening",
        f"At {mid} - Showed positional awareness during exchange",
        f"At {end} - Demonstrated control in final sequences",
    ]
    payload["weaknesses"] = [
        "At 0:15 - Could improve initial positioning approach",
        f"At {mid} - Slow to recognize transitional opportunities",
        f"At {end} - Room to improve execution in final phase",
    ]
    payload["missed_opportunities"] = [
        {"time": mid, "title": "Position", "description": "Review for improvement", "category": "POSITION"}
    ]
    payload["key_moments"] = [
        {"time": end, "title": "Exchange", "description": "Significant sequence", "category": "TRANSITION"}
    ]
    payload["coach_notes"] = (
        "Focus on maintaining consistent technique throughout match sequences. "
        "Review specific timestamped moments for detailed improvement areas."
    )
    payload["recommended_drills"] = [
        {"name": "Sequential Control", "focus_area": "General", "reason": "Improve awareness", "duration": "15 min/day", "frequency": "5x/week"},
        {"name": "Guard Sequences", "focus_area": "Defense", "reason": "Strengthen defense", "duration": "10 min/day", "frequency": "4x/week"},
        {"name": "Flow Training", "focus_area": "Movement", "reason": "Improve transitions", "duration": "12 min/day", "frequency": "3x/week"},
    ]
    return payload
# --- API ---
@app.post("/analyze-complete")
async def analyze_complete(
    file: UploadFile = File(...),
    user_description: str = Form(...),
    opponent_description: str = Form(...),
    activity_type: str = Form("Brazilian Jiu-Jitsu")
):
    """End-to-end analysis endpoint: save upload, extract frames, run hybrid analysis.

    Returns a dict whose "status" is one of:
      - "completed": normal result with timing/frame metadata,
      - "rejected": bad duration or non-grappling content (zeroed data payload),
      - "completed_with_fallback": analysis crashed; canned result returned.
    The temporary video file is always removed in the `finally` block.
    """
    start_time = time.time()
    file_path = None
    try:
        # Persist the upload under a unique name so concurrent requests don't collide.
        file_name = f"{uuid.uuid4()}_{file.filename}"
        file_path = f"temp_videos/{file_name}"
        os.makedirs("temp_videos", exist_ok=True)
        with open(file_path, "wb") as buffer:
            shutil.copyfileobj(file.file, buffer)
        analysis_id = str(uuid.uuid4())
        db_storage[analysis_id] = {"status": "processing", "progress": 0}
        # Extract DENSE CONSECUTIVE frames (CPU-bound -> run in thread executor)
        try:
            frames, metadata = await asyncio.get_event_loop().run_in_executor(
                None, extract_dense_consecutive_frames, file_path
            )
        except ValueError as ve:
            # Duration validation error raised by the frame extractor
            error_msg = str(ve)
            print(f"⚠️ Duration validation failed: {error_msg}")
            return {
                "status": "rejected",
                "error": error_msg,
                "error_type": "duration_validation",
                "data": {
                    "overall_score": 0,
                    "performance_label": "VIDEO DURATION ERROR",
                    "performance_grades": {"defense_grade": "N/A", "offense_grade": "N/A", "control_grade": "N/A"},
                    "skill_breakdown": {"offense": 0, "defense": 0, "guard": 0, "passing": 0, "standup": 0},
                    "strengths": [],
                    "weaknesses": [],
                    "missed_opportunities": [],
                    "key_moments": [],
                    "coach_notes": f"⚠️ VIDEO DURATION ERROR\n\n{error_msg}\n\nRecommended video length: 10-90 seconds\n\nTips:\n• Focus on a single exchange or position\n• Trim longer videos to key moments\n• Ensure the clip shows clear grappling action",
                    "recommended_drills": []
                }
            }
        # Hybrid analysis (vision + agents + validation)
        result = await hybrid_agentic_analysis(
            frames, metadata,
            user_description.strip(), opponent_description.strip(),
            activity_type, analysis_id
        )
        total_time = time.time() - start_time
        # Content rejection is signalled via the performance label sentinel.
        if result.performance_label == "CONTENT VERIFICATION FAILED":
            return {
                "status": "rejected",
                "error": "Video content verification failed - not BJJ/grappling",
                "error_type": "content_verification",
                "data": result.model_dump(),
                "processing_time": f"{total_time:.2f}s"
            }
        return {
            "status": "completed",
            "data": result.model_dump(),
            "processing_time": f"{total_time:.2f}s",
            "frames_analyzed": len(frames),
            "avg_frame_interval": f"{metadata.get('avg_frame_interval', 0):.2f}s",
            "used_fallback": db_storage[analysis_id].get("used_fallback", False),
            "method": "dense_consecutive_frames"
        }
    except Exception as e:
        print(f"❌ Error: {e}")
        traceback.print_exc()
        # Best-effort fallback so the client still gets a usable payload.
        try:
            frames_fb, _ = await asyncio.get_event_loop().run_in_executor(None, extract_dense_consecutive_frames, file_path)
            fallback = make_fallback(frames_fb)
        # BUG FIX: was a bare `except:` which also swallowed
        # SystemExit/KeyboardInterrupt; narrowed to Exception.
        except Exception:
            fallback = make_fallback([{"timestamp": "00:30", "second": 30}])
        return {
            "status": "completed_with_fallback",
            "data": fallback,
            "error": str(e),
            "used_fallback": True
        }
    finally:
        # Always clean up the temp video; the file may not exist if saving failed.
        if file_path:
            try:
                os.remove(file_path)
            except OSError:  # BUG FIX: narrowed from bare `except:`
                pass
@app.get("/health")
async def health_check():
    """Liveness probe: returns a constant payload and touches no dependencies."""
    payload = {"status": "healthy", "version": "29.0.0-optimized-accurate"}
    return payload
@app.get("/")
async def root():
    """Service metadata endpoint: version, performance targets, and the
    frame-sampling / content-validation strategy advertised to clients.

    Purely informational; returns a static dict and performs no work.
    """
    return {
        "message": "BJJ AI Coach - Optimized for Speed + Accuracy",
        "version": "29.0.0",
        "target_performance": "Total analysis: 50-60 seconds",
        "architecture": "Gemini Vision + CrewAI Agents + Python Validation",
        "optimizations": [
            "⚡ Optimized frame counts for 50-60s Gemini processing",
            "🎯 50% of frames in final 30% (submission-focused)",
            "📊 15-40 frames (optimized for speed + accuracy)",
            "✅ Ultra-strict evidence requirements (prevents wrong diagnosis)",
            "🔍 Conservative analysis (admits uncertainty when unclear)",
            "⏱️ Target: 50-60s total (15s video: ~30s, 60s video: ~50s)"
        ],
        # Frame budget scales with video length to keep Gemini latency bounded.
        "frame_strategy": {
            "10-15s_video": "15 frames (~1.0s intervals) → Gemini ~30s",
            "15-30s_video": "20 frames (~1.5s intervals) → Gemini ~40s",
            "30-60s_video": "30 frames (~2.0s intervals) → Gemini ~50s",
            "60-90s_video": "40 frames (~2.3s intervals) → Gemini ~60s"
        },
        "submission_focus": {
            "distribution": "20% start, 30% middle, 50% end",
            "end_section": "50% of all frames in final 30% of video",
            "final_frames": "Always includes last 2 frames for tap detection",
            "confirmation": "Ultra-strict: requires EXPLICIT tap visible (2+ slaps)"
        },
        "accuracy_improvements": [
            "Evidence-only analysis (NO assumptions or inferences)",
            "Conservative position labels (says 'Unclear' when uncertain)",
            "Stricter submission confirmation (tap must be EXPLICIT)",
            "Mount requires ALL 4 criteria (knees, hips, flat, no entangle)",
            "No pain inference, intent assumption, or guessing",
            "Better to admit uncertainty than make wrong diagnosis"
        ],
        "validation": {
            "content_types_accepted": [
                "BJJ (gi/no-gi)",
                "Submission grappling",
                "Wrestling",
                "Judo (newaza)",
                "MMA grappling"
            ],
            "content_types_rejected": [
                "Striking arts",
                "Kata/forms",
                "Non-combat sports"
            ],
            # NOTE(review): the 5-120s range here differs from the 10-90s
            # recommendation in the duration-error message — confirm which is
            # authoritative.
            "duration": "5-120 seconds"
        }
    }
if __name__ == "__main__":
    # Local/dev entry point: serve the ASGI app; PORT env var overrides 7860.
    import uvicorn

    serve_port = int(os.environ.get("PORT", "7860"))
    uvicorn.run(app, host="0.0.0.0", port=serve_port)