Spaces:
Runtime error
Runtime error
| from __future__ import annotations | |
| import os | |
| import time | |
| import shutil | |
| import uuid | |
| import json | |
| import asyncio | |
| import base64 | |
| import re | |
| import traceback | |
| from typing import List, Optional, Dict, Any | |
| from fastapi import FastAPI, UploadFile, File, BackgroundTasks, HTTPException, Form | |
| from fastapi.middleware.cors import CORSMiddleware | |
| from pydantic import BaseModel, ConfigDict | |
| import warnings | |
| # Suppress warnings | |
| warnings.filterwarnings('ignore', category=FutureWarning) | |
| # CrewAI imports | |
| from crewai import Agent, Task, Crew, Process | |
| from crewai.llm import LLM | |
| # Gemini imports | |
| import google.generativeai as genai | |
| from google.generativeai.types import HarmCategory, HarmBlockThreshold | |
| # OpenCV | |
| import cv2 | |
| import numpy as np | |
# Configuration
# Both API keys are read from the environment once, at import time.
GEMINI_API_KEY = os.getenv("GOOGLE_API_KEY")
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
# Fail fast: importing this module raises ValueError when either key is
# missing or empty, so the service never starts half-configured.
if not GEMINI_API_KEY:
    raise ValueError("GOOGLE_API_KEY environment variable required")
if not GROQ_API_KEY:
    raise ValueError("GROQ_API_KEY environment variable required")
# Configure the google.generativeai client globally with the Gemini key.
genai.configure(api_key=GEMINI_API_KEY)
app = FastAPI(title="BJJ AI Coach - Dense Frame Analysis")
# NOTE(review): wildcard origins/methods/headers are combined with
# allow_credentials=True. The FastAPI CORS documentation says the wildcard
# forms must not be used when credentials are allowed - confirm whether
# credentialed cross-origin requests are actually required here.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
| # --- MODELS --- | |
class TimestampedEvent(BaseModel):
    """A single timestamped moment (key moment or missed opportunity)."""

    # Event time as text.
    time: str
    title: str
    description: str
    # Category label; defaults to "GENERAL" when not supplied.
    category: Optional[str] = "GENERAL"
    # Optional frame attachment. Presumably filled from the dict keys written by
    # attach_frames_to_events (base64 JPEG text + "MM:SS" label) - TODO confirm.
    frame_image: Optional[str] = None
    frame_timestamp: Optional[str] = None
    # Unknown extra fields are accepted rather than rejected.
    model_config = ConfigDict(extra="allow")
class Drill(BaseModel):
    """A recommended training drill."""

    name: str
    focus_area: str
    # Why this drill is recommended.
    reason: str
    # Defaults applied when the duration / frequency are not supplied.
    duration: Optional[str] = "15 min/day"
    frequency: Optional[str] = "5x/week"
class DetailedSkillBreakdown(BaseModel):
    """Per-skill integer scores.

    NOTE(review): the analysis prompt in this file asks for 0-100 scores, but
    no range is enforced by the model itself.
    """

    offense: int
    defense: int
    guard: int
    passing: int
    standup: int
class PerformanceGrades(BaseModel):
    """Grades for defense, offense and control, stored as free-form strings.

    NOTE(review): the formatting prompt in this file requests letter grades
    (A+ ... D); the model does not validate the values.
    """

    defense_grade: str
    offense_grade: str
    control_grade: str
class AnalysisResult(BaseModel):
    """Top-level schema of a completed analysis.

    All fields are required; nested structures use the models defined above.
    """

    overall_score: int
    performance_label: str
    performance_grades: PerformanceGrades
    skill_breakdown: DetailedSkillBreakdown
    strengths: List[str]
    weaknesses: List[str]
    missed_opportunities: List[TimestampedEvent]
    key_moments: List[TimestampedEvent]
    coach_notes: str
    recommended_drills: List[Drill]
# Module-level in-memory store, created empty at import time.
# NOTE(review): nothing in the visible part of the file reads or writes it;
# presumably keyed by an analysis/job id - verify against the request handlers.
db_storage = {}
| # --- UTILITIES --- | |
def parse_time_to_seconds(time_str: str) -> Optional[int]:
    """Convert the first ``M:SS`` / ``MM:SS`` stamp found in *time_str* to seconds.

    The stamp may be embedded in surrounding text (e.g. ``"At 1:05 - ..."``).

    Returns:
        Minutes * 60 + seconds, or ``None`` when the input is empty/``None``
        or contains no such stamp.
    """
    if not time_str:
        return None
    found = re.search(r"(\d{1,2}):(\d{2})", time_str)
    if found is None:
        return None
    minutes, seconds = (int(part) for part in found.groups())
    return minutes * 60 + seconds
def find_closest_frame(target_time_sec: int, frames: list) -> dict:
    """Return the frame whose ``"second"`` value is nearest to *target_time_sec*.

    When two frames are equally close, the one appearing first in *frames*
    wins. An empty *frames* list raises ``ValueError`` (from ``min``).
    """

    def distance(frame: dict) -> float:
        return abs(frame["second"] - target_time_sec)

    return min(frames, key=distance)
def attach_frames_to_events(events: List[dict], frames: list):
    """Attach the nearest extracted frame to each event, in place.

    For every event whose ``"time"`` contains an ``MM:SS`` stamp, the closest
    frame is looked up and two keys are written on the event:
    ``"frame_timestamp"`` (the frame's ``"timestamp"`` label) and
    ``"frame_image"`` (the frame's JPEG ``"bytes"``, base64-encoded as text).

    This is best-effort: events without a parseable time are left untouched,
    entries that are not dicts are skipped, and any lookup or encoding failure
    sets ``"frame_image"`` to ``None`` for that event.

    Args:
        events: Event dicts to annotate (mutated in place).
        frames: Frame dicts carrying ``"second"``, ``"timestamp"`` and ``"bytes"``.
    """
    for event in events:
        # Model output is not guaranteed to be well-formed; a non-dict entry
        # cannot carry a frame, so skip it rather than crash the whole batch.
        if not isinstance(event, dict):
            continue
        try:
            event_time_sec = parse_time_to_seconds(event.get("time"))
            if event_time_sec is None:
                continue
            closest = find_closest_frame(event_time_sec, frames)
            # Build both values before assigning, so an encoding failure can
            # never leave a timestamp behind without its image.
            frame_timestamp = closest["timestamp"]
            frame_image = base64.b64encode(closest["bytes"]).decode("utf-8")
        except Exception:
            # Deliberately broad: one bad event must not abort the rest.
            # Unlike a bare except, KeyboardInterrupt/SystemExit still propagate.
            event["frame_image"] = None
        else:
            event["frame_timestamp"] = frame_timestamp
            event["frame_image"] = frame_image
def _close_truncated_json(fragment: str) -> str:
    """Append the closers a cut-off JSON fragment is missing, innermost first."""
    pending = []  # closers still owed, outermost first
    in_string = False
    escaped = False
    for ch in fragment:
        if in_string:
            # Braces and brackets inside string values are not structure.
            if escaped:
                escaped = False
            elif ch == "\\":
                escaped = True
            elif ch == '"':
                in_string = False
        elif ch == '"':
            in_string = True
        elif ch == "{":
            pending.append("}")
        elif ch == "[":
            pending.append("]")
        elif ch in "}]" and pending:
            pending.pop()
    if in_string:
        fragment += '"'
    else:
        # A dangling comma before the cut would make the repaired text invalid.
        fragment = fragment.rstrip().rstrip(",")
    return fragment + "".join(reversed(pending))


def extract_json_from_text(text: str) -> Dict:
    """Extract a JSON value from model output that may wrap or truncate it.

    Strategies are tried in order:

    1. Parse the whole (stripped) text as JSON.
    2. Parse the contents of the first Markdown code fence (a json-tagged
       fence is preferred over an untagged one).
    3. Parse the first JSON object starting at the first ``{``, ignoring any
       trailing text. A real JSON decoder is used, so braces inside string
       values do not end the object early.
    4. Treat the text from the first ``{`` as truncated and append the missing
       closers in correct nesting order before parsing.

    Args:
        text: Raw text expected to contain JSON.

    Returns:
        The decoded JSON value (normally a dict).

    Raises:
        ValueError: If no strategy yields valid JSON.
    """
    text = text.strip()

    try:
        return json.loads(text)
    except ValueError:
        pass

    if "```" in text:
        fence = "```json" if "```json" in text else "```"
        # From here on only the fenced content is considered.
        text = text.split(fence)[1].split("```")[0]
        try:
            return json.loads(text.strip())
        except ValueError:
            pass

    start_idx = text.find("{")
    if start_idx != -1:
        try:
            # raw_decode stops at the end of the first complete value.
            parsed, _ = json.JSONDecoder().raw_decode(text, start_idx)
            return parsed
        except ValueError:
            pass
        try:
            return json.loads(_close_truncated_json(text[start_idx:]))
        except ValueError:
            pass

    raise ValueError("Could not extract JSON")
def is_generic(text: str) -> bool:
    """Return True when a feedback line is too generic to be useful.

    A line counts as generic if any of the following holds:
    - it is a two-word stock phrase ("More X", "Improve X", "Work X",
      "Better X", case-insensitive, surrounding whitespace ignored);
    - it contains no ``M:SS`` / ``MM:SS`` timestamp;
    - it is shorter than 20 characters.
    """
    stock_phrases = (r'^More \w+$', r'^Improve \w+$', r'^Work \w+$', r'^Better \w+$')
    trimmed = text.strip()
    if any(re.match(phrase, trimmed, re.IGNORECASE) for phrase in stock_phrases):
        return True
    has_timestamp = re.search(r'\d{1,2}:\d{2}', text) is not None
    return (not has_timestamp) or len(text) < 20
| # --- ENHANCED DENSE FRAME EXTRACTION --- | |
def _sample_section(cap, fps: float, first_idx: int, stop_idx: int, quota: int) -> list:
    """Read up to *quota* evenly spaced frames from frame indices [first_idx, stop_idx)."""
    step = max(1, (stop_idx - first_idx) // quota)
    picked = []
    for idx in range(first_idx, stop_idx, step):
        if len(picked) >= quota:
            break
        frame = get_frame(cap, idx, fps)
        if frame:
            picked.append(frame)
    return picked


def extract_dense_consecutive_frames(video_path: str) -> tuple:
    """Sample frames from a video, weighted toward the end of the clip.

    The number of frames depends on the clip duration:
    <= 15s -> 15, <= 30s -> 20, <= 60s -> 30, <= 90s -> 40, otherwise 45.
    Clips shorter than 5s or longer than 120s are rejected.

    The frame budget is split across three sections of the video:
    - START  (first 20% of frames): max(3, 20% of the budget)
    - MIDDLE (20%-70% of frames):   max(6, 30% of the budget)
    - END    (last 30% of frames):  whatever remains of the budget
    The last two frames of the video are additionally requested if they were
    not already sampled.

    Args:
        video_path: Path of the video file to open with OpenCV.

    Returns:
        ``(frames, metadata)`` - ``frames`` is a list of the dicts produced by
        ``get_frame`` sorted by time; ``metadata`` holds duration, fps, the
        number of extracted frames, the average gap between frames, an
        estimated processing time and the per-section frame counts.

    Raises:
        Exception: ``"Frame extraction failed: <reason>"`` for any failure,
            including an unopenable file or a rejected duration. The original
            error is chained as ``__cause__``.
    """
    cap = None
    try:
        cap = cv2.VideoCapture(video_path)
        if not cap.isOpened():
            raise Exception("Cannot open video")
        fps = cap.get(cv2.CAP_PROP_FPS)
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        duration = total_frames / fps if fps > 0 else 0

        # Validate video duration (an unreadable fps yields 0 and is rejected here).
        if duration < 5:
            raise ValueError("Video too short (< 5 seconds). Please upload a longer clip (10-90 seconds recommended).")
        if duration > 120:
            raise ValueError("Video too long (> 2 minutes). Please upload a shorter clip (10-90 seconds recommended) for optimal analysis.")

        # Frame budget per duration tier.
        if duration <= 15:
            total_to_extract = 15
        elif duration <= 30:
            total_to_extract = 20
        elif duration <= 60:
            total_to_extract = 30
        elif duration <= 90:
            total_to_extract = 40
        else:
            total_to_extract = 45
        print(f"πΉ OPTIMIZED EXTRACTION: {total_to_extract} frames from {duration:.1f}s video")
        print(f" Target: 1 frame every {duration/total_to_extract:.1f}s (Gemini: ~{total_to_extract * 1.5:.0f}s)")

        # Submission-focused split of the budget: start / middle / remainder at the end.
        start_frames = max(3, int(total_to_extract * 0.20))
        middle_frames = max(6, int(total_to_extract * 0.30))
        end_frames = total_to_extract - start_frames - middle_frames
        print(f" Distribution (submission-focused): START={start_frames}, MIDDLE={middle_frames}, END={end_frames}")

        # Section boundaries expressed as frame indices.
        start_section_end = int(total_frames * 0.20)
        middle_section_end = int(total_frames * 0.70)

        # Each section is capped by its own counter. Recounting by rounded
        # timestamps would let a frame on a section boundary be attributed to
        # the previous section and the quota be overshot.
        frames = _sample_section(cap, fps, 0, start_section_end, start_frames)
        frames += _sample_section(cap, fps, start_section_end, middle_section_end, middle_frames)
        end_interval = max(1, (total_frames - middle_section_end) // end_frames)
        print(f" END section (50% of frames): 1 frame every {end_interval/fps:.2f}s for submission detection")
        frames += _sample_section(cap, fps, middle_section_end, total_frames, end_frames)

        # Always try to include the final two frames (tap detection).
        seen_indices = {f["frame_idx"] for f in frames}
        for offset in (2, 1):
            final_frame_idx = total_frames - offset
            if final_frame_idx > 0 and final_frame_idx not in seen_indices:
                frame = get_frame(cap, final_frame_idx, fps)
                if frame:
                    frames.append(frame)
                    seen_indices.add(final_frame_idx)

        frames.sort(key=lambda f: f["second"])

        # Average gap between neighbouring sampled frames.
        intervals = [later["second"] - earlier["second"] for earlier, later in zip(frames, frames[1:])]
        avg_interval = sum(intervals) / len(intervals) if intervals else 0

        start_cutoff = duration * 0.20
        end_cutoff = duration * 0.70
        metadata = {
            "duration": round(duration, 2),
            "fps": round(fps, 2),
            "frames_extracted": len(frames),
            "avg_frame_interval": round(avg_interval, 2),
            "estimated_gemini_time": round(len(frames) * 1.5, 1),  # ~1.5s per frame
            "distribution": {
                "start": len([f for f in frames if f["second"] < start_cutoff]),
                "middle": len([f for f in frames if start_cutoff <= f["second"] < end_cutoff]),
                "end": len([f for f in frames if f["second"] >= end_cutoff]),
            },
        }
        print(f"β Extracted {len(frames)} frames (avg interval: {avg_interval:.2f}s)")
        print(f" Estimated Gemini time: ~{metadata['estimated_gemini_time']:.0f}s")
        print(f" Actual distribution: START={metadata['distribution']['start']}, "
              f"MIDDLE={metadata['distribution']['middle']}, "
              f"END={metadata['distribution']['end']} (50% in final 30%!)")
        return frames, metadata
    except Exception as e:
        # Same exception type and message as before; the cause is now chained.
        raise Exception(f"Frame extraction failed: {str(e)}") from e
    finally:
        # Release the capture on every path, success or failure.
        if cap is not None:
            cap.release()
def get_frame(cap: cv2.VideoCapture, frame_idx: int, fps: float) -> Optional[dict]:
    """Read one frame by index and return it as a 720px-high JPEG record.

    The capture is seeked to *frame_idx*, the decoded frame is resized to a
    height of 720 pixels (width scaled by the same factor) and JPEG-encoded at
    quality 85.

    Args:
        cap: An opened OpenCV capture.
        frame_idx: Zero-based index of the frame to read.
        fps: Frames per second, used to convert the index to a timestamp.

    Returns:
        A dict with ``"bytes"`` (JPEG data), ``"timestamp"`` (``"MM:SS"``),
        ``"second"`` (seconds rounded to 2 decimals) and ``"frame_idx"``; or
        ``None`` if the frame cannot be read, encoded or timestamped.
    """
    try:
        cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
        ret, frame = cap.read()
        if not ret:
            return None
        h, w = frame.shape[:2]
        target_h = 720
        target_w = int(w * (target_h / h))
        resized = cv2.resize(frame, (target_w, target_h))
        encoded_ok, buffer = cv2.imencode('.jpg', resized, [cv2.IMWRITE_JPEG_QUALITY, 85])
        if not encoded_ok:
            # imencode reports failure through its first return value.
            return None
        timestamp_sec = frame_idx / fps
        timestamp_str = f"{int(timestamp_sec // 60):02d}:{int(timestamp_sec % 60):02d}"
        return {
            "bytes": buffer.tobytes(),
            "timestamp": timestamp_str,
            "second": round(timestamp_sec, 2),
            "frame_idx": frame_idx
        }
    except Exception:
        # Best-effort: a frame that cannot be processed is simply skipped.
        # Unlike a bare except, KeyboardInterrupt/SystemExit still propagate.
        return None
| # --- ENHANCED GEMINI VISION WITH CONSECUTIVE CONTEXT --- | |
async def extract_frame_observations(frames: List[Dict], user_desc: str, opp_desc: str, duration: float, metadata: Dict) -> str:
    """Ask Gemini for frame-by-frame observations of the sampled frames.

    Builds a text listing of the frames (gaps above 2s between neighbours are
    flagged as ``[+X.Xs]``), sends every frame image plus one instruction
    prompt to the ``gemini-2.5-flash`` model, and returns the model's text.
    The blocking SDK call runs in the default executor so the event loop is
    not blocked.

    Args:
        frames: Frame dicts with ``"bytes"``, ``"timestamp"`` and ``"second"``.
        user_desc: Description of the athlete being analysed.
        opp_desc: Description of the opponent.
        duration: Video duration in seconds (interpolated into the prompt).
        metadata: Extraction metadata; only ``"avg_frame_interval"`` is read
            (defaults to 2.0 when absent).

    Returns:
        The model's observations. On any failure the error is printed and a
        string starting with ``"Error analyzing frames: "`` is returned
        instead of raising.
    """
    print("STEP 1: Gemini Vision - Dense Consecutive Frame Analysis")
    try:
        # Build detailed frame list with time gaps
        frame_details = []
        for i, f in enumerate(frames):
            if i > 0:
                time_gap = f["second"] - frames[i-1]["second"]
                gap_indicator = f" [+{time_gap:.1f}s]" if time_gap > 2 else ""
            else:
                gap_indicator = ""
            frame_details.append(f"Frame {i+1} @ {f['timestamp']} ({f['second']:.1f}s){gap_indicator}")
        frame_list = "\n".join(frame_details)
        avg_interval = metadata.get("avg_frame_interval", 2.0)
        # The prompt text is runtime data and is kept verbatim.
        prompt = f"""
You are an EXPERT Brazilian Jiu-Jitsu black belt analyst performing PRECISE EVIDENCE-BASED frame analysis.
====================
CRITICAL: VIDEO CONTENT VERIFICATION (STEP 0 - MANDATORY)
====================
BEFORE analyzing frames, verify this is BJJ/grappling content.
ACCEPTABLE: BJJ (gi/no-gi), Wrestling, Judo (newaza), Submission grappling, MMA grappling
REJECT: Striking arts, kata/forms, non-combat sports, random videos
If NOT grappling β Output this JSON and STOP:
{{"content_verification": "FAILED", "reason": "This video shows [what you see]. Please upload BJJ/grappling footage.", "suggested_action": "Upload ground grappling, submissions, or takedowns."}}
====================
CORE PRINCIPLE: EVIDENCE-ONLY ANALYSIS
====================
YOU ARE ABSOLUTELY FORBIDDEN FROM:
β Assuming intent or motivation
β Inferring pain levels or discomfort
β Guessing what happened between frames
β Extrapolating beyond visible evidence
β Making confident claims from unclear visuals
YOU MUST ONLY:
β Describe EXACTLY what is visible in each frame
β Use conservative language when uncertain
β Say "Unclear" or "Insufficient evidence" if you cannot confirm
β Track visible progressions across consecutive frames
====================
CONSECUTIVE FRAME CONTEXT
====================
You have {len(frames)} frames with ~{avg_interval:.1f}s average interval.
Frame sequence (time gaps shown):
{frame_list}
IMPORTANT:
- Frames < 2s apart = CONTINUOUS ACTION (track progressions)
- Frames > 3s apart = POTENTIAL TRANSITION (note gap)
- Final 50% of frames are DENSE (70-100% of video) for submission detection
====================
POSITION CLASSIFICATION (STRICT RULES)
====================
Use ONLY these positions if CLEARLY visible:
STANDING/CLINCH:
- Standing: Both athletes upright, no ground contact
- Clinch: Standing with upper body control
GUARD POSITIONS (Bottom player has legs between them):
- Closed Guard: Legs locked around opponent's waist
- Open Guard: Legs not locked, but controlling opponent (Butterfly, DLR, Spider, X-Guard)
- Half Guard: One leg trapped between opponent's legs
TOP CONTROL:
- Side Control: Chest across opponent's chest, perpendicular, opponent flat
- North-South: Head-to-head, chest across opponent's chest
- Mount: ONLY if BOTH knees on mat, hips square over torso, opponent flat, NO leg entanglement
- If ANY condition missing β "Top pressure (not mount)" or "Knee on belly"
- Back Control: Behind opponent with hooks or body triangle
NEUTRAL/TRANSITION:
- Turtle: Opponent on hands/knees
- Scramble: Both athletes moving, position unclear
- Transitional: Between defined positions
WHEN UNCERTAIN: Use "Unclear position" or "Transitional control" - NEVER guess!
====================
SUBMISSION DETECTION (ULTRA-STRICT)
====================
A submission is confirmed ONLY if you see ALL of:
1. β CLEAR lock/control visible in 2+ consecutive frames
2. β EXPLICIT tap (hand slapping mat/body 2+ times) OR
3. β Match stopping during locked submission OR
4. β Video ending during unmistakable locked submission
TAP INDICATORS (must be EXPLICIT):
- β Hand rapidly slapping mat (2+ distinct slaps)
- β Hand rapidly patting opponent's body (2+ distinct pats)
- β Verbal submission with visible distress
- β Body going completely limp during lock
INSUFFICIENT FOR CONFIRMATION:
- β Position control alone (even if perfect)
- β "Could be applying pressure" - NOT confirmed
- β "Appears to be in pain" - NOT confirmed
- β "Submission position visible" - NOT confirmed unless TAP visible
- β Hand moving once - NOT a tap
- β Match ending without clear tap or lock - NOT confirmed
DECISION TREE:
Is lock clearly visible? NO β "No submission"
Is lock clearly visible? YES β Is tap EXPLICITLY visible? NO β "Submission attempt only"
Is tap EXPLICITLY visible? YES β "SUBMISSION CONFIRMED"
====================
FRAME-BY-FRAME ANALYSIS (REQUIRED FORMAT)
====================
For EACH frame, report:
Frame X (MM:SS):
Position: [Conservative label - say "Unclear" if unsure]
Advantage: [User / Opponent / Neutral - based ONLY on visible control]
Action: [OFFENSE / DEFENSE / GUARD / PASSING / STANDUP / NONE]
Threats: [None / Submission attempt (name) / Positional advance]
Details: [Observable grips, pressure, movements - NO speculation]
Progression: [If < 2s from previous frame: "Continues [action]" / If > 3s: "New sequence"]
CRITICAL RULES:
- If position unclear β say "Position unclear"
- If advantage unclear β say "Neutral"
- If can't see details β say "Insufficient detail visible"
- NEVER fill in gaps with assumptions
====================
ACTION TYPE DEFINITIONS (STRICT)
====================
OFFENSE: Initiated submission attempts OR active attack chains (NOT just control)
DEFENSE: Actively escaping, framing, or defending attacks (NOT just being on bottom)
GUARD: Bottom position with legs controlling opponent (NOT just being on back)
PASSING: Actively clearing legs and advancing position (NOT just being on top)
STANDUP: Takedown attempts or clinch exchanges
NONE: Static control, unclear action, or transitional movement
====================
FINAL SUMMARY (EVIDENCE-LOCKED)
====================
1. OUTCOME VERDICT:
- Submission: YES (only if tap EXPLICITLY visible) / NO / UNCLEAR
- Winner: User / Opponent / NONE / UNCLEAR
- Technique: [Name ONLY if lock + tap confirmed] / NONE / UNCLEAR
- Evidence: "Frames X-Y show [specific visible evidence]"
- Confidence: HIGH (tap explicitly visible) / MEDIUM (strong indicators) / LOW (unclear)
2. POSITIONAL SUMMARY:
- Describe visible progressions
- Note dominant positions
- List transitions between confirmed positions
- ADMIT UNCERTAINTY where applicable
3. KEY SEQUENCES:
- List multi-frame progressions with frame references
- Format: "Frames X-Y: [observable progression]"
====================
QUALITY CHECKLIST (VERIFY BEFORE SUBMITTING)
====================
Before finalizing, verify:
- [ ] Did I ONLY describe what's CLEARLY visible?
- [ ] Did I use "Unclear" when uncertain?
- [ ] Did I confirm submission ONLY if tap EXPLICITLY visible?
- [ ] Did I avoid assuming pain, intent, or motivation?
- [ ] Did I track progressions in consecutive frames?
- [ ] Did I use conservative position labels?
- [ ] Did I admit gaps in evidence?
- [ ] Did I verify "mount" meets ALL 4 criteria?
REMEMBER:
- It is BETTER to say "Unclear" than to make a wrong diagnosis
- Conservative analysis is MORE valuable than confident guessing
- Visible evidence > Positional inference
- When in doubt, describe what you SEE, not what you THINK
Your analysis will guide training decisions. ACCURACY and HONESTY are paramount.
β Track continuous movement progressions
β Identify setup sequences (e.g., grip β control β finish)
β Detect transitional movements between positions
β Recognize submission attempts developing over multiple frames
β See tapping sequences frame-by-frame
IMPORTANT INSTRUCTIONS:
1. When frames are close together (< 2 seconds apart), treat them as CONTINUOUS ACTION
2. Look for PROGRESSIONS across consecutive frames, not just isolated moments
3. A technique may develop over 3-5 consecutive frames - describe the SEQUENCE
4. For submissions: Track the setup (Frame N) β control (Frame N+1) β finish (Frame N+2) β tap (Frame N+3)
====================
VIDEO CONTEXT
====================
- Duration: {duration}s
- Total Frames: {len(frames)} (DENSE consecutive sampling)
- Average time between frames: {avg_interval:.1f}s
- Athlete Being Analyzed (User): {user_desc}
- Opponent: {opp_desc}
====================
FRAME SEQUENCE
====================
{frame_list}
NOTE: Frames marked with [+X.Xs] have larger time gaps - these are transitions between sequences.
====================
REFERENCE KNOWLEDGE (VOCABULARY ONLY)
====================
Use these terms ONLY if clearly visible in frames.
POSITIONS:
Standing, Clinch, Closed Guard, Open Guard (Butterfly, De La Riva, Spider, X-Guard),
Half Guard (Top/Bottom, Knee Shield, Deep Half), Side Control (Standard, Kesa Gatame),
North-South, Mount (Low, High, S-Mount), Back Control, Turtle (Top/Bottom)
CRITICAL POSITION RULE:
- "Full Mount" requires: BOTH knees on mat, hips square, opponent flat, NO leg entanglement
- If ANY missing β use "Top control (not mount)" or "Transitional position"
ATTACKS & THREATS:
Chokes (RNC, Guillotine, Triangle, Arm Triangle, D'Arce, Anaconda, Ezekiel, Collar chokes)
Joint Locks (Armbar, Kimura, Americana, Omoplata, Wrist locks)
Leg Locks (Straight Ankle, Kneebar, Heel Hook, Toe Hold, Calf Slicer)
====================
SUBMISSION DETECTION (STRICT)
====================
With {len(frames)} dense frames, you can now track COMPLETE submission sequences:
A submission is confirmed ONLY if you see:
1. SETUP in earlier frames (e.g., Frame 28: "Leg entangled")
2. CONTROL in middle frames (e.g., Frame 29: "Ankle isolated, arching back")
3. PRESSURE in later frames (e.g., Frame 30: "Full extension applied")
4. TAP or STOPPAGE in final frames (e.g., Frame 31: "Hand tapping mat")
Visual tap indicators:
- β Hand slapping mat/body rapidly (2+ times)
- β Verbal submission (grimacing in pain)
- β Body going limp/giving up resistance
- β Match ending during locked submission
If unclear or incomplete sequence β classify as "submission attempt" NOT "submission"
====================
CONSECUTIVE FRAME ANALYSIS TASK
====================
For EACH frame, provide:
1. POSITION: Current position (conservative labels if unclear)
2. ADVANTAGE: User / Opponent / Neutral (based on visible control)
3. ACTION TYPE: OFFENSE | DEFENSE | GUARD | PASSING | STANDUP | NONE
4. THREATS: None / Submission Attempt (name it) / Positional Advance
5. TECHNICAL DETAILS: Observable grips, pressure, transitions
- For consecutive frames < 2s apart: Describe the PROGRESSION
- Example: "Continuing from previous frame, hand now moved to..."
6. CONSECUTIVE CONTEXT: (NEW - VERY IMPORTANT)
- If this frame continues action from previous frame, note: "Continuation of [action]"
- If this starts new sequence, note: "New sequence initiated"
- Track multi-frame progressions: "Frame 3/5 of [technique] setup"
STRICT OUTPUT FORMAT:
Frame X (MM:SS):
Position: [name]
Advantage: [User/Opponent/Neutral]
Action: [type]
Threats: [description]
Details: [technical observation]
Context: [consecutive progression if applicable]
====================
CONSECUTIVE SEQUENCE TRACKING (CRITICAL)
====================
With dense frames, pay special attention to:
1. MULTI-FRAME PROGRESSIONS:
- Frame 25: Grip established
- Frame 26: Control secured (progression)
- Frame 27: Position improved (progression continues)
- Frame 28: Submission attempt initiated (culmination)
2. SUBMISSION SEQUENCES (if visible):
Track EVERY step:
- Early frame: "Leg control established, foot isolated"
- Next frame: "Opponent arching back, pressure applied"
- Next frame: "Ankle lock fully extended"
- Final frame: "Tapping motion visible / Match stopped"
3. TRANSITIONAL FLOWS:
Note when position changes occur across consecutive frames
====================
FINAL SUMMARY (EVIDENCE-LOCKED)
====================
1. OUTCOME VERDICT:
- Submission: YES / NO
- Winner: User / Opponent / NONE
- Technique: <name or NONE>
- Time: MM:SS or NONE
- Frame Sequence: "Frames X-Y showed [setup/execution/finish]"
- Confidence: HIGH / MEDIUM / LOW
- Evidence: Specific frame numbers + descriptions
2. POSITIONAL FLOW:
- Describe the overall progression through consecutive frames
- Note dominant positions and transitions
- Identify key turning points in the sequence
3. KEY SEQUENCES:
- List any multi-frame progressions that led to significant moments
- Format: "Frames X-Y: [description of progression]"
====================
FINAL CHECKLIST
====================
Before submitting, verify:
- [ ] Did I analyze frames CONSECUTIVELY, not in isolation?
- [ ] Did I track multi-frame progressions (setup β execution β finish)?
- [ ] For close frames (< 2s apart), did I note continuations?
- [ ] If submission visible, did I describe the COMPLETE sequence?
- [ ] Did I use "Frame X-Y" notation for extended sequences?
- [ ] Are all position labels conservative and evidence-based?
- [ ] No speculation beyond what's visible in frames?
REMEMBER: With {len(frames)} dense consecutive frames, you can see COMPLETE action sequences.
Use this advantage to provide CONTEXTUAL analysis, not just isolated observations.
"""
        # Prepare content: all frame images first, then the instruction text.
        content = []
        for f in frames:
            content.append({
                "mime_type": "image/jpeg",
                "data": base64.b64encode(f["bytes"]).decode("utf-8")
            })
        content.append(prompt)
        # Call Gemini
        start = time.time()
        model = genai.GenerativeModel(
            model_name="gemini-2.5-flash",
            generation_config={
                "temperature": 0.2,
                "max_output_tokens": 12000  # Increased for more frames
            }
        )
        # get_running_loop() is the supported way to reach the loop from
        # inside a coroutine; the blocking SDK call is pushed to the executor.
        response = await asyncio.get_running_loop().run_in_executor(
            None,
            lambda: model.generate_content(
                content,
                safety_settings={
                    HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
                    HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
                    HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
                    HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
                }
            )
        )
        elapsed = time.time() - start
        print(f"β Gemini vision completed: {elapsed:.2f}s ({len(frames)} frames analyzed)")
        try:
            observations = response.text
        except Exception:
            # The .text accessor can fail; fall back to the first part of the
            # first candidate. If that fails too, the outer handler reports it.
            observations = response.candidates[0].content.parts[0].text
        # Log first 500 chars for debugging
        print(f"π Observations preview: {observations[:500]}...")
        return observations
    except Exception as e:
        print(f"β Vision extraction failed: {e}")
        traceback.print_exc()
        # Callers receive an error string rather than an exception.
        return f"Error analyzing frames: {str(e)}"
| # --- CREWAI AGENTS (UPDATED FOR DENSE FRAMES) --- | |
| def create_analysis_crew(observations: str, user_desc: str, opp_desc: str, duration: float, num_frames: int): | |
| """Create CrewAI agents with awareness of dense consecutive frame analysis""" | |
| model = genai.GenerativeModel( | |
| model_name="gemini-3-flash-preview", | |
| generation_config={ | |
| "temperature": 0.2, | |
| "max_output_tokens": 12000 # Increased for more frames | |
| } | |
| ) | |
| llm = LLM( | |
| model="groq/llama-3.3-70b-versatile", | |
| api_key=GROQ_API_KEY, | |
| temperature=0.2 | |
| ) | |
| analyst = Agent( | |
| role="BJJ Technical Analyst", | |
| goal=f"Analyze {num_frames} consecutive frame observations for {user_desc} to detect submissions, score performance, and identify patterns", | |
| backstory=f""" | |
| You are a BJJ black belt coach analyzing DENSE CONSECUTIVE FRAME observations. | |
| CONTEXT AWARENESS: | |
| - You received observations from {num_frames} frames (high density sampling) | |
| - Frames are CONSECUTIVE with small time gaps (avg 1-2 seconds) | |
| - This allows you to see COMPLETE action sequences, not just snapshots | |
| CRITICAL RULES: | |
| 1. OUTCOME AUTHORITY: Accept submission verdicts from observations - do NOT override | |
| 2. SEQUENCE AWARENESS: Look for multi-frame progressions described in observations | |
| 3. POSITION AUTHORITY: Respect position labels used in observations | |
| 4. TIMESTAMP PRECISION: Every claim must reference specific timestamps | |
| 5. NO GENERICS: "More aggression" and similar phrases are FORBIDDEN | |
| SCORING GUIDELINES: | |
| - If user was submitted: Defense β€40, Overall β€60 | |
| - If user finished opponent: Offense β₯80, Overall β₯80 | |
| - Score based on demonstrated actions, not potential | |
| STRENGTHS/WEAKNESSES: | |
| - Must be SPECIFIC with timestamps | |
| - Minimum 25 characters with context | |
| - If submission occurred, it MUST be #1 in relevant category | |
| - Each item must be distinct (no repetition with different wording) | |
| DENSE FRAME ADVANTAGE: | |
| - Use the sequential context to identify setup patterns | |
| - Reference frame progressions (e.g., "Frames 25-28 showed grip sequence leading to...") | |
| - Distinguish between isolated mistakes vs systematic issues | |
| """, | |
| verbose=True, | |
| allow_delegation=False, | |
| llm=llm, | |
| memory=True | |
| ) | |
| formatter = Agent( | |
| role="Data Structure Specialist", | |
| goal="Convert analysis into valid JSON matching exact schema requirements", | |
| backstory="""You transform technical analysis into structured JSON. | |
| REQUIREMENTS: | |
| - Exactly 3 strengths and 3 weaknesses | |
| - All feedback includes timestamps (MM:SS format) | |
| - No generic phrases like "More aggression" or "Improve timing" | |
| - Scores reflect actual match outcome | |
| - JSON is valid (no trailing commas, proper syntax) | |
| - Each strength/weakness minimum 25 characters | |
| VALIDATION CHECKS: | |
| - All timestamps in MM:SS format? β | |
| - No trailing commas? β | |
| - Exactly 3 of each category? β | |
| - All feedback includes timestamps? β | |
| - No generic phrases? β | |
| """, | |
| verbose=True, | |
| allow_delegation=False, | |
| llm=llm, | |
| memory=True | |
| ) | |
| analysis_task = Task( | |
| description=f""" | |
| Analyze DENSE CONSECUTIVE frame observations from BJJ match. | |
| OBSERVATIONS (from {num_frames} frames): | |
| {observations} | |
| VIDEO INFO: | |
| - Duration: {duration}s | |
| - Frames analyzed: {num_frames} (consecutive with ~1-2s intervals) | |
| - User: {user_desc} | |
| - Opponent: {opp_desc} | |
| REQUIRED OUTPUT: | |
| 1. OUTCOME SUMMARY: | |
| - Restate outcome exactly as in observations | |
| - Note frame sequences if submission occurred | |
| 2. SKILL SCORING (0-100, evidence-based): | |
| - Offense: Submission attempts / attacks (NOT positional control) | |
| - Defense: Escapes / survival (β€40 if submitted, β€65 if never threatened) | |
| - Guard: Bottom position effectiveness (β€40 if not meaningfully used) | |
| - Passing: Clearing legs and advancing (mount β passing) | |
| - Standup: Takedowns / clinch (=0 if no standing engagement) | |
| 3. STRENGTHS (EXACTLY 3): | |
| - Format: "At MM:SS - [Specific technical observation, min 25 chars]" | |
| - If submission: #1 MUST be the finish | |
| - Use sequential context from observations | |
| - NO generics | |
| 4. WEAKNESSES (EXACTLY 3): | |
| - Format: "At MM:SS - [Specific technical flaw, min 25 chars]" | |
| - If submitted: #1 MUST be the defensive failure | |
| - Reference frame progressions if applicable | |
| - NO generics | |
| 5. MISSED OPPORTUNITIES (2-3): | |
| - Must be visible in observations | |
| - Reference specific timestamps | |
| 6. KEY MOMENTS (2-4): | |
| - Include submission if occurred | |
| - Note significant transitions | |
| 7. COACH NOTES (150-250 words): | |
| - Technical, honest, evidence-based | |
| - Reference sequential patterns if observed | |
| - No speculation | |
| 8. DRILLS (EXACTLY 3): | |
| - Each addresses a specific weakness | |
| - Include timestamp justification | |
| """, | |
| agent=analyst, | |
| expected_output="Detailed technical analysis with submission detection and sequential awareness" | |
| ) | |
| formatting_task = Task( | |
| description="""Convert the analysis into this EXACT JSON structure. NO markdown wrapping. | |
| {{ | |
| "overall_score": <int 0-100>, | |
| "performance_label": "EXCELLENT|STRONG|SOLID|DEVELOPING|NEEDS IMPROVEMENT", | |
| "performance_grades": {{ | |
| "defense_grade": "<A+|A|B+|B|C+|C|D+|D>", | |
| "offense_grade": "<letter>", | |
| "control_grade": "<letter>" | |
| }}, | |
| "skill_breakdown": {{ | |
| "offense": <int>, | |
| "defense": <int>, | |
| "guard": <int>, | |
| "passing": <int>, | |
| "standup": <int> | |
| }}, | |
| "strengths": [ | |
| "At 0:XX - Specific observation with context (min 25 chars)", | |
| "At 0:XX - Another specific observation", | |
| "At 0:XX - Third specific observation" | |
| ], | |
| "weaknesses": [ | |
| "At 0:XX - Specific weakness with context (min 25 chars)", | |
| "At 0:XX - Another weakness", | |
| "At 0:XX - Third weakness" | |
| ], | |
| "missed_opportunities": [ | |
| {{"time": "MM:SS", "title": "Brief", "description": "Detail", "category": "SUBMISSION|POSITION|SWEEP"}} | |
| ], | |
| "key_moments": [ | |
| {{"time": "MM:SS", "title": "Event", "description": "What happened", "category": "SUBMISSION|TRANSITION|DEFENSE"}} | |
| ], | |
| "coach_notes": "Paragraph 150-250 words", | |
| "recommended_drills": [ | |
| {{"name": "Drill 1", "focus_area": "Area", "reason": "Why (reference timestamp)", "duration": "15 min/day", "frequency": "5x/week"}}, | |
| {{"name": "Drill 2", "focus_area": "Area", "reason": "Why", "duration": "10 min/day", "frequency": "4x/week"}}, | |
| {{"name": "Drill 3", "focus_area": "Area", "reason": "Why", "duration": "12 min/day", "frequency": "3x/week"}} | |
| ] | |
| }} | |
| VALIDATION CHECKS: | |
| - All timestamps in MM:SS format β | |
| - No trailing commas β | |
| - Exactly 3 strengths, 3 weaknesses, 3 drills β | |
| - All feedback includes timestamps β | |
| - No generic phrases β | |
| - Valid JSON syntax β | |
| """, | |
| agent=formatter, | |
| expected_output="Valid JSON only" | |
| ) | |
| crew = Crew( | |
| agents=[analyst, formatter], | |
| tasks=[analysis_task, formatting_task], | |
| process=Process.sequential, | |
| verbose=True | |
| ) | |
| return crew | |
| # --- HYBRID ANALYSIS --- | |
async def hybrid_agentic_analysis(
    frames: List[Dict],
    metadata: Dict,
    user_desc: str,
    opp_desc: str,
    activity_type: str,
    analysis_id: Optional[str] = None
) -> AnalysisResult:
    """Hybrid: Gemini vision + CrewAI agents + Python validation.

    Pipeline:
      1. Await ``extract_frame_observations`` for the sampled frames.
      2. If the observations contain both ``"content_verification"`` and
         ``"FAILED"``, return a zero-score rejection result instead of
         analysing further.
      3. Build the crew with ``create_analysis_crew`` and run
         ``crew.kickoff`` in the default executor so the event loop is
         not blocked.
      4. Strip any markdown fence from the crew output, parse it with
         ``extract_json_from_text``, normalise it with
         ``validate_and_filter`` and attach frames to the events.

    Args:
        frames: Sampled frame dicts (each is expected to carry a
            ``"timestamp"`` entry; see ``make_fallback``).
        metadata: Video metadata; ``metadata["duration"]`` is required.
        user_desc: Description identifying the user in the footage.
        opp_desc: Description identifying the opponent.
        activity_type: Accepted for interface compatibility; not used here.
        analysis_id: Optional key into ``db_storage`` used for progress
            tracking.

    Returns:
        An ``AnalysisResult``. On any exception the result is built from
        ``make_fallback(frames)`` and ``used_fallback`` is recorded.
    """

    def _track(**fields: Any) -> None:
        # Progress tracking is best-effort: an unknown analysis_id must not
        # abort the analysis, nor make the error handler below raise.
        if analysis_id and analysis_id in db_storage:
            db_storage[analysis_id].update(fields)

    print("\n" + "="*70)
    print("HYBRID AGENTIC ANALYSIS (Dense Consecutive Frames)")
    print("="*70)
    try:
        _track(progress=30)
        # STEP 1: Gemini Vision with dense frames
        observations = await extract_frame_observations(
            frames, user_desc, opp_desc, metadata["duration"], metadata
        )
        # Check for content verification failure
        if "content_verification" in observations and "FAILED" in observations:
            print("β Content verification failed - not BJJ/grappling content")
            # Try to parse the rejection message
            try:
                rejection_data = json.loads(observations)
                reason = rejection_data.get("reason", "Video does not appear to contain BJJ or grappling content.")
                suggested = rejection_data.get("suggested_action", "Please upload a BJJ or grappling video.")
                _track(status="rejected", rejection_reason=reason)
                # Return a special rejection result
                return AnalysisResult(**{
                    "overall_score": 0,
                    "performance_label": "CONTENT VERIFICATION FAILED",
                    "performance_grades": {"defense_grade": "N/A", "offense_grade": "N/A", "control_grade": "N/A"},
                    "skill_breakdown": {"offense": 0, "defense": 0, "guard": 0, "passing": 0, "standup": 0},
                    "strengths": [
                        "This video does not appear to contain BJJ or grappling content.",
                        "Please upload footage showing ground grappling, submissions, or takedowns.",
                        "Acceptable: BJJ (gi/no-gi), wrestling, judo newaza, submission grappling."
                    ],
                    "weaknesses": [
                        f"Content detected: {reason}",
                        "This system is designed specifically for grappling analysis.",
                        f"Action needed: {suggested}"
                    ],
                    "missed_opportunities": [],
                    "key_moments": [],
                    "coach_notes": f"β οΈ CONTENT VERIFICATION FAILED\n\n{reason}\n\n{suggested}\n\nThis AI system is specifically trained for Brazilian Jiu-Jitsu and grappling analysis. It cannot analyze striking-based martial arts, non-combat sports, or general videos. Please upload a video showing:\n\nβ’ Ground grappling or submissions\nβ’ Takedowns or clinch work\nβ’ BJJ, wrestling, judo, or submission grappling\n\nFor best results, ensure the video clearly shows both athletes engaged in grappling exchanges.",
                    "recommended_drills": []
                })
            except Exception:
                # Fallback if parsing fails. ``Exception`` (not a bare
                # except) so task cancellation and KeyboardInterrupt still
                # propagate.
                _track(status="rejected", rejection_reason="Video content verification failed")
                return AnalysisResult(**{
                    "overall_score": 0,
                    "performance_label": "CONTENT VERIFICATION FAILED",
                    "performance_grades": {"defense_grade": "N/A", "offense_grade": "N/A", "control_grade": "N/A"},
                    "skill_breakdown": {"offense": 0, "defense": 0, "guard": 0, "passing": 0, "standup": 0},
                    "strengths": [
                        "Video does not appear to contain BJJ or grappling content.",
                        "Please upload footage of ground grappling or submissions.",
                        "This system is designed for grappling analysis only."
                    ],
                    "weaknesses": [
                        "Upload a video showing BJJ, wrestling, or submission grappling.",
                        "Ensure both athletes are visible and engaged in grappling.",
                        "Videos should show ground work, takedowns, or submissions."
                    ],
                    "missed_opportunities": [],
                    "key_moments": [],
                    "coach_notes": "β οΈ CONTENT VERIFICATION FAILED\n\nThis video does not appear to contain Brazilian Jiu-Jitsu or grappling content. This AI system is specifically designed for analyzing ground grappling, submissions, and takedowns.\n\nPlease upload a video showing:\nβ’ BJJ (gi or no-gi)\nβ’ Wrestling\nβ’ Judo (newaza)\nβ’ Submission grappling\nβ’ MMA grappling exchanges\n\nFor optimal results, ensure the video clearly shows both athletes engaged in grappling.",
                    "recommended_drills": []
                })
        _track(progress=60)
        # STEP 2: CrewAI Agents
        print("\nSTEP 2: CrewAI Agents - Analysis & Formatting")
        crew = create_analysis_crew(observations, user_desc, opp_desc, metadata["duration"], len(frames))
        crew_start = time.time()
        # kickoff() is blocking, so it runs in the default thread pool.
        loop = asyncio.get_running_loop()
        result = await loop.run_in_executor(None, crew.kickoff)
        crew_time = time.time() - crew_start
        print(f"β CrewAI completed: {crew_time:.2f}s")
        _track(progress=90)
        # STEP 3: Parse & Validate
        print("\nSTEP 3: Python Validation")
        result_text = str(result)
        # Drop a markdown code fence if the model wrapped its JSON in one.
        if "```json" in result_text:
            result_text = result_text.split("```json")[1].split("```")[0].strip()
        elif "```" in result_text:
            result_text = result_text.split("```")[1].split("```")[0].strip()
        data = extract_json_from_text(result_text)
        data = validate_and_filter(data, frames)
        # Attach frames
        attach_frames_to_events(data.get("missed_opportunities", []), frames)
        attach_frames_to_events(data.get("key_moments", []), frames)
        _track(progress=100)
        print("β Analysis complete")
        print("="*70 + "\n")
        return AnalysisResult(**data)
    except Exception as e:
        print(f"β Hybrid analysis failed: {e}")
        traceback.print_exc()
        fallback = make_fallback(frames)
        _track(used_fallback=True)
        return AnalysisResult(**fallback)
def validate_and_filter(data: Dict, frames: List[Dict]) -> Dict:
    """Python-level validation and generic filtering.

    Fills in every field the result schema needs when the model omitted
    it, clamps the overall score to 0-100, and replaces generic
    strengths/weaknesses (as judged by ``is_generic``) with entries from
    ``make_specific`` so each list ends up with three items.

    Args:
        data: Parsed model output; mutated in place.
        frames: Sampled frame dicts with a ``"timestamp"`` entry, used to
            timestamp the placeholder events.

    Returns:
        The same ``data`` dict, normalised.
    """
    # The score comes from model output, so it may be missing, a numeric
    # string, a float, or None; coerce it before clamping.
    try:
        score = int(float(data.get("overall_score", 65)))
    except (TypeError, ValueError, OverflowError):
        score = 65
    data["overall_score"] = max(0, min(100, score))

    if "performance_label" not in data:
        score = data["overall_score"]
        if score >= 85:
            data["performance_label"] = "EXCELLENT PERFORMANCE"
        elif score >= 75:
            data["performance_label"] = "STRONG PERFORMANCE"
        elif score >= 60:
            data["performance_label"] = "SOLID PERFORMANCE"
        else:
            data["performance_label"] = "DEVELOPING PERFORMANCE"

    if "performance_grades" not in data:
        data["performance_grades"] = {"defense_grade": "C+", "offense_grade": "C", "control_grade": "C+"}

    if "skill_breakdown" not in data:
        base = data["overall_score"]
        data["skill_breakdown"] = {
            "offense": max(0, min(100, base - 5)),
            "defense": max(0, min(100, base + 3)),
            "guard": max(0, min(100, base - 2)),
            "passing": max(0, min(100, base - 10)),
            "standup": max(0, min(100, base - 13))
        }

    # Filter generic feedback
    for field in ("strengths", "weaknesses"):
        items = data.get(field)
        if items:
            filtered = [item for item in items if not is_generic(item)]
            if len(filtered) >= 3:
                data[field] = filtered[:3]
            else:
                data[field] = make_specific(field, frames, filtered)
        else:
            data[field] = make_specific(field, frames, [])

    if not data.get("missed_opportunities"):
        # Guarded so an empty frame list cannot raise IndexError here.
        mid_ts = frames[len(frames) // 2]["timestamp"] if frames else "00:30"
        data["missed_opportunities"] = [{
            "time": mid_ts,
            "title": "Position",
            "description": "Review sequence for improvement opportunities",
            "category": "POSITION"
        }]

    if not data.get("key_moments"):
        # Same short-list guard as make_specific / make_fallback:
        # frames[-3] only exists when there are at least three frames.
        if len(frames) > 2:
            end_ts = frames[-3]["timestamp"]
        elif frames:
            end_ts = frames[-1]["timestamp"]
        else:
            end_ts = "00:45"
        data["key_moments"] = [{
            "time": end_ts,
            "title": "Exchange",
            "description": "Significant moment in match flow",
            "category": "TRANSITION"
        }]

    notes = data.get("coach_notes")
    if not isinstance(notes, str) or len(notes) < 50:
        data["coach_notes"] = "Focus on maintaining consistent technique throughout sequences. Review timestamped moments for detailed improvement areas."

    drills = data.get("recommended_drills")
    if not isinstance(drills, list) or len(drills) < 3:
        data["recommended_drills"] = [
            {"name": "Position Control Sequences", "focus_area": "General", "reason": "Improve sequential awareness", "duration": "15 min/day", "frequency": "5x/week"},
            {"name": "Guard Retention Drills", "focus_area": "Defense", "reason": "Strengthen defensive sequences", "duration": "10 min/day", "frequency": "4x/week"},
            {"name": "Transition Flow Training", "focus_area": "Movement", "reason": "Improve position transitions", "duration": "12 min/day", "frequency": "3x/week"}
        ]
    return data
def make_specific(field: str, frames: List[Dict], existing: List[str]) -> List[str]:
    """Pad ``existing`` feedback to three items with timestamped templates.

    Args:
        field: ``"strengths"`` selects the positive templates; any other
            value selects the weakness templates.
        frames: Sampled frame dicts with a ``"timestamp"`` entry. The
            templates are stamped with frames near the start, middle and
            end of the list. May be empty, in which case fixed placeholder
            timestamps are used.
        existing: Feedback items to keep; never mutated.

    Returns:
        A new list of at most three items: ``existing`` first, then as
        many templates as are needed to reach three.
    """
    feedback = list(existing or [])
    if len(feedback) >= 3:
        # Nothing to pad, so the frames are not needed at all.
        return feedback[:3]

    if frames:
        start_ts = frames[len(frames) // 8]["timestamp"]
        mid_ts = frames[len(frames) // 2]["timestamp"]
        # frames[-3] only exists when there are at least three frames.
        end_ts = (frames[-3] if len(frames) > 2 else frames[-1])["timestamp"]
    else:
        # No frames were extracted: use placeholder timestamps rather than
        # raising IndexError.
        start_ts, mid_ts, end_ts = "00:10", "00:30", "00:45"

    if field == "strengths":
        templates = [
            f"At {start_ts} - Maintained good structural positioning during opening sequence",
            f"At {mid_ts} - Demonstrated positional awareness during mid-match exchange",
            f"At {end_ts} - Showed consistent control in final phase of match"
        ]
    else:
        templates = [
            f"At {start_ts} - Could improve initial positioning strategy and grip selection",
            f"At {mid_ts} - Slow to recognize transitional opportunity during position change",
            f"At {end_ts} - Room to improve execution and pressure application in final sequence"
        ]

    feedback.extend(templates[:3 - len(feedback)])
    return feedback
def make_fallback(frames: List[Dict]) -> Dict:
    """Build the canned analysis payload used when the real pipeline fails.

    The middle frame's timestamp labels the mid-match items and the
    third-from-last frame's timestamp labels the closing items. When
    there are no frames (or fewer than three for the closing stamp) the
    fixed values "00:30" and "00:45" are used instead.
    """
    mid = "00:30"
    if frames:
        mid = frames[len(frames) // 2]["timestamp"]

    end = "00:45"
    if len(frames) > 2:
        end = frames[-3]["timestamp"]

    strengths = [
        "At 0:10 - Maintained structural integrity during opening",
        f"At {mid} - Showed positional awareness during exchange",
        f"At {end} - Demonstrated control in final sequences",
    ]
    weaknesses = [
        "At 0:15 - Could improve initial positioning approach",
        f"At {mid} - Slow to recognize transitional opportunities",
        f"At {end} - Room to improve execution in final phase",
    ]
    drill_rows = (
        ("Sequential Control", "General", "Improve awareness", "15 min/day", "5x/week"),
        ("Guard Sequences", "Defense", "Strengthen defense", "10 min/day", "4x/week"),
        ("Flow Training", "Movement", "Improve transitions", "12 min/day", "3x/week"),
    )
    drills = [
        {"name": name, "focus_area": area, "reason": why, "duration": duration, "frequency": frequency}
        for name, area, why, duration, frequency in drill_rows
    ]

    return {
        "overall_score": 65,
        "performance_label": "SOLID PERFORMANCE",
        "performance_grades": {"defense_grade": "C+", "offense_grade": "C", "control_grade": "C+"},
        "skill_breakdown": {"offense": 60, "defense": 68, "guard": 63, "passing": 55, "standup": 52},
        "strengths": strengths,
        "weaknesses": weaknesses,
        "missed_opportunities": [
            {"time": mid, "title": "Position", "description": "Review for improvement", "category": "POSITION"}
        ],
        "key_moments": [
            {"time": end, "title": "Exchange", "description": "Significant sequence", "category": "TRANSITION"}
        ],
        "coach_notes": "Focus on maintaining consistent technique throughout match sequences. Review specific timestamped moments for detailed improvement areas.",
        "recommended_drills": drills,
    }
| # --- API --- | |
async def analyze_complete(
    file: UploadFile = File(...),
    user_description: str = Form(...),
    opponent_description: str = Form(...),
    activity_type: str = Form("Brazilian Jiu-Jitsu")
):
    """Analyse an uploaded video end to end and return a JSON-able dict.

    The upload is written under ``temp_videos/``, frames are extracted
    with ``extract_dense_consecutive_frames`` in the default executor,
    and ``hybrid_agentic_analysis`` produces the result. The temporary
    file is always removed afterwards.

    Returns:
        A dict whose ``"status"`` is one of:
          * ``"completed"``: analysis succeeded;
          * ``"rejected"``: frame extraction raised ``ValueError``
            (``error_type`` ``"duration_validation"``) or the content
            check failed (``error_type`` ``"content_verification"``);
          * ``"completed_with_fallback"``: any other exception occurred,
            in which case ``"data"`` comes from ``make_fallback``.
    """
    start_time = time.time()
    file_path = None
    try:
        # The client-supplied filename is untrusted: keep only its final
        # path component so it cannot point outside temp_videos/, and cope
        # with a missing name.
        raw_name = (file.filename or "").replace("\\", "/")
        safe_name = os.path.basename(raw_name) or "upload"
        file_name = f"{uuid.uuid4()}_{safe_name}"
        file_path = f"temp_videos/{file_name}"
        os.makedirs("temp_videos", exist_ok=True)
        with open(file_path, "wb") as buffer:
            shutil.copyfileobj(file.file, buffer)
        analysis_id = str(uuid.uuid4())
        db_storage[analysis_id] = {"status": "processing", "progress": 0}
        loop = asyncio.get_running_loop()
        # Extract DENSE CONSECUTIVE frames
        try:
            frames, metadata = await loop.run_in_executor(
                None, extract_dense_consecutive_frames, file_path
            )
        except ValueError as ve:
            # Duration validation error
            error_msg = str(ve)
            print(f"β οΈ Duration validation failed: {error_msg}")
            return {
                "status": "rejected",
                "error": error_msg,
                "error_type": "duration_validation",
                "data": {
                    "overall_score": 0,
                    "performance_label": "VIDEO DURATION ERROR",
                    "performance_grades": {"defense_grade": "N/A", "offense_grade": "N/A", "control_grade": "N/A"},
                    "skill_breakdown": {"offense": 0, "defense": 0, "guard": 0, "passing": 0, "standup": 0},
                    "strengths": [],
                    "weaknesses": [],
                    "missed_opportunities": [],
                    "key_moments": [],
                    "coach_notes": f"β οΈ VIDEO DURATION ERROR\n\n{error_msg}\n\nRecommended video length: 10-90 seconds\n\nTips:\nβ’ Focus on a single exchange or position\nβ’ Trim longer videos to key moments\nβ’ Ensure the clip shows clear grappling action",
                    "recommended_drills": []
                }
            }
        # Hybrid analysis
        result = await hybrid_agentic_analysis(
            frames, metadata,
            user_description.strip(), opponent_description.strip(),
            activity_type, analysis_id
        )
        total_time = time.time() - start_time
        # Check if content was rejected
        if result.performance_label == "CONTENT VERIFICATION FAILED":
            return {
                "status": "rejected",
                "error": "Video content verification failed - not BJJ/grappling",
                "error_type": "content_verification",
                "data": result.model_dump(),
                "processing_time": f"{total_time:.2f}s"
            }
        return {
            "status": "completed",
            "data": result.model_dump(),
            "processing_time": f"{total_time:.2f}s",
            "frames_analyzed": len(frames),
            "avg_frame_interval": f"{metadata.get('avg_frame_interval', 0):.2f}s",
            "used_fallback": db_storage[analysis_id].get("used_fallback", False),
            "method": "dense_consecutive_frames"
        }
    except Exception as e:
        print(f"β Error: {e}")
        traceback.print_exc()
        # Try to provide helpful fallback
        try:
            frames_fb, _ = await asyncio.get_running_loop().run_in_executor(
                None, extract_dense_consecutive_frames, file_path
            )
            fallback = make_fallback(frames_fb)
        except Exception:
            # Extraction failed too (or nothing was saved): use one synthetic
            # frame. ``Exception`` rather than a bare except so cancellation
            # still propagates.
            fallback = make_fallback([{"timestamp": "00:30", "second": 30}])
        return {
            "status": "completed_with_fallback",
            "data": fallback,
            "error": str(e),
            "used_fallback": True
        }
    finally:
        if file_path:
            try:
                os.remove(file_path)
            except OSError:
                # Best-effort cleanup: the file may never have been created.
                pass
async def health_check():
    """Report that the service is up, along with its build version string."""
    payload = {
        "status": "healthy",
        "version": "29.0.0-optimized-accurate",
    }
    return payload
async def root():
    """Return a static description of the service.

    The payload covers the version, the target timing, the frame-sampling
    strategy per video length, the submission-detection focus, the
    accuracy rules, and the accepted/rejected content types.
    """
    optimizations = [
        "β‘ Optimized frame counts for 50-60s Gemini processing",
        "π― 50% of frames in final 30% (submission-focused)",
        "π 15-40 frames (optimized for speed + accuracy)",
        "β Ultra-strict evidence requirements (prevents wrong diagnosis)",
        "π Conservative analysis (admits uncertainty when unclear)",
        "β±οΈ Target: 50-60s total (15s video: ~30s, 60s video: ~50s)",
    ]
    frame_strategy = {
        "10-15s_video": "15 frames (~1.0s intervals) β Gemini ~30s",
        "15-30s_video": "20 frames (~1.5s intervals) β Gemini ~40s",
        "30-60s_video": "30 frames (~2.0s intervals) β Gemini ~50s",
        "60-90s_video": "40 frames (~2.3s intervals) β Gemini ~60s",
    }
    submission_focus = {
        "distribution": "20% start, 30% middle, 50% end",
        "end_section": "50% of all frames in final 30% of video",
        "final_frames": "Always includes last 2 frames for tap detection",
        "confirmation": "Ultra-strict: requires EXPLICIT tap visible (2+ slaps)",
    }
    accuracy_improvements = [
        "Evidence-only analysis (NO assumptions or inferences)",
        "Conservative position labels (says 'Unclear' when uncertain)",
        "Stricter submission confirmation (tap must be EXPLICIT)",
        "Mount requires ALL 4 criteria (knees, hips, flat, no entangle)",
        "No pain inference, intent assumption, or guessing",
        "Better to admit uncertainty than make wrong diagnosis",
    ]
    accepted = [
        "BJJ (gi/no-gi)",
        "Submission grappling",
        "Wrestling",
        "Judo (newaza)",
        "MMA grappling",
    ]
    rejected = [
        "Striking arts",
        "Kata/forms",
        "Non-combat sports",
    ]
    validation = {
        "content_types_accepted": accepted,
        "content_types_rejected": rejected,
        "duration": "5-120 seconds",
    }
    return {
        "message": "BJJ AI Coach - Optimized for Speed + Accuracy",
        "version": "29.0.0",
        "target_performance": "Total analysis: 50-60 seconds",
        "architecture": "Gemini Vision + CrewAI Agents + Python Validation",
        "optimizations": optimizations,
        "frame_strategy": frame_strategy,
        "submission_focus": submission_focus,
        "accuracy_improvements": accuracy_improvements,
        "validation": validation,
    }
if __name__ == "__main__":
    # Script entry point: serve the app on all interfaces. The port is
    # taken from the PORT environment variable, defaulting to 7860.
    import uvicorn

    uvicorn.run(
        app,
        host="0.0.0.0",
        port=int(os.environ.get("PORT", 7860)),
    )