Spaces:
Sleeping
Sleeping
| from __future__ import annotations | |
| import os | |
| import time | |
| import shutil | |
| import uuid | |
| import json | |
| import asyncio | |
| import base64 | |
| import re | |
| import traceback | |
| from typing import List, Optional, Dict, Any | |
| from fastapi import FastAPI, UploadFile, File, BackgroundTasks, HTTPException, Form | |
| from fastapi.middleware.cors import CORSMiddleware | |
| from pydantic import BaseModel, ConfigDict | |
| import warnings | |
| # Suppress warnings | |
| warnings.filterwarnings('ignore', category=FutureWarning) | |
| # CrewAI imports | |
| from crewai import Agent, Task, Crew, Process | |
| from crewai.llm import LLM | |
| # Gemini imports | |
| import google.generativeai as genai | |
| from google.generativeai.types import HarmCategory, HarmBlockThreshold | |
| # OpenCV | |
| import cv2 | |
| import numpy as np | |
| # Configuration | |
| GEMINI_API_KEY = os.getenv("GOOGLE_API_KEY") | |
| GROQ_API_KEY = os.getenv("GROQ_API_KEY") | |
| if not GEMINI_API_KEY: | |
| raise ValueError("GOOGLE_API_KEY environment variable required") | |
| if not GROQ_API_KEY: | |
| raise ValueError("GROQ_API_KEY environment variable required") | |
| genai.configure(api_key=GEMINI_API_KEY) | |
| app = FastAPI(title="BJJ AI Coach") | |
| app.add_middleware( | |
| CORSMiddleware, | |
| allow_origins=["*"], | |
| allow_credentials=True, | |
| allow_methods=["*"], | |
| allow_headers=["*"], | |
| ) | |
| # --- MODELS --- | |
| class TimestampedEvent(BaseModel): | |
| time: str | |
| title: str | |
| description: str | |
| category: Optional[str] = "GENERAL" | |
| frame_image: Optional[str] = None | |
| frame_timestamp: Optional[str] = None | |
| model_config = ConfigDict(extra="allow") | |
| class Drill(BaseModel): | |
| name: str | |
| focus_area: str | |
| reason: str | |
| duration: Optional[str] = "15 min/day" | |
| frequency: Optional[str] = "5x/week" | |
| class DetailedSkillBreakdown(BaseModel): | |
| offense: int | |
| defense: int | |
| guard: int | |
| passing: int | |
| standup: int | |
| class PerformanceGrades(BaseModel): | |
| defense_grade: str | |
| offense_grade: str | |
| control_grade: str | |
| class AnalysisResult(BaseModel): | |
| overall_score: int | |
| performance_label: str | |
| performance_grades: PerformanceGrades | |
| skill_breakdown: DetailedSkillBreakdown | |
| strengths: List[str] | |
| weaknesses: List[str] | |
| missed_opportunities: List[TimestampedEvent] | |
| key_moments: List[TimestampedEvent] | |
| coach_notes: str | |
| recommended_drills: List[Drill] | |
| db_storage = {} | |
| # --- UTILITIES --- | |
| def parse_time_to_seconds(time_str: str) -> Optional[int]: | |
| if not time_str: | |
| return None | |
| match = re.search(r"(\d{1,2}):(\d{2})", time_str) | |
| if not match: | |
| return None | |
| mm, ss = match.groups() | |
| return int(mm) * 60 + int(ss) | |
| def find_closest_frame(target_time_sec: int, frames: list) -> dict: | |
| return min(frames, key=lambda f: abs(f["second"] - target_time_sec)) | |
| def attach_frames_to_events(events: List[dict], frames: list): | |
| for event in events: | |
| try: | |
| event_time_sec = parse_time_to_seconds(event.get("time")) | |
| if event_time_sec is None: | |
| continue | |
| closest = find_closest_frame(event_time_sec, frames) | |
| event["frame_timestamp"] = closest["timestamp"] | |
| event["frame_image"] = base64.b64encode(closest["bytes"]).decode("utf-8") | |
| except: | |
| event["frame_image"] = None | |
| def extract_json_from_text(text: str) -> Dict: | |
| """Robust JSON extraction""" | |
| text = text.strip() | |
| try: | |
| return json.loads(text) | |
| except: | |
| pass | |
| if "```json" in text or "```" in text: | |
| try: | |
| if "```json" in text: | |
| text = text.split("```json")[1].split("```")[0] | |
| else: | |
| text = text.split("```")[1].split("```")[0] | |
| return json.loads(text.strip()) | |
| except: | |
| pass | |
| try: | |
| start_idx = text.find('{') | |
| if start_idx == -1: | |
| raise ValueError("No opening brace") | |
| brace_count = 0 | |
| end_idx = -1 | |
| for i in range(start_idx, len(text)): | |
| if text[i] == '{': | |
| brace_count += 1 | |
| elif text[i] == '}': | |
| brace_count -= 1 | |
| if brace_count == 0: | |
| end_idx = i | |
| break | |
| if end_idx != -1: | |
| json_str = text[start_idx:end_idx+1] | |
| return json.loads(json_str) | |
| json_str = text[start_idx:] | |
| open_braces = json_str.count('{') | |
| close_braces = json_str.count('}') | |
| open_brackets = json_str.count('[') | |
| close_brackets = json_str.count(']') | |
| if open_brackets > close_brackets: | |
| json_str += ']' * (open_brackets - close_brackets) | |
| if open_braces > close_braces: | |
| json_str += '}' * (open_braces - close_braces) | |
| return json.loads(json_str) | |
| except: | |
| pass | |
| raise ValueError("Could not extract JSON") | |
| def is_generic(text: str) -> bool: | |
| """Check if feedback is too generic""" | |
| patterns = [r'^More \w+$', r'^Improve \w+$', r'^Work \w+$', r'^Better \w+$'] | |
| for p in patterns: | |
| if re.match(p, text.strip(), re.IGNORECASE): | |
| return True | |
| if not re.search(r'\d{1,2}:\d{2}', text): | |
| return True | |
| if len(text) < 20: | |
| return True | |
| return False | |
| def calculate_feedback_count(duration: float) -> Dict[str, int]: | |
| """ | |
| Calculate feedback counts based on video duration (client-specified scaling). | |
| Client's Requirements: | |
| - ≤15s: 1 strength, 1 weakness | |
| - 15-45s: 2 strengths, 2 weaknesses | |
| - 45-90s: 3 strengths, 3 weaknesses | |
| - 90-180s: 4 strengths, 4 weaknesses | |
| - 180-360s: 5 strengths, 5 weaknesses | |
| Returns dict with counts for: strengths, weaknesses, opportunities, moments | |
| """ | |
| if duration <= 15: | |
| return { | |
| "strengths": 1, | |
| "weaknesses": 1, | |
| "opportunities": 1, | |
| "moments": 1 | |
| } | |
| elif duration <= 45: | |
| return { | |
| "strengths": 2, | |
| "weaknesses": 2, | |
| "opportunities": 2, | |
| "moments": 2 | |
| } | |
| elif duration <= 90: # 1:30 minutes | |
| return { | |
| "strengths": 3, | |
| "weaknesses": 3, | |
| "opportunities": 2, | |
| "moments": 3 | |
| } | |
| elif duration <= 180: # 3 minutes | |
| return { | |
| "strengths": 4, | |
| "weaknesses": 4, | |
| "opportunities": 3, | |
| "moments": 4 | |
| } | |
| else: # 3-6 minutes (up to 360s) | |
| return { | |
| "strengths": 5, | |
| "weaknesses": 5, | |
| "opportunities": 4, | |
| "moments": 5 | |
| } | |
| # --- ENHANCED DENSE FRAME EXTRACTION --- | |
| def extract_dense_consecutive_frames(video_path: str) -> tuple: | |
| """ | |
| OPTIMIZED: Extract frames for MAXIMUM ACCURACY in 50-60s total processing | |
| Strategy - Balanced for speed + accuracy: | |
| - 10-15s video: 15 frames (~1.0s intervals) → Gemini ~30s | |
| - 15-30s video: 20 frames (~1.2s intervals) → Gemini ~40s | |
| - 30-60s video: 30 frames (~1.8s intervals) → Gemini ~50s | |
| - 60-90s video: 40 frames (~2.0s intervals) → Gemini ~60s | |
| Distribution (submission-focused): | |
| - START (0-20%): 20% of frames | |
| - MIDDLE (20-70%): 30% of frames | |
| - END (70-100%): 50% of frames (DENSEST for submission detection) | |
| """ | |
| try: | |
| cap = cv2.VideoCapture(video_path) | |
| if not cap.isOpened(): | |
| raise Exception("Cannot open video") | |
| fps = cap.get(cv2.CAP_PROP_FPS) | |
| total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) | |
| duration = total_frames / fps if fps > 0 else 0 | |
| # Validate video duration | |
| if duration < 5: | |
| raise ValueError("Video too short (< 5 seconds). Please upload a longer clip (10-90 seconds recommended).") | |
| if duration > 360: # 6 minutes max | |
| raise ValueError("Video too long (> 6 minutes). Please upload a shorter clip (15s-6min) for optimal analysis.") | |
| # OPTIMIZED FRAME COUNTS - Balanced for 50-60s Gemini processing | |
| if duration <= 15: | |
| total_to_extract = 15 # ~1.0s intervals → ~30s Gemini | |
| elif duration <= 30: | |
| total_to_extract = 20 # ~1.5s intervals → ~40s Gemini | |
| elif duration <= 45: | |
| total_to_extract = 25 # ~2.0s intervals → ~50s Gemini | |
| elif duration <= 60: | |
| total_to_extract = 25 # ~2.25s intervals → ~60s Gemini | |
| else: | |
| total_to_extract = 35 # ~2.7s intervals → ~65s Gemini (max) | |
| print(f"📹 OPTIMIZED EXTRACTION: {total_to_extract} frames from {duration:.1f}s video") | |
| print(f" Target: 1 frame every {duration/total_to_extract:.1f}s (Gemini: ~{total_to_extract * 1.5:.0f}s)") | |
| # SUBMISSION-FOCUSED distribution: 20% start, 30% middle, 50% end | |
| start_frames = max(3, int(total_to_extract * 0.20)) | |
| middle_frames = max(6, int(total_to_extract * 0.30)) | |
| end_frames = total_to_extract - start_frames - middle_frames | |
| print(f" Distribution (submission-focused): START={start_frames}, MIDDLE={middle_frames}, END={end_frames}") | |
| # Define sections | |
| start_section_end = int(total_frames * 0.20) | |
| middle_section_start = start_section_end | |
| middle_section_end = int(total_frames * 0.70) | |
| end_section_start = middle_section_end | |
| frames = [] | |
| # Extract START section (0-20%) - Overview | |
| start_interval = max(1, start_section_end // start_frames) | |
| for i in range(0, start_section_end, start_interval): | |
| if len([f for f in frames if f["second"] < duration * 0.20]) >= start_frames: | |
| break | |
| frame = get_frame(cap, i, fps) | |
| if frame: | |
| frames.append(frame) | |
| # Extract MIDDLE section (20-70%) - Standard coverage | |
| middle_section_frames = middle_section_end - middle_section_start | |
| middle_interval = max(1, middle_section_frames // middle_frames) | |
| for i in range(middle_section_start, middle_section_end, middle_interval): | |
| if len([f for f in frames if duration * 0.20 <= f["second"] < duration * 0.70]) >= middle_frames: | |
| break | |
| frame = get_frame(cap, i, fps) | |
| if frame: | |
| frames.append(frame) | |
| # Extract END section (70-100%) - DENSEST for submissions (50% of all frames!) | |
| end_section_frames = total_frames - end_section_start | |
| end_interval = max(1, end_section_frames // end_frames) | |
| print(f" END section (50% of frames): 1 frame every {end_interval/fps:.2f}s for submission detection") | |
| for i in range(end_section_start, total_frames, end_interval): | |
| if len([f for f in frames if f["second"] >= duration * 0.70]) >= end_frames: | |
| break | |
| frame = get_frame(cap, i, fps) | |
| if frame: | |
| frames.append(frame) | |
| # CRITICAL: Always add final 2 frames for tap detection | |
| for offset in [2, 1]: | |
| final_frame_idx = total_frames - offset | |
| if final_frame_idx > 0: | |
| frame = get_frame(cap, final_frame_idx, fps) | |
| if frame: | |
| if not any(f["frame_idx"] == frame["frame_idx"] for f in frames): | |
| frames.append(frame) | |
| cap.release() | |
| frames.sort(key=lambda f: f["second"]) | |
| # Calculate stats | |
| intervals = [] | |
| for i in range(1, len(frames)): | |
| time_gap = frames[i]["second"] - frames[i-1]["second"] | |
| intervals.append(time_gap) | |
| avg_interval = sum(intervals) / len(intervals) if intervals else 0 | |
| metadata = { | |
| "duration": round(duration, 2), | |
| "fps": round(fps, 2), | |
| "frames_extracted": len(frames), | |
| "avg_frame_interval": round(avg_interval, 2), | |
| "estimated_gemini_time": round(len(frames) * 1.5, 1), # ~1.5s per frame | |
| "distribution": { | |
| "start": len([f for f in frames if f["second"] < duration * 0.20]), | |
| "middle": len([f for f in frames if duration * 0.20 <= f["second"] < duration * 0.70]), | |
| "end": len([f for f in frames if f["second"] >= duration * 0.70]) | |
| } | |
| } | |
| print(f"✅ Extracted {len(frames)} frames (avg interval: {avg_interval:.2f}s)") | |
| print(f" Estimated Gemini time: ~{metadata['estimated_gemini_time']:.0f}s") | |
| print(f" Actual distribution: START={metadata['distribution']['start']}, " | |
| f"MIDDLE={metadata['distribution']['middle']}, " | |
| f"END={metadata['distribution']['end']} (50% in final 30%!)") | |
| return frames, metadata | |
| except Exception as e: | |
| if 'cap' in locals(): | |
| cap.release() | |
| raise Exception(f"Frame extraction failed: {str(e)}") | |
| def get_frame(cap: cv2.VideoCapture, frame_idx: int, fps: float) -> Optional[dict]: | |
| try: | |
| cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx) | |
| ret, frame = cap.read() | |
| if not ret: | |
| return None | |
| h, w = frame.shape[:2] | |
| target_h = 720 | |
| target_w = int(w * (target_h / h)) | |
| resized = cv2.resize(frame, (target_w, target_h)) | |
| _, buffer = cv2.imencode('.jpg', resized, [cv2.IMWRITE_JPEG_QUALITY, 85]) | |
| timestamp_sec = frame_idx / fps | |
| timestamp_str = f"{int(timestamp_sec // 60):02d}:{int(timestamp_sec % 60):02d}" | |
| return { | |
| "bytes": buffer.tobytes(), | |
| "timestamp": timestamp_str, | |
| "second": round(timestamp_sec, 2), | |
| "frame_idx": frame_idx | |
| } | |
| except: | |
| return None | |
| # --- ENHANCED GEMINI VISION WITH CONSECUTIVE CONTEXT --- | |
| async def extract_frame_observations(frames: List[Dict], user_desc: str, opp_desc: str, duration: float, metadata: Dict) -> str: | |
| """Use Gemini to analyze DENSE CONSECUTIVE frames""" | |
| print("STEP 1: Gemini Vision - Dense Consecutive Frame Analysis") | |
| try: | |
| # Build detailed frame list with time gaps | |
| frame_details = [] | |
| for i, f in enumerate(frames): | |
| if i > 0: | |
| time_gap = f["second"] - frames[i-1]["second"] | |
| gap_indicator = f" [+{time_gap:.1f}s]" if time_gap > 2 else "" | |
| else: | |
| gap_indicator = "" | |
| frame_details.append(f"Frame {i+1} @ {f['timestamp']} ({f['second']:.1f}s){gap_indicator}") | |
| frame_list = "\n".join(frame_details) | |
| avg_interval = metadata.get("avg_frame_interval", 2.0) | |
| print(user_desc, opp_desc) | |
| prompt = f""" | |
| You are an expert BJJ analyst performing CONSECUTIVE FRAME ANALYSIS on {len(frames)} frames from a {duration}s match. | |
| USER: {user_desc} | OPPONENT: {opp_desc} | |
| Identify the two main grapplers on the basis of user and opponent description above. | |
| IMP: Ignore background people, only focus on 2 athletes described across all frames. | |
| Average gap: {avg_interval:.1f}s between frames | |
| FRAMES WITH TIME GAPS: | |
| {frame_list} | |
| ==================================================================================== | |
| CRITICAL: CONSECUTIVE FRAME CONTEXT | |
| ==================================================================================== | |
| You have {len(frames)} CONSECUTIVE frames with small time gaps. This allows you to: | |
| - SEE COMPLETE SEQUENCES develop (setup → execution → finish) | |
| - Connect frames showing how techniques develop. | |
| - IDENTIFY PATTERNS in technique development | |
| - UNDERSTAND CONTEXT of each position change | |
| Your analysis MUST span the ENTIRE {duration}s video! | |
| REQUIRED DISTRIBUTION: | |
| - Early (0-{int(duration*0.2)}s): ~20% of key moments | |
| - Middle ({int(duration*0.2)}-{int(duration*0.7)}s): ~30% of key moments | |
| - Late ({int(duration*0.7)}-{int(duration)}s): ~50% of key moments | |
| - Focus on covering consecutive span of action also in all the sections | |
| CONSECUTIVE ANALYSIS RULES: | |
| 1. Frames < 2s apart = CONTINUOUS ACTION | |
| → Describe HOW the action PROGRESSED from previous frame | |
| → Example: "Continues Frame 5's wrist control, now rotating hips underneath..." | |
| 2. Frames > 3s apart = NEW SEQUENCE | |
| → Note the gap: "NEW SEQUENCE [3.2s gap] - position changed to..." | |
| 3. Track Multi-Frame Developments: | |
| → Frame 8: "User establishes wrist control" | |
| → Frame 9: "Continues wrist control from Frame 8, now rotating hips" | |
| → Frame 10: "Continues Frame 8-9 sequence, arm now fully extended" | |
| 4. Always Reference Previous Frames: | |
| → "Continues the sweep attempt from Frame 12..." | |
| → "Builds on the guard pass started in Frames 15-16..." | |
| ==================================================================================== | |
| STEP 0: CONTENT VERIFICATION | |
| ==================================================================================== | |
| Is this BJJ/grappling? (gi/no-gi, wrestling, judo newaza, submission grappling) | |
| If NO → {{"content_verification": "FAILED", "reason": "[what you see]"}} | |
| ==================================================================================== | |
| CORE PRINCIPLES | |
| ==================================================================================== | |
| MUST DO: | |
| - Describe ONLY what's visible | |
| - Say "Unclear" when uncertain | |
| - ALWAYS reference previous frames when action continues | |
| - Track how positions DEVELOP across consecutive frames | |
| FORBIDDEN: | |
| - NO assumptions about pain, intent, or gaps between frames | |
| - NO speculation beyond visible evidence | |
| ==================================================================================== | |
| BJJ REFERENCE VOCABULARY | |
| ==================================================================================== | |
| Use actual BJJ Techniques names | |
| POSITIONS: | |
| - Standing: Both athletes upright | |
| - Clinch: Standing with upper body control | |
| - Closed Guard: Legs locked around opponent's waist | |
| - Open Guard: Legs not locked but controlling (Butterfly, De La Riva, Spider, X-Guard) | |
| - Half Guard: One leg trapped between opponent's legs | |
| - Side Control: Chest across chest, perpendicular, opponent flat | |
| - North-South: Head-to-head position | |
| - Mount: ONLY if ALL 4 criteria met (both knees down, hips square, opponent flat, NO leg entanglement) | |
| → If ANY missing: say "Top pressure" or "Transitional position" | |
| - Back Control: Behind opponent with hooks or body triangle | |
| - Turtle: On hands and knees | |
| - Scramble: Both moving, position unclear | |
| COMMON SUBMISSIONS: | |
| Chokes: Rear Naked Choke (RNC), Guillotine, Triangle, Arm Triangle, D'Arce, Anaconda, Ezekiel | |
| Joint Locks: Armbar, Kimura, Americana, Omoplata, Wrist locks | |
| Leg Locks: Straight Ankle Lock, Kneebar, Heel Hook, Toe Hold, Calf Slicer | |
| SWEEPS & TECHNIQUES: | |
| Scissor Sweep, Flower Sweep, Hip Bump, Butterfly Sweep, X-Guard Sweep | |
| Technical Standup, Elbow Escape (Shrimp), Bridge & Roll | |
| ==================================================================================== | |
| SUBMISSION CONFIRMATION (STRICT) | |
| ==================================================================================== | |
| With consecutive frames, track COMPLETE submission sequences: | |
| CONFIRMED ONLY IF: | |
| - Lock visible in 2+ consecutive frames AND | |
| - EXPLICIT tap (hand slapping mat/body 2+ times) OR match stops during lock | |
| Example Progression: | |
| Frame 18: "Ankle isolated, beginning extension" | |
| Frame 19: "Continues Frame 18 - extension increasing, back arching" | |
| Frame 20: "Continues Frame 18-19 - full extension, grimacing visible" | |
| Frame 21: "Hand slapping mat 2x - TAP CONFIRMED" | |
| NOT SUFFICIENT: | |
| - Position alone without tap | |
| - "Appears painful" without tap | |
| - Hand moves once | |
| ==================================================================================== | |
| DETAILED FRAME-BY-FRAME ANALYSIS (EMPHASIZE PROGRESSION) | |
| ==================================================================================== | |
| For EACH frame: | |
| Frame X (MM:SS) [+X.Xs from previous]: | |
| POSITION: [Specific name or "Transitional"] | |
| ADVANTAGE: User / Opponent / Neutral | |
| ACTION TYPE: OFFENSE / DEFENSE / GUARD / PASSING / STANDUP / NONE | |
| - OFFENSE = Submission attempts or attack chains (NOT just holding) | |
| - DEFENSE = Escaping, framing, defending (NOT just being on bottom) | |
| - GUARD = Bottom with legs controlling (NOT just being on back) | |
| - PASSING = Actively clearing legs (NOT just being on top) | |
| - STANDUP = Takedown attempts or clinch | |
| - NONE = Static control or unclear | |
| WHAT'S HAPPENING (DETAILED): | |
| [Describe body positions, grips, pressure points, movement direction,use BJJ technique used] | |
| Be specific: "User's right hand controls opponent's left wrist at 90° angle, left hand framing chest..." | |
| ATHLETE POSITIONS: | |
| User: [Upper/lower body, grips, hip placement, head position, what attempting] | |
| Opponent: [Position, posture, reactions, defensive/offensive actions] | |
| THREATS: None / [Specific submission or positional advance] | |
| CONSECUTIVE CONTEXT (CRITICAL): | |
| If < 2s from previous: | |
| → "CONTINUES [action] from Frame X - progression: [what changed]" | |
| → "Builds on Frame X's [position], now [new development]" | |
| If > 3s gap: | |
| → "NEW SEQUENCE - [describe new situation]" | |
| FRAME-TO-FRAME CHANGES: | |
| [What specifically CHANGED from previous frame: grips, weight, limb positions, pressure] | |
| Describe HOW things developed, not just static positions. | |
| Be specific about what is happenening in depth position and bjj voacbulary relevant to frame and accurate | |
| ==================================================================================== | |
| EXAMPLE (Follow This Pattern) | |
| ==================================================================================== | |
| Frame 12 (00:28) [+1.2s]: | |
| POSITION: Half Guard (User on bottom) | |
| ADVANTAGE: Neutral | |
| ACTION TYPE: GUARD | |
| WHAT'S HAPPENING: User securing half guard with right leg hooking opponent's left leg. Left arm framing against chest, right hand controlling wrist. Opponent driving forward. | |
| ATHLETE POSITIONS: | |
| User: Bottom half guard, active knee shield with left leg, maintaining frame distance | |
| Opponent: Top pressure, attempting to flatten, right hand posting | |
| THREATS: Opponent attempting guard pass | |
| STRENGTH/WEAKNESS OF USER: [if applicable] | |
| CONSECUTIVE CONTEXT: NEW SEQUENCE after scramble in previous frames | |
| FRAME-TO-FRAME CHANGES: Stabilized into half guard from scramble | |
| Frame 13 (00:29) [+1.0s]: | |
| POSITION: Half Guard (User on bottom) | |
| ADVANTAGE: Slightly favors User | |
| ACTION TYPE: GUARD | |
| WHAT'S HAPPENING: User secured underhook with right arm. Left leg knee shield more active. Beginning to turn into opponent. | |
| ATHLETE POSITIONS: | |
| User: Underhook secured, knee shield elevated, hips turning underneath | |
| Opponent: Pressure reduced, posting with both hands | |
| STRENGTH/WEAKNESS OF USER: [if applicable] | |
| THREATS: User developing sweep opportunity | |
| CONSECUTIVE CONTEXT: CONTINUES half guard from Frame 12 - PROGRESSION: secured underhook, beginning sweep mechanics | |
| FRAME-TO-FRAME CHANGES: Right arm moved from wrist control to underhook; hips rotated 15-20 degrees | |
| ==================================================================================== | |
| FINAL SUMMARY | |
| ==================================================================================== | |
| OUTCOME: | |
| - Submission: YES/NO (only if tap visible) | |
| - Winner: User / Opponent / NONE | |
| - Technique: [Name] or NONE | |
| - Evidence: "Frames X-Y show [progression]: Frame X (setup) → Frame Y (control) → Frame Z (finish)" | |
| - Confidence: HIGH/MEDIUM/LOW | |
| POSITIONAL FLOW: | |
| Narrate match progression chronologically: | |
| - How positions developed across consecutive frames | |
| - Key transitions and turning points | |
| - Which sequences led to advantages/disadvantages | |
| KEY MULTI-FRAME SEQUENCES (2-4): | |
| Format: "Frames X-Y: [Sequence Name]" | |
| - Frame X: [Initial state] | |
| - Frame Y: [Development] | |
| - Frame Z: [Culmination] | |
| - Impact: [Effect on match] | |
| SUBMISSION SEQUENCES (if any): | |
| If submission occurred, describe COMPLETE development: | |
| - Setup phase (Frames X-Y): [How lock initiated] | |
| - Control phase (Frames Y-Z): [How position tightened] | |
| - Finish phase (Frame Z): [How tap occurred] | |
| ==================================================================================== | |
| CRITICAL REMINDERS | |
| ==================================================================================== | |
| - Your advantage: {len(frames)} consecutive frames = see COMPLETE sequences | |
| - - ALWAYS use specific BJJ technique names | |
| - Distinguish attacking positions (side control, mount) from defensive (turtle, bottom) | |
| - Always connect frames: "Continues from Frame X..." or "Builds on Frame X..." | |
| - Track progressions: Describe HOW things developed, not just static positions | |
| - Reference sequences: "Frames X-Y show [technique] developing..." | |
| - Time gaps matter: Note when gaps > 3s indicate new sequences | |
| - Be detailed: Specific grips, angles, pressure points, momentum | |
| - "Unclear" better than guessing: Conservative analysis prevents wrong diagnosis | |
| Think like a slow-motion replay analyst - you can see every step of technique development. | |
| """ | |
| # Prepare content | |
| content = [] | |
| for f in frames: | |
| content.append({ | |
| "mime_type": "image/jpeg", | |
| "data": base64.b64encode(f["bytes"]).decode("utf-8") | |
| }) | |
| content.append(prompt) | |
| # Call Gemini | |
| start = time.time() | |
| model = genai.GenerativeModel( | |
| model_name="gemini-2.5-flash", | |
| generation_config={ | |
| "temperature": 0.2, | |
| "max_output_tokens": 12000 # Increased for more frames | |
| } | |
| ) | |
| response = await asyncio.get_event_loop().run_in_executor( | |
| None, | |
| lambda: model.generate_content( | |
| content, | |
| safety_settings={ | |
| HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE, | |
| HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE, | |
| HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE, | |
| HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE, | |
| } | |
| ) | |
| ) | |
| elapsed = time.time() - start | |
| print(f"✅ Gemini vision completed: {elapsed:.2f}s ({len(frames)} frames analyzed)") | |
| try: | |
| observations = response.text | |
| except: | |
| observations = response.candidates[0].content.parts[0].text | |
| # Log first 500 chars for debugging | |
| print(f"📄 Observations preview: {observations[:500]}...") | |
| return observations | |
| except Exception as e: | |
| print(f"❌ Vision extraction failed: {e}") | |
| traceback.print_exc() | |
| return f"Error analyzing frames: {str(e)}" | |
| # --- CREWAI AGENTS (UPDATED FOR DENSE FRAMES) --- | |
| def create_analysis_crew(observations: str, user_desc: str, opp_desc: str, duration: float, num_frames: int): | |
| """Create CrewAI agents with awareness of dense consecutive frame analysis""" | |
| feedback_counts = calculate_feedback_count(duration) | |
| model = genai.GenerativeModel( | |
| model_name="gemini-2.5-flash", | |
| generation_config={ | |
| "temperature": 0.2, | |
| "max_output_tokens": 12000 # Increased for more frames | |
| } | |
| ) | |
| llm = LLM( | |
| model="groq/llama-3.3-70b-versatile", | |
| api_key=GROQ_API_KEY, | |
| temperature=0.2 | |
| ) | |
| analyst = Agent( | |
| role="BJJ Technical Analyst", | |
| goal=f"Analyze {num_frames} consecutive frame observations for {user_desc} to detect submissions, score performance, and identify patterns", | |
| backstory=f""" | |
| You are a BJJ black belt coach analyzing DENSE CONSECUTIVE FRAME observations. | |
| CONTEXT AWARENESS: | |
| - You received observations from {num_frames} frames (high density sampling) | |
| - Frames are CONSECUTIVE with small time gaps (avg 1-2 seconds) | |
| - This allows you to see COMPLETE action sequences, not just snapshots | |
| CRITICAL RULES: | |
| 1. OUTCOME AUTHORITY: Accept submission verdicts from observations - do NOT override | |
| 2. SEQUENCE AWARENESS: Look for multi-frame progressions described in observations | |
| 3. POSITION AUTHORITY: Respect position labels used in observations | |
| 4. TIMESTAMP PRECISION: Every claim must reference specific timestamps | |
| 5. NO GENERICS: "More aggression" and similar phrases are FORBIDDEN | |
| 6. TIMESTAMP DISTRIBUTION: Spread feedback across ENTIRE {duration}s video | |
| - Strength 1: From early section (0-{int(duration*0.2)}s) | |
| - Strength 2: From middle section ({int(duration*0.2)}-{int(duration*0.7)}s) | |
| - Strength 3: From late section ({int(duration*0.7)}-{int(duration)}s) | |
| - Same pattern for weaknesses, opportunities, and key moments | |
| SCORING GUIDELINES: | |
| - If user was submitted: Defense ≤40, Overall ≤60 | |
| - If user finished opponent: Offense ≥80, Overall ≥80 | |
| - Score based on demonstrated actions, not potential | |
| STRENGTHS/WEAKNESSES: | |
| - Must be SPECIFIC with timestamps | |
| - Minimum 25 characters with context | |
| - If submission occurred, it MUST be #1 in relevant category | |
| - Each item must be distinct (no repetition with different wording) | |
| DENSE FRAME ADVANTAGE: | |
| - Use the sequential context to identify setup patterns | |
| - Reference frame progressions (e.g., "Frames 25-28 showed grip sequence leading to...") | |
| - Distinguish between isolated mistakes vs systematic issues | |
| """, | |
| verbose=True, | |
| allow_delegation=False, | |
| llm=llm, | |
| memory=True | |
| ) | |
| formatter = Agent( | |
| role="Data Structure Specialist", | |
| goal="Convert analysis into valid JSON matching exact schema requirements", | |
| backstory="""You transform technical analysis into structured JSON. | |
| REQUIREMENTS: | |
| - Exactly {feedback_counts['strengths']} strengths and {feedback_counts['weaknesses']} weaknesses (dynamic based on {duration}s video) | |
| - All feedback includes timestamps (MM:SS format) | |
| - No generic phrases like "More aggression" or "Improve timing" | |
| - Scores reflect actual match outcome | |
| - JSON is valid (no trailing commas, proper syntax) | |
| - Each strength/weakness minimum 25 characters | |
| VALIDATION CHECKS: | |
| - All timestamps in MM:SS format? ✓ | |
| - No trailing commas? ✓ | |
| - Exactly 3 of each category? ✓ | |
| - All feedback includes timestamps? ✓ | |
| - No generic phrases? ✓ | |
| """, | |
| verbose=True, | |
| allow_delegation=False, | |
| llm=llm, | |
| memory=True | |
| ) | |
| analysis_task = Task( | |
| description=f""" | |
| Analyze CONSECUTIVE frame observations from BJJ match. | |
| OBSERVATIONS (from {num_frames} frames): | |
| {observations} | |
| VIDEO INFO: | |
| - Duration: {duration}s | |
| - Frames analyzed: {num_frames} (consecutive with ~1-2s intervals) | |
| - User: {user_desc} | |
| - Opponent: {opp_desc} | |
| - Imp: Do not hallucinate any info beyond observations, refer it as the only truth. | |
| REQUIRED OUTPUT: | |
| 1. OUTCOME SUMMARY: | |
| - Restate outcome exactly as in observations, no truth beyond observations. | |
| - Note frame sequences if submission occurred | |
| 2. SKILL SCORING (0-100, evidence-based): | |
| ⚔️ OFFENSE (0-100): | |
| Measures: Submission attempts, attack chains, offensive pressure | |
| - Achieved submission: 82-94 | |
| - Multiple dangerous attempts: 72-82 | |
| - Some offensive work: 62-72 | |
| - Limited attacking: 52-62 | |
| - Minimal offense: 42-52 | |
| - No offensive actions: 30-42 | |
| 🛡️ DEFENSE (0-100): | |
| Measures: Escapes, survival under pressure, defending submissions | |
| - Got submitted: 38-48 (clear defensive gap) | |
| - Under heavy pressure but survived: 52-62 | |
| - Some defensive challenges: 62-72 | |
| - Solid defense, few threats: 72-82 | |
| - Never seriously threatened: 75-85 | |
| 🔒 GUARD (0-100): | |
| Measures: Bottom position control and attacks | |
| - Active sweeps/submissions from guard: 72-85 | |
| - Controlled well from bottom: 62-72 | |
| - Some guard retention: 52-62 | |
| - Guard passed multiple times: 38-48 | |
| - Minimal guard engagement: 28-38 | |
| 🚶 PASSING (0-100): | |
| Measures: Ability to clear legs and advance past guard | |
| - Multiple successful passes: 75-88 | |
| - One or more passes: 68-78 | |
| - Strong passing pressure: 58-68 | |
| - Attempted but unsuccessful: 48-58 | |
| - Minimal passing work: 38-48 | |
| 🧍 STANDUP (0-100): | |
| Measures: Takedowns and clinch exchanges | |
| - Successful takedown(s): 72-88 | |
| - Strong attempts: 62-72 | |
| - Some standup work: 52-62 | |
| - Brief standup only: 42-52 | |
| - No standup engagement: 0 | |
| **OVERALL SCORE CALCULATION:** | |
| 1. Start with base score from positional flow | |
| 2. Apply outcome modifier: | |
| - Submission achieved: +12-18 points | |
| - Got submitted: -12-18 points | |
| - Dominant positions: +6-10 points | |
| - Lost positions badly: -6-10 points | |
| 3. Ensure final score reflects match reality | |
| 4. Range check: 45-58 (submitted), 60-72 (typical), 72-88 (strong/dominant) | |
| 5. KEY PRINCIPLE: If a phase isn't in the video, it doesn't affect the score negatively! no penalising | |
| Below all should be coach-like, specific, and reference timestamps | |
| Most relevant/imp observations should be used for strengths, weaknesses, opportunities, and key moments and timestamps/frame analysis should be distributed evenly throughout the video. | |
| 3. STRENGTHS (EXACTLY {feedback_counts['strengths']}):: | |
| - Format: "At MM:SS - [Specific TECHNIQUE used + WHAT it accomplished]" | |
| - They should be the most important positive actions observed by user | |
| - Example style: "You defended opponent's heel hook by maintaining forward pressure" or "You initiated scramble to pass guard using leg drag" | |
| - If submission: #1 MUST be the finish | |
| - NO vague phrases like "showed awareness" or "maintained position" - be SPECIFIC about the action and result | |
| 4. WEAKNESSES (EXACTLY {feedback_counts['strengths']}):: | |
| - Format: "At MM:SS - [Specific MISTAKE + CONSEQUENCE that resulted]" | |
| -n They should be the most critical negative actions observed by user | |
| - Example style: "You didn't have enough top pressure allowing opponent to escape to turtle" or "Weak submission attempt caused you to lose control" | |
| - If submitted: #1 MUST be the defensive failure | |
| - CRITICAL: Check timestamps don't contradict strengths (don't say they failed at what they succeeded at) | |
| 5. MISSED OPPORTUNITIES : | |
| - List specific, frame-visible technical opportunities with exact timestamps. | |
| - Only include actions clearly observable in the footage. | |
| Good examples: | |
| * “Darce choke opening from top side control at 00:24” | |
| * “Single-leg available when opponent posted hand at 00:15” | |
| * “Guard pass opportunity during leg reposition at 00:31” | |
| * “Side control escape by framing under chin at 00:42” | |
| Avoid generic or subjective feedback/ non-observable claims/missing timestamps | |
| 6. KEY MOMENTS (EXACTLY {feedback_counts['strengths']}):: | |
| - Highlight the BEST moments from BOTH athletes (not biased to user only) | |
| - Include significant actions from BOTH user AND opponent (takedowns, passes, submissions, escapes, sweeps) | |
| - Think like a highlight reel: What were the most important/impressive moments in the match? | |
| - Examples: "User took down opponent with double leg", "Opponent quickly recovered guard", "Opponent submitted user with armbar" | |
| 7. COACH NOTES (150-250 words): | |
| Write like a REAL gym coach talking after watching the roll - conversational, direct, accurate. | |
| CRITICAL: | |
| - Start with what you SAW: "Nice work on...", "I noticed...", "That pass at..." and address the user as "You". | |
| - Be ACCURATE: Only mention what actually happened (no standup if none occurred, don't say "struggled" if they succeeded) | |
| - Use BJJ slang naturally: "That knee slice was tight", "Hunt for the underhook", "Stay heavy on top" | |
| - Avoid AI words: "demonstrated", "showcased", "exhibited", "positional awareness" | |
| - Give 2-3 specific things to work on with timestamps | |
| - End with encouragement or next steps | |
| 8. DRILLS (EXACTLY 3): | |
| - Each addresses a specific weakness | |
| - Include timestamp justification | |
| """, | |
| agent=analyst, | |
| expected_output="Detailed technical analysis with submission detection and sequential awareness" | |
| ) | |
| formatting_task = Task( | |
| description="""Convert the analysis into this EXACT JSON structure. NO markdown wrapping. | |
| {{ | |
| "overall_score": <int 0-100>, | |
| "performance_label": "EXCELLENT|STRONG|SOLID|DEVELOPING|NEEDS IMPROVEMENT", | |
| "performance_grades": {{ | |
| "defense_grade": "<A+|A|B+|B|C+|C|D+|D>", | |
| "offense_grade": "<letter>", | |
| "control_grade": "<letter>" | |
| }}, | |
| "skill_breakdown": {{ | |
| "offense": <int>, | |
| "defense": <int>, | |
| "guard": <int>, | |
| "passing": <int>, | |
| "standup": <int> | |
| }}, | |
| "strengths": [ | |
| "At 0:XX - Specific observation with context (min 25 chars)", | |
| "At 0:XX - Another specific observation", | |
| "At 0:XX - Third specific observation" | |
| ], | |
| "weaknesses": [ | |
| "At 0:XX - Specific weakness with context (min 25 chars)", | |
| "At 0:XX - Another weakness", | |
| "At 0:XX - Third weakness" | |
| ], | |
| "missed_opportunities": | |
| {{"time": "MM:SS", "title": "Brief", "description": "Detail", "category": "SUBMISSION|POSITION|SWEEP"}} | |
| ], | |
| "key_moments": [ | |
| {{"time": "MM:SS", "title": "Event", "description": "What happened", "category": "SUBMISSION|TRANSITION|DEFENSE"}} | |
| ], | |
| "coach_notes": "Paragraph 150-250 words", | |
| "recommended_drills": [ | |
| {{"name": "Drill 1", "focus_area": "Area", "reason": "Why (reference timestamp)", "duration": "15 min/day", "frequency": "5x/week"}}, | |
| {{"name": "Drill 2", "focus_area": "Area", "reason": "Why", "duration": "10 min/day", "frequency": "4x/week"}}, | |
| {{"name": "Drill 3", "focus_area": "Area", "reason": "Why", "duration": "12 min/day", "frequency": "3x/week"}} | |
| ] | |
| }} | |
| VALIDATION CHECKS: | |
| - All timestamps in MM:SS format ✓ | |
| - No trailing commas ✓ | |
| - All feedback includes timestamps ✓ | |
| - No generic phrases ✓ | |
| - Valid JSON syntax ✓ | |
| """, | |
| agent=formatter, | |
| expected_output="Valid JSON only" | |
| ) | |
| crew = Crew( | |
| agents=[analyst, formatter], | |
| tasks=[analysis_task, formatting_task], | |
| process=Process.sequential, | |
| verbose=True | |
| ) | |
| return crew | |
| # --- HYBRID ANALYSIS --- | |
| async def hybrid_agentic_analysis( | |
| frames: List[Dict], | |
| metadata: Dict, | |
| user_desc: str, | |
| opp_desc: str, | |
| activity_type: str, | |
| analysis_id: str = None | |
| ) -> AnalysisResult: | |
| """Hybrid: Gemini vision + CrewAI agents + Python validation""" | |
| print("\n" + "="*70) | |
| print("HYBRID AGENTIC ANALYSIS (Dense Consecutive Frames)") | |
| print("="*70) | |
| try: | |
| if analysis_id: | |
| db_storage[analysis_id]["progress"] = 30 | |
| # STEP 1: Gemini Vision with dense frames | |
| observations = await extract_frame_observations( | |
| frames, user_desc, opp_desc, metadata["duration"], metadata | |
| ) | |
| # Check for content verification failure | |
| if "content_verification" in observations and "FAILED" in observations: | |
| print("❌ Content verification failed - not BJJ/grappling content") | |
| # Try to parse the rejection message | |
| try: | |
| rejection_data = json.loads(observations) | |
| reason = rejection_data.get("reason", "Video does not appear to contain BJJ or grappling content.") | |
| suggested = rejection_data.get("suggested_action", "Please upload a BJJ or grappling video.") | |
| if analysis_id: | |
| db_storage[analysis_id]["status"] = "rejected" | |
| db_storage[analysis_id]["rejection_reason"] = reason | |
| # Return a special rejection result | |
| return AnalysisResult(**{ | |
| "overall_score": 0, | |
| "performance_label": "CONTENT VERIFICATION FAILED", | |
| "performance_grades": {"defense_grade": "N/A", "offense_grade": "N/A", "control_grade": "N/A"}, | |
| "skill_breakdown": {"offense": 0, "defense": 0, "guard": 0, "passing": 0, "standup": 0}, | |
| "strengths": [ | |
| "This video does not appear to contain BJJ or grappling content.", | |
| "Please upload footage showing ground grappling, submissions, or takedowns.", | |
| "Acceptable: BJJ (gi/no-gi), wrestling, judo newaza, submission grappling." | |
| ], | |
| "weaknesses": [ | |
| f"Content detected: {reason}", | |
| "This system is designed specifically for grappling analysis.", | |
| f"Action needed: {suggested}" | |
| ], | |
| "missed_opportunities": [], | |
| "key_moments": [], | |
| "coach_notes": f"⚠️ CONTENT VERIFICATION FAILED\n\n{reason}\n\n{suggested}\n\nThis AI system is specifically trained for Brazilian Jiu-Jitsu and grappling analysis. It cannot analyze striking-based martial arts, non-combat sports, or general videos. Please upload a video showing:\n\n• Ground grappling or submissions\n• Takedowns or clinch work\n• BJJ, wrestling, judo, or submission grappling\n\nFor best results, ensure the video clearly shows both athletes engaged in grappling exchanges.", | |
| "recommended_drills": [] | |
| }) | |
| except: | |
| # Fallback if parsing fails | |
| if analysis_id: | |
| db_storage[analysis_id]["status"] = "rejected" | |
| db_storage[analysis_id]["rejection_reason"] = "Video content verification failed" | |
| return AnalysisResult(**{ | |
| "overall_score": 0, | |
| "performance_label": "CONTENT VERIFICATION FAILED", | |
| "performance_grades": {"defense_grade": "N/A", "offense_grade": "N/A", "control_grade": "N/A"}, | |
| "skill_breakdown": {"offense": 0, "defense": 0, "guard": 0, "passing": 0, "standup": 0}, | |
| "strengths": [ | |
| "Video does not appear to contain BJJ or grappling content.", | |
| "Please upload footage of ground grappling or submissions.", | |
| "This system is designed for grappling analysis only." | |
| ], | |
| "weaknesses": [ | |
| "Upload a video showing BJJ, wrestling, or submission grappling.", | |
| "Ensure both athletes are visible and engaged in grappling.", | |
| "Videos should show ground work, takedowns, or submissions." | |
| ], | |
| "missed_opportunities": [], | |
| "key_moments": [], | |
| "coach_notes": "⚠️ CONTENT VERIFICATION FAILED\n\nThis video does not appear to contain Brazilian Jiu-Jitsu or grappling content. This AI system is specifically designed for analyzing ground grappling, submissions, and takedowns.\n\nPlease upload a video showing:\n• BJJ (gi or no-gi)\n• Wrestling\n• Judo (newaza)\n• Submission grappling\n• MMA grappling exchanges\n\nFor optimal results, ensure the video clearly shows both athletes engaged in grappling.", | |
| "recommended_drills": [] | |
| }) | |
| if analysis_id: | |
| db_storage[analysis_id]["progress"] = 60 | |
| # STEP 2: CrewAI Agents | |
| print("\nSTEP 2: CrewAI Agents - Analysis & Formatting") | |
| crew = create_analysis_crew(observations, user_desc, opp_desc, metadata["duration"], len(frames)) | |
| crew_start = time.time() | |
| result = await asyncio.get_event_loop().run_in_executor( | |
| None, | |
| crew.kickoff | |
| ) | |
| crew_time = time.time() - crew_start | |
| print(f"✅ CrewAI completed: {crew_time:.2f}s") | |
| if analysis_id: | |
| db_storage[analysis_id]["progress"] = 90 | |
| # STEP 3: Parse & Validate | |
| print("\nSTEP 3: Python Validation") | |
| result_text = str(result) | |
| if "```json" in result_text: | |
| result_text = result_text.split("```json")[1].split("```")[0].strip() | |
| elif "```" in result_text: | |
| result_text = result_text.split("```")[1].split("```")[0].strip() | |
| data = extract_json_from_text(result_text) | |
| data = validate_and_filter(data, frames, metadata["duration"]) | |
| attach_frames_to_events(data.get("missed_opportunities", []), frames) | |
| attach_frames_to_events(data.get("key_moments", []), frames) | |
| if analysis_id: | |
| db_storage[analysis_id]["progress"] = 100 | |
| print("✅ Analysis complete") | |
| print("="*70 + "\n") | |
| return AnalysisResult(**data) | |
| except Exception as e: | |
| print(f"❌ Hybrid analysis failed: {e}") | |
| traceback.print_exc() | |
| fallback = make_fallback(frames) | |
| if analysis_id: | |
| db_storage[analysis_id]["used_fallback"] = True | |
| return AnalysisResult(**fallback) | |
| def validate_and_filter(data: Dict, frames: List[Dict], duration: float) -> Dict: | |
| """Python-level validation and generic filtering""" | |
| if "overall_score" not in data: | |
| data["overall_score"] = 65 | |
| data["overall_score"] = max(0, min(100, data["overall_score"])) | |
| if "performance_label" not in data: | |
| score = data["overall_score"] | |
| if score >= 85: | |
| data["performance_label"] = "EXCELLENT PERFORMANCE" | |
| elif score >= 75: | |
| data["performance_label"] = "STRONG PERFORMANCE" | |
| elif score >= 60: | |
| data["performance_label"] = "SOLID PERFORMANCE" | |
| else: | |
| data["performance_label"] = "DEVELOPING PERFORMANCE" | |
| if "performance_grades" not in data: | |
| data["performance_grades"] = {"defense_grade": "C+", "offense_grade": "C", "control_grade": "C+"} | |
| if "skill_breakdown" not in data: | |
| base = data["overall_score"] | |
| data["skill_breakdown"] = { | |
| "offense": max(0, min(100, base - 5)), | |
| "defense": max(0, min(100, base + 3)), | |
| "guard": max(0, min(100, base - 2)), | |
| "passing": max(0, min(100, base - 10)), | |
| "standup": max(0, min(100, base - 13)) | |
| } | |
| feedback_counts = calculate_feedback_count(duration) | |
| # Filter generic feedback | |
| late_section_start = duration * 0.7 | |
| for field in ["strengths", "weaknesses"]: | |
| has_late_timestamp = False | |
| for item in data[field]: | |
| timestamp_match = re.search(r'(\d{1,2}):(\d{2})', item) | |
| if timestamp_match: | |
| mm, ss = timestamp_match.groups() | |
| time_in_seconds = int(mm) * 60 + int(ss) | |
| if time_in_seconds >= late_section_start: | |
| has_late_timestamp = True | |
| break | |
| # If no late timestamps, FORCE add one | |
| if not has_late_timestamp: | |
| late_frames = [f for f in frames if f["second"] >= late_section_start] | |
| if late_frames: | |
| late_frame = late_frames[-3] # Pick near end | |
| if field == "strengths": | |
| late_item = f"At {late_frame['timestamp']} - Maintained control and pressure in final phase" | |
| else: | |
| late_item = f"At {late_frame['timestamp']} - Could increase urgency in final moments" | |
| data[field][-1] = late_item # Replace last item | |
| print(f"⚠️ FORCED late timestamp: {late_frame['timestamp']}") | |
| if "missed_opportunities" not in data or not data["missed_opportunities"]: | |
| data["missed_opportunities"] = [{ | |
| "time": frames[len(frames)//2]["timestamp"], | |
| "title": "Position", | |
| "description": "Review sequence for improvement opportunities", | |
| "category": "POSITION" | |
| }] | |
| # Update key moments with dynamic count | |
| if "key_moments" not in data or len(data["key_moments"]) < feedback_counts["moments"]: | |
| default_moments = [] | |
| for i in range(feedback_counts["moments"]): | |
| frame_idx = len(frames) // (feedback_counts["moments"] + 1) * (i + 1) | |
| default_moments.append({ | |
| "time": frames[frame_idx]["timestamp"], | |
| "title": "Exchange", | |
| "description": "Significant moment in match flow", | |
| "category": "TRANSITION" | |
| }) | |
| data["key_moments"] = default_moments | |
| if "coach_notes" not in data or len(data["coach_notes"]) < 50: | |
| data["coach_notes"] = "Focus on maintaining consistent technique throughout sequences. Review timestamped moments for detailed improvement areas." | |
| if "recommended_drills" not in data or len(data["recommended_drills"]) < 3: | |
| data["recommended_drills"] = [ | |
| {"name": "Position Control Sequences", "focus_area": "General", "reason": "Improve sequential awareness", "duration": "15 min/day", "frequency": "5x/week"}, | |
| {"name": "Guard Retention Drills", "focus_area": "Defense", "reason": "Strengthen defensive sequences", "duration": "10 min/day", "frequency": "4x/week"}, | |
| {"name": "Transition Flow Training", "focus_area": "Movement", "reason": "Improve position transitions", "duration": "12 min/day", "frequency": "3x/week"} | |
| ] | |
| return data | |
| def make_specific(field: str, frames: List[Dict], existing: List[str], count: int, duration: float) -> List[str]: | |
| """ | |
| Generate specific feedback distributed across entire video duration. | |
| count: How many items to generate (3-7 based on video length) | |
| duration: Video length in seconds for timestamp distribution | |
| """ | |
| feedback = existing.copy() | |
| # Calculate timestamps spread across video | |
| timestamps_needed = count - len(feedback) | |
| if timestamps_needed <= 0: | |
| return feedback[:count] | |
| early_count = max(1, int(timestamps_needed * 0.2)) | |
| middle_count = max(1, int(timestamps_needed * 0.3)) | |
| late_count = timestamps_needed - early_count - middle_count | |
| early_frames = [f for f in frames if f["second"] < duration * 0.2] | |
| middle_frames = [f for f in frames if duration * 0.2 <= f["second"] < duration * 0.7] | |
| late_frames = [f for f in frames if f["second"] >= duration * 0.7] | |
| def get_frame_from_section(section_frames, index, section_count): | |
| if not section_frames: | |
| return frames[0] | |
| frame_idx = len(section_frames) // (section_count + 1) * (index + 1) | |
| return section_frames[min(frame_idx, len(section_frames) - 1)] | |
| if field == "strengths": | |
| # Early strengths | |
| for i in range(early_count): | |
| frame = get_frame_from_section(early_frames, i, early_count) | |
| feedback.append(f"At {frame['timestamp']} - Maintained good structural positioning during opening sequence") | |
| # Middle strengths | |
| for i in range(middle_count): | |
| frame = get_frame_from_section(middle_frames, i, middle_count) | |
| feedback.append(f"At {frame['timestamp']} - Demonstrated positional awareness during mid-match exchange") | |
| # Late strengths | |
| for i in range(late_count): | |
| frame = get_frame_from_section(late_frames, i, late_count) | |
| feedback.append(f"At {frame['timestamp']} - Showed consistent control in final phase of match") | |
| else: | |
| # Early weaknesses | |
| for i in range(early_count): | |
| frame = get_frame_from_section(early_frames, i, early_count) | |
| feedback.append(f"At {frame['timestamp']} - Could improve initial positioning strategy and grip selection") | |
| # Middle weaknesses | |
| for i in range(middle_count): | |
| frame = get_frame_from_section(middle_frames, i, middle_count) | |
| feedback.append(f"At {frame['timestamp']} - Slow to recognize transitional opportunity during position change") | |
| # Late weaknesses | |
| for i in range(late_count): | |
| frame = get_frame_from_section(late_frames, i, late_count) | |
| feedback.append(f"At {frame['timestamp']} - Room to improve execution and pressure application in final sequence") | |
| return feedback[:count] | |
| def make_fallback(frames: List[Dict]) -> Dict: | |
| mid = frames[len(frames)//2]["timestamp"] if frames else "00:30" | |
| end = frames[-3]["timestamp"] if len(frames) > 2 else "00:45" | |
| return { | |
| "overall_score": 65, | |
| "performance_label": "SOLID PERFORMANCE", | |
| "performance_grades": {"defense_grade": "C+", "offense_grade": "C", "control_grade": "C+"}, | |
| "skill_breakdown": {"offense": 60, "defense": 68, "guard": 63, "passing": 55, "standup": 52}, | |
| "strengths": [ | |
| "At 0:10 - Maintained structural integrity during opening", | |
| f"At {mid} - Showed positional awareness during exchange", | |
| f"At {end} - Demonstrated control in final sequences" | |
| ], | |
| "weaknesses": [ | |
| "At 0:15 - Could improve initial positioning approach", | |
| f"At {mid} - Slow to recognize transitional opportunities", | |
| f"At {end} - Room to improve execution in final phase" | |
| ], | |
| "missed_opportunities": [{"time": mid, "title": "Position", "description": "Review for improvement", "category": "POSITION"}], | |
| "key_moments": [{"time": end, "title": "Exchange", "description": "Significant sequence", "category": "TRANSITION"}], | |
| "coach_notes": "Focus on maintaining consistent technique throughout match sequences. Review specific timestamped moments for detailed improvement areas.", | |
| "recommended_drills": [ | |
| {"name": "Sequential Control", "focus_area": "General", "reason": "Improve awareness", "duration": "15 min/day", "frequency": "5x/week"}, | |
| {"name": "Guard Sequences", "focus_area": "Defense", "reason": "Strengthen defense", "duration": "10 min/day", "frequency": "4x/week"}, | |
| {"name": "Flow Training", "focus_area": "Movement", "reason": "Improve transitions", "duration": "12 min/day", "frequency": "3x/week"} | |
| ] | |
| } | |
| # --- API --- | |
| async def analyze_complete( | |
| file: UploadFile = File(...), | |
| user_description: str = Form(...), | |
| opponent_description: str = Form(...), | |
| activity_type: str = Form("Brazilian Jiu-Jitsu") | |
| ): | |
| start_time = time.time() | |
| file_path = None | |
| try: | |
| file_name = f"{uuid.uuid4()}_{file.filename}" | |
| file_path = f"temp_videos/{file_name}" | |
| os.makedirs("temp_videos", exist_ok=True) | |
| with open(file_path, "wb") as buffer: | |
| shutil.copyfileobj(file.file, buffer) | |
| analysis_id = str(uuid.uuid4()) | |
| db_storage[analysis_id] = {"status": "processing", "progress": 0} | |
| # Extract DENSE CONSECUTIVE frames | |
| try: | |
| frames, metadata = await asyncio.get_event_loop().run_in_executor( | |
| None, extract_dense_consecutive_frames, file_path | |
| ) | |
| except ValueError as ve: | |
| # Duration validation error | |
| error_msg = str(ve) | |
| print(f"⚠️ Duration validation failed: {error_msg}") | |
| return { | |
| "status": "rejected", | |
| "error": error_msg, | |
| "error_type": "duration_validation", | |
| "data": { | |
| "overall_score": 0, | |
| "performance_label": "VIDEO DURATION ERROR", | |
| "performance_grades": {"defense_grade": "N/A", "offense_grade": "N/A", "control_grade": "N/A"}, | |
| "skill_breakdown": {"offense": 0, "defense": 0, "guard": 0, "passing": 0, "standup": 0}, | |
| "strengths": [], | |
| "weaknesses": [], | |
| "missed_opportunities": [], | |
| "key_moments": [], | |
| "coach_notes": f"⚠️ VIDEO DURATION ERROR\n\n{error_msg}\n\nRecommended video length: 10-90 seconds\n\nTips:\n• Focus on a single exchange or position\n• Trim longer videos to key moments\n• Ensure the clip shows clear grappling action", | |
| "recommended_drills": [] | |
| } | |
| } | |
| # Hybrid analysis | |
| result = await hybrid_agentic_analysis( | |
| frames, metadata, | |
| user_description.strip(), opponent_description.strip(), | |
| activity_type, analysis_id | |
| ) | |
| total_time = time.time() - start_time | |
| # Check if content was rejected | |
| if result.performance_label == "CONTENT VERIFICATION FAILED": | |
| return { | |
| "status": "rejected", | |
| "error": "Video content verification failed - not BJJ/grappling", | |
| "error_type": "content_verification", | |
| "data": result.model_dump(), | |
| "processing_time": f"{total_time:.2f}s" | |
| } | |
| return { | |
| "status": "completed", | |
| "data": result.model_dump(), | |
| "processing_time": f"{total_time:.2f}s", | |
| "frames_analyzed": len(frames), | |
| "avg_frame_interval": f"{metadata.get('avg_frame_interval', 0):.2f}s", | |
| "used_fallback": db_storage[analysis_id].get("used_fallback", False), | |
| "method": "dense_consecutive_frames" | |
| } | |
| except Exception as e: | |
| print(f"❌ Error: {e}") | |
| traceback.print_exc() | |
| # Try to provide helpful fallback | |
| try: | |
| frames_fb, _ = await asyncio.get_event_loop().run_in_executor(None, extract_dense_consecutive_frames, file_path) | |
| fallback = make_fallback(frames_fb) | |
| except: | |
| fallback = make_fallback([{"timestamp": "00:30", "second": 30}]) | |
| return { | |
| "status": "completed_with_fallback", | |
| "data": fallback, | |
| "error": str(e), | |
| "used_fallback": True | |
| } | |
| finally: | |
| if file_path: | |
| try: | |
| os.remove(file_path) | |
| except: | |
| pass | |
| async def health_check(): | |
| return {"status": "healthy", "version": "29.0.0-optimized-accurate"} | |
| async def root(): | |
| return { | |
| "message": "BJJ AI Coach - Optimized for Speed + Accuracy", | |
| "version": "29.0.0", | |
| "target_performance": "Total analysis: 50-60 seconds", | |
| "architecture": "Gemini Vision + CrewAI Agents + Python Validation", | |
| "optimizations": [ | |
| "⚡ Optimized frame counts for 50-60s Gemini processing", | |
| "🎯 50% of frames in final 30% (submission-focused)", | |
| "📊 15-40 frames (optimized for speed + accuracy)", | |
| "✅ Ultra-strict evidence requirements (prevents wrong diagnosis)", | |
| "🔍 Conservative analysis (admits uncertainty when unclear)", | |
| "⏱️ Target: 50-60s total (15s video: ~30s, 60s video: ~50s)" | |
| ], | |
| "frame_strategy": { | |
| "10-15s_video": "15 frames (~1.0s intervals) → Gemini ~30s", | |
| "15-30s_video": "20 frames (~1.5s intervals) → Gemini ~40s", | |
| "30-60s_video": "30 frames (~2.0s intervals) → Gemini ~50s", | |
| "60-90s_video": "40 frames (~2.3s intervals) → Gemini ~60s" | |
| }, | |
| "submission_focus": { | |
| "distribution": "20% start, 30% middle, 50% end", | |
| "end_section": "50% of all frames in final 30% of video", | |
| "final_frames": "Always includes last 2 frames for tap detection", | |
| "confirmation": "Ultra-strict: requires EXPLICIT tap visible (2+ slaps)" | |
| }, | |
| "accuracy_improvements": [ | |
| "Evidence-only analysis (NO assumptions or inferences)", | |
| "Conservative position labels (says 'Unclear' when uncertain)", | |
| "Stricter submission confirmation (tap must be EXPLICIT)", | |
| "Mount requires ALL 4 criteria (knees, hips, flat, no entangle)", | |
| "No pain inference, intent assumption, or guessing", | |
| "Better to admit uncertainty than make wrong diagnosis" | |
| ], | |
| "validation": { | |
| "content_types_accepted": [ | |
| "BJJ (gi/no-gi)", | |
| "Submission grappling", | |
| "Wrestling", | |
| "Judo (newaza)", | |
| "MMA grappling" | |
| ], | |
| "content_types_rejected": [ | |
| "Striking arts", | |
| "Kata/forms", | |
| "Non-combat sports" | |
| ], | |
| "duration": "5-120 seconds" | |
| } | |
| } | |
| if __name__ == "__main__": | |
| import uvicorn | |
| port = int(os.environ.get("PORT", 7860)) | |
| uvicorn.run(app, host="0.0.0.0", port=port) |