samiee2213 committed on
Commit
2d3b6d3
·
verified ·
1 Parent(s): fd23d1a

Create main.py

Browse files
Files changed (1) hide show
  1. main.py +1403 -0
main.py ADDED
@@ -0,0 +1,1403 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+ import os
3
+ import time
4
+ import shutil
5
+ import uuid
6
+ import json
7
+ import asyncio
8
+ import base64
9
+ import re
10
+ import traceback
11
+ from typing import List, Optional, Dict, Any
12
+ from fastapi import FastAPI, UploadFile, File, BackgroundTasks, HTTPException, Form
13
+ from fastapi.middleware.cors import CORSMiddleware
14
+ from pydantic import BaseModel, ConfigDict
15
+ import warnings
16
+
17
+ # Suppress warnings
18
+ warnings.filterwarnings('ignore', category=FutureWarning)
19
+
20
+ # CrewAI imports
21
+ from crewai import Agent, Task, Crew, Process
22
+ from crewai.llm import LLM
23
+
24
+ # Gemini imports
25
+ import google.generativeai as genai
26
+ from google.generativeai.types import HarmCategory, HarmBlockThreshold
27
+
28
+ # OpenCV
29
+ import cv2
30
+ import numpy as np
31
+
32
# Configuration: both API keys are read from the environment once, at import time.
GEMINI_API_KEY = os.getenv("GOOGLE_API_KEY")
GROQ_API_KEY = os.getenv("GROQ_API_KEY")

# Fail fast: importing this module raises when either key is unset or empty.
if not GEMINI_API_KEY:
    raise ValueError("GOOGLE_API_KEY environment variable required")
if not GROQ_API_KEY:
    raise ValueError("GROQ_API_KEY environment variable required")

# Register the Gemini key with the google.generativeai client module-wide.
genai.configure(api_key=GEMINI_API_KEY)

app = FastAPI(title="BJJ AI Coach - Dense Frame Analysis")

# NOTE(review): wildcard origins/methods/headers are combined with
# allow_credentials=True. The FastAPI CORS documentation says credentialed
# requests need explicit values instead of "*" -- confirm whether credentials
# are really needed, or list the allowed front-end origins explicitly.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
52
+
53
+ # --- MODELS ---
54
+
55
class TimestampedEvent(BaseModel):
    """One noteworthy moment of the match, optionally paired with a video frame."""

    # Event time as text. parse_time_to_seconds() looks for an "M:SS"/"MM:SS"
    # pattern in values like this one -- presumably that is the expected format.
    time: str
    title: str
    description: str
    category: Optional[str] = "GENERAL"
    # presumably the base64 JPEG written by attach_frames_to_events() -- TODO confirm
    frame_image: Optional[str] = None
    # presumably the "MM:SS" stamp of the attached frame -- TODO confirm
    frame_timestamp: Optional[str] = None
    # Extra keys are accepted instead of being rejected by validation.
    model_config = ConfigDict(extra="allow")
63
+
64
class Drill(BaseModel):
    """A recommended training drill together with the reason it was suggested."""

    name: str
    focus_area: str
    reason: str
    # Free-text schedule hints; the defaults apply when the field is omitted.
    duration: Optional[str] = "15 min/day"
    frequency: Optional[str] = "5x/week"
70
+
71
class DetailedSkillBreakdown(BaseModel):
    """Integer score per skill area (presumably on a 0-100 scale -- TODO confirm)."""

    offense: int
    defense: int
    guard: int
    passing: int
    standup: int
77
+
78
class PerformanceGrades(BaseModel):
    """Textual grades for defense, offense and control (format not fixed here)."""

    defense_grade: str
    offense_grade: str
    control_grade: str
82
+
83
class AnalysisResult(BaseModel):
    """Complete analysis payload for one clip: scores, findings, events and drills."""

    overall_score: int  # presumably 0-100 -- TODO confirm
    performance_label: str
    performance_grades: PerformanceGrades
    skill_breakdown: DetailedSkillBreakdown
    strengths: List[str]
    weaknesses: List[str]
    missed_opportunities: List[TimestampedEvent]
    key_moments: List[TimestampedEvent]
    coach_notes: str
    recommended_drills: List[Drill]
94
+
95
+ db_storage = {}
96
+
97
+ # --- UTILITIES ---
98
+
99
def parse_time_to_seconds(time_str: str) -> Optional[int]:
    """Convert the first ``M:SS`` / ``MM:SS`` stamp found in *time_str* to seconds.

    The stamp may be embedded in surrounding text (for example
    ``"around 01:15"``); only the first match is used.

    Args:
        time_str: Text expected to contain a minutes:seconds stamp.

    Returns:
        Total seconds as an int, or ``None`` when *time_str* is empty, is not
        a string, or contains no ``M:SS`` pattern.
    """
    # Event times come from model-generated JSON and may be None or a number;
    # treat anything that is not a non-empty string as "no timestamp" instead
    # of letting re.search raise TypeError.
    if not isinstance(time_str, str) or not time_str:
        return None

    match = re.search(r"(\d{1,2}):(\d{2})", time_str)
    if match is None:
        return None

    minutes, seconds = match.groups()
    return int(minutes) * 60 + int(seconds)
107
+
108
def find_closest_frame(target_time_sec: int, frames: list) -> dict:
    """Return the frame whose ``"second"`` value is nearest to *target_time_sec*.

    When two frames are equally close, the one that appears first in *frames*
    wins. An empty *frames* list raises ``ValueError`` (from ``min``).
    """
    def gap_to_target(candidate):
        # Absolute distance, in seconds, between this frame and the target.
        return abs(candidate["second"] - target_time_sec)

    nearest = min(frames, key=gap_to_target)
    return nearest
110
+
111
def attach_frames_to_events(events: List[dict], frames: list):
    """Attach the nearest extracted frame to every event, in place.

    For each event dict whose ``"time"`` value contains a parseable stamp, the
    frame closest to that time is looked up and two keys are written:
    ``"frame_timestamp"`` (the frame's ``"timestamp"`` value) and
    ``"frame_image"`` (the frame's ``"bytes"`` value, base64-encoded as text).

    This is best-effort: events without a parseable time are left untouched,
    and any failure while resolving a frame sets ``"frame_image"`` to ``None``
    for that event only.

    Args:
        events: Event dicts to enrich; mutated in place.
        frames: Frame dicts carrying ``"second"``, ``"timestamp"`` and ``"bytes"``.
    """
    for event in events:
        # Model output is not guaranteed to be a list of dicts; a non-dict
        # entry cannot carry a frame, so skip it rather than crash.
        if not isinstance(event, dict):
            continue

        try:
            event_time_sec = parse_time_to_seconds(event.get("time"))
            if event_time_sec is None:
                continue

            closest = find_closest_frame(event_time_sec, frames)
            # Resolve both values before writing either, so an event never
            # ends up with a timestamp but no image.
            frame_timestamp = closest["timestamp"]
            frame_image = base64.b64encode(closest["bytes"]).decode("utf-8")
        except Exception:
            # Deliberately broad (best-effort enrichment), but no longer a
            # bare except: KeyboardInterrupt/SystemExit still propagate.
            event["frame_image"] = None
        else:
            event["frame_timestamp"] = frame_timestamp
            event["frame_image"] = frame_image
122
+
123
def extract_json_from_text(text: str) -> Dict:
    """Extract a JSON value from free-form model output.

    Strategies are tried in order:

    1. Parse the whole (stripped) text as JSON.
    2. Parse the contents of the first Markdown code fence, preferring a
       fence opened with three backticks followed by ``json``.
    3. Starting at the first ``{``, find the matching closing brace and parse
       that span. Brackets inside string literals are ignored.
    4. If the text ends before the object is closed (truncated output), close
       an open string, drop a trailing comma, append the missing closers in
       correct nesting order, and parse the result.

    Args:
        text: Raw text expected to contain a JSON object.

    Returns:
        The decoded JSON value (normally a dict).

    Raises:
        ValueError: If no strategy yields valid JSON.
    """
    text = text.strip()

    # Strategy 1: the payload is already clean JSON.
    try:
        return json.loads(text)
    except ValueError:  # json.JSONDecodeError is a ValueError subclass
        pass

    # Strategy 2: contents of the first code fence. The narrowed text is kept
    # for the later strategies even when it does not parse as-is.
    if "```" in text:
        marker = "```json" if "```json" in text else "```"
        text = text.split(marker)[1].split("```")[0]
        try:
            return json.loads(text.strip())
        except ValueError:
            pass

    start_idx = text.find("{")
    if start_idx == -1:
        raise ValueError("Could not extract JSON")

    # Strategies 3/4: one string-aware scan. `pending` is the stack of closers
    # still owed, innermost last.
    closer_for = {"{": "}", "[": "]"}
    pending = []
    in_string = False
    escaped = False
    end_idx = -1

    for idx in range(start_idx, len(text)):
        ch = text[idx]
        if in_string:
            if escaped:
                escaped = False
            elif ch == "\\":
                escaped = True
            elif ch == '"':
                in_string = False
            continue

        if ch == '"':
            in_string = True
        elif ch in closer_for:
            pending.append(closer_for[ch])
        elif ch == "}" or ch == "]":
            if not pending or pending.pop() != ch:
                # Mismatched closer: the text is malformed, stop scanning and
                # let the parse attempt below report the failure.
                break
            if not pending:
                end_idx = idx
                break

    try:
        if end_idx != -1:
            return json.loads(text[start_idx:end_idx + 1])

        # Truncated output: rebuild the missing tail.
        candidate = text[start_idx:]
        if in_string:
            if escaped:
                candidate = candidate[:-1]  # drop a dangling backslash
            candidate += '"'
        else:
            candidate = candidate.rstrip()
            if candidate.endswith(","):
                candidate = candidate[:-1]
        candidate += "".join(reversed(pending))
        return json.loads(candidate)
    except ValueError as exc:
        raise ValueError("Could not extract JSON") from exc
180
+
181
def is_generic(text: str) -> bool:
    """Return True when a piece of feedback is too vague to be useful.

    Feedback counts as generic when it is a two-word stock phrase such as
    "More pressure", when it mentions no ``M:SS`` timestamp, or when it is
    shorter than 20 characters.
    """
    stock_patterns = [r'^More \w+$', r'^Improve \w+$', r'^Work \w+$', r'^Better \w+$']
    trimmed = text.strip()
    if any(re.match(pattern, trimmed, re.IGNORECASE) for pattern in stock_patterns):
        return True

    # Specific feedback must both cite a timestamp and be reasonably long.
    cites_timestamp = re.search(r'\d{1,2}:\d{2}', text) is not None
    long_enough = len(text) >= 20
    return not (cites_timestamp and long_enough)
192
+
193
+ # --- ENHANCED DENSE FRAME EXTRACTION ---
194
+
195
def _collect_section_frames(cap, fps: float, first_idx: int, stop_idx: int, wanted: int) -> list:
    """Read up to *wanted* evenly spaced frames from ``[first_idx, stop_idx)``."""
    collected = []
    if wanted <= 0:
        return collected

    step = max(1, (stop_idx - first_idx) // wanted)
    for frame_idx in range(first_idx, stop_idx, step):
        if len(collected) >= wanted:
            break
        frame = get_frame(cap, frame_idx, fps)
        if frame:  # unreadable frames are skipped; the next step is tried
            collected.append(frame)
    return collected


def extract_dense_consecutive_frames(video_path: str) -> tuple:
    """
    Sample JPEG frames from a clip, weighting the final 30% most heavily.

    Frame budget by clip length (clips under 5s or over 120s are rejected):
      - up to 15s:  15 frames
      - up to 30s:  20 frames
      - up to 60s:  30 frames
      - up to 90s:  40 frames
      - up to 120s: 45 frames

    The budget is split over three sections of the clip:
      - START  (first 20% of the video): max(3, 20% of the budget)
      - MIDDLE (20%-70% of the video):   max(6, 30% of the budget)
      - END    (last 30% of the video):  whatever remains of the budget
    The last two frames of the clip are always added when not already sampled.

    Args:
        video_path: Path of the video file to open with OpenCV.

    Returns:
        ``(frames, metadata)``: *frames* is the list of dicts produced by
        ``get_frame`` sorted by time; *metadata* holds ``duration``, ``fps``,
        ``frames_extracted``, ``avg_frame_interval``, ``estimated_gemini_time``
        and the per-section ``distribution``.

    Raises:
        Exception: For any failure, with the message
            ``"Frame extraction failed: <cause>"``; the original error is
            chained as ``__cause__``.
    """
    cap = None
    try:
        cap = cv2.VideoCapture(video_path)
        if not cap.isOpened():
            raise Exception("Cannot open video")

        fps = cap.get(cv2.CAP_PROP_FPS)
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        # Without a usable frame rate the duration cannot be computed; say so
        # instead of reporting the clip as "too short".
        if fps <= 0 or total_frames <= 0:
            raise ValueError("Cannot read video frame rate or frame count")

        duration = total_frames / fps

        # Validate video duration
        if duration < 5:
            raise ValueError("Video too short (< 5 seconds). Please upload a longer clip (10-90 seconds recommended).")

        if duration > 120:
            raise ValueError("Video too long (> 2 minutes). Please upload a shorter clip (10-90 seconds recommended) for optimal analysis.")

        # Frame budget grows with clip length to keep vision-model time bounded.
        if duration <= 15:
            total_to_extract = 15
        elif duration <= 30:
            total_to_extract = 20
        elif duration <= 60:
            total_to_extract = 30
        elif duration <= 90:
            total_to_extract = 40
        else:
            total_to_extract = 45

        print(f"📹 OPTIMIZED EXTRACTION: {total_to_extract} frames from {duration:.1f}s video")
        print(f" Target: 1 frame every {duration/total_to_extract:.1f}s (Gemini: ~{total_to_extract * 1.5:.0f}s)")

        # Submission-focused split: 20% start, 30% middle, remainder at the end.
        start_frames = max(3, int(total_to_extract * 0.20))
        middle_frames = max(6, int(total_to_extract * 0.30))
        end_frames = total_to_extract - start_frames - middle_frames

        print(f" Distribution (submission-focused): START={start_frames}, MIDDLE={middle_frames}, END={end_frames}")

        # Section boundaries expressed as frame indices.
        start_section_end = int(total_frames * 0.20)
        middle_section_end = int(total_frames * 0.70)

        end_interval = max(1, (total_frames - middle_section_end) // end_frames)
        print(f" END section (50% of frames): 1 frame every {end_interval/fps:.2f}s for submission detection")

        # Each section tracks its own quota, so a boundary frame whose rounded
        # timestamp falls in the neighbouring section cannot inflate the count.
        frames = []
        frames += _collect_section_frames(cap, fps, 0, start_section_end, start_frames)
        frames += _collect_section_frames(cap, fps, start_section_end, middle_section_end, middle_frames)
        frames += _collect_section_frames(cap, fps, middle_section_end, total_frames, end_frames)

        # Always include the final two frames: a tap often happens right at the end.
        seen_indices = {f["frame_idx"] for f in frames}
        for offset in (2, 1):
            final_frame_idx = total_frames - offset
            if final_frame_idx > 0 and final_frame_idx not in seen_indices:
                frame = get_frame(cap, final_frame_idx, fps)
                if frame:
                    frames.append(frame)
                    seen_indices.add(final_frame_idx)

        frames.sort(key=lambda f: f["second"])

        # Gap statistics between neighbouring sampled frames.
        intervals = [later["second"] - earlier["second"] for earlier, later in zip(frames, frames[1:])]
        avg_interval = sum(intervals) / len(intervals) if intervals else 0

        start_cutoff = duration * 0.20
        end_cutoff = duration * 0.70
        metadata = {
            "duration": round(duration, 2),
            "fps": round(fps, 2),
            "frames_extracted": len(frames),
            "avg_frame_interval": round(avg_interval, 2),
            "estimated_gemini_time": round(len(frames) * 1.5, 1),  # ~1.5s per frame
            "distribution": {
                "start": sum(1 for f in frames if f["second"] < start_cutoff),
                "middle": sum(1 for f in frames if start_cutoff <= f["second"] < end_cutoff),
                "end": sum(1 for f in frames if f["second"] >= end_cutoff),
            },
        }

        print(f"✅ Extracted {len(frames)} frames (avg interval: {avg_interval:.2f}s)")
        print(f" Estimated Gemini time: ~{metadata['estimated_gemini_time']:.0f}s")
        print(f" Actual distribution: START={metadata['distribution']['start']}, "
              f"MIDDLE={metadata['distribution']['middle']}, "
              f"END={metadata['distribution']['end']} (50% in final 30%!)")

        return frames, metadata

    except Exception as e:
        raise Exception(f"Frame extraction failed: {str(e)}") from e
    finally:
        # Release the capture on every path, success or failure.
        if cap is not None:
            cap.release()
333
+
334
def get_frame(cap: cv2.VideoCapture, frame_idx: int, fps: float) -> Optional[dict]:
    """Read one frame by index and return it as a 720px-high JPEG record.

    The capture is seeked to *frame_idx*, the decoded frame is resized to a
    height of 720 pixels (width scaled by the same factor) and encoded as
    JPEG at quality 85.

    Args:
        cap: An opened OpenCV capture.
        frame_idx: Zero-based index of the frame to read.
        fps: Frame rate used to convert the index to a timestamp.

    Returns:
        A dict with ``"bytes"`` (JPEG data), ``"timestamp"`` (``"MM:SS"``),
        ``"second"`` (float, rounded to 2 decimals) and ``"frame_idx"``; or
        ``None`` when the frame cannot be read, encoded or timestamped.
    """
    try:
        # A non-positive frame rate cannot yield a meaningful timestamp.
        if fps <= 0:
            return None

        cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
        ret, frame = cap.read()
        if not ret or frame is None:
            return None

        h, w = frame.shape[:2]
        target_h = 720
        target_w = int(w * (target_h / h))
        resized = cv2.resize(frame, (target_w, target_h))

        # imencode reports failure through its first return value.
        encoded_ok, buffer = cv2.imencode('.jpg', resized, [cv2.IMWRITE_JPEG_QUALITY, 85])
        if not encoded_ok:
            return None

        timestamp_sec = frame_idx / fps
        timestamp_str = f"{int(timestamp_sec // 60):02d}:{int(timestamp_sec % 60):02d}"

        return {
            "bytes": buffer.tobytes(),
            "timestamp": timestamp_str,
            "second": round(timestamp_sec, 2),
            "frame_idx": frame_idx
        }
    except Exception:
        # Best-effort: one bad frame must not abort the whole extraction, but
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        return None
358
+
359
+ # --- ENHANCED GEMINI VISION WITH CONSECUTIVE CONTEXT ---
360
+
361
+ async def extract_frame_observations(frames: List[Dict], user_desc: str, opp_desc: str, duration: float, metadata: Dict) -> str:
362
+ """Use Gemini to analyze DENSE CONSECUTIVE frames"""
363
+
364
+ print("STEP 1: Gemini Vision - Dense Consecutive Frame Analysis")
365
+
366
+ try:
367
+ # Build detailed frame list with time gaps
368
+ frame_details = []
369
+ for i, f in enumerate(frames):
370
+ if i > 0:
371
+ time_gap = f["second"] - frames[i-1]["second"]
372
+ gap_indicator = f" [+{time_gap:.1f}s]" if time_gap > 2 else ""
373
+ else:
374
+ gap_indicator = ""
375
+
376
+ frame_details.append(f"Frame {i+1} @ {f['timestamp']} ({f['second']:.1f}s){gap_indicator}")
377
+
378
+ frame_list = "\n".join(frame_details)
379
+
380
+ avg_interval = metadata.get("avg_frame_interval", 2.0)
381
+
382
+ prompt = f"""
383
+ You are an EXPERT Brazilian Jiu-Jitsu black belt analyst performing PRECISE EVIDENCE-BASED frame analysis.
384
+
385
+ ====================
386
+ CRITICAL: VIDEO CONTENT VERIFICATION (STEP 0 - MANDATORY)
387
+ ====================
388
+
389
+ BEFORE analyzing frames, verify this is BJJ/grappling content.
390
+
391
+ ACCEPTABLE: BJJ (gi/no-gi), Wrestling, Judo (newaza), Submission grappling, MMA grappling
392
+ REJECT: Striking arts, kata/forms, non-combat sports, random videos
393
+
394
+ If NOT grappling → Output this JSON and STOP:
395
+ {{"content_verification": "FAILED", "reason": "This video shows [what you see]. Please upload BJJ/grappling footage.", "suggested_action": "Upload ground grappling, submissions, or takedowns."}}
396
+
397
+ ====================
398
+ CORE PRINCIPLE: EVIDENCE-ONLY ANALYSIS
399
+ ====================
400
+
401
+ YOU ARE ABSOLUTELY FORBIDDEN FROM:
402
+ ❌ Assuming intent or motivation
403
+ ❌ Inferring pain levels or discomfort
404
+ ❌ Guessing what happened between frames
405
+ ❌ Extrapolating beyond visible evidence
406
+ ❌ Making confident claims from unclear visuals
407
+
408
+ YOU MUST ONLY:
409
+ ✅ Describe EXACTLY what is visible in each frame
410
+ ✅ Use conservative language when uncertain
411
+ ✅ Say "Unclear" or "Insufficient evidence" if you cannot confirm
412
+ ✅ Track visible progressions across consecutive frames
413
+
414
+ ====================
415
+ CONSECUTIVE FRAME CONTEXT
416
+ ====================
417
+
418
+ You have {len(frames)} frames with ~{avg_interval:.1f}s average interval.
419
+
420
+ Frame sequence (time gaps shown):
421
+ {frame_list}
422
+
423
+ IMPORTANT:
424
+ - Frames < 2s apart = CONTINUOUS ACTION (track progressions)
425
+ - Frames > 3s apart = POTENTIAL TRANSITION (note gap)
426
+ - Final 50% of frames are DENSE (70-100% of video) for submission detection
427
+
428
+ ====================
429
+ POSITION CLASSIFICATION (STRICT RULES)
430
+ ====================
431
+
432
+ Use ONLY these positions if CLEARLY visible:
433
+
434
+ STANDING/CLINCH:
435
+ - Standing: Both athletes upright, no ground contact
436
+ - Clinch: Standing with upper body control
437
+
438
+ GUARD POSITIONS (Bottom player has legs between them):
439
+ - Closed Guard: Legs locked around opponent's waist
440
+ - Open Guard: Legs not locked, but controlling opponent (Butterfly, DLR, Spider, X-Guard)
441
+ - Half Guard: One leg trapped between opponent's legs
442
+
443
+ TOP CONTROL:
444
+ - Side Control: Chest across opponent's chest, perpendicular, opponent flat
445
+ - North-South: Head-to-head, chest across opponent's chest
446
+ - Mount: ONLY if BOTH knees on mat, hips square over torso, opponent flat, NO leg entanglement
447
+ - If ANY condition missing → "Top pressure (not mount)" or "Knee on belly"
448
+ - Back Control: Behind opponent with hooks or body triangle
449
+
450
+ NEUTRAL/TRANSITION:
451
+ - Turtle: Opponent on hands/knees
452
+ - Scramble: Both athletes moving, position unclear
453
+ - Transitional: Between defined positions
454
+
455
+ WHEN UNCERTAIN: Use "Unclear position" or "Transitional control" - NEVER guess!
456
+
457
+ ====================
458
+ SUBMISSION DETECTION (ULTRA-STRICT)
459
+ ====================
460
+
461
+ A submission is confirmed ONLY if you see ALL of:
462
+ 1. ✅ CLEAR lock/control visible in 2+ consecutive frames
463
+ 2. ✅ EXPLICIT tap (hand slapping mat/body 2+ times) OR
464
+ 3. ✅ Match stopping during locked submission OR
465
+ 4. ✅ Video ending during unmistakable locked submission
466
+
467
+ TAP INDICATORS (must be EXPLICIT):
468
+ - ✅ Hand rapidly slapping mat (2+ distinct slaps)
469
+ - ✅ Hand rapidly patting opponent's body (2+ distinct pats)
470
+ - ✅ Verbal submission with visible distress
471
+ - ✅ Body going completely limp during lock
472
+
473
+ INSUFFICIENT FOR CONFIRMATION:
474
+ - ❌ Position control alone (even if perfect)
475
+ - ❌ "Could be applying pressure" - NOT confirmed
476
+ - ❌ "Appears to be in pain" - NOT confirmed
477
+ - ❌ "Submission position visible" - NOT confirmed unless TAP visible
478
+ - ❌ Hand moving once - NOT a tap
479
+ - ❌ Match ending without clear tap or lock - NOT confirmed
480
+
481
+ DECISION TREE:
482
+ Is lock clearly visible? NO → "No submission"
483
+ Is lock clearly visible? YES → Is tap EXPLICITLY visible? NO → "Submission attempt only"
484
+ Is tap EXPLICITLY visible? YES → "SUBMISSION CONFIRMED"
485
+
486
+ ====================
487
+ FRAME-BY-FRAME ANALYSIS (REQUIRED FORMAT)
488
+ ====================
489
+
490
+ For EACH frame, report:
491
+
492
+ Frame X (MM:SS):
493
+ Position: [Conservative label - say "Unclear" if unsure]
494
+ Advantage: [User / Opponent / Neutral - based ONLY on visible control]
495
+ Action: [OFFENSE / DEFENSE / GUARD / PASSING / STANDUP / NONE]
496
+ Threats: [None / Submission attempt (name) / Positional advance]
497
+ Details: [Observable grips, pressure, movements - NO speculation]
498
+ Progression: [If < 2s from previous frame: "Continues [action]" / If > 3s: "New sequence"]
499
+
500
+ CRITICAL RULES:
501
+ - If position unclear → say "Position unclear"
502
+ - If advantage unclear → say "Neutral"
503
+ - If can't see details → say "Insufficient detail visible"
504
+ - NEVER fill in gaps with assumptions
505
+
506
+ ====================
507
+ ACTION TYPE DEFINITIONS (STRICT)
508
+ ====================
509
+
510
+ OFFENSE: Initiated submission attempts OR active attack chains (NOT just control)
511
+ DEFENSE: Actively escaping, framing, or defending attacks (NOT just being on bottom)
512
+ GUARD: Bottom position with legs controlling opponent (NOT just being on back)
513
+ PASSING: Actively clearing legs and advancing position (NOT just being on top)
514
+ STANDUP: Takedown attempts or clinch exchanges
515
+ NONE: Static control, unclear action, or transitional movement
516
+
517
+ ====================
518
+ FINAL SUMMARY (EVIDENCE-LOCKED)
519
+ ====================
520
+
521
+ 1. OUTCOME VERDICT:
522
+ - Submission: YES (only if tap EXPLICITLY visible) / NO / UNCLEAR
523
+ - Winner: User / Opponent / NONE / UNCLEAR
524
+ - Technique: [Name ONLY if lock + tap confirmed] / NONE / UNCLEAR
525
+ - Evidence: "Frames X-Y show [specific visible evidence]"
526
+ - Confidence: HIGH (tap explicitly visible) / MEDIUM (strong indicators) / LOW (unclear)
527
+
528
+ 2. POSITIONAL SUMMARY:
529
+ - Describe visible progressions
530
+ - Note dominant positions
531
+ - List transitions between confirmed positions
532
+ - ADMIT UNCERTAINTY where applicable
533
+
534
+ 3. KEY SEQUENCES:
535
+ - List multi-frame progressions with frame references
536
+ - Format: "Frames X-Y: [observable progression]"
537
+
538
+ ====================
539
+ QUALITY CHECKLIST (VERIFY BEFORE SUBMITTING)
540
+ ====================
541
+
542
+ Before finalizing, verify:
543
+ - [ ] Did I ONLY describe what's CLEARLY visible?
544
+ - [ ] Did I use "Unclear" when uncertain?
545
+ - [ ] Did I confirm submission ONLY if tap EXPLICITLY visible?
546
+ - [ ] Did I avoid assuming pain, intent, or motivation?
547
+ - [ ] Did I track progressions in consecutive frames?
548
+ - [ ] Did I use conservative position labels?
549
+ - [ ] Did I admit gaps in evidence?
550
+ - [ ] Did I verify "mount" meets ALL 4 criteria?
551
+
552
+ REMEMBER:
553
+ - It is BETTER to say "Unclear" than to make a wrong diagnosis
554
+ - Conservative analysis is MORE valuable than confident guessing
555
+ - Visible evidence > Positional inference
556
+ - When in doubt, describe what you SEE, not what you THINK
557
+
558
+ Your analysis will guide training decisions. ACCURACY and HONESTY are paramount.
559
+
560
+ ✅ Track continuous movement progressions
561
+ ✅ Identify setup sequences (e.g., grip → control → finish)
562
+ ✅ Detect transitional movements between positions
563
+ ✅ Recognize submission attempts developing over multiple frames
564
+ ✅ See tapping sequences frame-by-frame
565
+
566
+ IMPORTANT INSTRUCTIONS:
567
+ 1. When frames are close together (< 2 seconds apart), treat them as CONTINUOUS ACTION
568
+ 2. Look for PROGRESSIONS across consecutive frames, not just isolated moments
569
+ 3. A technique may develop over 3-5 consecutive frames - describe the SEQUENCE
570
+ 4. For submissions: Track the setup (Frame N) → control (Frame N+1) → finish (Frame N+2) → tap (Frame N+3)
571
+
572
+ ====================
573
+ VIDEO CONTEXT
574
+ ====================
575
+ - Duration: {duration}s
576
+ - Total Frames: {len(frames)} (DENSE consecutive sampling)
577
+ - Average time between frames: {avg_interval:.1f}s
578
+ - Athlete Being Analyzed (User): {user_desc}
579
+ - Opponent: {opp_desc}
580
+
581
+ ====================
582
+ FRAME SEQUENCE
583
+ ====================
584
+ {frame_list}
585
+
586
+ NOTE: Frames marked with [+X.Xs] have larger time gaps - these are transitions between sequences.
587
+
588
+ ====================
589
+ REFERENCE KNOWLEDGE (VOCABULARY ONLY)
590
+ ====================
591
+
592
+ Use these terms ONLY if clearly visible in frames.
593
+
594
+ POSITIONS:
595
+ Standing, Clinch, Closed Guard, Open Guard (Butterfly, De La Riva, Spider, X-Guard),
596
+ Half Guard (Top/Bottom, Knee Shield, Deep Half), Side Control (Standard, Kesa Gatame),
597
+ North-South, Mount (Low, High, S-Mount), Back Control, Turtle (Top/Bottom)
598
+
599
+ CRITICAL POSITION RULE:
600
+ - "Full Mount" requires: BOTH knees on mat, hips square, opponent flat, NO leg entanglement
601
+ - If ANY missing → use "Top control (not mount)" or "Transitional position"
602
+
603
+ ATTACKS & THREATS:
604
+ Chokes (RNC, Guillotine, Triangle, Arm Triangle, D'Arce, Anaconda, Ezekiel, Collar chokes)
605
+ Joint Locks (Armbar, Kimura, Americana, Omoplata, Wrist locks)
606
+ Leg Locks (Straight Ankle, Kneebar, Heel Hook, Toe Hold, Calf Slicer)
607
+
608
+ ====================
609
+ SUBMISSION DETECTION (STRICT)
610
+ ====================
611
+
612
+ With {len(frames)} dense frames, you can now track COMPLETE submission sequences:
613
+
614
+ A submission is confirmed ONLY if you see:
615
+ 1. SETUP in earlier frames (e.g., Frame 28: "Leg entangled")
616
+ 2. CONTROL in middle frames (e.g., Frame 29: "Ankle isolated, arching back")
617
+ 3. PRESSURE in later frames (e.g., Frame 30: "Full extension applied")
618
+ 4. TAP or STOPPAGE in final frames (e.g., Frame 31: "Hand tapping mat")
619
+
620
+ Visual tap indicators:
621
+ - ✅ Hand slapping mat/body rapidly (2+ times)
622
+ - ✅ Verbal submission (grimacing in pain)
623
+ - ✅ Body going limp/giving up resistance
624
+ - ✅ Match ending during locked submission
625
+
626
+ If unclear or incomplete sequence → classify as "submission attempt" NOT "submission"
627
+
628
+ ====================
629
+ CONSECUTIVE FRAME ANALYSIS TASK
630
+ ====================
631
+
632
+ For EACH frame, provide:
633
+
634
+ 1. POSITION: Current position (conservative labels if unclear)
635
+
636
+ 2. ADVANTAGE: User / Opponent / Neutral (based on visible control)
637
+
638
+ 3. ACTION TYPE: OFFENSE | DEFENSE | GUARD | PASSING | STANDUP | NONE
639
+
640
+ 4. THREATS: None / Submission Attempt (name it) / Positional Advance
641
+
642
+ 5. TECHNICAL DETAILS: Observable grips, pressure, transitions
643
+ - For consecutive frames < 2s apart: Describe the PROGRESSION
644
+ - Example: "Continuing from previous frame, hand now moved to..."
645
+
646
+ 6. CONSECUTIVE CONTEXT: (NEW - VERY IMPORTANT)
647
+ - If this frame continues action from previous frame, note: "Continuation of [action]"
648
+ - If this starts new sequence, note: "New sequence initiated"
649
+ - Track multi-frame progressions: "Frame 3/5 of [technique] setup"
650
+
651
+ STRICT OUTPUT FORMAT:
652
+
653
+ Frame X (MM:SS):
654
+ Position: [name]
655
+ Advantage: [User/Opponent/Neutral]
656
+ Action: [type]
657
+ Threats: [description]
658
+ Details: [technical observation]
659
+ Context: [consecutive progression if applicable]
660
+
661
+ ====================
662
+ CONSECUTIVE SEQUENCE TRACKING (CRITICAL)
663
+ ====================
664
+
665
+ With dense frames, pay special attention to:
666
+
667
+ 1. MULTI-FRAME PROGRESSIONS:
668
+ - Frame 25: Grip established
669
+ - Frame 26: Control secured (progression)
670
+ - Frame 27: Position improved (progression continues)
671
+ - Frame 28: Submission attempt initiated (culmination)
672
+
673
+ 2. SUBMISSION SEQUENCES (if visible):
674
+ Track EVERY step:
675
+ - Early frame: "Leg control established, foot isolated"
676
+ - Next frame: "Opponent arching back, pressure applied"
677
+ - Next frame: "Ankle lock fully extended"
678
+ - Final frame: "Tapping motion visible / Match stopped"
679
+
680
+ 3. TRANSITIONAL FLOWS:
681
+ Note when position changes occur across consecutive frames
682
+
683
+ ====================
684
+ FINAL SUMMARY (EVIDENCE-LOCKED)
685
+ ====================
686
+
687
+ 1. OUTCOME VERDICT:
688
+ - Submission: YES / NO
689
+ - Winner: User / Opponent / NONE
690
+ - Technique: <name or NONE>
691
+ - Time: MM:SS or NONE
692
+ - Frame Sequence: "Frames X-Y showed [setup/execution/finish]"
693
+ - Confidence: HIGH / MEDIUM / LOW
694
+ - Evidence: Specific frame numbers + descriptions
695
+
696
+ 2. POSITIONAL FLOW:
697
+ - Describe the overall progression through consecutive frames
698
+ - Note dominant positions and transitions
699
+ - Identify key turning points in the sequence
700
+
701
+ 3. KEY SEQUENCES:
702
+ - List any multi-frame progressions that led to significant moments
703
+ - Format: "Frames X-Y: [description of progression]"
704
+
705
+ ====================
706
+ FINAL CHECKLIST
707
+ ====================
708
+
709
+ Before submitting, verify:
710
+ - [ ] Did I analyze frames CONSECUTIVELY, not in isolation?
711
+ - [ ] Did I track multi-frame progressions (setup → execution → finish)?
712
+ - [ ] For close frames (< 2s apart), did I note continuations?
713
+ - [ ] If submission visible, did I describe the COMPLETE sequence?
714
+ - [ ] Did I use "Frame X-Y" notation for extended sequences?
715
+ - [ ] Are all position labels conservative and evidence-based?
716
+ - [ ] No speculation beyond what's visible in frames?
717
+
718
+ REMEMBER: With {len(frames)} dense consecutive frames, you can see COMPLETE action sequences.
719
+ Use this advantage to provide CONTEXTUAL analysis, not just isolated observations.
720
+ """
721
+
722
+ # Prepare content
723
+ content = []
724
+ for f in frames:
725
+ content.append({
726
+ "mime_type": "image/jpeg",
727
+ "data": base64.b64encode(f["bytes"]).decode("utf-8")
728
+ })
729
+ content.append(prompt)
730
+
731
+ # Call Gemini
732
+ start = time.time()
733
+ model = genai.GenerativeModel(
734
+ model_name="gemini-2.5-flash",
735
+ generation_config={
736
+ "temperature": 0.2,
737
+ "max_output_tokens": 12000 # Increased for more frames
738
+ }
739
+ )
740
+
741
+ response = await asyncio.get_event_loop().run_in_executor(
742
+ None,
743
+ lambda: model.generate_content(
744
+ content,
745
+ safety_settings={
746
+ HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
747
+ HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
748
+ HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
749
+ HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
750
+ }
751
+ )
752
+ )
753
+
754
+ elapsed = time.time() - start
755
+ print(f"✅ Gemini vision completed: {elapsed:.2f}s ({len(frames)} frames analyzed)")
756
+
757
+ try:
758
+ observations = response.text
759
+ except:
760
+ observations = response.candidates[0].content.parts[0].text
761
+
762
+ # Log first 500 chars for debugging
763
+ print(f"📄 Observations preview: {observations[:500]}...")
764
+
765
+ return observations
766
+
767
+ except Exception as e:
768
+ print(f"❌ Vision extraction failed: {e}")
769
+ traceback.print_exc()
770
+ return f"Error analyzing frames: {str(e)}"
771
+
772
+ # --- CREWAI AGENTS (UPDATED FOR DENSE FRAMES) ---
773
+
774
def create_analysis_crew(observations: str, user_desc: str, opp_desc: str, duration: float, num_frames: int):
    """Build the two-agent CrewAI pipeline that turns frame observations into JSON.

    The crew runs two tasks sequentially: an analyst task whose prompt embeds
    the raw observations and video info, followed by a formatter task that
    asks for the result as a fixed JSON structure.

    Args:
        observations: Text observations produced by the vision step; embedded
            verbatim in the analyst task description.
        user_desc: Description of the athlete being coached.
        opp_desc: Description of the opponent.
        duration: Video duration in seconds (interpolated into the prompt).
        num_frames: Number of frames the observations were derived from.

    Returns:
        A ``Crew`` configured with ``Process.sequential``; the caller is
        responsible for invoking ``kickoff()``.
    """
    # The previous version also constructed a genai.GenerativeModel here that
    # was never referenced; both agents use the Groq-backed LLM below.
    llm = LLM(
        model="groq/llama-3.3-70b-versatile",
        api_key=GROQ_API_KEY,
        temperature=0.2
    )

    analyst = Agent(
        role="BJJ Technical Analyst",
        goal=f"Analyze {num_frames} consecutive frame observations for {user_desc} to detect submissions, score performance, and identify patterns",
        backstory=f"""
You are a BJJ black belt coach analyzing DENSE CONSECUTIVE FRAME observations.

CONTEXT AWARENESS:
- You received observations from {num_frames} frames (high density sampling)
- Frames are CONSECUTIVE with small time gaps (avg 1-2 seconds)
- This allows you to see COMPLETE action sequences, not just snapshots

CRITICAL RULES:
1. OUTCOME AUTHORITY: Accept submission verdicts from observations - do NOT override
2. SEQUENCE AWARENESS: Look for multi-frame progressions described in observations
3. POSITION AUTHORITY: Respect position labels used in observations
4. TIMESTAMP PRECISION: Every claim must reference specific timestamps
5. NO GENERICS: "More aggression" and similar phrases are FORBIDDEN

SCORING GUIDELINES:
- If user was submitted: Defense ≤40, Overall ≤60
- If user finished opponent: Offense ≥80, Overall ≥80
- Score based on demonstrated actions, not potential

STRENGTHS/WEAKNESSES:
- Must be SPECIFIC with timestamps
- Minimum 25 characters with context
- If submission occurred, it MUST be #1 in relevant category
- Each item must be distinct (no repetition with different wording)

DENSE FRAME ADVANTAGE:
- Use the sequential context to identify setup patterns
- Reference frame progressions (e.g., "Frames 25-28 showed grip sequence leading to...")
- Distinguish between isolated mistakes vs systematic issues
""",
        verbose=True,
        allow_delegation=False,
        llm=llm,
        memory=True
    )

    formatter = Agent(
        role="Data Structure Specialist",
        goal="Convert analysis into valid JSON matching exact schema requirements",
        backstory="""You transform technical analysis into structured JSON.

REQUIREMENTS:
- Exactly 3 strengths and 3 weaknesses
- All feedback includes timestamps (MM:SS format)
- No generic phrases like "More aggression" or "Improve timing"
- Scores reflect actual match outcome
- JSON is valid (no trailing commas, proper syntax)
- Each strength/weakness minimum 25 characters

VALIDATION CHECKS:
- All timestamps in MM:SS format? ✓
- No trailing commas? ✓
- Exactly 3 of each category? ✓
- All feedback includes timestamps? ✓
- No generic phrases? ✓
""",
        verbose=True,
        allow_delegation=False,
        llm=llm,
        memory=True
    )

    analysis_task = Task(
        description=f"""
Analyze DENSE CONSECUTIVE frame observations from BJJ match.

OBSERVATIONS (from {num_frames} frames):
{observations}

VIDEO INFO:
- Duration: {duration}s
- Frames analyzed: {num_frames} (consecutive with ~1-2s intervals)
- User: {user_desc}
- Opponent: {opp_desc}

REQUIRED OUTPUT:

1. OUTCOME SUMMARY:
- Restate outcome exactly as in observations
- Note frame sequences if submission occurred

2. SKILL SCORING (0-100, evidence-based):
- Offense: Submission attempts / attacks (NOT positional control)
- Defense: Escapes / survival (≤40 if submitted, ≤65 if never threatened)
- Guard: Bottom position effectiveness (≤40 if not meaningfully used)
- Passing: Clearing legs and advancing (mount ≠ passing)
- Standup: Takedowns / clinch (=0 if no standing engagement)

3. STRENGTHS (EXACTLY 3):
- Format: "At MM:SS - [Specific technical observation, min 25 chars]"
- If submission: #1 MUST be the finish
- Use sequential context from observations
- NO generics

4. WEAKNESSES (EXACTLY 3):
- Format: "At MM:SS - [Specific technical flaw, min 25 chars]"
- If submitted: #1 MUST be the defensive failure
- Reference frame progressions if applicable
- NO generics

5. MISSED OPPORTUNITIES (2-3):
- Must be visible in observations
- Reference specific timestamps

6. KEY MOMENTS (2-4):
- Include submission if occurred
- Note significant transitions

7. COACH NOTES (150-250 words):
- Technical, honest, evidence-based
- Reference sequential patterns if observed
- No speculation

8. DRILLS (EXACTLY 3):
- Each addresses a specific weakness
- Include timestamp justification
""",
        agent=analyst,
        expected_output="Detailed technical analysis with submission detection and sequential awareness"
    )

    # NOTE(review): this description is a plain (non-f) string, so the doubled
    # braces below are part of the text as written here. Whether they reach the
    # model as "{{" or "{" depends on CrewAI's template handling - confirm
    # before changing them.
    formatting_task = Task(
        description="""Convert the analysis into this EXACT JSON structure. NO markdown wrapping.

{{
"overall_score": <int 0-100>,
"performance_label": "EXCELLENT|STRONG|SOLID|DEVELOPING|NEEDS IMPROVEMENT",
"performance_grades": {{
"defense_grade": "<A+|A|B+|B|C+|C|D+|D>",
"offense_grade": "<letter>",
"control_grade": "<letter>"
}},
"skill_breakdown": {{
"offense": <int>,
"defense": <int>,
"guard": <int>,
"passing": <int>,
"standup": <int>
}},
"strengths": [
"At 0:XX - Specific observation with context (min 25 chars)",
"At 0:XX - Another specific observation",
"At 0:XX - Third specific observation"
],
"weaknesses": [
"At 0:XX - Specific weakness with context (min 25 chars)",
"At 0:XX - Another weakness",
"At 0:XX - Third weakness"
],
"missed_opportunities": [
{{"time": "MM:SS", "title": "Brief", "description": "Detail", "category": "SUBMISSION|POSITION|SWEEP"}}
],
"key_moments": [
{{"time": "MM:SS", "title": "Event", "description": "What happened", "category": "SUBMISSION|TRANSITION|DEFENSE"}}
],
"coach_notes": "Paragraph 150-250 words",
"recommended_drills": [
{{"name": "Drill 1", "focus_area": "Area", "reason": "Why (reference timestamp)", "duration": "15 min/day", "frequency": "5x/week"}},
{{"name": "Drill 2", "focus_area": "Area", "reason": "Why", "duration": "10 min/day", "frequency": "4x/week"}},
{{"name": "Drill 3", "focus_area": "Area", "reason": "Why", "duration": "12 min/day", "frequency": "3x/week"}}
]
}}

VALIDATION CHECKS:
- All timestamps in MM:SS format ✓
- No trailing commas ✓
- Exactly 3 strengths, 3 weaknesses, 3 drills ✓
- All feedback includes timestamps ✓
- No generic phrases ✓
- Valid JSON syntax ✓
""",
        agent=formatter,
        expected_output="Valid JSON only"
    )

    crew = Crew(
        agents=[analyst, formatter],
        tasks=[analysis_task, formatting_task],
        process=Process.sequential,
        verbose=True
    )

    return crew
978
+
979
+ # --- HYBRID ANALYSIS ---
980
+
981
def _content_rejection_result(observations: str, analysis_id: str = None) -> AnalysisResult:
    """Build the zero-score result returned when content verification fails.

    Tries to read ``reason`` / ``suggested_action`` from ``observations`` as
    JSON; if that (or building the detailed result) fails, a generic rejection
    result is returned instead. Marks the ``db_storage`` entry as rejected when
    ``analysis_id`` is given.
    """
    try:
        rejection_data = json.loads(observations)
        reason = rejection_data.get("reason", "Video does not appear to contain BJJ or grappling content.")
        suggested = rejection_data.get("suggested_action", "Please upload a BJJ or grappling video.")

        if analysis_id:
            db_storage[analysis_id]["status"] = "rejected"
            db_storage[analysis_id]["rejection_reason"] = reason

        # Return a special rejection result
        return AnalysisResult(**{
            "overall_score": 0,
            "performance_label": "CONTENT VERIFICATION FAILED",
            "performance_grades": {"defense_grade": "N/A", "offense_grade": "N/A", "control_grade": "N/A"},
            "skill_breakdown": {"offense": 0, "defense": 0, "guard": 0, "passing": 0, "standup": 0},
            "strengths": [
                "This video does not appear to contain BJJ or grappling content.",
                "Please upload footage showing ground grappling, submissions, or takedowns.",
                "Acceptable: BJJ (gi/no-gi), wrestling, judo newaza, submission grappling."
            ],
            "weaknesses": [
                f"Content detected: {reason}",
                "This system is designed specifically for grappling analysis.",
                f"Action needed: {suggested}"
            ],
            "missed_opportunities": [],
            "key_moments": [],
            "coach_notes": f"⚠️ CONTENT VERIFICATION FAILED\n\n{reason}\n\n{suggested}\n\nThis AI system is specifically trained for Brazilian Jiu-Jitsu and grappling analysis. It cannot analyze striking-based martial arts, non-combat sports, or general videos. Please upload a video showing:\n\n• Ground grappling or submissions\n• Takedowns or clinch work\n• BJJ, wrestling, judo, or submission grappling\n\nFor best results, ensure the video clearly shows both athletes engaged in grappling exchanges.",
            "recommended_drills": []
        })
    except Exception:
        # Fallback if parsing fails (e.g. observations are not plain JSON).
        # `Exception` rather than a bare except so interpreter-level signals
        # such as KeyboardInterrupt are not swallowed.
        if analysis_id:
            db_storage[analysis_id]["status"] = "rejected"
            db_storage[analysis_id]["rejection_reason"] = "Video content verification failed"

        return AnalysisResult(**{
            "overall_score": 0,
            "performance_label": "CONTENT VERIFICATION FAILED",
            "performance_grades": {"defense_grade": "N/A", "offense_grade": "N/A", "control_grade": "N/A"},
            "skill_breakdown": {"offense": 0, "defense": 0, "guard": 0, "passing": 0, "standup": 0},
            "strengths": [
                "Video does not appear to contain BJJ or grappling content.",
                "Please upload footage of ground grappling or submissions.",
                "This system is designed for grappling analysis only."
            ],
            "weaknesses": [
                "Upload a video showing BJJ, wrestling, or submission grappling.",
                "Ensure both athletes are visible and engaged in grappling.",
                "Videos should show ground work, takedowns, or submissions."
            ],
            "missed_opportunities": [],
            "key_moments": [],
            "coach_notes": "⚠️ CONTENT VERIFICATION FAILED\n\nThis video does not appear to contain Brazilian Jiu-Jitsu or grappling content. This AI system is specifically designed for analyzing ground grappling, submissions, and takedowns.\n\nPlease upload a video showing:\n• BJJ (gi or no-gi)\n• Wrestling\n• Judo (newaza)\n• Submission grappling\n• MMA grappling exchanges\n\nFor optimal results, ensure the video clearly shows both athletes engaged in grappling.",
            "recommended_drills": []
        })


async def hybrid_agentic_analysis(
    frames: List[Dict],
    metadata: Dict,
    user_desc: str,
    opp_desc: str,
    activity_type: str,
    analysis_id: str = None
) -> AnalysisResult:
    """Hybrid: Gemini vision + CrewAI agents + Python validation.

    Steps: (1) extract text observations from the frames, (2) run the CrewAI
    crew on those observations in a worker thread, (3) parse the crew output
    as JSON, validate it and attach frames to timed events.

    Args:
        frames: Extracted frame dicts; each is expected to carry at least a
            ``"timestamp"`` entry (used by the fallback/validation helpers).
        metadata: Video metadata; ``metadata["duration"]`` is required.
        user_desc: Description of the athlete being coached.
        opp_desc: Description of the opponent.
        activity_type: Accepted for interface compatibility; not used here.
        analysis_id: Optional key into ``db_storage`` used to publish progress.

    Returns:
        The validated ``AnalysisResult``; a rejection result when content
        verification fails; or the generic fallback result (with
        ``used_fallback`` recorded in ``db_storage``) when any step raises.
    """

    print("\n" + "="*70)
    print("HYBRID AGENTIC ANALYSIS (Dense Consecutive Frames)")
    print("="*70)

    try:
        if analysis_id:
            db_storage[analysis_id]["progress"] = 30

        # STEP 1: Gemini Vision with dense frames
        observations = await extract_frame_observations(
            frames, user_desc, opp_desc, metadata["duration"], metadata
        )

        # The vision step reports failure by returning an error string rather
        # than raising. Feeding that string to the crew would yield an
        # analysis invented from an error message, so use the fallback path.
        if observations.startswith("Error analyzing frames:"):
            raise RuntimeError(observations)

        # Check for content verification failure
        if "content_verification" in observations and "FAILED" in observations:
            print("❌ Content verification failed - not BJJ/grappling content")
            return _content_rejection_result(observations, analysis_id)

        if analysis_id:
            db_storage[analysis_id]["progress"] = 60

        # STEP 2: CrewAI Agents
        print("\nSTEP 2: CrewAI Agents - Analysis & Formatting")
        crew = create_analysis_crew(observations, user_desc, opp_desc, metadata["duration"], len(frames))

        crew_start = time.time()
        # kickoff() is blocking; run it in the default executor so the event
        # loop stays responsive.
        result = await asyncio.get_running_loop().run_in_executor(
            None,
            crew.kickoff
        )
        crew_time = time.time() - crew_start
        print(f"✅ CrewAI completed: {crew_time:.2f}s")

        if analysis_id:
            db_storage[analysis_id]["progress"] = 90

        # STEP 3: Parse & Validate
        print("\nSTEP 3: Python Validation")
        result_text = str(result)

        # Strip a markdown code fence if the model wrapped its JSON in one.
        if "```json" in result_text:
            result_text = result_text.split("```json")[1].split("```")[0].strip()
        elif "```" in result_text:
            result_text = result_text.split("```")[1].split("```")[0].strip()

        data = extract_json_from_text(result_text)
        data = validate_and_filter(data, frames)

        # Attach frames
        attach_frames_to_events(data.get("missed_opportunities", []), frames)
        attach_frames_to_events(data.get("key_moments", []), frames)

        if analysis_id:
            db_storage[analysis_id]["progress"] = 100

        print("✅ Analysis complete")
        print("="*70 + "\n")

        return AnalysisResult(**data)

    except Exception as e:
        print(f"❌ Hybrid analysis failed: {e}")
        traceback.print_exc()
        fallback = make_fallback(frames)
        if analysis_id:
            db_storage[analysis_id]["used_fallback"] = True
        return AnalysisResult(**fallback)
1115
+
1116
def _timestamp_at(frames: List[Dict], index: int, default: str) -> str:
    """Return ``frames[index]["timestamp"]``, or ``default`` when ``frames`` is empty."""
    if not frames:
        return default
    return frames[index]["timestamp"]


def validate_and_filter(data: Dict, frames: List[Dict]) -> Dict:
    """Python-level validation and generic filtering.

    Fills in any missing top-level fields of the parsed analysis with
    defaults, clamps the overall score to 0-100, drops strengths/weaknesses
    flagged by ``is_generic`` and tops them up via ``make_specific``.

    Args:
        data: Parsed analysis dict; mutated in place.
        frames: Frame dicts with a ``"timestamp"`` entry, used to time-stamp
            placeholder events.

    Returns:
        The same ``data`` dict, for convenience.
    """

    # Coerce the score to an int so a string or float from the LLM cannot
    # break the clamp below; unusable values fall back to the default 65.
    try:
        score = int(round(float(data.get("overall_score", 65))))
    except (TypeError, ValueError, OverflowError):
        score = 65
    data["overall_score"] = max(0, min(100, score))

    if "performance_label" not in data:
        score = data["overall_score"]
        if score >= 85:
            data["performance_label"] = "EXCELLENT PERFORMANCE"
        elif score >= 75:
            data["performance_label"] = "STRONG PERFORMANCE"
        elif score >= 60:
            data["performance_label"] = "SOLID PERFORMANCE"
        else:
            data["performance_label"] = "DEVELOPING PERFORMANCE"

    if "performance_grades" not in data:
        data["performance_grades"] = {"defense_grade": "C+", "offense_grade": "C", "control_grade": "C+"}

    if "skill_breakdown" not in data:
        base = data["overall_score"]
        data["skill_breakdown"] = {
            "offense": max(0, min(100, base - 5)),
            "defense": max(0, min(100, base + 3)),
            "guard": max(0, min(100, base - 2)),
            "passing": max(0, min(100, base - 10)),
            "standup": max(0, min(100, base - 13))
        }

    # Filter generic feedback
    for field in ("strengths", "weaknesses"):
        items = data.get(field)
        if isinstance(items, list) and items:
            filtered = [item for item in items if not is_generic(item)]
            if len(filtered) >= 3:
                data[field] = filtered[:3]
            else:
                data[field] = make_specific(field, frames, filtered)
        else:
            data[field] = make_specific(field, frames, [])

    if not data.get("missed_opportunities"):
        data["missed_opportunities"] = [{
            "time": _timestamp_at(frames, len(frames) // 2, "00:30"),
            "title": "Position",
            "description": "Review sequence for improvement opportunities",
            "category": "POSITION"
        }]

    if not data.get("key_moments"):
        # frames[-3] only exists with three or more frames; shorter clips use
        # the last frame instead of raising IndexError.
        late_index = -3 if len(frames) > 2 else -1
        data["key_moments"] = [{
            "time": _timestamp_at(frames, late_index, "00:45"),
            "title": "Exchange",
            "description": "Significant moment in match flow",
            "category": "TRANSITION"
        }]

    notes = data.get("coach_notes")
    if not isinstance(notes, str) or len(notes) < 50:
        data["coach_notes"] = "Focus on maintaining consistent technique throughout sequences. Review timestamped moments for detailed improvement areas."

    drills = data.get("recommended_drills")
    if not isinstance(drills, list) or len(drills) < 3:
        data["recommended_drills"] = [
            {"name": "Position Control Sequences", "focus_area": "General", "reason": "Improve sequential awareness", "duration": "15 min/day", "frequency": "5x/week"},
            {"name": "Guard Retention Drills", "focus_area": "Defense", "reason": "Strengthen defensive sequences", "duration": "10 min/day", "frequency": "4x/week"},
            {"name": "Transition Flow Training", "focus_area": "Movement", "reason": "Improve position transitions", "duration": "12 min/day", "frequency": "3x/week"}
        ]

    return data
1185
+
1186
def make_specific(field: str, frames: List[Dict], existing: List[str]) -> List[str]:
    """Pad a strengths/weaknesses list to three timestamped entries.

    Args:
        field: ``"strengths"`` selects the positive templates; any other
            value selects the weakness templates.
        frames: Frame dicts with a ``"timestamp"`` entry. Timestamps are taken
            from roughly the first eighth, the middle and near the end.
        existing: Feedback items to keep; not modified.

    Returns:
        A new list of at most three items: ``existing`` first, then as many
        template sentences as needed to reach three.
    """
    feedback = list(existing)

    if frames:
        start_ts = frames[len(frames) // 8]["timestamp"]
        mid_ts = frames[len(frames) // 2]["timestamp"]
        # frames[-3] only exists with three or more frames.
        end_ts = (frames[-3] if len(frames) > 2 else frames[-1])["timestamp"]
    else:
        # No frames available: use placeholder timestamps instead of raising
        # IndexError.
        start_ts, mid_ts, end_ts = "00:10", "00:30", "00:45"

    if field == "strengths":
        templates = [
            f"At {start_ts} - Maintained good structural positioning during opening sequence",
            f"At {mid_ts} - Demonstrated positional awareness during mid-match exchange",
            f"At {end_ts} - Showed consistent control in final phase of match"
        ]
    else:
        templates = [
            f"At {start_ts} - Could improve initial positioning strategy and grip selection",
            f"At {mid_ts} - Slow to recognize transitional opportunity during position change",
            f"At {end_ts} - Room to improve execution and pressure application in final sequence"
        ]

    # Append only as many templates as are needed to reach three items.
    feedback.extend(templates[:max(0, 3 - len(feedback))])

    return feedback[:3]
1211
+
1212
def make_fallback(frames: List[Dict]) -> Dict:
    """Return a canned, mid-range analysis payload used when real analysis fails.

    The middle frame's timestamp (or "00:30" when there are no frames) and the
    third-from-last frame's timestamp (or "00:45" when fewer than three frames
    exist) are woven into the placeholder feedback.
    """
    if frames:
        middle_ts = frames[len(frames)//2]["timestamp"]
    else:
        middle_ts = "00:30"

    if len(frames) > 2:
        late_ts = frames[-3]["timestamp"]
    else:
        late_ts = "00:45"

    grades = {"defense_grade": "C+", "offense_grade": "C", "control_grade": "C+"}
    skills = {"offense": 60, "defense": 68, "guard": 63, "passing": 55, "standup": 52}

    positives = [
        "At 0:10 - Maintained structural integrity during opening",
        f"At {middle_ts} - Showed positional awareness during exchange",
        f"At {late_ts} - Demonstrated control in final sequences",
    ]
    negatives = [
        "At 0:15 - Could improve initial positioning approach",
        f"At {middle_ts} - Slow to recognize transitional opportunities",
        f"At {late_ts} - Room to improve execution in final phase",
    ]

    missed = {"time": middle_ts, "title": "Position", "description": "Review for improvement", "category": "POSITION"}
    moment = {"time": late_ts, "title": "Exchange", "description": "Significant sequence", "category": "TRANSITION"}

    drills = [
        {"name": "Sequential Control", "focus_area": "General", "reason": "Improve awareness", "duration": "15 min/day", "frequency": "5x/week"},
        {"name": "Guard Sequences", "focus_area": "Defense", "reason": "Strengthen defense", "duration": "10 min/day", "frequency": "4x/week"},
        {"name": "Flow Training", "focus_area": "Movement", "reason": "Improve transitions", "duration": "12 min/day", "frequency": "3x/week"},
    ]

    return {
        "overall_score": 65,
        "performance_label": "SOLID PERFORMANCE",
        "performance_grades": grades,
        "skill_breakdown": skills,
        "strengths": positives,
        "weaknesses": negatives,
        "missed_opportunities": [missed],
        "key_moments": [moment],
        "coach_notes": "Focus on maintaining consistent technique throughout match sequences. Review specific timestamped moments for detailed improvement areas.",
        "recommended_drills": drills,
    }
1240
+
1241
+ # --- API ---
1242
+
1243
@app.post("/analyze-complete")
async def analyze_complete(
    file: UploadFile = File(...),
    user_description: str = Form(...),
    opponent_description: str = Form(...),
    activity_type: str = Form("Brazilian Jiu-Jitsu")
):
    """Accept a video upload, run the full analysis pipeline and return the result.

    The upload is written to ``temp_videos/`` under a unique name, frames are
    extracted in a worker thread, and ``hybrid_agentic_analysis`` produces the
    result. The temporary file is always removed afterwards.

    Returns:
        A JSON-serialisable dict whose ``status`` is one of ``"completed"``,
        ``"rejected"`` (duration or content validation failed) or
        ``"completed_with_fallback"`` (an unexpected error occurred and
        placeholder data is returned together with the error text).
    """
    start_time = time.time()
    file_path = None

    try:
        # The client controls file.filename, so keep only its final path
        # component; otherwise a name such as "../../x" could escape
        # temp_videos/.
        client_name = (file.filename or "").replace("\\", "/")
        safe_name = os.path.basename(client_name) or "upload"
        file_name = f"{uuid.uuid4()}_{safe_name}"
        file_path = f"temp_videos/{file_name}"
        os.makedirs("temp_videos", exist_ok=True)

        with open(file_path, "wb") as buffer:
            shutil.copyfileobj(file.file, buffer)

        analysis_id = str(uuid.uuid4())
        db_storage[analysis_id] = {"status": "processing", "progress": 0}

        # Extract DENSE CONSECUTIVE frames
        try:
            frames, metadata = await asyncio.get_running_loop().run_in_executor(
                None, extract_dense_consecutive_frames, file_path
            )
        except ValueError as ve:
            # Duration validation error
            error_msg = str(ve)
            print(f"⚠️ Duration validation failed: {error_msg}")

            return {
                "status": "rejected",
                "error": error_msg,
                "error_type": "duration_validation",
                "data": {
                    "overall_score": 0,
                    "performance_label": "VIDEO DURATION ERROR",
                    "performance_grades": {"defense_grade": "N/A", "offense_grade": "N/A", "control_grade": "N/A"},
                    "skill_breakdown": {"offense": 0, "defense": 0, "guard": 0, "passing": 0, "standup": 0},
                    "strengths": [],
                    "weaknesses": [],
                    "missed_opportunities": [],
                    "key_moments": [],
                    "coach_notes": f"⚠️ VIDEO DURATION ERROR\n\n{error_msg}\n\nRecommended video length: 10-90 seconds\n\nTips:\n• Focus on a single exchange or position\n• Trim longer videos to key moments\n• Ensure the clip shows clear grappling action",
                    "recommended_drills": []
                }
            }

        # Hybrid analysis
        result = await hybrid_agentic_analysis(
            frames, metadata,
            user_description.strip(), opponent_description.strip(),
            activity_type, analysis_id
        )

        total_time = time.time() - start_time

        # Check if content was rejected
        if result.performance_label == "CONTENT VERIFICATION FAILED":
            return {
                "status": "rejected",
                "error": "Video content verification failed - not BJJ/grappling",
                "error_type": "content_verification",
                "data": result.model_dump(),
                "processing_time": f"{total_time:.2f}s"
            }

        return {
            "status": "completed",
            "data": result.model_dump(),
            "processing_time": f"{total_time:.2f}s",
            "frames_analyzed": len(frames),
            "avg_frame_interval": f"{metadata.get('avg_frame_interval', 0):.2f}s",
            "used_fallback": db_storage[analysis_id].get("used_fallback", False),
            "method": "dense_consecutive_frames"
        }
    except Exception as e:
        print(f"❌ Error: {e}")
        traceback.print_exc()

        # Try to provide helpful fallback
        try:
            frames_fb, _ = await asyncio.get_running_loop().run_in_executor(
                None, extract_dense_consecutive_frames, file_path
            )
            fallback = make_fallback(frames_fb)
        except Exception:
            # `Exception`, not a bare except: cancelling the request while the
            # await above is pending must still propagate.
            fallback = make_fallback([{"timestamp": "00:30", "second": 30}])

        return {
            "status": "completed_with_fallback",
            "data": fallback,
            "error": str(e),
            "used_fallback": True
        }
    finally:
        if file_path:
            try:
                os.remove(file_path)
            except OSError:
                # Best-effort cleanup: the file may never have been created.
                pass
1343
+
1344
@app.get("/health")
async def health_check():
    """Liveness endpoint: report a static status string and build version."""
    payload = dict(status="healthy", version="29.0.0-optimized-accurate")
    return payload
1347
+
1348
@app.get("/")
async def root():
    """Service index: return static, descriptive information about this API."""
    optimizations = [
        "⚡ Optimized frame counts for 50-60s Gemini processing",
        "🎯 50% of frames in final 30% (submission-focused)",
        "📊 15-40 frames (optimized for speed + accuracy)",
        "✅ Ultra-strict evidence requirements (prevents wrong diagnosis)",
        "🔍 Conservative analysis (admits uncertainty when unclear)",
        "⏱️ Target: 50-60s total (15s video: ~30s, 60s video: ~50s)",
    ]

    frame_strategy = {
        "10-15s_video": "15 frames (~1.0s intervals) → Gemini ~30s",
        "15-30s_video": "20 frames (~1.5s intervals) → Gemini ~40s",
        "30-60s_video": "30 frames (~2.0s intervals) → Gemini ~50s",
        "60-90s_video": "40 frames (~2.3s intervals) → Gemini ~60s",
    }

    submission_focus = {
        "distribution": "20% start, 30% middle, 50% end",
        "end_section": "50% of all frames in final 30% of video",
        "final_frames": "Always includes last 2 frames for tap detection",
        "confirmation": "Ultra-strict: requires EXPLICIT tap visible (2+ slaps)",
    }

    accuracy_improvements = [
        "Evidence-only analysis (NO assumptions or inferences)",
        "Conservative position labels (says 'Unclear' when uncertain)",
        "Stricter submission confirmation (tap must be EXPLICIT)",
        "Mount requires ALL 4 criteria (knees, hips, flat, no entangle)",
        "No pain inference, intent assumption, or guessing",
        "Better to admit uncertainty than make wrong diagnosis",
    ]

    accepted = [
        "BJJ (gi/no-gi)",
        "Submission grappling",
        "Wrestling",
        "Judo (newaza)",
        "MMA grappling",
    ]
    rejected = [
        "Striking arts",
        "Kata/forms",
        "Non-combat sports",
    ]
    validation = {
        "content_types_accepted": accepted,
        "content_types_rejected": rejected,
        "duration": "5-120 seconds",
    }

    # Assemble in the same key order as the published response.
    info = {}
    info["message"] = "BJJ AI Coach - Optimized for Speed + Accuracy"
    info["version"] = "29.0.0"
    info["target_performance"] = "Total analysis: 50-60 seconds"
    info["architecture"] = "Gemini Vision + CrewAI Agents + Python Validation"
    info["optimizations"] = optimizations
    info["frame_strategy"] = frame_strategy
    info["submission_focus"] = submission_focus
    info["accuracy_improvements"] = accuracy_improvements
    info["validation"] = validation
    return info
1399
+
1400
# Script entry point: serve the app on all interfaces, on $PORT or 7860.
if __name__ == "__main__":
    import uvicorn

    port = int(os.getenv("PORT", "7860"))
    uvicorn.run(app, port=port, host="0.0.0.0")