# app.py — interview video analysis service
# (upload metadata: avinashprajapati, commit ff98e57, verified)
import os
import time
import json
import requests
from datetime import datetime
from supabase import create_client
from flask import Flask, jsonify
import threading
# For video analysis
import whisper
import librosa
import numpy as np
import soundfile as sf
import nltk
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import torch.nn.functional as F
# For PDF generation
from reportlab.lib import colors
from reportlab.lib.pagesizes import A4
from reportlab.platypus import SimpleDocTemplate, Table, TableStyle, Paragraph, Spacer, PageBreak
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.units import inch
from reportlab.lib.enums import TA_CENTER, TA_LEFT, TA_JUSTIFY
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
# ==================== CONFIGURATION =====================
# Connection settings come from the environment with hard-coded fallbacks.
# NOTE(review): the fallback key below ships in source — presumably the
# public Supabase "anon" key; confirm, and rotate if it is anything more
# privileged.
SUPABASE_URL = os.getenv('SUPABASE_URL', 'https://zccaimlxjhktttzqsleb.supabase.co')
SUPABASE_KEY = os.getenv('SUPABASE_KEY', 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6InpjY2FpbWx4amhrdHR0enFzbGViIiwicm9sZSI6ImFub24iLCJpYXQiOjE3NTc0ODAxNzEsImV4cCI6MjA3MzA1NjE3MX0.BIgHVR-u2fzCINVsLG1FXfnRu79rezgDMF8JTiMpbfQ')
BUCKET_NAME = os.getenv('BUCKET_NAME', 'interview-videos')          # incoming videos
REPORTS_BUCKET_NAME = os.getenv('REPORTS_BUCKET_NAME', 'analysis-reports')  # generated PDFs
# Initialize shared singletons: Supabase client, Flask app, and the set of
# bucket file names already handled by the background worker.
supabase = create_client(SUPABASE_URL, SUPABASE_KEY)
app = Flask(__name__)
PROCESSED_FILES = set()
# Download NLTK data (best-effort: startup must not crash if the download
# fails, e.g. when the container has no network access).
try:
    nltk.download('punkt', quiet=True)
    nltk.download('stopwords', quiet=True)
except Exception:
    # Bug fix: was a bare `except:` which would also swallow
    # KeyboardInterrupt/SystemExit during startup.
    pass
# Load the AI-text detection model once at import time so every analysis
# reuses the same tokenizer/model pair.
print("πŸ”„ Loading AI detection model...")
detector_tokenizer = AutoTokenizer.from_pretrained("andreas122001/roberta-academic-detector")
detector_model = AutoModelForSequenceClassification.from_pretrained("andreas122001/roberta-academic-detector")
print("βœ… AI detection model loaded")
# ==================== BUCKET MANAGEMENT ====================
def create_bucket_if_not_exists(bucket_name=REPORTS_BUCKET_NAME):
    """Ensure a Supabase storage bucket exists, creating it if missing.

    The bucket is probed with a list() call; a "not found" error triggers
    creation of a public, 50MB-limited, PDF-only bucket via the storage
    REST API. Returns True when the bucket exists or was created, False
    on any other error.
    """
    try:
        print(f"πŸ” Checking if bucket '{bucket_name}' exists...")
        try:
            # A successful listing means the bucket is already there.
            supabase.storage.from_(bucket_name).list()
        except Exception as probe_error:
            if "not found" not in str(probe_error).lower():
                print(f"❌ Error checking bucket: {probe_error}")
                return False
            print(f"πŸ“¦ Creating new bucket: {bucket_name}")
            response = requests.post(
                f"{SUPABASE_URL}/storage/v1/bucket",
                headers={
                    "Authorization": f"Bearer {SUPABASE_KEY}",
                    "Content-Type": "application/json",
                },
                json={
                    "name": bucket_name,
                    "id": bucket_name,
                    "public": True,
                    "file_size_limit": 52428800,  # 50MB
                    "allowed_mime_types": ["application/pdf"],
                },
            )
            # 409 means "already exists" — another worker won the race.
            if response.status_code not in (200, 201, 409):
                print(f"❌ Failed to create bucket: {response.text}")
                return False
            print(f"βœ… Bucket '{bucket_name}' created successfully")
            return True
        print(f"βœ… Bucket '{bucket_name}' already exists")
        return True
    except Exception as e:
        print(f"❌ Bucket creation error: {e}")
        return False
def setup_storage():
    """Provision the storage buckets the service needs at startup.

    Currently only the reports bucket. Returns True on success.
    """
    print("πŸ”„ Setting up storage buckets...")
    ok = create_bucket_if_not_exists(REPORTS_BUCKET_NAME)
    if not ok:
        print("❌ Storage setup failed")
        return False
    print("βœ… Storage setup completed")
    return True
# ==================== SUPABASE FILE FUNCTIONS ====================
def get_bucket_files():
    """List video entries (filtered by extension) in the interview bucket.

    Returns the raw Supabase file entries; an empty list on any error.
    """
    video_extensions = ('.mp4', '.avi', '.mov', '.mkv', '.webm')
    try:
        entries = supabase.storage.from_(BUCKET_NAME).list()
    except Exception as e:
        print(f"❌ Error getting files: {e}")
        return []
    # str.endswith accepts a tuple, so one call covers all extensions.
    return [entry for entry in entries
            if entry['name'].lower().endswith(video_extensions)]
def download_video(filename):
    """Download a video from the Supabase bucket into ./downloads.

    Streams the file in 8KB chunks. Returns the local file path, or
    None on any failure.
    """
    try:
        file_url = supabase.storage.from_(BUCKET_NAME).get_public_url(filename)
        response = requests.get(file_url, stream=True, timeout=120)
        if response.status_code == 200:
            os.makedirs('downloads', exist_ok=True)
            # basename() guards against path traversal should a bucket
            # entry name ever contain path separators.
            file_path = os.path.join('downloads', os.path.basename(filename))
            with open(file_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)
            # Bug fix: this message printed the literal "(unknown)"
            # instead of the downloaded file's name.
            print(f"βœ… Downloaded: {filename}")
            return file_path
        else:
            print(f"❌ Download failed: HTTP {response.status_code}")
            return None
    except Exception as e:
        print(f"❌ Download error: {e}")
        return None
def delete_from_supabase(filename):
    """Remove one file from the videos bucket. Returns True on success."""
    try:
        supabase.storage.from_(BUCKET_NAME).remove([filename])
        # Bug fix: this message printed the literal "(unknown)" instead of
        # the deleted file's name.
        print(f"πŸ—‘οΈ Deleted from Supabase: {filename}")
        return True
    except Exception as e:
        print(f"❌ Delete error: {e}")
        return False
# ==================== VIDEO ANALYSIS FUNCTIONS ====================
def extract_audio(video_path, audio_path):
    """Extract mono 16 kHz PCM audio from a video using ffmpeg.

    Writes a WAV file to `audio_path` and returns its duration in
    seconds; returns 0 on any failure.
    """
    import subprocess
    try:
        print(" 🎡 Extracting audio with ffmpeg...")
        # -vn drops video; 16-bit PCM / 16 kHz / mono suits the speech models.
        command = [
            'ffmpeg', '-i', video_path,
            '-vn',
            '-acodec', 'pcm_s16le',
            '-ar', '16000',
            '-ac', '1',
            '-y',
            audio_path,
        ]
        proc = subprocess.run(command, capture_output=True, text=True, timeout=60)
        if proc.returncode != 0:
            print(f"❌ FFmpeg error: {proc.stderr}")
            return 0
        info = sf.info(audio_path)
        print(f" βœ… Audio extracted: {info.duration:.1f}s")
        return info.duration
    except Exception as e:
        print(f"❌ Audio extraction error: {e}")
        import traceback
        traceback.print_exc()
        return 0
def transcribe_audio(audio_path):
    """Transcribe an audio file with Whisper ("base" model).

    Returns {"full_text": str, "segments": [{"start", "end", "text"}]};
    on any failure an empty transcription is returned instead of raising.
    """
    try:
        print(" 🎀 Loading Whisper model...")
        model = whisper.load_model("base")
        print(" 🎀 Transcribing...")
        result = model.transcribe(audio_path)
        segments = [
            {
                "start": float(seg.get("start", 0)),
                "end": float(seg.get("end", 0)),
                "text": seg.get("text", ""),
            }
            for seg in result.get("segments", [])
        ]
        return {"full_text": result["text"], "segments": segments}
    except Exception as e:
        print(f"❌ Transcription error: {e}")
        return {"full_text": "", "segments": []}
def extract_acoustic_features(audio_path):
    """Extract pitch, energy and spectral summary features from audio.

    Only the first 60 seconds are analyzed to bound compute. Returns a
    dict of floats (pitch_mean/std in Hz, RMS energy mean/std, spectral
    centroid); all zeros on failure.
    """
    try:
        y, sr = librosa.load(audio_path, duration=60)
        # Pitch via pyin; 50-300 Hz covers typical speech F0. Unvoiced
        # frames come back as NaN and are dropped before the statistics.
        try:
            pitch, _, _ = librosa.pyin(y, fmin=50, fmax=300)
            pitch_clean = pitch[~np.isnan(pitch)]
            pitch_std = float(np.std(pitch_clean)) if len(pitch_clean) > 0 else 0.0
            pitch_mean = float(np.mean(pitch_clean)) if len(pitch_clean) > 0 else 0.0
        except Exception:
            # Bug fix: was a bare `except:`, which would also swallow
            # KeyboardInterrupt/SystemExit.
            pitch_std = 0.0
            pitch_mean = 0.0
        # Frame-wise RMS energy -> loudness level and variability.
        rms = librosa.feature.rms(y=y)[0]
        energy_mean = float(np.mean(rms))
        energy_std = float(np.std(rms))
        # Spectral centroid as a single brightness measure.
        spectral_centroid = float(np.mean(librosa.feature.spectral_centroid(y=y, sr=sr)))
        return {
            "pitch_mean": pitch_mean,
            "pitch_std": pitch_std,
            "energy_mean": energy_mean,
            "energy_std": energy_std,
            "spectral_centroid": spectral_centroid
        }
    except Exception as e:
        print(f"❌ Acoustic feature extraction error: {e}")
        return {
            "pitch_mean": 0, "pitch_std": 0, "energy_mean": 0,
            "energy_std": 0, "spectral_centroid": 0
        }
def extract_linguistic_features(transcription_data, duration_sec):
    """Compute speaking-rate, vocabulary and filler-word features.

    Args:
        transcription_data: dict with a "full_text" transcript string.
        duration_sec: audio duration in seconds (drives words-per-minute).

    Returns:
        dict with words_per_minute, lexical_diversity (unique/total word
        ratio) and filler_ratio (filler occurrences / total words).
        All zeros when the transcript or duration is empty, or on error.
    """
    try:
        text = transcription_data["full_text"]
        words = text.lower().split()
        word_count = len(words)
        if word_count == 0 or duration_sec == 0:
            return {
                "words_per_minute": 0,
                "lexical_diversity": 0,
                "filler_ratio": 0
            }
        words_per_minute = (word_count / duration_sec) * 60
        lexical_diversity = len(set(words)) / word_count
        # Bug fix: the old code put the two-word filler "you know" in the
        # same list that was matched against single split() tokens, so it
        # could never match. Single-token fillers are matched per word;
        # multi-word fillers are matched against adjacent word pairs.
        single_fillers = {'um', 'uh', 'like', 'so', 'actually', 'basically'}
        phrase_fillers = {'you know'}
        filler_count = sum(1 for word in words if word in single_fillers)
        filler_count += sum(1 for a, b in zip(words, words[1:])
                            if f"{a} {b}" in phrase_fillers)
        filler_ratio = filler_count / word_count
        return {
            "words_per_minute": float(words_per_minute),
            "lexical_diversity": float(lexical_diversity),
            "filler_ratio": float(filler_ratio)
        }
    except Exception as e:
        print(f"❌ Linguistic feature extraction error: {e}")
        return {"words_per_minute": 0, "lexical_diversity": 0, "filler_ratio": 0}
def detect_ai_text(text):
    """Classify text as human- vs AI-written with the RoBERTa detector.

    Returns {"Human": p, "AI": p} rounded to 4 decimals. Falls back to a
    50/50 split for empty or too-short text, or on any model error.
    """
    try:
        if not text or len(text.strip()) < 10:
            return {"Human": 0.5, "AI": 0.5}
        encoded = detector_tokenizer(text, return_tensors="pt", truncation=True,
                                     padding=True, max_length=512)
        with torch.no_grad():
            logits = detector_model(**encoded).logits
        probs = F.softmax(logits, dim=1)
        human_p = float(probs[0][0])
        ai_p = float(probs[0][1])
        return {"Human": round(human_p, 4), "AI": round(ai_p, 4)}
    except Exception as e:
        print(f"❌ AI detection error: {e}")
        return {"Human": 0.5, "AI": 0.5}
def analyze_video(video_path, video_name):
    """Run the complete analysis pipeline on one video.

    Steps: extract audio -> transcribe -> acoustic features -> linguistic
    features -> AI detection; the results are combined into a report dict
    with a heuristic 0..1 confidence score. Returns None on failure. The
    temporary WAV is always removed, even when a step raises.
    """
    temp_dir = "temp_analysis"
    os.makedirs(temp_dir, exist_ok=True)
    audio_path = os.path.join(temp_dir, "audio.wav")
    try:
        print(f"\n{'='*60}")
        print(f"🎬 ANALYZING: {video_name}")
        print(f"{'='*60}")
        # 1. Extract Audio
        print(" πŸ“Ή Step 1/5: Extracting audio...")
        duration = extract_audio(video_path, audio_path)
        if duration == 0:
            return None
        print(f" βœ… Audio extracted ({duration:.1f}s)")
        # 2. Transcribe
        print(" πŸ“Ή Step 2/5: Transcribing...")
        transcription = transcribe_audio(audio_path)
        print(f" βœ… Transcription complete ({len(transcription['full_text'])} chars)")
        # 3. Acoustic Features
        print(" πŸ“Ή Step 3/5: Extracting acoustic features...")
        acoustic = extract_acoustic_features(audio_path)
        print(" βœ… Acoustic features extracted")
        # 4. Linguistic Features
        print(" πŸ“Ή Step 4/5: Analyzing language...")
        audio_info = sf.info(audio_path)
        linguistic = extract_linguistic_features(transcription, audio_info.duration)
        print(" βœ… Linguistic features extracted")
        # 5. AI Detection
        print(" πŸ“Ή Step 5/5: Running AI detection...")
        ai_result = detect_ai_text(transcription["full_text"])
        print(" βœ… AI detection complete")
        # Heuristic confidence: voice energy + low filler ratio + vocabulary
        # diversity + closeness to a 150 WPM target pace, clamped to [0, 1].
        # NOTE(review): energy_mean is a raw RMS level (typically << 1), so
        # its 0.3 weight contributes little — confirm intended calibration.
        confidence_score = (
            acoustic['energy_mean'] * 0.3 +
            (1 - linguistic['filler_ratio']) * 0.3 +
            linguistic['lexical_diversity'] * 0.2 +
            (1 - abs(linguistic['words_per_minute'] - 150) / 150) * 0.2
        )
        confidence_score = max(0, min(1, confidence_score))
        report = {
            "video_name": video_name,
            "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            "duration_seconds": float(audio_info.duration),
            "confidence_score": float(confidence_score),
            "acoustic_features": acoustic,
            "linguistic_features": linguistic,
            "ai_detection": ai_result,
            "transcription_preview": transcription["full_text"][:300],
            "full_transcription": transcription["full_text"]
        }
        print(f"\nβœ… ANALYSIS COMPLETE!")
        print(f" πŸ“Š Confidence Score: {confidence_score:.2f}")
        print(f" πŸ€– AI Detection: {max(ai_result, key=ai_result.get)}")
        print(f"{'='*60}\n")
        return report
    except Exception as e:
        print(f"❌ Analysis error for {video_name}: {e}")
        import traceback
        traceback.print_exc()
        return None
    finally:
        # Bug fix: the temp WAV previously leaked whenever a step raised or
        # the function returned early (cleanup only ran on the happy path).
        if os.path.exists(audio_path):
            os.remove(audio_path)
# ==================== COMPACT PDF GENERATION (SINGLE PAGE) ====================
def extract_name_mobile_email(filename):
    """Parse candidate name, mobile and email from a video filename.

    Expected pattern: "<name>_<mobile>_<email>.<ext>". Missing fields fall
    back to "Not Provided"; with no underscores at all, the raw filename
    is returned as the name.
    NOTE(review): an email containing underscores would split into extra
    parts and truncate at the first one — confirm upload naming rules.
    """
    try:
        # Strip the extension, then split the remaining fields.
        name_without_ext = os.path.splitext(filename)[0]
        parts = name_without_ext.split('_')
        if len(parts) >= 3:
            # e.g. "avinash_8235263572_someone@example.com.mp4"
            return parts[0].title(), parts[1], parts[2]
        if len(parts) == 2:
            name = parts[0].title()
            # The second field is only trusted as a mobile number when it
            # is exactly 10 digits.
            mobile = parts[1] if parts[1].isdigit() and len(parts[1]) == 10 else "Not Provided"
            return name, mobile, "Not Provided"
        return filename, "Not Provided", "Not Provided"
    except Exception:
        # Bug fix: was a bare `except:`, which would also swallow
        # KeyboardInterrupt/SystemExit.
        return filename, "Not Provided", "Not Provided"
# ==================== SINGLE PAGE A4 PDF GENERATION ====================
def create_pdf_report(report, filename):
    """Create modern single-page A4 PDF report - All content in one page.

    Renders, top to bottom: header banner, candidate info parsed from the
    video filename, an overall-confidence banner, a 2x3 key-metrics grid,
    a status table, a truncated transcription excerpt and up to three
    recommendations, then writes the PDF to `filename`.
    Returns True on success, False on any error.
    """
    try:
        print("\nπŸ“„ Creating single-page A4 PDF report...")
        # A4 size with minimal margins so everything fits on one page.
        doc = SimpleDocTemplate(filename, pagesize=A4,
                                topMargin=0.15*inch,
                                bottomMargin=0.15*inch,
                                leftMargin=0.3*inch,
                                rightMargin=0.3*inch)
        story = []
        styles = getSampleStyleSheet()
        # Compact Color Scheme (Tailwind-like hex palette).
        PRIMARY_COLOR = colors.HexColor('#1E40AF')
        SECONDARY_COLOR = colors.HexColor('#3B82F6')
        ACCENT_COLOR = colors.HexColor('#10B981')
        WARNING_COLOR = colors.HexColor('#F59E0B')
        DANGER_COLOR = colors.HexColor('#EF4444')
        LIGHT_BG = colors.HexColor('#F8FAFC')
        DARK_TEXT = colors.HexColor('#1F2937')
        LIGHT_TEXT = colors.HexColor('#6B7280')
        # Extract name, mobile and email from the video filename.
        candidate_name, mobile_number, email_id = extract_name_mobile_email(report['video_name'])
        # Ultra Compact Styles
        title_style = ParagraphStyle(
            'CompactTitle',
            parent=styles['Heading1'],
            fontSize=16,
            textColor=colors.white,
            alignment=TA_CENTER,
            fontName='Helvetica-Bold',
            spaceAfter=12
        )
        section_style = ParagraphStyle(
            'CompactSection',
            parent=styles['Heading2'],
            fontSize=11,
            textColor=PRIMARY_COLOR,
            fontName='Helvetica-Bold',
            spaceAfter=8,
            spaceBefore=12
        )
        metric_label_style = ParagraphStyle(
            'CompactMetricLabel',
            parent=styles['Normal'],
            fontSize=8,
            textColor=LIGHT_TEXT,
            alignment=TA_CENTER,
            fontName='Helvetica'
        )
        metric_value_style = ParagraphStyle(
            'CompactMetricValue',
            parent=styles['Normal'],
            fontSize=12,
            textColor=DARK_TEXT,
            alignment=TA_CENTER,
            fontName='Helvetica-Bold'
        )
        # ===== COMPACT HEADER =====
        header_data = [[
            Paragraph("INTERVIEW ANALYSIS REPORT", title_style)
        ]]
        header_table = Table(header_data, colWidths=[7.2*inch])
        header_table.setStyle(TableStyle([
            ('BACKGROUND', (0, 0), (-1, 0), PRIMARY_COLOR),
            ('VALIGN', (0, 0), (-1, 0), 'MIDDLE'),
            ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
            ('TOPPADDING', (0, 0), (-1, 0), 12),
        ]))
        story.append(header_table)
        story.append(Spacer(1, 0.05*inch))
        # ===== COMPACT CANDIDATE INFO =====
        # Two-column label/value layout; the long video name is truncated.
        candidate_data = [
            ['πŸ‘€ Candidate:', candidate_name, 'πŸ“± Mobile:', mobile_number],
            ['πŸ“§ Email:', email_id, '⏱️ Duration:', f"{report['duration_seconds']:.1f}s"],
            ['πŸŽ₯ Video:', report['video_name'][:25] + '...' if len(report['video_name']) > 25 else report['video_name'], '', ''],
        ]
        candidate_table = Table(candidate_data, colWidths=[1.2*inch, 2.2*inch, 1.2*inch, 2.2*inch])
        candidate_table.setStyle(TableStyle([
            ('FONTNAME', (0, 0), (-1, -1), 'Helvetica'),
            ('FONTSIZE', (0, 0), (-1, -1), 9),
            ('BOTTOMPADDING', (0, 0), (-1, -1), 6),
            ('TOPPADDING', (0, 0), (-1, -1), 6),
            ('VALIGN', (0, 0), (-1, -1), 'MIDDLE'),
            ('SPAN', (2, 2), (3, 2)),  # Span the last row for video name
        ]))
        story.append(candidate_table)
        story.append(Spacer(1, 0.1*inch))
        # ===== COMPACT CONFIDENCE SCORE =====
        # Banner color/text follow the score bands: >=0.8, >=0.6, below.
        confidence = report['confidence_score']
        if confidence >= 0.8:
            conf_color = ACCENT_COLOR
            conf_text = "EXCELLENT"
        elif confidence >= 0.6:
            conf_color = WARNING_COLOR
            conf_text = "GOOD"
        else:
            conf_color = DANGER_COLOR
            conf_text = "NEEDS WORK"
        confidence_data = [[
            Paragraph(f"Overall Score: {confidence:.2f}/1.00",
                      ParagraphStyle('ConfScore', fontSize=11, textColor=colors.white,
                                     alignment=TA_CENTER, fontName='Helvetica-Bold')),
            Paragraph(conf_text,
                      ParagraphStyle('ConfText', fontSize=10, textColor=colors.white,
                                     alignment=TA_CENTER, fontName='Helvetica'))
        ]]
        confidence_table = Table(confidence_data, colWidths=[4*inch, 2.8*inch])
        confidence_table.setStyle(TableStyle([
            ('BACKGROUND', (0, 0), (-1, 0), conf_color),
            ('BOTTOMPADDING', (0, 0), (-1, 0), 8),
            ('TOPPADDING', (0, 0), (-1, 0), 8),
        ]))
        story.append(confidence_table)
        story.append(Spacer(1, 0.15*inch))
        # ===== COMPACT METRICS GRID - 2x3 =====
        story.append(Paragraph("πŸ“Š KEY METRICS", section_style))
        # Get AI detection: label with the highest probability, and its value.
        ai_label = max(report['ai_detection'], key=report['ai_detection'].get)
        ai_conf = report['ai_detection'][ai_label]
        # Each cell is a small nested Table: label / value / unit.
        metrics_data = [
            [
                # Row 1: Speaking Metrics
                Table([
                    [Paragraph('SPEAKING PACE', metric_label_style)],
                    [Paragraph(f"{report['linguistic_features']['words_per_minute']:.0f}", metric_value_style)],
                    [Paragraph('WPM', metric_label_style)]
                ], style=[('ALIGN', (0, 0), (-1, -1), 'CENTER')]),
                Table([
                    [Paragraph('FILLER WORDS', metric_label_style)],
                    [Paragraph(f"{report['linguistic_features']['filler_ratio']:.1%}", metric_value_style)],
                    [Paragraph('Ratio', metric_label_style)]
                ], style=[('ALIGN', (0, 0), (-1, -1), 'CENTER')]),
                Table([
                    [Paragraph('VOCABULARY', metric_label_style)],
                    [Paragraph(f"{report['linguistic_features']['lexical_diversity']:.2f}", metric_value_style)],
                    [Paragraph('Diversity', metric_label_style)]
                ], style=[('ALIGN', (0, 0), (-1, -1), 'CENTER')])
            ],
            [
                # Row 2: Technical Metrics
                Table([
                    [Paragraph('VOICE STABILITY', metric_label_style)],
                    [Paragraph(f"{report['acoustic_features']['pitch_std']:.1f}", metric_value_style)],
                    [Paragraph('Std Dev', metric_label_style)]
                ], style=[('ALIGN', (0, 0), (-1, -1), 'CENTER')]),
                Table([
                    [Paragraph('VOICE ENERGY', metric_label_style)],
                    [Paragraph(f"{report['acoustic_features']['energy_mean']:.3f}", metric_value_style)],
                    [Paragraph('Level', metric_label_style)]
                ], style=[('ALIGN', (0, 0), (-1, -1), 'CENTER')]),
                Table([
                    [Paragraph('AI DETECTION', metric_label_style)],
                    [Paragraph(f"{ai_conf:.1%}", metric_value_style)],
                    [Paragraph(ai_label, metric_label_style)]
                ], style=[('ALIGN', (0, 0), (-1, -1), 'CENTER')])
            ]
        ]
        metrics_table = Table(metrics_data, colWidths=[2.2*inch, 2.2*inch, 2.2*inch])
        metrics_table.setStyle(TableStyle([
            ('BACKGROUND', (0, 0), (-1, -1), LIGHT_BG),
            ('BOX', (0, 0), (-1, -1), 1, colors.HexColor('#E5E7EB')),
            ('BOTTOMPADDING', (0, 0), (-1, -1), 10),
            ('TOPPADDING', (0, 0), (-1, -1), 10),
        ]))
        story.append(metrics_table)
        story.append(Spacer(1, 0.15*inch))
        # ===== COMPACT STATUS INDICATORS =====
        story.append(Paragraph("πŸ“ˆ PERFORMANCE STATUS", section_style))
        # Traffic-light indicators: green/yellow/red per metric band.
        wpm_status = '🟒' if 120 <= report['linguistic_features']['words_per_minute'] <= 180 else '🟑' if 80 <= report['linguistic_features']['words_per_minute'] <= 220 else 'πŸ”΄'
        filler_status = '🟒' if report['linguistic_features']['filler_ratio'] <= 0.05 else '🟑' if report['linguistic_features']['filler_ratio'] <= 0.1 else 'πŸ”΄'
        vocab_status = '🟒' if report['linguistic_features']['lexical_diversity'] >= 0.7 else '🟑' if report['linguistic_features']['lexical_diversity'] >= 0.5 else 'πŸ”΄'
        ai_status = '🟒' if ai_label == 'Human' else 'πŸ”΄'
        status_data = [
            ['Speaking Pace', get_pace_status(report['linguistic_features']['words_per_minute']), wpm_status],
            ['Speech Fluency', get_fluency_status(report['linguistic_features']['filler_ratio']), filler_status],
            ['Vocabulary Range', get_vocab_status(report['linguistic_features']['lexical_diversity']), vocab_status],
            ['AI Detection', ai_label.upper(), ai_status],
        ]
        status_table = Table(status_data, colWidths=[2.5*inch, 3.5*inch, 0.6*inch])
        status_table.setStyle(TableStyle([
            ('FONTNAME', (0, 0), (-1, -1), 'Helvetica'),
            ('FONTSIZE', (0, 0), (-1, -1), 9),
            ('BOTTOMPADDING', (0, 0), (-1, -1), 6),
            ('TOPPADDING', (0, 0), (-1, -1), 6),
            ('GRID', (0, 0), (-1, -1), 1, colors.HexColor('#E5E7EB')),
        ]))
        story.append(status_table)
        story.append(Spacer(1, 0.15*inch))
        # ===== COMPACT TRANSCRIPTION =====
        story.append(Paragraph("πŸ’¬ TRANSCRIPTION", section_style))
        trans_text = report['transcription_preview']
        if len(trans_text) > 150:  # Even more compact
            trans_text = trans_text[:150] + "..."
        trans_style = ParagraphStyle(
            'CompactTranscription',
            parent=styles['Normal'],
            fontSize=9,
            textColor=DARK_TEXT,
            alignment=TA_JUSTIFY,
            backColor=LIGHT_BG,
            borderPadding=8,
            leftIndent=5,
            rightIndent=5
        )
        story.append(Paragraph(trans_text, trans_style))
        story.append(Spacer(1, 0.1*inch))
        # ===== COMPACT RECOMMENDATIONS =====
        # Build up to three bullet recommendations from the metric bands.
        story.append(Paragraph("πŸ’‘ KEY RECOMMENDATIONS", section_style))
        recommendations = []
        if report['linguistic_features']['filler_ratio'] > 0.1:
            recommendations.append("β€’ Reduce filler words")
        if report['linguistic_features']['words_per_minute'] < 120:
            recommendations.append("β€’ Increase speaking pace")
        elif report['linguistic_features']['words_per_minute'] > 200:
            recommendations.append("β€’ Slow down for clarity")
        if report['linguistic_features']['lexical_diversity'] < 0.6:
            recommendations.append("β€’ Expand vocabulary")
        if ai_label != 'Human':
            recommendations.append("β€’ Use natural speech patterns")
        if not recommendations:
            recommendations.append("β€’ Excellent! Maintain current performance")
        # Limit to 3 recommendations max
        if len(recommendations) > 3:
            recommendations = recommendations[:3]
        recommendations_text = "<br/>".join(recommendations)
        rec_style = ParagraphStyle(
            'CompactRecommendations',
            parent=styles['Normal'],
            fontSize=9,
            textColor=DARK_TEXT,
            alignment=TA_LEFT,
            backColor=colors.HexColor('#ECFDF5'),
            borderPadding=8,
            leftIndent=5
        )
        story.append(Paragraph(recommendations_text, rec_style))
        # ===== COMPACT FOOTER =====
        story.append(Spacer(1, 0.1*inch))
        footer_text = f"Interview Analysis System (Developed by Avinash Kumar) β€’ {report['timestamp']}"
        footer_style = ParagraphStyle(
            'CompactFooter',
            parent=styles['Normal'],
            fontSize=7,
            textColor=LIGHT_TEXT,
            alignment=TA_CENTER
        )
        story.append(Paragraph(footer_text, footer_style))
        # ===== BUILD PDF =====
        doc.build(story)
        print(f"βœ… PDF created: (unknown)")
        return True
    except Exception as e:
        print(f"❌ PDF creation error: {e}")
        import traceback
        traceback.print_exc()
        return False
# Compact helper functions
def get_pace_status(wpm):
    """Classify a speaking pace (words per minute) into a label."""
    if wpm < 80 or wpm > 220:
        return "Extreme"
    if wpm < 120:
        return "Slow"
    if wpm > 180:
        return "Fast"
    return "Optimal"
def get_fluency_status(filler_ratio):
    """Map a filler-word ratio to a fluency label."""
    for threshold, label in ((0.05, "Excellent"), (0.1, "Good")):
        if filler_ratio <= threshold:
            return label
    return "Needs Work"
def get_vocab_status(lexical_diversity):
    """Map lexical diversity (unique/total word ratio) to a label."""
    if lexical_diversity >= 0.7:
        return "Rich"
    return "Average" if lexical_diversity >= 0.5 else "Limited"
# ==================== SUPABASE STORAGE FUNCTIONS ====================
def upload_to_supabase(file_path, filename, bucket_name=REPORTS_BUCKET_NAME):
    """Upload a local PDF to Supabase storage and return its public URL.

    Content-type is forced to application/pdf (this helper is PDF-only).
    Returns None on any failure.
    """
    try:
        # Bug fix: both progress messages below printed the literal
        # "(unknown)" instead of the file name.
        print(f"πŸ“€ Uploading {filename} to Supabase...")
        with open(file_path, 'rb') as file:
            # Upload the file
            result = supabase.storage.from_(bucket_name).upload(
                file=file,
                path=filename,
                file_options={"content-type": "application/pdf"}
            )
        print(f"βœ… Uploaded to Supabase: {filename}")
        # Get public URL
        public_url = supabase.storage.from_(bucket_name).get_public_url(filename)
        print(f"🌐 Public URL: {public_url}")
        return public_url
    except Exception as e:
        print(f"❌ Upload error: {e}")
        return None
def store_analysis_data(report):
    """Persist one analysis report row into video_analysis_results.

    Returns True when the insert succeeds, False on any error.
    """
    try:
        print("πŸ’Ύ Storing analysis data in database...")
        # Copy exactly the report fields the table persists; created_at
        # records the database insert time, not the analysis time.
        persisted_fields = (
            "video_name", "timestamp", "duration_seconds", "confidence_score",
            "acoustic_features", "linguistic_features", "ai_detection",
            "transcription_preview", "full_transcription",
        )
        row = {field: report[field] for field in persisted_fields}
        row["created_at"] = datetime.now().isoformat()
        supabase.table("video_analysis_results").insert(row).execute()
        print(f"βœ… Stored analysis data for: {report['video_name']}")
        return True
    except Exception as e:
        print(f"❌ Database storage error: {e}")
        return False
def create_and_store_single_report(report):
    """Create the PDF for one analyzed video and store it in Supabase.

    The PDF is named after the video ("<video>_analysis_report.pdf").
    Analysis-data and metadata inserts are best-effort: their failure is
    logged but does not fail the call once the PDF is uploaded. Returns
    True when the PDF was created and uploaded.
    """
    try:
        print("\nπŸ“Š Creating and storing individual PDF report...")
        # Derive the PDF name from the video name (extension swapped for .pdf).
        video_name_without_ext = os.path.splitext(report['video_name'])[0]
        pdf_filename = f"{video_name_without_ext}_analysis_report.pdf"
        # 1. Create PDF report
        if not create_pdf_report(report, pdf_filename):
            print("❌ Failed to create PDF")
            return False
        try:
            # 2. Upload PDF to Supabase
            pdf_url = upload_to_supabase(pdf_filename, pdf_filename)
            if not pdf_url:
                print("❌ Failed to upload PDF")
                return False
            # 3. Store analysis data in database (best-effort)
            try:
                if not store_analysis_data(report):
                    print("⚠️ Failed to store analysis data, but PDF uploaded successfully")
            except Exception as e:
                print(f"⚠️ Database storage failed, but PDF uploaded: {e}")
            # 4. Store report metadata (best-effort)
            try:
                report_metadata = {
                    "pdf_url": pdf_url,
                    "video_name": report['video_name'],
                    "confidence_score": report["confidence_score"],
                    "timestamp": datetime.now().isoformat(),
                    "report_id": f"report_{video_name_without_ext}"
                }
                supabase.table("analysis_reports").insert(report_metadata).execute()
                print("βœ… Report metadata stored")
            except Exception as e:
                print(f"⚠️ Could not store report metadata: {e}")
            print(f"βœ… Individual PDF report stored successfully in Supabase!")
            print(f"πŸ“Ž PDF URL: {pdf_url}")
            return True
        finally:
            # Bug fix: the local PDF previously leaked when the upload
            # failed or an exception was raised; cleanup now always runs.
            if os.path.exists(pdf_filename):
                os.remove(pdf_filename)
    except Exception as e:
        print(f"❌ Report storage error: {e}")
        import traceback
        traceback.print_exc()
        return False
# ==================== REQUIRED DATABASE TABLES SETUP ====================
def setup_database_tables():
    """Print a startup reminder of the database tables the pipeline needs.

    The tables themselves must be created manually in the Supabase
    dashboard; this function performs no database calls.
    """
    try:
        print("πŸ”§ Checking database tables...")
        expected_tables = ["video_analysis_results", "analysis_reports"]
        print(f"πŸ“‹ Required tables: {expected_tables}")
        print("πŸ’‘ Note: Create these tables in Supabase Dashboard -> Table Editor")
    except Exception as e:
        print(f"❌ Database setup error: {e}")
# ==================== MAIN PROCESS - ONE VIDEO AT A TIME ====================
def process_videos():
    """Background worker: poll the bucket and process one video at a time.

    Loop: list new videos -> download -> analyze -> remove local and
    remote copies -> store the PDF report. A file is marked processed
    even when it fails, so one bad video cannot wedge the queue. Sleeps
    30s when idle or on error, 10s between videos. Never returns.
    """
    while True:
        try:
            print(f"\n{'='*60}")
            print(f"πŸ” CHECKING FOR NEW VIDEOS... ({datetime.now().strftime('%H:%M:%S')})")
            print(f"{'='*60}")
            videos = get_bucket_files()
            new_videos = [v for v in videos if v['name'] not in PROCESSED_FILES]
            if not new_videos:
                print("βœ… No new videos found. Waiting...")
                time.sleep(30)
                continue
            print(f"🎯 Found {len(new_videos)} new video(s) to process")
            print("πŸ”„ Processing ONE VIDEO AT A TIME...\n")
            # Process only the FIRST video; the next loop iteration picks
            # up the rest.
            video = new_videos[0]
            filename = video['name']
            # Bug fix: the progress messages below printed the literal
            # "(unknown)" instead of the actual file name/path.
            print(f"🎬 PROCESSING: {filename}")
            # Download
            video_path = download_video(filename)
            if not video_path:
                PROCESSED_FILES.add(filename)  # Mark as processed even if failed
                continue
            # Analyze
            report = analyze_video(video_path, filename)
            # Cleanup local file
            if os.path.exists(video_path):
                os.remove(video_path)
                print(f"πŸ—‘οΈ Deleted local: {video_path}")
            # Delete from Supabase
            delete_from_supabase(filename)
            # Mark as processed
            PROCESSED_FILES.add(filename)
            # Store individual PDF report in Supabase
            if report:
                print(f"\n{'='*60}")
                print(f"πŸ“Š ANALYSIS COMPLETE - Storing individual PDF report")
                print(f"{'='*60}")
                create_and_store_single_report(report)
            print(f"\nβœ… Video '{filename}' processing complete. Waiting 10 seconds for next video...\n")
            time.sleep(10)  # Small pause before checking for the next video
        except Exception as e:
            print(f"❌ Process error: {e}")
            import traceback
            traceback.print_exc()
            time.sleep(30)
# ==================== FLASK ROUTES ====================
@app.route('/')
def home():
    """Root endpoint: service identity plus processing statistics."""
    payload = {
        "status": "running",
        "service": "Video Analysis System",
        "processed_files": len(PROCESSED_FILES),
        "reports_bucket": REPORTS_BUCKET_NAME,
        "processing_mode": "ONE_VIDEO_AT_A_TIME",
        "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
    }
    return jsonify(payload)
@app.route('/health')
def health():
    """Lightweight liveness probe for orchestrators/load balancers."""
    body = jsonify({"status": "healthy"})
    return body, 200
@app.route('/storage-status')
def storage_status():
    """Report whether the reports bucket is reachable and count its PDFs."""
    try:
        entries = supabase.storage.from_(REPORTS_BUCKET_NAME).list()
        pdf_count = sum(1 for entry in entries if entry['name'].endswith('.pdf'))
        return jsonify({
            "status": "healthy",
            "reports_bucket": REPORTS_BUCKET_NAME,
            "total_files": len(entries),
            "pdf_files": pdf_count,
            "bucket_accessible": True
        })
    except Exception as e:
        return jsonify({
            "status": "error",
            "reports_bucket": REPORTS_BUCKET_NAME,
            "bucket_accessible": False,
            "error": str(e)
        }), 500
@app.route('/stats')
def stats():
    """Expose processed-file bookkeeping for debugging."""
    return jsonify(
        total_processed=len(PROCESSED_FILES),
        processed_files=list(PROCESSED_FILES),
        bucket=BUCKET_NAME,
        processing_mode="sequential",
    )
# ==================== MAIN ====================
# ==================== MAIN ====================
if __name__ == '__main__':
    # Startup banner describing the runtime configuration.
    print("\n" + "="*60)
    print("πŸš€ VIDEO ANALYSIS SYSTEM STARTING")
    print("="*60)
    print(f"πŸ“ Videos Bucket: {BUCKET_NAME}")
    print(f"πŸ“Š Reports Bucket: {REPORTS_BUCKET_NAME}")
    print(f"πŸ“„ Storage Type: PDF ONLY")
    print(f"🎯 Processing: ONE VIDEO AT A TIME")
    print(f"⏱️ Check interval: 30 seconds")
    print("="*60 + "\n")
    # Setup storage and database (best-effort: return values are ignored,
    # so the server starts even if provisioning fails).
    setup_storage()
    setup_database_tables()
    # Start background processor as a daemon thread so it dies with the
    # Flask process.
    processor = threading.Thread(target=process_videos, daemon=True)
    processor.start()
    # Start Flask server (PORT defaults to 7860, e.g. for HF Spaces).
    port = int(os.getenv("PORT", 7860))
    app.run(host='0.0.0.0', port=port, debug=False)