Spaces:
Running
Running
| """ | |
| Video Analyzer - Deteksi deepfake dan manipulasi video | |
| """ | |
| from __future__ import annotations | |
| import io | |
| import time | |
| import tempfile | |
| import os | |
| from typing import Any, Dict, List, Tuple, Optional | |
| from pathlib import Path | |
| from .base_model import BaseAnalyzer, AnalysisResult | |
| from .image_analyzer import ImageAnalyzer | |
# Lazy-import placeholders: the heavy optional dependencies are bound by
# VideoAnalyzer.initialize() at runtime so importing this module stays cheap
# and works even when OpenCV / torch are not installed.
PIL = None
np = None
cv2 = None
torch = None
class VideoAnalyzer(BaseAnalyzer):
    """
    Analyzer for videos. Detects:
      - Deepfakes (face manipulation)
      - Audio-visual sync issues
      - Frame manipulation
      - Temporal inconsistencies
      - Metadata anomalies

    Strategy: cheap local heuristics (OpenCV frame sampling + Haar face
    detection) combined with an optional Gemini multimodal pass that does
    the heavy lifting when a GEMINI_API_KEY is configured.
    """

    def __init__(self):
        super().__init__("VideoAnalyzer")
        self.image_analyzer = ImageAnalyzer()
        self.face_detector = None
        # BUGFIX: analyze() reads self.genai_model unconditionally, but it
        # used to be assigned only inside initialize(); calling analyze()
        # first raised AttributeError. Default it here.
        self.genai_model = None
        self.frame_sample_rate = 30  # Sample every N frames
        self.max_frames = 50         # Maximum frames to analyze

    def initialize(self) -> bool:
        """
        Initialize video processing libraries and (optionally) Gemini.

        Returns:
            True on success, False if any unexpected error occurred.
            Missing OpenCV is tolerated (local analysis is skipped later).
        """
        try:
            # Bind the module-level lazy-import placeholders.
            global cv2, np

            # Set up Gemini Vision only when an API key is present.
            api_key = os.getenv('GEMINI_API_KEY')
            if api_key:
                try:
                    import google.generativeai as genai
                    genai.configure(api_key=api_key)
                    self.genai_model = genai.GenerativeModel('models/gemini-flash-latest')
                    print("[VideoAnalyzer] Gemini Flash Latest Multimodal initialized")
                except Exception as e:
                    print(f"[VideoAnalyzer] Failed to initialize Gemini: {e}")
                    self.genai_model = None
            else:
                self.genai_model = None

            import numpy as _np
            np = _np

            try:
                import cv2 as _cv2
                cv2 = _cv2
            except ImportError:
                # OpenCV is optional: analyze() degrades to AI-only mode.
                print("[VideoAnalyzer] OpenCV not available")
                cv2 = None

            # (Re)create the ImageAnalyzer used for per-frame analysis.
            from .image_analyzer import ImageAnalyzer
            self.image_analyzer = ImageAnalyzer()
            self.image_analyzer.initialize()

            self.is_initialized = True
            print("[VideoAnalyzer] Initialization complete")
            return True
        except Exception as e:
            print(f"[VideoAnalyzer] Initialization failed: {e}")
            self.is_initialized = False
            return False

    def analyze(self, video_source: Any) -> AnalysisResult:
        """
        Analyze a video for deepfakes and manipulation.

        Hybrid approach: local frame-by-frame heuristics plus (when a
        Gemini model is available) a multimodal pass over the whole video.

        Args:
            video_source: a filesystem path (str/Path), raw bytes, or a
                file-like object exposing .read().

        Returns:
            AnalysisResult with a 0-100 score (higher = more likely
            authentic), confidence, findings/warnings, and metadata.
        """
        start_time = time.time()

        # Persist non-path inputs (bytes / streams) to a temp file so both
        # OpenCV and the Gemini uploader can read them by path.
        temp_path = None
        video_path = str(video_source)
        if not isinstance(video_source, (str, Path)):
            try:
                tfile = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4')
                tfile.write(video_source.read() if hasattr(video_source, 'read') else video_source)
                tfile.close()
                video_path = tfile.name
                temp_path = video_path
            except Exception as e:
                # BUGFIX: use keyword arguments — the old positional call
                # passed 0 into the metadata slot of _create_result.
                return self._create_result(
                    score=0,
                    confidence=0,
                    findings=[],
                    warnings=[f"Gagal memproses input video: {e}"],
                    analysis_time=0,
                )

        findings = []
        warnings = []

        # 1. Local frame extraction & container probing via OpenCV.
        frames = []
        video_info = {'fps': 0, 'frame_count': 0, 'width': 0, 'height': 0}
        if cv2:
            cap = None
            try:
                cap = cv2.VideoCapture(video_path)
                if not cap.isOpened():
                    raise ValueError("Could not open video")
                video_info = {
                    'fps': cap.get(cv2.CAP_PROP_FPS),
                    'frame_count': int(cap.get(cv2.CAP_PROP_FRAME_COUNT)),
                    'width': int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
                    'height': int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                }
                # Sample a limited number of frames spread across the clip.
                frames = self._extract_frames(cap, video_info['frame_count'])
                findings.append(f"Resolusi Video: {video_info['width']}x{video_info['height']} @ {video_info['fps']:.1f}fps")
            except Exception as e:
                warnings.append(f"Gagal membaca video secara lokal: {e}")
            finally:
                # BUGFIX: release the capture even when reading fails.
                if cap is not None:
                    cap.release()

        # 2. Cheap heuristics on the sampled frames.
        face_result = self._analyze_faces(frames)
        temporal_result = self._check_temporal_consistency(frames)
        deepfake_result = self._detect_deepfake_indicators(frames, face_result)
        if deepfake_result['is_deepfake']:
            warnings.append(f"Indikator Deepfake terdeteksi (heuristic): {deepfake_result['indicators_found']} tanda")

        # 3. Gemini multimodal analysis (the heavy lifter).
        ai_video_result = {'performed': False}
        if self.genai_model:
            ai_video_result = self._analyze_with_gemini_video(video_path)
            if ai_video_result['performed']:
                if ai_video_result['is_deepfake']:
                    warnings.append(f"AI Multimodal: {ai_video_result['reasoning']}")
                else:
                    findings.append(f"AI Multimodal: {ai_video_result['reasoning']}")
        else:
            warnings.append("Gemini model tidak tersedia untuk analisis video mendalam")

        # Best-effort removal of the temp file we created above.
        if temp_path and os.path.exists(temp_path):
            try:
                os.remove(temp_path)
            except OSError:
                pass

        # Blend scores: heuristics alone carry medium confidence; when the
        # AI pass ran, weight it 70/30 — the multimodal video analysis is
        # much stronger than these simple heuristics.
        heuristic_score = 1.0 - deepfake_result['confidence']
        final_score = heuristic_score
        confidence = 0.6
        if ai_video_result['performed']:
            ai_score = ai_video_result['score']
            ai_conf = ai_video_result['confidence']
            final_score = (heuristic_score * 0.3) + (ai_score * 0.7)
            confidence = max(confidence, ai_conf)

        analysis_time = time.time() - start_time
        return self._create_result(
            score=final_score * 100,
            confidence=confidence,
            findings=findings,
            warnings=warnings,
            metadata={
                'video_info': video_info,
                'heuristic_deepfake': deepfake_result,
                'ai_multimodal': ai_video_result,
                'temporal_consistency': temporal_result
            },
            analysis_time=analysis_time
        )

    def _analyze_with_gemini_video(self, video_path: str) -> Dict[str, Any]:
        """
        Upload the video to Gemini, wait for processing, and parse its
        JSON verdict.

        Returns:
            {'performed': True, 'score': 0-1, 'confidence', 'is_deepfake',
             'reasoning'} on success, or {'performed': False, 'error': ...}.
        """
        print(f"[VideoAnalyzer] Uploading video to Gemini: {video_path}")
        try:
            import google.generativeai as genai

            # 1. Upload the file to Gemini's file store.
            video_file = genai.upload_file(path=video_path)

            # 2. Poll until Gemini finishes server-side processing.
            while video_file.state.name == "PROCESSING":
                print(".", end="", flush=True)
                time.sleep(1)
                video_file = genai.get_file(video_file.name)
            if video_file.state.name == "FAILED":
                raise ValueError("Gemini video processing failed")
            print("\n[VideoAnalyzer] Video processed by Gemini. Generating analysis...")

            # 3. Ask for a structured deepfake assessment (prompt is in
            # Indonesian by design — it is a runtime string, not a comment).
            prompt = """
            Peran: Kamu adalah Spesialis Deteksi Deepfake & Manipulasi Video Elit.
            Tugas: Analisis video ini frame-by-frame (jika memungkinkan) dan audionya untuk menemukan tanda DEEPFAKE.

            CHECKLIST ANALISIS:
            1. VISUAL (Wajah & Tubuh):
               - LIP-SYNC: Apakah gerakan mulut pas 100% dengan suara? (Deepfake sering slip 0.1 detik).
               - MATA: Apakah subjek berkedip secara alami? (Jarang berkedip = tanda bahaya).
               - TEKSTUR: Apakah kulit terlihat terlalu mulus (blur) atau gigi terlihat menyatu?
               - TEPIAN WAJAH: Periksa area di sekitar dagu dan rambut. Apakah ada efek 'jitter' atau kabur saat bergerak?
            2. TEMPORAL & LATAR:
               - Apakah latar belakang ikut bergerak/menyot saat wajah bergerak? (Warping artifacts).
               - Apakah pencahayaan berubah secara tidak wajar antar frame?
            3. AUDIO:
               - Apakah ada suara latar yang mendadak hilang (noise gating agresif)?
               - Apakah intonasi suara terdengar robotik/monoton meski ekspresi wajah emosional?

            PENILAIAN AKHIR:
            - Skor 0-35: Terkonfirmasi Deepfake / Manipulasi Berat.
            - Skor 36-60: Mencurigakan (Low Quality atau Edit Ringan).
            - Skor 80-100: Video Asli / Organik.

            Format JSON:
            {
                "score": <0-100>,
                "is_deepfake": <boolean>,
                "reasoning": "<Sebutkan timestamp atau tanda visual spesifik (misal: 'Bibir tidak sinkron di detik 0:05')>"
            }
            """
            response = self.genai_model.generate_content([video_file, prompt])

            # 4. Best-effort cleanup of the uploaded file.
            try:
                genai.delete_file(video_file.name)
            except Exception:
                pass

            # Strip optional markdown fences before parsing the JSON body.
            import json
            content = response.text.strip()
            if "```json" in content:
                content = content.split("```json")[1].split("```")[0]
            elif "```" in content:
                content = content.split("```")[1].split("```")[0]
            ai_json = json.loads(content)
            return {
                'performed': True,
                'score': ai_json.get('score', 50) / 100.0,
                'confidence': 0.95,
                'is_deepfake': ai_json.get('is_deepfake', False),
                'reasoning': ai_json.get('reasoning', '')
            }
        except Exception as e:
            print(f"[VideoAnalyzer] Gemini Video Analysis Error: {e}")
            return {'performed': False, 'error': str(e)}

    def _extract_frames(self, cap, total_frames: int) -> List[np.ndarray]:
        """
        Sample up to self.max_frames frames spread evenly across the video.

        Args:
            cap: an opened cv2.VideoCapture.
            total_frames: frame count reported by the container.
        """
        frames: List[np.ndarray] = []
        if total_frames <= 0:
            return frames
        # BUGFIX: cap the sample count at the clip length (the old code
        # seeked the same indices repeatedly on short clips) and clamp the
        # last index so 1-frame videos don't produce a -1 seek target.
        num_frames = min(getattr(self, 'max_frames', 10), total_frames)
        last_idx = max(total_frames - 2, 0)
        indices = np.linspace(0, last_idx, num_frames, dtype=int)
        for idx in indices:
            cap.set(cv2.CAP_PROP_POS_FRAMES, int(idx))
            ret, frame = cap.read()
            if ret:
                frames.append(frame)
        return frames

    def _analyze_faces(self, frames: List[np.ndarray]) -> Dict[str, Any]:
        """
        Detect faces in the sampled frames with OpenCV's Haar cascade.

        Returns a dict with a 0-1 'score', 'findings', 'warnings',
        'faces_per_frame' counts and (when detection ran) 'frames_with_faces'.
        """
        findings: List[str] = []
        warnings: List[str] = []
        if not cv2 or not frames:
            return {'score': 0.5, 'findings': [], 'warnings': [], 'faces_per_frame': []}

        # Use the cascade bundled with OpenCV; degrade gracefully if absent.
        cascade_path = cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
        if not os.path.exists(cascade_path):
            # BUGFIX: include 'findings' so the result shape is consistent
            # with the other return paths.
            return {
                'score': 0.5,
                'findings': [],
                'warnings': ["Face detector model missing"],
                'faces_per_frame': []
            }

        face_detector = cv2.CascadeClassifier(cascade_path)
        faces_per_frame = []
        face_positions = []
        for frame in frames:
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            faces = face_detector.detectMultiScale(gray, 1.1, 5, minSize=(30, 30))
            faces_per_frame.append(len(faces))
            if len(faces) > 0:
                face_positions.append(faces[0])

        total_faces = sum(faces_per_frame)
        frames_with_faces = sum(1 for f in faces_per_frame if f > 0)
        if total_faces > 0:
            findings.append(f"Wajah terdeteksi di {frames_with_faces}/{len(frames)} frame")

        # Finding any face raises confidence that the heuristics apply.
        score = 0.8 if frames_with_faces > 0 else 0.5
        return {
            'score': score,
            'findings': findings,
            'warnings': warnings,
            'faces_per_frame': faces_per_frame,
            'frames_with_faces': frames_with_faces
        }

    def _check_temporal_consistency(self, frames: List[np.ndarray]) -> Dict[str, Any]:
        """
        Measure the mean inter-frame pixel difference across the samples.

        Returns {'inconsistent': bool, 'score': mean normalized diff in [0, 1]}.
        """
        if len(frames) < 2:
            return {'inconsistent': False, 'score': 0}
        # Mean absolute difference between consecutive frames, normalized.
        differences = [
            np.mean(cv2.absdiff(prev, cur)) / 255
            for prev, cur in zip(frames, frames[1:])
        ]
        avg_diff = np.mean(differences) if differences else 0
        return {'inconsistent': False, 'score': avg_diff}

    def _detect_deepfake_indicators(self, frames: List[np.ndarray], face_result: Dict[str, Any]) -> Dict[str, Any]:
        """
        Cheap heuristic deepfake check: a wildly varying face count across
        sampled frames (faces flickering in/out) is treated as suspicious.
        """
        indicators = 0
        counts = face_result.get('faces_per_frame', [])
        if counts and np.var(counts) > 0.5:
            indicators += 1
        return {
            'is_deepfake': indicators > 0,
            # Confidence here means "confidence the video is authentic";
            # analyze() converts it via 1.0 - confidence.
            'confidence': 0.4 if indicators > 0 else 0.8,
            'indicators_found': indicators
        }

    def _analyze_audio_sync(self, video_path: str) -> Dict[str, Any]:
        # Placeholder: audio/visual sync analysis is not implemented yet;
        # returns a neutral score.
        return {'score': 0.5}

    def _calculate_final_score(self, face, temporal, quality, deepfake, audio) -> float:
        # Placeholder kept for interface compatibility; analyze() computes
        # the real blended score inline.
        return 50.0