File size: 3,171 Bytes
89e8242
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import os
import sys
import torch
import numpy as np
from scipy.optimize import minimize

# Add path so we can import backend packages
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "app")))

from services.video.video_detector import analyze_video

def sigmoid(x):
    """Return the logistic function ``1 / (1 + exp(-x))`` without overflow.

    The naive formula evaluates ``exp(-x)``, which overflows (and emits a
    RuntimeWarning) for large negative ``x``. Here the exponential is only
    ever taken of a non-positive number, so it stays within ``[0, 1]``.

    Args:
        x: A scalar or array-like of real numbers.

    Returns:
        A NumPy scalar for scalar input, otherwise an array with the same
        shape as ``x``, holding values in ``[0, 1]``.
    """
    x = np.asarray(x)
    if not np.issubdtype(x.dtype, np.floating):
        # Integer/bool input: promote so the arithmetic below is done in float.
        x = x.astype(float)

    # exp(-|x|) is in (0, 1] for every finite x, so neither branch can overflow.
    decay = np.exp(-np.abs(x))
    result = np.where(x >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))

    # Indexing with () unwraps a 0-d array into a scalar and leaves
    # higher-dimensional arrays untouched.
    return result[()]

def inverse_sigmoid(p):
    """Map a probability (scalar or array) to its logit, ``log(p / (1 - p))``.

    The input is first clamped to ``[1e-15, 1 - 1e-15]`` so that exact 0 or 1
    produce large finite logits instead of ``-inf`` / ``+inf``.
    """
    eps = 1e-15
    bounded = np.clip(p, eps, 1 - eps)
    odds = bounded / (1 - bounded)
    return np.log(odds)

def nll_loss(T, logits, labels):
    """Negative log likelihood loss for temperature scaling.

    Computes the mean binary cross-entropy between ``labels`` and
    ``sigmoid(logits / T)``. The log-probabilities are evaluated directly
    with ``logaddexp`` rather than by taking ``log`` of a sigmoid, so large
    ``|logits / T|`` (e.g. small ``T``) cannot overflow ``exp``.

    Args:
        T: Temperature; a positive scalar or a length-1 array (the form
            ``scipy.optimize.minimize`` passes to its objective).
        logits: Array-like of raw logits, one per sample.
        labels: Array-like of 0/1 targets, same length as ``logits``.

    Returns:
        The mean negative log likelihood as a NumPy float.
    """
    logits = np.asarray(logits, dtype=float)
    labels = np.asarray(labels, dtype=float)
    scaled_logits = logits / T

    # Same numerical-stability bound as clipping the probability to
    # [1e-15, 1 - 1e-15], expressed in log space.
    eps = 1e-15
    log_floor = np.log(eps)
    log_ceil = np.log1p(-eps)

    # log(sigmoid(z)) = -log(1 + exp(-z)); log(1 - sigmoid(z)) = -log(1 + exp(z)).
    log_p = np.clip(-np.logaddexp(0.0, -scaled_logits), log_floor, log_ceil)
    log_not_p = np.clip(-np.logaddexp(0.0, scaled_logits), log_floor, log_ceil)

    return -np.mean(labels * log_p + (1 - labels) * log_not_p)

def _collect_scores(video_paths, tag):
    """Run the detector on each video and return the 0-1 scores that succeeded.

    Args:
        video_paths: Iterable of video file paths to analyze.
        tag: Ground-truth tag ("REAL" or "AI") used only in the progress output.

    Returns:
        A list of ``fused_score / 100.0`` values, one per video that was
        processed without error, in input order.
    """
    scores = []
    for path in video_paths:
        try:
            result = analyze_video(path)
            fused_score = result['fused_score']
            score = fused_score / 100.0  # scale to 0-1
            verdict = result['verdict']
        except Exception as e:
            # Deliberately best-effort: one unreadable video must not abort
            # the whole calibration run.
            print(f"Failed to process {path}: {e}")
            continue
        # Record the sample only after everything needed was read, so a video
        # reported as failed never contributes a score.
        scores.append(score)
        print(f"{tag} video {os.path.basename(path)} -> Score: {fused_score}% Verdict: {verdict}")
    return scores


def main():
    """Fit a temperature-scaling parameter on the local real/AI video dataset.

    Scores every ``.mp4`` under ``test_data/dataset/real`` (label 0) and
    ``test_data/dataset/ai`` (label 1) with ``analyze_video``, converts the
    scores to logits, finds the temperature ``T`` in ``[0.01, 10.0]`` that
    minimizes ``nll_loss``, and prints original versus calibrated scores.

    Returns early (after printing a message) when either dataset directory is
    missing or when no video could be processed.
    """
    real_dir = "test_data/dataset/real"
    ai_dir = "test_data/dataset/ai"

    if not os.path.exists(real_dir) or not os.path.exists(ai_dir):
        print("Please run downloaded_samples.py first.")
        return

    # Sorted so the processing order (and therefore the report) is
    # reproducible; os.listdir returns entries in arbitrary order.
    real_videos = [os.path.join(real_dir, f) for f in sorted(os.listdir(real_dir)) if f.endswith(".mp4")]
    ai_videos = [os.path.join(ai_dir, f) for f in sorted(os.listdir(ai_dir)) if f.endswith(".mp4")]

    print(f"Loaded {len(real_videos)} real videos and {len(ai_videos)} AI videos.")

    print("Running inference on REAL videos...")
    real_scores = _collect_scores(real_videos, "REAL")

    print("\nRunning inference on AI videos...")
    ai_scores = _collect_scores(ai_videos, "AI")

    raw_scores = real_scores + ai_scores
    if not raw_scores:
        print("No videos processed successfully.")
        return

    if not real_scores or not ai_scores:
        # With a single class the loss has no interior optimum, so the fitted
        # temperature just runs to one of the bounds.
        print("[Calibration] Warning: only one class has usable samples; the fitted temperature is not meaningful.")

    labels = np.array([0] * len(real_scores) + [1] * len(ai_scores))

    # Convert probability back to logit for temperature scaling
    logits = inverse_sigmoid(np.array(raw_scores))

    # Initial temperature
    init_T = 1.0

    # Optimize temperature
    print("\n[Calibration] Optimizing Temperature...")
    fit = minimize(nll_loss, init_T, args=(logits, labels), method='L-BFGS-B', bounds=[(0.01, 10.0)])
    if not fit.success:
        print(f"[Calibration] Warning: optimizer did not converge: {fit.message}")
    opt_T = fit.x[0]

    print(f"Optimal Temperature T found: {opt_T:.4f}")

    # Calculate calibrated scores
    calibrated_probs = sigmoid(logits / opt_T)

    print("\n---------- CALIBRATED RESULTS ----------")
    for label, raw, calibrated in zip(labels, raw_scores, calibrated_probs):
        v_type = "REAL" if label == 0 else "AI"
        orig = raw * 100
        calib = calibrated * 100
        print(f"[{v_type}] Original: {orig:05.1f}% -> Calibrated: {calib:05.1f}%")

# Run the calibration only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()