File size: 6,929 Bytes
2d96524
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
import os
import json
import torch
import torch.nn as nn
from torchvision import models, transforms
from transformers import BlipProcessor, BlipForQuestionAnswering
from PIL import Image
from tqdm import tqdm
import argparse
import random

# ==========================================
# 1. Class configuration (Hardcoded)
# ==========================================
# Labels emitted by the detector head; index order must match the trained checkpoint.
FINAL_CLASSES = ['fake_ai', 'fake_splice', 'real']

class ManipulateDetector:
    """ResNet-18 classifier that scores image authenticity.

    Predicts one of ``FINAL_CLASSES`` for an image and maps the class
    confidence onto a single authenticity score in [0, 1]
    (higher = more likely authentic).
    """

    def __init__(self, model_path, device):
        """Build the backbone and load fine-tuned weights.

        Args:
            model_path: Path to a ``state_dict`` checkpoint (.pth).
            device: Torch device string, e.g. "cuda" or "cpu".
        """
        self.device = device
        self.class_names = FINAL_CLASSES
        print(f"🔧 Initializing Detector with classes: {self.class_names}")

        # Fix: `pretrained=` is deprecated and removed in modern torchvision;
        # `weights=None` is the supported way to get a randomly-initialized net.
        self.model = models.resnet18(weights=None)
        num_ftrs = self.model.fc.in_features
        self.model.fc = nn.Linear(num_ftrs, len(self.class_names))

        try:
            state_dict = torch.load(model_path, map_location=device)
            # strict=False tolerates minor key mismatches (e.g. a renamed head).
            self.model.load_state_dict(state_dict, strict=False)
            print("✅ Weights loaded successfully!")
        except Exception as e:
            # Best-effort: keep running with random weights, but warn loudly.
            print(f"⚠️ Warning loading weights: {e}")

        self.model.to(device)
        self.model.eval()

        # Standard ImageNet preprocessing (matches ResNet training statistics).
        self.transform = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])

    def predict(self, image_path):
        """Classify one image file.

        Args:
            image_path: Path to a JPEG/PNG image on disk.

        Returns:
            Tuple ``(authenticity_score, label)`` where the score is in
            [0, 1] and label is one of ``self.class_names``.
        """
        # Fix: context manager releases the underlying file handle promptly
        # (the original left it to the garbage collector).
        with Image.open(image_path) as img:
            image = img.convert('RGB')
        img_t = self.transform(image).unsqueeze(0).to(self.device)

        with torch.no_grad():
            outputs = self.model(img_t)
            probs = torch.nn.functional.softmax(outputs, dim=1)
            score, preds = torch.max(probs, 1)

        class_idx = preds.item()

        # Defensive fallback; with a len(class_names)-wide head this branch
        # is unreachable, but it guards against a mismatched checkpoint.
        if class_idx < len(self.class_names):
            label = self.class_names[class_idx]
        else:
            label = "fake_splice"

        # Fix: reuse the max probability computed above instead of
        # re-indexing probs[0][class_idx] (same value, one less lookup).
        confidence = score.item()

        # Map class confidence onto the authenticity axis: a confident
        # 'real' is highly authentic; a confident fake is highly inauthentic.
        if label == 'real':
            authenticity_score = confidence
        else:
            authenticity_score = 1.0 - confidence

        return authenticity_score, label

# ==========================================
# 2. Smart Forensic Reasoner (VLM)
# ==========================================
class ForensicVLM:
    """BLIP-VQA wrapper that produces a forensic explanation for a verdict."""

    def __init__(self, device):
        """Load the BLIP VQA model; degrade gracefully if loading fails.

        Args:
            device: Torch device string, e.g. "cuda" or "cpu".
        """
        self.device = device
        print("🔧 Loading VLM (BLIP Pro)...")
        try:
            self.processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-base")
            self.model = BlipForQuestionAnswering.from_pretrained("Salesforce/blip-vqa-base").to(device)
            self.model.eval()
            self.loaded = True
        except Exception as e:
            # Fix: the original bare `except:` swallowed every error
            # (even KeyboardInterrupt) with no trace of the cause.
            print(f"⚠️ Failed to load VLM: {e}")
            self.loaded = False

    def ask(self, image, question):
        """Run one visual-question-answering query and return the decoded answer."""
        inputs = self.processor(image, question, return_tensors="pt").to(self.device)
        out = self.model.generate(**inputs)
        return self.processor.decode(out[0], skip_special_tokens=True)

    def explain(self, image_path, pred_label):
        """Build a natural-language forensic report for the detector's verdict.

        Args:
            image_path: Path to the image under analysis.
            pred_label: Detector label ('real', 'fake_ai' or 'fake_splice').

        Returns:
            A short English report string.
        """
        if not self.loaded:
            return "System error during analysis."
        image = Image.open(image_path).convert('RGB')

        # Authentic image: describe the scene positively.
        if pred_label == 'real':
            scene_desc = self.ask(image, "What type of room is this?")
            return f"Authentic scene. The {scene_desc} displays consistent global illumination and natural perspective geometry."

        # --- Forensic investigation for a manipulated image ---

        # 1. Identify the suspicious object (instead of the generic word "furniture").
        suspicious_object = self.ask(image, "What is the main piece of furniture in this image?")
        if "room" in suspicious_object or "living" in suspicious_object:
            suspicious_object = "furniture object"  # fallback

        # 2. Check the shadow cast by that specific object.
        shadow_check = self.ask(image, f"Does the {suspicious_object} cast a realistic shadow on the floor?")

        # 3. Check lighting consistency with the background.
        light_check = self.ask(image, "Is the lighting on the furniture matching the background?")

        # 4. Check for "floating" (bad compositing) artifacts.
        float_check = self.ask(image, f"Does the {suspicious_object} look like it is floating?")

        # --- Assemble the smart report ---
        reasons = []

        if "no" in shadow_check.lower():
            reasons.append(f"the {suspicious_object} lacks a grounded contact shadow")

        if "no" in light_check.lower():
            reasons.append(f"illumination on the {suspicious_object} contradicts the room's light source")

        if "yes" in float_check.lower():
            reasons.append(f"spatial disconnection observed (floating {suspicious_object})")

        # The detector flagged the image but no specific cue fired:
        # fall back to a generic manipulation reason.
        if not reasons:
            reasons.append(f"digital artifacts detected around the {suspicious_object}")

        # Compose the final sentence.
        joined_reasons = "; ".join(reasons)
        final_report = f"Manipulation detected: {joined_reasons}. The integration of the {suspicious_object} into the scene is physically inconsistent."

        return final_report

def main():
    """CLI entry point: score every image in --input_dir and write a JSON report."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--input_dir", type=str, default="./test_images")
    parser.add_argument("--output_file", type=str, default="predictions.json")
    args = parser.parse_args()

    device = "cuda" if torch.cuda.is_available() else "cpu"
    model_path = "/content/drive/MyDrive/RealEstate_Challenge/detector_model.pth"

    if not os.path.exists(model_path):
        print("❌ Model file not found!")
        return

    detector = ManipulateDetector(model_path, device)
    vlm = ForensicVLM(device)

    results = []
    if not os.path.exists(args.input_dir):
        # Fix: the original returned silently, leaving no clue why
        # no output file was produced.
        print(f"❌ Input directory not found: {args.input_dir}")
        return

    # Fix: case-insensitive extension match, and accept .jpeg too
    # (the original missed .JPG/.PNG/.jpeg files).
    files = [f for f in os.listdir(args.input_dir)
             if f.lower().endswith(('.jpg', '.jpeg', '.png'))]
    print(f"🚀 Processing {len(files)} images...")

    for img_file in tqdm(files):
        img_path = os.path.join(args.input_dir, img_file)  # build once, reuse
        try:
            score, label = detector.predict(img_path)
            reasoning = vlm.explain(img_path, label)
            results.append({
                "image_name": img_file,
                "authenticity_score": round(float(score), 4),
                "manipulation_type": label,
                "vlm_reasoning": reasoning
            })
        except Exception as e:
            # Fix: the original bare `except: pass` hid per-image failures.
            # Keep going, but report which file failed and why.
            print(f"⚠️ Skipping {img_file}: {e}")

    # ensure_ascii=False keeps any non-ASCII reasoning text human-readable.
    with open(args.output_file, 'w', encoding='utf-8') as f:
        json.dump(results, f, indent=2, ensure_ascii=False)
    print("✅ Done!")

# Standard script guard: run the pipeline only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    main()