| | |
import os

import matplotlib.pyplot as plt
import torch
from PIL import Image
from peft import PeftModel
from transformers import AutoProcessor, AutoModelForCausalLM
| |
|
| | |
def _collect_images(original_image_path, cam_image_path, cam_overlay_path, comparison_image_path):
    """Load the original image plus any optional auxiliary images as RGB PIL images.

    Returns:
        tuple[list[Image.Image], list[str]]: parallel lists of images and display titles.
        The original image is always first.
    """
    images = [Image.open(original_image_path).convert("RGB")]
    titles = ["Original Image"]
    optional = (
        (cam_image_path, "CAM Image"),
        (cam_overlay_path, "CAM Overlay"),
        (comparison_image_path, "Comparison Image"),
    )
    for path, title in optional:
        if path:
            images.append(Image.open(path).convert("RGB"))
            titles.append(title)
    return images, titles


def _show_image_grid(images, titles):
    """Display the images in a 2-column matplotlib grid with their titles."""
    rows = (len(images) + 1) // 2
    cols = min(2, len(images))
    fig, axs = plt.subplots(rows, cols, figsize=(12, 6 * rows))

    # plt.subplots returns a bare Axes for a 1x1 grid and an ndarray otherwise;
    # normalize to a flat list so the loop below is uniform.
    axes = [axs] if len(images) == 1 else list(axs.flatten())

    for ax, img, title in zip(axes, images, titles):
        ax.imshow(img)
        ax.set_title(title)
        ax.axis('off')

    # Hide any leftover axis when the image count is odd (e.g. 3 images in a 2x2 grid).
    for ax in axes[len(images):]:
        ax.axis('off')

    plt.tight_layout()
    plt.show()


def analyze_deepfake(original_image_path, cam_image_path=None, cam_overlay_path=None, comparison_image_path=None, custom_prompt=None):
    """Analyze multiple images for signs of deepfakes with detailed explanation.

    Loads a Llama 3.2 Vision base model with a deepfake-explainer LoRA adapter,
    displays the provided images, and asks the model to explain whether the
    original image is a deepfake.

    Args:
        original_image_path: Path to the image under analysis (required).
        cam_image_path: Optional path to a class-activation-map image.
        cam_overlay_path: Optional path to a CAM overlay image.
        comparison_image_path: Optional path to a side-by-side comparison image.
        custom_prompt: Optional prompt overriding the default analysis request.

    Returns:
        str: the model's generated analysis, with the echoed prompt stripped
        when it appears in the decoded output.
    """
    base_model_id = "unsloth/llama-3.2-11b-vision-instruct"
    adapter_id = "saakshigupta/deepfake-explainer-2"

    processor = AutoProcessor.from_pretrained(base_model_id)

    # NOTE(review): AutoModelForCausalLM is used for a vision-instruct checkpoint;
    # confirm it resolves to the multimodal class for this model id.
    model = AutoModelForCausalLM.from_pretrained(
        base_model_id,
        torch_dtype=torch.float16,
        device_map="auto"
    )

    # Attach the fine-tuned deepfake-explainer adapter on top of the base weights.
    model = PeftModel.from_pretrained(model, adapter_id)

    images, image_titles = _collect_images(
        original_image_path, cam_image_path, cam_overlay_path, comparison_image_path
    )
    original_image = images[0]

    _show_image_grid(images, image_titles)

    if custom_prompt is None:
        prompt = "Analyze these images carefully and determine if there's a deepfake. Provide both a technical explanation and a simple explanation anyone can understand."
    else:
        prompt = custom_prompt

    try:
        inputs = processor(text=prompt, images=images, return_tensors="pt")
    except Exception as e:
        # Best-effort fallback: some processors reject multi-image batches.
        print(f"Warning: Unable to process multiple images ({e}). Falling back to original image only.")
        inputs = processor(text=prompt, images=original_image, return_tensors="pt")

    # Work around a degenerate (zero-sized) cross-attention mask by rebuilding
    # it as an all-ones mask of the expected shape.
    if 'cross_attention_mask' in inputs and 0 in inputs['cross_attention_mask'].shape:
        batch_size, seq_len, _, num_tiles = inputs['cross_attention_mask'].shape
        # presumably the number of visual tokens per tile for this model — TODO confirm
        visual_features = 6404
        new_mask = torch.ones(
            (batch_size, seq_len, visual_features, num_tiles),
            device=inputs['cross_attention_mask'].device
        )
        inputs['cross_attention_mask'] = new_mask
        print("Fixed cross-attention mask dimensions")

    # Move tensors to the model device; non-tensor entries are dropped on purpose.
    inputs = {k: v.to(model.device) for k, v in inputs.items() if isinstance(v, torch.Tensor)}

    print("Generating analysis...")
    with torch.no_grad():
        output_ids = model.generate(
            **inputs,
            max_new_tokens=512,
            do_sample=True,  # required for temperature/top_p to take effect
            temperature=0.7,
            top_p=0.9
        )

    response = processor.decode(output_ids[0], skip_special_tokens=True)

    # Strip the echoed prompt so only the model's answer is returned.
    if prompt in response:
        result = response.split(prompt)[-1].strip()
    else:
        result = response

    return result
| |
|
| | |
| | if __name__ == "__main__": |
| | |
| | original_image_path = input("Enter path to original image: ") |
| | |
| | cam_image_path = input("Enter path to CAM image (or press Enter to skip): ") |
| | if cam_image_path.strip() == "": |
| | cam_image_path = None |
| | |
| | cam_overlay_path = input("Enter path to CAM overlay image (or press Enter to skip): ") |
| | if cam_overlay_path.strip() == "": |
| | cam_overlay_path = None |
| | |
| | comparison_image_path = input("Enter path to comparison image (or press Enter to skip): ") |
| | if comparison_image_path.strip() == "": |
| | comparison_image_path = None |
| |
|
| | |
| | analysis = analyze_deepfake(original_image_path, cam_image_path, cam_overlay_path, comparison_image_path) |
| |
|
| | |
| | print(" |
| | ===== DEEPFAKE ANALYSIS RESULT ===== |
| | ") |
| | print(analysis) |
| |
|