Upload 2 files
- predict.py +139 -0
- requirements.txt +7 -0
predict.py
ADDED
@@ -0,0 +1,139 @@
# predict.py
import argparse
import os
import json
import re
import torch
from PIL import Image
from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor, BitsAndBytesConfig  # Qwen2.5-VL checkpoints need the Qwen2_5_VL* class
from qwen_vl_utils import process_vision_info

# --- CONFIGURATION ---
# We use the base Qwen model. It will download automatically on the first run.
MODEL_ID = "Qwen/Qwen2.5-VL-7B-Instruct"

def load_model():
    """Loads the model with 4-bit quantization for efficiency."""
    print(f"⏳ Loading Model: {MODEL_ID}...")
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16
    )

    # Use 'sdpa' implementation for broad compatibility (Colab T4 / RTX GPUs)
    model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
        MODEL_ID,
        quantization_config=bnb_config,
        device_map="auto",
        trust_remote_code=True,
        attn_implementation="sdpa"
    )
    processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
    print("✅ Model Loaded.")
    return model, processor

def analyze_image(model, processor, image_path):
    """Runs the VLM analysis."""
    prompt_text = (
        "You are a Forensic Image Analyst. Analyze this image for GenAI manipulation.\n"
        "Focus on: Lighting inconsistencies, structural logic, and unnatural textures.\n"
        "Provide your output STRICTLY as a JSON object with these keys:\n"
        "- 'authenticity_score': float (0.0=Real, 1.0=Fake)\n"
        "- 'manipulation_type': string (e.g., 'Inpainting', 'None')\n"
        "- 'vlm_reasoning': string (max 2 sentences)\n"
    )

    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image", "image": image_path},
                {"type": "text", "text": prompt_text},
            ],
        }
    ]

    # Preprocess
    text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    image_inputs, video_inputs = process_vision_info(messages)
    inputs = processor(
        text=[text],
        images=image_inputs,
        videos=video_inputs,
        padding=True,
        return_tensors="pt",
    ).to("cuda")

    # Generate
    with torch.no_grad():
        generated_ids = model.generate(
            **inputs,
            max_new_tokens=256,
            do_sample=False  # greedy decoding for consistent output
        )

    # Decode
    generated_ids_trimmed = [
        out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
    ]
    output_text = processor.batch_decode(
        generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
    )[0]

    return clean_json(output_text)

def clean_json(text):
    """Extracts JSON from the model response."""
    try:
        json_match = re.search(r"\{.*\}", text, re.DOTALL)
        if json_match:
            return json.loads(json_match.group(0))
        return {"authenticity_score": 0.5, "manipulation_type": "Unknown", "vlm_reasoning": "Parse Error"}
    except Exception:
        return {"authenticity_score": 0.5, "manipulation_type": "Error", "vlm_reasoning": "JSON Error"}

def main(input_dir, output_file):
    # Load model once
    model, processor = load_model()

    predictions = []

    # Process images
    valid_extensions = ('.png', '.jpg', '.jpeg', '.webp')
    files = [f for f in os.listdir(input_dir) if f.lower().endswith(valid_extensions)]

    print(f"🚀 Starting inference on {len(files)} images...")

    for img_name in files:
        img_path = os.path.join(input_dir, img_name)
        try:
            result = analyze_image(model, processor, img_path)

            entry = {
                "image_name": img_name,
                "authenticity_score": result.get("authenticity_score", 0.5),
                "manipulation_type": result.get("manipulation_type", "Unknown"),
                "vlm_reasoning": result.get("vlm_reasoning", "No reasoning provided.")
            }
            predictions.append(entry)
            print(f"Processed: {img_name}")
        except Exception as e:
            print(f"Failed to process {img_name}: {e}")

    # Save output
    with open(output_file, 'w') as f:
        json.dump(predictions, f, indent=4)
    print(f"✅ Submission file saved to: {output_file}")

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--input_dir", type=str, required=True, help="Path to input images")
    parser.add_argument("--output_file", type=str, required=True, help="Path to output JSON")
    args = parser.parse_args()

    if not os.path.exists(args.input_dir):
        raise FileNotFoundError(f"Input directory {args.input_dir} not found.")

    main(args.input_dir, args.output_file)
requirements.txt
ADDED
@@ -0,0 +1,7 @@
torch>=2.4.0
transformers>=4.49.0  # Qwen2.5-VL model classes require 4.49 or newer
accelerate>=0.33.0
qwen-vl-utils
bitsandbytes
pillow
numpy
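Example invocation once the dependencies above are installed (the directory and file names below are placeholders; the script expects a CUDA GPU, since it loads the model in 4-bit via bitsandbytes and moves inputs to "cuda"):

python predict.py --input_dir ./test_images --output_file predictions.json

The output file is a JSON list with one entry per processed image, each carrying the keys image_name, authenticity_score, manipulation_type, and vlm_reasoning.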