Update app.py
app.py
CHANGED
Before:

@@ -1,21 +1,17 @@
 import streamlit as st
 import torch
 from PIL import Image
-import os
 import gc
-from transformers import AutoProcessor
 from peft import PeftModel

-st.set_page_config(
-    page_title="Deepfake Image Analyzer",
-    page_icon="π",
-    layout="wide"
-)

 st.title("Deepfake Image Analyzer")
-st.markdown("

 # Function to free up memory
 def free_memory():

@@ -23,51 +19,41 @@ def free_memory():
     if torch.cuda.is_available():
         torch.cuda.empty_cache()

 @st.cache_resource
 def load_model():
-    """Load
     try:
-        # Using your original base model
         base_model_id = "unsloth/llama-3.2-11b-vision-instruct-unsloth-bnb-4bit"

         # Load processor
         processor = AutoProcessor.from_pretrained(base_model_id)

-            load_in_4bit=True,
-            bnb_4bit_compute_dtype=torch.float16,
-            bnb_4bit_use_double_quant=True,
-            bnb_4bit_quant_type="nf4",
-            bnb_4bit_quant_storage=torch.float16,
-            llm_int8_skip_modules=["lm_head"],
-            llm_int8_enable_fp32_cpu_offload=True
-        )

-        # Load the pre-quantized model with unsloth settings
-        model = AutoModelForCausalLM.from_pretrained(
             base_model_id,
-            quantization_config=quantization_config,
             torch_dtype=torch.float16,
-            low_cpu_mem_usage=True,
-            use_cache=True,
-            offload_folder="offload"  # Enable disk offloading
         )

         # Load adapter
         adapter_id = "saakshigupta/deepfake-explainer-1"
         model = PeftModel.from_pretrained(model, adapter_id)

@@ -79,144 +65,110 @@ def load_model():
         st.exception(e)
         return None, None

-def fix_processor_outputs(inputs):
-    """Fix cross-attention mask dimensions if needed"""
-    if 'cross_attention_mask' in inputs and 0 in inputs['cross_attention_mask'].shape:
-        batch_size, seq_len, _, num_tiles = inputs['cross_attention_mask'].shape
-        visual_features = 6404  # The exact dimension used in training
-        new_mask = torch.ones(
-            (batch_size, seq_len, visual_features, num_tiles),
-            device=inputs['cross_attention_mask'].device
-        )
-        inputs['cross_attention_mask'] = new_mask
-        return True, inputs
-    return False, inputs

-# Create sidebar with options
 with st.sidebar:
-    st.header("
-    temperature = st.slider("Temperature",
-    max_length = st.slider("Maximum response length", min_value=100, max_value=1000, value=500, step=50)

         height=100
     )
-    st.markdown("### About")
-    st.markdown("""
-    This app uses a fine-tuned Llama 3.2 Vision model to detect and explain deepfakes.
-
-    The analyzer looks for:
-    - Inconsistencies in facial features
-    - Unusual lighting or shadows
-    - Unnatural blur patterns
-    - Artifacts around edges
-    - Texture inconsistencies
-
-    Model by [saakshigupta](https://huggingface.co/saakshigupta/deepfake-explainer-1)
-    """)

 model, processor = load_model()
 if model is not None and processor is not None:
     st.session_state['model'] = model
     st.session_state['processor'] = processor
-    st.success("Model loaded successfully!")
 else:
-    st.error("Failed to load model

-# Main content area - file uploader
-uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])

-# Check if model is loaded
-model_loaded = 'model' in st.session_state and st.session_state['model'] is not None

-if uploaded_file is not None:
-    # Display the image
-    image = Image.open(uploaded_file).convert('RGB')
-    st.image(image, caption="Uploaded Image", use_column_width=True)

        )
-    # Decode the output
-    response = processor.decode(output_ids[0], skip_special_tokens=True)

-    # Extract the actual response (removing the prompt)
-    if custom_prompt in response:
-        result = response.split(custom_prompt)[-1].strip()
-    else:
-        result = response

-    # Display result in a nice format
-    st.success("Analysis complete!")

-    # Show technical and non-technical explanations separately if they exist
-    if "Technical Explanation:" in result and "Non-Technical Explanation:" in result:
-        technical, non_technical = result.split("Non-Technical Explanation:")
-        technical = technical.replace("Technical Explanation:", "").strip()
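The removed loading path built a 4-bit quantization config and passed it to AutoModelForCausalLM.from_pretrained with disk offloading. The line that constructed the config object is truncated in the diff view, so the following is only a sketch of how the surviving keyword arguments would typically be assembled with transformers' BitsAndBytesConfig; the variable name and the exact call in the old file are assumptions.

import torch
from transformers import BitsAndBytesConfig

# Sketch only: the diff truncates the line that created this object.
# The keyword arguments below are the ones that survive in the removed lines.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_quant_storage=torch.float16,
    llm_int8_skip_modules=["lm_head"],
    llm_int8_enable_fp32_cpu_offload=True,
)

In the new version this whole config is dropped in favor of FastVisionModel.from_pretrained(..., load_in_4bit=True), so the block above only documents the approach that was removed.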
After:

@@ -1,21 +1,17 @@
 import streamlit as st
 import torch
 from PIL import Image
 import gc
+from transformers import AutoProcessor
 from peft import PeftModel
+from unsloth import FastVisionModel

+# Simple page config
+st.set_page_config(page_title="Deepfake Analyzer", layout="wide")

+# Minimal UI
 st.title("Deepfake Image Analyzer")
+st.markdown("This app analyzes images for signs of deepfake manipulation")

 # Function to free up memory
 def free_memory():

@@ -23,51 +19,41 @@ def free_memory():
     if torch.cuda.is_available():
         torch.cuda.empty_cache()

+# Function to fix cross-attention masks
+def fix_processor_outputs(inputs):
+    """Fix cross-attention mask dimensions if needed"""
+    if 'cross_attention_mask' in inputs and 0 in inputs['cross_attention_mask'].shape:
+        batch_size, seq_len, _, num_tiles = inputs['cross_attention_mask'].shape
+        visual_features = 6404  # The exact dimension used in training
+        new_mask = torch.ones(
+            (batch_size, seq_len, visual_features, num_tiles),
+            device=inputs['cross_attention_mask'].device
+        )
+        inputs['cross_attention_mask'] = new_mask
+        return True, inputs
+    return False, inputs

+# Load model function
 @st.cache_resource
 def load_model():
+    """Load model using Unsloth approach (similar to Colab)"""
     try:
         base_model_id = "unsloth/llama-3.2-11b-vision-instruct-unsloth-bnb-4bit"

         # Load processor
         processor = AutoProcessor.from_pretrained(base_model_id)

+        # Load model using Unsloth's FastVisionModel
+        model, _ = FastVisionModel.from_pretrained(
             base_model_id,
+            load_in_4bit=True,
             torch_dtype=torch.float16,
+            device_map="auto"
         )

+        # Set to inference mode
+        FastVisionModel.for_inference(model)
+
         # Load adapter
         adapter_id = "saakshigupta/deepfake-explainer-1"
         model = PeftModel.from_pretrained(model, adapter_id)

@@ -79,144 +65,110 @@ def load_model():
         st.exception(e)
         return None, None

+# Minimal sidebar
 with st.sidebar:
+    st.header("Settings")
+    temperature = st.slider("Temperature", 0.1, 1.0, 0.7, 0.1)
+    max_length = st.slider("Max length", 100, 500, 300, 50)

+    # Instruction field
+    prompt = st.text_area(
+        "Analysis instruction",
+        value="Analyze this image and determine if it's a deepfake. Provide your reasoning.",
         height=100
     )

+# Main content - two columns for clarity
+col1, col2 = st.columns([1, 2])
+
+with col1:
+    # Load model button
+    if st.button("1. Load Model"):
+        with st.spinner("Loading model... (this may take a minute)"):
             model, processor = load_model()
             if model is not None and processor is not None:
                 st.session_state['model'] = model
                 st.session_state['processor'] = processor
+                st.success("Model loaded successfully!")
             else:
+                st.error("Failed to load model")
+
+    # File uploader
+    uploaded_file = st.file_uploader("2. Upload an image", type=["jpg", "jpeg", "png"])

+    # Display uploaded image
+    if uploaded_file is not None:
+        image = Image.open(uploaded_file).convert('RGB')
+        st.image(image, caption="Uploaded Image", use_column_width=True)
+
+        # Only enable analysis if model is loaded
+        model_loaded = 'model' in st.session_state and st.session_state['model'] is not None
+
+        if st.button("3. Analyze Image", disabled=not model_loaded):
+            if not model_loaded:
+                st.warning("Please load the model first")
+            else:
+                col2.subheader("Analysis Results")
+                with col2.spinner("Analyzing image..."):
+                    try:
+                        # Get model components
+                        model = st.session_state['model']
+                        processor = st.session_state['processor']
+
+                        # Format message for analysis
+                        messages = [
+                            {"role": "user", "content": [
+                                {"type": "image"},
+                                {"type": "text", "text": prompt}
+                            ]}
+                        ]
+
+                        # Apply chat template
+                        input_text = processor.tokenizer.apply_chat_template(
+                            messages,
+                            add_generation_prompt=True
                         )

+                        # Process with image
+                        inputs = processor(
+                            images=image,
+                            text=input_text,
+                            add_special_tokens=False,
+                            return_tensors="pt"
+                        ).to(model.device)

+                        # Apply the fix
+                        fixed, inputs = fix_processor_outputs(inputs)
+                        if fixed:
+                            col2.info("Fixed cross-attention mask dimensions")
+
+                        # Generate analysis
+                        with torch.no_grad():
+                            output_ids = model.generate(
+                                **inputs,
+                                max_new_tokens=max_length,
+                                temperature=temperature,
+                                top_p=0.9
+                            )
+
+                        # Decode the output
+                        response = processor.tokenizer.decode(output_ids[0], skip_special_tokens=True)
+
+                        # Display results
+                        col2.success("Analysis complete!")
+                        col2.markdown(response)
+
+                        # Free memory
+                        free_memory()
+
+                    except Exception as e:
+                        col2.error(f"Error analyzing image: {str(e)}")
+                        col2.exception(e)
+    elif not model_loaded:
+        st.info("Please load the model first (Step 1)")
+    else:
+        st.info("Please upload an image (Step 2)")

+with col2:
+    if 'model' not in st.session_state:
+        st.info("Follow the steps on the left to analyze an image")
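For reference, a minimal, self-contained check of the cross-attention-mask fix, with the helper copied verbatim from the listing above. The dummy tensor shape is invented purely to trigger the zero-sized dimension the helper looks for; it is not taken from the app.

import torch

def fix_processor_outputs(inputs):
    """Fix cross-attention mask dimensions if needed (copied from app.py)."""
    if 'cross_attention_mask' in inputs and 0 in inputs['cross_attention_mask'].shape:
        batch_size, seq_len, _, num_tiles = inputs['cross_attention_mask'].shape
        visual_features = 6404  # The exact dimension used in training
        new_mask = torch.ones(
            (batch_size, seq_len, visual_features, num_tiles),
            device=inputs['cross_attention_mask'].device
        )
        inputs['cross_attention_mask'] = new_mask
        return True, inputs
    return False, inputs

# Hypothetical processor output: the third dimension has collapsed to 0,
# which is the degenerate case the helper repairs before generation.
dummy = {"cross_attention_mask": torch.zeros((1, 12, 0, 4))}
fixed, dummy = fix_processor_outputs(dummy)
print(fixed)                                    # True
print(dummy["cross_attention_mask"].shape)      # torch.Size([1, 12, 6404, 4])

When the mask already has a non-zero visual-feature dimension, the helper returns (False, inputs) and leaves the tensors untouched, which is why the app only shows the "Fixed cross-attention mask dimensions" notice conditionally.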