Update app.py
app.py
CHANGED
@@ -1,222 +1,246 @@
[Previous 222-line version removed. The legible remnants of the old side of the diff show that it imported gc and loaded the model through transformers (AutoProcessor, AutoModelForCausalLM, BitsAndBytesConfig), freed GPU memory with gc.collect() and torch.cuda.empty_cache(), selected the device via an init_device() helper, defined fix_cross_attention_mask with a docstring, and built the sidebar options (temperature slider, maximum-response-length slider, custom prompt text area, About section) inside a `with st.sidebar:` block. The replacement file follows in full.]
import streamlit as st
import torch
from PIL import Image
import io
from peft import PeftModel
from unsloth import FastVisionModel
import tempfile
import os

# App title and description
st.set_page_config(
    page_title="Deepfake Analyzer",
    layout="wide",
    page_icon="🔍"
)

# Main title and description
st.title("Deepfake Image Analyzer")
st.markdown("Upload an image to analyze it for possible deepfake manipulation")

# Check for GPU availability
def check_gpu():
    if torch.cuda.is_available():
        gpu_info = torch.cuda.get_device_properties(0)
        st.sidebar.success(f"✅ GPU available: {gpu_info.name} ({gpu_info.total_memory / (1024**3):.2f} GB)")
        return True
    else:
        st.sidebar.warning("⚠️ No GPU detected. Analysis will be slower.")
        return False

# Sidebar components
st.sidebar.title("Options")

# Temperature slider
temperature = st.sidebar.slider(
    "Temperature",
    min_value=0.1,
    max_value=1.0,
    value=0.7,
    step=0.1,
    help="Higher values make output more random, lower values more deterministic"
)

# Max response length slider
max_tokens = st.sidebar.slider(
    "Maximum Response Length",
    min_value=100,
    max_value=1000,
    value=500,
    step=50,
    help="The maximum number of tokens in the response"
)

# Custom instruction text area in sidebar
custom_instruction = st.sidebar.text_area(
    "Custom Instructions (Advanced)",
    value="Analyze for facial inconsistencies, lighting irregularities, mismatched shadows, and other signs of manipulation.",
    help="Add specific instructions for the model"
)

# About section in sidebar
st.sidebar.markdown("---")
st.sidebar.subheader("About")
st.sidebar.markdown("""
This analyzer looks for:
- Facial inconsistencies
- Unnatural movements
- Lighting issues
- Texture anomalies
- Edge artifacts
- Blending problems

**Model**: Fine-tuned Llama 3.2 Vision
**Creator**: [Saakshi Gupta](https://huggingface.co/saakshigupta)
""")

# Function to fix cross-attention masks
def fix_cross_attention_mask(inputs):
    if 'cross_attention_mask' in inputs and 0 in inputs['cross_attention_mask'].shape:
        batch_size, seq_len, _, num_tiles = inputs['cross_attention_mask'].shape
        visual_features = 6404  # Critical dimension
        new_mask = torch.ones((batch_size, seq_len, visual_features, num_tiles),
                              device=inputs['cross_attention_mask'].device)
        inputs['cross_attention_mask'] = new_mask
        st.success("Fixed cross-attention mask dimensions")
    return inputs
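# Editor's illustration (not part of the committed app): the mask fix above can
# be checked in isolation with a dummy tensor. The shapes here are assumptions
# for demonstration, not values captured from a real model run.
#
#     mask = torch.zeros((1, 32, 0, 4))    # degenerate third dimension triggers the fix
#     fixed = fix_cross_attention_mask({"cross_attention_mask": mask})
#     fixed["cross_attention_mask"].shape  # -> torch.Size([1, 32, 6404, 4])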

# Load model function
@st.cache_resource
def load_model():
    with st.spinner("Loading model... This may take a few minutes. Please be patient..."):
        try:
            # Check for GPU
            has_gpu = check_gpu()

            # Load base model and tokenizer using Unsloth
            base_model_id = "unsloth/llama-3.2-11b-vision-instruct"
            model, tokenizer = FastVisionModel.from_pretrained(
                base_model_id,
                load_in_4bit=True,
            )

            # Load the adapter
            adapter_id = "saakshigupta/deepfake-explainer-1"
            model = PeftModel.from_pretrained(model, adapter_id)

            # Set to inference mode
            FastVisionModel.for_inference(model)

            return model, tokenizer
        except Exception as e:
            st.error(f"Error loading model: {str(e)}")
            return None, None
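# Editor's note: @st.cache_resource keeps a single model instance per server
# process, so Streamlit reruns do not reload the 11B weights on every
# interaction; load_in_4bit=True quantizes the weights via bitsandbytes to a
# fraction of the full-precision footprint.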
|
# Analyze image function
def analyze_image(image, question, model, tokenizer, temperature=0.7, max_tokens=500, custom_instruction=""):
    # Combine question with custom instruction if provided
    if custom_instruction.strip():
        full_prompt = f"{question}\n\nAdditional instructions: {custom_instruction}"
    else:
        full_prompt = question

    # Format the message
    messages = [
        {"role": "user", "content": [
            {"type": "image"},
            {"type": "text", "text": full_prompt}
        ]}
    ]

    # Apply chat template
    input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)

    # Process with image
    inputs = tokenizer(
        image,
        input_text,
        add_special_tokens=False,
        return_tensors="pt",
    ).to(model.device)

    # Fix cross-attention mask if needed
    inputs = fix_cross_attention_mask(inputs)

    # Generate response
    with st.spinner("Analyzing image... (this may take 15-30 seconds)"):
        with torch.no_grad():
            output_ids = model.generate(
                **inputs,
                max_new_tokens=max_tokens,
                use_cache=True,
                do_sample=True,  # enable sampling so temperature and top_p take effect
                temperature=temperature,
                top_p=0.9
            )

    # Decode the output
    response = tokenizer.decode(output_ids[0], skip_special_tokens=True)

    # Try to extract just the model's response (after the prompt)
    if full_prompt in response:
        result = response.split(full_prompt)[-1].strip()
    else:
        result = response

    return result
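# Editor's note: splitting on full_prompt is a heuristic. The chat template
# wraps the prompt in role headers, so when the decoded text does not contain
# the prompt verbatim, the fallback above returns the whole decoded string,
# prompt included.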
|
# Main app
def main():
    # Create a button to load the model
    if 'model_loaded' not in st.session_state:
        st.session_state.model_loaded = False
        st.session_state.model = None
        st.session_state.tokenizer = None

    # Load model button
    if not st.session_state.model_loaded:
        if st.button("📥 Load Deepfake Analysis Model", type="primary"):
            model, tokenizer = load_model()
            if model is not None and tokenizer is not None:
                st.session_state.model = model
                st.session_state.tokenizer = tokenizer
                st.session_state.model_loaded = True
                st.success("✅ Model loaded successfully! You can now analyze images.")
            else:
                st.error("❌ Failed to load model. Please check the logs for errors.")
    else:
        st.success("✅ Model loaded successfully! You can now analyze images.")

    # Image upload section
    st.subheader("Upload an Image")
    uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])

    # Default question with option to customize
    default_question = "Analyze this image and tell me if it's a deepfake. Provide both technical and non-technical explanations."
    question = st.text_area("Question/Prompt:", value=default_question, height=100)

    if uploaded_file is not None:
        # Display the uploaded image
        image = Image.open(uploaded_file).convert("RGB")
        st.image(image, caption="Uploaded Image", use_column_width=True)

        # Analyze button - only enabled if model is loaded
        if st.session_state.model_loaded:
            if st.button("🔍 Analyze Image", type="primary"):
                result = analyze_image(
                    image,
                    question,
                    st.session_state.model,
                    st.session_state.tokenizer,
                    temperature=temperature,
                    max_tokens=max_tokens,
                    custom_instruction=custom_instruction
                )

                # Display results
                st.success("✅ Analysis complete!")

                # Check if the result contains both technical and non-technical explanations
                if "Technical" in result and "Non-Technical" in result:
                    # Split the result into technical and non-technical sections
                    parts = result.split("Non-Technical")
                    technical = parts[0]
                    non_technical = "Non-Technical" + parts[1]

                    # Display in two columns
                    col1, col2 = st.columns(2)
                    with col1:
                        st.subheader("Technical Analysis")
                        st.markdown(technical)

                    with col2:
                        st.subheader("Simple Explanation")
                        st.markdown(non_technical)
                else:
                    # Just display the whole result
                    st.subheader("Analysis Result")
                    st.markdown(result)
        else:
            st.warning("⚠️ Please load the model first before analyzing images.")

    # Footer
    st.markdown("---")
    st.caption("Deepfake Image Analyzer")

if __name__ == "__main__":
    main()
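# Editor's note: to try the app outside this Space, save the file as app.py and
# launch it with `streamlit run app.py` (assumes streamlit, torch, pillow, peft,
# and unsloth are installed).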
|