Update app.py
app.py CHANGED
@@ -408,8 +408,8 @@ def process_image_with_gradcam(image, model, device, pred_class):
 def load_blip_model():
     with st.spinner("Loading BLIP captioning model..."):
         try:
-            processor = BlipProcessor.from_pretrained("
-            model = BlipForConditionalGeneration.from_pretrained("
+            processor = BlipProcessor.from_pretrained("saakshigupta/deepfake-blip-large")
+            model = BlipForConditionalGeneration.from_pretrained("saakshigupta/deepfake-blip-large")
             return processor, model
         except Exception as e:
             st.error(f"Error loading BLIP model: {str(e)}")
@@ -418,40 +418,31 @@ def load_blip_model():
 # Function to generate image caption using BLIP's VQA approach for GradCAM
 def generate_gradcam_caption(image, processor, model, max_length=60):
     """
-    Generate a detailed analysis of GradCAM visualization using
+    Generate a detailed analysis of GradCAM visualization using the fine-tuned BLIP model
     """
     try:
         # Check for available GPU
         device = "cuda" if torch.cuda.is_available() else "cpu"
         model = model.to(device)

-        #
-        questions = [
-            "What facial features are highlighted by the red and yellow areas in this heatmap?",
-            "What does this facial heat map visualization show?",
-            "What patterns do you see in this facial heatmap visualization?"
-        ]
+        # Process image with BLIP
+        inputs = processor(image, return_tensors="pt").to(device)

-        #
-        answers = []
-        for question in questions:
-            inputs = processor(image, text=question, return_tensors="pt").to(device)
-            with torch.no_grad():
-                output = model.generate(**inputs, max_length=max_length, num_beams=5)
-            answer = processor.decode(output[0], skip_special_tokens=True)
-            answers.append(answer)
+        # Generate caption
+        with torch.no_grad():
+            output = model.generate(**inputs, max_length=max_length, num_beams=5)

-        #
+        # Decode the output
+        caption = processor.decode(output[0], skip_special_tokens=True)
+
+        # Format into structured analysis
         structured_output = f"""
-        **
-
-        **High Activation Regions**: The red/yellow areas highlight {answers[0]}
-
-        **Medium Activation Regions**: The green/cyan areas correspond to regions of medium importance in the detection process, typically including parts of the face and surrounding areas.
-
-        **Low Activation Regions**: The blue/dark blue areas represent features that have less impact on the model's decision, usually the background and peripheral elements.
+        **Heatmap Analysis**: {caption}

-        **
+        **Key Observations**:
+        - The red/yellow regions indicate areas of high importance in the detection process
+        - The green/cyan areas show regions of medium importance
+        - The blue/dark blue regions represent features with lower impact on the model's decision
         """
         return structured_output.strip()
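For a quick sanity check outside the Streamlit app, a minimal standalone sketch of the single-caption flow this commit introduces. The model name is taken from the diff above; the overlay image path is a placeholder, not something this commit defines.

# Standalone sketch of the new captioning flow (placeholder image path).
import torch
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration

device = "cuda" if torch.cuda.is_available() else "cpu"
processor = BlipProcessor.from_pretrained("saakshigupta/deepfake-blip-large")
model = BlipForConditionalGeneration.from_pretrained("saakshigupta/deepfake-blip-large").to(device)

# Placeholder: a GradCAM overlay saved as an image by the upstream app code
image = Image.open("gradcam_overlay.png").convert("RGB")
inputs = processor(image, return_tensors="pt").to(device)

# Same generation settings as the app: beam search with a capped length
with torch.no_grad():
    output = model.generate(**inputs, max_length=60, num_beams=5)

print(processor.decode(output[0], skip_special_tokens=True))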