Update app.py
Browse files
app.py
CHANGED
|
@@ -424,10 +424,6 @@ def process_image_with_gradcam(image, model, device, pred_class):
|
|
| 424 |
|
| 425 |
# ----- BLIP Image Captioning -----
|
| 426 |
|
| 427 |
-
# Define conditional prompts for BLIP
|
| 428 |
-
ORIGINAL_IMAGE_PROMPT = "an image of" # For the original image
|
| 429 |
-
GRADCAM_IMAGE_PROMPT = "a heatmap showing" # For the GradCAM visualization
|
| 430 |
-
|
| 431 |
# Function to load BLIP captioning model
|
| 432 |
@st.cache_resource
|
| 433 |
def load_blip_model():
|
|
@@ -443,33 +439,39 @@ def load_blip_model():
|
|
| 443 |
# Function to generate image caption using BLIP
|
| 444 |
def generate_image_caption(image, processor, model, is_gradcam=False, max_length=75, num_beams=5):
|
| 445 |
"""
|
| 446 |
-
Generate a caption for the input image using BLIP model
|
| 447 |
"""
|
| 448 |
try:
|
| 449 |
-
# Select the appropriate prompt based on image type
|
| 450 |
-
conditional_prompt = GRADCAM_IMAGE_PROMPT if is_gradcam else ORIGINAL_IMAGE_PROMPT
|
| 451 |
-
|
| 452 |
# Check for available GPU
|
| 453 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 454 |
model = model.to(device)
|
| 455 |
|
| 456 |
-
#
|
| 457 |
-
|
| 458 |
-
|
| 459 |
-
|
| 460 |
-
|
|
|
|
|
|
|
|
|
|
| 461 |
|
| 462 |
-
#
|
| 463 |
-
|
| 464 |
-
|
| 465 |
|
| 466 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 467 |
if is_gradcam:
|
| 468 |
-
|
| 469 |
else:
|
| 470 |
-
|
| 471 |
-
|
| 472 |
-
return full_info
|
| 473 |
except Exception as e:
|
| 474 |
st.error(f"Error generating caption: {str(e)}")
|
| 475 |
return "Error generating caption"
|
|
@@ -675,14 +677,18 @@ def main():
|
|
| 675 |
uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
|
| 676 |
|
| 677 |
if uploaded_file is not None:
|
| 678 |
-
# Display the uploaded image
|
| 679 |
try:
|
|
|
|
| 680 |
image = Image.open(uploaded_file).convert("RGB")
|
| 681 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 682 |
|
| 683 |
# Generate detailed caption for original image if BLIP model is loaded
|
| 684 |
if st.session_state.blip_model_loaded:
|
| 685 |
-
with st.spinner("Generating
|
| 686 |
caption = generate_image_caption(
|
| 687 |
image,
|
| 688 |
st.session_state.blip_processor,
|
|
@@ -690,11 +696,8 @@ def main():
|
|
| 690 |
is_gradcam=False
|
| 691 |
)
|
| 692 |
st.session_state.image_caption = caption
|
| 693 |
-
st.success(f"📝 Image Description Generated")
|
| 694 |
|
| 695 |
-
#
|
| 696 |
-
st.markdown("### Image Description:")
|
| 697 |
-
st.markdown(caption)
|
| 698 |
|
| 699 |
# Detect with CLIP model if loaded
|
| 700 |
if st.session_state.clip_model_loaded:
|
|
@@ -728,11 +731,12 @@ def main():
|
|
| 728 |
pred_label = "Fake" if pred_class == 1 else "Real"
|
| 729 |
|
| 730 |
# Display results
|
| 731 |
-
|
| 732 |
-
|
| 733 |
-
|
| 734 |
-
|
| 735 |
-
|
|
|
|
| 736 |
|
| 737 |
# GradCAM visualization
|
| 738 |
st.subheader("GradCAM Visualization")
|
|
@@ -740,8 +744,8 @@ def main():
|
|
| 740 |
image, model, device, pred_class
|
| 741 |
)
|
| 742 |
|
| 743 |
-
# Display GradCAM results
|
| 744 |
-
st.image(comparison, caption="Original | CAM | Overlay",
|
| 745 |
|
| 746 |
# Generate caption for GradCAM overlay image if BLIP model is loaded
|
| 747 |
if st.session_state.blip_model_loaded:
|
|
@@ -754,11 +758,8 @@ def main():
|
|
| 754 |
max_length=150 # Longer for detailed analysis
|
| 755 |
)
|
| 756 |
st.session_state.gradcam_caption = gradcam_caption
|
| 757 |
-
st.success("✅ GradCAM analysis complete")
|
| 758 |
|
| 759 |
-
#
|
| 760 |
-
st.markdown("### GradCAM Analysis:")
|
| 761 |
-
st.markdown(gradcam_caption)
|
| 762 |
|
| 763 |
# Save results in session state for LLM analysis
|
| 764 |
st.session_state.current_image = image
|
|
@@ -854,10 +855,10 @@ def main():
|
|
| 854 |
col1, col2 = st.columns([1, 2])
|
| 855 |
|
| 856 |
with col1:
|
| 857 |
-
# Display original image and overlay side by side
|
| 858 |
-
st.image(st.session_state.current_image, caption="Original Image",
|
| 859 |
if hasattr(st.session_state, 'current_overlay'):
|
| 860 |
-
st.image(st.session_state.current_overlay, caption="GradCAM Overlay",
|
| 861 |
|
| 862 |
with col2:
|
| 863 |
# Detection result
|
|
|
|
| 424 |
|
| 425 |
# ----- BLIP Image Captioning -----
|
| 426 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 427 |
# Function to load BLIP captioning model
|
| 428 |
@st.cache_resource
|
| 429 |
def load_blip_model():
|
|
|
|
| 439 |
# Function to generate image caption using BLIP
def generate_image_caption(image, processor, model, is_gradcam=False, max_length=75, num_beams=5):
    """
    Generate a caption for the input image using the BLIP model.

    Parameters
    ----------
    image : PIL.Image.Image
        Input image to caption.
    processor : BlipProcessor
        Hugging Face processor that prepares pixel values (and tokenizes the
        prompt for conditional captioning).
    model : BlipForConditionalGeneration
        Captioning model; moved to GPU here when one is available.
    is_gradcam : bool, optional
        When True, use conditional captioning with a heatmap-specific prompt
        and format the result via ``format_gradcam_caption``; otherwise use
        unconditional captioning (works better for portraits) and format via
        ``format_image_caption``.
    max_length : int, optional
        Maximum token length of the generated caption.
    num_beams : int, optional
        Beam-search width for generation.

    Returns
    -------
    str
        The formatted caption, or the string "Error generating caption" if
        generation fails (the error is also shown in the Streamlit UI).
    """
    # Prompt used for conditional captioning of GradCAM overlays. Defined once
    # so the generation call and the prompt-stripping below cannot drift apart.
    gradcam_prompt = "a heatmap showing"
    try:
        # Check for available GPU
        device = "cuda" if torch.cuda.is_available() else "cpu"
        model = model.to(device)

        # Choose the right prompting method based on image type
        if is_gradcam:
            # For GradCAM, use conditional captioning with a specific prompt
            inputs = processor(image, gradcam_prompt, return_tensors="pt").to(device)
        else:
            # For original image, use unconditional captioning (works better for portraits)
            inputs = processor(image, return_tensors="pt").to(device)

        # Generate caption; no gradients are needed at inference time.
        with torch.no_grad():
            output = model.generate(**inputs, max_length=max_length, num_beams=num_beams)

        # Decode the generated token ids to text
        caption = processor.decode(output[0], skip_special_tokens=True)

        # Conditional captioning echoes the prompt at the START of the output.
        # Strip only a leading occurrence: the previous replace() removed the
        # phrase everywhere, which could corrupt a caption that legitimately
        # mentions it later.
        if is_gradcam and caption.startswith(gradcam_prompt):
            caption = caption[len(gradcam_prompt):].strip()

        # Dispatch to the appropriate formatter based on image type
        return format_gradcam_caption(caption) if is_gradcam else format_image_caption(caption)

    except Exception as e:
        # Broad catch is intentional at this UI boundary: surface the failure
        # to the user but keep the Streamlit app running.
        st.error(f"Error generating caption: {str(e)}")
        return "Error generating caption"
|
|
|
|
| 677 |
uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
|
| 678 |
|
| 679 |
if uploaded_file is not None:
|
|
|
|
| 680 |
try:
|
| 681 |
+
# Load and display the image (with controlled size)
|
| 682 |
image = Image.open(uploaded_file).convert("RGB")
|
| 683 |
+
|
| 684 |
+
# Display the image with a controlled width
|
| 685 |
+
col1, col2 = st.columns([1, 2])
|
| 686 |
+
with col1:
|
| 687 |
+
st.image(image, caption="Uploaded Image", width=300)
|
| 688 |
|
| 689 |
# Generate detailed caption for original image if BLIP model is loaded
|
| 690 |
if st.session_state.blip_model_loaded:
|
| 691 |
+
with st.spinner("Generating image description..."):
|
| 692 |
caption = generate_image_caption(
|
| 693 |
image,
|
| 694 |
st.session_state.blip_processor,
|
|
|
|
| 696 |
is_gradcam=False
|
| 697 |
)
|
| 698 |
st.session_state.image_caption = caption
|
|
|
|
| 699 |
|
| 700 |
+
# Store caption but don't display it here - it will be shown in the summary section
|
|
|
|
|
|
|
| 701 |
|
| 702 |
# Detect with CLIP model if loaded
|
| 703 |
if st.session_state.clip_model_loaded:
|
|
|
|
| 731 |
pred_label = "Fake" if pred_class == 1 else "Real"
|
| 732 |
|
| 733 |
# Display results
|
| 734 |
+
with col2:
|
| 735 |
+
result_col1, result_col2 = st.columns(2)
|
| 736 |
+
with result_col1:
|
| 737 |
+
st.metric("Prediction", pred_label)
|
| 738 |
+
with result_col2:
|
| 739 |
+
st.metric("Confidence", f"{confidence:.2%}")
|
| 740 |
|
| 741 |
# GradCAM visualization
|
| 742 |
st.subheader("GradCAM Visualization")
|
|
|
|
| 744 |
image, model, device, pred_class
|
| 745 |
)
|
| 746 |
|
| 747 |
+
# Display GradCAM results (controlled size)
|
| 748 |
+
st.image(comparison, caption="Original | CAM | Overlay", width=700)
|
| 749 |
|
| 750 |
# Generate caption for GradCAM overlay image if BLIP model is loaded
|
| 751 |
if st.session_state.blip_model_loaded:
|
|
|
|
| 758 |
max_length=150 # Longer for detailed analysis
|
| 759 |
)
|
| 760 |
st.session_state.gradcam_caption = gradcam_caption
|
|
|
|
| 761 |
|
| 762 |
+
# Store caption but don't display it here - it will be shown in the summary section
|
|
|
|
|
|
|
| 763 |
|
| 764 |
# Save results in session state for LLM analysis
|
| 765 |
st.session_state.current_image = image
|
|
|
|
| 855 |
col1, col2 = st.columns([1, 2])
|
| 856 |
|
| 857 |
with col1:
|
| 858 |
+
# Display original image and overlay side by side with controlled size
|
| 859 |
+
st.image(st.session_state.current_image, caption="Original Image", width=300)
|
| 860 |
if hasattr(st.session_state, 'current_overlay'):
|
| 861 |
+
st.image(st.session_state.current_overlay, caption="GradCAM Overlay", width=300)
|
| 862 |
|
| 863 |
with col2:
|
| 864 |
# Detection result
|