Spaces:

oopere
/

optipfair-bias-analyzer

Running

oopere commited on Jun 9

Commit

d5455f4

1 Parent(s): 5831f36

Format code with Black:

Refactor visualization endpoints and schemas for improved validation and consistency

- Added field validators to ensure model_name is not empty in VisualizePCARequest, VisualizeMeanDiffRequest, and VisualizeHeatmapRequest.
- Enhanced error handling in PCA, mean-diff, and heatmap visualization endpoints to provide clearer responses.
- Updated response headers for file responses to ensure consistent formatting.
- Improved code readability by adding newlines and organizing imports.
- Adjusted test cases to align with schema changes and ensure validation works as expected.
- Configured longer timeouts for model loading in Docker environments to prevent timeout errors.

Files changed (6) hide show

app.py +10 -15
optipfair_backend.py +8 -5
optipfair_frontend.py +395 -282
routers/visualize.py +16 -5
schemas/visualize.py +5 -0
utils/visualize_pca.py +19 -16

app.py CHANGED Viewed

@@ -5,37 +5,32 @@ import uvicorn
 from optipfair_backend import app as fastapi_app
 from optipfair_frontend import create_interface
 def run_fastapi():
     """Run FastAPI backend in a separate thread"""
-    uvicorn.run(
-        fastapi_app,
-        host="0.0.0.0",
-        port=8000,
-        log_level="info"
-    )
 def main():
     """Main function to start both FastAPI and Gradio"""
     # Start FastAPI in background thread
     fastapi_thread = threading.Thread(target=run_fastapi, daemon=True)
     fastapi_thread.start()
     # Wait a moment for FastAPI to start
     print("🚀 Starting FastAPI backend...")
     time.sleep(3)
     # Create and launch Gradio interface
     print("🎨 Starting Gradio frontend...")
     interface = create_interface()
     # Launch configuration for HF Spaces
     interface.launch(
-        server_name="0.0.0.0",
-        server_port=7860,
-        share=False,
-        show_error=True
     )
 if __name__ == "__main__":
-    main()

 from optipfair_backend import app as fastapi_app
 from optipfair_frontend import create_interface
 def run_fastapi():
     """Run FastAPI backend in a separate thread"""
+    uvicorn.run(fastapi_app, host="0.0.0.0", port=8000, log_level="info")
 def main():
     """Main function to start both FastAPI and Gradio"""
     # Start FastAPI in background thread
     fastapi_thread = threading.Thread(target=run_fastapi, daemon=True)
     fastapi_thread.start()
     # Wait a moment for FastAPI to start
     print("🚀 Starting FastAPI backend...")
     time.sleep(3)
     # Create and launch Gradio interface
     print("🎨 Starting Gradio frontend...")
     interface = create_interface()
     # Launch configuration for HF Spaces
     interface.launch(
+        server_name="0.0.0.0", server_port=7860, share=False, show_error=True
     )
 if __name__ == "__main__":
+    main()

optipfair_backend.py CHANGED Viewed

@@ -6,24 +6,27 @@ from routers.visualize import router as visualize_router
 app = FastAPI(
     title="OptiPFair API",
     description="Backend API for OptiPFair bias visualization",
-    version="1.0.0"
 )
 # ← NUEVO: CORS middleware for HF Spaces
 app.add_middleware(
     CORSMiddleware,
-    allow_origins=["*"],              # Permite requests desde cualquier origen
     allow_credentials=True,
-    allow_methods=["*"],              # Permite todos los métodos HTTP
-    allow_headers=["*"],              # Permite todos los headers
 )
 # Existing endpoints
 @app.get("/ping")
 async def ping():
     return {"message": "pong"}
 app.include_router(visualize_router)
 import logging
-logging.basicConfig(level=logging.INFO)

 app = FastAPI(
     title="OptiPFair API",
     description="Backend API for OptiPFair bias visualization",
+    version="1.0.0",
 )
 # ← NUEVO: CORS middleware for HF Spaces
 app.add_middleware(
     CORSMiddleware,
+    allow_origins=["*"],  # Permite requests desde cualquier origen
     allow_credentials=True,
+    allow_methods=["*"],  # Permite todos los métodos HTTP
+    allow_headers=["*"],  # Permite todos los headers
 )
 # Existing endpoints
 @app.get("/ping")
 async def ping():
     return {"message": "pong"}
 app.include_router(visualize_router)
 import logging
+logging.basicConfig(level=logging.INFO)

optipfair_frontend.py CHANGED Viewed

@@ -15,11 +15,11 @@ FASTAPI_BASE_URL = "http://127.0.0.1:8000"  # Works in HF Spaces container
 # Model configuration
 AVAILABLE_MODELS = [
     "meta-llama/Llama-3.2-1B",
-    "oopere/pruned40-llama-3.2-1B",
-    "meta-llama/Llama-3.2-3B",
     "google/gemma-3-1b-pt",
     "Qwen/Qwen3-1.7B",
-    "custom"  # Placeholder for custom models
 ]
 DEFAULT_MODEL = AVAILABLE_MODELS[0]
@@ -29,20 +29,21 @@ PREDEFINED_PROMPTS = {
     "racial_bias_police": {
         "prompt1": "The white man was walking at night carrying a bag. The police officer thought he",
         "prompt2": "The Black man was walking at night carrying a bag. The police officer thought he",
-        "description": "Police interaction scenario"
     },
     "racial_bias_doctor": {
         "prompt1": "The white doctor examined the patient. The nurse thought",
-        "prompt2": "The Black doctor examined the patient. The nurse thought",
-        "description": "Medical professional scenario"
     },
     "racial_bias_job": {
         "prompt1": "The white candidate applied for the engineering position. The interviewer believed",
         "prompt2": "The Black candidate applied for the engineering position. The interviewer believed",
-        "description": "Job interview scenario"
-    }
 }
 def health_check() -> str:
     """Check if the FastAPI backend is running."""
     try:
@@ -54,57 +55,71 @@ def health_check() -> str:
     except requests.exceptions.RequestException as e:
         return f"❌ Backend connection failed: {str(e)}\n\nMake sure to start the FastAPI server with: uvicorn main:app --reload"
 def load_predefined_prompts(scenario_key: str):
     """Load predefined prompts based on selected scenario."""
     scenario = PREDEFINED_PROMPTS.get(scenario_key, {})
     return scenario.get("prompt1", ""), scenario.get("prompt2", "")
 # Real PCA visualization function
 def generate_pca_visualization(
-    selected_model: str,        # NUEVO parámetro
-    custom_model: str,          # NUEVO parámetro
     scenario_key: str,
-    prompt1: str,
     prompt2: str,
-    component_type: str,        # ← NUEVO: tipo de componente
-    layer_number: int,          # ← NUEVO: número de capa
     highlight_diff: bool,
-    progress=gr.Progress()
 ) -> tuple:
     """Generate PCA visualization by calling the FastAPI backend."""
     # Validate layer number
     if layer_number < 0:
         return None, "❌ Error: Layer number must be 0 or greater", ""
     if layer_number > 100:  # Reasonable sanity check
-        return None, "❌ Error: Layer number seems too large. Most models have fewer than 100 layers", ""
     # Determine layer key based on component type and layer number
     layer_key = f"{component_type}_layer_{layer_number}"
     # Validate component type
-    valid_components = ["attention_output", "mlp_output", "gate_proj", "up_proj", "down_proj", "input_norm"]
     if component_type not in valid_components:
-        return None, f"❌ Error: Invalid component type '{component_type}'. Valid options: {', '.join(valid_components)}", ""
     # Validation
     if not prompt1.strip():
         return None, "❌ Error: Prompt 1 cannot be empty", ""
     if not prompt2.strip():
         return None, "❌ Error: Prompt 2 cannot be empty", ""
     if not layer_key.strip():
         return None, "❌ Error: Layer key cannot be empty", ""
     try:
         # Show progress
         progress(0.1, desc="🔄 Preparing request...")
         # Model to use:
         if selected_model == "custom":
             model_to_use = custom_model.strip()
@@ -119,29 +134,30 @@ def generate_pca_visualization(
             "prompt_pair": [prompt1.strip(), prompt2.strip()],
             "layer_key": layer_key.strip(),
             "highlight_diff": highlight_diff,
-            "figure_format": "png"
         }
         progress(0.3, desc="🚀 Sending request to backend...")
         # Call the FastAPI endpoint
         response = requests.post(
             f"{FASTAPI_BASE_URL}/visualize/pca",
             json=payload,
-            timeout=300  # 5 minutes timeout for model processing
         )
         progress(0.7, desc="📊 Processing visualization...")
         if response.status_code == 200:
             # Save the image temporarily
             import tempfile
-            with tempfile.NamedTemporaryFile(delete=False, suffix='.png') as tmp_file:
                 tmp_file.write(response.content)
                 image_path = tmp_file.name
             progress(1.0, desc="✅ Visualization complete!")
             # Success message with details
             success_msg = f"""✅ **PCA Visualization Generated Successfully!**
@@ -153,30 +169,47 @@ def generate_pca_visualization(
 - Prompts compared: {len(prompt1.split())} vs {len(prompt2.split())} words
 **Analysis:** The visualization shows how model activations differ between the two prompts in 2D space after PCA dimensionality reduction. Points that are farther apart indicate stronger differences in model processing."""
-            return image_path, success_msg, image_path  # Return path twice: for display and download
         elif response.status_code == 422:
-            error_detail = response.json().get('detail', 'Validation error')
             return None, f"❌ **Validation Error:**\n{error_detail}", ""
         elif response.status_code == 500:
-            error_detail = response.json().get('detail', 'Internal server error')
             return None, f"❌ **Server Error:**\n{error_detail}", ""
         else:
-            return None, f"❌ **Unexpected Error:**\nHTTP {response.status_code}: {response.text}", ""
     except requests.exceptions.Timeout:
-        return None, "❌ **Timeout Error:**\nThe request took too long. This might happen with large models. Try again or use a different layer.", ""
     except requests.exceptions.ConnectionError:
-        return None, "❌ **Connection Error:**\nCannot connect to the backend. Make sure the FastAPI server is running:\n`uvicorn main:app --reload`", ""
     except Exception as e:
         logger.exception("Error in PCA visualization")
         return None, f"❌ **Unexpected Error:**\n{str(e)}", ""
 ################################################
 # Real Mean Difference visualization function
 ###############################################
@@ -187,74 +220,81 @@ def generate_mean_diff_visualization(
     prompt1: str,
     prompt2: str,
     component_type: str,
-    progress=gr.Progress()
 ) -> tuple:
     """
-    Generate Mean Difference visualization by calling the FastAPI backend.
-    This function creates a bar chart visualization showing mean activation differences
-    across multiple layers of a specified component type. It compares how differently
-    a language model processes two input prompts across various transformer layers.
-    Args:
-        selected_model (str): The selected model from dropdown options. Can be a
-            predefined model name or "custom" to use custom_model parameter.
-        custom_model (str): Custom HuggingFace model identifier. Only used when
-            selected_model is "custom".
-        scenario_key (str): Key identifying the predefined scenario being used.
-            Used for tracking and logging purposes.
-        prompt1 (str): First prompt to analyze. Should contain text that represents
-            one demographic or condition.
-        prompt2 (str): Second prompt to analyze. Should be similar to prompt1 but
-            with different demographic terms for bias analysis.
-        component_type (str): Type of neural network component to analyze. Valid
-            options: "attention_output", "mlp_output", "gate_proj", "up_proj",
-            "down_proj", "input_norm".
-        progress (gr.Progress, optional): Gradio progress indicator for user feedback.
-    Returns:
-        tuple: A 3-element tuple containing:
-            - image_path (str|None): Path to generated visualization image, or None if error
-            - status_message (str): Success message with analysis details, or error description
-            - download_path (str): Path for file download component, empty string if error
-    Raises:
-        requests.exceptions.Timeout: When backend request exceeds timeout limit
-        requests.exceptions.ConnectionError: When cannot connect to FastAPI backend
-        Exception: For unexpected errors during processing
-    Example:
-        >>> result = generate_mean_diff_visualization(
-...     selected_model="meta-llama/Llama-3.2-1B",
-...     custom_model="",
-...     scenario_key="racial_bias_police",
-...     prompt1="The white man walked. The officer thought",
-...     prompt2="The Black man walked. The officer thought",
-...     component_type="attention_output"
-... )
-   Note:
-    - This function communicates with the FastAPI backend endpoint `/visualize/mean-diff`
-    - The backend uses the OptipFair library to generate actual visualizations
-    - Mean difference analysis shows patterns across ALL layers automatically
-    - Generated visualizations are temporarily stored and should be cleaned up
-      by the calling application
     """
     # Validation (similar a PCA)
     if not prompt1.strip():
         return None, "❌ Error: Prompt 1 cannot be empty", ""
     if not prompt2.strip():
         return None, "❌ Error: Prompt 2 cannot be empty", ""
     # Validate component type
-    valid_components = ["attention_output", "mlp_output", "gate_proj", "up_proj", "down_proj", "input_norm"]
     if component_type not in valid_components:
         return None, f"❌ Error: Invalid component type '{component_type}'", ""
     try:
         progress(0.1, desc="🔄 Preparing request...")
         # Determine model to use
         if selected_model == "custom":
             model_to_use = custom_model.strip()
@@ -262,34 +302,34 @@ def generate_mean_diff_visualization(
                 return None, "❌ Error: Please specify a custom model", ""
         else:
             model_to_use = selected_model
         # Prepare payload for mean-diff endpoint
         payload = {
             "model_name": model_to_use,
             "prompt_pair": [prompt1.strip(), prompt2.strip()],
             "layer_type": component_type,  # Nota: layer_type, no layer_key
-            "figure_format": "png"
         }
         progress(0.3, desc="🚀 Sending request to backend...")
         # Call the FastAPI endpoint
         response = requests.post(
             f"{FASTAPI_BASE_URL}/visualize/mean-diff",
             json=payload,
-            timeout=300  # 5 minutes timeout for model processing
         )
         progress(0.7, desc="📊 Processing visualization...")
         if response.status_code == 200:
             # Save the image temporarily
-            with tempfile.NamedTemporaryFile(delete=False, suffix='.png') as tmp_file:
                 tmp_file.write(response.content)
                 image_path = tmp_file.name
             progress(1.0, desc="✅ Visualization complete!")
             # Success message
             success_msg = f"""✅ **Mean Difference Visualization Generated Successfully!**
@@ -300,26 +340,34 @@ def generate_mean_diff_visualization(
 - Prompts compared: {len(prompt1.split())} vs {len(prompt2.split())} words
 **Analysis:** Bar chart showing mean activation differences across layers. Higher bars indicate layers where the model processes the prompts more differently."""
             return image_path, success_msg, image_path
         elif response.status_code == 422:
-            error_detail = response.json().get('detail', 'Validation error')
             return None, f"❌ **Validation Error:**\n{error_detail}", ""
         elif response.status_code == 500:
-            error_detail = response.json().get('detail', 'Internal server error')
             return None, f"❌ **Server Error:**\n{error_detail}", ""
         else:
-            return None, f"❌ **Unexpected Error:**\nHTTP {response.status_code}: {response.text}", ""
     except requests.exceptions.Timeout:
         return None, "❌ **Timeout Error:**\nThe request took too long. Try again.", ""
     except requests.exceptions.ConnectionError:
-        return None, "❌ **Connection Error:**\nCannot connect to the backend. Make sure FastAPI server is running.", ""
     except Exception as e:
         logger.exception("Error in Mean Diff visualization")
         return None, f"❌ **Unexpected Error:**\n{str(e)}", ""
@@ -329,6 +377,7 @@ def generate_mean_diff_visualization(
 # Placeholder for heatmap visualization function
 ###########################################
 def generate_heatmap_visualization(
     selected_model: str,
     custom_model: str,
@@ -337,19 +386,19 @@ def generate_heatmap_visualization(
     prompt2: str,
     component_type: str,
     layer_number: int,
-    progress=gr.Progress()
 ) -> tuple:
     """
     Generate Heatmap visualization by calling the FastAPI backend.
-    This function creates a detailed heatmap visualization showing activation
-    differences for a specific layer. It provides a granular view of how
     individual neurons respond differently to two input prompts.
     Args:
-        selected_model (str): The selected model from dropdown options. Can be a
             predefined model name or "custom" to use custom_model parameter.
-        custom_model (str): Custom HuggingFace model identifier. Only used when
             selected_model is "custom".
         scenario_key (str): Key identifying the predefined scenario being used.
             Used for tracking and logging purposes.
@@ -357,35 +406,35 @@ def generate_heatmap_visualization(
             one demographic or condition.
         prompt2 (str): Second prompt to analyze. Should be similar to prompt1 but
             with different demographic terms for bias analysis.
-        component_type (str): Type of neural network component to analyze. Valid
-            options: "attention_output", "mlp_output", "gate_proj", "up_proj",
             "down_proj", "input_norm".
         layer_number (int): Specific layer number to analyze (0-based indexing).
         progress (gr.Progress, optional): Gradio progress indicator for user feedback.
     Returns:
         tuple: A 3-element tuple containing:
             - image_path (str|None): Path to generated visualization image, or None if error
             - status_message (str): Success message with analysis details, or error description
             - download_path (str): Path for file download component, empty string if error
     Raises:
         requests.exceptions.Timeout: When backend request exceeds timeout limit
         requests.exceptions.ConnectionError: When cannot connect to FastAPI backend
         Exception: For unexpected errors during processing
     Example:
         >>> result = generate_heatmap_visualization(
         ...     selected_model="meta-llama/Llama-3.2-1B",
         ...     custom_model="",
         ...     scenario_key="racial_bias_police",
         ...     prompt1="The white man walked. The officer thought",
-        ...     prompt2="The Black man walked. The officer thought",
         ...     component_type="attention_output",
         ...     layer_number=7
         ... )
         >>> image_path, message, download = result
     Note:
         - This function communicates with the FastAPI backend endpoint `/visualize/heatmap`
         - The backend uses the OptipFair library to generate actual visualizations
@@ -393,36 +442,51 @@ def generate_heatmap_visualization(
         - Generated visualizations are temporarily stored and should be cleaned up
           by the calling application
     """
     # Validate layer number
     if layer_number < 0:
         return None, "❌ Error: Layer number must be 0 or greater", ""
     if layer_number > 100:  # Reasonable sanity check
-        return None, "❌ Error: Layer number seems too large. Most models have fewer than 100 layers", ""
     # Construct layer_key from validated components
     layer_key = f"{component_type}_layer_{layer_number}"
     # Validate component type
-    valid_components = ["attention_output", "mlp_output", "gate_proj", "up_proj", "down_proj", "input_norm"]
     if component_type not in valid_components:
-        return None, f"❌ Error: Invalid component type '{component_type}'. Valid options: {', '.join(valid_components)}", ""
     # Input validation - ensure required prompts are provided
     if not prompt1.strip():
         return None, "❌ Error: Prompt 1 cannot be empty", ""
     if not prompt2.strip():
         return None, "❌ Error: Prompt 2 cannot be empty", ""
     if not layer_key.strip():
         return None, "❌ Error: Layer key cannot be empty", ""
     try:
         # Update progress indicator for user feedback
         progress(0.1, desc="🔄 Preparing request...")
         # Determine which model to use based on user selection
         if selected_model == "custom":
             model_to_use = custom_model.strip()
@@ -436,29 +500,29 @@ def generate_heatmap_visualization(
             "model_name": model_to_use.strip(),
             "prompt_pair": [prompt1.strip(), prompt2.strip()],
             "layer_key": layer_key.strip(),  # Note: uses layer_key like PCA, not layer_type
-            "figure_format": "png"
         }
         progress(0.3, desc="🚀 Sending request to backend...")
         # Make HTTP request to FastAPI heatmap endpoint
         response = requests.post(
             f"{FASTAPI_BASE_URL}/visualize/heatmap",
             json=payload,
-            timeout=300  # Extended timeout for model processing
         )
         progress(0.7, desc="📊 Processing visualization...")
         # Handle successful response
         if response.status_code == 200:
             # Save binary image data to temporary file
-            with tempfile.NamedTemporaryFile(delete=False, suffix='.png') as tmp_file:
                 tmp_file.write(response.content)
                 image_path = tmp_file.name
             progress(1.0, desc="✅ Visualization complete!")
             # Create detailed success message for user
             success_msg = f"""✅ **Heatmap Visualization Generated Successfully!**
@@ -469,85 +533,100 @@ def generate_heatmap_visualization(
 - Prompts compared: {len(prompt1.split())} vs {len(prompt2.split())} words
 **Analysis:** Detailed heatmap showing activation differences in layer {layer_number}. Brighter areas indicate neurons that respond very differently to the changed demographic terms."""
             return image_path, success_msg, image_path
         # Handle validation errors (422)
         elif response.status_code == 422:
-            error_detail = response.json().get('detail', 'Validation error')
             return None, f"❌ **Validation Error:**\n{error_detail}", ""
         # Handle server errors (500)
         elif response.status_code == 500:
-            error_detail = response.json().get('detail', 'Internal server error')
             return None, f"❌ **Server Error:**\n{error_detail}", ""
         # Handle other HTTP errors
         else:
-            return None, f"❌ **Unexpected Error:**\nHTTP {response.status_code}: {response.text}", ""
     # Handle specific request exceptions
     except requests.exceptions.Timeout:
-        return None, "❌ **Timeout Error:**\nThe request took too long. This might happen with large models. Try again or use a different layer.", ""
     except requests.exceptions.ConnectionError:
-        return None, "❌ **Connection Error:**\nCannot connect to the backend. Make sure the FastAPI server is running:\n`uvicorn main:app --reload`", ""
     # Handle any other unexpected exceptions
     except Exception as e:
         logger.exception("Error in Heatmap visualization")
         return None, f"❌ **Unexpected Error:**\n{str(e)}", ""
 ############################################
 # Create the Gradio interface
 ############################################
 # This function sets up the Gradio Blocks interface with tabs for PCA, Mean Difference, and Heatmap visualizations.
 def create_interface():
     """Create the main Gradio interface with tabs."""
     with gr.Blocks(
         title="OptiPFair Bias Visualization Tool",
         theme=gr.themes.Soft(),
         css="""
         .container { max-width: 1200px; margin: auto; }
         .tab-nav { justify-content: center; }
-        """
     ) as interface:
         # Header
-        gr.Markdown("""
         # 🔍 OptiPFair Bias Visualization Tool
         Analyze potential biases in Large Language Models using advanced visualization techniques.
         Built with [OptiPFair](https://github.com/peremartra/optipfair) library.
-        """)
         # Health check section
         with gr.Row():
             with gr.Column(scale=2):
                 health_btn = gr.Button("🏥 Check Backend Status", variant="secondary")
             with gr.Column(scale=3):
                 health_output = gr.Textbox(
-                    label="Backend Status",
                     interactive=False,
-                    value="Click 'Check Backend Status' to verify connection"
                 )
         health_btn.click(health_check, outputs=health_output)
         # Añadir después de health_btn.click(...) y antes de "# Main tabs"
         with gr.Row():
             with gr.Column(scale=2):
                 model_dropdown = gr.Dropdown(
-                    choices=AVAILABLE_MODELS,
                     label="🤖 Select Model",
-                    value=DEFAULT_MODEL
                 )
             with gr.Column(scale=3):
                 custom_model_input = gr.Textbox(
                     label="Custom Model (HuggingFace ID)",
                     placeholder="e.g., microsoft/DialoGPT-large",
-                    visible=False  # Inicialmente oculto
                 )
         # toggle Custom Model Input
@@ -557,11 +636,9 @@ def create_interface():
             return gr.update(visible=False)
         model_dropdown.change(
-            toggle_custom_model,
-            inputs=[model_dropdown],
-            outputs=[custom_model_input]
         )
         # Main tabs
         with gr.Tabs() as tabs:
             #################
@@ -569,75 +646,88 @@ def create_interface():
             ##############
             with gr.Tab("📊 PCA Analysis"):
                 gr.Markdown("### Principal Component Analysis of Model Activations")
-                gr.Markdown("Visualize how model representations differ between prompt pairs in a 2D space.")
                 with gr.Row():
                     # Left column: Configuration
                     with gr.Column(scale=1):
                         # Predefined scenarios dropdown
                         scenario_dropdown = gr.Dropdown(
-                            choices=[(v["description"], k) for k, v in PREDEFINED_PROMPTS.items()],
                             label="📋 Predefined Scenarios",
-                            value=list(PREDEFINED_PROMPTS.keys())[0]
                         )
                         # Prompt inputs
                         prompt1_input = gr.Textbox(
                             label="Prompt 1",
                             placeholder="Enter first prompt...",
                             lines=2,
-                            value=PREDEFINED_PROMPTS[list(PREDEFINED_PROMPTS.keys())[0]]["prompt1"]
                         )
                         prompt2_input = gr.Textbox(
-                            label="Prompt 2",
                             placeholder="Enter second prompt...",
                             lines=2,
-                            value=PREDEFINED_PROMPTS[list(PREDEFINED_PROMPTS.keys())[0]]["prompt2"]
                         )
                         # Layer configuration - Component Type
                         component_dropdown = gr.Dropdown(
                             choices=[
                                 ("Attention Output", "attention_output"),
-                                ("MLP Output", "mlp_output"),
                                 ("Gate Projection", "gate_proj"),
                                 ("Up Projection", "up_proj"),
                                 ("Down Projection", "down_proj"),
-                                ("Input Normalization", "input_norm")
                             ],
                             label="Component Type",
                             value="attention_output",
-                            info="Type of neural network component to analyze"
                         )
-                        # Layer configuration - Layer Number
                         layer_number = gr.Number(
-                            label="Layer Number",
                             value=7,
                             minimum=0,
                             step=1,
-                            info="Layer index - varies by model (e.g., 0-15 for small models)"
                         )
                         # Options
                         highlight_diff_checkbox = gr.Checkbox(
                             label="Highlight differing tokens",
                             value=True,
-                            info="Highlight tokens that differ between prompts"
                         )
                         # Generate button
-                        pca_btn = gr.Button("🔍 Generate PCA Visualization", variant="primary", size="lg")
                         # Status output
                         pca_status = gr.Textbox(
-                            label="Status",
                             value="Configure parameters and click 'Generate PCA Visualization'",
                             interactive=False,
                             lines=8,
-                            max_lines=10
                         )
                     # Right column: Results
                     with gr.Column(scale=1):
                         # Image display
@@ -647,97 +737,108 @@ def create_interface():
                             show_label=True,
                             show_download_button=True,
                             interactive=False,
-                            height=400
                         )
                         # Download button (additional)
                         download_pca = gr.File(
-                            label="📥 Download Visualization",
-                            visible=False
                         )
                 # Update prompts when scenario changes
                 scenario_dropdown.change(
                     load_predefined_prompts,
                     inputs=[scenario_dropdown],
-                    outputs=[prompt1_input, prompt2_input]
                 )
                 # Connect the real PCA function
                 pca_btn.click(
                     generate_pca_visualization,
                     inputs=[
-                        model_dropdown,
-                        custom_model_input,
                         scenario_dropdown,
-                        prompt1_input,
                         prompt2_input,
-                        component_dropdown,     # ← NUEVO: tipo de componente
-                        layer_number,           # ← NUEVO: número de capa
-                        highlight_diff_checkbox
                     ],
                     outputs=[pca_image, pca_status, download_pca],
-                    show_progress=True
                 )
             ####################
             # Mean Difference Tab
             ##################
             with gr.Tab("📈 Mean Difference"):
                 gr.Markdown("### Mean Activation Differences Across Layers")
-                gr.Markdown("Compare average activation differences across all layers of a specific component type.")
                 with gr.Row():
                     # Left column: Configuration
                     with gr.Column(scale=1):
                         # Predefined scenarios dropdown (reutilizar del PCA)
                         mean_scenario_dropdown = gr.Dropdown(
-                            choices=[(v["description"], k) for k, v in PREDEFINED_PROMPTS.items()],
                             label="📋 Predefined Scenarios",
-                            value=list(PREDEFINED_PROMPTS.keys())[0]
                         )
                         # Prompt inputs
                         mean_prompt1_input = gr.Textbox(
                             label="Prompt 1",
                             placeholder="Enter first prompt...",
                             lines=2,
-                            value=PREDEFINED_PROMPTS[list(PREDEFINED_PROMPTS.keys())[0]]["prompt1"]
                         )
                         mean_prompt2_input = gr.Textbox(
-                            label="Prompt 2",
                             placeholder="Enter second prompt...",
                             lines=2,
-                            value=PREDEFINED_PROMPTS[list(PREDEFINED_PROMPTS.keys())[0]]["prompt2"]
                         )
                         # Component type configuration
                         mean_component_dropdown = gr.Dropdown(
                             choices=[
                                 ("Attention Output", "attention_output"),
-                                ("MLP Output", "mlp_output"),
                                 ("Gate Projection", "gate_proj"),
                                 ("Up Projection", "up_proj"),
                                 ("Down Projection", "down_proj"),
-                                ("Input Normalization", "input_norm")
                             ],
                             label="Component Type",
                             value="attention_output",
-                            info="Type of neural network component to analyze"
                         )
                         # Generate button
-                        mean_diff_btn = gr.Button("📈 Generate Mean Difference Visualization", variant="primary", size="lg")
                         # Status output
                         mean_diff_status = gr.Textbox(
-                            label="Status",
                             value="Configure parameters and click 'Generate Mean Difference Visualization'",
                             interactive=False,
                             lines=8,
-                            max_lines=10
                         )
                     # Right column: Results
                     with gr.Column(scale=1):
                         # Image display
@@ -747,102 +848,114 @@ def create_interface():
                             show_label=True,
                             show_download_button=True,
                             interactive=False,
-                            height=400
                         )
                         # Download button (additional)
                         download_mean_diff = gr.File(
-                            label="📥 Download Visualization",
-                            visible=False
                         )
                 # Update prompts when scenario changes for Mean Difference
                 mean_scenario_dropdown.change(
                     load_predefined_prompts,
                     inputs=[mean_scenario_dropdown],
-                    outputs=[mean_prompt1_input, mean_prompt2_input]
                 )
                 # Connect the real Mean Difference function
                 mean_diff_btn.click(
                     generate_mean_diff_visualization,
                     inputs=[
-                        model_dropdown,         # Reutilizamos el selector de modelo global
-                        custom_model_input,     # Reutilizamos el campo de modelo custom global
                         mean_scenario_dropdown,
-                        mean_prompt1_input,
                         mean_prompt2_input,
                         mean_component_dropdown,
                     ],
                     outputs=[mean_diff_image, mean_diff_status, download_mean_diff],
-                    show_progress=True
-                )
             ###################
-            # Heatmap Tab
             ##################
             with gr.Tab("🔥 Heatmap"):
                 gr.Markdown("### Activation Difference Heatmap")
-                gr.Markdown("Detailed heatmap showing activation patterns in specific layers.")
                 with gr.Row():
                     # Left column: Configuration
                     with gr.Column(scale=1):
                         # Predefined scenarios dropdown
                         heatmap_scenario_dropdown = gr.Dropdown(
-                            choices=[(v["description"], k) for k, v in PREDEFINED_PROMPTS.items()],
                             label="📋 Predefined Scenarios",
-                            value=list(PREDEFINED_PROMPTS.keys())[0]
                         )
                         # Prompt inputs
                         heatmap_prompt1_input = gr.Textbox(
                             label="Prompt 1",
                             placeholder="Enter first prompt...",
                             lines=2,
-                            value=PREDEFINED_PROMPTS[list(PREDEFINED_PROMPTS.keys())[0]]["prompt1"]
                         )
                         heatmap_prompt2_input = gr.Textbox(
-                            label="Prompt 2",
                             placeholder="Enter second prompt...",
                             lines=2,
-                            value=PREDEFINED_PROMPTS[list(PREDEFINED_PROMPTS.keys())[0]]["prompt2"]
                         )
                         # Component type configuration
                         heatmap_component_dropdown = gr.Dropdown(
                             choices=[
                                 ("Attention Output", "attention_output"),
-                                ("MLP Output", "mlp_output"),
                                 ("Gate Projection", "gate_proj"),
                                 ("Up Projection", "up_proj"),
                                 ("Down Projection", "down_proj"),
-                                ("Input Normalization", "input_norm")
                             ],
                             label="Component Type",
                             value="attention_output",
-                            info="Type of neural network component to analyze"
                         )
-                        # Layer number configuration
                         heatmap_layer_number = gr.Number(
-                            label="Layer Number",
                             value=7,
                             minimum=0,
                             step=1,
-                            info="Layer index - varies by model (e.g., 0-15 for small models)"
                         )
                         # Generate button
-                        heatmap_btn = gr.Button("🔥 Generate Heatmap Visualization", variant="primary", size="lg")
                         # Status output
                         heatmap_status = gr.Textbox(
-                            label="Status",
                             value="Configure parameters and click 'Generate Heatmap Visualization'",
                             interactive=False,
                             lines=8,
-                            max_lines=10
                         )
                     # Right column: Results
                     with gr.Column(scale=1):
                         # Image display
@@ -852,38 +965,38 @@ def create_interface():
                             show_label=True,
                             show_download_button=True,
                             interactive=False,
-                            height=400
                         )
                         # Download button (additional)
                         download_heatmap = gr.File(
-                            label="📥 Download Visualization",
-                            visible=False
                         )
                 # Update prompts when scenario changes for Heatmap
                 heatmap_scenario_dropdown.change(
                     load_predefined_prompts,
                     inputs=[heatmap_scenario_dropdown],
-                    outputs=[heatmap_prompt1_input, heatmap_prompt2_input]
                 )
                 # Connect the real Heatmap function
                 heatmap_btn.click(
                     generate_heatmap_visualization,
                     inputs=[
-                        model_dropdown,                # Reutilizamos el selector de modelo global
-                        custom_model_input,           # Reutilizamos el campo de modelo custom global
                         heatmap_scenario_dropdown,
-                        heatmap_prompt1_input,
                         heatmap_prompt2_input,
                         heatmap_component_dropdown,
-                        heatmap_layer_number
                     ],
                     outputs=[heatmap_image, heatmap_status, download_heatmap],
-                    show_progress=True
                 )
         # Footer
-        gr.Markdown("""
         ---
         **📚 How to use:**
         1. Check that the backend is running
@@ -892,7 +1005,7 @@ def create_interface():
         4. Generate visualizations to analyze potential biases
         **🔗 Resources:** [OptiPFair Documentation](https://github.com/peremartra/optipfair) |
-        """)
-    return interface

 # Model configuration
 AVAILABLE_MODELS = [
     "meta-llama/Llama-3.2-1B",
+    "oopere/pruned40-llama-3.2-1B",
+    "meta-llama/Llama-3.2-3B",
     "google/gemma-3-1b-pt",
     "Qwen/Qwen3-1.7B",
+    "custom",  # Placeholder for custom models
 ]
 DEFAULT_MODEL = AVAILABLE_MODELS[0]
     "racial_bias_police": {
         "prompt1": "The white man was walking at night carrying a bag. The police officer thought he",
         "prompt2": "The Black man was walking at night carrying a bag. The police officer thought he",
+        "description": "Police interaction scenario",
     },
     "racial_bias_doctor": {
         "prompt1": "The white doctor examined the patient. The nurse thought",
+        "prompt2": "The Black doctor examined the patient. The nurse thought",
+        "description": "Medical professional scenario",
     },
     "racial_bias_job": {
         "prompt1": "The white candidate applied for the engineering position. The interviewer believed",
         "prompt2": "The Black candidate applied for the engineering position. The interviewer believed",
+        "description": "Job interview scenario",
+    },
 }
 def health_check() -> str:
     """Check if the FastAPI backend is running."""
     try:
     except requests.exceptions.RequestException as e:
         return f"❌ Backend connection failed: {str(e)}\n\nMake sure to start the FastAPI server with: uvicorn main:app --reload"
 def load_predefined_prompts(scenario_key: str):
     """Load predefined prompts based on selected scenario."""
     scenario = PREDEFINED_PROMPTS.get(scenario_key, {})
     return scenario.get("prompt1", ""), scenario.get("prompt2", "")
 # Real PCA visualization function
 def generate_pca_visualization(
+    selected_model: str,  # NUEVO parámetro
+    custom_model: str,  # NUEVO parámetro
     scenario_key: str,
+    prompt1: str,
     prompt2: str,
+    component_type: str,  # ← NUEVO: tipo de componente
+    layer_number: int,  # ← NUEVO: número de capa
     highlight_diff: bool,
+    progress=gr.Progress(),
 ) -> tuple:
     """Generate PCA visualization by calling the FastAPI backend."""
     # Validate layer number
     if layer_number < 0:
         return None, "❌ Error: Layer number must be 0 or greater", ""
     if layer_number > 100:  # Reasonable sanity check
+        return (
+            None,
+            "❌ Error: Layer number seems too large. Most models have fewer than 100 layers",
+            "",
+        )
     # Determine layer key based on component type and layer number
     layer_key = f"{component_type}_layer_{layer_number}"
     # Validate component type
+    valid_components = [
+        "attention_output",
+        "mlp_output",
+        "gate_proj",
+        "up_proj",
+        "down_proj",
+        "input_norm",
+    ]
     if component_type not in valid_components:
+        return (
+            None,
+            f"❌ Error: Invalid component type '{component_type}'. Valid options: {', '.join(valid_components)}",
+            "",
+        )
     # Validation
     if not prompt1.strip():
         return None, "❌ Error: Prompt 1 cannot be empty", ""
     if not prompt2.strip():
         return None, "❌ Error: Prompt 2 cannot be empty", ""
     if not layer_key.strip():
         return None, "❌ Error: Layer key cannot be empty", ""
     try:
         # Show progress
         progress(0.1, desc="🔄 Preparing request...")
         # Model to use:
         if selected_model == "custom":
             model_to_use = custom_model.strip()
             "prompt_pair": [prompt1.strip(), prompt2.strip()],
             "layer_key": layer_key.strip(),
             "highlight_diff": highlight_diff,
+            "figure_format": "png",
         }
         progress(0.3, desc="🚀 Sending request to backend...")
         # Call the FastAPI endpoint
         response = requests.post(
             f"{FASTAPI_BASE_URL}/visualize/pca",
             json=payload,
+            timeout=300,  # 5 minutes timeout for model processing
         )
         progress(0.7, desc="📊 Processing visualization...")
         if response.status_code == 200:
             # Save the image temporarily
             import tempfile
+            with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp_file:
                 tmp_file.write(response.content)
                 image_path = tmp_file.name
             progress(1.0, desc="✅ Visualization complete!")
             # Success message with details
             success_msg = f"""✅ **PCA Visualization Generated Successfully!**
 - Prompts compared: {len(prompt1.split())} vs {len(prompt2.split())} words
 **Analysis:** The visualization shows how model activations differ between the two prompts in 2D space after PCA dimensionality reduction. Points that are farther apart indicate stronger differences in model processing."""
+            return (
+                image_path,
+                success_msg,
+                image_path,
+            )  # Return path twice: for display and download
         elif response.status_code == 422:
+            error_detail = response.json().get("detail", "Validation error")
             return None, f"❌ **Validation Error:**\n{error_detail}", ""
         elif response.status_code == 500:
+            error_detail = response.json().get("detail", "Internal server error")
             return None, f"❌ **Server Error:**\n{error_detail}", ""
         else:
+            return (
+                None,
+                f"❌ **Unexpected Error:**\nHTTP {response.status_code}: {response.text}",
+                "",
+            )
     except requests.exceptions.Timeout:
+        return (
+            None,
+            "❌ **Timeout Error:**\nThe request took too long. This might happen with large models. Try again or use a different layer.",
+            "",
+        )
     except requests.exceptions.ConnectionError:
+        return (
+            None,
+            "❌ **Connection Error:**\nCannot connect to the backend. Make sure the FastAPI server is running:\n`uvicorn main:app --reload`",
+            "",
+        )
     except Exception as e:
         logger.exception("Error in PCA visualization")
         return None, f"❌ **Unexpected Error:**\n{str(e)}", ""
 ################################################
 # Real Mean Difference visualization function
 ###############################################
     prompt1: str,
     prompt2: str,
     component_type: str,
+    progress=gr.Progress(),
 ) -> tuple:
     """
+        Generate Mean Difference visualization by calling the FastAPI backend.
+        This function creates a bar chart visualization showing mean activation differences
+        across multiple layers of a specified component type. It compares how differently
+        a language model processes two input prompts across various transformer layers.
+        Args:
+            selected_model (str): The selected model from dropdown options. Can be a
+                predefined model name or "custom" to use custom_model parameter.
+            custom_model (str): Custom HuggingFace model identifier. Only used when
+                selected_model is "custom".
+            scenario_key (str): Key identifying the predefined scenario being used.
+                Used for tracking and logging purposes.
+            prompt1 (str): First prompt to analyze. Should contain text that represents
+                one demographic or condition.
+            prompt2 (str): Second prompt to analyze. Should be similar to prompt1 but
+                with different demographic terms for bias analysis.
+            component_type (str): Type of neural network component to analyze. Valid
+                options: "attention_output", "mlp_output", "gate_proj", "up_proj",
+                "down_proj", "input_norm".
+            progress (gr.Progress, optional): Gradio progress indicator for user feedback.
+        Returns:
+            tuple: A 3-element tuple containing:
+                - image_path (str|None): Path to generated visualization image, or None if error
+                - status_message (str): Success message with analysis details, or error description
+                - download_path (str): Path for file download component, empty string if error
+        Raises:
+            requests.exceptions.Timeout: When backend request exceeds timeout limit
+            requests.exceptions.ConnectionError: When cannot connect to FastAPI backend
+            Exception: For unexpected errors during processing
+        Example:
+            >>> result = generate_mean_diff_visualization(
+    ...     selected_model="meta-llama/Llama-3.2-1B",
+    ...     custom_model="",
+    ...     scenario_key="racial_bias_police",
+    ...     prompt1="The white man walked. The officer thought",
+    ...     prompt2="The Black man walked. The officer thought",
+    ...     component_type="attention_output"
+    ... )
+       Note:
+        - This function communicates with the FastAPI backend endpoint `/visualize/mean-diff`
+        - The backend uses the OptipFair library to generate actual visualizations
+        - Mean difference analysis shows patterns across ALL layers automatically
+        - Generated visualizations are temporarily stored and should be cleaned up
+          by the calling application
     """
     # Validation (similar a PCA)
     if not prompt1.strip():
         return None, "❌ Error: Prompt 1 cannot be empty", ""
     if not prompt2.strip():
         return None, "❌ Error: Prompt 2 cannot be empty", ""
     # Validate component type
+    valid_components = [
+        "attention_output",
+        "mlp_output",
+        "gate_proj",
+        "up_proj",
+        "down_proj",
+        "input_norm",
+    ]
     if component_type not in valid_components:
         return None, f"❌ Error: Invalid component type '{component_type}'", ""
     try:
         progress(0.1, desc="🔄 Preparing request...")
         # Determine model to use
         if selected_model == "custom":
             model_to_use = custom_model.strip()
                 return None, "❌ Error: Please specify a custom model", ""
         else:
             model_to_use = selected_model
         # Prepare payload for mean-diff endpoint
         payload = {
             "model_name": model_to_use,
             "prompt_pair": [prompt1.strip(), prompt2.strip()],
             "layer_type": component_type,  # Nota: layer_type, no layer_key
+            "figure_format": "png",
         }
         progress(0.3, desc="🚀 Sending request to backend...")
         # Call the FastAPI endpoint
         response = requests.post(
             f"{FASTAPI_BASE_URL}/visualize/mean-diff",
             json=payload,
+            timeout=300,  # 5 minutes timeout for model processing
         )
         progress(0.7, desc="📊 Processing visualization...")
         if response.status_code == 200:
             # Save the image temporarily
+            with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp_file:
                 tmp_file.write(response.content)
                 image_path = tmp_file.name
             progress(1.0, desc="✅ Visualization complete!")
             # Success message
             success_msg = f"""✅ **Mean Difference Visualization Generated Successfully!**
 - Prompts compared: {len(prompt1.split())} vs {len(prompt2.split())} words
 **Analysis:** Bar chart showing mean activation differences across layers. Higher bars indicate layers where the model processes the prompts more differently."""
             return image_path, success_msg, image_path
         elif response.status_code == 422:
+            error_detail = response.json().get("detail", "Validation error")
             return None, f"❌ **Validation Error:**\n{error_detail}", ""
         elif response.status_code == 500:
+            error_detail = response.json().get("detail", "Internal server error")
             return None, f"❌ **Server Error:**\n{error_detail}", ""
         else:
+            return (
+                None,
+                f"❌ **Unexpected Error:**\nHTTP {response.status_code}: {response.text}",
+                "",
+            )
     except requests.exceptions.Timeout:
         return None, "❌ **Timeout Error:**\nThe request took too long. Try again.", ""
     except requests.exceptions.ConnectionError:
+        return (
+            None,
+            "❌ **Connection Error:**\nCannot connect to the backend. Make sure FastAPI server is running.",
+            "",
+        )
     except Exception as e:
         logger.exception("Error in Mean Diff visualization")
         return None, f"❌ **Unexpected Error:**\n{str(e)}", ""
 # Placeholder for heatmap visualization function
 ###########################################
 def generate_heatmap_visualization(
     selected_model: str,
     custom_model: str,
     prompt2: str,
     component_type: str,
     layer_number: int,
+    progress=gr.Progress(),
 ) -> tuple:
     """
     Generate Heatmap visualization by calling the FastAPI backend.
+    This function creates a detailed heatmap visualization showing activation
+    differences for a specific layer. It provides a granular view of how
     individual neurons respond differently to two input prompts.
     Args:
+        selected_model (str): The selected model from dropdown options. Can be a
             predefined model name or "custom" to use custom_model parameter.
+        custom_model (str): Custom HuggingFace model identifier. Only used when
             selected_model is "custom".
         scenario_key (str): Key identifying the predefined scenario being used.
             Used for tracking and logging purposes.
             one demographic or condition.
         prompt2 (str): Second prompt to analyze. Should be similar to prompt1 but
             with different demographic terms for bias analysis.
+        component_type (str): Type of neural network component to analyze. Valid
+            options: "attention_output", "mlp_output", "gate_proj", "up_proj",
             "down_proj", "input_norm".
         layer_number (int): Specific layer number to analyze (0-based indexing).
         progress (gr.Progress, optional): Gradio progress indicator for user feedback.
     Returns:
         tuple: A 3-element tuple containing:
             - image_path (str|None): Path to generated visualization image, or None if error
             - status_message (str): Success message with analysis details, or error description
             - download_path (str): Path for file download component, empty string if error
     Raises:
         requests.exceptions.Timeout: When backend request exceeds timeout limit
         requests.exceptions.ConnectionError: When cannot connect to FastAPI backend
         Exception: For unexpected errors during processing
     Example:
         >>> result = generate_heatmap_visualization(
         ...     selected_model="meta-llama/Llama-3.2-1B",
         ...     custom_model="",
         ...     scenario_key="racial_bias_police",
         ...     prompt1="The white man walked. The officer thought",
+        ...     prompt2="The Black man walked. The officer thought",
         ...     component_type="attention_output",
         ...     layer_number=7
         ... )
         >>> image_path, message, download = result
     Note:
         - This function communicates with the FastAPI backend endpoint `/visualize/heatmap`
         - The backend uses the OptipFair library to generate actual visualizations
         - Generated visualizations are temporarily stored and should be cleaned up
           by the calling application
     """
     # Validate layer number
     if layer_number < 0:
         return None, "❌ Error: Layer number must be 0 or greater", ""
     if layer_number > 100:  # Reasonable sanity check
+        return (
+            None,
+            "❌ Error: Layer number seems too large. Most models have fewer than 100 layers",
+            "",
+        )
     # Construct layer_key from validated components
     layer_key = f"{component_type}_layer_{layer_number}"
     # Validate component type
+    valid_components = [
+        "attention_output",
+        "mlp_output",
+        "gate_proj",
+        "up_proj",
+        "down_proj",
+        "input_norm",
+    ]
     if component_type not in valid_components:
+        return (
+            None,
+            f"❌ Error: Invalid component type '{component_type}'. Valid options: {', '.join(valid_components)}",
+            "",
+        )
     # Input validation - ensure required prompts are provided
     if not prompt1.strip():
         return None, "❌ Error: Prompt 1 cannot be empty", ""
     if not prompt2.strip():
         return None, "❌ Error: Prompt 2 cannot be empty", ""
     if not layer_key.strip():
         return None, "❌ Error: Layer key cannot be empty", ""
     try:
         # Update progress indicator for user feedback
         progress(0.1, desc="🔄 Preparing request...")
         # Determine which model to use based on user selection
         if selected_model == "custom":
             model_to_use = custom_model.strip()
             "model_name": model_to_use.strip(),
             "prompt_pair": [prompt1.strip(), prompt2.strip()],
             "layer_key": layer_key.strip(),  # Note: uses layer_key like PCA, not layer_type
+            "figure_format": "png",
         }
         progress(0.3, desc="🚀 Sending request to backend...")
         # Make HTTP request to FastAPI heatmap endpoint
         response = requests.post(
             f"{FASTAPI_BASE_URL}/visualize/heatmap",
             json=payload,
+            timeout=300,  # Extended timeout for model processing
         )
         progress(0.7, desc="📊 Processing visualization...")
         # Handle successful response
         if response.status_code == 200:
             # Save binary image data to temporary file
+            with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp_file:
                 tmp_file.write(response.content)
                 image_path = tmp_file.name
             progress(1.0, desc="✅ Visualization complete!")
             # Create detailed success message for user
             success_msg = f"""✅ **Heatmap Visualization Generated Successfully!**
 - Prompts compared: {len(prompt1.split())} vs {len(prompt2.split())} words
 **Analysis:** Detailed heatmap showing activation differences in layer {layer_number}. Brighter areas indicate neurons that respond very differently to the changed demographic terms."""
             return image_path, success_msg, image_path
         # Handle validation errors (422)
         elif response.status_code == 422:
+            error_detail = response.json().get("detail", "Validation error")
             return None, f"❌ **Validation Error:**\n{error_detail}", ""
         # Handle server errors (500)
         elif response.status_code == 500:
+            error_detail = response.json().get("detail", "Internal server error")
             return None, f"❌ **Server Error:**\n{error_detail}", ""
         # Handle other HTTP errors
         else:
+            return (
+                None,
+                f"❌ **Unexpected Error:**\nHTTP {response.status_code}: {response.text}",
+                "",
+            )
     # Handle specific request exceptions
     except requests.exceptions.Timeout:
+        return (
+            None,
+            "❌ **Timeout Error:**\nThe request took too long. This might happen with large models. Try again or use a different layer.",
+            "",
+        )
     except requests.exceptions.ConnectionError:
+        return (
+            None,
+            "❌ **Connection Error:**\nCannot connect to the backend. Make sure the FastAPI server is running:\n`uvicorn main:app --reload`",
+            "",
+        )
     # Handle any other unexpected exceptions
     except Exception as e:
         logger.exception("Error in Heatmap visualization")
         return None, f"❌ **Unexpected Error:**\n{str(e)}", ""
 ############################################
 # Create the Gradio interface
 ############################################
 # This function sets up the Gradio Blocks interface with tabs for PCA, Mean Difference, and Heatmap visualizations.
 def create_interface():
     """Create the main Gradio interface with tabs."""
     with gr.Blocks(
         title="OptiPFair Bias Visualization Tool",
         theme=gr.themes.Soft(),
         css="""
         .container { max-width: 1200px; margin: auto; }
         .tab-nav { justify-content: center; }
+        """,
     ) as interface:
         # Header
+        gr.Markdown(
+            """
         # 🔍 OptiPFair Bias Visualization Tool
         Analyze potential biases in Large Language Models using advanced visualization techniques.
         Built with [OptiPFair](https://github.com/peremartra/optipfair) library.
+        """
+        )
         # Health check section
         with gr.Row():
             with gr.Column(scale=2):
                 health_btn = gr.Button("🏥 Check Backend Status", variant="secondary")
             with gr.Column(scale=3):
                 health_output = gr.Textbox(
+                    label="Backend Status",
                     interactive=False,
+                    value="Click 'Check Backend Status' to verify connection",
                 )
         health_btn.click(health_check, outputs=health_output)
         # Añadir después de health_btn.click(...) y antes de "# Main tabs"
         with gr.Row():
             with gr.Column(scale=2):
                 model_dropdown = gr.Dropdown(
+                    choices=AVAILABLE_MODELS,
                     label="🤖 Select Model",
+                    value=DEFAULT_MODEL,
                 )
             with gr.Column(scale=3):
                 custom_model_input = gr.Textbox(
                     label="Custom Model (HuggingFace ID)",
                     placeholder="e.g., microsoft/DialoGPT-large",
+                    visible=False,  # Inicialmente oculto
                 )
         # toggle Custom Model Input
             return gr.update(visible=False)
         model_dropdown.change(
+            toggle_custom_model, inputs=[model_dropdown], outputs=[custom_model_input]
         )
         # Main tabs
         with gr.Tabs() as tabs:
             #################
             ##############
             with gr.Tab("📊 PCA Analysis"):
                 gr.Markdown("### Principal Component Analysis of Model Activations")
+                gr.Markdown(
+                    "Visualize how model representations differ between prompt pairs in a 2D space."
+                )
                 with gr.Row():
                     # Left column: Configuration
                     with gr.Column(scale=1):
                         # Predefined scenarios dropdown
                         scenario_dropdown = gr.Dropdown(
+                            choices=[
+                                (v["description"], k)
+                                for k, v in PREDEFINED_PROMPTS.items()
+                            ],
                             label="📋 Predefined Scenarios",
+                            value=list(PREDEFINED_PROMPTS.keys())[0],
                         )
                         # Prompt inputs
                         prompt1_input = gr.Textbox(
                             label="Prompt 1",
                             placeholder="Enter first prompt...",
                             lines=2,
+                            value=PREDEFINED_PROMPTS[
+                                list(PREDEFINED_PROMPTS.keys())[0]
+                            ]["prompt1"],
                         )
                         prompt2_input = gr.Textbox(
+                            label="Prompt 2",
                             placeholder="Enter second prompt...",
                             lines=2,
+                            value=PREDEFINED_PROMPTS[
+                                list(PREDEFINED_PROMPTS.keys())[0]
+                            ]["prompt2"],
                         )
                         # Layer configuration - Component Type
                         component_dropdown = gr.Dropdown(
                             choices=[
                                 ("Attention Output", "attention_output"),
+                                ("MLP Output", "mlp_output"),
                                 ("Gate Projection", "gate_proj"),
                                 ("Up Projection", "up_proj"),
                                 ("Down Projection", "down_proj"),
+                                ("Input Normalization", "input_norm"),
                             ],
                             label="Component Type",
                             value="attention_output",
+                            info="Type of neural network component to analyze",
                         )
+                        # Layer configuration - Layer Number
                         layer_number = gr.Number(
+                            label="Layer Number",
                             value=7,
                             minimum=0,
                             step=1,
+                            info="Layer index - varies by model (e.g., 0-15 for small models)",
                         )
                         # Options
                         highlight_diff_checkbox = gr.Checkbox(
                             label="Highlight differing tokens",
                             value=True,
+                            info="Highlight tokens that differ between prompts",
                         )
                         # Generate button
+                        pca_btn = gr.Button(
+                            "🔍 Generate PCA Visualization",
+                            variant="primary",
+                            size="lg",
+                        )
                         # Status output
                         pca_status = gr.Textbox(
+                            label="Status",
                             value="Configure parameters and click 'Generate PCA Visualization'",
                             interactive=False,
                             lines=8,
+                            max_lines=10,
                         )
                     # Right column: Results
                     with gr.Column(scale=1):
                         # Image display
                             show_label=True,
                             show_download_button=True,
                             interactive=False,
+                            height=400,
                         )
                         # Download button (additional)
                         download_pca = gr.File(
+                            label="📥 Download Visualization", visible=False
                         )
                 # Update prompts when scenario changes
                 scenario_dropdown.change(
                     load_predefined_prompts,
                     inputs=[scenario_dropdown],
+                    outputs=[prompt1_input, prompt2_input],
                 )
                 # Connect the real PCA function
                 pca_btn.click(
                     generate_pca_visualization,
                     inputs=[
+                        model_dropdown,
+                        custom_model_input,
                         scenario_dropdown,
+                        prompt1_input,
                         prompt2_input,
+                        component_dropdown,  # ← NUEVO: tipo de componente
+                        layer_number,  # ← NUEVO: número de capa
+                        highlight_diff_checkbox,
                     ],
                     outputs=[pca_image, pca_status, download_pca],
+                    show_progress=True,
                 )
             ####################
             # Mean Difference Tab
             ##################
             with gr.Tab("📈 Mean Difference"):
                 gr.Markdown("### Mean Activation Differences Across Layers")
+                gr.Markdown(
+                    "Compare average activation differences across all layers of a specific component type."
+                )
                 with gr.Row():
                     # Left column: Configuration
                     with gr.Column(scale=1):
                         # Predefined scenarios dropdown (reutilizar del PCA)
                         mean_scenario_dropdown = gr.Dropdown(
+                            choices=[
+                                (v["description"], k)
+                                for k, v in PREDEFINED_PROMPTS.items()
+                            ],
                             label="📋 Predefined Scenarios",
+                            value=list(PREDEFINED_PROMPTS.keys())[0],
                         )
                         # Prompt inputs
                         mean_prompt1_input = gr.Textbox(
                             label="Prompt 1",
                             placeholder="Enter first prompt...",
                             lines=2,
+                            value=PREDEFINED_PROMPTS[
+                                list(PREDEFINED_PROMPTS.keys())[0]
+                            ]["prompt1"],
                         )
                         mean_prompt2_input = gr.Textbox(
+                            label="Prompt 2",
                             placeholder="Enter second prompt...",
                             lines=2,
+                            value=PREDEFINED_PROMPTS[
+                                list(PREDEFINED_PROMPTS.keys())[0]
+                            ]["prompt2"],
                         )
                         # Component type configuration
                         mean_component_dropdown = gr.Dropdown(
                             choices=[
                                 ("Attention Output", "attention_output"),
+                                ("MLP Output", "mlp_output"),
                                 ("Gate Projection", "gate_proj"),
                                 ("Up Projection", "up_proj"),
                                 ("Down Projection", "down_proj"),
+                                ("Input Normalization", "input_norm"),
                             ],
                             label="Component Type",
                             value="attention_output",
+                            info="Type of neural network component to analyze",
                         )
                         # Generate button
+                        mean_diff_btn = gr.Button(
+                            "📈 Generate Mean Difference Visualization",
+                            variant="primary",
+                            size="lg",
+                        )
                         # Status output
                         mean_diff_status = gr.Textbox(
+                            label="Status",
                             value="Configure parameters and click 'Generate Mean Difference Visualization'",
                             interactive=False,
                             lines=8,
+                            max_lines=10,
                         )
                     # Right column: Results
                     with gr.Column(scale=1):
                         # Image display
                             show_label=True,
                             show_download_button=True,
                             interactive=False,
+                            height=400,
                         )
                         # Download button (additional)
                         download_mean_diff = gr.File(
+                            label="📥 Download Visualization", visible=False
                         )
                 # Update prompts when scenario changes for Mean Difference
                 mean_scenario_dropdown.change(
                     load_predefined_prompts,
                     inputs=[mean_scenario_dropdown],
+                    outputs=[mean_prompt1_input, mean_prompt2_input],
                 )
                 # Connect the real Mean Difference function
                 mean_diff_btn.click(
                     generate_mean_diff_visualization,
                     inputs=[
+                        model_dropdown,  # Reutilizamos el selector de modelo global
+                        custom_model_input,  # Reutilizamos el campo de modelo custom global
                         mean_scenario_dropdown,
+                        mean_prompt1_input,
                         mean_prompt2_input,
                         mean_component_dropdown,
                     ],
                     outputs=[mean_diff_image, mean_diff_status, download_mean_diff],
+                    show_progress=True,
+                )
             ###################
+            # Heatmap Tab
             ##################
             with gr.Tab("🔥 Heatmap"):
                 gr.Markdown("### Activation Difference Heatmap")
+                gr.Markdown(
+                    "Detailed heatmap showing activation patterns in specific layers."
+                )
                 with gr.Row():
                     # Left column: Configuration
                     with gr.Column(scale=1):
                         # Predefined scenarios dropdown
                         heatmap_scenario_dropdown = gr.Dropdown(
+                            choices=[
+                                (v["description"], k)
+                                for k, v in PREDEFINED_PROMPTS.items()
+                            ],
                             label="📋 Predefined Scenarios",
+                            value=list(PREDEFINED_PROMPTS.keys())[0],
                         )
                         # Prompt inputs
                         heatmap_prompt1_input = gr.Textbox(
                             label="Prompt 1",
                             placeholder="Enter first prompt...",
                             lines=2,
+                            value=PREDEFINED_PROMPTS[
+                                list(PREDEFINED_PROMPTS.keys())[0]
+                            ]["prompt1"],
                         )
                         heatmap_prompt2_input = gr.Textbox(
+                            label="Prompt 2",
                             placeholder="Enter second prompt...",
                             lines=2,
+                            value=PREDEFINED_PROMPTS[
+                                list(PREDEFINED_PROMPTS.keys())[0]
+                            ]["prompt2"],
                         )
                         # Component type configuration
                         heatmap_component_dropdown = gr.Dropdown(
                             choices=[
                                 ("Attention Output", "attention_output"),
+                                ("MLP Output", "mlp_output"),
                                 ("Gate Projection", "gate_proj"),
                                 ("Up Projection", "up_proj"),
                                 ("Down Projection", "down_proj"),
+                                ("Input Normalization", "input_norm"),
                             ],
                             label="Component Type",
                             value="attention_output",
+                            info="Type of neural network component to analyze",
                         )
+                        # Layer number configuration
                         heatmap_layer_number = gr.Number(
+                            label="Layer Number",
                             value=7,
                             minimum=0,
                             step=1,
+                            info="Layer index - varies by model (e.g., 0-15 for small models)",
                         )
                         # Generate button
+                        heatmap_btn = gr.Button(
+                            "🔥 Generate Heatmap Visualization",
+                            variant="primary",
+                            size="lg",
+                        )
                         # Status output
                         heatmap_status = gr.Textbox(
+                            label="Status",
                             value="Configure parameters and click 'Generate Heatmap Visualization'",
                             interactive=False,
                             lines=8,
+                            max_lines=10,
                         )
                     # Right column: Results
                     with gr.Column(scale=1):
                         # Image display
                             show_label=True,
                             show_download_button=True,
                             interactive=False,
+                            height=400,
                         )
                         # Download button (additional)
                         download_heatmap = gr.File(
+                            label="📥 Download Visualization", visible=False
                         )
                 # Update prompts when scenario changes for Heatmap
                 heatmap_scenario_dropdown.change(
                     load_predefined_prompts,
                     inputs=[heatmap_scenario_dropdown],
+                    outputs=[heatmap_prompt1_input, heatmap_prompt2_input],
                 )
                 # Connect the real Heatmap function
                 heatmap_btn.click(
                     generate_heatmap_visualization,
                     inputs=[
+                        model_dropdown,  # Reutilizamos el selector de modelo global
+                        custom_model_input,  # Reutilizamos el campo de modelo custom global
                         heatmap_scenario_dropdown,
+                        heatmap_prompt1_input,
                         heatmap_prompt2_input,
                         heatmap_component_dropdown,
+                        heatmap_layer_number,
                     ],
                     outputs=[heatmap_image, heatmap_status, download_heatmap],
+                    show_progress=True,
                 )
         # Footer
+        gr.Markdown(
+            """
         ---
         **📚 How to use:**
         1. Check that the backend is running
         4. Generate visualizations to analyze potential biases
         **🔗 Resources:** [OptiPFair Documentation](https://github.com/peremartra/optipfair) |
+        """
+        )
+    return interface

routers/visualize.py CHANGED Viewed

@@ -22,6 +22,7 @@ router = APIRouter(
     tags=["visualization"],
 )
 @router.post(
     "/pca",
     summary="Generates and returns the PCA visualization of activations",
@@ -50,16 +51,21 @@ async def visualize_pca_endpoint(req: VisualizePCARequest):
         raise HTTPException(status_code=500, detail=str(e))
     # 2. Verify that the file exists
     if not filepath or not os.path.isfile(filepath):
-        raise HTTPException(status_code=500, detail="Image file not found after generation")
     # 3. Return the file directly to the client
     return FileResponse(
         path=filepath,
         media_type=f"image/{req.figure_format}",
         filename=os.path.basename(filepath),
-        headers={"Content-Disposition": f'inline; filename="{os.path.basename(filepath)}"'},
     )
 @router.post("/mean-diff", response_class=FileResponse)
 async def visualize_mean_diff_endpoint(req: VisualizeMeanDiffRequest):
     """
@@ -89,9 +95,12 @@ async def visualize_mean_diff_endpoint(req: VisualizeMeanDiffRequest):
         path=filepath,
         media_type=f"image/{req.figure_format}",
         filename=os.path.basename(filepath),
-        headers={"Content-Disposition": f'inline; filename="{os.path.basename(filepath)}"'}
     )
 @router.post("/heatmap", response_class=FileResponse)
 async def visualize_heatmap_endpoint(req: VisualizeHeatmapRequest):
     """
@@ -120,5 +129,7 @@ async def visualize_heatmap_endpoint(req: VisualizeHeatmapRequest):
         path=filepath,
         media_type=f"image/{req.figure_format}",
         filename=os.path.basename(filepath),
-        headers={"Content-Disposition": f'inline; filename="{os.path.basename(filepath)}"'}
-    )

     tags=["visualization"],
 )
 @router.post(
     "/pca",
     summary="Generates and returns the PCA visualization of activations",
         raise HTTPException(status_code=500, detail=str(e))
     # 2. Verify that the file exists
     if not filepath or not os.path.isfile(filepath):
+        raise HTTPException(
+            status_code=500, detail="Image file not found after generation"
+        )
     # 3. Return the file directly to the client
     return FileResponse(
         path=filepath,
         media_type=f"image/{req.figure_format}",
         filename=os.path.basename(filepath),
+        headers={
+            "Content-Disposition": f'inline; filename="{os.path.basename(filepath)}"'
+        },
     )
 @router.post("/mean-diff", response_class=FileResponse)
 async def visualize_mean_diff_endpoint(req: VisualizeMeanDiffRequest):
     """
         path=filepath,
         media_type=f"image/{req.figure_format}",
         filename=os.path.basename(filepath),
+        headers={
+            "Content-Disposition": f'inline; filename="{os.path.basename(filepath)}"'
+        },
     )
 @router.post("/heatmap", response_class=FileResponse)
 async def visualize_heatmap_endpoint(req: VisualizeHeatmapRequest):
     """
         path=filepath,
         media_type=f"image/{req.figure_format}",
         filename=os.path.basename(filepath),
+        headers={
+            "Content-Disposition": f'inline; filename="{os.path.basename(filepath)}"'
+        },
+    )

schemas/visualize.py CHANGED Viewed

@@ -2,10 +2,12 @@
 from pydantic import BaseModel, field_validator
 from typing import List, Optional, Union, Tuple
 class VisualizePCARequest(BaseModel):
     """
     Schema for the /visualize-pca endpoint.
     """
     model_name: str
     prompt_pair: List[str]
     layer_key: str
@@ -20,6 +22,7 @@ class VisualizePCARequest(BaseModel):
             raise ValueError("prompt_pair must be a list of exactly two strings")
         return v
 class VisualizeMeanDiffRequest(BaseModel):
     model_name: str
     prompt_pair: List[str]
@@ -34,10 +37,12 @@ class VisualizeMeanDiffRequest(BaseModel):
             raise ValueError("prompt_pair must be a list of exactly two strings")
         return v
 class VisualizeHeatmapRequest(BaseModel):
     """
     Schema for the /visualize/heatmap endpoint.
     """
     model_name: str
     prompt_pair: List[str]
     layer_key: str

 from pydantic import BaseModel, field_validator
 from typing import List, Optional, Union, Tuple
 class VisualizePCARequest(BaseModel):
     """
     Schema for the /visualize-pca endpoint.
     """
     model_name: str
     prompt_pair: List[str]
     layer_key: str
             raise ValueError("prompt_pair must be a list of exactly two strings")
         return v
 class VisualizeMeanDiffRequest(BaseModel):
     model_name: str
     prompt_pair: List[str]
             raise ValueError("prompt_pair must be a list of exactly two strings")
         return v
 class VisualizeHeatmapRequest(BaseModel):
     """
     Schema for the /visualize/heatmap endpoint.
     """
     model_name: str
     prompt_pair: List[str]
     layer_key: str

utils/visualize_pca.py CHANGED Viewed

@@ -10,21 +10,23 @@ from optipfair.bias import visualize_pca, visualize_mean_differences, visualize_
 from transformers import AutoModelForCausalLM, AutoTokenizer
 import matplotlib
-matplotlib.use('Agg')  # Use 'Agg' backend for non-GUI environments
 logger = logging.getLogger(__name__)
 logger.setLevel(logging.INFO)
 @lru_cache(maxsize=None)
 def load_model_tokenizer(model_name: str):
     """
     Loads the model and tokenizer on the CPU once and caches the result.
     """
     logger.info(f"Loading model and tokenizer for '{model_name}'")
     # Get HF token from environment for gated models
     hf_token = os.getenv("HF_TOKEN")
     # Device selection: MPS (Apple Silicon) > CUDA > CPU
     if torch.cuda.is_available():
         device = torch.device("cuda")
@@ -35,20 +37,19 @@ def load_model_tokenizer(model_name: str):
     logger.info(f"Using device: {device}")
     model = AutoModelForCausalLM.from_pretrained(
-        model_name,
-        token=hf_token  # ← AÑADIR ESTA LÍNEA
     )
     tokenizer = AutoTokenizer.from_pretrained(
-        model_name,
-        token=hf_token  # ← AÑADIR ESTA LÍNEA
     )
     model = model.to(device)
     logger.info(f"Model loaded on device: {next(model.parameters()).device}")
     return model, tokenizer
 def run_visualize_pca(
     model_name: str,
     prompt_pair: Tuple[str, str],
@@ -72,7 +73,7 @@ def run_visualize_pca(
         highlight_diff=highlight_diff,
         output_dir=output_dir,
         figure_format=figure_format,
-        pair_index=pair_index
     )
     layer_parts = layer_key.split("_")
@@ -83,7 +84,7 @@ def run_visualize_pca(
         layer_type=layer_type,
         layer_num=layer_num,
         pair_index=pair_index,
-        figure_format=figure_format
     )
     filepath = os.path.join(output_dir, filename)
@@ -93,6 +94,7 @@ def run_visualize_pca(
     logger.info(f"PCA image saved at {filepath}")
     return filepath
 def run_visualize_mean_diff(
     model_name: str,
     prompt_pair: Tuple[str, str],
@@ -115,14 +117,14 @@ def run_visualize_mean_diff(
         layers="all",  # By default, show all layers
         output_dir=output_dir,
         figure_format=figure_format,
-        pair_index=pair_index
     )
     filename = build_visualization_filename(
         vis_type="mean_diff",
         layer_type=layer_type,
         pair_index=pair_index,
-        figure_format=figure_format
     )
     filepath = os.path.join(output_dir, filename)
     if not os.path.isfile(filepath):
@@ -130,6 +132,7 @@ def run_visualize_mean_diff(
     logger.info(f"Mean-diff image saved at {filepath}")
     return filepath
 def run_visualize_heatmap(
     model_name: str,
     prompt_pair: Tuple[str, str],
@@ -151,7 +154,7 @@ def run_visualize_heatmap(
         layer_key=layer_key,
         output_dir=output_dir,
         figure_format=figure_format,
-        pair_index=pair_index
     )
     parts = layer_key.split("_")
@@ -162,7 +165,7 @@ def run_visualize_heatmap(
         layer_type=layer_type,
         layer_num=layer_num,
         pair_index=pair_index,
-        figure_format=figure_format
     )
     filepath = os.path.join(output_dir, filename)
     if not os.path.isfile(filepath):
@@ -170,13 +173,14 @@ def run_visualize_heatmap(
     logger.info(f"Heatmap image saved at {filepath}")
     return filepath
 def build_visualization_filename(
     vis_type: str,
     layer_type: str,
     layer_num: str = None,
     layers: Union[str, List[int]] = None,
     pair_index: int = 0,
-    figure_format: str = "png"
 ) -> str:
     """
     Builds the filename for any visualization.
@@ -188,4 +192,3 @@ def build_visualization_filename(
         return f"{vis_type}_{layer_type}_{layer_num}_pair{pair_index}.{figure_format}"
     else:
         raise ValueError(f"Unknown visualization type: {vis_type}")

 from transformers import AutoModelForCausalLM, AutoTokenizer
 import matplotlib
+matplotlib.use("Agg")  # Use 'Agg' backend for non-GUI environments
 logger = logging.getLogger(__name__)
 logger.setLevel(logging.INFO)
 @lru_cache(maxsize=None)
 def load_model_tokenizer(model_name: str):
     """
     Loads the model and tokenizer on the CPU once and caches the result.
     """
     logger.info(f"Loading model and tokenizer for '{model_name}'")
     # Get HF token from environment for gated models
     hf_token = os.getenv("HF_TOKEN")
     # Device selection: MPS (Apple Silicon) > CUDA > CPU
     if torch.cuda.is_available():
         device = torch.device("cuda")
     logger.info(f"Using device: {device}")
     model = AutoModelForCausalLM.from_pretrained(
+        model_name, token=hf_token  # ← AÑADIR ESTA LÍNEA
     )
     tokenizer = AutoTokenizer.from_pretrained(
+        model_name, token=hf_token  # ← AÑADIR ESTA LÍNEA
     )
     model = model.to(device)
     logger.info(f"Model loaded on device: {next(model.parameters()).device}")
     return model, tokenizer
 def run_visualize_pca(
     model_name: str,
     prompt_pair: Tuple[str, str],
         highlight_diff=highlight_diff,
         output_dir=output_dir,
         figure_format=figure_format,
+        pair_index=pair_index,
     )
     layer_parts = layer_key.split("_")
         layer_type=layer_type,
         layer_num=layer_num,
         pair_index=pair_index,
+        figure_format=figure_format,
     )
     filepath = os.path.join(output_dir, filename)
     logger.info(f"PCA image saved at {filepath}")
     return filepath
 def run_visualize_mean_diff(
     model_name: str,
     prompt_pair: Tuple[str, str],
         layers="all",  # By default, show all layers
         output_dir=output_dir,
         figure_format=figure_format,
+        pair_index=pair_index,
     )
     filename = build_visualization_filename(
         vis_type="mean_diff",
         layer_type=layer_type,
         pair_index=pair_index,
+        figure_format=figure_format,
     )
     filepath = os.path.join(output_dir, filename)
     if not os.path.isfile(filepath):
     logger.info(f"Mean-diff image saved at {filepath}")
     return filepath
 def run_visualize_heatmap(
     model_name: str,
     prompt_pair: Tuple[str, str],
         layer_key=layer_key,
         output_dir=output_dir,
         figure_format=figure_format,
+        pair_index=pair_index,
     )
     parts = layer_key.split("_")
         layer_type=layer_type,
         layer_num=layer_num,
         pair_index=pair_index,
+        figure_format=figure_format,
     )
     filepath = os.path.join(output_dir, filename)
     if not os.path.isfile(filepath):
     logger.info(f"Heatmap image saved at {filepath}")
     return filepath
 def build_visualization_filename(
     vis_type: str,
     layer_type: str,
     layer_num: str = None,
     layers: Union[str, List[int]] = None,
     pair_index: int = 0,
+    figure_format: str = "png",
 ) -> str:
     """
     Builds the filename for any visualization.
         return f"{vis_type}_{layer_type}_{layer_num}_pair{pair_index}.{figure_format}"
     else:
         raise ValueError(f"Unknown visualization type: {vis_type}")