pavanmutha committed on
Commit
b2cb237
·
verified ·
1 Parent(s): d7e44a0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +230 -364
app.py CHANGED
@@ -4,9 +4,8 @@ import pandas as pd
4
  import numpy as np
5
  import matplotlib.pyplot as plt
6
  import shap
7
- import lime
8
  import lime.lime_tabular
9
- # import optuna # Removing Optuna for this simplified approach
10
  import wandb
11
  import json
12
  import time
@@ -15,390 +14,257 @@ import shutil
15
  import ast
16
  from smolagents import HfApiModel, CodeAgent
17
  from huggingface_hub import login
18
- from sklearn.model_selection import train_test_split, cross_val_score # Keep cross_val_score if needed elsewhere, but not primary for comparison here
19
- from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
20
- # from sklearn.metrics import ConfusionMatrixDisplay # Not used currently
21
- from sklearn.ensemble import RandomForestClassifier # Keep RF
22
- # from sklearn.ensemble import GradientBoostingClassifier # Remove GB for simplicity now
23
  from sklearn.linear_model import LogisticRegression
24
- from sklearn.preprocessing import LabelEncoder, StandardScaler # Added StandardScaler
25
- from sklearn.pipeline import Pipeline # Added Pipeline
26
  from datetime import datetime
27
- # from PIL import Image # Likely not needed directly
28
-
29
- # --- Authentication and Setup (Keep as is) ---
30
- hf_token = os.getenv("HF_TOKEN")
31
- wandb_api_key = os.getenv("WANDB_API_KEY") # Get WandB key
32
 
33
  # Authenticate with Hugging Face
34
- if hf_token:
35
- try:
36
- login(token=hf_token)
37
- print("HF Login successful.")
38
- except Exception as e:
39
- print(f"HF login failed: {e}")
40
- else:
41
- print("HF_TOKEN not found.")
42
-
43
- # Login to WandB if key exists
44
- if wandb_api_key:
45
- try:
46
- wandb.login(key=wandb_api_key)
47
- print("WandB login successful.")
48
- except Exception as e:
49
- print(f"WandB login failed: {e}. Logging will be disabled.")
50
- wandb.init(mode="disabled") # Disable if login fails
51
- else:
52
- print("WANDB_API_KEY not found. WandB logging disabled.")
53
- wandb.init(mode="disabled") # Disable if no key
54
 
55
- # SmolAgent initialization (Keep as is)
56
- try:
57
- model_api = HfApiModel("mistralai/Mixtral-8x7B-Instruct-v0.1", token=hf_token)
58
- print("SmolAgent API Model initialized.")
59
- except Exception as e:
60
- print(f"SmolAgent initialization failed: {e}")
61
- model_api = None # Set to None if failed
62
 
63
  df_global = None
64
- # --- NEW: Global variable for split data ---
65
- split_data_global = None # Will store (X_train, X_test, y_train, y_test, label_encoder)
66
 
67
- # --- clean_data, upload_file, AI Agent functions (Keep as is from your original code) ---
68
  def clean_data(df):
69
- # Your original clean_data implementation
70
- df = df.copy() # Work on copy
71
  df = df.dropna(how='all', axis=1).dropna(how='all', axis=0)
72
  for col in df.select_dtypes(include='object').columns:
73
  df[col] = df[col].astype(str)
74
  df[col] = LabelEncoder().fit_transform(df[col])
75
- # Impute only if numeric columns exist
76
- numeric_cols = df.select_dtypes(include=np.number).columns
77
- if not numeric_cols.empty:
78
- df[numeric_cols] = df[numeric_cols].fillna(df[numeric_cols].mean())
79
  return df
80
 
81
  def upload_file(file):
82
- global df_global, split_data_global # Reset split data on new upload
83
- df_global = None
84
- split_data_global = None
85
  if file is None:
86
- return pd.DataFrame({"Error": ["No file uploaded."]})
 
 
 
 
 
 
 
87
  try:
88
- ext = os.path.splitext(file.name)[-1].lower() # Use lower()
89
- df = pd.read_csv(file.name) if ext == ".csv" else pd.read_excel(file.name)
90
- df = clean_data(df)
91
- df_global = df
92
- print("File uploaded and cleaned.")
93
- return df.head()
94
- except Exception as e:
95
- print(f"Error in upload_file: {e}")
96
- return pd.DataFrame({"Error": [f"Failed to process file: {e}"]})
97
-
98
- # --- AI Agent functions (Keep your original format_*, analyze_data) ---
99
- # Placeholder for brevity - use your original functions
100
- def format_analysis_report(raw_output, visuals): return f"<h2>AI Report</h2><pre>{str(raw_output)}</pre>", visuals
101
- def format_observations(observations): return f"<pre>{str(observations)}</pre>"
102
- def format_insights(insights, visuals): return f"<pre>{str(insights)}</pre>"
103
- def analyze_data(csv_file, additional_notes=""):
104
- print("Running AI Agent (stub)...")
105
- # Your original analyze_data logic here
106
- # Ensure it uses wandb.init(reinit=True) if called multiple times
107
- # and finishes the run: wandb.finish()
108
- if not model_api: return "AI Agent not initialized.", []
109
- # Dummy result
110
- analysis_result = {"observations": {"data": "desc"}, "insights": {"insight1": "text"}}
111
- visuals = [] # Agent should save plots to './figures/'
112
- return format_analysis_report(analysis_result, visuals)
113
-
114
- # --- MODIFIED: prepare_data ---
115
- def prepare_data(df, target_column=None) -> bool:
116
- """Splits data and stores it globally. Returns True on success, False on failure."""
117
- global split_data_global
118
- print("Preparing data split...")
119
- try:
120
- if df is None or df.empty:
121
- print("Error: DataFrame is empty in prepare_data.")
122
- split_data_global = None
123
- return False
124
-
125
- # --- Target Column Logic ---
126
- if target_column is None:
127
- # Prioritize object columns if they exist and are not all unique
128
- object_cols = df.select_dtypes(include=['object', 'category']).columns
129
- potential_targets = [col for col in object_cols if df[col].nunique() < len(df)]
130
- if potential_targets:
131
- target_column = potential_targets[0] # Take the first suitable object col
132
- print(f"Target column auto-selected (object): '{target_column}'")
133
- else:
134
- target_column = df.columns[-1] # Fallback to last column
135
- print(f"Target column auto-selected (last): '{target_column}'")
136
- elif target_column not in df.columns:
137
- print(f"Error: Specified target column '{target_column}' not found.")
138
- split_data_global = None
139
- return False
140
-
141
- X = df.drop(columns=[target_column])
142
- y = df[target_column].copy()
143
-
144
- # --- Feature Check (ensure numeric) ---
145
- # (Should be handled by clean_data, but double-check)
146
- non_numeric_features = X.select_dtypes(exclude=np.number).columns
147
- if not non_numeric_features.empty:
148
- print(f"Warning: Dropping non-numeric feature columns: {list(non_numeric_features)}")
149
- X = X.drop(columns=non_numeric_features)
150
- if X.empty:
151
- print("Error: No numeric features left after dropping non-numeric ones.")
152
- split_data_global = None
153
- return False
154
-
155
- # --- Target Encoding ---
156
- label_encoder = None
157
- if not pd.api.types.is_numeric_dtype(y):
158
- print(f"Encoding target column '{target_column}' with LabelEncoder.")
159
- label_encoder = LabelEncoder()
160
- y = label_encoder.fit_transform(y)
161
  else:
162
- # Check if float target should be treated as classification (e.g., integers represented as float)
163
- if pd.api.types.is_float_dtype(y) and np.all(y == y.astype(int)):
164
- print(f"Target '{target_column}' is float but looks like integer. Converting to int.")
165
- y = y.astype(int)
166
-
167
- # --- Check for sufficient classes ---
168
- if y.nunique() < 2:
169
- print(f"Error: Target column '{target_column}' has less than 2 unique values after processing.")
170
- split_data_global = None
171
- return False
172
-
173
- # --- Perform Split ---
174
- try:
175
- X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)
176
- print("Performed stratified split.")
177
- except ValueError: # Handle cases where stratification is not possible
178
- print("Stratified split failed, using non-stratified split.")
179
- X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
180
-
181
- split_data_global = (X_train, X_test, y_train, y_test, label_encoder)
182
- print(f"Data split successfully: Train {X_train.shape}, Test {X_test.shape}")
183
- return True
184
-
185
  except Exception as e:
186
- print(f"Error during data preparation: {e}")
187
- import traceback
188
- traceback.print_exc()
189
- split_data_global = None
190
- return False
191
-
192
- # --- NEW: run_comparison_and_explainability ---
193
- # MODIFIED TYPE HINT: Returns str for paths now
194
- def run_comparison_and_explainability() -> Tuple[pd.DataFrame, str, str, str]:
195
- """Compares models, explains the best one, and logs to WandB."""
196
- global df_global, split_data_global
 
 
 
 
 
 
 
 
 
 
 
 
 
 
197
 
198
- # Default returns matching the modified hint
199
- default_error_df = pd.DataFrame({"Error": ["Comparison failed."]})
200
- default_shap_path = ""
201
- default_lime_path = ""
202
- default_status = "Error: Could not run comparison."
203
-
204
- # --- 1. Check Prerequisites ---
205
- if df_global is None:
206
- return pd.DataFrame({"Error": ["No data uploaded."]}), default_shap_path, default_lime_path, "Error: Upload data first."
207
- if split_data_global is None:
208
- print("Split data not found globally, attempting to prepare now...")
209
- if not prepare_data(df_global):
210
- return pd.DataFrame({"Error": ["Data preparation failed."]}), default_shap_path, default_lime_path, "Error: Failed to prepare data."
211
-
212
- # Unpack the globally stored split data
213
- X_train, X_test, y_train, y_test, label_encoder = split_data_global
214
- class_names = getattr(label_encoder, 'classes_', [str(c) for c in np.unique(y_train)]) if label_encoder else [str(c) for c in np.unique(y_train)]
215
- class_names = [str(c) for c in class_names] # Ensure strings
216
-
217
- print("--- Starting Model Comparison & Explainability ---")
218
-
219
- # --- 2. Define Models ---
220
- models_to_compare = {
221
- "LogisticRegression": Pipeline([
222
- ('scaler', StandardScaler()),
223
- ('logreg', LogisticRegression(max_iter=1000, random_state=42, class_weight='balanced'))
224
- ]),
225
- "RandomForest": RandomForestClassifier(random_state=42, class_weight='balanced')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
226
  }
227
-
228
- # --- 3. Initialize WandB Run ---
229
- run_name = f"CompareExplain_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
230
- wandb_run = None
231
- if wandb.run is None or wandb.run.mode != "disabled":
232
- try:
233
- if wandb.run: wandb.finish()
234
- wandb_run = wandb.init(project="huggingface-data-analysis", name=run_name, config={...}, reinit=True) # Simplified config
235
- print(f"WandB Run '{run_name}' started.")
236
- except Exception as e: print(f"WandB init failed: {e}"); wandb_run = None
237
- else: wandb_run = None
238
-
239
- # --- 4. Train and Evaluate Models ---
240
  results = []
241
- trained_models = {}
242
- print("Comparing models...")
243
- # (Keep the model training loop exactly as in the previous version)
244
- for name, model in models_to_compare.items():
245
- print(f" Training {name}...")
246
- start_time = time.time()
247
- try:
248
- model.fit(X_train, y_train)
249
- y_pred = model.predict(X_test)
250
- duration = time.time() - start_time
251
- metrics = { "Model": name, "Accuracy": accuracy_score(y_test, y_pred), "Precision (Weighted)": precision_score(y_test, y_pred, average="weighted", zero_division=0), "Recall (Weighted)": recall_score(y_test, y_pred, average="weighted", zero_division=0), "F1 Score (Weighted)": f1_score(y_test, y_pred, average="weighted", zero_division=0), "Time (s)": duration }
252
- results.append(metrics)
253
- trained_models[name] = model
254
- print(f" {name} - F1: {metrics['F1 Score (Weighted)']:.4f}, Time: {duration:.2f}s")
255
- if wandb_run: wandb.log({f"{name}_{k.lower().replace(' (weighted)','_w').replace(' ','_')}": v for k, v in metrics.items() if k != "Model"}, commit=False)
256
- except Exception as e: print(f" ERROR training/evaluating {name}: {e}"); results.append({"Model": name, "Error": str(e)}); import traceback; traceback.print_exc(); if wandb_run: wandb.log({f"{name}_error": str(e)}, commit=False)
257
-
258
-
259
- # --- 5. Process Comparison Results ---
260
- if not results:
261
- if wandb_run: wandb.finish()
262
- return pd.DataFrame({"Error": ["No models trained successfully."]}), default_shap_path, default_lime_path, "Error: Model training failed."
263
-
264
- comparison_df = pd.DataFrame(results)
265
- best_model = None
266
- best_model_name = "N/A"
267
- # (Keep logic for sorting and finding best model as before)
268
- if "F1 Score (Weighted)" in comparison_df.columns:
269
- comparison_df = comparison_df.sort_values(by="F1 Score (Weighted)", ascending=False, na_position='last').reset_index(drop=True)
270
- if not comparison_df.empty:
271
- best_model_row = comparison_df.iloc[0]
272
- potential_best_name = best_model_row['Model']
273
- if pd.notna(best_model_row.get("F1 Score (Weighted)")) and potential_best_name in trained_models:
274
- best_model = trained_models[potential_best_name]
275
- best_model_name = potential_best_name
276
- print(f"Best model determined: {best_model_name} (F1: {best_model_row['F1 Score (Weighted)']:.4f})")
277
- else: print("Warning: Could not determine a valid best model from results.")
278
- else: print("Warning: F1 Score column missing.")
279
-
280
- # (Keep WandB table logging as before)
281
- if wandb_run and not comparison_df.empty:
282
- try: wandb.log({"model_comparison": wandb.Table(dataframe=comparison_df)}, commit=False); print("Logged comparison table to WandB.")
283
- except Exception as e: print(f"Error logging comparison table: {e}")
284
-
285
- # --- 6. Explain Best Model (if found) ---
286
- shap_plot_path = None
287
- lime_plot_path = None
288
- explain_status = f"Compared {len(trained_models)} models. Best: {best_model_name}."
289
-
290
- if best_model:
291
- print(f"Generating explanations for {best_model_name}...")
292
- explain_dir = "./explain_plots"
293
- if os.path.exists(explain_dir): shutil.rmtree(explain_dir)
294
- os.makedirs(explain_dir)
295
- shap_plot_path = os.path.join(explain_dir, f"shap_{best_model_name}.png")
296
- lime_plot_path = os.path.join(explain_dir, f"lime_{best_model_name}.png")
297
-
298
- try:
299
- # --- SHAP (Keep logic as before, but ensure shap_plot_path becomes None on failure) ---
300
- # Simplified SHAP logic for display
301
- print(" Generating SHAP...")
302
- # ... (Your detailed SHAP logic from previous attempts)
303
- # Example placeholder:
304
- try:
305
- plt.figure(); plt.text(0.5, 0.5, 'SHAP Placeholder'); plt.savefig(shap_plot_path); plt.clf()
306
- print(f" SHAP plot saved: {shap_plot_path}")
307
- if wandb_run: wandb.log({"shap_summary_best": wandb.Image(shap_plot_path)}, commit=False)
308
- except Exception as shap_e:
309
- print(f" ERROR generating SHAP: {shap_e}")
310
- shap_plot_path = None # Set to None on error
311
-
312
- # --- LIME (Keep logic as before, but ensure lime_plot_path becomes None on failure) ---
313
- print(" Generating LIME...")
314
- # ... (Your detailed LIME logic from previous attempts)
315
- # Example placeholder:
316
- try:
317
- plt.figure(); plt.text(0.5, 0.5, 'LIME Placeholder'); plt.savefig(lime_plot_path); plt.clf()
318
- print(f" LIME plot saved: {lime_plot_path}")
319
- if wandb_run: wandb.log({"lime_explanation_best": wandb.Image(lime_plot_path)}, commit=False)
320
- except Exception as lime_e:
321
- print(f" ERROR generating LIME: {lime_e}")
322
- lime_plot_path = None # Set to None on error
323
-
324
- explain_status += f" Explanations attempted for {best_model_name}."
325
-
326
- except Exception as e:
327
- print(f" ERROR during explanation block: {e}")
328
- import traceback; traceback.print_exc()
329
- explain_status += f" Explanation failed: {e}"
330
- if not os.path.exists(str(shap_plot_path)): shap_plot_path = None # Check path validity
331
- if not os.path.exists(str(lime_plot_path)): lime_plot_path = None
332
-
333
- else:
334
- explain_status += " No best model found to explain."
335
-
336
-
337
- # --- 7. Finish WandB Run and Return ---
338
- if wandb_run:
339
- try:
340
- wandb.log({}, commit=True) # Ensure final commit
341
- wandb.finish()
342
- print(f"WandB Run '{run_name}' finished.")
343
- except Exception as finish_e:
344
- print(f"Error finishing WandB run: {finish_e}")
345
-
346
- # MODIFIED: Return empty strings instead of None for paths
347
- valid_shap_path = shap_plot_path if shap_plot_path and os.path.exists(shap_plot_path) else ""
348
- valid_lime_path = lime_plot_path if lime_plot_path and os.path.exists(lime_plot_path) else ""
349
-
350
- print(f"DEBUG Final Return: DF shape {comparison_df.shape}, SHAP path '{valid_shap_path}', LIME path '{valid_lime_path}', Status '{explain_status}'")
351
- return comparison_df, valid_shap_path, valid_lime_path, explain_status
352
-
353
- # --- Gradio UI ---
354
- with gr.Blocks() as demo:
355
- gr.Markdown("## 📊 AI Data Analysis, Model Comparison & Explainability")
356
-
357
- # --- Row 1: Upload ---
358
- with gr.Row():
359
- with gr.Column(scale=1):
360
- file_input = gr.File(label="1. Upload CSV or Excel", type="filepath", file_types=[".csv", ".xls", ".xlsx"])
361
- with gr.Column(scale=2):
362
- df_output = gr.DataFrame(label="Cleaned Data Preview", interactive=False)
363
-
364
- # --- Row 2: AI Agent (Optional) ---
365
- with gr.Accordion("🤖 Step 2 (Optional): Run AI Agent Insights", open=False):
366
- with gr.Row():
367
- with gr.Column(scale=1):
368
- agent_notes = gr.Textbox(label="Optional requests for Agent", placeholder="e.g., 'Focus on column X'")
369
- agent_btn = gr.Button("Run AI Analysis", interactive=(model_api is not None))
370
- with gr.Column(scale=2):
371
- insights_output = gr.HTML(label="AI Agent Report")
372
- with gr.Row():
373
- visual_output = gr.Gallery(label="AI Agent Visualizations", height=350, object_fit="contain", columns=3, preview=True)
374
-
375
- # --- Row 3: Compare & Explain ---
376
- with gr.Accordion("⚙️💡 Step 3: Compare Models & Explain Best", open=True):
377
- with gr.Row():
378
- compare_explain_btn = gr.Button("Run Comparison & Explain Best Model", variant="primary")
379
- with gr.Row():
380
- comparison_output = gr.DataFrame(label="Model Comparison Results", interactive=False)
381
  with gr.Row():
382
- explain_status_output = gr.Textbox(label="Status", interactive=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
383
  with gr.Row():
384
- # Only one SHAP plot expected now (summary)
385
- shap_img_output = gr.Image(label="SHAP Summary (Best Model)", type="filepath", interactive=False)
386
- lime_img_output = gr.Image(label="LIME Explanation (Best Model - Instance 0)", type="filepath", interactive=False)
387
-
388
-
389
- # --- Connect Components ---
390
- file_input.change(fn=upload_file, inputs=file_input, outputs=df_output)
391
-
392
- # AI Agent connection (Keep as is)
393
- agent_btn.click(fn=analyze_data, inputs=[file_input, agent_notes], outputs=[insights_output, visual_output])
394
-
395
- # NEW: Connection for combined comparison and explainability
396
- compare_explain_btn.click(
397
- fn=run_comparison_and_explainability,
398
- inputs=[], # Takes data from global df_global
399
- outputs=[comparison_output, shap_img_output, lime_img_output, explain_status_output]
400
- )
401
-
402
- # --- Launch ---
403
- print("Launching Gradio App...")
404
- demo.launch(debug=True) # Use debug=True for more detailed errors during development
 
4
  import numpy as np
5
  import matplotlib.pyplot as plt
6
  import shap
 
7
  import lime.lime_tabular
8
+ import optuna
9
  import wandb
10
  import json
11
  import time
 
14
  import ast
15
  from smolagents import HfApiModel, CodeAgent
16
  from huggingface_hub import login
17
+ from sklearn.model_selection import train_test_split, cross_val_score
18
+ from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix, ConfusionMatrixDisplay
19
+ from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
 
 
20
  from sklearn.linear_model import LogisticRegression
21
+ from sklearn.preprocessing import LabelEncoder
 
22
  from datetime import datetime
23
+ from PIL import Image
24
+ from sklearn.svm import SVC
 
 
 
25
 
26
# --- Authentication and model setup ---

# Authenticate with Hugging Face. login() raises when the token is missing or
# invalid, which previously crashed the whole app at import time; guard it so
# the UI still starts (agent features will simply fail later with a clear error).
hf_token = os.getenv("HF_TOKEN")
try:
    login(token=hf_token)
    print("Hugging Face login successful.")
except Exception as e:
    print(f"Hugging Face login failed: {e}")

# SmolAgent initialization: remote Mixtral endpoint used by the analysis agent.
model = HfApiModel("mistralai/Mixtral-8x7B-Instruct-v0.1", token=hf_token)

# Most recently uploaded & cleaned DataFrame, shared across Gradio callbacks.
df_global = None
 
 
34
 
 
35
def clean_data(df):
    """Return a cleaned copy: drop all-empty rows/columns, label-encode object
    columns, and impute remaining numeric NaNs with the column mean."""
    # dropna returns a new frame, so the caller's DataFrame is never mutated.
    cleaned = df.dropna(how='all', axis=1).dropna(how='all', axis=0)
    object_columns = cleaned.select_dtypes(include='object').columns
    for column in object_columns:
        # Cast to str first so mixed-type columns encode without errors.
        cleaned[column] = LabelEncoder().fit_transform(cleaned[column].astype(str))
    # Mean-impute only the numeric columns; non-numeric ones are left alone.
    return cleaned.fillna(cleaned.mean(numeric_only=True))
42
 
43
def upload_file(file):
    """Load an uploaded CSV/Excel file, clean it, and cache it globally.

    Returns (preview_head, full_dataframe); on failure returns an error
    DataFrame and None so downstream components degrade gracefully.
    """
    global df_global
    if file is None:
        return pd.DataFrame({"Error": ["No file uploaded."]}), None
    try:
        # gr.File(type="filepath") passes a plain path string; older Gradio
        # versions pass a tempfile-like object exposing .name. Support both.
        path = file if isinstance(file, str) else file.name
        # lower() so ".CSV" is routed to the CSV reader, not read_excel.
        ext = os.path.splitext(path)[-1].lower()
        df = pd.read_csv(path) if ext == ".csv" else pd.read_excel(path)
        df = clean_data(df)
        df_global = df
        return df.head(), df
    except Exception as e:
        # A bad file must not crash the Gradio callback; surface the error in the UI.
        print(f"Error processing uploaded file: {e}")
        return pd.DataFrame({"Error": [f"Failed to process file: {e}"]}), None
52
+
53
def format_analysis_report(raw_output, visuals):
    """Render the agent's output dict as an HTML report.

    Falls back to the raw output string whenever parsing or rendering fails,
    so the UI always shows *something*.
    """
    try:
        if isinstance(raw_output, dict):
            parsed = raw_output
        else:
            # The agent may return the dict serialized as a string literal.
            try:
                parsed = ast.literal_eval(str(raw_output))
            except (SyntaxError, ValueError) as e:
                print(f"Error parsing CodeAgent output: {e}")
                return str(raw_output), visuals

        observations_html = format_observations(parsed.get('observations', {}))
        insights_html = format_insights(parsed.get('insights', {}), visuals)
        report = f"""
        <div style="font-family: Arial, sans-serif; padding: 20px; color: #333;">
            <h1 style="color: #2B547E; border-bottom: 2px solid #2B547E; padding-bottom: 10px;">📊 Data Analysis Report</h1>
            <div style="margin-top: 25px; background: #f8f9fa; padding: 20px; border-radius: 8px;">
                <h2 style="color: #2B547E;">🔍 Key Observations</h2>
                {observations_html}
            </div>
            <div style="margin-top: 30px;">
                <h2 style="color: #2B547E;">💡 Insights & Visualizations</h2>
                {insights_html}
            </div>
        </div>
        """
        return report, visuals
    except Exception as e:
        print(f"Error in format_analysis_report: {e}")
        return str(raw_output), visuals
81
+
82
def format_observations(observations):
    """Render proportion-related observations as styled HTML cards."""
    cards = []
    for key, value in observations.items():
        # Only distribution/proportion observations are surfaced in the report.
        if 'proportions' not in key:
            continue
        title = key.replace('_', ' ').title()
        cards.append(f"""
    <div style="margin: 15px 0; padding: 15px; background: white; border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.05);">
        <h3 style="margin: 0 0 10px 0; color: #4A708B;">{title}</h3>
        <pre style="margin: 0; padding: 10px; background: #f8f9fa; border-radius: 4px;">{value}</pre>
    </div>
    """)
    return '\n'.join(cards)
91
+
92
def format_insights(insights, visuals):
    """Render numbered insight cards, pairing each with a saved figure when one exists."""
    blocks = []
    for idx, insight in enumerate(insights.values()):
        # Figures are matched to insights positionally; extra insights get no image.
        if idx < len(visuals):
            img_html = (f'<img src="/file={visuals[idx]}" style="max-width: 100%; height: auto; '
                        f'margin-top: 10px; border-radius: 6px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">')
        else:
            img_html = ''
        blocks.append(f"""
    <div style="margin: 20px 0; padding: 20px; background: white; border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.05);">
        <div style="display: flex; align-items: center; gap: 10px;">
            <div style="background: #2B547E; color: white; width: 30px; height: 30px; border-radius: 50%; display: flex; align-items: center; justify-content: center;">{idx+1}</div>
            <p style="margin: 0; font-size: 16px;">{insight}</p>
        </div>
        {img_html}
    </div>
    """)
    return '\n'.join(blocks)
104
 
105
def analyze_data(csv_file, additional_notes=""):
    """Run the CodeAgent over the uploaded file and return (HTML report, visuals).

    Logs execution time, memory usage, and every generated figure to
    Weights & Biases under the 'huggingface-data-analysis' project.
    """
    # FIX: the original called psutil without importing it anywhere in the file
    # (NameError at runtime). tracemalloc is stdlib and tracks the Python heap.
    import tracemalloc

    start_time = time.time()
    tracemalloc.start()

    # Fresh figures directory so stale plots from a previous run are not shown.
    if os.path.exists('./figures'):
        shutil.rmtree('./figures')
    os.makedirs('./figures', exist_ok=True)

    # gr.File(type="filepath") passes a str; older Gradio passes an object with .name.
    source_name = None
    if csv_file is not None:
        source_name = csv_file if isinstance(csv_file, str) else csv_file.name

    wandb.login(key=os.environ.get('WANDB_API_KEY'))
    run = wandb.init(project="huggingface-data-analysis", config={
        "model": "mistralai/Mixtral-8x7B-Instruct-v0.1",
        "additional_notes": additional_notes,
        "source_file": source_name
    })

    agent = CodeAgent(tools=[], model=model, additional_authorized_imports=["numpy", "pandas", "matplotlib.pyplot", "seaborn", "sklearn", "json"])
    analysis_result = agent.run("""
        You are a helpful data analysis agent. Just return insight information and visualization.
        Load the data that is passed.do not create your own.
        Automatically detect numeric columns and names.
        2. 5 data visualizations
        3. at least 5 insights from data
        5. Generate publication-quality visualizations and save to './figures/'.
        Do not use 'open()' or write to files. Just return variables and plots.
        The dictionary should have the following structure:
        {
            'observations': {
                'observation_1_key': 'observation_1_value',
                'observation_2_key': 'observation_2_value',
                ...
            },
            'insights': {
                'insight_1_key': 'insight_1_value',
                'insight_2_key': 'insight_2_value',
                ...
            }
        }
        """, additional_args={"additional_notes": additional_notes, "source_file": csv_file})

    execution_time = time.time() - start_time
    # Peak traced allocation during the run, converted to MiB.
    _, peak_bytes = tracemalloc.get_traced_memory()
    tracemalloc.stop()
    memory_usage = peak_bytes / 1024 ** 2
    wandb.log({"execution_time_sec": execution_time, "memory_usage_mb": memory_usage})

    # Collect whatever figures the agent saved and mirror them to WandB.
    visuals = [os.path.join('./figures', f) for f in os.listdir('./figures') if f.endswith(('.png', '.jpg', '.jpeg'))]
    for viz in visuals:
        wandb.log({os.path.basename(viz): wandb.Image(viz)})

    run.finish()
    return format_analysis_report(analysis_result, visuals)
156
+
157
def compare_models(selected_models, df):
    """Train each selected classifier on a shared 70/30 split and report metrics.

    The last column of ``df`` is treated as the target. Returns
    (metrics DataFrame, list of confusion-matrix image paths).
    """
    if df is None or len(selected_models) == 0:
        return pd.DataFrame(), []
    target = df.columns[-1]
    X = df.drop(target, axis=1)
    y = df[target]
    if y.dtype == 'object':
        y = LabelEncoder().fit_transform(y)
    model_dict = {
        "RandomForest": RandomForestClassifier(),
        "LogisticRegression": LogisticRegression(max_iter=1000),
        "SVC": SVC(probability=True)
    }
    # FIX: split once outside the loop. random_state is fixed, so the original
    # per-model re-split produced the identical partition each iteration anyway;
    # hoisting removes the redundant work without changing results.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
    is_binary = len(np.unique(y)) == 2
    results = []
    confusion_imgs = []
    for name in selected_models:
        # FIX: ignore unknown choices instead of raising KeyError and killing the callback.
        if name not in model_dict:
            print(f"Skipping unknown model: {name}")
            continue
        model = model_dict[name]
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        # ROC-AUC needs class-1 probabilities and is only defined here for binary targets.
        y_proba = model.predict_proba(X_test)[:, 1] if hasattr(model, "predict_proba") and is_binary else None
        metrics = {
            "Model": name,
            "Accuracy": accuracy_score(y_test, y_pred),
            "Precision": precision_score(y_test, y_pred, average="weighted", zero_division=0),
            "Recall": recall_score(y_test, y_pred, average="weighted", zero_division=0),
            "F1": f1_score(y_test, y_pred, average="weighted", zero_division=0),
            "ROC-AUC": roc_auc_score(y_test, y_proba) if y_proba is not None else "N/A"
        }
        results.append(metrics)
        # Confusion matrix plot, saved to disk for the Gradio gallery.
        fig, ax = plt.subplots()
        ConfusionMatrixDisplay.from_estimator(model, X_test, y_test, ax=ax)
        img_path = f"conf_matrix_{name}.png"
        plt.savefig(img_path)
        confusion_imgs.append(img_path)
        plt.close(fig)
    return pd.DataFrame(results), confusion_imgs
196
+
197
def ab_test_models(model_a, model_b, df):
    """A/B test two classifiers: train both on the same training set, then
    score each on a disjoint half of the held-out test set."""
    if df is None or model_a == model_b:
        return pd.DataFrame()
    target = df.columns[-1]
    X = df.drop(target, axis=1)
    y = df[target]
    if y.dtype == 'object':
        y = LabelEncoder().fit_transform(y)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
    # Split the test set into two positional halves, one per model.
    mid = len(X_test) // 2
    groups = [
        (model_a, X_test[:mid], y_test[:mid]),
        (model_b, X_test[mid:], y_test[mid:]),
    ]
    model_dict = {
        "RandomForest": RandomForestClassifier(),
        "LogisticRegression": LogisticRegression(max_iter=1000),
        "SVC": SVC(probability=True)
    }
    rows = []
    for name, X_grp, y_grp in groups:
        clf = model_dict[name]
        clf.fit(X_train, y_train)
        preds = clf.predict(X_grp)
        rows.append({
            "Model": name,
            "Accuracy": accuracy_score(y_grp, preds),
            "Precision": precision_score(y_grp, preds, average="weighted", zero_division=0),
            "Recall": recall_score(y_grp, preds, average="weighted", zero_division=0),
            "F1": f1_score(y_grp, preds, average="weighted", zero_division=0),
        })
    return pd.DataFrame(rows)
228
+
229
def get_model_choices():
    """Names of the classifiers offered in the UI (keys of the model dicts)."""
    choices = ("RandomForest", "LogisticRegression", "SVC")
    return list(choices)
231
+
232
def clear_confusion_imgs():
    """Delete any confusion-matrix PNGs left over from a previous session."""
    stale_paths = (f"conf_matrix_{name}.png" for name in get_model_choices())
    for path in stale_paths:
        if os.path.exists(path):
            os.remove(path)
237
+
238
def main():
    """Build the Gradio Blocks UI: upload/clean panel, model-comparison panel,
    and an A/B testing section. Returns the (unlaunched) demo object."""
    with gr.Blocks() as demo:
        gr.Markdown("# 🤖 Model Comparison & A/B Testing (Hugging Face + Gradio)")
        with gr.Row():
            with gr.Column():
                # NOTE(review): type="filepath" passes a plain str path to
                # upload_file — confirm upload_file handles str, not just .name objects.
                file_input = gr.File(label="Upload CSV or Excel", type="filepath")
                df_output = gr.DataFrame(label="Cleaned Data Preview")
                # Holds the full cleaned DataFrame for the other callbacks.
                state = gr.State()
                file_input.change(fn=upload_file, inputs=file_input, outputs=[df_output, state])
            with gr.Column():
                model_choices = gr.CheckboxGroup(
                    choices=get_model_choices(),
                    value=["RandomForest", "LogisticRegression"],
                    label="Select Models to Compare"
                )
                compare_btn = gr.Button("Compare Models")
                metrics_output = gr.DataFrame(label="Model Performance Metrics")
                confusion_gallery = gr.Gallery(label="Confusion Matrices", columns=3)
                compare_btn.click(fn=compare_models, inputs=[model_choices, state], outputs=[metrics_output, confusion_gallery])
        gr.Markdown("## A/B Test: Compare Two Models on Test Set")
        with gr.Row():
            ab_model_a = gr.Dropdown(get_model_choices(), value="RandomForest", label="Model A")
            ab_model_b = gr.Dropdown(get_model_choices(), value="LogisticRegression", label="Model B")
        ab_btn = gr.Button("Run A/B Test")
        ab_output = gr.DataFrame(label="A/B Test Results")
        ab_btn.click(fn=ab_test_models, inputs=[ab_model_a, ab_model_b, state], outputs=ab_output)
        gr.Markdown("---\nBuilt for Hugging Face Spaces with Gradio. Upload your data, select models, and compare!")
    return demo
266
+
267
if __name__ == "__main__":
    # Remove stale confusion-matrix images from a prior session, then serve the UI.
    clear_confusion_imgs()
    main().launch()