shapely

Running

App Files Files Community

Wajahat698 committed on Jul 31, 2025

Commit

8774de6

verified ·

1 Parent(s): 9274bd2

Update app.py

Browse files

Files changed (1) hide show

app.py +91 -31

app.py CHANGED Viewed

@@ -237,9 +237,9 @@ def calculate_r2_image(r2_percent):
     </div>
     """
-def create_avg_target_display(avg_target):
     """
-    Create average target (Purchase Consideration) visualization.
     """
     fig, ax = plt.subplots(figsize=(3.6, 3.6))
@@ -249,12 +249,12 @@ def create_avg_target_display(avg_target):
     ax.text(0.5, 0.5, f"{avg_target:.1f}",
             ha='center', va='center', fontsize=24, fontweight='bold')
-    ax.text(0.5, 0.2, "Scale: 1-6",
             ha='center', va='center', fontsize=10, color='gray')
     ax.set_xlim(0, 1)
     ax.set_ylim(0, 1)
-    ax.set_title("Avg Target", fontsize=11, pad=10)
     ax.axis('off')
     fig.patch.set_facecolor('none')
@@ -281,25 +281,76 @@ def create_error_message(message):
     </div>
     """
-def call_r_script_for_consideration(input_file, csv_output_path):
     """
-    Call R script for Shapley regression analysis specifically for Consideration.
     """
-    # Create temporary files for all outputs (even though we only need consideration)
     temp_dir = os.path.dirname(csv_output_path)
     text_output_path = os.path.join(temp_dir, "output.txt")
     csv_output_path_trust = os.path.join(temp_dir, "trust.csv")
     csv_output_path_nps = os.path.join(temp_dir, "nps.csv")
     csv_output_path_loyalty = os.path.join(temp_dir, "loyalty.csv")
     csv_output_path_satisfaction = os.path.join(temp_dir, "satisfaction.csv")
     csv_output_path_trustbuilder = os.path.join(temp_dir, "trustbuilder.csv")
-    # Set the boolean flags - we only want consideration analysis
-    nps_present = False
-    loyalty_present = False
-    consideration_present = True  # This is what we want
-    satisfaction_present = False
     trustbuilder_present = False
     command = [
         "Rscript",
@@ -309,10 +360,10 @@ def call_r_script_for_consideration(input_file, csv_output_path):
         csv_output_path_trust,
         csv_output_path_nps,
         csv_output_path_loyalty,
-        csv_output_path,  # This is our consideration output
         csv_output_path_satisfaction,
         csv_output_path_trustbuilder,
-        str(nps_present).upper(),  # Convert to "TRUE"/"FALSE"
         str(loyalty_present).upper(),
         str(consideration_present).upper(),
         str(satisfaction_present).upper(),
@@ -322,6 +373,11 @@ def call_r_script_for_consideration(input_file, csv_output_path):
     try:
         result = subprocess.run(command, check=True, capture_output=True, text=True)
         logger.info("R script executed successfully")
         return True
     except subprocess.CalledProcessError as e:
         logger.error("R script failed with error: %s", e)
@@ -334,7 +390,7 @@ def call_r_script_for_consideration(input_file, csv_output_path):
 def analyze_prospects_data(file_path):
     """
-    Analyze prospects data focusing on Purchase Consideration as target.
     """
     if file_path is None:
         return create_error_message("No file provided"), None, None, None
@@ -353,11 +409,15 @@ def analyze_prospects_data(file_path):
             logger.error(f"Missing factor columns: {missing_factors}")
             return create_error_message(f"Missing required columns: {missing_factors}"), None, None, None
-        # Check if Consideration column exists
-        if "Consideration" not in df.columns:
-            logger.error("Consideration column not found in dataset")
             logger.info(f"Available columns: {list(df.columns)}")
-            return create_error_message(f"Consideration column not found. Available columns: {list(df.columns)}"), None, None, None
         # Map column names from trust buckets to factors
         column_mapping = {
@@ -375,10 +435,10 @@ def analyze_prospects_data(file_path):
             if old_name in df_analysis.columns:
                 df_analysis.rename(columns={old_name: new_name}, inplace=True)
-        # Calculate R² for Consideration model
         factors = ["Factor 1", "Factor 2", "Factor 3", "Factor 4", "Factor 5", "Factor 6"]
         X = df_analysis[factors].dropna()
-        y = df.loc[X.index, "Consideration"]  # Use Consideration as target
         # Remove any remaining NaN values
         valid_mask = ~y.isna()
@@ -394,24 +454,24 @@ def analyze_prospects_data(file_path):
         r2 = r2_score(y, model.predict(X))
         r2_percent = r2 * 100
-        # Calculate average target (Consideration)
         avg_target = y.mean()
-        logger.info(f"R² Score: {r2_percent:.1f}%, Average Consideration: {avg_target:.1f}")
         # Create visualizations
         r2_html = calculate_r2_image(r2_percent)
-        avg_target_html = create_avg_target_display(avg_target)
         # Factor performance plot
         factor_performance_img = plot_factor_performance(df_analysis, "Factor Performance (Agreement Scores)")
-        # Run Shapley analysis on Consideration
         temp_dir = tempfile.mkdtemp()
-        csv_output_path = os.path.join(temp_dir, "consideration_results.csv")
         # Call R script with proper parameters
-        r_success = call_r_script_for_consideration(file_path, csv_output_path)
         if not r_success:
             # Clean up and return error
@@ -461,7 +521,7 @@ def analyze_prospects_data(file_path):
         driver_analysis_img = plot_driver_analysis(
             results_df,
             average_value,
-            "Shapley Driver Analysis - Purchase Consideration"
         )
         # Clean up
@@ -513,12 +573,12 @@ function refresh() {
 with gr.Blocks(css=css, js=js, theme=gr.themes.Soft()) as demo:
     gr.Markdown("""
         <h2 style="text-align: center; font-size: 2.25rem; font-weight: 600;">
-            Driver Analysis - Purchase Consideration
         </h2>
     """)
-    gr.Markdown("### Purchase Consideration Analysis")
-    gr.Markdown("Analysis showing what drives Purchase Consideration among prospects using Factors 1-6")
     # File upload section
     with gr.Row():

     </div>
     """
+def create_avg_target_display(avg_target, target_name, scale_info):
     """
+    Create average target visualization.
     """
     fig, ax = plt.subplots(figsize=(3.6, 3.6))
     ax.text(0.5, 0.5, f"{avg_target:.1f}",
             ha='center', va='center', fontsize=24, fontweight='bold')
+    ax.text(0.5, 0.2, scale_info,
             ha='center', va='center', fontsize=10, color='gray')
     ax.set_xlim(0, 1)
     ax.set_ylim(0, 1)
+    ax.set_title(f"Avg {target_name}", fontsize=11, pad=10)
     ax.axis('off')
     fig.patch.set_facecolor('none')
     </div>
     """
def find_target_column(df):
    """
    Find the best target column in the dataset.

    Priority: Consideration -> Trust -> NPS -> Loyalty

    Matching is attempted in three passes, each pass honouring the priority
    order above:
      1. exact column name,
      2. case-insensitive column name,
      3. column name containing a target keyword
         ('consider', 'trust', 'nps', 'loyal').

    Args:
        df: Object exposing a ``columns`` iterable of column labels
            (e.g. a pandas DataFrame).

    Returns:
        A ``(actual_column, target_name, scale_info)`` tuple, where
        ``actual_column`` is the label as it appears in ``df``,
        ``target_name`` is the canonical target name and ``scale_info`` is
        the display string for its scale. Returns ``(None, None, None)``
        when no column matches.
    """
    # (canonical name, scale label, keyword used for partial matching),
    # listed in priority order.
    target_priorities = [
        ("Consideration", "Scale: 1-6", "consider"),
        ("Trust", "Scale: 1-10", "trust"),
        ("NPS", "Scale: 0-10", "nps"),
        ("Loyalty", "Scale: 1-10", "loyal"),
    ]
    columns = list(df.columns)

    # Pass 1: exact matches.
    for target, scale, _keyword in target_priorities:
        if target in columns:
            return target, target, scale

    # Pass 2: case-insensitive matches. str() guards against non-string
    # labels (e.g. integer headers), which have no .lower().
    columns_by_lower = {str(col).lower(): col for col in columns}
    for target, scale, _keyword in target_priorities:
        target_lower = target.lower()
        if target_lower in columns_by_lower:
            return columns_by_lower[target_lower], target, scale

    # Pass 3: partial matches. Iterate targets in the outer loop so that a
    # higher-priority target wins regardless of column order in the file.
    for target, scale, keyword in target_priorities:
        for col in columns:
            if keyword in str(col).lower():
                return col, target, scale

    return None, None, None
+def call_r_script_for_target(input_file, csv_output_path, target_type):
+    """
+    Call R script for Shapley regression analysis for any target type.
+    """
+    # Create temporary files for all outputs
     temp_dir = os.path.dirname(csv_output_path)
     text_output_path = os.path.join(temp_dir, "output.txt")
     csv_output_path_trust = os.path.join(temp_dir, "trust.csv")
     csv_output_path_nps = os.path.join(temp_dir, "nps.csv")
     csv_output_path_loyalty = os.path.join(temp_dir, "loyalty.csv")
+    csv_output_path_consideration = os.path.join(temp_dir, "consideration.csv")
     csv_output_path_satisfaction = os.path.join(temp_dir, "satisfaction.csv")
     csv_output_path_trustbuilder = os.path.join(temp_dir, "trustbuilder.csv")
+    # Set the boolean flags based on target type
+    nps_present = (target_type.lower() == "nps")
+    loyalty_present = (target_type.lower() == "loyalty")
+    consideration_present = (target_type.lower() == "consideration")
+    satisfaction_present = (target_type.lower() == "satisfaction")
     trustbuilder_present = False
+    # Map output file based on target type
+    target_output_map = {
+        "consideration": csv_output_path_consideration,
+        "trust": csv_output_path_trust,
+        "nps": csv_output_path_nps,
+        "loyalty": csv_output_path_loyalty,
+    }
+    target_csv_path = target_output_map.get(target_type.lower(), csv_output_path_consideration)
     command = [
         "Rscript",
         csv_output_path_trust,
         csv_output_path_nps,
         csv_output_path_loyalty,
+        csv_output_path_consideration,
         csv_output_path_satisfaction,
         csv_output_path_trustbuilder,
+        str(nps_present).upper(),
         str(loyalty_present).upper(),
         str(consideration_present).upper(),
         str(satisfaction_present).upper(),
     try:
         result = subprocess.run(command, check=True, capture_output=True, text=True)
         logger.info("R script executed successfully")
+        # Copy the target-specific result to our expected output path
+        if os.path.exists(target_csv_path) and target_csv_path != csv_output_path:
+            shutil.copy2(target_csv_path, csv_output_path)
         return True
     except subprocess.CalledProcessError as e:
         logger.error("R script failed with error: %s", e)
 def analyze_prospects_data(file_path):
     """
+    Analyze prospects data with flexible target detection.
     """
     if file_path is None:
         return create_error_message("No file provided"), None, None, None
             logger.error(f"Missing factor columns: {missing_factors}")
             return create_error_message(f"Missing required columns: {missing_factors}"), None, None, None
+        # Find target column
+        target_col, target_name, scale_info = find_target_column(df)
+        if target_col is None:
+            logger.error("No suitable target column found")
             logger.info(f"Available columns: {list(df.columns)}")
+            return create_error_message(f"No suitable target column found. Available columns: {list(df.columns)}"), None, None, None
+        logger.info(f"Using target column: {target_col} (interpreted as {target_name})")
         # Map column names from trust buckets to factors
         column_mapping = {
             if old_name in df_analysis.columns:
                 df_analysis.rename(columns={old_name: new_name}, inplace=True)
+        # Calculate R² for target model
         factors = ["Factor 1", "Factor 2", "Factor 3", "Factor 4", "Factor 5", "Factor 6"]
         X = df_analysis[factors].dropna()
+        y = df.loc[X.index, target_col]
         # Remove any remaining NaN values
         valid_mask = ~y.isna()
         r2 = r2_score(y, model.predict(X))
         r2_percent = r2 * 100
+        # Calculate average target
         avg_target = y.mean()
+        logger.info(f"R² Score: {r2_percent:.1f}%, Average {target_name}: {avg_target:.1f}")
         # Create visualizations
         r2_html = calculate_r2_image(r2_percent)
+        avg_target_html = create_avg_target_display(avg_target, target_name, scale_info)
         # Factor performance plot
         factor_performance_img = plot_factor_performance(df_analysis, "Factor Performance (Agreement Scores)")
+        # Run Shapley analysis
         temp_dir = tempfile.mkdtemp()
+        csv_output_path = os.path.join(temp_dir, "results.csv")
         # Call R script with proper parameters
+        r_success = call_r_script_for_target(file_path, csv_output_path, target_name)
         if not r_success:
             # Clean up and return error
         driver_analysis_img = plot_driver_analysis(
             results_df,
             average_value,
+            f"Shapley Driver Analysis - {target_name}"
         )
         # Clean up
 with gr.Blocks(css=css, js=js, theme=gr.themes.Soft()) as demo:
     gr.Markdown("""
         <h2 style="text-align: center; font-size: 2.25rem; font-weight: 600;">
+            Driver Analysis - Multi-Target Analysis
         </h2>
     """)
+    gr.Markdown("### Flexible Target Analysis")
+    gr.Markdown("Analysis showing what drives your target variable (Consideration, Trust, NPS, or Loyalty) using Factors 1-6")
     # File upload section
     with gr.Row():