Wajahat698 committed on
Commit
8774de6
·
verified ·
1 Parent(s): 9274bd2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +91 -31
app.py CHANGED
@@ -237,9 +237,9 @@ def calculate_r2_image(r2_percent):
237
  </div>
238
  """
239
 
240
- def create_avg_target_display(avg_target):
241
  """
242
- Create average target (Purchase Consideration) visualization.
243
  """
244
  fig, ax = plt.subplots(figsize=(3.6, 3.6))
245
 
@@ -249,12 +249,12 @@ def create_avg_target_display(avg_target):
249
 
250
  ax.text(0.5, 0.5, f"{avg_target:.1f}",
251
  ha='center', va='center', fontsize=24, fontweight='bold')
252
- ax.text(0.5, 0.2, "Scale: 1-6",
253
  ha='center', va='center', fontsize=10, color='gray')
254
 
255
  ax.set_xlim(0, 1)
256
  ax.set_ylim(0, 1)
257
- ax.set_title("Avg Target", fontsize=11, pad=10)
258
  ax.axis('off')
259
 
260
  fig.patch.set_facecolor('none')
@@ -281,25 +281,76 @@ def create_error_message(message):
281
  </div>
282
  """
283
 
284
- def call_r_script_for_consideration(input_file, csv_output_path):
285
  """
286
- Call R script for Shapley regression analysis specifically for Consideration.
 
287
  """
288
- # Create temporary files for all outputs (even though we only need consideration)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
289
  temp_dir = os.path.dirname(csv_output_path)
290
  text_output_path = os.path.join(temp_dir, "output.txt")
291
  csv_output_path_trust = os.path.join(temp_dir, "trust.csv")
292
  csv_output_path_nps = os.path.join(temp_dir, "nps.csv")
293
  csv_output_path_loyalty = os.path.join(temp_dir, "loyalty.csv")
 
294
  csv_output_path_satisfaction = os.path.join(temp_dir, "satisfaction.csv")
295
  csv_output_path_trustbuilder = os.path.join(temp_dir, "trustbuilder.csv")
296
 
297
- # Set the boolean flags - we only want consideration analysis
298
- nps_present = False
299
- loyalty_present = False
300
- consideration_present = True # This is what we want
301
- satisfaction_present = False
302
  trustbuilder_present = False
 
 
 
 
 
 
 
 
 
 
303
 
304
  command = [
305
  "Rscript",
@@ -309,10 +360,10 @@ def call_r_script_for_consideration(input_file, csv_output_path):
309
  csv_output_path_trust,
310
  csv_output_path_nps,
311
  csv_output_path_loyalty,
312
- csv_output_path, # This is our consideration output
313
  csv_output_path_satisfaction,
314
  csv_output_path_trustbuilder,
315
- str(nps_present).upper(), # Convert to "TRUE"/"FALSE"
316
  str(loyalty_present).upper(),
317
  str(consideration_present).upper(),
318
  str(satisfaction_present).upper(),
@@ -322,6 +373,11 @@ def call_r_script_for_consideration(input_file, csv_output_path):
322
  try:
323
  result = subprocess.run(command, check=True, capture_output=True, text=True)
324
  logger.info("R script executed successfully")
 
 
 
 
 
325
  return True
326
  except subprocess.CalledProcessError as e:
327
  logger.error("R script failed with error: %s", e)
@@ -334,7 +390,7 @@ def call_r_script_for_consideration(input_file, csv_output_path):
334
 
335
  def analyze_prospects_data(file_path):
336
  """
337
- Analyze prospects data focusing on Purchase Consideration as target.
338
  """
339
  if file_path is None:
340
  return create_error_message("No file provided"), None, None, None
@@ -353,11 +409,15 @@ def analyze_prospects_data(file_path):
353
  logger.error(f"Missing factor columns: {missing_factors}")
354
  return create_error_message(f"Missing required columns: {missing_factors}"), None, None, None
355
 
356
- # Check if Consideration column exists
357
- if "Consideration" not in df.columns:
358
- logger.error("Consideration column not found in dataset")
 
 
359
  logger.info(f"Available columns: {list(df.columns)}")
360
- return create_error_message(f"Consideration column not found. Available columns: {list(df.columns)}"), None, None, None
 
 
361
 
362
  # Map column names from trust buckets to factors
363
  column_mapping = {
@@ -375,10 +435,10 @@ def analyze_prospects_data(file_path):
375
  if old_name in df_analysis.columns:
376
  df_analysis.rename(columns={old_name: new_name}, inplace=True)
377
 
378
- # Calculate R² for Consideration model
379
  factors = ["Factor 1", "Factor 2", "Factor 3", "Factor 4", "Factor 5", "Factor 6"]
380
  X = df_analysis[factors].dropna()
381
- y = df.loc[X.index, "Consideration"] # Use Consideration as target
382
 
383
  # Remove any remaining NaN values
384
  valid_mask = ~y.isna()
@@ -394,24 +454,24 @@ def analyze_prospects_data(file_path):
394
  r2 = r2_score(y, model.predict(X))
395
  r2_percent = r2 * 100
396
 
397
- # Calculate average target (Consideration)
398
  avg_target = y.mean()
399
 
400
- logger.info(f"R² Score: {r2_percent:.1f}%, Average Consideration: {avg_target:.1f}")
401
 
402
  # Create visualizations
403
  r2_html = calculate_r2_image(r2_percent)
404
- avg_target_html = create_avg_target_display(avg_target)
405
 
406
  # Factor performance plot
407
  factor_performance_img = plot_factor_performance(df_analysis, "Factor Performance (Agreement Scores)")
408
 
409
- # Run Shapley analysis on Consideration
410
  temp_dir = tempfile.mkdtemp()
411
- csv_output_path = os.path.join(temp_dir, "consideration_results.csv")
412
 
413
  # Call R script with proper parameters
414
- r_success = call_r_script_for_consideration(file_path, csv_output_path)
415
 
416
  if not r_success:
417
  # Clean up and return error
@@ -461,7 +521,7 @@ def analyze_prospects_data(file_path):
461
  driver_analysis_img = plot_driver_analysis(
462
  results_df,
463
  average_value,
464
- "Shapley Driver Analysis - Purchase Consideration"
465
  )
466
 
467
  # Clean up
@@ -513,12 +573,12 @@ function refresh() {
513
  with gr.Blocks(css=css, js=js, theme=gr.themes.Soft()) as demo:
514
  gr.Markdown("""
515
  <h2 style="text-align: center; font-size: 2.25rem; font-weight: 600;">
516
- Driver Analysis - Purchase Consideration
517
  </h2>
518
  """)
519
 
520
- gr.Markdown("### Purchase Consideration Analysis")
521
- gr.Markdown("Analysis showing what drives Purchase Consideration among prospects using Factors 1-6")
522
 
523
  # File upload section
524
  with gr.Row():
 
237
  </div>
238
  """
239
 
240
+ def create_avg_target_display(avg_target, target_name, scale_info):
241
  """
242
+ Create average target visualization.
243
  """
244
  fig, ax = plt.subplots(figsize=(3.6, 3.6))
245
 
 
249
 
250
  ax.text(0.5, 0.5, f"{avg_target:.1f}",
251
  ha='center', va='center', fontsize=24, fontweight='bold')
252
+ ax.text(0.5, 0.2, scale_info,
253
  ha='center', va='center', fontsize=10, color='gray')
254
 
255
  ax.set_xlim(0, 1)
256
  ax.set_ylim(0, 1)
257
+ ax.set_title(f"Avg {target_name}", fontsize=11, pad=10)
258
  ax.axis('off')
259
 
260
  fig.patch.set_facecolor('none')
 
281
  </div>
282
  """
283
 
284
def find_target_column(df):
    """
    Find the best target column in the dataset.

    Priority: Consideration -> Trust -> NPS -> Loyalty

    Matching is attempted in three passes, and the priority order above is
    honoured within every pass:
      1. exact column name,
      2. case-insensitive column name,
      3. substring match on a keyword ('consider', 'trust', 'nps', 'loyal').

    Args:
        df: Object exposing an iterable ``columns`` attribute
            (e.g. a pandas DataFrame).

    Returns:
        Tuple ``(actual_column, target_name, scale_info)`` where
        ``actual_column`` is the label as it appears in ``df.columns``,
        ``target_name`` is the canonical target name and ``scale_info`` is
        the display string for its scale. Returns ``(None, None, None)``
        when no column matches.
    """
    # (canonical target name, substring keyword, scale label), in priority order.
    # Keeping the keyword next to the target avoids repeating names and scales
    # in a separate if/elif chain.
    target_priorities = [
        ("Consideration", "consider", "Scale: 1-6"),
        ("Trust", "trust", "Scale: 1-10"),
        ("NPS", "nps", "Scale: 0-10"),
        ("Loyalty", "loyal", "Scale: 1-10"),
    ]

    columns = list(df.columns)

    # Pass 1: exact matches.
    for target, _keyword, scale in target_priorities:
        if target in columns:
            return target, target, scale

    # Column labels are not guaranteed to be strings (e.g. integer headers),
    # so normalise through str() before lowercasing instead of calling
    # .lower() on the raw label.
    lowered = [(str(col).lower(), col) for col in columns]

    # Pass 2: case-insensitive matches.
    columns_by_lower = {low: col for low, col in lowered}
    for target, _keyword, scale in target_priorities:
        key = target.lower()
        if key in columns_by_lower:
            return columns_by_lower[key], target, scale

    # Pass 3: partial matches. Iterate targets first, then columns, so that a
    # higher-priority target wins regardless of where its column sits in the
    # file (iterating columns first would let column order override priority).
    for target, keyword, scale in target_priorities:
        for low, col in lowered:
            if keyword in low:
                return col, target, scale

    return None, None, None
323
+
324
+ def call_r_script_for_target(input_file, csv_output_path, target_type):
325
+ """
326
+ Call R script for Shapley regression analysis for any target type.
327
+ """
328
+ # Create temporary files for all outputs
329
  temp_dir = os.path.dirname(csv_output_path)
330
  text_output_path = os.path.join(temp_dir, "output.txt")
331
  csv_output_path_trust = os.path.join(temp_dir, "trust.csv")
332
  csv_output_path_nps = os.path.join(temp_dir, "nps.csv")
333
  csv_output_path_loyalty = os.path.join(temp_dir, "loyalty.csv")
334
+ csv_output_path_consideration = os.path.join(temp_dir, "consideration.csv")
335
  csv_output_path_satisfaction = os.path.join(temp_dir, "satisfaction.csv")
336
  csv_output_path_trustbuilder = os.path.join(temp_dir, "trustbuilder.csv")
337
 
338
+ # Set the boolean flags based on target type
339
+ nps_present = (target_type.lower() == "nps")
340
+ loyalty_present = (target_type.lower() == "loyalty")
341
+ consideration_present = (target_type.lower() == "consideration")
342
+ satisfaction_present = (target_type.lower() == "satisfaction")
343
  trustbuilder_present = False
344
+
345
+ # Map output file based on target type
346
+ target_output_map = {
347
+ "consideration": csv_output_path_consideration,
348
+ "trust": csv_output_path_trust,
349
+ "nps": csv_output_path_nps,
350
+ "loyalty": csv_output_path_loyalty,
351
+ }
352
+
353
+ target_csv_path = target_output_map.get(target_type.lower(), csv_output_path_consideration)
354
 
355
  command = [
356
  "Rscript",
 
360
  csv_output_path_trust,
361
  csv_output_path_nps,
362
  csv_output_path_loyalty,
363
+ csv_output_path_consideration,
364
  csv_output_path_satisfaction,
365
  csv_output_path_trustbuilder,
366
+ str(nps_present).upper(),
367
  str(loyalty_present).upper(),
368
  str(consideration_present).upper(),
369
  str(satisfaction_present).upper(),
 
373
  try:
374
  result = subprocess.run(command, check=True, capture_output=True, text=True)
375
  logger.info("R script executed successfully")
376
+
377
+ # Copy the target-specific result to our expected output path
378
+ if os.path.exists(target_csv_path) and target_csv_path != csv_output_path:
379
+ shutil.copy2(target_csv_path, csv_output_path)
380
+
381
  return True
382
  except subprocess.CalledProcessError as e:
383
  logger.error("R script failed with error: %s", e)
 
390
 
391
  def analyze_prospects_data(file_path):
392
  """
393
+ Analyze prospects data with flexible target detection.
394
  """
395
  if file_path is None:
396
  return create_error_message("No file provided"), None, None, None
 
409
  logger.error(f"Missing factor columns: {missing_factors}")
410
  return create_error_message(f"Missing required columns: {missing_factors}"), None, None, None
411
 
412
+ # Find target column
413
+ target_col, target_name, scale_info = find_target_column(df)
414
+
415
+ if target_col is None:
416
+ logger.error("No suitable target column found")
417
  logger.info(f"Available columns: {list(df.columns)}")
418
+ return create_error_message(f"No suitable target column found. Available columns: {list(df.columns)}"), None, None, None
419
+
420
+ logger.info(f"Using target column: {target_col} (interpreted as {target_name})")
421
 
422
  # Map column names from trust buckets to factors
423
  column_mapping = {
 
435
  if old_name in df_analysis.columns:
436
  df_analysis.rename(columns={old_name: new_name}, inplace=True)
437
 
438
+ # Calculate R² for target model
439
  factors = ["Factor 1", "Factor 2", "Factor 3", "Factor 4", "Factor 5", "Factor 6"]
440
  X = df_analysis[factors].dropna()
441
+ y = df.loc[X.index, target_col]
442
 
443
  # Remove any remaining NaN values
444
  valid_mask = ~y.isna()
 
454
  r2 = r2_score(y, model.predict(X))
455
  r2_percent = r2 * 100
456
 
457
+ # Calculate average target
458
  avg_target = y.mean()
459
 
460
+ logger.info(f"R² Score: {r2_percent:.1f}%, Average {target_name}: {avg_target:.1f}")
461
 
462
  # Create visualizations
463
  r2_html = calculate_r2_image(r2_percent)
464
+ avg_target_html = create_avg_target_display(avg_target, target_name, scale_info)
465
 
466
  # Factor performance plot
467
  factor_performance_img = plot_factor_performance(df_analysis, "Factor Performance (Agreement Scores)")
468
 
469
+ # Run Shapley analysis
470
  temp_dir = tempfile.mkdtemp()
471
+ csv_output_path = os.path.join(temp_dir, "results.csv")
472
 
473
  # Call R script with proper parameters
474
+ r_success = call_r_script_for_target(file_path, csv_output_path, target_name)
475
 
476
  if not r_success:
477
  # Clean up and return error
 
521
  driver_analysis_img = plot_driver_analysis(
522
  results_df,
523
  average_value,
524
+ f"Shapley Driver Analysis - {target_name}"
525
  )
526
 
527
  # Clean up
 
573
  with gr.Blocks(css=css, js=js, theme=gr.themes.Soft()) as demo:
574
  gr.Markdown("""
575
  <h2 style="text-align: center; font-size: 2.25rem; font-weight: 600;">
576
+ Driver Analysis - Multi-Target Analysis
577
  </h2>
578
  """)
579
 
580
+ gr.Markdown("### Flexible Target Analysis")
581
+ gr.Markdown("Analysis showing what drives your target variable (Consideration, Trust, NPS, or Loyalty) using Factors 1-6")
582
 
583
  # File upload section
584
  with gr.Row():