nghweigeok committed on
Commit
f41ec06
·
verified ·
1 Parent(s): 96b68f9

Fourteenth deployment (Add Trust Builder computation and tables)

Browse files
Files changed (4) hide show
  1. Dockerfile +1 -1
  2. app.py +220 -36
  3. example_files/VW.xlsx +0 -0
  4. process_data.R +249 -54
Dockerfile CHANGED
@@ -17,7 +17,7 @@ RUN apt-get update && apt-get install -y \
17
  && rm -rf /var/lib/apt/lists/*
18
 
19
  # Install R packages
20
- RUN R -e "install.packages(c('relaimpo', 'readxl', 'readr'), repos='http://cran.rstudio.com/')"
21
 
22
  # Copy the requirements.txt file, your app script, and the R script into the container.
23
  COPY requirements.txt /requirements.txt
 
17
  && rm -rf /var/lib/apt/lists/*
18
 
19
  # Install R packages
20
+ RUN R -e "install.packages(c('relaimpo', 'readxl', 'readr', 'lavaan', 'leaps', 'dplyr', 'tidyr'), repos='http://cran.rstudio.com/')"
21
 
22
  # Copy the requirements.txt file, your app script, and the R script into the container.
23
  COPY requirements.txt /requirements.txt
app.py CHANGED
@@ -159,10 +159,12 @@ def call_r_script(
159
  csv_output_path_loyalty,
160
  csv_output_path_consideration,
161
  csv_output_path_satisfaction,
 
162
  nps_present,
163
  loyalty_present,
164
  consideration_present,
165
  satisfaction_present,
 
166
  ):
167
  """
168
  Call the R script for Shapley regression analysis.
@@ -191,10 +193,12 @@ def call_r_script(
191
  csv_output_path_loyalty,
192
  csv_output_path_consideration,
193
  csv_output_path_satisfaction,
 
194
  str(nps_present).upper(), # Convert the boolean to a string ("TRUE" or "FALSE")
195
  str(loyalty_present).upper(),
196
  str(consideration_present).upper(),
197
  str(satisfaction_present).upper(),
 
198
  ]
199
 
200
  try:
@@ -234,13 +238,22 @@ def analyze_excel_single(file_path):
234
  ".txt", "_consideration.csv"
235
  )
236
  csv_output_path_satisfaction = text_output_path.replace(".txt", "_satisfaction.csv")
 
237
 
238
- # Load the dataset (CSV or Excel)
 
239
  df = None
240
- if ".xlsx" in file_path:
241
- df = pd.read_excel(file_path)
242
- elif ".csv" in file_path:
243
- df = pd.read_csv(file_path)
 
 
 
 
 
 
 
244
 
245
  # Step 1: Check for missing columns and handle NPS column
246
  required_columns = [
@@ -300,6 +313,7 @@ def analyze_excel_single(file_path):
300
  )
301
 
302
  # Step 3: Adjust Shapley regression analysis based on column presence
 
303
  call_r_script(
304
  file_path,
305
  text_output_path,
@@ -308,10 +322,12 @@ def analyze_excel_single(file_path):
308
  csv_output_path_loyalty,
309
  csv_output_path_consideration,
310
  csv_output_path_satisfaction,
 
311
  nps_present,
312
  loyalty_present,
313
  consideration_present,
314
  satisfaction_present,
 
315
  )
316
 
317
  # Read the output text file
@@ -326,6 +342,7 @@ def analyze_excel_single(file_path):
326
  results_df_trust = pd.read_csv(csv_output_path_trust)
327
  results_df_trust["Importance_percent"] = results_df_trust["Importance"] * 100
328
  average_value_trust = results_df_trust["Importance_percent"].mean()
 
329
  img_trust = plot_model_results(
330
  results_df_trust,
331
  average_value_trust,
@@ -411,6 +428,79 @@ def analyze_excel_single(file_path):
411
  "Satisfaction",
412
  )
413
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
414
  # After processing, ensure to delete the temporary files and directory
415
  os.remove(csv_output_path_trust)
416
  if nps_present:
@@ -421,6 +511,8 @@ def analyze_excel_single(file_path):
421
  os.remove(csv_output_path_consideration)
422
  if satisfaction_present:
423
  os.remove(csv_output_path_satisfaction)
 
 
424
  os.remove(text_output_path)
425
  os.rmdir(temp_dir)
426
 
@@ -454,6 +546,8 @@ def analyze_excel_single(file_path):
454
  img_loyalty,
455
  img_consideration,
456
  img_satisfaction,
 
 
457
  output_text,
458
  )
459
 
@@ -480,6 +574,8 @@ def batch_file_processing(file_paths):
480
  img_loyalty_list = []
481
  img_consideration_list = []
482
  img_satisfaction_list = []
 
 
483
  output_text_list = []
484
 
485
  for file_path in file_paths:
@@ -489,6 +585,8 @@ def batch_file_processing(file_paths):
489
  img_loyalty,
490
  img_consideration,
491
  img_satisfaction,
 
 
492
  output_text,
493
  ) = analyze_excel_single(file_path)
494
  img_trust_list.append(img_trust)
@@ -496,6 +594,8 @@ def batch_file_processing(file_paths):
496
  img_loyalty_list.append(img_loyalty)
497
  img_consideration_list.append(img_consideration)
498
  img_satisfaction_list.append(img_satisfaction)
 
 
499
  output_text_list.append(output_text)
500
 
501
  return (
@@ -504,6 +604,8 @@ def batch_file_processing(file_paths):
504
  img_loyalty_list,
505
  img_consideration_list,
506
  img_satisfaction_list,
 
 
507
  output_text_list,
508
  )
509
 
@@ -525,7 +627,7 @@ outputs = []
525
 
526
  def variable_outputs(file_inputs):
527
 
528
- file_inputs_single = [file_inputs] ## special handling for single file
529
 
530
  # Call batch file processing and get analysis results
531
  (
@@ -534,6 +636,8 @@ def variable_outputs(file_inputs):
534
  img_loyalty_list,
535
  img_consideration_list,
536
  img_satisfaction_list,
 
 
537
  output_text_list,
538
  ) = batch_file_processing(file_inputs_single)
539
 
@@ -550,6 +654,8 @@ def variable_outputs(file_inputs):
550
  img_loyalty,
551
  img_consideration,
552
  img_satisfaction,
 
 
553
  output_text,
554
  ) in enumerate(
555
  zip_longest(
@@ -558,6 +664,8 @@ def variable_outputs(file_inputs):
558
  img_loyalty_list,
559
  img_consideration_list,
560
  img_satisfaction_list,
 
 
561
  output_text_list,
562
  )
563
  ):
@@ -603,6 +711,24 @@ def variable_outputs(file_inputs):
603
  # #label=f"{dataset_name}: Analysis Summary",
604
  # visible=False,
605
  # ),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
606
  ]
607
 
608
  # add current plots to container
@@ -617,6 +743,8 @@ def variable_outputs(file_inputs):
617
  # gr.Textbox(label="Analysis Summary", visible=False),
618
  gr.Image(label="Trust Drivers", visible=False),
619
  gr.Image(label="NPS Drivers", visible=False),
 
 
620
  ]
621
 
622
  return plots_visible + plots_invisible * (max_outputs - k)
@@ -639,8 +767,12 @@ def reset_outputs():
639
  # summary_text = gr.Textbox(value=None, label="Analysis Summary", visible=False)
640
  trust_plot = gr.Image(value=None, label="Trust Drivers", visible=True)
641
  nps_plot = gr.Image(value=None, label="NPS Drivers", visible=True)
 
 
642
  outputs.append(trust_plot)
643
  outputs.append(nps_plot)
 
 
644
  # outputs.append(loyalty_plot)
645
  # outputs.append(satisfaction_plot)
646
  # outputs.append(consideration_plot)
@@ -662,8 +794,12 @@ def reset_outputs():
662
  # summary_text = gr.Textbox(value=None, label="Analysis Summary", visible=False)
663
  trust_plot = gr.Image(value=None, label="Trust Drivers", visible=False)
664
  nps_plot = gr.Image(value=None, label="NPS Drivers", visible=False)
 
 
665
  outputs.append(trust_plot)
666
  outputs.append(nps_plot)
 
 
667
  # outputs.append(loyalty_plot)
668
  # outputs.append(consideration_plot)
669
  # outputs.append(satisfaction_plot)
@@ -674,40 +810,88 @@ def reset_outputs():
674
 
675
  def process_examples(file_name):
676
  file_path = f"example_files/{file_name[0]}"
677
- outputs = variable_outputs(file_path) ## special handling for single file
 
678
  return outputs
679
 
680
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
681
  with gr.Blocks() as demo:
682
- with gr.Row():
683
- with gr.Column():
684
- # title = gr.Markdown("# TrustLogic & NPS Driver Analysis (Batch Version)")
685
- # description = gr.Markdown(instruction_text)
686
-
687
- with gr.Row():
688
- dataset = gr.Dataset(
689
- components=[gr.Textbox(visible=False)],
690
- label="Select an example to calculate the Trust Drivers from the underlying Excel files",
691
- type="values",
692
- samples=[
693
- ["HSBC.xlsx"],
694
- ["CBA.xlsx"],
695
- ["Red Cross.xlsx"],
696
- ["Health Insurance.xlsx"],
697
- ["WV.xlsx"],
698
- ["Care.xlsx"],
699
- ["BUPA.xlsx"],
700
- ],
701
- )
702
-
703
- with gr.Row():
704
- # set file upload widget
705
- file_inputs = gr.File(label="Excel/CSV Dataset")
706
-
707
- with gr.Row():
708
- # set clear and submit butttons
709
- clear_button = gr.ClearButton(file_inputs)
710
- submit_button = gr.Button("Submit", variant="primary")
 
711
 
712
  with gr.Column():
713
  # set default output widgets
 
159
  csv_output_path_loyalty,
160
  csv_output_path_consideration,
161
  csv_output_path_satisfaction,
162
+ csv_output_path_trustbuilder,
163
  nps_present,
164
  loyalty_present,
165
  consideration_present,
166
  satisfaction_present,
167
+ trustbuilder_present,
168
  ):
169
  """
170
  Call the R script for Shapley regression analysis.
 
193
  csv_output_path_loyalty,
194
  csv_output_path_consideration,
195
  csv_output_path_satisfaction,
196
+ csv_output_path_trustbuilder,
197
  str(nps_present).upper(), # Convert the boolean to a string ("TRUE" or "FALSE")
198
  str(loyalty_present).upper(),
199
  str(consideration_present).upper(),
200
  str(satisfaction_present).upper(),
201
+ str(trustbuilder_present).upper(),
202
  ]
203
 
204
  try:
 
238
  ".txt", "_consideration.csv"
239
  )
240
  csv_output_path_satisfaction = text_output_path.replace(".txt", "_satisfaction.csv")
241
+ csv_output_path_trustbuilder = text_output_path.replace(".txt", "_trustbuilder.csv")
242
 
243
+ # Load the Trust Driver dataset (CSV or Excel)
244
+ # Trust Driver dataset is mandatory
245
  df = None
246
+ trustbuilder_present = False
247
+
248
+ # if ".xlsx" in file_path:
249
+ excel_file = pd.ExcelFile(file_path)
250
+ df = pd.read_excel(file_path, sheet_name="Driver")
251
+
252
+ # Check whether the optional Builder sheet is present (the Driver sheet is always read above)
253
+ trustbuilder_present = "Builder" in excel_file.sheet_names
254
+
255
+ # elif ".csv" in file_path:
256
+ # df = pd.read_csv(file_path)
257
 
258
  # Step 1: Check for missing columns and handle NPS column
259
  required_columns = [
 
313
  )
314
 
315
  # Step 3: Adjust Shapley regression analysis based on column presence
316
+ # Handle Trust Driver Analysis and Trust Builder Analysis
317
  call_r_script(
318
  file_path,
319
  text_output_path,
 
322
  csv_output_path_loyalty,
323
  csv_output_path_consideration,
324
  csv_output_path_satisfaction,
325
+ csv_output_path_trustbuilder,
326
  nps_present,
327
  loyalty_present,
328
  consideration_present,
329
  satisfaction_present,
330
+ trustbuilder_present,
331
  )
332
 
333
  # Read the output text file
 
342
  results_df_trust = pd.read_csv(csv_output_path_trust)
343
  results_df_trust["Importance_percent"] = results_df_trust["Importance"] * 100
344
  average_value_trust = results_df_trust["Importance_percent"].mean()
345
+
346
  img_trust = plot_model_results(
347
  results_df_trust,
348
  average_value_trust,
 
428
  "Satisfaction",
429
  )
430
 
431
+ df_builder = None
432
+ df_builder_pivot = None
433
+ if trustbuilder_present:
434
+ # Create dataframe for trust builder
435
+ results_df_builder = pd.read_csv(csv_output_path_trustbuilder)
436
+
437
+ combined_data = {
438
+ "Message": results_df_builder["Message"],
439
+ "Stability": results_df_builder["Stability"].round(0).astype(int),
440
+ "Development": results_df_builder["Development"].round(0).astype(int),
441
+ "Relationship": results_df_builder["Relationship"].round(0).astype(int),
442
+ "Benefit": results_df_builder["Benefit"].round(0).astype(int),
443
+ "Vision": results_df_builder["Vision"].round(0).astype(int),
444
+ "Competence": results_df_builder["Competence"].round(0).astype(int),
445
+ }
446
+
447
+ df_builder = pd.DataFrame(combined_data)
448
+
449
+ # Create consolidated table
450
+ # List of bucket columns
451
+ bucket_columns = [
452
+ "Stability",
453
+ "Development",
454
+ "Relationship",
455
+ "Benefit",
456
+ "Vision",
457
+ "Competence",
458
+ ]
459
+
460
+ # Prepare lists to collect data
461
+ buckets = []
462
+ messages = []
463
+ percentages = []
464
+
465
+ # Iterate through each bucket column
466
+ for bucket in bucket_columns:
467
+ for index, value in results_df_builder[bucket].items():
468
+ if value > 0:
469
+ buckets.append(bucket)
470
+ messages.append(results_df_builder["Message"][index])
471
+ percentages.append(int(round(value)))
472
+
473
+ # Create the new DataFrame
474
+ builder_consolidated = {
475
+ "Trust Driver®": buckets,
476
+ "Trust Proof Point®": messages,
477
+ "%": percentages,
478
+ }
479
+
480
+ df_builder_pivot = pd.DataFrame(builder_consolidated)
481
+
482
+ # Define the order of the Trust Driver® categories
483
+ trust_driver_order = [
484
+ "Stability",
485
+ "Development",
486
+ "Relationship",
487
+ "Benefit",
488
+ "Vision",
489
+ "Competence",
490
+ ]
491
+
492
+ # Convert Trust Driver® column to a categorical type with the specified order
493
+ df_builder_pivot["Trust Driver®"] = pd.Categorical(
494
+ df_builder_pivot["Trust Driver®"],
495
+ categories=trust_driver_order,
496
+ ordered=True,
497
+ )
498
+
499
+ # Sort the DataFrame by 'Trust Driver®' and '%' in descending order within each 'Trust Driver®'
500
+ df_builder_pivot = df_builder_pivot.sort_values(
501
+ by=["Trust Driver®", "%"], ascending=[True, False]
502
+ )
503
+
504
  # After processing, ensure to delete the temporary files and directory
505
  os.remove(csv_output_path_trust)
506
  if nps_present:
 
511
  os.remove(csv_output_path_consideration)
512
  if satisfaction_present:
513
  os.remove(csv_output_path_satisfaction)
514
+ if trustbuilder_present:
515
+ os.remove(csv_output_path_trustbuilder)
516
  os.remove(text_output_path)
517
  os.rmdir(temp_dir)
518
 
 
546
  img_loyalty,
547
  img_consideration,
548
  img_satisfaction,
549
+ df_builder,
550
+ df_builder_pivot,
551
  output_text,
552
  )
553
 
 
574
  img_loyalty_list = []
575
  img_consideration_list = []
576
  img_satisfaction_list = []
577
+ df_builder_list = []
578
+ df_builder_pivot_list = []
579
  output_text_list = []
580
 
581
  for file_path in file_paths:
 
585
  img_loyalty,
586
  img_consideration,
587
  img_satisfaction,
588
+ df_builder,
589
+ df_builder_pivot,
590
  output_text,
591
  ) = analyze_excel_single(file_path)
592
  img_trust_list.append(img_trust)
 
594
  img_loyalty_list.append(img_loyalty)
595
  img_consideration_list.append(img_consideration)
596
  img_satisfaction_list.append(img_satisfaction)
597
+ df_builder_list.append(df_builder)
598
+ df_builder_pivot_list.append(df_builder_pivot)
599
  output_text_list.append(output_text)
600
 
601
  return (
 
604
  img_loyalty_list,
605
  img_consideration_list,
606
  img_satisfaction_list,
607
+ df_builder_list,
608
+ df_builder_pivot_list,
609
  output_text_list,
610
  )
611
 
 
627
 
628
  def variable_outputs(file_inputs):
629
 
630
+ file_inputs_single = file_inputs
631
 
632
  # Call batch file processing and get analysis results
633
  (
 
636
  img_loyalty_list,
637
  img_consideration_list,
638
  img_satisfaction_list,
639
+ df_builder_list,
640
+ df_builder_pivot_list,
641
  output_text_list,
642
  ) = batch_file_processing(file_inputs_single)
643
 
 
654
  img_loyalty,
655
  img_consideration,
656
  img_satisfaction,
657
+ df_builder,
658
+ df_builder_pivot,
659
  output_text,
660
  ) in enumerate(
661
  zip_longest(
 
664
  img_loyalty_list,
665
  img_consideration_list,
666
  img_satisfaction_list,
667
+ df_builder_list,
668
+ df_builder_pivot_list,
669
  output_text_list,
670
  )
671
  ):
 
711
  # #label=f"{dataset_name}: Analysis Summary",
712
  # visible=False,
713
  # ),
714
+ gr.Dataframe(
715
+ value=df_builder,
716
+ headers=list(df_builder.columns),
717
+ interactive=False,
718
+ label=f"{dataset_name}",
719
+ visible=True,
720
+ height=800,
721
+ wrap=True,
722
+ ),
723
+ gr.Dataframe(
724
+ value=df_builder_pivot,
725
+ headers=list(df_builder_pivot.columns),
726
+ interactive=False,
727
+ label=f"{dataset_name}",
728
+ visible=True,
729
+ height=800,
730
+ wrap=True,
731
+ ),
732
  ]
733
 
734
  # add current plots to container
 
743
  # gr.Textbox(label="Analysis Summary", visible=False),
744
  gr.Image(label="Trust Drivers", visible=False),
745
  gr.Image(label="NPS Drivers", visible=False),
746
+ gr.Dataframe(label=" ", visible=False),
747
+ gr.Dataframe(label=" ", visible=False),
748
  ]
749
 
750
  return plots_visible + plots_invisible * (max_outputs - k)
 
767
  # summary_text = gr.Textbox(value=None, label="Analysis Summary", visible=False)
768
  trust_plot = gr.Image(value=None, label="Trust Drivers", visible=True)
769
  nps_plot = gr.Image(value=None, label="NPS Drivers", visible=True)
770
+ df_builder = gr.Dataframe(value=None, label=" ", visible=True)
771
+ df_builder_pivot = gr.Dataframe(value=None, label=" ", visible=True)
772
  outputs.append(trust_plot)
773
  outputs.append(nps_plot)
774
+ outputs.append(df_builder)
775
+ outputs.append(df_builder_pivot)
776
  # outputs.append(loyalty_plot)
777
  # outputs.append(satisfaction_plot)
778
  # outputs.append(consideration_plot)
 
794
  # summary_text = gr.Textbox(value=None, label="Analysis Summary", visible=False)
795
  trust_plot = gr.Image(value=None, label="Trust Drivers", visible=False)
796
  nps_plot = gr.Image(value=None, label="NPS Drivers", visible=False)
797
+ df_builder = gr.Dataframe(value=None, label=" ", visible=False)
798
+ df_builder_pivot = gr.Dataframe(value=None, label=" ", visible=False)
799
  outputs.append(trust_plot)
800
  outputs.append(nps_plot)
801
+ outputs.append(df_builder)
802
+ outputs.append(df_builder_pivot)
803
  # outputs.append(loyalty_plot)
804
  # outputs.append(consideration_plot)
805
  # outputs.append(satisfaction_plot)
 
810
 
811
  def process_examples(file_name):
812
  file_path = f"example_files/{file_name[0]}"
813
+ file_path = [file_path]
814
+ outputs = variable_outputs(file_path)
815
  return outputs
816
 
817
 
818
+ # with gr.Blocks() as demo:
819
+ # with gr.Row():
820
+ # with gr.Column():
821
+ # # title = gr.Markdown("# TrustLogic & NPS Driver Analysis (Batch Version)")
822
+ # # description = gr.Markdown(instruction_text)
823
+
824
+ # with gr.Row():
825
+ # dataset = gr.Dataset(
826
+ # components=[gr.Textbox(visible=False)],
827
+ # label="Select an example to calculate the Trust Drivers from the underlying Excel files",
828
+ # type="values",
829
+ # samples=[
830
+ # ["VW.xlsx"],
831
+ # # ["HSBC.xlsx"],
832
+ # # ["CBA.xlsx"],
833
+ # # ["Red Cross.xlsx"],
834
+ # # ["Health Insurance.xlsx"],
835
+ # # ["WV.xlsx"],
836
+ # # ["Care.xlsx"],
837
+ # # ["BUPA.xlsx"],
838
+ # ],
839
+ # )
840
+
841
+ # with gr.Row():
842
+ # # set file upload widget
843
+ # file_inputs = gr.File(label="Excel Dataset")
844
+
845
+ # with gr.Row():
846
+ # # set clear and submit butttons
847
+ # clear_button = gr.ClearButton(file_inputs)
848
+ # submit_button = gr.Button("Submit", variant="primary")
849
+
850
+ # with gr.Column():
851
+ # # set default output widgets
852
+ # outputs = reset_outputs()
853
+
854
+ # # function for submit button click
855
+ # submit_button.click(fn=variable_outputs, inputs=file_inputs, outputs=outputs)
856
+
857
+ # # function for clear button click
858
+ # # this only handles the outputs. Input reset is handled at button definition
859
+ # clear_button.click(fn=reset_outputs, inputs=[], outputs=outputs)
860
+
861
+ # # function for example files
862
+ # dataset.click(fn=process_examples, inputs=dataset, outputs=outputs)
863
+
864
  with gr.Blocks() as demo:
865
+ # with gr.Column():
866
+ # title = gr.Markdown("# TrustLogic & NPS Driver Analysis (Batch Version)")
867
+ # description = gr.Markdown(instruction_text)
868
+
869
+ with gr.Column():
870
+ with gr.Row():
871
+ dataset = gr.Dataset(
872
+ components=[gr.Textbox(visible=False)],
873
+ label="Select an example to calculate the Donor Drivers from the underlying Excel files",
874
+ type="values",
875
+ samples=[
876
+ ["VW.xlsx"],
877
+ # ["HSBC.xlsx"],
878
+ # ["CBA.xlsx"],
879
+ # ["Red Cross.xlsx"],
880
+ # ["Health Insurance.xlsx"],
881
+ # ["WV.xlsx"],
882
+ # ["Care.xlsx"],
883
+ # ["BUPA.xlsx"],
884
+ ],
885
+ )
886
+
887
+ with gr.Row():
888
+ # set file upload widget
889
+ file_inputs = gr.Files(label="Excel Dataset")
890
+
891
+ with gr.Row():
892
+ # set clear and submit buttons
893
+ clear_button = gr.ClearButton(file_inputs)
894
+ submit_button = gr.Button("Submit", variant="primary")
895
 
896
  with gr.Column():
897
  # set default output widgets
example_files/VW.xlsx ADDED
Binary file (20.1 kB). View file
 
process_data.R CHANGED
@@ -1,34 +1,199 @@
1
  # Load required libraries
 
2
  library(readxl)
3
  library(readr)
4
- library(relaimpo)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
- # Define function to process each model
7
- process_model <- function(model_formula, data, output_text_file, csv_file) {
8
- # Fit linear regression model
9
- model <- lm(model_formula, data = data)
10
-
11
- # Calculate relative importance using the lmg method
12
- calc_relaimpo <- calc.relimp(model, type = "lmg", rela = TRUE)
13
- # Calculate average importance
14
- average_importance <- mean(calc_relaimpo$lmg)
15
-
16
- # Open the output text file in append mode to add this model's output
17
- file_conn <- file(output_text_file, open = "a")
18
- # Capture output to include in the text file
19
- full_output <- capture.output({
20
- print(calc_relaimpo)
21
- cat("\nAverage Importance: ", average_importance, "\n")
22
  })
23
- # Write output to text file
24
- writeLines(full_output, file_conn)
25
- close(file_conn)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
 
27
- # Create data frame of predictor names and their importance
28
- results <- data.frame(Predictor = names(calc_relaimpo$lmg), Importance = calc_relaimpo$lmg)
29
 
30
- # Save results to CSV file
31
- write.csv(results, file = csv_file, row.names = FALSE)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  }
33
 
34
  # Read command-line arguments
@@ -40,53 +205,83 @@ csv_output_path_nps <- args[4]
40
  csv_output_path_loyalty <- args[5]
41
  csv_output_path_consideration <- args[6]
42
  csv_output_path_satisfaction <- args[7]
43
- nps_present <- as.logical(tolower(args[8])) # Expecting "TRUE" or "FALSE" as the argument
44
- loyalty_present <- as.logical(tolower(args[9]))
45
- consideration_present <- as.logical(tolower(args[10]))
46
- satisfaction_present <- as.logical(tolower(args[11]))
 
 
 
 
 
47
 
48
- # Load the dataset (CSV or Excel)
49
- data <- NULL
 
 
50
  if (grepl(".xlsx", input_file)) {
51
- data <- read_excel(input_file)
52
- } else if (grepl(".csv", input_file)) {
53
- data <- read_csv(input_file)
54
  }
55
 
56
  # Process the Trust model
57
- process_model(Trust ~ Stability + Development + Relationship + Benefit + Vision + Competence,
58
- data,
59
- output_text_file,
60
- csv_output_path_trust)
 
61
 
62
  # Conditionally process the NPS model
63
  if (nps_present) {
64
- process_model(NPS ~ Stability + Development + Relationship + Benefit + Vision + Competence,
65
- data,
66
- output_text_file,
67
- csv_output_path_nps)
 
68
  }
69
 
70
  # Conditionally process the Loyalty model
71
  if (loyalty_present) {
72
- process_model(Loyalty ~ Stability + Development + Relationship + Benefit + Vision + Competence,
73
- data,
74
- output_text_file,
75
- csv_output_path_loyalty)
 
76
  }
77
 
78
  # Conditionally process the Consideration model
79
  if (consideration_present) {
80
- process_model(Consideration ~ Stability + Development + Relationship + Benefit + Vision + Competence,
81
- data,
82
- output_text_file,
83
- csv_output_path_consideration)
 
84
  }
85
 
86
  # Conditionally process the Satisfaction model
87
  if (satisfaction_present) {
88
- process_model(Satisfaction ~ Stability + Development + Relationship + Benefit + Vision + Competence,
89
- data,
90
- output_text_file,
91
- csv_output_path_satisfaction)
92
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  # Load required libraries
2
+ library(relaimpo)
3
  library(readxl)
4
  library(readr)
5
+ library(lavaan)
6
+ library(leaps)
7
+ library(dplyr)
8
+ library(tidyr)
9
+
10
+ # Logging function
11
+ log_message <- function(message, output_text_file) {
12
+ cat(message, "\n")
13
+ write(message, file = output_text_file, append = TRUE)
14
+ }
15
+
16
+ # Trust Driver analysis function
17
+ trust_driver_analysis <- function(model_formula, data, output_text_file, csv_file) {
18
+ tryCatch({
19
+ # Fit linear regression model
20
+ model <- lm(model_formula, data = data)
21
+
22
+ # Calculate relative importance using the lmg method
23
+ calc_relaimpo <- calc.relimp(model, type = "lmg", rela = TRUE)
24
+ # Calculate average importance
25
+ average_importance <- mean(calc_relaimpo$lmg)
26
+
27
+ # Open the output text file in append mode to add this model's output
28
+ file_conn <- file(output_text_file, open = "a")
29
+ # Capture output to include in the text file
30
+ full_output <- capture.output({
31
+ print("Trust Driver Analysis:\n")
32
+ print(calc_relaimpo)
33
+ cat("\nAverage Importance: ", average_importance, "\n")
34
+ })
35
+ # Write output to text file
36
+ writeLines(full_output, file_conn)
37
+ close(file_conn)
38
 
39
+ # Create data frame of predictor names and their importance
40
+ results <- data.frame(Predictor = names(calc_relaimpo$lmg), Importance = calc_relaimpo$lmg)
41
+
42
+ # Save results to CSV file
43
+ write.csv(results, file = csv_file, row.names = FALSE)
44
+ }, error = function(e) {
45
+ log_message(paste("Error in trust_driver_analysis:", e$message), output_text_file)
 
 
 
 
 
 
 
 
 
46
  })
47
+ }
48
+
49
+ # Trust Builder Analysis function
50
+ trust_builder_analysis <- function(data, data_headers, output_text_file, csv_file) {
51
+ tryCatch({
52
+ # Map the questions to column names
53
+ question_to_column <- setNames(as.list(data_headers[1, ]), as.character(data_headers[2, ]))
54
+
55
+ # Number of important statements to be selected
56
+ p <- 6
57
+
58
+ # Define the list of column names
59
+ bucket_columns <- c("Stability", "Development", "Relationship", "Benefit", "Vision", "Competence")
60
+
61
+ # Select columns based on the predefined list
62
+ bucket <- data %>% select(all_of(bucket_columns))
63
+
64
+ # Select all columns from the consumer dataframe that contain "TB" in their names and assign them to the variable TB
65
+ TB <- data %>% select(contains("TB"))
66
+
67
+ # Initialize a matrix with 37 rows and 6 columns, filled with NA values
68
+ coef <- matrix(NA, ncol = 6, nrow = 37)
69
+
70
+ # Initialize an empty list to store the predictors for each bucket column
71
+ bucket_predictors <- list()
72
+
73
+ # Loop over each of the 6 columns
74
+ for (i in 1:6) {
75
+ # Extract the i-th column from 'bucket' as a matrix and assign it to 'y'
76
+ y <- as.matrix(pull(bucket[, i]))
77
+
78
+ # Convert 'TB' dataframe to a matrix and assign it to 'x'
79
+ x <- as.matrix(TB)
80
+
81
+ # Perform best subset regression using 'x' as predictors and 'y' as the response variable
82
+ fit <- regsubsets(x, y, nbest = 1, nvmax = p)
83
+
84
+ # Summarize the regression subsets
85
+ fit_sum <- summary(fit)
86
+
87
+ # Store the selection markers ("*" = predictor included) of the best p-variable model in the i-th column of 'coef' matrix
88
+ coef[, i] <- fit_sum$outmat[p, ]
89
+
90
+ # Collect the names of the predictors marked "*" in the best p-variable model
91
+ predictors <- names(which(fit_sum$outmat[p, ] == "*"))
92
+
93
+ # Append the predictors to the bucket_predictors list
94
+ bucket_predictors[[bucket_columns[i]]] <- predictors
95
+ }
96
+
97
+ # Create the desired output format as model
98
+ model_str <- sapply(names(bucket_predictors), function(col) {
99
+ paste(col, "~", paste(bucket_predictors[[col]], collapse = "+"))
100
+ })
101
+
102
+ # Prepend the Trust x and y to model_str
103
+ model_str <- c("Trust ~ Stability + Development + Relationship + Benefit + Vision + Competence", model_str)
104
+
105
+ # Fit the model using sem() function
106
+ fit <- sem(model_str, data = data)
107
+ fit_summary <- summary(fit, standardized = TRUE, fit.measures = TRUE, rsquare = TRUE)
108
 
109
+ # Make it percentages
110
+ output <- fit_summary$pe[fit_summary$pe$op == "~", c("lhs", "rhs", "std.all")]
111
 
112
+ # Define the function to convert std.all to percentages
113
+ convert_to_percentage <- function(df) {
114
+ df %>%
115
+ group_by(lhs) %>%
116
+ mutate(abs_std = abs(std.all),
117
+ sum_abs_std = sum(abs_std),
118
+ percent_std = (abs_std / sum_abs_std) * 100) %>%
119
+ select(-abs_std, -sum_abs_std) %>%
120
+ ungroup()
121
+ }
122
+
123
+ # Convert the estimates to percentages
124
+ percentage_output <- convert_to_percentage(output)
125
+
126
+ # Extract TB column names
127
+ tb_column_names <- colnames(TB)
128
+
129
+ # Pivot percent_std to a wide format dataframe (one column per lhs value)
130
+ percentage_output_wide <- percentage_output %>%
131
+ pivot_wider(names_from = lhs, values_from = percent_std) %>%
132
+ rename_with(~ gsub("std.all\\.", "", .), starts_with("std.all"))
133
+
134
+ # Create a new dataframe with TB columns and percentage estimates
135
+ result_df <- data.frame(TB = tb_column_names)
136
+
137
+ # Merge the result_df with percentage_output_wide
138
+ result_df <- left_join(result_df, percentage_output_wide, by = c("TB" = "rhs"))
139
+
140
+ # Fill NA values with 0 to ensure proper representation
141
+ result_df[is.na(result_df)] <- 0
142
+
143
+ # Add corresponding messages of TB as a new column
144
+ result_df$Message <- sapply(result_df$TB, function(tb_col) question_to_column[[tb_col]])
145
+
146
+ # Convert 'TB' column to a factor with the correct order
147
+ result_df$TB <- factor(result_df$TB, levels = paste0("TB", 1:37))
148
+
149
+ # Exclude 'std.all' and 'Trust' columns and merge rows by 'TB'
150
+ result_df <- result_df %>%
151
+ select(-std.all, -Trust) %>%
152
+ group_by(TB) %>%
153
+ summarise(across(everything(), ~ if(is.numeric(.)) sum(., na.rm = TRUE) else first(.))) %>%
154
+ arrange(TB)
155
+
156
+ # Reorder columns to have Message as the second column
157
+ result_df <- result_df %>%
158
+ select(TB, Message, everything())
159
+
160
+ # Open the output text file in append mode to add this model's output
161
+ file_conn <- file(output_text_file, open = "a")
162
+
163
+ # Capture output to include in the text file
164
+ full_output <- capture.output({
165
+ print("Trust Builder Analysis:\n")
166
+ print("Data header mapping:\n")
167
+ print(question_to_column)
168
+ print("Buckets:\n")
169
+ print(bucket)
170
+ print("Messages:\n")
171
+ print(TB)
172
+ print("Coefficients matrix (coef:\n")
173
+ print(coef)
174
+ print("Model:\n")
175
+ cat(model_str, sep = "\n")
176
+ print("Fit summary:\n")
177
+ print(fit_summary)
178
+ print("Output:\n")
179
+ print(output)
180
+ print("Output in percentage (%):\n")
181
+ print(percentage_output)
182
+ print("result_df:\n")
183
+ print(result_df)
184
+ })
185
+ # Write output to text file
186
+ writeLines(full_output, file_conn)
187
+ close(file_conn)
188
+
189
+ # Convert result_df to a plain data frame before writing it out
190
+ results <- data.frame(result_df)
191
+
192
+ # Save results to CSV file
193
+ write.csv(results, file = csv_file, row.names = FALSE)
194
+ }, error = function(e) {
195
+ log_message(paste("Error in trust_builder_analysis:", e$message), output_text_file)
196
+ })
197
  }
198
 
199
  # Read command-line arguments
 
205
  csv_output_path_loyalty <- args[5]
206
  csv_output_path_consideration <- args[6]
207
  csv_output_path_satisfaction <- args[7]
208
+ csv_output_path_trustbuilder <- args[8]
209
+ nps_present <- as.logical(tolower(args[9])) # Expecting "TRUE" or "FALSE" as the argument
210
+ loyalty_present <- as.logical(tolower(args[10]))
211
+ consideration_present <- as.logical(tolower(args[11]))
212
+ satisfaction_present <- as.logical(tolower(args[12]))
213
+ trustbuilder_present <- as.logical(tolower(args[13]))
214
+
215
+ # Log the starting of the script
216
+ log_message("Starting Trust Driver and Builder Analysis Script.", output_text_file)
217
 
218
+ ########## Trust Driver Analysis ######################
219
+
220
+ # Load the trust driver dataset (CSV or Excel)
221
+ data_driver <- NULL
222
  if (grepl(".xlsx", input_file)) {
223
+ data_driver <- read_excel(input_file, sheet = "Driver")
 
 
224
  }
225
 
226
  # Process the Trust model
227
+ trust_driver_analysis(
228
+ Trust ~ Stability + Development + Relationship + Benefit + Vision + Competence,
229
+ data_driver,
230
+ output_text_file,
231
+ csv_output_path_trust)
232
 
233
  # Conditionally process the NPS model
234
  if (nps_present) {
235
+ trust_driver_analysis(
236
+ NPS ~ Stability + Development + Relationship + Benefit + Vision + Competence,
237
+ data_driver,
238
+ output_text_file,
239
+ csv_output_path_nps)
240
  }
241
 
242
  # Conditionally process the Loyalty model
243
  if (loyalty_present) {
244
+ trust_driver_analysis(
245
+ Loyalty ~ Stability + Development + Relationship + Benefit + Vision + Competence,
246
+ data_driver,
247
+ output_text_file,
248
+ csv_output_path_loyalty)
249
  }
250
 
251
  # Conditionally process the Consideration model
252
  if (consideration_present) {
253
+ trust_driver_analysis(
254
+ Consideration ~ Stability + Development + Relationship + Benefit + Vision + Competence,
255
+ data_driver,
256
+ output_text_file,
257
+ csv_output_path_consideration)
258
  }
259
 
260
  # Conditionally process the Satisfaction model
261
  if (satisfaction_present) {
262
+ trust_driver_analysis(
263
+ Satisfaction ~ Stability + Development + Relationship + Benefit + Vision + Competence,
264
+ data_driver,
265
+ output_text_file,
266
+ csv_output_path_satisfaction)
267
+ }
268
+
269
+ ########## Trust Builder Analysis ######################
270
+
271
+ if (trustbuilder_present) {
272
+ data_builder <- NULL
273
+
274
+ if (grepl(".xlsx", input_file)) {
275
+ # Read the first two rows as header mapping
276
+ data_builder_headers <- read_excel(input_file, sheet = "Builder", n_max = 2)
277
+ # Read the rest of the data, skipping the first two rows as datapoints
278
+ data_builder_rows <- read_excel(input_file, sheet = "Builder", skip = 2)
279
+ }
280
+
281
+ # Process the Builder model
282
+ trust_builder_analysis(data_builder_rows, data_builder_headers, output_text_file, csv_output_path_trustbuilder)
283
+
284
+ }
285
+
286
+ # Log the ending of the script
287
+ log_message("Trust Driver and Builder Analysis Script Completed.", output_text_file)