temporary-trustlogic-batch

Running

App Files Files Community

Wajahat698 committed on Jun 6, 2025

Commit

d02ccaa

verified ·

1 Parent(s): efacef2

Update process_data.R

Browse files

Files changed (1) hide show

process_data.R +143 -31

process_data.R CHANGED Viewed

@@ -16,20 +16,30 @@ log_message <- function(message, output_text_file) {
 # Trust Driver analysis function
 trust_driver_analysis <- function(model_formula, data, output_text_file, csv_file) {
   tryCatch({
     model <- lm(model_formula, data = data)
     calc_relaimpo <- calc.relimp(model, type = "lmg", rela = TRUE)
     average_importance <- mean(calc_relaimpo$lmg)
     file_conn <- file(output_text_file, open = "a")
     full_output <- capture.output({
       print("Trust Driver Analysis:\n")
       print(calc_relaimpo)
       cat("\nAverage Importance: ", average_importance, "\n")
     })
     writeLines(full_output, file_conn)
     close(file_conn)
     results <- data.frame(Predictor = names(calc_relaimpo$lmg), Importance = calc_relaimpo$lmg)
     write.csv(results, file = csv_file, row.names = FALSE)
   }, error = function(e) {
     log_message(paste("Error in trust_driver_analysis:", e$message), output_text_file)
@@ -39,66 +49,121 @@ trust_driver_analysis <- function(model_formula, data, output_text_file, csv_fil
 # Trust Builder Analysis function
 trust_builder_analysis <- function(data, data_headers, output_text_file, csv_file) {
   tryCatch({
     question_to_column <- setNames(as.list(data_headers[1, ]), as.character(data_headers[2, ]))
     p <- 6
     bucket_columns <- c("Stability", "Development", "Relationship", "Benefit", "Vision", "Competence")
     bucket <- data %>% select(all_of(bucket_columns))
     TB <- data %>% select(contains("TB"))
     num_tb_statements <- ncol(TB)
     coef <- matrix(NA, ncol = 6, nrow = num_tb_statements)
     bucket_predictors <- list()
     for (i in 1:6) {
       y <- as.matrix(pull(bucket[, i]))
       x <- as.matrix(TB)
       fit <- regsubsets(x, y, nbest = 1, nvmax = p)
       fit_sum <- summary(fit)
       coef[, i] <- fit_sum$outmat[p, ]
       predictors <- names(which(fit_sum$outmat[p, ] == "*"))
       bucket_predictors[[bucket_columns[i]]] <- predictors
     }
     model_str <- sapply(names(bucket_predictors), function(col) {
       paste(col, "~", paste(bucket_predictors[[col]], collapse = "+"))
     })
     model_str <- c("Trust ~ Stability + Development + Relationship + Benefit + Vision + Competence", model_str)
     fit <- sem(model_str, data = data)
     fit_summary <- summary(fit, standardized = TRUE, fit.measures = TRUE, rsquare = TRUE)
     output <- fit_summary$pe[fit_summary$pe$op == "~", c("lhs", "rhs", "std.all")]
     convert_to_percentage <- function(df) {
       df %>%
         group_by(lhs) %>%
         mutate(abs_std = abs(std.all),
-               sum_abs_std = sum(abs_std),
-               percent_std = (abs_std / sum_abs_std) * 100) %>%
         select(-abs_std, -sum_abs_std) %>%
         ungroup()
     }
     percentage_output <- convert_to_percentage(output)
     tb_column_names <- colnames(TB)
     percentage_output_wide <- percentage_output %>%
       pivot_wider(names_from = lhs, values_from = percent_std) %>%
       rename_with(~ gsub("std.all\\.", "", .), starts_with("std.all"))
     result_df <- data.frame(TB = tb_column_names)
     result_df <- left_join(result_df, percentage_output_wide, by = c("TB" = "rhs"))
     result_df[is.na(result_df)] <- 0
     result_df$Message <- sapply(result_df$TB, function(tb_col) question_to_column[[tb_col]])
     result_df$TB <- factor(result_df$TB, levels = paste0("TB", 1:37))
     result_df <- result_df %>%
-      select(-Trust) %>%
       group_by(TB) %>%
       summarise(across(everything(), ~ if(is.numeric(.)) sum(., na.rm = TRUE) else first(.))) %>%
       arrange(TB)
-    result_df <- result_df %>% select(TB, Message, everything())
     file_conn <- file(output_text_file, open = "a")
     full_output <- capture.output({
       print("Trust Builder Analysis:\n")
       print("Data header mapping:\n")
@@ -107,7 +172,7 @@ trust_builder_analysis <- function(data, data_headers, output_text_file, csv_fil
       print(bucket)
       print("Messages:\n")
       print(TB)
-      print("Coefficients matrix (coef):\n")
       print(coef)
       print("Model:\n")
       cat(model_str, sep = "\n")
@@ -120,10 +185,15 @@ trust_builder_analysis <- function(data, data_headers, output_text_file, csv_fil
       print("result_df:\n")
       print(result_df)
     })
     writeLines(full_output, file_conn)
     close(file_conn)
-    write.csv(result_df, file = csv_file, row.names = FALSE)
   }, error = function(e) {
     log_message(paste("Error in trust_builder_analysis:", e$message), output_text_file)
   })
@@ -132,48 +202,90 @@ trust_builder_analysis <- function(data, data_headers, output_text_file, csv_fil
 # Read command-line arguments
 args <- commandArgs(trailingOnly = TRUE)
 input_file <- args[1]
-output_text_file <- args[2]
 csv_output_path_trust <- args[3]
 csv_output_path_nps <- args[4]
 csv_output_path_loyalty <- args[5]
 csv_output_path_consideration <- args[6]
 csv_output_path_satisfaction <- args[7]
 csv_output_path_trustbuilder <- args[8]
-nps_present <- as.logical(tolower(args[9]))
 loyalty_present <- as.logical(tolower(args[10]))
 consideration_present <- as.logical(tolower(args[11]))
 satisfaction_present <- as.logical(tolower(args[12]))
 trustbuilder_present <- as.logical(tolower(args[13]))
 log_message("Starting Trust Driver and Builder Analysis Script.", output_text_file)
-# Load data (support .xlsx or .csv)
 data_driver <- NULL
-if (grepl("\\.xlsx$", input_file)) {
   data_driver <- read_excel(input_file, sheet = "Driver", skip = 3)
-} else if (grepl("\\.csv$", input_file)) {
-  data_driver <- read.csv(input_file, stringsAsFactors = FALSE)
 }
-trust_driver_analysis <- function(model_formula, data, output_text_file, csv_file) {
-  tryCatch({
-    data <- data[complete.cases(data), ]  # ✅ <---- ADD THIS LINE HERE
-    model <- lm(model_formula, data = data)
-    calc_relaimpo <- calc.relimp(model, type = "lmg", rela = TRUE)
-    average_importance <- mean(calc_relaimpo$lmg)
-    file_conn <- file(output_text_file, open = "a")
-    full_output <- capture.output({
-      print("Trust Driver Analysis:\n")
-      print(calc_relaimpo)
-      cat("\nAverage Importance: ", average_importance, "\n")
-    })
-    writeLines(full_output, file_conn)
-    close(file_conn)
-    results <- data.frame(Predictor = names(calc_relaimpo$lmg), Importance = calc_relaimpo$lmg)
-    write.csv(results, file = csv_file, row.names = FALSE)
-  }, error = function(e) {
-    log_message(paste("Error in trust_driver_analysis:", e$message), output_text_file)
-  })
 }

 # Trust Driver analysis function
 trust_driver_analysis <- function(model_formula, data, output_text_file, csv_file) {
   tryCatch({
+    # Fit linear regression model
     model <- lm(model_formula, data = data)
+    # Calculate relative importance using the lmg method
     calc_relaimpo <- calc.relimp(model, type = "lmg", rela = TRUE)
+    # Calculate average importance
     average_importance <- mean(calc_relaimpo$lmg)
+    # Open the output text file in append mode to add this model's output
     file_conn <- file(output_text_file, open = "a")
+    # Capture output to include in the text file
     full_output <- capture.output({
       print("Trust Driver Analysis:\n")
       print(calc_relaimpo)
       cat("\nAverage Importance: ", average_importance, "\n")
     })
+    # Write output to text file
     writeLines(full_output, file_conn)
     close(file_conn)
+    # Create data frame of predictor names and their importance
     results <- data.frame(Predictor = names(calc_relaimpo$lmg), Importance = calc_relaimpo$lmg)
+    # Save results to CSV file
     write.csv(results, file = csv_file, row.names = FALSE)
   }, error = function(e) {
     log_message(paste("Error in trust_driver_analysis:", e$message), output_text_file)
 # Trust Builder Analysis function
 trust_builder_analysis <- function(data, data_headers, output_text_file, csv_file) {
   tryCatch({
+    # Map the questions to column names
     question_to_column <- setNames(as.list(data_headers[1, ]), as.character(data_headers[2, ]))
+    # Number of important statements to be selected
     p <- 6
+    # Define the list of column names
     bucket_columns <- c("Stability", "Development", "Relationship", "Benefit", "Vision", "Competence")
+    # Select columns based on the predefined list
     bucket <- data %>% select(all_of(bucket_columns))
+    # Select all columns from the consumer dataframe that contain "TB" in their names and assign them to the variable TB
     TB <- data %>% select(contains("TB"))
+    # Dynamically detect the number of TB statements
     num_tb_statements <- ncol(TB)
+    # Initialize a matrix with number of TB rows (37 for Volkswagen) and 6 columns, filled with NA values
     coef <- matrix(NA, ncol = 6, nrow = num_tb_statements)
+    # Initialize an empty list to store the predictors for each bucket column
     bucket_predictors <- list()
+    # Loop over each of the 6 columns
     for (i in 1:6) {
+      # Extract the i-th column from 'bucket' as a matrix and assign it to 'y'
       y <- as.matrix(pull(bucket[, i]))
+      # Convert 'TB' dataframe to a matrix and assign it to 'x'
       x <- as.matrix(TB)
+      # Perform best subset regression using 'x' as predictors and 'y' as the response variable
       fit <- regsubsets(x, y, nbest = 1, nvmax = p)
+      # Summarize the regression subsets
       fit_sum <- summary(fit)
+      # Store the selection markers ("*" where a TB predictor is included) of the best p-predictor model in the i-th column of 'coef'
       coef[, i] <- fit_sum$outmat[p, ]
+      # Collect the names of the predictors marked "*" in the best p-predictor model
       predictors <- names(which(fit_sum$outmat[p, ] == "*"))
+      # Append the predictors to the bucket_predictors list
       bucket_predictors[[bucket_columns[i]]] <- predictors
     }
+    # Build one model formula string per bucket column from its selected predictors
     model_str <- sapply(names(bucket_predictors), function(col) {
       paste(col, "~", paste(bucket_predictors[[col]], collapse = "+"))
     })
+    # Prepend the Trust x and y to model_str
     model_str <- c("Trust ~ Stability + Development + Relationship + Benefit + Vision + Competence", model_str)
+    # Fit the model using sem() function
     fit <- sem(model_str, data = data)
     fit_summary <- summary(fit, standardized = TRUE, fit.measures = TRUE, rsquare = TRUE)
+    # Extract the standardized estimates (std.all) of the regression paths (op == "~")
     output <- fit_summary$pe[fit_summary$pe$op == "~", c("lhs", "rhs", "std.all")]
+    # Define the function to convert std.all to percentages
     convert_to_percentage <- function(df) {
       df %>%
         group_by(lhs) %>%
         mutate(abs_std = abs(std.all),
+              sum_abs_std = sum(abs_std),
+              percent_std = (abs_std / sum_abs_std) * 100) %>%
         select(-abs_std, -sum_abs_std) %>%
         ungroup()
     }
+    # Convert the estimates to percentages
     percentage_output <- convert_to_percentage(output)
+    # Extract TB column names
     tb_column_names <- colnames(TB)
+    # Convert std.all to a wide format dataframe
     percentage_output_wide <- percentage_output %>%
       pivot_wider(names_from = lhs, values_from = percent_std) %>%
       rename_with(~ gsub("std.all\\.", "", .), starts_with("std.all"))
+    # Create a new dataframe with TB columns and percentage estimates
     result_df <- data.frame(TB = tb_column_names)
+    # Merge result_df with percentage_output_wide
     result_df <- left_join(result_df, percentage_output_wide, by = c("TB" = "rhs"))
+    # Fill NA values with 0 to ensure proper representation
     result_df[is.na(result_df)] <- 0
+    # Add corresponding messages of TB as a new column
     result_df$Message <- sapply(result_df$TB, function(tb_col) question_to_column[[tb_col]])
+    # Convert 'TB' column to a factor with the correct order
     result_df$TB <- factor(result_df$TB, levels = paste0("TB", 1:37))
+    # Exclude the 'std.all' and 'Trust' columns and merge rows by 'TB'
     result_df <- result_df %>%
+      select(-std.all, -Trust) %>%
       group_by(TB) %>%
       summarise(across(everything(), ~ if(is.numeric(.)) sum(., na.rm = TRUE) else first(.))) %>%
       arrange(TB)
+    # Reorder columns to have Message as the second column
+    result_df <- result_df %>%
+      select(TB, Message, everything())
+    # Open the output text file in append mode to add this model's output
     file_conn <- file(output_text_file, open = "a")
+    # Capture output to include in the text file
     full_output <- capture.output({
       print("Trust Builder Analysis:\n")
       print("Data header mapping:\n")
       print(bucket)
       print("Messages:\n")
       print(TB)
+      print("Coefficients matrix (coef:\n")
       print(coef)
       print("Model:\n")
       cat(model_str, sep = "\n")
       print("result_df:\n")
       print(result_df)
     })
+    # Write output to text file
     writeLines(full_output, file_conn)
     close(file_conn)
+    # Convert result_df to a plain data frame for export
+    results <- data.frame(result_df)
+    # Save results to CSV file
+    write.csv(results, file = csv_file, row.names = FALSE)
   }, error = function(e) {
     log_message(paste("Error in trust_builder_analysis:", e$message), output_text_file)
   })
 # Read command-line arguments
 args <- commandArgs(trailingOnly = TRUE)
 input_file <- args[1]
+output_text_file <- args[2]  # Base path for output text and CSV files
 csv_output_path_trust <- args[3]
 csv_output_path_nps <- args[4]
 csv_output_path_loyalty <- args[5]
 csv_output_path_consideration <- args[6]
 csv_output_path_satisfaction <- args[7]
 csv_output_path_trustbuilder <- args[8]
+nps_present <- as.logical(tolower(args[9]))  # Expecting "TRUE" or "FALSE" as the argument
 loyalty_present <- as.logical(tolower(args[10]))
 consideration_present <- as.logical(tolower(args[11]))
 satisfaction_present <- as.logical(tolower(args[12]))
 trustbuilder_present <- as.logical(tolower(args[13]))
+# Log the starting of the script
 log_message("Starting Trust Driver and Builder Analysis Script.", output_text_file)
+########## Trust Driver Analysis ######################
+# Load the trust driver dataset (CSV or Excel)
 data_driver <- NULL
+if (grepl(".xlsx", input_file)) {
+  # Load the Excel file with the fourth row as the header
   data_driver <- read_excel(input_file, sheet = "Driver", skip = 3)
 }
+# Process the Trust model
+trust_driver_analysis(
+  Trust ~ Stability + Development + Relationship + Benefit + Vision + Competence,
+  data_driver,
+  output_text_file,
+  csv_output_path_trust)
+# Conditionally process the NPS model
+if (nps_present) {
+  trust_driver_analysis(
+    NPS ~ Stability + Development + Relationship + Benefit + Vision + Competence,
+    data_driver,
+    output_text_file,
+    csv_output_path_nps)
+}
+# Conditionally process the Loyalty model
+if (loyalty_present) {
+  trust_driver_analysis(
+    Loyalty ~ Stability + Development + Relationship + Benefit + Vision + Competence,
+    data_driver,
+    output_text_file,
+    csv_output_path_loyalty)
+}
+# Conditionally process the Consideration model
+if (consideration_present) {
+  trust_driver_analysis(
+    Consideration ~ Stability + Development + Relationship + Benefit + Vision + Competence,
+    data_driver,
+    output_text_file,
+    csv_output_path_consideration)
+}
+# Conditionally process the Satisfaction model
+if (satisfaction_present) {
+  trust_driver_analysis(
+    Satisfaction ~ Stability + Development + Relationship + Benefit + Vision + Competence,
+    data_driver,
+    output_text_file,
+    csv_output_path_satisfaction)
 }
+########## Trust Builder Analysis ######################
+if (trustbuilder_present) {
+  data_builder <- NULL
+  if (grepl(".xlsx", input_file)) {
+    # Read the 4th and 5th rows as header mapping
+    data_builder_headers <- read_excel(input_file, sheet = "Builder", skip = 3, n_max = 2)
+    # Read the rest of the data, skipping the first 5 rows (to start from row 6)
+    data_builder_rows <- read_excel(input_file, sheet = "Builder", skip = 5)
+  }
+  # Process the Builder model
+  trust_builder_analysis(data_builder_rows, data_builder_headers, output_text_file, csv_output_path_trustbuilder)
+}
+# Log the ending of the script
+log_message("Trust Driver and Builder Analysis Script Completed.", output_text_file)