Wajahat698 committed on
Commit
d02ccaa
·
verified ·
1 Parent(s): efacef2

Update process_data.R

Browse files
Files changed (1) hide show
  1. process_data.R +143 -31
process_data.R CHANGED
@@ -16,20 +16,30 @@ log_message <- function(message, output_text_file) {
16
  # Trust Driver analysis function
17
  trust_driver_analysis <- function(model_formula, data, output_text_file, csv_file) {
18
  tryCatch({
 
19
  model <- lm(model_formula, data = data)
 
 
20
  calc_relaimpo <- calc.relimp(model, type = "lmg", rela = TRUE)
 
21
  average_importance <- mean(calc_relaimpo$lmg)
22
 
 
23
  file_conn <- file(output_text_file, open = "a")
 
24
  full_output <- capture.output({
25
  print("Trust Driver Analysis:\n")
26
  print(calc_relaimpo)
27
  cat("\nAverage Importance: ", average_importance, "\n")
28
  })
 
29
  writeLines(full_output, file_conn)
30
  close(file_conn)
31
 
 
32
  results <- data.frame(Predictor = names(calc_relaimpo$lmg), Importance = calc_relaimpo$lmg)
 
 
33
  write.csv(results, file = csv_file, row.names = FALSE)
34
  }, error = function(e) {
35
  log_message(paste("Error in trust_driver_analysis:", e$message), output_text_file)
@@ -39,66 +49,121 @@ trust_driver_analysis <- function(model_formula, data, output_text_file, csv_fil
39
  # Trust Builder Analysis function
40
  trust_builder_analysis <- function(data, data_headers, output_text_file, csv_file) {
41
  tryCatch({
 
42
  question_to_column <- setNames(as.list(data_headers[1, ]), as.character(data_headers[2, ]))
 
 
43
  p <- 6
 
 
44
  bucket_columns <- c("Stability", "Development", "Relationship", "Benefit", "Vision", "Competence")
 
 
45
  bucket <- data %>% select(all_of(bucket_columns))
 
 
46
  TB <- data %>% select(contains("TB"))
 
 
47
  num_tb_statements <- ncol(TB)
 
 
48
  coef <- matrix(NA, ncol = 6, nrow = num_tb_statements)
 
 
49
  bucket_predictors <- list()
50
 
 
51
  for (i in 1:6) {
 
52
  y <- as.matrix(pull(bucket[, i]))
 
 
53
  x <- as.matrix(TB)
 
 
54
  fit <- regsubsets(x, y, nbest = 1, nvmax = p)
 
 
55
  fit_sum <- summary(fit)
 
 
56
  coef[, i] <- fit_sum$outmat[p, ]
 
 
57
  predictors <- names(which(fit_sum$outmat[p, ] == "*"))
 
 
58
  bucket_predictors[[bucket_columns[i]]] <- predictors
59
  }
60
 
 
61
  model_str <- sapply(names(bucket_predictors), function(col) {
62
  paste(col, "~", paste(bucket_predictors[[col]], collapse = "+"))
63
  })
 
 
64
  model_str <- c("Trust ~ Stability + Development + Relationship + Benefit + Vision + Competence", model_str)
65
 
 
66
  fit <- sem(model_str, data = data)
67
  fit_summary <- summary(fit, standardized = TRUE, fit.measures = TRUE, rsquare = TRUE)
 
 
68
  output <- fit_summary$pe[fit_summary$pe$op == "~", c("lhs", "rhs", "std.all")]
69
 
 
70
  convert_to_percentage <- function(df) {
71
  df %>%
72
  group_by(lhs) %>%
73
  mutate(abs_std = abs(std.all),
74
- sum_abs_std = sum(abs_std),
75
- percent_std = (abs_std / sum_abs_std) * 100) %>%
76
  select(-abs_std, -sum_abs_std) %>%
77
  ungroup()
78
  }
79
 
 
80
  percentage_output <- convert_to_percentage(output)
 
 
81
  tb_column_names <- colnames(TB)
82
 
 
83
  percentage_output_wide <- percentage_output %>%
84
  pivot_wider(names_from = lhs, values_from = percent_std) %>%
85
  rename_with(~ gsub("std.all\\.", "", .), starts_with("std.all"))
86
 
 
87
  result_df <- data.frame(TB = tb_column_names)
 
 
88
  result_df <- left_join(result_df, percentage_output_wide, by = c("TB" = "rhs"))
 
 
89
  result_df[is.na(result_df)] <- 0
 
 
90
  result_df$Message <- sapply(result_df$TB, function(tb_col) question_to_column[[tb_col]])
 
 
91
  result_df$TB <- factor(result_df$TB, levels = paste0("TB", 1:37))
92
 
 
93
  result_df <- result_df %>%
94
- select(-Trust) %>%
95
  group_by(TB) %>%
96
  summarise(across(everything(), ~ if(is.numeric(.)) sum(., na.rm = TRUE) else first(.))) %>%
97
  arrange(TB)
98
 
99
- result_df <- result_df %>% select(TB, Message, everything())
 
 
100
 
 
101
  file_conn <- file(output_text_file, open = "a")
 
 
102
  full_output <- capture.output({
103
  print("Trust Builder Analysis:\n")
104
  print("Data header mapping:\n")
@@ -107,7 +172,7 @@ trust_builder_analysis <- function(data, data_headers, output_text_file, csv_fil
107
  print(bucket)
108
  print("Messages:\n")
109
  print(TB)
110
- print("Coefficients matrix (coef):\n")
111
  print(coef)
112
  print("Model:\n")
113
  cat(model_str, sep = "\n")
@@ -120,10 +185,15 @@ trust_builder_analysis <- function(data, data_headers, output_text_file, csv_fil
120
  print("result_df:\n")
121
  print(result_df)
122
  })
 
123
  writeLines(full_output, file_conn)
124
  close(file_conn)
125
 
126
- write.csv(result_df, file = csv_file, row.names = FALSE)
 
 
 
 
127
  }, error = function(e) {
128
  log_message(paste("Error in trust_builder_analysis:", e$message), output_text_file)
129
  })
@@ -132,48 +202,90 @@ trust_builder_analysis <- function(data, data_headers, output_text_file, csv_fil
132
  # Read command-line arguments
133
  args <- commandArgs(trailingOnly = TRUE)
134
  input_file <- args[1]
135
- output_text_file <- args[2]
136
  csv_output_path_trust <- args[3]
137
  csv_output_path_nps <- args[4]
138
  csv_output_path_loyalty <- args[5]
139
  csv_output_path_consideration <- args[6]
140
  csv_output_path_satisfaction <- args[7]
141
  csv_output_path_trustbuilder <- args[8]
142
- nps_present <- as.logical(tolower(args[9]))
143
  loyalty_present <- as.logical(tolower(args[10]))
144
  consideration_present <- as.logical(tolower(args[11]))
145
  satisfaction_present <- as.logical(tolower(args[12]))
146
  trustbuilder_present <- as.logical(tolower(args[13]))
147
 
 
148
  log_message("Starting Trust Driver and Builder Analysis Script.", output_text_file)
149
 
150
- # Load data (support .xlsx or .csv)
 
 
151
  data_driver <- NULL
152
- if (grepl("\\.xlsx$", input_file)) {
 
153
  data_driver <- read_excel(input_file, sheet = "Driver", skip = 3)
154
- } else if (grepl("\\.csv$", input_file)) {
155
- data_driver <- read.csv(input_file, stringsAsFactors = FALSE)
156
  }
157
- trust_driver_analysis <- function(model_formula, data, output_text_file, csv_file) {
158
- tryCatch({
159
- data <- data[complete.cases(data), ] # ✅ <---- ADD THIS LINE HERE
160
 
161
- model <- lm(model_formula, data = data)
162
- calc_relaimpo <- calc.relimp(model, type = "lmg", rela = TRUE)
163
- average_importance <- mean(calc_relaimpo$lmg)
 
 
 
164
 
165
- file_conn <- file(output_text_file, open = "a")
166
- full_output <- capture.output({
167
- print("Trust Driver Analysis:\n")
168
- print(calc_relaimpo)
169
- cat("\nAverage Importance: ", average_importance, "\n")
170
- })
171
- writeLines(full_output, file_conn)
172
- close(file_conn)
173
 
174
- results <- data.frame(Predictor = names(calc_relaimpo$lmg), Importance = calc_relaimpo$lmg)
175
- write.csv(results, file = csv_file, row.names = FALSE)
176
- }, error = function(e) {
177
- log_message(paste("Error in trust_driver_analysis:", e$message), output_text_file)
178
- })
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
179
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  # Trust Driver analysis function
17
  trust_driver_analysis <- function(model_formula, data, output_text_file, csv_file) {
18
  tryCatch({
19
+ # Fit linear regression model
20
  model <- lm(model_formula, data = data)
21
+
22
+ # Calculate relative importance using the lmg method
23
  calc_relaimpo <- calc.relimp(model, type = "lmg", rela = TRUE)
24
+ # Calculate average importance
25
  average_importance <- mean(calc_relaimpo$lmg)
26
 
27
+ # Open the output text file in append mode to add this model's output
28
  file_conn <- file(output_text_file, open = "a")
29
+ # Capture output to include in the text file
30
  full_output <- capture.output({
31
  print("Trust Driver Analysis:\n")
32
  print(calc_relaimpo)
33
  cat("\nAverage Importance: ", average_importance, "\n")
34
  })
35
+ # Write output to text file
36
  writeLines(full_output, file_conn)
37
  close(file_conn)
38
 
39
+ # Create data frame of predictor names and their importance
40
  results <- data.frame(Predictor = names(calc_relaimpo$lmg), Importance = calc_relaimpo$lmg)
41
+
42
+ # Save results to CSV file
43
  write.csv(results, file = csv_file, row.names = FALSE)
44
  }, error = function(e) {
45
  log_message(paste("Error in trust_driver_analysis:", e$message), output_text_file)
 
49
  # Trust Builder Analysis function
50
  trust_builder_analysis <- function(data, data_headers, output_text_file, csv_file) {
51
  tryCatch({
52
+ # Map the questions to column names
53
  question_to_column <- setNames(as.list(data_headers[1, ]), as.character(data_headers[2, ]))
54
+
55
+ # Number of important statements to be selected
56
  p <- 6
57
+
58
+ # Define the list of column names
59
  bucket_columns <- c("Stability", "Development", "Relationship", "Benefit", "Vision", "Competence")
60
+
61
+ # Select columns based on the predefined list
62
  bucket <- data %>% select(all_of(bucket_columns))
63
+
64
+ # Select all columns from the consumer dataframe that contain "TB" in their names and assign them to the variable TB
65
  TB <- data %>% select(contains("TB"))
66
+
67
+ # Dynamically detect the number of TB statements
68
  num_tb_statements <- ncol(TB)
69
+
70
+ # Initialize a matrix with number of TB rows (37 for Volkswagen) and 6 columns, filled with NA values
71
  coef <- matrix(NA, ncol = 6, nrow = num_tb_statements)
72
+
73
+ # Initialize an empty list to store the predictors for each bucket column
74
  bucket_predictors <- list()
75
 
76
+ # Loop over each of the 6 columns
77
  for (i in 1:6) {
78
+ # Extract the i-th column from 'bucket' as a matrix and assign it to 'y'
79
  y <- as.matrix(pull(bucket[, i]))
80
+
81
+ # Convert 'TB' dataframe to a matrix and assign it to 'x'
82
  x <- as.matrix(TB)
83
+
84
+ # Perform best subset regression using 'x' as predictors and 'y' as the response variable
85
  fit <- regsubsets(x, y, nbest = 1, nvmax = p)
86
+
87
+ # Summarize the regression subsets
88
  fit_sum <- summary(fit)
89
+
90
+ # Store the selection indicators ("*" or " ") of the best p-variable model in the i-th column of 'coef'
91
  coef[, i] <- fit_sum$outmat[p, ]
92
+
93
+ # Extract the names of the predictors selected in the best p-variable model
94
  predictors <- names(which(fit_sum$outmat[p, ] == "*"))
95
+
96
+ # Append the predictors to the bucket_predictors list
97
  bucket_predictors[[bucket_columns[i]]] <- predictors
98
  }
99
 
100
+ # Create the desired output format as model
101
  model_str <- sapply(names(bucket_predictors), function(col) {
102
  paste(col, "~", paste(bucket_predictors[[col]], collapse = "+"))
103
  })
104
+
105
+ # Prepend the Trust x and y to model_str
106
  model_str <- c("Trust ~ Stability + Development + Relationship + Benefit + Vision + Competence", model_str)
107
 
108
+ # Fit the model using sem() function
109
  fit <- sem(model_str, data = data)
110
  fit_summary <- summary(fit, standardized = TRUE, fit.measures = TRUE, rsquare = TRUE)
111
+
112
+ # Make it percentages
113
  output <- fit_summary$pe[fit_summary$pe$op == "~", c("lhs", "rhs", "std.all")]
114
 
115
+ # Define the function to convert std.all to percentages
116
  convert_to_percentage <- function(df) {
117
  df %>%
118
  group_by(lhs) %>%
119
  mutate(abs_std = abs(std.all),
120
+ sum_abs_std = sum(abs_std),
121
+ percent_std = (abs_std / sum_abs_std) * 100) %>%
122
  select(-abs_std, -sum_abs_std) %>%
123
  ungroup()
124
  }
125
 
126
+ # Convert the estimates to percentages
127
  percentage_output <- convert_to_percentage(output)
128
+
129
+ # Extract TB column names
130
  tb_column_names <- colnames(TB)
131
 
132
+ # Convert std.all to a wide format dataframe
133
  percentage_output_wide <- percentage_output %>%
134
  pivot_wider(names_from = lhs, values_from = percent_std) %>%
135
  rename_with(~ gsub("std.all\\.", "", .), starts_with("std.all"))
136
 
137
+ # Create a new dataframe with TB columns and percentage estimates
138
  result_df <- data.frame(TB = tb_column_names)
139
+
140
+ # Merge result_df with percentage_output_wide, matching TB names to the 'rhs' column
141
  result_df <- left_join(result_df, percentage_output_wide, by = c("TB" = "rhs"))
142
+
143
+ # Fill NA values with 0 to ensure proper representation
144
  result_df[is.na(result_df)] <- 0
145
+
146
+ # Add corresponding messages of TB as a new column
147
  result_df$Message <- sapply(result_df$TB, function(tb_col) question_to_column[[tb_col]])
148
+
149
+ # Convert 'TB' column to a factor with the correct order
150
  result_df$TB <- factor(result_df$TB, levels = paste0("TB", 1:37))
151
 
152
+ # Exclude the 'std.all' and 'Trust' columns and merge rows by 'TB'
153
  result_df <- result_df %>%
154
+ select(-std.all, -Trust) %>%
155
  group_by(TB) %>%
156
  summarise(across(everything(), ~ if(is.numeric(.)) sum(., na.rm = TRUE) else first(.))) %>%
157
  arrange(TB)
158
 
159
+ # Reorder columns to have Message as the second column
160
+ result_df <- result_df %>%
161
+ select(TB, Message, everything())
162
 
163
+ # Open the output text file in append mode to add this model's output
164
  file_conn <- file(output_text_file, open = "a")
165
+
166
+ # Capture output to include in the text file
167
  full_output <- capture.output({
168
  print("Trust Builder Analysis:\n")
169
  print("Data header mapping:\n")
 
172
  print(bucket)
173
  print("Messages:\n")
174
  print(TB)
175
+ print("Coefficients matrix (coef:\n")
176
  print(coef)
177
  print("Model:\n")
178
  cat(model_str, sep = "\n")
 
185
  print("result_df:\n")
186
  print(result_df)
187
  })
188
+ # Write output to text file
189
  writeLines(full_output, file_conn)
190
  close(file_conn)
191
 
192
+ # Convert the summarised result to a plain data frame for export
193
+ results <- data.frame(result_df)
194
+
195
+ # Save results to CSV file
196
+ write.csv(results, file = csv_file, row.names = FALSE)
197
  }, error = function(e) {
198
  log_message(paste("Error in trust_builder_analysis:", e$message), output_text_file)
199
  })
 
# Read command-line arguments ----
# Positional layout (trailing arguments only):
#   1     input workbook path
#   2     output text file (analysis output is appended to it)
#   3-8   CSV output paths: Trust, NPS, Loyalty, Consideration, Satisfaction,
#         Trust Builder
#   9-13  "TRUE"/"FALSE" flags enabling the NPS, Loyalty, Consideration,
#         Satisfaction and Trust Builder analyses
args <- commandArgs(trailingOnly = TRUE)

# Parse a "TRUE"/"FALSE" flag. A missing or unparseable argument yields FALSE
# instead of NA, so the `if ()` tests below cannot abort with
# "missing value where TRUE/FALSE needed".
parse_flag <- function(arg) {
  isTRUE(as.logical(tolower(arg)))
}

input_file <- args[1]
output_text_file <- args[2]
csv_output_path_trust <- args[3]
csv_output_path_nps <- args[4]
csv_output_path_loyalty <- args[5]
csv_output_path_consideration <- args[6]
csv_output_path_satisfaction <- args[7]
csv_output_path_trustbuilder <- args[8]
nps_present <- parse_flag(args[9])
loyalty_present <- parse_flag(args[10])
consideration_present <- parse_flag(args[11])
satisfaction_present <- parse_flag(args[12])
trustbuilder_present <- parse_flag(args[13])

# Log the starting of the script
log_message("Starting Trust Driver and Builder Analysis Script.", output_text_file)

# Only Excel workbooks are read below. The dot is escaped and the pattern is
# anchored so that only a real ".xlsx" extension matches; isTRUE() guards
# against a missing input path (grepl() would return NA).
is_xlsx <- isTRUE(grepl("\\.xlsx$", input_file, ignore.case = TRUE))

if (!is_xlsx) {
  log_message(
    paste("Unsupported input file format (expected .xlsx):", input_file),
    output_text_file
  )
}

# Trust Driver Analysis ----

data_driver <- NULL
if (is_xlsx) {
  # skip = 3: the first three rows are skipped, so the next row is the header
  data_driver <- read_excel(input_file, sheet = "Driver", skip = 3)

  # Every driver model regresses its outcome on the same six predictors.
  driver_predictors <- c(
    "Stability", "Development", "Relationship",
    "Benefit", "Vision", "Competence"
  )

  # The Trust model always runs; the others run only when their flag is set.
  driver_models <- list(
    list(outcome = "Trust", enabled = TRUE,
         csv_file = csv_output_path_trust),
    list(outcome = "NPS", enabled = nps_present,
         csv_file = csv_output_path_nps),
    list(outcome = "Loyalty", enabled = loyalty_present,
         csv_file = csv_output_path_loyalty),
    list(outcome = "Consideration", enabled = consideration_present,
         csv_file = csv_output_path_consideration),
    list(outcome = "Satisfaction", enabled = satisfaction_present,
         csv_file = csv_output_path_satisfaction)
  )

  for (driver_model in driver_models) {
    if (driver_model$enabled) {
      trust_driver_analysis(
        reformulate(driver_predictors, response = driver_model$outcome),
        data_driver,
        output_text_file,
        driver_model$csv_file
      )
    }
  }
}

# Trust Builder Analysis ----

if (trustbuilder_present && is_xlsx) {
  # skip = 3, n_max = 2: the first row read supplies the column names and the
  # next two rows are returned as data. trust_builder_analysis() uses those
  # two rows to map column codes to their message text.
  # NOTE(review): exact sheet row numbers depend on the workbook layout
  # (readxl also drops leading empty rows) -- confirm against a sample file.
  data_builder_headers <- read_excel(
    input_file, sheet = "Builder", skip = 3, n_max = 2
  )

  # skip = 5: the respondent data starts after the header-mapping rows
  data_builder_rows <- read_excel(input_file, sheet = "Builder", skip = 5)

  # Process the Builder model
  trust_builder_analysis(
    data_builder_rows,
    data_builder_headers,
    output_text_file,
    csv_output_path_trustbuilder
  )
}

# Log the ending of the script
log_message("Trust Driver and Builder Analysis Script Completed.", output_text_file)