Spaces:

trustlogic
/

trustdriver-free

Sleeping

App Files Files Community

trustdriver-free / process_data.R

nghweigeok

17th deployment. Update AI chatbot. Update colour scheme for charts. Disable one Trust Builder table.

99410be verified over 1 year ago

raw

history blame contribute delete

9.98 kB

	# Load required libraries
	library(relaimpo)
	library(readxl)
	library(readr)
	library(lavaan)
	library(leaps)
	library(dplyr)
	library(tidyr)

	# Logging helper: report a message on the console and keep a copy on disk.
	#
	# message:          text to log.
	# output_text_file: path of the text file the message is appended to.
	log_message <- function(message, output_text_file) {
	  # Console copy: the message, a separating space, then a newline.
	  cat(message, "\n", sep = " ")
	  # File copy: appended after whatever the file already contains.
	  write(x = message, file = output_text_file, append = TRUE)
	}

	# Trust Driver analysis function
	#
	# Fits a linear model for `model_formula` on `data`, computes the relative
	# importance of every predictor with calc.relimp() (type = "lmg",
	# rela = TRUE), appends the printed report to `output_text_file` and writes a
	# Predictor / Importance table to `csv_file`.
	#
	# model_formula:    formula handed to lm().
	# data:             data frame holding the variables named in the formula.
	# output_text_file: text file the report (or an error message) is appended to.
	# csv_file:         destination of the Predictor / Importance CSV.
	#
	# Errors are not propagated: they are caught and written through
	# log_message(), so the calling script keeps running.
	trust_driver_analysis <- function(model_formula, data, output_text_file, csv_file) {
	  tryCatch({
	    # Fit linear regression model
	    model <- lm(model_formula, data = data)

	    # Calculate relative importance using the lmg method
	    calc_relaimpo <- calc.relimp(model, type = "lmg", rela = TRUE)
	    # Calculate average importance
	    average_importance <- mean(calc_relaimpo$lmg)

	    # Build the report before touching the file, so a failure while printing
	    # can never leave an open connection behind.
	    full_output <- capture.output({
	      print("Trust Driver Analysis:\n")
	      print(calc_relaimpo)
	      cat("\nAverage Importance: ", average_importance, "\n")
	    })

	    # Append this model's report; `finally` closes the connection even when
	    # the write itself fails.
	    file_conn <- file(output_text_file, open = "a")
	    tryCatch(
	      writeLines(full_output, file_conn),
	      finally = close(file_conn)
	    )

	    # Create data frame of predictor names and their importance
	    results <- data.frame(
	      Predictor = names(calc_relaimpo$lmg),
	      Importance = calc_relaimpo$lmg
	    )

	    # Save results to CSV file
	    write.csv(results, file = csv_file, row.names = FALSE)
	  }, error = function(e) {
	    log_message(paste("Error in trust_driver_analysis:", e$message), output_text_file)
	  })
	}

	# Trust Builder Analysis function
	#
	# For each of the six bucket columns, runs a best-subset regression
	# (regsubsets) of the bucket on every column of `data` whose name contains
	# "TB" and keeps the statements of the subset of size p = 6. The selected
	# statements are then used in a sem() path model together with
	# "Trust ~ <six buckets>". The standardized coefficients (std.all) are
	# rescaled to percentages of the absolute total within each response, laid
	# out as one row per TB column, and written to `csv_file`. A detailed dump of
	# the intermediate objects is appended to `output_text_file`.
	#
	# data:             data frame with the bucket columns, Trust and TB columns.
	# data_headers:     table whose first row holds the message text and whose
	#                   second row holds the matching column names.
	# output_text_file: text file the dump (or an error message) is appended to.
	# csv_file:         destination of the per-TB percentage table.
	#
	# Errors are not propagated: they are caught and written through
	# log_message(), so the calling script keeps running.
	trust_builder_analysis <- function(data, data_headers, output_text_file, csv_file) {
	  tryCatch({
	    # Map the questions to column names
	    question_to_column <- setNames(as.list(data_headers[1, ]), as.character(data_headers[2, ]))

	    # Number of important statements to be selected
	    p <- 6

	    # Define the list of column names
	    bucket_columns <- c("Stability", "Development", "Relationship", "Benefit", "Vision", "Competence")

	    # Select columns based on the predefined list
	    bucket <- data %>% select(all_of(bucket_columns))

	    # Select all columns that contain "TB" in their names
	    TB <- data %>% select(contains("TB"))
	    tb_column_names <- colnames(TB)

	    # One row per TB column, one column per bucket. Sized from the data
	    # instead of a fixed 37 x 6 so other statement counts do not fail.
	    coef <- matrix(NA, ncol = length(bucket_columns), nrow = ncol(TB))

	    # Initialize an empty list to store the predictors for each bucket column
	    bucket_predictors <- list()

	    # The predictor matrix is the same for every bucket, so build it once.
	    x <- as.matrix(TB)

	    for (i in seq_along(bucket_columns)) {
	      # Response: the i-th bucket column as a one-column matrix
	      y <- as.matrix(bucket[[i]])

	      # Perform best subset regression using 'x' as predictors and 'y' as the response variable
	      fit <- regsubsets(x, y, nbest = 1, nvmax = p)
	      fit_sum <- summary(fit)

	      # Give a readable error instead of "subscript out of bounds" when no
	      # subset of size p is available.
	      if (nrow(fit_sum$outmat) < p) {
	        stop("regsubsets returned fewer than ", p, " models for bucket '",
	             bucket_columns[i], "'", call. = FALSE)
	      }

	      # Row p of outmat marks the statements of the size-p subset with "*"
	      selection <- fit_sum$outmat[p, ]
	      coef[, i] <- selection
	      bucket_predictors[[bucket_columns[i]]] <- names(which(selection == "*"))
	    }

	    # Create the desired output format as model
	    model_str <- vapply(names(bucket_predictors), function(col) {
	      paste(col, "~", paste(bucket_predictors[[col]], collapse = "+"))
	    }, character(1))

	    # Prepend the Trust x and y to model_str
	    model_str <- c("Trust ~ Stability + Development + Relationship + Benefit + Vision + Competence", model_str)

	    # Fit the model using sem() function
	    fit <- sem(model_str, data = data)
	    fit_summary <- summary(fit, standardized = TRUE, fit.measures = TRUE, rsquare = TRUE)

	    # Keep only the regression rows and the standardized estimate
	    output <- fit_summary$pe[fit_summary$pe$op == "~", c("lhs", "rhs", "std.all")]

	    # Convert std.all to a percentage of the absolute total within each lhs
	    convert_to_percentage <- function(df) {
	      df %>%
	        group_by(lhs) %>%
	        mutate(abs_std = abs(std.all),
	               sum_abs_std = sum(abs_std),
	               percent_std = (abs_std / sum_abs_std) * 100) %>%
	        select(-abs_std, -sum_abs_std) %>%
	        ungroup()
	    }

	    # Convert the estimates to percentages
	    percentage_output <- convert_to_percentage(output)

	    # Convert to a wide format dataframe: one percentage column per lhs
	    percentage_output_wide <- percentage_output %>%
	      pivot_wider(names_from = lhs, values_from = percent_std) %>%
	      rename_with(~ gsub("std.all\\.", "", .), starts_with("std.all"))

	    # Create a new dataframe with TB columns and percentage estimates
	    result_df <- data.frame(TB = tb_column_names)

	    # Merge the result_df with percentage_output_wide
	    result_df <- left_join(result_df, percentage_output_wide, by = c("TB" = "rhs"))

	    # Fill NA values with 0 to ensure proper representation
	    result_df[is.na(result_df)] <- 0

	    # Add corresponding messages of TB as a new column. A TB column without a
	    # header entry gets NA instead of turning the column into a list.
	    result_df$Message <- vapply(result_df$TB, function(tb_col) {
	      question <- question_to_column[[tb_col]]
	      if (is.null(question)) NA_character_ else as.character(question)[1]
	    }, character(1), USE.NAMES = FALSE)

	    # Order TB by the number embedded in its name (TB1, TB2, ..., TB10, ...)
	    # using the columns actually present rather than a fixed TB1..TB37 list.
	    tb_order <- order(as.numeric(gsub("\\D", "", tb_column_names)))
	    tb_levels <- unique(tb_column_names[tb_order])
	    result_df$TB <- factor(result_df$TB, levels = tb_levels)

	    # Drop the 'std.all' and 'Trust' columns and merge rows by 'TB'
	    result_df <- result_df %>%
	      select(-std.all, -Trust) %>%
	      group_by(TB) %>%
	      summarise(across(everything(), ~ if(is.numeric(.)) sum(., na.rm = TRUE) else first(.))) %>%
	      arrange(TB)

	    # Reorder columns to have Message as the second column
	    result_df <- result_df %>%
	      select(TB, Message, everything())

	    # Build the dump before opening the file, so a failure while printing
	    # can never leave an open connection behind.
	    full_output <- capture.output({
	      print("Trust Builder Analysis:\n")
	      print("Data header mapping:\n")
	      print(question_to_column)
	      print("Buckets:\n")
	      print(bucket)
	      print("Messages:\n")
	      print(TB)
	      print("Coefficients matrix (coef:\n")
	      print(coef)
	      print("Model:\n")
	      cat(model_str, sep = "\n")
	      print("Fit summary:\n")
	      print(fit_summary)
	      print("Output:\n")
	      print(output)
	      print("Output in percentage (%):\n")
	      print(percentage_output)
	      print("result_df:\n")
	      print(result_df)
	    })

	    # Append the dump; `finally` closes the connection even when the write
	    # itself fails.
	    file_conn <- file(output_text_file, open = "a")
	    tryCatch(
	      writeLines(full_output, file_conn),
	      finally = close(file_conn)
	    )

	    # Plain data frame copy of the per-TB table for CSV export
	    results <- data.frame(result_df)

	    # Save results to CSV file
	    write.csv(results, file = csv_file, row.names = FALSE)
	  }, error = function(e) {
	    log_message(paste("Error in trust_builder_analysis:", e$message), output_text_file)
	  })
	}

	# Read command-line arguments
	args <- commandArgs(trailingOnly = TRUE)
	input_file <- args[1]
	output_text_file <- args[2] # Text file that receives log messages and reports
	csv_output_path_trust <- args[3]
	csv_output_path_nps <- args[4]
	csv_output_path_loyalty <- args[5]
	csv_output_path_consideration <- args[6]
	csv_output_path_satisfaction <- args[7]
	csv_output_path_trustbuilder <- args[8]

	# Arguments 9-13 switch the optional analyses on or off; "TRUE" / "FALSE" in
	# any letter case are expected. A missing or unrecognised value would become
	# NA and abort the script later at `if (flag)`, so it is treated as FALSE and
	# reported in the log instead.
	flag_names <- c("nps", "loyalty", "consideration", "satisfaction", "trustbuilder")
	flag_values <- as.logical(tolower(args[9:13]))
	invalid_flags <- flag_names[is.na(flag_values)]
	flag_values[is.na(flag_values)] <- FALSE
	nps_present <- flag_values[1]
	loyalty_present <- flag_values[2]
	consideration_present <- flag_values[3]
	satisfaction_present <- flag_values[4]
	trustbuilder_present <- flag_values[5]

	# Log the starting of the script
	log_message("Starting Trust Driver and Builder Analysis Script.", output_text_file)
	if (length(invalid_flags) > 0) {
	  log_message(
	    paste("Warning: missing or unrecognised flag argument(s) treated as FALSE:",
	          paste(invalid_flags, collapse = ", ")),
	    output_text_file)
	}

	########## Trust Driver Analysis ######################

	# Load the trust driver dataset. Only Excel workbooks (.xlsx) are handled.
	# The real file extension is compared, because the pattern ".xlsx" passed to
	# grepl() is a regular expression whose dot matches any character and which
	# may match anywhere in the path.
	data_driver <- NULL
	if (identical(tolower(tools::file_ext(input_file)), "xlsx")) {
	  # Load the Excel file with the fourth row as the header
	  data_driver <- read_excel(input_file, sheet = "Driver", skip = 3)
	}

	if (is.null(data_driver)) {
	  # Without data every model would only log "object not found"; say why once.
	  log_message(
	    paste("Skipping Trust Driver analysis: unsupported input file (expected .xlsx):", input_file),
	    output_text_file)
	} else {
	  # All driver models share the same six predictors and differ only in the
	  # response, the output CSV and whether they were requested.
	  driver_predictors <- c("Stability", "Development", "Relationship", "Benefit", "Vision", "Competence")
	  driver_models <- list(
	    list(response = "Trust", csv = csv_output_path_trust, run = TRUE),
	    list(response = "NPS", csv = csv_output_path_nps, run = nps_present),
	    list(response = "Loyalty", csv = csv_output_path_loyalty, run = loyalty_present),
	    list(response = "Consideration", csv = csv_output_path_consideration, run = consideration_present),
	    list(response = "Satisfaction", csv = csv_output_path_satisfaction, run = satisfaction_present)
	  )

	  for (driver_model in driver_models) {
	    # isTRUE() keeps an NA flag from aborting the loop.
	    if (isTRUE(driver_model$run)) {
	      trust_driver_analysis(
	        reformulate(driver_predictors, response = driver_model$response),
	        data_driver,
	        output_text_file,
	        driver_model$csv)
	    }
	  }
	}

	########## Trust Builder Analysis ######################

	if (trustbuilder_present) {
	  # Only Excel workbooks (.xlsx) are handled. The real file extension is
	  # compared, and the analysis runs only when both tables were read, so an
	  # unsupported file no longer stops the script on undefined variables.
	  if (identical(tolower(tools::file_ext(input_file)), "xlsx")) {
	    # Header mapping: skip three rows, then keep two data rows after the
	    # header row; trust_builder_analysis() pairs them as text / column name.
	    data_builder_headers <- read_excel(input_file, sheet = "Builder", skip = 3, n_max = 2)
	    # The data itself: skip the first five rows of the sheet
	    data_builder_rows <- read_excel(input_file, sheet = "Builder", skip = 5)

	    # Process the Builder model
	    trust_builder_analysis(data_builder_rows, data_builder_headers, output_text_file, csv_output_path_trustbuilder)
	  } else {
	    log_message(
	      paste("Skipping Trust Builder analysis: unsupported input file (expected .xlsx):", input_file),
	      output_text_file)
	  }
	}

	# Log the ending of the script
	log_message("Trust Driver and Builder Analysis Script Completed.", output_text_file)