shapely

Sleeping

App Files Files Community

shapely / app.py

Wajahat698

Update app.py

3060060 verified over 1 year ago

raw

history blame

80.1 kB

	import subprocess
	import pandas as pd
	import matplotlib.pyplot as plt
	from matplotlib.ticker import FuncFormatter
	import gradio as gr
	import tempfile
	import logging
	from PIL import Image
	import os
	import random

	import io
	import numpy as np
	from itertools import zip_longest
	import openai
	from dotenv import load_dotenv
	from openai import OpenAI
	from langchain_openai import ChatOpenAI
	from langchain_community.vectorstores import FAISS
	from langchain_openai import OpenAIEmbeddings
	from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
	from langchain.agents import tool, AgentExecutor
	from langchain.agents.output_parsers.openai_tools import OpenAIToolsAgentOutputParser
	from langchain.agents.format_scratchpad.openai_tools import (
	format_to_openai_tool_messages,
	)
	from langchain_core.messages import AIMessage, HumanMessage
	from langchain_community.document_loaders import TextLoader
	from langchain_text_splitters import CharacterTextSplitter
	import serpapi
	import requests
	import mpld3

	# Configure logging once and create this module's logger.
	logging.basicConfig(level=logging.INFO)
	logger = logging.getLogger(__name__)

	# Read key/value pairs from a local .env file into the process environment.
	load_dotenv()

	# Both API keys are mandatory; abort at import time when either is missing.
	openai_api_key = os.environ.get("OPENAI_API_KEY")
	serper_api_key = os.environ.get("SERPER_API_KEY")

	if not (openai_api_key and serper_api_key):
	    logger.error("API keys are not set properly.")
	    raise ValueError("API keys for OpenAI and SERPER must be set in the .env file.")
	logger.info("API keys loaded successfully.")

	# Set the key on the openai module.
	try:
	    openai.api_key = openai_api_key
	    logger.info("OpenAI client initialized successfully.")
	except Exception as e:
	    logger.error("Error initializing OpenAI client: %s", e)
	    raise

	# Number of result sets the output builders below always emit.
	max_outputs = 10
	outputs = []

	# Global variable to store the selected dataset for AI computation
	selected_dataset_ai = "Volkswagen Customers"
	df_builder_pivot_str = ""


	def plot_model_results(results_df, average_value, title, model_type):
	    """
	    Plot driver importances as horizontal bars diverging from the average.

	    Each predictor gets one bar spanning from the average importance to the
	    predictor's own importance, so bars extending to the right are above
	    average and bars extending to the left are below average.

	    Args:
	        results_df (DataFrame): Must contain a "Predictor" column and an
	            "Importance_percent" column. Every predictor must be one of the
	            names in the colour map below, otherwise the colour lookup fails.
	        average_value (float): Average importance, used as the centre line.
	        title (str): Title of the plot.
	        model_type (str): "Trust" plots the six trust dimensions; any other
	            value (e.g. "NPS") additionally expects "Trust" as a predictor.

	    Returns:
	        Image: PIL image containing the rendered plot.

	    Raises:
	        Exception: Any error raised while plotting is logged and re-raised.

	    Side effects:
	        ``results_df`` is modified in place: "Predictor" becomes an ordered
	        categorical and the rows are sorted by it in descending order.
	    """
	    logger.info(
	        "Plotting model results for %s model with title '%s'.", model_type, title
	    )

	    # Colour assigned to each predictor.
	    color_map = {
	        "Stability": "#375570",
	        "Development": "#E3B05B",
	        "Relationship": "#C63F48",
	        "Benefit": "#418387",
	        "Vision": "#DF8859",
	        "Competence": "#6D93AB",
	        "Trust": "#f5918a",
	    }

	    trust_dimensions = [
	        "Stability",
	        "Development",
	        "Relationship",
	        "Benefit",
	        "Vision",
	        "Competence",
	    ]
	    # Every non-Trust model uses Trust itself as an additional predictor.
	    if model_type == "Trust":
	        order = trust_dimensions
	    else:
	        order = ["Trust"] + trust_dimensions

	    fig = None
	    try:
	        # Apply the categorical ordering to the 'Predictor' column.
	        results_df["Predictor"] = pd.Categorical(
	            results_df["Predictor"], categories=order, ordered=True
	        )
	        results_df.sort_values("Predictor", ascending=False, inplace=True)

	        fig, ax = plt.subplots(figsize=(10, 8))

	        # Show x tick labels as whole percentages.
	        ax.xaxis.set_major_formatter(FuncFormatter(lambda x, _: f"{x:.0f}%"))

	        # Centre the x-axis on the average, with 3 units of padding for labels.
	        actual_min = results_df["Importance_percent"].min()
	        actual_max = results_df["Importance_percent"].max()
	        half_range = max(average_value - actual_min, actual_max - average_value)
	        x_min = average_value - half_range - 3
	        x_max = average_value + half_range + 3
	        ax.set_xlim(x_min, x_max)

	        # Ticks every 5 units, each with a dotted guide line. Setting the
	        # ticks after the limits is deliberate: the order is kept as in the
	        # original implementation.
	        x_ticks = np.arange(np.floor(x_min), np.ceil(x_max) + 5, 5)
	        ax.set_xticks(x_ticks)
	        for tick in x_ticks:
	            ax.axvline(
	                x=tick, color="grey", linestyle="--", linewidth=0.5, zorder=2
	            )

	        for i, row in enumerate(results_df.itertuples(index=False)):
	            color = color_map[row.Predictor]
	            if row.Importance_percent < average_value:
	                # Below average: bar runs from the value up to the average,
	                # label sits to the left of the bar.
	                bar_length = average_value - row.Importance_percent
	                left_edge = row.Importance_percent
	                text_x = left_edge - 0.5
	                ha = "right"
	            else:
	                # At or above average: bar runs from the average to the
	                # value, label sits to the right of the bar.
	                bar_length = row.Importance_percent - average_value
	                left_edge = average_value
	                text_x = row.Importance_percent + 0.5
	                ha = "left"

	            ax.barh(
	                row.Predictor,
	                bar_length,
	                left=left_edge,
	                color=color,
	                edgecolor="white",
	                height=0.6,
	                zorder=3,  # draw bars above the guide lines
	            )
	            ax.text(
	                text_x,
	                i,
	                f"{row.Importance_percent:.1f}%",
	                va="center",
	                ha=ha,
	                color="#8c8b8c",
	            )

	        # Draw the average line and set the title.
	        ax.axvline(average_value, color="black", linewidth=1, linestyle="-", zorder=3)
	        ax.set_title(title, fontsize=14)

	        # Remove plot borders and mute the y-axis labels.
	        ax.spines[["left", "top", "right"]].set_color("none")
	        ax.tick_params(axis="y", colors="#8c8b8c", length=0)

	        ax.set_axisbelow(True)
	        fig.tight_layout()

	        # Render to an in-memory PNG and hand it back as a PIL image.
	        img_data = io.BytesIO()
	        fig.savefig(
	            img_data, format="png", facecolor=fig.get_facecolor(), edgecolor="none"
	        )
	        img_data.seek(0)
	        return Image.open(img_data)
	    except Exception as e:
	        logger.error("Error plotting model results: %s", e)
	        raise
	    finally:
	        # Close the figure on every path so failed plots do not accumulate.
	        if fig is not None:
	            plt.close(fig)


	def plot_bucket_fullness(driver_df, title):
	    """
	    Plot the mean score of each of the six trust buckets as a bar chart.

	    Args:
	        driver_df (DataFrame): Data with one column per trust bucket
	            ("Stability", "Development", "Relationship", "Benefit", "Vision",
	            "Competence").
	        title (str): Title of the plot.

	    Returns:
	        Image | None: PIL image of the chart, or None (after logging a
	        warning) when any bucket column is missing from ``driver_df``.

	    Raises:
	        Exception: Any error raised while plotting is logged and re-raised.
	    """
	    buckets = [
	        "Stability",
	        "Development",
	        "Relationship",
	        "Benefit",
	        "Vision",
	        "Competence",
	    ]

	    missing_columns = [col for col in buckets if col not in driver_df.columns]
	    if missing_columns:
	        logger.warning(
	            "The following columns are missing in driver_df: %s", missing_columns
	        )
	        return None
	    logger.info("All required columns are present in driver_df.")

	    color_map = {
	        "Stability": "#375570",
	        "Development": "#E3B05B",
	        "Relationship": "#C63F48",
	        "Benefit": "#418387",
	        "Vision": "#DF8859",
	        "Competence": "#6D93AB",
	    }

	    fig = None
	    try:
	        # Mean of each bucket column, kept in the canonical bucket order.
	        results_df = driver_df[buckets].mean().reset_index()
	        results_df.columns = ["Trust_Bucket", "Fullness_of_Bucket"]
	        results_df["Trust_Bucket"] = pd.Categorical(
	            results_df["Trust_Bucket"], categories=buckets, ordered=True
	        )
	        results_df.sort_values("Trust_Bucket", inplace=True)

	        fig, ax = plt.subplots(figsize=(10, 8))

	        ax.bar(
	            results_df["Trust_Bucket"],
	            results_df["Fullness_of_Bucket"],
	            color=[color_map[bucket] for bucket in results_df["Trust_Bucket"]],
	            edgecolor="white",
	            zorder=2,
	        )

	        # Print each value slightly above the top of its bar.
	        for i, fullness in enumerate(results_df["Fullness_of_Bucket"]):
	            ax.text(
	                i,
	                fullness + 0.5,
	                f"{fullness:.1f}",
	                ha="center",
	                va="bottom",
	                color="#8c8b8c",
	            )

	        # Leave one unit of headroom above the tallest bar.
	        y_max = results_df["Fullness_of_Bucket"].max() + 1
	        ax.set_ylim(0, y_max)
	        ax.set_ylabel("Fullness")
	        ax.set_title(title, fontsize=14)

	        ax.spines[["top", "right"]].set_color("none")

	        # Grey dotted guide line at every y tick.
	        for y_tick in ax.get_yticks():
	            ax.axhline(y=y_tick, color="grey", linestyle="--", linewidth=0.5, zorder=1)

	        ax.set_axisbelow(True)
	        fig.tight_layout()

	        # Render to an in-memory PNG and hand it back as a PIL image.
	        img_data = io.BytesIO()
	        fig.savefig(
	            img_data, format="png", facecolor=fig.get_facecolor(), edgecolor="none"
	        )
	        img_data.seek(0)
	        return Image.open(img_data)
	    except Exception as e:
	        logger.error("Error plotting bucket fullness: %s", e)
	        raise
	    finally:
	        # Close the figure on every path so failed plots do not accumulate.
	        if fig is not None:
	            plt.close(fig)


	def call_r_script(
	    input_file,
	    text_output_path,
	    csv_output_path_trust,
	    csv_output_path_nps,
	    csv_output_path_loyalty,
	    csv_output_path_consideration,
	    csv_output_path_satisfaction,
	    csv_output_path_trustbuilder,
	    nps_present,
	    loyalty_present,
	    consideration_present,
	    satisfaction_present,
	    trustbuilder_present,
	):
	    """
	    Run ``process_data.R`` via ``Rscript`` for the Shapley regression analysis.

	    All arguments are forwarded positionally to the R script in the order
	    listed below; the presence flags are passed as "TRUE"/"FALSE".

	    Args:
	        input_file (str): Path to the input Excel file.
	        text_output_path (str): Path to the output text file.
	        csv_output_path_trust (str): Path to the output CSV file for Trust.
	        csv_output_path_nps (str): Path to the output CSV file for NPS.
	        csv_output_path_loyalty (str): Path to the output CSV file for Loyalty.
	        csv_output_path_consideration (str): Path to the output CSV file for Consideration.
	        csv_output_path_satisfaction (str): Path to the output CSV file for Satisfaction.
	        csv_output_path_trustbuilder (str): Path to the output CSV file for Trustbuilder.
	        nps_present (bool): Flag indicating whether NPS column is present in the data.
	        loyalty_present (bool): Flag indicating whether Loyalty column is present in the data.
	        consideration_present (bool): Flag indicating whether Consideration column is present in the data.
	        satisfaction_present (bool): Flag indicating whether Satisfaction column is present in the data.
	        trustbuilder_present (bool): Flag indicating whether Trustbuilder column is present in the data.

	    Raises:
	        RuntimeError: If the R script exits with a non-zero status. The
	            original ``CalledProcessError`` is attached as the cause.
	        Exception: Any other failure to launch the script (for example
	            ``Rscript`` not being installed) is logged and re-raised as is.
	    """
	    presence_flags = (
	        nps_present,
	        loyalty_present,
	        consideration_present,
	        satisfaction_present,
	        trustbuilder_present,
	    )

	    command = [
	        "Rscript",
	        "process_data.R",
	        input_file,
	        text_output_path,
	        csv_output_path_trust,
	        csv_output_path_nps,
	        csv_output_path_loyalty,
	        csv_output_path_consideration,
	        csv_output_path_satisfaction,
	        csv_output_path_trustbuilder,
	        # Convert each boolean to the string "TRUE" or "FALSE".
	        *(str(flag).upper() for flag in presence_flags),
	    ]

	    try:
	        subprocess.run(command, check=True)
	    except subprocess.CalledProcessError as e:
	        logger.error("R script failed with error: %s", e)
	        raise RuntimeError(
	            "Error executing R script. Please check the input file format."
	        ) from e
	    except Exception as e:
	        logger.error("Error calling R script: %s", e)
	        raise


	def _kpi_column_usable(df, column, max_missing_ratio=0.8):
	    """Return True if optional KPI ``column`` exists in ``df`` and is dense enough.

	    A column whose share of missing values exceeds ``max_missing_ratio`` is
	    dropped from ``df`` in place and reported as unusable.
	    """
	    if column not in df.columns:
	        logger.info("Optional column '%s' is not present in the dataset.", column)
	        return False
	    if df[column].isna().mean() > max_missing_ratio:
	        df.drop(columns=[column], inplace=True)
	        return False
	    return True


	def _plot_driver_importance(csv_path, title, model_type):
	    """Read one importance CSV written by the R script and plot it.

	    Returns the plot image together with the DataFrame that was plotted.
	    """
	    results_df = pd.read_csv(csv_path)
	    results_df["Importance_percent"] = results_df["Importance"] * 100
	    average_value = results_df["Importance_percent"].mean()
	    img = plot_model_results(results_df, average_value, title, model_type)
	    return img, results_df


	def _build_trustbuilder_table(csv_path):
	    """Turn the trust builder CSV into a long table of (bucket, message, %).

	    Only positive values are kept. Rows are ordered by trust bucket and,
	    within a bucket, by descending percentage.
	    """
	    bucket_columns = [
	        "Stability",
	        "Development",
	        "Relationship",
	        "Benefit",
	        "Vision",
	        "Competence",
	    ]
	    results_df_builder = pd.read_csv(csv_path)

	    buckets = []
	    messages = []
	    percentages = []
	    for bucket in bucket_columns:
	        for index, value in results_df_builder[bucket].items():
	            if value > 0:
	                buckets.append(bucket)
	                messages.append(results_df_builder["Message"][index])
	                percentages.append(int(round(value)))

	    df_builder_pivot = pd.DataFrame(
	        {
	            "Trust Bucket®": buckets,
	            "TrustBuilders®": messages,
	            "%": percentages,
	        }
	    )
	    # An ordered categorical makes the sort follow the bucket order above
	    # instead of alphabetical order.
	    df_builder_pivot["Trust Bucket®"] = pd.Categorical(
	        df_builder_pivot["Trust Bucket®"],
	        categories=bucket_columns,
	        ordered=True,
	    )
	    return df_builder_pivot.sort_values(
	        by=["Trust Bucket®", "%"], ascending=[True, False]
	    )


	def _load_placeholder_image(path, size=(1000, 800)):
	    """Load a "not available" placeholder image and resize it to ``size``."""
	    # resize() returns a new in-memory image, so the file can be closed here.
	    with Image.open(path) as placeholder:
	        return placeholder.resize(size, Image.Resampling.LANCZOS)


	def _run_excel_analysis(file_path, temp_dir):
	    """Run the full analysis for one workbook, writing intermediates to ``temp_dir``.

	    Returns the 13-tuple documented on ``analyze_excel_single``.
	    """
	    text_output_path = os.path.join(temp_dir, "output.txt")
	    csv_paths = {
	        name: os.path.join(temp_dir, f"output_{name}.csv")
	        for name in (
	            "trust",
	            "nps",
	            "loyalty",
	            "consideration",
	            "satisfaction",
	            "trustbuilder",
	        )
	    }

	    # Open the workbook once and close it again as soon as both sheets are read.
	    with pd.ExcelFile(file_path) as excel_file:
	        # The "Driver" sheet is mandatory; its fourth row holds the header.
	        df = pd.read_excel(excel_file, sheet_name="Driver", header=3)

	        # The optional "Builder" sheet has its header in row 6 and is only
	        # analysed when it contains more than 10 data rows.
	        trustbuilder_present = False
	        if "Builder" in excel_file.sheet_names:
	            builder_data = pd.read_excel(excel_file, sheet_name="Builder", header=5)
	            trustbuilder_present = len(builder_data) > 10

	    required_columns = [
	        "Trust",
	        "Stability",
	        "Development",
	        "Relationship",
	        "Benefit",
	        "Vision",
	        "Competence",
	    ]
	    missing_columns = set(required_columns) - set(df.columns)
	    if missing_columns:
	        # Only a warning here; the dropna() below raises if columns are absent.
	        logger.warning("Missing columns in dataset: %s", missing_columns)

	    kpi_names = ("NPS", "Loyalty", "Consideration", "Satisfaction")
	    kpi_present = {kpi: _kpi_column_usable(df, kpi) for kpi in kpi_names}

	    df.dropna(subset=required_columns, inplace=True)

	    if df.shape[0] <= 10:
	        # Same 13-slot shape as the normal return so callers can unpack it;
	        # the explanation goes into the output_text slot.
	        return (
	            None,
	            None,
	            None,
	            None,
	            None,
	            None,
	            None,
	            "Dataset must contain more than 10 rows after preprocessing.",
	            None,
	            None,
	            None,
	            None,
	            None,
	        )

	    call_r_script(
	        file_path,
	        text_output_path,
	        csv_paths["trust"],
	        csv_paths["nps"],
	        csv_paths["loyalty"],
	        csv_paths["consideration"],
	        csv_paths["satisfaction"],
	        csv_paths["trustbuilder"],
	        kpi_present["NPS"],
	        kpi_present["Loyalty"],
	        kpi_present["Consideration"],
	        kpi_present["Satisfaction"],
	        trustbuilder_present,
	    )

	    with open(text_output_path, "r") as file:
	        output_text = file.read()

	    # File name shown in the plot titles.
	    file_name = os.path.basename(file_path)

	    img_bucketfull = plot_bucket_fullness(df, f"Trust Profile: {file_name}")

	    img_trust, results_df_trust = _plot_driver_importance(
	        csv_paths["trust"], f"Trust Drivers: {file_name}", "Trust"
	    )
	    # NOTE(review): display_trust_score_1 is not defined in the visible part
	    # of this file; its return value is discarded here.
	    display_trust_score_1()

	    kpi_images = dict.fromkeys(kpi_names)
	    kpi_results = dict.fromkeys(kpi_names)
	    for kpi in kpi_names:
	        if kpi_present[kpi]:
	            kpi_images[kpi], kpi_results[kpi] = _plot_driver_importance(
	                csv_paths[kpi.lower()], f"{kpi} Drivers: {file_name}", kpi
	            )

	    df_builder_pivot = None
	    if trustbuilder_present:
	        df_builder_pivot = _build_trustbuilder_table(csv_paths["trustbuilder"])

	    # Substitute a placeholder picture for every KPI that was not analysed.
	    for kpi in kpi_names:
	        if kpi_images[kpi] is None:
	            kpi_images[kpi] = _load_placeholder_image(
	                f"./images/{kpi.lower()}_not_available.png"
	            )

	    return (
	        img_bucketfull,
	        img_trust,
	        kpi_images["NPS"],
	        kpi_images["Loyalty"],
	        kpi_images["Consideration"],
	        kpi_images["Satisfaction"],
	        df_builder_pivot,
	        output_text,
	        results_df_trust,
	        kpi_results["NPS"],
	        kpi_results["Loyalty"],
	        kpi_results["Consideration"],
	        kpi_results["Satisfaction"],
	    )


	def analyze_excel_single(file_path):
	    """
	    Analyze one Excel workbook and generate the trust profile and driver plots.

	    The workbook must contain a "Driver" sheet. NPS, Loyalty, Consideration
	    and Satisfaction are analysed only when their column exists and has at
	    most 80% missing values; a "Builder" sheet is analysed only when it has
	    more than 10 data rows. The regression itself is delegated to the R
	    script through ``call_r_script``.

	    Args:
	        file_path (str): Path to the Excel file.

	    Returns:
	        tuple: 13 items, in this order:
	            img_bucketfull, img_trust, img_nps, img_loyalty,
	            img_consideration, img_satisfaction (images; KPI images are
	            placeholder pictures when that KPI was not analysed),
	            df_builder_pivot (DataFrame or None),
	            output_text (str, text written by the R script),
	            results_df_trust, results_df_nps, results_df_loyalty,
	            results_df_consideration, results_df_satisfaction
	            (DataFrame or None).
	        When 10 or fewer rows remain after dropping incomplete rows, every
	        item is None except output_text, which holds an explanatory message.

	    Raises:
	        Exception: Any error during the analysis is logged and re-raised.
	    """
	    logger.info("Analyzing Excel file: %s", file_path)

	    try:
	        # The context manager removes the directory and everything the R
	        # script wrote into it, on success and on failure alike.
	        with tempfile.TemporaryDirectory() as temp_dir:
	            logger.info("Created temporary directory: %s", temp_dir)
	            return _run_excel_analysis(file_path, temp_dir)
	    except Exception as e:
	        logger.error("Error analyzing Excel file: %s", e)
	        raise


	def batch_file_processing(file_paths):
	    """
	    Run ``analyze_excel_single`` on every file and collect the results per field.

	    Args:
	        file_paths (List[str]): List of paths to the Excel files.

	    Returns:
	        tuple: Eight lists, each with exactly one entry per input file and in
	        the same order as ``file_paths``:
	            bucket-fullness images, Trust images, NPS images, Loyalty images,
	            Consideration images, Satisfaction images, trust builder tables,
	            analysis summary texts.
	        For a file whose analysis fails, the error is logged and None is
	        stored in all eight lists, so the lists stay aligned with the input.
	    """
	    img_bucketfull_list = []
	    img_trust_list = []
	    img_nps_list = []
	    img_loyalty_list = []
	    img_consideration_list = []
	    img_satisfaction_list = []
	    df_builder_pivot_list = []
	    output_text_list = []

	    collectors = (
	        img_bucketfull_list,
	        img_trust_list,
	        img_nps_list,
	        img_loyalty_list,
	        img_consideration_list,
	        img_satisfaction_list,
	        df_builder_pivot_list,
	        output_text_list,
	    )

	    for file_path in file_paths:
	        try:
	            # The trailing per-model result tables are not needed here.
	            (
	                img_bucketfull,
	                img_trust,
	                img_nps,
	                img_loyalty,
	                img_consideration,
	                img_satisfaction,
	                df_builder_pivot,
	                output_text,
	                *_model_results,
	            ) = analyze_excel_single(file_path)
	            row = (
	                img_bucketfull,
	                img_trust,
	                img_nps,
	                img_loyalty,
	                img_consideration,
	                img_satisfaction,
	                df_builder_pivot,
	                output_text,
	            )
	        except Exception as e:
	            logger.error("Error processing file %s: %s", file_path, e)
	            # Keep a placeholder row so later files are not attributed to
	            # the wrong input.
	            row = (None,) * len(collectors)

	        for collector, value in zip(collectors, row):
	            collector.append(value)

	    return collectors


	def _hidden_output_set():
	    """Return one set of 14 hidden components, matching the layout of a visible set."""
	    return [
	        gr.Markdown("", visible=False),
	        gr.Markdown("", visible=False),
	        gr.Image(label="Trust Buckets", visible=False),
	        gr.Markdown("", visible=False),
	        gr.Markdown("", visible=False),
	        gr.Image(label="Trust Drivers", visible=False),
	        gr.Image(label="NPS Drivers", visible=False),
	        gr.Image(label="Loyalty Drivers", visible=False),
	        gr.Image(label="Consideration Drivers", visible=False),
	        gr.Image(label="Satisfaction Drivers", visible=False),
	        gr.Textbox(label="Analysis Summary", visible=False),
	        gr.Markdown("", visible=False),
	        gr.Markdown("", visible=False),
	        gr.Dataframe(value=None, label=" ", visible=False),
	    ]


	def variable_outputs(file_inputs):
	    """
	    Analyze the uploaded files and build the component list for the results area.

	    One set of 14 components is produced per analysed file, followed by
	    hidden sets so that ``max_outputs`` sets are always returned.

	    Args:
	        file_inputs (List[str] | str | None): Paths of the uploaded files.
	            A single path is treated as a one-element list, None as no
	            files. Files beyond ``max_outputs`` are ignored with a warning.

	    Returns:
	        list: ``max_outputs * 14`` Gradio components.
	    """
	    if isinstance(file_inputs, str):
	        file_inputs_single = [file_inputs]
	    else:
	        file_inputs_single = list(file_inputs or [])

	    # The UI has a fixed number of output slots; never produce more sets.
	    if len(file_inputs_single) > max_outputs:
	        logger.warning(
	            "Received %d files but only %d can be displayed; ignoring the rest.",
	            len(file_inputs_single),
	            max_outputs,
	        )
	        file_inputs_single = file_inputs_single[:max_outputs]

	    # Call batch file processing and get analysis results
	    (
	        img_bucketfull_list,
	        img_trust_list,
	        img_nps_list,
	        img_loyalty_list,
	        img_consideration_list,
	        img_satisfaction_list,
	        df_builder_pivot_list,
	        output_text_list,
	    ) = batch_file_processing(file_inputs_single)

	    # Container for visible plots
	    plots_visible = []
	    rows_rendered = 0

	    # Use zip_longest to iterate over the lists, padding with None
	    for row, (
	        img_bucketfull,
	        img_trust,
	        img_nps,
	        img_loyalty,
	        img_consideration,
	        img_satisfaction,
	        df_builder_pivot,
	        output_text,
	    ) in enumerate(
	        zip_longest(
	            img_bucketfull_list,
	            img_trust_list,
	            img_nps_list,
	            img_loyalty_list,
	            img_consideration_list,
	            img_satisfaction_list,
	            df_builder_pivot_list,
	            output_text_list,
	        )
	    ):
	        rows_rendered = row + 1

	        # Get dataset name
	        dataset_name = os.path.basename(file_inputs_single[row])

	        plots_visible += [
	            gr.Markdown(
	                "<span style='font-size:20px; font-weight:bold;'>2) Trust Profile</span>",
	                visible=True,
	            ),
	            gr.Markdown(
	                "This analysis shows you how strongly you are trusted in each of the six Trust Buckets®. You can also see this for any competitor.",
	                visible=True,
	            ),
	            gr.Image(
	                value=img_bucketfull,
	                type="pil",
	                label="Trust Profile",
	                visible=True,
	            ),
	            gr.Markdown(
	                "<span style='font-size:20px; font-weight:bold;'>3) Trust and KPI Drivers</span>",
	                visible=True,
	            ),
	            gr.Markdown(
	                "This analysis shows you which of the TrustLogic® dimensions are most effective in building more trust and improving your KPIs. "
	                + "Here we display Trust and NPS, but in the full version you can include up to four KPIs (e.g. CSAT, Consideration, Loyalty). "
	                + "<br>The Trust Buckets® extending to the right are the more important ones. We show how they over and under-index. "
	                + "The average driver impact is 16.7% (100% divided by 6 trust dimensions). The higher the % above average, the more important. "
	                + "That means that you need to ‘fill’ these Trust Buckets® with the right attributes and messages.",
	                visible=True,
	            ),
	            gr.Image(
	                value=img_trust,
	                type="pil",
	                label="Trust Drivers",
	                visible=True,
	            ),
	            gr.Image(
	                value=img_nps,
	                type="pil",
	                label="NPS Drivers",
	                visible=True,
	            ),
	            gr.Image(
	                value=img_loyalty,
	                type="pil",
	                visible=True,
	            ),
	            gr.Image(
	                value=img_consideration,
	                type="pil",
	                visible=True,
	            ),
	            gr.Image(
	                value=img_satisfaction,
	                type="pil",
	                visible=True,
	            ),
	            # The summary text is carried along but never shown.
	            gr.Textbox(
	                value=output_text,
	                visible=False,
	            ),
	        ]

	        if isinstance(df_builder_pivot, pd.DataFrame):
	            logger.debug("df_builder_pivot: %s", df_builder_pivot)

	            plots_visible += [
	                gr.Markdown(
	                    "<span style='font-size:20px; font-weight:bold;'> 4) TrustBuilders® </span>",
	                    visible=True,
	                ),
	                gr.Markdown(
	                    "These are the reasons to trust and recommend. They can be your brand values, features, attributes, programmes and messages. "
	                    + "<br>In the table, use the little arrow in each column to toggle the most to least effective TrustBuilders® to fill each Trust Bucket®. Your focus is only on the Trust Bucket® with the highest driver impact. "
	                    + "<br> Note: Even if Trust Buckets® for Customers and Prospects overlap, the most effective statements are very different. This provides clear guidance for acquisition versus loyalty activities.",
	                    visible=True,
	                ),
	                gr.Dataframe(
	                    value=df_builder_pivot,
	                    headers=list(df_builder_pivot.columns),
	                    interactive=False,
	                    label=dataset_name,
	                    visible=True,
	                    height=800,
	                    wrap=True,
	                ),
	            ]
	        else:
	            # No trust builder table for this dataset: keep the three slots hidden.
	            plots_visible += [
	                gr.Markdown("", visible=False),
	                gr.Markdown("", visible=False),
	                gr.Dataframe(value=None, label="", visible=False),
	            ]

	    # Pad with hidden sets based on the rows actually rendered, so the total
	    # is always max_outputs sets.
	    for _ in range(max_outputs - rows_rendered):
	        plots_visible += _hidden_output_set()

	    return plots_visible


	def reset_outputs():
	    """
	    Build the initial (empty) component list for the results area.

	    The first set of 14 components is visible but empty (except the summary
	    textbox, which is hidden); the remaining ``max_outputs - 1`` sets are
	    fully hidden.

	    Returns:
	        list: ``max_outputs * 14`` Gradio components.
	    """
	    # Local list only; the module-level ``outputs`` is not touched.
	    outputs = [
	        gr.Markdown(
	            "<span style='font-size:20px; font-weight:bold;'>2) Trust Profile</span>",
	            visible=True,
	        ),
	        gr.Markdown(
	            "This analysis shows you how strongly you are trusted in each of the six Trust Buckets®. You can also see this for any competitor.",
	            visible=True,
	        ),
	        gr.Image(value=None, label="Trust Buckets", visible=True),
	        gr.Markdown(
	            "<span style='font-size:20px; font-weight:bold;'>3) Trust and KPI Drivers</span>",
	            visible=True,
	        ),
	        gr.Markdown(
	            "This analysis shows you which of the TrustLogic® dimensions are most effective in building more trust and improving your KPIs. "
	            + "Here we display Trust and NPS, but in the full version you can include up to four KPIs (e.g. CSAT, Consideration, Loyalty). "
	            + "<br>The Trust Buckets® extending to the right are the more important ones. We show how they over and under-index. "
	            + "The average driver impact is 16.7% (100% divided by 6 trust dimensions). The higher the % above average, the more important. "
	            + "That means that you need to ‘fill’ these Trust Buckets® with the right attributes and messages.",
	            visible=True,
	        ),
	        gr.Image(value=None, label="Trust Drivers", visible=True),
	        gr.Image(value=None, label="NPS Drivers", visible=True),
	        gr.Image(value=None, label="Loyalty Drivers", visible=True),
	        gr.Image(value=None, label="Consideration Drivers", visible=True),
	        gr.Image(value=None, label="Satisfaction Drivers", visible=True),
	        gr.Textbox(value=None, label="Analysis Summary", visible=False),
	        gr.Markdown(
	            "<span style='font-size:20px; font-weight:bold;'>4) TrustBuilders®</span>",
	            visible=True,
	        ),
	        # NOTE(review): this text mentions a first and a second table, but
	        # only one Dataframe follows — confirm the intended wording.
	        gr.Markdown(
	            "These are the reasons to trust and recommend. They can be your brand values, features, attributes, programmes and messages. "
	            + "<br>In the first table, use the little arrow in each column to toggle the most to least effective proof points to fill each Trust Bucket®. Your focus is only on the Trust Bucket® with the highest driver impact. "
	            + "<br>In the second table you see the top scoring proof points ordered by Trust Bucket®. "
	            + "<br>Note: Even if Trust Buckets for Customers and Prospects overlap, the most effective statements are very different. This provides clear guidance for acquisition versus loyalty activities.",
	            visible=True,
	        ),
	        gr.Dataframe(value=None, label="", visible=True),
	    ]

	    # invisible from second set onwards
	    for _ in range(1, max_outputs):
	        outputs.extend(
	            [
	                gr.Markdown("", visible=False),
	                gr.Markdown("", visible=False),
	                gr.Image(value=None, label="", visible=False),
	                gr.Markdown("", visible=False),
	                gr.Markdown("", visible=False),
	                gr.Image(value=None, label="", visible=False),
	                gr.Image(value=None, label="", visible=False),
	                gr.Image(value=None, label="", visible=False),
	                gr.Image(value=None, label="", visible=False),
	                gr.Image(value=None, label="", visible=False),
	                gr.Textbox(value=None, label="", visible=False),
	                gr.Markdown("", visible=False),
	                gr.Markdown("", visible=False),
	                gr.Dataframe(value=None, label="", visible=False),
	            ]
	        )

	    return outputs


	def data_processing(file_path):
	    """
	    Convert a survey CSV export into the Excel workbook used by the analysis.

	    The first two CSV rows are merged into one header per column, rating
	    cells of the form "<value> - <text>" are reduced to the leading value,
	    and the columns are relabelled (Q1, Q2, NPS, Trust, ..., TB1..TB37).
	    The result is written to an ``.xlsx`` file with a "Driver" sheet (KPI
	    and trust-bucket columns) and a "Builder" sheet (all labelled columns).

	    Args:
	        file_path (str): Path to the CSV file.

	    Returns:
	        str: Path of the generated Excel file, located in a newly created
	        temporary directory.

	    Raises:
	        Exception: Any error raised while reading, transforming or writing
	            the data is logged and re-raised unchanged.
	    """
	    try:
	        logger.info("Processing CSV file: %s", file_path)

	        # The first two rows together hold the header text of each column.
	        header_df = pd.read_csv(file_path, header=None, nrows=2)
	        header_df.iloc[0] = header_df.iloc[0].fillna("")
	        header_df.iloc[1] = header_df.iloc[1].fillna("")
	        merged_columns = header_df.iloc[0] + " " + header_df.iloc[1]

	        # Load the remaining rows using the merged header text as column names.
	        df = pd.read_csv(file_path, skiprows=2, names=merged_columns)

	        def split_value(val):
	            # Cells such as "5 - <text>" keep only the part before " - ".
	            if isinstance(val, str) and " - " in val:
	                return val.split(" - ")[0]
	            return val

	        # NOTE(review): DataFrame.applymap is deprecated in pandas >= 2.1 in
	        # favour of DataFrame.map; kept so older pandas versions still work.
	        df = df.applymap(split_value)

	        # Everything from the third column onwards is treated as numeric;
	        # values that cannot be parsed become NaN.
	        df.iloc[:, 2:] = df.iloc[:, 2:].apply(pd.to_numeric, errors="coerce")

	        # Locate the "buy another" question by its header text.
	        search_text = "how likely are you to buy another".lower()
	        matching_positions = [
	            i for i, col in enumerate(df.columns) if search_text in col.lower()
	        ]

	        if matching_positions:
	            # Only the first matching column is handled.
	            loyalty_position = matching_positions[0]

	            # Reverse the scale for that column (1<->5, 2<->4, 3 unchanged).
	            replace_map = {1: 5, 2: 4, 4: 2, 5: 1}
	            df.iloc[:, loyalty_position] = df.iloc[:, loyalty_position].replace(
	                replace_map
	            )

	        # Header text fragment -> short label. The first fragment found in a
	        # header (in this order) decides the label.
	        column_mapping = {
	            "Did you own a": "Q1",
	            "your age": "Q2",
	            "How likely are you to recommend buying a": "NPS",
	            "level of trust": "Trust",
	            "buy another": "Loyalty",
	            "consider buying": "Consideration",
	            "Has built a strong and stable foundation": "Stability",
	            "Will develop well in the future": "Development",
	            "Relates well to people like me": "Relationship",
	            "Is valuable to our lives": "Benefit",
	            "Has vision and values I find appealing": "Vision",
	            "Has what it takes to succeed": "Competence",
	        }

	        # Collect the labels of all recognised headers, in column order.
	        list_labels = []
	        for col in merged_columns:
	            label = None
	            for key, value in column_mapping.items():
	                if key.lower() in col.lower():
	                    label = value
	                    break
	            if label:
	                list_labels.append(label)

	        # The columns that were not recognised are labelled TB1, TB2, ...
	        # NOTE(review): labels are applied positionally below, so this
	        # assumes every recognised column precedes the unrecognised ones.
	        difference = len(merged_columns) - len(list_labels)
	        list_labels.extend(f"TB{i + 1}" for i in range(difference))

	        # One-row frame holding the labels, aligned with the data columns.
	        df_labels = pd.DataFrame([list_labels], columns=df.columns)

	        # Give header_df the same columns as df so the frames concatenate.
	        header_df.columns = df.columns

	        # Two NaN rows, then the two header rows, then the label row, then
	        # the data (the original comments say this mirrors the Excel input).
	        nan_rows = pd.DataFrame(np.nan, index=range(2), columns=df.columns)
	        df = pd.concat([nan_rows, header_df, df_labels, df]).reset_index(drop=True)

	        # Use the short labels as column names.
	        df.columns = list_labels

	        # Drop trust-statement columns beyond TB37.
	        max_tb_label = 37
	        tb_columns = [col for col in df.columns if col.startswith("TB")]
	        tb_columns_to_keep = {f"TB{i + 1}" for i in range(max_tb_label)}
	        tb_columns_to_drop = [
	            col for col in tb_columns if col not in tb_columns_to_keep
	        ]
	        df.drop(columns=tb_columns_to_drop, inplace=True)

	        kpis = [
	            "Trust",
	            "NPS",
	            "Loyalty",
	            "Consideration",
	            "Satisfaction",
	        ]

	        drivers = [
	            "Stability",
	            "Development",
	            "Relationship",
	            "Benefit",
	            "Vision",
	            "Competence",
	        ]

	        # Pick every column whose label contains a KPI or driver name
	        # (case-insensitive), KPIs first, then drivers.
	        selected_columns = []
	        for wanted in kpis + drivers:
	            for col in df.columns:
	                if pd.notna(col) and wanted.lower() in col.lower():
	                    selected_columns.append(col)

	        # Skip the first four rows (NaN padding and header rows); what
	        # remains starts at the label row.
	        df_drivers = df[selected_columns].iloc[4:].reset_index(drop=True)

	        # Pad 2 rows of NaNs in front of the driver data.
	        nan_rows = pd.DataFrame(np.nan, index=range(2), columns=df_drivers.columns)
	        df_drivers = pd.concat([nan_rows, df_drivers]).reset_index(drop=True)

	        # Dataset name = file name without directory and without its final
	        # extension. os.path handles the platform's path separators and
	        # keeps dots that are part of the name itself.
	        dataset_name = os.path.splitext(os.path.basename(file_path))[0]

	        # The workbook is written into its own temporary directory; the
	        # directory is not removed here because the returned path points
	        # into it.
	        temp_dir = tempfile.mkdtemp()
	        logger.info("Created temporary directory for processed file: %s", temp_dir)

	        processed_file_path = os.path.join(temp_dir, f"{dataset_name}.xlsx")
	        with pd.ExcelWriter(processed_file_path) as writer:
	            df_drivers.to_excel(writer, sheet_name="Driver", index=False)
	            df.to_excel(writer, sheet_name="Builder", index=False)

	        return processed_file_path
	    except Exception as e:
	        logger.error("Error processing CSV file: %s", e)
	        raise


	def process_examples(file_name):
	    """
	    Run the analysis for a single file located under ``example_files/``.

	    Args:
	        file_name: Indexable value whose first element is the file name.

	    Returns:
	        The result of ``variable_outputs`` for a one-element list holding
	        the example file's path.
	    """
	    example_path = f"example_files/{file_name[0]}"
	    return variable_outputs([example_path])



	def process_datasets(file_inputs):
	    """
	    Processes uploaded datasets and calls appropriate functions based on file type.

	    ``.xlsx`` files are passed through unchanged, ``.csv`` files are first
	    converted with ``data_processing``. Files that fail to convert or have
	    any other extension are logged and left out.

	    Args:
	        file_inputs: Iterable of uploaded files. Each item is either an
	            object exposing the file path as ``.name`` or a plain path
	            string. ``None`` is treated as "no files".

	    Returns:
	        The result of ``variable_outputs`` for the collected file paths.
	    """
	    outputs_list = []

	    for file_input in file_inputs or []:
	        # Accept both upload objects (path in ``.name``) and plain paths.
	        file_path = getattr(file_input, "name", file_input)
	        file_extension = os.path.splitext(file_path)[-1].lower()

	        if file_extension == ".xlsx":
	            outputs_list.append(file_path)

	        elif file_extension == ".csv":
	            try:
	                outputs_list.append(data_processing(file_path))
	            except Exception as e:
	                # Best effort: one bad CSV must not block the other files.
	                logger.error("Error processing file %s: %s", file_path, e)

	        else:
	            # Make the skip visible instead of dropping the file silently.
	            logger.warning("Skipping unsupported file type: %s", file_path)

	    return variable_outputs(outputs_list)


	# Load knowledge base
	def load_knowledge_base():
	    """
	    Load the knowledge-base markdown file and split it into chunks.

	    Returns:
	        The documents produced by ``CharacterTextSplitter`` (chunk size
	        1000, no overlap) for ``./data_source/time_to_rethink_trust_book.md``.

	    Raises:
	        Exception: Any loading or splitting error is logged and re-raised.
	    """
	    try:
	        source_documents = TextLoader(
	            "./data_source/time_to_rethink_trust_book.md"
	        ).load()
	        splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
	        return splitter.split_documents(source_documents)
	    except Exception as e:
	        logger.error(f"Error loading knowledge base: {e}")
	        raise e


	# Executed at import time: the knowledge base is loaded once and kept in a
	# module-level variable.
	knowledge_base = load_knowledge_base()

	# Initialize embeddings and FAISS index
	# `db` is the module-level vector store queried by search_knowledge_base().
	# A failure here is logged and re-raised, so the module does not finish
	# importing without an index.
	try:
	    embeddings = OpenAIEmbeddings()
	    db = FAISS.from_documents(knowledge_base, embeddings)
	except Exception as e:
	    logger.error(f"Error initializing FAISS index: {e}")
	    raise e


	# Similarity search against the module-level FAISS index
	def search_knowledge_base(query):
	    """
	    Return the documents in ``db`` that are most similar to ``query``.

	    On any error the exception is logged and a one-element list holding an
	    error message string is returned instead of documents.
	    """
	    try:
	        return db.similarity_search(query)
	    except Exception as e:
	        logger.error(f"Error searching knowledge base: {e}")
	        return ["Error occurred during knowledge base search"]


	# Google search through the serpapi client (key taken from serper_api_key)
	def google_search(query):
	    """
	    Run a Google search and return the text snippets of the organic results.

	    Args:
	        query (str): The search query.

	    Returns:
	        list: Snippet strings of the organic results. Results that carry no
	        "snippet" entry are skipped. On failure a one-element list with an
	        error message is returned instead.
	    """
	    try:
	        search_client = serpapi.Client(api_key=serper_api_key)
	        results = search_client.search({"engine": "google", "q": query})
	        # A result without a snippet must not discard the whole search, so
	        # such entries are filtered out instead of raising KeyError.
	        return [
	            result["snippet"]
	            for result in results.get("organic_results", [])
	            if "snippet" in result
	        ]
	    except requests.exceptions.HTTPError as http_err:
	        logger.error(f"HTTP error occurred: {http_err}")
	        return ["HTTP error occurred during Google search"]
	    except Exception as e:
	        logger.error(f"General Error: {e}")
	        return ["Error occurred during Google search"]


	# Retrieval-augmented answer built from the knowledge base
	def rag_response(query):
	    """
	    Answer ``query`` with gpt-4o, using knowledge-base search results as context.

	    The page contents of the retrieved documents are joined with newlines
	    and placed in front of the question. On any error the exception is
	    logged and a fixed error message string is returned.
	    """
	    try:
	        context = "\n".join(
	            doc.page_content for doc in search_knowledge_base(query)
	        )
	        prompt = f"Context:\n{context}\n\nQuestion: {query}\nAnswer:"
	        llm = ChatOpenAI(model="gpt-4o", temperature=0.5, api_key=openai_api_key)
	        return llm.invoke(prompt).content
	    except Exception as e:
	        logger.error(f"Error generating RAG response: {e}")
	        return "Error occurred during RAG response generation"


	def compute_dataframe_proof_point():
	    """
	    Build the proof-point table text for the currently selected dataset.

	    Runs ``analyze_excel_single`` on ``example_files/<selected_dataset_ai>``,
	    keeps the Trust Buckets whose "Importance_percent" is at least 18 for
	    any available KPI result, and then keeps the builder rows that belong
	    to those buckets and whose "%" value is at least 18.

	    Side effects:
	        Stores the result in the module-level ``df_builder_pivot_str``.

	    Returns:
	        str: The filtered table rendered with ``DataFrame.to_string``, or a
	        message describing why no table is available.
	    """
	    global selected_dataset_ai
	    global df_builder_pivot_str

	    # Cut-off applied both to bucket importance and to statement scores.
	    threshold = 18

	    try:
	        # Load the selected dataset
	        dataset_file_path = f"example_files/{selected_dataset_ai}"

	        (
	            _img_bucketfull,
	            _img_trust,
	            _img_nps,
	            _img_loyalty,
	            _img_consideration,
	            _img_satisfaction,
	            df_builder_pivot,
	            _output_text,
	            results_df_trust,
	            results_df_nps,
	            results_df_loyalty,
	            results_df_consideration,
	            results_df_satisfaction,
	        ) = analyze_excel_single(dataset_file_path)

	        if df_builder_pivot is not None:
	            # Union of the buckets that reach the threshold in any KPI
	            # result. Every result frame, including trust, is skipped when
	            # it is None so one missing KPI does not fail the whole call.
	            qualified_bucket_names = set()
	            for results_df in (
	                results_df_trust,
	                results_df_nps,
	                results_df_loyalty,
	                results_df_consideration,
	                results_df_satisfaction,
	            ):
	                if results_df is None:
	                    continue
	                qualified_bucket_names.update(
	                    results_df[results_df["Importance_percent"] >= threshold][
	                        "Predictor"
	                    ].tolist()
	                )

	            # Keep only statements belonging to a qualified bucket.
	            df_builder_pivot = df_builder_pivot[
	                df_builder_pivot["Trust Bucket®"].isin(list(qualified_bucket_names))
	            ]

	            # Remove statements with values below the threshold.
	            df_builder_pivot = df_builder_pivot[df_builder_pivot["%"] >= threshold]

	            df_builder_pivot_str = df_builder_pivot.to_string(index=False)
	        else:
	            df_builder_pivot_str = "Trust Builder information is not available."
	    except FileNotFoundError:
	        df_builder_pivot_str = "Dataset not found."
	    except Exception as e:
	        df_builder_pivot_str = f"An error occurred during analysis: {e}"

	    return df_builder_pivot_str


	# Define tools using LangChain's `tool` decorator
	# NOTE(review): LangChain's @tool presumably uses the docstring below as the
	# tool description shown to the model, so edits to it may change runtime
	# behaviour — confirm before rewording.
	@tool
	def knowledge_base_tool(query: str):
	    """
	    Tool function to query the knowledge base and retrieve a response.
	    Args:
	    query (str): The query to search the knowledge base.
	    Returns:
	    str: The response retrieved from the knowledge base.
	    """
	    # Thin wrapper: delegates to rag_response().
	    return rag_response(query)


	# NOTE(review): the docstring below is presumably exposed to the model as the
	# tool description — confirm before rewording.
	@tool
	def google_search_tool(query: str):
	    """
	    Tool function to perform a Google search using the SERPER API.
	    Args:
	    query (str): The query to search on Google.
	    Returns:
	    list: List of snippets extracted from search results.
	    """
	    # Thin wrapper: delegates to google_search().
	    return google_search(query)


	# NOTE(review): the docstring below is presumably exposed to the model as the
	# tool description — confirm before rewording.
	@tool
	def compute_dataframe_proof_point_tool() -> str:
	    """
	    Tool function to compute DATAFRAME_PROOF_POINT.
	    Returns:
	    str: The computed DATAFRAME_PROOF_POINT as a string.
	    """
	    # Thin wrapper: delegates to compute_dataframe_proof_point(), which reads
	    # the module-level selected_dataset_ai.
	    return compute_dataframe_proof_point()


	# compile all tools as a list
	# The same list is bound to the LLM and handed to the AgentExecutor further
	# down in this file.
	tools = [
	    knowledge_base_tool,
	    google_search_tool,
	    compute_dataframe_proof_point_tool,
	]



	def validate_ai_output(ai_output, proof_points):
	    """
	    Append any Trust Buckets missing from the AI output, with their builders.

	    Args:
	        ai_output: The generated response text.
	        proof_points: Mapping of bucket name to a list of builder dicts,
	            each with the keys "Trust Builder" and "Percentage".

	    Returns:
	        ``ai_output`` unchanged when every bucket name occurs in it;
	        otherwise ``ai_output`` followed by a "Missing Data:" section that
	        lists each missing bucket and its builders.
	    """
	    absent_buckets = [
	        bucket for bucket in proof_points if bucket not in ai_output
	    ]
	    if not absent_buckets:
	        return ai_output

	    correction_lines = []
	    for bucket in absent_buckets:
	        correction_lines.append(f"{bucket}")
	        correction_lines.extend(
	            f"- {builder['Trust Builder']} [{builder['Percentage']}%]"
	            for builder in proof_points[bucket]
	        )

	    return f"{ai_output}\n\nMissing Data:\n" + "\n".join(correction_lines)
	# Create the prompt template
	# System prompt for the content-generation agent; it becomes the "system"
	# message of the ChatPromptTemplate built right after this string.
	# NOTE(review): because the text is used as a prompt template, literal curly
	# braces added here would presumably be read as template variables — confirm
	# before introducing any.
	prompt_message = """
	# Prompt for Dynamic Content Generation with Top-Scoring Statements

	### Role
	You are an expert copywriter specializing in creating high-quality marketing content that strictly integrates all top scoring statements into various formats. Must include all top scoring statements in the conversation.

	---

	### Data Handling
	- Retrieve `selected_dataset_ai` and compute `DATAFRAME_PROOF_POINT` using `compute_dataframe_proof_point()`.
	- Default Company Name: Volkswagen (unless specified otherwise).

	---

	### DATAFRAME_PROOF_POINT Structure
	- Trust Buckets: Stability, Development, Relationship, Benefit, Vision, Competence.
	- TrustBuilders®: Statements with percentages.

	---

	### Response Generation
	1. If `DATAFRAME_PROOF_POINT` is empty:
	- Respond: "Trust Builder information is not provided. Generating content based on general knowledge."
	- Create content using general knowledge.

	2. If `DATAFRAME_PROOF_POINT` is present:
	- Display Trust Buckets:
	- List all 6 Trust Buckets with their top three TrustBuilders® and percentages.
	- Format: Top scoring statements:
	- Bold Bucket Name
	- Bullet Point Statement [Percentage]

	- Generate Content:
	- Create the requested content type (Annual Report, Social Media Post, Sales Conversation, etc.).
	- Integrate all TrustBuilders® into the narrative.

	---

	### STRICT REQUIREMENTS TO ENSURE QUALITY
	- Mandatory Integration:
	- All top three Trust Statements (≥18%) from each Trust Bucket must be actively and explicitly integrated into the generated content. Minimum 3 statmenrs are must with each bucket
	- Example:
	```
	Stability
	- High-quality manufacturing [25%]
	- Consistent reliability [22%]
	- Durable materials [19%]

	Development
	- Future-proof technology [30%]
	- Innovation in design [25%]
	- Commitment to sustainability [20%]

	Competence
	- Expertise in engineering [28%]
	- Advanced safety features [23%]
	- Recognized certifications [19%]
	```

	- Verification:
	- Ensure each Trust Builder is present in the generated content.

	- Formatting Enforcement:
	- Display Top scoring statements at the top of the output in the specified format before the main content.

	---

	### Content Guidelines

	#### GENERAL INSTRUCTIONS
	- Consistency: Maintain a uniform format across all content types.
	- Tone: Active, engaging, direct. Avoid flowery and overly complex language.
	- No Conclusions: Do not include conclusions.
	- Specificity: Include relevant names, numbers (e.g., dollars, years), programs, strategies, places, awards, actions.
	- Formatting:
	- Avoid HTML tags except ® for specified trademarks.
	- Format numerical values with spaces (e.g., 750 billion to 1 trillion).
	- Do not style numbers with bold or italics.
	- Trademark Usage: Use ® only with TrustLogic®, TrustBuilder®/TrustBuilders®, Six Buckets of Trust®, TrustifierAI®. Do not use ® with other brands.

	---

	### CONTENT TYPES AND FORMATS

	#### 1. Annual Reports or Articles
	- Introduction: "Here is a draft of your [Annual Report/Article]. Feel free to suggest further refinements."
	- Headline: Incorporate principles without mentioning TrustBuilders® or trust buckets directly.
	- Structure:
	- Headline
	- One main heading followed by 3-4 detailed paragraphs summarizing key content integrating all top scoring statements (no source links or subheadings).
	- Perspective: Use "we" to emphasize collective effort.
	- Sub-Headings (After Summary):
	1. List of TrustBuilders® Used: List relevant TrustBuilders with facts, figures used in the content.
	2. Heuristics Used: List 3-5 relevant heuristics (e.g., Social Proof, Authority, Emotion).
	3. Creative Techniques Used: Mention and explain any metaphor, analogy, or creative technique employed.
	- Word Count: Follow user instructions. Exclude sub-headings from the limit.

	---

	#### 2. Sales Conversations or Ad Copy
	- Intro Line: "Here is a draft of your [Sales Conversation/Ad Copy]. Feel free to suggest further refinements."
	- Content: Structured with clear messaging, Strictly integrating all top scoring statements into the content.
	1. List of TrustBuilders® Used: List used TrustBuilders with facts, figures in the content directly, not just the top scoring statements.
	2. Heuristics Used: Provide 3-5 heuristics names only from the following list that are relevant to the content:
	- Social Proof, Scarcity, Authority, Reciprocity, Consistency, Liking, Anchoring, Contrast, Urgency, Simplicity, Storytelling, Emotion, Framing, Loss Aversion, Recency, Frequency, Congruence, Availability, Commitment, Halo Effect, Ingroup Bias, Reciprocal Concessions (Door-in-the-Face), Priming, Cognitive Ease, Affect Heuristic, Endowment Effect, Decoy Effect, Foot-in-the-Door, Pacing, Zeigarnik Effect.
	3. Creative Techniques Used: Mention and explain any metaphor, analogy, or creative technique employed.

	---

	#### 3. Emails, Newsletters, Direct Marketing Letters
	- Intro Line: "Here is a draft of your [Email/Newsletter/Letter]. Feel free to suggest further refinements."
	- Content: Clear, concise messaging with a call to action integrating all top scoring statements. No subheadings or source links.
	- Subject: Provide an appropriate subject line.
	- Additional Requirements: Do not mention TrustBuilders in the body unless required. Exclude source links.
	- Sub-Headings (At Bottom):
	1. List of TrustBuilders® Used: List relevant TrustBuilders with facts, figures used in the content.
	2. Heuristics Used: Provide 3-5 heuristics names only from the following list that are relevant to the content:
	- Social Proof, Scarcity, Authority, Reciprocity, Consistency, Liking, Anchoring, Contrast, Urgency, Simplicity, Storytelling, Emotion, Framing, Loss Aversion, Recency, Frequency, Congruence, Availability, Commitment, Halo Effect, Ingroup Bias, Reciprocal Concessions (Door-in-the-Face), Priming, Cognitive Ease, Affect Heuristic, Endowment Effect, Decoy Effect, Foot-in-the-Door, Pacing, Zeigarnik Effect.
	3. Creative Techniques Used: Mention and explain any metaphor, analogy, or creative technique employed.
	- Word Count: Follow user instructions. Exclude sub-headings from the limit.

	---

	#### GENERAL QUERIES
	- For blogs or reports, refer to the knowledge base first. Focus on overall flow and structure without mentioning trust metrics unless requested.


	"""



	# Message layout sent to the model: the system prompt, the prior
	# conversation ("chat_history"), the new user input, and the agent's
	# intermediate tool messages ("agent_scratchpad").
	prompt_template = ChatPromptTemplate.from_messages(
	    [
	        ("system", prompt_message),
	        MessagesPlaceholder(variable_name="chat_history"),
	        ("user", "{input}"),
	        MessagesPlaceholder(variable_name="agent_scratchpad"),
	    ]
	)

	# Create Langchain Agent with specific model and temperature
	# NOTE: a failure here is only logged, not re-raised, so `llm_with_tools`
	# may be undefined afterwards; the block below then fails and logs as well.
	try:
	    llm = ChatOpenAI(model="gpt-4o", temperature=0.6)
	    llm_with_tools = llm.bind_tools(tools)
	except Exception as e:
	    logger.error(f"Error creating Langchain Agent: {e}")

	# Define the agent pipeline to handle the conversation flow
	try:
	    # The mapping extracts the prompt variables from the executor's input
	    # dict; the stages are chained with the `|` operator (prompt -> model
	    # with tools -> tool-call output parser).
	    agent = (
	        {
	            "input": lambda x: x["input"],
	            "agent_scratchpad": lambda x: format_to_openai_tool_messages(
	                x["intermediate_steps"]
	            ),
	            "chat_history": lambda x: x["chat_history"],
	        }
	        | prompt_template
	        | llm_with_tools
	        | OpenAIToolsAgentOutputParser()
	    )

	    # Instantiate an AgentExecutor to execute the defined agent pipeline
	    agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)
	except Exception as e:
	    # Logged only: `agent_executor` stays undefined if construction fails.
	    logger.error(f"Error defining agent pipeline: {e}")

	# Initialize chat history
	# Module-level list; chatbot_response() appends every exchange to it and
	# passes it to the agent as "chat_history".
	chat_history = []
	# Pool of tips; get_trust_tip_and_suggestion() picks one at random.
	trust_tips = [
	    "What I don’t know I can’t trust you for. Make sure you know all your great TrustBuilders® and use them over time.",
	    "The more specific, the more trustworthy each TrustBuilder® is.",
	    "For TrustBuilders®, think about each Trust Bucket® and in each one organization, product, and key individuals.",
	    "You are infinitely trustworthy. Organization, products, and your people. In each Trust Bucket® and past, present, and future.",
	    "Some TrustBuilders® are enduring (we have over 3 million clients), others changing (we are ranked No. 1 for 8 years/9 years), and yet others short-lived (we will present at XYZ conference next month).",
	    "Not all Trust Buckets® are equally important all the time. Think about which ones are most important right now and how to fill them (with TrustAnalyser® you know).",
	    "In social media, structure posts over time to focus on different Trust Buckets® and themes within them.",
	    "Try focusing your idea on specific Trust Buckets® or a mix of them.",
	    "Within each Trust Bucket®, ask for examples across different themes like employee programs, IT, R&D.",
	    "To create more and different trust, ask trustifier.ai to combine seemingly unconnected aspects like 'I played in bands all my youth. What does this add to my competence as a lawyer?'",
	    "With every little bit more trust, your opportunity doubles. It's about using trustifier.ai to help you nudge trust up ever so slightly in everything you do.",
	    "Being honest is not enough. You can be honest with one aspect and destroy trust and build a lot of trust with another. Define what that is.",
	    "The more I trust you, the more likely I am to recommend you. And that's much easier with specifics.",
	    "What others don’t say they are not trusted for - but you can claim that trust.",
	    "Building more trust is a service to your audience. It's so valuable to us, as humans, that we reflect that value right away in our behaviors.",
	    "In your audience journey, you can use TrustAnalyser® to know precisely which Trust Buckets® and TrustBuilders® are most effective at each stage of the journey.",
	    "Try structuring a document. Like % use of each Trust Bucket® and different orders in the document.",
	    "In longer documents like proposals, think about the chapter structure and which Trust Buckets® and TrustBuilders® you want to focus on when.",
	    "Building Trust doesn’t take a long time. Trust is built and destroyed every second, with every word, action, and impression. That's why it's so important to build more trust all the time.",
	    "There is no prize for the second most trusted. To get the most business, support, and recognition, you have to be the most trusted.",
	    "With most clients, we know they don’t know 90% of their available TrustBuilders®. Knowing them increases internal trust - and that can be carried to the outside.",
	    "Our client data always shows that, after price, trust is the key decision factor (and price is a part of benefit and relationship trust).",
	    "Our client data shows that customer value increases 9x times from Trust Neutral to High Trust. A good reason for internal discussions.",
	    "Our client's data shows that high trust customers are consistently far more valuable than just trusting ones.",
	    "Trust determines up to 85% of your NPS. No wonder, because the more I trust you, the more likely I am to recommend you.",
	    "Trust determines up to 75% of your loyalty. Think about it yourself. It's intuitive.",
	    "Trust determines up to 87% of your reputation. Effectively, they are one and the same.",
	    "Trust determines up to 85% of your employee engagement. But what is it that they want to trust you for?",
	    "Don't just ask 'what your audience needs to trust for'. That just keeps you at low, hygiene trust levels. Ask what they 'would love to trust for'. That's what gets you to High Trust."
	]

	# Pool of follow-up suggestions; get_trust_tip_and_suggestion() picks one
	# at random.
	suggestions = [
	    "Try digging deeper into a specific TrustBuilder®.",
	    "Ask just for organization, product, or a person's TrustBuilders® for a specific Trust Bucket®.",
	    "Some TrustBuilders® can fill more than one Trust Bucket®. We call these PowerBuilders. TrustAnalyser® reveals them for you.",
	    "Building trust is storytelling. trustifier.ai connects Trust Buckets® and TrustBuilders® for you. But you can push it more to connect specific Trust Buckets® and TrustBuilders®.",
	    "Describe your audience and ask trustifier.ai to choose the most relevant Trust Buckets®, TrustBuilders®, and tonality (TrustAnalyser® can do this precisely for you).",
	    "Ask trustifier.ai to find TrustBuilders® for yourself. Then correct and add a few for your focus Trust Buckets® - and generate a profile or CV.",
	    "LinkedIn Profiles are at their most powerful if they are regularly updated and focused on your objectives. Rewrite it every 2-3 months using different Trust Buckets®.",
	    "Share more of your TrustBuilders® with others and get them to help you build your trust.",
	    "Build a trust strategy. Ask trustifier.ai to find all your TrustBuilders® in the Trust Buckets® and then create a trust-building program for a specific person/audience over 8 weeks focusing on different Trust Buckets® that build on one another over time. Then refine and develop by channel ideas.",
	    "Brief your own TrustBuilders® and ask trustifier.ai to tell you which Trust Buckets® they're likely to fill (some can fill more than one).",
	    "Have some fun. Ask trustifier.ai to write a 200-word speech to investors using all Trust Buckets®, but leading and ending with Development Trust. Use [BRAND], product, and personal CEO [NAME] TrustBuilders®.",
	    "Ask why TrustLogic® can be trusted in each Trust Bucket®.",
	    "Ask what's behind TrustLogic®."
	]


	def get_trust_tip_and_suggestion():
	    """Return a ``(trust_tip, suggestion)`` pair, each drawn at random from its list."""
	    return random.choice(trust_tips), random.choice(suggestions)


	def chatbot_response(message, history):
	    """
	    Produce the chatbot reply for ``message``.

	    The proof-point table is recomputed for the selected dataset, the agent
	    executor is invoked with the module-level ``chat_history``, and the
	    reply is prefixed with the dataset name and followed by a random trust
	    tip and suggestion. The exchange is then appended to ``chat_history``.

	    Args:
	        message: The user's prompt.
	        history: Not used; context comes from the module-level
	            ``chat_history``.

	    Returns:
	        str: The decorated reply, or a fixed error message if anything fails.
	    """
	    global selected_dataset_ai
	    global df_builder_pivot_str

	    try:
	        # Refresh the dataset selection and the proof points derived from it.
	        selected_dataset_ai = read_ai_dataset_selection()
	        df_builder_pivot_str = compute_dataframe_proof_point()

	        # Ask the agent, then draw the tip and suggestion for the footer.
	        agent_result = agent_executor.invoke(
	            {"input": message, "chat_history": chat_history}
	        )
	        trust_tip, suggestion = get_trust_tip_and_suggestion()

	        answer = agent_result["output"]
	        response = f"Selected Dataset: {selected_dataset_ai}\n\n{answer}"

	        # The footer is skipped when the tip already occurs in the reply.
	        if trust_tip not in response:
	            footer = f"\n\n---\n\nTrust Tip: {trust_tip}\n\nSuggestion: {suggestion}"
	            response = response + footer

	        # Remember this exchange for later turns.
	        latest_turn = [
	            HumanMessage(content=message),
	            AIMessage(content=response),
	        ]
	        chat_history.extend(latest_turn)

	        return response
	    except Exception as e:
	        logger.error(f"Error generating chatbot response: {e}")
	        return "Error occurred during response generation"








	def read_ai_dataset_selection():
	    """Return the current value of the module-level ``selected_dataset_ai``."""
	    # Reading a module-level name needs no ``global`` declaration.
	    return selected_dataset_ai

	def _create_trust_score_donut(score, title, max_score=10):
	    """
	    Draw a small donut chart showing ``score`` out of ``max_score``.

	    All drawing goes through the figure's own axes rather than pyplot's
	    "current axes", so the text always lands on the figure created here.

	    Args:
	        score: Value shown as the green part of the ring.
	        title: Figure title.
	        max_score: Full-scale value; the remainder is drawn in grey.

	    Returns:
	        The matplotlib figure. The caller is responsible for closing it.
	    """
	    fig, ax = plt.subplots(figsize=(2, 2), subplot_kw=dict(aspect="equal"))
	    ax.pie(
	        [score, max_score - score],
	        colors=["#4CAF50", "#C0C0C0"],
	        startangle=90,
	        counterclock=False,
	        wedgeprops=dict(width=0.3),
	    )

	    # Score label in the middle of the ring, drawn in white.
	    ax.text(
	        0,
	        0,
	        f"{score}/{max_score}",
	        horizontalalignment="center",
	        verticalalignment="center",
	        fontsize=10,
	        color="white",
	    )
	    ax.axis("equal")
	    fig.suptitle(title, fontsize=10, color="white")

	    # Set background to transparent
	    fig.patch.set_facecolor("none")
	    ax.patch.set_facecolor("none")

	    return fig


	def create_trust_score_1():
	    """Return the donut figure for the fixed score 8.3 out of 10."""
	    # The title keeps its trailing space from the original code.
	    return _create_trust_score_donut(8.3, "Trust Composite Score ")


	def create_trust_score_2():
	    """Return the donut figure for the fixed score 7.9 out of 10."""
	    return _create_trust_score_donut(7.9, "Trust Composite Score")

	def plot_to_svg(fig):
	    """
	    Render ``fig`` as SVG markup.

	    The figure is saved with a tight bounding box, no padding and a
	    transparent background.

	    Returns:
	        str: The SVG document produced by ``fig.savefig``.
	    """
	    svg_buffer = io.StringIO()
	    fig.savefig(
	        svg_buffer,
	        format="svg",
	        bbox_inches="tight",
	        pad_inches=0,
	        transparent=True,
	    )
	    # getvalue() returns the whole buffer regardless of the stream position.
	    return svg_buffer.getvalue()

	def display_trust_score_1():
	    """
	    Return the first trust-score chart as SVG markup.

	    The figure is closed once it has been rendered so repeated calls do not
	    leave open figures registered with pyplot.
	    """
	    fig1 = create_trust_score_1()
	    try:
	        return plot_to_svg(fig1)
	    finally:
	        plt.close(fig1)

	def display_trust_score_2():
	    """
	    Return the second trust-score chart as SVG markup.

	    The figure is closed once it has been rendered so repeated calls do not
	    leave open figures registered with pyplot.
	    """
	    fig2 = create_trust_score_2()
	    try:
	        return plot_to_svg(fig2)
	    finally:
	        plt.close(fig2)



	def update_ai_dataset_selection(selection):
	    """
	    Update the selected dataset from the radio button value.

	    "VW Customers" and "VW Prospects" switch ``selected_dataset_ai`` to the
	    first entry of the matching state component; any other value leaves
	    the selection untouched.

	    Returns:
	        The result of ``compute_dataframe_proof_point()`` for the selection
	        in effect after the update.
	    """
	    global selected_dataset_ai

	    if selection == "VW Customers":
	        chosen_state = vw_customers_state  # Use the customer dataset
	    elif selection == "VW Prospects":
	        chosen_state = vw_prospects_state  # Use the prospects dataset
	    else:
	        chosen_state = None

	    if chosen_state is not None:
	        selected_dataset_ai = chosen_state.value[0]

	    return compute_dataframe_proof_point()

	# Placeholder shown by the Chatbot component created below.
	placeholder_text = """
	<b>Play with the Trustifier.AI®</b>
	Use the preset prompt or replace with your own prompt and click
	“Submit” button to get started.
	"""

	# Preset prompt used as the initial value of the prompt textbox.
	predefined_prompt = """
	What: Car showroom sales conversation between a prospective buyer of a new T-Roc and our VW advisor.
	Who: The visitor is a 24-year-old female, stylishly dressed in brand items.
	Topics:
	1. Future car usage.
	2. Current car and experience with it.
	Specifics:
	- Highlight T-Roc features that connect with her interests. Find the feature on the T-Roc US website.
	- Discuss petrol and electric engine types.
	- Focus on aesthetics, exterior design and strong interior features/experience.
	Proof Points and Feature Usage:
	- Connect features contextually and creatively.
	- Be specific with the features and examples, including feature names, numbers, brands, facts, and their implications for the driving and ownership experience.
	Style:
	- End responses with a question or suggestion to steer to the next topic.
	- Convey TrustBuilders® naturally.
	"""
	# Text input box for the user to enter their prompt
	prompt_textbox = gr.Textbox(
	    value=predefined_prompt,
	    scale=4,
	    label="Insert your prompt",
	    visible=True,
	)
	submit_button = gr.Button("Submit")

	bot = gr.Chatbot(placeholder=placeholder_text)
	# JavaScript handed to gr.Blocks(js=...) below: reloads the page with
	# `__theme=dark` in the query string unless it is already set.
	js_func = """
	function refresh() {
	const url = new URL(window.location);

	if (url.searchParams.get('__theme') !== 'dark') {
	url.searchParams.set('__theme', 'dark');
	window.location.href = url.href;
	}
	}
	"""
	# Page layout for the app; `demo` is the gr.Blocks object launched at the
	# end of the file. js_func is attached via `js=`.
	with gr.Blocks(js=js_func) as demo:
	# Intro: headline plus a short description of what the calculator does.
	with gr.Column():
	gr.Markdown(
	"<span style='font-size:20px; font-weight:bold;'>Click 'Volkswagen Customers' or 'Volkswagen Prospects' to see the full results and play with the TrustifierAI.</span>",
	visible=True,
	)
	gr.Markdown(
	"Our calculator will conduct the driver analysis from the underlying Excel file and display the results. "
	+ "Scroll down to view them and interact with them. "
	+ "In the full version you can link your survey directly to our calculator or export your data as CSV and drag & drop it into our calculator.",
	visible=True,
	)

	# Dataset buttons; their click listeners are registered after the layout.
	with gr.Column():
	with gr.Row():

	vw_customers_btn = gr.Button("VW Customers")
	vw_prospects_btn = gr.Button("VW Prospects")

	# Section 1: trust composite score. The HTML component is the output
	# target of the display_trust_score_* click listeners.
	with gr.Row():
	with gr.Column():
	gr.Markdown(
	"<span style='font-size:20px; font-weight:bold;'>1) Trust Composite Score</span>",
	visible=True,
	)
	gr.Markdown(
	"This analysis shows you the average of the score across the six trust buckets.",
	visible=True,
	)
	trust_score_output = gr.HTML()

	with gr.Column():
	# set default output widgets
	# `outputs` is later used as the `outputs=` target of process_examples.
	outputs = reset_outputs()

	# Create gr.State components to store file names as lists
	# Each state is the single input of the matching button's process_examples listener.
	vw_customers_state = gr.State(value=["Volkswagen Customers.xlsx"])
	vw_prospects_state = gr.State(value=["Volkswagen Non Customers.xlsx"])

	# Section 5: Trustifier.AI chat (heading, instructions, dataset radio, chat UI).
	with gr.Column():
	gr.Markdown(
	"<span style='font-size:20px; font-weight:bold;'>5) Try our Trustifier.AI®</span>",
	visible=True,
	)
	# NOTE(review): the quotes around Submit in the text below are mismatched
	# (straight opening quote, curly closing quote).
	gr.Markdown(
	"""
	<b>Prompt the Trustifier.AI® to generate content for you.</b> It will
	automatically use the top-scoring Trust Builders® of the top-scoring
	Trust Buckets®.<br>

	<b>Option 1:</b> Use the preset prompt provided in the textbox below and
	click 'Submit’. Try it for Customers vs Prospects and see the
	difference.<br>

	<b>Option 2:</b> Replace the preset prompt with your own and click
	'Submit’.<br>

	You can add the output to the prompt to further customize it.
	""",
	visible=True,
	)

	# Define the radio button component
	# Its change event calls update_ai_dataset_selection (registered after the layout).
	radio = gr.Radio(
	choices=["VW Customers", "VW Prospects"],
	label="Select a dataset for Trustifier.AI®",
	visible=True,
	)

	# NOTE(review): the commented-out Markdown below appears to be an earlier
	# wording of the instructions shown above; candidate for removal.
	# gr.Markdown(
	# "2)<b> Prompt </b> the TrustifierAI to generate content for you.<br><br>"
	# + "<b> Option 1:</b> Use the preset prompt provided in the textbox below and click 'Submit'. <br>Try it for Customers vs Prospects and see the difference.<br><br>"
	# + "<b> Option 2:</b> Replace the preset prompt with your own and click 'Submit'.<br>"
	# + "Note: Every time you click 'Submit', it creates a different version of the text. <br>You can add the output to the prompt to customise it.<br><br>",
	# visible=True,
	# )

	# Chat UI driven by chatbot_response. It reuses the textbox, submit button
	# and chatbot created before this Blocks context; the stop/retry/undo/clear
	# buttons are all set to None.
	chatbot = gr.ChatInterface(
	fn=chatbot_response,
	stop_btn=None,
	retry_btn=None,
	undo_btn=None,
	clear_btn=None,
	autofocus=False,
	textbox=prompt_textbox,
	submit_btn=submit_button,
	chatbot=bot,
	theme="dark",
	)

	## Event listeners ##
	# Each dataset button has two listeners: one renders the trust score into
	# trust_score_output, the other feeds the dataset's file-name state to
	# process_examples and writes to the default output widgets.

	vw_customers_btn.click(fn=display_trust_score_1, inputs=[], outputs=trust_score_output)
	vw_prospects_btn.click(fn=display_trust_score_2, inputs=[], outputs=trust_score_output)

	vw_customers_btn.click(fn=process_examples, inputs=[vw_customers_state], outputs=outputs)
	vw_prospects_btn.click(fn=process_examples, inputs=[vw_prospects_state], outputs=outputs)

	# The radio choice is forwarded to update_ai_dataset_selection; no component is updated.
	radio.change(fn=update_ai_dataset_selection, inputs=radio, outputs=[])


	# Launch the Gradio app.
	# server_name="0.0.0.0" binds the server to all network interfaces.
	try:
	    demo.launch(server_name="0.0.0.0")
	except Exception as e:
	    # logger.exception logs at ERROR level and appends the traceback,
	    # which a plain logger.error call does not record.
	    logger.exception("Error launching Gradio app: %s", e)
	    # Bare raise re-raises the active exception with its original traceback.
	    raise