# app.py — Gaia ESG compliance-advisor chatbot (commit 44dbb01)
import gradio as gr
import pandas as pd
import os
from pathlib import Path
from dotenv import load_dotenv
import tiktoken
import ast
# Custom function process wrapper - this is already imported
from fncs.chatb import process_query
# --- Paths and model defaults -------------------------------------------------
DEFAULT_CSV_PATH = "data_chatbot/esg_faq_subset.csv"  # pre-embedded ESG FAQ knowledge base
DEFAULT_CHAT_MODEL = "meta-llama/llama-4-scout-17b-16e-instruct"
DEFAULT_EMB_MODEL = 'text-embedding-3-large'
DEFAULT_MAX_INPUT_TOKENS = 2000   # "Small" input-context preset
DEFAULT_MAX_OUTPUT_TOKENS = 500   # "XSmall" output preset
DEFAULT_TEMPERATURE = 0.1

# --- Credentials --------------------------------------------------------------
# Prefer values already present in the process environment; only touch the
# local .env file (via python-dotenv) when something is missing.
api_key_voc = os.environ.get("GROQ_API")
base_url_voc = os.environ.get("GROQ_BASE")
if not api_key_voc or not base_url_voc:
    load_dotenv()  # load .env once, only when needed
    api_key_voc = api_key_voc or os.getenv("GROQ_API")
    base_url_voc = base_url_voc or os.getenv("GROQ_BASE")

# Normalise the base URL so it always carries an explicit scheme.
# Guard against GROQ_BASE being entirely unset (None) — calling
# .startswith() on None would raise AttributeError at import time.
if base_url_voc and not base_url_voc.startswith(("http://", "https://")):
    base_url_voc = "https://" + base_url_voc
### Example Question flow:
# The positional layout below mirrors chat_function's parameters:
# message,
# history,
# csv_path,
# chat_model,
# input_tokens_inp,
# max_tokens_out,
# temperature,
# api_key_input
# NOTE(review): EXAMPLE_QUESTIONS_1 is not referenced anywhere in this file
# (the ChatInterface `examples=` argument is commented out) — kept for reference.
EXAMPLE_QUESTIONS_1 = [
['What are the main compliance standards for a SME?',
None,
DEFAULT_CSV_PATH,
DEFAULT_CHAT_MODEL,
DEFAULT_MAX_INPUT_TOKENS,
DEFAULT_MAX_OUTPUT_TOKENS,
DEFAULT_TEMPERATURE,
api_key_voc]
]
# Plain one-item example prompts (message only) for the chat UI.
# Also currently unused: `examples=EXAMPLE_QUESTIONS` is commented out below.
EXAMPLE_QUESTIONS = [
['What are the main compliance standards for a SME?'],
['Which kind of business must create a sustainability report in 2025?'],
['How do I know if my company is required to comply with the latest sustainability reporting directives?'],
['What are the key steps to meet the CSRD requirements?'],
['Which reporting standards should I focus on for my sustainability disclosures?'],
['How can I make sure my sustainability data is accurate and comparable?'],
['Can you clarify the scope of the SFDR in relation to my business activities?'],
['What timeline should I follow to ensure compliance with the new sustainability reporting standards?'],
['Are there exceptions for small and medium-sized enterprises under the new reporting requirements?']
]
def list_available_csvs(directory="data_chatbot"):
    """Return every CSV path found recursively under *directory*.

    Falls back to a one-item list containing ``DEFAULT_CSV_PATH`` when the
    scan finds nothing or fails for any reason (missing directory, bad
    permissions, ...).
    """
    try:
        found = [str(p) for p in Path(directory).glob("**/*.csv")]
    except Exception:
        return [DEFAULT_CSV_PATH]
    return found or [DEFAULT_CSV_PATH]
def create_chatbot():
    """Build and return the Gradio Blocks app for the Gaia ESG advisor.

    The app wires a ChatInterface to ``process_query`` plus a settings
    column (knowledge base, context sizes, temperature).

    Returns:
        gr.Blocks: the assembled (not yet launched) Gradio demo.
    """

    # NOTE: Gradio calls this as fn(message, history, *additional_inputs)
    # with additional_inputs = [csv_input, input_tokens_input,
    # max_tokens_output, temp_input] — see the ChatInterface below.
    # The parameter order here MUST match that list. The original code
    # declared (csv_path, chat_model, input_tokens_inp, ...) as required
    # positionals while `model_input` and `api_key_input` were commented
    # out of additional_inputs, so every message raised TypeError and the
    # wired values landed in the wrong parameters. `chat_model` and
    # `api_key_input` are now optional keyword-style params with defaults.
    def chat_function(message,
                      history,
                      csv_path,
                      input_tokens_inp,
                      max_tokens_out,
                      temperature,
                      chat_model=None,
                      api_key_input=None,
                      ):
        """Answer one chat turn via process_query; returns the reply text."""
        # Fall back to module defaults for anything the UI left unset.
        file_path_csv = DEFAULT_CSV_PATH if csv_path is None else csv_path
        # Prefer an API key typed into the UI; otherwise the environment one.
        api_key = api_key_input.strip() if api_key_input and api_key_input.strip() else api_key_voc
        input_tokens = DEFAULT_MAX_INPUT_TOKENS if input_tokens_inp is None else int(input_tokens_inp)
        max_tokens = DEFAULT_MAX_OUTPUT_TOKENS if max_tokens_out is None else int(max_tokens_out)
        temperature_value = DEFAULT_TEMPERATURE if temperature is None else float(temperature)
        model_to_use = DEFAULT_CHAT_MODEL if chat_model is None else chat_model

        # Options forwarded verbatim to the chat model.
        additional_options = {
            "temperature": temperature_value,
            "max_tokens": max_tokens
        }
        try:
            # Delegate retrieval + generation to the imported pipeline.
            result = process_query(
                csv_path=file_path_csv,
                query=message,
                api_key=api_key,
                # endpoint=base_url_voc,
                chat_model=model_to_use,
                max_token_count=input_tokens,
                additional_options=additional_options
            )
            # Append token/cost usage so the user sees per-answer spend.
            response_with_usage = (
                f"{result['response']}\n\n"
                f"---\n"
                f"*Total tokens: {result['total_tokens']} · Cost: €{result['cost']:.6f}*"
            )
            return response_with_usage
        except Exception as e:
            # Surface the full traceback in-chat: this is a prototype and
            # the detail helps debugging (would be hidden in production).
            import traceback
            trace = traceback.format_exc()
            return f"Error processing your query: {str(e)}\n\nDetails: {trace}"

    # Emerald-on-zinc theme for the whole app.
    gradio_theme = gr.themes.Default(
        primary_hue=gr.themes.colors.emerald,
        secondary_hue=gr.themes.colors.emerald,
        neutral_hue=gr.themes.colors.zinc
        #font="system-ui"
    )

    with gr.Blocks(title="Gaia | An ESG Corporate Sustainability Reporting Rules & Compliance Advisor", theme=gradio_theme) as demo:
        gr.Markdown("# Gaia | An ESG Corporate Sustainability Reporting Rules & Compliance Advisor")
        gr.Markdown("Ask Gaia questions about corporate sustainability reporting rules "
                    "and practices to receive expert advice.")
        with gr.Row(elem_id="main-row"):
            # Left column: settings and about panels.
            with gr.Column(scale=1, variant="compact"):
                with gr.Accordion("Settings", open=False):
                    # Knowledge base selector (single fixed choice for now).
                    csv_input = gr.Dropdown(
                        choices=[("EU Corporate Sustainability & Reporting Rules", DEFAULT_CSV_PATH)
                                 ],  # list_available_csvs(),
                        value=DEFAULT_CSV_PATH,
                        label="Knowledge Bases",
                        info="Select ESG Knowledge Base"
                    )
                    # Input-context size presets (token budgets).
                    input_tokens_input = gr.Radio(
                        choices=[("Small", DEFAULT_MAX_INPUT_TOKENS), ("Medium", 4000), ("Large", 10000)],
                        value=DEFAULT_MAX_INPUT_TOKENS,
                        label="Input Context Size",
                        info="The input context size determines the number of tokens used for the input to the AI model."
                    )
                    # Output-size presets (max completion tokens).
                    max_tokens_output = gr.Radio(
                        [("XSmall", DEFAULT_MAX_OUTPUT_TOKENS), ("Small", 2000), ("Medium", 4000), ("Large", 10000)],
                        value=DEFAULT_MAX_OUTPUT_TOKENS,
                        label="Maximum Possible Output Size",
                        info="The maximum possible output answer size determines the number of tokens the AI model will output."
                    )
                    temp_input = gr.Slider(
                        minimum=0.0,
                        maximum=2.0,
                        value=DEFAULT_TEMPERATURE,
                        step=0.1,
                        label="Temperature"
                    )
                with gr.Accordion("About", open=False):
                    gr.Markdown("""
                    This AI chatbot is designed to help you navigate and clarify sustainability reporting requirements,
                    especially those introduced by the Corporate Sustainability Reporting Directive (CSRD)
                    and related regulations.
                    By tapping into its knowledge base it can offer quick insights on compliance obligations,
                    interpretation of specific terms or directives,
                    and guidance on how to meet reporting standards in a straightforward and cost-effective way.
                    Prototype version: **Alpha 0.1.0**
                    """)
            # Right column: the chat interface itself.
            with gr.Column(scale=2, variant='compact'):
                chat_interface = gr.ChatInterface(
                    fill_width=False,
                    fill_height=False,
                    type='messages',
                    fn=chat_function,
                    #examples=EXAMPLE_QUESTIONS,
                    cache_examples=False,
                    chatbot=gr.Chatbot(
                        type='messages',
                        show_label=False,
                        show_copy_button=True,
                        container=True,
                        layout='bubble',
                        placeholder='Hi, my name is Gaia, your personal AI ESG Advisor. How can I help you today?'
                    ),
                    # Passed positionally to chat_function after (message,
                    # history) — keep this order in sync with its signature.
                    additional_inputs=[
                        csv_input,
                        input_tokens_input,
                        max_tokens_output,
                        temp_input,
                    ],
                    title=""
                )
    return demo
if __name__ == "__main__":
    # Build the interface and start the local Gradio server.
    create_chatbot().launch()