# app.py — Gaia ESG compliance-advisor chatbot (commit 44dbb01)
import gradio as gr
import pandas as pd
import os
from pathlib import Path
from dotenv import load_dotenv
import tiktoken
import ast
# Custom function process wrapper - this is already imported
from fncs.chatb import process_query
# --- Paths and model defaults -------------------------------------------------
DEFAULT_CSV_PATH = "data_chatbot/esg_faq_subset.csv"  # pre-embedded ESG FAQ knowledge base
DEFAULT_CHAT_MODEL = "meta-llama/llama-4-scout-17b-16e-instruct"
DEFAULT_EMB_MODEL = 'text-embedding-3-large'
DEFAULT_MAX_INPUT_TOKENS = 2000   # "Small" input-context preset
DEFAULT_MAX_OUTPUT_TOKENS = 500   # "XSmall" output preset
DEFAULT_TEMPERATURE = 0.1

# --- Credentials --------------------------------------------------------------
# Prefer values already present in the process environment; only touch the
# local .env file (via python-dotenv) when something is missing.
api_key_voc = os.environ.get("GROQ_API")
base_url_voc = os.environ.get("GROQ_BASE")
if not api_key_voc or not base_url_voc:
    load_dotenv()  # load .env once, only when needed
    api_key_voc = api_key_voc or os.getenv("GROQ_API")
    base_url_voc = base_url_voc or os.getenv("GROQ_BASE")

# Normalise the base URL so it always carries an explicit scheme.
# Guard against GROQ_BASE being entirely unset (None) — calling
# .startswith() on None would raise AttributeError at import time.
if base_url_voc and not base_url_voc.startswith(("http://", "https://")):
    base_url_voc = "https://" + base_url_voc
### Example Question flow:
# The positional layout below mirrors chat_function's parameters:
# message,
# history,
# csv_path,
# chat_model,
# input_tokens_inp,
# max_tokens_out,
# temperature,
# api_key_input
# NOTE(review): EXAMPLE_QUESTIONS_1 is not referenced anywhere in this file
# (the ChatInterface `examples=` argument is commented out) — kept for reference.
EXAMPLE_QUESTIONS_1 = [
['What are the main compliance standards for a SME?',
None,
DEFAULT_CSV_PATH,
DEFAULT_CHAT_MODEL,
DEFAULT_MAX_INPUT_TOKENS,
DEFAULT_MAX_OUTPUT_TOKENS,
DEFAULT_TEMPERATURE,
api_key_voc]
]
# Plain one-item example prompts (message only) for the chat UI.
# Also currently unused: `examples=EXAMPLE_QUESTIONS` is commented out below.
EXAMPLE_QUESTIONS = [
['What are the main compliance standards for a SME?'],
['Which kind of business must create a sustainability report in 2025?'],
['How do I know if my company is required to comply with the latest sustainability reporting directives?'],
['What are the key steps to meet the CSRD requirements?'],
['Which reporting standards should I focus on for my sustainability disclosures?'],
['How can I make sure my sustainability data is accurate and comparable?'],
['Can you clarify the scope of the SFDR in relation to my business activities?'],
['What timeline should I follow to ensure compliance with the new sustainability reporting standards?'],
['Are there exceptions for small and medium-sized enterprises under the new reporting requirements?']
]
def list_available_csvs(directory="data_chatbot"):
    """Return every CSV path found recursively under *directory*.

    Falls back to a one-item list containing ``DEFAULT_CSV_PATH`` when the
    scan finds nothing or fails for any reason (missing directory, bad
    permissions, ...).
    """
    try:
        found = [str(p) for p in Path(directory).glob("**/*.csv")]
    except Exception:
        return [DEFAULT_CSV_PATH]
    return found or [DEFAULT_CSV_PATH]
def create_chatbot():
    """Build and return the Gradio Blocks app for the Gaia ESG advisor.

    The app wires a ChatInterface to ``process_query`` plus a settings
    column (knowledge base, context sizes, temperature).

    Returns:
        gr.Blocks: the assembled (not yet launched) Gradio demo.
    """

    # NOTE: Gradio calls this as fn(message, history, *additional_inputs)
    # with additional_inputs = [csv_input, input_tokens_input,
    # max_tokens_output, temp_input] — see the ChatInterface below.
    # The parameter order here MUST match that list. The original code
    # declared (csv_path, chat_model, input_tokens_inp, ...) as required
    # positionals while `model_input` and `api_key_input` were commented
    # out of additional_inputs, so every message raised TypeError and the
    # wired values landed in the wrong parameters. `chat_model` and
    # `api_key_input` are now optional keyword-style params with defaults.
    def chat_function(message,
                      history,
                      csv_path,
                      input_tokens_inp,
                      max_tokens_out,
                      temperature,
                      chat_model=None,
                      api_key_input=None,
                      ):
        """Answer one chat turn via process_query; returns the reply text."""
        # Fall back to module defaults for anything the UI left unset.
        file_path_csv = DEFAULT_CSV_PATH if csv_path is None else csv_path
        # Prefer an API key typed into the UI; otherwise the environment one.
        api_key = api_key_input.strip() if api_key_input and api_key_input.strip() else api_key_voc
        input_tokens = DEFAULT_MAX_INPUT_TOKENS if input_tokens_inp is None else int(input_tokens_inp)
        max_tokens = DEFAULT_MAX_OUTPUT_TOKENS if max_tokens_out is None else int(max_tokens_out)
        temperature_value = DEFAULT_TEMPERATURE if temperature is None else float(temperature)
        model_to_use = DEFAULT_CHAT_MODEL if chat_model is None else chat_model

        # Options forwarded verbatim to the chat model.
        additional_options = {
            "temperature": temperature_value,
            "max_tokens": max_tokens
        }
        try:
            # Delegate retrieval + generation to the imported pipeline.
            result = process_query(
                csv_path=file_path_csv,
                query=message,
                api_key=api_key,
                # endpoint=base_url_voc,
                chat_model=model_to_use,
                max_token_count=input_tokens,
                additional_options=additional_options
            )
            # Append token/cost usage so the user sees per-answer spend.
            response_with_usage = (
                f"{result['response']}\n\n"
                f"---\n"
                f"*Total tokens: {result['total_tokens']} · Cost: €{result['cost']:.6f}*"
            )
            return response_with_usage
        except Exception as e:
            # Surface the full traceback in-chat: this is a prototype and
            # the detail helps debugging (would be hidden in production).
            import traceback
            trace = traceback.format_exc()
            return f"Error processing your query: {str(e)}\n\nDetails: {trace}"

    # Emerald-on-zinc theme for the whole app.
    gradio_theme = gr.themes.Default(
        primary_hue=gr.themes.colors.emerald,
        secondary_hue=gr.themes.colors.emerald,
        neutral_hue=gr.themes.colors.zinc
        #font="system-ui"
    )

    with gr.Blocks(title="Gaia | An ESG Corporate Sustainability Reporting Rules & Compliance Advisor", theme=gradio_theme) as demo:
        gr.Markdown("# Gaia | An ESG Corporate Sustainability Reporting Rules & Compliance Advisor")
        gr.Markdown("Ask Gaia questions about corporate sustainability reporting rules "
                    "and practices to receive expert advice.")
        with gr.Row(elem_id="main-row"):
            # Left column: settings and about panels.
            with gr.Column(scale=1, variant="compact"):
                with gr.Accordion("Settings", open=False):
                    # Knowledge base selector (single fixed choice for now).
                    csv_input = gr.Dropdown(
                        choices=[("EU Corporate Sustainability & Reporting Rules", DEFAULT_CSV_PATH)
                                 ],  # list_available_csvs(),
                        value=DEFAULT_CSV_PATH,
                        label="Knowledge Bases",
                        info="Select ESG Knowledge Base"
                    )
                    # Input-context size presets (token budgets).
                    input_tokens_input = gr.Radio(
                        choices=[("Small", DEFAULT_MAX_INPUT_TOKENS), ("Medium", 4000), ("Large", 10000)],
                        value=DEFAULT_MAX_INPUT_TOKENS,
                        label="Input Context Size",
                        info="The input context size determines the number of tokens used for the input to the AI model."
                    )
                    # Output-size presets (max completion tokens).
                    max_tokens_output = gr.Radio(
                        [("XSmall", DEFAULT_MAX_OUTPUT_TOKENS), ("Small", 2000), ("Medium", 4000), ("Large", 10000)],
                        value=DEFAULT_MAX_OUTPUT_TOKENS,
                        label="Maximum Possible Output Size",
                        info="The maximum possible output answer size determines the number of tokens the AI model will output."
                    )
                    temp_input = gr.Slider(
                        minimum=0.0,
                        maximum=2.0,
                        value=DEFAULT_TEMPERATURE,
                        step=0.1,
                        label="Temperature"
                    )
                with gr.Accordion("About", open=False):
                    gr.Markdown("""
                    This AI chatbot is designed to help you navigate and clarify sustainability reporting requirements,
                    especially those introduced by the Corporate Sustainability Reporting Directive (CSRD)
                    and related regulations.
                    By tapping into its knowledge base it can offer quick insights on compliance obligations,
                    interpretation of specific terms or directives,
                    and guidance on how to meet reporting standards in a straightforward and cost-effective way.
                    Prototype version: **Alpha 0.1.0**
                    """)
            # Right column: the chat interface itself.
            with gr.Column(scale=2, variant='compact'):
                chat_interface = gr.ChatInterface(
                    fill_width=False,
                    fill_height=False,
                    type='messages',
                    fn=chat_function,
                    #examples=EXAMPLE_QUESTIONS,
                    cache_examples=False,
                    chatbot=gr.Chatbot(
                        type='messages',
                        show_label=False,
                        show_copy_button=True,
                        container=True,
                        layout='bubble',
                        placeholder='Hi, my name is Gaia, your personal AI ESG Advisor. How can I help you today?'
                    ),
                    # Passed positionally to chat_function after (message,
                    # history) — keep this order in sync with its signature.
                    additional_inputs=[
                        csv_input,
                        input_tokens_input,
                        max_tokens_output,
                        temp_input,
                    ],
                    title=""
                )
    return demo
if __name__ == "__main__":
    # Build the interface and start the local Gradio server.
    create_chatbot().launch()