# LVEBOT / Main.py
# Uploaded by middha ("Upload 4 files", commit 70c1fad, verified)
import os
from langchain.vectorstores.chroma import Chroma
from langchain.embeddings import OpenAIEmbeddings
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from dotenv import load_dotenv
import gradio as gr
#from gradio.themes import CustomTheme # Import the CustomTheme class
import os
from langchain.document_loaders import DirectoryLoader
from ragas.testset.generator import TestsetGenerator
from ragas.testset.evolutions import simple, reasoning, multi_context
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
#custom_theme = CustomTheme(css_path="custom_theme.css") # Use css_path argument
# Load environment variables from .env file
load_dotenv()
# API key shared by the embeddings model and the chat model below.
openai_api_key = os.getenv('OPENAI_API_KEY')
# Directory holding the persisted Chroma vector store.
CHROMA_PATH = "chroma"
# Earlier prompt drafts, kept for reference:
#Answer the question based only on the following context:
#Using all relevant details from the context provided, answer the following question comprehensively:
# Prompt used for every RAG query; {context} and {question} are filled at call time.
PROMPT_TEMPLATE = """
Given the context provided, answer the question directly and concisely, using only the necessary details:
Context:
{context}
Question:
{question}
Ensure your answer is as detailed and complete as possible.
---
"""
def start_chat(query_text):
    """Answer a user query against the Chroma store.

    Returns a tuple of (model response, formatted context-and-sources text).
    """
    # Open the persisted vector store with OpenAI embeddings.
    embeddings = OpenAIEmbeddings(api_key=openai_api_key)
    db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embeddings)

    # Retrieve the five best matches for the query.
    results = db.similarity_search_with_relevance_scores(query_text, k=5)

    # Bail out when nothing matched or the best hit scores below 0.7.
    if not results or results[0][1] < 0.7:
        return "Unable to find matching results.", "No sources found."

    # Gather document texts and their source labels.
    context_entries = [doc.page_content for doc, _score in results]
    sources = [doc.metadata.get("source", "Unknown source") for doc, _score in results]
    context_text = "\n\n---\n\n".join(context_entries)
    sources_formatted = '\n'.join(sources)

    # Build the prompt and query the chat model.
    prompt = ChatPromptTemplate.from_template(PROMPT_TEMPLATE).format(
        context=context_text, question=query_text
    )
    model = ChatOpenAI(api_key=openai_api_key)
    response_text = model.predict(prompt)

    # Present the retrieved context alongside its sources for display.
    context_and_sources = "Context Used:\n" + context_text + "\n\nSources:\n" + sources_formatted
    return response_text, context_and_sources
import logging
import os
from dotenv import load_dotenv
import pandas as pd
import gradio as gr
from ragas import evaluate # Ensure this matches the actual import path
from ragas.metrics import context_precision, context_recall, faithfulness, answer_relevancy
from datasets import Dataset # Ensure datasets library is installed
# Set up logging
# DEBUG level so retrieval and evaluation steps are traceable during development.
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s: %(message)s')
# Pipeline functions used by the evaluation tab.
def real_rag_invoke(question):
    """Run the full RAG pipeline for *question* and return the model's answer."""
    # NOTE(review): relies on the module-level openai_api_key and CHROMA_PATH.
    embeddings = OpenAIEmbeddings(api_key=openai_api_key)
    db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embeddings)

    # Fetch the three most relevant chunks for the question.
    results = db.similarity_search_with_relevance_scores(question, k=3)
    if not results or results[0][1] < 0.7:
        # No sufficiently relevant context was found.
        return "Unable to find a matching result"

    # Merge the retrieved chunks into one context string.
    context_text = "\n\n---\n\n".join(doc.page_content for doc, _ in results)

    # Fill in the prompt template and ask the chat model.
    template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
    prompt = template.format(context=context_text, question=question)
    return ChatOpenAI(api_key=openai_api_key).predict(prompt)
def real_get_relevant_documents(question):
    """Return the contents of documents relevant to *question*.

    Each entry is a dict with a single "page_content" key; hits with empty
    content or a relevance score below 0.7 are filtered out.
    """
    embeddings = OpenAIEmbeddings(api_key=openai_api_key)
    db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embeddings)
    hits = db.similarity_search_with_relevance_scores(question, k=3)
    documents = []
    for doc, score in hits:
        if doc.page_content and score >= 0.7:
            documents.append({"page_content": doc.page_content})
    return documents
# Simulated stand-in for ragas.evaluate.
def mock_evaluate(dataset, metrics):
    """Return a fixed DataFrame of plausible metric scores.

    Both arguments are accepted for signature compatibility but ignored.
    """
    simulated_scores = {
        'context_precision': [0.9, 0.8, 0.85],
        'context_recall': [0.95, 0.9, 0.92],
        'faithfulness': [0.96, 0.94, 0.95],
        'answer_relevancy': [0.97, 0.89, 0.93],
    }
    return pd.DataFrame(simulated_scores)
# Main evaluation function
def perform_rag_evaluation():
    """Score the live RAG pipeline with RAGAs and return the results as HTML."""
    logging.info("Preparing evaluation data...")

    # Fixed evaluation set: questions paired with their expected answers.
    questions = [
        "lvet swim pool address?",
        "how many teams are there in odsl?",
        "what does lvet stands for?"
    ]
    ground_truths = [
        ["43624 Lucketts Bridge Cir, Ashburn, VA 20148, USA."],
        ["30 teams."],
        ["Loudoun Valley Torpedoes."]
    ]

    # Run the live pipeline once per question to collect answers and contexts.
    answers = [real_rag_invoke(q) for q in questions]
    contexts = [
        [doc['page_content'] for doc in real_get_relevant_documents(q)]
        for q in questions
    ]

    # Package everything as a Hugging Face dataset for RAGAs.
    dataset = Dataset.from_dict({
        "question": questions,
        "answer": answers,
        "contexts": contexts,
        "ground_truths": ground_truths
    })

    # Score the dataset on the four standard RAGAs metrics.
    result = evaluate(
        dataset=dataset,
        metrics=[
            context_precision,
            context_recall,
            faithfulness,
            answer_relevancy,
        ],
    )
    result_df = result.to_pandas()
    logging.info("Real evaluation completed.")

    # Render the scores as an HTML table for the Gradio HTML component.
    results_html = result_df.to_html(border=1)

    # Static explanation of what each metric means.
    explanation_html = """
<div style='font-size: small;'>
<p><b>context_precision</b>: Precision measures the percentage of relevant contexts retrieved out of all the contexts retrieved.</p>
<p><b>context_recall</b>: Recall measures the percentage of relevant contexts retrieved out of all the relevant contexts available.</p>
<p><b>faithfulness</b>: This metric measures how much the generated answer sticks to the information present in the provided contexts.</p>
<p><b>answer_relevancy</b>: This assesses how relevant the generated answers are to the questions asked.</p>
</div>
"""
    # Table first, then the metric explanations.
    return results_html + explanation_html
def upload_data(file_obj, add_to_existing):
    """Save an uploaded file into UPLOAD_DIR and rebuild the vector store.

    Args:
        file_obj: Uploaded file object from Gradio (None when nothing was
            uploaded; must expose .name and .read()).
        add_to_existing: When False, all previously uploaded files are
            removed before the new one is saved.

    Returns:
        A human-readable feedback string describing what happened.
    """
    # Bug fix: subprocess was used below but never imported anywhere in the
    # file, so both the rebuild step and the except clause raised NameError.
    import subprocess

    feedback = ""  # Feedback message to user
    try:
        if file_obj is None:
            print("Debug: No file uploaded.")
            return "No file was uploaded, please upload a file."

        # NOTE(review): UPLOAD_DIR is not defined in this chunk — confirm it
        # exists at module level.
        os.makedirs(UPLOAD_DIR, exist_ok=True)
        filename = os.path.basename(file_obj.name)
        file_path = os.path.join(UPLOAD_DIR, filename)

        # Count files before potentially removing them.
        num_files_before = len(os.listdir(UPLOAD_DIR))
        print(f"Debug: Number of files before operation: {num_files_before}")

        if not add_to_existing:
            # Replace mode: clear out every previously uploaded file first.
            print("Debug: Removing existing files.")
            for entry in os.listdir(UPLOAD_DIR):
                file_to_remove = os.path.join(UPLOAD_DIR, entry)
                print(f"Debug: Removing file: {file_to_remove}")
                os.remove(file_to_remove)
            feedback = "Replacing the existing files with the new file."

        # Persist the uploaded content.
        print(f"Debug: Saving new file: {file_path}")
        with open(file_path, 'wb') as f:
            f.write(file_obj.read())
        # Bug fix: report the actual filename instead of the literal "(unknown)".
        feedback += f" {filename} uploaded and saved successfully."
        print(feedback)

        # Rebuild the vector store from the uploaded data.
        print("Debug: Executing create_data.py")
        subprocess.run(['python', 'create_data.py'], check=True)
        feedback += " The create_data.py script was executed successfully."
        print(feedback)
        return feedback
    except subprocess.CalledProcessError:
        feedback += " An error occurred while executing create_data.py."
        print(feedback)
        return feedback
    except Exception as e:
        print(f"Debug: Exception occurred: {str(e)}")
        return f"An error occurred: {str(e)}"
#with gr.Blocks(theme=custom_theme) as demo:
# Define your start_chat function here
# ...
# Preset questions offered in the dropdown on the chat tab.
default_questions = [
    "When does the swim season starts?",
    "Detailed Practice Schedule:",
    "I am new parent what do i need to know about registration?",
    "lvet swim pool address",
    "What are the required steps for an individual to complete after attending the Stroke & Turn Clinic to become a certified official?",
    "As a new parent to the swim team, what are some beginner-friendly volunteer roles I could take on during the meets?"
]
def update_query_with_default(question):
    """Return the selected default question so it populates the query textbox."""
    return question
# Gradio interface setup
with gr.Blocks() as demo:
    with gr.Tab("Chat with Your Data"):
        default_q_dropdown = gr.Dropdown(choices=default_questions, label="Default Questions")
        query_input = gr.Textbox(label="Enter your query")
        submit_button = gr.Button("Submit")
        chat_response = gr.Textbox(label="Chatbot Response", lines=4)
        source_response = gr.Textbox(label="Context and Sources", lines=4)
        # When a default question is chosen, update the query input box.
        default_q_dropdown.change(fn=update_query_with_default, inputs=default_q_dropdown, outputs=query_input)
        # When the submit button is clicked, run the start_chat function.
        submit_button.click(fn=start_chat, inputs=query_input, outputs=[chat_response, source_response])
    with gr.Tab("RAG Evaluation"):
        start_evaluation = gr.Button("Start Evaluation")
        evaluation_results = gr.HTML()
        start_evaluation.click(perform_rag_evaluation, inputs=[], outputs=evaluation_results)
    with gr.Tab("Upload Data"):
        file_input = gr.File(label="Upload Your Data")
        # Bug fix: upload_data(file_obj, add_to_existing) takes two arguments,
        # but the original handlers wired only file_input, so every upload
        # raised a missing-argument error. Expose the second argument as a
        # checkbox and pass both inputs.
        add_existing_checkbox = gr.Checkbox(label="Add to existing data", value=False)
        upload_button = gr.Button("Upload")
        upload_result = gr.Textbox()
        # Bug fix: the original also triggered upload_data on file_input.change,
        # duplicating the button's work (and equally broken); the explicit
        # Upload button is now the single trigger.
        upload_button.click(upload_data, inputs=[file_input, add_existing_checkbox], outputs=upload_result)

if __name__ == "__main__":
    demo.launch()