# LVEBOT / Main.py
# Uploaded by middha ("Upload 4 files", commit 70c1fad, verified)
import os
from langchain.vectorstores.chroma import Chroma
from langchain.embeddings import OpenAIEmbeddings
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from dotenv import load_dotenv
import gradio as gr
#from gradio.themes import CustomTheme # Import the CustomTheme class
import os
from langchain.document_loaders import DirectoryLoader
from ragas.testset.generator import TestsetGenerator
from ragas.testset.evolutions import simple, reasoning, multi_context
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
#custom_theme = CustomTheme(css_path="custom_theme.css") # Use css_path argument
# Load environment variables from .env file
load_dotenv()
# API key shared by the embeddings model and the chat model below.
openai_api_key = os.getenv('OPENAI_API_KEY')
# Directory holding the persisted Chroma vector store.
CHROMA_PATH = "chroma"
# Earlier prompt drafts, kept for reference:
#Answer the question based only on the following context:
#Using all relevant details from the context provided, answer the following question comprehensively:
# Prompt used for every RAG query; {context} and {question} are filled at call time.
PROMPT_TEMPLATE = """
Given the context provided, answer the question directly and concisely, using only the necessary details:
Context:
{context}
Question:
{question}
Ensure your answer is as detailed and complete as possible.
---
"""
def start_chat(query_text):
    """Answer a user query against the Chroma store.

    Returns a tuple of (model response, formatted context-and-sources text).
    """
    # Open the persisted vector store with OpenAI embeddings.
    embeddings = OpenAIEmbeddings(api_key=openai_api_key)
    db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embeddings)

    # Retrieve the five best matches for the query.
    results = db.similarity_search_with_relevance_scores(query_text, k=5)

    # Bail out when nothing matched or the best hit scores below 0.7.
    if not results or results[0][1] < 0.7:
        return "Unable to find matching results.", "No sources found."

    # Gather document texts and their source labels.
    context_entries = [doc.page_content for doc, _score in results]
    sources = [doc.metadata.get("source", "Unknown source") for doc, _score in results]
    context_text = "\n\n---\n\n".join(context_entries)
    sources_formatted = '\n'.join(sources)

    # Build the prompt and query the chat model.
    prompt = ChatPromptTemplate.from_template(PROMPT_TEMPLATE).format(
        context=context_text, question=query_text
    )
    model = ChatOpenAI(api_key=openai_api_key)
    response_text = model.predict(prompt)

    # Present the retrieved context alongside its sources for display.
    context_and_sources = "Context Used:\n" + context_text + "\n\nSources:\n" + sources_formatted
    return response_text, context_and_sources
import logging
import os
from dotenv import load_dotenv
import pandas as pd
import gradio as gr
from ragas import evaluate # Ensure this matches the actual import path
from ragas.metrics import context_precision, context_recall, faithfulness, answer_relevancy
from datasets import Dataset # Ensure datasets library is installed
# Set up logging
# DEBUG level so retrieval and evaluation steps are traceable during development.
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s: %(message)s')
# Pipeline functions used by the evaluation tab.
def real_rag_invoke(question):
    """Run the full RAG pipeline for *question* and return the model's answer."""
    # NOTE(review): relies on the module-level openai_api_key and CHROMA_PATH.
    embeddings = OpenAIEmbeddings(api_key=openai_api_key)
    db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embeddings)

    # Fetch the three most relevant chunks for the question.
    results = db.similarity_search_with_relevance_scores(question, k=3)
    if not results or results[0][1] < 0.7:
        # No sufficiently relevant context was found.
        return "Unable to find a matching result"

    # Merge the retrieved chunks into one context string.
    context_text = "\n\n---\n\n".join(doc.page_content for doc, _ in results)

    # Fill in the prompt template and ask the chat model.
    template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
    prompt = template.format(context=context_text, question=question)
    return ChatOpenAI(api_key=openai_api_key).predict(prompt)
def real_get_relevant_documents(question):
    """Return the contents of documents relevant to *question*.

    Each entry is a dict with a single "page_content" key; hits with empty
    content or a relevance score below 0.7 are filtered out.
    """
    embeddings = OpenAIEmbeddings(api_key=openai_api_key)
    db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embeddings)
    hits = db.similarity_search_with_relevance_scores(question, k=3)
    documents = []
    for doc, score in hits:
        if doc.page_content and score >= 0.7:
            documents.append({"page_content": doc.page_content})
    return documents
# Simulated stand-in for ragas.evaluate.
def mock_evaluate(dataset, metrics):
    """Return a fixed DataFrame of plausible metric scores.

    Both arguments are accepted for signature compatibility but ignored.
    """
    simulated_scores = {
        'context_precision': [0.9, 0.8, 0.85],
        'context_recall': [0.95, 0.9, 0.92],
        'faithfulness': [0.96, 0.94, 0.95],
        'answer_relevancy': [0.97, 0.89, 0.93],
    }
    return pd.DataFrame(simulated_scores)
# Main evaluation function
def perform_rag_evaluation():
    """Score the live RAG pipeline with RAGAs and return the results as HTML."""
    logging.info("Preparing evaluation data...")

    # Fixed evaluation set: questions paired with their expected answers.
    questions = [
        "lvet swim pool address?",
        "how many teams are there in odsl?",
        "what does lvet stands for?"
    ]
    ground_truths = [
        ["43624 Lucketts Bridge Cir, Ashburn, VA 20148, USA."],
        ["30 teams."],
        ["Loudoun Valley Torpedoes."]
    ]

    # Run the live pipeline once per question to collect answers and contexts.
    answers = [real_rag_invoke(q) for q in questions]
    contexts = [
        [doc['page_content'] for doc in real_get_relevant_documents(q)]
        for q in questions
    ]

    # Package everything as a Hugging Face dataset for RAGAs.
    dataset = Dataset.from_dict({
        "question": questions,
        "answer": answers,
        "contexts": contexts,
        "ground_truths": ground_truths
    })

    # Score the dataset on the four standard RAGAs metrics.
    result = evaluate(
        dataset=dataset,
        metrics=[
            context_precision,
            context_recall,
            faithfulness,
            answer_relevancy,
        ],
    )
    result_df = result.to_pandas()
    logging.info("Real evaluation completed.")

    # Render the scores as an HTML table for the Gradio HTML component.
    results_html = result_df.to_html(border=1)

    # Static explanation of what each metric means.
    explanation_html = """
<div style='font-size: small;'>
<p><b>context_precision</b>: Precision measures the percentage of relevant contexts retrieved out of all the contexts retrieved.</p>
<p><b>context_recall</b>: Recall measures the percentage of relevant contexts retrieved out of all the relevant contexts available.</p>
<p><b>faithfulness</b>: This metric measures how much the generated answer sticks to the information present in the provided contexts.</p>
<p><b>answer_relevancy</b>: This assesses how relevant the generated answers are to the questions asked.</p>
</div>
"""
    # Table first, then the metric explanations.
    return results_html + explanation_html
def upload_data(file_obj, add_to_existing):
    """Save an uploaded file into UPLOAD_DIR and rebuild the vector store.

    Args:
        file_obj: Uploaded file object from Gradio (None when nothing was
            uploaded; must expose .name and .read()).
        add_to_existing: When False, all previously uploaded files are
            removed before the new one is saved.

    Returns:
        A human-readable feedback string describing what happened.
    """
    # Bug fix: subprocess was used below but never imported anywhere in the
    # file, so both the rebuild step and the except clause raised NameError.
    import subprocess

    feedback = ""  # Feedback message to user
    try:
        if file_obj is None:
            print("Debug: No file uploaded.")
            return "No file was uploaded, please upload a file."

        # NOTE(review): UPLOAD_DIR is not defined in this chunk — confirm it
        # exists at module level.
        os.makedirs(UPLOAD_DIR, exist_ok=True)
        filename = os.path.basename(file_obj.name)
        file_path = os.path.join(UPLOAD_DIR, filename)

        # Count files before potentially removing them.
        num_files_before = len(os.listdir(UPLOAD_DIR))
        print(f"Debug: Number of files before operation: {num_files_before}")

        if not add_to_existing:
            # Replace mode: clear out every previously uploaded file first.
            print("Debug: Removing existing files.")
            for entry in os.listdir(UPLOAD_DIR):
                file_to_remove = os.path.join(UPLOAD_DIR, entry)
                print(f"Debug: Removing file: {file_to_remove}")
                os.remove(file_to_remove)
            feedback = "Replacing the existing files with the new file."

        # Persist the uploaded content.
        print(f"Debug: Saving new file: {file_path}")
        with open(file_path, 'wb') as f:
            f.write(file_obj.read())
        # Bug fix: report the actual filename instead of the literal "(unknown)".
        feedback += f" {filename} uploaded and saved successfully."
        print(feedback)

        # Rebuild the vector store from the uploaded data.
        print("Debug: Executing create_data.py")
        subprocess.run(['python', 'create_data.py'], check=True)
        feedback += " The create_data.py script was executed successfully."
        print(feedback)
        return feedback
    except subprocess.CalledProcessError:
        feedback += " An error occurred while executing create_data.py."
        print(feedback)
        return feedback
    except Exception as e:
        print(f"Debug: Exception occurred: {str(e)}")
        return f"An error occurred: {str(e)}"
#with gr.Blocks(theme=custom_theme) as demo:
# Define your start_chat function here
# ...
# Preset questions offered in the dropdown on the chat tab.
default_questions = [
    "When does the swim season starts?",
    "Detailed Practice Schedule:",
    "I am new parent what do i need to know about registration?",
    "lvet swim pool address",
    "What are the required steps for an individual to complete after attending the Stroke & Turn Clinic to become a certified official?",
    "As a new parent to the swim team, what are some beginner-friendly volunteer roles I could take on during the meets?"
]
def update_query_with_default(question):
    """Return the selected default question so it populates the query textbox."""
    return question
# Gradio interface setup
with gr.Blocks() as demo:
    with gr.Tab("Chat with Your Data"):
        default_q_dropdown = gr.Dropdown(choices=default_questions, label="Default Questions")
        query_input = gr.Textbox(label="Enter your query")
        submit_button = gr.Button("Submit")
        chat_response = gr.Textbox(label="Chatbot Response", lines=4)
        source_response = gr.Textbox(label="Context and Sources", lines=4)
        # When a default question is chosen, update the query input box.
        default_q_dropdown.change(fn=update_query_with_default, inputs=default_q_dropdown, outputs=query_input)
        # When the submit button is clicked, run the start_chat function.
        submit_button.click(fn=start_chat, inputs=query_input, outputs=[chat_response, source_response])
    with gr.Tab("RAG Evaluation"):
        start_evaluation = gr.Button("Start Evaluation")
        evaluation_results = gr.HTML()
        start_evaluation.click(perform_rag_evaluation, inputs=[], outputs=evaluation_results)
    with gr.Tab("Upload Data"):
        file_input = gr.File(label="Upload Your Data")
        # Bug fix: upload_data(file_obj, add_to_existing) takes two arguments,
        # but the original handlers wired only file_input, so every upload
        # raised a missing-argument error. Expose the second argument as a
        # checkbox and pass both inputs.
        add_existing_checkbox = gr.Checkbox(label="Add to existing data", value=False)
        upload_button = gr.Button("Upload")
        upload_result = gr.Textbox()
        # Bug fix: the original also triggered upload_data on file_input.change,
        # duplicating the button's work (and equally broken); the explicit
        # Upload button is now the single trigger.
        upload_button.click(upload_data, inputs=[file_input, add_existing_checkbox], outputs=upload_result)

if __name__ == "__main__":
    demo.launch()