import os

from dotenv import load_dotenv
import gradio as gr
from langchain.vectorstores.chroma import Chroma
from langchain.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

# Load environment variables from .env file
load_dotenv()

# Retrieve the API key
openai_api_key = os.getenv('OPENAI_API_KEY')

CHROMA_PATH = "chroma"

PROMPT_TEMPLATE = """
Given the context provided, answer the question directly and concisely, using only the necessary details:

Context:
{context}

Question:
{question}

Ensure your answer is as detailed and complete as possible.

---
"""


def start_chat(query_text):
    """Handle a chat turn, returning a response grounded in the retrieved context."""
    # Prepare the DB.
    embedding_function = OpenAIEmbeddings(api_key=openai_api_key)
    db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embedding_function)

    # Search the DB.
    results = db.similarity_search_with_relevance_scores(query_text, k=5)
    if len(results) == 0 or results[0][1] < 0.7:
        return "Unable to find matching results.", "No sources found."

    # Collect context and source paths from the retrieved documents.
    context_entries = []
    sources = []
    for doc, _score in results:
        context_entries.append(doc.page_content)
        sources.append(doc.metadata.get("source", "Unknown source"))
    context_text = "\n\n---\n\n".join(context_entries)
    sources_formatted = "\n".join(sources)

    # Prepare the prompt.
    prompt_template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
    prompt = prompt_template.format(context=context_text, question=query_text)

    # Get the response from the model (invoke returns an AIMessage; take its text).
    model = ChatOpenAI(api_key=openai_api_key)
    response_text = model.invoke(prompt).content

    # Format the context and sources for display.
    context_and_sources = "Context Used:\n" + context_text + "\n\nSources:\n" + sources_formatted
    return response_text, context_and_sources


# --- RAGAs evaluation --------------------------------------------------------

import logging

import pandas as pd
from datasets import Dataset  # Requires the `datasets` library
from ragas import evaluate
from ragas.metrics import answer_relevancy, context_precision, context_recall, faithfulness

# Set up logging
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s: %(message)s')


def real_rag_invoke(question):
    """Generate a response for the provided question using the RAG pipeline above."""
    # Assumes openai_api_key and CHROMA_PATH are already set.
    embedding_function = OpenAIEmbeddings(api_key=openai_api_key)
    db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embedding_function)

    # Retrieve the top contexts for the question.
    results = db.similarity_search_with_relevance_scores(question, k=3)
    if len(results) == 0 or results[0][1] < 0.7:
        return "Unable to find a matching result"  # Handle this case according to your requirements.

    # Combine the contexts.
    context_text = "\n\n---\n\n".join([doc.page_content for doc, _score in results])

    # Prepare and send the prompt.
    prompt_template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
    prompt = prompt_template.format(context=context_text, question=question)
    model = ChatOpenAI(api_key=openai_api_key)
    return model.invoke(prompt).content


def real_get_relevant_documents(question):
    """Retrieve documents relevant to the provided question."""
    # Assumes openai_api_key and CHROMA_PATH are already set.
    embedding_function = OpenAIEmbeddings(api_key=openai_api_key)
    db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embedding_function)

    # Search the database for relevant documents.
    results = db.similarity_search_with_relevance_scores(question, k=3)

    # Keep only sufficiently relevant, non-empty documents.
    documents = [
        {"page_content": doc.page_content}
        for doc, score in results
        if doc.page_content and score >= 0.7
    ]
    return documents


def mock_evaluate(dataset, metrics):
    """Simulated evaluation results, kept as an offline fallback; not called below."""
    results = {
        'context_precision': [0.9, 0.8, 0.85],
        'context_recall': [0.95, 0.9, 0.92],
        'faithfulness': [0.96, 0.94, 0.95],
        'answer_relevancy': [0.97, 0.89, 0.93]
    }
    return pd.DataFrame(results)


# Main evaluation function
def perform_rag_evaluation():
    logging.info("Preparing evaluation data...")

    # Prepare the evaluation data.
    questions = [
        "lvet swim pool address?",
        "how many teams are there in odsl?",
        "what does lvet stands for?"
    ]
    ground_truths = [
        ["43624 Lucketts Bridge Cir, Ashburn, VA 20148, USA."],
        ["30 teams."],
        ["Loudoun Valley Torpedoes."]
    ]
    answers = []
    contexts = []

    # Run inference with the real pipeline.
    for query in questions:
        answers.append(real_rag_invoke(query))
        contexts.append([doc['page_content'] for doc in real_get_relevant_documents(query)])

    # Convert the data to a Hugging Face dataset.
    # Note: newer ragas versions expect a "ground_truth" string column instead of "ground_truths".
    data = {
        "question": questions,
        "answer": answers,
        "contexts": contexts,
        "ground_truths": ground_truths
    }
    dataset = Dataset.from_dict(data)

    # Real evaluation using RAGAs.
    result = evaluate(
        dataset=dataset,
        metrics=[
            context_precision,
            context_recall,
            faithfulness,
            answer_relevancy,
        ],
    )
    result_df = result.to_pandas()
    logging.info("Real evaluation completed.")

    results_html = result_df.to_html(border=1)  # Convert the DataFrame to HTML for output.

    # Explanation of each metric, shown alongside the results.
    explanation_html = """
    <ul>
      <li><b>context_precision</b>: the fraction of retrieved contexts that are relevant, out of all contexts retrieved (e.g., 2 relevant out of 3 retrieved gives 2/3).</li>
      <li><b>context_recall</b>: the fraction of the available relevant contexts that were actually retrieved (e.g., 2 retrieved out of 4 relevant gives 1/2).</li>
      <li><b>faithfulness</b>: how closely the generated answer sticks to the information present in the retrieved contexts.</li>
      <li><b>answer_relevancy</b>: how relevant the generated answer is to the question asked.</li>
    </ul>
    """
    return results_html + explanation_html
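

# ---------------------------------------------------------------------------
# Minimal sketch (assumption, not original code): one plausible way to wire
# start_chat and perform_rag_evaluation into the Gradio import above. The
# Blocks layout, component labels, and the `demo` name are illustrative
# guesses; only the two functions themselves come from this file.
# ---------------------------------------------------------------------------
with gr.Blocks() as demo:
    with gr.Tab("Chat"):
        query_box = gr.Textbox(label="Question")
        answer_box = gr.Textbox(label="Answer")
        context_box = gr.Textbox(label="Context and sources", lines=8)
        ask_btn = gr.Button("Ask")
        # start_chat returns (response_text, context_and_sources).
        ask_btn.click(start_chat, inputs=query_box, outputs=[answer_box, context_box])
    with gr.Tab("Evaluate"):
        eval_btn = gr.Button("Run RAGAs evaluation")
        eval_out = gr.HTML()
        # perform_rag_evaluation returns an HTML string (results table + metric notes).
        eval_btn.click(perform_rag_evaluation, inputs=None, outputs=eval_out)

if __name__ == "__main__":
    demo.launch()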