import os
import shutil  # used by upload_data to copy the uploaded temp file by path
import subprocess  # upload_data shells out to create_data.py

from dotenv import load_dotenv
import gradio as gr
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import DirectoryLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.prompts import ChatPromptTemplate
from langchain.vectorstores.chroma import Chroma
# NOTE: langchain_openai must stay after the legacy langchain imports so its
# ChatOpenAI / OpenAIEmbeddings remain the active bindings, as in the original.
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from ragas.testset.generator import TestsetGenerator
from ragas.testset.evolutions import simple, reasoning, multi_context
| |
|
|
| |
# Load environment variables from a local .env file (e.g. OPENAI_API_KEY).
load_dotenv()

# OpenAI credentials pulled from the environment; None if the variable is unset.
openai_api_key = os.getenv('OPENAI_API_KEY')

# Directory holding the persisted Chroma vector store.
CHROMA_PATH = "chroma"

# Prompt skeleton used for every chat completion; `{context}` receives the
# retrieved document snippets and `{question}` the user's query.
PROMPT_TEMPLATE = """
Given the context provided, answer the question directly and concisely, using only the necessary details:

Context:
{context}

Question:
{question}

Ensure your answer is as detailed and complete as possible.
---
"""
|
|
|
|
def start_chat(query_text):
    """Answer a user query against the Chroma store and report the evidence.

    Returns a (response, context_and_sources) pair of strings matching the
    two Gradio output textboxes.
    """
    # Open the persisted vector store with the same embedding model that built it.
    db = Chroma(
        persist_directory=CHROMA_PATH,
        embedding_function=OpenAIEmbeddings(api_key=openai_api_key),
    )

    # Retrieve the five most similar chunks; bail out when nothing clears the
    # 0.7 relevance bar (results arrive best-first, so checking [0] suffices).
    results = db.similarity_search_with_relevance_scores(query_text, k=5)
    if not results or results[0][1] < 0.7:
        return "Unable to find matching results.", "No sources found."

    context_text = "\n\n---\n\n".join(doc.page_content for doc, _score in results)
    sources_formatted = '\n'.join(
        doc.metadata.get("source", "Unknown source") for doc, _score in results
    )

    # Fill the shared template and request a completion from the chat model.
    prompt = ChatPromptTemplate.from_template(PROMPT_TEMPLATE).format(
        context=context_text, question=query_text
    )
    response_text = ChatOpenAI(api_key=openai_api_key).predict(prompt)

    context_and_sources = (
        "Context Used:\n" + context_text + "\n\nSources:\n" + sources_formatted
    )
    return response_text, context_and_sources
|
|
| import logging |
| import os |
| from dotenv import load_dotenv |
| import pandas as pd |
| import gradio as gr |
| from ragas import evaluate |
| from ragas.metrics import context_precision, context_recall, faithfulness, answer_relevancy |
|
|
|
|
|
|
| from datasets import Dataset |
|
|
| |
# Verbose (DEBUG) logging for the evaluation pipeline; timestamps every message.
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s: %(message)s')
|
|
| |
def real_rag_invoke(question):
    """Run the RAG pipeline for one question and return the model's answer.

    Mirrors start_chat but retrieves only three chunks and returns just the
    answer text (no source listing), as required by the evaluation loop.
    """
    embeddings = OpenAIEmbeddings(api_key=openai_api_key)
    store = Chroma(persist_directory=CHROMA_PATH, embedding_function=embeddings)

    hits = store.similarity_search_with_relevance_scores(question, k=3)
    # Hits are ordered best-first; reject the query when the top hit is weak.
    if not hits or hits[0][1] < 0.7:
        return "Unable to find a matching result"

    context_text = "\n\n---\n\n".join(doc.page_content for doc, _ in hits)

    # Build the prompt from the shared template and ask the chat model.
    template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
    filled_prompt = template.format(context=context_text, question=question)
    chat_model = ChatOpenAI(api_key=openai_api_key)
    return chat_model.predict(filled_prompt)
|
|
|
|
def real_get_relevant_documents(question):
    """Return the retrieved contexts for *question* as a list of dicts.

    Each entry has a single "page_content" key; hits with an empty body or a
    relevance score below 0.7 are dropped.
    """
    store = Chroma(
        persist_directory=CHROMA_PATH,
        embedding_function=OpenAIEmbeddings(api_key=openai_api_key),
    )
    scored_hits = store.similarity_search_with_relevance_scores(question, k=3)

    documents = []
    for doc, score in scored_hits:
        if doc.page_content and score >= 0.7:
            documents.append({"page_content": doc.page_content})
    return documents
|
|
|
|
| |
def mock_evaluate(dataset, metrics):
    """Return canned metric scores for offline testing of the evaluation UI.

    The *dataset* and *metrics* arguments are accepted only for signature
    compatibility with ragas.evaluate and are deliberately ignored.
    """
    canned_scores = {
        'context_precision': [0.9, 0.8, 0.85],
        'context_recall': [0.95, 0.9, 0.92],
        'faithfulness': [0.96, 0.94, 0.95],
        'answer_relevancy': [0.97, 0.89, 0.93],
    }
    return pd.DataFrame(canned_scores)
|
|
| |
def perform_rag_evaluation():
    """Evaluate the live RAG pipeline with ragas and return an HTML report.

    Runs a small fixed question set through the pipeline, scores the answers
    with four ragas metrics, and appends a legend explaining each metric.
    """
    logging.info("Preparing evaluation data...")

    # Fixed evaluation set: questions paired with reference ("ground truth") answers.
    questions = [
        "lvet swim pool address?",
        "how many teams are there in odsl?",
        "what does lvet stands for?"
    ]
    ground_truths = [
        ["43624 Lucketts Bridge Cir, Ashburn, VA 20148, USA."],
        ["30 teams."],
        ["Loudoun Valley Torpedoes."]
    ]

    # Query the live pipeline once per question, collecting the generated
    # answer and the retriever's contexts side by side.
    answers = []
    contexts = []
    for question in questions:
        answers.append(real_rag_invoke(question))
        retrieved = real_get_relevant_documents(question)
        contexts.append([entry['page_content'] for entry in retrieved])

    dataset = Dataset.from_dict({
        "question": questions,
        "answer": answers,
        "contexts": contexts,
        "ground_truths": ground_truths
    })

    # Score the dataset with ragas' four standard RAG metrics.
    result = evaluate(
        dataset=dataset,
        metrics=[
            context_precision,
            context_recall,
            faithfulness,
            answer_relevancy,
        ],
    )
    result_df = result.to_pandas()

    logging.info("Real evaluation completed.")
    results_html = result_df.to_html(border=1)

    # Static legend rendered below the metric table.
    explanation_html = """
    <div style='font-size: small;'>
    <p><b>context_precision</b>: Precision measures the percentage of relevant contexts retrieved out of all the contexts retrieved.</p>
    <p><b>context_recall</b>: Recall measures the percentage of relevant contexts retrieved out of all the relevant contexts available.</p>
    <p><b>faithfulness</b>: This metric measures how much the generated answer sticks to the information present in the provided contexts.</p>
    <p><b>answer_relevancy</b>: This assesses how relevant the generated answers are to the questions asked.</p>
    </div>
    """

    return results_html + explanation_html
|
|
def upload_data(file_obj, add_to_existing=False):
    """Save an uploaded file into UPLOAD_DIR and re-run the indexing script.

    Parameters
    ----------
    file_obj : gradio file wrapper or None
        The uploaded file; gradio exposes the temp-file path as ``.name``.
    add_to_existing : bool, default False
        When False, existing files in UPLOAD_DIR are removed first.
        The default exists because the Gradio handlers wire only the file
        component as input and so call this with a single argument
        (previously a guaranteed TypeError).

    Returns
    -------
    str
        Human-readable feedback describing what happened (or went wrong).
    """
    feedback = ""
    try:
        if file_obj is None:
            print("Debug: No file uploaded.")
            return "No file was uploaded, please upload a file."

        # NOTE(review): UPLOAD_DIR is assumed to be a module-level constant
        # defined elsewhere in this file/project — confirm it exists.
        os.makedirs(UPLOAD_DIR, exist_ok=True)

        filename = os.path.basename(file_obj.name)
        file_path = os.path.join(UPLOAD_DIR, filename)

        num_files_before = len(os.listdir(UPLOAD_DIR))
        print(f"Debug: Number of files before operation: {num_files_before}")

        if not add_to_existing:
            # Replace mode: clear out everything previously uploaded.
            print("Debug: Removing existing files.")
            for entry in os.listdir(UPLOAD_DIR):
                file_to_remove = os.path.join(UPLOAD_DIR, entry)
                print(f"Debug: Removing file: {file_to_remove}")
                os.remove(file_to_remove)
            feedback = "Replacing the existing files with the new file."

        # Bug fix: copy by path instead of calling file_obj.read() — gradio
        # hands over an already-written temp file whose read pointer sits at
        # EOF, so read() would write an empty file.
        print(f"Debug: Saving new file: {file_path}")
        shutil.copyfile(file_obj.name, file_path)
        # Bug fix: the message previously printed the literal "(unknown)"
        # instead of the uploaded file's name.
        feedback += f" {filename} uploaded and saved successfully."
        print(feedback)

        # Rebuild the vector store from the freshly uploaded data.
        print("Debug: Executing create_data.py")
        subprocess.run(['python', 'create_data.py'], check=True)
        feedback += " The create_data.py script was executed successfully."
        print(feedback)

        return feedback
    except subprocess.CalledProcessError:
        feedback += " An error occurred while executing create_data.py."
        print(feedback)
        return feedback
    except Exception as e:
        print(f"Debug: Exception occurred: {str(e)}")
        return f"An error occurred: {str(e)}"
| |
|
|
|
|
| |
| |
|
|
| |
# Canned questions offered in the "Default Questions" dropdown on the chat tab.
default_questions = [
    "When does the swim season starts?",
    "Detailed Practice Schedule:",
    "I am new parent what do i need to know about registration?",
    "lvet swim pool address",
    "What are the required steps for an individual to complete after attending the Stroke & Turn Clinic to become a certified official?",
    "As a new parent to the swim team, what are some beginner-friendly volunteer roles I could take on during the meets?"
]
|
|
def update_query_with_default(question):
    """Copy the selected sample question into the query textbox.

    Gradio passes the dropdown's current value; returning it unchanged routes
    it straight into the query input component.
    """
    return question
|
|
| |
# --- Gradio UI: three tabs for chat, evaluation, and data upload. ----------
with gr.Blocks() as demo:
    with gr.Tab("Chat with Your Data"):
        default_q_dropdown = gr.Dropdown(choices=default_questions, label="Default Questions")
        query_input = gr.Textbox(label="Enter your query")
        submit_button = gr.Button("Submit")
        chat_response = gr.Textbox(label="Chatbot Response", lines=4)
        source_response = gr.Textbox(label="Context and Sources", lines=4)

        # Selecting a canned question copies it into the query box.
        default_q_dropdown.change(fn=update_query_with_default, inputs=default_q_dropdown, outputs=query_input)
        submit_button.click(fn=start_chat, inputs=query_input, outputs=[chat_response, source_response])

    with gr.Tab("RAG Evaluation"):
        start_evaluation = gr.Button("Start Evaluation")
        evaluation_results = gr.HTML()
        start_evaluation.click(perform_rag_evaluation, inputs=[], outputs=evaluation_results)

    with gr.Tab("Upload Data"):
        file_input = gr.File(label="Upload Your Data")
        # Bug fix: upload_data takes (file_obj, add_to_existing), but only the
        # file component was wired in, so every upload raised a
        # missing-argument error. A checkbox now supplies the second argument.
        add_to_existing_box = gr.Checkbox(label="Add to existing data (uncheck to replace)", value=False)
        upload_button = gr.Button("Upload")
        upload_result = gr.Textbox()
        # Bug fix: the old file_input.change(...) wiring triggered an upload
        # (and a create_data.py run) immediately on file selection AND again
        # on the button click; only the explicit button triggers it now.
        upload_button.click(upload_data, inputs=[file_input, add_to_existing_box], outputs=upload_result)


if __name__ == "__main__":
    # Launch the Gradio app when executed as a script.
    demo.launch()
|
|
|
|