# astra/app/src/eval_rag.py
# Initial Commit (8f624ec) by olamideba
# import sys
# sys.path.append('app/src')
from astra import astra_rag_eval
from llm import groq_chat, CHAT_MODEL
from chroma import search_eval
from typing import Any
# Preamble given to the LLM-only baseline in generate_responses_llm: asks for a
# single concise answer (<= 60 words) to the interpolated {question}.
LLM_ANSWER_GEN_TEMPLATE = """\
Generate one brief and informative answer to the following question: {question}. \
The answer should be concise, relevant, and not exceed 60 words in length.
"""
import json
import json
from tqdm import tqdm
import time
def generate_responses_llm(questions_file: str, output_file: str, model: CHAT_MODEL="mixtral-8x7b-32768", batch_size: int = 30, delay_between_batches: int = 10):
    """
    Generate an LLM-only (no retrieval) answer for every question in
    ``questions_file`` and write the pairs to ``output_file`` as a JSON list
    of ``{"question": ..., "answer": ...}`` objects.

    Questions are processed in batches of ``batch_size`` with a pause of
    ``delay_between_batches`` seconds between batches to stay within the
    provider's rate limits.

    Args:
        questions_file: Path to a JSON file shaped like {"question": [...]}.
        output_file: Destination path for the JSON list of Q/A pairs.
        model: Groq chat model identifier forwarded to ``groq_chat``.
        batch_size: Number of questions handled between rate-limit pauses.
        delay_between_batches: Seconds to sleep between batches.
    """
    responses = []  # list of {"question": ..., "answer": ...} pairs
    with open(questions_file, 'r') as f_questions:
        data = json.load(f_questions)
    questions = data["question"]
    num_questions = len(questions)
    # Ceiling division so a trailing partial batch is counted in the progress
    # bar total (plain floor division under-reports it).
    num_batches = (num_questions + batch_size - 1) // batch_size
    for i in tqdm(range(0, num_questions, batch_size), desc="Generating responses", total=num_batches):
        for question in questions[i:i + batch_size]:
            # Generate response using the LLM alone (no retrieved context).
            answer = groq_chat(
                message=question,
                preamble=LLM_ANSWER_GEN_TEMPLATE,  # short answer-generation prompt
                model=model,
            ).choices[0].message.content
            responses.append({"question": question, "answer": answer})
        # Rate-limit pause between batches; skip it after the final batch.
        if i + batch_size < num_questions:
            time.sleep(delay_between_batches)
    # Save responses to JSON file
    with open(output_file, 'w') as f_output:
        json.dump(responses, f_output, indent=4)
import json
from typing import Any
from tqdm import tqdm
import time
def generate_responses_rag(questions_file: str, output_file: str, model: CHAT_MODEL="mixtral-8x7b-32768", batch_size: int = 30, delay_between_batches: int = 10):
    """
    Generate a RAG answer for every question in ``questions_file`` and write
    the pairs to ``output_file`` as a JSON list of
    ``{"question": ..., "answer": ...}`` objects.

    Each question is embedded and looked up via ``search_eval`` (top-3 docs
    from the fine-tuned BGE index); the retrieved documents are passed to
    ``astra_rag_eval``. Questions with no retrieval hits get a canned
    fallback answer. Batches are separated by ``delay_between_batches``
    seconds to respect rate limits.

    Args:
        questions_file: Path to a JSON list of {"question": ...} objects.
        output_file: Destination path for the JSON list of Q/A pairs.
        model: Kept for signature parity with ``generate_responses_llm``;
            currently unused — ``astra_rag_eval`` picks its own model.
        batch_size: Number of questions handled between rate-limit pauses.
        delay_between_batches: Seconds to sleep between batches.
    """
    responses = []  # list of {"question": ..., "answer": ...} pairs
    with open(questions_file, 'r') as f_questions:
        data = json.load(f_questions)
    num_questions = len(data)
    # Ceiling division so a trailing partial batch is counted in the progress
    # bar total (plain floor division under-reports it).
    num_batches = (num_questions + batch_size - 1) // batch_size
    for i in tqdm(range(0, num_questions, batch_size), desc="Generating responses", total=num_batches):
        for idx, item in enumerate(data[i:i + batch_size]):
            question = item["question"]
            print(question)
            # Retrieve supporting documents for this question.
            context = search_eval(query=question, k=3, model_name_or_path="models/bge-large_finetuned")
            if not context:
                # No retrieval hits: fall back to a canned "no information" reply.
                answer = "I'm sorry, I don't have any information on that. Feel free to ask me anything else."
            else:
                answer = astra_rag_eval(
                    prompt=question,
                    context=[result["doc"] for result in context]
                )
            responses.append({"question": question, "answer": answer})
            print(f"{i+idx+1} questions answered")
        # Rate-limit pause between batches; skip it after the final batch.
        if i + batch_size < num_questions:
            time.sleep(delay_between_batches)
    # Save responses to JSON file
    with open(output_file, 'w') as f_output:
        json.dump(responses, f_output, indent=4)
if __name__ == "__main__":
    # Run the RAG evaluation pass only when executed as a script, so importing
    # this module no longer triggers the slow, network-bound generation.
    generate_responses_rag(
        questions_file='app/evaluations/eval_data/question_answer_pairs-min.json',
        output_file='app/evaluations/eval_data/rag_bge_large_finetuned_response_qa.json',
    )