# astra/app/src/eval_rag.py
# Initial Commit (8f624ec) by olamideba
# import sys
# sys.path.append('app/src')
from astra import astra_rag_eval
from llm import groq_chat, CHAT_MODEL
from chroma import search_eval
from typing import Any
# Preamble given to the LLM-only baseline in generate_responses_llm: asks for a
# single concise answer (<= 60 words) to the interpolated {question}.
LLM_ANSWER_GEN_TEMPLATE = """\
Generate one brief and informative answer to the following question: {question}. \
The answer should be concise, relevant, and not exceed 60 words in length.
"""
import json
import json
from tqdm import tqdm
import time
def generate_responses_llm(questions_file: str, output_file: str, model: CHAT_MODEL="mixtral-8x7b-32768", batch_size: int = 30, delay_between_batches: int = 10):
    """
    Generate an LLM-only (no retrieval) answer for every question in
    ``questions_file`` and write the pairs to ``output_file`` as a JSON list
    of ``{"question": ..., "answer": ...}`` objects.

    Questions are processed in batches of ``batch_size`` with a pause of
    ``delay_between_batches`` seconds between batches to stay within the
    provider's rate limits.

    Args:
        questions_file: Path to a JSON file shaped like {"question": [...]}.
        output_file: Destination path for the JSON list of Q/A pairs.
        model: Groq chat model identifier forwarded to ``groq_chat``.
        batch_size: Number of questions handled between rate-limit pauses.
        delay_between_batches: Seconds to sleep between batches.
    """
    responses = []  # list of {"question": ..., "answer": ...} pairs
    with open(questions_file, 'r') as f_questions:
        data = json.load(f_questions)
    questions = data["question"]
    num_questions = len(questions)
    # Ceiling division so a trailing partial batch is counted in the progress
    # bar total (plain floor division under-reports it).
    num_batches = (num_questions + batch_size - 1) // batch_size
    for i in tqdm(range(0, num_questions, batch_size), desc="Generating responses", total=num_batches):
        for question in questions[i:i + batch_size]:
            # Generate response using the LLM alone (no retrieved context).
            answer = groq_chat(
                message=question,
                preamble=LLM_ANSWER_GEN_TEMPLATE,  # short answer-generation prompt
                model=model,
            ).choices[0].message.content
            responses.append({"question": question, "answer": answer})
        # Rate-limit pause between batches; skip it after the final batch.
        if i + batch_size < num_questions:
            time.sleep(delay_between_batches)
    # Save responses to JSON file
    with open(output_file, 'w') as f_output:
        json.dump(responses, f_output, indent=4)
import json
from typing import Any
from tqdm import tqdm
import time
def generate_responses_rag(questions_file: str, output_file: str, model: CHAT_MODEL="mixtral-8x7b-32768", batch_size: int = 30, delay_between_batches: int = 10):
    """
    Generate a RAG answer for every question in ``questions_file`` and write
    the pairs to ``output_file`` as a JSON list of
    ``{"question": ..., "answer": ...}`` objects.

    Each question is embedded and looked up via ``search_eval`` (top-3 docs
    from the fine-tuned BGE index); the retrieved documents are passed to
    ``astra_rag_eval``. Questions with no retrieval hits get a canned
    fallback answer. Batches are separated by ``delay_between_batches``
    seconds to respect rate limits.

    Args:
        questions_file: Path to a JSON list of {"question": ...} objects.
        output_file: Destination path for the JSON list of Q/A pairs.
        model: Kept for signature parity with ``generate_responses_llm``;
            currently unused — ``astra_rag_eval`` picks its own model.
        batch_size: Number of questions handled between rate-limit pauses.
        delay_between_batches: Seconds to sleep between batches.
    """
    responses = []  # list of {"question": ..., "answer": ...} pairs
    with open(questions_file, 'r') as f_questions:
        data = json.load(f_questions)
    num_questions = len(data)
    # Ceiling division so a trailing partial batch is counted in the progress
    # bar total (plain floor division under-reports it).
    num_batches = (num_questions + batch_size - 1) // batch_size
    for i in tqdm(range(0, num_questions, batch_size), desc="Generating responses", total=num_batches):
        for idx, item in enumerate(data[i:i + batch_size]):
            question = item["question"]
            print(question)
            # Retrieve supporting documents for this question.
            context = search_eval(query=question, k=3, model_name_or_path="models/bge-large_finetuned")
            if not context:
                # No retrieval hits: fall back to a canned "no information" reply.
                answer = "I'm sorry, I don't have any information on that. Feel free to ask me anything else."
            else:
                answer = astra_rag_eval(
                    prompt=question,
                    context=[result["doc"] for result in context]
                )
            responses.append({"question": question, "answer": answer})
            print(f"{i+idx+1} questions answered")
        # Rate-limit pause between batches; skip it after the final batch.
        if i + batch_size < num_questions:
            time.sleep(delay_between_batches)
    # Save responses to JSON file
    with open(output_file, 'w') as f_output:
        json.dump(responses, f_output, indent=4)
if __name__ == "__main__":
    # Run the RAG evaluation pass only when executed as a script, so importing
    # this module no longer triggers the slow, network-bound generation.
    generate_responses_rag(
        questions_file='app/evaluations/eval_data/question_answer_pairs-min.json',
        output_file='app/evaluations/eval_data/rag_bge_large_finetuned_response_qa.json',
    )