| | |
| | |
| |
|
| | from astra import astra_rag_eval |
| | from llm import groq_chat, CHAT_MODEL |
| | from chroma import search_eval |
| | from typing import Any |
| |
|
| |
|
# System-prompt template for the plain-LLM (no retrieval) answer generator.
# `{question}` is filled via str.format by the caller before it is sent as
# the `preamble` of groq_chat; the trailing backslashes keep the template a
# single paragraph with no embedded newlines.
LLM_ANSWER_GEN_TEMPLATE = """\
Generate one brief and informative answer to the following question: {question}. \
The answer should be concise, relevant, and not exceed 60 words in length.
"""
| |
|
| | import json |
| |
|
| | import json |
| | from tqdm import tqdm |
| | import time |
| |
|
def generate_responses_llm(questions_file: str, output_file: str, model: CHAT_MODEL = "mixtral-8x7b-32768", batch_size: int = 30, delay_between_batches: int = 10):
    """Answer every question with the bare LLM (no retrieval) and save results.

    Questions are processed in batches with a pause between batches to stay
    under the Groq API rate limit.

    Args:
        questions_file: Path to a JSON file with a top-level ``"question"``
            key holding a list of question strings.
        output_file: Path where the ``[{"question": ..., "answer": ...}]``
            list is written as indented JSON.
        model: Groq chat model identifier passed through to ``groq_chat``.
        batch_size: Number of questions per batch.
        delay_between_batches: Seconds to sleep between batches (rate limiting).
    """
    responses = []

    with open(questions_file, 'r') as f_questions:
        data = json.load(f_questions)
    questions = data["question"]
    num_questions = len(questions)
    # Ceiling division: range(0, n, b) yields ceil(n / b) batches, not n // b.
    num_batches = -(-num_questions // batch_size)

    for i in tqdm(range(0, num_questions, batch_size), desc="Generating responses", total=num_batches):
        for question in questions[i:i + batch_size]:
            answer = groq_chat(
                message=question,
                preamble=LLM_ANSWER_GEN_TEMPLATE,
                model=model,
            ).choices[0].message.content
            responses.append({"question": question, "answer": answer})

        # Throttle between batches only; no point sleeping after the last one.
        if i + batch_size < num_questions:
            time.sleep(delay_between_batches)

    with open(output_file, 'w') as f_output:
        json.dump(responses, f_output, indent=4)
| |
|
| |
|
| | import json |
| | from typing import Any |
| | from tqdm import tqdm |
| | import time |
| |
|
def generate_responses_rag(questions_file: str, output_file: str, model: CHAT_MODEL = "mixtral-8x7b-32768", batch_size: int = 30, delay_between_batches: int = 10, k: int = 3, retriever_model: str = "models/bge-large_finetuned"):
    """Answer every question with the RAG pipeline and save results.

    For each question, retrieve the top-``k`` documents with ``search_eval``
    and pass them as context to ``astra_rag_eval``; when retrieval returns
    nothing, a fixed fallback answer is used instead.

    Args:
        questions_file: Path to a JSON file containing a list of objects,
            each with a ``"question"`` key.
        output_file: Path where the ``[{"question": ..., "answer": ...}]``
            list is written as indented JSON.
        model: Kept for signature compatibility with ``generate_responses_llm``.
            NOTE(review): currently unused — ``astra_rag_eval`` is never given
            a model argument here; confirm whether it should be forwarded.
        batch_size: Number of questions per batch.
        delay_between_batches: Seconds to sleep between batches (rate limiting).
        k: Number of documents to retrieve per question (was hard-coded 3).
        retriever_model: Embedding model name/path handed to ``search_eval``
            (was hard-coded "models/bge-large_finetuned").
    """
    responses = []

    with open(questions_file, 'r') as f_questions:
        data = json.load(f_questions)
    num_questions = len(data)
    # Ceiling division: range(0, n, b) yields ceil(n / b) batches, not n // b.
    num_batches = -(-num_questions // batch_size)

    for i in tqdm(range(0, num_questions, batch_size), desc="Generating responses", total=num_batches):
        for idx, item in enumerate(data[i:i + batch_size]):
            question = item["question"]
            print(question)
            context = search_eval(query=question, k=k, model_name_or_path=retriever_model)

            if not context:
                # No documents retrieved — fall back to a canned refusal.
                answer = "I'm sorry, I don't have any information on that. Feel free to ask me anything else."
            else:
                answer = astra_rag_eval(
                    prompt=question,
                    context=[result["doc"] for result in context]
                )

            responses.append({"question": question, "answer": answer})
            print(f"{i+idx+1} questions answered")

        # Throttle between batches only; no point sleeping after the last one.
        if i + batch_size < num_questions:
            time.sleep(delay_between_batches)

    with open(output_file, 'w') as f_output:
        json.dump(responses, f_output, indent=4)
| |
|
| | |
# Guard the evaluation run so importing this module does not kick off a
# slow, network-bound generation pass as a side effect.
if __name__ == "__main__":
    generate_responses_rag(
        questions_file='app/evaluations/eval_data/question_answer_pairs-min.json',
        output_file='app/evaluations/eval_data/rag_bge_large_finetuned_response_qa.json',
    )
| |
|