# Final_Assignment_Template / prepareEvaluationData.py
# Author: José Enrique
# Last commit: "moved tools to /tools" (61c17f1)
import ast
import json
import os
import re

import pandas as pd
import requests
from datasets import load_dataset
from dotenv import load_dotenv
from huggingface_hub import (login, logout)
from langfuse import Langfuse

from tools.download_attachments import download_file
# --- Constants ---
# Base URL of the GAIA agents-course scoring service queried for questions.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# Load HF_TOKEN (and any other secrets) from a local .env file into os.environ
# before main() reads them.
load_dotenv()
def prepare_dataset(base_doc:str)->pd.DataFrame:
    """Build a (question, answer, task_id) table from the raw CSV at *base_doc*.

    Each row's 'content' column is split into question/answer text and the
    'metadata' column is parsed for its task id.
    """
    source = pd.read_csv(base_doc)
    records = []
    for _, entry in source.iterrows():
        q, a = get_question_and_answer(entry['content'])
        records.append({
            'question': q,
            'answer': a,
            'task_id': get_tag_id(entry['metadata']),
        })
    return pd.DataFrame(records)
def get_questions_from_gaia():
    """Fetch the question list from the GAIA scoring API.

    Returns:
        The decoded list of question dicts on success, or an
        ``(error_message, None)`` tuple on failure — callers must check
        the return type before iterating.
    """
    questions_url = f"{DEFAULT_API_URL}/questions"
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
    # NOTE: JSONDecodeError subclasses RequestException in requests >= 2.27,
    # so it must be caught FIRST — in the original order this branch was
    # unreachable dead code.
    except requests.exceptions.JSONDecodeError as e:
        print(f"Error decoding JSON response from questions endpoint: {e}")
        print(f"Response text: {response.text[:500]}")
        return f"Error decoding server response for questions: {e}", None
    except requests.exceptions.RequestException as e:
        print(f"Error fetching questions: {e}")
        return f"Error fetching questions: {e}", None
    except Exception as e:
        print(f"An unexpected error occurred fetching questions: {e}")
        return f"An unexpected error occurred fetching questions: {e}", None
    if not questions_data:
        print("Fetched questions list is empty.")
        return "Fetched questions list is empty or invalid format.", None
    print(f"Fetched {len(questions_data)} questions.")
    return questions_data
def prepare_evaluation_data(gaia_questions:list, answer_df:pd.DataFrame)->pd.DataFrame:
    """Join GAIA questions with known answers into one evaluation table.

    For every question we try to download its attachment; the 'attachment'
    column holds the downloaded filename, or False when the task has no file.
    Answers missing from *answer_df* are stored as None.
    """
    rows = []
    for item in gaia_questions:
        task_id = item.get("task_id")
        # download_file returns a filename when the task has an attachment;
        # a falsy result (no file) is normalized to False.
        attachment = download_file(task_id) or False
        matches = answer_df[answer_df['task_id'] == task_id]
        expected = None if matches.empty else matches['answer'].values[0]
        rows.append({
            'task_id': task_id,
            'attachment': attachment,
            'question': item.get("question"),
            'answer': expected,
        })
    return pd.DataFrame(rows)
def get_tag_id(line:str)->str:
    """Extract the ``task_id`` field from a stringified metadata dict.

    The metadata column holds a Python ``repr`` of a dict (single-quoted),
    so parse it with ``ast.literal_eval``. The previous approach — swapping
    every single quote for a double quote and calling ``json.loads`` —
    corrupts any value that itself contains a quote character.

    Raises:
        KeyError: if the parsed dict has no 'task_id' key.
    """
    try:
        return ast.literal_eval(line)['task_id']
    except (ValueError, SyntaxError):
        # Legacy fallback for inputs that are not valid Python literals
        # (e.g. JSON with true/null that literal_eval rejects).
        return json.loads(line.replace("'",'"'))['task_id']
def get_question_and_answer(line:str)->tuple[str, str]:
    """Split a content line into (question, answer) around 'Final answer :'.

    The question part keeps its original trailing whitespace; the answer
    part is stripped. (The original annotation claimed ``str`` but the
    function has always returned a 2-tuple.)

    Raises:
        ValueError: if the 'Final answer :' marker is missing — more precise
            than the bare Exception raised before, and still caught by any
            caller handling Exception.
    """
    marker = "Final answer :"
    pos = line.find(marker)
    if pos == -1:
        raise ValueError("Final answer not found in line: " + line)
    return line[:pos], line[pos + len(marker):].strip()
def create_langfuse_dataset(evaluation_df:pd.DataFrame, dataset_name:str,dataset_description:str):
    """Create a Langfuse dataset and upload one item per evaluation row.

    Args:
        evaluation_df: table with 'question', 'answer', 'task_id' and
            'attachment' columns.
        dataset_name: name of the Langfuse dataset to create.
        dataset_description: human-readable description stored with it.
    """
    langfuse = Langfuse()
    langfuse.create_dataset(
        name=dataset_name,
        description=dataset_description,
        metadata={
            # fixed: the key was previously "source " with a trailing space
            "source": "GAIA",
            "type": "benchmark",
            "date": "2025-06-29",
        },
    )
    # itertuples is cheaper than iterrows and the row index was unused.
    for row in evaluation_df.itertuples(index=False):
        langfuse.create_dataset_item(
            dataset_name=dataset_name,
            input=row.question,
            expected_output=row.answer,
            metadata={"task_id": row.task_id, "attachment": row.attachment},
        )
def main():
    """Entry point: authenticate to HF, fetch GAIA questions, build and upload the dataset."""
    hf_token = os.environ.get("HF_TOKEN")
    if not hf_token:
        print("HF token not in env vars")
        # raise SystemExit instead of exit(): exit() is a site-module
        # convenience not guaranteed to exist in every runtime.
        raise SystemExit(-2)
    login(hf_token)
    print("Hugging Face token set successfully.")
    gaia_questions = get_questions_from_gaia()
    # On failure get_questions_from_gaia returns an (error_message, None)
    # tuple instead of a list; the original code would have iterated over
    # that tuple. Bail out explicitly.
    if not isinstance(gaia_questions, list):
        raise SystemExit(-1)
    answer_df = prepare_dataset('supabase_docs.csv')
    evaluation_df = prepare_evaluation_data(gaia_questions, answer_df)
    evaluation_df.to_csv('evaluation_data.csv', index=False)
    create_langfuse_dataset(evaluation_df, "GAIA_Evaluation_Dataset","Evaluation of 20 questions level 1 from GAIA")


if __name__ == "__main__":
    main()