# NOTE: this file was scraped from a Hugging Face Space page
# (page status header read "Spaces: Sleeping").
import ast
import json
import os
import re

import pandas as pd
import requests
from datasets import load_dataset
from dotenv import load_dotenv
from huggingface_hub import login, logout
from langfuse import Langfuse

from tools.download_attachments import download_file
# --- Constants ---
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

# Load environment variables (e.g. HF_TOKEN) from a local .env file, if present.
load_dotenv()
def prepare_dataset(base_doc: str) -> pd.DataFrame:
    """Parse the scraped CSV into a DataFrame of question/answer/task_id rows.

    Each row of *base_doc* carries a 'content' column (question text plus a
    "Final answer :" marker) and a 'metadata' column (dict-like string with
    the GAIA task id).
    """
    source = pd.read_csv(base_doc)
    records = []
    for _, entry in source.iterrows():
        q, a = get_question_and_answer(entry['content'])
        records.append({
            'question': q,
            'answer': a,
            'task_id': get_tag_id(entry['metadata']),
        })
    return pd.DataFrame(records)
def get_questions_from_gaia():
    """Fetch the question list from the GAIA scoring API.

    Returns:
        list | None: the parsed questions on success; None on any failure
        (network error, undecodable JSON, or an empty payload). The original
        version returned a ``(message, None)`` tuple on errors while returning
        a bare list on success, which made the caller iterate a tuple.
    """
    questions_url = f"{DEFAULT_API_URL}/questions"
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
    # requests' JSONDecodeError subclasses RequestException (requests >= 2.27),
    # so it must be caught FIRST or this handler is unreachable.
    except requests.exceptions.JSONDecodeError as e:
        print(f"Error decoding JSON response from questions endpoint: {e}")
        print(f"Response text: {response.text[:500]}")
        return None
    except requests.exceptions.RequestException as e:
        print(f"Error fetching questions: {e}")
        return None
    except Exception as e:
        print(f"An unexpected error occurred fetching questions: {e}")
        return None
    if not questions_data:
        print("Fetched questions list is empty.")
        return None
    print(f"Fetched {len(questions_data)} questions.")
    return questions_data
def prepare_evaluation_data(gaia_questions: list, answer_df: pd.DataFrame) -> pd.DataFrame:
    """Join fetched GAIA questions with parsed answers and attachment info.

    For each question, tries to download its attachment (the local filename is
    stored, False when there is none) and looks the task id up in *answer_df*.
    """
    rows = []
    for question in gaia_questions:
        tid = question.get("task_id")
        # download_file returns the local filename when the task has an
        # attachment; fall back to False (the original sentinel) otherwise.
        attachment = download_file(tid) or False
        match = answer_df[answer_df['task_id'] == tid]
        rows.append({
            'task_id': tid,
            'attachment': attachment,
            'question': question.get("question"),
            'answer': None if match.empty else match['answer'].values[0],
        })
    return pd.DataFrame(rows)
def get_tag_id(line: str) -> str:
    """Extract the ``task_id`` from a dict-like metadata string.

    The metadata column holds single-quoted, Python-style dict literals.
    The previous ``line.replace("'", '"')`` + ``json.loads`` approach broke
    whenever a value contained an apostrophe; ``ast.literal_eval`` parses
    the literal safely and also accepts plain JSON-style double quotes.

    Raises:
        ValueError/SyntaxError: if *line* is not a valid literal.
        KeyError: if the parsed dict has no 'task_id' key.
    """
    return ast.literal_eval(line)['task_id']
def get_question_and_answer(line: str) -> tuple[str, str]:
    """Split a content line into ``(question, final_answer)``.

    The scraped content embeds the answer after a literal "Final answer :"
    marker; everything before the marker is the question (kept verbatim,
    including trailing whitespace), everything after is stripped.

    Raises:
        ValueError: if the marker is missing from *line*.
        (Was a bare ``Exception``; ``ValueError`` is more specific and is
        still caught by any existing broad handler.)
    """
    marker = "Final answer :"
    pos = line.find(marker)
    if pos == -1:
        raise ValueError("Final answer not found in line: " + line)
    return line[:pos], line[pos + len(marker):].strip()
def create_langfuse_dataset(evaluation_df: pd.DataFrame, dataset_name: str, dataset_description: str):
    """Create a Langfuse dataset and upload one item per evaluation row.

    Each item stores the question as input, the parsed answer as expected
    output, and the task id / attachment filename as metadata.
    """
    langfuse = Langfuse()
    langfuse.create_dataset(
        name=dataset_name,
        description=dataset_description,
        metadata={
            # Bug fix: the key previously was "source " (trailing space).
            "source": "GAIA",
            "type": "benchmark",
            "date": "2025-06-29",
        },
    )
    for _, row in evaluation_df.iterrows():
        langfuse.create_dataset_item(
            dataset_name=dataset_name,
            input=row['question'],
            expected_output=row['answer'],
            metadata={"task_id": row['task_id'], "attachment": row['attachment']},
        )
def main():
    """Entry point: authenticate to HF, fetch GAIA questions, build and upload the dataset."""
    hf_token = os.environ.get("HF_TOKEN")
    if not hf_token:
        print("HF token not in env vars")
        exit(-2)
    login(hf_token)
    print("Hugging Face token set successfully.")

    gaia_questions = get_questions_from_gaia()
    # On failure the fetch helper does not return a list of questions
    # (the original returned an error tuple, which silently corrupted the
    # evaluation set downstream) — abort instead of proceeding.
    if not isinstance(gaia_questions, list) or not gaia_questions:
        print("Could not fetch GAIA questions; aborting.")
        exit(-1)

    answer_df = prepare_dataset('supabase_docs.csv')
    evaluation_df = prepare_evaluation_data(gaia_questions, answer_df)
    evaluation_df.to_csv('evaluation_data.csv', index=False)
    create_langfuse_dataset(
        evaluation_df,
        "GAIA_Evaluation_Dataset",
        "Evaluation of 20 questions level 1 from GAIA",
    )


if __name__ == "__main__":
    main()