# NOTE: this file was scraped from a Hugging Face Space page
# (page status header read "Spaces: Sleeping").
import ast
import json
import os
import re

import pandas as pd
import requests
from datasets import load_dataset
from dotenv import load_dotenv
from huggingface_hub import login, logout
from langfuse import Langfuse

from tools.download_attachments import download_file
# --- Constants ---
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

# Load environment variables (e.g. HF_TOKEN) from a local .env file, if present.
load_dotenv()
def prepare_dataset(base_doc: str) -> pd.DataFrame:
    """Parse the scraped CSV into a DataFrame of question/answer/task_id rows.

    Each row of *base_doc* carries a 'content' column (question text plus a
    "Final answer :" marker) and a 'metadata' column (dict-like string with
    the GAIA task id).
    """
    source = pd.read_csv(base_doc)
    records = []
    for _, entry in source.iterrows():
        q, a = get_question_and_answer(entry['content'])
        records.append({
            'question': q,
            'answer': a,
            'task_id': get_tag_id(entry['metadata']),
        })
    return pd.DataFrame(records)
def get_questions_from_gaia():
    """Fetch the question list from the GAIA scoring API.

    Returns:
        list | None: the parsed questions on success; None on any failure
        (network error, undecodable JSON, or an empty payload). The original
        version returned a ``(message, None)`` tuple on errors while returning
        a bare list on success, which made the caller iterate a tuple.
    """
    questions_url = f"{DEFAULT_API_URL}/questions"
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
    # requests' JSONDecodeError subclasses RequestException (requests >= 2.27),
    # so it must be caught FIRST or this handler is unreachable.
    except requests.exceptions.JSONDecodeError as e:
        print(f"Error decoding JSON response from questions endpoint: {e}")
        print(f"Response text: {response.text[:500]}")
        return None
    except requests.exceptions.RequestException as e:
        print(f"Error fetching questions: {e}")
        return None
    except Exception as e:
        print(f"An unexpected error occurred fetching questions: {e}")
        return None
    if not questions_data:
        print("Fetched questions list is empty.")
        return None
    print(f"Fetched {len(questions_data)} questions.")
    return questions_data
def prepare_evaluation_data(gaia_questions: list, answer_df: pd.DataFrame) -> pd.DataFrame:
    """Join fetched GAIA questions with parsed answers and attachment info.

    For each question, tries to download its attachment (the local filename is
    stored, False when there is none) and looks the task id up in *answer_df*.
    """
    rows = []
    for question in gaia_questions:
        tid = question.get("task_id")
        # download_file returns the local filename when the task has an
        # attachment; fall back to False (the original sentinel) otherwise.
        attachment = download_file(tid) or False
        match = answer_df[answer_df['task_id'] == tid]
        rows.append({
            'task_id': tid,
            'attachment': attachment,
            'question': question.get("question"),
            'answer': None if match.empty else match['answer'].values[0],
        })
    return pd.DataFrame(rows)
def get_tag_id(line: str) -> str:
    """Extract the ``task_id`` from a dict-like metadata string.

    The metadata column holds single-quoted, Python-style dict literals.
    The previous ``line.replace("'", '"')`` + ``json.loads`` approach broke
    whenever a value contained an apostrophe; ``ast.literal_eval`` parses
    the literal safely and also accepts plain JSON-style double quotes.

    Raises:
        ValueError/SyntaxError: if *line* is not a valid literal.
        KeyError: if the parsed dict has no 'task_id' key.
    """
    return ast.literal_eval(line)['task_id']
def get_question_and_answer(line: str) -> tuple[str, str]:
    """Split a content line into ``(question, final_answer)``.

    The scraped content embeds the answer after a literal "Final answer :"
    marker; everything before the marker is the question (kept verbatim,
    including trailing whitespace), everything after is stripped.

    Raises:
        ValueError: if the marker is missing from *line*.
        (Was a bare ``Exception``; ``ValueError`` is more specific and is
        still caught by any existing broad handler.)
    """
    marker = "Final answer :"
    pos = line.find(marker)
    if pos == -1:
        raise ValueError("Final answer not found in line: " + line)
    return line[:pos], line[pos + len(marker):].strip()
def create_langfuse_dataset(evaluation_df: pd.DataFrame, dataset_name: str, dataset_description: str):
    """Create a Langfuse dataset and upload one item per evaluation row.

    Each item stores the question as input, the parsed answer as expected
    output, and the task id / attachment filename as metadata.
    """
    langfuse = Langfuse()
    langfuse.create_dataset(
        name=dataset_name,
        description=dataset_description,
        metadata={
            # Bug fix: the key previously was "source " (trailing space).
            "source": "GAIA",
            "type": "benchmark",
            "date": "2025-06-29",
        },
    )
    for _, row in evaluation_df.iterrows():
        langfuse.create_dataset_item(
            dataset_name=dataset_name,
            input=row['question'],
            expected_output=row['answer'],
            metadata={"task_id": row['task_id'], "attachment": row['attachment']},
        )
def main():
    """Entry point: authenticate to HF, fetch GAIA questions, build and upload the dataset."""
    hf_token = os.environ.get("HF_TOKEN")
    if not hf_token:
        print("HF token not in env vars")
        exit(-2)
    login(hf_token)
    print("Hugging Face token set successfully.")

    gaia_questions = get_questions_from_gaia()
    # On failure the fetch helper does not return a list of questions
    # (the original returned an error tuple, which silently corrupted the
    # evaluation set downstream) — abort instead of proceeding.
    if not isinstance(gaia_questions, list) or not gaia_questions:
        print("Could not fetch GAIA questions; aborting.")
        exit(-1)

    answer_df = prepare_dataset('supabase_docs.csv')
    evaluation_df = prepare_evaluation_data(gaia_questions, answer_df)
    evaluation_df.to_csv('evaluation_data.csv', index=False)
    create_langfuse_dataset(
        evaluation_df,
        "GAIA_Evaluation_Dataset",
        "Evaluation of 20 questions level 1 from GAIA",
    )


if __name__ == "__main__":
    main()