File size: 4,439 Bytes
fc6b400
 
 
 
 
 
 
 
 
 
 
 
61c17f1
fc6b400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
import ast
import json
import os
import re

import pandas as pd
import requests
from datasets import load_dataset
from dotenv import load_dotenv
from huggingface_hub import login, logout
from langfuse import Langfuse

from tools.download_attachments import download_file

# --- Constants ---
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"


load_dotenv()


def prepare_dataset(base_doc: str) -> pd.DataFrame:
    """Load the raw export CSV and split it into question/answer/task_id rows.

    Each row's ``content`` cell is split at the final-answer marker and the
    ``task_id`` is pulled out of the ``metadata`` cell.
    """
    source_df = pd.read_csv(base_doc)

    records = []
    for _, source_row in source_df.iterrows():
        q_text, final_answer = get_question_and_answer(source_row['content'])
        records.append({
            'question': q_text,
            'answer': final_answer,
            'task_id': get_tag_id(source_row['metadata']),
        })
    return pd.DataFrame(records)

def get_questions_from_gaia():
    """Fetch the GAIA question list from the scoring API.

    Returns:
        list: the question dicts on success.
        tuple[str, None]: ``(error_message, None)`` on any failure — callers
            must check the return type before iterating.
    """
    questions_url = f"{DEFAULT_API_URL}/questions"
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
        if not questions_data:
            print("Fetched questions list is empty.")
            return "Fetched questions list is empty or invalid format.", None
        print(f"Fetched {len(questions_data)} questions.")
    # JSONDecodeError subclasses RequestException, so it must be caught
    # first — the original order made this branch unreachable.
    except requests.exceptions.JSONDecodeError as e:
        print(f"Error decoding JSON response from questions endpoint: {e}")
        print(f"Response text: {response.text[:500]}")
        return f"Error decoding server response for questions: {e}", None
    except requests.exceptions.RequestException as e:
        print(f"Error fetching questions: {e}")
        return f"Error fetching questions: {e}", None
    except Exception as e:
        print(f"An unexpected error occurred fetching questions: {e}")
        return f"An unexpected error occurred fetching questions: {e}", None
    return questions_data

    
def prepare_evaluation_data(gaia_questions: list, answer_df: pd.DataFrame) -> pd.DataFrame:
    """Join GAIA questions with reference answers and attachment filenames.

    ``attachment`` holds the downloaded filename for tasks that have one,
    otherwise ``False``; ``answer`` is ``None`` when no reference answer
    matches the task id.
    """
    rows = []
    for question_item in gaia_questions:
        tid = question_item.get("task_id")

        # download_file returns the local filename, or a falsy value when
        # the task has no attachment — keep the filename, else False.
        attachment = download_file(tid) or False

        matches = answer_df[answer_df['task_id'] == tid]
        reference_answer = None if matches.empty else matches['answer'].values[0]

        rows.append({
            'task_id': tid,
            'attachment': attachment,
            'question': question_item.get("question"),
            'answer': reference_answer,
        })
    return pd.DataFrame(rows)

def get_tag_id(line: str) -> str:
    """Extract ``task_id`` from a metadata cell.

    The metadata column stores a Python-repr dict (single-quoted), so parse it
    with ``ast.literal_eval``; the previous ``replace("'", '"')`` + ``json.loads``
    approach corrupted any value containing an apostrophe. Genuine JSON
    (e.g. containing ``true``/``null``) still parses via the fallback.
    """
    try:
        metadata = ast.literal_eval(line)
    except (ValueError, SyntaxError):
        metadata = json.loads(line)
    return metadata['task_id']

def get_question_and_answer(line: str) -> tuple[str, str]:
    """Split a content cell at the ``"Final answer :"`` marker.

    Returns:
        tuple[str, str]: (text before the marker, stripped text after it).

    Raises:
        ValueError: if the marker is absent from *line*.
    """
    marker = "Final answer :"
    pos = line.find(marker)
    if pos == -1:
        # ValueError instead of bare Exception: callers can catch precisely.
        raise ValueError("Final answer not found in line: " + line)
    return line[:pos], line[pos + len(marker):].strip()

def create_langfuse_dataset(evaluation_df: pd.DataFrame, dataset_name: str, dataset_description: str):
    """Create a Langfuse dataset and upload one item per evaluation row.

    Each item carries the question as input, the reference answer as
    expected output, and task_id/attachment in the item metadata.
    """
    langfuse = Langfuse()

    langfuse.create_dataset(
        name=dataset_name,
        description=dataset_description,
        metadata={
            # Fixed typo: the key was "source " (trailing space).
            "source": "GAIA",
            "type": "benchmark",
            "date": "2025-06-29",
        },
    )
    for _, row in evaluation_df.iterrows():
        langfuse.create_dataset_item(
            dataset_name=dataset_name,
            input=row['question'],
            expected_output=row['answer'],
            metadata={"task_id": row['task_id'], "attachment": row['attachment']},
        )
    


def main():
    """Entry point: authenticate with HF, fetch GAIA questions, build and
    upload the evaluation dataset."""
    hf_token = os.environ.get("HF_TOKEN")
    if not hf_token:
        print("HF token not in env vars")
        # SystemExit instead of exit(): exit() is a site builtin not
        # guaranteed outside interactive sessions; exit code preserved.
        raise SystemExit(-2)
    login(hf_token)
    print("Hugging Face token set successfully.")

    gaia_questions = get_questions_from_gaia()
    # On failure get_questions_from_gaia returns an (error_message, None)
    # tuple; iterating that as questions would silently misbehave, so abort.
    if not isinstance(gaia_questions, list):
        print(f"Aborting: {gaia_questions[0]}")
        raise SystemExit(-1)

    answer_df = prepare_dataset('supabase_docs.csv')
    evaluation_df = prepare_evaluation_data(gaia_questions, answer_df)
    evaluation_df.to_csv('evaluation_data.csv', index=False)
    create_langfuse_dataset(
        evaluation_df,
        "GAIA_Evaluation_Dataset",
        "Evaluation of 20 questions level 1 from GAIA",
    )
    


if __name__ == "__main__":
    main()