File size: 1,938 Bytes
b70c4a4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import os
import requests
from langchain_core.messages import HumanMessage
from agent import build_graph
from huggingface_hub import hf_hub_download
import pyarrow.parquet as pq
from dotenv import load_dotenv

# Load variables from a .env file into the process environment. With
# override=True, values from .env replace variables that are already set.
load_dotenv(override=True)

# Base URL of the scoring service; the script requests its /questions endpoint.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

def file_extract(local_file_path, task_id):
    """Download a GAIA attachment from the Hugging Face Hub.

    The file name is tried under each candidate directory of the
    ``gaia-benchmark/GAIA`` dataset repo ("2023/validation/", "2023/test/",
    "2023/train/") and finally at the repo root. The first download that
    succeeds wins.

    Args:
        local_file_path: Name of the attachment, relative to one of the
            candidate directories. A falsy value (``None`` or ``""``) means
            there is no attachment.
        task_id: Unused; kept so existing callers keep working.

    Returns:
        The value returned by ``hf_hub_download`` for the first prefix that
        resolves, or ``None`` when there is no attachment or every attempt
        fails.
    """
    # Function-scope import keeps this block self-contained.
    import logging

    if not local_file_path:
        return None

    log = logging.getLogger(__name__)
    token = os.getenv("HUGGINGFACEHUB_API_TOKEN") or os.getenv("HF_TOKEN")
    last_error = None
    for prefix in ("2023/validation/", "2023/test/", "2023/train/", ""):
        candidate = f"{prefix}{local_file_path}"
        try:
            return hf_hub_download(
                repo_id="gaia-benchmark/GAIA",
                filename=candidate,
                repo_type="dataset",
                token=token,
            )
        except Exception as exc:
            # Best-effort lookup: a miss under one prefix is expected, so keep
            # trying. Record the reason so auth/network failures are not
            # indistinguishable from "file not found".
            last_error = exc
            log.debug("GAIA download failed for %r: %r", candidate, exc)

    log.warning(
        "Could not resolve %r in gaia-benchmark/GAIA (last error: %r)",
        local_file_path,
        last_error,
    )
    return None

# Build the agent graph once; it is invoked on the selected question below.
graph = build_graph()

# Fetch the question list from the scoring service. The timeout keeps the
# script from hanging on an unresponsive server, and raise_for_status()
# surfaces HTTP errors here instead of as a confusing failure further down.
resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=30)
resp.raise_for_status()
questions = resp.json()

# Load the validation metadata and map each task_id to its 'Final answer'.
token = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACEHUB_API_TOKEN")
path = hf_hub_download(
    repo_id='gaia-benchmark/GAIA',
    filename='2023/validation/metadata.parquet',
    repo_type='dataset',
    token=token,
)
df = pq.read_table(path).to_pandas()
answer_map = dict(zip(df['task_id'], df['Final answer']))

# 1-based number of the question to run; the printed labels are derived from
# it so they cannot drift from the index actually used.
QUESTION_NUMBER = 19
if not 1 <= QUESTION_NUMBER <= len(questions):
    raise IndexError(
        f"Question {QUESTION_NUMBER} requested but only "
        f"{len(questions)} question(s) were returned"
    )
q = questions[QUESTION_NUMBER - 1]
task_id = q['task_id']
question = q['question']
file_name = q.get('file_name')
ground_truth = answer_map.get(task_id, "NOT FOUND")

# If the question has an attachment, download it and append its local path to
# the prompt text passed to the graph.
resolved_path = None
if file_name:
    resolved_path = file_extract(file_name, task_id)
    if resolved_path:
        question += f"\n\n[Attached File Local Path: {resolved_path}]"

print(f"Q{QUESTION_NUMBER} File: {file_name}")
print(f"Resolved: {resolved_path}")
print(f"Q{QUESTION_NUMBER} Question: {question[:100]}...")

# Run the agent on the single question and compare with the ground truth.
result = graph.invoke({"messages": [HumanMessage(content=question)]})
answer = result['messages'][-1].content
print(f"GT: {ground_truth}")
# str() so truncation is by characters even if the content is not a plain string.
print(f"Ans: {str(answer)[:80]}")