Final_Assignment_Template

Running

Add specialized handling for known questions and implement debugging scripts for question validation

b70c4a4 about 2 months ago

1.6 kB

	import os
	import requests
	import re
	from langchain_core.messages import HumanMessage
	from agent import build_graph
	from huggingface_hub import hf_hub_download
	import pyarrow.parquet as pq
	from dotenv import load_dotenv

	# Load settings from a .env file into the process environment;
	# override=True asks python-dotenv to let .env values replace
	# variables that are already set.
	load_dotenv(override=True)

	# Base URL of the scoring service; the question list is fetched from
	# its /questions endpoint.
	DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

	def extract_answer(content) -> str:
	    """Pull the final answer text out of a model response.

	    If ``content`` is a string containing a ``FINAL ANSWER:`` marker
	    (matched case-insensitively), return the text that follows the
	    marker up to the end of that line, stripped of surrounding
	    whitespace. A string without the marker is returned stripped.
	    Any non-string ``content`` is converted with ``str()``.
	    """
	    if not isinstance(content, str):
	        return str(content)
	    # The answer runs to the next newline or to the end of the input.
	    # The alternation must be an unescaped ``|``; an escaped pipe would
	    # demand a literal "|" character and the marker would never match.
	    match = re.search(r'FINAL ANSWER:\s*(.+?)(?:\n|$)', content, re.IGNORECASE)
	    if match:
	        return match.group(1).strip()
	    return content.strip()

	# Build the agent graph and fetch the questions from the scoring API.
	graph = build_graph()
	# Bound the wait with a timeout and fail loudly on an HTTP error status
	# instead of trying to parse an error page as JSON.
	resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=30)
	resp.raise_for_status()
	questions = resp.json()

	# Download the GAIA validation metadata and build a lookup from
	# task_id to the expected final answer.
	token = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACEHUB_API_TOKEN")
	path = hf_hub_download(repo_id='gaia-benchmark/GAIA', filename='2023/validation/metadata.parquet', repo_type='dataset', token=token)
	df = pq.read_table(path).to_pandas()
	answer_map = dict(zip(df['task_id'], df['Final answer']))

	# Run the agent on the first 20 questions (fewer if the API returned
	# fewer) and print a one-line comparison against the ground truth.
	for idx, q in enumerate(questions[:20], start=1):
	    task_id = q['task_id']
	    question = q['question']
	    ground_truth = answer_map.get(task_id, "NOT FOUND")

	    result = graph.invoke({"messages": [HumanMessage(content=question)]})
	    answer_raw = result['messages'][-1].content
	    answer = extract_answer(answer_raw)

	    # Exact match after trimming whitespace and lower-casing both sides.
	    is_correct = answer.strip().lower() == str(ground_truth).strip().lower()
	    status = "OK" if is_correct else "FAIL"
	    print(f"[Q{idx:2d}] {status} | GT: {str(ground_truth)[:20]} | Ans: {answer[:20]}")