"""Compare a saved GAIA submission against the official ground-truth answers."""
import json
import os

import pyarrow.parquet as pq
import requests
from dotenv import load_dotenv

# Pull HF_TOKEN / HUGGINGFACEHUB_API_TOKEN from a local .env file, if present.
load_dotenv()

print("Fetching questions...")
# timeout= keeps the script from hanging forever if the scoring space is down;
# raise_for_status() surfaces HTTP errors here instead of as a confusing
# JSON-decode failure on the next line.
resp = requests.get(
    'https://agents-course-unit4-scoring.hf.space/questions',
    timeout=30,
)
resp.raise_for_status()
questions = resp.json()
print(f"Fetched {len(questions)} questions")
|
|
| |
# Download the GAIA validation metadata (which holds the ground-truth
# answers) from the Hugging Face Hub and load it into a DataFrame.
print("Fetching ground truth...")
from huggingface_hub import hf_hub_download

# Accept either of the two commonly used env-var names for the Hub token.
hub_token = os.getenv('HF_TOKEN') or os.getenv('HUGGINGFACEHUB_API_TOKEN')
path = hf_hub_download(
    repo_id='gaia-benchmark/GAIA',
    filename='2023/validation/metadata.parquet',
    repo_type='dataset',
    token=hub_token,
)
df = pq.read_table(path).to_pandas()
|
|
| |
# Index the ground truth by task_id for O(1) lookup during the comparison.
answer_map = {tid: final for tid, final in zip(df['task_id'], df['Final answer'])}
print(f"Loaded {len(answer_map)} ground truth answers")
|
|
| |
submission_path = 'backup_submission.json'
# EAFP: try to open the file and handle its absence, rather than a racy
# os.path.exists() pre-check. SystemExit is the script-safe replacement for
# the site-module `exit()` convenience.
try:
    with open(submission_path, 'r', encoding='utf-8') as f:
        submission = json.load(f)
except FileNotFoundError:
    print(f"\nError: {submission_path} not found!")
    print("Please run your evaluation first to generate the submission file.")
    raise SystemExit(1)

print(f"Loaded submission with {len(submission['answers'])} answers")
|
|
| |
print('\n' + '='*70)
print('DETAILED COMPARISON: Ground Truth vs Submitted Answers')
print('='*70 + '\n')

# Hoist the question lookup out of the loop: the original `next(...)` scan was
# O(len(questions)) per answer. Assumes task_id is unique within `questions`
# (a duplicate would previously resolve to the first match) — TODO confirm.
question_by_id = {item['task_id']: item['question'] for item in questions}

correct = 0
for i, ans in enumerate(submission['answers']):
    task_id = ans['task_id']
    submitted = str(ans['submitted_answer']).strip()
    ground_truth = str(answer_map.get(task_id, 'NOT FOUND')).strip()

    # Case-insensitive exact match between submitted and official answer.
    is_correct = submitted.lower() == ground_truth.lower()
    if is_correct:
        correct += 1
        status = '✅'
    else:
        status = '❌'

    q = question_by_id.get(task_id, 'N/A')

    print(f"{status} [{i+1}] Task: {task_id[:30]}...")
    print(f" Q: {q[:60]}...")
    print(f" Submitted: {submitted[:50]}")
    print(f" Ground: {ground_truth[:50]}")
    print()
|
|
total = len(submission['answers'])
print('='*70)
# Guard the percentage so an empty answers list prints 0% instead of raising
# ZeroDivisionError.
pct = (correct / total * 100) if total else 0.0
print(f'FINAL SCORE: {correct}/{total} = {pct:.0f}%')
print('='*70)
|
|