Spaces:
Sleeping
Sleeping
| import os | |
| import json | |
| import time | |
| import gradio as gr | |
| from gradio import OAuthProfile | |
| import requests | |
| import concurrent.futures | |
| import pandas as pd | |
| from langchain_core.messages import HumanMessage | |
| from agent import build_graph | |
# --- CONFIGURATION ---
print("DEBUG: Loading Antientropy Agent v5.8 (LangGraph + File Tools) code...")

# Public URL of this Space's source tree; submitted as "agent_code" with answers.
SPACE_URL = "https://huggingface.co/spaces/jomasego/Antientropy/tree/main"
# Base URL of the GAIA scoring API (serves /questions, /files/{id}, /submit).
API_URL = "https://agents-course-unit4-scoring.hf.space"

# --- THE ANTIENTROPY AGENT SETUP ---
# System prompt is now handled in agent.py via system_prompt.txt
# We will still prepend instructions to the user query just in case

# Initialize the Agent
# NOTE(review): build_graph is defined in agent.py — presumably compiles a
# LangGraph state graph whose state carries a "messages" list; confirm there.
graph = build_graph()
| # --- HELPER FUNCTIONS --- | |
def get_questions():
    """Retrieve the full list of evaluation questions.

    Returns:
        list: Parsed JSON list of task dicts from the scoring API, or an
        empty list when the request fails or returns a non-200 status.
    """
    try:
        response = requests.get(f"{API_URL}/questions", timeout=30)
    except requests.RequestException as e:
        # A transient network failure must not crash the whole evaluation
        # run; an empty task list is the established failure signal here.
        print(f"Failed to fetch questions: {e}")
        return []
    if response.status_code == 200:
        return response.json()
    return []
def download_file(task_id):
    """Download the file associated with a task, if it exists.

    Args:
        task_id: Identifier of the GAIA task whose attachment to fetch.

    Returns:
        str | None: Absolute path of the saved file, or None when the task
        has no file (non-200 response) or the request fails.
    """
    try:
        response = requests.get(f"{API_URL}/files/{task_id}", timeout=30)
    except requests.RequestException as e:
        # Treat a network failure like "no file": the caller handles None.
        print(f"Failed to download file for task {task_id}: {e}")
        return None
    if response.status_code != 200:
        return None
    # Use absolute path in /tmp for reliability
    data_dir = "/tmp/gaia_files"
    os.makedirs(data_dir, exist_ok=True)
    content_disp = response.headers.get("Content-Disposition")
    filename = os.path.join(data_dir, f"{task_id}_file")
    if content_disp and "filename=" in content_disp:
        raw_filename = content_disp.split('filename=')[1].strip('"')
        # SECURITY: basename() strips any path components a hostile header
        # could inject (e.g. "../../etc/passwd"), keeping writes in data_dir.
        raw_filename = os.path.basename(raw_filename)
        if raw_filename:
            filename = os.path.join(data_dir, raw_filename)
    with open(filename, "wb") as f:
        f.write(response.content)
    # BUG FIX: the original log line dropped the actual path (placeholder
    # text where {filename} belonged), making debugging downloads impossible.
    print(f"π Downloaded file to: {filename} ({len(response.content)} bytes)")
    return filename
def clean_answer(raw_response):
    """Strip everything except the final answer.

    Searches for a "FINAL ANSWER:" marker (any casing) and returns the text
    after it, reduced to one line with trailing periods removed. When the
    marker is absent, falls back to the last non-empty line of the response.

    Args:
        raw_response: Raw agent output; coerced to str. Falsy input yields "".

    Returns:
        str: The cleaned single-line answer.
    """
    if not raw_response:
        return ""
    clean = str(raw_response)
    # 1. Look for explicit "FINAL ANSWER:" marker (case insensitive)
    import re
    # DOTALL lets the capture run across newlines; we trim to one line below.
    match = re.search(r'FINAL\s*ANSWER\s*:\s*(.+)', clean, re.IGNORECASE | re.DOTALL)
    if match:
        answer = match.group(1).strip()
        # If multiline, take just the first line (the actual answer).
        # BUG FIX: select the first line BEFORE stripping trailing periods.
        # The original rstrip'd the whole multiline capture first, so a
        # first-line answer like "Paris.\nnotes" kept its trailing dot.
        if '\n' in answer:
            answer = answer.split('\n')[0].strip()
        # Remove trailing punctuation that might be added
        return answer.rstrip('.')
    # 2. If no marker, the model didn't follow instructions - return empty or last line
    # This signals an error to the evaluation
    lines = [l.strip() for l in clean.strip().split('\n') if l.strip()]
    if lines:
        # Return the last non-empty line as a fallback
        return lines[-1]
    return clean.strip()
def run_agent_on_task(prompt):
    """Run the graph agent on a single prompt and return its final reply text."""
    final_state = graph.invoke({"messages": [HumanMessage(content=prompt)]})
    # The graph appends its replies to "messages"; the last entry is the
    # AI's final response.
    return final_state["messages"][-1].content
def run_evaluation(profile: gr.OAuthProfile | None):
    """Main function to run the agent and submit results.

    Fetches all GAIA tasks, runs the LangGraph agent on each one (with a
    per-task timeout and file-download handling), then POSTs the collected
    answers to the scoring API.

    Args:
        profile: OAuth profile injected by Gradio's LoginButton, or None
            when the user is not logged in.

    Returns:
        tuple: (status log string, pandas DataFrame of
        Task ID / Question / Submitted Answer rows).
    """
    try:
        if profile is None:
            return "β Error: You must be logged in to submit. Please log in with Hugging Face.", pd.DataFrame()
        username = profile.username
    except Exception as e:
        return f"β Error retrieving user profile: {e}", pd.DataFrame()
    output = f"π Antientropy Agent (LangGraph Edition) Initiated for user {username}...\n\n"
    # Check token again just in case
    if not os.environ.get("HF_TOKEN"):
        return (
            "β Missing HF token for model access. "
            "Add a Space secret named HF_TOKEN with read access.",
            pd.DataFrame(),
        )
    # 1. Fetch Questions
    questions = get_questions()
    output += f"π₯ Fetched {len(questions)} tasks from the API.\n\n"
    submission_results = []      # payload rows: {"task_id", "submitted_answer"}
    questions_and_answers = []   # display rows for the results table
    # 2. Solve Each Question
    per_task_timeout_sec = 180  # Increased timeout for LangGraph to 3 minutes
    for i, task in enumerate(questions, 1):
        # Accept either key name; the API schema has used both forms.
        task_id = task.get("id") or task.get("task_id")
        question_text = task.get("question")
        if not task_id or question_text is None:
            output += f"β οΈ Skipping malformed task: {task}\n\n"
            continue
        output += f"--- Solving Task {i}/{len(questions)} (ID: {task_id}) ---\n"
        # Add delay to avoid rate limits
        time.sleep(5)
        file_path = download_file(task_id)
        # Construct prompt with specific guidance based on file type
        prompt = f"Question: {question_text}"
        if file_path:
            # Steer the agent toward the correct tool for the attachment type.
            ext = file_path.split('.')[-1].lower() if '.' in file_path else ''
            if ext in ['mp3', 'wav', 'ogg', 'flac', 'm4a']:
                prompt += f"\n\nIMPORTANT: An audio file has been downloaded to: '{file_path}'. You MUST use the transcribe_audio tool with this exact path to get the content."
            elif ext in ['png', 'jpg', 'jpeg', 'gif', 'bmp', 'webp']:
                prompt += f"\n\nIMPORTANT: An image file has been downloaded to: '{file_path}'. You MUST use extract_text_from_image or analyze_image tool with this exact path."
            elif ext == 'pdf':
                prompt += f"\n\nIMPORTANT: A PDF file has been downloaded to: '{file_path}'. You MUST use the read_pdf_file tool with this exact path."
            elif ext in ['csv']:
                prompt += f"\n\nIMPORTANT: A CSV file has been downloaded to: '{file_path}'. You MUST use analyze_csv_file or execute_code_multilang to read it."
            elif ext in ['xlsx', 'xls']:
                prompt += f"\n\nIMPORTANT: An Excel file has been downloaded to: '{file_path}'. You MUST use analyze_excel_file or execute_code_multilang to read it."
            elif ext in ['py', 'txt', 'json', 'xml', 'html', 'css', 'js', 'md']:
                prompt += f"\n\nIMPORTANT: A text/code file has been downloaded to: '{file_path}'. You MUST use read_file_content to read it first."
            else:
                prompt += f"\n\nIMPORTANT: A file has been downloaded to: '{file_path}'. You MUST use read_file_content or execute_code_multilang to read it."
        try:
            # Run the agent in a worker thread so we can enforce a hard
            # per-task timeout via future.result().
            with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
                future = executor.submit(run_agent_on_task, prompt)
                response = future.result(timeout=per_task_timeout_sec)
            final_answer = clean_answer(response)
            output += f"β Agent Answer: {final_answer}\n\n"
            submission_results.append({
                "task_id": task_id,
                "submitted_answer": final_answer
            })
            questions_and_answers.append({
                "Task ID": task_id,
                "Question": question_text[:100] + "...",
                "Submitted Answer": final_answer
            })
        except concurrent.futures.TimeoutError:
            # Still submit a placeholder so the payload covers every task.
            output += f"β³ Timeout on task {task_id} after {per_task_timeout_sec}s\n\n"
            submission_results.append({
                "task_id": task_id,
                "submitted_answer": "Timeout"
            })
            questions_and_answers.append({
                "Task ID": task_id,
                "Question": question_text[:100] + "...",
                "Submitted Answer": "Timeout"
            })
        except Exception as e:
            # Broad catch is deliberate: one failing task must not abort
            # the remaining tasks or the final submission.
            error_msg = str(e)
            output += f"β Error on task {task_id}: {error_msg}\n\n"
            submission_results.append({
                "task_id": task_id,
                "submitted_answer": "Error"
            })
            questions_and_answers.append({
                "Task ID": task_id,
                "Question": question_text[:100] + "...",
                "Submitted Answer": f"Error: {error_msg}"
            })
    # 3. Submit to Leaderboard
    output += "\nπ€ Submitting results to Leaderboard...\n"
    payload = {
        "username": username,
        "agent_code": SPACE_URL,
        "answers": submission_results
    }
    try:
        submit_response = requests.post(f"{API_URL}/submit", json=payload)
        if submit_response.status_code == 200:
            result = submit_response.json()
            output += "π SUCCESS! Submission received.\n"
            output += json.dumps(result, indent=2)
        else:
            output += f"β οΈ Submission failed: {submit_response.text}"
    except Exception as e:
        output += f"β οΈ Submission failed with error: {e}"
    return output, pd.DataFrame(questions_and_answers)
# --- GRADIO INTERFACE ---
with gr.Blocks(title="Antientropy Final Assignment v5") as demo:
    gr.Markdown("# π΅π»ββοΈ Antientropy Agent - GAIA Benchmark v5 (LangGraph + Multimedia)")
    gr.Markdown(
        """
        **Instructions:**
        1. Log in to your Hugging Face account using the button below.
        2. Click 'Run Evaluation & Submit All Answers' to run the agent.
        """
    )
    # LoginButton supplies the OAuth session that Gradio injects into
    # run_evaluation's gr.OAuthProfile-annotated parameter.
    gr.LoginButton()
    submit_btn = gr.Button("Run Evaluation & Submit All Answers", variant="primary", size="lg")
    with gr.Row():
        status_output = gr.Textbox(label="Run Status / Submission Result", lines=15, max_lines=30)
    with gr.Row():
        results_table = gr.Dataframe(
            headers=["Task ID", "Question", "Submitted Answer"],
            label="Questions and Agent Answers"
        )
    # inputs=None is intentional: the only argument (the OAuth profile) is
    # auto-injected by Gradio from the login session, not from a component.
    submit_btn.click(
        fn=run_evaluation,
        inputs=None,
        outputs=[status_output, results_table]
    )

if __name__ == "__main__":
    demo.launch()