"""Gradio app that runs the Antientropy LangGraph agent over the GAIA
benchmark questions and submits the answers to the course scoring API."""

import concurrent.futures
import json
import os
import re
import time

import gradio as gr
import pandas as pd
import requests
from gradio import OAuthProfile
from langchain_core.messages import HumanMessage

from agent import build_graph

# --- CONFIGURATION ---
print("DEBUG: Loading Antientropy Agent v5.8 (LangGraph + File Tools) code...")
SPACE_URL = "https://huggingface.co/spaces/jomasego/Antientropy/tree/main"
API_URL = "https://agents-course-unit4-scoring.hf.space"

# --- THE ANTIENTROPY AGENT SETUP ---
# System prompt is now handled in agent.py via system_prompt.txt
# We will still prepend instructions to the user query just in case

# Initialize the Agent
graph = build_graph()


# --- HELPER FUNCTIONS ---
def get_questions():
    """Retrieve the full list of evaluation questions.

    Returns:
        list: Parsed JSON list of task dicts, or [] on any non-200 response.
    """
    response = requests.get(f"{API_URL}/questions", timeout=30)
    if response.status_code == 200:
        return response.json()
    return []


def download_file(task_id):
    """Download the file associated with a task, if it exists.

    Args:
        task_id: Identifier of the GAIA task whose attachment to fetch.

    Returns:
        str | None: Path of the downloaded file, or None when the task has
        no attachment (non-200 response).
    """
    response = requests.get(f"{API_URL}/files/{task_id}", timeout=30)
    if response.status_code == 200:
        # Use absolute path in /tmp for reliability
        data_dir = "/tmp/gaia_files"
        os.makedirs(data_dir, exist_ok=True)
        content_disp = response.headers.get("Content-Disposition")
        filename = f"{data_dir}/{task_id}_file"
        if content_disp and "filename=" in content_disp:
            raw_filename = content_disp.split('filename=')[1].strip().strip('"')
            # basename() guards against path traversal via a hostile
            # Content-Disposition header (e.g. "../../etc/passwd").
            filename = os.path.join(data_dir, os.path.basename(raw_filename))
        with open(filename, "wb") as f:
            f.write(response.content)
        # BUG FIX: this message previously printed a literal placeholder
        # instead of the actual download path.
        print(f"šŸ“ Downloaded file to: {filename} ({len(response.content)} bytes)")
        return filename
    return None


def clean_answer(raw_response):
    """Strip everything except the final answer.

    Looks for a "FINAL ANSWER:" marker (any casing). When the model did not
    follow the output format, falls back to the last non-empty line so the
    evaluation still receives something.
    """
    if not raw_response:
        return ""
    clean = str(raw_response)
    # 1. Look for explicit "FINAL ANSWER:" marker (case insensitive)
    match = re.search(r'FINAL\s*ANSWER\s*:\s*(.+)', clean, re.IGNORECASE | re.DOTALL)
    if match:
        answer = match.group(1).strip()
        # If multiline, take just the first line (the actual answer).
        if '\n' in answer:
            answer = answer.split('\n')[0].strip()
        # Remove trailing punctuation that might be added.
        # BUG FIX: rstrip('.') now runs AFTER the first line is isolated;
        # previously it ran on the whole multiline tail, so the returned
        # first line could still end with a period.
        return answer.rstrip('.')
    # 2. If no marker, the model didn't follow instructions - return the
    # last non-empty line as a fallback. This signals an error to the
    # evaluation.
    lines = [l.strip() for l in clean.strip().split('\n') if l.strip()]
    if lines:
        return lines[-1]
    return clean.strip()


def run_agent_on_task(prompt):
    """Run the graph agent on a single prompt and return its final text."""
    messages = [HumanMessage(content=prompt)]
    result = graph.invoke({"messages": messages})
    # The last message is the AI's final response
    return result["messages"][-1].content


def _file_instruction(file_path):
    """Return the tool-usage hint to append to the prompt for *file_path*,
    chosen by the file's extension."""
    ext = file_path.split('.')[-1].lower() if '.' in file_path else ''
    if ext in ['mp3', 'wav', 'ogg', 'flac', 'm4a']:
        return f"\n\nIMPORTANT: An audio file has been downloaded to: '{file_path}'. You MUST use the transcribe_audio tool with this exact path to get the content."
    if ext in ['png', 'jpg', 'jpeg', 'gif', 'bmp', 'webp']:
        return f"\n\nIMPORTANT: An image file has been downloaded to: '{file_path}'. You MUST use extract_text_from_image or analyze_image tool with this exact path."
    if ext == 'pdf':
        return f"\n\nIMPORTANT: A PDF file has been downloaded to: '{file_path}'. You MUST use the read_pdf_file tool with this exact path."
    if ext in ['csv']:
        return f"\n\nIMPORTANT: A CSV file has been downloaded to: '{file_path}'. You MUST use analyze_csv_file or execute_code_multilang to read it."
    if ext in ['xlsx', 'xls']:
        return f"\n\nIMPORTANT: An Excel file has been downloaded to: '{file_path}'. You MUST use analyze_excel_file or execute_code_multilang to read it."
    if ext in ['py', 'txt', 'json', 'xml', 'html', 'css', 'js', 'md']:
        return f"\n\nIMPORTANT: A text/code file has been downloaded to: '{file_path}'. You MUST use read_file_content to read it first."
    return f"\n\nIMPORTANT: A file has been downloaded to: '{file_path}'. You MUST use read_file_content or execute_code_multilang to read it."


def run_evaluation(profile: gr.OAuthProfile | None):
    """Main function to run the agent and submit results.

    Args:
        profile: OAuth profile injected by gr.LoginButton, or None when the
            user is not logged in.

    Returns:
        tuple[str, pd.DataFrame]: Human-readable run log, and a table of
        (task id, question preview, submitted answer) rows.
    """
    try:
        if profile is None:
            return "āŒ Error: You must be logged in to submit. Please log in with Hugging Face.", pd.DataFrame()
        username = profile.username
    except Exception as e:
        return f"āŒ Error retrieving user profile: {e}", pd.DataFrame()

    output = f"šŸš€ Antientropy Agent (LangGraph Edition) Initiated for user {username}...\n\n"

    # Check token again just in case
    if not os.environ.get("HF_TOKEN"):
        return (
            "āŒ Missing HF token for model access. "
            "Add a Space secret named HF_TOKEN with read access.",
            pd.DataFrame(),
        )

    # 1. Fetch Questions
    questions = get_questions()
    output += f"šŸ“„ Fetched {len(questions)} tasks from the API.\n\n"

    submission_results = []
    questions_and_answers = []

    # 2. Solve Each Question
    per_task_timeout_sec = 180  # Increased timeout for LangGraph to 3 minutes
    for i, task in enumerate(questions, 1):
        task_id = task.get("id") or task.get("task_id")
        question_text = task.get("question")
        if not task_id or question_text is None:
            output += f"āš ļø Skipping malformed task: {task}\n\n"
            continue

        output += f"--- Solving Task {i}/{len(questions)} (ID: {task_id}) ---\n"

        # Add delay to avoid rate limits
        time.sleep(5)

        file_path = download_file(task_id)

        # Construct prompt with specific guidance based on file type
        prompt = f"Question: {question_text}"
        if file_path:
            prompt += _file_instruction(file_path)

        # BUG FIX: the executor is no longer used as a context manager.
        # `with ThreadPoolExecutor(...)` blocks in __exit__ (shutdown(wait=True))
        # until the worker thread finishes, so a hung agent call nullified the
        # per-task timeout. shutdown(wait=False, cancel_futures=True) lets the
        # loop move on immediately.
        executor = concurrent.futures.ThreadPoolExecutor(max_workers=1)
        try:
            future = executor.submit(run_agent_on_task, prompt)
            response = future.result(timeout=per_task_timeout_sec)
            final_answer = clean_answer(response)
            output += f"āœ… Agent Answer: {final_answer}\n\n"
            submission_results.append({
                "task_id": task_id,
                "submitted_answer": final_answer
            })
            questions_and_answers.append({
                "Task ID": task_id,
                "Question": question_text[:100] + "...",
                "Submitted Answer": final_answer
            })
        except concurrent.futures.TimeoutError:
            output += f"ā³ Timeout on task {task_id} after {per_task_timeout_sec}s\n\n"
            submission_results.append({
                "task_id": task_id,
                "submitted_answer": "Timeout"
            })
            questions_and_answers.append({
                "Task ID": task_id,
                "Question": question_text[:100] + "...",
                "Submitted Answer": "Timeout"
            })
        except Exception as e:
            error_msg = str(e)
            output += f"āŒ Error on task {task_id}: {error_msg}\n\n"
            submission_results.append({
                "task_id": task_id,
                "submitted_answer": "Error"
            })
            questions_and_answers.append({
                "Task ID": task_id,
                "Question": question_text[:100] + "...",
                "Submitted Answer": f"Error: {error_msg}"
            })
        finally:
            executor.shutdown(wait=False, cancel_futures=True)

    # 3. Submit to Leaderboard
    output += "\nšŸ“¤ Submitting results to Leaderboard...\n"
    payload = {
        "username": username,
        "agent_code": SPACE_URL,
        "answers": submission_results
    }
    try:
        # BUG FIX: added a timeout (the GET calls have one) so a stalled
        # scoring API cannot hang the app indefinitely.
        submit_response = requests.post(f"{API_URL}/submit", json=payload, timeout=60)
        if submit_response.status_code == 200:
            result = submit_response.json()
            output += "šŸŽ‰ SUCCESS! Submission received.\n"
            output += json.dumps(result, indent=2)
        else:
            output += f"āš ļø Submission failed: {submit_response.text}"
    except Exception as e:
        output += f"āš ļø Submission failed with error: {e}"

    return output, pd.DataFrame(questions_and_answers)


# --- GRADIO INTERFACE ---
with gr.Blocks(title="Antientropy Final Assignment v5") as demo:
    gr.Markdown("# šŸ•µšŸ»ā€ā™‚ļø Antientropy Agent - GAIA Benchmark v5 (LangGraph + Multimedia)")
    gr.Markdown(
        """
        **Instructions:**
        1. Log in to your Hugging Face account using the button below.
        2. Click 'Run Evaluation & Submit All Answers' to run the agent.
        """
    )
    gr.LoginButton()
    submit_btn = gr.Button("Run Evaluation & Submit All Answers", variant="primary", size="lg")
    with gr.Row():
        status_output = gr.Textbox(label="Run Status / Submission Result", lines=15, max_lines=30)
    with gr.Row():
        results_table = gr.Dataframe(
            headers=["Task ID", "Question", "Submitted Answer"],
            label="Questions and Agent Answers"
        )
    # With inputs=None, Gradio injects the OAuthProfile from the type
    # annotation on run_evaluation's `profile` parameter.
    submit_btn.click(
        fn=run_evaluation,
        inputs=None,
        outputs=[status_output, results_table]
    )

if __name__ == "__main__":
    demo.launch()