Spaces:
Sleeping
Sleeping
| import os | |
| import json | |
| import time | |
| import gradio as gr | |
| from gradio import OAuthProfile | |
| import requests | |
| import concurrent.futures | |
| import pandas as pd | |
| from langchain_core.messages import HumanMessage | |
| from agent import build_graph | |
# --- CONFIGURATION ---
print("DEBUG: Loading Antientropy Agent v5.8 (LangGraph + File Tools) code...")

# Public URL of this Space's source tree; submitted as "agent_code" with answers.
SPACE_URL = "https://huggingface.co/spaces/jomasego/Antientropy/tree/main"
# Base URL of the GAIA scoring API (serves /questions, /files/{id}, /submit).
API_URL = "https://agents-course-unit4-scoring.hf.space"

# --- THE ANTIENTROPY AGENT SETUP ---
# System prompt is now handled in agent.py via system_prompt.txt
# We will still prepend instructions to the user query just in case

# Initialize the Agent
# NOTE(review): build_graph is defined in agent.py — presumably compiles a
# LangGraph state graph whose state carries a "messages" list; confirm there.
graph = build_graph()
| # --- HELPER FUNCTIONS --- | |
def get_questions():
    """Retrieve the full list of evaluation questions.

    Returns:
        list: Parsed JSON list of task dicts from the scoring API, or an
        empty list when the request fails or returns a non-200 status.
    """
    try:
        response = requests.get(f"{API_URL}/questions", timeout=30)
    except requests.RequestException as e:
        # A transient network failure must not crash the whole evaluation
        # run; an empty task list is the established failure signal here.
        print(f"Failed to fetch questions: {e}")
        return []
    if response.status_code == 200:
        return response.json()
    return []
def download_file(task_id):
    """Download the file associated with a task, if it exists.

    Args:
        task_id: Identifier of the GAIA task whose attachment to fetch.

    Returns:
        str | None: Absolute path of the saved file, or None when the task
        has no file (non-200 response) or the request fails.
    """
    try:
        response = requests.get(f"{API_URL}/files/{task_id}", timeout=30)
    except requests.RequestException as e:
        # Treat a network failure like "no file": the caller handles None.
        print(f"Failed to download file for task {task_id}: {e}")
        return None
    if response.status_code != 200:
        return None
    # Use absolute path in /tmp for reliability
    data_dir = "/tmp/gaia_files"
    os.makedirs(data_dir, exist_ok=True)
    content_disp = response.headers.get("Content-Disposition")
    filename = os.path.join(data_dir, f"{task_id}_file")
    if content_disp and "filename=" in content_disp:
        raw_filename = content_disp.split('filename=')[1].strip('"')
        # SECURITY: basename() strips any path components a hostile header
        # could inject (e.g. "../../etc/passwd"), keeping writes in data_dir.
        raw_filename = os.path.basename(raw_filename)
        if raw_filename:
            filename = os.path.join(data_dir, raw_filename)
    with open(filename, "wb") as f:
        f.write(response.content)
    # BUG FIX: the original log line dropped the actual path (placeholder
    # text where {filename} belonged), making debugging downloads impossible.
    print(f"π Downloaded file to: {filename} ({len(response.content)} bytes)")
    return filename
def clean_answer(raw_response):
    """Strip everything except the final answer.

    Searches for a "FINAL ANSWER:" marker (any casing) and returns the text
    after it, reduced to one line with trailing periods removed. When the
    marker is absent, falls back to the last non-empty line of the response.

    Args:
        raw_response: Raw agent output; coerced to str. Falsy input yields "".

    Returns:
        str: The cleaned single-line answer.
    """
    if not raw_response:
        return ""
    clean = str(raw_response)
    # 1. Look for explicit "FINAL ANSWER:" marker (case insensitive)
    import re
    # DOTALL lets the capture run across newlines; we trim to one line below.
    match = re.search(r'FINAL\s*ANSWER\s*:\s*(.+)', clean, re.IGNORECASE | re.DOTALL)
    if match:
        answer = match.group(1).strip()
        # If multiline, take just the first line (the actual answer).
        # BUG FIX: select the first line BEFORE stripping trailing periods.
        # The original rstrip'd the whole multiline capture first, so a
        # first-line answer like "Paris.\nnotes" kept its trailing dot.
        if '\n' in answer:
            answer = answer.split('\n')[0].strip()
        # Remove trailing punctuation that might be added
        return answer.rstrip('.')
    # 2. If no marker, the model didn't follow instructions - return empty or last line
    # This signals an error to the evaluation
    lines = [l.strip() for l in clean.strip().split('\n') if l.strip()]
    if lines:
        # Return the last non-empty line as a fallback
        return lines[-1]
    return clean.strip()
def run_agent_on_task(prompt):
    """Run the graph agent on a single prompt and return its final reply text."""
    final_state = graph.invoke({"messages": [HumanMessage(content=prompt)]})
    # The graph appends its replies to "messages"; the last entry is the
    # AI's final response.
    return final_state["messages"][-1].content
def run_evaluation(profile: gr.OAuthProfile | None):
    """Main function to run the agent and submit results.

    Fetches all GAIA tasks, runs the LangGraph agent on each one (with a
    per-task timeout and file-download handling), then POSTs the collected
    answers to the scoring API.

    Args:
        profile: OAuth profile injected by Gradio's LoginButton, or None
            when the user is not logged in.

    Returns:
        tuple: (status log string, pandas DataFrame of
        Task ID / Question / Submitted Answer rows).
    """
    try:
        if profile is None:
            return "β Error: You must be logged in to submit. Please log in with Hugging Face.", pd.DataFrame()
        username = profile.username
    except Exception as e:
        return f"β Error retrieving user profile: {e}", pd.DataFrame()
    output = f"π Antientropy Agent (LangGraph Edition) Initiated for user {username}...\n\n"
    # Check token again just in case
    if not os.environ.get("HF_TOKEN"):
        return (
            "β Missing HF token for model access. "
            "Add a Space secret named HF_TOKEN with read access.",
            pd.DataFrame(),
        )
    # 1. Fetch Questions
    questions = get_questions()
    output += f"π₯ Fetched {len(questions)} tasks from the API.\n\n"
    submission_results = []      # payload rows: {"task_id", "submitted_answer"}
    questions_and_answers = []   # display rows for the results table
    # 2. Solve Each Question
    per_task_timeout_sec = 180  # Increased timeout for LangGraph to 3 minutes
    for i, task in enumerate(questions, 1):
        # Accept either key name; the API schema has used both forms.
        task_id = task.get("id") or task.get("task_id")
        question_text = task.get("question")
        if not task_id or question_text is None:
            output += f"β οΈ Skipping malformed task: {task}\n\n"
            continue
        output += f"--- Solving Task {i}/{len(questions)} (ID: {task_id}) ---\n"
        # Add delay to avoid rate limits
        time.sleep(5)
        file_path = download_file(task_id)
        # Construct prompt with specific guidance based on file type
        prompt = f"Question: {question_text}"
        if file_path:
            # Steer the agent toward the correct tool for the attachment type.
            ext = file_path.split('.')[-1].lower() if '.' in file_path else ''
            if ext in ['mp3', 'wav', 'ogg', 'flac', 'm4a']:
                prompt += f"\n\nIMPORTANT: An audio file has been downloaded to: '{file_path}'. You MUST use the transcribe_audio tool with this exact path to get the content."
            elif ext in ['png', 'jpg', 'jpeg', 'gif', 'bmp', 'webp']:
                prompt += f"\n\nIMPORTANT: An image file has been downloaded to: '{file_path}'. You MUST use extract_text_from_image or analyze_image tool with this exact path."
            elif ext == 'pdf':
                prompt += f"\n\nIMPORTANT: A PDF file has been downloaded to: '{file_path}'. You MUST use the read_pdf_file tool with this exact path."
            elif ext in ['csv']:
                prompt += f"\n\nIMPORTANT: A CSV file has been downloaded to: '{file_path}'. You MUST use analyze_csv_file or execute_code_multilang to read it."
            elif ext in ['xlsx', 'xls']:
                prompt += f"\n\nIMPORTANT: An Excel file has been downloaded to: '{file_path}'. You MUST use analyze_excel_file or execute_code_multilang to read it."
            elif ext in ['py', 'txt', 'json', 'xml', 'html', 'css', 'js', 'md']:
                prompt += f"\n\nIMPORTANT: A text/code file has been downloaded to: '{file_path}'. You MUST use read_file_content to read it first."
            else:
                prompt += f"\n\nIMPORTANT: A file has been downloaded to: '{file_path}'. You MUST use read_file_content or execute_code_multilang to read it."
        try:
            # Run the agent in a worker thread so we can enforce a hard
            # per-task timeout via future.result().
            with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
                future = executor.submit(run_agent_on_task, prompt)
                response = future.result(timeout=per_task_timeout_sec)
            final_answer = clean_answer(response)
            output += f"β Agent Answer: {final_answer}\n\n"
            submission_results.append({
                "task_id": task_id,
                "submitted_answer": final_answer
            })
            questions_and_answers.append({
                "Task ID": task_id,
                "Question": question_text[:100] + "...",
                "Submitted Answer": final_answer
            })
        except concurrent.futures.TimeoutError:
            # Still submit a placeholder so the payload covers every task.
            output += f"β³ Timeout on task {task_id} after {per_task_timeout_sec}s\n\n"
            submission_results.append({
                "task_id": task_id,
                "submitted_answer": "Timeout"
            })
            questions_and_answers.append({
                "Task ID": task_id,
                "Question": question_text[:100] + "...",
                "Submitted Answer": "Timeout"
            })
        except Exception as e:
            # Broad catch is deliberate: one failing task must not abort
            # the remaining tasks or the final submission.
            error_msg = str(e)
            output += f"β Error on task {task_id}: {error_msg}\n\n"
            submission_results.append({
                "task_id": task_id,
                "submitted_answer": "Error"
            })
            questions_and_answers.append({
                "Task ID": task_id,
                "Question": question_text[:100] + "...",
                "Submitted Answer": f"Error: {error_msg}"
            })
    # 3. Submit to Leaderboard
    output += "\nπ€ Submitting results to Leaderboard...\n"
    payload = {
        "username": username,
        "agent_code": SPACE_URL,
        "answers": submission_results
    }
    try:
        submit_response = requests.post(f"{API_URL}/submit", json=payload)
        if submit_response.status_code == 200:
            result = submit_response.json()
            output += "π SUCCESS! Submission received.\n"
            output += json.dumps(result, indent=2)
        else:
            output += f"β οΈ Submission failed: {submit_response.text}"
    except Exception as e:
        output += f"β οΈ Submission failed with error: {e}"
    return output, pd.DataFrame(questions_and_answers)
# --- GRADIO INTERFACE ---
with gr.Blocks(title="Antientropy Final Assignment v5") as demo:
    gr.Markdown("# π΅π»ββοΈ Antientropy Agent - GAIA Benchmark v5 (LangGraph + Multimedia)")
    gr.Markdown(
        """
        **Instructions:**
        1. Log in to your Hugging Face account using the button below.
        2. Click 'Run Evaluation & Submit All Answers' to run the agent.
        """
    )
    # LoginButton supplies the OAuth session that Gradio injects into
    # run_evaluation's gr.OAuthProfile-annotated parameter.
    gr.LoginButton()
    submit_btn = gr.Button("Run Evaluation & Submit All Answers", variant="primary", size="lg")
    with gr.Row():
        status_output = gr.Textbox(label="Run Status / Submission Result", lines=15, max_lines=30)
    with gr.Row():
        results_table = gr.Dataframe(
            headers=["Task ID", "Question", "Submitted Answer"],
            label="Questions and Agent Answers"
        )
    # inputs=None is intentional: the only argument (the OAuth profile) is
    # auto-injected by Gradio from the login session, not from a component.
    submit_btn.click(
        fn=run_evaluation,
        inputs=None,
        outputs=[status_output, results_table]
    )

if __name__ == "__main__":
    demo.launch()