import os
from typing import Any, Dict, List, Optional, Tuple

import gradio as gr
import pandas as pd
import requests

from crew import run_crew  # ← your multi-agent logic

API_URL = "https://agents-course-unit4-scoring.hf.space"

# Normalized (stripped, lower-cased) answers that are treated as placeholders
# and trigger a console warning during the run.
_PLACEHOLDER_ANSWERS = frozenset({"this is a default answer.", "", "n/a"})


# ─── AGENT WRAPPER ──────────────────────────────────────────────────────────────
class CrewAgent:
    """Callable adapter that forwards a question to ``run_crew``."""

    def __call__(self, question: str) -> str:
        # It MUST use your real crew logic!
        return run_crew(question, file_path="")


agent = CrewAgent()


# ─── HELPERS ────────────────────────────────────────────────────────────────────
def _fetch_questions() -> List[Dict[str, Any]]:
    """Download the question list, raising on HTTP errors or a non-list payload."""
    resp = requests.get(f"{API_URL}/questions", timeout=30)
    # Without this an HTTP error body would be parsed as if it were questions.
    resp.raise_for_status()
    payload = resp.json()
    if not isinstance(payload, list):
        raise ValueError(
            f"expected a list of questions, got {type(payload).__name__}"
        )
    return payload


def _answer_questions(
    questions: List[Dict[str, Any]],
) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:
    """Run the agent on each question and return ``(answers, log)``.

    ``answers`` holds the submission payload entries; ``log`` holds the rows
    shown to the user. Entries lacking ``task_id`` or ``question`` are skipped
    with a console warning instead of aborting the whole run.
    """
    answers: List[Dict[str, Any]] = []
    log: List[Dict[str, Any]] = []
    for item in questions:
        if (
            not isinstance(item, dict)
            or "task_id" not in item
            or "question" not in item
        ):
            print(f"⚠️ Skipping malformed question entry: {item!r}")
            continue
        qid, qtxt = item["task_id"], item["question"]

        # Only the agent call is guarded: it runs arbitrary crew logic, so any
        # failure is recorded as the answer rather than stopping the loop.
        try:
            raw = agent(qtxt)
        except Exception as e:
            ans = f"AGENT ERROR: {e}"
            print(f"⚠️ Agent error on QID {qid}: {e}")
        else:
            # Coerce to text so a non-string result is submitted as-is rather
            # than being lost to an AttributeError on ``.strip()``.
            ans = "" if raw is None else str(raw)
            # Debug print:
            print(f"QID: {qid} | Q: {str(qtxt)[:60]}... | Agent Answer: {ans}")
            # Add warning if placeholder detected
            if ans.strip().lower() in _PLACEHOLDER_ANSWERS:
                print(
                    f"⚠️ Warning: Agent returned a default/empty answer for QID {qid}."
                )

        answers.append({"task_id": qid, "submitted_answer": ans})
        log.append({"Task ID": qid, "Question": qtxt, "Answer": ans})
    return answers, log


def _submit_answers(
    username: str, agent_code: str, answers: List[Dict[str, Any]]
) -> str:
    """POST the answers to the scoring API and return the success status text."""
    resp = requests.post(
        f"{API_URL}/submit",
        json={"username": username, "agent_code": agent_code, "answers": answers},
        timeout=60,
    )
    resp.raise_for_status()
    data = resp.json()
    if not isinstance(data, dict):
        raise ValueError(f"unexpected response payload: {data!r}")
    return (
        "✅ Submission successful!\n"
        f"Score: {data.get('score')} % "
        f"({data.get('correct_count')}/{data.get('total_attempted')})\n"
        f"Message: {data.get('message')}"
    )


# ─── MAIN HANDLER ───────────────────────────────────────────────────────────────
def evaluate_and_submit(username: str) -> Tuple[str, Optional[pd.DataFrame]]:
    """Run the agent on the benchmark questions and submit the answers.

    Progress and per-question results are printed to the console for debugging.

    Args:
        username: Hugging Face username to submit under; surrounding
            whitespace is ignored.

    Returns:
        A ``(status, table)`` pair. ``status`` is a human-readable message.
        ``table`` is a DataFrame with one row per answered question, or
        ``None`` when the run stopped before any question was processed
        (missing username or failed question fetch).
    """
    username = (username or "").strip()
    if not username:
        return "❌ Please enter your Hugging Face username.", None

    space_id = os.getenv("SPACE_ID", "")
    agent_code = (
        f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else ""
    )

    # 1) Fetch questions
    try:
        questions = _fetch_questions()
    except (requests.RequestException, ValueError) as e:
        return f"❌ Failed to fetch questions: {e}", None

    # 2) Answer questions, logging every result
    answers, log = _answer_questions(questions)
    df = pd.DataFrame(log)

    # Show part of the DataFrame in the console for debugging; a failure here
    # must never prevent the submission.
    try:
        print("=== First 5 results ===")
        print(df.head())
    except Exception as e:
        print(f"DataFrame print error: {e}")

    if not answers:
        return "⚠️ No answers generated.", df

    # 3) Submit
    try:
        status = _submit_answers(username, agent_code, answers)
    except (requests.RequestException, ValueError) as e:
        status = f"❌ Submission failed: {e}"

    return status, df


# ─── GRADIO UI ──────────────────────────────────────────────────────────────────
demo = gr.Interface(
    fn=evaluate_and_submit,
    inputs=gr.Textbox(label="Hugging Face username", placeholder="e.g. john-doe"),
    outputs=[
        gr.Textbox(label="Status", lines=6),
        gr.DataFrame(label="Submitted Answers"),
    ],
    title="GAIA Agent Submission",
    description=(
        "Enter your Hugging Face username and click **Run Evaluation & Submit**. "
        "The app will run your agent on all benchmark questions and send the answers."
    ),
)

if __name__ == "__main__":
    demo.launch()