import os
import json
import time
from pathlib import Path
from typing import Optional, Dict

import gradio as gr
import pandas as pd
import requests

# --- Constants ---
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
CACHE_FILE = "answers_cache.json"


# --- Cache Management ---
def load_cache() -> Dict[str, str]:
    """Load cached question -> answer pairs from CACHE_FILE.

    Returns an empty dict when the file is missing, unreadable, or holds
    invalid JSON, so a corrupt cache never blocks a run.
    """
    cache_path = Path(CACHE_FILE)
    if cache_path.exists():
        try:
            with open(cache_path, "r", encoding="utf-8") as f:
                return json.load(f)
        # FIX: was a bare `except:` (also swallowed KeyboardInterrupt/SystemExit);
        # tolerate only the expected failure modes.
        except (json.JSONDecodeError, OSError):
            return {}
    return {}


def save_cache(cache: Dict[str, str]) -> None:
    """Persist the answer cache to CACHE_FILE as pretty-printed JSON."""
    with open(CACHE_FILE, "w", encoding="utf-8") as f:
        json.dump(cache, f, indent=2)


# --- Improved Agent ---
class BasicAgent:
    """Minimal QA agent backed by the HF Inference API (google/flan-t5-large).

    Answers are cached on disk so repeated runs skip the model entirely.
    """

    def __init__(self):
        print("🤖 Agent initialized.")
        self.api_url = "https://api-inference.huggingface.co/models/google/flan-t5-large"
        self.headers = {
            "Authorization": f"Bearer {os.getenv('HF_TOKEN', '')}"
        }
        # Maps normalized question text -> answer; persisted via save_cache().
        self.cache = load_cache()

    def _query_model(self, prompt: str) -> str:
        """Query the HF inference API with exponential-backoff retries.

        Returns the generated text, or "" when all attempts fail.
        """
        max_retries = 3
        for attempt in range(max_retries):
            try:
                response = requests.post(
                    self.api_url,
                    headers=self.headers,
                    json={
                        "inputs": prompt,
                        "parameters": {
                            "max_new_tokens": 100,
                            "return_full_text": False,
                            "temperature": 0.1,
                            "do_sample": False,
                        },
                    },
                    timeout=45,
                )
                if response.status_code == 200:
                    result = response.json()
                    # The API returns either a list of generations or a dict.
                    if isinstance(result, list) and len(result) > 0:
                        return result[0].get("generated_text", "").strip()
                    elif isinstance(result, dict):
                        return result.get("generated_text", "").strip()
            except Exception as e:  # broad on purpose: any failure triggers a retry
                print(f"Attempt {attempt + 1} failed: {e}")
            # FIX: the sleep used to live inside the except clause, so a non-200
            # response (e.g. 503 "model loading") retried instantly with no backoff.
            if attempt < max_retries - 1:
                time.sleep(2 ** attempt)
        return ""

    def _clean_answer(self, answer: str) -> str:
        """Strip boilerplate prefixes, trailing punctuation, and wrapping quotes."""
        if not answer:
            return ""
        prefixes = [
            "Answer:", "Final Answer:", "The answer is", "The answer:",
            "Answer is", "Result:", "Output:",
        ]
        for prefix in prefixes:
            if answer.startswith(prefix):
                answer = answer[len(prefix):].strip()
        answer = answer.strip()
        answer = answer.rstrip('.')
        answer = answer.rstrip(',')
        # Keep only the first line of a multi-line generation.
        answer = answer.split('\n')[0].strip()
        # Drop symmetric surrounding quotes.
        # FIX: guard len >= 2 — a lone quote character used to become "".
        if len(answer) >= 2:
            if answer.startswith('"') and answer.endswith('"'):
                answer = answer[1:-1]
            if answer.startswith("'") and answer.endswith("'"):
                answer = answer[1:-1]
        return answer

    def __call__(self, question: str) -> str:
        """Answer a question, consulting and updating the persistent cache."""
        print(f"❓ Question: {question[:80]}...")
        cache_key = question.strip().lower()
        if cache_key in self.cache:
            print("✅ Using cached answer")
            return self.cache[cache_key]

        prompt = f"""Answer the following question concisely with ONLY the final answer.
No explanation. No full sentences. Just the answer.

Question: {question}

Answer:"""
        raw_answer = self._query_model(prompt)
        answer = self._clean_answer(raw_answer)

        # Fallback prompt when the first attempt produced nothing useful.
        if not answer or len(answer) < 2:
            alt_prompt = f"""What is the answer to: {question}
Give only the answer, nothing else."""
            raw_answer = self._query_model(alt_prompt)
            answer = self._clean_answer(raw_answer)

        if not answer:
            answer = "unknown"

        print(f"💡 Answer: {answer}")
        self.cache[cache_key] = answer
        save_cache(self.cache)  # persist immediately so partial runs keep progress
        return answer


# --- Main Evaluation Function ---
def run_and_submit_all(profile: Optional[gr.OAuthProfile] = None):
    """Fetches all questions, runs the agent, submits answers, and displays results.

    Returns a (status_markdown, results_dataframe) pair for the Gradio outputs;
    the dataframe is None when the run aborts before any questions are answered.
    """
    space_id = os.getenv("SPACE_ID", "unknown/user-space")

    if profile:
        username = f"{profile.username}".strip()
        print(f"✅ User logged in: {username}")
    else:
        print("❌ User not logged in.")
        return "❌ Please Login to Hugging Face with the button above.", None

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"
    # Link to this Space's code, included in the submission payload.
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"

    try:
        agent = BasicAgent()
        print("✅ Agent initialized successfully")
    except Exception as e:
        return f"❌ Error initializing agent: {e}", None

    print(f"📥 Fetching questions from: {questions_url}")
    try:
        response = requests.get(questions_url, timeout=30)
        response.raise_for_status()
        questions_data = response.json()
        if not questions_data or not isinstance(questions_data, list):
            return "❌ Fetched questions list is empty or invalid format.", None
        print(f"✅ Fetched {len(questions_data)} questions.")
    except Exception as e:
        return f"❌ Error fetching questions: {e}", None

    results_log = []
    answers_payload = []
    print(f"🔄 Running agent on {len(questions_data)} questions...")

    for idx, item in enumerate(questions_data):
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or question_text is None:
            print(f"⚠️ Skipping item with missing task_id or question: {item}")
            continue
        try:
            time.sleep(0.5)  # gentle pacing to avoid hammering the inference API
            submitted_answer = agent(question_text)
            answers_payload.append({
                "task_id": task_id,
                "submitted_answer": submitted_answer,
            })
            results_log.append({
                "Task ID": task_id,
                "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
                "Submitted Answer": submitted_answer,
            })
            print(f"  [{idx + 1}/{len(questions_data)}] Task {task_id}: {submitted_answer}")
        except Exception as e:
            # Submit a marker answer rather than aborting the whole run.
            error_msg = f"AGENT ERROR: {str(e)[:100]}"
            print(f"  ❌ Error on task {task_id}: {e}")
            answers_payload.append({
                "task_id": task_id,
                "submitted_answer": error_msg,
            })
            results_log.append({
                "Task ID": task_id,
                "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
                "Submitted Answer": error_msg,
            })

    if not answers_payload:
        return "❌ Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    submission_data = {
        "username": username,
        "agent_code": agent_code,
        "answers": answers_payload,
    }

    print(f"📤 Submitting {len(answers_payload)} answers for user '{username}'...")
    try:
        response = requests.post(submit_url, json=submission_data, timeout=120)
        response.raise_for_status()
        result_data = response.json()
        score = result_data.get('score', 'N/A')
        correct = result_data.get('correct_count', '?')
        total = result_data.get('total_attempted', '?')
        final_status = (
            f"✅ **Submission Successful!**\n\n"
            f"**User:** {result_data.get('username', username)}\n"
            f"**Overall Score:** {score}% \n"
            f"**Correct:** {correct}/{total}\n"
            f"**Message:** {result_data.get('message', 'No message received.')}"
        )
        print("✅ Submission successful!")
        results_df = pd.DataFrame(results_log)
        return final_status, results_df
    except requests.exceptions.HTTPError as e:
        error_detail = f"Server responded with status {e.response.status_code}."
        try:
            error_json = e.response.json()
            error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
        # FIX: was a bare `except:`; response.json() raises ValueError on non-JSON bodies.
        except ValueError:
            error_detail += f" Response: {e.response.text[:500]}"
        results_df = pd.DataFrame(results_log)
        return f"❌ **Submission Failed:** {error_detail}", results_df
    except Exception as e:
        results_df = pd.DataFrame(results_log)
        return f"❌ **Submission Failed:** {str(e)}", results_df


# --- Gradio Interface ---
with gr.Blocks() as demo:
    gr.Markdown("# 🤖 Basic Agent Evaluation Runner")
    gr.Markdown(
        """
        ### Instructions:
        1. **Log in** to your Hugging Face account using the button below
        2. **Click** 'Run Evaluation & Submit All Answers'
        3. **Wait** for the agent to process all questions (may take 2-5 minutes)
        4. **View** your score and detailed results

        ---
        ### Tips for Better Scores:
        - Ensure you have a valid HF_TOKEN in your Space secrets
        - First run builds cache, subsequent runs are faster
        - Agent uses google/flan-t5-large for better accuracy
        """
    )

    gr.LoginButton()

    run_button = gr.Button("🚀 Run Evaluation & Submit All Answers", variant="primary")

    status_output = gr.Textbox(
        label="📊 Run Status / Submission Result",
        lines=8,
        interactive=False,
        container=True,
    )

    # ✅ FIXED: Removed 'height' parameter
    results_table = gr.DataFrame(
        label="📋 Questions and Agent Answers",
        wrap=True,
    )

    run_button.click(
        fn=run_and_submit_all,
        inputs=None,
        outputs=[status_output, results_table],
    )

    gr.Markdown(
        """
        ---
        **Note:** Answers are cached locally. Clear `answers_cache.json` to re-run all questions.
        """
    )


if __name__ == "__main__":
    print("\n" + "=" * 60)
    print("🚀 Basic Agent Evaluation Runner Starting...")
    print("=" * 60)

    space_host = os.getenv("SPACE_HOST")
    space_id = os.getenv("SPACE_ID")

    if space_host:
        print(f"✅ SPACE_HOST: {space_host}")
        print(f"   Runtime URL: https://{space_host}.hf.space")
    if space_id:
        print(f"✅ SPACE_ID: {space_id}")
        print(f"   Repo URL: https://huggingface.co/spaces/{space_id}")

    print("=" * 60 + "\n")
    demo.launch(debug=False, share=False)