# NOTE(review): the three lines that were here ("Spaces:" / "Sleeping" /
# "Sleeping") were Hugging Face Spaces web-page residue from a copy-paste,
# not Python — preserved as this comment so the file parses.
| import os | |
| import gradio as gr | |
| import requests | |
| import pandas as pd | |
| import time | |
| import json | |
| from pathlib import Path | |
| from typing import Optional, Dict | |
# --- Constants ---
# Scoring server for the HF Agents course (unit 4): serves /questions and /submit.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# Local JSON file memoizing question -> answer pairs between runs.
CACHE_FILE = "answers_cache.json"
| # --- Cache Management --- | |
def load_cache() -> Dict[str, str]:
    """Load cached question->answer pairs from CACHE_FILE.

    Returns:
        The cached mapping, or an empty dict when the file is missing,
        unreadable, or contains invalid JSON — a corrupt cache must never
        block a run.
    """
    cache_path = Path(CACHE_FILE)
    if cache_path.exists():
        try:
            with open(cache_path, 'r') as f:
                return json.load(f)
        # FIX: was a bare `except:` which also swallowed KeyboardInterrupt /
        # SystemExit; only I/O and JSON-parse failures should fall back.
        except (OSError, json.JSONDecodeError):
            return {}
    return {}
def save_cache(cache: Dict[str, str]):
    """Persist the answer cache to CACHE_FILE as indented JSON."""
    serialized = json.dumps(cache, indent=2)
    Path(CACHE_FILE).write_text(serialized)
| # --- Improved Agent --- | |
class BasicAgent:
    """Minimal QA agent backed by the HF hosted inference API (flan-t5-large).

    Answers are memoized in a local JSON cache (see load_cache/save_cache),
    keyed by the lowercased, stripped question text.
    """

    def __init__(self):
        print("π€ Agent initialized.")
        self.api_url = "https://api-inference.huggingface.co/models/google/flan-t5-large"
        # An empty token still works for public models, just rate-limited harder.
        self.headers = {
            "Authorization": f"Bearer {os.getenv('HF_TOKEN', '')}"
        }
        self.cache = load_cache()

    def _query_model(self, prompt: str) -> str:
        """Query the HF inference API with exponential-backoff retries.

        Returns the model's generated text, or "" when every attempt fails
        or the response shape is unrecognized.
        """
        max_retries = 3
        for attempt in range(max_retries):
            try:
                response = requests.post(
                    self.api_url,
                    headers=self.headers,
                    json={
                        "inputs": prompt,
                        "parameters": {
                            "max_new_tokens": 100,
                            "return_full_text": False,
                            "temperature": 0.1,
                            "do_sample": False
                        }
                    },
                    timeout=45
                )
                if response.status_code == 200:
                    result = response.json()
                    if isinstance(result, list) and len(result) > 0:
                        return result[0].get("generated_text", "").strip()
                    elif isinstance(result, dict):
                        return result.get("generated_text", "").strip()
                # FIX: a non-200 response (e.g. 503 while the hosted model is
                # still loading) previously retried immediately with no delay;
                # back off the same way the exception path does.
                if attempt < max_retries - 1:
                    time.sleep(2 ** attempt)
            except Exception as e:
                print(f"Attempt {attempt + 1} failed: {e}")
                if attempt < max_retries - 1:
                    time.sleep(2 ** attempt)
        return ""

    def _clean_answer(self, answer: str) -> str:
        """Clean and normalize a raw model completion into a bare answer.

        Strips common lead-in prefixes, trailing '.'/',', anything after the
        first newline, and one layer of surrounding quotes.
        """
        if not answer:
            return ""
        prefixes = [
            "Answer:", "Final Answer:", "The answer is", "The answer:",
            "Answer is", "Result:", "Output:"
        ]
        for prefix in prefixes:
            if answer.startswith(prefix):
                answer = answer[len(prefix):].strip()
        answer = answer.strip()
        answer = answer.rstrip('.')
        answer = answer.rstrip(',')
        # Graders expect a single-line answer; drop any trailing explanation.
        answer = answer.split('\n')[0].strip()
        if answer.startswith('"') and answer.endswith('"'):
            answer = answer[1:-1]
        if answer.startswith("'") and answer.endswith("'"):
            answer = answer[1:-1]
        return answer

    def __call__(self, question: str) -> str:
        """Answer a question, consulting/updating the local cache.

        Tries the primary prompt first; if that yields nothing usable
        (< 2 chars), retries once with an alternate phrasing, and finally
        falls back to "unknown".
        """
        print(f"β Question: {question[:80]}...")
        cache_key = question.strip().lower()
        if cache_key in self.cache:
            print("β Using cached answer")
            return self.cache[cache_key]
        prompt = f"""Answer the following question concisely with ONLY the final answer.
No explanation. No full sentences. Just the answer.
Question: {question}
Answer:"""
        raw_answer = self._query_model(prompt)
        answer = self._clean_answer(raw_answer)
        if not answer or len(answer) < 2:
            alt_prompt = f"""What is the answer to: {question}
Give only the answer, nothing else."""
            raw_answer = self._query_model(alt_prompt)
            answer = self._clean_answer(raw_answer)
        if not answer:
            answer = "unknown"
        print(f"π‘ Answer: {answer}")
        self.cache[cache_key] = answer
        save_cache(self.cache)
        return answer
| # --- Main Evaluation Function --- | |
def run_and_submit_all(profile: Optional[gr.OAuthProfile] = None):
    """Fetches all questions, runs the agent, submits answers, and displays results.

    Args:
        profile: OAuth profile injected by gr.LoginButton; None when the
            user is not logged in.

    Returns:
        A (status_markdown, results_dataframe) pair for the Gradio outputs;
        the dataframe is None on early failures.
    """
    space_id = os.getenv("SPACE_ID", "unknown/user-space")
    if profile:
        username = f"{profile.username}".strip()
        print(f"β User logged in: {username}")
    else:
        print("β User not logged in.")
        return "β Please Login to Hugging Face with the button above.", None

    def _preview(q: str) -> str:
        # Keep the results table readable; the agent still sees the full text.
        return q[:100] + "..." if len(q) > 100 else q

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"
    # Link to this Space's code, sent along with the submission.
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    try:
        agent = BasicAgent()
        print("β Agent initialized successfully")
    except Exception as e:
        return f"β Error initializing agent: {e}", None
    print(f"π₯ Fetching questions from: {questions_url}")
    try:
        response = requests.get(questions_url, timeout=30)
        response.raise_for_status()
        questions_data = response.json()
        if not questions_data or not isinstance(questions_data, list):
            return "β Fetched questions list is empty or invalid format.", None
        print(f"β Fetched {len(questions_data)} questions.")
    except Exception as e:
        return f"β Error fetching questions: {e}", None
    results_log = []
    answers_payload = []
    print(f"π Running agent on {len(questions_data)} questions...")
    for idx, item in enumerate(questions_data):
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or question_text is None:
            print(f"β οΈ Skipping item with missing task_id or question: {item}")
            continue
        try:
            # Gentle rate limiting between model calls.
            time.sleep(0.5)
            submitted_answer = agent(question_text)
            answers_payload.append({
                "task_id": task_id,
                "submitted_answer": submitted_answer
            })
            results_log.append({
                "Task ID": task_id,
                "Question": _preview(question_text),
                "Submitted Answer": submitted_answer
            })
            print(f" [{idx + 1}/{len(questions_data)}] Task {task_id}: {submitted_answer}")
        except Exception as e:
            # Submit an error marker instead of dropping the task entirely.
            error_msg = f"AGENT ERROR: {str(e)[:100]}"
            print(f" β Error on task {task_id}: {e}")
            answers_payload.append({
                "task_id": task_id,
                "submitted_answer": error_msg
            })
            results_log.append({
                "Task ID": task_id,
                "Question": _preview(question_text),
                "Submitted Answer": error_msg
            })
    if not answers_payload:
        return "β Agent did not produce any answers to submit.", pd.DataFrame(results_log)
    submission_data = {
        "username": username,
        "agent_code": agent_code,
        "answers": answers_payload
    }
    print(f"π€ Submitting {len(answers_payload)} answers for user '{username}'...")
    try:
        response = requests.post(submit_url, json=submission_data, timeout=120)
        response.raise_for_status()
        result_data = response.json()
        score = result_data.get('score', 'N/A')
        correct = result_data.get('correct_count', '?')
        total = result_data.get('total_attempted', '?')
        final_status = (
            f"β **Submission Successful!**\n\n"
            f"**User:** {result_data.get('username', username)}\n"
            f"**Overall Score:** {score}% \n"
            f"**Correct:** {correct}/{total}\n"
            f"**Message:** {result_data.get('message', 'No message received.')}"
        )
        print("β Submission successful!")
        results_df = pd.DataFrame(results_log)
        return final_status, results_df
    except requests.exceptions.HTTPError as e:
        error_detail = f"Server responded with status {e.response.status_code}."
        try:
            error_json = e.response.json()
            error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
        # FIX: was a bare `except:`; response.json() raises ValueError
        # (json.JSONDecodeError) on a non-JSON error body.
        except ValueError:
            error_detail += f" Response: {e.response.text[:500]}"
        results_df = pd.DataFrame(results_log)
        return f"β **Submission Failed:** {error_detail}", results_df
    except Exception as e:
        results_df = pd.DataFrame(results_log)
        return f"β **Submission Failed:** {str(e)}", results_df
| # --- Gradio Interface --- | |
# --- Gradio Interface ---
# Declarative UI: login button, a run button wired to run_and_submit_all,
# a status textbox, and a results table.
with gr.Blocks() as demo:
    gr.Markdown("# π€ Basic Agent Evaluation Runner")
    gr.Markdown(
        """
        ### Instructions:
        1. **Log in** to your Hugging Face account using the button below
        2. **Click** 'Run Evaluation & Submit All Answers'
        3. **Wait** for the agent to process all questions (may take 2-5 minutes)
        4. **View** your score and detailed results
        ---
        ### Tips for Better Scores:
        - Ensure you have a valid HF_TOKEN in your Space secrets
        - First run builds cache, subsequent runs are faster
        - Agent uses google/flan-t5-large for better accuracy
        """
    )
    # Provides the gr.OAuthProfile that Gradio injects into run_and_submit_all.
    gr.LoginButton()
    run_button = gr.Button("π Run Evaluation & Submit All Answers", variant="primary")
    status_output = gr.Textbox(
        label="π Run Status / Submission Result",
        lines=8,
        interactive=False,
        container=True
    )
    # NOTE: 'height' was removed here — not supported by this gr.DataFrame version.
    results_table = gr.DataFrame(
        label="π Questions and Agent Answers",
        wrap=True
    )
    # inputs=None: the OAuth profile is auto-injected from the login session.
    run_button.click(
        fn=run_and_submit_all,
        inputs=None,
        outputs=[status_output, results_table]
    )
    gr.Markdown(
        """
        ---
        **Note:** Answers are cached locally. Clear `answers_cache.json` to re-run all questions.
        """
    )
if __name__ == "__main__":
    # Print a startup banner with Space diagnostics, then launch the UI.
    banner = "=" * 60
    print("\n" + banner)
    print("π Basic Agent Evaluation Runner Starting...")
    print(banner)

    host = os.getenv("SPACE_HOST")
    repo = os.getenv("SPACE_ID")
    if host:
        print(f"β SPACE_HOST: {host}")
        print(f" Runtime URL: https://{host}.hf.space")
    if repo:
        print(f"β SPACE_ID: {repo}")
        print(f" Repo URL: https://huggingface.co/spaces/{repo}")
    print(banner + "\n")

    demo.launch(debug=False, share=False)