Spaces:
Sleeping
Sleeping
| import os | |
| import time | |
| import gradio as gr | |
| import requests | |
| import pandas as pd | |
| from smolagents import ( | |
| CodeAgent, | |
| InferenceClientModel, | |
| DuckDuckGoSearchTool, | |
| WikipediaSearchTool, | |
| PythonInterpreterTool, | |
| VisitWebpageTool, | |
| tool, | |
| ) | |
| DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space" | |
| def get_current_date_time() -> str: | |
| """Returns the current date and time in ISO format.""" | |
| from datetime import datetime | |
| return datetime.now().isoformat() | |
| class StrictHuggingFaceAgent: | |
| def __init__(self): | |
| print("Initializing Strict Hugging Face Agent with Few-Shot Prompting...") | |
| hf_token = os.getenv("HF_TOKEN") | |
| if not hf_token: | |
| raise ValueError("HF_TOKEN environment variable not set in Space Secrets.") | |
| self.model = InferenceClientModel( | |
| model_id="Qwen/Qwen2.5-Coder-32B-Instruct", | |
| token=hf_token, | |
| ) | |
| self.tools = [ | |
| DuckDuckGoSearchTool(), | |
| WikipediaSearchTool(), | |
| VisitWebpageTool(), | |
| PythonInterpreterTool(), | |
| get_current_date_time, | |
| ] | |
| self.agent = CodeAgent( | |
| tools=self.tools, | |
| model=self.model, | |
| max_steps=7, | |
| additional_authorized_imports=["datetime", "re", "json", "math", "collections", "pandas", "requests", "bs4"], | |
| ) | |
| print("Agent ready.") | |
| def __call__(self, question: str) -> str: | |
| print(f"\nAgent received question: {question[:80]}...") | |
| max_retries = 3 | |
| for attempt in range(max_retries): | |
| try: | |
| time.sleep(2) | |
| answer = self.agent.run(question) | |
| # Clean up any accidental leading/trailing whitespace or quotes the agent might slip in | |
| clean_answer = str(answer).strip(" '\"\n\t.") | |
| print(f"Agent answer: {clean_answer}") | |
| return clean_answer | |
| except Exception as e: | |
| err_msg = str(e).lower() | |
| if "429" in err_msg or "rate limit" in err_msg or "too many requests" in err_msg: | |
| wait_time = 20 * (attempt + 1) | |
| print(f"Rate limit hit! Pausing for {wait_time} seconds before retrying...") | |
| time.sleep(wait_time) | |
| else: | |
| print(f"Agent error processing question: {e}") | |
| return f"Error: {str(e)}" | |
| return "Error: Rate limit exceeded after maximum retries." | |
| # --- App Runner --- | |
| def run_and_submit_all(profile: gr.OAuthProfile | None): | |
| space_id = os.getenv("SPACE_ID") | |
| if profile: | |
| username = f"{profile.username}" | |
| print(f"User logged in: {username}") | |
| else: | |
| print("User not logged in.") | |
| return "Please Login to Hugging Face with the button.", None | |
| api_url = DEFAULT_API_URL | |
| questions_url = f"{api_url}/questions" | |
| submit_url = f"{api_url}/submit" | |
| try: | |
| agent = StrictHuggingFaceAgent() | |
| except Exception as e: | |
| print(f"Error instantiating agent: {e}") | |
| return f"Error initializing agent: {e}", None | |
| agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" | |
| try: | |
| response = requests.get(questions_url, timeout=15) | |
| response.raise_for_status() | |
| questions_data = response.json() | |
| if not questions_data: | |
| return "No questions.", None | |
| except Exception as e: | |
| return f"Error fetching questions: {e}", None | |
| results_log = [] | |
| answers_payload = [] | |
| print(f"Running agent on {len(questions_data)} questions...") | |
| for i, item in enumerate(questions_data): | |
| task_id = item.get("task_id") | |
| question_text = item.get("question") | |
| file_url = item.get("file_url") | |
| if not task_id or not question_text: | |
| continue | |
| # Inject the file URL if it exists | |
| if file_url: | |
| question_text += f"\n\n[IMPORTANT: This task requires analyzing an attached file. You MUST download or read it directly from this URL: {file_url} using your Python tool.]" | |
| # The ultimate, unbreakable strict prompt WITH few-shot examples | |
| ultra_strict_prompt = ( | |
| f"{question_text}\n\n" | |
| "=== CRITICAL OUTPUT INSTRUCTIONS ===\n" | |
| "You are being evaluated by a strict programmatic regex parser.\n" | |
| "Your final answer MUST consist of ONLY the exact requested name, number, or string.\n" | |
| "DO NOT wrap your answer in quotes, DO NOT add a trailing period, and DO NOT provide any explanation or conversational filler.\n\n" | |
| "Here are examples of perfect submissions:\n" | |
| "Example 1\n" | |
| "Question: What is the first name of the only Malko Competition recipient from the 20th Century (after 1977) whose nationality on record is a country that no longer exists?\n" | |
| "Answer: Claus\n\n" | |
| "Example 2\n" | |
| "Question: How many at bats did the Yankee with the most walks in the 1977 regular season have that same season?\n" | |
| "Answer: 519\n\n" | |
| "Example 3\n" | |
| "Question: .rewsna eht sa \"tfel\" drow eht fo etisoppo eht etirw ,ecnetnes siht dnatsrednu uoy fI\n" | |
| "Answer: right\n\n" | |
| "Failure to follow these instructions perfectly will result in an immediate score of 0." | |
| ) | |
| try: | |
| submitted_answer = agent(ultra_strict_prompt) | |
| answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer}) | |
| results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer}) | |
| except Exception as e: | |
| results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"ERROR: {e}"}) | |
| # 15 second cooldown to protect your new Hugging Face token limits | |
| print("Cooling down for 15 seconds to protect quotas...") | |
| time.sleep(15) | |
| if not answers_payload: | |
| return "No answers.", pd.DataFrame(results_log) | |
| submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload} | |
| print(f"Submitting {len(answers_payload)} answers...") | |
| try: | |
| # INCREASED TIMEOUT TO 300 SECONDS (5 Minutes) to allow the scoring server to wake up! | |
| response = requests.post(submit_url, json=submission_data, timeout=300) | |
| response.raise_for_status() | |
| result_data = response.json() | |
| final_status = ( | |
| f"Submission Successful!\n" | |
| f"User: {result_data.get('username')}\n" | |
| f"Score: {result_data.get('score')}%\n" | |
| f"Correct: {result_data.get('correct_count')}/{result_data.get('total_attempted')}\n" | |
| f"Message: {result_data.get('message')}" | |
| ) | |
| print("\n" + "="*40) | |
| print(final_status) | |
| print("="*40 + "\n") | |
| return final_status, pd.DataFrame(results_log) | |
| except Exception as e: | |
| # ADDED PRINT STATEMENT so you can actually see the error in the logs! | |
| error_msg = f"Submission failed: {e}" | |
| print(f"\n🚨 {error_msg} 🚨\n") | |
| return error_msg, pd.DataFrame(results_log) | |
| # --- Build Gradio UI --- | |
| with gr.Blocks() as demo: | |
| gr.Markdown("# Strict Hugging Face Evaluation Runner (Few-Shot Edition)") | |
| gr.Markdown( | |
| """ | |
| **Instructions:** | |
| 1. Ensure your fresh `HF_TOKEN` is set in Space Secrets. | |
| 2. Log in below. | |
| 3. Click 'Run Evaluation & Submit' to start. | |
| """ | |
| ) | |
| gr.LoginButton() | |
| run_button = gr.Button("Run Evaluation & Submit All Answers") | |
| status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False) | |
| results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True) | |
| run_button.click( | |
| fn=run_and_submit_all, | |
| outputs=[status_output, results_table] | |
| ) | |
| if __name__ == "__main__": | |
| print("Starting Gradio app...") | |
| demo.launch(debug=True, share=False) |