| import os |
| import time |
| import re |
| import gradio as gr |
| import requests |
| import pandas as pd |
| from groq import Groq |
|
|
| DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space" |
|
|
|
|
class BasicAgent:
    """Question-answering agent backed by a Groq-hosted chat model.

    Calling an instance with a question string returns a short answer string.
    Questions that look like they were written backwards are reversed and sent
    straight to the model; other questions are optionally enriched with a
    Wikipedia (or, failing that, DuckDuckGo) lookup before the model is asked.
    """

    # Characters that normally terminate a forward-written sentence.
    _TERMINATORS = ".?!"
    # Questions containing any of these are answered without a web lookup.
    _MATH_KEYWORDS = ("table", "set s =", "commutative", "calculate", "solve")
    # Lead-ins the model sometimes emits despite being told not to.
    _ANSWER_PREFIXES = ("Final Answer:", "Answer:", "The answer is:", "The answer is")
    _WIKIPEDIA_API = "https://en.wikipedia.org/w/api.php"

    def __init__(self, model: str = "llama-3.1-8b-instant"):
        """Create the Groq client.

        Args:
            model: Name of the chat model used for every completion.

        Raises:
            ValueError: If the ``GROQ_API_KEY`` environment variable is unset
                or empty.
        """
        api_key = os.getenv("GROQ_API_KEY")
        if not api_key:
            raise ValueError("GROQ_API_KEY not set!")
        self.client = Groq(api_key=api_key)
        self.model = model
        print("Groq agent initialized.")

    def is_reversed_text(self, text: str) -> bool:
        """Return True if *text* looks like a sentence written backwards.

        A reversed sentence usually *starts* with its terminal punctuation (or
        a lowercase letter) and *ends* with the capital letter that opened the
        original sentence. Text that already ends in ``.``, ``?`` or ``!`` is
        treated as forward-written.
        """
        sample = text.strip()
        if not sample or sample[-1] in self._TERMINATORS:
            return False
        # Accept a leading terminator: ".rewsna eht ..." is the typical shape
        # of a reversed sentence and was missed by a lowercase-only check.
        if not (sample[0] in self._TERMINATORS or sample[0].islower()):
            return False
        flipped = sample[::-1]
        return flipped[0].isupper() or "?" in flipped[-5:]

    def search_wikipedia(self, query: str) -> str:
        """Return the intro extract of the top Wikipedia hit for *query*.

        The extract is truncated to 2000 characters. Any failure (network,
        HTTP status, unexpected payload) is logged and yields ``""``.
        """
        headers = {"User-Agent": "AgentBot/1.0"}
        try:
            # Step 1: find the best-matching page title.
            search_resp = requests.get(
                self._WIKIPEDIA_API,
                params={
                    "action": "query",
                    "list": "search",
                    "srsearch": query,
                    "format": "json",
                    "srlimit": 1,
                },
                headers=headers,
                timeout=10,
            )
            search_resp.raise_for_status()
            hits = search_resp.json().get("query", {}).get("search", [])
            if not hits:
                return ""
            title = hits[0]["title"]

            # Step 2: fetch the plain-text introduction of that page.
            extract_resp = requests.get(
                self._WIKIPEDIA_API,
                params={
                    "action": "query",
                    "prop": "extracts",
                    "exintro": True,
                    "explaintext": True,
                    "titles": title,
                    "format": "json",
                },
                headers=headers,
                timeout=10,
            )
            extract_resp.raise_for_status()
            pages = extract_resp.json().get("query", {}).get("pages", {})
            for page in pages.values():
                extract = page.get("extract", "")
                if extract:
                    return extract[:2000]
            return ""
        except Exception as e:
            # Best-effort lookup: the agent can still answer without context.
            print(f"Wikipedia search error: {e}")
            return ""

    def search_duckduckgo(self, query: str) -> str:
        """Return newline-joined snippets from the DuckDuckGo instant-answer API.

        Collects the abstract, the direct answer and up to three related-topic
        texts. Any failure is logged and yields ``""``.
        """
        try:
            resp = requests.get(
                "https://api.duckduckgo.com/",
                params={"q": query, "format": "json", "no_html": "1"},
                headers={"User-Agent": "Mozilla/5.0"},
                timeout=8,
            )
            resp.raise_for_status()
            data = resp.json()
            snippets = [data[key] for key in ("Abstract", "Answer") if data.get(key)]
            snippets.extend(
                topic["Text"]
                for topic in data.get("RelatedTopics", [])[:3]
                if isinstance(topic, dict) and topic.get("Text")
            )
            return "\n".join(snippets)
        except Exception as e:
            # Best-effort fallback; log instead of failing silently.
            print(f"DuckDuckGo search error: {e}")
            return ""

    def search_web(self, query: str) -> str:
        """Try Wikipedia first and fall back to DuckDuckGo when it finds nothing."""
        wiki_result = self.search_wikipedia(query)
        if wiki_result:
            return wiki_result
        return self.search_duckduckgo(query)

    def _chat(self, messages: list, max_tokens: int) -> str:
        """Run one deterministic chat completion and return the stripped text."""
        response = self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            max_tokens=max_tokens,
            temperature=0,
        )
        # NOTE(review): content is presumably Optional in the SDK response
        # model; treat a missing value as an empty answer rather than crashing.
        return (response.choices[0].message.content or "").strip()

    def _answer_reversed(self, decoded: str) -> str:
        """Ask the model to answer an already-decoded (un-reversed) question."""
        print("Detected reversed text question, decoding...")
        prompt = (
            "This question was written backwards. Decode it and answer it.\n"
            f"Decoded question: {decoded}\n"
            "\n"
            "Answer with ONLY the final answer, no explanation."
        )
        answer = self._chat([{"role": "user", "content": prompt}], max_tokens=50)
        print(f"Reversed Q decoded -> A: {answer}")
        return answer

    def _strip_answer_prefixes(self, answer: str) -> str:
        """Remove known lead-ins such as "Final Answer:" (case-insensitive)."""
        for prefix in self._ANSWER_PREFIXES:
            if answer.lower().startswith(prefix.lower()):
                answer = answer[len(prefix):].strip()
        return answer

    def __call__(self, question: str) -> str:
        """Answer *question* and return only the final answer text.

        Never raises: any failure is logged and returned as ``"Error: ..."``
        so that a batch run over many questions keeps going.
        """
        try:
            if self.is_reversed_text(question):
                return self._answer_reversed(question.strip()[::-1])

            # Self-contained maths/logic questions gain nothing from a lookup.
            lowered = question.lower()
            needs_search = not any(kw in lowered for kw in self._MATH_KEYWORDS)
            search_results = self.search_web(question) if needs_search else ""
            context = (
                f"\n\nReference Information:\n{search_results}"
                if search_results
                else ""
            )

            prompt = (
                "You are a precise question-answering assistant taking an exam.\n"
                "Answer with ONLY the final answer - no explanation, no extra words, no restating the question.\n"
                'For numbers: give just the number (e.g. "3" not "three studio albums").\n'
                "For names: give just the name.\n"
                'For yes/no: give just "Yes" or "No".\n'
                "For lists: give comma-separated values in the exact format requested.\n"
                "\n"
                f"Question: {question}{context}\n"
                "\n"
                "Final Answer:"
            )
            messages = [
                {
                    "role": "system",
                    "content": "You are a precise, concise question-answering assistant. Never explain your reasoning, only give the final answer.",
                },
                {"role": "user", "content": prompt},
            ]
            answer = self._strip_answer_prefixes(self._chat(messages, max_tokens=150))

            print(f"Q: {question[:60]} | A: {answer[:60]}")
            return answer
        except Exception as e:
            print(f"Error: {e}")
            return f"Error: {e}"
|
|
|
|
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch all questions, answer them with ``BasicAgent`` and submit the results.

    Args:
        profile: The logged-in Hugging Face profile, or ``None`` when the user
            has not logged in.

    Returns:
        A ``(status_message, results)`` tuple. ``results`` is a DataFrame with
        one row per answered question, or ``None`` when the run stopped before
        any question was attempted.
    """
    space_id = os.getenv("SPACE_ID")

    if not profile:
        return "Please Login to Hugging Face with the button.", None

    username = profile.username

    try:
        agent = BasicAgent()
    except Exception as e:
        return f"Error initializing agent: {e}", None

    if not space_id:
        # The link is still sent, but it will not point at a real Space.
        print("Warning: SPACE_ID is not set; the submitted agent_code link will not resolve.")
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"

    try:
        resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=15)
        resp.raise_for_status()
        questions_data = resp.json()
    except Exception as e:
        return f"Error fetching questions: {e}", None

    # Guard against an error object or other non-list payload, which would
    # otherwise blow up inside the loop below.
    if not isinstance(questions_data, list):
        return (
            "Error fetching questions: unexpected response format "
            f"({type(questions_data).__name__}).",
            None,
        )
    total = len(questions_data)
    print(f"Fetched {total} questions.")

    results_log = []
    answers_payload = []

    for position, item in enumerate(questions_data, start=1):
        if not isinstance(item, dict):
            continue
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or not isinstance(question_text, str):
            continue

        print(f"Q{position}/{total}: {question_text[:60]}")

        try:
            answer = agent(question_text)
        except Exception as e:
            answer = f"ERROR: {e}"

        answers_payload.append({"task_id": task_id, "submitted_answer": answer})
        results_log.append({
            "Task ID": task_id,
            "Question": question_text,
            "Submitted Answer": answer,
        })
        # Pause between questions to stay clear of upstream rate limits.
        time.sleep(1)

    if not answers_payload:
        return "No answers produced.", pd.DataFrame(results_log)

    submission_data = {
        "username": username.strip(),
        "agent_code": agent_code,
        "answers": answers_payload,
    }

    try:
        resp = requests.post(
            f"{DEFAULT_API_URL}/submit",
            json=submission_data,
            timeout=60,
        )
        resp.raise_for_status()
        result = resp.json()
        status = (
            f"Submission Successful!\n"
            f"User: {result.get('username')}\n"
            f"Score: {result.get('score', 'N/A')}% "
            f"({result.get('correct_count', '?')}/{result.get('total_attempted', '?')} correct)\n"
            f"Message: {result.get('message', '')}"
        )
        return status, pd.DataFrame(results_log)
    except Exception as e:
        return f"Submission Failed: {e}", pd.DataFrame(results_log)
|
|
|
|
# Gradio UI: a login button, a single action button, and two read-only outputs
# (status text and a table of the submitted answers).
with gr.Blocks() as demo:
    gr.Markdown("# Basic Agent Evaluation Runner")
    gr.Markdown(
        "\n"
        "**Instructions:**\n"
        "1. Log in with Hugging Face below\n"
        "2. Click **Run Evaluation & Submit All Answers**\n"
        "3. Wait ~1-2 minutes for results\n"
    )
    gr.LoginButton()

    run_button = gr.Button("Run Evaluation & Submit All Answers")

    status_output = gr.Textbox(
        interactive=False,
        lines=5,
        label="Run Status / Submission Result",
    )
    results_table = gr.DataFrame(wrap=True, label="Questions and Agent Answers")

    # No explicit inputs are wired here; the handler's only parameter is the
    # OAuth profile declared in its signature.
    run_button.click(
        fn=run_and_submit_all,
        outputs=[status_output, results_table],
    )


# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch(debug=True, share=False)