"""Gradio app that answers the agents-course evaluation questions and submits them.

The app fetches the question list from the scoring service, answers every
question with ``BasicAgent`` (a Groq-hosted chat model, optionally grounded
with a Wikipedia / DuckDuckGo lookup), and posts the answers back for scoring.
"""

import os
import re
import time

import gradio as gr
import pandas as pd
import requests
from groq import Groq

DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

# Chat model used for every completion request.
MODEL_NAME = "llama-3.1-8b-instant"

WIKIPEDIA_API_URL = "https://en.wikipedia.org/w/api.php"

# Characters that normally terminate a sentence written in reading order.
_SENTENCE_ENDINGS = ".?!"

# Lead-ins the model tends to prepend despite being told not to; checked in
# this order, case-insensitively.
_ANSWER_PREFIXES = ("Final Answer:", "Answer:", "The answer is:", "The answer is")


class BasicAgent:
    """Question-answering agent backed by a Groq chat model.

    Calling an instance with a question string returns the model's answer as a
    string. Any failure while answering is reported as an ``"Error: ..."``
    string rather than raised.
    """

    def __init__(self):
        """Create the Groq client.

        Raises:
            ValueError: if the ``GROQ_API_KEY`` environment variable is unset
                or empty.
        """
        api_key = os.getenv("GROQ_API_KEY")
        if not api_key:
            raise ValueError("GROQ_API_KEY not set!")
        self.client = Groq(api_key=api_key)
        print("Groq agent initialized.")

    def is_reversed_text(self, text: str) -> bool:
        """Return True if *text* looks like a sentence written backwards.

        Heuristic: a backwards sentence does not end with sentence punctuation,
        and its *last* character is what the first character of the forward
        sentence would be (usually a capital letter). Its *first* character is
        either the forward sentence's final punctuation mark or, if there was
        none, a lowercase letter.
        """
        sample = text.strip()
        if not sample:
            return False

        head, tail = sample[0], sample[-1]
        if tail in _SENTENCE_ENDINGS:
            # Ends like an ordinary sentence, so it already reads forward.
            return False
        if head in _SENTENCE_ENDINGS:
            # The forward sentence ended with punctuation; require that it
            # began with a capital letter so stray leading dots don't match.
            return tail.isupper()
        if head.islower():
            # No trailing punctuation in the forward sentence: accept a capital
            # at the far end, or a "?" within the first five characters.
            return tail.isupper() or "?" in sample[:5]
        return False

    def search_wikipedia(self, query: str) -> str:
        """Return the intro extract of the best Wikipedia match for *query*.

        Two requests are made: a search for the most relevant page title, then
        a fetch of that page's plain-text intro. The extract is truncated to
        2000 characters. Returns ``""`` when nothing is found or on any error
        (the error is printed).
        """
        try:
            # Step 1: find the most relevant page title.
            search_resp = requests.get(
                WIKIPEDIA_API_URL,
                params={
                    "action": "query",
                    "list": "search",
                    "srsearch": query,
                    "format": "json",
                    "srlimit": 1,
                },
                headers={"User-Agent": "AgentBot/1.0"},
                timeout=10,
            )
            search_resp.raise_for_status()
            hits = search_resp.json().get("query", {}).get("search", [])
            if not hits:
                return ""
            title = hits[0]["title"]

            # Step 2: fetch the intro extract of that page.
            extract_resp = requests.get(
                WIKIPEDIA_API_URL,
                params={
                    "action": "query",
                    "prop": "extracts",
                    "exintro": True,
                    "explaintext": True,
                    "titles": title,
                    "format": "json",
                },
                headers={"User-Agent": "AgentBot/1.0"},
                timeout=10,
            )
            extract_resp.raise_for_status()
            pages = extract_resp.json().get("query", {}).get("pages", {})
            for page in pages.values():
                extract = page.get("extract", "")
                if extract:
                    return extract[:2000]  # keep the prompt small
            return ""
        except Exception as e:
            # Best-effort lookup: a failed search must not abort the answer.
            print(f"Wikipedia search error: {e}")
            return ""

    def search_duckduckgo(self, query: str) -> str:
        """Return DuckDuckGo instant-answer text for *query*.

        Collects the abstract, the direct answer, and the text of up to three
        related topics, joined by newlines. Returns ``""`` when nothing is
        found or on any error (the error is printed).
        """
        try:
            resp = requests.get(
                "https://api.duckduckgo.com/",
                params={"q": query, "format": "json", "no_html": "1"},
                headers={"User-Agent": "Mozilla/5.0"},
                timeout=8,
            )
            resp.raise_for_status()
            data = resp.json()

            snippets = [data[key] for key in ("Abstract", "Answer") if data.get(key)]
            snippets.extend(
                topic["Text"]
                for topic in data.get("RelatedTopics", [])[:3]
                if isinstance(topic, dict) and topic.get("Text")
            )
            return "\n".join(snippets)
        except Exception as e:
            # Best-effort fallback; log so failures are at least visible.
            print(f"DuckDuckGo search error: {e}")
            return ""

    def search_web(self, query: str) -> str:
        """Search Wikipedia first and fall back to DuckDuckGo if it yields nothing."""
        return self.search_wikipedia(query) or self.search_duckduckgo(query)

    def _complete(self, messages: list, max_tokens: int) -> str:
        """Run one deterministic chat completion and return its stripped text."""
        response = self.client.chat.completions.create(
            model=MODEL_NAME,
            messages=messages,
            max_tokens=max_tokens,
            temperature=0,
        )
        # NOTE(review): content is guarded in case the SDK returns None for an
        # empty completion; treat that as an empty answer.
        return (response.choices[0].message.content or "").strip()

    @staticmethod
    def _clean_answer(answer: str) -> str:
        """Strip known lead-ins such as "Final Answer:" from the model output."""
        for prefix in _ANSWER_PREFIXES:
            if answer.lower().startswith(prefix.lower()):
                answer = answer[len(prefix):].strip()
        return answer

    def __call__(self, question: str) -> str:
        """Answer *question* and return only the final answer text.

        Backwards-written questions are reversed and answered directly. Other
        questions are answered with optional web-search context, except those
        containing math/logic keywords, which skip the search.

        Returns:
            The cleaned model answer, or ``"Error: <message>"`` if anything
            fails while answering.
        """
        try:
            stripped = question.strip()

            # Reversed-text riddle: decode by reversing, then ask directly.
            if self.is_reversed_text(stripped):
                print("Detected reversed text question, decoding...")
                decoded = stripped[::-1]
                prompt = (
                    "This question was written backwards. Decode it and answer it. "
                    f"Decoded question: {decoded} "
                    "Answer with ONLY the final answer, no explanation."
                )
                answer = self._clean_answer(
                    self._complete([{"role": "user", "content": prompt}], max_tokens=50)
                )
                print(f"Reversed Q decoded -> A: {answer}")
                return answer

            # Skip web search for math/logic-only questions (faster, and the
            # search results would only be noise).
            math_keywords = ["table", "set s =", "commutative", "calculate", "solve"]
            lowered = question.lower()
            needs_search = not any(kw in lowered for kw in math_keywords)

            search_results = self.search_web(question) if needs_search else ""
            context = f"\n\nReference Information:\n{search_results}" if search_results else ""

            prompt = (
                "You are a precise question-answering assistant taking an exam. \n"
                "Answer with ONLY the final answer - no explanation, no extra words, "
                "no restating the question. "
                'For numbers: give just the number (e.g. "3" not "three studio albums"). '
                "For names: give just the name. "
                'For yes/no: give just "Yes" or "No". '
                "For lists: give comma-separated values in the exact format requested. "
                f"Question: {question}{context} Final Answer:"
            )
            messages = [
                {
                    "role": "system",
                    "content": (
                        "You are a precise, concise question-answering assistant. "
                        "Never explain your reasoning, only give the final answer."
                    ),
                },
                {"role": "user", "content": prompt},
            ]
            answer = self._clean_answer(self._complete(messages, max_tokens=150))

            print(f"Q: {question[:60]} | A: {answer[:60]}")
            return answer
        except Exception as e:
            # Top-level boundary: one bad question must not stop the whole run.
            print(f"Error: {e}")
            return f"Error: {e}"


def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Answer every evaluation question and submit the answers for scoring.

    Args:
        profile: the logged-in Hugging Face profile, or ``None`` when the user
            has not logged in.

    Returns:
        A ``(status_message, results)`` tuple. ``results`` is a DataFrame of
        task id / question / submitted answer, or ``None`` when the run
        stopped before any question was answered.
    """
    space_id = os.getenv("SPACE_ID")

    if not profile:
        return "Please Login to Hugging Face with the button.", None
    username = profile.username

    try:
        agent = BasicAgent()
    except Exception as e:
        return f"Error initializing agent: {e}", None

    if not space_id:
        # The link below is still submitted, but it will not point at real code.
        print("Warning: SPACE_ID is not set; the submitted agent_code link will be invalid.")
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"

    try:
        resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=15)
        resp.raise_for_status()
        questions_data = resp.json()
        print(f"Fetched {len(questions_data)} questions.")
    except Exception as e:
        return f"Error fetching questions: {e}", None

    results_log = []
    answers_payload = []
    total = len(questions_data)

    for i, item in enumerate(questions_data):
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or question_text is None:
            continue

        print(f"Q{i+1}/{total}: {question_text[:60]}")
        try:
            answer = agent(question_text)
        except Exception as e:
            answer = f"ERROR: {e}"

        answers_payload.append({"task_id": task_id, "submitted_answer": answer})
        results_log.append({
            "Task ID": task_id,
            "Question": question_text,
            "Submitted Answer": answer,
        })
        time.sleep(1)  # pause between questions to stay gentle on the APIs

    if not answers_payload:
        return "No answers produced.", pd.DataFrame(results_log)

    submission_data = {
        "username": username.strip(),
        "agent_code": agent_code,
        "answers": answers_payload,
    }

    try:
        resp = requests.post(
            f"{DEFAULT_API_URL}/submit",
            json=submission_data,
            timeout=60,
        )
        resp.raise_for_status()
        result = resp.json()
        status = (
            f"Submission Successful!\n"
            f"User: {result.get('username')}\n"
            f"Score: {result.get('score', 'N/A')}% "
            f"({result.get('correct_count', '?')}/{result.get('total_attempted', '?')} correct)\n"
            f"Message: {result.get('message', '')}"
        )
        return status, pd.DataFrame(results_log)
    except Exception as e:
        return f"Submission Failed: {e}", pd.DataFrame(results_log)


# --- Gradio UI -------------------------------------------------------------
with gr.Blocks() as demo:
    gr.Markdown("# Basic Agent Evaluation Runner")
    gr.Markdown(
        " **Instructions:** 1. Log in with Hugging Face below "
        "2. Click **Run Evaluation & Submit All Answers** "
        "3. Wait ~1-2 minutes for results "
    )
    gr.LoginButton()
    run_button = gr.Button("Run Evaluation & Submit All Answers")
    status_output = gr.Textbox(
        label="Run Status / Submission Result",
        lines=5,
        interactive=False,
    )
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
    # No explicit inputs: the handler's only parameter is the OAuth profile.
    run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])


if __name__ == "__main__":
    demo.launch(debug=True, share=False)