Update app.py
Browse files
app.py
CHANGED
|
@@ -7,15 +7,11 @@ import sympy as sp
|
|
| 7 |
import wikipedia
|
| 8 |
from bs4 import BeautifulSoup
|
| 9 |
from tenacity import retry, stop_after_attempt, wait_fixed
|
| 10 |
-
import spacy
|
| 11 |
from io import StringIO
|
| 12 |
|
| 13 |
# --- Constants ---
|
| 14 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
| 15 |
|
| 16 |
-
# --- Initialize NLP for Question Classification ---
|
| 17 |
-
nlp = spacy.load("en_core_web_sm")
|
| 18 |
-
|
| 19 |
# --- Basic Agent Definition ---
|
| 20 |
class BasicAgent:
|
| 21 |
def __init__(self):
|
|
@@ -27,14 +23,13 @@ class BasicAgent:
|
|
| 27 |
print("BasicAgent initialized with Mixtral-8x7B, SymPy, Wikipedia, and DuckDuckGo search.")
|
| 28 |
|
| 29 |
def classify_question(self, question: str) -> str:
|
| 30 |
-
"""Classify question type
|
| 31 |
question_lower = question.lower()
|
| 32 |
-
|
| 33 |
-
if any(token.text in ["calculate", "solve", "equation", "sum", "product"] or re.search(r'[\d+\-*/=]', question_lower) for token in doc):
|
| 34 |
return "math"
|
| 35 |
-
if any(
|
| 36 |
return "factual"
|
| 37 |
-
if any(
|
| 38 |
return "code"
|
| 39 |
if any(ext in question_lower for ext in [".xlsx", ".csv", ".pdf"]):
|
| 40 |
return "file"
|
|
@@ -49,7 +44,6 @@ class BasicAgent:
|
|
| 49 |
# Handle file-based questions (basic CSV parsing if text is provided)
|
| 50 |
if question_type == "file" and (".xlsx" in question.lower() or ".csv" in question.lower()):
|
| 51 |
try:
|
| 52 |
-
# Assume table data is embedded in question text (simplified)
|
| 53 |
table_match = re.search(r'(\|.*?\|.*?\|.*?\|)', question, re.DOTALL)
|
| 54 |
if table_match:
|
| 55 |
table_text = table_match.group(1)
|
|
@@ -91,11 +85,9 @@ class BasicAgent:
|
|
| 91 |
# Handle code questions
|
| 92 |
if question_type == "code":
|
| 93 |
try:
|
| 94 |
-
# Extract code snippet if provided
|
| 95 |
code_match = re.search(r'```python\n(.*?)\n```', question, re.DOTALL)
|
| 96 |
if code_match:
|
| 97 |
code = code_match.group(1)
|
| 98 |
-
# Simulate code execution (simplified)
|
| 99 |
locals_dict = {}
|
| 100 |
exec(code, {}, locals_dict)
|
| 101 |
concise_answer = str(list(locals_dict.values())[-1]) if locals_dict else "Unknown"
|
|
@@ -109,10 +101,10 @@ class BasicAgent:
|
|
| 109 |
# Handle factual questions with Wikipedia
|
| 110 |
if question_type == "factual":
|
| 111 |
try:
|
| 112 |
-
|
| 113 |
-
key_terms = " ".join([
|
| 114 |
if not key_terms:
|
| 115 |
-
key_terms = " ".join([
|
| 116 |
print(f"Searching Wikipedia for: {key_terms}")
|
| 117 |
wikipedia.set_lang("en")
|
| 118 |
search_results = wikipedia.search(key_terms, results=1)
|
|
@@ -189,15 +181,9 @@ class BasicAgent:
|
|
| 189 |
return response[:sentence_end].strip()
|
| 190 |
return response[:50].strip()
|
| 191 |
|
| 192 |
-
# ---
|
| 193 |
-
# [Insert the original run_and_submit_all function and Gradio interface code here]
|
| 194 |
-
|
| 195 |
def run_and_submit_all(profile: gr.OAuthProfile | None):
|
| 196 |
-
"""
|
| 197 |
-
Fetches all questions, runs the agent, submits answers, and displays results.
|
| 198 |
-
"""
|
| 199 |
space_id = os.getenv("SPACE_ID")
|
| 200 |
-
|
| 201 |
if profile:
|
| 202 |
username = f"{profile.username}"
|
| 203 |
print(f"User logged in: {username}")
|
|
@@ -209,7 +195,6 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
|
|
| 209 |
questions_url = f"{api_url}/questions"
|
| 210 |
submit_url = f"{api_url}/submit"
|
| 211 |
|
| 212 |
-
# 1. Instantiate Agent
|
| 213 |
try:
|
| 214 |
agent = BasicAgent()
|
| 215 |
agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
|
|
@@ -218,7 +203,6 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
|
|
| 218 |
print(f"Error instantiating agent: {e}")
|
| 219 |
return f"Error initializing agent: {e}", None
|
| 220 |
|
| 221 |
-
# 2. Fetch Questions
|
| 222 |
print(f"Fetching questions from: {questions_url}")
|
| 223 |
try:
|
| 224 |
response = requests.get(questions_url, timeout=15)
|
|
@@ -234,11 +218,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
|
|
| 234 |
except requests.exceptions.JSONDecodeError as e:
|
| 235 |
print(f"Error decoding JSON response: {response.text[:100]}")
|
| 236 |
return f"Error decoding server response: {e}", None
|
| 237 |
-
except Exception as e:
|
| 238 |
-
print(f"An unexpected error occurred: {e}")
|
| 239 |
-
return f"An unexpected error occurred: {e}", None
|
| 240 |
|
| 241 |
-
# 3. Run Agent
|
| 242 |
results_log = []
|
| 243 |
answers_payload = []
|
| 244 |
print(f"Running agent on {len(questions_data)} questions...")
|
|
@@ -271,12 +251,10 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
|
|
| 271 |
print("Agent did not produce any answers to submit.")
|
| 272 |
return "Agent did not produce any answers.", pd.DataFrame(results_log)
|
| 273 |
|
| 274 |
-
# 4. Prepare Submission
|
| 275 |
submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
|
| 276 |
status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
|
| 277 |
print(status_update)
|
| 278 |
|
| 279 |
-
# 5. Submit
|
| 280 |
print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
|
| 281 |
try:
|
| 282 |
response = requests.post(submit_url, json=submission_data, timeout=60)
|
|
|
|
| 7 |
import wikipedia
|
| 8 |
from bs4 import BeautifulSoup
|
| 9 |
from tenacity import retry, stop_after_attempt, wait_fixed
|
|
|
|
| 10 |
from io import StringIO
|
| 11 |
|
| 12 |
# --- Constants ---
|
| 13 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
| 14 |
|
|
|
|
|
|
|
|
|
|
| 15 |
# --- Basic Agent Definition ---
|
| 16 |
class BasicAgent:
|
| 17 |
def __init__(self):
|
|
|
|
| 23 |
print("BasicAgent initialized with Mixtral-8x7B, SymPy, Wikipedia, and DuckDuckGo search.")
|
| 24 |
|
| 25 |
def classify_question(self, question: str) -> str:
|
| 26 |
+
"""Classify question type using regex (no SpaCy dependency)."""
|
| 27 |
question_lower = question.lower()
|
| 28 |
+
if re.search(r'[\d+\-*/=]', question_lower) or any(keyword in question_lower for keyword in ["calculate", "solve", "equation", "sum", "product"]):
|
|
|
|
| 29 |
return "math"
|
| 30 |
+
if any(keyword in question_lower for keyword in ["who", "what", "where", "when", "how many"]):
|
| 31 |
return "factual"
|
| 32 |
+
if any(keyword in question_lower for keyword in ["code", "python", "program", ".py"]):
|
| 33 |
return "code"
|
| 34 |
if any(ext in question_lower for ext in [".xlsx", ".csv", ".pdf"]):
|
| 35 |
return "file"
|
|
|
|
| 44 |
# Handle file-based questions (basic CSV parsing if text is provided)
|
| 45 |
if question_type == "file" and (".xlsx" in question.lower() or ".csv" in question.lower()):
|
| 46 |
try:
|
|
|
|
| 47 |
table_match = re.search(r'(\|.*?\|.*?\|.*?\|)', question, re.DOTALL)
|
| 48 |
if table_match:
|
| 49 |
table_text = table_match.group(1)
|
|
|
|
| 85 |
# Handle code questions
|
| 86 |
if question_type == "code":
|
| 87 |
try:
|
|
|
|
| 88 |
code_match = re.search(r'```python\n(.*?)\n```', question, re.DOTALL)
|
| 89 |
if code_match:
|
| 90 |
code = code_match.group(1)
|
|
|
|
| 91 |
locals_dict = {}
|
| 92 |
exec(code, {}, locals_dict)
|
| 93 |
concise_answer = str(list(locals_dict.values())[-1]) if locals_dict else "Unknown"
|
|
|
|
| 101 |
# Handle factual questions with Wikipedia
|
| 102 |
if question_type == "factual":
|
| 103 |
try:
|
| 104 |
+
words = re.findall(r'\b[A-Z][a-z]+(?:\s[A-Z][a-z]+)*\b|\b\w+\b', question.lower())
|
| 105 |
+
key_terms = " ".join([w for w in words if w not in ["what", "is", "the", "of", "in", "on", "at", "by", "for", "how", "many", "who", "where", "when", "if"]][-3:])
|
| 106 |
if not key_terms:
|
| 107 |
+
key_terms = " ".join(words[-3:])
|
| 108 |
print(f"Searching Wikipedia for: {key_terms}")
|
| 109 |
wikipedia.set_lang("en")
|
| 110 |
search_results = wikipedia.search(key_terms, results=1)
|
|
|
|
| 181 |
return response[:sentence_end].strip()
|
| 182 |
return response[:50].strip()
|
| 183 |
|
| 184 |
+
# --- Original run_and_submit_all and Gradio interface ---
|
|
|
|
|
|
|
| 185 |
def run_and_submit_all(profile: gr.OAuthProfile | None):
|
|
|
|
|
|
|
|
|
|
| 186 |
space_id = os.getenv("SPACE_ID")
|
|
|
|
| 187 |
if profile:
|
| 188 |
username = f"{profile.username}"
|
| 189 |
print(f"User logged in: {username}")
|
|
|
|
| 195 |
questions_url = f"{api_url}/questions"
|
| 196 |
submit_url = f"{api_url}/submit"
|
| 197 |
|
|
|
|
| 198 |
try:
|
| 199 |
agent = BasicAgent()
|
| 200 |
agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
|
|
|
|
| 203 |
print(f"Error instantiating agent: {e}")
|
| 204 |
return f"Error initializing agent: {e}", None
|
| 205 |
|
|
|
|
| 206 |
print(f"Fetching questions from: {questions_url}")
|
| 207 |
try:
|
| 208 |
response = requests.get(questions_url, timeout=15)
|
|
|
|
| 218 |
except requests.exceptions.JSONDecodeError as e:
|
| 219 |
print(f"Error decoding JSON response: {response.text[:100]}")
|
| 220 |
return f"Error decoding server response: {e}", None
|
|
|
|
|
|
|
|
|
|
| 221 |
|
|
|
|
| 222 |
results_log = []
|
| 223 |
answers_payload = []
|
| 224 |
print(f"Running agent on {len(questions_data)} questions...")
|
|
|
|
| 251 |
print("Agent did not produce any answers to submit.")
|
| 252 |
return "Agent did not produce any answers.", pd.DataFrame(results_log)
|
| 253 |
|
|
|
|
| 254 |
submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
|
| 255 |
status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
|
| 256 |
print(status_update)
|
| 257 |
|
|
|
|
| 258 |
print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
|
| 259 |
try:
|
| 260 |
response = requests.post(submit_url, json=submission_data, timeout=60)
|