Update app.py
Browse files
app.py
CHANGED
|
@@ -4,15 +4,19 @@ import traceback
|
|
| 4 |
import gradio as gr
|
| 5 |
import requests
|
| 6 |
import pandas as pd
|
|
|
|
| 7 |
from smolagents import CodeAgent, DuckDuckGoSearchTool, tool
|
| 8 |
-
from smolagents.models import BaseModel
|
| 9 |
|
| 10 |
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
|
| 11 |
logger = logging.getLogger(__name__)
|
| 12 |
|
| 13 |
SUBMISSION_URL = "https://agents-course-unit4-scoring.hf.space"
|
| 14 |
-
|
| 15 |
-
|
|
|
|
|
|
|
|
|
|
| 16 |
|
| 17 |
try:
|
| 18 |
search_tool_instance = DuckDuckGoSearchTool()
|
|
@@ -66,8 +70,8 @@ def wikipedia_lookup(page_title: str) -> str:
|
|
| 66 |
logger.warning(f"Wikipedia page '{title}' is disambiguation.")
|
| 67 |
return f"Wikipedia Error: '{title}' is a disambiguation page. Try a more specific title."
|
| 68 |
else:
|
| 69 |
-
|
| 70 |
-
|
| 71 |
except requests.exceptions.HTTPError as e:
|
| 72 |
if e.response.status_code == 404:
|
| 73 |
logger.warning(f"Wikipedia page not found: {page_safe}")
|
|
@@ -98,51 +102,19 @@ Formatting Rules for FINAL ANSWER:
|
|
| 98 |
Let's begin!
|
| 99 |
"""
|
| 100 |
|
| 101 |
-
|
| 102 |
-
class PollinationsModel(BaseModel):
    """Minimal chat-completion client for the Pollinations API.

    Posts a single user message to ``POLLINATIONS_API`` and returns the
    generated text. Failures are reported as error strings rather than
    raised, so callers always receive a ``str``.
    """

    def __init__(self, model_id="openai-large", max_tokens=8196, seed=42):
        # NOTE(review): 8196 looks like a typo for 8192 — confirm the intended limit.
        self.model_id = model_id
        self.max_tokens = max_tokens
        self.seed = seed
        self.api_url = POLLINATIONS_API
        logger.info(f"Initialized PollinationsModel with model_id={model_id}")

    def generate(self, prompt, **kwargs):
        """Send *prompt* to the API and return the response's text content."""
        logger.info(f"Generating with PollinationsModel. Prompt length: {len(prompt)}")
        try:
            request_body = {
                "messages": [{"role": "user", "content": prompt}],
                "model": self.model_id,
                "max_tokens": self.max_tokens,
                "seed": self.seed,
                "jsonMode": False,
                "private": True,
            }
            response = requests.post(self.api_url, json=request_body, timeout=120)
            response.raise_for_status()
            result = response.json()
            # The API is expected to answer with a top-level "content" field.
            if "content" not in result:
                logger.error(f"Unexpected response structure: {result}")
                return "Error: Unexpected API response format"
            return result["content"]
        except Exception as e:
            # Deliberate best-effort: surface the failure as a string result.
            logger.exception(f"PollinationsModel generate failed: {e}")
            return f"Error generating response: {str(e)}"
|
| 134 |
-
|
| 135 |
-
logger.info(f"Initializing Pollinations LLM connection: {MODEL_ID}")
|
| 136 |
try:
|
| 137 |
-
|
|
|
|
| 138 |
model_id=MODEL_ID,
|
| 139 |
-
|
| 140 |
-
|
|
|
|
| 141 |
)
|
| 142 |
-
logger.info("LLM connection configured using
|
| 143 |
except Exception as e:
|
| 144 |
-
logger.exception("CRITICAL: Failed to configure
|
| 145 |
-
raise RuntimeError(f"Could not configure
|
| 146 |
|
| 147 |
logger.info("Initializing CodeAgent...")
|
| 148 |
try:
|
|
@@ -197,7 +169,7 @@ def evaluate_and_submit():
|
|
| 197 |
logger.info(f"Processing Q {i+1}/{len(questions)} (ID: {task_id})...")
|
| 198 |
raw_agent_output = run_agent_on_question(question_text)
|
| 199 |
final_answer = "AGENT_ERROR: No 'FINAL ANSWER:' marker."
|
| 200 |
-
marker = "FINAL ANSWER:"
|
| 201 |
if marker in raw_agent_output: final_answer = raw_agent_output.split(marker, 1)[1].strip()
|
| 202 |
elif "AGENT_ERROR:" in raw_agent_output: final_answer = raw_agent_output
|
| 203 |
results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": final_answer, "Full Output": raw_agent_output})
|
|
@@ -224,7 +196,7 @@ def evaluate_and_submit():
|
|
| 224 |
|
| 225 |
logger.info("Setting up Gradio interface...")
|
| 226 |
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
| 227 |
-
gr.Markdown("# ๐ Agent Evaluation Runner ๐\nEnsure
|
| 228 |
run_button = gr.Button("โถ๏ธ Run Evaluation & Submit All Answers", variant="primary")
|
| 229 |
status_textbox = gr.Textbox(label="๐ Status", lines=4, interactive=False)
|
| 230 |
results_df_display = gr.DataFrame(label="๐ Detailed Log", headers=["Task ID", "Question", "Submitted Answer", "Full Output"], wrap=True, column_widths=["10%", "25%", "20%", "45%"])
|
|
@@ -234,4 +206,4 @@ logger.info("Gradio interface setup complete.")
|
|
| 234 |
if __name__ == "__main__":
|
| 235 |
logger.info("Launching Gradio application...")
|
| 236 |
demo.launch(debug=True, share=False)
|
| 237 |
-
logger.info("Gradio application launched.")
|
|
|
|
| 4 |
import gradio as gr
|
| 5 |
import requests
|
| 6 |
import pandas as pd
|
| 7 |
+
from openai import OpenAI
|
| 8 |
from smolagents import CodeAgent, DuckDuckGoSearchTool, tool
|
| 9 |
+
from smolagents.models import OpenAIServerModel
|
| 10 |
|
| 11 |
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
|
| 12 |
logger = logging.getLogger(__name__)
|
| 13 |
|
| 14 |
SUBMISSION_URL = "https://agents-course-unit4-scoring.hf.space"
|
| 15 |
+
GITHUB_TOKEN = os.getenv("GITHUB_TOKEN")
|
| 16 |
+
if not GITHUB_TOKEN:
|
| 17 |
+
raise ValueError("CRITICAL: GITHUB_TOKEN environment variable not set.")
|
| 18 |
+
GITHUB_ENDPOINT = "https://models.github.ai/inference"
|
| 19 |
+
MODEL_ID = os.getenv("MODEL_ID", "openai/gpt-4o-mini")
|
| 20 |
|
| 21 |
try:
|
| 22 |
search_tool_instance = DuckDuckGoSearchTool()
|
|
|
|
| 70 |
logger.warning(f"Wikipedia page '{title}' is disambiguation.")
|
| 71 |
return f"Wikipedia Error: '{title}' is a disambiguation page. Try a more specific title."
|
| 72 |
else:
|
| 73 |
+
logger.warning(f"Wikipedia page '{title}' found but has no summary.")
|
| 74 |
+
return f"Wikipedia Error: Page '{title}' found but has no summary."
|
| 75 |
except requests.exceptions.HTTPError as e:
|
| 76 |
if e.response.status_code == 404:
|
| 77 |
logger.warning(f"Wikipedia page not found: {page_safe}")
|
|
|
|
| 102 |
Let's begin!
|
| 103 |
"""
|
| 104 |
|
| 105 |
+
logger.info(f"Initializing LLM connection: {MODEL_ID} @ {GITHUB_ENDPOINT}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 106 |
try:
    logger.info("Attempting to configure OpenAIServerModel with 'api_base' (and no request_timeout)...")
    # Point smolagents' OpenAI-compatible client at the GitHub Models endpoint;
    # request_timeout is intentionally not passed.
    llm_model = OpenAIServerModel(
        model_id=MODEL_ID,
        api_key=GITHUB_TOKEN,
        api_base=GITHUB_ENDPOINT,
    )
    logger.info("LLM connection configured using 'api_base'.")
except Exception as e:
    # A working model is mandatory — abort startup with a chained error.
    logger.exception("CRITICAL: Failed to configure OpenAIServerModel (tried with api_base)")
    raise RuntimeError(f"Could not configure SmolAgents model using api_base: {e}") from e
|
| 118 |
|
| 119 |
logger.info("Initializing CodeAgent...")
|
| 120 |
try:
|
|
|
|
| 169 |
logger.info(f"Processing Q {i+1}/{len(questions)} (ID: {task_id})...")
|
| 170 |
raw_agent_output = run_agent_on_question(question_text)
|
| 171 |
final_answer = "AGENT_ERROR: No 'FINAL ANSWER:' marker."
|
| 172 |
+
marker = "FINAL ANSWER:";
|
| 173 |
if marker in raw_agent_output: final_answer = raw_agent_output.split(marker, 1)[1].strip()
|
| 174 |
elif "AGENT_ERROR:" in raw_agent_output: final_answer = raw_agent_output
|
| 175 |
results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": final_answer, "Full Output": raw_agent_output})
|
|
|
|
| 196 |
|
| 197 |
logger.info("Setting up Gradio interface...")
# Build the evaluation-runner UI: one trigger button, a status box, and a
# detailed results table. (The emoji glyphs in the labels appear mojibake'd
# in this revision — preserved byte-for-byte to keep behavior identical.)
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# ๐ Agent Evaluation Runner ๐\nEnsure `GITHUB_TOKEN` secret is set. Click Run to start.")
    run_button = gr.Button("โถ๏ธ Run Evaluation & Submit All Answers", variant="primary")
    status_textbox = gr.Textbox(label="๐ Status", lines=4, interactive=False)
    results_df_display = gr.DataFrame(
        label="๐ Detailed Log",
        headers=["Task ID", "Question", "Submitted Answer", "Full Output"],
        wrap=True,
        column_widths=["10%", "25%", "20%", "45%"],
    )
|
|
|
|
| 206 |
# Script entry point: launch the Gradio app in the foreground.
# Fix: the final line of this guard had the whole guard's code fused and
# duplicated after the closing string literal ("...launched.") __name__ == ...),
# which is invalid Python; the duplicate is removed.
if __name__ == "__main__":
    logger.info("Launching Gradio application...")
    demo.launch(debug=True, share=False)
    # Only reached after launch() returns (i.e. when the server stops).
    logger.info("Gradio application launched.")
|