Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -3,7 +3,7 @@ import sys
|
|
| 3 |
import subprocess
|
| 4 |
|
| 5 |
# 检查并安装缺失的依赖
|
| 6 |
-
required_packages = ["litellm", "duckduckgo-search"]
|
| 7 |
for package in required_packages:
|
| 8 |
try:
|
| 9 |
__import__(package)
|
|
@@ -19,7 +19,6 @@ import inspect
|
|
| 19 |
import pandas as pd
|
| 20 |
import json
|
| 21 |
import time
|
| 22 |
-
import re
|
| 23 |
from typing import List, Dict, Any, Optional
|
| 24 |
from litellm import completion
|
| 25 |
from duckduckgo_search import DDGS
|
|
@@ -34,7 +33,7 @@ class DuckDuckGoSearchTool:
|
|
| 34 |
self.name = "duckduckgo_search"
|
| 35 |
self.description = "Search the web using DuckDuckGo"
|
| 36 |
|
| 37 |
-
def search(self, query: str, max_results: int =
|
| 38 |
"""
|
| 39 |
Search the web using DuckDuckGo and return results.
|
| 40 |
|
|
@@ -53,7 +52,7 @@ class DuckDuckGoSearchTool:
|
|
| 53 |
print(f"DuckDuckGo search error: {e}")
|
| 54 |
return [{"title": f"Search error: {e}", "body": "", "href": ""}]
|
| 55 |
|
| 56 |
-
def __call__(self, query: str, max_results: int =
|
| 57 |
"""
|
| 58 |
Execute the search and return results in a structured format.
|
| 59 |
|
|
@@ -83,14 +82,13 @@ class LiteLLMModel:
|
|
| 83 |
self.api_key = api_key
|
| 84 |
print(f"Initialized LiteLLM with model: {model_id}")
|
| 85 |
|
| 86 |
-
def generate(self, prompt: str, system_prompt: str = None
|
| 87 |
"""
|
| 88 |
Generate text using the LiteLLM model.
|
| 89 |
|
| 90 |
Args:
|
| 91 |
prompt: The user prompt
|
| 92 |
system_prompt: Optional system prompt
|
| 93 |
-
temperature: Temperature for generation (lower = more deterministic)
|
| 94 |
|
| 95 |
Returns:
|
| 96 |
Generated text response
|
|
@@ -104,8 +102,7 @@ class LiteLLMModel:
|
|
| 104 |
response = completion(
|
| 105 |
model=self.model_id,
|
| 106 |
messages=messages,
|
| 107 |
-
api_key=self.api_key
|
| 108 |
-
temperature=temperature
|
| 109 |
)
|
| 110 |
|
| 111 |
return response.choices[0].message.content
|
|
@@ -124,102 +121,71 @@ class CodeAgent:
|
|
| 124 |
def format_search_results(self, results: List[Dict[str, str]]) -> str:
|
| 125 |
"""Format search results into a readable string"""
|
| 126 |
formatted = "Search Results:\n"
|
|
|
|
|
|
|
| 127 |
for i, result in enumerate(results, 1):
|
| 128 |
formatted += f"{i}. {result.get('title', 'No title')}\n"
|
| 129 |
-
formatted += f" {result.get('body', 'No description')[:
|
| 130 |
formatted += f" URL: {result.get('href', 'No URL')}\n\n"
|
| 131 |
return formatted
|
| 132 |
|
| 133 |
-
def create_prompt(self, question: str, search_results: Optional[List[Dict[str, str]]] = None) -> str:
|
| 134 |
-
"""Create a prompt for the model with optional search results"""
|
| 135 |
-
prompt = f"Question: {question}\n\n"
|
| 136 |
-
|
| 137 |
-
if search_results:
|
| 138 |
-
prompt += self.format_search_results(search_results)
|
| 139 |
-
|
| 140 |
-
prompt += "\nPlease provide a concise, factual answer to the question. "
|
| 141 |
-
prompt += "Your answer should be direct and to the point, without any explanations or reasoning. "
|
| 142 |
-
prompt += "For example, if asked 'What is the capital of France?', just answer 'Paris'. "
|
| 143 |
-
prompt += "If asked for a numerical value, provide only the number. "
|
| 144 |
-
prompt += "If asked for a list, provide comma-separated values without numbering. "
|
| 145 |
-
prompt += "If you don't know the answer, respond with 'Unknown' rather than speculating.\n\n"
|
| 146 |
-
|
| 147 |
-
# 添加特定问题类型的指导
|
| 148 |
-
if "how many" in question.lower():
|
| 149 |
-
prompt += "For 'how many' questions, just provide the number as your answer.\n"
|
| 150 |
-
elif "which" in question.lower() and "option" in question.lower():
|
| 151 |
-
prompt += "For multiple choice questions, just provide the letter(s) of the correct option(s).\n"
|
| 152 |
-
elif any(word in question.lower() for word in ["list", "name all", "what are"]):
|
| 153 |
-
prompt += "For list questions, provide items as comma-separated values without numbering or bullet points.\n"
|
| 154 |
-
|
| 155 |
-
prompt += "Answer: "
|
| 156 |
-
|
| 157 |
-
return prompt
|
| 158 |
-
|
| 159 |
def create_system_prompt(self) -> str:
|
| 160 |
"""Create a system prompt for the model"""
|
| 161 |
return (
|
| 162 |
-
"You are a
|
| 163 |
-
"
|
| 164 |
-
"Your answers
|
| 165 |
-
"
|
| 166 |
-
"
|
| 167 |
-
"
|
| 168 |
-
"
|
| 169 |
-
"For list questions, provide comma-separated values without numbering or bullet points. "
|
| 170 |
-
"If you don't know the answer, just say 'Unknown'."
|
| 171 |
)
|
| 172 |
-
|
| 173 |
-
def
|
| 174 |
-
"""
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
"
|
| 192 |
-
"
|
| 193 |
-
"
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
if options:
|
| 216 |
-
return ", ".join(options).lower()
|
| 217 |
|
| 218 |
-
|
| 219 |
-
answer = answer.rstrip(".!,;:")
|
| 220 |
|
| 221 |
-
return answer
|
| 222 |
-
|
| 223 |
def __call__(self, question: str) -> str:
|
| 224 |
"""
|
| 225 |
Process a question and return an answer.
|
|
@@ -232,35 +198,62 @@ class CodeAgent:
|
|
| 232 |
"""
|
| 233 |
print(f"Agent received question: {question[:100]}...")
|
| 234 |
|
| 235 |
-
|
| 236 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 237 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 238 |
search_results = None
|
| 239 |
if should_search and self.search_tool:
|
| 240 |
print(f"Searching for information about: {question}")
|
| 241 |
-
|
| 242 |
-
search_query = question
|
| 243 |
-
if "code" in question.lower() or "python" in question.lower():
|
| 244 |
-
search_query += " code example"
|
| 245 |
-
elif "date" in question.lower() or "when" in question.lower():
|
| 246 |
-
search_query += " exact date"
|
| 247 |
-
|
| 248 |
-
search_response = self.search_tool(search_query, max_results=8)
|
| 249 |
search_results = search_response.get("results", [])
|
| 250 |
print(f"Found {len(search_results)} search results")
|
| 251 |
|
| 252 |
-
# 创建提示词和生成回答
|
| 253 |
prompt = self.create_prompt(question, search_results)
|
| 254 |
system_prompt = self.create_system_prompt()
|
| 255 |
|
| 256 |
print("Generating response with LLM...")
|
| 257 |
-
#
|
| 258 |
-
|
|
|
|
| 259 |
|
| 260 |
-
|
| 261 |
-
answer = self.clean_answer(response, question)
|
| 262 |
|
| 263 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 264 |
return answer
|
| 265 |
|
| 266 |
# 简化版本,不使用OAuthProfile
|
|
@@ -269,15 +262,12 @@ def run_and_submit_all():
|
|
| 269 |
Fetches all questions, runs the Agent on them, submits all answers,
|
| 270 |
and displays the results.
|
| 271 |
"""
|
| 272 |
-
|
| 273 |
-
space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
|
| 274 |
api_url = DEFAULT_API_URL
|
| 275 |
questions_url = f"{api_url}/questions"
|
| 276 |
submit_url = f"{api_url}/submit"
|
| 277 |
|
| 278 |
-
# 1. Instantiate Agent with Gemini model and DuckDuckGo search
|
| 279 |
try:
|
| 280 |
-
# Get API key from environment variable
|
| 281 |
api_key = os.getenv("GEMINI_API_KEY")
|
| 282 |
if not api_key:
|
| 283 |
return "Error: GEMINI_API_KEY environment variable not found. Please set it in your Space settings.", None
|
|
@@ -288,34 +278,31 @@ def run_and_submit_all():
|
|
| 288 |
print(f"Error instantiating agent: {e}")
|
| 289 |
return f"Error initializing agent: {e}", None
|
| 290 |
|
| 291 |
-
# In the case of an app running as a hugging Face space, this link points toward your codebase
|
| 292 |
agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
|
| 293 |
if not space_id:
|
| 294 |
-
agent_code = "https://huggingface.co/spaces/lethaq/Final_Assignment_Template/tree/main"
|
| 295 |
print(f"Agent code URL: {agent_code}")
|
| 296 |
|
| 297 |
-
# 2. Fetch Questions
|
| 298 |
print(f"Fetching questions from: {questions_url}")
|
| 299 |
try:
|
| 300 |
-
response = requests.get(questions_url, timeout=
|
| 301 |
response.raise_for_status()
|
| 302 |
questions_data = response.json()
|
| 303 |
if not questions_data:
|
| 304 |
-
|
| 305 |
-
|
| 306 |
print(f"Fetched {len(questions_data)} questions.")
|
| 307 |
except requests.exceptions.RequestException as e:
|
| 308 |
print(f"Error fetching questions: {e}")
|
| 309 |
return f"Error fetching questions: {e}", None
|
| 310 |
except requests.exceptions.JSONDecodeError as e:
|
| 311 |
-
|
| 312 |
-
|
| 313 |
-
|
| 314 |
except Exception as e:
|
| 315 |
print(f"An unexpected error occurred fetching questions: {e}")
|
| 316 |
return f"An unexpected error occurred fetching questions: {e}", None
|
| 317 |
|
| 318 |
-
# 3. Run your Agent
|
| 319 |
results_log = []
|
| 320 |
answers_payload = []
|
| 321 |
print(f"Running agent on {len(questions_data)} questions...")
|
|
@@ -328,26 +315,23 @@ def run_and_submit_all():
|
|
| 328 |
try:
|
| 329 |
print(f"Processing task {task_id}: {question_text[:50]}...")
|
| 330 |
submitted_answer = agent(question_text)
|
| 331 |
-
# Important: Use "submitted_answer" as the key, not "model_answer"
|
| 332 |
answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
|
| 333 |
results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
|
| 334 |
print(f"Answer for task {task_id}: {submitted_answer[:50]}...")
|
| 335 |
except Exception as e:
|
| 336 |
-
|
| 337 |
-
|
| 338 |
|
| 339 |
if not answers_payload:
|
| 340 |
print("Agent did not produce any answers to submit.")
|
| 341 |
return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
|
| 342 |
|
| 343 |
-
# 4. Submit answers with all required fields
|
| 344 |
print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
|
| 345 |
try:
|
| 346 |
-
# Important: Submit with all required fields
|
| 347 |
submission_dict = {
|
| 348 |
-
"username": DEFAULT_USERNAME,
|
| 349 |
-
"agent_code": agent_code,
|
| 350 |
-
"answers": answers_payload
|
| 351 |
}
|
| 352 |
response = requests.post(submit_url, json=submission_dict, timeout=60)
|
| 353 |
response.raise_for_status()
|
|
@@ -408,29 +392,28 @@ with gr.Blocks() as demo:
|
|
| 408 |
status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
|
| 409 |
results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
|
| 410 |
|
| 411 |
-
# Add a single question test feature
|
| 412 |
gr.Markdown("## Test Single Question")
|
| 413 |
with gr.Row():
|
| 414 |
-
question_in = gr.Textbox(label="Question", lines=3)
|
| 415 |
answer_out = gr.Textbox(label="Answer", lines=3, interactive=False)
|
| 416 |
|
| 417 |
test_btn = gr.Button("Test Question", variant="secondary")
|
| 418 |
|
| 419 |
-
# Add a function to test a single question
|
| 420 |
def test_single_question(question):
|
|
|
|
|
|
|
| 421 |
try:
|
| 422 |
api_key = os.getenv("GEMINI_API_KEY")
|
| 423 |
if not api_key:
|
| 424 |
return "Error: GEMINI_API_KEY environment variable not found"
|
| 425 |
|
| 426 |
-
model = LiteLLMModel(model_id="gemini/gemini-2.0-flash-lite", api_key=
|
| 427 |
agent = CodeAgent(tools=[DuckDuckGoSearchTool()], model=model)
|
| 428 |
answer = agent(question)
|
| 429 |
return answer
|
| 430 |
except Exception as e:
|
| 431 |
return f"Error: {str(e)}"
|
| 432 |
|
| 433 |
-
# 完全移除OAuthProfile相关代码
|
| 434 |
run_button.click(
|
| 435 |
fn=run_and_submit_all,
|
| 436 |
outputs=[status_output, results_table]
|
|
@@ -444,9 +427,8 @@ with gr.Blocks() as demo:
|
|
| 444 |
|
| 445 |
if __name__ == "__main__":
|
| 446 |
print("\n" + "-"*30 + " App Starting " + "-"*30)
|
| 447 |
-
# Check for SPACE_HOST and SPACE_ID at startup for information
|
| 448 |
space_host_startup = os.getenv("SPACE_HOST")
|
| 449 |
-
space_id_startup = os.getenv("SPACE_ID")
|
| 450 |
|
| 451 |
if space_host_startup:
|
| 452 |
print(f"✅ SPACE_HOST found: {space_host_startup}")
|
|
@@ -454,7 +436,7 @@ if __name__ == "__main__":
|
|
| 454 |
else:
|
| 455 |
print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
|
| 456 |
|
| 457 |
-
if space_id_startup:
|
| 458 |
print(f"✅ SPACE_ID found: {space_id_startup}")
|
| 459 |
print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
|
| 460 |
print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
|
|
@@ -464,8 +446,9 @@ if __name__ == "__main__":
|
|
| 464 |
print("-"*(60 + len(" App Starting ")) + "\n")
|
| 465 |
|
| 466 |
print("Launching Gradio Interface for Gemini Agent Evaluation...")
|
| 467 |
-
|
| 468 |
-
|
|
|
|
| 469 |
|
| 470 |
|
| 471 |
|
|
|
|
| 3 |
import subprocess
|
| 4 |
|
| 5 |
# 检查并安装缺失的依赖
|
| 6 |
+
required_packages = ["litellm", "duckduckgo-search", "gradio", "requests", "pandas"] # 确保gradio, requests, pandas也在这里
|
| 7 |
for package in required_packages:
|
| 8 |
try:
|
| 9 |
__import__(package)
|
|
|
|
| 19 |
import pandas as pd
|
| 20 |
import json
|
| 21 |
import time
|
|
|
|
| 22 |
from typing import List, Dict, Any, Optional
|
| 23 |
from litellm import completion
|
| 24 |
from duckduckgo_search import DDGS
|
|
|
|
| 33 |
self.name = "duckduckgo_search"
|
| 34 |
self.description = "Search the web using DuckDuckGo"
|
| 35 |
|
| 36 |
+
def search(self, query: str, max_results: int = 5) -> List[Dict[str, str]]:
|
| 37 |
"""
|
| 38 |
Search the web using DuckDuckGo and return results.
|
| 39 |
|
|
|
|
| 52 |
print(f"DuckDuckGo search error: {e}")
|
| 53 |
return [{"title": f"Search error: {e}", "body": "", "href": ""}]
|
| 54 |
|
| 55 |
+
def __call__(self, query: str, max_results: int = 5) -> Dict[str, Any]:
|
| 56 |
"""
|
| 57 |
Execute the search and return results in a structured format.
|
| 58 |
|
|
|
|
| 82 |
self.api_key = api_key
|
| 83 |
print(f"Initialized LiteLLM with model: {model_id}")
|
| 84 |
|
| 85 |
+
def generate(self, prompt: str, system_prompt: str = None) -> str:
|
| 86 |
"""
|
| 87 |
Generate text using the LiteLLM model.
|
| 88 |
|
| 89 |
Args:
|
| 90 |
prompt: The user prompt
|
| 91 |
system_prompt: Optional system prompt
|
|
|
|
| 92 |
|
| 93 |
Returns:
|
| 94 |
Generated text response
|
|
|
|
| 102 |
response = completion(
|
| 103 |
model=self.model_id,
|
| 104 |
messages=messages,
|
| 105 |
+
api_key=self.api_key
|
|
|
|
| 106 |
)
|
| 107 |
|
| 108 |
return response.choices[0].message.content
|
|
|
|
| 121 |
def format_search_results(self, results: List[Dict[str, str]]) -> str:
    """Render search hits as a numbered plain-text list for use in a prompt.

    Args:
        results: Search hits. Each hit may carry 'title', 'body' and 'href';
            a field that is missing, empty or None is replaced by
            placeholder text instead of raising.

    Returns:
        The fixed "No search results found." message when ``results`` is
        empty. Otherwise a "Search Results:" header followed by one
        numbered entry per hit, with the body limited to 200 characters.
    """
    if not results:
        return "No search results found.\n\n"

    max_body_chars = 200
    parts = ["Search Results:\n"]
    for i, result in enumerate(results, 1):
        title = result.get('title') or 'No title'
        body = result.get('body') or 'No description'
        href = result.get('href') or 'No URL'
        # Only mark the snippet as cut when text was actually dropped, so a
        # short body is not presented to the model as incomplete.
        if len(body) > max_body_chars:
            body = body[:max_body_chars] + "..."
        parts.append(f"{i}. {title}\n")
        parts.append(f" {body}\n")
        parts.append(f" URL: {href}\n\n")
    return "".join(parts)
|
| 131 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 132 |
def create_system_prompt(self) -> str:
    """Return the fixed system prompt sent with every model request.

    The text is a short role statement, a header line, and a list of
    answering principles. It does not depend on instance state.
    """
    role = "You are a highly specialized AI assistant built for factual question answering. Your primary function is to provide accurate, concise, and direct answers. "
    header = "Adhere to the following principles without deviation:\n"
    principles = [
        "- Accuracy and Conciseness: Prioritize factual correctness and brevity above all else. Your answers should be short and to the point.\n",
        "- Literal Interpretation: Interpret questions literally. Do not infer intent beyond what is explicitly stated.\n",
        "- No Extraneous Information: Do not offer opinions, suggestions, engage in conversation, or provide any information not directly requested. Avoid any form of elaboration.\n",
        "- Strict Format Adherence: Follow any explicit or implicit formatting instructions in the user's question (e.g., for numbers, lists, yes/no answers).\n",
        # The last principle intentionally has no trailing newline.
        "- 'Unknown' for Uncertainty: If a high-confidence, factual answer cannot be derived from provided context or your knowledge base, or if the question is impossible for you to answer (e.g. due to lack of capability like image understanding), you MUST output the single word 'Unknown'. Do not attempt to guess or provide a partially correct answer.",
    ]
    return role + header + "".join(principles)
|
| 143 |
+
|
| 144 |
+
def create_prompt(self, question: str, search_results: Optional[List[Dict[str, str]]] = None) -> str:
    """Assemble the user prompt for one question.

    The prompt consists of, in order: the search context (formatted hits
    plus usage instructions, or a note that none were provided), the
    question itself, the answering rules with examples, and a trailing
    "Answer: " cue.

    Args:
        question: The question text, inserted verbatim.
        search_results: Optional search hits; when falsy, the prompt says
            the model must rely on general knowledge.

    Returns:
        The complete prompt string.
    """
    if search_results:
        usage_notes = (
            "Instructions for using search results:\n"
            "- Carefully review the search results provided above.\n"
            "- If the search results contain a clear and factual answer to the question, use that information for your response.\n"
            "- If the search results are relevant but do not directly answer the question, you may synthesize information if explicitly asked to, otherwise state what is found.\n"
            "- If the search results are irrelevant or insufficient, state that the answer cannot be found in the provided search results and then use your general knowledge if applicable.\n\n"
        )
        context = self.format_search_results(search_results) + usage_notes
    else:
        context = "No specific search results were provided for this question. You will need to answer using your general knowledge.\n\n"

    task = (
        "\nYour Task:\n"
        "Provide a concise, factual answer to the question above. Follow these strict instructions for your answer:\n"
        "1. Directness: Your answer must be direct and to the point. Do NOT include any pleasantries, apologies, self-references (e.g., 'As an AI...'), or any information not explicitly asked for.\n"
        "2. No Explanations: Do NOT provide any explanations, reasoning, or justifications for your answer unless the question specifically asks for it.\n"
        "3. Factual Accuracy: Ensure your answer is factually correct based on the information provided or your internal knowledge.\n"
        "4. Formatting:\n"
        " - If the question asks for a numerical value (e.g., 'How many...'), provide ONLY the number (e.g., '42', '1000').\n"
        " - If the question asks for a list of items, provide them as a comma-separated list without numbering or bullet points (e.g., 'red, green, blue').\n"
        " - If a yes/no answer is appropriate, provide 'Yes' or 'No'.\n"
        " - For other types of questions, provide the most direct and brief factual answer.\n"
        "5. Unknown Answers: If, and only if, you cannot confidently determine a factual answer from the search results (if provided) or your general knowledge, or if the question is unanswerable (e.g., requires processing an image you cannot see, or is nonsensical), you MUST respond with the single word 'Unknown'. Do not guess or provide speculative information.\n"
        "\nExample Scenarios:\n"
        "- Question: What is the capital of France?\n"
        " Correct Answer: Paris\n"
        "- Question: How many moons does Earth have?\n"
        " Correct Answer: 1\n"
        "- Question: List the primary colors.\n"
        " Correct Answer: red, yellow, blue\n"
        "- Question: Does a dog meow?\n"
        " Correct Answer: No\n"
        "- Question: (A question where the answer is truly unknowable or unfindable for you)\n"
        " Correct Answer: Unknown\n"
    )

    # Order matters: context first, then the question, the rules, and the cue.
    return context + f"Question: {question}\n" + task + "\nAnswer: "
|
|
|
|
| 188 |
|
|
|
|
|
|
|
| 189 |
def __call__(self, question: str) -> str:
|
| 190 |
"""
|
| 191 |
Process a question and return an answer.
|
|
|
|
| 198 |
"""
|
| 199 |
print(f"Agent received question: {question[:100]}...")
|
| 200 |
|
| 201 |
+
question_lower = question.lower()
|
| 202 |
+
|
| 203 |
+
# 更新 should_search 逻辑
|
| 204 |
+
search_trigger_keywords = [
|
| 205 |
+
"what", "who", "when", "where", "how many", "which", "list", "name", "find", "does",
|
| 206 |
+
"is there", "are there", "can you tell me", "describe", "published by", "released by",
|
| 207 |
+
"highest number", "what is the population of", "when was the first", "who invented",
|
| 208 |
+
"identify", "capital of", "what year", "tell me the", "average", "statistics", "data on",
|
| 209 |
+
"information about", "details on"
|
| 210 |
+
]
|
| 211 |
+
should_search = any(trigger in question_lower for trigger in search_trigger_keywords)
|
| 212 |
+
|
| 213 |
+
if not should_search and ("?" in question and len(question_lower.split()) > 3) :
|
| 214 |
+
if not (question_lower.startswith("can you") or \
|
| 215 |
+
question_lower.startswith("write") or \
|
| 216 |
+
"tfel" in question_lower or \
|
| 217 |
+
"chess position" in question_lower or \
|
| 218 |
+
"image" in question_lower):
|
| 219 |
+
should_search = True
|
| 220 |
|
| 221 |
+
if '.remna eht sa "tfel" drow eht fo etisoppo eht etirw' in question_lower:
|
| 222 |
+
should_search = False
|
| 223 |
+
if "chess position provided in the image" in question_lower or "image." in question_lower:
|
| 224 |
+
should_search = False
|
| 225 |
+
|
| 226 |
search_results = None
|
| 227 |
if should_search and self.search_tool:
|
| 228 |
print(f"Searching for information about: {question}")
|
| 229 |
+
search_response = self.search_tool(question, max_results=5)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 230 |
search_results = search_response.get("results", [])
|
| 231 |
print(f"Found {len(search_results)} search results")
|
| 232 |
|
|
|
|
| 233 |
prompt = self.create_prompt(question, search_results)
|
| 234 |
system_prompt = self.create_system_prompt()
|
| 235 |
|
| 236 |
print("Generating response with LLM...")
|
| 237 |
+
# print(f"System Prompt: {system_prompt}") # For debugging prompts
|
| 238 |
+
# print(f"User Prompt: {prompt}") # For debugging prompts
|
| 239 |
+
response = self.model.generate(prompt, system_prompt)
|
| 240 |
|
| 241 |
+
answer = response.strip()
|
|
|
|
| 242 |
|
| 243 |
+
prefixes_to_remove = [
|
| 244 |
+
"Answer:", "The answer is:", "I believe", "I think",
|
| 245 |
+
"Based on", "According to", "The answer would be"
|
| 246 |
+
]
|
| 247 |
+
|
| 248 |
+
for prefix in prefixes_to_remove:
|
| 249 |
+
if answer.lower().startswith(prefix.lower()): # Case-insensitive prefix check
|
| 250 |
+
answer = answer[len(prefix):].strip()
|
| 251 |
+
|
| 252 |
+
if (answer.startswith('"') and answer.endswith('"')) or \
|
| 253 |
+
(answer.startswith("'") and answer.endswith("'")):
|
| 254 |
+
answer = answer[1:-1].strip()
|
| 255 |
+
|
| 256 |
+
print(f" {answer[:100]}...")
|
| 257 |
return answer
|
| 258 |
|
| 259 |
# 简化版本,不使用OAuthProfile
|
|
|
|
| 262 |
Fetches all questions, runs the Agent on them, submits all answers,
|
| 263 |
and displays the results.
|
| 264 |
"""
|
| 265 |
+
space_id = os.getenv("SPACE_ID")
|
|
|
|
| 266 |
api_url = DEFAULT_API_URL
|
| 267 |
questions_url = f"{api_url}/questions"
|
| 268 |
submit_url = f"{api_url}/submit"
|
| 269 |
|
|
|
|
| 270 |
try:
|
|
|
|
| 271 |
api_key = os.getenv("GEMINI_API_KEY")
|
| 272 |
if not api_key:
|
| 273 |
return "Error: GEMINI_API_KEY environment variable not found. Please set it in your Space settings.", None
|
|
|
|
| 278 |
print(f"Error instantiating agent: {e}")
|
| 279 |
return f"Error initializing agent: {e}", None
|
| 280 |
|
|
|
|
| 281 |
agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
|
| 282 |
if not space_id:
|
| 283 |
+
agent_code = "https://huggingface.co/spaces/lethaq/Final_Assignment_Template/tree/main" # Fallback
|
| 284 |
print(f"Agent code URL: {agent_code}")
|
| 285 |
|
|
|
|
| 286 |
print(f"Fetching questions from: {questions_url}")
|
| 287 |
try:
|
| 288 |
+
response = requests.get(questions_url, timeout=20) # Increased timeout
|
| 289 |
response.raise_for_status()
|
| 290 |
questions_data = response.json()
|
| 291 |
if not questions_data:
|
| 292 |
+
print("Fetched questions list is empty.")
|
| 293 |
+
return "Fetched questions list is empty or invalid format.", None
|
| 294 |
print(f"Fetched {len(questions_data)} questions.")
|
| 295 |
except requests.exceptions.RequestException as e:
|
| 296 |
print(f"Error fetching questions: {e}")
|
| 297 |
return f"Error fetching questions: {e}", None
|
| 298 |
except requests.exceptions.JSONDecodeError as e:
|
| 299 |
+
print(f"Error decoding JSON response from questions endpoint: {e}")
|
| 300 |
+
print(f"Response text: {response.text[:500]}")
|
| 301 |
+
return f"Error decoding server response for questions: {e}", None
|
| 302 |
except Exception as e:
|
| 303 |
print(f"An unexpected error occurred fetching questions: {e}")
|
| 304 |
return f"An unexpected error occurred fetching questions: {e}", None
|
| 305 |
|
|
|
|
| 306 |
results_log = []
|
| 307 |
answers_payload = []
|
| 308 |
print(f"Running agent on {len(questions_data)} questions...")
|
|
|
|
| 315 |
try:
|
| 316 |
print(f"Processing task {task_id}: {question_text[:50]}...")
|
| 317 |
submitted_answer = agent(question_text)
|
|
|
|
| 318 |
answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
|
| 319 |
results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
|
| 320 |
print(f"Answer for task {task_id}: {submitted_answer[:50]}...")
|
| 321 |
except Exception as e:
|
| 322 |
+
print(f"Error running agent on task {task_id}: {e}")
|
| 323 |
+
results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
|
| 324 |
|
| 325 |
if not answers_payload:
|
| 326 |
print("Agent did not produce any answers to submit.")
|
| 327 |
return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
|
| 328 |
|
|
|
|
| 329 |
print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
|
| 330 |
try:
|
|
|
|
| 331 |
submission_dict = {
|
| 332 |
+
"username": DEFAULT_USERNAME,
|
| 333 |
+
"agent_code": agent_code,
|
| 334 |
+
"answers": answers_payload
|
| 335 |
}
|
| 336 |
response = requests.post(submit_url, json=submission_dict, timeout=60)
|
| 337 |
response.raise_for_status()
|
|
|
|
| 392 |
status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
|
| 393 |
results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
|
| 394 |
|
|
|
|
| 395 |
gr.Markdown("## Test Single Question")
|
| 396 |
with gr.Row():
|
| 397 |
+
# Free-text input for the single-question test; the placeholder is hint text for the field.
question_in = gr.Textbox(label="Question", lines=3, placeholder="Enter your question here...")
|
| 398 |
answer_out = gr.Textbox(label="Answer", lines=3, interactive=False)
|
| 399 |
|
| 400 |
test_btn = gr.Button("Test Question", variant="secondary")
|
| 401 |
|
|
|
|
| 402 |
def test_single_question(question):
|
| 403 |
+
if not question.strip():
|
| 404 |
+
return "Please enter a question."
|
| 405 |
try:
|
| 406 |
api_key = os.getenv("GEMINI_API_KEY")
|
| 407 |
if not api_key:
|
| 408 |
return "Error: GEMINI_API_KEY environment variable not found"
|
| 409 |
|
| 410 |
+
model = LiteLLMModel(model_id="gemini/gemini-2.0-flash-lite", api_key=AIzaSyAhmwogxZFBtt7_OUsKQGNeOYF7ced39bM)
|
| 411 |
agent = CodeAgent(tools=[DuckDuckGoSearchTool()], model=model)
|
| 412 |
answer = agent(question)
|
| 413 |
return answer
|
| 414 |
except Exception as e:
|
| 415 |
return f"Error: {str(e)}"
|
| 416 |
|
|
|
|
| 417 |
run_button.click(
|
| 418 |
fn=run_and_submit_all,
|
| 419 |
outputs=[status_output, results_table]
|
|
|
|
| 427 |
|
| 428 |
if __name__ == "__main__":
|
| 429 |
print("\n" + "-"*30 + " App Starting " + "-"*30)
|
|
|
|
| 430 |
space_host_startup = os.getenv("SPACE_HOST")
|
| 431 |
+
space_id_startup = os.getenv("SPACE_ID")
|
| 432 |
|
| 433 |
if space_host_startup:
|
| 434 |
print(f"✅ SPACE_HOST found: {space_host_startup}")
|
|
|
|
| 436 |
else:
|
| 437 |
print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
|
| 438 |
|
| 439 |
+
if space_id_startup:
|
| 440 |
print(f"✅ SPACE_ID found: {space_id_startup}")
|
| 441 |
print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
|
| 442 |
print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
|
|
|
|
| 446 |
print("-"*(60 + len(" App Starting ")) + "\n")
|
| 447 |
|
| 448 |
print("Launching Gradio Interface for Gemini Agent Evaluation...")
|
| 449 |
+
# For Hugging Face Spaces, share=True is often not needed or handled by the platform.
|
| 450 |
+
# debug=True can be helpful during development.
|
| 451 |
+
demo.launch(debug=False, share=False)
|
| 452 |
|
| 453 |
|
| 454 |
|