Spaces:
Sleeping
Sleeping
Qscar KIM committed on
Commit ·
7cdb49c
1
Parent(s): c62d245
update codes
Browse files
app.py
CHANGED
|
@@ -6,69 +6,103 @@ import pandas as pd
|
|
| 6 |
import time
|
| 7 |
import re
|
| 8 |
|
|
|
|
| 9 |
from smolagents import CodeAgent, InferenceClientModel, Tool
|
| 10 |
|
| 11 |
# --- Constants ---
|
| 12 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
| 13 |
|
| 14 |
-
# ---
|
| 15 |
-
class
|
| 16 |
name = "web_search"
|
| 17 |
-
description = "Searches the web for a given query and returns snippet results."
|
| 18 |
-
inputs = {"query": {"type": "string", "description": "The search query"}}
|
| 19 |
output_type = "string"
|
| 20 |
|
| 21 |
def forward(self, query: str) -> str:
|
| 22 |
try:
|
| 23 |
url = f"https://html.duckduckgo.com/html/?q={requests.utils.quote(query)}"
|
| 24 |
-
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"}
|
| 25 |
response = requests.get(url, headers=headers, timeout=10)
|
| 26 |
if response.status_code != 200:
|
| 27 |
-
return f"Search
|
| 28 |
|
| 29 |
html = response.text
|
|
|
|
| 30 |
snippets = re.findall(r'<a class="result__snippet"[^>]*>(.*?)</a>', html, re.DOTALL)
|
| 31 |
-
cleaned_snippets = []
|
| 32 |
-
for snip in snippets[:5]:
|
| 33 |
-
clean = re.sub(r'<[^>]+>', '', snip)
|
| 34 |
-
clean = clean.replace('\n', ' ').strip()
|
| 35 |
-
cleaned_snippets.append(clean)
|
| 36 |
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
except Exception as e:
|
| 41 |
-
return f"Search
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
|
| 43 |
# --- Basic Agent Definition ---
|
| 44 |
-
# ----- THIS IS
|
| 45 |
class BasicAgent:
|
| 46 |
def __init__(self):
|
|
|
|
| 47 |
self.model = InferenceClientModel(
|
| 48 |
model_id="Qwen/Qwen2.5-Coder-32B-Instruct",
|
| 49 |
token=os.getenv("HF_TOKEN")
|
| 50 |
)
|
| 51 |
-
self.search_tool =
|
|
|
|
|
|
|
|
|
|
| 52 |
self.agent = CodeAgent(
|
| 53 |
-
tools=[self.search_tool],
|
| 54 |
model=self.model,
|
| 55 |
-
max_steps=
|
| 56 |
-
additional_authorized_imports=["pandas", "numpy", "json", "math", "re", "datetime"]
|
| 57 |
)
|
|
|
|
| 58 |
|
| 59 |
def __call__(self, question: str) -> str:
|
|
|
|
| 60 |
try:
|
| 61 |
refined_prompt = (
|
| 62 |
-
f"{question}\n\n"
|
| 63 |
-
f"
|
| 64 |
-
f"Provide the final short answer clearly at the very end."
|
| 65 |
)
|
| 66 |
result = self.agent.run(refined_prompt)
|
| 67 |
if result is None:
|
| 68 |
return "unknown"
|
| 69 |
return str(result).strip()
|
| 70 |
except Exception as e:
|
| 71 |
-
|
|
|
|
| 72 |
|
| 73 |
def run_and_submit_all( profile: gr.OAuthProfile | None):
|
| 74 |
"""
|
|
@@ -134,8 +168,6 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
|
|
| 134 |
submitted_answer = agent(question_text)
|
| 135 |
answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
|
| 136 |
results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
|
| 137 |
-
|
| 138 |
-
time.sleep(2)
|
| 139 |
except Exception as e:
|
| 140 |
print(f"Error running agent on task {task_id}: {e}")
|
| 141 |
results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
|
|
@@ -224,4 +256,25 @@ with gr.Blocks() as demo:
|
|
| 224 |
)
|
| 225 |
|
| 226 |
if __name__ == "__main__":
|
| 227 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
import time
|
| 7 |
import re
|
| 8 |
|
| 9 |
+
# --- Course Architecture Modules ---
|
| 10 |
from smolagents import CodeAgent, InferenceClientModel, Tool
|
| 11 |
|
| 12 |
# --- Constants ---
|
| 13 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
| 14 |
|
| 15 |
+
# --- Deep Research Tool 1: web search and snippet collection tool ---
|
| 16 |
+
class DeepSearchTool(Tool):
    """Web search tool backed by DuckDuckGo's HTML endpoint.

    Returns up to four numbered ``URL`` / ``Snippet`` pairs as one string so
    the agent can choose which pages to open next.
    """

    name = "web_search"
    description = "Searches the web for a given query and returns snippet results with URLs. Useful for finding initial links."
    inputs = {"query": {"type": "string", "description": "The search query to look up"}}
    output_type = "string"

    def forward(self, query: str) -> str:
        """Run ``query`` against DuckDuckGo and format the top results.

        Args:
            query: Free-text search query.

        Returns:
            Numbered results separated by blank lines, ``"No results found."``
            when nothing could be parsed, or a short error message when the
            request fails. This method never raises; failures are reported
            in the returned string.
        """
        # Function-scope imports keep the tool self-contained.
        from html import unescape
        from urllib.parse import parse_qs, urlparse

        try:
            url = f"https://html.duckduckgo.com/html/?q={requests.utils.quote(query)}"
            headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"}
            response = requests.get(url, headers=headers, timeout=10)
            if response.status_code != 200:
                return f"Search temporary unavailable (Status: {response.status_code})."

            page = response.text
            links = re.findall(r'<a class="result__url"[^>]* href="(.*?)"', page)
            snippets = re.findall(r'<a class="result__snippet"[^>]*>(.*?)</a>', page, re.DOTALL)

            cleaned_results = []
            for i, raw_link in enumerate(links[:4]):
                # href values are HTML-escaped in the page source (&amp; etc.).
                lnk = unescape(raw_link)
                parsed = urlparse(lnk)
                # Result links may be DuckDuckGo redirects carrying the real
                # target in the "uddg" query parameter; unwrap them so the
                # URL can be fetched directly.
                target = parse_qs(parsed.query).get("uddg") if "duckduckgo.com" in parsed.netloc else None
                if target:
                    lnk = target[0]
                elif lnk.startswith("//"):
                    # Protocol-relative link: add a scheme so it is fetchable.
                    lnk = "https:" + lnk

                # Links and snippets are paired by position; a missing
                # snippet yields an empty string rather than an error.
                if i < len(snippets):
                    snip = re.sub(r'<[^>]+>', '', snippets[i])
                    snip = unescape(snip).replace('\n', ' ').strip()
                else:
                    snip = ""
                cleaned_results.append(f"[{i+1}] URL: {lnk}\nSnippet: {snip}")

            return "\n\n".join(cleaned_results) if cleaned_results else "No results found."
        except Exception as e:
            # Best-effort tool: report the failure to the agent as text.
            return f"Search execution failed: {str(e)}"
|
| 43 |
+
|
| 44 |
+
# --- Deep Research Tool 2: deep text scraping tool for a specific page ---
|
| 45 |
+
class VisitWebpageTool(Tool):
    """Fetch a URL and return its readable text, capped at 4000 characters."""

    name = "visit_webpage"
    description = "Downloads and extracts all plain text content from a specific URL. Use this to read full articles, CSV contents, or text documents provided in GAIA tasks."
    inputs = {"url": {"type": "string", "description": "The exact web URL to visit and read"}}
    output_type = "string"

    def forward(self, url: str) -> str:
        """Download ``url`` and reduce the response to plain text.

        Args:
            url: Absolute URL of the page or document to read.

        Returns:
            At most 4000 characters of text, or a short error message when
            the request fails or returns a non-200 status. This method never
            raises; failures are reported in the returned string.
        """
        # Function-scope import keeps the tool self-contained.
        from html import unescape

        max_chars = 4000  # keeps a single page from flooding the model context

        try:
            headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"}
            response = requests.get(url, headers=headers, timeout=15)
            if response.status_code != 200:
                return f"Failed to retrieve webpage. Status code: {response.status_code}"

            text = response.text

            # Non-markup payloads (CSV, plain text, JSON) are returned with
            # their line structure intact; collapsing whitespace would merge
            # CSV rows into one line. A missing Content-Type is treated as
            # HTML, as before.
            content_type = response.headers.get("Content-Type", "").lower()
            if content_type and "html" not in content_type and "xml" not in content_type:
                return text.strip()[:max_chars]

            # Drop script/style bodies first (tag names may be upper-case),
            # then strip the remaining tags.
            text = re.sub(r'<script[^>]*>.*?</script>', '', text, flags=re.DOTALL | re.IGNORECASE)
            text = re.sub(r'<style[^>]*>.*?</style>', '', text, flags=re.DOTALL | re.IGNORECASE)
            text = re.sub(r'<[^>]+>', ' ', text)
            # Decode entities only after tag removal so escaped markup such
            # as "&lt;b&gt;" is kept as literal text.
            text = unescape(text)
            text = re.sub(r'\s+', ' ', text).strip()

            return text[:max_chars]
        except Exception as e:
            # Best-effort tool: report the failure to the agent as text.
            return f"Failed to visit webpage: {str(e)}"
|
| 69 |
|
| 70 |
# --- Basic Agent Definition ---
|
| 71 |
+
# ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
|
| 72 |
class BasicAgent:
    """Callable question-answering agent built on a smolagents ``CodeAgent``.

    The agent is given a web-search tool and a page-visit tool, and answers
    one question per call.
    """

    def __init__(self):
        """Create the model client, the two tools, and the code agent."""
        # Inference client; the access token is read from HF_TOKEN.
        self.model = InferenceClientModel(
            model_id="Qwen/Qwen2.5-Coder-32B-Instruct",
            token=os.getenv("HF_TOKEN"),
        )
        self.search_tool = DeepSearchTool()
        self.visit_tool = VisitWebpageTool()

        toolbox = [self.search_tool, self.visit_tool]
        allowed_modules = ["requests", "pandas", "numpy", "json", "math", "re", "datetime"]
        # Ten steps give the agent room to search, visit pages, and
        # cross-check what it finds.
        self.agent = CodeAgent(
            tools=toolbox,
            model=self.model,
            max_steps=10,
            additional_authorized_imports=allowed_modules,
        )
        print("BasicAgent initialized.")

    def __call__(self, question: str) -> str:
        """Answer ``question`` and return the agent's result as a string.

        Args:
            question: The task text to hand to the agent.

        Returns:
            The stripped string form of the agent's result, or ``"unknown"``
            when the agent returns ``None`` or raises.
        """
        print(f"Agent received question (first 50 chars): {question[:50]}...")
        try:
            prompt_parts = [
                f"Task: {question}\n\n",
                "You have deep research tools. Search information and visit specific URLs if necessary. ",
                "Provide the final short answer clearly at the very end of your response.",
            ]
            answer = self.agent.run("".join(prompt_parts))
            return "unknown" if answer is None else str(answer).strip()
        except Exception as err:
            # Any agent failure is logged and turned into the fallback answer.
            print(f"Error running agent: {err}")
            return "unknown"
|
| 106 |
|
| 107 |
def run_and_submit_all( profile: gr.OAuthProfile | None):
|
| 108 |
"""
|
|
|
|
| 168 |
submitted_answer = agent(question_text)
|
| 169 |
answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
|
| 170 |
results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
|
|
|
|
|
|
|
| 171 |
except Exception as e:
|
| 172 |
print(f"Error running agent on task {task_id}: {e}")
|
| 173 |
results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
|
|
|
|
| 256 |
)
|
| 257 |
|
| 258 |
if __name__ == "__main__":
    # Startup banner: report the Space-related environment variables, then
    # launch the Gradio interface.
    print("\n" + "-"*30 + " App Starting " + "-"*30)
    # Check for SPACE_HOST and SPACE_ID at startup for information
    space_host_startup = os.getenv("SPACE_HOST")
    space_id_startup = os.getenv("SPACE_ID")  # Get SPACE_ID at startup

    if space_host_startup:
        print(f"✅ SPACE_HOST found: {space_host_startup}")
        print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
    else:
        print("ℹ️ SPACE_HOST environment variable not found (running locally?).")

    if space_id_startup:  # Print repo URLs if SPACE_ID is found
        print(f"✅ SPACE_ID found: {space_id_startup}")
        print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
        print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
    else:
        print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")

    # Closing rule is as wide as the opening banner line.
    print("-"*(60 + len(" App Starting ")) + "\n")

    print("Launching Gradio Interface for Basic Agent Evaluation...")
    demo.launch(debug=True, share=False)
|