Qscar KIM committed on
Commit
7cdb49c
·
1 Parent(s): c62d245

update codes

Browse files
Files changed (1) hide show
  1. app.py +80 -27
app.py CHANGED
@@ -6,69 +6,103 @@ import pandas as pd
6
  import time
7
  import re
8
 
 
9
  from smolagents import CodeAgent, InferenceClientModel, Tool
10
 
11
  # --- Constants ---
12
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
13
 
14
- # --- Custom Tool Definition ---
15
- class DuckDuckGoSearchTool(Tool):
16
  name = "web_search"
17
- description = "Searches the web for a given query and returns snippet results."
18
- inputs = {"query": {"type": "string", "description": "The search query"}}
19
  output_type = "string"
20
 
21
  def forward(self, query: str) -> str:
22
  try:
23
  url = f"https://html.duckduckgo.com/html/?q={requests.utils.quote(query)}"
24
- headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"}
25
  response = requests.get(url, headers=headers, timeout=10)
26
  if response.status_code != 200:
27
- return f"Search failed with status code {response.status_code}"
28
 
29
  html = response.text
 
30
  snippets = re.findall(r'<a class="result__snippet"[^>]*>(.*?)</a>', html, re.DOTALL)
31
- cleaned_snippets = []
32
- for snip in snippets[:5]:
33
- clean = re.sub(r'<[^>]+>', '', snip)
34
- clean = clean.replace('\n', ' ').strip()
35
- cleaned_snippets.append(clean)
36
 
37
- if cleaned_snippets:
38
- return "\n".join(f"- {s}" for s in cleaned_snippets)
39
- return "No results found."
 
 
 
 
40
  except Exception as e:
41
- return f"Search error: {str(e)}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
 
43
  # --- Basic Agent Definition ---
44
- # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
45
  class BasicAgent:
46
  def __init__(self):
 
47
  self.model = InferenceClientModel(
48
  model_id="Qwen/Qwen2.5-Coder-32B-Instruct",
49
  token=os.getenv("HF_TOKEN")
50
  )
51
- self.search_tool = DuckDuckGoSearchTool()
 
 
 
52
  self.agent = CodeAgent(
53
- tools=[self.search_tool],
54
  model=self.model,
55
- max_steps=5,
56
- additional_authorized_imports=["pandas", "numpy", "json", "math", "re", "datetime"]
57
  )
 
58
 
59
  def __call__(self, question: str) -> str:
 
60
  try:
61
  refined_prompt = (
62
- f"{question}\n\n"
63
- f"Solve this task step by step using your tools. "
64
- f"Provide the final short answer clearly at the very end."
65
  )
66
  result = self.agent.run(refined_prompt)
67
  if result is None:
68
  return "unknown"
69
  return str(result).strip()
70
  except Exception as e:
71
- return f"Error: {type(e).__name__}"
 
72
 
73
  def run_and_submit_all( profile: gr.OAuthProfile | None):
74
  """
@@ -134,8 +168,6 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
134
  submitted_answer = agent(question_text)
135
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
136
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
137
-
138
- time.sleep(2)
139
  except Exception as e:
140
  print(f"Error running agent on task {task_id}: {e}")
141
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
@@ -224,4 +256,25 @@ with gr.Blocks() as demo:
224
  )
225
 
226
  if __name__ == "__main__":
227
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  import time
7
  import re
8
 
9
+ # --- Course Architecture Modules ---
10
  from smolagents import CodeAgent, InferenceClientModel, Tool
11
 
12
  # --- Constants ---
13
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
14
 
15
+ # --- Deep Research Tool 1: web search and snippet collection tool ---
16
+ class DeepSearchTool(Tool):
17
  name = "web_search"
18
+ description = "Searches the web for a given query and returns snippet results with URLs. Useful for finding initial links."
19
+ inputs = {"query": {"type": "string", "description": "The search query to look up"}}
20
  output_type = "string"
21
 
22
  def forward(self, query: str) -> str:
23
  try:
24
  url = f"https://html.duckduckgo.com/html/?q={requests.utils.quote(query)}"
25
+ headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"}
26
  response = requests.get(url, headers=headers, timeout=10)
27
  if response.status_code != 200:
28
+ return f"Search temporary unavailable (Status: {response.status_code})."
29
 
30
  html = response.text
31
+ links = re.findall(r'<a class="result__url"[^>]* href="(.*?)"', html)
32
  snippets = re.findall(r'<a class="result__snippet"[^>]*>(.*?)</a>', html, re.DOTALL)
 
 
 
 
 
33
 
34
+ cleaned_results = []
35
+ for i in range(min(len(links), 4)):
36
+ lnk = links[i]
37
+ snip = re.sub(r'<[^>]+>', '', snippets[i]).replace('\n', ' ').strip() if i < len(snippets) else ""
38
+ cleaned_results.append(f"[{i+1}] URL: {lnk}\nSnippet: {snip}")
39
+
40
+ return "\n\n".join(cleaned_results) if cleaned_results else "No results found."
41
  except Exception as e:
42
+ return f"Search execution failed: {str(e)}"
43
+
44
+ # --- Deep Research Tool 2: in-depth text scraping tool for a specific page ---
45
+ class VisitWebpageTool(Tool):
46
+ name = "visit_webpage"
47
+ description = "Downloads and extracts all plain text content from a specific URL. Use this to read full articles, CSV contents, or text documents provided in GAIA tasks."
48
+ inputs = {"url": {"type": "string", "description": "The exact web URL to visit and read"}}
49
+ output_type = "string"
50
+
51
+ def forward(self, url: str) -> str:
52
+ try:
53
+ headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"}
54
+ response = requests.get(url, headers=headers, timeout=15)
55
+ if response.status_code != 200:
56
+ return f"Failed to retrieve webpage. Status code: {response.status_code}"
57
+
58
+ # Strip HTML tags and extract the plain text
59
+ text = response.text
60
+ text = re.sub(r'<script[^>]*>.*?</script>', '', text, flags=re.DOTALL)
61
+ text = re.sub(r'<style[^>]*>.*?</style>', '', text, flags=re.DOTALL)
62
+ text = re.sub(r'<[^>]+>', ' ', text)
63
+ text = re.sub(r'\s+', ' ', text).strip()
64
+
65
+ # Limit the result to at most 4000 characters to avoid polluting the context window
66
+ return text[:4000] if len(text) > 4000 else text
67
+ except Exception as e:
68
+ return f"Failed to visit webpage: {str(e)}"
69
 
70
  # --- Basic Agent Definition ---
71
+ # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
72
  class BasicAgent:
73
  def __init__(self):
74
+ # Bind the best free open-source inference backend client, per the Open Deep Research spec
75
  self.model = InferenceClientModel(
76
  model_id="Qwen/Qwen2.5-Coder-32B-Instruct",
77
  token=os.getenv("HF_TOKEN")
78
  )
79
+ self.search_tool = DeepSearchTool()
80
+ self.visit_tool = VisitWebpageTool()
81
+
82
+ # Package as a CodeAgent to drive the autonomous compile and error self-correction loop
83
  self.agent = CodeAgent(
84
+ tools=[self.search_tool, self.visit_tool],
85
  model=self.model,
86
+ max_steps=10, # Extend the execution budget to 10 steps for complex web surfing and data cross-checking
87
+ additional_authorized_imports=["requests", "pandas", "numpy", "json", "math", "re", "datetime"]
88
  )
89
+ print("BasicAgent initialized.")
90
 
91
  def __call__(self, question: str) -> str:
92
+ print(f"Agent received question (first 50 chars): {question[:50]}...")
93
  try:
94
  refined_prompt = (
95
+ f"Task: {question}\n\n"
96
+ f"You have deep research tools. Search information and visit specific URLs if necessary. "
97
+ f"Provide the final short answer clearly at the very end of your response."
98
  )
99
  result = self.agent.run(refined_prompt)
100
  if result is None:
101
  return "unknown"
102
  return str(result).strip()
103
  except Exception as e:
104
+ print(f"Error running agent: {e}")
105
+ return "unknown"
106
 
107
  def run_and_submit_all( profile: gr.OAuthProfile | None):
108
  """
 
168
  submitted_answer = agent(question_text)
169
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
170
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
 
 
171
  except Exception as e:
172
  print(f"Error running agent on task {task_id}: {e}")
173
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
 
256
  )
257
 
258
  if __name__ == "__main__":
259
+ print("\n" + "-"*30 + " App Starting " + "-"*30)
260
+ # Check for SPACE_HOST and SPACE_ID at startup for information
261
+ space_host_startup = os.getenv("SPACE_HOST")
262
+ space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
263
+
264
+ if space_host_startup:
265
+ print(f"โœ… SPACE_HOST found: {space_host_startup}")
266
+ print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
267
+ else:
268
+ print("โ„น๏ธ SPACE_HOST environment variable not found (running locally?).")
269
+
270
+ if space_id_startup: # Print repo URLs if SPACE_ID is found
271
+ print(f"โœ… SPACE_ID found: {space_id_startup}")
272
+ print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
273
+ print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
274
+ else:
275
+ print("โ„น๏ธ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
276
+
277
+ print("-"*(60 + len(" App Starting ")) + "\n")
278
+
279
+ print("Launching Gradio Interface for Basic Agent Evaluation...")
280
+ demo.launch(debug=True, share=False)