MasterOfHugs committed on
Commit
9f8247b
·
verified ·
1 Parent(s): 4a79093

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +226 -49
app.py CHANGED
@@ -1,3 +1,4 @@
 
1
  from smolagents import CodeAgent, HfApiModel, tool
2
  from duckduckgo_search import DDGS
3
  import requests
@@ -8,15 +9,15 @@ import yaml
8
  from tools.final_answer import FinalAnswerTool
9
  from Gradio_UI import GradioUI
10
  import json
 
 
11
 
12
- #---------------------------------------------------------------------------#
13
- #--------------------------------- Tools ----------------------------------#
14
- #---------------------------------------------------------------------------#
15
 
16
  @tool
17
  def web_search(query: str, max_results: int = 5) -> str:
18
  """
19
- Recherche des informations sur internet via DuckDuckGo et retourne une liste de résultats JSON.
20
 
21
  Args:
22
  query (str): La requête de recherche.
@@ -30,25 +31,40 @@ def web_search(query: str, max_results: int = 5) -> str:
30
  results = list(ddgs.text(query, max_results=max_results))
31
  return json.dumps(results, ensure_ascii=False, indent=2)
32
  except Exception as e:
33
- return json.dumps({"error": str(e)})
34
 
35
  @tool
36
- def get_current_time_in_timezone(timezone: str) -> str:
37
  """
38
- Retourne l’heure locale actuelle dans un fuseau horaire donné.
39
 
40
  Args:
41
- timezone (str): Un fuseau horaire valide (ex: 'America/New_York').
42
 
43
  Returns:
44
- str: Heure locale formatée ou message derreur.
45
  """
46
  try:
47
- tz = pytz.timezone(timezone)
48
- local_time = datetime.datetime.now(tz).strftime("%Y-%m-%d %H:%M:%S")
49
- return f"L’heure locale à {timezone} est {local_time}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  except Exception as e:
51
- return f"Erreur pour le fuseau horaire '{timezone}': {str(e)}"
52
 
53
  @tool
54
  def visit_webpage(url: str) -> str:
@@ -59,7 +75,7 @@ def visit_webpage(url: str) -> str:
59
  url (str): URL de la page web.
60
 
61
  Returns:
62
- str: Contenu HTML de la page.
63
  """
64
  headers = {
65
  "User-Agent": (
@@ -68,65 +84,226 @@ def visit_webpage(url: str) -> str:
68
  "Chrome/122.0.0.0 Safari/537.36"
69
  )
70
  }
71
- resp = requests.get(url, headers=headers, timeout=10)
72
- resp.raise_for_status()
73
- return resp.text
 
 
 
74
 
75
  @tool
76
- def parse_html(html: str, css_selector: str) -> list:
77
  """
78
- Extrait du texte d’éléments HTML via un sélecteur CSS.
79
 
80
  Args:
81
  html (str): Code HTML complet.
82
- css_selector (str): Sélecteur CSS (ex: 'table tr td:last-child').
83
 
84
  Returns:
85
- list: Liste de textes extraits des éléments correspondant au sélecteur.
86
  """
87
- soup = BeautifulSoup(html, "html.parser")
88
- elements = soup.select(css_selector)
89
- return [el.get_text(strip=True) for el in elements]
 
 
 
 
90
 
91
- #---------------------------------------------------------------------------#
92
- #------------------------------- Agent Init -------------------------------#
93
- #---------------------------------------------------------------------------#
 
94
 
95
- final_answer = FinalAnswerTool()
 
96
 
97
- # Utiliser un modèle gratuit
98
- model = HfApiModel(
99
- max_tokens=1024,
100
- temperature=0.5,
101
- model_id='Qwen/Qwen2.5-small-Instruct',
102
- custom_role_conversions=None,
103
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
 
105
- # Charger les templates de prompt
106
- with open("prompts.yaml", 'r') as stream:
107
- prompt_templates = yaml.safe_load(stream)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
108
 
109
- # Créer l’agent avec les outils
110
  agent = CodeAgent(
111
  model=model,
112
  tools=[
113
  final_answer,
114
  web_search,
115
- get_current_time_in_timezone,
116
  visit_webpage,
117
- parse_html
 
118
  ],
119
  max_steps=6,
120
  verbosity_level=1,
121
- grammar=None,
122
- planning_interval=None,
123
  name="InfoAgent",
124
- description="Un agent capable de donner l’heure, rechercher sur internet et analyser des pages web",
125
- prompt_templates=prompt_templates
126
  )
127
 
128
- #---------------------------------------------------------------------------#
129
- #------------------------------- Launch UI --------------------------------#
130
- #---------------------------------------------------------------------------#
 
 
 
 
 
 
 
 
 
 
131
 
132
- GradioUI(agent).launch()
 
 
 
 
 
 
 
 
 
1
+ # app.py — robust starter (fallback model + safe proxy + web tools)
2
  from smolagents import CodeAgent, HfApiModel, tool
3
  from duckduckgo_search import DDGS
4
  import requests
 
9
  from tools.final_answer import FinalAnswerTool
10
  from Gradio_UI import GradioUI
11
  import json
12
+ import sys
13
+ import traceback
14
 
15
+ # ---------------------- Tools (Google-style docstrings) --------------------
 
 
16
 
17
  @tool
18
  def web_search(query: str, max_results: int = 5) -> str:
19
  """
20
+ Recherche des informations sur internet via DuckDuckGo et retourne une liste JSON.
21
 
22
  Args:
23
  query (str): La requête de recherche.
 
31
  results = list(ddgs.text(query, max_results=max_results))
32
  return json.dumps(results, ensure_ascii=False, indent=2)
33
  except Exception as e:
34
+ return json.dumps({"error": f"web_search failed: {str(e)}"})
35
 
36
  @tool
37
def extract_first_url(search_results_json: str) -> str:
    """
    Extract the first HTTP(S) URL from the JSON output of web_search.

    Lookup order: the link-like keys of the first result ("href", "link",
    "url", "result"), then any string value of the first result that starts
    with "http", then the first URL-looking substring found in any string of
    the decoded payload.

    Args:
        search_results_json (str): JSON string returned by web_search.

    Returns:
        str: The first URL found, or a human-readable error message.
    """
    # Imported locally, as in the original, so the function is self-contained.
    import re

    def iter_strings(node):
        """Yield every string value nested in *node*, in document order."""
        if isinstance(node, str):
            yield node
        elif isinstance(node, dict):
            for value in node.values():
                yield from iter_strings(value)
        elif isinstance(node, list):
            for item in node:
                yield from iter_strings(item)

    try:
        results = json.loads(search_results_json)
        if not results:
            return "Aucun résultat trouvé"

        if isinstance(results, dict):
            # web_search reports failures as {"error": "..."}; surface that
            # message instead of failing on results[0] with an opaque KeyError.
            if "error" in results:
                return f"Erreur extraction URL: {results['error']}"
            first = results
        elif isinstance(results, list):
            first = results[0]
        else:
            first = None

        if isinstance(first, dict):
            for key in ("href", "link", "url", "result"):
                value = first.get(key)
                if isinstance(value, str) and value.startswith("http"):
                    return value
            for value in first.values():
                if isinstance(value, str) and value.startswith("http"):
                    return value

        # Search the decoded strings rather than the re-serialised JSON text,
        # so JSON escapes (e.g. "\\n") can never become part of the match.
        url_pattern = re.compile(r"https?://[^\s'\"<>()]+")
        for text in iter_strings(results):
            match = url_pattern.search(text)
            if match:
                return match.group(0)
        return "Aucune URL trouvée"
    except Exception as e:
        # Tool boundary: always answer with a string, never raise.
        return f"Erreur extraction URL: {str(e)}"
68
 
69
  @tool
70
  def visit_webpage(url: str) -> str:
 
75
  url (str): URL de la page web.
76
 
77
  Returns:
78
+ str: Contenu HTML de la page ou message d'erreur.
79
  """
80
  headers = {
81
  "User-Agent": (
 
84
  "Chrome/122.0.0.0 Safari/537.36"
85
  )
86
  }
87
+ try:
88
+ resp = requests.get(url, headers=headers, timeout=12)
89
+ resp.raise_for_status()
90
+ return resp.text
91
+ except Exception as e:
92
+ return f"Erreur téléchargement page: {str(e)}"
93
 
94
  @tool
95
def parse_html(html: str, css_selector: str) -> str:
    """
    Select elements with a CSS selector and return their text as a JSON list.

    Args:
        html (str): Full HTML source.
        css_selector (str): CSS selector (e.g. 'table.wikitable tr').

    Returns:
        str: JSON-encoded list of the extracted texts, or a JSON object with
            an "error" key if parsing fails.
    """
    try:
        document = BeautifulSoup(html, "html.parser")
        extracted = []
        for node in document.select(css_selector):
            # Join nested text fragments with a single space, trimmed.
            extracted.append(node.get_text(" ", strip=True))
        payload = json.dumps(extracted, ensure_ascii=False, indent=2)
    except Exception as exc:
        payload = json.dumps({"error": f"Parse failed: {str(exc)}"})
    return payload
113
 
114
+ @tool
115
def get_current_time_in_timezone(timezone: str) -> str:
    """
    Return the current local time in the given timezone.

    Args:
        timezone (str): Timezone name (e.g. 'Europe/Paris').

    Returns:
        str: JSON object with "timezone" and "local_time" keys, or a JSON
            object with an "error" key if the lookup fails.
    """
    try:
        zone = pytz.timezone(timezone)
        now_in_zone = datetime.datetime.now(zone)
        payload = {
            "timezone": timezone,
            "local_time": now_in_zone.strftime("%Y-%m-%d %H:%M:%S"),
        }
        return json.dumps(payload, ensure_ascii=False)
    except Exception as exc:
        return json.dumps({"error": f"Timezone error: {str(exc)}"})
131
+
132
+
133
+ # ---------------------- Safe model proxy & fallback ------------------------
134
+
135
class SafeModelProxy:
    """
    Wrap a model-like object and guarantee integer token counters.

    The proxy forwards attribute access and calls to the wrapped model, and
    exposes ``last_input_token_count`` / ``last_output_token_count`` as
    properties that return 0 whenever the wrapped value is missing or is not
    an int (for example ``None``).
    """

    def __init__(self, model_obj):
        """Store *model_obj* and create its token counters if they are absent."""
        self._m = model_obj
        for counter in ("last_input_token_count", "last_output_token_count"):
            if not hasattr(self._m, counter):
                setattr(self._m, counter, 0)

    def __getattr__(self, name):
        """Delegate lookups that fail on the proxy to the wrapped model."""
        # __getattr__ only runs when normal lookup fails. If "_m" itself is
        # missing (instance built without __init__, e.g. by copy/pickle),
        # reading self._m here would recurse until RecursionError.
        if name == "_m":
            raise AttributeError(name)
        return getattr(self._m, name)

    def __call__(self, *args, **kwargs):
        """
        Invoke the wrapped model the way a caller would invoke it directly.

        Implicit special-method lookup bypasses ``__getattr__``, so without
        this method ``proxy(...)`` raises TypeError even when the wrapped
        model is callable. Exceptions from the wrapped call propagate so the
        caller can handle them. Non-callable wrapped objects go through
        ``run``.
        """
        # NOTE(review): run() returns plain strings on fallback/error; callers
        # expecting a message object from the call path may need adapting.
        if callable(self._m):
            return self._m(*args, **kwargs)
        return self.run(*args, **kwargs)

    @property
    def last_input_token_count(self):
        """Input token count of the wrapped model, or 0 if it is not an int."""
        v = getattr(self._m, "last_input_token_count", 0)
        return v if isinstance(v, int) else 0

    @property
    def last_output_token_count(self):
        """Output token count of the wrapped model, or 0 if it is not an int."""
        v = getattr(self._m, "last_output_token_count", 0)
        return v if isinstance(v, int) else 0

    def run(self, *args, **kwargs):
        """
        Call the wrapped model's ``run`` (or ``generate``) and never raise.

        Returns:
            Whatever the wrapped method returns; a "[FALLBACK] ..." string if
            the wrapped object has neither method; a "[MODEL ERROR] ..."
            string if the call raises.
        """
        try:
            if hasattr(self._m, "run"):
                out = self._m.run(*args, **kwargs)
            elif hasattr(self._m, "generate"):
                out = self._m.generate(*args, **kwargs)
            else:
                # Nothing to call: echo the prompt back in a fallback message.
                prompt = args[0] if args else kwargs.get("prompt", "")
                out = f"[FALLBACK] model not available to generate answer. Prompt was: {prompt}"

            # Best effort: replace non-int counters with whitespace word counts.
            if not isinstance(getattr(self._m, "last_input_token_count", 0), int):
                try:
                    self._m.last_input_token_count = len(args[0].split()) if args else 0
                except Exception:
                    self._m.last_input_token_count = 0
            if not isinstance(getattr(self._m, "last_output_token_count", 0), int):
                try:
                    self._m.last_output_token_count = len(str(out).split())
                except Exception:
                    self._m.last_output_token_count = 0
            return out
        except Exception as e:
            # Deliberate: report the failure as text instead of raising.
            return f"[MODEL ERROR] {str(e)}"
188
+
189
+
190
def create_safe_hf_model(preferred_model_ids=None):
    """
    Build a model for the agent, falling back to a local stub if needed.

    Each id in ``preferred_model_ids`` is tried in order with ``HfApiModel``;
    the first instance constructed without raising is wrapped in
    ``SafeModelProxy`` and returned. If every attempt raises, a minimal local
    model that only returns a fixed "degraded mode" message is returned
    instead, also wrapped in ``SafeModelProxy``. Progress is logged to stderr.

    Args:
        preferred_model_ids: Optional iterable of model ids passed to
            ``HfApiModel``, in order of preference. Defaults to three small
            models.

    Returns:
        SafeModelProxy: Proxy around the first constructed ``HfApiModel`` or
            around the local fallback.
    """
    if preferred_model_ids is None:
        preferred_model_ids = [
            "google/flan-t5-small",
            "gpt2",
            "facebook/opt-125m",
        ]

    # NOTE(review): constructing HfApiModel presumably does not prove the
    # remote model is usable; failures may only appear on first call — confirm.
    for mid in preferred_model_ids:
        try:
            print(f"[model] trying HfApiModel({mid})", file=sys.stderr)
            m = HfApiModel(model_id=mid, max_tokens=1024, temperature=0.5)
            proxy = SafeModelProxy(m)
            print(f"[model] loaded {mid}", file=sys.stderr)
            return proxy
        except Exception as e:
            # Best effort: log and move on to the next candidate id.
            print(f"[model] cannot load {mid}: {repr(e)}", file=sys.stderr)

    print("[model] all remote models failed -> using fallback local model", file=sys.stderr)

    class FallbackLocal:
        """Stub model that answers every prompt with a fixed degraded-mode notice."""

        def __init__(self):
            self.model_id = "fallback-local"
            self.last_input_token_count = 0
            self.last_output_token_count = 0

        def run(self, prompt: str = "", *args, **kwargs):
            """Return the degraded-mode notice and refresh the token counters.

            Extra positional/keyword arguments are accepted and ignored so a
            wrapper that forwards generation options still gets the notice
            instead of a TypeError.
            """
            reply = (
                "MODE DÉGRADÉ: le modèle distant n'a pas pu être chargé.\n"
                "Je peux cependant effectuer une recherche web et extraire des pages.\n"
                "Demande par exemple : 'Who nominated the Giganotosaurus featured article?'"
            )
            try:
                # Whitespace word count stands in for a real token count.
                self.last_input_token_count = len(prompt.split()) if prompt else 0
            except (AttributeError, TypeError):
                # Non-string prompt (no usable .split()): count as 0.
                self.last_input_token_count = 0
            self.last_output_token_count = len(reply.split())
            return reply

    return SafeModelProxy(FallbackLocal())
240
+
241
+
242
+ # ---------------------- Create model (safe) -------------------------------
243
+
244
model = create_safe_hf_model()


def _ensure_int_token_counters(model_obj):
    """Set missing or None token counters to 0 on the object wrapped by *model_obj*.

    The counters are written on ``model_obj._m`` (the wrapped model), not on
    the proxy itself. Failures are logged to stderr and otherwise ignored:
    this is a best-effort safety net, not a requirement for start-up.
    """
    for attr in ("last_input_token_count", "last_output_token_count"):
        if getattr(model_obj, attr, None) is None:
            try:
                setattr(model_obj._m, attr, 0)
            except Exception as exc:
                print(f"[model] cannot reset {attr}: {repr(exc)}", file=sys.stderr)


# ensure attributes exist and are ints
_ensure_int_token_counters(model)

# ---------------------- Agent init ---------------------------------------

final_answer = FinalAnswerTool()

# Load prompts.yaml; on any failure fall back to an empty mapping, but say so.
try:
    with open("prompts.yaml", "r", encoding="utf-8") as fh:
        prompt_templates = yaml.safe_load(fh) or {}
except Exception as exc:
    print(f"[prompts] cannot load prompts.yaml: {repr(exc)}", file=sys.stderr)
    prompt_templates = {}

agent = CodeAgent(
    model=model,
    tools=[
        final_answer,
        web_search,
        extract_first_url,
        visit_webpage,
        parse_html,
        get_current_time_in_timezone,
    ],
    max_steps=6,
    verbosity_level=1,
    prompt_templates=prompt_templates,
    name="InfoAgent",
    description="Agent minimal : recherche web + parsing HTML",
)

# Final safety: the UI reads agent.model.last_*_token_count, so make sure the
# counters are present and not None on whatever object the agent holds.
_ensure_int_token_counters(agent.model)

# ---------------------- Launch UI ----------------------------------------

if __name__ == "__main__":
    GradioUI(agent).launch()