Spaces:
Runtime error
Runtime error
uuu
Browse files- my_tools.py +100 -363
my_tools.py
CHANGED
|
@@ -7,469 +7,206 @@ from io import BytesIO
|
|
| 7 |
from bs4 import BeautifulSoup
|
| 8 |
from pydantic import Field
|
| 9 |
|
| 10 |
-
# -----
|
| 11 |
-
# (avoid hard‑failure if libs are absent; import inside tools when needed)
|
| 12 |
-
|
| 13 |
-
# ---------- LLM WRAPPER ----------
|
| 14 |
from llama_index.core.llms import ChatMessage, LLMMetadata, LLM, CompletionResponse
|
| 15 |
from llama_index.core.agent import ReActAgent
|
| 16 |
from llama_index.core.callbacks.llama_debug import LlamaDebugHandler
|
| 17 |
from llama_index.core.tools import FunctionTool
|
| 18 |
-
from llama_index.
|
| 19 |
-
|
| 20 |
from langchain_community.retrievers import TavilySearchAPIRetriever
|
| 21 |
-
import google.generativeai as genai
|
| 22 |
-
|
| 23 |
|
| 24 |
# ---------- BASIC SETUP ----------
|
| 25 |
HEADERS = {"User-Agent": "Mozilla/5.0"}
|
| 26 |
|
| 27 |
-
|
| 28 |
-
|
| 29 |
def check_required_keys() -> None:
|
| 30 |
-
missing = []
|
| 31 |
-
if not os.getenv("TAVILY_API_KEY"):
|
| 32 |
-
missing.append("TAVILY_API_KEY")
|
| 33 |
-
if not os.getenv("GEMINI_API_KEY"):
|
| 34 |
-
missing.append("GEMINI_API_KEY")
|
| 35 |
if missing:
|
| 36 |
-
print(
|
| 37 |
-
f"⚠️ WARNING: Missing API keys: {', '.join(missing)}. Agent will not function properly!"
|
| 38 |
-
)
|
| 39 |
else:
|
| 40 |
print("✅ All required API keys are present.")
|
| 41 |
|
| 42 |
-
|
| 43 |
check_required_keys()
|
| 44 |
|
| 45 |
-
# Monkey
|
| 46 |
ChatMessage.message = property(lambda self: self)
|
| 47 |
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
_model: object = None
|
| 57 |
-
_gen_cfg: object = None
|
| 58 |
|
| 59 |
class Config:
|
| 60 |
extra = "allow"
|
| 61 |
|
| 62 |
def __init__(self, **kwargs):
|
| 63 |
super().__init__(**kwargs)
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
if not isinstance(self.temperature, (float, int)):
|
| 69 |
-
self.temperature = 0.0
|
| 70 |
-
# ------------------------------------------------------------------
|
| 71 |
-
|
| 72 |
-
# Configurar clave y modelo
|
| 73 |
-
key = os.getenv("GEMINI_API_KEY")
|
| 74 |
-
if not key:
|
| 75 |
-
raise ValueError("GEMINI_API_KEY no configurada en variables de entorno")
|
| 76 |
-
genai.configure(api_key=key)
|
| 77 |
-
|
| 78 |
-
self._gen_cfg = genai.types.GenerationConfig(
|
| 79 |
-
temperature=float(self.temperature)
|
| 80 |
-
)
|
| 81 |
-
self._model = genai.GenerativeModel(
|
| 82 |
-
model_name=self.model_name,
|
| 83 |
-
generation_config=self._gen_cfg
|
| 84 |
-
)
|
| 85 |
-
|
| 86 |
-
# callback manager defensivo
|
| 87 |
if self.callback_manager is None:
|
| 88 |
from llama_index.core.callbacks.base import CallbackManager
|
| 89 |
self.callback_manager = CallbackManager([])
|
| 90 |
if not self.callback_manager.handlers:
|
| 91 |
self.callback_manager.add_handler(LlamaDebugHandler())
|
| 92 |
|
| 93 |
-
# -- metadatos ----------------------------------------------------------
|
| 94 |
@property
|
| 95 |
-
def metadata(self) -> LLMMetadata:
|
| 96 |
return LLMMetadata(
|
| 97 |
-
context_window=
|
| 98 |
-
num_output=
|
| 99 |
is_chat_model=True,
|
| 100 |
is_function_calling_model=True,
|
| 101 |
model_name=self.model_name,
|
| 102 |
)
|
| 103 |
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 115 |
|
| 116 |
-
|
| 117 |
-
async def achat(self, messages: list[ChatMessage], **kwargs) -> ChatMessage: # type: ignore[override]
|
| 118 |
return await asyncio.to_thread(self.chat, messages, **kwargs)
|
| 119 |
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
# ----------------------------------------------------------------------
|
| 123 |
-
def complete(self, prompt: str, formatted: bool = False, **kwargs) -> CompletionResponse: # type: ignore[override]
|
| 124 |
-
resp = self._model.generate_content(prompt)
|
| 125 |
-
return CompletionResponse(text=resp.text)
|
| 126 |
-
|
| 127 |
-
# 2-bis COMPLETE ASINCRONO
|
| 128 |
-
async def acomplete(self, prompt: str, formatted: bool = False, **kwargs) -> CompletionResponse: # type: ignore[override]
|
| 129 |
-
return await asyncio.to_thread(self.complete, prompt, formatted=formatted, **kwargs)
|
| 130 |
-
|
| 131 |
-
# ----------------------------------------------------------------------
|
| 132 |
-
# 3️⃣ STREAMING DE COMPLETIONS
|
| 133 |
-
# ----------------------------------------------------------------------
|
| 134 |
-
def stream_complete(self, prompt: str, formatted: bool = False, **kwargs):
|
| 135 |
-
stream = self._model.generate_content(prompt, stream=True)
|
| 136 |
-
|
| 137 |
-
def generator():
|
| 138 |
-
from llama_index.core.llms import CompletionResponse
|
| 139 |
-
acc = ""
|
| 140 |
-
for chunk in stream:
|
| 141 |
-
delta = getattr(chunk, "text", "") or (chunk.parts[0].text if chunk.parts else "")
|
| 142 |
-
if delta:
|
| 143 |
-
acc += delta
|
| 144 |
-
yield CompletionResponse(text=acc, delta=delta)
|
| 145 |
-
|
| 146 |
-
return generator()
|
| 147 |
-
|
| 148 |
-
async def astream_complete(self, prompt: str, formatted: bool = False, **kwargs):
|
| 149 |
-
sync_gen = await asyncio.to_thread(self.stream_complete, prompt, formatted=formatted, **kwargs)
|
| 150 |
-
|
| 151 |
-
async def agen():
|
| 152 |
-
for item in sync_gen:
|
| 153 |
-
yield item
|
| 154 |
-
|
| 155 |
-
return agen()
|
| 156 |
-
|
| 157 |
-
# ----------------------------------------------------------------------
|
| 158 |
-
# 4️⃣ STREAMING DE CHAT
|
| 159 |
-
# ----------------------------------------------------------------------
|
| 160 |
-
def stream_chat(self, messages: list[ChatMessage], **kwargs):
|
| 161 |
-
history = [
|
| 162 |
-
{"role": "user" if m.role == "user" else "model", "parts": [{"text": str(m.content)}]}
|
| 163 |
-
for m in messages[:-1]
|
| 164 |
-
]
|
| 165 |
-
session = self._model.start_chat(history=history)
|
| 166 |
-
stream = session.send_message(str(messages[-1].content), stream=True)
|
| 167 |
-
|
| 168 |
-
def generator():
|
| 169 |
-
acc = ""
|
| 170 |
-
for chunk in stream:
|
| 171 |
-
delta = getattr(chunk, "text", "") or (chunk.parts[0].text if chunk.parts else "")
|
| 172 |
-
if delta:
|
| 173 |
-
acc += delta
|
| 174 |
-
yield ChatMessage(
|
| 175 |
-
role="assistant",
|
| 176 |
-
content=acc,
|
| 177 |
-
additional_kwargs={"delta": delta},
|
| 178 |
-
)
|
| 179 |
-
|
| 180 |
-
return generator()
|
| 181 |
-
|
| 182 |
-
async def astream_chat(self, messages: list[ChatMessage], **kwargs):
|
| 183 |
-
sync_gen = await asyncio.to_thread(self.stream_chat, messages, **kwargs)
|
| 184 |
-
|
| 185 |
-
async def agen():
|
| 186 |
-
for item in sync_gen:
|
| 187 |
-
yield item
|
| 188 |
-
|
| 189 |
-
return agen()
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
|
| 194 |
# ---------- TOOLING ----------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 195 |
|
| 196 |
def web_search(query: str, num_results: int = 5) -> str:
|
| 197 |
"""Tavily search -> concatenated, citation‑ready snippet list (includes URL)."""
|
| 198 |
try:
|
| 199 |
retriever = TavilySearchAPIRetriever(api_key=os.getenv("TAVILY_API_KEY"), k=num_results)
|
| 200 |
results = retriever.invoke(query)
|
| 201 |
-
formatted = []
|
| 202 |
-
for i, doc in enumerate(results, start=1):
|
| 203 |
-
formatted.append(
|
| 204 |
-
f"Result {i}:\nTitle: {doc.metadata.get('title','')}\nURL: {doc.metadata.get('source','')}\nContent: {doc.page_content}\n"
|
| 205 |
-
)
|
| 206 |
return "\n\n".join(formatted)
|
| 207 |
except Exception as exc:
|
| 208 |
return f"Error web_search: {exc}"
|
| 209 |
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
return text[::-1]
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
# small util for optional pandas
|
| 216 |
-
|
| 217 |
-
def _pd_safe_import():
|
| 218 |
try:
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 225 |
|
| 226 |
def analyze_markdown_table(table_md: str, question: str) -> str:
|
|
|
|
|
|
|
|
|
|
| 227 |
try:
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
clean = [
|
| 232 |
-
ln for ln in table_md.strip().splitlines()
|
| 233 |
-
if ln.strip() and not ln.lstrip().startswith("|---")
|
| 234 |
-
]
|
| 235 |
-
rows = [ [c.strip() for c in ln.strip("|").split("|")] for ln in clean ]
|
| 236 |
-
if len(rows) < 2:
|
| 237 |
-
return "Error analyze_table: empty or malformed markdown table"
|
| 238 |
-
|
| 239 |
df = pd.DataFrame(rows[1:], columns=rows[0])
|
| 240 |
-
|
| 241 |
-
# — 2️⃣ ¿Nos piden conmutatividad? —
|
| 242 |
if "conmut" in question.lower():
|
| 243 |
offenders: set[str] = set()
|
| 244 |
-
header = df.columns[0]
|
| 245 |
-
|
| 246 |
-
cols = df.columns[1:] # solo las etiquetas
|
| 247 |
for x in cols:
|
| 248 |
for y in cols:
|
| 249 |
try:
|
| 250 |
val_xy = df.loc[df[header] == x, y].iat[0]
|
| 251 |
val_yx = df.loc[df[header] == y, x].iat[0]
|
| 252 |
-
if val_xy != val_yx:
|
| 253 |
-
|
| 254 |
-
except (IndexError, KeyError):
|
| 255 |
-
continue
|
| 256 |
return ", ".join(sorted(offenders)) or "Conmutativa"
|
| 257 |
-
|
| 258 |
-
# — 3️⃣ Si no, devolvemos CSV —
|
| 259 |
return df.to_csv(index=False)
|
| 260 |
except Exception as exc:
|
| 261 |
return f"Error analyze_markdown_table: {exc}"
|
| 262 |
|
| 263 |
-
|
| 264 |
-
|
| 265 |
def execute_code(code: str) -> str:
|
| 266 |
-
"""Runs
|
| 267 |
try:
|
| 268 |
res = subprocess.run(["python", "-S", "-c", code], capture_output=True, text=True, timeout=10)
|
| 269 |
if res.returncode == 0:
|
| 270 |
output = res.stdout.strip()
|
| 271 |
return f"Output: {output if output else '(No output)'}"
|
| 272 |
return f"Error: {res.stderr.strip()}"
|
| 273 |
-
except subprocess.TimeoutExpired:
|
| 274 |
-
return "Error: execution timeout"
|
| 275 |
except Exception as exc:
|
| 276 |
return f"Error execute_code: {exc}"
|
| 277 |
|
|
|
|
|
|
|
| 278 |
|
| 279 |
-
|
| 280 |
-
"""Downloads or opens an excel file and returns CSV (requires pandas)."""
|
| 281 |
-
try:
|
| 282 |
-
pd = _pd_safe_import()
|
| 283 |
-
if file_path.startswith(("http://", "https://")):
|
| 284 |
-
resp = requests.get(file_path, headers=HEADERS, timeout=20)
|
| 285 |
-
resp.raise_for_status()
|
| 286 |
-
df = pd.read_excel(BytesIO(resp.content), sheet_name=sheet_name)
|
| 287 |
-
else:
|
| 288 |
-
if not os.path.exists(file_path):
|
| 289 |
-
return f"Error read_excel_data: file '{file_path}' not found"
|
| 290 |
-
df = pd.read_excel(file_path, sheet_name=sheet_name)
|
| 291 |
-
return df.fillna("").to_csv(index=False)
|
| 292 |
-
except Exception as exc:
|
| 293 |
-
return f"Error read_excel_data: {exc}"
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
# --- botanical classifier (unchanged) ---
|
| 297 |
-
|
| 298 |
-
def classify_botanical_foods(items_list_str: str) -> str:
|
| 299 |
-
botanical_fruits = {
|
| 300 |
-
"tomato",
|
| 301 |
-
"bell pepper",
|
| 302 |
-
"pepper",
|
| 303 |
-
"green beans",
|
| 304 |
-
"beans",
|
| 305 |
-
"zucchini",
|
| 306 |
-
"cucumber",
|
| 307 |
-
"eggplant",
|
| 308 |
-
"corn",
|
| 309 |
-
"peas",
|
| 310 |
-
"pea",
|
| 311 |
-
"pumpkin",
|
| 312 |
-
"squash",
|
| 313 |
-
"avocado",
|
| 314 |
-
}
|
| 315 |
-
botanical_vegetables = {
|
| 316 |
-
"broccoli",
|
| 317 |
-
"celery",
|
| 318 |
-
"lettuce",
|
| 319 |
-
"kale",
|
| 320 |
-
"spinach",
|
| 321 |
-
"sweet potatoes",
|
| 322 |
-
"sweet potato",
|
| 323 |
-
"potato",
|
| 324 |
-
"onion",
|
| 325 |
-
"garlic",
|
| 326 |
-
"carrot",
|
| 327 |
-
"okra",
|
| 328 |
-
"cabbage",
|
| 329 |
-
"cauliflower",
|
| 330 |
-
"beet",
|
| 331 |
-
"turnip",
|
| 332 |
-
"parsnip",
|
| 333 |
-
"leek",
|
| 334 |
-
}
|
| 335 |
-
vegs, fruits, others = set(), set(), set()
|
| 336 |
-
for token in (t.strip().lower() for t in items_list_str.split(",")):
|
| 337 |
-
if token in botanical_vegetables and token not in botanical_fruits:
|
| 338 |
-
vegs.add(token)
|
| 339 |
-
elif token in botanical_fruits:
|
| 340 |
-
fruits.add(token)
|
| 341 |
-
else:
|
| 342 |
-
others.add(token)
|
| 343 |
-
return (
|
| 344 |
-
f"Vegetables: {', '.join(sorted(vegs))}\n"
|
| 345 |
-
f"Fruits: {', '.join(sorted(fruits))}\n"
|
| 346 |
-
f"Others: {', '.join(sorted(others))}"
|
| 347 |
-
)
|
| 348 |
-
|
| 349 |
-
|
| 350 |
-
# --- flexible Wikipedia table scraper ---
|
| 351 |
-
|
| 352 |
-
def scrape_wiki_table(page_title: str, section: str | None = None, table_index: int = 0) -> str:
|
| 353 |
-
"""Returns the requested Wikipedia table in markdown."""
|
| 354 |
-
try:
|
| 355 |
-
url = f"https://en.wikipedia.org/wiki/{page_title.replace(' ', '_')}"
|
| 356 |
-
html = requests.get(url, timeout=15).text
|
| 357 |
-
soup = BeautifulSoup(html, "html.parser")
|
| 358 |
-
|
| 359 |
-
def _find_tables(s: BeautifulSoup):
|
| 360 |
-
return s.find_all("table", class_="wikitable")
|
| 361 |
-
|
| 362 |
-
if section:
|
| 363 |
-
header_tag = soup.find(lambda tag: tag.name in {"h2", "h3"} and section.lower() in tag.get_text(" ", strip=True).lower())
|
| 364 |
-
if not header_tag:
|
| 365 |
-
return f"Error scrape_wiki_table: section '{section}' not found"
|
| 366 |
-
tables = header_tag.find_all_next("table", class_="wikitable")
|
| 367 |
-
else:
|
| 368 |
-
tables = _find_tables(soup)
|
| 369 |
-
if not tables or table_index >= len(tables):
|
| 370 |
-
return f"Error scrape_wiki_table: table index {table_index} out of range (found {len(tables)})"
|
| 371 |
-
|
| 372 |
-
pd = _pd_safe_import()
|
| 373 |
-
df = pd.read_html(str(tables[table_index]), flavor="bs4")[0]
|
| 374 |
-
return df.to_markdown(index=False)
|
| 375 |
-
except Exception as exc:
|
| 376 |
-
return f"Error scrape_wiki_table: {exc}"
|
| 377 |
-
|
| 378 |
-
|
| 379 |
-
# --- generic URL text scraper ---
|
| 380 |
-
|
| 381 |
-
def scrape_url_text(url: str) -> str:
|
| 382 |
-
"""Descarga página y devuelve texto visible (trim-8k)."""
|
| 383 |
-
try:
|
| 384 |
-
resp = requests.get(url, headers=HEADERS, timeout=20)
|
| 385 |
-
if "Just a moment" in resp.text and "cloudflare" in resp.text.lower():
|
| 386 |
-
return "Error scrape_url_text: Cloudflare protection detected"
|
| 387 |
-
resp.raise_for_status()
|
| 388 |
-
|
| 389 |
-
soup = BeautifulSoup(resp.text, "html.parser")
|
| 390 |
-
for tag in soup(["script", "style", "noscript"]):
|
| 391 |
-
tag.decompose()
|
| 392 |
-
text = "\n".join(t.strip() for t in soup.get_text("\n").splitlines() if t.strip())
|
| 393 |
-
return text[:8000]
|
| 394 |
-
except Exception as exc:
|
| 395 |
-
return f"Error scrape_url_text: {exc}"
|
| 396 |
-
|
| 397 |
-
|
| 398 |
-
|
| 399 |
-
# ---------- TOOL WRAPPERS ----------
|
| 400 |
-
|
| 401 |
tool_defs = [
|
| 402 |
(web_search, "web_search", "Searches the web via Tavily."),
|
| 403 |
-
(scrape_wiki_table, "scrape_wiki_table", "Extracts a wikitable from Wikipedia."),
|
| 404 |
(scrape_url_text, "scrape_url_text", "Fetch any URL and return visible text."),
|
| 405 |
-
(analyze_markdown_table, "analyze_markdown_table", "Analyze a markdown table
|
| 406 |
(execute_code, "execute_code", "Run short python snippets securely."),
|
| 407 |
-
(read_excel_data, "read_excel_data", "Load Excel (URL or local) → CSV."),
|
| 408 |
-
(classify_botanical_foods, "classify_botanical_foods", "Botanically classify food list."),
|
| 409 |
(reverse_text, "reverse_text", "Reverse a text string."),
|
| 410 |
-
(lambda
|
| 411 |
]
|
| 412 |
-
|
| 413 |
TOOLS = [FunctionTool.from_defaults(fn=fn, name=name, description=desc) for fn, name, desc in tool_defs]
|
| 414 |
|
| 415 |
-
# ---------- SYSTEM PROMPT ----------
|
| 416 |
-
tool_desc_str = "\n".join(f"{t.metadata.name}: {t.metadata.description}" for t in TOOLS)
|
| 417 |
SYSTEM_PROMPT = f"""
|
| 418 |
-
You are Alfred, a ReAct agent.
|
| 419 |
-
Rules:
|
| 420 |
-
1. Try a relevant tool first when external info is needed.
|
| 421 |
-
2. After a tool call you receive `Observation:`. Your *very next* assistant message **must** be exactly that observation (untouched) *or* the fixed string "I cannot answer with the available tools." – no extra text.
|
| 422 |
-
3. If a tool fails, think why and try an alternative (different params / another tool) once before giving up.
|
| 423 |
-
4. Do not invent facts.
|
| 424 |
-
Available tools:
|
| 425 |
-
{tool_desc_str}
|
| 426 |
-
"""
|
| 427 |
|
| 428 |
-
|
| 429 |
-
llm = GeminiLLM()
|
| 430 |
-
agent = ReActAgent.from_tools(
|
| 431 |
-
tools=TOOLS,
|
| 432 |
-
llm=llm,
|
| 433 |
-
system_prompt=SYSTEM_PROMPT,
|
| 434 |
-
verbose=True,
|
| 435 |
-
max_iterations=25,
|
| 436 |
-
callback_manager=llm.callback_manager,
|
| 437 |
-
handle_parsing_errors=True,
|
| 438 |
-
)
|
| 439 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 440 |
|
| 441 |
-
# Helper to strip to the last Observation or fallback
|
| 442 |
def _extract_observation(raw: str) -> str:
|
| 443 |
-
"""
|
| 444 |
if "Observation:" in raw:
|
| 445 |
-
|
| 446 |
-
segment = raw.rsplit("Observation:", 1)[-1]
|
| 447 |
if "Final Answer:" in segment:
|
| 448 |
segment = segment.split("Final Answer:", 1)[0]
|
| 449 |
return segment.strip()
|
| 450 |
return raw.strip()
|
| 451 |
|
| 452 |
-
|
| 453 |
-
# Public entry point
|
| 454 |
-
|
| 455 |
def basic_agent_response(question: str) -> str:
|
|
|
|
| 456 |
try:
|
| 457 |
-
|
| 458 |
-
|
| 459 |
-
|
| 460 |
-
|
| 461 |
-
|
| 462 |
-
verbose=False,
|
| 463 |
-
max_iterations=25,
|
| 464 |
-
callback_manager=None, # sin historial previo
|
| 465 |
-
handle_parsing_errors=True,
|
| 466 |
-
)
|
| 467 |
-
|
| 468 |
-
raw = fresh_agent.query(question)
|
| 469 |
-
cleaned = _extract_observation(
|
| 470 |
-
str(raw.response if hasattr(raw, "response") else raw)
|
| 471 |
-
)
|
| 472 |
return cleaned or "I cannot answer with the available tools."
|
| 473 |
except Exception as exc:
|
| 474 |
-
print(f"[ERROR] {exc}")
|
| 475 |
-
return "I cannot answer with the available tools."
|
|
|
|
| 7 |
from bs4 import BeautifulSoup
|
| 8 |
from pydantic import Field
|
| 9 |
|
| 10 |
+
# ----- LlamaIndex & LangChain Imports -----
|
|
|
|
|
|
|
|
|
|
| 11 |
from llama_index.core.llms import ChatMessage, LLMMetadata, LLM, CompletionResponse
|
| 12 |
from llama_index.core.agent import ReActAgent
|
| 13 |
from llama_index.core.callbacks.llama_debug import LlamaDebugHandler
|
| 14 |
from llama_index.core.tools import FunctionTool
|
| 15 |
+
from llama_index.llms.huggingface import HuggingFaceInferenceAPI
|
|
|
|
| 16 |
from langchain_community.retrievers import TavilySearchAPIRetriever
|
|
|
|
|
|
|
| 17 |
|
| 18 |
# ---------- BASIC SETUP ----------
|
| 19 |
HEADERS = {"User-Agent": "Mozilla/5.0"}
|
| 20 |
|
|
|
|
|
|
|
| 21 |
def check_required_keys() -> None:
    """Print whether the API keys this module relies on are set.

    Looks up ``TAVILY_API_KEY`` and ``HUGGINGFACE_TOKEN`` in the process
    environment. An unset or empty variable counts as missing. The function
    only reports; it never raises.
    """
    required = ("TAVILY_API_KEY", "HUGGINGFACE_TOKEN")
    missing = [key for key in required if not os.getenv(key)]
    if missing:
        print(f"⚠️ WARNING: Missing API keys: {', '.join(missing)}")
    else:
        print("✅ All required API keys are present.")
|
| 27 |
|
|
|
|
| 28 |
check_required_keys()
|
| 29 |
|
| 30 |
+
# Monkey-patch required by LlamaIndex
|
| 31 |
ChatMessage.message = property(lambda self: self)
|
| 32 |
|
| 33 |
+
# ---------- HUGGING FACE LLM WRAPPER (Command R+) ----------
|
| 34 |
+
class HuggingFaceLLM(LLM):
    """LlamaIndex LLM wrapper around the Hugging Face Inference API (default: Command R+).

    Only chat-style generation is supported: ``chat`` / ``achat`` do the real
    work, the streaming variants emit the complete reply as a single chunk,
    and the completion variants raise ``NotImplementedError``.

    The ``HUGGINGFACE_TOKEN`` environment variable must be set when the
    object is constructed.
    """

    model_name: str = Field(default="CohereForAI/c4ai-command-r-plus")
    temperature: float = Field(default=0.01)
    max_new_tokens: int = Field(default=2048)  # raised to allow longer answers

    # Underlying inference client; created in __init__.
    _client: HuggingFaceInferenceAPI = None

    class Config:
        extra = "allow"

    def __init__(self, **kwargs):
        """Create the inference client and make sure a callback handler exists.

        Raises:
            ValueError: if ``HUGGINGFACE_TOKEN`` is not set in the environment.
        """
        super().__init__(**kwargs)
        api_key = os.getenv("HUGGINGFACE_TOKEN")
        if not api_key:
            raise ValueError("HUGGINGFACE_TOKEN no configurado en los secrets del Space")
        self._client = HuggingFaceInferenceAPI(model_name=self.model_name, token=api_key)

        # Defensive callback-manager setup: always have a manager with at
        # least one handler attached.
        if self.callback_manager is None:
            from llama_index.core.callbacks.base import CallbackManager

            self.callback_manager = CallbackManager([])
        if not self.callback_manager.handlers:
            self.callback_manager.add_handler(LlamaDebugHandler())

    @property
    def metadata(self) -> LLMMetadata:
        """Describe the model to LlamaIndex (context size, output budget, capabilities)."""
        return LLMMetadata(
            context_window=128000,
            num_output=self.max_new_tokens,
            is_chat_model=True,
            is_function_calling_model=True,
            model_name=self.model_name,
        )

    def chat(self, messages: list[ChatMessage], **kwargs) -> ChatMessage:
        """Send the conversation to the model and return the assistant reply.

        API failures are not raised: they are printed and returned as an
        assistant message starting with ``"Error: API call failed."``.
        Building the prompt happens outside that guard, so a failure there
        does propagate.
        """
        # NOTE(review): confirm that HuggingFaceInferenceAPI exposes
        # `tokenizer` and `text_generation`; both are used as in the original.
        prompt = self._client.tokenizer.apply_chat_template(
            [{"role": msg.role.value, "content": msg.content} for msg in messages],
            tokenize=False,
            add_generation_prompt=True,
        )
        try:
            response = self._client.text_generation(
                prompt,
                max_new_tokens=self.max_new_tokens,
                # Non-positive temperatures are replaced by a small positive value.
                temperature=self.temperature if self.temperature > 0 else 0.01,
                do_sample=True,
                top_p=0.95,
            )
            return ChatMessage(role="assistant", content=response)
        except Exception as e:
            print(f"[ERROR] HuggingFace API call failed: {e}")
            return ChatMessage(role="assistant", content=f"Error: API call failed. Reason: {e}")

    async def achat(self, messages: list[ChatMessage], **kwargs) -> ChatMessage:
        """Async variant of ``chat``; runs the blocking call in a worker thread."""
        return await asyncio.to_thread(self.chat, messages, **kwargs)

    def complete(self, prompt: str, formatted: bool = False, **kwargs) -> CompletionResponse:
        """Plain completion is not supported by this wrapper.

        Raises:
            NotImplementedError: always.
        """
        raise NotImplementedError("Use .chat() for this model.")

    # ------------------------------------------------------------------
    # The remaining methods complete the LLM interface. The earlier Gemini
    # wrapper in this file implemented all of them; presumably the base
    # class declares them abstract, in which case omitting them makes the
    # class impossible to instantiate — verify against the installed
    # llama_index version.
    # ------------------------------------------------------------------

    async def acomplete(self, prompt: str, formatted: bool = False, **kwargs) -> CompletionResponse:
        """Async variant of ``complete``; raises ``NotImplementedError`` like it."""
        return await asyncio.to_thread(self.complete, prompt, formatted=formatted, **kwargs)

    def stream_chat(self, messages: list[ChatMessage], **kwargs):
        """Return a generator that yields the full ``chat`` reply as one chunk."""

        def _single_chunk():
            yield self.chat(messages, **kwargs)

        return _single_chunk()

    async def astream_chat(self, messages: list[ChatMessage], **kwargs):
        """Return an async generator that yields the full ``achat`` reply as one chunk."""
        reply = await self.achat(messages, **kwargs)

        async def _single_chunk():
            yield reply

        return _single_chunk()

    def stream_complete(self, prompt: str, formatted: bool = False, **kwargs):
        """Streaming completion is not supported by this wrapper.

        Raises:
            NotImplementedError: always.
        """
        raise NotImplementedError("Use .chat() for this model.")

    async def astream_complete(self, prompt: str, formatted: bool = False, **kwargs):
        """Async streaming completion is not supported by this wrapper.

        Raises:
            NotImplementedError: always.
        """
        raise NotImplementedError("Use .chat() for this model.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 88 |
|
| 89 |
# ---------- TOOLING ----------
|
| 90 |
+
def _pd_safe_import():
    """Return the ``pandas`` module, or ``None`` when it cannot be imported.

    Importing lazily keeps pandas an optional dependency: tools that need it
    call this helper and report an error string when it returns ``None``.
    """
    try:
        import pandas as pd
    except ImportError:
        # ImportError also covers ModuleNotFoundError, so a present-but-broken
        # installation is reported the same way as a missing one.
        return None
    return pd
|
| 96 |
|
| 97 |
def web_search(query: str, num_results: int = 5) -> str:
    """Search the web through Tavily and return the hits as one text block.

    Args:
        query: Search query passed to the retriever.
        num_results: Number of results requested (the retriever's ``k``).

    Returns:
        The results joined by blank lines. Each entry has the form
        ``Result <i>:`` / ``Title:`` / ``URL:`` / ``Content:``. Any failure is
        returned as ``"Error web_search: <reason>"`` instead of being raised.
    """
    try:
        retriever = TavilySearchAPIRetriever(api_key=os.getenv("TAVILY_API_KEY"), k=num_results)
        results = retriever.invoke(query)
        formatted = [
            f"Result {i}:\nTitle: {doc.metadata.get('title','')}\nURL: {doc.metadata.get('source','')}\nContent: {doc.page_content}\n"
            for i, doc in enumerate(results, 1)
        ]
        return "\n\n".join(formatted)
    except Exception as exc:
        return f"Error web_search: {exc}"
|
| 106 |
|
| 107 |
+
def scrape_url_text(url: str) -> str:
    """Download a web page and return its visible text, cut to 8000 characters.

    ``script``, ``style``, ``noscript``, ``header``, ``footer`` and ``nav``
    elements are removed before the text is extracted; blank lines are
    dropped and each remaining line is stripped.

    Returns:
        The cleaned text, or an ``"Error ..."`` string when the page is a
        Cloudflare challenge or the request/parsing fails. Never raises.
    """
    try:
        resp = requests.get(url, headers=HEADERS, timeout=20)
        # Detect the challenge page BEFORE raise_for_status(): such pages are
        # typically served with an error status, so checking afterwards would
        # turn them into a generic HTTP error and hide the actionable hint.
        if "Just a moment" in resp.text and "cloudflare" in resp.text.lower():
            return "Error: The site is protected by Cloudflare and cannot be scraped directly. Use information from web_search instead."
        resp.raise_for_status()

        soup = BeautifulSoup(resp.text, "html.parser")
        for tag in soup(["script", "style", "noscript", "header", "footer", "nav"]):
            tag.decompose()
        text = "\n".join(t.strip() for t in soup.get_text("\n").splitlines() if t.strip())
        return text[:8000]
    except Exception as exc:
        return f"Error scrape_url_text: {exc}"
|
| 121 |
|
| 122 |
def analyze_markdown_table(table_md: str, question: str) -> str:
    """Check a markdown operation table for commutativity, or convert it to CSV.

    The table is read as a Cayley-style table: the first column holds the
    row labels and the remaining column headers are the operand labels.

    Args:
        table_md: Markdown table text (header row, optional separator row,
            data rows).
        question: If it mentions commutativity ("conmut..." or "commut...",
            case-insensitive), the commutativity check runs. Otherwise the
            table is returned as CSV.

    Returns:
        For the commutativity check: a comma-separated, sorted list of the
        labels involved in at least one pair where ``x*y != y*x``, or
        ``"Conmutativa"`` when there is none. Otherwise the table as CSV.
        Problems are reported as ``"Error ..."`` strings, never raised.
    """
    import re  # function-scope: the file's import header is not visible here

    # pandas is optional; import lazily and fail softly.
    try:
        import pandas as pd
    except ImportError:
        return "Error: pandas library is required for this tool but not installed."

    separator_cell = re.compile(r":?-{3,}:?")

    def _cells(line: str) -> list[str]:
        # Strip surrounding whitespace first so an indented row does not
        # keep its leading pipe and produce a spurious empty first cell.
        return [c.strip() for c in line.strip().strip("|").split("|")]

    def _is_separator(line: str) -> bool:
        # Keep the original "|---" rule and also accept spaced / aligned
        # forms such as "| --- | :---: |".
        if line.lstrip().startswith("|---"):
            return True
        return all(separator_cell.fullmatch(c) for c in _cells(line))

    try:
        clean = [ln for ln in table_md.strip().splitlines() if ln.strip() and not _is_separator(ln)]
        rows = [_cells(ln) for ln in clean]
        if len(rows) < 2:
            return "Error: malformed markdown table"

        df = pd.DataFrame(rows[1:], columns=rows[0])

        asked = question.lower()
        if "conmut" in asked or "commut" in asked:
            offenders: set[str] = set()
            header, cols = df.columns[0], df.columns[1:]
            for x in cols:
                for y in cols:
                    try:
                        val_xy = df.loc[df[header] == x, y].iat[0]
                        val_yx = df.loc[df[header] == y, x].iat[0]
                    except (IndexError, KeyError):
                        # No row for this label: nothing to compare.
                        continue
                    if val_xy != val_yx:
                        offenders.update([x, y])
            return ", ".join(sorted(offenders)) or "Conmutativa"

        return df.to_csv(index=False)
    except Exception as exc:
        return f"Error analyze_markdown_table: {exc}"
|
| 145 |
|
|
|
|
|
|
|
| 146 |
def execute_code(code: str) -> str:
    """Run a short Python snippet in a separate interpreter process.

    The snippet runs via ``-c`` with ``-S`` and a 10-second timeout.

    NOTE: this is process isolation with a time limit, not a security
    sandbox. The snippet runs with the same OS privileges as this process
    (files, network, environment). Do not feed it untrusted code without an
    external sandbox.

    Returns:
        ``"Output: <stdout>"`` (or ``"Output: (No output)"``) on exit code 0,
        ``"Error: <stderr>"`` on a non-zero exit code, and
        ``"Error execute_code: <reason>"`` if the process could not be run or
        timed out. Never raises.
    """
    import sys  # function-scope: the file's import header is not visible here

    # Use the interpreter running this module: a bare "python" may be missing
    # from PATH or resolve to a different installation.
    interpreter = sys.executable or "python"
    try:
        res = subprocess.run(
            [interpreter, "-S", "-c", code],
            capture_output=True,
            text=True,
            timeout=10,
        )
        if res.returncode == 0:
            output = res.stdout.strip()
            return f"Output: {output if output else '(No output)'}"
        return f"Error: {res.stderr.strip()}"
    except Exception as exc:
        return f"Error execute_code: {exc}"
|
| 156 |
|
| 157 |
+
# ... (other tools such as reverse_text, classify_botanical_foods, etc. go here, unchanged) ...
|
| 158 |
+
def reverse_text(text: str) -> str:
    """Return ``text`` with its characters in reverse order."""
    return "".join(reversed(text))
|
| 159 |
|
| 160 |
+
# ---------- TOOL DEFINITIONS & PROMPT ----------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 161 |
def no_tool_solution(reason: str = "") -> str:
    """Fallback tool: return the fixed "cannot answer" phrase.

    ``reason`` is accepted so the agent can pass one argument, as it could
    with the lambda this replaces, but it is ignored. A named function with a
    normally named parameter is used because the tool schema is derived from
    the signature; a parameter called ``_`` is presumably rejected there
    (leading-underscore field name) — verify against the installed
    llama_index / pydantic versions.
    """
    return "I cannot answer with the available tools."


# (callable, tool name, description shown to the agent)
tool_defs = [
    (web_search, "web_search", "Searches the web via Tavily."),
    (scrape_url_text, "scrape_url_text", "Fetch any URL and return visible text."),
    (analyze_markdown_table, "analyze_markdown_table", "Analyze a markdown table."),
    (execute_code, "execute_code", "Run short python snippets securely."),
    (reverse_text, "reverse_text", "Reverse a text string."),
    (no_tool_solution, "no_tool_solution", "Fallback answer when stuck."),
]

# Wrap every entry as a LlamaIndex FunctionTool for the agent.
TOOLS = [FunctionTool.from_defaults(fn=fn, name=name, description=desc) for fn, name, desc in tool_defs]
|
| 170 |
|
|
|
|
|
|
|
| 171 |
# System prompt handed to the ReAct agent. It contains no placeholders, so it
# is a plain string literal rather than an f-string; the text is unchanged.
SYSTEM_PROMPT = """
You are Alfred, a ReAct agent. Your goal is to answer questions accurately. Follow these rules STRICTLY.

**OPERATING PROCEDURE:**

1. **TRIAGE:** First, analyze the question. If it involves a local file (image, audio, Excel) or multimedia, IMMEDIATELY use `no_tool_solution`.
2. **INFORMATION GATHERING:** For all other questions, your FIRST step is ALWAYS `web_search`.
3. **ANALYZE SNIPPET:** After `web_search`, CAREFULLY read the `Content:` snippet of each result. If the answer is clearly present, answer immediately. DO NOT use another tool if you already have the information.
4. **DEEP DIVE:** Only if the snippet is incomplete, use `scrape_url_text` on the most promising URL. If `scrape_url_text` fails (e.g., Cloudflare error), go back to the text from `web_search` or give up.
5. **FINAL ANSWER:** Your final response MUST be ONLY the `Observation:` from your last successful tool call, or the phrase "I cannot answer with the available tools."
"""
|
| 182 |
+
|
| 183 |
+
# ---------- AGENT CREATION & EXECUTION ----------
|
| 184 |
+
def create_fresh_agent():
    """Build a new ReAct agent with its own LLM wrapper instance.

    A new agent is created per question so no state carries over between
    independent queries.

    Returns:
        The agent produced by ``ReActAgent.from_tools`` for ``TOOLS``.
    """
    llm = HuggingFaceLLM()
    # NOTE(review): confirm that this ReActAgent version honours
    # `system_prompt` and `handle_parsing_errors`. If they are silently
    # ignored, SYSTEM_PROMPT never reaches the model.
    return ReActAgent.from_tools(
        tools=TOOLS,
        llm=llm,
        system_prompt=SYSTEM_PROMPT,
        verbose=False,
        max_iterations=20,
        handle_parsing_errors=True,
    )
|
| 191 |
|
|
|
|
| 192 |
def _extract_observation(raw: str) -> str:
    """Return the text of the LAST ``Observation:`` in a ReAct transcript.

    Everything after the final ``Observation:`` marker is taken and cut at
    the first ``Final Answer:`` that follows, if any. When the transcript
    has no ``Observation:`` marker the whole input is returned. The result
    is stripped of surrounding whitespace in both cases.
    """
    if "Observation:" not in raw:
        return raw.strip()

    segment = raw.rsplit("Observation:", 1)[-1]
    if "Final Answer:" in segment:
        segment = segment.split("Final Answer:", 1)[0]
    return segment.strip()
|
| 200 |
|
|
|
|
|
|
|
|
|
|
| 201 |
def basic_agent_response(question: str) -> str:
    """Public entry point: answer one question with a freshly built agent.

    Creates a new agent, runs the query, and reduces the raw response to
    its last observation.

    Returns:
        The cleaned answer, or the fixed string
        ``"I cannot answer with the available tools."`` when the answer is
        empty or anything fails. Never raises; failures are printed.
    """
    try:
        print(f"[DEBUG] ➜ Question: {question}")
        agent = create_fresh_agent()
        raw_resp = agent.query(question)
        # Use the `.response` attribute when the result has one; otherwise
        # stringify the result itself.
        text_response = str(raw_resp.response if hasattr(raw_resp, "response") else raw_resp)
        cleaned = _extract_observation(text_response)
        return cleaned or "I cannot answer with the available tools."
    except Exception as exc:
        print(f"[ERROR] Agent execution failed: {exc}")
        return "I cannot answer with the available tools."
|