Spaces:
Runtime error
Runtime error
File size: 9,907 Bytes
e22f7ea 3d04920 9d1f396 5b46a08 9d1f396 1cfd1a2 5b6cef8 07de393 e22f7ea 07de393 5b46a08 5b6cef8 44d2a9f b710608 5b46a08 44d2a9f 5b46a08 5b6cef8 37e9a6c 5b6cef8 37e9a6c 5b46a08 5b6cef8 353ccd4 190c392 5b6cef8 8b19ed4 380e830 5b6cef8 380e830 5b6cef8 380e830 5b6cef8 380e830 5b6cef8 380e830 5b6cef8 380e830 5b6cef8 5b46a08 5b6cef8 5b46a08 5b6cef8 5b46a08 5b6cef8 9480657 5b6cef8 5b46a08 5b6cef8 5b46a08 5b6cef8 07de393 5b46a08 236c7ed 5b6cef8 236c7ed 5b6cef8 5b46a08 3d04920 5b46a08 3d04920 5b6cef8 3d04920 5b46a08 5b6cef8 5b46a08 5b6cef8 5b46a08 5b6cef8 5b46a08 5b6cef8 190c392 5b46a08 5b6cef8 44d2a9f 5b6cef8 d87b08e 5b6cef8 5b46a08 0524371 5b6cef8 0524371 5b6cef8 cdbafb9 0524371 46b58aa 5b6cef8 46b58aa 5b6cef8 5b46a08 5b6cef8 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 | import os
import math
import asyncio
import subprocess
import requests
from io import BytesIO
from bs4 import BeautifulSoup
from pydantic import Field
# ----- LlamaIndex & LangChain Imports -----
from llama_index.core.llms import ChatMessage, LLMMetadata, LLM, CompletionResponse
from llama_index.core.agent import ReActAgent
from llama_index.core.callbacks.llama_debug import LlamaDebugHandler
from llama_index.core.tools import FunctionTool
from llama_index.llms.huggingface import HuggingFaceInferenceAPI
from langchain_community.retrievers import TavilySearchAPIRetriever
# ---------- BASIC SETUP ----------
# Default HTTP headers for outgoing page requests; scrape_url_text passes them to requests.get.
HEADERS: dict[str, str] = {"User-Agent": "Mozilla/5.0"}
def check_required_keys() -> None:
    """Print whether the API keys this module relies on are configured.

    Looks up ``TAVILY_API_KEY`` and ``HUGGINGFACE_TOKEN`` in the environment;
    an unset or empty value counts as missing. The function only prints a
    status line — it never raises and returns ``None``.
    """
    required = ("TAVILY_API_KEY", "HUGGINGFACE_TOKEN")
    absent = list(filter(lambda name: not os.getenv(name), required))
    if not absent:
        print("✅ All required API keys are present.")
        return
    names = ", ".join(absent)
    print(f"⚠️ WARNING: Missing API keys: {names}")
# Report the API-key status once at import time (prints only; execution continues either way).
check_required_keys()
# Monkey-patch (the original note says LlamaIndex requires it): give ChatMessage a
# read-only `message` property that returns the instance itself.
# NOTE(review): presumably this lets code that reads `response.message` accept the
# bare ChatMessage returned by HuggingFaceLLM.chat — confirm before removing.
ChatMessage.message = property(lambda self: self)
# ---------- HUGGING FACE LLM WRAPPER (Command R+) ----------
class HuggingFaceLLM(LLM):
    """LlamaIndex ``LLM`` wrapper around the Hugging Face Inference API.

    Defaults target Command R+ (``CohereForAI/c4ai-command-r-plus``). Only chat
    is implemented: ``chat``/``achat`` render the messages with the client's
    tokenizer chat template and send the resulting prompt to
    ``text_generation``. The completion and streaming entry points are defined
    only so the class provides every method of the base LLM interface; they
    raise ``NotImplementedError``.

    Requires the ``HUGGINGFACE_TOKEN`` environment variable.
    """

    model_name: str = Field(default="CohereForAI/c4ai-command-r-plus")
    temperature: float = Field(default=0.01)
    max_new_tokens: int = Field(default=2048)  # raised to allow longer answers
    # Inference client; created in __init__ once the token has been validated.
    _client: HuggingFaceInferenceAPI = None

    class Config:
        extra = "allow"

    def __init__(self, **kwargs):
        """Validate the token, build the inference client and ensure a debug handler.

        Raises:
            ValueError: if ``HUGGINGFACE_TOKEN`` is unset or empty.
        """
        super().__init__(**kwargs)
        api_key = os.getenv("HUGGINGFACE_TOKEN")
        if not api_key:
            raise ValueError("HUGGINGFACE_TOKEN no configurado en los secrets del Space")
        self._client = HuggingFaceInferenceAPI(model_name=self.model_name, token=api_key)
        if self.callback_manager is None:
            from llama_index.core.callbacks.base import CallbackManager

            self.callback_manager = CallbackManager([])
        # Attach a debug handler only when no handler has been configured yet.
        if not self.callback_manager.handlers:
            self.callback_manager.add_handler(LlamaDebugHandler())

    @property
    def metadata(self) -> LLMMetadata:
        """Describe the model: 128000-token context, chat and function-calling flags."""
        return LLMMetadata(
            context_window=128000,
            num_output=self.max_new_tokens,
            is_chat_model=True,
            is_function_calling_model=True,
            model_name=self.model_name,
        )

    def chat(self, messages: list[ChatMessage], **kwargs) -> ChatMessage:
        """Generate one assistant reply for ``messages``.

        The prompt is built before the guarded section, so a templating failure
        propagates to the caller. A failure of the generation call itself is
        printed and returned as an assistant message starting with ``"Error:"``.

        NOTE(review): this returns a bare ``ChatMessage``; the module patches
        ``ChatMessage.message`` to return the instance, presumably so callers
        that read ``.message`` keep working — confirm before changing the type.
        """
        prompt = self._client.tokenizer.apply_chat_template(
            [{"role": msg.role.value, "content": msg.content} for msg in messages],
            tokenize=False,
            add_generation_prompt=True,
        )
        try:
            response = self._client.text_generation(
                prompt,
                max_new_tokens=self.max_new_tokens,
                # Sampling is enabled, so the temperature must stay strictly positive.
                temperature=self.temperature if self.temperature > 0 else 0.01,
                do_sample=True,
                top_p=0.95,
            )
            return ChatMessage(role="assistant", content=response)
        except Exception as e:
            print(f"[ERROR] HuggingFace API call failed: {e}")
            return ChatMessage(role="assistant", content=f"Error: API call failed. Reason: {e}")

    async def achat(self, messages: list[ChatMessage], **kwargs) -> ChatMessage:
        """Async variant of ``chat``: runs the blocking call in a worker thread."""
        return await asyncio.to_thread(self.chat, messages, **kwargs)

    def complete(self, prompt: str, formatted: bool = False, **kwargs) -> CompletionResponse:
        """Not supported; this wrapper is chat-only."""
        raise NotImplementedError("Use .chat() for this model.")

    # The methods below complete the base LLM interface. Without concrete
    # definitions for the abstract completion/streaming methods the class
    # cannot be instantiated; each one fails loudly if it is ever called.
    async def acomplete(self, prompt: str, formatted: bool = False, **kwargs) -> CompletionResponse:
        """Not supported; this wrapper is chat-only."""
        raise NotImplementedError("Use .achat() for this model.")

    def stream_chat(self, messages: list[ChatMessage], **kwargs):
        """Not supported; streaming is not implemented."""
        raise NotImplementedError("Streaming is not supported by this wrapper.")

    def stream_complete(self, prompt: str, formatted: bool = False, **kwargs):
        """Not supported; streaming is not implemented."""
        raise NotImplementedError("Streaming is not supported by this wrapper.")

    async def astream_chat(self, messages: list[ChatMessage], **kwargs):
        """Not supported; streaming is not implemented."""
        raise NotImplementedError("Streaming is not supported by this wrapper.")

    async def astream_complete(self, prompt: str, formatted: bool = False, **kwargs):
        """Not supported; streaming is not implemented."""
        raise NotImplementedError("Streaming is not supported by this wrapper.")
# ---------- TOOLING ----------
def _pd_safe_import():
try:
import pandas as pd
return pd
except ModuleNotFoundError:
return None
def web_search(query: str, num_results: int = 5) -> str:
    """Run a Tavily search and return the hits as one numbered text listing.

    Each hit is rendered as ``Result N`` followed by its title, source URL and
    content; hits are separated by blank lines. ``num_results`` is passed to
    the retriever as ``k``. Any failure is returned as an
    ``"Error web_search: ..."`` string instead of being raised.
    """
    try:
        docs = TavilySearchAPIRetriever(
            api_key=os.getenv("TAVILY_API_KEY"), k=num_results
        ).invoke(query)
        entries = []
        for position, doc in enumerate(docs, start=1):
            meta = doc.metadata
            title = meta.get("title", "")
            url = meta.get("source", "")
            entries.append(
                f"Result {position}:\nTitle: {title}\nURL: {url}\nContent: {doc.page_content}\n"
            )
        return "\n\n".join(entries)
    except Exception as exc:
        return f"Error web_search: {exc}"
def scrape_url_text(url: str) -> str:
    """Download a web page and return its visible text (at most 8000 characters).

    Script, style, noscript, header, footer and nav elements are removed, and
    the remaining text is returned one non-empty, stripped line per row.

    Args:
        url: Address of the page to fetch.

    Returns:
        The cleaned page text, or a string starting with ``"Error"`` when the
        URL is empty, the page is a Cloudflare challenge, the server answers
        with an error status, or anything else goes wrong. Never raises.
    """
    if not isinstance(url, str) or not url.strip():
        return "Error scrape_url_text: URL must be a non-empty string"
    try:
        resp = requests.get(url.strip(), headers=HEADERS, timeout=20)
        html = resp.text
        # Cloudflare challenge pages normally arrive with an error status, so
        # this check has to run before raise_for_status(); otherwise the
        # dedicated hint below is never produced for them.
        if "Just a moment" in html and "cloudflare" in html.lower():
            return "Error: The site is protected by Cloudflare and cannot be scraped directly. Use information from web_search instead."
        resp.raise_for_status()
        soup = BeautifulSoup(html, "html.parser")
        for tag in soup(["script", "style", "noscript", "header", "footer", "nav"]):
            tag.decompose()
        stripped_lines = (line.strip() for line in soup.get_text("\n").splitlines())
        text = "\n".join(line for line in stripped_lines if line)
        return text[:8000]
    except Exception as exc:
        return f"Error scrape_url_text: {exc}"
def _split_markdown_rows(table_md: str) -> list[list[str]]:
    """Split a markdown table into rows of stripped cells, dropping delimiter rows."""
    rows: list[list[str]] = []
    for raw_line in table_md.strip().splitlines():
        line = raw_line.strip()
        if not line:
            continue
        # Strip the line before removing the outer pipes so indented rows do
        # not end up with a spurious empty first cell.
        cells = [cell.strip() for cell in line.strip("|").split("|")]
        # A delimiter row consists solely of cells such as ---, :--- or :---:.
        if all(len(c.strip(":")) >= 3 and set(c.strip(":")) == {"-"} for c in cells):
            continue
        rows.append(cells)
    return rows


def analyze_markdown_table(table_md: str, question: str) -> str:
    """Check an operation table for commutativity, or convert the table to CSV.

    The first table row is the header; its first column names the row labels.
    When ``question`` mentions commutativity ("conmut..." in Spanish or
    "commut..." in English), every pair of header elements ``x``, ``y`` is
    compared (``x*y`` against ``y*x``) and the elements involved in a mismatch
    are returned sorted and comma-separated, or ``"Conmutativa"`` when there
    is none. For any other question the table is returned as CSV.

    pandas is imported lazily so the module loads without it.

    Returns:
        The analysis result, or a string starting with ``"Error"`` when pandas
        is missing, the table has fewer than two rows, or processing fails.
    """
    try:
        import pandas as pd
    except ImportError:
        return "Error: pandas library is required for this tool but not installed."
    try:
        rows = _split_markdown_rows(table_md)
        if len(rows) < 2:
            return "Error: malformed markdown table"
        df = pd.DataFrame(rows[1:], columns=rows[0])
        asked = question.lower()
        if "conmut" in asked or "commut" in asked:
            offenders: set[str] = set()
            header, cols = df.columns[0], df.columns[1:]
            for x in cols:
                for y in cols:
                    try:
                        val_xy = df.loc[df[header] == x, y].iat[0]
                        val_yx = df.loc[df[header] == y, x].iat[0]
                    except (IndexError, KeyError):
                        # No row labelled x or y: that pair cannot be compared.
                        continue
                    if val_xy != val_yx:
                        offenders.update([x, y])
            return ", ".join(sorted(offenders)) or "Conmutativa"
        return df.to_csv(index=False)
    except Exception as exc:
        return f"Error analyze_markdown_table: {exc}"
def execute_code(code: str) -> str:
    """Run a short Python snippet in a separate interpreter process.

    The snippet is executed with the interpreter that runs this module
    (``sys.executable``) rather than whatever ``python`` happens to be on
    PATH, which may be missing or a different installation. ``-S`` skips the
    ``site`` module and the run is limited to 10 seconds.

    NOTE(review): this is process isolation only, not a security sandbox —
    the snippet runs with this process's privileges. Treat ``code`` as
    untrusted input and restrict it upstream if that matters.

    Returns:
        ``"Output: <stdout>"`` (or ``"Output: (No output)"``) on exit code 0,
        ``"Error: <stderr>"`` on a non-zero exit code, and
        ``"Error execute_code: <reason>"`` when the process cannot be run or
        times out. Never raises.
    """
    import sys  # local import: only this function needs the interpreter path

    try:
        res = subprocess.run(
            [sys.executable or "python", "-S", "-c", code],
            capture_output=True,
            text=True,
            timeout=10,
        )
    except Exception as exc:
        return f"Error execute_code: {exc}"
    if res.returncode == 0:
        output = res.stdout.strip()
        return f"Output: {output if output else '(No output)'}"
    return f"Error: {res.stderr.strip()}"
# ... (other tools such as reverse_text, classify_botanical_foods, etc. go here, unchanged) ...
def reverse_text(text: str) -> str:
    """Return ``text`` with its characters in reverse order."""
    return "".join(reversed(text))
# ---------- TOOL DEFINITIONS & PROMPT ----------
def no_tool_solution(reason: str = "") -> str:
    """Fallback tool: return the fixed "cannot answer" phrase.

    ``reason`` is optional and ignored. A named, annotated and optional
    parameter replaces the previous ``lambda _:``, whose single required
    argument ``_`` could not be described by a schema inferred from the
    signature and made the tool fail when called without exactly one argument.
    """
    return "I cannot answer with the available tools."


# (callable, tool name, description) triples; each becomes one FunctionTool below.
tool_defs = [
    (web_search, "web_search", "Searches the web via Tavily."),
    (scrape_url_text, "scrape_url_text", "Fetch any URL and return visible text."),
    (analyze_markdown_table, "analyze_markdown_table", "Analyze a markdown table."),
    (execute_code, "execute_code", "Run short python snippets securely."),
    (reverse_text, "reverse_text", "Reverse a text string."),
    (no_tool_solution, "no_tool_solution", "Fallback answer when stuck."),
]
# Tool objects handed to the agent, in the same order as tool_defs.
TOOLS = [
    FunctionTool.from_defaults(fn=fn, name=name, description=desc)
    for fn, name, desc in tool_defs
]
# Instructions for the agent. Written as plain (non-f) literals: the text has no
# placeholders, and one literal per prompt line keeps each rule easy to diff.
SYSTEM_PROMPT = (
    "\n"
    "You are Alfred, a ReAct agent. Your goal is to answer questions accurately. Follow these rules STRICTLY.\n"
    "**OPERATING PROCEDURE:**\n"
    "1. **TRIAGE:** First, analyze the question. If it involves a local file (image, audio, Excel) or multimedia, IMMEDIATELY use `no_tool_solution`.\n"
    "2. **INFORMATION GATHERING:** For all other questions, your FIRST step is ALWAYS `web_search`.\n"
    "3. **ANALYZE SNIPPET:** After `web_search`, CAREFULLY read the `Content:` snippet of each result. If the answer is clearly present, answer immediately. DO NOT use another tool if you already have the information.\n"
    "4. **DEEP DIVE:** Only if the snippet is incomplete, use `scrape_url_text` on the most promising URL. If `scrape_url_text` fails (e.g., Cloudflare error), go back to the text from `web_search` or give up.\n"
    '5. **FINAL ANSWER:** Your final response MUST be ONLY the `Observation:` from your last successful tool call, or the phrase "I cannot answer with the available tools."\n'
)
# ---------- AGENT CREATION & EXECUTION ----------
def create_fresh_agent():
    """Build a brand-new ReAct agent, with its own LLM wrapper, for one query.

    A new instance per call keeps state from one question from leaking into
    the next. The agent gets the module's TOOLS, SYSTEM_PROMPT and a limit of
    20 iterations.
    """
    # NOTE(review): confirm that ReActAgent.from_tools actually honours
    # `system_prompt` and `handle_parsing_errors` in the installed LlamaIndex
    # version rather than silently accepting them as extra keyword arguments.
    agent_options = dict(
        tools=TOOLS,
        llm=HuggingFaceLLM(),
        system_prompt=SYSTEM_PROMPT,
        verbose=False,
        max_iterations=20,
        handle_parsing_errors=True,
    )
    return ReActAgent.from_tools(**agent_options)
def _extract_observation(raw: str) -> str:
"""Extracts the LAST observation from the ReAct agent's reasoning dump."""
if "Observation:" in raw:
segment = raw.rsplit("Observation:", 1)[-1]
if "Final Answer:" in segment:
segment = segment.split("Final Answer:", 1)[0]
return segment.strip()
return raw.strip()
def basic_agent_response(question: str) -> str:
    """Public entry point: answer one question with a freshly created agent.

    The agent's reply is reduced to its last observation. The fixed phrase
    ``"I cannot answer with the available tools."`` is returned when that
    text is empty, when the agent yields no response at all, or when anything
    raises along the way (the error is printed). Never raises.
    """
    fallback = "I cannot answer with the available tools."
    try:
        print(f"[DEBUG] ➜ Question: {question}")
        agent = create_fresh_agent()
        raw_resp = agent.query(question)
        payload = getattr(raw_resp, "response", raw_resp)
        if payload is None:
            # str(None) would otherwise leak the literal "None" as the answer.
            return fallback
        cleaned = _extract_observation(str(payload))
        return cleaned or fallback
    except Exception as exc:
        print(f"[ERROR] Agent execution failed: {exc}")
        return fallback