File size: 9,907 Bytes
e22f7ea
 
3d04920
 
9d1f396
5b46a08
9d1f396
1cfd1a2
 
5b6cef8
07de393
e22f7ea
07de393
5b46a08
5b6cef8
44d2a9f
b710608
5b46a08
 
44d2a9f
5b46a08
5b6cef8
37e9a6c
5b6cef8
37e9a6c
 
 
 
5b46a08
5b6cef8
353ccd4
190c392
5b6cef8
 
 
 
 
 
 
 
8b19ed4
 
 
 
380e830
 
5b6cef8
 
 
 
380e830
 
 
 
 
 
 
5b6cef8
380e830
5b6cef8
 
380e830
 
 
 
 
5b6cef8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
380e830
5b6cef8
380e830
 
5b6cef8
 
5b46a08
 
5b6cef8
 
 
 
 
 
5b46a08
 
 
 
 
 
5b6cef8
5b46a08
 
 
 
5b6cef8
 
9480657
5b6cef8
 
 
 
 
 
 
 
 
 
 
5b46a08
 
5b6cef8
 
 
5b46a08
5b6cef8
 
 
07de393
5b46a08
236c7ed
5b6cef8
236c7ed
 
 
 
 
5b6cef8
 
5b46a08
3d04920
5b46a08
 
 
3d04920
5b6cef8
3d04920
5b46a08
 
 
 
 
 
 
 
5b6cef8
 
5b46a08
5b6cef8
5b46a08
 
 
5b6cef8
5b46a08
 
5b6cef8
190c392
5b46a08
 
 
5b6cef8
44d2a9f
5b6cef8
d87b08e
5b6cef8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5b46a08
0524371
5b6cef8
0524371
5b6cef8
cdbafb9
 
 
0524371
 
46b58aa
5b6cef8
46b58aa
5b6cef8
 
 
 
 
5b46a08
 
5b6cef8
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
import os
import math
import asyncio
import subprocess
import requests
from io import BytesIO
from bs4 import BeautifulSoup
from pydantic import Field

# ----- LlamaIndex & LangChain Imports -----
from llama_index.core.llms import ChatMessage, LLMMetadata, LLM, CompletionResponse
from llama_index.core.agent import ReActAgent
from llama_index.core.callbacks.llama_debug import LlamaDebugHandler
from llama_index.core.tools import FunctionTool
from llama_index.llms.huggingface import HuggingFaceInferenceAPI
from langchain_community.retrievers import TavilySearchAPIRetriever

# ---------- BASIC SETUP ----------
# Default HTTP request headers; scrape_url_text passes these to requests.get.
HEADERS = {"User-Agent": "Mozilla/5.0"}

def check_required_keys() -> None:
    """Print whether the API keys this module relies on are set in the environment.

    A key counts as missing when its environment variable is unset or empty.
    Nothing is raised and nothing is returned; the outcome is only printed.
    """
    required = ("TAVILY_API_KEY", "HUGGINGFACE_TOKEN")
    absent = [name for name in required if not os.getenv(name)]
    if not absent:
        print("✅ All required API keys are present.")
        return
    print(f"⚠️  WARNING: Missing API keys: {', '.join(absent)}")

# Report missing API keys once, when the module is imported (prints only).
check_required_keys()

# Monkey-patch required by LlamaIndex: `.message` on a ChatMessage returns the
# message object itself.
# NOTE(review): presumably this lets a plain ChatMessage stand in where a
# response object exposing `.message` is expected -- confirm against the agent code.
ChatMessage.message = property(lambda self: self)

# ---------- HUGGING FACE LLM WRAPPER (Command R+) ----------
class HuggingFaceLLM(LLM):
    """Wrapper around the Hugging Face Inference API, tuned for Command R+.

    The constructor builds a ``HuggingFaceInferenceAPI`` client from the
    ``HUGGINGFACE_TOKEN`` environment variable. ``chat`` / ``achat`` are the
    supported entry points; ``complete`` always raises ``NotImplementedError``.

    NOTE(review): only ``metadata``, ``chat``, ``achat`` and ``complete`` are
    defined here -- confirm the ``LLM`` base class does not require further
    methods (streaming / async completion) before this class can be instantiated.
    """
    model_name: str = Field(default="CohereForAI/c4ai-command-r-plus")
    temperature: float = Field(default=0.01)
    max_new_tokens: int = Field(default=2048) # Raised to allow longer responses

    # Inference client; assigned in __init__.
    _client: HuggingFaceInferenceAPI = None

    class Config:
        # NOTE(review): presumably pydantic model config permitting attributes
        # beyond the declared fields -- confirm.
        extra = "allow"

    def __init__(self, **kwargs):
        """Create the inference client and ensure a debug callback handler exists.

        Raises:
            ValueError: if ``HUGGINGFACE_TOKEN`` is unset or empty.
        """
        super().__init__(**kwargs)
        api_key = os.getenv("HUGGINGFACE_TOKEN")
        if not api_key:
            raise ValueError("HUGGINGFACE_TOKEN no configurado en los secrets del Space")
        self._client = HuggingFaceInferenceAPI(model_name=self.model_name, token=api_key)
        if self.callback_manager is None:
            # Imported lazily: only needed when no callback manager was supplied.
            from llama_index.core.callbacks.base import CallbackManager
            self.callback_manager = CallbackManager([])
        # Attach a LlamaDebugHandler when the manager has no handlers at all.
        if not self.callback_manager.handlers:
            self.callback_manager.add_handler(LlamaDebugHandler())

    @property
    def metadata(self) -> LLMMetadata:
        """Return the model description: context_window of 128000, output size
        equal to ``max_new_tokens``, flagged as a chat and function-calling model."""
        return LLMMetadata(
            context_window=128000,
            num_output=self.max_new_tokens,
            is_chat_model=True,
            is_function_calling_model=True,
            model_name=self.model_name,
        )

    def chat(self, messages: list[ChatMessage], **kwargs) -> ChatMessage:
        """Render ``messages`` through the client's chat template and generate a reply.

        Failures of the generation call are caught, printed, and returned as an
        assistant message whose content carries the error text. The template
        rendering happens outside the ``try`` and can still raise. ``kwargs``
        is accepted but not used.

        NOTE(review): assumes the client exposes ``tokenizer`` and
        ``text_generation`` -- TODO confirm against the installed llama-index version.
        """
        prompt = self._client.tokenizer.apply_chat_template(
            [{"role": msg.role.value, "content": msg.content} for msg in messages],
            tokenize=False, add_generation_prompt=True
        )
        try:
            response = self._client.text_generation(
                prompt, max_new_tokens=self.max_new_tokens,
                temperature=self.temperature if self.temperature > 0 else 0.01, # temperature must not be 0
                do_sample=True, top_p=0.95
            )
            return ChatMessage(role="assistant", content=response)
        except Exception as e:
            print(f"[ERROR] HuggingFace API call failed: {e}")
            return ChatMessage(role="assistant", content=f"Error: API call failed. Reason: {e}")

    async def achat(self, messages: list[ChatMessage], **kwargs) -> ChatMessage:
        """Async variant of ``chat``: runs the synchronous call via ``asyncio.to_thread``."""
        return await asyncio.to_thread(self.chat, messages, **kwargs)

    def complete(self, prompt: str, **kwargs) -> CompletionResponse:
        """Unsupported; always raises ``NotImplementedError``."""
        raise NotImplementedError("Use .chat() for this model.")

# ---------- TOOLING ----------
def _pd_safe_import():
    try:
        import pandas as pd
        return pd
    except ModuleNotFoundError:
        return None

def web_search(query: str, num_results: int = 5) -> str:
    """Search the web through Tavily and return the hits as numbered text blocks.

    Each hit is rendered as ``Result N:`` followed by ``Title:``, ``URL:`` and
    ``Content:`` lines; blocks are joined with a blank line between them.
    Any exception is returned as an ``Error web_search: ...`` string, not raised.
    """
    try:
        retriever = TavilySearchAPIRetriever(api_key=os.getenv("TAVILY_API_KEY"), k=num_results)
        blocks = []
        for position, doc in enumerate(retriever.invoke(query), 1):
            title = doc.metadata.get('title', '')
            source = doc.metadata.get('source', '')
            blocks.append(
                f"Result {position}:\nTitle: {title}\nURL: {source}\nContent: {doc.page_content}\n"
            )
        return "\n\n".join(blocks)
    except Exception as exc:
        return f"Error web_search: {exc}"

def scrape_url_text(url: str) -> str:
    """Fetch ``url`` and return its visible text, capped at 8000 characters.

    Script, style, noscript, header, footer and nav elements are removed
    before text extraction, and blank lines are dropped. A Cloudflare
    challenge page or any exception yields an ``Error ...`` string instead
    of raising.
    """
    try:
        response = requests.get(url, headers=HEADERS, timeout=20)
        response.raise_for_status()
        html = response.text
        challenge_page = "Just a moment" in html and "cloudflare" in html.lower()
        if challenge_page:
            return "Error: The site is protected by Cloudflare and cannot be scraped directly. Use information from web_search instead."
        soup = BeautifulSoup(html, "html.parser")
        # Drop elements that never carry readable page content.
        for element in soup(["script", "style", "noscript", "header", "footer", "nav"]):
            element.decompose()
        stripped_lines = (line.strip() for line in soup.get_text("\n").splitlines())
        visible = "\n".join(line for line in stripped_lines if line)
        return visible[:8000]
    except Exception as exc:
        return f"Error scrape_url_text: {exc}"

def analyze_markdown_table(table_md: str, question: str) -> str:
    """Parse a markdown table and either test commutativity or dump it as CSV.

    The first remaining row is the header; its first cell labels the row-name
    column and the other cells name the operands. Separator rows are skipped
    in every common spelling (``|---|``, ``| --- | --- |``, ``|:---:|``), and
    each line is trimmed before its outer pipes are removed, so indented
    tables parse correctly.

    Args:
        table_md: The table as markdown text.
        question: The user's question. If it mentions commutativity (Spanish
            stem "conmut" or English stem "commut", case-insensitive) the
            table is treated as an operation table.

    Returns:
        For a commutativity question: the sorted, comma-separated operands
        involved in at least one pair where ``x*y != y*x``, or "Conmutativa"
        when there is none. Otherwise the table as CSV. Problems are reported
        as an ``Error ...`` string; nothing is raised for parsing failures.
    """
    pd = _pd_safe_import()
    if pd is None:
        return "Error: pandas library is required for this tool but not installed."
    try:
        rows = []
        for raw_line in table_md.strip().splitlines():
            line = raw_line.strip()
            if not line:
                continue
            cells = [cell.strip() for cell in line.strip("|").split("|")]
            # A separator row either starts with "|---" or has only cells made
            # of dashes with optional alignment colons.
            is_separator = line.startswith("|---") or all(
                cell and "-" in cell and not cell.strip("-:") for cell in cells
            )
            if is_separator:
                continue
            rows.append(cells)
        if len(rows) < 2:
            return "Error: malformed markdown table"
        df = pd.DataFrame(rows[1:], columns=rows[0])
        asked = question.lower()
        if "conmut" in asked or "commut" in asked:
            offenders: set[str] = set()
            header, cols = df.columns[0], df.columns[1:]
            for x in cols:
                for y in cols:
                    try:
                        val_xy = df.loc[df[header] == x, y].iat[0]
                        val_yx = df.loc[df[header] == y, x].iat[0]
                    except (IndexError, KeyError):
                        # No row for this operand: the pair cannot be compared.
                        continue
                    if val_xy != val_yx:
                        offenders.update([x, y])
            return ", ".join(sorted(offenders)) or "Conmutativa"
        return df.to_csv(index=False)
    except Exception as exc:
        return f"Error analyze_markdown_table: {exc}"

def execute_code(code: str) -> str:
    """Run a short Python snippet in a child interpreter and report the outcome.

    The snippet runs with ``-S`` (the ``site`` module is not imported) and a
    10-second timeout, using the same interpreter as the current process.

    NOTE(security): this is process isolation only, not a real sandbox. The
    snippet can still read files, use the network and spawn processes, so do
    not expose it to untrusted input without further confinement.

    Args:
        code: Python source passed to the interpreter via ``-c``.

    Returns:
        ``"Output: <stdout>"`` (or ``"Output: (No output)"``) on exit code 0,
        ``"Error: <stderr>"`` on a non-zero exit code, and
        ``"Error execute_code: <reason>"`` if the process could not be run or
        timed out.
    """
    import sys  # local import: this module's header does not import sys

    # A bare "python" may be absent from PATH or point at another installation;
    # prefer the interpreter that is running this process.
    interpreter = sys.executable or "python"
    try:
        res = subprocess.run(
            [interpreter, "-S", "-c", code],
            capture_output=True, text=True, timeout=10,
        )
        if res.returncode == 0:
            output = res.stdout.strip()
            return f"Output: {output if output else '(No output)'}"
        return f"Error: {res.stderr.strip()}"
    except Exception as exc:
        return f"Error execute_code: {exc}"

# ... (other tools such as reverse_text, classify_botanical_foods, etc. go here, unchanged) ...
def reverse_text(text: str) -> str:
    """Return ``text`` with its characters in reverse order."""
    return text[::-1]

# ---------- TOOL DEFINITIONS & PROMPT ----------
# Registry of agent tools as (callable, tool name, description) triples.
tool_defs = [
    (web_search, "web_search", "Searches the web via Tavily."),
    (scrape_url_text, "scrape_url_text", "Fetch any URL and return visible text."),
    (analyze_markdown_table, "analyze_markdown_table", "Analyze a markdown table."),
    (execute_code, "execute_code", "Run short python snippets securely."),
    (reverse_text, "reverse_text", "Reverse a text string."),
    # Fallback tool: ignores its single argument and returns a fixed refusal string.
    # NOTE(review): the lambda's only parameter is named "_" -- confirm that
    # FunctionTool.from_defaults can derive an argument schema from it.
    (lambda _: "I cannot answer with the available tools.", "no_tool_solution", "Fallback answer when stuck."),
]
# Wrap every registry entry as a FunctionTool with an explicit name and description.
TOOLS = [FunctionTool.from_defaults(fn=fn, name=name, description=desc) for fn, name, desc in tool_defs]

# System prompt handed to the agent in create_fresh_agent. Kept as a plain
# (non-f) string: it has no placeholders, and a plain literal stays valid if
# literal braces are ever added to the text.
SYSTEM_PROMPT = """
You are Alfred, a ReAct agent. Your goal is to answer questions accurately. Follow these rules STRICTLY.

**OPERATING PROCEDURE:**

1.  **TRIAGE:** First, analyze the question. If it involves a local file (image, audio, Excel) or multimedia, IMMEDIATELY use `no_tool_solution`.
2.  **INFORMATION GATHERING:** For all other questions, your FIRST step is ALWAYS `web_search`.
3.  **ANALYZE SNIPPET:** After `web_search`, CAREFULLY read the `Content:` snippet of each result. If the answer is clearly present, answer immediately. DO NOT use another tool if you already have the information.
4.  **DEEP DIVE:** Only if the snippet is incomplete, use `scrape_url_text` on the most promising URL. If `scrape_url_text` fails (e.g., Cloudflare error), go back to the text from `web_search` or give up.
5.  **FINAL ANSWER:** Your final response MUST be ONLY the `Observation:` from your last successful tool call, or the phrase "I cannot answer with the available tools."
"""

# ---------- AGENT CREATION & EXECUTION ----------
def create_fresh_agent():
    """Build a new ReAct agent backed by a newly constructed HuggingFaceLLM.

    A separate agent per call keeps state from one question out of the next.
    """
    model = HuggingFaceLLM()
    agent_options = {
        "tools": TOOLS,
        "llm": model,
        "system_prompt": SYSTEM_PROMPT,
        "verbose": False,
        "max_iterations": 20,
        "handle_parsing_errors": True,
    }
    return ReActAgent.from_tools(**agent_options)

def _extract_observation(raw: str) -> str:
    """Extracts the LAST observation from the ReAct agent's reasoning dump."""
    if "Observation:" in raw:
        segment = raw.rsplit("Observation:", 1)[-1]
        if "Final Answer:" in segment:
            segment = segment.split("Final Answer:", 1)[0]
        return segment.strip()
    return raw.strip()

def basic_agent_response(question: str) -> str:
    """Answer one question with a newly created agent (public entry point).

    The agent's reply is reduced to its last observation. If that is empty,
    or if anything raises along the way, a fixed fallback phrase is returned
    and the failure is printed rather than propagated.
    """
    try:
        print(f"[DEBUG] ➜ Question: {question}")
        result = create_fresh_agent().query(question)
        # Prefer the `.response` payload when the result object carries one.
        if hasattr(result, "response"):
            payload = result.response
        else:
            payload = result
        answer = _extract_observation(str(payload))
        if not answer:
            return "I cannot answer with the available tools."
        return answer
    except Exception as exc:
        print(f"[ERROR] Agent execution failed: {exc}")
        return "I cannot answer with the available tools."