# File size: 11,381 Bytes (revision ids shown by the file viewer: 481fea5, 1745c9e)
import os
import re
import io
import contextlib
import requests
import base64
import zipfile
import json
from typing import TypedDict, Annotated

from langgraph.graph import StateGraph, START
from langgraph.graph.message import add_messages
from langgraph.prebuilt import ToolNode, tools_condition

from langchain_openai import ChatOpenAI
from langchain_core.messages import AnyMessage, HumanMessage, SystemMessage
from langchain_core.tools import tool
from pydantic import BaseModel, Field
from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace

from dotenv import load_dotenv
load_dotenv()




# System prompt for the agent: lists the tool-selection workflow, behavioural
# rules, and the mandatory "FINAL ANSWER: ..." output format.
# NOTE(review): step 7 names a `youtube_transcript` tool that is not among the
# tools registered in this file, and the registered `arxiv_search` tool is not
# mentioned in the workflow — confirm whether that is intended.
# NOTE(review): nothing in the visible part of this file attaches this prompt
# to the conversation — presumably the caller prepends it; confirm.
SYSTEM_PROMPT = """You are a research agent solving questions from the GAIA benchmark.

WORKFLOW:
1. Analyze the question carefully before acting.
2. If the question contains reversed text, reverse it back first using python_executor.
3. If the question references a file (Excel, CSV, Python, etc.), use read_file to read it.
4. If the question references an image file, use analyze_image to look at it.
5. If the question references an audio/mp3 file, use transcribe_audio to get the text.
6. If the question requires math or logic, use python_executor.
7. If the question asks about a YouTube video, first try youtube_transcript. If that fails, use web_search.
8. Use web_search or wikipedia_search for factual questions.
9. If you find a URL that might have the answer, use fetch_webpage to read it.


RULES:
- NEVER call the same tool with the same query twice.
- If a tool fails, try a DIFFERENT approach.
- For math/logic problems with tables, use python_executor to check ALL pairs systematically.

- For math — ALWAYS use python_executor, never calculate in your head.
- Keep search queries short: 2-5 words.
- NEVER say "I cannot access" or "I'm unable to" — always try tools first, then give your best guess.
- For botany questions: bell peppers, corn, green beans, zucchini, tomatoes, pumpkins are botanical FRUITS, not vegetables.


CRITICAL — ANSWER FORMAT:
Your response must end with exactly:
FINAL ANSWER: [your answer]

The answer must be:
- CONCISE: a number, name, date, or short phrase
- EXACT: no extra words like "The answer is..."
- If a number: just the number
- If a name: just the name
- If a list: comma-separated values
"""

# Once this many tool-result messages are in the conversation, `assistant`
# stops offering tools and asks the model for its final answer.
MAX_TOOL_CALLS = 10
# NOTE(review): not referenced in the visible part of this file — presumably
# supplied by the caller as the graph's recursion limit; confirm.
RECURSION_LIMIT = 40

@tool
def web_search(query: str) -> str:
    """Search the web for current events, facts, people, etc.
    Args:
        query: search query string (keep it short and specific)
    """
    # NOTE: the docstring above is the tool description shown to the model,
    # so it is intentionally left unchanged.
    #
    # Returns at most 5000 characters: one "Source: <url>\n<content>" section
    # per hit, or "Search failed: ..." if anything raises (best-effort tool:
    # errors are reported to the model instead of crashing the graph).
    try:
        # Imported lazily so a missing optional dependency is reported as a
        # tool failure rather than breaking module import.
        from langchain_tavily import TavilySearch

        results = TavilySearch(max_results=3).invoke(query)

        # The Tavily tool may hand back either a bare list of hits or a
        # response dict that carries the hits under the "results" key.
        if isinstance(results, dict) and isinstance(results.get("results"), list):
            hits = results["results"]
        elif isinstance(results, list):
            hits = results
        else:
            # Plain string or an error payload: pass it through as text.
            return str(results)[:5000]

        formatted = []
        for hit in hits:
            if isinstance(hit, dict):
                url = hit.get("url", "")
                content = hit.get("content", "")
                formatted.append(f"Source: {url}\n{content}")
            else:
                # Unexpected hit shape: keep it readable instead of raising.
                formatted.append(str(hit))

        if not formatted:
            return "No results found."
        return "\n\n---\n\n".join(formatted)[:5000]
    except Exception as e:
        return f"Search failed: {e}"

@tool
def wikipedia_search(query: str) -> str:
    """Search Wikipedia for factual information about people, places, history, science.
    Args:
        query: topic to search on Wikipedia
    """
    # The docstring above is the tool description shown to the model.
    # Any failure (including a missing dependency) is reported as text.
    try:
        from langchain_community.utilities import WikipediaAPIWrapper

        wrapper = WikipediaAPIWrapper(
            top_k_results=2,
            doc_content_chars_max=4000,
        )
        answer = wrapper.run(query)
    except Exception as err:
        return f"Wikipedia search failed: {err}"
    return answer


@tool
def arxiv_search(query: str) -> str:
    """Search academic papers on ArXiv for scientific/research questions.
    Args:
        query: search query for academic papers
    """
    # The docstring above is the tool description shown to the model.
    # Loads up to two papers and returns title + first 1500 characters of each.
    try:
        from langchain_community.document_loaders import ArxivLoader

        loader = ArxivLoader(query=query, load_max_docs=2)
        sections = [
            f"**{paper.metadata.get('Title', 'No title')}**\n{paper.page_content[:1500]}"
            for paper in loader.load()
        ]
        if not sections:
            return "No results found."
        return "\n\n---\n\n".join(sections)
    except Exception as err:
        return f"ArXiv search failed: {err}"
    

@tool
def fetch_webpage(url: str) -> str:
    """Fetch and read content from a URL/webpage.
    Args:
        url: full URL to fetch
    """
    # The docstring above is the tool description shown to the model.
    # Returns the first 8000 characters of the page's visible text, or a
    # "Failed to fetch URL: ..." message if any step raises.
    try:
        response = requests.get(
            url,
            headers={"User-Agent": "Mozilla/5.0"},
            timeout=15,
        )
        response.raise_for_status()

        from bs4 import BeautifulSoup

        page = BeautifulSoup(response.text, "html.parser")

        # Drop non-content elements before extracting the text.
        boilerplate_tags = ["script", "style", "nav", "footer", "header"]
        for element in page.find_all(boilerplate_tags):
            element.decompose()

        visible_text = page.get_text(separator="\n", strip=True)
        return visible_text[:8000]
    except Exception as err:
        return f"Failed to fetch URL: {err}"

# Shared globals namespace handed to exec() by python_executor. The same dict
# is reused on every call, so names defined by one call stay available to
# later calls.
python_state = {
    "__builtins__": __builtins__,
    # Makes the built-in __import__ function reachable from executed code
    # under the name import_module.
    "import_module": __import__
}

@tool
def python_executor(code: str) -> str:
    """
    Execute Python code with persistent state across calls.
    Use print() to see results. All variables are saved for the next call.
    """
    # NOTE: the docstring above is the tool description shown to the model,
    # so it is intentionally left unchanged.
    #
    # SECURITY: exec() runs model-generated code with full builtins and no
    # sandbox. Only use this tool in an isolated environment.
    #
    # Returns the captured stdout. On failure the text ends with
    # "Python Error: <ExceptionType>: <message>", preceded by whatever was
    # printed before the failure so partial results are not lost.

    # Strip Markdown code fences if the model added them. Any language tag
    # (```python, ```py, bare ```) is accepted, not only ```python.
    code = re.sub(
        r'^```[\w+-]*[ \t]*\r?\n|```[ \t]*$',
        '',
        code,
        flags=re.MULTILINE,
    )

    output = io.StringIO()
    error = None
    try:
        with contextlib.redirect_stdout(output):
            # Always execute in the same python_state dict so that variables
            # persist between calls.
            exec(code, python_state)
    except (Exception, SystemExit) as exc:
        # SystemExit is not an Exception subclass; without catching it an
        # exit()/sys.exit() in the executed code would terminate the agent.
        error = f"Python Error: {type(exc).__name__}: {exc}"

    result = output.getvalue().strip()
    if error is not None:
        return f"{result}\n{error}" if result else error
    if not result:
        return "Code executed successfully, but produced no output. Remember to use print()."
    return result
    
@tool
def read_file(file_path: str) -> str:
    """
    Read content of files: TXT, CSV, JSON, PY, XLSX, PDF, or ZIP.
    For ZIP: lists files inside. For PDF: extracts text of the first 10 pages.
    For Tables: returns a summary and first 15 rows.
    """
    # The docstring above is the tool description shown to the model, so it
    # states the limits the code actually applies (15 table rows, 10 PDF pages).
    #
    # Returns a text rendering of the file, or an "Error ..." message when the
    # path does not exist or processing raises.
    if not os.path.exists(file_path):
        return f"Error: File '{file_path}' not found."

    # Text after the last dot, lower-cased. A path without a dot falls through
    # to the plain-text branch.
    ext = file_path.lower().split('.')[-1]

    try:
        # 1. Tables (Excel, CSV)
        if ext in ('xlsx', 'xls', 'csv'):
            import pandas as pd
            df = pd.read_excel(file_path) if ext.startswith('xls') else pd.read_csv(file_path)
            summary = f"Rows: {len(df)}, Columns: {df.columns.tolist()}\n"
            return summary + df.head(15).to_string()

        # 2. PDF (via PyMuPDF / fitz)
        if ext == 'pdf':
            import fitz
            doc = fitz.open(file_path)
            try:
                # Index pages one at a time instead of slicing the document:
                # integer indexing works on every PyMuPDF version, and the
                # min() keeps short documents from raising IndexError.
                pages = [
                    f"--- Page {i + 1} ---\n{doc[i].get_text()}"
                    for i in range(min(10, len(doc)))
                ]
            finally:
                # Release the file handle even if text extraction fails.
                doc.close()
            return "\n".join(pages)[:15000]

        # 3. ZIP archives
        if ext == 'zip':
            with zipfile.ZipFile(file_path, 'r') as z:
                files = z.namelist()
            return f"ZIP Archive contains: {files}. Use python_executor to extract if needed."

        # 4. JSON
        if ext == 'json':
            with open(file_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
            # ensure_ascii=False keeps non-ASCII text readable instead of
            # spending the character budget on \uXXXX escapes.
            return json.dumps(data, indent=2, ensure_ascii=False)[:10000]

        # 5. Plain text: read the first 15k characters
        with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
            return f.read(15000)

    except Exception as e:
        return f"Error processing file {file_path}: {str(e)}"
    

@tool
def analyze_image(image_path: str, question: str) -> str:
    """Analyze an image using GPT-4o vision. Use for photos, charts, chess positions, diagrams.
    Args:
        image_path: path to the image file (png, jpg, etc.)
        question: what you want to know about the image
    """
    # The docstring above is the tool description shown to the model.
    # The image is sent inline as a base64 data URL; any failure is returned
    # as an "Image analysis failed: ..." message.
    try:
        with open(image_path, "rb") as image_file:
            encoded = base64.b64encode(image_file.read()).decode("utf-8")

        # Pick the MIME type from the extension, defaulting to PNG.
        known_types = {
            "png": "image/png",
            "jpg": "image/jpeg",
            "jpeg": "image/jpeg",
            "gif": "image/gif",
            "webp": "image/webp",
        }
        suffix = image_path.lower().rsplit(".", 1)[-1]
        mime_type = known_types.get(suffix, "image/png")

        from openai import OpenAI

        vision_client = OpenAI()
        user_content = [
            {"type": "text", "text": question},
            {"type": "image_url", "image_url": {"url": f"data:{mime_type};base64,{encoded}"}},
        ]
        completion = vision_client.chat.completions.create(
            model="gpt-4o",
            messages=[{"role": "user", "content": user_content}],
            max_tokens=1000,
        )
        return completion.choices[0].message.content
    except Exception as err:
        return f"Image analysis failed: {err}"


@tool
def transcribe_audio(file_path: str) -> str:
    """Transcribe an audio file (mp3, wav, m4a) to text using OpenAI Whisper.
    Args:
        file_path: path to the audio file
    """
    # The docstring above is the tool description shown to the model.
    # Returns at most 8000 characters of transcript, or a
    # "Transcription failed: ..." message if any step raises.
    try:
        from openai import OpenAI

        whisper_client = OpenAI()
        with open(file_path, "rb") as audio_file:
            result = whisper_client.audio.transcriptions.create(
                model="whisper-1",
                file=audio_file,
            )
        transcript = result.text
        return transcript[:8000]
    except Exception as err:
        return f"Transcription failed: {err}"


# Alternative OpenAI-backed models, currently disabled:
# llm_fast =  #ChatOpenAI(model="gpt-4o-mini", temperature=0)   # main agent
# llm_strong = ChatOpenAI(model="gpt-4o", temperature=0)

# Hugging Face endpoint for Qwen2.5-72B-Instruct. Reading os.environ["HF_TOKEN"]
# raises KeyError at import time if the variable is not set.
llm = HuggingFaceEndpoint(
    repo_id="Qwen/Qwen2.5-72B-Instruct",
    huggingfacehub_api_token= os.environ["HF_TOKEN"]
)

# Chat-model wrapper around the endpoint; this is the model the agent node and
# the answer extractor below are built on.
llm_fast = ChatHuggingFace(llm=llm, verbose=True)

# Every tool the agent may call; the same list is bound to the model here and
# given to the graph's ToolNode in agent_func.
tools = [
    web_search,
    wikipedia_search,
    python_executor,
    arxiv_search,
    read_file,
    fetch_webpage,
    analyze_image,
    transcribe_audio,
]

# Model variant that is allowed to emit tool calls for the tools above.
llm_with_tools = llm_fast.bind_tools(tools)


# Graph state: a single `messages` list holding the conversation.
class AgentState(TypedDict):
    # Annotated with the add_messages reducer, which controls how the message
    # lists returned by nodes are merged into the existing list.
    messages: Annotated[list[AnyMessage], add_messages]

def assistant(state: AgentState):
    """Run one model turn and return the reply as a state update.

    While fewer than MAX_TOOL_CALLS tool-result messages are in the
    conversation, the tool-enabled model is used. Once the budget is spent,
    the plain model is invoked with an extra system message demanding the
    final answer, so no further tool calls can be requested.
    """
    history = state["messages"]
    tool_results = len([m for m in history if m.type == "tool"])

    if tool_results < MAX_TOOL_CALLS:
        reply = llm_with_tools.invoke(history)
    else:
        nudge = SystemMessage(
            content="Provide your FINAL ANSWER now. Format: FINAL ANSWER: [answer]."
        )
        reply = llm_fast.invoke(history + [nudge])

    return {"messages": [reply]}


# Structured-output schema with a single `answer` string field.
# (Documented with comments rather than a class docstring so the schema
# passed to the model stays unchanged.)
class FinalAnswer(BaseModel):
    answer: str = Field(description="The exact final answer — concise, no extra words")


# Model wrapper that returns its output as a FinalAnswer.
# NOTE(review): not used in the visible part of this file — presumably called
# by the code that post-processes the agent's reply; confirm.
answer_extractor = llm_fast.with_structured_output(FinalAnswer)

def agent_func():
    """Build and compile the agent graph.

    The graph starts at the "assistant" node. After each assistant turn,
    tools_condition decides whether to route to the "tools" node (when the
    latest message requests a tool) or to finish. Tool results always flow
    back to "assistant".
    """
    graph = StateGraph(AgentState)

    # Nodes: the model turn and the tool executor.
    graph.add_node("assistant", assistant)
    graph.add_node("tools", ToolNode(tools, handle_tool_errors=True))

    # Edges: entry point, conditional hand-off to tools, and the loop back.
    graph.add_edge(START, "assistant")
    graph.add_conditional_edges("assistant", tools_condition)
    graph.add_edge("tools", "assistant")

    return graph.compile()