File size: 7,311 Bytes
eb37674
96d23bf
3e615dc
3823b6b
a1c4a3e
 
 
5532431
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e9461d2
eb37674
 
 
 
 
 
 
 
 
 
 
 
 
2d81d4a
eb37674
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64e0a97
 
 
5532431
64e0a97
3e615dc
5532431
96d23bf
fb6708d
5532431
3e615dc
 
64e0a97
3e615dc
 
64e0a97
96d23bf
3e615dc
 
 
 
 
 
96d23bf
3e615dc
 
 
 
 
 
5532431
3e615dc
5532431
3e615dc
5532431
3e615dc
96d23bf
3e615dc
3823b6b
 
 
 
 
64df9c5
3823b6b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7a420ae
 
 
 
a1c4a3e
 
7a420ae
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a1c4a3e
7a420ae
 
 
 
 
 
 
a1c4a3e
 
 
 
 
 
 
 
8f40b76
a1c4a3e
 
 
 
 
 
 
 
 
7a420ae
 
a1c4a3e
7a420ae
 
 
bf02706
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
from langchain_core.tools import tool
import wikipediaapi
import pandas as pd
import requests
import fitz  # PyMuPDF
import io
from urllib.parse import urlparse
from typing import List, Dict
import pandas as pd
import re
from difflib import SequenceMatcher

def clean(text):
    """Normalize text for fuzzy comparison.

    The input is lower-cased first; afterwards every character that is not
    an ASCII letter, an ASCII digit, or a space is removed.

    Args:
        text: The string to normalize.

    Returns:
        The lower-cased string with all other characters stripped out.
    """
    lowered = text.lower()
    normalized = re.sub(r'[^a-zA-Z0-9 ]', '', lowered)
    return normalized

def extract_relevant_table_info(query: str, tables: List[pd.DataFrame], min_score: float = 0.2) -> Dict[str, str]:
    """Build a compact text digest of the table columns that resemble the query.

    Each column name is normalized with ``clean`` and compared to the
    normalized query using ``difflib.SequenceMatcher(...).ratio()``. Columns
    scoring at least ``min_score`` are kept. For every table with at least one
    kept column, the first three rows that have no missing value in the kept
    columns are rendered as ``<first col value>=<col>=<value>, ...`` and the
    rows are joined with ``", "``.

    Args:
        query: The question the tables should be matched against.
        tables: DataFrames to inspect.
        min_score: Minimum similarity ratio for a column to be kept.

    Returns:
        A dict mapping ``"table_<index>"`` to the digest string. Tables with
        no matching column, or with no complete row in the matching columns,
        are omitted.
    """
    query_clean = clean(query)
    results = {}

    for i, df in enumerate(tables):
        # Keep columns whose name is similar enough to the query.
        relevant_cols = [
            col
            for col in df.columns
            if SequenceMatcher(None, query_clean, clean(str(col))).ratio() >= min_score
        ]

        if not relevant_cols:
            continue  # skip irrelevant tables

        # The first relevant column acts as the row label; the rest are
        # rendered as name=value pairs after it.
        key_col, *value_cols = relevant_cols

        row_strings = []
        for _, row in df[relevant_cols].dropna().head(3).iterrows():
            if value_cols:
                pairs = ", ".join(f"{col}={row[col]}" for col in value_cols)
                row_strings.append(f"{row[key_col]}={pairs}")
            else:
                # Only one matching column: emit the bare value instead of a
                # dangling "value=" fragment.
                row_strings.append(f"{row[key_col]}")

        if not row_strings:
            # Every candidate row had a missing value; an empty entry would
            # make callers believe a table was found.
            continue

        results[f"table_{i}"] = ", ".join(row_strings)

    return results



@tool
def add(a: int, b: int) -> int:
    """
    Adds two numbers together and returns their sum

    Args:
        a: first addend
        b: second addend
    """
    total = a + b
    return total

@tool
def subtract(a: int, b: int) -> int:
    """
    Subtracts the second value from the first and returns the difference

    Args:
        a: number to subtract from
        b: number to subtract
    """
    return a - b

@tool
def multiply(a: int, b: int) -> int:
    """
    Multiplies two values and returns their product

    Args:
        a: first factor
        b: second factor
    """
    return a * b

@tool
def divide(a: int, b: int) -> float:
    """
    Divides the first value by the second and returns the quotient

    Args:
        a: numerator
        b: denominator

    Returns:
        The true-division result a / b, which is a float (7 / 2 gives 3.5).

    Raises:
        ValueError: If the denominator is zero.
    """
    if b == 0:
        raise ValueError("Cannot divide by zero.")
    return a / b

@tool
def search_wikipedia(query: str, page_title: str, language: str) -> str:
    """
    This tool looks up a specific Wikipedia page and returns the beginning of its summary together with
    a compact digest of the tables on that page whose column names best match the original query.

    Args:
        query: The original question that prompted the use of the function.
        page_title: Title of the Wikipedia page.
        language: Language code (e.g., "en", "es", "fr").

    Returns:
        A string starting with "Text: " and the first 500 characters of the page summary, followed by
        one line per relevant table (or "No tables found on this page."). If the page does not exist
        or retrieval fails, a string starting with "Error" is returned instead.
    """
    try:
        wiki_wiki = wikipediaapi.Wikipedia(
            user_agent='AIAgent (gabriel_abilleira@tutanota.com)',
            language=language,
            extract_format=wikipediaapi.ExtractFormat.HTML
        )

        page = wiki_wiki.page(page_title)

        if not page.exists():
            return f"Error: Page '{page_title}' not found in language '{language}'."

        # Use the URL to read tables. pandas.read_html raises ValueError
        # instead of returning an empty list when the page has no tables;
        # treat that as "no tables" so the page text is still returned.
        try:
            tables = pd.read_html(page.fullurl)
        except ValueError:
            tables = []

        relevant_tables = extract_relevant_table_info(query, tables, min_score=0.2)

        if relevant_tables:
            table_output = "\n".join(relevant_tables.values())
        else:
            table_output = "No tables found on this page."

        return f"Text: {page.summary[:500]}\n\n{table_output}"

    except Exception as e:
        # Tool boundary: report the failure as text rather than raising.
        return f"Error retrieving Wikipedia content: {str(e)}"

@tool
def duckduckgo_search(query: str) -> str:
    """Use DuckDuckGo to search the web for up-to-date information.
        Args:
            query: The query to search for on the web. It may be a literal url (e.g. https://www.youtube.com/watch?v=7ybEg14CP1g)

        Returns:
            The abstract text, the direct answer, or up to three related-topic texts (one per line),
            in that order of preference; "No good results found." when none is available; or
            "Search failed: ..." if the request or response parsing fails.
    """
    url = "https://api.duckduckgo.com/"
    params = {
        "q": query,
        "format": "json",
        "no_redirect": 1,
        "no_html": 1,
        "skip_disambig": 1,
    }

    try:
        # A timeout keeps the tool from hanging forever on a stalled
        # connection; HTTP error statuses are reported as failures.
        response = requests.get(url, params=params, timeout=15)
        response.raise_for_status()
        data = response.json()

        # Try the most useful fields first.
        if data.get("AbstractText"):
            return data["AbstractText"]
        if data.get("Answer"):
            return data["Answer"]

        # Fall back to the first few related results, keeping only entries
        # that actually carry text.
        related = data.get("RelatedTopics") or []
        texts = [rt["Text"] for rt in related[:3] if rt.get("Text")]
        if texts:
            return "\n".join(texts)

        return "No good results found."

    except Exception as e:
        # Tool boundary: report the failure as text rather than raising.
        return f"Search failed: {e}"


@tool
def search_papers(query: str) -> str:
    """Search for academic papers and retrieve their content when possible.

    Queries the Semantic Scholar paper-search endpoint for up to three papers
    and formats title, year, authors, abstract and link for each. When a
    paper's link points at arxiv.org, the PDF is downloaded and the text of
    its first three pages is included.

    Args:
        query: The search query for the papers.

    Returns:
        The formatted papers separated by "---" lines, "No papers found." when
        the search returns nothing, or "Error fetching papers: ..." if the
        search request fails.
    """

    url = "https://api.semanticscholar.org/graph/v1/paper/search"
    params = {
        "query": query,
        "limit": 3,
        "fields": "title,abstract,authors,url,year"
    }

    try:
        response = requests.get(url, params=params, timeout=15)
        response.raise_for_status()
        data = response.json()

        if not data.get("data"):
            return "No papers found."

        results = []

        for paper in data["data"]:
            # `or` also covers keys that are present but null (None), which a
            # dict.get default would not replace.
            title = paper.get("title") or "No title"
            authors = ", ".join(a.get("name") or "" for a in paper.get("authors") or [])
            year = paper.get("year") or "n.d."
            abstract = paper.get("abstract") or "No abstract available."
            link = paper.get("url") or ""

            full_text = "Full text not available."

            # Attempt to download and parse PDF (for arXiv)
            if "arxiv.org" in link:
                # Only rewrite the path segment, not every "abs" in the URL.
                pdf_url = link.replace("/abs/", "/pdf/", 1) + ".pdf"
                try:
                    pdf_response = requests.get(pdf_url, timeout=30)
                    pdf_response.raise_for_status()
                    doc = fitz.open(stream=pdf_response.content, filetype="pdf")
                    try:
                        # Only the first 3 pages (fewer if the PDF is shorter).
                        page_total = min(3, len(doc))
                        full_text = "\n".join(doc[i].get_text() for i in range(page_total))
                    finally:
                        doc.close()
                except Exception as pdf_err:
                    # Best effort: a failed PDF must not discard the metadata.
                    full_text = f"Failed to retrieve full text: {pdf_err}"

            # Built line by line so source indentation does not leak into
            # the returned text.
            result = (
                f"**{title}** ({year}) by {authors}\n"
                f"Abstract: {abstract}\n"
                f"Link: {link}\n"
                f"Full Text (first pages):\n{full_text}"
            )

            results.append(result)

        return "\n\n---\n\n".join(results)

    except Exception as e:
        # Tool boundary: report the failure as text rather than raising.
        return f"Error fetching papers: {e}"

@tool
def download_file(task_id: str) -> str:
    """
    Downloads a file associated with the given task ID.
    Returns the file path where the file is saved locally.

    Args:
        task_id: The task id to download attachment from.

    Returns:
        The relative path "downloads/<task_id>.file" of the saved file.

    Raises:
        requests.exceptions.RequestException: If the request fails or the server
            answers with an HTTP error status; the error is printed and re-raised.
    """
    # Imported locally because the module-level imports visible in this file
    # do not include `os`, which os.makedirs below requires.
    import os

    # NOTE(review): DEFAULT_API_URL is not defined in the visible part of this
    # module - confirm it is defined or imported at module level.
    file_url = f"{DEFAULT_API_URL}/files/{task_id}"
    local_file_path = f"downloads/{task_id}.file"

    print(f"Downloading file for task ID {task_id} from {file_url}...")
    try:
        # The context manager releases the streamed connection even if
        # writing the file fails part-way.
        with requests.get(file_url, stream=True, timeout=15) as response:
            response.raise_for_status()

            os.makedirs("downloads", exist_ok=True)
            with open(local_file_path, "wb") as file:
                for chunk in response.iter_content(chunk_size=8192):
                    file.write(chunk)

        print(f"File downloaded successfully: {local_file_path}")
        return local_file_path
    except requests.exceptions.RequestException as e:
        print(f"Error downloading file for task {task_id}: {e}")
        raise