File size: 12,641 Bytes
ee4ab6d
8721ff6
ee4ab6d
8721ff6
 
ee4ab6d
8721ff6
ee4ab6d
8721ff6
7f0df87
 
ee4ab6d
 
8a95271
e80200c
cdc93a9
 
8721ff6
 
e80200c
8721ff6
 
 
 
 
 
e80200c
cdc93a9
8721ff6
 
e80200c
8721ff6
 
 
 
 
 
e80200c
cdc93a9
8721ff6
 
e80200c
8721ff6
 
 
 
 
 
e80200c
cdc93a9
015ebd7
8721ff6
e80200c
8721ff6
 
 
 
 
 
 
 
e80200c
cdc93a9
8721ff6
 
e80200c
8721ff6
 
 
 
 
 
e80200c
cdc93a9
 
ee4ab6d
e80200c
8721ff6
cdc93a9
 
ee4ab6d
8721ff6
 
 
 
e80200c
 
8721ff6
 
e80200c
cdc93a9
9d4d779
 
e80200c
8721ff6
cdc93a9
 
ee4ab6d
e80200c
8721ff6
cdc93a9
 
8721ff6
e80200c
8721ff6
cdc93a9
 
8721ff6
 
 
 
 
e80200c
 
cdc93a9
8721ff6
 
e80200c
7f0df87
 
e80200c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7f0df87
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e80200c
7f0df87
 
 
 
e80200c
 
 
 
 
7f0df87
e80200c
7f0df87
e80200c
 
 
 
7f0df87
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e80200c
7f0df87
 
 
 
 
 
 
 
 
 
 
 
 
e80200c
 
 
7f0df87
e80200c
7f0df87
e80200c
 
8a95271
e80200c
 
7f0df87
 
 
 
 
 
 
 
 
 
e80200c
8a95271
 
 
 
 
 
 
 
 
ebfea41
8a95271
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ebfea41
8a95271
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ebfea41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7f0df87
 
 
e80200c
 
7f0df87
 
 
 
 
 
e80200c
 
8721ff6
 
 
 
 
 
 
 
 
ebfea41
7f0df87
 
 
8a95271
 
ebfea41
 
e80200c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
from google import genai
from langchain_community.document_loaders import ArxivLoader
from langchain_community.document_loaders import WikipediaLoader
from langchain_community.vectorstores import SupabaseVectorStore
from langchain_core.messages import SystemMessage, HumanMessage
from langchain_core.messages import ToolMessage
from langchain_core.tools import tool
from langchain_tavily import TavilySearch
from langchain.tools.retriever import create_retriever_tool
from markitdown import MarkItDown
from pathlib import Path
from typing import Dict
from urllib.parse import urlparse
import os


@tool(parse_docstring=True)
def multiply(a: int, b: int) -> int:
    """Multiply two numbers.

    Args:
        a: first int
        b: second int

    Returns:
        The product of `a` and `b`.
    """
    product = a * b
    return product


@tool(parse_docstring=True)
def add(a: int, b: int) -> int:
    """Add two numbers.

    Args:
        a: first int
        b: second int

    Returns:
        The sum of `a` and `b`.
    """
    total = a + b
    return total


@tool(parse_docstring=True)
def subtract(a: int, b: int) -> int:
    """Subtract two numbers.

    Args:
        a: first int
        b: second int

    Returns:
        The difference `a - b`.
    """
    difference = a - b
    return difference


@tool(parse_docstring=True)
def divide(a: int, b: int) -> float:
    """Divide two numbers.

    Args:
        a: first int
        b: second int

    Returns:
        The true-division quotient `a / b`.

    Raises:
        ValueError: when `b` is zero.
    """
    # Happy path first; a zero divisor falls through to the explicit error.
    if b != 0:
        return a / b
    raise ValueError("Cannot divide by zero.")


@tool(parse_docstring=True)
def modulus(a: int, b: int) -> int:
    """Get the modulus of two numbers.

    Args:
        a: first int
        b: second int

    Returns:
        The remainder `a % b`.
    """
    remainder = a % b
    return remainder


@tool(parse_docstring=True)
def wiki_search(query: str) -> Dict[str, str]:
    """Search Wikipedia for a query and return maximum 3 results.

    Args:
        query: The search query.

    Returns:
        A dict with a single "wiki_results" key whose value is one string:
        every loaded document wrapped in a <Document> element, joined by
        a "---" separator line.
    """
    search_docs = WikipediaLoader(query=query, load_max_docs=3).load()

    chunks = []
    for doc in search_docs:
        source = doc.metadata.get("source", "")
        page = doc.metadata.get("page", "")
        # The opening tag must not be self-closing: it is paired with the
        # explicit </Document> that follows the page content.
        chunks.append(
            f'<Document source="{source}" page="{page}">\n{doc.page_content}\n</Document>'
        )

    return {"wiki_results": "\n\n---\n\n".join(chunks)}


@tool(parse_docstring=True)
def web_search(query: str) -> dict:
    """Search in the web with Tavily for a query and return maximum 5 results.

    Args:
        query: The search query.

    Returns:
        Whatever `TavilySearch.invoke` yields for a plain dict input.
        NOTE(review): this is expected to be the raw Tavily response dict
        rather than a ToolMessage, since no tool-call id is passed; confirm
        against the installed langchain_tavily version.
    """
    # Keep the advertised limit (docstring) and max_results in sync.
    searcher = TavilySearch(max_results=5, include_images=False)
    return searcher.invoke({"query": query})


@tool(parse_docstring=True)
def arvix_search(query: str) -> Dict[str, str]:
    """Search Arxiv for a query and return maximum 3 results.

    Args:
        query: The search query.

    Returns:
        A dict with a single "arvix_results" key whose value is one string:
        the first 1000 characters of every loaded document wrapped in a
        <Document> element, joined by a "---" separator line.
    """
    search_docs = ArxivLoader(query=query, load_max_docs=3).load()

    chunks = []
    for doc in search_docs:
        # NOTE(review): ArxivLoader metadata does not appear to carry a
        # "source" key (confirm for the installed langchain_community
        # version), so fall back to the paper title instead of raising
        # KeyError.
        source = doc.metadata.get("source") or doc.metadata.get("Title", "")
        page = doc.metadata.get("page", "")
        # The opening tag must not be self-closing: it is paired with the
        # explicit </Document> that follows the content.
        chunks.append(
            f'<Document source="{source}" page="{page}">\n{doc.page_content[:1000]}\n</Document>'
        )

    return {"arvix_results": "\n\n---\n\n".join(chunks)}


@tool(parse_docstring=True)
def read_video(url: str) -> str:
    """Read a youtube video given its `url` and return its metadata and transcription.

    Args:
        url: Video url direction.

    Returns:
        The text content MarkItDown produces when converting the URL.

    Raises:
        ValueError: when `url` has no scheme or host, or is not a YouTube URL.
        Exception: conversion failures are re-raised with a descriptive
            prefix, keeping the original exception type when possible.
    """
    # Validate URL
    parsed_url = urlparse(url)
    if not (parsed_url.scheme and parsed_url.netloc):
        raise ValueError(
            "Please provide a valid video URL with http:// or https:// prefix."
        )

    # Check if it's a YouTube URL
    if "youtube.com" not in url and "youtu.be" not in url:
        raise ValueError("Only YouTube videos are supported at this time.")

    try:
        # Just with markitdown
        md = MarkItDown(enable_plugins=True)
        result = md.convert(url)
        return result.text_content
    except Exception as err:
        message = f"Could not obtain video information > {err}"
        try:
            wrapped = type(err)(message)
        except Exception:
            # Not every exception class can be built from a single message.
            wrapped = RuntimeError(message)
        raise wrapped from err


@tool(parse_docstring=True)
def read_csv_file(file_path: str) -> str:
    """
    Reads and parses a CSV file to markdown.

    Args:
        file_path: Path to the CSV file

    Returns:
        The text content MarkItDown produces for the file.

    Raises:
        Exception: validation or conversion failures are re-raised with a
            descriptive prefix, keeping the original exception type when
            possible.
    """
    try:
        # Just with markitdown
        validate_file_path(file_path)
        validate_file_ext(file_path, ".csv")
        md = MarkItDown(enable_plugins=True)
        result = md.convert(file_path)
        return result.text_content
    except Exception as err:
        message = f"Could not parse csv file > {err}"
        try:
            wrapped = type(err)(message)
        except Exception:
            # Not every exception class can be built from a single message
            # (e.g. UnicodeDecodeError); fall back rather than mask the
            # real failure with a TypeError.
            wrapped = RuntimeError(message)
        raise wrapped from err


@tool(parse_docstring=True)
def read_excel_file(file_path: str) -> str:
    """
    Reads and parses an Excel file to markdown.

    Args:
        file_path: Path to the Excel file

    Returns:
        The text content MarkItDown produces for the workbook.

    Raises:
        Exception: validation or conversion failures are re-raised with a
            descriptive prefix, keeping the original exception type when
            possible.
    """
    try:
        # Just with markitdown
        validate_file_path(file_path)
        # ".xls" is the legacy Excel extension (the former ".xlx" was a typo).
        validate_file_ext(file_path, ".xls", ".xlsx")
        md = MarkItDown(enable_plugins=True)
        result = md.convert(file_path)
        return result.text_content
    except Exception as err:
        message = f"Could not parse excel file > {err}"
        try:
            wrapped = type(err)(message)
        except Exception:
            # Not every exception class can be built from a single message.
            wrapped = RuntimeError(message)
        raise wrapped from err


@tool(parse_docstring=True)
def read_python_file(file_path: str) -> str:
    """
    Reads and parses a Python file to markdown.

    Args:
        file_path: Path to the Python file

    Returns:
        The text content MarkItDown produces for the file.

    Raises:
        Exception: validation or conversion failures are re-raised with a
            descriptive prefix, keeping the original exception type when
            possible.
    """
    try:
        # Just with markitdown
        validate_file_path(file_path)
        validate_file_ext(file_path, ".py")
        md = MarkItDown(enable_plugins=True)
        result = md.convert(file_path)
        return result.text_content
    except Exception as err:
        message = f"Could not parse python file > {err}"
        try:
            wrapped = type(err)(message)
        except Exception:
            # Not every exception class can be built from a single message.
            wrapped = RuntimeError(message)
        raise wrapped from err


# Model name the describe_* tools fall back to when the
# DESCRIPTION_GOOGLE_MODEL environment variable is not set.
DEFAULT_DESCRIPTION_GOOGLE_MODEL = "gemini-2.0-flash"


@tool(parse_docstring=True)
def describe_image_file(file_path: str, query: str = "") -> str:
    """
    Reads an image file and describes it accordingly to an optional query.

    Args:
        file_path: Path to the image file
        query: Optional query to generate an expected image description

    Returns:
        The text of the model response.

    Raises:
        EnvironmentError: when GOOGLE_API_KEY is missing or empty.
        Exception: validation, upload or generation failures are re-raised
            with a descriptive prefix, keeping the original exception type
            when possible.
    """
    api_key = os.environ.get("GOOGLE_API_KEY", "")
    model_name = os.environ.get(
        "DESCRIPTION_GOOGLE_MODEL", DEFAULT_DESCRIPTION_GOOGLE_MODEL
    )

    if not api_key:
        raise EnvironmentError(
            "GOOGLE API KEY not present in environment, please do provide one."
        )
    if not query:
        query = "Caption this image, do not ommit important detail as number of subjects, or time of day."

    try:
        validate_file_path(file_path)
        # ".jpeg" replaces the former ".jpge" typo, which rejected JPEG files.
        validate_file_ext(file_path, ".png", ".jpg", ".jpeg")
        client = genai.Client(api_key=api_key)
        # upload file
        file = client.files.upload(file=file_path)
        # Request generation
        response = client.models.generate_content(
            model=model_name, contents=[file, query]
        )
        return response.text
    except Exception as err:
        message = f"Could not generate an image description > {err}"
        try:
            wrapped = type(err)(message)
        except Exception:
            # Some exception classes (NOTE(review): google-genai API errors
            # appear to need a code and response payload) cannot be built
            # from a single message; fall back rather than mask the real
            # failure with a TypeError.
            wrapped = RuntimeError(message)
        raise wrapped from err


@tool(parse_docstring=True)
def describe_audio_file(file_path: str, query: str = "") -> str:
    """
    Reads an audio file and describes it accordingly to an optional query.

    Args:
        file_path: Path to the audio file
        query: Optional query to generate an expected audio description

    Returns:
        The text of the model response.

    Raises:
        EnvironmentError: when GOOGLE_API_KEY is missing or empty.
        Exception: validation, upload or generation failures are re-raised
            with a descriptive prefix, keeping the original exception type
            when possible.
    """
    api_key = os.environ.get("GOOGLE_API_KEY", "")
    model_name = os.environ.get(
        "DESCRIPTION_GOOGLE_MODEL", DEFAULT_DESCRIPTION_GOOGLE_MODEL
    )

    if not api_key:
        raise EnvironmentError(
            "GOOGLE API KEY not present in environment, please do provide one."
        )
    if not query:
        query = "Transcribe speech present in audio, if more than one speaker is detected use a notation of [speaker_n] where n would be different per each speaker."

    try:
        validate_file_path(file_path)
        validate_file_ext(file_path, ".mp3")
        client = genai.Client(api_key=api_key)
        # upload file
        file = client.files.upload(file=file_path)
        # Request generation
        response = client.models.generate_content(
            model=model_name, contents=[file, query]
        )
        return response.text
    except Exception as err:
        message = f"Could not generate an audio description > {err}"
        try:
            wrapped = type(err)(message)
        except Exception:
            # Not every exception class can be built from a single message;
            # fall back rather than mask the real failure with a TypeError.
            wrapped = RuntimeError(message)
        raise wrapped from err


@tool(parse_docstring=True)
def describe_video_file(file_path: str, query: str = "") -> str:
    """
    Reads a video from a file and describes it accordingly to an optional query.

    Args:
        file_path: Path to the Video file
        query: Optional query to generate an expected video description

    Returns:
        The text of the model response.

    Raises:
        EnvironmentError: when GOOGLE_API_KEY is missing or empty.
        Exception: validation, upload or generation failures are re-raised
            with a descriptive prefix, keeping the original exception type
            when possible.
    """
    api_key = os.environ.get("GOOGLE_API_KEY", "")
    model_name = os.environ.get(
        "DESCRIPTION_GOOGLE_MODEL", DEFAULT_DESCRIPTION_GOOGLE_MODEL
    )

    if not api_key:
        raise EnvironmentError(
            "GOOGLE API KEY not present in environment, please do provide one."
        )
    if not query:
        query = "Transcribe the audio from this video, giving timestamps for salient events in the video. Also provide visual descriptions."

    try:
        validate_file_path(file_path)
        validate_file_ext(file_path, ".mp4", ".mpeg", ".avi")
        # One client serves both the upload and the generation request.
        client = genai.Client(api_key=api_key)
        # upload file
        # NOTE(review): an uploaded video may still be processing when the
        # generation request is sent; confirm whether the file state needs
        # to be polled first.
        file = client.files.upload(file=file_path)
        # Request generation
        response = client.models.generate_content(
            model=model_name, contents=[file, query]
        )
        return response.text
    except Exception as err:
        message = f"Could not generate a video description > {err}"
        try:
            wrapped = type(err)(message)
        except Exception:
            # Not every exception class can be built from a single message;
            # fall back rather than mask the real failure with a TypeError.
            wrapped = RuntimeError(message)
        raise wrapped from err


@tool(parse_docstring=True)
def describe_youtube_video(video_url: str, query: str = "") -> str:
    """
    Reads a video from youtube and describes it accordingly to an optional query.

    Args:
        video_url: URL to youtube video
        query: Optional query to generate an expected video description

    Returns:
        The text of the model response.

    Raises:
        EnvironmentError: when GOOGLE_API_KEY is missing or empty.
        Exception: URL validation or generation failures are re-raised with
            a descriptive prefix, keeping the original exception type when
            possible.
    """
    api_key = os.environ.get("GOOGLE_API_KEY", "")
    model_name = os.environ.get(
        "DESCRIPTION_GOOGLE_MODEL", DEFAULT_DESCRIPTION_GOOGLE_MODEL
    )

    if not api_key:
        raise EnvironmentError(
            "GOOGLE API KEY not present in environment, please do provide one."
        )
    if not query:
        query = "Transcribe the audio from this video, giving timestamps for salient events in the video. Also provide visual descriptions."

    try:
        validate_url(video_url, "youtube.com", "youtu.be")
        client = genai.Client(api_key=api_key)
        # Nothing is uploaded here: the video is passed by URL as file data.
        video_part = genai.types.Part(
            file_data=genai.types.FileData(file_uri=video_url)
        )
        prompt_part = genai.types.Part(text=query)
        # Request generation
        response = client.models.generate_content(
            model=model_name,
            contents=genai.types.Content(parts=[video_part, prompt_part]),
        )
        return response.text
    except Exception as err:
        message = f"Could not generate a video description > {err}"
        try:
            wrapped = type(err)(message)
        except Exception:
            # Not every exception class can be built from a single message;
            # fall back rather than mask the real failure with a TypeError.
            wrapped = RuntimeError(message)
        raise wrapped from err


def validate_url(url: str, *site: str):
    """Check that `url` is well formed and hosted on one of the given sites.

    Args:
        url: URL to validate.
        *site: Accepted domains (e.g. "youtube.com"). A URL matches when its
            host equals a domain or is a subdomain of it.

    Raises:
        ValueError: when `url` lacks a scheme or host, or when its host does
            not belong to any of the accepted domains.
    """
    # Validate URL parts
    parsed_url = urlparse(url)
    if not (parsed_url.scheme and parsed_url.netloc):
        raise ValueError(
            "Please provide a valid video URL with http:// or https:// prefix."
        )

    # Match on the host only: a substring test over the whole URL would also
    # accept e.g. "https://example.com/?q=youtube.com".
    host = (parsed_url.hostname or "").lower()
    domains = [site_.lower() for site_ in site]
    if not any(host == domain or host.endswith("." + domain) for domain in domains):
        raise ValueError(
            f"URL ({url}) is not one of supported sites ({', '.join(site)})."
        )


def validate_file_path(file_path: str):
    """Ensure that `file_path` points to something that exists on disk.

    Args:
        file_path: Path to check.

    Raises:
        FileNotFoundError: when the path does not exist. An explicit raise is
            used instead of `assert`, which is skipped under `python -O` and
            carried no message.
    """
    if not Path(file_path).exists():
        raise FileNotFoundError(f"File not found: {file_path}")


def validate_file_ext(file_path: str, *extension: str):
    """Ensure the suffix of `file_path` is one of the accepted extensions.

    The comparison ignores case, so "photo.JPG" matches ".jpg".

    Args:
        file_path: Path whose suffix is checked.
        *extension: Accepted suffixes, each including the leading dot.

    Raises:
        ValueError: when the suffix is not accepted. An explicit raise is
            used instead of `assert`, which is skipped under `python -O`.
    """
    suffix = Path(file_path).suffix
    allowed = {ext.lower() for ext in extension}
    if suffix.lower() not in allowed:
        # Sorted so the message is stable from run to run.
        raise ValueError(
            f"File extension {suffix} is not valid ({sorted(allowed)})"
        )


# List of the tool objects defined in this module. `read_video` is currently
# commented out, so it is not part of the list.
basic_tools = [
    multiply,
    add,
    subtract,
    divide,
    modulus,
    wiki_search,
    web_search,
    arvix_search,
    # read_video,
    read_csv_file,
    read_excel_file,
    read_python_file,
    describe_image_file,
    describe_audio_file,
    describe_video_file,
    describe_youtube_video,
]