FD900 committed on
Commit
ec60378
·
verified ·
1 Parent(s): 7251401

Update tools.py

Browse files
Files changed (1) hide show
  1. tools.py +121 -1
tools.py CHANGED
@@ -1 +1,121 @@
1
- # tools.py - Tool definitions
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import tempfile
2
+ import requests
3
+ import os
4
+
5
+ from time import sleep
6
+ from urllib.parse import urlparse
7
+ from typing import Optional, List
8
+ import yt_dlp
9
+ import imageio
10
+
11
+ from PIL import Image
12
+ from dotenv import load_dotenv
13
+ from smolagents import tool
14
+ import whisper
15
+
16
+ load_dotenv()
17
+
18
@tool
def use_vision_model(question: str, images: List[Image.Image]) -> str:
    """
    Use a Vision Model to answer a question about a set of images.
    This stub exists for potential future Gemini or Mistral multimodal integration;
    it currently ignores its inputs and always returns a fixed "not available" message.

    Args:
        question: The question to ask about the images.
        images: The images the question refers to.

    Returns:
        A fixed message stating that no vision model is available.
    """
    return "Vision model is not available for Mistral. Please integrate a separate endpoint for image analysis."
25
+
26
@tool
def review_youtube_video(url: str, question: str) -> str:
    """
    Placeholder for answering a question about a YouTube video.
    The tool is not implemented: it ignores its inputs and always returns a
    fixed "unsupported" message.

    Args:
        url: URL of the YouTube video to review.
        question: The question to answer about the video.

    Returns:
        A fixed message stating that the tool is unsupported.
    """
    return "This tool is currently unsupported with Mistral. Please remove or replace."
29
+
30
@tool
def youtube_frames_to_images(url: str, sample_interval_seconds: int = 5) -> List[Image.Image]:
    """
    Download a video with yt-dlp and sample still frames from it at a fixed interval.

    Args:
        url: URL of the video to download.
        sample_interval_seconds: Number of seconds between two sampled frames. Must be positive.

    Returns:
        The sampled frames as PIL images, in playback order.

    Raises:
        ValueError: If sample_interval_seconds is not positive, or the video
            metadata does not report a frame rate.
        FileNotFoundError: If the download did not produce an .mp4 file.
    """
    if sample_interval_seconds <= 0:
        raise ValueError("sample_interval_seconds must be a positive number of seconds")

    # The video only lives inside the temporary directory, so all decoding
    # must happen before the `with` block exits and deletes it.
    with tempfile.TemporaryDirectory() as tmpdir:
        ydl_opts = {
            'format': 'bestvideo[height<=1080]+bestaudio/best[height<=1080]/best',
            'outtmpl': os.path.join(tmpdir, 'video.%(ext)s'),
            'quiet': True,
            'noplaylist': True,
            'merge_output_format': 'mp4',
            'force_ipv4': True,
        }
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.extract_info(url, download=True)

        video_path = next(
            (os.path.join(tmpdir, f) for f in os.listdir(tmpdir) if f.endswith('.mp4')),
            None,
        )
        if video_path is None:
            raise FileNotFoundError(f"No .mp4 file was produced when downloading {url}")

        reader = imageio.get_reader(video_path)
        try:
            fps = reader.get_meta_data().get('fps')
            if not fps:
                raise ValueError("Video metadata does not report a frame rate (fps)")
            # Clamp to 1 so a very low fps * interval product cannot yield a
            # zero step, which would make the modulo below divide by zero.
            frame_interval = max(1, int(fps * sample_interval_seconds))
            images = [
                Image.fromarray(frame)
                for idx, frame in enumerate(reader)
                if idx % frame_interval == 0
            ]
        finally:
            # Release the decoder even when reading frames fails.
            reader.close()
    return images
51
+
52
@tool
def read_file(filepath: str) -> str:
    """
    Read a text file as UTF-8 and return its full contents.

    Args:
        filepath: Path of the file to read.

    Returns:
        The file contents, or a string starting with "Error reading file:"
        if the file could not be opened or decoded.
    """
    try:
        with open(filepath, 'r', encoding='utf-8') as file:
            return file.read()
    except Exception as e:
        # Tool boundary: report the failure as text instead of raising.
        return f"Error reading file: {str(e)}"
59
+
60
@tool
def download_file_from_url(url: str, filename: Optional[str] = None) -> str:
    """
    Download a URL and save the response body into the system temporary directory.

    Args:
        url: The URL to download.
        filename: Optional name for the saved file. Only its final path component
            is used. When omitted, the name is taken from the URL path, falling
            back to a random "download_<hex>" name.

    Returns:
        The path of the saved file, or a string starting with
        "Error downloading file:" if the download or write failed.
    """
    try:
        if not filename:
            filename = os.path.basename(urlparse(url).path)
        # Keep only the last path component so an absolute path or "../"
        # sequence cannot place the file outside the temporary directory.
        safe_name = os.path.basename(filename) if filename else ""
        if safe_name in ("", ".", ".."):
            safe_name = f"download_{os.urandom(4).hex()}"
        filepath = os.path.join(tempfile.gettempdir(), safe_name)

        # Without a timeout, requests can block indefinitely on a stalled server.
        response = requests.get(url, timeout=30)
        # Fail on 4xx/5xx instead of saving an error page as if it were the file.
        response.raise_for_status()
        with open(filepath, 'wb') as f:
            f.write(response.content)
        return filepath
    except Exception as e:
        # Tool boundary: report the failure as text instead of raising.
        return f"Error downloading file: {str(e)}"
72
+
73
@tool
def extract_text_from_image(image_path: str) -> str:
    """
    Run OCR on an image file with pytesseract and return the recognised text.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The text found in the image, or a string starting with
        "Error extracting text:" if the image could not be opened or processed
        (including when pytesseract is not installed).
    """
    try:
        import pytesseract
        # Use the image as a context manager so its file handle is closed.
        with Image.open(image_path) as img:
            return pytesseract.image_to_string(img)
    except Exception as e:
        # Tool boundary: report the failure as text instead of raising.
        return f"Error extracting text: {str(e)}"
80
+
81
@tool
def analyze_csv_file(file_path: str, query: str) -> str:
    """
    Load a CSV file with pandas and report its shape and column names.

    Args:
        file_path: Path of the CSV file to load.
        query: Question about the data. Currently unused: only the shape and
            column summary is returned.

    Returns:
        A summary of the loaded data, or a string starting with "CSV error:"
        if the file could not be loaded.
    """
    try:
        import pandas as pd
        df = pd.read_csv(file_path)
        return f"Loaded CSV with shape {df.shape} and columns: {df.columns.tolist()}"
    except Exception as e:
        # Tool boundary: report the failure as text instead of raising.
        return f"CSV error: {str(e)}"
89
+
90
@tool
def analyze_excel_file(file_path: str, query: str) -> str:
    """
    Load an Excel file with pandas and report its shape and column names.

    Args:
        file_path: Path of the Excel file to load.
        query: Question about the data. Currently unused: only the shape and
            column summary is returned.

    Returns:
        A summary of the loaded data, or a string starting with "Excel error:"
        if the file could not be loaded.
    """
    try:
        import pandas as pd
        df = pd.read_excel(file_path)
        return f"Loaded Excel with shape {df.shape} and columns: {df.columns.tolist()}"
    except Exception as e:
        # Tool boundary: report the failure as text instead of raising.
        return f"Excel error: {str(e)}"
98
+
99
@tool
def youtube_transcribe(url: str) -> str:
    """
    Download the audio track of a video with yt-dlp and transcribe it with Whisper.

    Args:
        url: URL of the video whose audio should be transcribed.

    Returns:
        The transcribed text.

    Raises:
        FileNotFoundError: If the download did not produce a .wav file.
    """
    # The audio only lives inside the temporary directory, so transcription
    # must finish before the `with` block exits and deletes it.
    with tempfile.TemporaryDirectory() as tmpdir:
        ydl_opts = {
            'format': 'bestaudio/best',
            'outtmpl': os.path.join(tmpdir, 'audio.%(ext)s'),
            'quiet': True,
            'noplaylist': True,
            'postprocessors': [{
                'key': 'FFmpegExtractAudio',
                'preferredcodec': 'wav',
            }],
            'force_ipv4': True,
        }
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.extract_info(url, download=True)

        audio_path = next(
            (os.path.join(tmpdir, f) for f in os.listdir(tmpdir) if f.endswith('.wav')),
            None,
        )
        if audio_path is None:
            raise FileNotFoundError(f"No .wav file was produced when downloading {url}")

        # Load the model once, and only after the download succeeded.
        model = whisper.load_model("small")
        return model.transcribe(audio_path)['text']
118
+
119
@tool
def transcribe_audio(audio_file_path: str) -> str:
    """
    Transcribe an audio file with the Whisper "small" model.

    Args:
        audio_file_path: Path of the audio file to transcribe.

    Returns:
        The transcribed text.
    """
    model = whisper.load_model("small")
    return model.transcribe(audio_file_path)['text']