ORromu committed on
Commit
09a8047
·
verified ·
1 Parent(s): 26c9f12

Update tool.py

Browse files
Files changed (1) hide show
  1. tool.py +175 -0
tool.py CHANGED
@@ -0,0 +1,175 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import tempfile
3
+ import requests
4
+ from urllib.parse import urlparse, parse_qs
5
+ import pytesseract
6
+ from PIL import Image
7
+ import pandas as pd
8
+ import uuid
9
+ from youtube_transcript_api import YouTubeTranscriptApi
10
+
11
+
12
@tool
def save_and_read_file(content: str, filename: str | None = None) -> str:
    """Save content to a file in the system temp directory and return the path.

    Args:
        content (str): the content to save to the file
        filename (str, optional): the name of the file. If not provided, a random name file will be created.

    Returns:
        str: a message containing the path of the written file.
    """
    temp_dir = tempfile.gettempdir()

    # Keep only the final path component so an absolute path or "../" in
    # `filename` cannot make the write land outside the temp directory.
    safe_name = os.path.basename(filename) if filename else ""

    if safe_name:
        filepath = os.path.join(temp_dir, safe_name)
    else:
        # mkstemp hands back an open descriptor; close it right away so the
        # handle is not leaked, then reopen by path below.
        fd, filepath = tempfile.mkstemp(dir=temp_dir)
        os.close(fd)

    # Explicit UTF-8 so non-ASCII content does not depend on the locale.
    with open(filepath, "w", encoding="utf-8") as f:
        f.write(content)

    return f"File saved to {filepath}. You can read this file to process its contents."
31
+
32
+
33
@tool
def download_file_from_url(url: str, filename: str | None = None) -> str:
    """Download a file from a URL and save it to a temporary location.

    Args:
        url (str): the URL of the file to download.
        filename (str, optional): the name of the file. If not provided, the name is taken from the URL path, falling back to a random name.

    Returns:
        str: a message containing the path of the downloaded file, or an
            error message if the download failed.
    """
    try:
        # Reduce a caller-supplied name to its last path component so the
        # file always lands inside the temp directory.
        safe_name = os.path.basename(filename) if filename else ""
        if not safe_name:
            # Derive the name from the URL path when none was given.
            safe_name = os.path.basename(urlparse(url).path)
        if not safe_name:
            safe_name = f"downloaded_{uuid.uuid4().hex[:8]}"

        filepath = os.path.join(tempfile.gettempdir(), safe_name)

        # A timeout prevents the tool from hanging forever on a stalled
        # server; the context manager releases the streamed connection.
        with requests.get(url, stream=True, timeout=30) as response:
            response.raise_for_status()

            with open(filepath, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:  # skip keep-alive chunks
                        f.write(chunk)

        return f"File downloaded to {filepath}. You can read this file to process its contents."
    except Exception as e:
        # Tool boundary: report the failure as text instead of raising.
        return f"Error downloading file: {str(e)}"
65
+
66
+
67
@tool
def extract_text_from_image(image_path: str) -> str:
    """Extract text from an image using OCR library pytesseract (if available).

    Args:
        image_path (str): the path to the image file.

    Returns:
        str: the recognised text prefixed with a short header, or an error
            message if the image could not be opened or processed.
    """
    try:
        # Open the image in a context manager so the underlying file handle
        # is closed once OCR has finished.
        with Image.open(image_path) as image:
            text = pytesseract.image_to_string(image)

        return f"Extracted text from image:\n\n{text}"
    except Exception as e:
        # Tool boundary: report the failure as text instead of raising.
        return f"Error extracting text from image: {str(e)}"
84
+
85
+
86
@tool
def analyze_csv_file(file_path: str, query: str) -> str:
    """Load a CSV file with pandas and return its shape, columns and summary statistics.

    Args:
        file_path (str): the path to the CSV file.
        query (str): Question about the data. Currently not used by the analysis; the same overview is returned for every query.

    Returns:
        str: a textual overview of the data, or an error message if the
            file could not be read.
    """
    try:
        # Read the CSV file
        df = pd.read_csv(file_path)

        # Column labels are converted with str() so the join cannot fail on
        # non-string labels.
        column_names = ", ".join(map(str, df.columns))

        result = f"CSV file loaded with {len(df)} rows and {len(df.columns)} columns.\n"
        result += f"Columns: {column_names}\n\n"

        # Add summary statistics
        result += "Summary statistics:\n"
        result += str(df.describe())

        return result

    except Exception as e:
        # Tool boundary: report the failure as text instead of raising.
        return f"Error analyzing CSV file: {str(e)}"
110
+
111
+
112
@tool
def analyze_excel_file(file_path: str, query: str) -> str:
    """Load an Excel file with pandas and return its shape, columns and summary statistics.

    Args:
        file_path (str): the path to the Excel file.
        query (str): Question about the data. Currently not used by the analysis; the same overview is returned for every query.

    Returns:
        str: a textual overview of the data, or an error message if the
            file could not be read.
    """
    try:
        # Read the Excel file
        df = pd.read_excel(file_path)

        # Excel headers are often numbers or dates; str.join only accepts
        # strings, so convert every label before joining.
        column_names = ", ".join(map(str, df.columns))

        result = (
            f"Excel file loaded with {len(df)} rows and {len(df.columns)} columns.\n"
        )
        result += f"Columns: {column_names}\n\n"

        # Add summary statistics
        result += "Summary statistics:\n"
        result += str(df.describe())

        return result

    except Exception as e:
        # Tool boundary: report the failure as text instead of raising.
        return f"Error analyzing Excel file: {str(e)}"
138
+
139
+
140
+ ## Analyze YouTube transcript tools
141
+
142
+ def extract_video_id(youtube_url: str) -> str | None:
143
+ """Extract the video ID from a YouTube URL.
144
+ Supports standard and shortened formats like:
145
+ - https://www.youtube.com/watch?v=VIDEO_ID
146
+ - https://youtu.be/VIDEO_ID
147
+ """
148
+ try:
149
+ parsed_url = urlparse(youtube_url)
150
+ host = parsed_url.hostname
151
+
152
+ if host in ("www.youtube.com", "youtube.com"):
153
+ return parse_qs(parsed_url.query).get("v", [None])[0]
154
+ elif host == "youtu.be":
155
+ return parsed_url.path.strip("/")
156
+ except Exception:
157
+ return None
158
+
159
+ return None
160
+
161
@tool
def get_youtube_transcript(youtube_url: str) -> str:
    """Returns the transcript of a YouTube video as plain text.
    Use this tool to extract spoken words from videos for Q&A, summarization,
    or analysis. This does not include visual or on-screen content.

    Args:
        youtube_url (str): the URL of the YouTube video.

    Returns:
        str: the transcript text joined with spaces, or a message explaining
            why no transcript could be produced.
    """
    video_id = extract_video_id(youtube_url)
    if not video_id:
        return "Invalid or unsupported YouTube URL format."

    try:
        if hasattr(YouTubeTranscriptApi, "fetch"):
            # Newer youtube-transcript-api releases expose an instance-level
            # fetch() that yields snippet objects with a .text attribute.
            snippets = YouTubeTranscriptApi().fetch(video_id)
            texts = [snippet.text for snippet in snippets]
        else:
            # Older releases only provide the static get_transcript(), which
            # returns a list of dicts.
            transcript = YouTubeTranscriptApi.get_transcript(video_id)
            texts = [entry["text"] for entry in transcript]
        return " ".join(texts)
    except Exception as e:
        # Tool boundary: report the failure as text instead of raising.
        return f"Transcript unavailable: {str(e)}"