bobobert4 committed on
Commit ·
ebfea41
1
Parent(s): fafc3f2
updated tools for video
Browse files
tools.py
CHANGED
|
@@ -181,19 +181,6 @@ def read_csv_file(file_path: str) -> str:
|
|
| 181 |
except Exception as err:
|
| 182 |
raise type(err)(f"Could not parse csv file > {err}")
|
| 183 |
|
| 184 |
-
# # Read the CSV file
|
| 185 |
-
# df = pd.read_csv(file_path)
|
| 186 |
-
|
| 187 |
-
# # Run various analyses based on the query
|
| 188 |
-
# result = f"CSV file loaded with {len(df)} rows and {len(df.columns)} columns.\n"
|
| 189 |
-
# result += f"Columns: {', '.join(df.columns)}\n\n"
|
| 190 |
-
|
| 191 |
-
# # Add summary statistics
|
| 192 |
-
# result += "Summary statistics:\n"
|
| 193 |
-
# result += str(df.describe())
|
| 194 |
-
|
| 195 |
-
# return result
|
| 196 |
-
|
| 197 |
|
| 198 |
@tool(parse_docstring=True)
|
| 199 |
def read_excel_file(file_path: str) -> str:
|
|
@@ -214,19 +201,6 @@ def read_excel_file(file_path: str) -> str:
|
|
| 214 |
except Exception as err:
|
| 215 |
raise type(err)(f"Could not parse excel file > {err}")
|
| 216 |
|
| 217 |
-
# # Read the Excel file
|
| 218 |
-
# df = pd.read_excel(file_path)
|
| 219 |
-
|
| 220 |
-
# # Run various analyses based on the query
|
| 221 |
-
# result = f"Excel file loaded with {len(df)} rows and {len(df.columns)} columns.\n"
|
| 222 |
-
# result += f"Columns: {', '.join(df.columns)}\n\n"
|
| 223 |
-
|
| 224 |
-
# # Add summary statistics
|
| 225 |
-
# result += "Summary statistics:\n"
|
| 226 |
-
# result += str(df.describe())
|
| 227 |
-
|
| 228 |
-
# return result
|
| 229 |
-
|
| 230 |
|
| 231 |
@tool(parse_docstring=True)
|
| 232 |
def read_python_file(file_path: str) -> str:
|
|
@@ -257,7 +231,7 @@ def describe_image_file(file_path: str, query: str = "") -> str:
|
|
| 257 |
Reads an image file and describes it accordingly to an optional query.
|
| 258 |
|
| 259 |
Args:
|
| 260 |
-
file_path: Path to the
|
| 261 |
query: Otional query to generate an expected image description
|
| 262 |
"""
|
| 263 |
api_key = os.environ.get("GOOGLE_API_KEY", "")
|
|
@@ -293,7 +267,7 @@ def describe_audio_file(file_path: str, query: str = "") -> str:
|
|
| 293 |
Reads an audio file and describes it accordingly to an optional query.
|
| 294 |
|
| 295 |
Args:
|
| 296 |
-
file_path: Path to the
|
| 297 |
query: Otional query to generate an expected image description
|
| 298 |
"""
|
| 299 |
api_key = os.environ.get("GOOGLE_API_KEY", "")
|
|
@@ -325,6 +299,95 @@ def describe_audio_file(file_path: str, query: str = "") -> str:
|
|
| 325 |
raise type(err)(f"Could not generate an audio description > {err}")
|
| 326 |
|
| 327 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 328 |
def validate_file_path(file_path: str):
|
| 329 |
path = Path(file_path)
|
| 330 |
assert path.exists()
|
|
@@ -347,10 +410,12 @@ basic_tools = [
|
|
| 347 |
wiki_search,
|
| 348 |
web_search,
|
| 349 |
arvix_search,
|
| 350 |
-
read_video,
|
| 351 |
read_csv_file,
|
| 352 |
read_excel_file,
|
| 353 |
read_python_file,
|
| 354 |
describe_image_file,
|
| 355 |
describe_audio_file,
|
|
|
|
|
|
|
| 356 |
]
|
|
|
|
| 181 |
except Exception as err:
|
| 182 |
raise type(err)(f"Could not parse csv file > {err}")
|
| 183 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 184 |
|
| 185 |
@tool(parse_docstring=True)
|
| 186 |
def read_excel_file(file_path: str) -> str:
|
|
|
|
| 201 |
except Exception as err:
|
| 202 |
raise type(err)(f"Could not parse excel file > {err}")
|
| 203 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 204 |
|
| 205 |
@tool(parse_docstring=True)
|
| 206 |
def read_python_file(file_path: str) -> str:
|
|
|
|
| 231 |
Reads an image file and describes it accordingly to an optional query.
|
| 232 |
|
| 233 |
Args:
|
| 234 |
+
file_path: Path to the image file
|
| 235 |
query: Otional query to generate an expected image description
|
| 236 |
"""
|
| 237 |
api_key = os.environ.get("GOOGLE_API_KEY", "")
|
|
|
|
| 267 |
Reads an audio file and describes it accordingly to an optional query.
|
| 268 |
|
| 269 |
Args:
|
| 270 |
+
file_path: Path to the audio file
|
| 271 |
query: Otional query to generate an expected image description
|
| 272 |
"""
|
| 273 |
api_key = os.environ.get("GOOGLE_API_KEY", "")
|
|
|
|
| 299 |
raise type(err)(f"Could not generate an audio description > {err}")
|
| 300 |
|
| 301 |
|
| 302 |
+
@tool(parse_docstring=True)
def describe_video_file(file_path: str, query: str = "") -> str:
    """
    Reads a video from a file and describes it according to an optional query.

    Args:
        file_path: Path to the video file
        query: Optional query to generate an expected video description
    """
    # Returns the text of the model response.
    # Raises EnvironmentError when GOOGLE_API_KEY is unset or empty; any other
    # failure is re-raised with a "Could not generate a video description"
    # prefix, keeping the original exception type whenever that type can be
    # constructed from a single message.
    import time  # local import: only needed while polling the upload state

    api_key = os.environ.get("GOOGLE_API_KEY", "")
    model_name = os.environ.get(
        "DESCRIPTION_GOOGLE_MODEL", DEFAULT_DESCRIPTION_GOOGLE_MODEL
    )

    if api_key == "":
        raise EnvironmentError(
            "GOOGLE API KEY not present in environment, please do provide one."
        )
    if query == "":
        query = "Transcribe the audio from this video, giving timestamps for salient events in the video. Also provide visual descriptions."
    try:
        validate_file_path(file_path)
        validate_file_ext(file_path, ".mp4", ".mpeg", ".avi")
        # One client is enough for both the upload and the generation request.
        client = genai.Client(api_key=api_key)
        uploaded = client.files.upload(file=file_path)
        # NOTE(review): uploaded videos are processed server-side before they
        # can be referenced in a request; poll until processing is over.
        # The 5 minute ceiling is arbitrary and only prevents an endless wait.
        deadline = time.monotonic() + 300
        while getattr(uploaded.state, "name", "") == "PROCESSING":
            if time.monotonic() > deadline:
                raise TimeoutError("Timed out waiting for the uploaded video to be processed.")
            time.sleep(2)
            uploaded = client.files.get(name=uploaded.name)
        if getattr(uploaded.state, "name", "") == "FAILED":
            raise ValueError("The uploaded video could not be processed.")
        # Request generation
        response = client.models.generate_content(
            model=model_name, contents=[uploaded, query]
        )
        return response.text
    except Exception as err:
        message = f"Could not generate a video description > {err}"
        # Some exception classes cannot be built from a single message
        # (their constructors need extra arguments); fall back to a
        # RuntimeError instead of failing with an unrelated TypeError.
        try:
            wrapped = type(err)(message)
        except Exception:
            wrapped = RuntimeError(message)
        raise wrapped from err
|
| 336 |
+
|
| 337 |
+
|
| 338 |
+
@tool(parse_docstring=True)
def describe_youtube_video(video_url: str, query: str = "") -> str:
    """
    Reads a video from YouTube and describes it according to an optional query.

    Args:
        video_url: URL to the YouTube video
        query: Optional query to generate an expected video description
    """
    # Returns the text of the model response.
    # Raises EnvironmentError when GOOGLE_API_KEY is unset or empty; any other
    # failure (including a URL rejected by validate_url) is re-raised with a
    # "Could not generate a video description" prefix, keeping the original
    # exception type whenever that type can be constructed from one message.
    api_key = os.environ.get("GOOGLE_API_KEY", "")
    model_name = os.environ.get(
        "DESCRIPTION_GOOGLE_MODEL", DEFAULT_DESCRIPTION_GOOGLE_MODEL
    )

    if api_key == "":
        raise EnvironmentError(
            "GOOGLE API KEY not present in environment, please do provide one."
        )
    if query == "":
        query = "Transcribe the audio from this video, giving timestamps for salient events in the video. Also provide visual descriptions."
    try:
        validate_url(video_url, "youtube.com", "youtu.be")
        client = genai.Client(api_key=api_key)
        # Nothing is uploaded here: the video is passed to the model by URL.
        video_part = genai.types.Part(
            file_data=genai.types.FileData(file_uri=video_url)
        )
        prompt_part = genai.types.Part(text=query)
        # Request generation
        response = client.models.generate_content(
            model=model_name,
            contents=genai.types.Content(parts=[video_part, prompt_part]),
        )
        return response.text
    except Exception as err:
        message = f"Could not generate a video description > {err}"
        # Some exception classes cannot be built from a single message
        # (their constructors need extra arguments); fall back to a
        # RuntimeError instead of failing with an unrelated TypeError.
        try:
            wrapped = type(err)(message)
        except Exception:
            wrapped = RuntimeError(message)
        raise wrapped from err
|
| 374 |
+
|
| 375 |
+
|
| 376 |
+
def validate_url(url: str, *site: str):
    """
    Check that a URL is an http(s) URL hosted on one of the allowed sites.

    Args:
        url: URL to validate.
        *site: Allowed domain names, e.g. "youtube.com". A URL matches when
            its host equals a domain or is a sub-domain of it.

    Raises:
        ValueError: If the URL has no http/https scheme or no host, or if its
            host does not belong to any of the given sites.
    """
    parsed_url = urlparse(url)
    # The scheme is compared explicitly so the error message below is accurate.
    if parsed_url.scheme not in ("http", "https") or not parsed_url.netloc:
        raise ValueError(
            "Please provide a valid video URL with http:// or https:// prefix."
        )

    # Compare against the host only: a substring test on the whole URL would
    # accept look-alike hosts and site names hidden in the path or query.
    host = (parsed_url.hostname or "").lower()
    allowed = [domain.lower() for domain in site]
    if not any(host == domain or host.endswith("." + domain) for domain in allowed):
        raise ValueError(
            f"URL ({url}) is not one of supported sites ({', '.join(site)})."
        )
|
| 389 |
+
|
| 390 |
+
|
| 391 |
def validate_file_path(file_path: str):
|
| 392 |
path = Path(file_path)
|
| 393 |
assert path.exists()
|
|
|
|
| 410 |
wiki_search,
|
| 411 |
web_search,
|
| 412 |
arvix_search,
|
| 413 |
+
# read_video,
|
| 414 |
read_csv_file,
|
| 415 |
read_excel_file,
|
| 416 |
read_python_file,
|
| 417 |
describe_image_file,
|
| 418 |
describe_audio_file,
|
| 419 |
+
describe_video_file,
|
| 420 |
+
describe_youtube_video,
|
| 421 |
]
|