Spaces:
Sleeping
Sleeping
| import os | |
| import numpy | |
| import tempfile | |
| import requests | |
| import whisper | |
| import imageio | |
| import yt_dlp | |
| from PIL import Image | |
| from typing import List, Optional | |
| from urllib.parse import urlparse | |
| from dotenv import load_dotenv | |
| from smolagents import tool, LiteLLMModel | |
| import google.generativeai as genai | |
| from pytesseract import image_to_string | |
| # Load variables from a local .env file into the process environment; GEMINI_API is read later via os.getenv. | |
| load_dotenv() | |
| # Model identifier shared by the LiteLLM-based vision tool and the Gemini video question-answering call. | |
| MODEL_ID = "gemini-2.5-flash" | |
| # Vision Tool | |
def vision_tool(prompt: str, image_list: List[Image.Image]) -> str:
    """
    Ask a multimodal model a question about one or more images.

    Args:
        prompt (str): The user question or task.
        image_list (List[PIL.Image.Image]): The images to send along with the prompt.

    Returns:
        str: The ``content`` of the model's reply.
    """
    # NOTE(review): LiteLLM commonly expects a provider prefix on the model id
    # (e.g. "gemini/...") -- confirm that MODEL_ID routes to the intended backend.
    llm = LiteLLMModel(
        model_id=MODEL_ID,
        api_key=os.getenv("GEMINI_API"),
        temperature=0.2,
    )

    # A single user message: the text part first, then one image part per picture.
    parts = [{"type": "text", "text": prompt}]
    parts.extend({"type": "image", "image": picture} for picture in image_list)

    reply = llm([{"role": "user", "content": parts}])
    return reply.content
| # YouTube Frame Sampler | |
def youtube_frames_to_images(url: str, every_n_seconds: int = 5) -> List[Image.Image]:
    """
    Downloads a YouTube video and extracts frames at regular intervals.

    Args:
        url (str): The URL of the YouTube video to process.
        every_n_seconds (int): The time interval in seconds between extracted frames.
            Values that would yield a frame step below 1 are clamped so that
            every frame is sampled instead of raising ZeroDivisionError.

    Returns:
        List[Image.Image]: A list of sampled frames as PIL images.

    Raises:
        FileNotFoundError: If the download did not produce an .mp4 file.
    """
    with tempfile.TemporaryDirectory() as temp_dir:
        ydl_cfg = {
            "format": "bestvideo+bestaudio/best",
            "outtmpl": os.path.join(temp_dir, "yt_video.%(ext)s"),
            "merge_output_format": "mp4",
            "quiet": True,
            "force_ipv4": True,
        }
        with yt_dlp.YoutubeDL(ydl_cfg) as ydl:
            ydl.extract_info(url, download=True)

        video_file = next(
            (os.path.join(temp_dir, f) for f in os.listdir(temp_dir) if f.endswith(".mp4")),
            None,
        )
        if video_file is None:
            # Fail with a clear message instead of handing None to imageio.
            raise FileNotFoundError(f"No .mp4 file was produced when downloading {url}")

        reader = imageio.get_reader(video_file)
        try:
            fps = reader.get_meta_data().get("fps", 30)
            # Frame step between samples; never below 1, otherwise `i % interval`
            # would divide by zero for very small fps * every_n_seconds products.
            interval = max(1, int(fps * every_n_seconds))
            # Frames must be materialised here: the temp dir (and the video)
            # disappears as soon as the outer `with` block exits.
            return [Image.fromarray(frame) for i, frame in enumerate(reader) if i % interval == 0]
        finally:
            # Release the decoder/file handle before the temp dir is removed.
            reader.close()
| # YouTube QA via File URI | |
def ask_youtube_video(url: str, question: str) -> str:
    """
    Sends a YouTube video to a multimodal model and asks a question about it.

    Args:
        url (str): The URI of the video file (already uploaded and hosted).
        question (str): The natural language question to ask about the video.

    Returns:
        str: The model's answer to the question, or an error message if the
        request fails for any reason.
    """
    try:
        # `google.generativeai` exposes no `Client` class; the supported entry
        # points are `configure` (credentials) and `GenerativeModel` (requests).
        genai.configure(api_key=os.getenv("GEMINI_API"))
        model = genai.GenerativeModel(MODEL_ID)
        response = model.generate_content(
            [
                {
                    "role": "user",
                    "parts": [
                        {"text": question},
                        # The video is passed by reference (URI), not uploaded inline.
                        {"file_data": {"file_uri": url}},
                    ],
                }
            ]
        )
        return response.text
    except Exception as e:
        return f"Error asking {MODEL_ID} about video: {str(e)}"
| # File Reading Tool | |
def read_text_file(file_path: str) -> str:
    """
    Return the full contents of a UTF-8 text file.

    Args:
        file_path (str): The full path to the text file.

    Returns:
        str: The file's text, or a message starting with
        "Error reading file: " if opening or decoding fails.
    """
    try:
        with open(file_path, encoding="utf-8") as handle:
            contents = handle.read()
    except Exception as err:
        # Any failure is reported as text; nothing is raised to the caller.
        return f"Error reading file: {err}"
    return contents
| # File Downloader | |
def file_from_url(url: str, save_as: Optional[str] = None) -> str:
    """
    Downloads a file from a URL and saves it in the system temp directory.

    Args:
        url (str): The URL of the file to download.
        save_as (Optional[str]): Optional filename to save the file as. When
            omitted, the last path segment of the URL is used, falling back to
            a random ``file_<hex>`` name if the URL has no usable segment.

    Returns:
        str: "File saved to <path>" on success, or "Download failed: <reason>"
        on any error (nothing is raised).
    """
    try:
        if not save_as:
            parsed = urlparse(url)
            save_as = os.path.basename(parsed.path) or f"file_{os.urandom(4).hex()}"
        # NOTE: os.path.join discards the temp dir if `save_as` is an absolute
        # path, so callers should pass a bare filename.
        file_path = os.path.join(tempfile.gettempdir(), save_as)

        # The timeout stops a stalled server from hanging the caller forever;
        # the context manager returns the streamed connection to the pool.
        with requests.get(url, stream=True, timeout=30) as response:
            response.raise_for_status()
            with open(file_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
        return f"File saved to {file_path}"
    except Exception as e:
        return f"Download failed: {e}"
| # Audio Transcription (YouTube) | |
def transcribe_youtube(yt_url: str) -> str:
    """
    Transcribes the audio from a YouTube video using Whisper.

    Args:
        yt_url (str): The URL of the YouTube video.

    Returns:
        str: The transcribed text of the video.

    Raises:
        FileNotFoundError: If the download/extraction step produced no .wav file.
    """
    with tempfile.TemporaryDirectory() as tempdir:
        ydl_opts = {
            "format": "bestaudio",
            "outtmpl": os.path.join(tempdir, "audio.%(ext)s"),
            # Convert whatever audio container was downloaded into WAV.
            "postprocessors": [{
                "key": "FFmpegExtractAudio",
                "preferredcodec": "wav",
            }],
            "quiet": True,
            "force_ipv4": True,
        }
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.extract_info(yt_url, download=True)

        wav_file = next(
            (os.path.join(tempdir, f) for f in os.listdir(tempdir) if f.endswith(".wav")),
            None,
        )
        if wav_file is None:
            # Surface a clear error instead of passing None to Whisper.
            raise FileNotFoundError(f"No .wav audio was produced when downloading {yt_url}")

        # Load the model only once there is audio to transcribe, so a failed
        # download does not pay the model-loading cost.
        model = whisper.load_model("small")
        # Transcribe inside the `with` block: the audio is deleted on exit.
        return model.transcribe(wav_file)["text"]
| # Audio File Transcriber | |
def audio_to_text(audio_path: str) -> str:
    """
    Convert a local audio file to text with the Whisper "small" model.

    Args:
        audio_path (str): The local file path to the audio file.

    Returns:
        str: The transcription text, or a message starting with
        "Failed to transcribe: " if loading the model or transcribing fails.
    """
    try:
        transcript = whisper.load_model("small").transcribe(audio_path)['text']
    except Exception as err:
        # Errors are reported as text so the caller always receives a string.
        return f"Failed to transcribe: {err}"
    return transcript
| # OCR | |
def extract_text_via_ocr(image_path: str) -> str:
    """
    Extracts text from an image using Optical Character Recognition (OCR).

    Args:
        image_path (str): The local path to the image file.

    Returns:
        str: The extracted text, or a message starting with "OCR failed: "
        if the image cannot be opened or OCR fails.
    """
    try:
        # The context manager closes the underlying file handle once OCR is
        # done; a bare Image.open() would leave it open.
        with Image.open(image_path) as img:
            return image_to_string(img)
    except Exception as e:
        return f"OCR failed: {e}"
| # CSV Analyzer | |
def summarize_csv_data(path: str, query: str = "") -> str:
    """
    Load a CSV file and report its size, column names and descriptive statistics.

    Args:
        path (str): The file path to the CSV file.
        query (str): Accepted for interface compatibility; the current
            implementation does not use it.

    Returns:
        str: Row count, column list and the ``describe()`` table, or a message
        starting with "CSV error: " on failure.
    """
    try:
        # pandas is imported lazily so it is only required when this tool runs.
        import pandas as pd

        frame = pd.read_csv(path)
        row_count = len(frame)
        column_names = list(frame.columns)
        stats = frame.describe()
        summary = f"Loaded CSV with {row_count} rows. Columns: {column_names}\n\n{stats}"
    except Exception as err:
        return f"CSV error: {err}"
    return summary
| # Excel Analyzer | |
def summarize_excel_data(path: str, query: str = "") -> str:
    """
    Load an Excel file and report its size, column names and descriptive statistics.

    Args:
        path (str): The file path to the Excel file (.xls or .xlsx).
        query (str): Accepted for interface compatibility; the current
            implementation does not use it.

    Returns:
        str: Row count, column list and the ``describe()`` table, or a message
        starting with "Excel error: " on failure.
    """
    try:
        # pandas is imported lazily so it is only required when this tool runs.
        import pandas as pd

        frame = pd.read_excel(path)
        row_count = len(frame)
        column_names = list(frame.columns)
        stats = frame.describe()
        summary = f"Excel file with {row_count} rows. Columns: {column_names}\n\n{stats}"
    except Exception as err:
        return f"Excel error: {err}"
    return summary