Final_Assignment_Template_smolagents

Runtime error

App Files Files Community

ORromu committed on May 15, 2025

Commit

09a8047

verified ·

1 Parent(s): 26c9f12

Update tool.py

Browse files

Files changed (1) hide show

tool.py +175 -0

tool.py CHANGED Viewed

	@@ -0,0 +1,175 @@

import os
import tempfile
import uuid
from typing import Optional
from urllib.parse import urlparse, parse_qs

import pandas as pd
import pytesseract
import requests
from PIL import Image
from youtube_transcript_api import YouTubeTranscriptApi

# NOTE(review): the tool functions below are decorated with @tool, but no import
# in this file provides that name -- presumably `from smolagents import tool` is
# missing; confirm and add it, otherwise importing the module raises NameError.
@tool
def save_and_read_file(content: str, filename: Optional[str] = None) -> str:
    """Save text content to a file in the system temp directory and return its path.

    Args:
        content (str): the content to save to the file
        filename (str, optional): the name of the file. Only the final path component is used, so the file always stays inside the temp directory. If not provided, a random name file will be created.

    Returns:
        A message that contains the full path of the file that was written.
    """
    temp_dir = tempfile.gettempdir()

    # Keep only the last path component so that an absolute path or a name
    # containing "../" cannot place the file outside the temp directory.
    safe_name = os.path.basename(filename) if filename else ""

    if safe_name in ("", ".", ".."):
        # No usable name: let mkstemp create a uniquely named file. Close the
        # descriptor it returns right away so no handle is leaked; the file is
        # reopened by path below.
        fd, filepath = tempfile.mkstemp(dir=temp_dir)
        os.close(fd)
    else:
        filepath = os.path.join(temp_dir, safe_name)

    # An explicit encoding keeps the output independent of the platform locale.
    with open(filepath, "w", encoding="utf-8") as f:
        f.write(content)
    return f"File saved to {filepath}. You can read this file to process its contents."
@tool
def download_file_from_url(url: str, filename: Optional[str] = None) -> str:
    """Download a file from a URL and save it to the system temp directory.

    Args:
        url (str): the URL of the file to download.
        filename (str, optional): the name of the file. Only the final path component is used. If not provided, the name is taken from the URL path, and a random name is used when the URL has none.

    Returns:
        A message with the path of the downloaded file, or a message starting
        with "Error downloading file" when the download or the write fails.
    """
    unusable_names = ("", ".", "..")
    try:
        # basename() drops any directory part, so neither a caller-supplied
        # name nor the URL path can point outside the temp directory.
        safe_name = os.path.basename(filename) if filename else ""
        if safe_name in unusable_names:
            safe_name = os.path.basename(urlparse(url).path)
        if safe_name in unusable_names:
            safe_name = f"downloaded_{uuid.uuid4().hex[:8]}"
        filepath = os.path.join(tempfile.gettempdir(), safe_name)

        # (connect, read) timeouts: without them an unresponsive server blocks
        # the call forever. The context manager releases the streamed
        # connection even if writing the file fails part-way.
        with requests.get(url, stream=True, timeout=(10, 60)) as response:
            response.raise_for_status()
            with open(filepath, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:  # skip empty keep-alive chunks
                        f.write(chunk)
        return f"File downloaded to {filepath}. You can read this file to process its contents."
    except Exception as e:
        # Tool boundary: report the failure as text instead of raising.
        return f"Error downloading file: {str(e)}"
@tool
def extract_text_from_image(image_path: str) -> str:
    """Extract text from an image using OCR library pytesseract (if available).

    Args:
        image_path (str): the path to the image file.

    Returns:
        The recognised text prefixed with a short header, or a message starting
        with "Error extracting text from image" when opening or OCR fails.
    """
    try:
        # Open the image in a context manager so the underlying file handle is
        # closed as soon as OCR is done, including when OCR raises.
        with Image.open(image_path) as image:
            text = pytesseract.image_to_string(image)
        return f"Extracted text from image:\n\n{text}"
    except Exception as e:
        # Tool boundary: report the failure as text instead of raising.
        return f"Error extracting text from image: {str(e)}"
def _summarize_dataframe(df: pd.DataFrame, kind: str) -> str:
    """Build the size / columns / describe() report shared by the file tools.

    ``kind`` is the label used in the first line of the report ("CSV" or "Excel").
    """
    # Header cells are not guaranteed to be strings (a spreadsheet can have
    # numeric or date headers), and str.join() raises TypeError on those.
    column_names = ", ".join(str(name) for name in df.columns)
    parts = [
        f"{kind} file loaded with {len(df)} rows and {len(df.columns)} columns.\n",
        f"Columns: {column_names}\n\n",
        "Summary statistics:\n",
    ]
    if len(df.columns):
        parts.append(str(df.describe()))
    else:
        # describe() raises ValueError on a frame that has no columns.
        parts.append("(no columns to summarize)")
    return "".join(parts)


@tool
def analyze_csv_file(file_path: str, query: str) -> str:
    """Load a CSV file with pandas and return its size, column names and summary statistics.

    Args:
        file_path (str): the path to the CSV file.
        query (str): Question about the data. It is currently not used to filter the analysis, the same general summary is returned for every question.

    Returns:
        The summary report, or a message starting with "Error analyzing CSV file"
        when the file cannot be read or summarized.
    """
    try:
        return _summarize_dataframe(pd.read_csv(file_path), "CSV")
    except Exception as e:
        # Tool boundary: report the failure as text instead of raising.
        return f"Error analyzing CSV file: {str(e)}"


@tool
def analyze_excel_file(file_path: str, query: str) -> str:
    """Load an Excel file with pandas and return its size, column names and summary statistics.

    Args:
        file_path (str): the path to the Excel file.
        query (str): Question about the data. It is currently not used to filter the analysis, the same general summary is returned for every question.

    Returns:
        The summary report, or a message starting with "Error analyzing Excel file"
        when the file cannot be read or summarized.
    """
    try:
        return _summarize_dataframe(pd.read_excel(file_path), "Excel")
    except Exception as e:
        # Tool boundary: report the failure as text instead of raising.
        return f"Error analyzing Excel file: {str(e)}"
## YouTube transcript analysis tools
+def extract_video_id(youtube_url: str) -> str | None:
+    """Extract the video ID from a YouTube URL.
+    Supports standard and shortened formats like:
+    - https://www.youtube.com/watch?v=VIDEO_ID
+    - https://youtu.be/VIDEO_ID
+    """
+    try:
+        parsed_url = urlparse(youtube_url)
+        host = parsed_url.hostname
+        if host in ("www.youtube.com", "youtube.com"):
+            return parse_qs(parsed_url.query).get("v", [None])[0]
+        elif host == "youtu.be":
+            return parsed_url.path.strip("/")
+    except Exception:
+        return None
+    return None
@tool
def get_youtube_transcript(youtube_url: str) -> str:
    """Returns the transcript of a YouTube video as plain text.

    Use this tool to extract spoken words from videos for Q&A, summarization,
    or analysis. This does not include visual or on-screen content.

    Args:
        youtube_url (str): the URL of the YouTube video, in the youtube.com/watch?v=VIDEO_ID or youtu.be/VIDEO_ID form.

    Returns:
        The transcript text joined with single spaces, or a short message when
        the URL is not recognised or the transcript cannot be fetched.
    """
    video_id = extract_video_id(youtube_url)
    if not video_id:
        return "Invalid or unsupported YouTube URL format."
    try:
        if hasattr(YouTubeTranscriptApi, "get_transcript"):
            # Older youtube-transcript-api releases: static helper that
            # returns a list of {"text", "start", "duration"} dicts.
            entries = YouTubeTranscriptApi.get_transcript(video_id)
        else:
            # Newer releases dropped the static helper; fetch() on an instance
            # plus to_raw_data() yields the same list-of-dicts shape.
            entries = YouTubeTranscriptApi().fetch(video_id).to_raw_data()
        return " ".join(entry["text"] for entry in entries)
    except Exception as e:
        # Tool boundary: report the failure as text instead of raising.
        return f"Transcript unavailable: {str(e)}"