arterm-sedov committed on
Commit
dd70fb1
·
1 Parent(s): 4b3a4e0

Fix tools: improve file type/extension detection, robust MIME handling in download_file_to_path

Browse files

- Enhance MIME type to extension mapping for broader support
- Add smart detection when URL/file extension mismatches Content-Type
- Refactor logic for clarity, modularity, and DRY principles
- Logging clarified for download steps and edge cases
- All code fully linted and tested for type safety and error handling

Files changed (3) hide show
  1. agent_ng/tabs/chat_tab.py +1 -0
  2. tools/file_utils.py +101 -90
  3. tools/tools.py +201 -37
agent_ng/tabs/chat_tab.py CHANGED
@@ -89,6 +89,7 @@ class ChatTab(QuickActionsMixin):
89
  ".hpp", ".java", ".go", ".rs", ".rb", ".php", ".pl", ".swift",
90
  ".kt", ".scala", ".sql", ".toml", ".env", # Common text-based code formats
91
  ".wav", ".mp3", ".aiff", ".ogg", ".flac", ".aac", # Audio files
 
92
  ".png", ".jpg", ".jpeg", ".gif", ".bmp", ".webp", ".svg", ".tiff" # Image files
93
  ],
94
  file_count="multiple",
 
89
  ".hpp", ".java", ".go", ".rs", ".rb", ".php", ".pl", ".swift",
90
  ".kt", ".scala", ".sql", ".toml", ".env", # Common text-based code formats
91
  ".wav", ".mp3", ".aiff", ".ogg", ".flac", ".aac", # Audio files
92
+ ".mp4", ".mpeg", ".mpg", ".mov", ".avi", ".flv", ".webm", ".wmv", ".3gp", ".3gpp", # Video files
93
  ".png", ".jpg", ".jpeg", ".gif", ".bmp", ".webp", ".svg", ".tiff" # Image files
94
  ],
95
  file_count="multiple",
tools/file_utils.py CHANGED
@@ -10,7 +10,6 @@ from typing import Optional, Dict, Any, List
10
  from pathlib import Path
11
  from pydantic import BaseModel, Field, field_validator
12
 
13
-
14
  class FileInfo(BaseModel):
15
  """Pydantic model for file information."""
16
  exists: bool = Field(description="Whether the file exists and is accessible")
@@ -19,7 +18,7 @@ class FileInfo(BaseModel):
19
  size: int = Field(0, description="File size in bytes")
20
  extension: str = Field("", description="File extension (lowercase)")
21
  error: Optional[str] = Field(None, description="Error message if file access failed")
22
-
23
  @field_validator('size')
24
  @classmethod
25
  def validate_size(cls, v):
@@ -27,7 +26,6 @@ class FileInfo(BaseModel):
27
  raise ValueError('File size cannot be negative')
28
  return v
29
 
30
-
31
  class TextFileResult(BaseModel):
32
  """Pydantic model for text file reading results."""
33
  success: bool = Field(description="Whether the file was successfully read")
@@ -36,7 +34,6 @@ class TextFileResult(BaseModel):
36
  file_info: Optional[FileInfo] = Field(None, description="File information")
37
  error: Optional[str] = Field(None, description="Error message if reading failed")
38
 
39
-
40
  class BinaryFileResult(BaseModel):
41
  """Pydantic model for binary file reading results."""
42
  success: bool = Field(description="Whether the file was successfully read")
@@ -44,7 +41,6 @@ class BinaryFileResult(BaseModel):
44
  file_info: Optional[FileInfo] = Field(None, description="File information")
45
  error: Optional[str] = Field(None, description="Error message if reading failed")
46
 
47
-
48
  class ToolResponse(BaseModel):
49
  """Pydantic model for standardized tool responses."""
50
  type: str = Field(default="tool_response", description="Response type identifier")
@@ -53,15 +49,14 @@ class ToolResponse(BaseModel):
53
  error: Optional[str] = Field(None, description="Error message if tool failed")
54
  file_info: Optional[FileInfo] = Field(None, description="File information if applicable")
55
 
56
-
57
  class FileUtils:
58
  """Utility class for common file operations."""
59
-
60
  @staticmethod
61
  def file_exists(file_path: str) -> bool:
62
  """Check if file exists and is accessible."""
63
  return os.path.exists(file_path) and os.path.isfile(file_path)
64
-
65
  @staticmethod
66
  def get_file_size(file_path: str) -> int:
67
  """Get file size in bytes."""
@@ -69,7 +64,7 @@ class FileUtils:
69
  return os.path.getsize(file_path)
70
  except OSError:
71
  return 0
72
-
73
  @staticmethod
74
  def get_file_info(file_path: str) -> FileInfo:
75
  """Get comprehensive file information with Pydantic validation."""
@@ -78,7 +73,7 @@ class FileUtils:
78
  exists=False,
79
  error=f"File not found: {file_path}"
80
  )
81
-
82
  try:
83
  return FileInfo(
84
  exists=True,
@@ -92,22 +87,22 @@ class FileUtils:
92
  exists=False,
93
  error=f"Error getting file info: {str(e)}"
94
  )
95
-
96
  @staticmethod
97
  def read_text_file(file_path: str, encodings: List[str] = None) -> TextFileResult:
98
  """
99
  Read text file with multiple encoding fallback and Pydantic validation.
100
-
101
  Args:
102
  file_path: Path to the text file
103
  encodings: List of encodings to try (default: ['utf-8', 'latin-1', 'cp1252', 'iso-8859-1'])
104
-
105
  Returns:
106
  TextFileResult with validated content, encoding used, and metadata
107
  """
108
  if encodings is None:
109
  encodings = ['utf-8', 'latin-1', 'cp1252', 'iso-8859-1']
110
-
111
  file_info = FileUtils.get_file_info(file_path)
112
  if not file_info.exists:
113
  return TextFileResult(
@@ -115,12 +110,12 @@ class FileUtils:
115
  error=file_info.error,
116
  file_info=file_info
117
  )
118
-
119
  for encoding in encodings:
120
  try:
121
  with open(file_path, 'r', encoding=encoding) as f:
122
  content = f.read()
123
-
124
  return TextFileResult(
125
  success=True,
126
  content=content,
@@ -135,13 +130,13 @@ class FileUtils:
135
  error=f"Error reading file: {str(e)}",
136
  file_info=file_info
137
  )
138
-
139
  return TextFileResult(
140
  success=False,
141
  error="File appears to be binary and cannot be read as text",
142
  file_info=file_info
143
  )
144
-
145
  @staticmethod
146
  def read_binary_file(file_path: str) -> BinaryFileResult:
147
  """Read binary file and return base64 encoded content with Pydantic validation."""
@@ -152,12 +147,12 @@ class FileUtils:
152
  error=file_info.error,
153
  file_info=file_info
154
  )
155
-
156
  try:
157
  import base64
158
  with open(file_path, 'rb') as f:
159
  content = f.read()
160
-
161
  return BinaryFileResult(
162
  success=True,
163
  content=base64.b64encode(content).decode('utf-8'),
@@ -169,7 +164,7 @@ class FileUtils:
169
  error=f"Error reading binary file: {str(e)}",
170
  file_info=file_info
171
  )
172
-
173
  @staticmethod
174
  def create_tool_response(tool_name: str, result: str = None, error: str = None,
175
  file_info: FileInfo = None) -> str:
@@ -187,16 +182,16 @@ class FileUtils:
187
  )
188
  else:
189
  sanitized_file_info = None
190
-
191
  response = ToolResponse(
192
  tool_name=tool_name,
193
  result=result, # Full result, no truncation
194
  error=error,
195
  file_info=sanitized_file_info
196
  )
197
-
198
  return response.model_dump_json(indent=2)
199
-
200
  @staticmethod
201
  def format_file_size(size_bytes: int) -> str:
202
  """Format file size in human-readable format."""
@@ -208,46 +203,46 @@ class FileUtils:
208
  return f"{size_bytes // 1024} KB"
209
  else:
210
  return f"{size_bytes // (1024 * 1024)} MB"
211
-
212
  @staticmethod
213
  def file_to_base64(file_path: str) -> str:
214
  """
215
  Convert file to base64 encoded string.
216
-
217
  Args:
218
  file_path (str): Path to the file to convert
219
-
220
  Returns:
221
  str: Base64 encoded file content
222
-
223
  Raises:
224
  FileNotFoundError: If file doesn't exist
225
  IOError: If file can't be read
226
  """
227
  import base64
228
-
229
  if not FileUtils.file_exists(file_path):
230
  raise FileNotFoundError(f"File not found: {file_path}")
231
-
232
  try:
233
  with open(file_path, 'rb') as f:
234
  file_content = f.read()
235
  return base64.b64encode(file_content).decode('utf-8')
236
  except Exception as e:
237
  raise IOError(f"Error reading file {file_path}: {str(e)}")
238
-
239
  @staticmethod
240
  def download_file_to_path(url: str, target_path: str = None) -> str:
241
  """
242
  Download file from URL to local path.
243
-
244
  Args:
245
  url (str): URL to download from
246
  target_path (str, optional): Local path to save to. If None, creates temp file.
247
-
248
  Returns:
249
  str: Path to downloaded file
250
-
251
  Raises:
252
  requests.RequestException: If download fails
253
  IOError: If file can't be written
@@ -255,23 +250,34 @@ class FileUtils:
255
  import requests
256
  import tempfile
257
  import os
 
258
  from urllib.parse import urlparse
259
-
 
 
260
  try:
 
 
 
 
 
261
  # First make a HEAD request to get Content-Type
262
- head_response = requests.head(url)
 
263
  head_response.raise_for_status()
264
-
 
 
265
  if target_path is None:
266
  # Create temp file with proper extension
267
  parsed_url = urlparse(url)
268
  filename = os.path.basename(parsed_url.path) or "downloaded_file"
269
  # Extract extension from URL
270
  _, url_ext = os.path.splitext(filename)
271
-
272
  # Get Content-Type header
273
  content_type = head_response.headers.get('content-type', '').lower()
274
-
275
  # MIME type to extension mapping
276
  mime_to_ext = {
277
  # Documents
@@ -285,7 +291,7 @@ class FileUtils:
285
  'application/rtf': '.rtf',
286
  'application/zip': '.zip',
287
  'application/x-zip-compressed': '.zip',
288
-
289
  # Text formats
290
  'text/plain': '.txt',
291
  'text/html': '.html',
@@ -295,7 +301,7 @@ class FileUtils:
295
  'text/xml': '.xml',
296
  'application/json': '.json',
297
  'application/xml': '.xml',
298
-
299
  # Images
300
  'image/jpeg': '.jpg',
301
  'image/jpg': '.jpg',
@@ -305,41 +311,41 @@ class FileUtils:
305
  'image/svg+xml': '.svg',
306
  'image/bmp': '.bmp',
307
  'image/tiff': '.tiff',
308
-
309
  # Audio
310
  'audio/mpeg': '.mp3',
311
  'audio/wav': '.wav',
312
  'audio/ogg': '.ogg',
313
  'audio/mp4': '.m4a',
314
-
315
  # Video
316
  'video/mp4': '.mp4',
317
  'video/avi': '.avi',
318
  'video/quicktime': '.mov',
319
  'video/x-msvideo': '.avi',
320
  }
321
-
322
  # Smart extension detection strategy:
323
  # 1. If Content-Type is specific and matches known types, use it
324
  # 2. If URL has a standard extension, use it
325
  # 3. Fallback to Content-Type if URL extension is non-standard
326
-
327
  ext = None
328
  content_type_ext = None
329
  url_ext_valid = False
330
-
331
  # Get extension from Content-Type
332
  for mime_type, extension in mime_to_ext.items():
333
  if mime_type in content_type:
334
  content_type_ext = extension
335
  break
336
-
337
  # Check if URL extension is valid (standard file extension)
338
  if url_ext:
339
  # Check if URL extension matches any known extension
340
  known_extensions = set(mime_to_ext.values())
341
  url_ext_valid = url_ext.lower() in known_extensions
342
-
343
  # Decision logic
344
  if content_type_ext and url_ext_valid:
345
  # Both are valid - prefer Content-Type for accuracy
@@ -356,83 +362,84 @@ class FileUtils:
356
  else:
357
  # No extension found
358
  ext = ''
359
-
360
  temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=ext)
361
  target_path = temp_file.name
362
  temp_file.close()
363
-
364
  # Now download the file
365
- response = requests.get(url, stream=True)
 
366
  response.raise_for_status()
367
-
368
  with open(target_path, 'wb') as f:
369
  for chunk in response.iter_content(chunk_size=8192):
370
  f.write(chunk)
371
-
 
372
  return target_path
373
-
374
  except Exception as e:
375
  raise IOError(f"Error downloading file from {url}: {str(e)}")
376
-
377
  @staticmethod
378
  def generate_unique_filename(original_filename: str, session_id: str = "default") -> str:
379
  """
380
  Generate a unique filename with timestamp and hash (no session prefix since we use session folders).
381
-
382
  Args:
383
  original_filename (str): Original filename from user upload
384
  session_id (str): Session ID for isolation (used for folder organization)
385
-
386
  Returns:
387
  str: Unique filename with timestamp and hash
388
  """
389
  import hashlib
390
  import time
391
  from pathlib import Path
392
-
393
  # Get file extension
394
  path_obj = Path(original_filename)
395
  name_without_ext = path_obj.stem
396
  extension = path_obj.suffix
397
-
398
  # Generate timestamp and hash (include session_id for uniqueness across sessions)
399
  timestamp = str(int(time.time() * 1000)) # milliseconds
400
  hash_suffix = hashlib.md5(f"{original_filename}{timestamp}{session_id}".encode()).hexdigest()[:8]
401
-
402
  # Create unique filename with session ID for better uniqueness and clarity
403
  unique_name = f"{session_id}_{name_without_ext}_{timestamp}_{hash_suffix}{extension}"
404
-
405
  return unique_name
406
-
407
  @staticmethod
408
  def get_gradio_cache_path() -> str:
409
  """
410
  Get the current Gradio cache directory path.
411
-
412
  Returns:
413
  str: Path to Gradio's cache directory
414
  """
415
  import os
416
  import tempfile
417
-
418
  # Check if GRADIO_TEMP_DIR is set
419
  gradio_temp = os.environ.get('GRADIO_TEMP_DIR')
420
  if gradio_temp:
421
  return gradio_temp
422
-
423
  # Default to system temp directory
424
  return tempfile.gettempdir()
425
-
426
-
427
  @staticmethod
428
  def resolve_file_reference(file_reference: str, agent=None) -> str:
429
  """
430
  Resolve file reference (filename or URL) to full file path.
431
-
432
  Args:
433
  file_reference (str): Original filename from user upload OR URL
434
  agent: Agent instance with file registry (optional)
435
-
436
  Returns:
437
  str: Full path to the file, or None if not found
438
  """
@@ -442,41 +449,45 @@ class FileUtils:
442
  # Download URL to temp file
443
  return FileUtils.download_file_to_path(file_reference)
444
  except Exception as e:
445
- print(f"⚠️ Failed to download URL {file_reference}: {e}")
446
- return None
447
-
 
 
 
 
448
  # It's a filename - resolve using agent's file registry
449
  if agent and hasattr(agent, 'get_file_path'):
450
  return agent.get_file_path(file_reference)
451
-
452
  return None
453
-
454
  @staticmethod
455
  def resolve_file_path(original_filename: str, agent=None) -> str:
456
  """
457
  Resolve original filename to full file path using agent's file registry.
458
-
459
  Args:
460
  original_filename (str): Original filename from user upload
461
  agent: Agent instance with file registry (optional)
462
-
463
  Returns:
464
  str: Full path to the file, or None if not found
465
  """
466
  if agent and hasattr(agent, 'get_file_path'):
467
  return agent.get_file_path(original_filename)
468
-
469
  return None
470
-
471
  @staticmethod
472
  def resolve_code_input(code_reference: str, agent=None) -> tuple[str, str]:
473
  """
474
  Resolve code reference to actual code content and detected language.
475
-
476
  Args:
477
  code_reference (str): Code content, filename, or URL
478
  agent: Agent instance for file resolution (optional)
479
-
480
  Returns:
481
  tuple: (code_content, detected_language)
482
  """
@@ -491,25 +502,25 @@ class FileUtils:
491
  return result.content, language
492
  except Exception as e:
493
  raise ValueError(f"Failed to download URL {code_reference}: {str(e)}")
494
-
495
  # Check if it's a file path (try to resolve via agent first, then direct path)
496
  file_path = None
497
  if agent and hasattr(agent, 'get_file_path'):
498
  file_path = agent.get_file_path(code_reference)
499
-
500
  if not file_path and os.path.exists(code_reference):
501
  file_path = code_reference
502
-
503
  if file_path and os.path.exists(file_path):
504
  result = FileUtils.read_text_file(file_path)
505
  if not result.success:
506
  raise ValueError(f"Failed to read file: {result.error}")
507
  language = FileUtils.detect_language_from_extension(file_path)
508
  return result.content, language
509
-
510
  # It's code content - return as-is with no language detection
511
  return code_reference, None
512
-
513
  @staticmethod
514
  def detect_language_from_extension(file_path: str) -> str:
515
  """Detect programming language from file extension."""
@@ -534,7 +545,7 @@ class FileUtils:
534
  '.swift': 'swift'
535
  }
536
  return extension_map.get(Path(file_path).suffix.lower(), 'python')
537
-
538
  @staticmethod
539
  def is_text_file(file_path: str) -> bool:
540
  """Check if file is likely a text file based on extension."""
@@ -544,7 +555,7 @@ class FileUtils:
544
  '.cfg', '.conf', '.env', '.csv', '.tsv'
545
  }
546
  return Path(file_path).suffix.lower() in text_extensions
547
-
548
  @staticmethod
549
  def is_image_file(file_path: str) -> bool:
550
  """Check if file is likely an image file based on extension."""
@@ -552,7 +563,7 @@ class FileUtils:
552
  '.png', '.jpg', '.jpeg', '.gif', '.bmp', '.tiff', '.webp', '.svg'
553
  }
554
  return Path(file_path).suffix.lower() in image_extensions
555
-
556
  @staticmethod
557
  def is_audio_file(file_path: str) -> bool:
558
  """Check if file is likely an audio file based on extension."""
@@ -560,7 +571,7 @@ class FileUtils:
560
  '.mp3', '.wav', '.flac', '.aac', '.ogg', '.m4a', '.wma'
561
  }
562
  return Path(file_path).suffix.lower() in audio_extensions
563
-
564
  @staticmethod
565
  def is_video_file(file_path: str) -> bool:
566
  """Check if file is likely a video file based on extension."""
@@ -568,7 +579,7 @@ class FileUtils:
568
  '.mp4', '.avi', '.mov', '.wmv', '.flv', '.webm', '.mkv'
569
  }
570
  return Path(file_path).suffix.lower() in video_extensions
571
-
572
  @staticmethod
573
  def is_pdf_file(file_path: str) -> bool:
574
  """Check if file is likely a PDF file based on extension."""
 
10
  from pathlib import Path
11
  from pydantic import BaseModel, Field, field_validator
12
 
 
13
  class FileInfo(BaseModel):
14
  """Pydantic model for file information."""
15
  exists: bool = Field(description="Whether the file exists and is accessible")
 
18
  size: int = Field(0, description="File size in bytes")
19
  extension: str = Field("", description="File extension (lowercase)")
20
  error: Optional[str] = Field(None, description="Error message if file access failed")
21
+
22
  @field_validator('size')
23
  @classmethod
24
  def validate_size(cls, v):
 
26
  raise ValueError('File size cannot be negative')
27
  return v
28
 
 
29
  class TextFileResult(BaseModel):
30
  """Pydantic model for text file reading results."""
31
  success: bool = Field(description="Whether the file was successfully read")
 
34
  file_info: Optional[FileInfo] = Field(None, description="File information")
35
  error: Optional[str] = Field(None, description="Error message if reading failed")
36
 
 
37
  class BinaryFileResult(BaseModel):
38
  """Pydantic model for binary file reading results."""
39
  success: bool = Field(description="Whether the file was successfully read")
 
41
  file_info: Optional[FileInfo] = Field(None, description="File information")
42
  error: Optional[str] = Field(None, description="Error message if reading failed")
43
 
 
44
  class ToolResponse(BaseModel):
45
  """Pydantic model for standardized tool responses."""
46
  type: str = Field(default="tool_response", description="Response type identifier")
 
49
  error: Optional[str] = Field(None, description="Error message if tool failed")
50
  file_info: Optional[FileInfo] = Field(None, description="File information if applicable")
51
 
 
52
  class FileUtils:
53
  """Utility class for common file operations."""
54
+
55
  @staticmethod
56
  def file_exists(file_path: str) -> bool:
57
  """Check if file exists and is accessible."""
58
  return os.path.exists(file_path) and os.path.isfile(file_path)
59
+
60
  @staticmethod
61
  def get_file_size(file_path: str) -> int:
62
  """Get file size in bytes."""
 
64
  return os.path.getsize(file_path)
65
  except OSError:
66
  return 0
67
+
68
  @staticmethod
69
  def get_file_info(file_path: str) -> FileInfo:
70
  """Get comprehensive file information with Pydantic validation."""
 
73
  exists=False,
74
  error=f"File not found: {file_path}"
75
  )
76
+
77
  try:
78
  return FileInfo(
79
  exists=True,
 
87
  exists=False,
88
  error=f"Error getting file info: {str(e)}"
89
  )
90
+
91
  @staticmethod
92
  def read_text_file(file_path: str, encodings: List[str] = None) -> TextFileResult:
93
  """
94
  Read text file with multiple encoding fallback and Pydantic validation.
95
+
96
  Args:
97
  file_path: Path to the text file
98
  encodings: List of encodings to try (default: ['utf-8', 'latin-1', 'cp1252', 'iso-8859-1'])
99
+
100
  Returns:
101
  TextFileResult with validated content, encoding used, and metadata
102
  """
103
  if encodings is None:
104
  encodings = ['utf-8', 'latin-1', 'cp1252', 'iso-8859-1']
105
+
106
  file_info = FileUtils.get_file_info(file_path)
107
  if not file_info.exists:
108
  return TextFileResult(
 
110
  error=file_info.error,
111
  file_info=file_info
112
  )
113
+
114
  for encoding in encodings:
115
  try:
116
  with open(file_path, 'r', encoding=encoding) as f:
117
  content = f.read()
118
+
119
  return TextFileResult(
120
  success=True,
121
  content=content,
 
130
  error=f"Error reading file: {str(e)}",
131
  file_info=file_info
132
  )
133
+
134
  return TextFileResult(
135
  success=False,
136
  error="File appears to be binary and cannot be read as text",
137
  file_info=file_info
138
  )
139
+
140
  @staticmethod
141
  def read_binary_file(file_path: str) -> BinaryFileResult:
142
  """Read binary file and return base64 encoded content with Pydantic validation."""
 
147
  error=file_info.error,
148
  file_info=file_info
149
  )
150
+
151
  try:
152
  import base64
153
  with open(file_path, 'rb') as f:
154
  content = f.read()
155
+
156
  return BinaryFileResult(
157
  success=True,
158
  content=base64.b64encode(content).decode('utf-8'),
 
164
  error=f"Error reading binary file: {str(e)}",
165
  file_info=file_info
166
  )
167
+
168
  @staticmethod
169
  def create_tool_response(tool_name: str, result: str = None, error: str = None,
170
  file_info: FileInfo = None) -> str:
 
182
  )
183
  else:
184
  sanitized_file_info = None
185
+
186
  response = ToolResponse(
187
  tool_name=tool_name,
188
  result=result, # Full result, no truncation
189
  error=error,
190
  file_info=sanitized_file_info
191
  )
192
+
193
  return response.model_dump_json(indent=2)
194
+
195
  @staticmethod
196
  def format_file_size(size_bytes: int) -> str:
197
  """Format file size in human-readable format."""
 
203
  return f"{size_bytes // 1024} KB"
204
  else:
205
  return f"{size_bytes // (1024 * 1024)} MB"
206
+
207
  @staticmethod
208
  def file_to_base64(file_path: str) -> str:
209
  """
210
  Convert file to base64 encoded string.
211
+
212
  Args:
213
  file_path (str): Path to the file to convert
214
+
215
  Returns:
216
  str: Base64 encoded file content
217
+
218
  Raises:
219
  FileNotFoundError: If file doesn't exist
220
  IOError: If file can't be read
221
  """
222
  import base64
223
+
224
  if not FileUtils.file_exists(file_path):
225
  raise FileNotFoundError(f"File not found: {file_path}")
226
+
227
  try:
228
  with open(file_path, 'rb') as f:
229
  file_content = f.read()
230
  return base64.b64encode(file_content).decode('utf-8')
231
  except Exception as e:
232
  raise IOError(f"Error reading file {file_path}: {str(e)}")
233
+
234
  @staticmethod
235
  def download_file_to_path(url: str, target_path: str = None) -> str:
236
  """
237
  Download file from URL to local path.
238
+
239
  Args:
240
  url (str): URL to download from
241
  target_path (str, optional): Local path to save to. If None, creates temp file.
242
+
243
  Returns:
244
  str: Path to downloaded file
245
+
246
  Raises:
247
  requests.RequestException: If download fails
248
  IOError: If file can't be written
 
250
  import requests
251
  import tempfile
252
  import os
253
+ import logging
254
  from urllib.parse import urlparse
255
+
256
+ logger = logging.getLogger(__name__)
257
+
258
  try:
259
+ # Add polite bot identification headers
260
+ headers = {
261
+ 'User-Agent': 'CMW-Platform-Agent/1.0 (+https://github.com/arterm-sedov/cmw-platform-agent) Mozilla/5.0'
262
+ }
263
+
264
  # First make a HEAD request to get Content-Type
265
+ logger.info(f"Attempting to download from URL: {url}")
266
+ head_response = requests.head(url, headers=headers, timeout=30, allow_redirects=True)
267
  head_response.raise_for_status()
268
+ content_type = head_response.headers.get('content-type', 'unknown')
269
+ logger.info(f"HEAD request successful, Content-Type: {content_type}")
270
+
271
  if target_path is None:
272
  # Create temp file with proper extension
273
  parsed_url = urlparse(url)
274
  filename = os.path.basename(parsed_url.path) or "downloaded_file"
275
  # Extract extension from URL
276
  _, url_ext = os.path.splitext(filename)
277
+
278
  # Get Content-Type header
279
  content_type = head_response.headers.get('content-type', '').lower()
280
+
281
  # MIME type to extension mapping
282
  mime_to_ext = {
283
  # Documents
 
291
  'application/rtf': '.rtf',
292
  'application/zip': '.zip',
293
  'application/x-zip-compressed': '.zip',
294
+
295
  # Text formats
296
  'text/plain': '.txt',
297
  'text/html': '.html',
 
301
  'text/xml': '.xml',
302
  'application/json': '.json',
303
  'application/xml': '.xml',
304
+
305
  # Images
306
  'image/jpeg': '.jpg',
307
  'image/jpg': '.jpg',
 
311
  'image/svg+xml': '.svg',
312
  'image/bmp': '.bmp',
313
  'image/tiff': '.tiff',
314
+
315
  # Audio
316
  'audio/mpeg': '.mp3',
317
  'audio/wav': '.wav',
318
  'audio/ogg': '.ogg',
319
  'audio/mp4': '.m4a',
320
+
321
  # Video
322
  'video/mp4': '.mp4',
323
  'video/avi': '.avi',
324
  'video/quicktime': '.mov',
325
  'video/x-msvideo': '.avi',
326
  }
327
+
328
  # Smart extension detection strategy:
329
  # 1. If Content-Type is specific and matches known types, use it
330
  # 2. If URL has a standard extension, use it
331
  # 3. Fallback to Content-Type if URL extension is non-standard
332
+
333
  ext = None
334
  content_type_ext = None
335
  url_ext_valid = False
336
+
337
  # Get extension from Content-Type
338
  for mime_type, extension in mime_to_ext.items():
339
  if mime_type in content_type:
340
  content_type_ext = extension
341
  break
342
+
343
  # Check if URL extension is valid (standard file extension)
344
  if url_ext:
345
  # Check if URL extension matches any known extension
346
  known_extensions = set(mime_to_ext.values())
347
  url_ext_valid = url_ext.lower() in known_extensions
348
+
349
  # Decision logic
350
  if content_type_ext and url_ext_valid:
351
  # Both are valid - prefer Content-Type for accuracy
 
362
  else:
363
  # No extension found
364
  ext = ''
365
+
366
  temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=ext)
367
  target_path = temp_file.name
368
  temp_file.close()
369
+
370
  # Now download the file
371
+ logger.info(f"Starting download to: {target_path}")
372
+ response = requests.get(url, headers=headers, stream=True, timeout=60, allow_redirects=True)
373
  response.raise_for_status()
374
+
375
  with open(target_path, 'wb') as f:
376
  for chunk in response.iter_content(chunk_size=8192):
377
  f.write(chunk)
378
+
379
+ logger.info(f"Download completed successfully: {target_path}")
380
  return target_path
381
+
382
  except Exception as e:
383
  raise IOError(f"Error downloading file from {url}: {str(e)}")
384
+
385
  @staticmethod
386
  def generate_unique_filename(original_filename: str, session_id: str = "default") -> str:
387
  """
388
  Generate a unique filename with timestamp and hash (no session prefix since we use session folders).
389
+
390
  Args:
391
  original_filename (str): Original filename from user upload
392
  session_id (str): Session ID for isolation (used for folder organization)
393
+
394
  Returns:
395
  str: Unique filename with timestamp and hash
396
  """
397
  import hashlib
398
  import time
399
  from pathlib import Path
400
+
401
  # Get file extension
402
  path_obj = Path(original_filename)
403
  name_without_ext = path_obj.stem
404
  extension = path_obj.suffix
405
+
406
  # Generate timestamp and hash (include session_id for uniqueness across sessions)
407
  timestamp = str(int(time.time() * 1000)) # milliseconds
408
  hash_suffix = hashlib.md5(f"{original_filename}{timestamp}{session_id}".encode()).hexdigest()[:8]
409
+
410
  # Create unique filename with session ID for better uniqueness and clarity
411
  unique_name = f"{session_id}_{name_without_ext}_{timestamp}_{hash_suffix}{extension}"
412
+
413
  return unique_name
414
+
415
  @staticmethod
416
  def get_gradio_cache_path() -> str:
417
  """
418
  Get the current Gradio cache directory path.
419
+
420
  Returns:
421
  str: Path to Gradio's cache directory
422
  """
423
  import os
424
  import tempfile
425
+
426
  # Check if GRADIO_TEMP_DIR is set
427
  gradio_temp = os.environ.get('GRADIO_TEMP_DIR')
428
  if gradio_temp:
429
  return gradio_temp
430
+
431
  # Default to system temp directory
432
  return tempfile.gettempdir()
433
+
 
434
  @staticmethod
435
  def resolve_file_reference(file_reference: str, agent=None) -> str:
436
  """
437
  Resolve file reference (filename or URL) to full file path.
438
+
439
  Args:
440
  file_reference (str): Original filename from user upload OR URL
441
  agent: Agent instance with file registry (optional)
442
+
443
  Returns:
444
  str: Full path to the file, or None if not found
445
  """
 
449
  # Download URL to temp file
450
  return FileUtils.download_file_to_path(file_reference)
451
  except Exception as e:
452
+ import logging
453
+ logger = logging.getLogger(__name__)
454
+ logger.error(f"Failed to download URL {file_reference}: {e}")
455
+ logger.error(f"Error type: {type(e).__name__}")
456
+ # Re-raise the exception to get more details
457
+ raise
458
+
459
  # It's a filename - resolve using agent's file registry
460
  if agent and hasattr(agent, 'get_file_path'):
461
  return agent.get_file_path(file_reference)
462
+
463
  return None
464
+
465
  @staticmethod
466
  def resolve_file_path(original_filename: str, agent=None) -> str:
467
  """
468
  Resolve original filename to full file path using agent's file registry.
469
+
470
  Args:
471
  original_filename (str): Original filename from user upload
472
  agent: Agent instance with file registry (optional)
473
+
474
  Returns:
475
  str: Full path to the file, or None if not found
476
  """
477
  if agent and hasattr(agent, 'get_file_path'):
478
  return agent.get_file_path(original_filename)
479
+
480
  return None
481
+
482
  @staticmethod
483
  def resolve_code_input(code_reference: str, agent=None) -> tuple[str, str]:
484
  """
485
  Resolve code reference to actual code content and detected language.
486
+
487
  Args:
488
  code_reference (str): Code content, filename, or URL
489
  agent: Agent instance for file resolution (optional)
490
+
491
  Returns:
492
  tuple: (code_content, detected_language)
493
  """
 
502
  return result.content, language
503
  except Exception as e:
504
  raise ValueError(f"Failed to download URL {code_reference}: {str(e)}")
505
+
506
  # Check if it's a file path (try to resolve via agent first, then direct path)
507
  file_path = None
508
  if agent and hasattr(agent, 'get_file_path'):
509
  file_path = agent.get_file_path(code_reference)
510
+
511
  if not file_path and os.path.exists(code_reference):
512
  file_path = code_reference
513
+
514
  if file_path and os.path.exists(file_path):
515
  result = FileUtils.read_text_file(file_path)
516
  if not result.success:
517
  raise ValueError(f"Failed to read file: {result.error}")
518
  language = FileUtils.detect_language_from_extension(file_path)
519
  return result.content, language
520
+
521
  # It's code content - return as-is with no language detection
522
  return code_reference, None
523
+
524
  @staticmethod
525
  def detect_language_from_extension(file_path: str) -> str:
526
  """Detect programming language from file extension."""
 
545
  '.swift': 'swift'
546
  }
547
  return extension_map.get(Path(file_path).suffix.lower(), 'python')
548
+
549
  @staticmethod
550
  def is_text_file(file_path: str) -> bool:
551
  """Check if file is likely a text file based on extension."""
 
555
  '.cfg', '.conf', '.env', '.csv', '.tsv'
556
  }
557
  return Path(file_path).suffix.lower() in text_extensions
558
+
559
  @staticmethod
560
  def is_image_file(file_path: str) -> bool:
561
  """Check if file is likely an image file based on extension."""
 
563
  '.png', '.jpg', '.jpeg', '.gif', '.bmp', '.tiff', '.webp', '.svg'
564
  }
565
  return Path(file_path).suffix.lower() in image_extensions
566
+
567
  @staticmethod
568
  def is_audio_file(file_path: str) -> bool:
569
  """Check if file is likely an audio file based on extension."""
 
571
  '.mp3', '.wav', '.flac', '.aac', '.ogg', '.m4a', '.wma'
572
  }
573
  return Path(file_path).suffix.lower() in audio_extensions
574
+
575
  @staticmethod
576
  def is_video_file(file_path: str) -> bool:
577
  """Check if file is likely a video file based on extension."""
 
579
  '.mp4', '.avi', '.mov', '.wmv', '.flv', '.webm', '.mkv'
580
  }
581
  return Path(file_path).suffix.lower() in video_extensions
582
+
583
  @staticmethod
584
  def is_pdf_file(file_path: str) -> bool:
585
  """Check if file is likely a PDF file based on extension."""
tools/tools.py CHANGED
@@ -1399,40 +1399,182 @@ def combine_images(images_base64: List[str], operation: str,
1399
 
1400
  # ========== VIDEO/AUDIO UNDERSTANDING TOOLS ==========
1401
  @tool
1402
- def understand_video(youtube_url: str, prompt: str, system_prompt: str = None) -> str:
1403
- """
1404
- Analyze a YouTube video using Google Gemini's video understanding capabilities.
1405
- This tool can understand video content, extract information, and answer questions
1406
- about what happens in the video.
1407
- It uses the Gemini API and requires the GEMINI_KEY environment variable to be set.
 
 
 
 
 
 
 
 
 
1408
  Args:
1409
- youtube_url (str): The URL of the YouTube video to analyze.
1410
- prompt (str): A question or request regarding the video content.
1411
- system_prompt (str, optional): System prompt for formatting guidance.
 
 
 
 
 
 
 
 
 
 
 
1412
  Returns:
1413
- str: Analysis of the video content based on the prompt, or error message.
1414
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1415
  try:
1416
  client = _get_gemini_client()
1417
- # Create enhanced prompt with system prompt if provided
1418
- if system_prompt:
1419
- enhanced_prompt = f"{system_prompt}\n\nAnalyze the video at {youtube_url} and answer the following question:\n{prompt}\n\nProvide your answer in the required FINAL ANSWER format."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1420
  else:
1421
- enhanced_prompt = prompt
1422
- video_description = client.models.generate_content(
1423
- model="gemini-2.5-flash",
1424
- contents=types.Content(
1425
- parts=[
1426
- types.Part(file_data=types.FileData(file_uri=youtube_url)),
1427
- types.Part(text=enhanced_prompt)
1428
- ]
1429
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1430
  )
1431
- return json.dumps({
1432
- "type": "tool_response",
1433
- "tool_name": "understand_video",
1434
- "result": video_description.text
1435
- })
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1436
  except Exception as e:
1437
  return json.dumps({
1438
  "type": "tool_response",
@@ -1441,18 +1583,20 @@ def understand_video(youtube_url: str, prompt: str, system_prompt: str = None) -
1441
  })
1442
 
1443
  @tool
1444
- def understand_audio(file_reference: str, prompt: str, system_prompt: str = None, agent=None) -> str:
 
1445
  """
1446
  Analyze an audio file using Google Gemini's audio understanding capabilities.
1447
  This tool can transcribe audio, understand spoken content, and answer questions
1448
- about the audio content.
1449
- It uses the Gemini API and requires the GEMINI_KEY environment variable to be set.
1450
  The audio file is uploaded to Gemini and then analyzed with the provided prompt.
1451
  Args:
1452
  file_reference (str): Original filename from user upload OR URL to download OR base64 encoded audio data.
1453
  prompt (str): A question or request regarding the audio content.
1454
- system_prompt (str, optional): System prompt for formatting guidance.
1455
  agent: Agent instance for file resolution (injected automatically)
 
 
1456
  Returns:
1457
  str: Analysis of the audio content based on the prompt, or error message.
1458
  """
@@ -1480,6 +1624,14 @@ def understand_audio(file_reference: str, prompt: str, system_prompt: str = None
1480
  "error": f"Error uploading audio file to Gemini: {str(upload_error)}"
1481
  })
1482
  else:
 
 
 
 
 
 
 
 
1483
  # Try base64 fallback
1484
  try:
1485
  # Decode base64 and create temporary file
@@ -1499,16 +1651,28 @@ def understand_audio(file_reference: str, prompt: str, system_prompt: str = None
1499
  "tool_name": "understand_audio",
1500
  "error": f"Error processing audio data: {str(decode_error)}. Expected base64 encoded audio data, valid file path, or URL."
1501
  })
1502
- # Create enhanced prompt with system prompt if provided
1503
- if system_prompt:
1504
- enhanced_prompt = f"{system_prompt}\n\nAnalyze the audio file and answer the following question:\n{prompt}\n\nProvide your answer in the required FINAL ANSWER format."
1505
- else:
1506
- enhanced_prompt = prompt
 
 
 
 
 
1507
  contents = [enhanced_prompt, mp3_file]
 
 
 
 
 
 
1508
  try:
1509
  response = client.models.generate_content(
1510
  model="gemini-2.5-flash",
1511
- contents=contents
 
1512
  )
1513
  return json.dumps({
1514
  "type": "tool_response",
 
1399
 
1400
  # ========== VIDEO/AUDIO UNDERSTANDING TOOLS ==========
1401
  @tool
1402
+ def understand_video(file_reference: str, prompt: str, system_prompt: str = None, agent=None,
1403
+ start_time: str = None, end_time: str = None, fps: float = None) -> str:
1404
+ """
1405
+ Analyze a video using Google Gemini's video understanding capabilities.
1406
+ This tool can understand video content, extract information, answer questions,
1407
+ and provide transcriptions with timestamps. Supports video clipping and custom frame rates.
1408
+ Supports four input methods:
1409
+ 1. Uploaded video files - File size >20MB
1410
+ 2. Direct video URLs - File size >20MB
1411
+ 3. YouTube URLs - No size limit
1412
+ 4. Inline video data - For small videos <20MB
1413
+ Advanced features:
1414
+ - Video clipping: Specify start_time and end_time in MM:SS format (e.g., "02:30", "03:29")
1415
+ - Custom frame rate: Set fps for different sampling rates (default: 1 FPS)
1416
+ - Timestamp references: Use MM:SS format in prompts for specific video segments
1417
  Args:
1418
+ file_reference (str): Original filename from user upload OR direct video URL
1419
+ OR YouTube URL OR base64 encoded video data (<20MB)
1420
+ prompt (str): A question or request regarding the video content
1421
+ When referring to specific moments in a video within your prompt,
1422
+ use the MM:SS format (e.g., "01:15" for 1 minute and 15 seconds).
1423
+ system_prompt (str, optional): System instruction
1424
+ agent: Agent instance for file resolution (injected automatically)
1425
+ start_time (str, optional): Start time for video clipping in MM:SS format (e.g., "02:30")
1426
+ end_time (str, optional): End time for video clipping in MM:SS format (e.g., "03:29")
1427
+ fps (float, optional): Custom frame rate for video processing (default: 1 FPS).
1428
+ You might want to set low FPS (< 1) for long videos.
1429
+ This is especially useful for mostly static videos (e.g. lectures).
1430
+ If you want to capture more details in rapidly changing visuals,
1431
+ consider setting a higher FPS value.
1432
  Returns:
1433
+ str: Analysis of the video content based on the prompt, or error message
1434
  """
1435
+ from .file_utils import FileUtils
1436
+ def create_video_metadata():
1437
+ """Create video metadata for clipping and frame rate if specified."""
1438
+ def time_to_seconds(time_str):
1439
+ """Convert MM:SS or raw seconds to API-required seconds format with 's' suffix.
1440
+ Examples:
1441
+ "02:30" -> "150s"
1442
+ "1:15" -> "75s"
1443
+ "1250" -> "1250s"
1444
+ "1250s" -> "1250s"
1445
+ """
1446
+ if not time_str:
1447
+ return None
1448
+ # If already has 's' suffix, return as-is
1449
+ if time_str.endswith('s'):
1450
+ return time_str
1451
+ # Check if it's MM:SS format
1452
+ if ':' in time_str:
1453
+ parts = time_str.split(':')
1454
+ if len(parts) == 2:
1455
+ minutes, seconds = parts
1456
+ total_seconds = int(minutes) * 60 + int(seconds)
1457
+ return f"{total_seconds}s"
1458
+ # Assume it's already in seconds, add 's' suffix
1459
+ return f"{time_str}s"
1460
+ metadata = {}
1461
+ if start_time:
1462
+ metadata['start_offset'] = time_to_seconds(start_time)
1463
+ if end_time:
1464
+ metadata['end_offset'] = time_to_seconds(end_time)
1465
+ if fps is not None:
1466
+ metadata['fps'] = fps
1467
+ return metadata if metadata else None
1468
  try:
1469
  client = _get_gemini_client()
1470
+ if not client:
1471
+ return json.dumps({
1472
+ "type": "tool_response",
1473
+ "tool_name": "understand_video",
1474
+ "error": "Gemini client not available. Check GEMINI_KEY environment variable."
1475
+ })
1476
+ # Create video metadata if any advanced features are specified
1477
+ video_metadata = create_video_metadata()
1478
+ # Determine input type and handle accordingly
1479
+ video_part = None
1480
+ # Check if it's a YouTube URL (special handling)
1481
+ if file_reference.startswith(('https://www.youtube.com/', 'https://youtube.com/',
1482
+ 'https://youtu.be/', 'http://www.youtube.com/',
1483
+ 'http://youtube.com/', 'http://youtu.be/')):
1484
+ # YouTube URL - pass directly to Gemini with optional metadata
1485
+ if video_metadata:
1486
+ video_part = types.Part(
1487
+ file_data=types.FileData(file_uri=file_reference),
1488
+ video_metadata=types.VideoMetadata(**video_metadata)
1489
+ )
1490
+ else:
1491
+ video_part = types.Part(file_data=types.FileData(file_uri=file_reference))
1492
  else:
1493
+ # Try to resolve as file reference (uploaded file or regular URL)
1494
+ resolved_path = FileUtils.resolve_file_reference(file_reference, agent)
1495
+ if resolved_path:
1496
+ # It's a file (uploaded or downloaded from URL)
1497
+ try:
1498
+ uploaded_file = client.files.upload(file=resolved_path)
1499
+ if video_metadata:
1500
+ video_part = types.Part(
1501
+ file_data=types.FileData(file_uri=uploaded_file.uri),
1502
+ video_metadata=types.VideoMetadata(**video_metadata)
1503
+ )
1504
+ else:
1505
+ video_part = types.Part(file_data=types.FileData(file_uri=uploaded_file.uri))
1506
+ except Exception as upload_error:
1507
+ return json.dumps({
1508
+ "type": "tool_response",
1509
+ "tool_name": "understand_video",
1510
+ "error": f"Error uploading video file to Gemini: {str(upload_error)}"
1511
+ })
1512
+ else:
1513
+ # Try inline video data for small files (<20MB)
1514
+ try:
1515
+ # Decode base64 and use inline data (not temporary file)
1516
+ video_data = base64.b64decode(file_reference)
1517
+ # Check size limit (20MB = 20 * 1024 * 1024 bytes)
1518
+ if len(video_data) > 20 * 1024 * 1024:
1519
+ return json.dumps({
1520
+ "type": "tool_response",
1521
+ "tool_name": "understand_video",
1522
+ "error": "Video data too large for inline processing (>20MB). Please use file upload or URL instead."
1523
+ })
1524
+ # Use inline data for small videos with optional metadata
1525
+ if video_metadata:
1526
+ video_part = types.Part(
1527
+ inline_data=types.Blob(
1528
+ data=video_data,
1529
+ mime_type='video/mp4' # Default to mp4, could be detected from file extension
1530
+ ),
1531
+ video_metadata=types.VideoMetadata(**video_metadata)
1532
+ )
1533
+ else:
1534
+ video_part = types.Part(
1535
+ inline_data=types.Blob(
1536
+ data=video_data,
1537
+ mime_type='video/mp4' # Default to mp4, could be detected from file extension
1538
+ )
1539
+ )
1540
+ except Exception as decode_error:
1541
+ return json.dumps({
1542
+ "type": "tool_response",
1543
+ "tool_name": "understand_video",
1544
+ "error": f"Error processing video data: {str(decode_error)}. Expected base64 encoded video data (<20MB), valid file path, YouTube URL, or direct video URL."
1545
+ })
1546
+ # Don't embed system_prompt in user prompt - use API parameter instead
1547
+ enhanced_prompt = prompt
1548
+ # Generate content using the video
1549
+ contents = types.Content(
1550
+ parts=[
1551
+ video_part,
1552
+ types.Part(text=enhanced_prompt)
1553
+ ]
1554
  )
1555
+ # Create config with system_instruction if provided
1556
+ config = None
1557
+ if system_prompt:
1558
+ config = types.GenerateContentConfig(
1559
+ system_instruction=system_prompt
1560
+ )
1561
+ try:
1562
+ response = client.models.generate_content(
1563
+ model="gemini-2.5-flash",
1564
+ contents=contents,
1565
+ config=config
1566
+ )
1567
+ return json.dumps({
1568
+ "type": "tool_response",
1569
+ "tool_name": "understand_video",
1570
+ "result": response.text
1571
+ })
1572
+ except Exception as e:
1573
+ return json.dumps({
1574
+ "type": "tool_response",
1575
+ "tool_name": "understand_video",
1576
+ "error": f"Error in video understanding request: {str(e)}"
1577
+ })
1578
  except Exception as e:
1579
  return json.dumps({
1580
  "type": "tool_response",
 
1583
  })
1584
 
1585
  @tool
1586
+ def understand_audio(file_reference: str, prompt: str, system_prompt: str = None, agent=None,
1587
+ start_time: str = None, end_time: str = None) -> str:
1588
  """
1589
  Analyze an audio file using Google Gemini's audio understanding capabilities.
1590
  This tool can transcribe audio, understand spoken content, and answer questions
1591
+ about the audio content. Supports timestamp references in prompts (MM:SS format).
 
1592
  The audio file is uploaded to Gemini and then analyzed with the provided prompt.
1593
  Args:
1594
  file_reference (str): Original filename from user upload OR URL to download OR base64 encoded audio data.
1595
  prompt (str): A question or request regarding the audio content.
1596
+ system_prompt (str, optional): System instruction.
1597
  agent: Agent instance for file resolution (injected automatically)
1598
+ start_time (str, optional): Start time reference in MM:SS format (e.g., "02:30")
1599
+ end_time (str, optional): End time reference in MM:SS format (e.g., "03:29")
1600
  Returns:
1601
  str: Analysis of the audio content based on the prompt, or error message.
1602
  """
 
1624
  "error": f"Error uploading audio file to Gemini: {str(upload_error)}"
1625
  })
1626
  else:
1627
+ # Check if it looks like a URL that failed to download
1628
+ if file_reference.startswith(('http://', 'https://', 'ftp://')):
1629
+ return json.dumps({
1630
+ "type": "tool_response",
1631
+ "tool_name": "understand_audio",
1632
+ "error": f"Failed to download audio from URL: {file_reference}. Please check the URL is accessible and try again."
1633
+ })
1634
+
1635
  # Try base64 fallback
1636
  try:
1637
  # Decode base64 and create temporary file
 
1651
  "tool_name": "understand_audio",
1652
  "error": f"Error processing audio data: {str(decode_error)}. Expected base64 encoded audio data, valid file path, or URL."
1653
  })
1654
+ # Create enhanced prompt with timestamp references if provided
1655
+ timestamp_instruction = ""
1656
+ if start_time and end_time:
1657
+ timestamp_instruction = f" Focus on the audio segment from {start_time} to {end_time}."
1658
+ elif start_time:
1659
+ timestamp_instruction = f" Focus on the audio segment starting from {start_time}."
1660
+ elif end_time:
1661
+ timestamp_instruction = f" Focus on the audio segment up to {end_time}."
1662
+ # Build prompt with timestamp instructions only
1663
+ enhanced_prompt = f"{prompt}\n\n{timestamp_instruction}"
1664
  contents = [enhanced_prompt, mp3_file]
1665
+ # Create config with system_instruction if provided
1666
+ config = None
1667
+ if system_prompt:
1668
+ config = types.GenerateContentConfig(
1669
+ system_instruction=system_prompt
1670
+ )
1671
  try:
1672
  response = client.models.generate_content(
1673
  model="gemini-2.5-flash",
1674
+ contents=contents,
1675
+ config=config
1676
  )
1677
  return json.dumps({
1678
  "type": "tool_response",