Spaces:
Runtime error
Runtime error
Yago Bolivar
refactor: update tool classes to inherit from Tool base class for consistency and improved structure
bffd09a
| from __future__ import annotations | |
| import os | |
| import mimetypes | |
| from typing import Self, Dict, Any | |
| from smolagents.tools import Tool | |
class FileIdentifier(Tool):
    """
    Identifies file types and maps them to the appropriate processing tool based on file extension.

    Useful for routing files to specialized tools such as speech-to-text, spreadsheet parser,
    image processor, etc. The extension table built in ``__init__`` is consulted first; the
    MIME type guessed by ``mimetypes`` is only used as a fallback for unmapped extensions.
    """

    name = "file_identifier"
    description = "Identifies the file type and suggests a processing action based on its path."
    inputs = {'filepath': {'type': 'string', 'description': 'The path to the file to be identified.'}}
    # NOTE(review): `outputs` is kept for backward compatibility; `output_type` below is
    # presumably the attribute the Tool base class reads -- confirm against smolagents.
    outputs = {'file_info': {'type': 'object', 'description': 'A dictionary with file type information or an error.'}}
    output_type = "object"

    def __init__(self, *args, **kwargs):
        """Build the type/action table and the reverse extension lookup."""
        super().__init__(*args, **kwargs)
        mimetypes.init()

        # Mapping from simple type to action and common extensions
        self.file_type_map = {
            "audio": {"action": "speech-to-text", "extensions": [".mp3", ".wav", ".flac", ".aac", ".ogg"]},
            "spreadsheet": {"action": "spreadsheet_parser", "extensions": [".xlsx", ".xls", ".ods"]},
            "image": {"action": "image_processor", "extensions": [".png", ".jpg", ".jpeg", ".gif", ".bmp"]},
            "python_code": {"action": "safe_code_interpreter", "extensions": [".py"]},
            "pdf": {"action": "pdf_text_extractor", "extensions": [".pdf"]},
            "text": {"action": "text_file_reader", "extensions": [".txt", ".md", ".rtf"]},
            "csv": {"action": "csv_parser", "extensions": [".csv"]},
        }

        # For quick lookup from extension to simple type
        self.extension_to_type = {
            ext: simple_type
            for simple_type, details in self.file_type_map.items()
            for ext in details["extensions"]
        }
        self.is_initialized = True

    def forward(self: Self, filepath: str) -> Dict[str, Any]:
        """
        Identifies the file type and suggests a processing action.

        Args:
            filepath (str): The path to the file to be identified.

        Returns:
            Dict[str, Any]: A dictionary with 'filepath', 'determined_type', 'file_extension',
            'mime_type' and 'suggested_action'; or, when the path does not exist, a
            dictionary with 'filepath' and 'error'.
        """
        if not os.path.exists(filepath):
            return {
                "filepath": filepath,
                "error": "File not found",
            }

        mime_type, encoding = mimetypes.guess_type(filepath)
        file_extension = os.path.splitext(filepath)[1].lower()

        # Prioritize extension-based mapping for specific known types
        determined_type = self.extension_to_type.get(file_extension, "unknown")

        # Fallback to MIME type if extension is not specifically mapped.
        # A non-None encoding (e.g. "gzip" for "data.csv.gz") means the bytes on disk are
        # compressed, so the inner MIME type must not be used to pick a parser.
        if determined_type == "unknown" and mime_type and encoding is None:
            if mime_type.startswith("audio/"):
                determined_type = "audio"
            elif mime_type.startswith("image/"):
                determined_type = "image"
            elif mime_type == "application/pdf":
                determined_type = "pdf"
            elif mime_type == "text/csv":
                determined_type = "csv"
            elif mime_type.startswith("text/"):  # General text
                determined_type = "text"
            elif mime_type in (
                "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
                "application/vnd.ms-excel",
                "application/vnd.oasis.opendocument.spreadsheet",
            ):
                determined_type = "spreadsheet"

        if determined_type in self.file_type_map:
            suggested_action = self.file_type_map[determined_type]["action"]
        else:
            suggested_action = "unknown_handler"

        return {
            "filepath": filepath,
            "determined_type": determined_type,
            "file_extension": file_extension,
            "mime_type": mime_type,
            "suggested_action": suggested_action,
        }
if __name__ == '__main__':
    # Local import: only this demo needs it.
    import tempfile

    tool_instance = FileIdentifier()

    # Create the dummy files inside a throwaway directory so the demo can never
    # overwrite or delete a real file (e.g. an existing "script.py") in the
    # current working directory, and so cleanup happens even if forward() raises.
    dummy_files = ["test.mp3", "document.xlsx", "image.png", "script.py", "unknown.xyz", "archive.zip"]
    with tempfile.TemporaryDirectory() as tmp_dir:
        for fname in dummy_files:
            dummy_path = os.path.join(tmp_dir, fname)
            with open(dummy_path, "w", encoding="utf-8") as f:
                f.write("dummy content")  # Content is irrelevant; only the name is inspected
            result = tool_instance.forward(dummy_path)
            print(f"File: {fname}, Info: {result}")

        # Test with a non-existent file (guaranteed absent in the fresh directory)
        non_existent_file = os.path.join(tmp_dir, "no_such_file.txt")
        result_non_existent = tool_instance.forward(non_existent_file)
        print(f"File: {non_existent_file}, Info: {result_non_existent}")