from smolagents import Tool
from typing import Dict, Optional, Any
import logging

logger = logging.getLogger(__name__)


class TextCleanerTool(Tool):
    """Tool that cleans and normalizes text using the ``clean-text`` package.

    The tool forwards its boolean switches to ``cleantext.clean`` and then
    applies optional literal string replacements supplied by the caller.
    Failures are reported as a returned ``"Error..."`` string rather than as
    a raised exception.
    """

    name = "clean_text"
    description = (
        "Cleans and normalizes text by removing or replacing unwanted elements"
    )
    # Every argument that has a default value in ``forward`` is marked
    # ``nullable`` so the schema agrees with the method signature; smolagents'
    # Tool validation is documented to require this for defaulted parameters.
    inputs = {
        "text": {"type": "string", "description": "The input text to clean"},
        "fix_unicode": {
            "type": "boolean",
            "description": "Fix broken unicode characters and mojibake",
            "default": True,
            "nullable": True,
        },
        "to_ascii": {
            "type": "boolean",
            "description": "Convert non-ASCII characters to their closest ASCII equivalents",
            "default": True,
            "nullable": True,
        },
        "lower": {
            "type": "boolean",
            "description": "Convert text to lowercase",
            "default": True,
            "nullable": True,
        },
        "no_line_breaks": {
            "type": "boolean",
            "description": "Replace line breaks with spaces",
            "default": False,
            "nullable": True,
        },
        "no_urls": {
            "type": "boolean",
            "description": "Replace URLs with a token",
            "default": False,
            "nullable": True,
        },
        "no_emails": {
            "type": "boolean",
            "description": "Replace email addresses with a token",
            "default": False,
            "nullable": True,
        },
        "no_phone_numbers": {
            "type": "boolean",
            "description": "Replace phone numbers with a token",
            "default": False,
            "nullable": True,
        },
        "no_numbers": {
            "type": "boolean",
            "description": "Replace all numbers with a token",
            "default": False,
            "nullable": True,
        },
        "no_digits": {
            "type": "boolean",
            "description": "Replace all digits with 0",
            "default": False,
            "nullable": True,
        },
        "no_currency_symbols": {
            "type": "boolean",
            "description": "Replace currency symbols with a token",
            "default": False,
            "nullable": True,
        },
        "no_punct": {
            "type": "boolean",
            "description": "Remove all punctuation",
            "default": False,
            "nullable": True,
        },
        "no_emoji": {
            "type": "boolean",
            "description": "Remove all emoji characters",
            "default": False,
            "nullable": True,
        },
        "lang": {
            "type": "string",
            "description": "Language code for special handling ('en' or 'de' supported)",
            "default": "en",
            "nullable": True,
        },
        "custom_replacements": {
            "type": "object",
            "description": "Dictionary of custom string replacements to apply",
            "nullable": True,
        },
    }
    output_type = "string"

    def forward(
        self,
        text: str,
        fix_unicode: bool = True,
        to_ascii: bool = True,
        lower: bool = True,
        no_line_breaks: bool = False,
        no_urls: bool = False,
        no_emails: bool = False,
        no_phone_numbers: bool = False,
        no_numbers: bool = False,
        no_digits: bool = False,
        no_currency_symbols: bool = False,
        no_punct: bool = False,
        no_emoji: bool = False,
        lang: str = "en",
        custom_replacements: Optional[Dict[str, str]] = None,
    ) -> str:
        """Clean and normalize text by removing or replacing unwanted elements.

        Args:
            text: The text to clean. ``None`` and the empty string yield ``""``;
                any other non-string value is converted with ``str()`` first.
            fix_unicode: Passed to ``cleantext.clean`` as ``fix_unicode``.
            to_ascii: Passed to ``cleantext.clean`` as ``to_ascii``.
            lower: Passed to ``cleantext.clean`` as ``lower``.
            no_line_breaks: Passed to ``cleantext.clean`` as ``no_line_breaks``.
            no_urls: Replace URLs with ``<URL>``.
            no_emails: Replace email addresses with ``<EMAIL>``.
            no_phone_numbers: Replace phone numbers with ``<PHONE>``.
            no_numbers: Replace numbers with ``<NUMBER>``.
            no_digits: Replace digits with ``0``.
            no_currency_symbols: Replace currency symbols with ``<CUR>``.
            no_punct: Replace punctuation with the empty string.
            no_emoji: Passed to ``cleantext.clean`` as ``no_emoji``.
            lang: Language code passed to ``cleantext.clean`` as ``lang``.
            custom_replacements: Optional mapping of literal substrings to
                their replacements, applied in iteration order after
                ``clean`` has run. Empty keys are ignored and a ``None``
                value removes the matched substring.

        Returns:
            The cleaned text, or a message starting with ``"Error"`` when the
            input cannot be converted, ``clean-text`` is not installed, or
            cleaning fails.
        """
        # None means "no text". Other falsy non-strings (0, False) are real
        # values, so they go through the string conversion below instead of
        # being silently discarded.
        if text is None:
            return ""

        if not isinstance(text, str):
            try:
                text = str(text)
            except Exception as e:
                logger.error("Failed to convert input to string: %s", e)
                return f"Error: Could not process input of type {type(text)}"

        if not text:
            return ""

        # Imported lazily so a missing dependency becomes a readable error
        # message instead of breaking the import of this module.
        try:
            from cleantext import clean
        except ImportError:
            logger.error(
                "cleantext package not installed. Install with: pip install clean-text"
            )
            return "Error: Required dependency 'clean-text' is not installed."

        # Replacement tokens used by clean() for each enabled ``no_*`` switch.
        replace_params = {
            "replace_with_url": "<URL>",
            "replace_with_email": "<EMAIL>",
            "replace_with_phone_number": "<PHONE>",
            "replace_with_number": "<NUMBER>",
            "replace_with_digit": "0",
            "replace_with_currency_symbol": "<CUR>",
            "replace_with_punct": "",
        }

        try:
            cleaned_text = clean(
                text,
                fix_unicode=fix_unicode,
                to_ascii=to_ascii,
                lower=lower,
                no_line_breaks=no_line_breaks,
                no_urls=no_urls,
                no_emails=no_emails,
                no_phone_numbers=no_phone_numbers,
                no_numbers=no_numbers,
                no_digits=no_digits,
                no_currency_symbols=no_currency_symbols,
                no_punct=no_punct,
                no_emoji=no_emoji,
                lang=lang,
                **replace_params,
            )

            if custom_replacements:
                for old, new in custom_replacements.items():
                    # str.replace("", x) would insert x between every
                    # character, which is never what a caller wants here.
                    if old == "":
                        continue
                    # Values may arrive as non-strings (e.g. numbers or null
                    # from a JSON object); coerce so one bad entry does not
                    # fail the whole call.
                    replacement = "" if new is None else str(new)
                    cleaned_text = cleaned_text.replace(str(old), replacement)

            return cleaned_text

        except Exception as e:
            # Tool boundary: report the failure as text instead of raising.
            logger.error("Error cleaning text: %s", e)
            return f"Error during text cleaning: {str(e)}"