"""MCP server exposing Hugging Face Inference API tasks as tools.

Each tool wraps a ``huggingface_hub.InferenceClient`` call and returns a
plain string (stringified result, base64 payload, or ``"Error: ..."`` on
failure) so the MCP transport never has to serialize rich objects.
"""

import base64
import io
import os
from typing import Any, Dict, List, Optional  # noqa: F401 (List kept for compat)

from huggingface_hub import InferenceClient
from mcp.server.fastmcp import FastMCP

# Initialize the MCP server.
mcp = FastMCP("Hugging Face tools")

# Token is optional: anonymous requests work for many public models but are
# rate-limited, and gated/private models will fail without authentication.
HF_TOKEN = os.environ.get("HF_TOKEN")
if not HF_TOKEN:
    print("Warning: HF_TOKEN environment variable not set. Some authenticated requests may fail.")

client = InferenceClient(token=HF_TOKEN)


@mcp.tool()
def list_available_tasks() -> str:
    """Lists all the AI tasks supported by this server."""
    tasks = [
        "Audio-Text-to-Text", "Image-Text-to-Text", "Image-Text-to-Image",
        "Image-Text-to-Video", "Visual Question Answering",
        "Document Question Answering", "Video-Text-to-Text",
        "Visual Document Retrieval", "Depth Estimation",
        "Image Classification", "Object Detection", "Image Segmentation",
        "Text-to-Image", "Image-to-Text", "Image-to-Image", "Image-to-Video",
        "Unconditional Image Generation", "Video Classification",
        "Text-to-Video", "Zero-Shot Image Classification", "Mask Generation",
        "Zero-Shot Object Detection", "Text-to-3D", "Image-to-3D",
        "Image Feature Extraction", "Keypoint Detection", "Video-to-Video",
        "Text Classification", "Token Classification",
        "Table Question Answering", "Question Answering",
        "Zero-Shot Classification", "Translation", "Summarization",
        "Feature Extraction", "Text Generation", "Fill-Mask",
        "Sentence Similarity", "Text Ranking", "Text-to-Speech",
        "Text-to-Audio", "Automatic Speech Recognition", "Audio-to-Audio",
        "Audio Classification", "Voice Activity Detection",
        "Tabular Classification", "Tabular Regression",
        "Time Series Forecasting", "Reinforcement Learning", "Robotics",
        "Graph Machine Learning",
    ]
    return f"Supported Tasks: {', '.join(tasks)}"


@mcp.tool()
def visual_question_answering(image: str, question: str, model: Optional[str] = None) -> str:
    """Answer questions about an image.

    Args:
        image: URL or Base64 string of the image.
        question: The question to answer.
        model: Optional model ID (e.g., 'dandelin/vilt-b32-finetuned-vqa').
    """
    try:
        # InferenceClient accepts a URL, path, or raw bytes for the image
        # argument, so the string is forwarded as-is.
        result = client.visual_question_answering(image, question, model=model)
        return str(result)
    except Exception as e:
        # Tools deliberately return error strings instead of raising, so the
        # MCP client always receives a usable response.
        return f"Error: {e}"


@mcp.tool()
def text_to_image(prompt: str, model: Optional[str] = None) -> str:
    """Generate an image from text.

    Args:
        prompt: Text description of the desired image.
        model: Optional model ID.

    Returns:
        Base64 encoded image string (PNG when the client returns a PIL
        image; raw payload passthrough when it returns bytes).
    """
    try:
        img = client.text_to_image(prompt, model=model)
        # The client normally returns a PIL Image, but raw endpoints may
        # hand back encoded bytes directly; handle both without importing
        # PIL here (duck-typed .save()).
        if isinstance(img, (bytes, bytearray)):
            raw = bytes(img)
        else:
            buffer = io.BytesIO()
            img.save(buffer, format="PNG")
            raw = buffer.getvalue()
        return base64.b64encode(raw).decode("utf-8")
    except Exception as e:
        return f"Error: {e}"


@mcp.tool()
def image_classification(image: str, model: Optional[str] = None) -> str:
    """Classify an image.

    Args:
        image: URL or Base64 string.
        model: Optional model ID.
    """
    try:
        result = client.image_classification(image, model=model)
        return str(result)
    except Exception as e:
        return f"Error: {e}"


@mcp.tool()
def object_detection(image: str, model: Optional[str] = None) -> str:
    """Detect objects in an image.

    Args:
        image: URL or Base64 string.
        model: Optional model ID.
    """
    try:
        result = client.object_detection(image, model=model)
        return str(result)
    except Exception as e:
        return f"Error: {e}"


@mcp.tool()
def image_to_text(image: str, model: Optional[str] = None) -> str:
    """Generate a caption or text description for an image.

    Args:
        image: URL or Base64 string.
        model: Optional model ID.
    """
    try:
        result = client.image_to_text(image, model=model)
        return str(result)
    except Exception as e:
        return f"Error: {e}"


@mcp.tool()
def text_generation(prompt: str, model: Optional[str] = None, max_new_tokens: int = 500) -> str:
    """Generate text based on a prompt.

    Args:
        prompt: Input text.
        model: Model ID.
        max_new_tokens: Maximum tokens to generate.
    """
    try:
        return client.text_generation(prompt, model=model, max_new_tokens=max_new_tokens)
    except Exception as e:
        return f"Error: {e}"


@mcp.tool()
def summarization(text: str, model: Optional[str] = None) -> str:
    """Summarize a text.

    Args:
        text: Text to summarize.
        model: Optional model ID.
    """
    try:
        result = client.summarization(text, model=model)
        # Some backends return a list like [{'summary_text': ...}]; unwrap
        # it when present, otherwise stringify whatever came back.
        if isinstance(result, list) and len(result) > 0:
            return result[0].get('summary_text', str(result))
        return str(result)
    except Exception as e:
        return f"Error: {e}"


@mcp.tool()
def translation(text: str, model: Optional[str] = None) -> str:
    """Translate text. Model usually determines source/target languages.

    Args:
        text: Text to translate.
        model: Optional model ID (the model choice fixes the language pair).
    """
    try:
        result = client.translation(text, model=model)
        # Unwrap the common [{'translation_text': ...}] response shape.
        if isinstance(result, list) and len(result) > 0:
            return result[0].get('translation_text', str(result))
        return str(result)
    except Exception as e:
        return f"Error: {e}"


@mcp.tool()
def text_classification(text: str, model: Optional[str] = None) -> str:
    """Classify text (e.g. sentiment analysis).

    Args:
        text: Text to classify.
        model: Optional model ID.
    """
    try:
        result = client.text_classification(text, model=model)
        return str(result)
    except Exception as e:
        return f"Error: {e}"


@mcp.tool()
def automatic_speech_recognition(audio: str, model: Optional[str] = None) -> str:
    """Transcribe audio.

    Args:
        audio: URL or Base64 string of the audio file.
        model: Optional model ID.
    """
    try:
        # URLs are forwarded as-is; anything else is assumed to be a
        # base64-encoded payload and decoded to raw bytes first.
        if audio.startswith(("http://", "https://")):
            result = client.automatic_speech_recognition(audio, model=model)
        else:
            audio_data = base64.b64decode(audio)
            result = client.automatic_speech_recognition(audio_data, model=model)
        if isinstance(result, dict):
            return result.get('text', str(result))
        return str(result)
    except Exception as e:
        return f"Error: {e}"


@mcp.tool()
def text_to_speech(text: str, model: Optional[str] = None) -> str:
    """Generate audio from text.

    Args:
        text: Text to synthesize.
        model: Optional model ID.

    Returns:
        Base64 encoded audio.
    """
    try:
        audio_bytes = client.text_to_speech(text, model=model)
        return base64.b64encode(audio_bytes).decode('utf-8')
    except Exception as e:
        return f"Error: {e}"


@mcp.tool()
def generic_hf_inference(task: str, inputs: Dict[str, Any], model: Optional[str] = None) -> str:
    """Run any Hugging Face inference task that doesn't have a specific tool.

    Args:
        task: The task name (e.g., 'text-generation', 'translation').
        inputs: Dictionary of inputs required for the task.
        model: Model ID to use.
    """
    try:
        # Raw fallback; correct payload shape depends entirely on the task.
        # NOTE(review): InferenceClient.post is deprecated/removed in newer
        # huggingface_hub releases — confirm the pinned version supports it.
        result = client.post(json=inputs, model=model, task=task)
        return str(result)
    except Exception as e:
        return f"Error: {e}"