Dhanushsaireddy144's picture
Mcp Configured for HF Spaces deployment
9682111
import os
import sys
from typing import Optional, List, Any, Dict

from huggingface_hub import InferenceClient
from mcp.server.fastmcp import FastMCP
# Initialize the MCP server
mcp = FastMCP("Hugging Face tools")

# Get token from environment
HF_TOKEN = os.environ.get("HF_TOKEN")
if not HF_TOKEN:
    # Diagnostics go to stderr: when an MCP server is run over the stdio
    # transport, stdout carries the JSON-RPC stream and stray text there
    # would corrupt it.
    print(
        "Warning: HF_TOKEN environment variable not set. Some authenticated requests may fail.",
        file=sys.stderr,
    )

# Shared inference client used by every tool below (token may be None).
client = InferenceClient(token=HF_TOKEN)
@mcp.tool()
def list_available_tasks() -> str:
    """Return the catalogue of AI task names advertised by this server.

    The names come from a fixed, hard-coded list and are joined into a
    single comma-separated line prefixed with ``Supported Tasks: ``.
    """
    task_names = (
        "Audio-Text-to-Text",
        "Image-Text-to-Text",
        "Image-Text-to-Image",
        "Image-Text-to-Video",
        "Visual Question Answering",
        "Document Question Answering",
        "Video-Text-to-Text",
        "Visual Document Retrieval",
        "Depth Estimation",
        "Image Classification",
        "Object Detection",
        "Image Segmentation",
        "Text-to-Image",
        "Image-to-Text",
        "Image-to-Image",
        "Image-to-Video",
        "Unconditional Image Generation",
        "Video Classification",
        "Text-to-Video",
        "Zero-Shot Image Classification",
        "Mask Generation",
        "Zero-Shot Object Detection",
        "Text-to-3D",
        "Image-to-3D",
        "Image Feature Extraction",
        "Keypoint Detection",
        "Video-to-Video",
        "Text Classification",
        "Token Classification",
        "Table Question Answering",
        "Question Answering",
        "Zero-Shot Classification",
        "Translation",
        "Summarization",
        "Feature Extraction",
        "Text Generation",
        "Fill-Mask",
        "Sentence Similarity",
        "Text Ranking",
        "Text-to-Speech",
        "Text-to-Audio",
        "Automatic Speech Recognition",
        "Audio-to-Audio",
        "Audio Classification",
        "Voice Activity Detection",
        "Tabular Classification",
        "Tabular Regression",
        "Time Series Forecasting",
        "Reinforcement Learning",
        "Robotics",
        "Graph Machine Learning",
    )
    joined = ", ".join(task_names)
    return "Supported Tasks: " + joined
@mcp.tool()
def visual_question_answering(image: str, question: str, model: Optional[str] = None) -> str:
    """
    Answer questions about an image.

    Args:
        image: URL or Base64 string of the image (a ``data:`` URI is also accepted).
        question: The question to answer.
        model: Optional model ID (e.g., 'dandelin/vilt-b32-finetuned-vqa').

    Returns:
        The string form of the inference result, or ``"Error: <message>"``
        if anything fails.
    """
    try:
        import base64

        if image.startswith(("http://", "https://")):
            # Remote images are fetched by the client itself.
            payload = image
        else:
            # InferenceClient interprets a non-URL string as a local file
            # path, so Base64 input must be decoded to raw bytes first.
            # For a data URI, only the part after the first comma is Base64.
            encoded = image.split(",", 1)[-1] if image.startswith("data:") else image
            payload = base64.b64decode(encoded)
        result = client.visual_question_answering(payload, question, model=model)
        return str(result)
    except Exception as e:
        return f"Error: {e}"
@mcp.tool()
def text_to_image(prompt: str, model: Optional[str] = None) -> str:
    """
    Generate an image from a text prompt.

    Args:
        prompt: Text description of the image to generate.
        model: Optional model ID.

    Returns: Base64 encoded image string (as produced by the project-local
    ``utils.encode_image``), or ``"Error: <message>"`` on failure.
    """
    try:
        generated = client.text_to_image(prompt, model=model)
        # Imported after the inference call, as in the original flow.
        import io
        import utils

        pil_image_type = utils.Image.Image
        if isinstance(generated, pil_image_type):
            picture = generated
        else:
            # Anything that is not already a PIL image is treated as raw bytes.
            picture = utils.Image.open(io.BytesIO(generated))
        return utils.encode_image(picture)
    except Exception as err:
        return f"Error: {err}"
@mcp.tool()
def image_classification(image: str, model: Optional[str] = None) -> str:
    """
    Classify an image.

    Args:
        image: URL or Base64 string (a ``data:`` URI is also accepted).
        model: Optional model ID.

    Returns:
        The string form of the classification result, or
        ``"Error: <message>"`` if anything fails.
    """
    try:
        import base64

        if image.startswith(("http://", "https://")):
            # Remote images are fetched by the client itself.
            payload = image
        else:
            # InferenceClient interprets a non-URL string as a local file
            # path, so Base64 input must be decoded to raw bytes first.
            # For a data URI, only the part after the first comma is Base64.
            encoded = image.split(",", 1)[-1] if image.startswith("data:") else image
            payload = base64.b64decode(encoded)
        result = client.image_classification(payload, model=model)
        return str(result)
    except Exception as e:
        return f"Error: {e}"
@mcp.tool()
def object_detection(image: str, model: Optional[str] = None) -> str:
    """
    Detect objects in an image.

    Args:
        image: URL or Base64 string (a ``data:`` URI is also accepted).
        model: Optional model ID.

    Returns:
        The string form of the detection result, or ``"Error: <message>"``
        if anything fails.
    """
    try:
        import base64

        if image.startswith(("http://", "https://")):
            # Remote images are fetched by the client itself.
            payload = image
        else:
            # InferenceClient interprets a non-URL string as a local file
            # path, so Base64 input must be decoded to raw bytes first.
            # For a data URI, only the part after the first comma is Base64.
            encoded = image.split(",", 1)[-1] if image.startswith("data:") else image
            payload = base64.b64decode(encoded)
        result = client.object_detection(payload, model=model)
        return str(result)
    except Exception as e:
        return f"Error: {e}"
@mcp.tool()
def image_to_text(image: str, model: Optional[str] = None) -> str:
    """
    Generate a caption or text description for an image.

    Args:
        image: URL or Base64 string (a ``data:`` URI is also accepted).
        model: Optional model ID.

    Returns:
        The string form of the captioning result, or ``"Error: <message>"``
        if anything fails.
    """
    try:
        import base64

        if image.startswith(("http://", "https://")):
            # Remote images are fetched by the client itself.
            payload = image
        else:
            # InferenceClient interprets a non-URL string as a local file
            # path, so Base64 input must be decoded to raw bytes first.
            # For a data URI, only the part after the first comma is Base64.
            encoded = image.split(",", 1)[-1] if image.startswith("data:") else image
            payload = base64.b64decode(encoded)
        result = client.image_to_text(payload, model=model)
        return str(result)
    except Exception as e:
        return f"Error: {e}"
@mcp.tool()
def text_generation(prompt: str, model: Optional[str] = None, max_new_tokens: int = 500) -> str:
    """
    Continue a prompt with generated text.

    Args:
        prompt: Input text.
        model: Model ID.
        max_new_tokens: Maximum tokens to generate.

    Returns:
        Whatever the inference client returns for the prompt, or
        ``"Error: <message>"`` when the call raises.
    """
    try:
        completion = client.text_generation(
            prompt,
            model=model,
            max_new_tokens=max_new_tokens,
        )
    except Exception as err:
        return f"Error: {err}"
    return completion
@mcp.tool()
def summarization(text: str, model: Optional[str] = None) -> str:
    """
    Summarize a text.

    Args:
        text: The text to summarize.
        model: Optional model ID.

    Returns:
        The summary text when it can be extracted from the response,
        otherwise the string form of the raw response; ``"Error: <message>"``
        if the call fails.
    """
    try:
        result = client.summarization(text, model=model)
        # The response shape varies: a list of {'summary_text': ...} dicts,
        # a single mapping, or an output object exposing ``summary_text``.
        # Unwrap a non-empty list to its first element, then probe both forms.
        candidate = result[0] if isinstance(result, list) and result else result
        summary = getattr(candidate, "summary_text", None)
        if summary is None and isinstance(candidate, dict):
            summary = candidate.get("summary_text")
        if summary is not None:
            return str(summary)
        # Unknown shape (or a plain string): fall back to its string form.
        return str(result)
    except Exception as e:
        return f"Error: {e}"
@mcp.tool()
def translation(text: str, model: Optional[str] = None) -> str:
    """
    Translate text. Model usually determines source/target languages.

    Args:
        text: The text to translate.
        model: Optional model ID.

    Returns:
        The translated text when it can be extracted from the response,
        otherwise the string form of the raw response; ``"Error: <message>"``
        if the call fails.
    """
    try:
        # Only the text is forwarded; no source/target language is passed,
        # so the language pair is left to the chosen model.
        result = client.translation(text, model=model)
        # The response shape varies: a list of {'translation_text': ...}
        # dicts, a single mapping, or an output object exposing
        # ``translation_text``. Unwrap a non-empty list, then probe both forms.
        candidate = result[0] if isinstance(result, list) and result else result
        translated = getattr(candidate, "translation_text", None)
        if translated is None and isinstance(candidate, dict):
            translated = candidate.get("translation_text")
        if translated is not None:
            return str(translated)
        # Unknown shape (or a plain string): fall back to its string form.
        return str(result)
    except Exception as e:
        return f"Error: {e}"
@mcp.tool()
def text_classification(text: str, model: Optional[str] = None) -> str:
    """
    Run a text classifier (for example sentiment analysis) on the input.

    Args:
        text: The text to classify.
        model: Optional model ID.

    Returns:
        The string form of the classification result, or
        ``"Error: <message>"`` when the call raises.
    """
    try:
        labels = client.text_classification(text, model=model)
        rendered = str(labels)
    except Exception as err:
        return f"Error: {err}"
    return rendered
@mcp.tool()
def automatic_speech_recognition(audio: str, model: Optional[str] = None) -> str:
    """
    Transcribe audio.

    Args:
        audio: URL or Base64 string of the audio file (a ``data:`` URI is
            also accepted).
        model: Optional model ID.

    Returns:
        The transcript text when it can be extracted from the response,
        otherwise the string form of the raw response; ``"Error: <message>"``
        if anything fails.
    """
    try:
        import base64

        if audio.startswith(("http://", "https://")):
            # URLs are handed to the client unchanged.
            payload = audio
        else:
            # Strip a data-URI header before decoding: the lenient decoder
            # would otherwise fold the header's letters into the payload
            # and corrupt the audio bytes.
            encoded = audio.split(",", 1)[-1] if audio.startswith("data:") else audio
            payload = base64.b64decode(encoded)
        result = client.automatic_speech_recognition(payload, model=model)
        # Accept both attribute-style and mapping-style results.
        transcript = getattr(result, "text", None)
        if transcript is None and isinstance(result, dict):
            transcript = result.get("text")
        if transcript is not None:
            return str(transcript)
        return str(result)
    except Exception as e:
        return f"Error: {e}"
@mcp.tool()
def text_to_speech(text: str, model: Optional[str] = None) -> str:
    """
    Synthesize speech for the given text.

    Args:
        text: The text to speak.
        model: Optional model ID.

    Returns: Base64 encoded audio, or ``"Error: <message>"`` when the call
    raises.
    """
    try:
        speech = client.text_to_speech(text, model=model)
        from base64 import b64encode

        encoded = b64encode(speech)
        return str(encoded, 'utf-8')
    except Exception as err:
        return f"Error: {err}"
@mcp.tool()
def generic_hf_inference(task: str, inputs: Dict[str, Any], model: Optional[str] = None) -> str:
    """
    Run any Hugging Face inference task that doesn't have a specific tool.

    Args:
        task: The task name (e.g., 'text-generation', 'translation').
        inputs: Dictionary of inputs required for the task.
        model: Model ID to use.

    Returns:
        The response body as text. A bytes response is decoded as UTF-8;
        if it is not valid UTF-8 (binary output such as an image or audio)
        it is returned Base64 encoded. ``"Error: <message>"`` if the call
        fails.
    """
    try:
        # Raw fallback: the payload is forwarded as-is, so the required keys
        # in ``inputs`` depend entirely on the task.
        # NOTE(review): InferenceClient.post appears to be deprecated in newer
        # huggingface_hub releases - confirm the pinned version provides it.
        result = client.post(json=inputs, model=model, task=task)
        if isinstance(result, (bytes, bytearray)):
            raw = bytes(result)
            try:
                # str() on bytes would yield a "b'...'" repr, not the payload.
                return raw.decode("utf-8")
            except UnicodeDecodeError:
                import base64

                return base64.b64encode(raw).decode("ascii")
        return str(result)
    except Exception as e:
        return f"Error: {e}"