|
|
from mcp.server.fastmcp import FastMCP |
|
|
import os |
|
|
from typing import Optional, List, Any, Dict |
|
|
from huggingface_hub import InferenceClient |
|
|
|
|
|
|
|
|
# MCP server instance; "Hugging Face tools" is the name advertised to MCP clients.
mcp = FastMCP("Hugging Face tools")

# Hugging Face API token read from the environment; may be None when unset.
HF_TOKEN = os.environ.get("HF_TOKEN")

if not HF_TOKEN:
    # Anonymous access still works for many public models, but gated models
    # and higher rate limits require a token — warn instead of failing hard.
    print("Warning: HF_TOKEN environment variable not set. Some authenticated requests may fail.")

# Shared inference client used by every tool below. No default model is
# configured here; each tool forwards an optional `model=` per call.
client = InferenceClient(token=HF_TOKEN)
|
|
|
|
|
@mcp.tool()
def list_available_tasks() -> str:
    """Lists all the AI tasks supported by this server."""
    # Fixed, ordered catalogue of task names; a tuple keeps it immutable and
    # the rendered string deterministic.
    supported = (
        "Audio-Text-to-Text", "Image-Text-to-Text", "Image-Text-to-Image",
        "Image-Text-to-Video", "Visual Question Answering", "Document Question Answering",
        "Video-Text-to-Text", "Visual Document Retrieval", "Depth Estimation",
        "Image Classification", "Object Detection", "Image Segmentation",
        "Text-to-Image", "Image-to-Text", "Image-to-Image", "Image-to-Video",
        "Unconditional Image Generation", "Video Classification", "Text-to-Video",
        "Zero-Shot Image Classification", "Mask Generation", "Zero-Shot Object Detection",
        "Text-to-3D", "Image-to-3D", "Image Feature Extraction", "Keypoint Detection",
        "Video-to-Video", "Text Classification", "Token Classification",
        "Table Question Answering", "Question Answering", "Zero-Shot Classification",
        "Translation", "Summarization", "Feature Extraction", "Text Generation",
        "Fill-Mask", "Sentence Similarity", "Text Ranking", "Text-to-Speech",
        "Text-to-Audio", "Automatic Speech Recognition", "Audio-to-Audio",
        "Audio Classification", "Voice Activity Detection", "Tabular Classification",
        "Tabular Regression", "Time Series Forecasting", "Reinforcement Learning",
        "Robotics", "Graph Machine Learning",
    )
    return "Supported Tasks: " + ", ".join(supported)
|
|
|
|
|
@mcp.tool()
def visual_question_answering(image: str, question: str, model: Optional[str] = None) -> str:
    """
    Answer questions about an image.

    Args:
        image: URL or Base64 string of the image.
        question: The question to answer.
        model: Optional model ID (e.g., 'dandelin/vilt-b32-finetuned-vqa').
    """
    try:
        # Delegate straight to the shared client; any failure is reported
        # back to the caller as a plain "Error: ..." string.
        return str(client.visual_question_answering(image, question, model=model))
    except Exception as err:
        return f"Error: {err}"
|
|
|
|
|
@mcp.tool()
def text_to_image(prompt: str, model: Optional[str] = None) -> str:
    """
    Generate an image from text.

    Returns: Base64 encoded image string.
    """
    try:
        import io

        import utils

        generated = client.text_to_image(prompt, model=model)
        # Some backends return raw bytes rather than a PIL image object;
        # normalise to a PIL image before encoding.
        if isinstance(generated, utils.Image.Image):
            pil_image = generated
        else:
            pil_image = utils.Image.open(io.BytesIO(generated))
        return utils.encode_image(pil_image)
    except Exception as err:
        return f"Error: {err}"
|
|
|
|
|
@mcp.tool()
def image_classification(image: str, model: Optional[str] = None) -> str:
    """
    Classify an image.

    Args:
        image: URL or Base64 string.
    """
    try:
        # Any client/network failure is reported as an "Error: ..." string.
        return str(client.image_classification(image, model=model))
    except Exception as err:
        return f"Error: {err}"
|
|
|
|
|
@mcp.tool()
def object_detection(image: str, model: Optional[str] = None) -> str:
    """
    Detect objects in an image.

    Args:
        image: URL or Base64 string.
    """
    try:
        detections = client.object_detection(image, model=model)
        return str(detections)
    except Exception as err:
        return f"Error: {err}"
|
|
|
|
|
@mcp.tool()
def image_to_text(image: str, model: Optional[str] = None) -> str:
    """
    Generate a caption or text description for an image.

    Args:
        image: URL or Base64 string.
    """
    try:
        caption = client.image_to_text(image, model=model)
        return str(caption)
    except Exception as err:
        return f"Error: {err}"
|
|
|
|
|
@mcp.tool()
def text_generation(prompt: str, model: Optional[str] = None, max_new_tokens: int = 500) -> str:
    """
    Generate text based on a prompt.

    Args:
        prompt: Input text.
        model: Model ID.
        max_new_tokens: Maximum tokens to generate.
    """
    try:
        generated = client.text_generation(
            prompt,
            model=model,
            max_new_tokens=max_new_tokens,
        )
        return generated
    except Exception as err:
        return f"Error: {err}"
|
|
|
|
|
@mcp.tool()
def summarization(text: str, model: Optional[str] = None) -> str:
    """
    Summarize a text.
    """
    try:
        summary = client.summarization(text, model=model)
        # Some API versions return a list of dicts; unwrap the first entry,
        # falling back to the stringified raw response.
        if isinstance(summary, list) and summary:
            return summary[0].get('summary_text', str(summary))
        return str(summary)
    except Exception as err:
        return f"Error: {err}"
|
|
|
|
|
@mcp.tool()
def translation(text: str, model: Optional[str] = None) -> str:
    """
    Translate text. Model usually determines source/target languages.
    """
    try:
        translated = client.translation(text, model=model)
        # Some API versions return a list of dicts; unwrap the first entry,
        # falling back to the stringified raw response.
        if isinstance(translated, list) and translated:
            return translated[0].get('translation_text', str(translated))
        return str(translated)
    except Exception as err:
        return f"Error: {err}"
|
|
|
|
|
@mcp.tool()
def text_classification(text: str, model: Optional[str] = None) -> str:
    """
    Classify text (e.g. sentiment analysis).
    """
    try:
        labels = client.text_classification(text, model=model)
        return str(labels)
    except Exception as err:
        return f"Error: {err}"
|
|
|
|
|
@mcp.tool()
def automatic_speech_recognition(audio: str, model: Optional[str] = None) -> str:
    """
    Transcribe audio.

    Args:
        audio: URL or Base64 string of the audio file.
    """
    try:
        import base64

        # URLs are forwarded untouched; anything else is assumed to be
        # base64-encoded audio and decoded to raw bytes first.
        if audio.startswith(("http://", "https://")):
            payload = audio
        else:
            payload = base64.b64decode(audio)

        result = client.automatic_speech_recognition(payload, model=model)

        # Dict-shaped responses carry the transcript under 'text'.
        if isinstance(result, dict):
            return result.get('text', str(result))
        return str(result)
    except Exception as err:
        return f"Error: {err}"
|
|
|
|
|
@mcp.tool()
def text_to_speech(text: str, model: Optional[str] = None) -> str:
    """
    Generate audio from text.

    Returns: Base64 encoded audio.
    """
    try:
        import base64

        raw_audio = client.text_to_speech(text, model=model)
        return base64.b64encode(raw_audio).decode('utf-8')
    except Exception as err:
        return f"Error: {err}"
|
|
|
|
|
@mcp.tool()
def generic_hf_inference(task: str, inputs: Dict[str, Any], model: Optional[str] = None) -> str:
    """
    Run any Hugging Face inference task that doesn't have a specific tool.

    Args:
        task: The task name (e.g., 'text-generation', 'translation').
        inputs: Dictionary of inputs required for the task.
        model: Model ID to use.

    Returns:
        The raw API response rendered as a string, or an "Error: ..." message
        on failure.
    """
    try:
        # Removed an unused `import json` that served no purpose here —
        # the client serializes the payload itself.
        # NOTE(review): InferenceClient.post() was removed in recent
        # huggingface_hub releases; confirm the pinned version still
        # exposes it before upgrading the dependency.
        result = client.post(json=inputs, model=model, task=task)
        return str(result)
    except Exception as e:
        return f"Error: {e}"
|
|
|
|
|
|