| | """
|
| | Hugging Face Agent Integration for OpenManus
|
| | Extends the main AI agent with access to thousands of HuggingFace models
|
| | """
|
| |
|
| | import os
|
| | from typing import Any, Dict, List, Optional
|
| |
|
| | from app.agent.base import BaseAgent
|
| | from app.huggingface_models import ModelCategory
|
| | from app.logger import logger
|
| | from app.tool.huggingface_models_tool import HuggingFaceModelsTool
|
| |
|
| |
|
| | class HuggingFaceAgent(BaseAgent):
|
| | """AI Agent with integrated HuggingFace model access"""
|
| |
|
def __init__(self, **config):
    """Initialise the base agent, then the HuggingFace tool and default models.

    The token is taken from the ``HUGGINGFACE_TOKEN`` environment variable
    when it is set to a non-empty value, otherwise from
    ``config["huggingface_token"]``. Without a token ``self.hf_tool`` is
    ``None`` and a warning is logged.

    Args:
        **config: Forwarded unchanged to ``BaseAgent.__init__``.
    """
    super().__init__(**config)

    token = os.getenv("HUGGINGFACE_TOKEN") or config.get("huggingface_token")
    if token:
        self.hf_tool = HuggingFaceModelsTool(token)
    else:
        logger.warning(
            "No Hugging Face token provided. HF models will not be available."
        )
        self.hf_tool = None

    # Fallback model per task, used by the *_with_hf helpers when the caller
    # does not name a model.
    self.default_models = dict(
        text_generation="MiniMax-M2",
        image_generation="FLUX.1 Dev",
        speech_recognition="Whisper Large v3",
        text_to_speech="Kokoro 82M",
        image_classification="ViT Base Patch16",
        embeddings="Sentence Transformers All MiniLM",
        translation="M2M100 1.2B",
        summarization="PEGASUS XSum",
    )
|
| |
|
async def generate_text_with_hf(
    self,
    prompt: str,
    model_name: Optional[str] = None,
    max_tokens: int = 200,
    temperature: float = 0.7,
    stream: bool = False,
) -> Dict[str, Any]:
    """Run text generation through the HuggingFace tool.

    Args:
        prompt: Prompt forwarded to the tool.
        model_name: Model to use; when falsy,
            ``self.default_models["text_generation"]`` is used.
        max_tokens: Forwarded unchanged to the tool.
        temperature: Forwarded unchanged to the tool.
        stream: Forwarded unchanged to the tool.

    Returns:
        The result of ``self.hf_tool.text_generation``, or an error dict
        when no HuggingFace tool is configured.
    """
    if not self.hf_tool:
        return {"error": "HuggingFace integration not available"}

    request = {
        "model_name": model_name or self.default_models["text_generation"],
        "prompt": prompt,
        "max_tokens": max_tokens,
        "temperature": temperature,
        "stream": stream,
    }
    return await self.hf_tool.text_generation(**request)
|
| |
|
async def generate_image_with_hf(
    self,
    prompt: str,
    model_name: Optional[str] = None,
    negative_prompt: Optional[str] = None,
    width: int = 1024,
    height: int = 1024,
) -> Dict[str, Any]:
    """Run image generation through the HuggingFace tool.

    Args:
        prompt: Prompt forwarded to the tool.
        model_name: Model to use; when falsy,
            ``self.default_models["image_generation"]`` is used.
        negative_prompt: Forwarded unchanged to the tool.
        width: Forwarded unchanged to the tool.
        height: Forwarded unchanged to the tool.

    Returns:
        The result of ``self.hf_tool.generate_image``, or an error dict
        when no HuggingFace tool is configured.
    """
    tool = self.hf_tool
    if not tool:
        return {"error": "HuggingFace integration not available"}

    chosen_model = model_name or self.default_models["image_generation"]
    return await tool.generate_image(
        model_name=chosen_model,
        prompt=prompt,
        negative_prompt=negative_prompt,
        width=width,
        height=height,
    )
|
| |
|
async def transcribe_audio_with_hf(
    self,
    audio_data: bytes,
    model_name: Optional[str] = None,
    language: Optional[str] = None,
) -> Dict[str, Any]:
    """Run audio transcription through the HuggingFace tool.

    Args:
        audio_data: Audio bytes forwarded to the tool.
        model_name: Model to use; when falsy,
            ``self.default_models["speech_recognition"]`` is used.
        language: Forwarded unchanged to the tool.

    Returns:
        The result of ``self.hf_tool.transcribe_audio``, or an error dict
        when no HuggingFace tool is configured.
    """
    tool = self.hf_tool
    if not tool:
        return {"error": "HuggingFace integration not available"}

    chosen_model = model_name or self.default_models["speech_recognition"]
    return await tool.transcribe_audio(
        model_name=chosen_model,
        audio_data=audio_data,
        language=language,
    )
|
| |
|
async def synthesize_speech_with_hf(
    self,
    text: str,
    model_name: Optional[str] = None,
    voice_id: Optional[str] = None,
) -> Dict[str, Any]:
    """Run text-to-speech through the HuggingFace tool.

    Args:
        text: Text forwarded to the tool.
        model_name: Model to use; when falsy,
            ``self.default_models["text_to_speech"]`` is used.
        voice_id: Forwarded unchanged to the tool.

    Returns:
        The result of ``self.hf_tool.text_to_speech``, or an error dict
        when no HuggingFace tool is configured.
    """
    if not self.hf_tool:
        return {"error": "HuggingFace integration not available"}

    request = {
        "model_name": model_name or self.default_models["text_to_speech"],
        "text": text,
        "voice_id": voice_id,
    }
    return await self.hf_tool.text_to_speech(**request)
|
| |
|
async def classify_image_with_hf(
    self, image_data: bytes, model_name: Optional[str] = None, task: str = "general"
) -> Dict[str, Any]:
    """Run image classification through the HuggingFace tool.

    Args:
        image_data: Image bytes forwarded to the tool.
        model_name: Model to use for tasks without a dedicated model; when
            falsy, ``self.default_models["image_classification"]`` is used.
        task: ``"nsfw"``, ``"emotions"`` and ``"deepfake"`` each select a
            dedicated model and override ``model_name``; any other value
            uses ``model_name`` or the default.

    Returns:
        The result of ``self.hf_tool.classify_image``, or an error dict
        when no HuggingFace tool is configured.
    """
    if not self.hf_tool:
        return {"error": "HuggingFace integration not available"}

    # Tasks that always use a dedicated model, regardless of model_name.
    dedicated_models = {
        "nsfw": "NSFW Image Detection",
        "emotions": "Facial Emotions Detection",
        "deepfake": "Deepfake Detection",
    }
    if task in dedicated_models:
        chosen_model = dedicated_models[task]
    else:
        chosen_model = model_name or self.default_models["image_classification"]

    return await self.hf_tool.classify_image(
        model_name=chosen_model, image_data=image_data
    )
|
| |
|
async def get_text_embeddings_with_hf(
    self, texts: List[str], model_name: Optional[str] = None
) -> Dict[str, Any]:
    """Request text embeddings from the HuggingFace tool.

    Args:
        texts: Texts forwarded to the tool.
        model_name: Model to use; when falsy,
            ``self.default_models["embeddings"]`` is used.

    Returns:
        The result of ``self.hf_tool.get_embeddings``, or an error dict
        when no HuggingFace tool is configured.
    """
    tool = self.hf_tool
    if not tool:
        return {"error": "HuggingFace integration not available"}

    chosen_model = model_name or self.default_models["embeddings"]
    return await tool.get_embeddings(model_name=chosen_model, texts=texts)
|
| |
|
async def translate_with_hf(
    self,
    text: str,
    target_language: str,
    source_language: Optional[str] = None,
    model_name: Optional[str] = None,
) -> Dict[str, Any]:
    """Run translation through the HuggingFace tool.

    Args:
        text: Text forwarded to the tool.
        target_language: Forwarded unchanged to the tool.
        source_language: Forwarded unchanged to the tool.
        model_name: Model to use; when falsy,
            ``self.default_models["translation"]`` is used.

    Returns:
        The result of ``self.hf_tool.translate_text``, or an error dict
        when no HuggingFace tool is configured.
    """
    if not self.hf_tool:
        return {"error": "HuggingFace integration not available"}

    request = {
        "model_name": model_name or self.default_models["translation"],
        "text": text,
        "source_language": source_language,
        "target_language": target_language,
    }
    return await self.hf_tool.translate_text(**request)
|
| |
|
async def summarize_with_hf(
    self, text: str, model_name: Optional[str] = None, max_length: int = 150
) -> Dict[str, Any]:
    """Run summarisation through the HuggingFace tool.

    Args:
        text: Text forwarded to the tool.
        model_name: Model to use; when falsy,
            ``self.default_models["summarization"]`` is used.
        max_length: Forwarded unchanged to the tool.

    Returns:
        The result of ``self.hf_tool.summarize_text``, or an error dict
        when no HuggingFace tool is configured.
    """
    tool = self.hf_tool
    if not tool:
        return {"error": "HuggingFace integration not available"}

    chosen_model = model_name or self.default_models["summarization"]
    return await tool.summarize_text(
        model_name=chosen_model,
        text=text,
        max_length=max_length,
    )
|
| |
|
def get_available_hf_models(self, category: Optional[str] = None) -> Dict[str, Any]:
    """List the models exposed by the HuggingFace tool.

    Args:
        category: Passed positionally to ``list_available_models``.

    Returns:
        The result of ``self.hf_tool.list_available_models``, or an error
        dict when no HuggingFace tool is configured.
    """
    # NOTE(review): this class defines get_available_hf_models a second time
    # further down; the later definition rebinds the name and is the one
    # that takes effect.
    tool = self.hf_tool
    if tool:
        return tool.list_available_models(category)
    return {"error": "HuggingFace integration not available"}
|
| |
|
async def smart_model_selection(
    self, task_description: str, content_type: str = "text"
) -> str:
    """
    Select a HuggingFace model name for a free-form task description.

    Rules are checked in order and the first matching rule decides the
    model. Keywords are matched case-insensitively at word boundaries
    rather than as raw substrings, so that e.g. "ar" no longer matches
    inside "summarize" and "class" no longer matches inside "classify".

    Rule order: video, code, 3D/AR/VR, documents/OCR, multimodal,
    creative writing, game content, research, business, explicit
    transcription, audio/image enhancement, image classification (image
    content only), image generation, generic text generation/chat,
    audio recognition (audio content only), text-to-speech, translation,
    summarization, embeddings. Explicit transcription is checked before
    the audio/enhancement rule, and the image rules before the generic
    "generate/create" rule, so "transcribe audio" and "generate image"
    are not captured by the broader rules.

    Args:
        task_description: Description of what the user wants to do
        content_type: Type of content (text, image, audio, video)

    Returns:
        A model name; always a string ("MiniMax-M2" when no rule matches).
    """
    import re  # local import keeps this method self-contained

    task_lower = task_description.lower()

    def hit(*keywords: str, words: tuple = ()) -> bool:
        """Return True when the task mentions any of the given terms.

        ``keywords`` must start at a word boundary (suffixes allowed, so
        "develop" also matches "developer"); ``words`` must be whole words
        with an optional plural "s" (used for short or ambiguous tokens).
        """
        patterns = [r"\b" + re.escape(keyword) for keyword in keywords]
        patterns += [r"\b" + re.escape(word) + r"s?\b" for word in words]
        return any(re.search(pattern, task_lower) for pattern in patterns)

    # Video
    if hit(
        "video", "movie", "animation", "motion", "sequence", "frames",
        words=("gif",),
    ):
        if hit("generate", "create"):
            return "Stable Video Diffusion"
        if hit("analyze", "describe"):
            return "Video ChatGPT"
        return "AnimateDiff"

    # Code and application development
    if hit(
        "code", "programming", "application", "software", "develop", "build",
        "function", "database", "website", "frontend", "backend",
        words=("app", "class", "api"),
    ):
        if hit("application", words=("app",)):
            return "CodeLlama 34B Instruct"
        if hit("python"):
            return "WizardCoder 34B"
        if hit(words=("api",)):
            return "StarCoder2 15B"
        if hit("explain", "comment"):
            return "Phind CodeLlama"
        return "DeepSeek Coder V2"

    # 3D, AR and VR
    if hit(
        "three dimensional", "mesh", "model", "augmented reality",
        "virtual reality", "texture", "material",
        words=("3d", "obj", "stl", "ar", "vr"),
    ):
        if hit("text") and hit("model", words=("3d",)):
            return "Shap-E"
        if hit("image") and hit(words=("3d",)):
            return "DreamFusion"
        return "Point-E"

    # Documents and OCR
    if hit(
        "document", "scan", "extract text", "handwriting", "table", "layout",
        "invoice", "receipt", "contract",
        words=("ocr", "pdf", "form"),
    ):
        if hit("handwriting", "handwritten"):
            return "TrOCR Handwritten"
        if hit("table"):
            return "TableTransformer"
        if hit(words=("form",)):
            return "FormNet"
        return "TrOCR Large"

    # Multimodal (vision + language)
    if hit(
        "visual question", "image question", "describe image", "multimodal",
        "vision language", "image text", "cross modal",
    ):
        if hit("chat", "conversation"):
            return "GPT-4V"
        if hit("question"):
            return "LLaVA"
        return "BLIP-2"

    # Creative writing
    if hit(
        "story", "creative", "poem", "poetry", "novel", "screenplay", "script",
        "blog", "article", "marketing", "copy", "advertising",
    ):
        if hit("story", "novel"):
            return "Novel AI"
        if hit("poem", "poetry"):
            return "Poet Assistant"
        if hit("marketing", "copy"):
            return "Marketing Copy AI"
        return "GPT-3.5 Creative"

    # Game development
    if hit(
        "game", "character", "level", "dialogue", "asset", "gameplay",
        "mechanic", "unity", "unreal",
        words=("npc", "quest"),
    ):
        if hit("character"):
            return "Character AI"
        if hit("level", "environment"):
            return "Level Designer"
        if hit("dialogue", "conversation"):
            return "Dialogue Writer"
        return "Asset Creator"

    # Research and science
    if hit(
        "research", "scientific", "paper", "analysis", "data", "protein",
        "molecule", "chemistry", "biology", "physics", "experiment",
    ):
        if hit("protein", "folding"):
            return "AlphaFold"
        if hit("molecule", "chemistry"):
            return "ChemBERTa"
        if hit("data") and hit("analysis"):
            return "Data Analyst"
        return "SciBERT"

    # Business documents
    if hit(
        "email", "business", "report", "presentation", "meeting", "project",
        "plan", "proposal", "memo", "letter", "professional",
    ):
        if hit("email"):
            return "Email Assistant"
        if hit("presentation"):
            return "Presentation AI"
        if hit("report"):
            return "Report Writer"
        if hit("meeting"):
            return "Meeting Summarizer"
        return "Project Planner"

    # Explicit transcription requests. Checked before the audio/enhancement
    # rule, whose "audio" keyword would otherwise capture "transcribe audio".
    if hit("transcribe", "speech to text"):
        return "Whisper Large v3"

    # Music, audio and image enhancement
    if hit(
        "music", "audio", "sound", "voice clone", "enhance", "restore",
        "upscale", "remove background", "inpaint", "style transfer",
    ):
        if hit("music"):
            return "MusicGen"
        if hit("voice") and hit("clone"):
            return "Voice Cloner"
        if hit("upscale", "enhance"):
            return "Real-ESRGAN"
        if hit("background") and hit("remove"):
            return "Background Remover"
        if hit("restore", "face"):
            return "GFPGAN"
        return "LaMa"

    # Image classification, only when the content itself is an image.
    # Checked before image generation so "classify this photo" is not
    # treated as a generation request.
    if content_type == "image" and hit(
        "classify", "analyze image", "detect", "recognize"
    ):
        if hit("nsfw", "safe"):
            return "NSFW Image Detection"
        if hit("emotion", "face"):
            return "Facial Emotions Detection"
        if hit("deepfake", "fake"):
            return "Deepfake Detection"
        return "ViT Base Patch16"

    # Image generation. Checked before generic text generation so that
    # "generate image" does not select a text model.
    if hit("image", "picture", "draw", "photo", "visual", words=("art",)):
        if hit("fast", "quick"):
            return "FLUX.1 Schnell"
        return "FLUX.1 Dev"

    # Generic text generation and chat
    if hit("generate", "write", "create", "compose", "chat", "conversation"):
        if hit("chat", "conversation"):
            return "Llama 3.1 8B Instruct"
        return "MiniMax-M2"

    # Recognition requests on audio content. The content-type test is part
    # of the rule itself, so a non-matching request falls through to the
    # rules below instead of ending the function without a return value.
    if content_type == "audio" and hit("recognize"):
        return "Whisper Large v3"

    # Text-to-speech
    if hit("speak", "voice", "text to speech", words=("tts",)):
        if hit("fast"):
            return "Kokoro 82M"
        return "VibeVoice 1.5B"

    # Translation
    if hit("translate", "language", "convert"):
        return "M2M100 1.2B"

    # Summarization
    if hit("summarize", "summary", "abstract", "brief"):
        return "PEGASUS XSum"

    # Embeddings and similarity search
    if hit("similar", "embed", "vector", "search", "match"):
        return "Sentence Transformers All MiniLM"

    # Default: general-purpose text model
    return "MiniMax-M2"
|
| |
|
async def execute_hf_task(
    self, task: str, content: Any, model_name: Optional[str] = None, **kwargs
) -> Dict[str, Any]:
    """
    Execute a HuggingFace task, selecting a model when none is given.

    The task description is matched against a fixed list of phrases to
    choose which ``*_with_hf`` helper handles the request; anything that
    matches none of them is treated as text generation.

    Args:
        task: Task description (e.g., "generate image", "transcribe audio")
        content: Input content (text, image bytes, audio bytes)
        model_name: Specific model to use (optional); when falsy,
            ``smart_model_selection`` picks one
        **kwargs: Additional parameters forwarded to the chosen helper

    Returns:
        The chosen helper's result, or an error dict when no HuggingFace
        tool is configured or any exception is raised along the way.
    """
    if not self.hf_tool:
        return {"error": "HuggingFace integration not available"}

    try:
        task_lower = task.lower()

        # Classify raw bytes as image or audio from their leading bytes.
        content_type = "text"
        if isinstance(content, bytes):
            header = content[:20]
            looks_like_image = (
                b"PNG" in header
                or b"JFIF" in header
                or b"GIF" in header
                # JPEG start-of-image marker: also covers EXIF JPEGs,
                # which carry no "JFIF" tag.
                or content.startswith(b"\xff\xd8\xff")
                # WebP is a RIFF container with form type "WEBP"; the form
                # type must be checked because WAV audio is RIFF as well.
                or (content.startswith(b"RIFF") and content[8:12] == b"WEBP")
            )
            content_type = "image" if looks_like_image else "audio"

        if not model_name:
            model_name = await self.smart_model_selection(task, content_type)

        # Dispatch to the helper that matches the task description.
        if "generate" in task_lower and (
            "image" in task_lower or "picture" in task_lower
        ):
            return await self.generate_image_with_hf(content, model_name, **kwargs)

        elif "transcribe" in task_lower or "speech to text" in task_lower:
            return await self.transcribe_audio_with_hf(
                content, model_name, **kwargs
            )

        elif "text to speech" in task_lower or "tts" in task_lower:
            return await self.synthesize_speech_with_hf(
                content, model_name, **kwargs
            )

        elif "classify" in task_lower and content_type == "image":
            return await self.classify_image_with_hf(content, model_name, **kwargs)

        elif "embed" in task_lower or "vector" in task_lower:
            # A single string is wrapped in a list; anything else is
            # passed through as given.
            texts = [content] if isinstance(content, str) else content
            return await self.get_text_embeddings_with_hf(texts, model_name)

        elif "translate" in task_lower:
            # target_language has no default in translate_with_hf, so it
            # has to arrive through kwargs.
            return await self.translate_with_hf(
                content, model_name=model_name, **kwargs
            )

        elif "summarize" in task_lower:
            return await self.summarize_with_hf(content, model_name, **kwargs)

        else:
            # Fallback: treat the request as text generation.
            return await self.generate_text_with_hf(content, model_name, **kwargs)

    except Exception as e:
        # Boundary handler: report the failure to the caller as an error dict.
        logger.error(f"HuggingFace task execution failed: {e}")
        return {"error": f"Task execution failed: {str(e)}"}
|
| |
|
async def chat_with_hf_models(
    self, message: str, conversation_history: Optional[List[Dict]] = None
) -> Dict[str, Any]:
    """
    Chat entry point that routes HuggingFace-related requests to HF models.

    Routing, in order:
    1. A message containing "list" together with "model" or "hf" returns
       the available-model listing.
    2. A message containing any HuggingFace keyword is passed to
       ``execute_hf_task`` with the message as both task and content.
    3. Anything else goes to the base agent's ``chat``; when the message
       mentions both "image" and "generate", an ``hf_suggestion`` entry is
       added to a dict response.

    Args:
        message: The user's message.
        conversation_history: Forwarded unchanged to the base agent's
            ``chat``.

    Returns:
        The model listing, the HF task result, or the base agent's response.
    """
    message_lower = message.lower()

    # 1. Model listing request.
    if "list" in message_lower and (
        "model" in message_lower or "hf" in message_lower
    ):
        return self.get_available_hf_models()

    # 2. Requests that mention a HuggingFace capability.
    hf_keywords = (
        "generate image",
        "create image",
        "draw",
        "picture",
        "transcribe",
        "speech to text",
        "audio",
        "text to speech",
        "speak",
        "voice",
        "translate",
        "language",
        "classify image",
        "embed",
        "vector",
        "similarity",
        "summarize",
    )
    if any(keyword in message_lower for keyword in hf_keywords):
        return await self.execute_hf_task(message, message)

    # 3. Everything else is handled by the base agent.
    base_response = await super().chat(message, conversation_history)

    # Only annotate dict responses: item assignment on any other response
    # type would raise instead of returning the base agent's answer.
    if (
        isinstance(base_response, dict)
        and "image" in message_lower
        and "generate" in message_lower
    ):
        base_response["hf_suggestion"] = {
            "action": "generate_image",
            "models": ["FLUX.1 Dev", "FLUX.1 Schnell", "Stable Diffusion XL"],
            "message": "I can also generate images for you using HuggingFace models. Just ask!",
        }

    return base_response
|
| |
|
| |
|
| |
|
async def generate_video_with_hf(
    self, prompt: str, model_name: Optional[str] = None, **kwargs
) -> Dict[str, Any]:
    """Run text-to-video generation through the HuggingFace tool.

    Args:
        prompt: Prompt forwarded to the tool.
        model_name: Model to use; "Stable Video Diffusion" when falsy.
        **kwargs: Forwarded unchanged to the tool.

    Returns:
        The result of ``self.hf_tool.text_to_video``, or an error dict
        when no HuggingFace tool is configured.
    """
    tool = self.hf_tool
    if not tool:
        return {"error": "HuggingFace integration not available"}

    chosen_model = model_name or "Stable Video Diffusion"
    return await tool.text_to_video(model_name=chosen_model, prompt=prompt, **kwargs)
|
| |
|
async def generate_code_with_hf(
    self,
    prompt: str,
    language: str = "python",
    model_name: Optional[str] = None,
    **kwargs,
) -> Dict[str, Any]:
    """Run code generation through the HuggingFace tool.

    Args:
        prompt: Prompt forwarded to the tool.
        language: Forwarded unchanged to the tool.
        model_name: Model to use; "CodeLlama 34B Instruct" when falsy.
        **kwargs: Forwarded unchanged to the tool.

    Returns:
        The result of ``self.hf_tool.code_generation``, or an error dict
        when no HuggingFace tool is configured.
    """
    tool = self.hf_tool
    if not tool:
        return {"error": "HuggingFace integration not available"}

    chosen_model = model_name or "CodeLlama 34B Instruct"
    return await tool.code_generation(
        model_name=chosen_model,
        prompt=prompt,
        language=language,
        **kwargs,
    )
|
| |
|
async def generate_app_with_hf(
    self,
    description: str,
    app_type: str = "web_app",
    model_name: Optional[str] = None,
    **kwargs,
) -> Dict[str, Any]:
    """Request application code from the HuggingFace tool's code generator.

    The prompt sent to the tool is
    ``"Create a {app_type} application: {description}"``.

    Args:
        description: Description inserted into the prompt.
        app_type: Application type inserted into the prompt.
        model_name: Model to use; "CodeLlama 34B Instruct" when falsy.
        **kwargs: Forwarded unchanged to the tool.

    Returns:
        The result of ``self.hf_tool.code_generation``, or an error dict
        when no HuggingFace tool is configured.
    """
    tool = self.hf_tool
    if not tool:
        return {"error": "HuggingFace integration not available"}

    chosen_model = model_name or "CodeLlama 34B Instruct"
    app_prompt = f"Create a {app_type} application: {description}"
    return await tool.code_generation(
        model_name=chosen_model, prompt=app_prompt, **kwargs
    )
|
| |
|
async def generate_3d_model_with_hf(
    self, prompt: str, model_name: Optional[str] = None, **kwargs
) -> Dict[str, Any]:
    """Run text-to-3D generation through the HuggingFace tool.

    Args:
        prompt: Prompt forwarded to the tool.
        model_name: Model to use; "Shap-E" when falsy.
        **kwargs: Forwarded unchanged to the tool.

    Returns:
        The result of ``self.hf_tool.text_to_3d``, or an error dict when
        no HuggingFace tool is configured.
    """
    tool = self.hf_tool
    if not tool:
        return {"error": "HuggingFace integration not available"}

    chosen_model = model_name or "Shap-E"
    return await tool.text_to_3d(model_name=chosen_model, prompt=prompt, **kwargs)
|
| |
|
async def process_document_with_hf(
    self,
    document_data: bytes,
    task_type: str = "ocr",
    model_name: Optional[str] = None,
    **kwargs,
) -> Dict[str, Any]:
    """Run OCR or document analysis through the HuggingFace tool.

    Args:
        document_data: Document bytes; passed as ``image_data`` for OCR and
            as ``document_data`` for analysis.
        task_type: ``"ocr"`` selects the tool's ``ocr`` call; any other
            value selects ``document_analysis``.
        model_name: Model to use; when falsy, "TrOCR Large" for OCR and
            "LayoutLMv3" otherwise.
        **kwargs: Forwarded unchanged to the tool.

    Returns:
        The tool's result, or an error dict when no HuggingFace tool is
        configured.
    """
    tool = self.hf_tool
    if not tool:
        return {"error": "HuggingFace integration not available"}

    if task_type == "ocr":
        ocr_model = model_name or "TrOCR Large"
        return await tool.ocr(
            model_name=ocr_model, image_data=document_data, **kwargs
        )

    analysis_model = model_name or "LayoutLMv3"
    return await tool.document_analysis(
        model_name=analysis_model, document_data=document_data, **kwargs
    )
|
| |
|
async def multimodal_chat_with_hf(
    self, image_data: bytes, text: str, model_name: Optional[str] = None, **kwargs
) -> Dict[str, Any]:
    """Send an image together with text to the tool's vision-language call.

    Args:
        image_data: Image bytes forwarded to the tool.
        text: Text forwarded to the tool.
        model_name: Model to use; "BLIP-2" when falsy.
        **kwargs: Forwarded unchanged to the tool.

    Returns:
        The result of ``self.hf_tool.vision_language``, or an error dict
        when no HuggingFace tool is configured.
    """
    tool = self.hf_tool
    if not tool:
        return {"error": "HuggingFace integration not available"}

    chosen_model = model_name or "BLIP-2"
    return await tool.vision_language(
        model_name=chosen_model,
        image_data=image_data,
        text=text,
        **kwargs,
    )
|
| |
|
async def generate_music_with_hf(
    self,
    prompt: str,
    duration: int = 30,
    model_name: Optional[str] = None,
    **kwargs,
) -> Dict[str, Any]:
    """Run music generation through the HuggingFace tool.

    Args:
        prompt: Prompt forwarded to the tool.
        duration: Forwarded unchanged to the tool.
        model_name: Model to use; "MusicGen" when falsy.
        **kwargs: Forwarded unchanged to the tool.

    Returns:
        The result of ``self.hf_tool.music_generation``, or an error dict
        when no HuggingFace tool is configured.
    """
    tool = self.hf_tool
    if not tool:
        return {"error": "HuggingFace integration not available"}

    chosen_model = model_name or "MusicGen"
    return await tool.music_generation(
        model_name=chosen_model,
        prompt=prompt,
        duration=duration,
        **kwargs,
    )
|
| |
|
async def enhance_image_with_hf(
    self,
    image_data: bytes,
    task_type: str = "super_resolution",
    model_name: Optional[str] = None,
    **kwargs,
) -> Dict[str, Any]:
    """Enhance an image through the HuggingFace tool.

    Args:
        image_data: Image bytes forwarded to the tool.
        task_type: One of ``"super_resolution"``, ``"background_removal"``
            or ``"face_restoration"``.
        model_name: Model to use; when falsy, the task's default model
            ("Real-ESRGAN", "Background Remover" or "GFPGAN").
        **kwargs: Forwarded unchanged to the tool.

    Returns:
        The tool's result, or an error dict when no HuggingFace tool is
        configured or ``task_type`` is not supported.
    """
    if not self.hf_tool:
        return {"error": "HuggingFace integration not available"}

    # task_type -> (default model, tool method name). Face restoration is
    # sent through the tool's super_resolution call, with GFPGAN as default.
    task_table = {
        "super_resolution": ("Real-ESRGAN", "super_resolution"),
        "background_removal": ("Background Remover", "background_removal"),
        "face_restoration": ("GFPGAN", "super_resolution"),
    }
    entry = task_table.get(task_type)
    if entry is None:
        # Return an explicit error instead of falling off the end of the
        # function, which would give the caller None.
        return {"error": f"Unsupported image enhancement task: {task_type}"}

    default_model, method_name = entry
    tool_call = getattr(self.hf_tool, method_name)
    return await tool_call(
        model_name=model_name or default_model, image_data=image_data, **kwargs
    )
|
| |
|
async def generate_creative_content_with_hf(
    self,
    prompt: str,
    content_type: str = "story",
    model_name: Optional[str] = None,
    **kwargs,
) -> Dict[str, Any]:
    """Request creative writing from the HuggingFace tool.

    The prompt sent to the tool is ``"Write a {content_type}: {prompt}"``.

    Args:
        prompt: Text inserted into the prompt.
        content_type: Kind of content inserted into the prompt.
        model_name: Model to use; "GPT-3.5 Creative" when falsy.
        **kwargs: Forwarded unchanged to the tool.

    Returns:
        The result of ``self.hf_tool.creative_writing``, or an error dict
        when no HuggingFace tool is configured.
    """
    tool = self.hf_tool
    if not tool:
        return {"error": "HuggingFace integration not available"}

    chosen_model = model_name or "GPT-3.5 Creative"
    writing_prompt = f"Write a {content_type}: {prompt}"
    return await tool.creative_writing(
        model_name=chosen_model, prompt=writing_prompt, **kwargs
    )
|
| |
|
async def generate_game_content_with_hf(
    self,
    description: str,
    content_type: str = "character",
    model_name: Optional[str] = None,
    **kwargs,
) -> Dict[str, Any]:
    """Request game content from the tool's creative-writing call.

    The prompt sent to the tool is
    ``"Create game {content_type}: {description}"``.

    Args:
        description: Description inserted into the prompt.
        content_type: Kind of game content inserted into the prompt.
        model_name: Model to use; "Character AI" when falsy, whatever the
            ``content_type``.
        **kwargs: Forwarded unchanged to the tool.

    Returns:
        The result of ``self.hf_tool.creative_writing``, or an error dict
        when no HuggingFace tool is configured.
    """
    tool = self.hf_tool
    if not tool:
        return {"error": "HuggingFace integration not available"}

    chosen_model = model_name or "Character AI"
    game_prompt = f"Create game {content_type}: {description}"
    return await tool.creative_writing(
        model_name=chosen_model, prompt=game_prompt, **kwargs
    )
|
| |
|
async def generate_business_document_with_hf(
    self,
    context: str,
    document_type: str = "email",
    model_name: Optional[str] = None,
    **kwargs,
) -> Dict[str, Any]:
    """Request a business document from the HuggingFace tool.

    Args:
        context: Forwarded unchanged to the tool.
        document_type: Forwarded unchanged to the tool.
        model_name: Model to use; "Email Assistant" when falsy, whatever
            the ``document_type``.
        **kwargs: Forwarded unchanged to the tool.

    Returns:
        The result of ``self.hf_tool.business_document``, or an error dict
        when no HuggingFace tool is configured.
    """
    tool = self.hf_tool
    if not tool:
        return {"error": "HuggingFace integration not available"}

    chosen_model = model_name or "Email Assistant"
    return await tool.business_document(
        model_name=chosen_model,
        document_type=document_type,
        context=context,
        **kwargs,
    )
|
| |
|
async def research_assistance_with_hf(
    self,
    topic: str,
    research_type: str = "analysis",
    model_name: Optional[str] = None,
    **kwargs,
) -> Dict[str, Any]:
    """Request research-oriented text from the tool's text generation call.

    The prompt sent to the tool is ``"Research {research_type} on: {topic}"``.

    Args:
        topic: Topic inserted into the prompt.
        research_type: Kind of research inserted into the prompt.
        model_name: Model to use; "SciBERT" when falsy.
        **kwargs: Forwarded unchanged to the tool.

    Returns:
        The result of ``self.hf_tool.text_generation``, or an error dict
        when no HuggingFace tool is configured.
    """
    tool = self.hf_tool
    if not tool:
        return {"error": "HuggingFace integration not available"}

    chosen_model = model_name or "SciBERT"
    research_prompt = f"Research {research_type} on: {topic}"
    return await tool.text_generation(
        model_name=chosen_model, prompt=research_prompt, **kwargs
    )
|
| |
|
def get_available_hf_models(self, category: Optional[str] = None) -> Dict[str, Any]:
    """List the models exposed by the HuggingFace tool, optionally by category.

    Args:
        category: Passed as the ``category`` keyword to
            ``list_available_models``.

    Returns:
        The result of ``self.hf_tool.list_available_models``, or an error
        dict when no HuggingFace tool is configured.
    """
    tool = self.hf_tool
    if tool:
        return tool.list_available_models(category=category)
    return {"error": "HuggingFace integration not available"}
|
| |
|