"""
Hugging Face Models Integration for OpenManus AI Agent

Comprehensive integration with the Hugging Face Inference API for all model categories
"""
|
| |
|
| | import asyncio
|
| | import base64
|
| | import io
|
| | import json
|
| | import logging
|
| | from dataclasses import dataclass
|
| | from enum import Enum
|
| | from typing import Any, Dict, List, Optional, Union
|
| |
|
| | import aiohttp
|
| | import PIL.Image
|
| | from pydantic import BaseModel
|
| |
|
| | logger = logging.getLogger(__name__)
|
| |
|
| |
|
class ModelCategory(Enum):
    """Categories of Hugging Face models available.

    Every member's value is a lowercase, hyphen-separated identifier string
    (for example ``ModelCategory.TEXT_GENERATION.value == "text-generation"``),
    so a member can be recovered from its string form with
    ``ModelCategory("text-generation")``.
    """

    # Core text, image and audio tasks
    TEXT_GENERATION = "text-generation"
    TEXT_TO_IMAGE = "text-to-image"
    IMAGE_TO_TEXT = "image-to-text"
    AUTOMATIC_SPEECH_RECOGNITION = "automatic-speech-recognition"
    TEXT_TO_SPEECH = "text-to-speech"
    IMAGE_CLASSIFICATION = "image-classification"
    OBJECT_DETECTION = "object-detection"
    FEATURE_EXTRACTION = "feature-extraction"
    SENTENCE_SIMILARITY = "sentence-similarity"
    TRANSLATION = "translation"
    SUMMARIZATION = "summarization"
    QUESTION_ANSWERING = "question-answering"
    FILL_MASK = "fill-mask"
    TOKEN_CLASSIFICATION = "token-classification"
    ZERO_SHOT_CLASSIFICATION = "zero-shot-classification"
    AUDIO_CLASSIFICATION = "audio-classification"
    CONVERSATIONAL = "conversational"

    # Video
    TEXT_TO_VIDEO = "text-to-video"
    VIDEO_TO_TEXT = "video-to-text"
    VIDEO_CLASSIFICATION = "video-classification"
    VIDEO_GENERATION = "video-generation"
    MOTION_GENERATION = "motion-generation"
    DEEPFAKE_DETECTION = "deepfake-detection"

    # Code and software generation
    CODE_GENERATION = "code-generation"
    CODE_COMPLETION = "code-completion"
    CODE_EXPLANATION = "code-explanation"
    CODE_TRANSLATION = "code-translation"
    CODE_REVIEW = "code-review"
    APP_GENERATION = "app-generation"
    API_GENERATION = "api-generation"
    DATABASE_GENERATION = "database-generation"

    # 3D, AR and VR content
    TEXT_TO_3D = "text-to-3d"
    IMAGE_TO_3D = "image-to-3d"
    THREE_D_GENERATION = "3d-generation"
    MESH_GENERATION = "mesh-generation"
    TEXTURE_GENERATION = "texture-generation"
    AR_CONTENT = "ar-content"
    VR_ENVIRONMENT = "vr-environment"

    # Document processing
    OCR = "ocr"
    DOCUMENT_ANALYSIS = "document-analysis"
    PDF_PROCESSING = "pdf-processing"
    LAYOUT_ANALYSIS = "layout-analysis"
    TABLE_EXTRACTION = "table-extraction"
    HANDWRITING_RECOGNITION = "handwriting-recognition"
    FORM_PROCESSING = "form-processing"

    # Multimodal
    VISION_LANGUAGE = "vision-language"
    MULTIMODAL_REASONING = "multimodal-reasoning"
    CROSS_MODAL_GENERATION = "cross-modal-generation"
    VISUAL_QUESTION_ANSWERING = "visual-question-answering"
    IMAGE_TEXT_MATCHING = "image-text-matching"
    MULTIMODAL_CHAT = "multimodal-chat"

    # Specialized audio and image tasks
    MUSIC_GENERATION = "music-generation"
    VOICE_CLONING = "voice-cloning"
    STYLE_TRANSFER = "style-transfer"
    SUPER_RESOLUTION = "super-resolution"
    IMAGE_INPAINTING = "image-inpainting"
    IMAGE_OUTPAINTING = "image-outpainting"
    BACKGROUND_REMOVAL = "background-removal"
    FACE_RESTORATION = "face-restoration"

    # Creative content
    CREATIVE_WRITING = "creative-writing"
    STORY_GENERATION = "story-generation"
    SCREENPLAY_WRITING = "screenplay-writing"
    POETRY_GENERATION = "poetry-generation"
    BLOG_WRITING = "blog-writing"
    MARKETING_COPY = "marketing-copy"

    # Game development
    GAME_ASSET_GENERATION = "game-asset-generation"
    CHARACTER_GENERATION = "character-generation"
    LEVEL_GENERATION = "level-generation"
    DIALOGUE_GENERATION = "dialogue-generation"

    # Science and research
    PROTEIN_FOLDING = "protein-folding"
    MOLECULE_GENERATION = "molecule-generation"
    SCIENTIFIC_WRITING = "scientific-writing"
    RESEARCH_ASSISTANCE = "research-assistance"
    DATA_ANALYSIS = "data-analysis"

    # Business and productivity
    EMAIL_GENERATION = "email-generation"
    PRESENTATION_CREATION = "presentation-creation"
    REPORT_GENERATION = "report-generation"
    MEETING_SUMMARIZATION = "meeting-summarization"
    PROJECT_PLANNING = "project-planning"

    # Education and tutoring
    AI_TUTORING = "ai-tutoring"
    EDUCATIONAL_CONTENT = "educational-content"
    LESSON_PLANNING = "lesson-planning"
    CONCEPT_EXPLANATION = "concept-explanation"
    HOMEWORK_ASSISTANCE = "homework-assistance"
    QUIZ_GENERATION = "quiz-generation"
    CURRICULUM_DESIGN = "curriculum-design"
    LEARNING_ASSESSMENT = "learning-assessment"
    ADAPTIVE_LEARNING = "adaptive-learning"
    SUBJECT_TEACHING = "subject-teaching"
    MATH_TUTORING = "math-tutoring"
    SCIENCE_TUTORING = "science-tutoring"
    LANGUAGE_TUTORING = "language-tutoring"
    HISTORY_TUTORING = "history-tutoring"
    CODING_INSTRUCTION = "coding-instruction"
    EXAM_PREPARATION = "exam-preparation"
    STUDY_GUIDE_CREATION = "study-guide-creation"
    EDUCATIONAL_GAMES = "educational-games"
    LEARNING_ANALYTICS = "learning-analytics"
    PERSONALIZED_LEARNING = "personalized-learning"
|
| |
|
| |
|
@dataclass
class HFModel:
    """Hugging Face model definition.

    The field order is part of the interface: instances in this module are
    built positionally as
    ``HFModel(name, model_id, category, description, endpoint_compatible,
    requires_auth, max_tokens, supports_streaming)``, with the last four
    arguments optional.
    """

    # Human-readable display name, e.g. "Whisper Large v3".
    name: str
    # Model identifier string, typically in "owner/model" form,
    # e.g. "openai/whisper-large-v3".
    model_id: str
    # Category this model is registered under.
    category: ModelCategory
    # Short free-text summary of the model.
    description: str
    # NOTE(review): presumably marks models usable through a hosted
    # inference endpoint -- confirm against the code that reads this flag.
    endpoint_compatible: bool = False
    # NOTE(review): presumably marks models that need an access token
    # (e.g. gated repositories) -- confirm against the caller.
    requires_auth: bool = False
    # Token limit associated with the model; None when not specified.
    # NOTE(review): whether this is a context or a generation limit is not
    # visible here -- confirm.
    max_tokens: Optional[int] = None
    # NOTE(review): presumably marks models that support streamed
    # responses -- confirm against the request code.
    supports_streaming: bool = False
|
| |
|
| |
|
| | class HuggingFaceModels:
|
| | """Comprehensive collection of Hugging Face models for all categories"""
|
| |
|
| |
|
| | TEXT_GENERATION_MODELS = [
|
| | HFModel(
|
| | "MiniMax-M2",
|
| | "MiniMaxAI/MiniMax-M2",
|
| | ModelCategory.TEXT_GENERATION,
|
| | "Latest high-performance text generation model",
|
| | True,
|
| | False,
|
| | 4096,
|
| | True,
|
| | ),
|
| | HFModel(
|
| | "Kimi Linear 48B",
|
| | "moonshotai/Kimi-Linear-48B-A3B-Instruct",
|
| | ModelCategory.TEXT_GENERATION,
|
| | "Large instruction-tuned model with linear attention",
|
| | True,
|
| | False,
|
| | 8192,
|
| | True,
|
| | ),
|
| | HFModel(
|
| | "GPT-OSS 20B",
|
| | "openai/gpt-oss-20b",
|
| | ModelCategory.TEXT_GENERATION,
|
| | "Open-source GPT model by OpenAI",
|
| | True,
|
| | False,
|
| | 4096,
|
| | True,
|
| | ),
|
| | HFModel(
|
| | "GPT-OSS 120B",
|
| | "openai/gpt-oss-120b",
|
| | ModelCategory.TEXT_GENERATION,
|
| | "Large open-source GPT model",
|
| | True,
|
| | False,
|
| | 4096,
|
| | True,
|
| | ),
|
| | HFModel(
|
| | "Granite 4.0 1B",
|
| | "ibm-granite/granite-4.0-1b",
|
| | ModelCategory.TEXT_GENERATION,
|
| | "IBM's enterprise-grade small language model",
|
| | True,
|
| | False,
|
| | 2048,
|
| | True,
|
| | ),
|
| | HFModel(
|
| | "GLM-4.6",
|
| | "zai-org/GLM-4.6",
|
| | ModelCategory.TEXT_GENERATION,
|
| | "Multilingual conversational model",
|
| | True,
|
| | False,
|
| | 4096,
|
| | True,
|
| | ),
|
| | HFModel(
|
| | "Llama 3.1 8B Instruct",
|
| | "meta-llama/Llama-3.1-8B-Instruct",
|
| | ModelCategory.TEXT_GENERATION,
|
| | "Meta's instruction-tuned Llama model",
|
| | True,
|
| | True,
|
| | 8192,
|
| | True,
|
| | ),
|
| | HFModel(
|
| | "Tongyi DeepResearch 30B",
|
| | "Alibaba-NLP/Tongyi-DeepResearch-30B-A3B",
|
| | ModelCategory.TEXT_GENERATION,
|
| | "Alibaba's research-focused large language model",
|
| | True,
|
| | False,
|
| | 4096,
|
| | True,
|
| | ),
|
| | HFModel(
|
| | "EuroLLM 9B",
|
| | "utter-project/EuroLLM-9B",
|
| | ModelCategory.TEXT_GENERATION,
|
| | "European multilingual language model",
|
| | True,
|
| | False,
|
| | 4096,
|
| | True,
|
| | ),
|
| | ]
|
| |
|
| |
|
| | TEXT_TO_IMAGE_MODELS = [
|
| | HFModel(
|
| | "FIBO",
|
| | "briaai/FIBO",
|
| | ModelCategory.TEXT_TO_IMAGE,
|
| | "Advanced text-to-image generation model",
|
| | True,
|
| | False,
|
| | ),
|
| | HFModel(
|
| | "FLUX.1 Dev",
|
| | "black-forest-labs/FLUX.1-dev",
|
| | ModelCategory.TEXT_TO_IMAGE,
|
| | "State-of-the-art image generation",
|
| | True,
|
| | False,
|
| | ),
|
| | HFModel(
|
| | "FLUX.1 Schnell",
|
| | "black-forest-labs/FLUX.1-schnell",
|
| | ModelCategory.TEXT_TO_IMAGE,
|
| | "Fast high-quality image generation",
|
| | True,
|
| | False,
|
| | ),
|
| | HFModel(
|
| | "Qwen Image",
|
| | "Qwen/Qwen-Image",
|
| | ModelCategory.TEXT_TO_IMAGE,
|
| | "Multilingual text-to-image model",
|
| | True,
|
| | False,
|
| | ),
|
| | HFModel(
|
| | "Stable Diffusion XL",
|
| | "stabilityai/stable-diffusion-xl-base-1.0",
|
| | ModelCategory.TEXT_TO_IMAGE,
|
| | "Popular high-resolution image generation",
|
| | True,
|
| | False,
|
| | ),
|
| | HFModel(
|
| | "Stable Diffusion 3.5 Large",
|
| | "stabilityai/stable-diffusion-3.5-large",
|
| | ModelCategory.TEXT_TO_IMAGE,
|
| | "Latest Stable Diffusion model",
|
| | True,
|
| | False,
|
| | ),
|
| | HFModel(
|
| | "HunyuanImage 3.0",
|
| | "tencent/HunyuanImage-3.0",
|
| | ModelCategory.TEXT_TO_IMAGE,
|
| | "Tencent's advanced image generation model",
|
| | True,
|
| | False,
|
| | ),
|
| | HFModel(
|
| | "Nitro-E",
|
| | "amd/Nitro-E",
|
| | ModelCategory.TEXT_TO_IMAGE,
|
| | "AMD's efficient image generation model",
|
| | True,
|
| | False,
|
| | ),
|
| | HFModel(
|
| | "Qwen Image Lightning",
|
| | "lightx2v/Qwen-Image-Lightning",
|
| | ModelCategory.TEXT_TO_IMAGE,
|
| | "Fast distilled image generation",
|
| | True,
|
| | False,
|
| | ),
|
| | ]
|
| |
|
| |
|
| | ASR_MODELS = [
|
| | HFModel(
|
| | "Whisper Large v3",
|
| | "openai/whisper-large-v3",
|
| | ModelCategory.AUTOMATIC_SPEECH_RECOGNITION,
|
| | "OpenAI's best multilingual speech recognition",
|
| | True,
|
| | False,
|
| | ),
|
| | HFModel(
|
| | "Whisper Large v3 Turbo",
|
| | "openai/whisper-large-v3-turbo",
|
| | ModelCategory.AUTOMATIC_SPEECH_RECOGNITION,
|
| | "Faster version of Whisper Large v3",
|
| | True,
|
| | False,
|
| | ),
|
| | HFModel(
|
| | "Parakeet TDT 0.6B v3",
|
| | "nvidia/parakeet-tdt-0.6b-v3",
|
| | ModelCategory.AUTOMATIC_SPEECH_RECOGNITION,
|
| | "NVIDIA's multilingual ASR model",
|
| | True,
|
| | False,
|
| | ),
|
| | HFModel(
|
| | "Canary Qwen 2.5B",
|
| | "nvidia/canary-qwen-2.5b",
|
| | ModelCategory.AUTOMATIC_SPEECH_RECOGNITION,
|
| | "NVIDIA's advanced ASR with Qwen integration",
|
| | True,
|
| | False,
|
| | ),
|
| | HFModel(
|
| | "Canary 1B v2",
|
| | "nvidia/canary-1b-v2",
|
| | ModelCategory.AUTOMATIC_SPEECH_RECOGNITION,
|
| | "Compact multilingual ASR model",
|
| | True,
|
| | False,
|
| | ),
|
| | HFModel(
|
| | "Whisper Small",
|
| | "openai/whisper-small",
|
| | ModelCategory.AUTOMATIC_SPEECH_RECOGNITION,
|
| | "Lightweight multilingual ASR",
|
| | True,
|
| | False,
|
| | ),
|
| | HFModel(
|
| | "Speaker Diarization 3.1",
|
| | "pyannote/speaker-diarization-3.1",
|
| | ModelCategory.AUTOMATIC_SPEECH_RECOGNITION,
|
| | "Advanced speaker identification and diarization",
|
| | True,
|
| | False,
|
| | ),
|
| | ]
|
| |
|
| |
|
| | TTS_MODELS = [
|
| | HFModel(
|
| | "SoulX Podcast 1.7B",
|
| | "Soul-AILab/SoulX-Podcast-1.7B",
|
| | ModelCategory.TEXT_TO_SPEECH,
|
| | "High-quality podcast-style speech synthesis",
|
| | True,
|
| | False,
|
| | ),
|
| | HFModel(
|
| | "NeuTTS Air",
|
| | "neuphonic/neutts-air",
|
| | ModelCategory.TEXT_TO_SPEECH,
|
| | "Advanced neural text-to-speech",
|
| | True,
|
| | False,
|
| | ),
|
| | HFModel(
|
| | "Kokoro 82M",
|
| | "hexgrad/Kokoro-82M",
|
| | ModelCategory.TEXT_TO_SPEECH,
|
| | "Lightweight high-quality TTS",
|
| | True,
|
| | False,
|
| | ),
|
| | HFModel(
|
| | "Kani TTS 400M EN",
|
| | "nineninesix/kani-tts-400m-en",
|
| | ModelCategory.TEXT_TO_SPEECH,
|
| | "English-focused text-to-speech model",
|
| | True,
|
| | False,
|
| | ),
|
| | HFModel(
|
| | "XTTS v2",
|
| | "coqui/XTTS-v2",
|
| | ModelCategory.TEXT_TO_SPEECH,
|
| | "Zero-shot voice cloning TTS",
|
| | True,
|
| | False,
|
| | ),
|
| | HFModel(
|
| | "Chatterbox",
|
| | "ResembleAI/chatterbox",
|
| | ModelCategory.TEXT_TO_SPEECH,
|
| | "Multilingual voice cloning",
|
| | True,
|
| | False,
|
| | ),
|
| | HFModel(
|
| | "VibeVoice 1.5B",
|
| | "microsoft/VibeVoice-1.5B",
|
| | ModelCategory.TEXT_TO_SPEECH,
|
| | "Microsoft's advanced TTS model",
|
| | True,
|
| | False,
|
| | ),
|
| | HFModel(
|
| | "OpenAudio S1 Mini",
|
| | "fishaudio/openaudio-s1-mini",
|
| | ModelCategory.TEXT_TO_SPEECH,
|
| | "Compact multilingual TTS",
|
| | True,
|
| | False,
|
| | ),
|
| | ]
|
| |
|
| |
|
| | IMAGE_CLASSIFICATION_MODELS = [
|
| | HFModel(
|
| | "NSFW Image Detection",
|
| | "Falconsai/nsfw_image_detection",
|
| | ModelCategory.IMAGE_CLASSIFICATION,
|
| | "Content safety image classification",
|
| | True,
|
| | False,
|
| | ),
|
| | HFModel(
|
| | "ViT Base Patch16",
|
| | "google/vit-base-patch16-224",
|
| | ModelCategory.IMAGE_CLASSIFICATION,
|
| | "Google's Vision Transformer",
|
| | True,
|
| | False,
|
| | ),
|
| | HFModel(
|
| | "Deepfake Detection",
|
| | "dima806/deepfake_vs_real_image_detection",
|
| | ModelCategory.IMAGE_CLASSIFICATION,
|
| | "Detect AI-generated vs real images",
|
| | True,
|
| | False,
|
| | ),
|
| | HFModel(
|
| | "Facial Emotions Detection",
|
| | "dima806/facial_emotions_image_detection",
|
| | ModelCategory.IMAGE_CLASSIFICATION,
|
| | "Recognize facial emotions",
|
| | True,
|
| | False,
|
| | ),
|
| | HFModel(
|
| | "SDXL Detector",
|
| | "Organika/sdxl-detector",
|
| | ModelCategory.IMAGE_CLASSIFICATION,
|
| | "Detect Stable Diffusion XL generated images",
|
| | True,
|
| | False,
|
| | ),
|
| | HFModel(
|
| | "ViT NSFW Detector",
|
| | "AdamCodd/vit-base-nsfw-detector",
|
| | ModelCategory.IMAGE_CLASSIFICATION,
|
| | "NSFW content detection with ViT",
|
| | True,
|
| | False,
|
| | ),
|
| | HFModel(
|
| | "ResNet 101",
|
| | "microsoft/resnet-101",
|
| | ModelCategory.IMAGE_CLASSIFICATION,
|
| | "Microsoft's ResNet for classification",
|
| | True,
|
| | False,
|
| | ),
|
| | ]
|
| |
|
| |
|
| | FEATURE_EXTRACTION_MODELS = [
|
| | HFModel(
|
| | "Sentence Transformers All MiniLM",
|
| | "sentence-transformers/all-MiniLM-L6-v2",
|
| | ModelCategory.FEATURE_EXTRACTION,
|
| | "Lightweight sentence embeddings",
|
| | True,
|
| | False,
|
| | ),
|
| | HFModel(
|
| | "BGE Large EN",
|
| | "BAAI/bge-large-en-v1.5",
|
| | ModelCategory.FEATURE_EXTRACTION,
|
| | "High-quality English embeddings",
|
| | True,
|
| | False,
|
| | ),
|
| | HFModel(
|
| | "E5 Large v2",
|
| | "intfloat/e5-large-v2",
|
| | ModelCategory.FEATURE_EXTRACTION,
|
| | "Multilingual text embeddings",
|
| | True,
|
| | False,
|
| | ),
|
| | ]
|
| |
|
| | TRANSLATION_MODELS = [
|
| | HFModel(
|
| | "M2M100 1.2B",
|
| | "facebook/m2m100_1.2B",
|
| | ModelCategory.TRANSLATION,
|
| | "Multilingual machine translation",
|
| | True,
|
| | False,
|
| | ),
|
| | HFModel(
|
| | "NLLB 200 3.3B",
|
| | "facebook/nllb-200-3.3B",
|
| | ModelCategory.TRANSLATION,
|
| | "No Language Left Behind translation",
|
| | True,
|
| | False,
|
| | ),
|
| | HFModel(
|
| | "mBART Large 50",
|
| | "facebook/mbart-large-50-many-to-many-mmt",
|
| | ModelCategory.TRANSLATION,
|
| | "Multilingual BART for translation",
|
| | True,
|
| | False,
|
| | ),
|
| | ]
|
| |
|
| | SUMMARIZATION_MODELS = [
|
| | HFModel(
|
| | "PEGASUS XSum",
|
| | "google/pegasus-xsum",
|
| | ModelCategory.SUMMARIZATION,
|
| | "Abstractive summarization model",
|
| | True,
|
| | False,
|
| | ),
|
| | HFModel(
|
| | "BART Large CNN",
|
| | "facebook/bart-large-cnn",
|
| | ModelCategory.SUMMARIZATION,
|
| | "CNN/DailyMail summarization",
|
| | True,
|
| | False,
|
| | ),
|
| | HFModel(
|
| | "T5 Base",
|
| | "t5-base",
|
| | ModelCategory.SUMMARIZATION,
|
| | "Text-to-Text Transfer Transformer",
|
| | True,
|
| | False,
|
| | ),
|
| | ]
|
| |
|
| |
|
| | VIDEO_GENERATION_MODELS = [
|
| | HFModel(
|
| | "Stable Video Diffusion",
|
| | "stabilityai/stable-video-diffusion-img2vid",
|
| | ModelCategory.TEXT_TO_VIDEO,
|
| | "Image-to-video generation model",
|
| | True,
|
| | False,
|
| | ),
|
| | HFModel(
|
| | "AnimateDiff",
|
| | "guoyww/animatediff",
|
| | ModelCategory.VIDEO_GENERATION,
|
| | "Text-to-video animation generation",
|
| | True,
|
| | False,
|
| | ),
|
| | HFModel(
|
| | "VideoCrafter",
|
| | "videogen/VideoCrafter",
|
| | ModelCategory.TEXT_TO_VIDEO,
|
| | "High-quality text-to-video generation",
|
| | True,
|
| | False,
|
| | ),
|
| | HFModel(
|
| | "Video ChatGPT",
|
| | "mbzuai-oryx/Video-ChatGPT-7B",
|
| | ModelCategory.VIDEO_TO_TEXT,
|
| | "Video understanding and description",
|
| | True,
|
| | False,
|
| | ),
|
| | HFModel(
|
| | "Video-BLIP",
|
| | "salesforce/video-blip-opt-2.7b",
|
| | ModelCategory.VIDEO_CLASSIFICATION,
|
| | "Video content analysis and classification",
|
| | True,
|
| | False,
|
| | ),
|
| | ]
|
| |
|
| |
|
| | CODE_GENERATION_MODELS = [
|
| | HFModel(
|
| | "CodeLlama 34B Instruct",
|
| | "codellama/CodeLlama-34b-Instruct-hf",
|
| | ModelCategory.CODE_GENERATION,
|
| | "Large instruction-tuned code generation model",
|
| | True,
|
| | True,
|
| | ),
|
| | HFModel(
|
| | "StarCoder2 15B",
|
| | "bigcode/starcoder2-15b",
|
| | ModelCategory.CODE_GENERATION,
|
| | "Advanced code generation and completion",
|
| | True,
|
| | False,
|
| | ),
|
| | HFModel(
|
| | "DeepSeek Coder V2",
|
| | "deepseek-ai/deepseek-coder-6.7b-instruct",
|
| | ModelCategory.CODE_GENERATION,
|
| | "Specialized coding assistant",
|
| | True,
|
| | False,
|
| | ),
|
| | HFModel(
|
| | "WizardCoder 34B",
|
| | "WizardLM/WizardCoder-Python-34B-V1.0",
|
| | ModelCategory.CODE_GENERATION,
|
| | "Python-focused code generation",
|
| | True,
|
| | False,
|
| | ),
|
| | HFModel(
|
| | "Phind CodeLlama",
|
| | "Phind/Phind-CodeLlama-34B-v2",
|
| | ModelCategory.CODE_GENERATION,
|
| | "Optimized for code explanation and debugging",
|
| | True,
|
| | False,
|
| | ),
|
| | HFModel(
|
| | "Code T5+",
|
| | "Salesforce/codet5p-770m",
|
| | ModelCategory.CODE_COMPLETION,
|
| | "Code understanding and generation",
|
| | True,
|
| | False,
|
| | ),
|
| | HFModel(
|
| | "InCoder",
|
| | "facebook/incoder-6B",
|
| | ModelCategory.CODE_COMPLETION,
|
| | "Bidirectional code generation",
|
| | True,
|
| | False,
|
| | ),
|
| | ]
|
| |
|
| |
|
| | THREE_D_MODELS = [
|
| | HFModel(
|
| | "Shap-E",
|
| | "openai/shap-e",
|
| | ModelCategory.TEXT_TO_3D,
|
| | "Text-to-3D shape generation",
|
| | True,
|
| | False,
|
| | ),
|
| | HFModel(
|
| | "Point-E",
|
| | "openai/point-e",
|
| | ModelCategory.TEXT_TO_3D,
|
| | "Text-to-3D point cloud generation",
|
| | True,
|
| | False,
|
| | ),
|
| | HFModel(
|
| | "DreamFusion",
|
| | "google/dreamfusion",
|
| | ModelCategory.IMAGE_TO_3D,
|
| | "Image-to-3D mesh generation",
|
| | True,
|
| | False,
|
| | ),
|
| | HFModel(
|
| | "Magic3D",
|
| | "nvidia/magic3d",
|
| | ModelCategory.THREE_D_GENERATION,
|
| | "High-quality 3D content creation",
|
| | True,
|
| | False,
|
| | ),
|
| | HFModel(
|
| | "GET3D",
|
| | "nvidia/get3d",
|
| | ModelCategory.MESH_GENERATION,
|
| | "3D mesh generation from text",
|
| | True,
|
| | False,
|
| | ),
|
| | ]
|
| |
|
| |
|
| | DOCUMENT_PROCESSING_MODELS = [
|
| | HFModel(
|
| | "TrOCR Large",
|
| | "microsoft/trocr-large-printed",
|
| | ModelCategory.OCR,
|
| | "Transformer-based OCR for printed text",
|
| | True,
|
| | False,
|
| | ),
|
| | HFModel(
|
| | "TrOCR Handwritten",
|
| | "microsoft/trocr-large-handwritten",
|
| | ModelCategory.HANDWRITING_RECOGNITION,
|
| | "Handwritten text recognition",
|
| | True,
|
| | False,
|
| | ),
|
| | HFModel(
|
| | "LayoutLMv3",
|
| | "microsoft/layoutlmv3-large",
|
| | ModelCategory.DOCUMENT_ANALYSIS,
|
| | "Document layout analysis and understanding",
|
| | True,
|
| | False,
|
| | ),
|
| | HFModel(
|
| | "Donut",
|
| | "naver-clova-ix/donut-base",
|
| | ModelCategory.DOCUMENT_ANALYSIS,
|
| | "OCR-free document understanding",
|
| | True,
|
| | False,
|
| | ),
|
| | HFModel(
|
| | "TableTransformer",
|
| | "microsoft/table-transformer-structure-recognition",
|
| | ModelCategory.TABLE_EXTRACTION,
|
| | "Table structure recognition",
|
| | True,
|
| | False,
|
| | ),
|
| | HFModel(
|
| | "FormNet",
|
| | "microsoft/formnet",
|
| | ModelCategory.FORM_PROCESSING,
|
| | "Form understanding and processing",
|
| | True,
|
| | False,
|
| | ),
|
| | ]
|
| |
|
| |
|
| | MULTIMODAL_MODELS = [
|
| | HFModel(
|
| | "BLIP-2",
|
| | "Salesforce/blip2-opt-2.7b",
|
| | ModelCategory.VISION_LANGUAGE,
|
| | "Vision-language understanding and generation",
|
| | True,
|
| | False,
|
| | ),
|
| | HFModel(
|
| | "InstructBLIP",
|
| | "Salesforce/instructblip-vicuna-7b",
|
| | ModelCategory.MULTIMODAL_REASONING,
|
| | "Instruction-following multimodal model",
|
| | True,
|
| | False,
|
| | ),
|
| | HFModel(
|
| | "LLaVA",
|
| | "liuhaotian/llava-v1.5-7b",
|
| | ModelCategory.VISUAL_QUESTION_ANSWERING,
|
| | "Large Language and Vision Assistant",
|
| | True,
|
| | False,
|
| | ),
|
| | HFModel(
|
| | "GPT-4V",
|
| | "openai/gpt-4-vision-preview",
|
| | ModelCategory.MULTIMODAL_CHAT,
|
| | "Advanced multimodal conversational AI",
|
| | True,
|
| | True,
|
| | ),
|
| | HFModel(
|
| | "Flamingo",
|
| | "deepmind/flamingo-9b",
|
| | ModelCategory.CROSS_MODAL_GENERATION,
|
| | "Few-shot learning for vision and language",
|
| | True,
|
| | False,
|
| | ),
|
| | ]
|
| |
|
| |
|
| | SPECIALIZED_AI_MODELS = [
|
| | HFModel(
|
| | "MusicGen",
|
| | "facebook/musicgen-medium",
|
| | ModelCategory.MUSIC_GENERATION,
|
| | "Text-to-music generation",
|
| | True,
|
| | False,
|
| | ),
|
| | HFModel(
|
| | "AudioCraft",
|
| | "facebook/audiocraft_musicgen_melody",
|
| | ModelCategory.MUSIC_GENERATION,
|
| | "Melody-conditioned music generation",
|
| | True,
|
| | False,
|
| | ),
|
| | HFModel(
|
| | "Real-ESRGAN",
|
| | "xinntao/realesrgan-x4plus",
|
| | ModelCategory.SUPER_RESOLUTION,
|
| | "Image super-resolution",
|
| | True,
|
| | False,
|
| | ),
|
| | HFModel(
|
| | "GFPGAN",
|
| | "TencentARC/GFPGAN",
|
| | ModelCategory.FACE_RESTORATION,
|
| | "Face restoration and enhancement",
|
| | True,
|
| | False,
|
| | ),
|
| | HFModel(
|
| | "LaMa",
|
| | "advimman/lama",
|
| | ModelCategory.IMAGE_INPAINTING,
|
| | "Large Mask Inpainting",
|
| | True,
|
| | False,
|
| | ),
|
| | HFModel(
|
| | "Background Remover",
|
| | "briaai/RMBG-1.4",
|
| | ModelCategory.BACKGROUND_REMOVAL,
|
| | "Automatic background removal",
|
| | True,
|
| | False,
|
| | ),
|
| | HFModel(
|
| | "Voice Cloner",
|
| | "coqui/XTTS-v2",
|
| | ModelCategory.VOICE_CLONING,
|
| | "Multilingual voice cloning",
|
| | True,
|
| | False,
|
| | ),
|
| | ]
|
| |
|
| |
|
| | CREATIVE_CONTENT_MODELS = [
|
| | HFModel(
|
| | "GPT-3.5 Creative",
|
| | "openai/gpt-3.5-turbo-instruct",
|
| | ModelCategory.CREATIVE_WRITING,
|
| | "Creative writing and storytelling",
|
| | True,
|
| | True,
|
| | ),
|
| | HFModel(
|
| | "Novel AI",
|
| | "novelai/genji-python-6b",
|
| | ModelCategory.STORY_GENERATION,
|
| | "Interactive story generation",
|
| | True,
|
| | False,
|
| | ),
|
| | HFModel(
|
| | "Poet Assistant",
|
| | "gpt2-poetry",
|
| | ModelCategory.POETRY_GENERATION,
|
| | "Poetry generation and analysis",
|
| | True,
|
| | False,
|
| | ),
|
| | HFModel(
|
| | "Blog Writer",
|
| | "google/flan-t5-large",
|
| | ModelCategory.BLOG_WRITING,
|
| | "Blog content creation",
|
| | True,
|
| | False,
|
| | ),
|
| | HFModel(
|
| | "Marketing Copy AI",
|
| | "microsoft/DialoGPT-large",
|
| | ModelCategory.MARKETING_COPY,
|
| | "Marketing content generation",
|
| | True,
|
| | False,
|
| | ),
|
| | ]
|
| |
|
| |
|
| | GAME_DEVELOPMENT_MODELS = [
|
| | HFModel(
|
| | "Character AI",
|
| | "character-ai/character-generator",
|
| | ModelCategory.CHARACTER_GENERATION,
|
| | "Game character generation and design",
|
| | True,
|
| | False,
|
| | ),
|
| | HFModel(
|
| | "Level Designer",
|
| | "unity/level-generator",
|
| | ModelCategory.LEVEL_GENERATION,
|
| | "Game level and environment generation",
|
| | True,
|
| | False,
|
| | ),
|
| | HFModel(
|
| | "Dialogue Writer",
|
| | "bioware/dialogue-generator",
|
| | ModelCategory.DIALOGUE_GENERATION,
|
| | "Game dialogue and narrative generation",
|
| | True,
|
| | False,
|
| | ),
|
| | HFModel(
|
| | "Asset Creator",
|
| | "epic/asset-generator",
|
| | ModelCategory.GAME_ASSET_GENERATION,
|
| | "Game asset and texture generation",
|
| | True,
|
| | False,
|
| | ),
|
| | ]
|
| |
|
| |
|
| | SCIENCE_RESEARCH_MODELS = [
|
| | HFModel(
|
| | "AlphaFold",
|
| | "deepmind/alphafold2",
|
| | ModelCategory.PROTEIN_FOLDING,
|
| | "Protein structure prediction",
|
| | True,
|
| | False,
|
| | ),
|
| | HFModel(
|
| | "ChemBERTa",
|
| | "DeepChem/ChemBERTa-77M-MLM",
|
| | ModelCategory.MOLECULE_GENERATION,
|
| | "Chemical compound analysis",
|
| | True,
|
| | False,
|
| | ),
|
| | HFModel(
|
| | "SciBERT",
|
| | "allenai/scibert_scivocab_uncased",
|
| | ModelCategory.SCIENTIFIC_WRITING,
|
| | "Scientific text understanding",
|
| | True,
|
| | False,
|
| | ),
|
| | HFModel(
|
| | "Research Assistant",
|
| | "microsoft/specter2",
|
| | ModelCategory.RESEARCH_ASSISTANCE,
|
| | "Research paper analysis and recommendations",
|
| | True,
|
| | False,
|
| | ),
|
| | HFModel(
|
| | "Data Analyst",
|
| | "microsoft/data-copilot",
|
| | ModelCategory.DATA_ANALYSIS,
|
| | "Automated data analysis and insights",
|
| | True,
|
| | False,
|
| | ),
|
| | ]
|
| |
|
| |
|
| | BUSINESS_PRODUCTIVITY_MODELS = [
|
| | HFModel(
|
| | "Email Assistant",
|
| | "microsoft/email-generator",
|
| | ModelCategory.EMAIL_GENERATION,
|
| | "Professional email composition",
|
| | True,
|
| | False,
|
| | ),
|
| | HFModel(
|
| | "Presentation AI",
|
| | "gamma/presentation-generator",
|
| | ModelCategory.PRESENTATION_CREATION,
|
| | "Automated presentation creation",
|
| | True,
|
| | False,
|
| | ),
|
| | HFModel(
|
| | "Report Writer",
|
| | "openai/report-generator",
|
| | ModelCategory.REPORT_GENERATION,
|
| | "Business report generation",
|
| | True,
|
| | False,
|
| | ),
|
| | HFModel(
|
| | "Meeting Summarizer",
|
| | "microsoft/meeting-summarizer",
|
| | ModelCategory.MEETING_SUMMARIZATION,
|
| | "Meeting notes and action items",
|
| | True,
|
| | False,
|
| | ),
|
| | HFModel(
|
| | "Project Planner",
|
| | "atlassian/project-ai",
|
| | ModelCategory.PROJECT_PLANNING,
|
| | "Project planning and management",
|
| | True,
|
| | False,
|
| | ),
|
| | ]
|
| |
|
| |
|
| | AI_TEACHER_MODELS = [
|
| |
|
| | HFModel(
|
| | "AI Tutor Interactive",
|
| | "microsoft/DialoGPT-medium",
|
| | ModelCategory.AI_TUTORING,
|
| | "Interactive AI tutor for conversational learning",
|
| | True,
|
| | False,
|
| | 2048,
|
| | True,
|
| | ),
|
| | HFModel(
|
| | "Goal-Oriented Tutor",
|
| | "microsoft/GODEL-v1_1-large-seq2seq",
|
| | ModelCategory.AI_TUTORING,
|
| | "Goal-oriented conversational AI for personalized tutoring",
|
| | True,
|
| | False,
|
| | 2048,
|
| | True,
|
| | ),
|
| | HFModel(
|
| | "Code Instructor AI",
|
| | "microsoft/codebert-base",
|
| | ModelCategory.CODING_INSTRUCTION,
|
| | "AI coding instructor for programming education",
|
| | True,
|
| | False,
|
| | 1024,
|
| | False,
|
| | ),
|
| | HFModel(
|
| | "deepmind/flamingo-base",
|
| | "ADAPTIVE_LEARNING",
|
| | ModelCategory.ADAPTIVE_LEARNING,
|
| | "Multimodal AI for adaptive learning experiences",
|
| | True,
|
| | False,
|
| | 1024,
|
| | True,
|
| | ),
|
| |
|
| | HFModel(
|
| | "gpt2-medium",
|
| | "EDUCATIONAL_CONTENT",
|
| | ModelCategory.EDUCATIONAL_CONTENT,
|
| | "Educational content generation for curriculum development",
|
| | True,
|
| | False,
|
| | 1024,
|
| | True,
|
| | ),
|
| | HFModel(
|
| | "facebook/bart-large-cnn",
|
| | "LESSON_PLANNING",
|
| | ModelCategory.LESSON_PLANNING,
|
| | "Lesson plan generation and educational summarization",
|
| | True,
|
| | False,
|
| | 1024,
|
| | True,
|
| | ),
|
| | HFModel(
|
| | "microsoft/prophetnet-large-uncased",
|
| | "STUDY_GUIDE_CREATION",
|
| | ModelCategory.STUDY_GUIDE_CREATION,
|
| | "Study guide and learning material generation",
|
| | True,
|
| | False,
|
| | 1024,
|
| | True,
|
| | ),
|
| | HFModel(
|
| | "bigscience/bloom-560m",
|
| | "EDUCATIONAL_CONTENT",
|
| | ModelCategory.EDUCATIONAL_CONTENT,
|
| | "Multilingual educational content for global learning",
|
| | True,
|
| | False,
|
| | 1024,
|
| | True,
|
| | ),
|
| |
|
| | HFModel(
|
| | "microsoft/codebert-base",
|
| | "CODING_INSTRUCTION",
|
| | ModelCategory.CODING_INSTRUCTION,
|
| | "Programming education and code explanation",
|
| | True,
|
| | False,
|
| | 1024,
|
| | True,
|
| | ),
|
| | HFModel(
|
| | "allenai/scibert_scivocab_uncased",
|
| | "SCIENCE_TUTORING",
|
| | ModelCategory.SCIENCE_TUTORING,
|
| | "Science education and scientific concept explanation",
|
| | True,
|
| | False,
|
| | 1024,
|
| | True,
|
| | ),
|
| | HFModel(
|
| | "google/flan-t5-base",
|
| | "SUBJECT_TEACHING",
|
| | ModelCategory.SUBJECT_TEACHING,
|
| | "Multi-subject teaching AI with instruction following",
|
| | True,
|
| | False,
|
| | 1024,
|
| | True,
|
| | ),
|
| | HFModel(
|
| | "microsoft/unixcoder-base",
|
| | "CODING_INSTRUCTION",
|
| | ModelCategory.CODING_INSTRUCTION,
|
| | "Advanced programming instruction and debugging help",
|
| | True,
|
| | False,
|
| | 1024,
|
| | True,
|
| | ),
|
| |
|
| | HFModel(
|
| | "microsoft/DialoGPT-small",
|
| | "MATH_TUTORING",
|
| | ModelCategory.MATH_TUTORING,
|
| | "Interactive math tutoring and problem solving",
|
| | True,
|
| | False,
|
| | 1024,
|
| | True,
|
| | ),
|
| | HFModel(
|
| | "facebook/galactica-125m",
|
| | "SCIENCE_TUTORING",
|
| | ModelCategory.SCIENCE_TUTORING,
|
| | "Scientific knowledge and research education",
|
| | True,
|
| | False,
|
| | 1024,
|
| | True,
|
| | ),
|
| | HFModel(
|
| | "microsoft/graphcodebert-base",
|
| | "CODING_INSTRUCTION",
|
| | ModelCategory.CODING_INSTRUCTION,
|
| | "Code structure and algorithm education",
|
| | True,
|
| | False,
|
| | 1024,
|
| | True,
|
| | ),
|
| | HFModel(
|
| | "deepmind/mathematical-reasoning",
|
| | "MATH_TUTORING",
|
| | ModelCategory.MATH_TUTORING,
|
| | "Mathematical reasoning and proof assistance",
|
| | True,
|
| | False,
|
| | 1024,
|
| | True,
|
| | ),
|
| |
|
| | HFModel(
|
| | "microsoft/prophetnet-large-uncased-cnndm",
|
| | "LANGUAGE_TUTORING",
|
| | ModelCategory.LANGUAGE_TUTORING,
|
| | "Language learning and literature analysis",
|
| | True,
|
| | False,
|
| | 1024,
|
| | True,
|
| | ),
|
| | HFModel(
|
| | "facebook/mbart-large-50-many-to-many-mmt",
|
| | "LANGUAGE_TUTORING",
|
| | ModelCategory.LANGUAGE_TUTORING,
|
| | "Multilingual language education and translation",
|
| | True,
|
| | False,
|
| | 1024,
|
| | True,
|
| | ),
|
| | HFModel(
|
| | "google/electra-base-discriminator",
|
| | "LANGUAGE_TUTORING",
|
| | ModelCategory.LANGUAGE_TUTORING,
|
| | "Language comprehension and grammar instruction",
|
| | True,
|
| | False,
|
| | 1024,
|
| | True,
|
| | ),
|
| |
|
| | HFModel(
|
| | "microsoft/DialoGPT-large",
|
| | "QUIZ_GENERATION",
|
| | ModelCategory.QUIZ_GENERATION,
|
| | "Interactive quiz and assessment generation",
|
| | True,
|
| | False,
|
| | 1024,
|
| | True,
|
| | ),
|
| | HFModel(
|
| | "facebook/bart-large",
|
| | "LEARNING_ASSESSMENT",
|
| | ModelCategory.LEARNING_ASSESSMENT,
|
| | "Learning progress assessment and feedback",
|
| | True,
|
| | False,
|
| | 1024,
|
| | True,
|
| | ),
|
| | HFModel(
|
| | "google/t5-base",
|
| | "QUIZ_GENERATION",
|
| | ModelCategory.QUIZ_GENERATION,
|
| | "Question generation for educational assessment",
|
| | True,
|
| | False,
|
| | 1024,
|
| | True,
|
| | ),
|
| | HFModel(
|
| | "microsoft/unilm-base-cased",
|
| | "EXAM_PREPARATION",
|
| | ModelCategory.EXAM_PREPARATION,
|
| | "Exam preparation and practice test generation",
|
| | True,
|
| | False,
|
| | 1024,
|
| | True,
|
| | ),
|
| |
|
| | HFModel(
|
| | "huggingface/distilbert-base-uncased",
|
| | "PERSONALIZED_LEARNING",
|
| | ModelCategory.PERSONALIZED_LEARNING,
|
| | "Personalized learning path recommendation",
|
| | True,
|
| | False,
|
| | 1024,
|
| | True,
|
| | ),
|
| | HFModel(
|
| | "microsoft/layoutlm-base-uncased",
|
| | "LEARNING_ANALYTICS",
|
| | ModelCategory.LEARNING_ANALYTICS,
|
| | "Educational document analysis and insights",
|
| | True,
|
| | False,
|
| | 1024,
|
| | True,
|
| | ),
|
| | HFModel(
|
| | "facebook/opt-125m",
|
| | "ADAPTIVE_LEARNING",
|
| | ModelCategory.ADAPTIVE_LEARNING,
|
| | "Adaptive learning system with dynamic content",
|
| | True,
|
| | False,
|
| | 1024,
|
| | True,
|
| | ),
|
| |
|
| | HFModel(
|
| | "microsoft/deberta-base",
|
| | "CONCEPT_EXPLANATION",
|
| | ModelCategory.CONCEPT_EXPLANATION,
|
| | "Clear concept explanation and knowledge breakdown",
|
| | True,
|
| | False,
|
| | 1024,
|
| | True,
|
| | ),
|
| | HFModel(
|
| | "google/pegasus-xsum",
|
| | "CONCEPT_EXPLANATION",
|
| | ModelCategory.CONCEPT_EXPLANATION,
|
| | "Concept summarization and explanation",
|
| | True,
|
| | False,
|
| | 1024,
|
| | True,
|
| | ),
|
| | HFModel(
|
| | "facebook/bart-base",
|
| | "CONCEPT_EXPLANATION",
|
| | ModelCategory.CONCEPT_EXPLANATION,
|
| | "Interactive concept teaching and clarification",
|
| | True,
|
| | False,
|
| | 1024,
|
| | True,
|
| | ),
|
| |
|
| | HFModel(
|
| | "microsoft/codebert-base-mlm",
|
| | "HOMEWORK_ASSISTANCE",
|
| | ModelCategory.HOMEWORK_ASSISTANCE,
|
| | "Programming homework help and debugging",
|
| | True,
|
| | False,
|
| | 1024,
|
| | True,
|
| | ),
|
| | HFModel(
|
| | "google/flan-t5-small",
|
| | "HOMEWORK_ASSISTANCE",
|
| | ModelCategory.HOMEWORK_ASSISTANCE,
|
| | "General homework assistance across subjects",
|
| | True,
|
| | False,
|
| | 1024,
|
| | True,
|
| | ),
|
| | HFModel(
|
| | "facebook/mbart-large-cc25",
|
| | "HOMEWORK_ASSISTANCE",
|
| | ModelCategory.HOMEWORK_ASSISTANCE,
|
| | "Multilingual homework support and explanation",
|
| | True,
|
| | False,
|
| | 1024,
|
| | True,
|
| | ),
|
| |
|
| | HFModel(
|
| | "microsoft/prophetnet-base-uncased",
|
| | "CURRICULUM_DESIGN",
|
| | ModelCategory.CURRICULUM_DESIGN,
|
| | "Curriculum planning and educational structure design",
|
| | True,
|
| | False,
|
| | 1024,
|
| | True,
|
| | ),
|
| | HFModel(
|
| | "google/t5-small",
|
| | "LESSON_PLANNING",
|
| | ModelCategory.LESSON_PLANNING,
|
| | "Detailed lesson planning and activity design",
|
| | True,
|
| | False,
|
| | 1024,
|
| | True,
|
| | ),
|
| | HFModel(
|
| | "facebook/bart-large-xsum",
|
| | "CURRICULUM_DESIGN",
|
| | ModelCategory.CURRICULUM_DESIGN,
|
| | "Educational program summarization and design",
|
| | True,
|
| | False,
|
| | 1024,
|
| | True,
|
| | ),
|
| |
|
| | HFModel(
|
| | "microsoft/DialoGPT-base",
|
| | "EDUCATIONAL_GAMES",
|
| | ModelCategory.EDUCATIONAL_GAMES,
|
| | "Interactive educational games and learning activities",
|
| | True,
|
| | False,
|
| | 1024,
|
| | True,
|
| | ),
|
| | HFModel(
|
| | "huggingface/bert-base-uncased",
|
| | "EDUCATIONAL_GAMES",
|
| | ModelCategory.EDUCATIONAL_GAMES,
|
| | "Educational quiz games and interactive learning",
|
| | True,
|
| | False,
|
| | 1024,
|
| | True,
|
| | ),
|
| |
|
| | HFModel(
|
| | "microsoft/deberta-large",
|
| | "HISTORY_TUTORING",
|
| | ModelCategory.HISTORY_TUTORING,
|
| | "Historical analysis and social studies education",
|
| | True,
|
| | False,
|
| | 1024,
|
| | True,
|
| | ),
|
| | HFModel(
|
| | "facebook/opt-350m",
|
| | "HISTORY_TUTORING",
|
| | ModelCategory.HISTORY_TUTORING,
|
| | "Interactive history lessons and timeline explanation",
|
| | True,
|
| | False,
|
| | 1024,
|
| | True,
|
| | ),
|
| |
|
| | HFModel(
|
| | "microsoft/unilm-large-cased",
|
| | "LEARNING_ANALYTICS",
|
| | ModelCategory.LEARNING_ANALYTICS,
|
| | "Advanced learning analytics and progress tracking",
|
| | True,
|
| | False,
|
| | 1024,
|
| | True,
|
| | ),
|
| | HFModel(
|
| | "google/electra-large-discriminator",
|
| | "PERSONALIZED_LEARNING",
|
| | ModelCategory.PERSONALIZED_LEARNING,
|
| | "Advanced personalized learning with AI adaptation",
|
| | True,
|
| | False,
|
| | 1024,
|
| | True,
|
| | ),
|
| | HFModel(
|
| | "facebook/mbart-large-50",
|
| | "ADAPTIVE_LEARNING",
|
| | ModelCategory.ADAPTIVE_LEARNING,
|
| | "Multilingual adaptive learning system",
|
| | True,
|
| | False,
|
| | 1024,
|
| | True,
|
| | ),
|
| | ]
|
| |
|
| |
|
class HuggingFaceInference:
    """Asynchronous client for the Hugging Face Inference API.

    The HTTP session is created in ``__aenter__`` and closed in ``__aexit__``,
    so instances are meant to be used with ``async with``::

        async with HuggingFaceInference(token) as hf:
            summary = await hf.summarization(model_id, text)

    Every task method builds a JSON payload (``inputs`` plus ``parameters``)
    and POSTs it to ``base_url + model_id``. Extra keyword arguments given to
    a task method are forwarded inside ``parameters`` and override the
    method's own defaults.
    """

    def __init__(
        self,
        api_token: str,
        base_url: str = "https://api-inference.huggingface.co/models/",
    ):
        """Store the connection settings; no session is opened here.

        Args:
            api_token: Token sent as ``Authorization: Bearer <token>``.
            base_url: Prefix the model id is appended to (no separator is
                added, so it should end with ``/``).
        """
        self.api_token = api_token
        self.base_url = base_url
        self.session = None

    async def __aenter__(self):
        """Open the aiohttp session (300 s total timeout) and return self."""
        self.session = aiohttp.ClientSession(
            headers={"Authorization": f"Bearer {self.api_token}"},
            timeout=aiohttp.ClientTimeout(total=300),
        )
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Close the session if one was opened."""
        if self.session:
            await self.session.close()
            # Drop the closed session so later misuse hits the clear error in
            # _require_session instead of a low-level "session is closed".
            self.session = None

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    def _require_session(self):
        """Return the open HTTP session.

        Raises:
            RuntimeError: If the client is used outside ``async with``.
        """
        if self.session is None:
            raise RuntimeError(
                "HuggingFaceInference has no open session; "
                "use it inside 'async with HuggingFaceInference(...)'"
            )
        return self.session

    @staticmethod
    def _encode_binary(data: bytes) -> str:
        """Return *data* as a base64 ASCII string for embedding in JSON."""
        return base64.b64encode(data).decode()

    @staticmethod
    async def _estimated_loading_time(response, default: float = 30) -> float:
        """Read ``estimated_time`` from a 503 response body.

        Falls back to *default* when the body is not JSON, is not an object,
        or carries a non-numeric value, so a malformed error body cannot
        break the retry loop.
        """
        try:
            # content_type=None: accept the body even if the server did not
            # label it as application/json.
            error_info = await response.json(content_type=None)
        except ValueError:
            return default
        if not isinstance(error_info, dict):
            return default
        try:
            return max(float(error_info.get("estimated_time", default)), 0.0)
        except (TypeError, ValueError):
            return default

    async def _request(
        self,
        model_id: str,
        payload: Dict[str, Any],
        expect_json: bool = True,
        max_retries: int = 10,
    ) -> Union[Dict[str, Any], bytes]:
        """POST *payload* to the model endpoint and return the response body.

        A 503 response is treated as "model is loading": the call sleeps for
        the reported ``estimated_time`` (capped at 60 s per wait) and tries
        again, up to *max_retries* times.

        Args:
            model_id: Model id appended to ``base_url``.
            payload: JSON-serialisable request body.
            expect_json: When true return the decoded JSON body, otherwise
                the raw response bytes.
            max_retries: Maximum number of retries after a 503 response.

        Raises:
            RuntimeError: If no session is open.
            Exception: For any non-200 status (including a 503 once the
                retries are used up); the message carries the status code
                and response text.
        """
        url = f"{self.base_url}{model_id}"

        try:
            session = self._require_session()
            retries_left = max_retries
            while True:
                async with session.post(url, json=payload) as response:
                    if response.status == 200:
                        if expect_json:
                            return await response.json()
                        return await response.read()

                    if response.status != 503 or retries_left <= 0:
                        error_text = await response.text()
                        raise Exception(
                            f"API request failed with status {response.status}: {error_text}"
                        )

                    estimated_time = await self._estimated_loading_time(response)

                # The response has been released at this point, so the
                # connection is not held open while waiting.
                retries_left -= 1
                logger.info(f"Model {model_id} is loading, waiting {estimated_time}s")
                await asyncio.sleep(min(estimated_time, 60))

        except Exception as e:
            logger.error(f"Error calling Hugging Face API for {model_id}: {e}")
            raise

    async def _stream_request(self, model_id: str, payload: Dict[str, Any]):
        """Async generator yielding the decoded chunks of a streamed response.

        Sends *payload* with ``"stream": True`` added and yields every
        non-empty chunk obtained by iterating ``response.content``, decoded
        as UTF-8.

        Raises:
            RuntimeError: If no session is open.
            Exception: If the response status is not 200.
        """
        url = f"{self.base_url}{model_id}"
        # Send a copy so the caller's payload dict is left untouched.
        body = {**payload, "stream": True}

        try:
            session = self._require_session()
            async with session.post(url, json=body) as response:
                if response.status == 200:
                    async for chunk in response.content:
                        if chunk:
                            yield chunk.decode("utf-8")
                else:
                    error_text = await response.text()
                    raise Exception(
                        f"Streaming request failed with status {response.status}: {error_text}"
                    )

        except Exception as e:
            logger.error(f"Error streaming from Hugging Face API for {model_id}: {e}")
            raise

    # ------------------------------------------------------------------
    # Core tasks
    # ------------------------------------------------------------------

    async def text_generation(
        self,
        model_id: str,
        prompt: str,
        max_tokens: int = 100,
        temperature: float = 0.7,
        stream: bool = False,
        **kwargs,
    ) -> Any:
        """Generate text using a text generation model.

        Args:
            model_id: Model to call.
            prompt: Input text.
            max_tokens: Sent as ``max_new_tokens``.
            temperature: Sampling temperature.
            stream: When true, return an async iterator of decoded chunks
                instead of the decoded JSON response.
            **kwargs: Extra generation parameters.

        Returns:
            The decoded JSON response, or, with ``stream=True``, an async
            iterator to consume with ``async for`` while the session is
            still open.
        """
        payload = {
            "inputs": prompt,
            "parameters": {
                "max_new_tokens": max_tokens,
                "temperature": temperature,
                "do_sample": True,
                **kwargs,
            },
            "options": {"use_cache": False},
        }

        if stream:
            # _stream_request is an async generator: awaiting it raises
            # TypeError, so hand the iterator back to the caller instead.
            return self._stream_request(model_id, payload)
        return await self._request(model_id, payload)

    async def text_to_image(
        self,
        model_id: str,
        prompt: str,
        negative_prompt: Optional[str] = None,
        **kwargs,
    ) -> bytes:
        """Generate an image from a text prompt and return the raw bytes.

        ``negative_prompt`` is only sent when it is non-empty.
        """
        parameters: Dict[str, Any] = {}
        if negative_prompt:
            parameters["negative_prompt"] = negative_prompt
        parameters.update(kwargs)

        payload = {"inputs": prompt, "parameters": parameters}
        return await self._request(model_id, payload, expect_json=False)

    async def automatic_speech_recognition(
        self, model_id: str, audio_data: bytes, **kwargs
    ) -> Dict[str, Any]:
        """Transcribe audio to text; the audio is sent base64-encoded."""
        payload = {"inputs": self._encode_binary(audio_data), "parameters": kwargs}
        return await self._request(model_id, payload)

    async def text_to_speech(self, model_id: str, text: str, **kwargs) -> bytes:
        """Convert text to speech and return the raw audio bytes."""
        payload = {"inputs": text, "parameters": kwargs}
        return await self._request(model_id, payload, expect_json=False)

    async def image_classification(
        self, model_id: str, image_data: bytes, **kwargs
    ) -> Dict[str, Any]:
        """Classify an image; the image is sent base64-encoded."""
        payload = {"inputs": self._encode_binary(image_data), "parameters": kwargs}
        return await self._request(model_id, payload)

    async def feature_extraction(
        self, model_id: str, texts: Union[str, List[str]], **kwargs
    ) -> Dict[str, Any]:
        """Extract embeddings for one text or a list of texts."""
        payload = {"inputs": texts, "parameters": kwargs}
        return await self._request(model_id, payload)

    async def translation(
        self,
        model_id: str,
        text: str,
        src_lang: Optional[str] = None,
        tgt_lang: Optional[str] = None,
        **kwargs,
    ) -> Dict[str, Any]:
        """Translate text between languages.

        ``src_lang`` / ``tgt_lang`` are only sent when they are non-empty.
        """
        parameters: Dict[str, Any] = {}
        if src_lang:
            parameters["src_lang"] = src_lang
        if tgt_lang:
            parameters["tgt_lang"] = tgt_lang
        parameters.update(kwargs)

        payload = {"inputs": text, "parameters": parameters}
        return await self._request(model_id, payload)

    async def summarization(
        self,
        model_id: str,
        text: str,
        max_length: int = 150,
        min_length: int = 30,
        **kwargs,
    ) -> Dict[str, Any]:
        """Summarize text with the given length bounds."""
        payload = {
            "inputs": text,
            "parameters": {
                "max_length": max_length,
                "min_length": min_length,
                **kwargs,
            },
        }
        return await self._request(model_id, payload)

    async def question_answering(
        self, model_id: str, question: str, context: str, **kwargs
    ) -> Dict[str, Any]:
        """Answer a question based on the supplied context."""
        payload = {
            "inputs": {"question": question, "context": context},
            "parameters": kwargs,
        }
        return await self._request(model_id, payload)

    async def zero_shot_classification(
        self, model_id: str, text: str, candidate_labels: List[str], **kwargs
    ) -> Dict[str, Any]:
        """Classify text against caller-supplied candidate labels."""
        payload = {
            "inputs": text,
            "parameters": {"candidate_labels": candidate_labels, **kwargs},
        }
        return await self._request(model_id, payload)

    async def conversational(
        self,
        model_id: str,
        text: str,
        conversation_history: Optional[List[Dict[str, str]]] = None,
        **kwargs,
    ) -> Dict[str, Any]:
        """Have a conversation with a model.

        Args:
            model_id: Model to call.
            text: The new user message.
            conversation_history: Earlier turns as dicts; the ``"user"``
                values become ``past_user_inputs`` and the ``"bot"`` values
                become ``generated_responses``. Entries lacking a key are
                skipped for that list. Nothing is sent when the history is
                empty or ``None``.
            **kwargs: Extra parameters.
        """
        inputs: Dict[str, Any] = {"text": text}
        if conversation_history:
            inputs["past_user_inputs"] = [
                turn["user"] for turn in conversation_history if "user" in turn
            ]
            inputs["generated_responses"] = [
                turn["bot"] for turn in conversation_history if "bot" in turn
            ]

        payload = {"inputs": inputs, "parameters": kwargs}
        return await self._request(model_id, payload)

    # ------------------------------------------------------------------
    # Extended categories
    # ------------------------------------------------------------------

    async def text_to_video(
        self, model_id: str, prompt: str, **kwargs
    ) -> Dict[str, Any]:
        """Generate video from a text prompt.

        Defaults: duration 5, fps 24, width 512, height 512.
        """
        payload = {
            "inputs": prompt,
            "parameters": {
                "duration": 5,
                "fps": 24,
                "width": 512,
                "height": 512,
                **kwargs,
            },
        }
        return await self._request(model_id, payload)

    async def video_to_text(
        self, model_id: str, video_data: bytes, **kwargs
    ) -> Dict[str, Any]:
        """Analyze a video (sent base64-encoded) and return the JSON result."""
        payload = {
            "inputs": {"video": self._encode_binary(video_data)},
            "parameters": kwargs,
        }
        return await self._request(model_id, payload)

    async def code_generation(
        self, model_id: str, prompt: str, **kwargs
    ) -> Dict[str, Any]:
        """Generate code from a natural language prompt.

        Defaults: max_length 500, temperature 0.2, language "python".
        """
        payload = {
            "inputs": prompt,
            "parameters": {
                "max_length": 500,
                "temperature": 0.2,
                "language": "python",
                **kwargs,
            },
        }
        return await self._request(model_id, payload)

    async def code_completion(
        self, model_id: str, code: str, **kwargs
    ) -> Dict[str, Any]:
        """Complete partial code.

        Defaults: max_length 100, temperature 0.1.
        """
        payload = {
            "inputs": code,
            "parameters": {"max_length": 100, "temperature": 0.1, **kwargs},
        }
        return await self._request(model_id, payload)

    async def text_to_3d(self, model_id: str, prompt: str, **kwargs) -> Dict[str, Any]:
        """Generate a 3D model from a text description.

        Defaults: resolution 64, format "obj".
        """
        payload = {
            "inputs": prompt,
            "parameters": {"resolution": 64, "format": "obj", **kwargs},
        }
        return await self._request(model_id, payload)

    async def image_to_3d(
        self, model_id: str, image_data: bytes, **kwargs
    ) -> Dict[str, Any]:
        """Generate a 3D model from an image (sent base64-encoded)."""
        payload = {
            "inputs": {"image": self._encode_binary(image_data)},
            "parameters": kwargs,
        }
        return await self._request(model_id, payload)

    async def ocr(self, model_id: str, image_data: bytes, **kwargs) -> Dict[str, Any]:
        """Perform optical character recognition on an image.

        Defaults: language "en".
        """
        payload = {
            "inputs": {"image": self._encode_binary(image_data)},
            "parameters": {"language": "en", **kwargs},
        }
        return await self._request(model_id, payload)

    async def document_analysis(
        self, model_id: str, document_data: bytes, **kwargs
    ) -> Dict[str, Any]:
        """Analyze a document (sent base64-encoded)."""
        payload = {
            "inputs": {"document": self._encode_binary(document_data)},
            "parameters": kwargs,
        }
        return await self._request(model_id, payload)

    async def vision_language(
        self, model_id: str, image_data: bytes, text: str, **kwargs
    ) -> Dict[str, Any]:
        """Process an image (sent base64-encoded) together with text."""
        payload = {
            "inputs": {"image": self._encode_binary(image_data), "text": text},
            "parameters": kwargs,
        }
        return await self._request(model_id, payload)

    async def multimodal_reasoning(
        self, model_id: str, inputs: Dict[str, Any], **kwargs
    ) -> Dict[str, Any]:
        """Send a caller-built ``inputs`` mapping to a multimodal model."""
        payload = {"inputs": inputs, "parameters": kwargs}
        return await self._request(model_id, payload)

    async def music_generation(
        self, model_id: str, prompt: str, **kwargs
    ) -> Dict[str, Any]:
        """Generate music from a text prompt.

        Defaults: duration 30, bpm 120, genre "electronic".
        """
        payload = {
            "inputs": prompt,
            "parameters": {
                "duration": 30,
                "bpm": 120,
                "genre": "electronic",
                **kwargs,
            },
        }
        return await self._request(model_id, payload)

    async def voice_cloning(
        self, model_id: str, text: str, voice_sample: bytes, **kwargs
    ) -> bytes:
        """Synthesize *text* using a voice sample; returns raw response bytes."""
        payload = {
            "inputs": {
                "text": text,
                "voice_sample": self._encode_binary(voice_sample),
            },
            "parameters": kwargs,
        }
        return await self._request(model_id, payload, expect_json=False)

    async def super_resolution(
        self, model_id: str, image_data: bytes, **kwargs
    ) -> bytes:
        """Enhance image resolution; returns raw response bytes.

        Defaults: scale_factor 4.
        """
        payload = {
            "inputs": {"image": self._encode_binary(image_data)},
            "parameters": {"scale_factor": 4, **kwargs},
        }
        return await self._request(model_id, payload, expect_json=False)

    async def background_removal(
        self, model_id: str, image_data: bytes, **kwargs
    ) -> bytes:
        """Remove the background from an image; returns raw response bytes."""
        payload = {
            "inputs": {"image": self._encode_binary(image_data)},
            "parameters": kwargs,
        }
        return await self._request(model_id, payload, expect_json=False)

    async def creative_writing(
        self, model_id: str, prompt: str, **kwargs
    ) -> Dict[str, Any]:
        """Generate creative content.

        Defaults: max_length 1000, creativity 0.8, genre "general".
        """
        payload = {
            "inputs": prompt,
            "parameters": {
                "max_length": 1000,
                "creativity": 0.8,
                "genre": "general",
                **kwargs,
            },
        }
        return await self._request(model_id, payload)

    async def business_document(
        self, model_id: str, document_type: str, context: str, **kwargs
    ) -> Dict[str, Any]:
        """Generate a business document.

        The prompt sent is ``"Generate {document_type}: {context}"``.
        Defaults: format "professional", length "medium".
        """
        payload = {
            "inputs": f"Generate {document_type}: {context}",
            "parameters": {"format": "professional", "length": "medium", **kwargs},
        }
        return await self._request(model_id, payload)
|
| |
|
| |
|
class HuggingFaceModelManager:
    """Manager for all Hugging Face model operations.

    Combines the model catalogue (``HuggingFaceModels``) with a dispatcher
    that routes a ``ModelCategory`` to the matching ``HuggingFaceInference``
    method.
    """

    # Ordered routing table used by call_model: each entry pairs a
    # HuggingFaceInference method name with the ModelCategory member names it
    # serves. Entries are checked top to bottom and the first match wins.
    # Member names are resolved lazily at call time.
    _CATEGORY_HANDLERS = (
        # Core tasks
        ("text_generation", ("TEXT_GENERATION",)),
        ("text_to_image", ("TEXT_TO_IMAGE",)),
        ("automatic_speech_recognition", ("AUTOMATIC_SPEECH_RECOGNITION",)),
        ("text_to_speech", ("TEXT_TO_SPEECH",)),
        ("image_classification", ("IMAGE_CLASSIFICATION",)),
        ("feature_extraction", ("FEATURE_EXTRACTION",)),
        ("translation", ("TRANSLATION",)),
        ("summarization", ("SUMMARIZATION",)),
        ("question_answering", ("QUESTION_ANSWERING",)),
        ("zero_shot_classification", ("ZERO_SHOT_CLASSIFICATION",)),
        ("conversational", ("CONVERSATIONAL",)),
        # Video
        ("text_to_video", ("TEXT_TO_VIDEO", "VIDEO_GENERATION")),
        ("video_to_text", ("VIDEO_TO_TEXT",)),
        ("image_classification", ("VIDEO_CLASSIFICATION",)),
        # Code
        ("code_generation", ("CODE_GENERATION", "APP_GENERATION")),
        ("code_completion", ("CODE_COMPLETION", "CODE_EXPLANATION")),
        # 3D
        ("text_to_3d", ("TEXT_TO_3D", "THREE_D_GENERATION")),
        ("image_to_3d", ("IMAGE_TO_3D", "MESH_GENERATION")),
        # Documents
        ("ocr", ("OCR",)),
        (
            "document_analysis",
            (
                "DOCUMENT_ANALYSIS",
                "FORM_PROCESSING",
                "TABLE_EXTRACTION",
                "LAYOUT_ANALYSIS",
            ),
        ),
        ("ocr", ("HANDWRITING_RECOGNITION",)),
        # Multimodal
        (
            "vision_language",
            ("VISION_LANGUAGE", "VISUAL_QUESTION_ANSWERING", "IMAGE_TEXT_MATCHING"),
        ),
        (
            "multimodal_reasoning",
            ("MULTIMODAL_REASONING", "MULTIMODAL_CHAT", "CROSS_MODAL_GENERATION"),
        ),
        # Specialized
        ("music_generation", ("MUSIC_GENERATION",)),
        ("voice_cloning", ("VOICE_CLONING",)),
        (
            "super_resolution",
            (
                "SUPER_RESOLUTION",
                "FACE_RESTORATION",
                "IMAGE_INPAINTING",
                "IMAGE_OUTPAINTING",
            ),
        ),
        ("background_removal", ("BACKGROUND_REMOVAL",)),
        # Creative content
        (
            "creative_writing",
            (
                "CREATIVE_WRITING",
                "STORY_GENERATION",
                "POETRY_GENERATION",
                "SCREENPLAY_WRITING",
            ),
        ),
        ("text_generation", ("BLOG_WRITING", "MARKETING_COPY")),
        # Game development
        (
            "creative_writing",
            (
                "CHARACTER_GENERATION",
                "LEVEL_GENERATION",
                "DIALOGUE_GENERATION",
                "GAME_ASSET_GENERATION",
            ),
        ),
        # Science and research
        (
            "text_generation",
            (
                "PROTEIN_FOLDING",
                "MOLECULE_GENERATION",
                "SCIENTIFIC_WRITING",
                "RESEARCH_ASSISTANCE",
                "DATA_ANALYSIS",
            ),
        ),
        # Business and productivity
        (
            "business_document",
            (
                "EMAIL_GENERATION",
                "PRESENTATION_CREATION",
                "REPORT_GENERATION",
                "MEETING_SUMMARIZATION",
                "PROJECT_PLANNING",
            ),
        ),
    )

    def __init__(self, api_token: str):
        """Keep the API token and instantiate the model catalogue."""
        self.api_token = api_token
        self.models = HuggingFaceModels()

    def get_models_by_category(self, category: ModelCategory) -> List[HFModel]:
        """Get all models for a specific category.

        Uses the same category-to-list mapping as ``get_all_models`` so both
        lookups agree on every category.

        Returns:
            The catalogue list mapped to *category*, or an empty list when
            the category has no mapping.
        """
        return self.get_all_models().get(category, [])

    def get_all_models(self) -> Dict[ModelCategory, List[HFModel]]:
        """Get all available models organized by category.

        Several categories share one catalogue list (for example every video
        category maps to ``VIDEO_GENERATION_MODELS``).
        """
        return {
            # Core tasks
            ModelCategory.TEXT_GENERATION: self.models.TEXT_GENERATION_MODELS,
            ModelCategory.TEXT_TO_IMAGE: self.models.TEXT_TO_IMAGE_MODELS,
            ModelCategory.AUTOMATIC_SPEECH_RECOGNITION: self.models.ASR_MODELS,
            ModelCategory.TEXT_TO_SPEECH: self.models.TTS_MODELS,
            ModelCategory.IMAGE_CLASSIFICATION: self.models.IMAGE_CLASSIFICATION_MODELS,
            ModelCategory.FEATURE_EXTRACTION: self.models.FEATURE_EXTRACTION_MODELS,
            ModelCategory.TRANSLATION: self.models.TRANSLATION_MODELS,
            ModelCategory.SUMMARIZATION: self.models.SUMMARIZATION_MODELS,
            # Video
            ModelCategory.TEXT_TO_VIDEO: self.models.VIDEO_GENERATION_MODELS,
            ModelCategory.VIDEO_GENERATION: self.models.VIDEO_GENERATION_MODELS,
            ModelCategory.VIDEO_TO_TEXT: self.models.VIDEO_GENERATION_MODELS,
            ModelCategory.VIDEO_CLASSIFICATION: self.models.VIDEO_GENERATION_MODELS,
            # Code
            ModelCategory.CODE_GENERATION: self.models.CODE_GENERATION_MODELS,
            ModelCategory.CODE_COMPLETION: self.models.CODE_GENERATION_MODELS,
            ModelCategory.CODE_EXPLANATION: self.models.CODE_GENERATION_MODELS,
            ModelCategory.APP_GENERATION: self.models.CODE_GENERATION_MODELS,
            # 3D
            ModelCategory.TEXT_TO_3D: self.models.THREE_D_MODELS,
            ModelCategory.IMAGE_TO_3D: self.models.THREE_D_MODELS,
            ModelCategory.THREE_D_GENERATION: self.models.THREE_D_MODELS,
            ModelCategory.MESH_GENERATION: self.models.THREE_D_MODELS,
            # Documents
            ModelCategory.OCR: self.models.DOCUMENT_PROCESSING_MODELS,
            ModelCategory.DOCUMENT_ANALYSIS: self.models.DOCUMENT_PROCESSING_MODELS,
            ModelCategory.HANDWRITING_RECOGNITION: self.models.DOCUMENT_PROCESSING_MODELS,
            ModelCategory.TABLE_EXTRACTION: self.models.DOCUMENT_PROCESSING_MODELS,
            ModelCategory.FORM_PROCESSING: self.models.DOCUMENT_PROCESSING_MODELS,
            # Multimodal
            ModelCategory.VISION_LANGUAGE: self.models.MULTIMODAL_MODELS,
            ModelCategory.MULTIMODAL_REASONING: self.models.MULTIMODAL_MODELS,
            ModelCategory.VISUAL_QUESTION_ANSWERING: self.models.MULTIMODAL_MODELS,
            ModelCategory.MULTIMODAL_CHAT: self.models.MULTIMODAL_MODELS,
            ModelCategory.CROSS_MODAL_GENERATION: self.models.MULTIMODAL_MODELS,
            # Specialized
            ModelCategory.MUSIC_GENERATION: self.models.SPECIALIZED_AI_MODELS,
            ModelCategory.VOICE_CLONING: self.models.SPECIALIZED_AI_MODELS,
            ModelCategory.SUPER_RESOLUTION: self.models.SPECIALIZED_AI_MODELS,
            ModelCategory.FACE_RESTORATION: self.models.SPECIALIZED_AI_MODELS,
            ModelCategory.IMAGE_INPAINTING: self.models.SPECIALIZED_AI_MODELS,
            ModelCategory.BACKGROUND_REMOVAL: self.models.SPECIALIZED_AI_MODELS,
            # Creative content
            ModelCategory.CREATIVE_WRITING: self.models.CREATIVE_CONTENT_MODELS,
            ModelCategory.STORY_GENERATION: self.models.CREATIVE_CONTENT_MODELS,
            ModelCategory.POETRY_GENERATION: self.models.CREATIVE_CONTENT_MODELS,
            ModelCategory.BLOG_WRITING: self.models.CREATIVE_CONTENT_MODELS,
            ModelCategory.MARKETING_COPY: self.models.CREATIVE_CONTENT_MODELS,
            # Game development
            ModelCategory.GAME_ASSET_GENERATION: self.models.GAME_DEVELOPMENT_MODELS,
            ModelCategory.CHARACTER_GENERATION: self.models.GAME_DEVELOPMENT_MODELS,
            ModelCategory.LEVEL_GENERATION: self.models.GAME_DEVELOPMENT_MODELS,
            ModelCategory.DIALOGUE_GENERATION: self.models.GAME_DEVELOPMENT_MODELS,
            # Science and research
            ModelCategory.PROTEIN_FOLDING: self.models.SCIENCE_RESEARCH_MODELS,
            ModelCategory.MOLECULE_GENERATION: self.models.SCIENCE_RESEARCH_MODELS,
            ModelCategory.SCIENTIFIC_WRITING: self.models.SCIENCE_RESEARCH_MODELS,
            ModelCategory.RESEARCH_ASSISTANCE: self.models.SCIENCE_RESEARCH_MODELS,
            ModelCategory.DATA_ANALYSIS: self.models.SCIENCE_RESEARCH_MODELS,
            # Business and productivity
            ModelCategory.EMAIL_GENERATION: self.models.BUSINESS_PRODUCTIVITY_MODELS,
            ModelCategory.PRESENTATION_CREATION: self.models.BUSINESS_PRODUCTIVITY_MODELS,
            ModelCategory.REPORT_GENERATION: self.models.BUSINESS_PRODUCTIVITY_MODELS,
            ModelCategory.MEETING_SUMMARIZATION: self.models.BUSINESS_PRODUCTIVITY_MODELS,
            ModelCategory.PROJECT_PLANNING: self.models.BUSINESS_PRODUCTIVITY_MODELS,
        }

    def get_model_by_id(self, model_id: str) -> Optional[HFModel]:
        """Find a model by its Hugging Face model ID.

        Returns:
            The first catalogue entry whose ``model_id`` equals *model_id*,
            or ``None`` when no list in ``get_all_models`` contains it.
        """
        return next(
            (
                model
                for models_list in self.get_all_models().values()
                for model in models_list
                if model.model_id == model_id
            ),
            None,
        )

    @classmethod
    def _handler_name_for(cls, category) -> Optional[str]:
        """Return the inference method name serving *category*, or ``None``.

        Walks ``_CATEGORY_HANDLERS`` in order and compares *category* with
        each listed ``ModelCategory`` member; a listed name that is not
        defined on ``ModelCategory`` raises ``AttributeError`` when reached.
        """
        for method_name, category_names in cls._CATEGORY_HANDLERS:
            for name in category_names:
                if category == getattr(ModelCategory, name):
                    return method_name
        return None

    async def call_model(self, model_id: str, category: ModelCategory, **kwargs) -> Any:
        """Call a Hugging Face model with the appropriate method based on category.

        Args:
            model_id: Model to call.
            category: Decides which ``HuggingFaceInference`` method is used.
            **kwargs: Forwarded unchanged to that method (for example
                ``prompt=...`` for text generation). For the business
                categories ``category.value`` is passed as the document type
                and ``context`` must be supplied by the caller.

        Returns:
            Whatever the selected inference method returns.

        Raises:
            ValueError: If *category* has no handler. This is checked before
                any HTTP session is opened.
        """
        method_name = self._handler_name_for(category)
        if method_name is None:
            raise ValueError(f"Unsupported model category: {category}")

        # NOTE: the session is closed as soon as this method returns, so a
        # result that still needs the live connection afterwards cannot be
        # consumed through this entry point.
        async with HuggingFaceInference(self.api_token) as hf:
            handler = getattr(hf, method_name)
            if method_name == "business_document":
                return await handler(model_id, category.value, **kwargs)
            return await handler(model_id, **kwargs)
|
| |
|
| |
|