diff --git "a/app/huggingface_models.py" "b/app/huggingface_models.py" deleted file mode 100644--- "a/app/huggingface_models.py" +++ /dev/null @@ -1,3445 +0,0 @@ -""" -Hugging Face Models Integration for OpenManus AI Agent -Comprehensive integration with Hugging Face Inference API for all model categories -""" - -import asyncio -import base64 -import io -import json -import logging -from dataclasses import dataclass -from enum import Enum -from typing import Any, Dict, List, Optional, Union - -import aiohttp -import PIL.Image -from pydantic import BaseModel - -logger = logging.getLogger(__name__) - - -class ModelCategory(Enum): - """Categories of Hugging Face models available""" - - # Core AI categories - TEXT_GENERATION = "text-generation" - TEXT_TO_IMAGE = "text-to-image" - IMAGE_TO_TEXT = "image-to-text" - AUTOMATIC_SPEECH_RECOGNITION = "automatic-speech-recognition" - TEXT_TO_SPEECH = "text-to-speech" - IMAGE_CLASSIFICATION = "image-classification" - OBJECT_DETECTION = "object-detection" - FEATURE_EXTRACTION = "feature-extraction" - SENTENCE_SIMILARITY = "sentence-similarity" - TRANSLATION = "translation" - SUMMARIZATION = "summarization" - QUESTION_ANSWERING = "question-answering" - FILL_MASK = "fill-mask" - TOKEN_CLASSIFICATION = "token-classification" - ZERO_SHOT_CLASSIFICATION = "zero-shot-classification" - AUDIO_CLASSIFICATION = "audio-classification" - CONVERSATIONAL = "conversational" - - # Video and Motion - TEXT_TO_VIDEO = "text-to-video" - VIDEO_TO_TEXT = "video-to-text" - VIDEO_CLASSIFICATION = "video-classification" - VIDEO_GENERATION = "video-generation" - MOTION_GENERATION = "motion-generation" - DEEPFAKE_DETECTION = "deepfake-detection" - - # Code and Development - CODE_GENERATION = "code-generation" - CODE_COMPLETION = "code-completion" - CODE_EXPLANATION = "code-explanation" - CODE_TRANSLATION = "code-translation" - CODE_REVIEW = "code-review" - APP_GENERATION = "app-generation" - API_GENERATION = "api-generation" - DATABASE_GENERATION = 
"database-generation" - - # 3D and AR/VR - TEXT_TO_3D = "text-to-3d" - IMAGE_TO_3D = "image-to-3d" - THREE_D_GENERATION = "3d-generation" - MESH_GENERATION = "mesh-generation" - TEXTURE_GENERATION = "texture-generation" - AR_CONTENT = "ar-content" - VR_ENVIRONMENT = "vr-environment" - - # Document Processing - OCR = "ocr" - DOCUMENT_ANALYSIS = "document-analysis" - PDF_PROCESSING = "pdf-processing" - LAYOUT_ANALYSIS = "layout-analysis" - TABLE_EXTRACTION = "table-extraction" - HANDWRITING_RECOGNITION = "handwriting-recognition" - FORM_PROCESSING = "form-processing" - - # Multimodal AI - VISION_LANGUAGE = "vision-language" - MULTIMODAL_REASONING = "multimodal-reasoning" - CROSS_MODAL_GENERATION = "cross-modal-generation" - VISUAL_QUESTION_ANSWERING = "visual-question-answering" - IMAGE_TEXT_MATCHING = "image-text-matching" - MULTIMODAL_CHAT = "multimodal-chat" - - # Specialized AI - MUSIC_GENERATION = "music-generation" - VOICE_CLONING = "voice-cloning" - STYLE_TRANSFER = "style-transfer" - SUPER_RESOLUTION = "super-resolution" - IMAGE_INPAINTING = "image-inpainting" - IMAGE_OUTPAINTING = "image-outpainting" - BACKGROUND_REMOVAL = "background-removal" - FACE_RESTORATION = "face-restoration" - - # Content Creation - CREATIVE_WRITING = "creative-writing" - STORY_GENERATION = "story-generation" - SCREENPLAY_WRITING = "screenplay-writing" - POETRY_GENERATION = "poetry-generation" - BLOG_WRITING = "blog-writing" - MARKETING_COPY = "marketing-copy" - - # Game Development - GAME_ASSET_GENERATION = "game-asset-generation" - CHARACTER_GENERATION = "character-generation" - LEVEL_GENERATION = "level-generation" - DIALOGUE_GENERATION = "dialogue-generation" - - # Science and Research - PROTEIN_FOLDING = "protein-folding" - MOLECULE_GENERATION = "molecule-generation" - SCIENTIFIC_WRITING = "scientific-writing" - RESEARCH_ASSISTANCE = "research-assistance" - DATA_ANALYSIS = "data-analysis" - - # Business and Productivity - EMAIL_GENERATION = "email-generation" - 
PRESENTATION_CREATION = "presentation-creation" - REPORT_GENERATION = "report-generation" - MEETING_SUMMARIZATION = "meeting-summarization" - PROJECT_PLANNING = "project-planning" - - # AI Teacher and Education - AI_TUTORING = "ai-tutoring" - EDUCATIONAL_CONTENT = "educational-content" - LESSON_PLANNING = "lesson-planning" - CONCEPT_EXPLANATION = "concept-explanation" - HOMEWORK_ASSISTANCE = "homework-assistance" - QUIZ_GENERATION = "quiz-generation" - CURRICULUM_DESIGN = "curriculum-design" - LEARNING_ASSESSMENT = "learning-assessment" - ADAPTIVE_LEARNING = "adaptive-learning" - SUBJECT_TEACHING = "subject-teaching" - MATH_TUTORING = "math-tutoring" - SCIENCE_TUTORING = "science-tutoring" - LANGUAGE_TUTORING = "language-tutoring" - HISTORY_TUTORING = "history-tutoring" - CODING_INSTRUCTION = "coding-instruction" - EXAM_PREPARATION = "exam-preparation" - STUDY_GUIDE_CREATION = "study-guide-creation" - EDUCATIONAL_GAMES = "educational-games" - LEARNING_ANALYTICS = "learning-analytics" - PERSONALIZED_LEARNING = "personalized-learning" - - # Advanced Image Processing & Manipulation - IMAGE_EDITING = "image-editing" - FACE_SWAP = "face-swap" - FACE_ENHANCEMENT = "face-enhancement" - FACE_GENERATION = "face-generation" - PORTRAIT_EDITING = "portrait-editing" - PHOTO_RESTORATION = "photo-restoration" - IMAGE_UPSCALING = "image-upscaling" - COLOR_CORRECTION = "color-correction" - ARTISTIC_FILTER = "artistic-filter" - - # Advanced Speech & Audio - ADVANCED_TTS = "advanced-tts" - ADVANCED_STT = "advanced-stt" - VOICE_CONVERSION = "voice-conversion" - SPEECH_ENHANCEMENT = "speech-enhancement" - AUDIO_GENERATION = "audio-generation" - MULTILINGUAL_TTS = "multilingual-tts" - MULTILINGUAL_STT = "multilingual-stt" - REAL_TIME_TRANSLATION = "real-time-translation" - - # Interactive Avatar & Video Generation - TALKING_AVATAR = "talking-avatar" - AVATAR_GENERATION = "avatar-generation" - LIP_SYNC = "lip-sync" - FACIAL_ANIMATION = "facial-animation" - GESTURE_GENERATION = 
"gesture-generation" - VIRTUAL_PRESENTER = "virtual-presenter" - AI_ANCHOR = "ai-anchor" - - # Interactive Language & Conversation - INTERACTIVE_CHAT = "interactive-chat" - BILINGUAL_CONVERSATION = "bilingual-conversation" - CULTURAL_ADAPTATION = "cultural-adaptation" - CONTEXT_AWARE_CHAT = "context-aware-chat" - PERSONALITY_CHAT = "personality-chat" - ROLE_PLAY_CHAT = "role-play-chat" - DOMAIN_SPECIFIC_CHAT = "domain-specific-chat" - - # Qwen Specialized Categories - QWEN_REASONING = "qwen-reasoning" - QWEN_MATH = "qwen-math" - QWEN_CODE = "qwen-code" - QWEN_VISION = "qwen-vision" - QWEN_AUDIO = "qwen-audio" - - # DeepSeek Specialized Categories - DEEPSEEK_CODING = "deepseek-coding" - DEEPSEEK_REASONING = "deepseek-reasoning" - DEEPSEEK_MATH = "deepseek-math" - DEEPSEEK_RESEARCH = "deepseek-research" - - -@dataclass -class HFModel: - """Hugging Face model definition""" - - name: str - model_id: str - category: ModelCategory - description: str - endpoint_compatible: bool = False - requires_auth: bool = False - max_tokens: Optional[int] = None - supports_streaming: bool = False - - -class HuggingFaceModels: - """Comprehensive collection of Hugging Face models for all categories""" - - # Text Generation Models (Latest and Popular) - TEXT_GENERATION_MODELS = [ - HFModel( - "MiniMax-M2", - "MiniMaxAI/MiniMax-M2", - ModelCategory.TEXT_GENERATION, - "Latest high-performance text generation model", - True, - False, - 4096, - True, - ), - HFModel( - "Kimi Linear 48B", - "moonshotai/Kimi-Linear-48B-A3B-Instruct", - ModelCategory.TEXT_GENERATION, - "Large instruction-tuned model with linear attention", - True, - False, - 8192, - True, - ), - HFModel( - "GPT-OSS 20B", - "openai/gpt-oss-20b", - ModelCategory.TEXT_GENERATION, - "Open-source GPT model by OpenAI", - True, - False, - 4096, - True, - ), - HFModel( - "GPT-OSS 120B", - "openai/gpt-oss-120b", - ModelCategory.TEXT_GENERATION, - "Large open-source GPT model", - True, - False, - 4096, - True, - ), - HFModel( - "Granite 
4.0 1B", - "ibm-granite/granite-4.0-1b", - ModelCategory.TEXT_GENERATION, - "IBM's enterprise-grade small language model", - True, - False, - 2048, - True, - ), - HFModel( - "GLM-4.6", - "zai-org/GLM-4.6", - ModelCategory.TEXT_GENERATION, - "Multilingual conversational model", - True, - False, - 4096, - True, - ), - HFModel( - "Llama 3.1 8B Instruct", - "meta-llama/Llama-3.1-8B-Instruct", - ModelCategory.TEXT_GENERATION, - "Meta's instruction-tuned Llama model", - True, - True, - 8192, - True, - ), - HFModel( - "Tongyi DeepResearch 30B", - "Alibaba-NLP/Tongyi-DeepResearch-30B-A3B", - ModelCategory.TEXT_GENERATION, - "Alibaba's research-focused large language model", - True, - False, - 4096, - True, - ), - HFModel( - "EuroLLM 9B", - "utter-project/EuroLLM-9B", - ModelCategory.TEXT_GENERATION, - "European multilingual language model", - True, - False, - 4096, - True, - ), - ] - - # Text-to-Image Models (Latest and Best) - TEXT_TO_IMAGE_MODELS = [ - HFModel( - "FIBO", - "briaai/FIBO", - ModelCategory.TEXT_TO_IMAGE, - "Advanced text-to-image generation model", - True, - False, - ), - HFModel( - "FLUX.1 Dev", - "black-forest-labs/FLUX.1-dev", - ModelCategory.TEXT_TO_IMAGE, - "State-of-the-art image generation", - True, - False, - ), - HFModel( - "FLUX.1 Schnell", - "black-forest-labs/FLUX.1-schnell", - ModelCategory.TEXT_TO_IMAGE, - "Fast high-quality image generation", - True, - False, - ), - HFModel( - "Qwen Image", - "Qwen/Qwen-Image", - ModelCategory.TEXT_TO_IMAGE, - "Multilingual text-to-image model", - True, - False, - ), - HFModel( - "Stable Diffusion XL", - "stabilityai/stable-diffusion-xl-base-1.0", - ModelCategory.TEXT_TO_IMAGE, - "Popular high-resolution image generation", - True, - False, - ), - HFModel( - "Stable Diffusion 3.5 Large", - "stabilityai/stable-diffusion-3.5-large", - ModelCategory.TEXT_TO_IMAGE, - "Latest Stable Diffusion model", - True, - False, - ), - HFModel( - "HunyuanImage 3.0", - "tencent/HunyuanImage-3.0", - ModelCategory.TEXT_TO_IMAGE, 
- "Tencent's advanced image generation model", - True, - False, - ), - HFModel( - "Nitro-E", - "amd/Nitro-E", - ModelCategory.TEXT_TO_IMAGE, - "AMD's efficient image generation model", - True, - False, - ), - HFModel( - "Qwen Image Lightning", - "lightx2v/Qwen-Image-Lightning", - ModelCategory.TEXT_TO_IMAGE, - "Fast distilled image generation", - True, - False, - ), - ] - - # Automatic Speech Recognition Models - ASR_MODELS = [ - HFModel( - "Whisper Large v3", - "openai/whisper-large-v3", - ModelCategory.AUTOMATIC_SPEECH_RECOGNITION, - "OpenAI's best multilingual speech recognition", - True, - False, - ), - HFModel( - "Whisper Large v3 Turbo", - "openai/whisper-large-v3-turbo", - ModelCategory.AUTOMATIC_SPEECH_RECOGNITION, - "Faster version of Whisper Large v3", - True, - False, - ), - HFModel( - "Parakeet TDT 0.6B v3", - "nvidia/parakeet-tdt-0.6b-v3", - ModelCategory.AUTOMATIC_SPEECH_RECOGNITION, - "NVIDIA's multilingual ASR model", - True, - False, - ), - HFModel( - "Canary Qwen 2.5B", - "nvidia/canary-qwen-2.5b", - ModelCategory.AUTOMATIC_SPEECH_RECOGNITION, - "NVIDIA's advanced ASR with Qwen integration", - True, - False, - ), - HFModel( - "Canary 1B v2", - "nvidia/canary-1b-v2", - ModelCategory.AUTOMATIC_SPEECH_RECOGNITION, - "Compact multilingual ASR model", - True, - False, - ), - HFModel( - "Whisper Small", - "openai/whisper-small", - ModelCategory.AUTOMATIC_SPEECH_RECOGNITION, - "Lightweight multilingual ASR", - True, - False, - ), - HFModel( - "Speaker Diarization 3.1", - "pyannote/speaker-diarization-3.1", - ModelCategory.AUTOMATIC_SPEECH_RECOGNITION, - "Advanced speaker identification and diarization", - True, - False, - ), - ] - - # Text-to-Speech Models - TTS_MODELS = [ - HFModel( - "SoulX Podcast 1.7B", - "Soul-AILab/SoulX-Podcast-1.7B", - ModelCategory.TEXT_TO_SPEECH, - "High-quality podcast-style speech synthesis", - True, - False, - ), - HFModel( - "NeuTTS Air", - "neuphonic/neutts-air", - ModelCategory.TEXT_TO_SPEECH, - "Advanced neural 
text-to-speech", - True, - False, - ), - HFModel( - "Kokoro 82M", - "hexgrad/Kokoro-82M", - ModelCategory.TEXT_TO_SPEECH, - "Lightweight high-quality TTS", - True, - False, - ), - HFModel( - "Kani TTS 400M EN", - "nineninesix/kani-tts-400m-en", - ModelCategory.TEXT_TO_SPEECH, - "English-focused text-to-speech model", - True, - False, - ), - HFModel( - "XTTS v2", - "coqui/XTTS-v2", - ModelCategory.TEXT_TO_SPEECH, - "Zero-shot voice cloning TTS", - True, - False, - ), - HFModel( - "Chatterbox", - "ResembleAI/chatterbox", - ModelCategory.TEXT_TO_SPEECH, - "Multilingual voice cloning", - True, - False, - ), - HFModel( - "VibeVoice 1.5B", - "microsoft/VibeVoice-1.5B", - ModelCategory.TEXT_TO_SPEECH, - "Microsoft's advanced TTS model", - True, - False, - ), - HFModel( - "OpenAudio S1 Mini", - "fishaudio/openaudio-s1-mini", - ModelCategory.TEXT_TO_SPEECH, - "Compact multilingual TTS", - True, - False, - ), - ] - - # Image Classification Models - IMAGE_CLASSIFICATION_MODELS = [ - HFModel( - "NSFW Image Detection", - "Falconsai/nsfw_image_detection", - ModelCategory.IMAGE_CLASSIFICATION, - "Content safety image classification", - True, - False, - ), - HFModel( - "ViT Base Patch16", - "google/vit-base-patch16-224", - ModelCategory.IMAGE_CLASSIFICATION, - "Google's Vision Transformer", - True, - False, - ), - HFModel( - "Deepfake Detection", - "dima806/deepfake_vs_real_image_detection", - ModelCategory.IMAGE_CLASSIFICATION, - "Detect AI-generated vs real images", - True, - False, - ), - HFModel( - "Facial Emotions Detection", - "dima806/facial_emotions_image_detection", - ModelCategory.IMAGE_CLASSIFICATION, - "Recognize facial emotions", - True, - False, - ), - HFModel( - "SDXL Detector", - "Organika/sdxl-detector", - ModelCategory.IMAGE_CLASSIFICATION, - "Detect Stable Diffusion XL generated images", - True, - False, - ), - HFModel( - "ViT NSFW Detector", - "AdamCodd/vit-base-nsfw-detector", - ModelCategory.IMAGE_CLASSIFICATION, - "NSFW content detection with ViT", - True, - 
False, - ), - HFModel( - "ResNet 101", - "microsoft/resnet-101", - ModelCategory.IMAGE_CLASSIFICATION, - "Microsoft's ResNet for classification", - True, - False, - ), - ] - - # Additional Categories - FEATURE_EXTRACTION_MODELS = [ - HFModel( - "Sentence Transformers All MiniLM", - "sentence-transformers/all-MiniLM-L6-v2", - ModelCategory.FEATURE_EXTRACTION, - "Lightweight sentence embeddings", - True, - False, - ), - HFModel( - "BGE Large EN", - "BAAI/bge-large-en-v1.5", - ModelCategory.FEATURE_EXTRACTION, - "High-quality English embeddings", - True, - False, - ), - HFModel( - "E5 Large v2", - "intfloat/e5-large-v2", - ModelCategory.FEATURE_EXTRACTION, - "Multilingual text embeddings", - True, - False, - ), - ] - - TRANSLATION_MODELS = [ - HFModel( - "M2M100 1.2B", - "facebook/m2m100_1.2B", - ModelCategory.TRANSLATION, - "Multilingual machine translation", - True, - False, - ), - HFModel( - "NLLB 200 3.3B", - "facebook/nllb-200-3.3B", - ModelCategory.TRANSLATION, - "No Language Left Behind translation", - True, - False, - ), - HFModel( - "mBART Large 50", - "facebook/mbart-large-50-many-to-many-mmt", - ModelCategory.TRANSLATION, - "Multilingual BART for translation", - True, - False, - ), - ] - - SUMMARIZATION_MODELS = [ - HFModel( - "PEGASUS XSum", - "google/pegasus-xsum", - ModelCategory.SUMMARIZATION, - "Abstractive summarization model", - True, - False, - ), - HFModel( - "BART Large CNN", - "facebook/bart-large-cnn", - ModelCategory.SUMMARIZATION, - "CNN/DailyMail summarization", - True, - False, - ), - HFModel( - "T5 Base", - "t5-base", - ModelCategory.SUMMARIZATION, - "Text-to-Text Transfer Transformer", - True, - False, - ), - ] - - # Video Generation and Processing Models - VIDEO_GENERATION_MODELS = [ - HFModel( - "Stable Video Diffusion", - "stabilityai/stable-video-diffusion-img2vid", - ModelCategory.TEXT_TO_VIDEO, - "Image-to-video generation model", - True, - False, - ), - HFModel( - "AnimateDiff", - "guoyww/animatediff", - 
ModelCategory.VIDEO_GENERATION, - "Text-to-video animation generation", - True, - False, - ), - HFModel( - "VideoCrafter", - "videogen/VideoCrafter", - ModelCategory.TEXT_TO_VIDEO, - "High-quality text-to-video generation", - True, - False, - ), - HFModel( - "Video ChatGPT", - "mbzuai-oryx/Video-ChatGPT-7B", - ModelCategory.VIDEO_TO_TEXT, - "Video understanding and description", - True, - False, - ), - HFModel( - "Video-BLIP", - "salesforce/video-blip-opt-2.7b", - ModelCategory.VIDEO_CLASSIFICATION, - "Video content analysis and classification", - True, - False, - ), - ] - - # Code Generation and Development Models - CODE_GENERATION_MODELS = [ - HFModel( - "CodeLlama 34B Instruct", - "codellama/CodeLlama-34b-Instruct-hf", - ModelCategory.CODE_GENERATION, - "Large instruction-tuned code generation model", - True, - True, - ), - HFModel( - "StarCoder2 15B", - "bigcode/starcoder2-15b", - ModelCategory.CODE_GENERATION, - "Advanced code generation and completion", - True, - False, - ), - HFModel( - "DeepSeek Coder V2", - "deepseek-ai/deepseek-coder-6.7b-instruct", - ModelCategory.CODE_GENERATION, - "Specialized coding assistant", - True, - False, - ), - HFModel( - "WizardCoder 34B", - "WizardLM/WizardCoder-Python-34B-V1.0", - ModelCategory.CODE_GENERATION, - "Python-focused code generation", - True, - False, - ), - HFModel( - "Phind CodeLlama", - "Phind/Phind-CodeLlama-34B-v2", - ModelCategory.CODE_GENERATION, - "Optimized for code explanation and debugging", - True, - False, - ), - HFModel( - "Code T5+", - "Salesforce/codet5p-770m", - ModelCategory.CODE_COMPLETION, - "Code understanding and generation", - True, - False, - ), - HFModel( - "InCoder", - "facebook/incoder-6B", - ModelCategory.CODE_COMPLETION, - "Bidirectional code generation", - True, - False, - ), - ] - - # 3D and AR/VR Content Generation Models - THREE_D_MODELS = [ - HFModel( - "Shap-E", - "openai/shap-e", - ModelCategory.TEXT_TO_3D, - "Text-to-3D shape generation", - True, - False, - ), - HFModel( - 
"Point-E", - "openai/point-e", - ModelCategory.TEXT_TO_3D, - "Text-to-3D point cloud generation", - True, - False, - ), - HFModel( - "DreamFusion", - "google/dreamfusion", - ModelCategory.IMAGE_TO_3D, - "Image-to-3D mesh generation", - True, - False, - ), - HFModel( - "Magic3D", - "nvidia/magic3d", - ModelCategory.THREE_D_GENERATION, - "High-quality 3D content creation", - True, - False, - ), - HFModel( - "GET3D", - "nvidia/get3d", - ModelCategory.MESH_GENERATION, - "3D mesh generation from text", - True, - False, - ), - ] - - # Document Processing and OCR Models - DOCUMENT_PROCESSING_MODELS = [ - HFModel( - "TrOCR Large", - "microsoft/trocr-large-printed", - ModelCategory.OCR, - "Transformer-based OCR for printed text", - True, - False, - ), - HFModel( - "TrOCR Handwritten", - "microsoft/trocr-large-handwritten", - ModelCategory.HANDWRITING_RECOGNITION, - "Handwritten text recognition", - True, - False, - ), - HFModel( - "LayoutLMv3", - "microsoft/layoutlmv3-large", - ModelCategory.DOCUMENT_ANALYSIS, - "Document layout analysis and understanding", - True, - False, - ), - HFModel( - "Donut", - "naver-clova-ix/donut-base", - ModelCategory.DOCUMENT_ANALYSIS, - "OCR-free document understanding", - True, - False, - ), - HFModel( - "TableTransformer", - "microsoft/table-transformer-structure-recognition", - ModelCategory.TABLE_EXTRACTION, - "Table structure recognition", - True, - False, - ), - HFModel( - "FormNet", - "microsoft/formnet", - ModelCategory.FORM_PROCESSING, - "Form understanding and processing", - True, - False, - ), - ] - - # Multimodal AI Models - MULTIMODAL_MODELS = [ - HFModel( - "BLIP-2", - "Salesforce/blip2-opt-2.7b", - ModelCategory.VISION_LANGUAGE, - "Vision-language understanding and generation", - True, - False, - ), - HFModel( - "InstructBLIP", - "Salesforce/instructblip-vicuna-7b", - ModelCategory.MULTIMODAL_REASONING, - "Instruction-following multimodal model", - True, - False, - ), - HFModel( - "LLaVA", - "liuhaotian/llava-v1.5-7b", - 
ModelCategory.VISUAL_QUESTION_ANSWERING, - "Large Language and Vision Assistant", - True, - False, - ), - HFModel( - "GPT-4V", - "openai/gpt-4-vision-preview", - ModelCategory.MULTIMODAL_CHAT, - "Advanced multimodal conversational AI", - True, - True, - ), - HFModel( - "Flamingo", - "deepmind/flamingo-9b", - ModelCategory.CROSS_MODAL_GENERATION, - "Few-shot learning for vision and language", - True, - False, - ), - ] - - # Specialized AI Models - SPECIALIZED_AI_MODELS = [ - HFModel( - "MusicGen", - "facebook/musicgen-medium", - ModelCategory.MUSIC_GENERATION, - "Text-to-music generation", - True, - False, - ), - HFModel( - "AudioCraft", - "facebook/audiocraft_musicgen_melody", - ModelCategory.MUSIC_GENERATION, - "Melody-conditioned music generation", - True, - False, - ), - HFModel( - "Real-ESRGAN", - "xinntao/realesrgan-x4plus", - ModelCategory.SUPER_RESOLUTION, - "Image super-resolution", - True, - False, - ), - HFModel( - "GFPGAN", - "TencentARC/GFPGAN", - ModelCategory.FACE_RESTORATION, - "Face restoration and enhancement", - True, - False, - ), - HFModel( - "LaMa", - "advimman/lama", - ModelCategory.IMAGE_INPAINTING, - "Large Mask Inpainting", - True, - False, - ), - HFModel( - "Background Remover", - "briaai/RMBG-1.4", - ModelCategory.BACKGROUND_REMOVAL, - "Automatic background removal", - True, - False, - ), - HFModel( - "Voice Cloner", - "coqui/XTTS-v2", - ModelCategory.VOICE_CLONING, - "Multilingual voice cloning", - True, - False, - ), - ] - - # Creative Content Models - CREATIVE_CONTENT_MODELS = [ - HFModel( - "GPT-3.5 Creative", - "openai/gpt-3.5-turbo-instruct", - ModelCategory.CREATIVE_WRITING, - "Creative writing and storytelling", - True, - True, - ), - HFModel( - "Novel AI", - "novelai/genji-python-6b", - ModelCategory.STORY_GENERATION, - "Interactive story generation", - True, - False, - ), - HFModel( - "Poet Assistant", - "gpt2-poetry", - ModelCategory.POETRY_GENERATION, - "Poetry generation and analysis", - True, - False, - ), - HFModel( - "Blog 
Writer", - "google/flan-t5-large", - ModelCategory.BLOG_WRITING, - "Blog content creation", - True, - False, - ), - HFModel( - "Marketing Copy AI", - "microsoft/DialoGPT-large", - ModelCategory.MARKETING_COPY, - "Marketing content generation", - True, - False, - ), - ] - - # Game Development Models - GAME_DEVELOPMENT_MODELS = [ - HFModel( - "Character AI", - "character-ai/character-generator", - ModelCategory.CHARACTER_GENERATION, - "Game character generation and design", - True, - False, - ), - HFModel( - "Level Designer", - "unity/level-generator", - ModelCategory.LEVEL_GENERATION, - "Game level and environment generation", - True, - False, - ), - HFModel( - "Dialogue Writer", - "bioware/dialogue-generator", - ModelCategory.DIALOGUE_GENERATION, - "Game dialogue and narrative generation", - True, - False, - ), - HFModel( - "Asset Creator", - "epic/asset-generator", - ModelCategory.GAME_ASSET_GENERATION, - "Game asset and texture generation", - True, - False, - ), - ] - - # Science and Research Models - SCIENCE_RESEARCH_MODELS = [ - HFModel( - "AlphaFold", - "deepmind/alphafold2", - ModelCategory.PROTEIN_FOLDING, - "Protein structure prediction", - True, - False, - ), - HFModel( - "ChemBERTa", - "DeepChem/ChemBERTa-77M-MLM", - ModelCategory.MOLECULE_GENERATION, - "Chemical compound analysis", - True, - False, - ), - HFModel( - "SciBERT", - "allenai/scibert_scivocab_uncased", - ModelCategory.SCIENTIFIC_WRITING, - "Scientific text understanding", - True, - False, - ), - HFModel( - "Research Assistant", - "microsoft/specter2", - ModelCategory.RESEARCH_ASSISTANCE, - "Research paper analysis and recommendations", - True, - False, - ), - HFModel( - "Data Analyst", - "microsoft/data-copilot", - ModelCategory.DATA_ANALYSIS, - "Automated data analysis and insights", - True, - False, - ), - ] - - # Business and Productivity Models - BUSINESS_PRODUCTIVITY_MODELS = [ - HFModel( - "Email Assistant", - "microsoft/email-generator", - ModelCategory.EMAIL_GENERATION, - 
"Professional email composition", - True, - False, - ), - HFModel( - "Presentation AI", - "gamma/presentation-generator", - ModelCategory.PRESENTATION_CREATION, - "Automated presentation creation", - True, - False, - ), - HFModel( - "Report Writer", - "openai/report-generator", - ModelCategory.REPORT_GENERATION, - "Business report generation", - True, - False, - ), - HFModel( - "Meeting Summarizer", - "microsoft/meeting-summarizer", - ModelCategory.MEETING_SUMMARIZATION, - "Meeting notes and action items", - True, - False, - ), - HFModel( - "Project Planner", - "atlassian/project-ai", - ModelCategory.PROJECT_PLANNING, - "Project planning and management", - True, - False, - ), - ] - - # AI Teacher Models - Best-in-Class Educational AI System - AI_TEACHER_MODELS = [ - # Primary AI Tutoring Models - Interactive & Conversational - HFModel( - "AI Tutor Interactive", - "microsoft/DialoGPT-medium", - ModelCategory.AI_TUTORING, - "Interactive AI tutor for conversational learning with dialogue management", - True, - False, - 2048, - True, - ), - HFModel( - "Goal-Oriented Tutor", - "microsoft/GODEL-v1_1-large-seq2seq", - ModelCategory.AI_TUTORING, - "Goal-oriented conversational AI for personalized tutoring sessions", - True, - False, - 2048, - True, - ), - HFModel( - "Advanced Instruction Tutor", - "google/flan-t5-large", - ModelCategory.AI_TUTORING, - "Advanced instruction-following AI tutor for complex educational tasks", - True, - False, - 2048, - True, - ), - # Educational Content Generation - Creative & Comprehensive - HFModel( - "Educational Content Creator Pro", - "facebook/bart-large", - ModelCategory.EDUCATIONAL_CONTENT, - "Professional educational content generation for all learning levels", - True, - False, - 1024, - False, - ), - HFModel( - "Multilingual Education AI", - "bigscience/bloom-560m", - ModelCategory.EDUCATIONAL_CONTENT, - "Global multilingual educational content for diverse learners", - True, - False, - 2048, - True, - ), - HFModel( - "Academic 
Writing Assistant", - "microsoft/prophetnet-large-uncased", - ModelCategory.EDUCATIONAL_CONTENT, - "Academic content creation with advanced text generation capabilities", - True, - False, - 1024, - False, - ), - # Lesson Planning & Curriculum Design - Structured & Professional - HFModel( - "Master Lesson Planner", - "facebook/bart-large-cnn", - ModelCategory.LESSON_PLANNING, - "Comprehensive lesson planning with summarization and structure", - True, - False, - 1024, - False, - ), - HFModel( - "Curriculum Architect", - "microsoft/prophetnet-base-uncased", - ModelCategory.CURRICULUM_DESIGN, - "Professional curriculum planning and educational program design", - True, - False, - 1024, - False, - ), - HFModel( - "Activity Designer", - "google/t5-base", - ModelCategory.LESSON_PLANNING, - "Interactive learning activity and exercise generation", - True, - False, - 512, - True, - ), - # Subject-Specific Excellence - STEM Focus - HFModel( - "Programming Mentor Pro", - "microsoft/codebert-base", - ModelCategory.CODING_INSTRUCTION, - "Expert programming education with code analysis and explanation", - True, - False, - 1024, - False, - ), - HFModel( - "Advanced Code Instructor", - "microsoft/graphcodebert-base", - ModelCategory.CODING_INSTRUCTION, - "Advanced programming instruction with graph understanding", - True, - False, - 1024, - False, - ), - HFModel( - "Algorithm Tutor Elite", - "microsoft/unixcoder-base", - ModelCategory.CODING_INSTRUCTION, - "Elite algorithm education and computational thinking development", - True, - False, - 1024, - False, - ), - # Science & Mathematics Excellence - HFModel( - "Science Research Educator", - "allenai/scibert_scivocab_uncased", - ModelCategory.SCIENCE_TUTORING, - "Scientific education with research-grade knowledge and vocabulary", - True, - False, - 512, - False, - ), - HFModel( - "Advanced Science AI", - "facebook/galactica-125m", - ModelCategory.SCIENCE_TUTORING, - "Advanced scientific knowledge and research methodology education", 
- True, - False, - 2048, - True, - ), - HFModel( - "Mathematical Reasoning Master", - "google/flan-t5-xl", - ModelCategory.MATH_TUTORING, - "Advanced mathematical reasoning, proofs, and problem-solving", - True, - False, - 2048, - True, - ), - HFModel( - "Interactive Math Tutor", - "microsoft/DialoGPT-small", - ModelCategory.MATH_TUTORING, - "Interactive mathematics tutoring with step-by-step explanations", - True, - False, - 1024, - True, - ), - # Language & Literature Excellence - HFModel( - "Multilingual Language Master", - "facebook/mbart-large-50-many-to-many-mmt", - ModelCategory.LANGUAGE_TUTORING, - "Advanced multilingual education and cross-language learning", - True, - False, - 1024, - False, - ), - HFModel( - "Literature & Language AI", - "microsoft/prophetnet-large-uncased-cnndm", - ModelCategory.LANGUAGE_TUTORING, - "Literature analysis and advanced language instruction", - True, - False, - 1024, - False, - ), - HFModel( - "Grammar & Comprehension Expert", - "google/electra-base-discriminator", - ModelCategory.LANGUAGE_TUTORING, - "Expert grammar instruction and reading comprehension development", - True, - False, - 512, - False, - ), - # Assessment & Evaluation Excellence - HFModel( - "Assessment Designer Pro", - "microsoft/DialoGPT-large", - ModelCategory.QUIZ_GENERATION, - "Professional assessment and quiz generation with interaction", - True, - False, - 2048, - True, - ), - HFModel( - "Learning Progress Analyzer", - "facebook/bart-large", - ModelCategory.LEARNING_ASSESSMENT, - "Comprehensive learning assessment and progress tracking", - True, - False, - 1024, - False, - ), - HFModel( - "Question Master AI", - "google/t5-base", - ModelCategory.QUIZ_GENERATION, - "Intelligent question generation for all educational levels", - True, - False, - 512, - True, - ), - HFModel( - "Exam Preparation Specialist", - "microsoft/unilm-base-cased", - ModelCategory.EXAM_PREPARATION, - "Specialized exam preparation and test strategy development", - True, - False, - 
1024, - False, - ), - # Personalized & Adaptive Learning Excellence - HFModel( - "Personal Learning Architect", - "microsoft/deberta-v3-base", - ModelCategory.PERSONALIZED_LEARNING, - "Advanced personalized learning path creation and optimization", - True, - False, - 512, - False, - ), - HFModel( - "Adaptive Learning Engine", - "facebook/opt-125m", - ModelCategory.ADAPTIVE_LEARNING, - "Intelligent adaptive learning with dynamic content adjustment", - True, - False, - 2048, - True, - ), - HFModel( - "Learning Analytics Expert", - "microsoft/layoutlm-base-uncased", - ModelCategory.LEARNING_ANALYTICS, - "Advanced learning analytics and educational data interpretation", - True, - False, - 512, - False, - ), - # Concept Explanation & Understanding Masters - HFModel( - "Concept Explanation Master", - "microsoft/deberta-v3-base", - ModelCategory.CONCEPT_EXPLANATION, - "Master-level concept explanation and knowledge breakdown", - True, - False, - 512, - False, - ), - HFModel( - "Knowledge Synthesizer", - "google/pegasus-xsum", - ModelCategory.CONCEPT_EXPLANATION, - "Advanced knowledge synthesis and concept summarization", - True, - False, - 512, - False, - ), - HFModel( - "Interactive Concept Guide", - "facebook/bart-base", - ModelCategory.CONCEPT_EXPLANATION, - "Interactive concept teaching with clarification and examples", - True, - False, - 1024, - False, - ), - # Homework & Study Support Excellence - HFModel( - "Programming Homework Expert", - "microsoft/codebert-base-mlm", - ModelCategory.HOMEWORK_ASSISTANCE, - "Expert programming homework assistance and debugging support", - True, - False, - 1024, - False, - ), - HFModel( - "Universal Homework Helper", - "google/flan-t5-small", - ModelCategory.HOMEWORK_ASSISTANCE, - "Comprehensive homework assistance across all academic subjects", - True, - False, - 1024, - True, - ), - HFModel( - "Global Study Assistant", - "facebook/mbart-large-cc25", - ModelCategory.HOMEWORK_ASSISTANCE, - "Multilingual homework support with 
cultural context understanding", - True, - False, - 1024, - False, - ), - # Study Materials & Resources Excellence - HFModel( - "Study Guide Architect", - "microsoft/prophetnet-large-uncased", - ModelCategory.STUDY_GUIDE_CREATION, - "Professional study guide creation and learning material development", - True, - False, - 1024, - False, - ), - HFModel( - "Educational Resource Creator", - "facebook/bart-large-xsum", - ModelCategory.STUDY_GUIDE_CREATION, - "Comprehensive educational resource and reference material creation", - True, - False, - 1024, - False, - ), - # Interactive Learning & Gamification - HFModel( - "Educational Game Designer", - "microsoft/DialoGPT-base", - ModelCategory.EDUCATIONAL_GAMES, - "Interactive educational games and gamified learning experiences", - True, - False, - 1024, - True, - ), - HFModel( - "Learning Game Engine", - "google/bert-base-uncased", - ModelCategory.EDUCATIONAL_GAMES, - "Educational game mechanics and interactive learning systems", - True, - False, - 512, - False, - ), - # History & Social Studies Excellence - HFModel( - "History Professor AI", - "microsoft/deberta-large", - ModelCategory.HISTORY_TUTORING, - "Professor-level historical analysis and social studies education", - True, - False, - 1024, - False, - ), - HFModel( - "Interactive History Guide", - "facebook/opt-350m", - ModelCategory.HISTORY_TUTORING, - "Interactive historical narratives and timeline exploration", - True, - False, - 2048, - True, - ), - # Multi-Subject Teaching Excellence - HFModel( - "Master Subject Teacher", - "google/flan-t5-base", - ModelCategory.SUBJECT_TEACHING, - "Expert multi-subject teaching with instruction-following excellence", - True, - False, - 1024, - True, - ), - HFModel( - "Universal Educator AI", - "microsoft/unilm-large-cased", - ModelCategory.SUBJECT_TEACHING, - "Universal education AI with cross-disciplinary knowledge", - True, - False, - 1024, - False, - ), - # Advanced Analytics & Optimization - HFModel( - "Advanced Learning 
Analytics", - "microsoft/layoutlm-large-uncased", - ModelCategory.LEARNING_ANALYTICS, - "Enterprise-level learning analytics and educational insights", - True, - False, - 1024, - False, - ), - HFModel( - "Personalization Engine Pro", - "google/electra-large-discriminator", - ModelCategory.PERSONALIZED_LEARNING, - "Advanced AI personalization with learning style adaptation", - True, - False, - 512, - False, - ), - HFModel( - "Global Adaptive System", - "facebook/mbart-large-50", - ModelCategory.ADAPTIVE_LEARNING, - "Global adaptive learning system with multilingual capabilities", - True, - False, - 1024, - False, - ), - ] - - # Qwen Models - Advanced Reasoning and Multimodal AI - QWEN_MODELS = [ - # Qwen2.5 Series - Latest Models - HFModel( - "Qwen2.5-72B-Instruct", - "Qwen/Qwen2.5-72B-Instruct", - ModelCategory.TEXT_GENERATION, - "Large-scale instruction-following model for complex reasoning", - True, - False, - 32768, - True, - ), - HFModel( - "Qwen2.5-32B-Instruct", - "Qwen/Qwen2.5-32B-Instruct", - ModelCategory.TEXT_GENERATION, - "High-performance instruction model for advanced tasks", - True, - False, - 32768, - True, - ), - HFModel( - "Qwen2.5-14B-Instruct", - "Qwen/Qwen2.5-14B-Instruct", - ModelCategory.TEXT_GENERATION, - "Efficient large model with excellent reasoning capabilities", - True, - False, - 32768, - True, - ), - HFModel( - "Qwen2.5-7B-Instruct", - "Qwen/Qwen2.5-7B-Instruct", - ModelCategory.TEXT_GENERATION, - "Optimized 7B model for general-purpose applications", - True, - False, - 32768, - True, - ), - HFModel( - "Qwen2.5-3B-Instruct", - "Qwen/Qwen2.5-3B-Instruct", - ModelCategory.TEXT_GENERATION, - "Lightweight model for resource-constrained environments", - True, - False, - 32768, - True, - ), - HFModel( - "Qwen2.5-1.5B-Instruct", - "Qwen/Qwen2.5-1.5B-Instruct", - ModelCategory.TEXT_GENERATION, - "Ultra-lightweight model for edge deployment", - True, - False, - 32768, - True, - ), - HFModel( - "Qwen2.5-0.5B-Instruct", - 
"Qwen/Qwen2.5-0.5B-Instruct", - ModelCategory.TEXT_GENERATION, - "Minimal footprint model for basic applications", - True, - False, - 32768, - True, - ), - # Qwen2.5-Coder Series - Programming Specialists - HFModel( - "Qwen2.5-Coder-32B-Instruct", - "Qwen/Qwen2.5-Coder-32B-Instruct", - ModelCategory.QWEN_CODE, - "Advanced code generation and programming assistance", - True, - False, - 131072, - True, - ), - HFModel( - "Qwen2.5-Coder-14B-Instruct", - "Qwen/Qwen2.5-Coder-14B-Instruct", - ModelCategory.QWEN_CODE, - "Code generation with excellent debugging capabilities", - True, - False, - 131072, - True, - ), - HFModel( - "Qwen2.5-Coder-7B-Instruct", - "Qwen/Qwen2.5-Coder-7B-Instruct", - ModelCategory.QWEN_CODE, - "Efficient coding assistant for multiple languages", - True, - False, - 131072, - True, - ), - HFModel( - "Qwen2.5-Coder-3B-Instruct", - "Qwen/Qwen2.5-Coder-3B-Instruct", - ModelCategory.QWEN_CODE, - "Lightweight programming assistant", - True, - False, - 131072, - True, - ), - HFModel( - "Qwen2.5-Coder-1.5B-Instruct", - "Qwen/Qwen2.5-Coder-1.5B-Instruct", - ModelCategory.QWEN_CODE, - "Compact code generation model", - True, - False, - 131072, - True, - ), - # Qwen2.5-Math Series - Mathematical Reasoning - HFModel( - "Qwen2.5-Math-72B-Instruct", - "Qwen/Qwen2.5-Math-72B-Instruct", - ModelCategory.QWEN_MATH, - "Advanced mathematical problem solving and reasoning", - True, - False, - 32768, - True, - ), - HFModel( - "Qwen2.5-Math-7B-Instruct", - "Qwen/Qwen2.5-Math-7B-Instruct", - ModelCategory.QWEN_MATH, - "Mathematical reasoning and calculation assistance", - True, - False, - 32768, - True, - ), - HFModel( - "Qwen2.5-Math-1.5B-Instruct", - "Qwen/Qwen2.5-Math-1.5B-Instruct", - ModelCategory.QWEN_MATH, - "Compact mathematical problem solver", - True, - False, - 32768, - True, - ), - # QwQ Series - Reasoning Specialists - HFModel( - "QwQ-32B-Preview", - "Qwen/QwQ-32B-Preview", - ModelCategory.QWEN_REASONING, - "Advanced reasoning and logical thinking model", - 
True, - False, - 32768, - True, - ), - # Qwen2-VL Series - Vision-Language Models - HFModel( - "Qwen2-VL-72B-Instruct", - "Qwen/Qwen2-VL-72B-Instruct", - ModelCategory.QWEN_VISION, - "Large-scale vision-language understanding and generation", - True, - False, - 32768, - True, - ), - HFModel( - "Qwen2-VL-7B-Instruct", - "Qwen/Qwen2-VL-7B-Instruct", - ModelCategory.QWEN_VISION, - "Efficient vision-language model for multimodal tasks", - True, - False, - 32768, - True, - ), - HFModel( - "Qwen2-VL-2B-Instruct", - "Qwen/Qwen2-VL-2B-Instruct", - ModelCategory.QWEN_VISION, - "Lightweight vision-language model", - True, - False, - 32768, - True, - ), - # Qwen2-Audio Series - Audio Understanding - HFModel( - "Qwen2-Audio-7B-Instruct", - "Qwen/Qwen2-Audio-7B-Instruct", - ModelCategory.QWEN_AUDIO, - "Advanced audio understanding and generation", - True, - False, - 32768, - True, - ), - # Qwen Legacy Models - Still Powerful - HFModel( - "Qwen1.5-110B-Chat", - "Qwen/Qwen1.5-110B-Chat", - ModelCategory.CONVERSATIONAL, - "Large conversational model with broad knowledge", - True, - False, - 32768, - True, - ), - HFModel( - "Qwen1.5-72B-Chat", - "Qwen/Qwen1.5-72B-Chat", - ModelCategory.CONVERSATIONAL, - "Conversational AI with excellent reasoning", - True, - False, - 32768, - True, - ), - HFModel( - "Qwen1.5-32B-Chat", - "Qwen/Qwen1.5-32B-Chat", - ModelCategory.CONVERSATIONAL, - "Efficient chat model for interactive applications", - True, - False, - 32768, - True, - ), - HFModel( - "Qwen1.5-14B-Chat", - "Qwen/Qwen1.5-14B-Chat", - ModelCategory.CONVERSATIONAL, - "Balanced performance chat model", - True, - False, - 32768, - True, - ), - HFModel( - "Qwen1.5-7B-Chat", - "Qwen/Qwen1.5-7B-Chat", - ModelCategory.CONVERSATIONAL, - "Popular chat model with good performance", - True, - False, - 32768, - True, - ), - HFModel( - "Qwen1.5-4B-Chat", - "Qwen/Qwen1.5-4B-Chat", - ModelCategory.CONVERSATIONAL, - "Lightweight conversational AI", - True, - False, - 32768, - True, - ), - ] - - # 
DeepSeek Models - Coding and Reasoning Excellence - DEEPSEEK_MODELS = [ - # DeepSeek-V3 Series - Latest Generation - HFModel( - "DeepSeek-V3", - "deepseek-ai/DeepSeek-V3", - ModelCategory.DEEPSEEK_REASONING, - "Latest generation reasoning and knowledge model", - True, - False, - 65536, - True, - ), - HFModel( - "DeepSeek-V3-Base", - "deepseek-ai/DeepSeek-V3-Base", - ModelCategory.TEXT_GENERATION, - "Foundation model for various downstream tasks", - True, - False, - 65536, - True, - ), - # DeepSeek-V2.5 Series - HFModel( - "DeepSeek-V2.5", - "deepseek-ai/DeepSeek-V2.5", - ModelCategory.DEEPSEEK_REASONING, - "Advanced reasoning and general intelligence model", - True, - False, - 32768, - True, - ), - # DeepSeek-Coder Series - Programming Specialists - HFModel( - "DeepSeek-Coder-V2-Instruct", - "deepseek-ai/DeepSeek-Coder-V2-Instruct", - ModelCategory.DEEPSEEK_CODING, - "Advanced code generation and programming assistance", - True, - False, - 163840, - True, - ), - HFModel( - "DeepSeek-Coder-V2-Base", - "deepseek-ai/DeepSeek-Coder-V2-Base", - ModelCategory.DEEPSEEK_CODING, - "Foundation coding model for fine-tuning", - True, - False, - 163840, - True, - ), - HFModel( - "DeepSeek-Coder-33B-Instruct", - "deepseek-ai/deepseek-coder-33b-instruct", - ModelCategory.DEEPSEEK_CODING, - "Large-scale code generation and debugging", - True, - False, - 16384, - True, - ), - HFModel( - "DeepSeek-Coder-6.7B-Instruct", - "deepseek-ai/deepseek-coder-6.7b-instruct", - ModelCategory.DEEPSEEK_CODING, - "Efficient code assistance and generation", - True, - False, - 16384, - True, - ), - HFModel( - "DeepSeek-Coder-1.3B-Instruct", - "deepseek-ai/deepseek-coder-1.3b-instruct", - ModelCategory.DEEPSEEK_CODING, - "Lightweight coding assistant", - True, - False, - 16384, - True, - ), - # DeepSeek-Math Series - Mathematical Reasoning - HFModel( - "DeepSeek-Math-7B-Instruct", - "deepseek-ai/deepseek-math-7b-instruct", - ModelCategory.DEEPSEEK_MATH, - "Mathematical problem solving and reasoning", 
- True, - False, - 4096, - True, - ), - HFModel( - "DeepSeek-Math-7B-Base", - "deepseek-ai/deepseek-math-7b-base", - ModelCategory.DEEPSEEK_MATH, - "Foundation model for mathematical reasoning", - True, - False, - 4096, - True, - ), - # DeepSeek Chat Models - HFModel( - "DeepSeek-67B-Chat", - "deepseek-ai/deepseek-llm-67b-chat", - ModelCategory.CONVERSATIONAL, - "Large conversational model with strong reasoning", - True, - False, - 4096, - True, - ), - HFModel( - "DeepSeek-7B-Chat", - "deepseek-ai/deepseek-llm-7b-chat", - ModelCategory.CONVERSATIONAL, - "Efficient chat model for general conversations", - True, - False, - 4096, - True, - ), - # DeepSeek-VL Series - Vision-Language - HFModel( - "DeepSeek-VL-7B-Chat", - "deepseek-ai/deepseek-vl-7b-chat", - ModelCategory.VISION_LANGUAGE, - "Vision-language understanding and conversation", - True, - False, - 4096, - True, - ), - HFModel( - "DeepSeek-VL-1.3B-Chat", - "deepseek-ai/deepseek-vl-1.3b-chat", - ModelCategory.VISION_LANGUAGE, - "Lightweight vision-language model", - True, - False, - 4096, - True, - ), - ] - - # Advanced Image Editing Models - IMAGE_EDITING_MODELS = [ - # Professional Image Editing - HFModel( - "SDXL Inpainting", - "diffusers/stable-diffusion-xl-1.0-inpainting-0.1", - ModelCategory.IMAGE_EDITING, - "High-quality image inpainting and editing", - True, - False, - 1024, - False, - ), - HFModel( - "ControlNet Inpainting", - "lllyasviel/control_v11p_sd15_inpaint", - ModelCategory.IMAGE_EDITING, - "Controllable image inpainting with precise editing", - True, - False, - 512, - False, - ), - HFModel( - "InstantID Face Editor", - "InstantX/InstantID", - ModelCategory.FACE_ENHANCEMENT, - "Identity-preserving face editing and enhancement", - True, - False, - 512, - False, - ), - HFModel( - "Real-ESRGAN Upscaler", - "ai-forever/Real-ESRGAN", - ModelCategory.IMAGE_UPSCALING, - "Advanced image super-resolution and enhancement", - True, - False, - 1024, - False, - ), - HFModel( - "GFPGAN Face Restoration", - 
"Xintao/GFPGAN", - ModelCategory.FACE_RESTORATION, - "High-quality face restoration and enhancement", - True, - False, - 512, - False, - ), - HFModel( - "CodeFormer Face Restoration", - "sczhou/CodeFormer", - ModelCategory.FACE_RESTORATION, - "Robust face restoration for low-quality images", - True, - False, - 512, - False, - ), - HFModel( - "Background Removal", - "briaai/RMBG-1.4", - ModelCategory.BACKGROUND_REMOVAL, - "Precise background removal and segmentation", - True, - False, - 1024, - False, - ), - HFModel( - "U2-Net Background Removal", - "simonw/u2net-portrait-segmentation", - ModelCategory.BACKGROUND_REMOVAL, - "Portrait and object background removal", - True, - False, - 320, - False, - ), - HFModel( - "Photo Colorization", - "microsoft/beit-base-patch16-224-pt22k-ft22k", - ModelCategory.COLOR_CORRECTION, - "AI-powered photo colorization and enhancement", - True, - False, - 224, - False, - ), - HFModel( - "Style Transfer Neural", - "pytorch/vision", - ModelCategory.ARTISTIC_FILTER, - "Neural style transfer for artistic image effects", - True, - False, - 512, - False, - ), - ] - - # Face Swap and Manipulation Models - FACE_SWAP_MODELS = [ - # Advanced Face Swapping - HFModel( - "InsightFace SwapFace", - "deepinsight/inswapper_128.onnx", - ModelCategory.FACE_SWAP, - "High-quality face swapping with identity preservation", - True, - False, - 128, - False, - ), - HFModel( - "SimSwap Face Swap", - "ppogg/simswap_official", - ModelCategory.FACE_SWAP, - "Realistic face swapping for videos and images", - True, - False, - 224, - False, - ), - HFModel( - "FaceX-Zoo Face Swap", - "FacePerceiver/FaceX-Zoo", - ModelCategory.FACE_SWAP, - "Multi-purpose face analysis and swapping toolkit", - True, - False, - 112, - False, - ), - HFModel( - "Face Enhancement Pro", - "TencentARC/GFPGAN", - ModelCategory.FACE_ENHANCEMENT, - "Professional face enhancement and restoration", - True, - False, - 512, - False, - ), - HFModel( - "DualStyleGAN Face Edit", - 
"williamyang1991/DualStyleGAN", - ModelCategory.FACE_ENHANCEMENT, - "Style-controllable face image editing", - True, - False, - 1024, - False, - ), - HFModel( - "MegaPortraits Face Animate", - "NVlabs/MegaPortraits", - ModelCategory.FACIAL_ANIMATION, - "One-shot facial animation and expression transfer", - True, - False, - 256, - False, - ), - ] - - # Advanced TTS and STT Models - ADVANCED_SPEECH_MODELS = [ - # Multilingual Text-to-Speech - HFModel( - "XTTS v2 Multilingual", - "coqui/XTTS-v2", - ModelCategory.MULTILINGUAL_TTS, - "High-quality multilingual text-to-speech with voice cloning", - True, - False, - 24000, - True, - ), - HFModel( - "Bark Text-to-Speech", - "suno/bark", - ModelCategory.ADVANCED_TTS, - "Generative TTS with music, sound effects, and multiple speakers", - True, - False, - 24000, - False, - ), - HFModel( - "SpeechT5 TTS", - "microsoft/speecht5_tts", - ModelCategory.ADVANCED_TTS, - "High-quality neural text-to-speech synthesis", - True, - False, - 16000, - False, - ), - HFModel( - "VALL-E X Multilingual", - "Plachtaa/VALL-E-X", - ModelCategory.MULTILINGUAL_TTS, - "Zero-shot voice synthesis in multiple languages", - True, - False, - 24000, - False, - ), - HFModel( - "Arabic TTS", - "arabic-speech-corpus/tts-arabic", - ModelCategory.MULTILINGUAL_TTS, - "High-quality Arabic text-to-speech synthesis", - True, - False, - 22050, - False, - ), - HFModel( - "Tortoise TTS", - "jbetker/tortoise-tts", - ModelCategory.VOICE_CLONING, - "High-quality voice cloning and synthesis", - True, - False, - 22050, - False, - ), - # Advanced Speech-to-Text - HFModel( - "Whisper Large v3", - "openai/whisper-large-v3", - ModelCategory.MULTILINGUAL_STT, - "State-of-the-art multilingual speech recognition", - True, - False, - 30, - False, - ), - HFModel( - "Whisper Large v3 Turbo", - "openai/whisper-large-v3-turbo", - ModelCategory.MULTILINGUAL_STT, - "Fast multilingual speech recognition with high accuracy", - True, - False, - 30, - True, - ), - HFModel( - "Arabic 
Whisper", - "arabic-speech-corpus/whisper-large-arabic", - ModelCategory.MULTILINGUAL_STT, - "Optimized Arabic speech recognition model", - True, - False, - 30, - False, - ), - HFModel( - "MMS Speech Recognition", - "facebook/mms-1b-all", - ModelCategory.MULTILINGUAL_STT, - "Massively multilingual speech recognition (1000+ languages)", - True, - False, - 16000, - False, - ), - HFModel( - "Wav2Vec2 Arabic", - "facebook/wav2vec2-large-xlsr-53-arabic", - ModelCategory.MULTILINGUAL_STT, - "Arabic speech recognition with Wav2Vec2 architecture", - True, - False, - 16000, - False, - ), - HFModel( - "SpeechT5 ASR", - "microsoft/speecht5_asr", - ModelCategory.ADVANCED_STT, - "Advanced automatic speech recognition", - True, - False, - 16000, - False, - ), - # Real-time Translation and Voice Conversion - HFModel( - "SeamlessM4T", - "facebook/seamless-m4t-v2-large", - ModelCategory.REAL_TIME_TRANSLATION, - "Multilingual speech-to-speech translation", - True, - False, - 16000, - True, - ), - HFModel( - "Voice Conversion VITS", - "jaywalnut310/vits-ljs", - ModelCategory.VOICE_CONVERSION, - "High-quality voice conversion and synthesis", - True, - False, - 22050, - False, - ), - HFModel( - "RVC Voice Clone", - "lj1995/GPT-SoVITS", - ModelCategory.VOICE_CLONING, - "Real-time voice cloning and conversion", - True, - False, - 32000, - True, - ), - ] - - # Talking Avatar and Video Generation Models - TALKING_AVATAR_MODELS = [ - # Talking Head Generation - HFModel( - "SadTalker Talking Head", - "vinthony/SadTalker", - ModelCategory.TALKING_AVATAR, - "Generate talking head videos from audio and single image", - True, - False, - 256, - False, - ), - HFModel( - "Real-Time Face Animation", - "PaddlePaddle/PaddleGAN-FOM", - ModelCategory.FACIAL_ANIMATION, - "Real-time facial animation and expression control", - True, - False, - 256, - True, - ), - HFModel( - "LivePortrait Animation", - "KwaiVGI/LivePortrait", - ModelCategory.TALKING_AVATAR, - "High-quality portrait animation with lip sync", 
- True, - False, - 512, - False, - ), - HFModel( - "DualTalker Video", - "OpenTalker/DualTalker", - ModelCategory.TALKING_AVATAR, - "Dual-modal talking face generation with enhanced quality", - True, - False, - 256, - False, - ), - HFModel( - "Video Retalking", - "vinthony/video-retalking", - ModelCategory.LIP_SYNC, - "Audio-driven lip sync for existing videos", - True, - False, - 224, - False, - ), - HFModel( - "Wav2Lip Lip Sync", - "Rudrabha/Wav2Lip", - ModelCategory.LIP_SYNC, - "Accurate lip sync generation from audio", - True, - False, - 96, - False, - ), - HFModel( - "Digital Human Avatar", - "modelscope/damo-text-to-video-synthesis", - ModelCategory.VIRTUAL_PRESENTER, - "Generate digital human presenter videos", - True, - False, - 320, - False, - ), - HFModel( - "AI News Anchor", - "microsoft/DiT-XL-2-256", - ModelCategory.AI_ANCHOR, - "Professional AI news anchor and presenter generation", - True, - False, - 256, - False, - ), - HFModel( - "Avatar Gesture Control", - "ZhengPeng7/BiSeNet", - ModelCategory.GESTURE_GENERATION, - "Generate natural gestures and body language for avatars", - True, - False, - 512, - False, - ), - ] - - # Interactive Language Models (English-Arabic Focus) - INTERACTIVE_LANGUAGE_MODELS = [ - # Bilingual Conversation Models - HFModel( - "AceGPT Arabic-English", - "FreedomIntelligence/AceGPT-13B", - ModelCategory.BILINGUAL_CONVERSATION, - "Bilingual Arabic-English conversation model", - True, - False, - 4096, - True, - ), - HFModel( - "Jais Arabic Chat", - "core42/jais-13b-chat", - ModelCategory.BILINGUAL_CONVERSATION, - "Advanced Arabic conversation model with English support", - True, - False, - 2048, - True, - ), - HFModel( - "AraBART Conversational", - "aubmindlab/arabart-base-conversational", - ModelCategory.BILINGUAL_CONVERSATION, - "Arabic conversational AI with cultural understanding", - True, - False, - 1024, - True, - ), - HFModel( - "Multilingual Chat Assistant", - "microsoft/DialoGPT-large", - 
class HuggingFaceInference:
    """Async client for the Hugging Face Inference API.

    Intended to be used as an async context manager, which opens the
    ``aiohttp`` session on entry and closes it on exit::

        async with HuggingFaceInference(api_token) as client:
            result = await client.summarization(model_id, text)

    Every task helper builds a ``{"inputs": ..., "parameters": ...}`` payload
    and POSTs it to ``base_url + model_id``. Helpers that produce binary
    artefacts (images, audio) return the raw response bytes; all others
    return the decoded JSON body.
    """

    def __init__(
        self,
        api_token: str,
        base_url: str = "https://api-inference.huggingface.co/models/",
    ):
        """Store credentials; the HTTP session is created in ``__aenter__``."""
        self.api_token = api_token
        self.base_url = base_url
        self.session = None

    async def __aenter__(self):
        """Open the HTTP session with the bearer token and a 5 minute timeout."""
        self.session = aiohttp.ClientSession(
            headers={"Authorization": f"Bearer {self.api_token}"},
            timeout=aiohttp.ClientTimeout(total=300),  # 5 minutes timeout
        )
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Close the HTTP session if one was opened."""
        if self.session:
            await self.session.close()

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _b64(data: bytes) -> str:
        """Return *data* base64-encoded as text so it can be embedded in JSON."""
        return base64.b64encode(data).decode()

    def _require_session(self):
        """Return the open session, or raise a clear error if there is none.

        Raises:
            RuntimeError: the client is being used outside ``async with``.
        """
        if self.session is None:
            raise RuntimeError(
                "HuggingFaceInference has no open session; use it via "
                "'async with HuggingFaceInference(...) as client:'"
            )
        return self.session

    @staticmethod
    async def _loading_delay(response) -> float:
        """Return how long to wait before retrying a 503 response, in seconds.

        Reads ``estimated_time`` from the JSON error body, falling back to 30
        when the body is not a JSON object or the value is not numeric. The
        result is clamped to the range 0-60 seconds.
        """
        try:
            # content_type=None: parse the body even if the server did not
            # label it as application/json.
            error_info = await response.json(content_type=None)
        except (ValueError, aiohttp.ClientError):
            return 30.0

        estimated = 30
        if isinstance(error_info, dict):
            estimated = error_info.get("estimated_time", 30)
        try:
            delay = float(estimated)
        except (TypeError, ValueError):
            delay = 30.0
        return max(0.0, min(delay, 60.0))  # Cap at 60 seconds

    async def _request(
        self,
        model_id: str,
        payload: Dict[str, Any],
        expect_json: bool = True,
        max_retries: int = 5,
    ) -> Union[Dict[str, Any], bytes]:
        """POST *payload* to the model endpoint and return the response body.

        A 503 response means the model is still loading; the request is
        retried after the server-suggested delay, at most *max_retries* times.

        Args:
            model_id: Model identifier appended to ``base_url``.
            payload: JSON-serialisable request body.
            expect_json: Decode the body as JSON when true, otherwise return
                the raw bytes.
            max_retries: Maximum number of retries after a 503 response.

        Raises:
            RuntimeError: no session is open, the API answered with a status
                other than 200/503, or the model was still loading after the
                last retry.
        """
        session = self._require_session()
        url = f"{self.base_url}{model_id}"
        retries = max(0, max_retries)

        try:
            for attempt in range(retries + 1):
                async with session.post(url, json=payload) as response:
                    if response.status == 200:
                        if expect_json:
                            return await response.json()
                        return await response.read()
                    if response.status != 503:
                        error_text = await response.text()
                        raise RuntimeError(
                            f"API request failed with status {response.status}: {error_text}"
                        )
                    wait_seconds = await self._loading_delay(response)

                # The response is released before sleeping so a retry never
                # holds a connection open while it waits.
                if attempt == retries:
                    break
                logger.info(
                    "Model %s is loading, waiting %ss", model_id, wait_seconds
                )
                await asyncio.sleep(wait_seconds)

            raise RuntimeError(
                f"Model {model_id} is still loading after {retries} retries"
            )
        except Exception as e:
            logger.error("Error calling Hugging Face API for %s: %s", model_id, e)
            raise

    async def _stream_request(self, model_id: str, payload: Dict[str, Any]):
        """Yield the response body as UTF-8 decoded chunks as they arrive.

        This is an async generator: iterate it with ``async for``. The
        caller's *payload* is not modified; a copy with ``"stream": True`` is
        sent instead.

        Raises:
            RuntimeError: no session is open, or the API answered with a
                status other than 200.
        """
        session = self._require_session()
        url = f"{self.base_url}{model_id}"
        streaming_payload = {**payload, "stream": True}

        try:
            async with session.post(url, json=streaming_payload) as response:
                if response.status != 200:
                    error_text = await response.text()
                    raise RuntimeError(
                        f"Streaming request failed with status {response.status}: {error_text}"
                    )
                async for chunk in response.content:
                    if chunk:
                        yield chunk.decode("utf-8")
        except Exception as e:
            logger.error(
                "Error streaming from Hugging Face API for %s: %s", model_id, e
            )
            raise

    # ------------------------------------------------------------------
    # Core tasks
    # ------------------------------------------------------------------

    async def text_generation(
        self,
        model_id: str,
        prompt: str,
        max_tokens: int = 100,
        temperature: float = 0.7,
        stream: bool = False,
        **kwargs,
    ) -> Any:
        """Generate text using a text generation model.

        Returns:
            The decoded JSON response, or - when *stream* is true - an async
            iterator of text chunks to consume with ``async for``.
        """
        payload = {
            "inputs": prompt,
            "parameters": {
                "max_new_tokens": max_tokens,
                "temperature": temperature,
                "do_sample": True,
                **kwargs,
            },
            "options": {"use_cache": False},
        }

        if stream:
            # _stream_request is an async generator, so it is handed back
            # as-is; awaiting it would raise TypeError.
            return self._stream_request(model_id, payload)
        return await self._request(model_id, payload)

    async def text_to_image(
        self,
        model_id: str,
        prompt: str,
        negative_prompt: Optional[str] = None,
        **kwargs,
    ) -> bytes:
        """Generate image from text prompt; returns the raw image bytes."""
        parameters: Dict[str, Any] = {}
        if negative_prompt:
            parameters["negative_prompt"] = negative_prompt
        parameters.update(kwargs)

        payload = {"inputs": prompt, "parameters": parameters}
        return await self._request(model_id, payload, expect_json=False)

    async def automatic_speech_recognition(
        self, model_id: str, audio_data: bytes, **kwargs
    ) -> Dict[str, Any]:
        """Transcribe audio to text; the audio is sent base64-encoded."""
        payload = {"inputs": self._b64(audio_data), "parameters": kwargs}
        return await self._request(model_id, payload)

    async def text_to_speech(self, model_id: str, text: str, **kwargs) -> bytes:
        """Convert text to speech audio; returns the raw audio bytes."""
        payload = {"inputs": text, "parameters": kwargs}
        return await self._request(model_id, payload, expect_json=False)

    async def image_classification(
        self, model_id: str, image_data: bytes, **kwargs
    ) -> Dict[str, Any]:
        """Classify images; the image is sent base64-encoded."""
        payload = {"inputs": self._b64(image_data), "parameters": kwargs}
        return await self._request(model_id, payload)

    async def feature_extraction(
        self, model_id: str, texts: Union[str, List[str]], **kwargs
    ) -> Dict[str, Any]:
        """Extract embeddings from text"""
        payload = {"inputs": texts, "parameters": kwargs}
        return await self._request(model_id, payload)

    async def translation(
        self,
        model_id: str,
        text: str,
        src_lang: Optional[str] = None,
        tgt_lang: Optional[str] = None,
        **kwargs,
    ) -> Dict[str, Any]:
        """Translate text between languages.

        ``src_lang`` / ``tgt_lang`` are only sent when they are non-empty.
        """
        parameters: Dict[str, Any] = {}
        if src_lang:
            parameters["src_lang"] = src_lang
        if tgt_lang:
            parameters["tgt_lang"] = tgt_lang
        parameters.update(kwargs)

        payload = {"inputs": text, "parameters": parameters}
        return await self._request(model_id, payload)

    async def summarization(
        self,
        model_id: str,
        text: str,
        max_length: int = 150,
        min_length: int = 30,
        **kwargs,
    ) -> Dict[str, Any]:
        """Summarize text"""
        payload = {
            "inputs": text,
            "parameters": {
                "max_length": max_length,
                "min_length": min_length,
                **kwargs,
            },
        }
        return await self._request(model_id, payload)

    async def question_answering(
        self, model_id: str, question: str, context: str, **kwargs
    ) -> Dict[str, Any]:
        """Answer questions based on context"""
        payload = {
            "inputs": {"question": question, "context": context},
            "parameters": kwargs,
        }
        return await self._request(model_id, payload)

    async def zero_shot_classification(
        self, model_id: str, text: str, candidate_labels: List[str], **kwargs
    ) -> Dict[str, Any]:
        """Classify text without training data"""
        payload = {
            "inputs": text,
            "parameters": {"candidate_labels": candidate_labels, **kwargs},
        }
        return await self._request(model_id, payload)

    async def conversational(
        self,
        model_id: str,
        text: str,
        conversation_history: Optional[List[Dict[str, str]]] = None,
        **kwargs,
    ) -> Dict[str, Any]:
        """Have a conversation with a model.

        Each history entry may carry a ``"user"`` and/or a ``"bot"`` key;
        they are forwarded as ``past_user_inputs`` and
        ``generated_responses`` respectively.
        """
        inputs: Dict[str, Any] = {"text": text}
        if conversation_history:
            inputs["past_user_inputs"] = [
                turn["user"] for turn in conversation_history if "user" in turn
            ]
            inputs["generated_responses"] = [
                turn["bot"] for turn in conversation_history if "bot" in turn
            ]

        payload = {"inputs": inputs, "parameters": kwargs}
        return await self._request(model_id, payload)

    # New methods for expanded model categories

    async def text_to_video(
        self, model_id: str, prompt: str, **kwargs
    ) -> Dict[str, Any]:
        """Generate video from text prompt"""
        payload = {
            "inputs": prompt,
            # Defaults first; anything in kwargs overrides them.
            "parameters": {
                "duration": 5,
                "fps": 24,
                "width": 512,
                "height": 512,
                **kwargs,
            },
        }
        return await self._request(model_id, payload)

    async def video_to_text(
        self, model_id: str, video_data: bytes, **kwargs
    ) -> Dict[str, Any]:
        """Analyze video and generate text description"""
        payload = {
            "inputs": {"video": self._b64(video_data)},
            "parameters": kwargs,
        }
        return await self._request(model_id, payload)

    async def code_generation(
        self, model_id: str, prompt: str, **kwargs
    ) -> Dict[str, Any]:
        """Generate code from natural language prompt"""
        payload = {
            "inputs": prompt,
            "parameters": {
                "max_length": 500,
                "temperature": 0.2,
                "language": "python",
                **kwargs,
            },
        }
        return await self._request(model_id, payload)

    async def code_completion(
        self, model_id: str, code: str, **kwargs
    ) -> Dict[str, Any]:
        """Complete partial code"""
        payload = {
            "inputs": code,
            "parameters": {"max_length": 100, "temperature": 0.1, **kwargs},
        }
        return await self._request(model_id, payload)

    async def text_to_3d(self, model_id: str, prompt: str, **kwargs) -> Dict[str, Any]:
        """Generate 3D model from text description"""
        payload = {
            "inputs": prompt,
            "parameters": {"resolution": 64, "format": "obj", **kwargs},
        }
        return await self._request(model_id, payload)

    async def image_to_3d(
        self, model_id: str, image_data: bytes, **kwargs
    ) -> Dict[str, Any]:
        """Generate 3D model from image"""
        payload = {
            "inputs": {"image": self._b64(image_data)},
            "parameters": kwargs,
        }
        return await self._request(model_id, payload)

    async def ocr(self, model_id: str, image_data: bytes, **kwargs) -> Dict[str, Any]:
        """Perform optical character recognition on image"""
        payload = {
            "inputs": {"image": self._b64(image_data)},
            "parameters": {"language": "en", **kwargs},
        }
        return await self._request(model_id, payload)

    async def document_analysis(
        self, model_id: str, document_data: bytes, **kwargs
    ) -> Dict[str, Any]:
        """Analyze document structure and content"""
        payload = {
            "inputs": {"document": self._b64(document_data)},
            "parameters": kwargs,
        }
        return await self._request(model_id, payload)

    async def vision_language(
        self, model_id: str, image_data: bytes, text: str, **kwargs
    ) -> Dict[str, Any]:
        """Process image and text together"""
        payload = {
            "inputs": {"image": self._b64(image_data), "text": text},
            "parameters": kwargs,
        }
        return await self._request(model_id, payload)

    async def multimodal_reasoning(
        self, model_id: str, inputs: Dict[str, Any], **kwargs
    ) -> Dict[str, Any]:
        """Perform reasoning across multiple modalities"""
        payload = {"inputs": inputs, "parameters": kwargs}
        return await self._request(model_id, payload)

    async def music_generation(
        self, model_id: str, prompt: str, **kwargs
    ) -> Dict[str, Any]:
        """Generate music from text prompt"""
        payload = {
            "inputs": prompt,
            "parameters": {
                "duration": 30,
                "bpm": 120,
                "genre": "electronic",
                **kwargs,
            },
        }
        return await self._request(model_id, payload)

    async def voice_cloning(
        self, model_id: str, text: str, voice_sample: bytes, **kwargs
    ) -> bytes:
        """Clone voice and synthesize speech; returns the raw audio bytes."""
        payload = {
            "inputs": {"text": text, "voice_sample": self._b64(voice_sample)},
            "parameters": kwargs,
        }
        return await self._request(model_id, payload, expect_json=False)

    async def super_resolution(
        self, model_id: str, image_data: bytes, **kwargs
    ) -> bytes:
        """Enhance image resolution; returns the raw image bytes."""
        payload = {
            "inputs": {"image": self._b64(image_data)},
            "parameters": {"scale_factor": 4, **kwargs},
        }
        return await self._request(model_id, payload, expect_json=False)

    async def background_removal(
        self, model_id: str, image_data: bytes, **kwargs
    ) -> bytes:
        """Remove background from image; returns the raw image bytes."""
        payload = {
            "inputs": {"image": self._b64(image_data)},
            "parameters": kwargs,
        }
        return await self._request(model_id, payload, expect_json=False)

    async def creative_writing(
        self, model_id: str, prompt: str, **kwargs
    ) -> Dict[str, Any]:
        """Generate creative content"""
        payload = {
            "inputs": prompt,
            "parameters": {
                "max_length": 1000,
                "creativity": 0.8,
                "genre": "general",
                **kwargs,
            },
        }
        return await self._request(model_id, payload)

    async def business_document(
        self, model_id: str, document_type: str, context: str, **kwargs
    ) -> Dict[str, Any]:
        """Generate business documents"""
        payload = {
            "inputs": f"Generate {document_type}: {context}",
            "parameters": {
                "format": "professional",
                "length": "medium",
                **kwargs,
            },
        }
        return await self._request(model_id, payload)
Hugging Face model operations""" - - def __init__(self, api_token: str): - self.api_token = api_token - self.models = HuggingFaceModels() - - def get_models_by_category(self, category: ModelCategory) -> List[HFModel]: - """Get all models for a specific category""" - all_models = [] - - if category == ModelCategory.TEXT_GENERATION: - all_models = self.models.TEXT_GENERATION_MODELS - elif category == ModelCategory.TEXT_TO_IMAGE: - all_models = self.models.TEXT_TO_IMAGE_MODELS - elif category == ModelCategory.AUTOMATIC_SPEECH_RECOGNITION: - all_models = self.models.ASR_MODELS - elif category == ModelCategory.TEXT_TO_SPEECH: - all_models = self.models.TTS_MODELS - elif category == ModelCategory.IMAGE_CLASSIFICATION: - all_models = self.models.IMAGE_CLASSIFICATION_MODELS - elif category == ModelCategory.FEATURE_EXTRACTION: - all_models = self.models.FEATURE_EXTRACTION_MODELS - elif category == ModelCategory.TRANSLATION: - all_models = self.models.TRANSLATION_MODELS - elif category == ModelCategory.SUMMARIZATION: - all_models = self.models.SUMMARIZATION_MODELS - - return all_models - - def get_all_models(self) -> Dict[ModelCategory, List[HFModel]]: - """Get all available models organized by category""" - return { - # Core AI categories - ModelCategory.TEXT_GENERATION: self.models.TEXT_GENERATION_MODELS, - ModelCategory.TEXT_TO_IMAGE: self.models.TEXT_TO_IMAGE_MODELS, - ModelCategory.AUTOMATIC_SPEECH_RECOGNITION: self.models.ASR_MODELS, - ModelCategory.TEXT_TO_SPEECH: self.models.TTS_MODELS, - ModelCategory.IMAGE_CLASSIFICATION: self.models.IMAGE_CLASSIFICATION_MODELS, - ModelCategory.FEATURE_EXTRACTION: self.models.FEATURE_EXTRACTION_MODELS, - ModelCategory.TRANSLATION: self.models.TRANSLATION_MODELS, - ModelCategory.SUMMARIZATION: self.models.SUMMARIZATION_MODELS, - # Video and Motion - ModelCategory.TEXT_TO_VIDEO: self.models.VIDEO_GENERATION_MODELS, - ModelCategory.VIDEO_GENERATION: self.models.VIDEO_GENERATION_MODELS, - ModelCategory.VIDEO_TO_TEXT: 
self.models.VIDEO_GENERATION_MODELS, - ModelCategory.VIDEO_CLASSIFICATION: self.models.VIDEO_GENERATION_MODELS, - # Code and Development - ModelCategory.CODE_GENERATION: self.models.CODE_GENERATION_MODELS, - ModelCategory.CODE_COMPLETION: self.models.CODE_GENERATION_MODELS, - ModelCategory.CODE_EXPLANATION: self.models.CODE_GENERATION_MODELS, - ModelCategory.APP_GENERATION: self.models.CODE_GENERATION_MODELS, - # 3D and AR/VR - ModelCategory.TEXT_TO_3D: self.models.THREE_D_MODELS, - ModelCategory.IMAGE_TO_3D: self.models.THREE_D_MODELS, - ModelCategory.THREE_D_GENERATION: self.models.THREE_D_MODELS, - ModelCategory.MESH_GENERATION: self.models.THREE_D_MODELS, - # Document Processing - ModelCategory.OCR: self.models.DOCUMENT_PROCESSING_MODELS, - ModelCategory.DOCUMENT_ANALYSIS: self.models.DOCUMENT_PROCESSING_MODELS, - ModelCategory.HANDWRITING_RECOGNITION: self.models.DOCUMENT_PROCESSING_MODELS, - ModelCategory.TABLE_EXTRACTION: self.models.DOCUMENT_PROCESSING_MODELS, - ModelCategory.FORM_PROCESSING: self.models.DOCUMENT_PROCESSING_MODELS, - # Multimodal AI - ModelCategory.VISION_LANGUAGE: self.models.MULTIMODAL_MODELS, - ModelCategory.MULTIMODAL_REASONING: self.models.MULTIMODAL_MODELS, - ModelCategory.VISUAL_QUESTION_ANSWERING: self.models.MULTIMODAL_MODELS, - ModelCategory.MULTIMODAL_CHAT: self.models.MULTIMODAL_MODELS, - ModelCategory.CROSS_MODAL_GENERATION: self.models.MULTIMODAL_MODELS, - # Specialized AI - ModelCategory.MUSIC_GENERATION: self.models.SPECIALIZED_AI_MODELS, - ModelCategory.VOICE_CLONING: self.models.SPECIALIZED_AI_MODELS, - ModelCategory.SUPER_RESOLUTION: self.models.SPECIALIZED_AI_MODELS, - ModelCategory.FACE_RESTORATION: self.models.SPECIALIZED_AI_MODELS, - ModelCategory.IMAGE_INPAINTING: self.models.SPECIALIZED_AI_MODELS, - ModelCategory.BACKGROUND_REMOVAL: self.models.SPECIALIZED_AI_MODELS, - # Creative Content - ModelCategory.CREATIVE_WRITING: self.models.CREATIVE_CONTENT_MODELS, - ModelCategory.STORY_GENERATION: 
self.models.CREATIVE_CONTENT_MODELS, - ModelCategory.POETRY_GENERATION: self.models.CREATIVE_CONTENT_MODELS, - ModelCategory.BLOG_WRITING: self.models.CREATIVE_CONTENT_MODELS, - ModelCategory.MARKETING_COPY: self.models.CREATIVE_CONTENT_MODELS, - # Game Development - ModelCategory.GAME_ASSET_GENERATION: self.models.GAME_DEVELOPMENT_MODELS, - ModelCategory.CHARACTER_GENERATION: self.models.GAME_DEVELOPMENT_MODELS, - ModelCategory.LEVEL_GENERATION: self.models.GAME_DEVELOPMENT_MODELS, - ModelCategory.DIALOGUE_GENERATION: self.models.GAME_DEVELOPMENT_MODELS, - # Science and Research - ModelCategory.PROTEIN_FOLDING: self.models.SCIENCE_RESEARCH_MODELS, - ModelCategory.MOLECULE_GENERATION: self.models.SCIENCE_RESEARCH_MODELS, - ModelCategory.SCIENTIFIC_WRITING: self.models.SCIENCE_RESEARCH_MODELS, - ModelCategory.RESEARCH_ASSISTANCE: self.models.SCIENCE_RESEARCH_MODELS, - ModelCategory.DATA_ANALYSIS: self.models.SCIENCE_RESEARCH_MODELS, - # Business and Productivity - ModelCategory.EMAIL_GENERATION: self.models.BUSINESS_PRODUCTIVITY_MODELS, - ModelCategory.PRESENTATION_CREATION: self.models.BUSINESS_PRODUCTIVITY_MODELS, - ModelCategory.REPORT_GENERATION: self.models.BUSINESS_PRODUCTIVITY_MODELS, - ModelCategory.MEETING_SUMMARIZATION: self.models.BUSINESS_PRODUCTIVITY_MODELS, - ModelCategory.PROJECT_PLANNING: self.models.BUSINESS_PRODUCTIVITY_MODELS, - # AI Teacher and Education Models - ModelCategory.AI_TUTORING: self.models.AI_TEACHER_MODELS, - ModelCategory.EDUCATIONAL_CONTENT: self.models.AI_TEACHER_MODELS, - ModelCategory.LESSON_PLANNING: self.models.AI_TEACHER_MODELS, - ModelCategory.CONCEPT_EXPLANATION: self.models.AI_TEACHER_MODELS, - ModelCategory.HOMEWORK_ASSISTANCE: self.models.AI_TEACHER_MODELS, - ModelCategory.QUIZ_GENERATION: self.models.AI_TEACHER_MODELS, - ModelCategory.CURRICULUM_DESIGN: self.models.AI_TEACHER_MODELS, - ModelCategory.LEARNING_ASSESSMENT: self.models.AI_TEACHER_MODELS, - ModelCategory.ADAPTIVE_LEARNING: self.models.AI_TEACHER_MODELS, - 
ModelCategory.SUBJECT_TEACHING: self.models.AI_TEACHER_MODELS, - ModelCategory.MATH_TUTORING: self.models.AI_TEACHER_MODELS, - ModelCategory.SCIENCE_TUTORING: self.models.AI_TEACHER_MODELS, - ModelCategory.LANGUAGE_TUTORING: self.models.AI_TEACHER_MODELS, - ModelCategory.HISTORY_TUTORING: self.models.AI_TEACHER_MODELS, - ModelCategory.CODING_INSTRUCTION: self.models.AI_TEACHER_MODELS, - ModelCategory.EXAM_PREPARATION: self.models.AI_TEACHER_MODELS, - ModelCategory.STUDY_GUIDE_CREATION: self.models.AI_TEACHER_MODELS, - ModelCategory.EDUCATIONAL_GAMES: self.models.AI_TEACHER_MODELS, - ModelCategory.LEARNING_ANALYTICS: self.models.AI_TEACHER_MODELS, - ModelCategory.PERSONALIZED_LEARNING: self.models.AI_TEACHER_MODELS, - # Qwen Models - ModelCategory.QWEN_REASONING: self.models.QWEN_MODELS, - ModelCategory.QWEN_MATH: self.models.QWEN_MODELS, - ModelCategory.QWEN_CODE: self.models.QWEN_MODELS, - ModelCategory.QWEN_VISION: self.models.QWEN_MODELS, - ModelCategory.QWEN_AUDIO: self.models.QWEN_MODELS, - # DeepSeek Models - ModelCategory.DEEPSEEK_CODING: self.models.DEEPSEEK_MODELS, - ModelCategory.DEEPSEEK_REASONING: self.models.DEEPSEEK_MODELS, - ModelCategory.DEEPSEEK_MATH: self.models.DEEPSEEK_MODELS, - ModelCategory.DEEPSEEK_RESEARCH: self.models.DEEPSEEK_MODELS, - # Advanced Image Processing & Manipulation - ModelCategory.IMAGE_EDITING: self.models.IMAGE_EDITING_MODELS, - ModelCategory.FACE_SWAP: self.models.FACE_SWAP_MODELS, - ModelCategory.FACE_ENHANCEMENT: self.models.FACE_SWAP_MODELS, - ModelCategory.FACE_GENERATION: self.models.FACE_SWAP_MODELS, - ModelCategory.PORTRAIT_EDITING: self.models.IMAGE_EDITING_MODELS, - ModelCategory.PHOTO_RESTORATION: self.models.IMAGE_EDITING_MODELS, - ModelCategory.IMAGE_UPSCALING: self.models.IMAGE_EDITING_MODELS, - ModelCategory.COLOR_CORRECTION: self.models.IMAGE_EDITING_MODELS, - ModelCategory.ARTISTIC_FILTER: self.models.IMAGE_EDITING_MODELS, - # Advanced Speech & Audio - ModelCategory.ADVANCED_TTS: 
self.models.ADVANCED_SPEECH_MODELS, - ModelCategory.ADVANCED_STT: self.models.ADVANCED_SPEECH_MODELS, - ModelCategory.VOICE_CONVERSION: self.models.ADVANCED_SPEECH_MODELS, - ModelCategory.SPEECH_ENHANCEMENT: self.models.ADVANCED_SPEECH_MODELS, - ModelCategory.AUDIO_GENERATION: self.models.ADVANCED_SPEECH_MODELS, - ModelCategory.MULTILINGUAL_TTS: self.models.ADVANCED_SPEECH_MODELS, - ModelCategory.MULTILINGUAL_STT: self.models.ADVANCED_SPEECH_MODELS, - ModelCategory.REAL_TIME_TRANSLATION: self.models.ADVANCED_SPEECH_MODELS, - # Interactive Avatar & Video Generation - ModelCategory.TALKING_AVATAR: self.models.TALKING_AVATAR_MODELS, - ModelCategory.AVATAR_GENERATION: self.models.TALKING_AVATAR_MODELS, - ModelCategory.LIP_SYNC: self.models.TALKING_AVATAR_MODELS, - ModelCategory.FACIAL_ANIMATION: self.models.TALKING_AVATAR_MODELS, - ModelCategory.GESTURE_GENERATION: self.models.TALKING_AVATAR_MODELS, - ModelCategory.VIRTUAL_PRESENTER: self.models.TALKING_AVATAR_MODELS, - ModelCategory.AI_ANCHOR: self.models.TALKING_AVATAR_MODELS, - # Interactive Language & Conversation - ModelCategory.INTERACTIVE_CHAT: self.models.INTERACTIVE_LANGUAGE_MODELS, - ModelCategory.BILINGUAL_CONVERSATION: self.models.INTERACTIVE_LANGUAGE_MODELS, - ModelCategory.CULTURAL_ADAPTATION: self.models.INTERACTIVE_LANGUAGE_MODELS, - ModelCategory.CONTEXT_AWARE_CHAT: self.models.INTERACTIVE_LANGUAGE_MODELS, - ModelCategory.PERSONALITY_CHAT: self.models.INTERACTIVE_LANGUAGE_MODELS, - ModelCategory.ROLE_PLAY_CHAT: self.models.INTERACTIVE_LANGUAGE_MODELS, - ModelCategory.DOMAIN_SPECIFIC_CHAT: self.models.INTERACTIVE_LANGUAGE_MODELS, - } - - def get_model_by_id(self, model_id: str) -> Optional[HFModel]: - """Find a model by its Hugging Face model ID""" - for models_list in self.get_all_models().values(): - for model in models_list: - if model.model_id == model_id: - return model - return None - - async def call_model(self, model_id: str, category: ModelCategory, **kwargs) -> Any: - """Call a Hugging Face 
model with the appropriate method based on category""" - - async with HuggingFaceInference(self.api_token) as hf: - if category == ModelCategory.TEXT_GENERATION: - return await hf.text_generation(model_id, **kwargs) - elif category == ModelCategory.TEXT_TO_IMAGE: - return await hf.text_to_image(model_id, **kwargs) - elif category == ModelCategory.AUTOMATIC_SPEECH_RECOGNITION: - return await hf.automatic_speech_recognition(model_id, **kwargs) - elif category == ModelCategory.TEXT_TO_SPEECH: - return await hf.text_to_speech(model_id, **kwargs) - elif category == ModelCategory.IMAGE_CLASSIFICATION: - return await hf.image_classification(model_id, **kwargs) - elif category == ModelCategory.FEATURE_EXTRACTION: - return await hf.feature_extraction(model_id, **kwargs) - elif category == ModelCategory.TRANSLATION: - return await hf.translation(model_id, **kwargs) - elif category == ModelCategory.SUMMARIZATION: - return await hf.summarization(model_id, **kwargs) - elif category == ModelCategory.QUESTION_ANSWERING: - return await hf.question_answering(model_id, **kwargs) - elif category == ModelCategory.ZERO_SHOT_CLASSIFICATION: - return await hf.zero_shot_classification(model_id, **kwargs) - elif category == ModelCategory.CONVERSATIONAL: - return await hf.conversational(model_id, **kwargs) - - # Video and Motion categories - elif category in [ - ModelCategory.TEXT_TO_VIDEO, - ModelCategory.VIDEO_GENERATION, - ]: - return await hf.text_to_video(model_id, **kwargs) - elif category == ModelCategory.VIDEO_TO_TEXT: - return await hf.video_to_text(model_id, **kwargs) - elif category == ModelCategory.VIDEO_CLASSIFICATION: - return await hf.image_classification( - model_id, **kwargs - ) # Similar to image classification - - # Code and Development categories - elif category in [ - ModelCategory.CODE_GENERATION, - ModelCategory.APP_GENERATION, - ]: - return await hf.code_generation(model_id, **kwargs) - elif category in [ - ModelCategory.CODE_COMPLETION, - 
ModelCategory.CODE_EXPLANATION, - ]: - return await hf.code_completion(model_id, **kwargs) - - # 3D and AR/VR categories - elif category in [ - ModelCategory.TEXT_TO_3D, - ModelCategory.THREE_D_GENERATION, - ]: - return await hf.text_to_3d(model_id, **kwargs) - elif category in [ModelCategory.IMAGE_TO_3D, ModelCategory.MESH_GENERATION]: - return await hf.image_to_3d(model_id, **kwargs) - - # Document Processing categories - elif category == ModelCategory.OCR: - return await hf.ocr(model_id, **kwargs) - elif category in [ - ModelCategory.DOCUMENT_ANALYSIS, - ModelCategory.FORM_PROCESSING, - ModelCategory.TABLE_EXTRACTION, - ModelCategory.LAYOUT_ANALYSIS, - ]: - return await hf.document_analysis(model_id, **kwargs) - elif category == ModelCategory.HANDWRITING_RECOGNITION: - return await hf.ocr(model_id, **kwargs) # Similar to OCR - - # Multimodal AI categories - elif category in [ - ModelCategory.VISION_LANGUAGE, - ModelCategory.VISUAL_QUESTION_ANSWERING, - ModelCategory.IMAGE_TEXT_MATCHING, - ]: - return await hf.vision_language(model_id, **kwargs) - elif category in [ - ModelCategory.MULTIMODAL_REASONING, - ModelCategory.MULTIMODAL_CHAT, - ModelCategory.CROSS_MODAL_GENERATION, - ]: - return await hf.multimodal_reasoning(model_id, **kwargs) - - # Specialized AI categories - elif category == ModelCategory.MUSIC_GENERATION: - return await hf.music_generation(model_id, **kwargs) - elif category == ModelCategory.VOICE_CLONING: - return await hf.voice_cloning(model_id, **kwargs) - elif category == ModelCategory.SUPER_RESOLUTION: - return await hf.super_resolution(model_id, **kwargs) - elif category in [ - ModelCategory.FACE_RESTORATION, - ModelCategory.IMAGE_INPAINTING, - ModelCategory.IMAGE_OUTPAINTING, - ]: - return await hf.super_resolution( - model_id, **kwargs - ) # Similar processing - elif category == ModelCategory.BACKGROUND_REMOVAL: - return await hf.background_removal(model_id, **kwargs) - - # Creative Content categories - elif category in [ - 
ModelCategory.CREATIVE_WRITING, - ModelCategory.STORY_GENERATION, - ModelCategory.POETRY_GENERATION, - ModelCategory.SCREENPLAY_WRITING, - ]: - return await hf.creative_writing(model_id, **kwargs) - elif category in [ModelCategory.BLOG_WRITING, ModelCategory.MARKETING_COPY]: - return await hf.text_generation( - model_id, **kwargs - ) # Use standard text generation - - # Game Development categories - elif category in [ - ModelCategory.CHARACTER_GENERATION, - ModelCategory.LEVEL_GENERATION, - ModelCategory.DIALOGUE_GENERATION, - ModelCategory.GAME_ASSET_GENERATION, - ]: - return await hf.creative_writing( - model_id, **kwargs - ) # Creative generation - - # Science and Research categories - elif category in [ - ModelCategory.PROTEIN_FOLDING, - ModelCategory.MOLECULE_GENERATION, - ]: - return await hf.text_generation( - model_id, **kwargs - ) # Specialized text generation - elif category in [ - ModelCategory.SCIENTIFIC_WRITING, - ModelCategory.RESEARCH_ASSISTANCE, - ModelCategory.DATA_ANALYSIS, - ]: - return await hf.text_generation(model_id, **kwargs) - - # Business and Productivity categories - elif category in [ - ModelCategory.EMAIL_GENERATION, - ModelCategory.PRESENTATION_CREATION, - ModelCategory.REPORT_GENERATION, - ModelCategory.MEETING_SUMMARIZATION, - ModelCategory.PROJECT_PLANNING, - ]: - return await hf.business_document(model_id, category.value, **kwargs) - - # AI Teacher and Education categories - elif category in [ - ModelCategory.AI_TUTORING, - ModelCategory.EDUCATIONAL_CONTENT, - ModelCategory.LESSON_PLANNING, - ModelCategory.CONCEPT_EXPLANATION, - ModelCategory.HOMEWORK_ASSISTANCE, - ModelCategory.QUIZ_GENERATION, - ModelCategory.CURRICULUM_DESIGN, - ModelCategory.LEARNING_ASSESSMENT, - ModelCategory.ADAPTIVE_LEARNING, - ModelCategory.SUBJECT_TEACHING, - ModelCategory.MATH_TUTORING, - ModelCategory.SCIENCE_TUTORING, - ModelCategory.LANGUAGE_TUTORING, - ModelCategory.HISTORY_TUTORING, - ModelCategory.CODING_INSTRUCTION, - 
ModelCategory.EXAM_PREPARATION, - ModelCategory.STUDY_GUIDE_CREATION, - ModelCategory.EDUCATIONAL_GAMES, - ModelCategory.LEARNING_ANALYTICS, - ModelCategory.PERSONALIZED_LEARNING, - ]: - return await hf.text_generation( - model_id, **kwargs - ) # Educational content generation - - # Qwen Model categories - elif category in [ - ModelCategory.QWEN_REASONING, - ModelCategory.QWEN_MATH, - ModelCategory.QWEN_CODE, - ]: - return await hf.text_generation(model_id, **kwargs) - elif category == ModelCategory.QWEN_VISION: - return await hf.vision_language(model_id, **kwargs) - elif category == ModelCategory.QWEN_AUDIO: - return await hf.automatic_speech_recognition(model_id, **kwargs) - - # DeepSeek Model categories - elif category in [ - ModelCategory.DEEPSEEK_CODING, - ModelCategory.DEEPSEEK_REASONING, - ModelCategory.DEEPSEEK_MATH, - ModelCategory.DEEPSEEK_RESEARCH, - ]: - return await hf.text_generation(model_id, **kwargs) - - # Advanced Image Processing & Manipulation - elif category in [ - ModelCategory.IMAGE_EDITING, - ModelCategory.PORTRAIT_EDITING, - ModelCategory.PHOTO_RESTORATION, - ModelCategory.COLOR_CORRECTION, - ModelCategory.ARTISTIC_FILTER, - ]: - return await hf.text_to_image(model_id, **kwargs) # Image processing - elif category == ModelCategory.IMAGE_UPSCALING: - return await hf.super_resolution(model_id, **kwargs) - elif category in [ - ModelCategory.FACE_SWAP, - ModelCategory.FACE_ENHANCEMENT, - ModelCategory.FACE_GENERATION, - ]: - return await hf.text_to_image(model_id, **kwargs) # Face manipulation - - # Advanced Speech & Audio - elif category in [ - ModelCategory.ADVANCED_TTS, - ModelCategory.MULTILINGUAL_TTS, - ModelCategory.VOICE_CONVERSION, - ]: - return await hf.text_to_speech(model_id, **kwargs) - elif category in [ - ModelCategory.ADVANCED_STT, - ModelCategory.MULTILINGUAL_STT, - ModelCategory.SPEECH_ENHANCEMENT, - ]: - return await hf.automatic_speech_recognition(model_id, **kwargs) - elif category in [ - ModelCategory.AUDIO_GENERATION, - 
ModelCategory.REAL_TIME_TRANSLATION, - ]: - return await hf.text_to_speech(model_id, **kwargs) # Audio generation - - # Interactive Avatar & Video Generation - elif category in [ - ModelCategory.TALKING_AVATAR, - ModelCategory.AVATAR_GENERATION, - ModelCategory.LIP_SYNC, - ModelCategory.FACIAL_ANIMATION, - ModelCategory.GESTURE_GENERATION, - ModelCategory.VIRTUAL_PRESENTER, - ModelCategory.AI_ANCHOR, - ]: - return await hf.text_to_video(model_id, **kwargs) # Video generation - - # Interactive Language & Conversation - elif category in [ - ModelCategory.INTERACTIVE_CHAT, - ModelCategory.BILINGUAL_CONVERSATION, - ModelCategory.CULTURAL_ADAPTATION, - ModelCategory.CONTEXT_AWARE_CHAT, - ModelCategory.PERSONALITY_CHAT, - ModelCategory.ROLE_PLAY_CHAT, - ModelCategory.DOMAIN_SPECIFIC_CHAT, - ]: - return await hf.conversational(model_id, **kwargs) - - else: - raise ValueError(f"Unsupported model category: {category}")