Spaces:
Runtime error
Runtime error
File size: 6,773 Bytes
af68acb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 |
import os
import json
import tensorflow as tf
import keras
import numpy as np
from tokenizers import Tokenizer
from huggingface_hub import hf_hub_download
from transformers import GPT2Tokenizer
import threading
from typing import Dict, Optional
from model_architecture import SAM1Model
class ModelManager:
    """
    Manages multiple models and their loading/unloading based on demand.

    Loaded models, tokenizers, and configs are cached in dictionaries keyed
    by model type (e.g. "sam-x-nano"). `get_model` is the thread-safe entry
    point; the individual `load_*` helpers are idempotent (cache-checked).
    """

    def __init__(self):
        # Per-model-type caches, populated lazily on first request.
        self.models: Dict[str, keras.Model] = {}
        self.tokenizers: Dict[str, Tokenizer] = {}
        self.model_configs: Dict[str, dict] = {}
        # Serializes load operations triggered through get_model().
        self.lock = threading.Lock()

        # Model-type name -> Hugging Face repository mapping.
        self.model_repos = {
            "sam-x-nano": "Smilyai-labs/Sam-nano",
            "sam-x-mini": "Smilyai-labs/Sam-mini",
            "sam-x-fast": "Smilyai-labs/Sam-fast",
            "sam-x-large": "Smilyai-labs/Sam-large-2",  # Using Sam-large-2 as the large model
            "sam-large-2": "Smilyai-labs/Sam-large-2",
        }

        # CPU/threading tuning.
        # NOTE(review): the TF_* environment variables only take effect when
        # set BEFORE TensorFlow is imported; since `tf` is imported at module
        # level, these assignments are likely no-ops here — confirm and move
        # them above the `import tensorflow` line if needed.
        num_cores = os.cpu_count() or 4
        os.environ['TF_NUM_INTEROP_THREADS'] = str(num_cores)
        os.environ['TF_NUM_INTRAOP_THREADS'] = str(num_cores)
        os.environ['CUDA_VISIBLE_DEVICES'] = '-1'  # Force CPU only for consistency
        os.environ['TF_ENABLE_ONEDNN_OPTS'] = '1'  # Intel optimization
        os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'   # Reduce TF logging

        # These API calls do work after import (as long as no ops have run yet).
        tf.config.threading.set_inter_op_parallelism_threads(num_cores)
        tf.config.threading.set_intra_op_parallelism_threads(num_cores)
        print(f"CPU optimized: {num_cores} threads, oneDNN enabled")

    def get_model_repo(self, model_type: str) -> str:
        """Get the Hugging Face repository for a given model type.

        Unknown model types fall back to the "sam-x-large" repository.
        """
        return self.model_repos.get(model_type, self.model_repos["sam-x-large"])

    def load_tokenizer(self, model_type: str) -> Tokenizer:
        """Load (and cache) the tokenizer for a specific model type.

        Builds on the GPT-2 tokenizer, registers model-specific special
        tokens, then round-trips through disk to obtain a fast
        `tokenizers.Tokenizer` from the generated tokenizer.json.

        Raises: re-raises any download/parse error after logging it.
        """
        if model_type in self.tokenizers:
            return self.tokenizers[model_type]
        print(f"Loading tokenizer for {model_type}...")
        try:
            # Load base tokenizer
            from transformers import AutoTokenizer
            hf_tokenizer = AutoTokenizer.from_pretrained("gpt2")
            # Special tokens specific to the SAM models.
            # NOTE(review): the original list contained four duplicate "\n"
            # entries (likely corrupted token strings); duplicates are
            # redundant for add_special_tokens, so a single "\n" is kept —
            # confirm the intended token strings against the training setup.
            special_tokens = [
                "\n",
                "<CONTINUE>",
                "<im end for model tun>",
            ]
            hf_tokenizer.add_special_tokens({"additional_special_tokens": special_tokens})
            # Save temporarily so tokenizer.json exists for Tokenizer.from_file.
            temp_dir = f"./temp_tokenizer_{model_type}"
            os.makedirs(temp_dir, exist_ok=True)
            hf_tokenizer.save_pretrained(temp_dir)
            tokenizer = Tokenizer.from_file(f"{temp_dir}/tokenizer.json")
            print(f"Tokenizer loaded for {model_type} with vocab size: {tokenizer.get_vocab_size()}")
            self.tokenizers[model_type] = tokenizer
            return tokenizer
        except Exception as e:
            print(f"Error loading tokenizer for {model_type}: {e}")
            raise

    def load_model(self, model_type: str) -> keras.Model:
        """Load (and cache) a specific model by type.

        Downloads config.json from the mapped repo, builds a SAM1Model from
        it, attempts to load pretrained weights (falling back to random
        initialization on failure), and warms the model up with a tiny batch.

        Raises: re-raises any config/build error after logging it.
        """
        if model_type in self.models:
            return self.models[model_type]
        print(f"Loading {model_type} model...")
        try:
            model_repo = self.get_model_repo(model_type)
            cache_dir = f"./model_cache/{model_type}"

            # Download and parse the HF-style config.
            config_path = hf_hub_download(model_repo, "config.json", cache_dir=cache_dir)
            with open(config_path, 'r') as f:
                config = json.load(f)
            self.model_configs[model_type] = config

            # Translate HF config keys into SAM1Model constructor arguments.
            model_config = {
                'vocab_size': config.get('vocab_size', 50432),
                'd_model': config.get('hidden_size', 768),
                'n_layers': config.get('num_hidden_layers', 12),
                'n_heads': config.get('num_attention_heads', 12),
                # Feed-forward width expressed as a multiplier of d_model.
                'ff_mult': config.get('intermediate_size', 3072) / config.get('hidden_size', 768),
                'max_len': config.get('max_position_embeddings', 2048),
                'dropout': 0.1,
                'rope_theta': config.get('rope_theta', 10000),
            }
            model = SAM1Model(config=model_config)

            # Build the model's variables by running a dummy forward pass.
            dummy_input = tf.zeros((1, 16), dtype=tf.int32)
            _ = model(dummy_input, training=False, use_cache=False)
            print(f"Model {model_type} loaded: {config.get('num_hidden_layers', 12)} layers")

            # Weights are best-effort: a missing/incompatible checkpoint
            # deliberately falls back to random initialization.
            try:
                weights_path = hf_hub_download(model_repo, "model.weights.h5", cache_dir=cache_dir)
                model.load_weights(weights_path)
                print(f"Model weights loaded successfully for {model_type}!")
            except Exception as e:
                print(f"Warning: could not load weights for {model_type}, using random initialization: {e}")

            # Warm up the cached-generation path so first user request is fast.
            print(f"Warming up model {model_type}...")
            warmup_input = tf.constant([[1, 2, 3, 4, 5]], dtype=tf.int32)
            _, _ = model(warmup_input, training=False, use_cache=True)
            print(f"Model {model_type} warmed up")

            self.models[model_type] = model
            return model
        except Exception as e:
            print(f"Error loading model {model_type}: {e}")
            raise

    def get_model(self, model_type: str) -> tuple:
        """Get (model, tokenizer, config) for a type, loading if necessary.

        Thread-safe: all load work happens under `self.lock`.
        """
        with self.lock:
            if model_type not in self.tokenizers:
                self.load_tokenizer(model_type)
            if model_type not in self.models:
                self.load_model(model_type)
            return self.models[model_type], self.tokenizers[model_type], self.model_configs[model_type]

    def list_available_models(self) -> list:
        """Get list of available model types."""
        return list(self.model_repos.keys())

    def is_model_loaded(self, model_type: str) -> bool:
        """Check if a model is currently loaded."""
        return model_type in self.models