Keeby-smilyai committed on
Commit
ac2a3fe
·
verified Β·
1 Parent(s): 4e1d66f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +84 -8
app.py CHANGED
@@ -23,7 +23,7 @@ MODEL_REPO = "Smilyai-labs/Sam-Z-1-tensorflow"
23
  CACHE_DIR = "./model_cache"
24
 
25
  # ============================================================================
26
- # Model Architecture Definitions (Required for Loading)
27
  # ============================================================================
28
 
29
  @keras.saving.register_keras_serializable()
@@ -35,13 +35,13 @@ class RotaryEmbedding(keras.layers.Layer):
35
  self.theta = theta
36
 
37
  def build(self, input_shape):
38
- # Compute embeddings using numpy, then convert to TF tensors
39
  inv_freq = 1.0 / (self.theta ** (np.arange(0, self.dim, 2, dtype=np.float32) / self.dim))
40
  t = np.arange(self.max_len, dtype=np.float32)
41
  freqs = np.outer(t, inv_freq)
42
  emb = np.concatenate([freqs, freqs], axis=-1)
43
 
44
- # Create non-trainable weights for cos and sin embeddings
45
  self.cos_cached = self.add_weight(
46
  name="cos_cached",
47
  shape=(self.max_len, self.dim),
@@ -225,7 +225,16 @@ print("✅ Model architecture registered")
225
 
226
  # Download model files
227
  config_path = hf_hub_download(MODEL_REPO, "config.json", cache_dir=CACHE_DIR)
228
- model_path = hf_hub_download(MODEL_REPO, "model.keras", cache_dir=CACHE_DIR)
 
 
 
 
 
 
 
 
 
229
 
230
  # Load config
231
  with open(config_path, 'r') as f:
@@ -257,8 +266,72 @@ if tokenizer.get_vocab_size() != config.get('vocab_size'):
257
 
258
  eos_token_id = config.get('eos_token_id', 50256)
259
 
260
- # Load model with TF function optimization
261
- model = keras.models.load_model(model_path, compile=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
262
 
263
  # Create optimized inference function
264
  @tf.function(reduce_retracing=True)
@@ -619,11 +692,14 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
619
 
620
  with gr.Row():
621
  with gr.Column(scale=4):
622
- # Chat interface
623
  chatbot = gr.Chatbot(
624
  height=600,
625
  show_label=False,
626
- avatar_images=(None, "🤖" if not FESTIVE else "🎉"),
 
 
 
627
  bubble_full_width=False
628
  )
629
 
 
23
  CACHE_DIR = "./model_cache"
24
 
25
  # ============================================================================
26
+ # Model Architecture Definitions (FIXED for model loading)
27
  # ============================================================================
28
 
29
  @keras.saving.register_keras_serializable()
 
35
  self.theta = theta
36
 
37
  def build(self, input_shape):
38
+ # FIXED: Compute in numpy first to avoid symbolic tensor issues
39
  inv_freq = 1.0 / (self.theta ** (np.arange(0, self.dim, 2, dtype=np.float32) / self.dim))
40
  t = np.arange(self.max_len, dtype=np.float32)
41
  freqs = np.outer(t, inv_freq)
42
  emb = np.concatenate([freqs, freqs], axis=-1)
43
 
44
+ # Create as non-trainable weights instead of tf.constant
45
  self.cos_cached = self.add_weight(
46
  name="cos_cached",
47
  shape=(self.max_len, self.dim),
 
225
 
226
  # Download model files
227
  config_path = hf_hub_download(MODEL_REPO, "config.json", cache_dir=CACHE_DIR)
228
+
229
+ # Try to download checkpoint weights first (more reliable)
230
+ try:
231
+ weights_path = hf_hub_download(MODEL_REPO, "ckpt.weights.h5", cache_dir=CACHE_DIR)
232
+ print("✅ Found checkpoint weights (ckpt.weights.h5)")
233
+ use_checkpoint = True
234
+ except Exception as e:
235
+ print(f"⚠️ Checkpoint not found, falling back to model.keras: {e}")
236
+ model_path = hf_hub_download(MODEL_REPO, "model.keras", cache_dir=CACHE_DIR)
237
+ use_checkpoint = False
238
 
239
  # Load config
240
  with open(config_path, 'r') as f:
 
266
 
267
  eos_token_id = config.get('eos_token_id', 50256)
268
 
269
+ # ==============================================================================
270
+ # Load Model - Priority: checkpoint weights > saved model
271
+ # ==============================================================================
272
+ print("\n🔄 Loading model...")
273
+
274
+ if use_checkpoint:
275
+ print("📦 Building model from config and loading checkpoint weights...")
276
+
277
+ # Build model from scratch with config
278
+ model_config = {
279
+ 'vocab_size': config['vocab_size'],
280
+ 'd_model': config['hidden_size'],
281
+ 'n_layers': config['num_hidden_layers'],
282
+ 'n_heads': config['num_attention_heads'],
283
+ 'ff_mult': config['intermediate_size'] / config['hidden_size'],
284
+ 'max_len': config['max_position_embeddings'],
285
+ 'dropout': 0.1, # Default dropout
286
+ 'rope_theta': config['rope_theta']
287
+ }
288
+
289
+ model = SAM1Model(config=model_config)
290
+
291
+ # Build model by running a dummy forward pass
292
+ dummy_input = tf.zeros((1, config['max_position_embeddings']), dtype=tf.int32)
293
+ _ = model(dummy_input, training=False)
294
+
295
+ print(f"✅ Model architecture built: {model.count_params():,} parameters")
296
+
297
+ # Load checkpoint weights
298
+ print(f"📥 Loading checkpoint weights from: {weights_path}")
299
+ model.load_weights(weights_path)
300
+ print("✅ Checkpoint weights loaded successfully!")
301
+
302
+ else:
303
+ print("📦 Loading full saved model...")
304
+ try:
305
+ model = keras.models.load_model(model_path, compile=False)
306
+ print("✅ Model loaded successfully")
307
+ except Exception as e:
308
+ print(f"❌ Failed to load model: {e}")
309
+ print("\n🔄 Trying alternative: building from config + loading weights...")
310
+
311
+ # Fallback to building model
312
+ model_config = {
313
+ 'vocab_size': config['vocab_size'],
314
+ 'd_model': config['hidden_size'],
315
+ 'n_layers': config['num_hidden_layers'],
316
+ 'n_heads': config['num_attention_heads'],
317
+ 'ff_mult': config['intermediate_size'] / config['hidden_size'],
318
+ 'max_len': config['max_position_embeddings'],
319
+ 'dropout': 0.1,
320
+ 'rope_theta': config['rope_theta']
321
+ }
322
+
323
+ model = SAM1Model(config=model_config)
324
+ dummy_input = tf.zeros((1, config['max_position_embeddings']), dtype=tf.int32)
325
+ _ = model(dummy_input, training=False)
326
+
327
+ # Try to load weights from model.keras
328
+ try:
329
+ temp_model = keras.models.load_model(model_path, compile=False)
330
+ model.set_weights(temp_model.get_weights())
331
+ print("✅ Weights transferred successfully")
332
+ except:
333
+ print("❌ Could not load weights - model may not work correctly!")
334
+ raise
335
 
336
  # Create optimized inference function
337
  @tf.function(reduce_retracing=True)
 
692
 
693
  with gr.Row():
694
  with gr.Column(scale=4):
695
+ # Chat interface with bot avatar
696
  chatbot = gr.Chatbot(
697
  height=600,
698
  show_label=False,
699
+ avatar_images=(
700
+ None,
701
+ "https://cdn-uploads.huggingface.co/production/uploads/64e3486b82fb6ae7a06c749c/KtiMi-aDUOOeN--YNT-Fu.jpeg"
702
+ ),
703
  bubble_full_width=False
704
  )
705