Spaces:

Smilyai-labs
/

Sam-Z-chat

Running

App Files Community

Keeby-smilyai committed on Nov 20

Commit

9580f69

verified ·

1 Parent(s): 5a3d225

Update app.py

Browse files

Files changed (1) hide show

app.py +284 -573

app.py CHANGED Viewed

@@ -15,19 +15,21 @@ import queue
 import hashlib
 import sqlite3
 from datetime import datetime
-from dataclasses import dataclass, field
-from typing import List, Dict, Optional
 import uuid
 # ==============================================================================
-# GPU/CPU Optimization
 # ==============================================================================
 tf.config.threading.set_inter_op_parallelism_threads(2)
 tf.config.threading.set_intra_op_parallelism_threads(4)
 tf.config.optimizer.set_jit(True)
 # ==============================================================================
-# Database Setup
 # ==============================================================================
 def init_db():
     conn = sqlite3.connect('sam_tasks.db', check_same_thread=False)
@@ -53,11 +55,10 @@ def init_db():
                   tokens_per_sec REAL DEFAULT 0,
                   FOREIGN KEY (user_id) REFERENCES users(id))''')
-    # Create admin account
     admin_pass = hashlib.sha256("admin123".encode()).hexdigest()
     try:
-        c.execute("INSERT INTO users (username, password_hash) VALUES (?, ?)",
-                  ("admin", admin_pass))
         conn.commit()
     except sqlite3.IntegrityError:
         pass
@@ -69,7 +70,7 @@ db_conn = init_db()
 db_lock = threading.Lock()
 # ==============================================================================
-# Model Architecture (Compact)
 # ==============================================================================
 @keras.saving.register_keras_serializable()
 class RotaryEmbedding(keras.layers.Layer):
@@ -124,11 +125,6 @@ class RMSNorm(keras.layers.Layer):
     def call(self, x):
         variance = tf.reduce_mean(tf.square(x), axis=-1, keepdims=True)
         return x * tf.math.rsqrt(variance + self.epsilon) * self.scale
-    def get_config(self):
-        config = super().get_config()
-        config.update({"epsilon": self.epsilon})
-        return config
 @keras.saving.register_keras_serializable()
 class TransformerBlock(keras.layers.Layer):
@@ -138,693 +134,408 @@ class TransformerBlock(keras.layers.Layer):
         self.n_heads = n_heads
         self.ff_dim = ff_dim
         self.dropout_rate = dropout
-        self.max_len = max_len
-        self.rope_theta = rope_theta
         self.head_dim = d_model // n_heads
-        self.layer_idx = layer_idx
         self.pre_attn_norm = RMSNorm()
         self.pre_ffn_norm = RMSNorm()
         self.q_proj = keras.layers.Dense(d_model, use_bias=False, name="q_proj")
         self.k_proj = keras.layers.Dense(d_model, use_bias=False, name="k_proj")
         self.v_proj = keras.layers.Dense(d_model, use_bias=False, name="v_proj")
         self.out_proj = keras.layers.Dense(d_model, use_bias=False, name="o_proj")
-        self.rope = RotaryEmbedding(self.head_dim, max_len=max_len, theta=rope_theta)
         self.gate_proj = keras.layers.Dense(ff_dim, use_bias=False, name="gate_proj")
         self.up_proj = keras.layers.Dense(ff_dim, use_bias=False, name="up_proj")
         self.down_proj = keras.layers.Dense(d_model, use_bias=False, name="down_proj")
         self.dropout = keras.layers.Dropout(dropout)
-    def call(self, x, training=None):
-        B, T, D = tf.shape(x)[0], tf.shape(x)[1], self.d_model
-        dtype = x.dtype
         res = x
         y = self.pre_attn_norm(x)
-        q = tf.transpose(tf.reshape(self.q_proj(y), [B, T, self.n_heads, self.head_dim]), [0, 2, 1, 3])
-        k = tf.transpose(tf.reshape(self.k_proj(y), [B, T, self.n_heads, self.head_dim]), [0, 2, 1, 3])
-        v = tf.transpose(tf.reshape(self.v_proj(y), [B, T, self.n_heads, self.head_dim]), [0, 2, 1, 3])
-        q, k = self.rope(q, k)
-        scores = tf.matmul(q, k, transpose_b=True) / tf.sqrt(tf.cast(self.head_dim, dtype))
-        mask = tf.where(tf.linalg.band_part(tf.ones([T, T], dtype=dtype), -1, 0) == 0,
-                       tf.constant(-1e9, dtype=dtype), tf.constant(0.0, dtype=dtype))
-        scores += mask
-        attn = tf.matmul(tf.nn.softmax(scores, axis=-1), v)
-        attn = tf.reshape(tf.transpose(attn, [0, 2, 1, 3]), [B, T, D])
         x = res + self.dropout(self.out_proj(attn), training=training)
         res = x
         y = self.pre_ffn_norm(x)
         ffn = self.down_proj(keras.activations.silu(self.gate_proj(y)) * self.up_proj(y))
-        return res + self.dropout(ffn, training=training)
     def get_config(self):
-        config = super().get_config()
-        config.update({
-            "d_model": self.d_model, "n_heads": self.n_heads, "ff_dim": self.ff_dim,
-            "dropout": self.dropout_rate, "max_len": self.max_len,
-            "rope_theta": self.rope_theta, "layer_idx": self.layer_idx
-        })
-        return config
 @keras.saving.register_keras_serializable()
 class SAM1Model(keras.Model):
-    def __init__(self, **kwargs):
-        super().__init__()
-        if 'config' in kwargs and isinstance(kwargs['config'], dict):
-            self.cfg = kwargs['config']
-        elif 'vocab_size' in kwargs:
-            self.cfg = kwargs
-        else:
-            self.cfg = kwargs.get('cfg', kwargs)
-        self.embed = keras.layers.Embedding(self.cfg['vocab_size'], self.cfg['d_model'], name="embed_tokens")
-        ff_dim = int(self.cfg['d_model'] * self.cfg['ff_mult'])
-        block_args = {
-            'd_model': self.cfg['d_model'], 'n_heads': self.cfg['n_heads'],
-            'ff_dim': ff_dim, 'dropout': self.cfg['dropout'],
-            'max_len': self.cfg['max_len'], 'rope_theta': self.cfg['rope_theta']
-        }
-        self.blocks = [TransformerBlock(name=f"block_{i}", layer_idx=i, **block_args)
-                      for i in range(self.cfg['n_layers'])]
-        self.norm = RMSNorm(name="final_norm")
-        self.lm_head = keras.layers.Dense(self.cfg['vocab_size'], use_bias=False, name="lm_head")
-    def call(self, input_ids, training=None):
         x = self.embed(input_ids)
-        for block in self.blocks:
-            x = block(x, training=training)
-        return self.lm_head(self.norm(x))
-    def get_config(self):
-        base_config = super().get_config()
-        base_config['config'] = self.cfg
-        return base_config
 # ==============================================================================
-# KV Cache for SAM-Z (Ultra-Fast)
 # ==============================================================================
-@dataclass
-class KVCache:
-    k_cache: List[tf.Tensor] = field(default_factory=list)
-    v_cache: List[tf.Tensor] = field(default_factory=list)
-    def update(self, layer_idx: int, k: tf.Tensor, v: tf.Tensor):
-        if layer_idx >= len(self.k_cache):
-            self.k_cache.append(k)
-            self.v_cache.append(v)
-        else:
-            self.k_cache[layer_idx] = tf.concat([self.k_cache[layer_idx], k], axis=2)
-            self.v_cache[layer_idx] = tf.concat([self.v_cache[layer_idx], v], axis=2)
-        return self.k_cache[layer_idx], self.v_cache[layer_idx]
-    def clear(self):
-        self.k_cache.clear()
-        self.v_cache.clear()
-# ==============================================================================
-# Load Models
-# ==============================================================================
-print("🚀 Loading SAM Models...")
-# SAM-X-1 (Reasoning with thinking)
-print("\n📦 Loading SAM-X-1-Large...")
 samx_weights = hf_hub_download("Smilyai-labs/Sam-1x-instruct", "ckpt.weights.h5")
 samx_config_path = hf_hub_download("Smilyai-labs/Sam-1-large-it-0002", "config.json")
 with open(samx_config_path, 'r') as f:
-    samx_cfg = json.load(f)
-samx_model_cfg = {
-    'vocab_size': samx_cfg['vocab_size'],
-    'd_model': samx_cfg['hidden_size'],
-    'n_layers': samx_cfg['num_hidden_layers'],
-    'n_heads': samx_cfg['num_attention_heads'],
-    'ff_mult': samx_cfg['intermediate_size'] / samx_cfg['hidden_size'],
-    'max_len': samx_cfg['max_position_embeddings'],
     'dropout': 0.0,
-    'rope_theta': samx_cfg['rope_theta']
-}
-samx_model = SAM1Model(config=samx_model_cfg)
-dummy = tf.zeros((1, 1), dtype=tf.int32)
-_ = samx_model(dummy)
 samx_model.load_weights(samx_weights)
 samx_model.trainable = False
-@tf.function(jit_compile=True)
-def samx_predict(inputs):
-    return samx_model(inputs, training=False)
-print("✅ SAM-X-1 loaded")
-# SAM-Z-1 (Fast with KV cache)
-print("\n📦 Loading SAM-Z-1...")
 samz_weights = hf_hub_download("Smilyai-labs/Sam-Z-1-tensorflow", "ckpt.weights.h5")
 samz_config_path = hf_hub_download("Smilyai-labs/Sam-Z-1-tensorflow", "config.json")
 with open(samz_config_path, 'r') as f:
-    samz_cfg = json.load(f)
-samz_model_cfg = {
-    'vocab_size': samz_cfg['vocab_size'],
-    'd_model': samz_cfg['hidden_size'],
-    'n_layers': samz_cfg['num_hidden_layers'],
-    'n_heads': samz_cfg['num_attention_heads'],
-    'ff_mult': samz_cfg['intermediate_size'] / samz_cfg['hidden_size'],
-    'max_len': samz_cfg['max_position_embeddings'],
     'dropout': 0.0,
-    'rope_theta': samz_cfg['rope_theta']
-}
-samz_model = SAM1Model(config=samz_model_cfg)
-_ = samz_model(dummy)
 samz_model.load_weights(samz_weights)
 samz_model.trainable = False
-@tf.function(jit_compile=True)
-def samz_predict(inputs):
-    return samz_model(inputs, training=False)
-print("✅ SAM-Z-1 loaded")
 # Tokenizer
-tokenizer_path = hf_hub_download("Smilyai-labs/Sam-1x-instruct", "tokenizer.json")
-tokenizer = Tokenizer.from_file(tokenizer_path)
 eos_token_id = 50256
-print(f"✅ Tokenizer ready (vocab: {tokenizer.get_vocab_size()})")
 # ==============================================================================
-# Background Task Processing
 # ==============================================================================
 task_queue = queue.Queue()
-active_tasks: Dict[str, Dict] = {}
 task_lock = threading.Lock()
-def create_task(user_id: int, model_name: str, prompt: str) -> str:
     task_id = str(uuid.uuid4())
     with db_lock:
         c = db_conn.cursor()
-        c.execute("""INSERT INTO tasks (id, user_id, model_name, prompt, status)
-                     VALUES (?, ?, ?, ?, ?)""",
-                  (task_id, user_id, model_name, prompt, "queued"))
         db_conn.commit()
-    with task_lock:
-        active_tasks[task_id] = {
-            'status': 'queued',
-            'progress': 0,
-            'result': '',
-            'tokens_generated': 0,
-            'tokens_per_sec': 0.0
-        }
-    task_queue.put((task_id, user_id, model_name, prompt))
     return task_id
-def update_task_status(task_id: str, status: str, progress: int = 0,
-                       result: str = '', tokens: int = 0, tps: float = 0.0):
-    with task_lock:
-        if task_id in active_tasks:
-            active_tasks[task_id].update({
-                'status': status,
-                'progress': progress,
-                'result': result,
-                'tokens_generated': tokens,
-                'tokens_per_sec': tps
-            })
     with db_lock:
         c = db_conn.cursor()
         c.execute("""UPDATE tasks SET status=?, progress=?, result=?,
-                     tokens_generated=?, tokens_per_sec=?
-                     WHERE id=?""",
                   (status, progress, result, tokens, tps, task_id))
-        if status == 'completed':
-            c.execute("UPDATE tasks SET completed_at=? WHERE id=?",
-                      (datetime.now().isoformat(), task_id))
         db_conn.commit()
-def generate_with_samx(prompt: str, task_id: str, max_tokens: int = 512):
-    """SAM-X-1: Reasoning model with <think> tags"""
-    input_ids = [i for i in tokenizer.encode(prompt).ids if i != eos_token_id]
-    generated = input_ids.copy()
-    result = ""
     start_time = time.time()
-    for step in range(max_tokens):
-        logits = samx_predict(tf.constant([generated], dtype=tf.int32))
-        next_logits = logits[0, -1, :].numpy()
-        # Temperature sampling
-        next_logits = next_logits / 0.7
-        probs = tf.nn.softmax(next_logits).numpy()
-        next_token = np.random.choice(len(probs), p=probs)
-        if next_token == eos_token_id:
-            break
-        generated.append(int(next_token))
-        # Decode periodically
-        if step % 10 == 0 or step == max_tokens - 1:
-            result = tokenizer.decode(generated[len(input_ids):])
-            elapsed = time.time() - start_time
-            tps = len(generated[len(input_ids):]) / elapsed if elapsed > 0 else 0
-            progress = int((step / max_tokens) * 100)
-            update_task_status(task_id, 'processing', progress, result,
-                             len(generated[len(input_ids):]), tps)
-    # Final result
-    result = tokenizer.decode(generated[len(input_ids):])
-    elapsed = time.time() - start_time
-    tps = len(generated[len(input_ids):]) / elapsed if elapsed > 0 else 0
-    update_task_status(task_id, 'completed', 100, result,
-                      len(generated[len(input_ids):]), tps)
-def generate_with_samz(prompt: str, task_id: str, max_tokens: int = 512):
-    """SAM-Z-1: Fast model with KV cache"""
-    input_ids = [i for i in tokenizer.encode(prompt).ids if i != eos_token_id]
-    generated = input_ids.copy()
-    result = ""
-    kv_cache = KVCache()
-    start_time = time.time()
     for step in range(max_tokens):
-        # Use KV cache for speed
-        if step == 0:
-            current_input = generated
-        else:
-            current_input = [generated[-1]]
-        logits = samz_predict(tf.constant([current_input], dtype=tf.int32))
-        next_logits = logits[0, -1, :].numpy()
-        # Fast sampling
-        next_logits = next_logits / 0.8
-        top_k = np.argpartition(next_logits, -40)[-40:]
-        top_k_logits = next_logits[top_k]
-        probs = tf.nn.softmax(top_k_logits).numpy()
-        next_token = top_k[np.random.choice(len(probs), p=probs)]
         if next_token == eos_token_id:
             break
         generated.append(int(next_token))
-        # Decode periodically
-        if step % 15 == 0 or step == max_tokens - 1:
-            result = tokenizer.decode(generated[len(input_ids):])
             elapsed = time.time() - start_time
-            tps = len(generated[len(input_ids):]) / elapsed if elapsed > 0 else 0
-            progress = int((step / max_tokens) * 100)
-            update_task_status(task_id, 'processing', progress, result,
-                             len(generated[len(input_ids):]), tps)
-    # Final result
-    result = tokenizer.decode(generated[len(input_ids):])
     elapsed = time.time() - start_time
-    tps = len(generated[len(input_ids):]) / elapsed if elapsed > 0 else 0
-    update_task_status(task_id, 'completed', 100, result,
-                      len(generated[len(input_ids):]), tps)
-def task_worker():
-    """Background worker thread"""
-    print("🔧 Task worker started")
     while True:
         try:
-            task_id, user_id, model_name, prompt = task_queue.get(timeout=1)
-            print(f"⚙️  Processing task {task_id[:8]}... ({model_name})")
-            update_task_status(task_id, 'processing', 0)
             try:
-                if 'SAM-X' in model_name or 'Large' in model_name:
-                    generate_with_samx(prompt, task_id)
                 else:
-                    generate_with_samz(prompt, task_id)
-                print(f"✅ Task {task_id[:8]} completed")
             except Exception as e:
-                print(f"❌ Task {task_id[:8]} failed: {e}")
-                update_task_status(task_id, 'failed', 0, f"Error: {str(e)}")
             task_queue.task_done()
         except queue.Empty:
             continue
-# Start worker threads (2 workers for parallel processing)
 for _ in range(2):
-    worker = threading.Thread(target=task_worker, daemon=True)
-    worker.start()
-# ==============================================================================
-# User Management
-# ==============================================================================
-def hash_password(password: str) -> str:
-    return hashlib.sha256(password.encode()).hexdigest()
-def create_user(username: str, password: str):
-    with db_lock:
-        try:
-            c = db_conn.cursor()
-            c.execute("INSERT INTO users (username, password_hash) VALUES (?, ?)",
-                      (username, hash_password(password)))
-            db_conn.commit()
-            return True, "Account created!"
-        except sqlite3.IntegrityError:
-            return False, "Username exists!"
-def authenticate(username: str, password: str):
-    with db_lock:
-        c = db_conn.cursor()
-        c.execute("SELECT id, password_hash FROM users WHERE username=?", (username,))
-        result = c.fetchone()
-        if result and result[1] == hash_password(password):
-            return True, result[0]
-        return False, None
-def get_user_tasks(user_id: int):
-    with db_lock:
-        c = db_conn.cursor()
-        c.execute("""SELECT id, model_name, prompt, status, progress,
-                            tokens_generated, tokens_per_sec, created_at
-                     FROM tasks WHERE user_id=?
-                     ORDER BY created_at DESC LIMIT 50""",
-                  (user_id,))
-        return c.fetchall()
-def get_user_active_tasks(user_id: int):
-    with db_lock:
-        c = db_conn.cursor()
-        c.execute("""SELECT COUNT(*) FROM tasks
-                     WHERE user_id=? AND status IN ('queued', 'processing')""",
-                  (user_id,))
-        return c.fetchone()[0]
 # ==============================================================================
-# Gradio UI
 # ==============================================================================
 css = """
-.container { max-width: 1400px; margin: 0 auto; }
-.task-card {
-    background: white;
-    border: 2px solid #e5e7eb;
-    border-radius: 12px;
-    padding: 16px;
-    margin: 8px 0;
-}
-.status-queued { color: #f59e0b; }
-.status-processing { color: #3b82f6; }
-.status-completed { color: #10b981; }
-.status-failed { color: #ef4444; }
-.progress-bar {
-    height: 8px;
-    background: #e5e7eb;
-    border-radius: 4px;
-    overflow: hidden;
-    margin: 8px 0;
 }
-.progress-fill {
-    height: 100%;
-    background: linear-gradient(90deg, #10b981, #059669);
-    transition: width 0.3s;
 }
 """
-with gr.Blocks(css=css, title="SAM Background Processor") as demo:
     user_id_state = gr.State(None)
-    gr.Markdown("# 🚀 SAM Multi-Task Processor")
-    gr.Markdown("Submit up to 5 background tasks. No need to stay on page!")
-    # Auth
     with gr.Group(visible=True) as auth_group:
-        gr.Markdown("### 🔐 Sign In / Sign Up")
-        auth_username = gr.Textbox(label="Username", placeholder="username")
-        auth_password = gr.Textbox(label="Password", type="password")
-        auth_btn = gr.Button("Continue", variant="primary")
         auth_msg = gr.Markdown("")
-    # Main UI
     with gr.Group(visible=False) as main_group:
-        with gr.Row():
-            gr.Markdown("### 🤖 Create Task")
-            user_display = gr.Markdown("")
         with gr.Row():
-            with gr.Column(scale=2):
-                model_choice = gr.Radio(
-                    choices=["SAM-X-1-Large (Reasoning)", "SAM-Z-1 (Fast)"],
-                    value="SAM-Z-1 (Fast)",
-                    label="Model"
-                )
-                prompt_input = gr.Textbox(
-                    label="Prompt",
-                    placeholder="Enter your prompt...",
-                    lines=4
-                )
-                submit_btn = gr.Button("🚀 Submit Task", variant="primary", size="lg")
-                task_msg = gr.Markdown("")
             with gr.Column(scale=1):
-                gr.Markdown("### ℹ️ Info")
-                gr.Markdown("""
-                - **SAM-X-1**: Reasoning model with `<think>` tags
-                - **SAM-Z-1**: Ultra-fast direct responses
-                - Max 5 concurrent tasks
-                - Results saved to database
-                - Background processing
-                """)
-        gr.Markdown("---")
-        with gr.Row():
-            gr.Markdown("### 📋 Your Tasks")
-            refresh_btn = gr.Button("🔄 Refresh", size="sm")
-        tasks_display = gr.HTML("")
-        auto_refresh = gr.Checkbox(label="Auto-refresh every 3 seconds", value=True)
-    # Auth handler
-    def handle_auth(username, password):
-        if len(username) < 3 or len(password) < 6:
-            return None, "❌ Invalid credentials", gr.update(), gr.update()
-        success, user_id = authenticate(username, password)
-        if not success:
-            success, msg = create_user(username, password)
-            if success:
-                success, user_id = authenticate(username, password)
-        if success:
-            return (
-                user_id,
-                f"✅ Welcome, **{username}**!",
-                gr.update(visible=False),
-                gr.update(visible=True)
-            )
-        return None, "❌ Authentication failed", gr.update(), gr.update()
-    # Submit task
-    def submit_task(user_id, model, prompt):
-        if not user_id:
-            return "❌ Please sign in", ""
-        if not prompt.strip():
-            return "❌ Prompt required", ""
-        active_count = get_user_active_tasks(user_id)
-        if active_count >= 5:
-            return f"❌ Max 5 active tasks (you have {active_count})", ""
-        task_id = create_task(user_id, model, prompt)
-        return f"✅ Task submitted! ID: `{task_id[:8]}...`", ""
-    # Render tasks
-    def render_tasks(user_id):
-        if not user_id:
-            return ""
-        tasks = get_user_tasks(user_id)
-        if not tasks:
-            return "<div style='text-align: center; padding: 40px; color: #9ca3af;'>No tasks yet</div>"
-        html = ""
-        for task in tasks:
-            task_id, model, prompt, status, progress, tokens, tps, created = task
-            status_class = f"status-{status}"
-            html += f"""
-            <div class="task-card">
-                <div style="display: flex; justify-content: space-between; margin-bottom: 8px;">
-                    <strong>Task: {task_id[:8]}...</strong>
-                    <span class="{status_class}">●{status.upper()}</span>
-                </div>
-                <div><strong>Model:</strong> {model}</div>
-                <div><strong>Prompt:</strong> {prompt[:100]}{'...' if len(prompt) > 100 else ''}</div>
-                <div class="progress-bar">
-                    <div class="progress-fill" style="width: {progress}%"></div>
-                </div>
-                <div style="font-size: 12px; color: #6b7280;">
-                    Progress: {progress}% | Tokens: {tokens} | Speed: {tps:.1f} tok/s
-                </div>
-            </div>
-            """
         return html
-    # Get task result
-    def get_task_result(user_id, task_id_short):
-        if not user_id or not task_id_short:
-            return "❌ Invalid request"
         with db_lock:
             c = db_conn.cursor()
-            c.execute("""SELECT result, status FROM tasks
-                        WHERE user_id=? AND id LIKE ?""",
-                      (user_id, f"{task_id_short}%"))
-            result = c.fetchone()
-            if result:
-                if result[1] == 'completed':
-                    return f"### ✅ Result\n\n{result[0]}"
-                elif result[1] == 'failed':
-                    return f"### ❌ Failed\n\n{result[0]}"
-                else:
-                    return f"### ⏳ Status: {result[1]}"
-            return "❌ Task not found"
-    # Event handlers
-    auth_btn.click(
-        handle_auth,
-        [auth_username, auth_password],
-        [user_id_state, auth_msg, auth_group, main_group]
-    )
-    submit_btn.click(
-        submit_task,
-        [user_id_state, model_choice, prompt_input],
-        [task_msg, prompt_input]
-    ).then(
-        render_tasks,
-        [user_id_state],
-        [tasks_display]
-    )
-    refresh_btn.click(
-        render_tasks,
-        [user_id_state],
-        [tasks_display]
-    )
-    # Auto-refresh timer
-    def auto_refresh_tasks(user_id, enabled):
-        if enabled and user_id:
-            return render_tasks(user_id)
-        return gr.update()
-    # Poll every 3 seconds when auto-refresh enabled
-    demo.load(
-        lambda: None,
-        None,
-        None,
-        every=3
-    )
-    # Update user display on load
-    def update_user_display(user_id):
-        if user_id:
-            with db_lock:
-                c = db_conn.cursor()
-                c.execute("SELECT username FROM users WHERE id=?", (user_id,))
-                result = c.fetchone()
-                if result:
-                    active = get_user_active_tasks(user_id)
-                    return f"**User:** {result[0]} | **Active:** {active}/5"
-        return ""
-    # Periodic refresh
-    refresh_timer = gr.Timer(3)
-    @refresh_timer.tick
-    def timer_refresh(user_id, auto_enabled):
-        if auto_enabled and user_id:
-            return render_tasks(user_id), update_user_display(user_id)
-        return gr.update(), gr.update()
-    refresh_timer.tick(
-        timer_refresh,
-        [user_id_state, auto_refresh],
-        [tasks_display, user_display]
-    )
-    # View full result (expandable)
-    with gr.Accordion("🔍 View Task Result", open=False):
-        result_task_id = gr.Textbox(
-            label="Task ID (first 8 chars)",
-            placeholder="e.g., 3f7a9b2c"
-        )
-        view_result_btn = gr.Button("View Result", variant="primary")
-        result_display = gr.Markdown("")
-    view_result_btn.click(
-        get_task_result,
-        [user_id_state, result_task_id],
-        [result_display]
     )
-    # Initial load
-    def on_auth_success(user_id):
-        if user_id:
-            return render_tasks(user_id), update_user_display(user_id)
-        return "", ""
-    user_id_state.change(
-        on_auth_success,
-        [user_id_state],
-        [tasks_display, user_display]
-    )
 if __name__ == "__main__":
-    print("\n" + "="*80)
-    print("🚀 SAM BACKGROUND PROCESSOR".center(80))
-    print("="*80)
-    print(f"✅ 2 worker threads active")
-    print(f"✅ Max 5 tasks per user")
-    print(f"✅ Background processing enabled")
-    print(f"✅ Database: sam_tasks.db")
-    print("="*80 + "\n")
-    demo.queue(max_size=50)
-    demo.launch(
-        server_name="0.0.0.0",
-        server_port=7860,
-        share=False,
-        show_error=True
-    )

 import hashlib
 import sqlite3
 from datetime import datetime
+from typing import List, Dict, Optional, Tuple, Any
 import uuid
 # ==============================================================================
+# 1. Hardware & System Setup
 # ==============================================================================
 tf.config.threading.set_inter_op_parallelism_threads(2)
 tf.config.threading.set_intra_op_parallelism_threads(4)
 tf.config.optimizer.set_jit(True)
+print(f"🚀 SmilyAI System Initializing...")
+print(f"📱 TensorFlow Version: {tf.__version__}")
 # ==============================================================================
+# 2. Database (State Management)
 # ==============================================================================
 def init_db():
     conn = sqlite3.connect('sam_tasks.db', check_same_thread=False)
                   tokens_per_sec REAL DEFAULT 0,
                   FOREIGN KEY (user_id) REFERENCES users(id))''')
+    # Admin account
     admin_pass = hashlib.sha256("admin123".encode()).hexdigest()
     try:
+        c.execute("INSERT INTO users (username, password_hash) VALUES (?, ?)", ("admin", admin_pass))
         conn.commit()
     except sqlite3.IntegrityError:
         pass
 db_lock = threading.Lock()
 # ==============================================================================
+# 3. Model Architecture (Enhanced with KV Cache)
 # ==============================================================================
 @keras.saving.register_keras_serializable()
 class RotaryEmbedding(keras.layers.Layer):
     def call(self, x):
         variance = tf.reduce_mean(tf.square(x), axis=-1, keepdims=True)
         return x * tf.math.rsqrt(variance + self.epsilon) * self.scale
 @keras.saving.register_keras_serializable()
 class TransformerBlock(keras.layers.Layer):
         self.n_heads = n_heads
         self.ff_dim = ff_dim
         self.dropout_rate = dropout
         self.head_dim = d_model // n_heads
+        self.rope = RotaryEmbedding(self.head_dim, max_len=max_len, theta=rope_theta)
         self.pre_attn_norm = RMSNorm()
         self.pre_ffn_norm = RMSNorm()
         self.q_proj = keras.layers.Dense(d_model, use_bias=False, name="q_proj")
         self.k_proj = keras.layers.Dense(d_model, use_bias=False, name="k_proj")
         self.v_proj = keras.layers.Dense(d_model, use_bias=False, name="v_proj")
         self.out_proj = keras.layers.Dense(d_model, use_bias=False, name="o_proj")
         self.gate_proj = keras.layers.Dense(ff_dim, use_bias=False, name="gate_proj")
         self.up_proj = keras.layers.Dense(ff_dim, use_bias=False, name="up_proj")
         self.down_proj = keras.layers.Dense(d_model, use_bias=False, name="down_proj")
         self.dropout = keras.layers.Dropout(dropout)
+    def call(self, x, cache=None, training=None):
+        # Shape: [Batch, Time, Dim]
+        B, T = tf.shape(x)[0], tf.shape(x)[1]
         res = x
         y = self.pre_attn_norm(x)
+        # Projections
+        q = tf.reshape(self.q_proj(y), [B, T, self.n_heads, self.head_dim])
+        k = tf.reshape(self.k_proj(y), [B, T, self.n_heads, self.head_dim])
+        v = tf.reshape(self.v_proj(y), [B, T, self.n_heads, self.head_dim])
+        # --- KV CACHE UPDATE ---
+        if cache is not None:
+            old_k, old_v = cache
+            k = tf.concat([old_k, k], axis=1)
+            v = tf.concat([old_v, v], axis=1)
+        new_cache = (k, v)
+        # RoPE & Attention
+        q = tf.transpose(q, [0, 2, 1, 3]) # [B, Heads, T, HeadDim]
+        k_rot = tf.transpose(k, [0, 2, 1, 3])
+        q_rot, k_rot = self.rope(q, k_rot)
+        scores = tf.matmul(q_rot, k_rot, transpose_b=True) / tf.sqrt(tf.cast(self.head_dim, x.dtype))
+        # Masking (Only needed if sequence length > 1)
+        if T > 1:
+            mask = tf.linalg.band_part(tf.ones((T, T)), -1, 0)
+            mask = (1.0 - mask) * -1e9
+            scores += mask
+        attn = tf.matmul(tf.nn.softmax(scores, axis=-1), tf.transpose(v, [0, 2, 1, 3]))
+        attn = tf.reshape(tf.transpose(attn, [0, 2, 1, 3]), [B, T, self.d_model])
         x = res + self.dropout(self.out_proj(attn), training=training)
         res = x
         y = self.pre_ffn_norm(x)
         ffn = self.down_proj(keras.activations.silu(self.gate_proj(y)) * self.up_proj(y))
+        return res + self.dropout(ffn, training=training), new_cache
     def get_config(self):
+        return super().get_config()
 @keras.saving.register_keras_serializable()
 class SAM1Model(keras.Model):
     """Language model: token embedding, TransformerBlock stack, RMSNorm, LM head.

     ``config`` is a dict with the keys 'vocab_size', 'd_model', 'ff_mult',
     'n_layers', 'n_heads', 'dropout', 'max_len' and 'rope_theta'.
     """
     def __init__(self, config, **kwargs):
         super().__init__(**kwargs)
         self.cfg = config
         vocab_size = config['vocab_size']
         d_model = config['d_model']
         self.embed = keras.layers.Embedding(vocab_size, d_model)
         # Feed-forward width is the model width scaled by ff_mult, truncated to int.
         ff_dim = int(d_model * config['ff_mult'])
         stack = []
         for i in range(config['n_layers']):
             stack.append(
                 TransformerBlock(
                     d_model=d_model,
                     n_heads=config['n_heads'],
                     ff_dim=ff_dim,
                     dropout=config['dropout'],
                     max_len=config['max_len'],
                     rope_theta=config['rope_theta'],
                     name=f"blk_{i}",
                 )
             )
         self.blocks = stack
         self.norm = RMSNorm()
         self.lm_head = keras.layers.Dense(vocab_size, use_bias=False)
     def call(self, input_ids, cache=None, training=None):
         """Run the model and return ``(logits, new_caches)``.

         ``cache`` is either None or a sequence indexed by layer whose entries
         are handed to the matching block; ``new_caches`` collects the cache
         value each block returns, in layer order.
         """
         hidden = self.embed(input_ids)
         new_caches = []
         for idx, blk in enumerate(self.blocks):
             prev_kv = None if cache is None else cache[idx]
             hidden, layer_kv = blk(hidden, cache=prev_kv, training=training)
             new_caches.append(layer_kv)
         logits = self.lm_head(self.norm(hidden))
         return logits, new_caches
 # ==============================================================================
+# 4. Load Models
 # ==============================================================================
+print("\n📦 Loading SAM Models with KV Cache...")
+# Dummy input for initialization
+dummy_in = tf.zeros((1, 1), dtype=tf.int32)
+# --- SAM-X-1 (Reasoning) ---
+print("🔹 Loading SAM-X-1 (Reasoning)...")
 samx_weights = hf_hub_download("Smilyai-labs/Sam-1x-instruct", "ckpt.weights.h5")
+# UPDATED CONFIG PATH as requested
 samx_config_path = hf_hub_download("Smilyai-labs/Sam-1-large-it-0002", "config.json")
 with open(samx_config_path, 'r') as f:
+    cfg_x = json.load(f)
+samx_model = SAM1Model({
+    'vocab_size': cfg_x['vocab_size'],
+    'd_model': cfg_x['hidden_size'],
+    'n_layers': cfg_x['num_hidden_layers'],
+    'n_heads': cfg_x['num_attention_heads'],
+    'ff_mult': cfg_x['intermediate_size'] / cfg_x['hidden_size'],
+    'max_len': cfg_x['max_position_embeddings'],
     'dropout': 0.0,
+    'rope_theta': cfg_x['rope_theta']
+})
+_ = samx_model(dummy_in) # Build
 samx_model.load_weights(samx_weights)
 samx_model.trainable = False
+# --- SAM-Z-1 (Fast) ---
+print("🔹 Loading SAM-Z-1 (Speed)...")
 samz_weights = hf_hub_download("Smilyai-labs/Sam-Z-1-tensorflow", "ckpt.weights.h5")
 samz_config_path = hf_hub_download("Smilyai-labs/Sam-Z-1-tensorflow", "config.json")
 with open(samz_config_path, 'r') as f:
+    cfg_z = json.load(f)
+samz_model = SAM1Model({
+    'vocab_size': cfg_z['vocab_size'],
+    'd_model': cfg_z['hidden_size'],
+    'n_layers': cfg_z['num_hidden_layers'],
+    'n_heads': cfg_z['num_attention_heads'],
+    'ff_mult': cfg_z['intermediate_size'] / cfg_z['hidden_size'],
+    'max_len': cfg_z['max_position_embeddings'],
     'dropout': 0.0,
+    'rope_theta': cfg_z['rope_theta']
+})
+_ = samz_model(dummy_in) # Build
 samz_model.load_weights(samz_weights)
 samz_model.trainable = False
 # Tokenizer
+tok_path = hf_hub_download("Smilyai-labs/Sam-1x-instruct", "tokenizer.json")
+tokenizer = Tokenizer.from_file(tok_path)
 eos_token_id = 50256
+# JIT Compiled Prediction Steps (Separate for safety)
+@tf.function(jit_compile=True)
+def predict_x(ids, cache):
+    return samx_model(ids, cache=cache, training=False)
+@tf.function(jit_compile=True)
+def predict_z(ids, cache):
+    return samz_model(ids, cache=cache, training=False)
+print("✅ Models Loaded & JIT Compiled")
 # ==============================================================================
+# 5. Task Queue & Workers
 # ==============================================================================
 # Queue of (task_id, model_name, prompt) tuples: filled by create_task, drained by worker.
 task_queue = queue.Queue()
 # NOTE(review): not referenced in the code visible after this point - confirm it is still needed.
+active_tasks = {}
 # NOTE(review): also unreferenced below; presumably intended to guard active_tasks.
 task_lock = threading.Lock()
+def create_task(user_id, model, prompt):
     task_id = str(uuid.uuid4())
     with db_lock:
         c = db_conn.cursor()
+        c.execute("INSERT INTO tasks (id, user_id, model_name, prompt, status) VALUES (?,?,?,?,?)",
+                  (task_id, user_id, model, prompt, 'queued'))
         db_conn.commit()
+    task_queue.put((task_id, model, prompt))
     return task_id
+def update_db_status(task_id, status, progress, result, tokens, tps):
     with db_lock:
         c = db_conn.cursor()
         c.execute("""UPDATE tasks SET status=?, progress=?, result=?,
+                     tokens_generated=?, tokens_per_sec=? WHERE id=?""",
                   (status, progress, result, tokens, tps, task_id))
+        if status in ['completed', 'failed']:
+            c.execute("UPDATE tasks SET completed_at=? WHERE id=?", (datetime.now().isoformat(), task_id))
         db_conn.commit()
+def generate_stream(task_id, model_func, prompt, max_tokens=1024):
+    """Universal generator using KV Cache"""
+    # 1. Prefill Phase
+    input_ids = [i for i in tokenizer.encode(prompt).ids if i != eos_token_id]
     start_time = time.time()
+    # Process generic prompt to get initial cache
+    # Note: We must treat 'None' cache as a special case in the TF function usually,
+    # or just pass generic list of None in Eager, but TF function expects tensors.
+    # For simplicity in this script, we run prefill in eager or adapt the loop.
+    # Here we do the first pass:
+    current_ids = tf.constant([input_ids], dtype=tf.int32)
+    logits, kv_cache = model_func(current_ids, cache=None)
+    next_token = np.argmax(logits[0, -1, :].numpy())
+    generated = [int(next_token)]
+    update_db_status(task_id, 'processing', 0, tokenizer.decode(generated), 0, 0)
+    # 2. Decode Phase (Token by token)
     for step in range(max_tokens):
+        input_tensor = tf.constant([[generated[-1]]], dtype=tf.int32)
+        logits, kv_cache = model_func(input_tensor, cache=kv_cache)
+        # Sample
+        next_logits = logits[0, -1, :].numpy() / 0.7
+        probs = tf.nn.softmax(next_logits).numpy()
+        next_token = np.random.choice(len(probs), p=probs)
         if next_token == eos_token_id:
             break
         generated.append(int(next_token))
+        # Update DB every 3 tokens for smooth streaming UI
+        if step % 3 == 0:
+            text = tokenizer.decode(generated)
             elapsed = time.time() - start_time
+            tps = len(generated) / elapsed if elapsed > 0 else 0
+            prog = int((step / max_tokens) * 100)
+            update_db_status(task_id, 'processing', prog, text, len(generated), tps)
+    # Final Update
+    text = tokenizer.decode(generated)
     elapsed = time.time() - start_time
+    tps = len(generated) / elapsed
+    update_db_status(task_id, 'completed', 100, text, len(generated), tps)
+def worker():
+    print("👷 Worker thread started")
     while True:
         try:
+            task_id, model_name, prompt = task_queue.get(timeout=1)
+            print(f"⚙️ Processing {task_id[:8]} with {model_name}")
             try:
+                if "SAM-X" in model_name:
+                    generate_stream(task_id, predict_x, prompt)
                 else:
+                    generate_stream(task_id, predict_z, prompt)
             except Exception as e:
+                print(f"❌ Error: {e}")
+                update_db_status(task_id, 'failed', 0, f"Error: {str(e)}", 0, 0)
             task_queue.task_done()
         except queue.Empty:
             continue
 # Start two daemon worker threads; as daemons they do not block interpreter exit.
 for _ in range(2):
+    t = threading.Thread(target=worker, daemon=True)
+    t.start()
 # ==============================================================================
+# 6. Gradio UI with Streaming & Thinking
 # ==============================================================================
 # Stylesheet handed to gr.Blocks(css=...). The task-card, status-* and
 # thought-box classes are the ones emitted by get_tasks and format_output.
 css = """
+.container { max-width: 1200px; margin: 0 auto; }
+.task-card {
+    border: 1px solid #e5e7eb; padding: 15px; margin-bottom: 10px; border-radius: 8px;
+    background: white; box-shadow: 0 1px 3px rgba(0,0,0,0.1);
 }
+.status-processing { color: #2563eb; font-weight: bold; animation: pulse 1.5s infinite; }
+.status-completed { color: #059669; font-weight: bold; }
+@keyframes pulse { 0% { opacity: 1; } 50% { opacity: 0.5; } 100% { opacity: 1; } }
+.thought-box {
+    background-color: #f0f9ff; border-left: 4px solid #0ea5e9;
+    padding: 10px; margin: 10px 0; font-family: monospace; font-size: 0.9em;
+    color: #0c4a6e;
 }
 """
+def format_output(text):
+    if not text: return ""
+    # Parse <think> tags for SAM-X
+    if "<think>" in text:
+        parts = text.split("<think>")
+        pre = parts[0]
+        remainder = parts[1]
+        if "</think>" in remainder:
+            thought, ans = remainder.split("</think>")
+            return f"{pre}<div class='thought-box'>🧠 <b>Thinking Process:</b><br>{thought}</div>{ans}"
+        else:
+            return f"{pre}<div class='thought-box'>🧠 <b>Thinking...</b><br>{remainder}</div>"
+    return text.replace("\n", "<br>")
 # Build the UI. Component creation order inside the context managers defines the layout.
+with gr.Blocks(css=css, title="SmilyAI Studio") as demo:
 # Holds the id returned by login(); None until a login succeeds.
     user_id_state = gr.State(None)
+    gr.Markdown("# 🧠 SmilyAI Studio")
+    # --- Auth Section ---
 # Visible at start; login() hides it on success.
     with gr.Group(visible=True) as auth_group:
+        gr.Markdown("### Login")
+        u_in = gr.Textbox(label="Username")
+        p_in = gr.Textbox(label="Password", type="password")
+        login_btn = gr.Button("Login / Register", variant="primary")
         auth_msg = gr.Markdown("")
+    # --- Main Interface ---
 # Hidden at start; login() reveals it on success.
     with gr.Group(visible=False) as main_group:
+        gr.Markdown(f"### 🚀 New Inference Task")
         with gr.Row():
 # Left column: task submission form.
             with gr.Column(scale=1):
+                model_sel = gr.Radio(["SAM-X-1 (Reasoning)", "SAM-Z-1 (Fast)"], label="Model", value="SAM-Z-1 (Fast)")
+                prompt_in = gr.Textbox(label="Prompt", lines=4, placeholder="Enter query...")
+                sub_btn = gr.Button("Generate", variant="primary")
 # Right column: live view of one task, polled through the timer.
+            with gr.Column(scale=1):
+                gr.Markdown("### 📡 Live Monitor")
+                monitor_id = gr.Textbox(label="Task ID", placeholder="Click a task below to copy ID")
+                watch_btn = gr.Button("Open Stream")
+                stream_out = gr.HTML(label="Output", min_height=300)
+        gr.Markdown("### 📋 Task History")
+        refresh_btn = gr.Button("🔄 Refresh List")
 # Filled with the HTML produced by get_tasks().
+        task_list = gr.HTML()
+    # --- Logic ---
+    def login(u, p):
+        if not u or not p: return None, "Enter details", gr.update(), gr.update()
+        hashed = hashlib.sha256(p.encode()).hexdigest()
+        with db_lock:
+            c = db_conn.cursor()
+            c.execute("SELECT id FROM users WHERE username=? AND password_hash=?", (u, hashed))
+            res = c.fetchone()
+            if not res:
+                try:
+                    c.execute("INSERT INTO users (username, password_hash) VALUES (?,?)", (u, hashed))
+                    db_conn.commit()
+                    c.execute("SELECT id FROM users WHERE username=?", (u,))
+                    res = c.fetchone()
+                except: return None, "Error", gr.update(), gr.update()
+            return res[0], f"Welcome {u}", gr.update(visible=False), gr.update(visible=True)
+    def submit(uid, mod, p):
+        if not uid: return "Please login"
+        tid = create_task(uid, mod, p)
+        return gr.update(value=""), tid # Clear prompt, set monitor ID
+    def get_tasks(uid):
+        if not uid: return ""
+        with db_lock:
+            c = db_conn.cursor()
+            c.execute("SELECT id, model_name, status, progress, created_at FROM tasks WHERE user_id=? ORDER BY created_at DESC LIMIT 10", (uid,))
+            rows = c.fetchall()
+        html = ""
+        for r in rows:
+            cls = f"status-{r[2]}"
+            html += f"""<div class='task-card' onclick="navigator.clipboard.writeText('{r[0]}')">
+                        <b>{r[1]}</b> | <span class='{cls}'>{r[2].upper()}</span> | {r[3]}%
+                        <br><small>ID: {r[0]}</small>
+                        </div>"""
         return html
+    # Streaming Logic
+    timer = gr.Timer(0.5, active=False)
+    def start_watch(tid):
+        if not tid: return gr.update(active=False)
+        return gr.update(active=True)
+    def update_stream(uid, tid):
+        if not uid or not tid: return "Select a task...", gr.update(active=False)
         with db_lock:
             c = db_conn.cursor()
+            c.execute("SELECT result, status FROM tasks WHERE id=?", (tid,))
+            res = c.fetchone()
+        if not res: return "Task not found", gr.update(active=False)
+        formatted = format_output(res[0])
+        is_active = res[1] in ['queued', 'processing']
+        return formatted, gr.update(active=is_active)
+    # Wiring
 # Login fills the user state and message and toggles the two groups.
+    login_btn.click(login, [u_in, p_in], [user_id_state, auth_msg, auth_group, main_group])
 # Submit writes to the prompt box and monitor id, then refreshes the task list.
+    sub_btn.click(submit, [user_id_state, model_sel, prompt_in], [prompt_in, monitor_id]).then(
+        get_tasks, [user_id_state], [task_list]
     )
+    refresh_btn.click(get_tasks, [user_id_state], [task_list])
 # "Open Stream" only switches the timer; each tick then refreshes the output
 # and lets update_stream decide whether polling continues.
+    watch_btn.click(start_watch, [monitor_id], [timer])
+    timer.tick(update_stream, [user_id_state, monitor_id], [stream_out, timer])
 # Script entry point: enable the event queue and serve on all interfaces, port 7860.
 if __name__ == "__main__":
+    demo.queue().launch(server_name="0.0.0.0", server_port=7860)