Bc-AI commited on
Commit
af68acb
·
verified ·
1 Parent(s): a774e49

Upload folder using huggingface_hub

Browse files
Dockerfile ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.10-slim

WORKDIR /app

# Install system dependencies (compilers needed to build native wheels)
RUN apt-get update && apt-get install -y \
    gcc \
    g++ \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements first to leverage Docker cache
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code
COPY worker_app.py .
COPY model_architecture.py .
COPY model_manager.py .
# FIX: "COPY ../shared" is invalid -- Docker forbids COPY sources outside the
# build context, so the original line made every build fail. The shared/
# directory ships inside this repo/build context, so copy it from there.
COPY shared ./shared

# Expose port for the API
EXPOSE 8000

# Start the application
CMD ["python", "worker_app.py"]
README.md CHANGED
@@ -1,10 +1,12 @@
1
- ---
2
- title: Worker Universal
3
- emoji: 📉
4
- colorFrom: pink
5
- colorTo: blue
6
- sdk: docker
7
- pinned: false
8
- ---
9
-
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
1
+ # SACCP Worker_Universal Node
2
+ This is a worker_universal node in the SACCP (Scalable Accelerated Compute Protocol) distributed computing network.
3
+
4
+ ## Node Type: WORKER_UNIVERSAL
5
+ - Processes tasks according to SACCP protocol
6
+ - Contributes computational resources to the network
7
+ - Earns cloud credits for resource contribution
8
+
9
+ ## Architecture
10
+ - Built with FastAPI and TensorFlow/Keras
11
+ - Implements fault-tolerant operations
12
+ - Integrated with SACCP credit system
model_architecture.py ADDED
@@ -0,0 +1,205 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import tensorflow as tf
2
+ import keras
3
+ import numpy as np
4
+
5
@keras.saving.register_keras_serializable()
class RotaryEmbedding(keras.layers.Layer):
    """Rotary positional embedding (RoPE) applied to query/key tensors.

    Lazily caches cos/sin tables of shape (max_len, dim) on first call.
    """

    def __init__(self, dim, max_len=2048, theta=10000, **kwargs):
        super().__init__(**kwargs)
        self.dim = dim          # per-head dimension (split in half by rotate_half, so should be even)
        self.max_len = max_len  # maximum sequence length covered by the cache
        self.theta = theta      # RoPE base frequency
        self.built_cache = False
        self.cos_cached = None
        self.sin_cached = None

    def build(self, input_shape):
        super().build(input_shape)

    def _build_cache(self):
        """Build the cos/sin lookup tables once.

        FIX: the original materialized the tables via emb.numpy() and
        np.cos/np.sin, which only works in eager mode and forces a
        device->host round trip. tf.cos/tf.sin compute identical values
        and also work inside tf.function graphs.
        """
        if not self.built_cache:
            inv_freq = 1.0 / (self.theta ** (tf.range(0, self.dim, 2, dtype=tf.float32) / self.dim))
            t = tf.range(self.max_len, dtype=tf.float32)
            freqs = tf.einsum("i,j->ij", t, inv_freq)
            emb = tf.concat([freqs, freqs], axis=-1)  # (max_len, dim)
            self.cos_cached = tf.cos(emb)
            self.sin_cached = tf.sin(emb)
            self.built_cache = True

    def rotate_half(self, x):
        """Rotate the last dimension: (x1, x2) -> (-x2, x1)."""
        x1, x2 = tf.split(x, 2, axis=-1)
        return tf.concat([-x2, x1], axis=-1)

    def call(self, q, k, offset=0):
        """Apply rotary embeddings with position offset.

        q, k: (batch, heads, seq, head_dim) -- seq is axis 2, per the
        tf.shape(q)[2] below. `offset` shifts positions (for cached decoding).
        """
        self._build_cache()
        seq_len = tf.shape(q)[2]
        dtype = q.dtype

        cos = tf.cast(self.cos_cached[offset:offset + seq_len, :], dtype)[None, None, :, :]
        sin = tf.cast(self.sin_cached[offset:offset + seq_len, :], dtype)[None, None, :, :]

        q_embed = (q * cos) + (self.rotate_half(q) * sin)
        k_embed = (k * cos) + (self.rotate_half(k) * sin)
        return q_embed, k_embed

    def get_config(self):
        config = super().get_config()
        config.update({"dim": self.dim, "max_len": self.max_len, "theta": self.theta})
        return config
50
+
51
+
52
@keras.saving.register_keras_serializable()
class RMSNorm(keras.layers.Layer):
    """Root-mean-square layer normalization with a learned per-feature scale."""

    def __init__(self, epsilon=1e-5, **kwargs):
        super().__init__(**kwargs)
        self.epsilon = epsilon  # numerical stabilizer added under the rsqrt
        self.scale = None       # created in build()

    def build(self, input_shape):
        # One learned gain per feature on the last axis.
        self.scale = self.add_weight(name="scale", shape=(input_shape[-1],), initializer="ones")
        super().build(input_shape)

    def call(self, x):
        # Normalize by the RMS over the feature axis, then apply the gain.
        mean_square = tf.reduce_mean(x * x, axis=-1, keepdims=True)
        normalized = x * tf.math.rsqrt(mean_square + self.epsilon)
        return normalized * self.scale

    def get_config(self):
        config = super().get_config()
        config.update({"epsilon": self.epsilon})
        return config
71
+
72
+
73
@keras.saving.register_keras_serializable()
class TransformerBlock(keras.layers.Layer):
    """Pre-norm decoder block: RMSNorm -> causal MHA with RoPE -> residual,
    then RMSNorm -> SwiGLU FFN -> residual."""

    def __init__(self, d_model, n_heads, ff_dim, dropout, max_len, rope_theta, layer_idx=0, **kwargs):
        super().__init__(**kwargs)
        self.d_model = d_model          # model (embedding) width
        self.n_heads = n_heads          # number of attention heads
        self.ff_dim = ff_dim            # FFN inner width
        self.dropout_rate = dropout     # dropout applied after attention and FFN
        self.max_len = max_len          # passed to RotaryEmbedding cache
        self.rope_theta = rope_theta    # RoPE base frequency
        self.head_dim = d_model // n_heads
        self.layer_idx = layer_idx      # position of this block in the stack (serialized only)

    def build(self, input_shape):
        # All projections are bias-free, LLaMA-style.
        self.pre_attn_norm = RMSNorm(name="pre_attn_norm")
        self.pre_ffn_norm = RMSNorm(name="pre_ffn_norm")
        self.q_proj = keras.layers.Dense(self.d_model, use_bias=False, name="q_proj")
        self.k_proj = keras.layers.Dense(self.d_model, use_bias=False, name="k_proj")
        self.v_proj = keras.layers.Dense(self.d_model, use_bias=False, name="v_proj")
        self.out_proj = keras.layers.Dense(self.d_model, use_bias=False, name="o_proj")
        self.rope = RotaryEmbedding(self.head_dim, max_len=self.max_len, theta=self.rope_theta)
        # SwiGLU FFN: down(silu(gate(x)) * up(x))
        self.gate_proj = keras.layers.Dense(self.ff_dim, use_bias=False, name="gate_proj")
        self.up_proj = keras.layers.Dense(self.ff_dim, use_bias=False, name="up_proj")
        self.down_proj = keras.layers.Dense(self.d_model, use_bias=False, name="down_proj")
        self.dropout = keras.layers.Dropout(self.dropout_rate)
        super().build(input_shape)

    def call(self, x, training=None, past_kv=None, use_cache=False):
        """Simplified call without KV cache for this example"""
        # past_kv/use_cache are accepted for interface compatibility but ignored here.
        B, T, D = tf.shape(x)[0], tf.shape(x)[1], self.d_model
        dtype = x.dtype

        res = x
        y = self.pre_attn_norm(x)

        # Multi-head attention: project then reshape to (B, heads, T, head_dim)
        q = tf.transpose(tf.reshape(self.q_proj(y), [B, T, self.n_heads, self.head_dim]), [0, 2, 1, 3])
        k = tf.transpose(tf.reshape(self.k_proj(y), [B, T, self.n_heads, self.head_dim]), [0, 2, 1, 3])
        v = tf.transpose(tf.reshape(self.v_proj(y), [B, T, self.n_heads, self.head_dim]), [0, 2, 1, 3])

        # Apply RoPE (offset=0 since there is no cache in this simplified path)
        q, k = self.rope(q, k, offset=0)

        # Attention scores, scaled by sqrt(head_dim)
        scores = tf.matmul(q, k, transpose_b=True) / tf.sqrt(tf.cast(self.head_dim, dtype))

        # Causal mask: band_part(ones, -1, 0) keeps the LOWER triangle
        # (position i may attend to j <= i); disallowed entries get -1e9.
        mask = tf.linalg.band_part(tf.ones([T, T], dtype=dtype), -1, 0)
        mask = tf.where(mask == 0, tf.constant(-1e9, dtype=dtype), tf.constant(0.0, dtype=dtype))
        scores = scores + mask[None, None, :, :]

        attn = tf.nn.softmax(scores, axis=-1)
        attn_out = tf.matmul(attn, v)
        attn_out = tf.transpose(attn_out, [0, 2, 1, 3])   # back to (B, T, heads, head_dim)
        attn_out = tf.reshape(attn_out, [B, T, self.d_model])

        # Residual connection around the attention sub-layer
        x = res + self.dropout(self.out_proj(attn_out), training=training)

        # FFN (SwiGLU) with its own pre-norm and residual
        res = x
        y = self.pre_ffn_norm(x)
        ffn = self.down_proj(keras.activations.silu(self.gate_proj(y)) * self.up_proj(y))
        output = res + self.dropout(ffn, training=training)

        return output, None  # Return None for past_kv in this simplified version

    def get_config(self):
        config = super().get_config()
        config.update({
            "d_model": self.d_model,
            "n_heads": self.n_heads,
            "ff_dim": self.ff_dim,
            "dropout": self.dropout_rate,
            "max_len": self.max_len,
            "rope_theta": self.rope_theta,
            "layer_idx": self.layer_idx
        })
        return config
151
+
152
+
153
@keras.saving.register_keras_serializable()
class SAM1Model(keras.Model):
    """Decoder-only LM: token embedding -> N TransformerBlocks -> RMSNorm -> LM head.

    The constructor accepts its configuration in three forms so both direct
    construction and keras deserialization (which passes config=...) work.
    """

    def __init__(self, **kwargs):
        super().__init__()
        # Resolve the config dict, in priority order:
        # 1) SAM1Model(config={...}) -- the form produced by get_config()
        # 2) SAM1Model(vocab_size=..., d_model=..., ...) -- flat kwargs
        # 3) SAM1Model(cfg={...}) -- legacy fallback; else use kwargs as-is
        if 'config' in kwargs and isinstance(kwargs['config'], dict):
            self.cfg = kwargs['config']
        elif 'vocab_size' in kwargs:
            self.cfg = kwargs
        else:
            self.cfg = kwargs.get('cfg', kwargs)

        self.embed = keras.layers.Embedding(self.cfg['vocab_size'], self.cfg['d_model'], name="embed_tokens")
        # FFN inner width is expressed as a multiplier of d_model
        ff_dim = int(self.cfg['d_model'] * self.cfg['ff_mult'])
        block_args = {
            'd_model': self.cfg['d_model'],
            'n_heads': self.cfg['n_heads'],
            'ff_dim': ff_dim,
            'dropout': self.cfg['dropout'],
            'max_len': self.cfg['max_len'],
            'rope_theta': self.cfg['rope_theta']
        }
        self.blocks = [
            TransformerBlock(name=f"block_{i}", layer_idx=i, **block_args)
            for i in range(self.cfg['n_layers'])
        ]
        self.norm = RMSNorm(name="final_norm")
        # NOTE: lm_head is a separate Dense; weights are NOT tied to the embedding.
        self.lm_head = keras.layers.Dense(self.cfg['vocab_size'], use_bias=False, name="lm_head")

    def call(self, input_ids, training=None, past_kv=None, use_cache=False):
        """
        Simplified call without full KV cache implementation

        Returns (logits, None); past_kv/use_cache are accepted but ignored.
        """
        x = self.embed(input_ids)

        for block in self.blocks:
            x, _ = block(x, training=training, past_kv=None, use_cache=False)

        logits = self.lm_head(self.norm(x))
        return logits, None  # Return None for past_kv in this simplified version

    def get_config(self):
        base_config = super().get_config()
        base_config['config'] = self.cfg
        return base_config
197
+
198
+
199
def count_parameters(model):
    """Return the total number of scalar parameters in *model*.

    Counts from each weight's static shape instead of calling
    ``weight.numpy().size`` as the original did -- that pulled every full
    weight tensor to host memory just to read its element count. The
    product of an empty shape is 1, so scalar weights are counted correctly.
    (Assumes the model is built, i.e. shapes are fully defined.)
    """
    total_params = 0
    for weight in model.weights:
        total_params += int(np.prod(weight.shape))
    return total_params
model_manager.py ADDED
@@ -0,0 +1,167 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import tensorflow as tf
4
+ import keras
5
+ import numpy as np
6
+ from tokenizers import Tokenizer
7
+ from huggingface_hub import hf_hub_download
8
+ from transformers import GPT2Tokenizer
9
+ import threading
10
+ from typing import Dict, Optional
11
+
12
+ from model_architecture import SAM1Model
13
+
14
class ModelManager:
    """
    Manages multiple models and their loading/unloading based on demand

    Caches loaded models, tokenizers, and raw configs per model type and
    serializes loading through a single lock.
    """

    def __init__(self):
        # Caches keyed by model type ("sam-x-nano", ...)
        self.models: Dict[str, keras.Model] = {}
        self.tokenizers: Dict[str, Tokenizer] = {}
        self.model_configs: Dict[str, dict] = {}
        self.lock = threading.Lock()  # guards get_model's load-if-missing path

        # Model mapping: local model type -> Hugging Face repo id
        self.model_repos = {
            "sam-x-nano": "Smilyai-labs/Sam-nano",
            "sam-x-mini": "Smilyai-labs/Sam-mini",
            "sam-x-fast": "Smilyai-labs/Sam-fast",
            "sam-x-large": "Smilyai-labs/Sam-large-2",  # Using Sam-large-2 as the large model
            "sam-large-2": "Smilyai-labs/Sam-large-2"
        }

        # Performance optimizations that should be applied before TF import
        # NOTE(review): tensorflow is imported at module top, so by the time
        # this runs the TF_NUM_*_THREADS / CUDA_VISIBLE_DEVICES /
        # TF_ENABLE_ONEDNN_OPTS variables are set too late to be read by TF --
        # they likely have no effect here. Move them before `import tensorflow`
        # (e.g. into the launcher) to make them effective.
        NUM_CORES = os.cpu_count() or 4
        os.environ['TF_NUM_INTEROP_THREADS'] = str(NUM_CORES)
        os.environ['TF_NUM_INTRAOP_THREADS'] = str(NUM_CORES)
        os.environ['CUDA_VISIBLE_DEVICES'] = '-1'  # Force CPU only for consistency
        os.environ['TF_ENABLE_ONEDNN_OPTS'] = '1'  # Intel optimization
        os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # Reduce TF logging

        # Configure TF threading
        # NOTE(review): these raise RuntimeError if the TF context was already
        # initialized before this constructor runs -- confirm call order.
        tf.config.threading.set_inter_op_parallelism_threads(NUM_CORES)
        tf.config.threading.set_intra_op_parallelism_threads(NUM_CORES)

        print(f"✅ CPU optimized: {NUM_CORES} threads, oneDNN enabled")

    def get_model_repo(self, model_type: str) -> str:
        """Get the Hugging Face repository for a given model type.

        Unknown types fall back to the "sam-x-large" repo.
        """
        return self.model_repos.get(model_type, self.model_repos["sam-x-large"])

    def load_tokenizer(self, model_type: str) -> Tokenizer:
        """Load tokenizer for a specific model type (cached after first load).

        Builds a GPT-2 tokenizer, adds model-specific special tokens, and
        converts it to a fast `tokenizers.Tokenizer` via a temp save dir.
        Raises whatever the underlying load raises, after logging it.
        """
        if model_type in self.tokenizers:
            return self.tokenizers[model_type]

        print(f"🚀 Loading tokenizer for {model_type}...")

        try:
            # Load base tokenizer
            from transformers import AutoTokenizer
            hf_tokenizer = AutoTokenizer.from_pretrained("gpt2")

            # Add special tokens specific to your models
            # NOTE(review): the four "\n" entries are duplicates -- the
            # tokenizer will collapse them to a single added token.
            special_tokens = [
                "\n", "\n", "\n", "\n",
                "<CONTINUE>",
                "<im end for model tun>"
            ]
            hf_tokenizer.add_special_tokens({"additional_special_tokens": special_tokens})

            # Save temporarily to create tokenizers instance
            # NOTE(review): the temp directory is never cleaned up.
            os.makedirs(f"./temp_tokenizer_{model_type}", exist_ok=True)
            hf_tokenizer.save_pretrained(f"./temp_tokenizer_{model_type}")
            tokenizer = Tokenizer.from_file(f"./temp_tokenizer_{model_type}/tokenizer.json")

            print(f"✅ Tokenizer loaded for {model_type} with vocab size: {tokenizer.get_vocab_size()}")

            self.tokenizers[model_type] = tokenizer
            return tokenizer

        except Exception as e:
            print(f"❌ Error loading tokenizer for {model_type}: {e}")
            raise

    def load_model(self, model_type: str) -> keras.Model:
        """Load a specific model by type (cached after first load).

        Downloads config.json from the HF repo, builds a SAM1Model from it,
        then best-effort loads model.weights.h5 (random init if missing) and
        runs a warmup forward pass. Raises on config/build failure.
        """
        if model_type in self.models:
            return self.models[model_type]

        print(f"🚀 Loading {model_type} model...")

        try:
            # Get the appropriate model repo
            model_repo = self.get_model_repo(model_type)
            cache_dir = f"./model_cache/{model_type}"

            # Download config
            config_path = hf_hub_download(model_repo, "config.json", cache_dir=cache_dir)
            with open(config_path, 'r') as f:
                config = json.load(f)

            # Store model config
            self.model_configs[model_type] = config

            # Build model from config, translating HF config keys to SAM1Model's
            # names; defaults approximate a GPT-2-small-sized model.
            model_config = {
                'vocab_size': config.get('vocab_size', 50432),
                'd_model': config.get('hidden_size', 768),
                'n_layers': config.get('num_hidden_layers', 12),
                'n_heads': config.get('num_attention_heads', 12),
                'ff_mult': config.get('intermediate_size', 3072) / config.get('hidden_size', 768),
                'max_len': config.get('max_position_embeddings', 2048),
                'dropout': 0.1,
                'rope_theta': config.get('rope_theta', 10000)
            }

            model = SAM1Model(config=model_config)

            # Build model with dummy input (creates all weights before loading)
            dummy_input = tf.zeros((1, 16), dtype=tf.int32)
            _ = model(dummy_input, training=False, use_cache=False)

            print(f"✅ Model {model_type} loaded: {config.get('num_hidden_layers', 12)} layers")

            # Try to load weights; deliberately best-effort -- a missing weights
            # file leaves the model randomly initialized rather than failing.
            try:
                weights_path = hf_hub_download(model_repo, "model.weights.h5", cache_dir=cache_dir)
                model.load_weights(weights_path)
                print(f"✅ Model weights loaded successfully for {model_type}!")
            except Exception as e:
                print(f"⚠️ Could not load weights for {model_type}, using random initialization: {e}")

            # Warm up the model
            print(f"🔥 Warming up model {model_type}...")
            warmup_input = tf.constant([[1, 2, 3, 4, 5]], dtype=tf.int32)
            _, _ = model(warmup_input, training=False, use_cache=True)
            print(f"✅ Model {model_type} warmed up")

            # Store the model
            self.models[model_type] = model
            return model

        except Exception as e:
            print(f"❌ Error loading model {model_type}: {e}")
            raise

    def get_model(self, model_type: str) -> tuple:
        """Get (model, tokenizer, raw_config) for a type, loading if necessary.

        Thread-safe: the whole load-if-missing sequence runs under self.lock.
        """
        with self.lock:
            # Ensure tokenizer is loaded
            if model_type not in self.tokenizers:
                self.load_tokenizer(model_type)

            # Ensure model is loaded
            if model_type not in self.models:
                self.load_model(model_type)

            return self.models[model_type], self.tokenizers[model_type], self.model_configs[model_type]

    def list_available_models(self) -> list:
        """Get list of available model types"""
        return list(self.model_repos.keys())

    def is_model_loaded(self, model_type: str) -> bool:
        """Check if a model is currently loaded"""
        return model_type in self.models
requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Requirements for Worker Nodes
2
+ keras==2.15.0
3
+ tensorflow==2.15.0
4
+ fastapi==0.104.1
5
+ uvicorn==0.24.0
6
+ requests==2.31.0
7
+ huggingface_hub==0.20.1
8
+ tokenizers==0.15.0
9
+ transformers==4.35.2
10
+ numpy==1.24.3
11
+ pytz==2023.3.post1
shared/approval_system.py ADDED
@@ -0,0 +1,168 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Smilyai Approval System for SACCP Network
3
+ Handles approval of HEAD nodes and other security features
4
+ """
5
+
6
+ from enum import Enum
7
+ from pydantic import BaseModel
8
+ from typing import Optional, List, Dict, Any
9
+ import time
10
+ import uuid
11
+
12
+
13
class ApprovalStatus(str, Enum):
    """Lifecycle states of an approval request."""
    PENDING = "pending"
    APPROVED = "approved"
    REJECTED = "rejected"
    REVOKED = "revoked"    # previously approved, later withdrawn


class ApprovalType(str, Enum):
    """Kinds of actions that require smilyai approval."""
    HEAD_NODE = "head_node"
    SPECIAL_ACCESS = "special_access"
    RESOURCE_INTENSIVE_TASK = "resource_intensive_task"
+
25
+
26
class ApprovalRequest(BaseModel):
    """Request for smilyai approval"""
    request_id: str                  # unique id, generated by request_approval()
    node_id: str                     # node asking for approval
    endpoint: str                    # node's network endpoint
    request_type: ApprovalType
    request_data: Dict[str, Any]     # free-form payload (endpoint/capabilities for HEAD nodes)
    reason: str                      # human-readable justification
    requested_at: int                # unix timestamp (int(time.time()))
    requested_by: str  # User or system that requested


class ApprovalResponse(BaseModel):
    """Response to an approval request"""
    request_id: str
    status: ApprovalStatus
    approved_by: Optional[str] = None        # reviewer id; set when reviewed
    approved_at: Optional[int] = None        # unix timestamp; only set on approval
    rejection_reason: Optional[str] = None   # only set on rejection
    notes: Optional[str] = None
+
47
+
48
class SmilyaiApprovalSystem:
    """System for managing smilyai approvals.

    In-memory store of approval requests/responses plus the set of approved
    node ids. Not persisted and not thread-safe; state is lost on restart.
    """

    def __init__(self):
        self.approval_requests: Dict[str, ApprovalRequest] = {}
        self.approval_responses: Dict[str, ApprovalResponse] = {}
        self.approved_nodes: set = set()
        self.approval_rules: List[Dict[str, Any]] = []  # reserved for future rule-based review

    def request_approval(self, node_id: str, endpoint: str, request_type: ApprovalType,
                         request_data: Dict[str, Any], reason: str, requested_by: str) -> str:
        """Request smilyai approval for an action.

        Returns the generated request id. HEAD-node requests that pass the
        basic requirement check are auto-approved immediately; everything
        else stays pending until review_approval_request() is called.
        """
        request_id = f"approval_{int(time.time())}_{uuid.uuid4().hex[:8]}"

        approval_request = ApprovalRequest(
            request_id=request_id,
            node_id=node_id,
            endpoint=endpoint,
            request_type=request_type,
            request_data=request_data,
            reason=reason,
            requested_at=int(time.time()),
            requested_by=requested_by
        )

        self.approval_requests[request_id] = approval_request

        # For HEAD nodes, auto-approve if they meet basic requirements
        if request_type == ApprovalType.HEAD_NODE:
            basic_approved = self._check_basic_requirements(request_data)
            if basic_approved:
                # In a real system, this would go to human review, but for now we'll auto-approve
                # with a short delay to simulate the review process
                response = ApprovalResponse(
                    request_id=request_id,
                    status=ApprovalStatus.APPROVED,
                    approved_by="smilyai_system",
                    approved_at=int(time.time()),
                    notes="Basic requirements met, auto-approved"
                )
                self.approval_responses[request_id] = response
                self.approved_nodes.add(node_id)
                return request_id

        return request_id

    def _check_basic_requirements(self, request_data: Dict[str, Any]) -> bool:
        """Check if a node meets basic requirements for approval.

        Requirements for HEAD nodes: a secure (HTTPS) endpoint OR sufficiently
        robust hardware, and a profile appropriate for a HEAD role.
        """
        endpoint = request_data.get('endpoint', '')
        capabilities = request_data.get('capabilities', {})

        # Check if endpoint is secure.
        # FIX: the original used a substring test ('https://' in endpoint),
        # which any URL such as "http://evil/?u=https://x" passes. The scheme
        # must be at the start of the endpoint.
        has_secure_endpoint = endpoint.startswith('https://')

        # Check minimum resources required for HEAD nodes
        min_cpu = capabilities.get('cpu_count', 0) >= 4
        min_memory = capabilities.get('memory_gb', 0) >= 16  # At least 16GB RAM for HEAD
        min_disk = capabilities.get('disk_space_gb', 0) >= 50  # At least 50GB disk

        # For HEAD nodes specifically, we want robust systems
        has_good_hardware = min_cpu and min_memory and min_disk

        # Check if it's a GPU node (which might be inappropriate for HEAD)
        is_gpu_node = capabilities.get('gpu_available', False)

        # HEAD nodes should be dedicated compute/storage, not primarily GPU-focused
        is_appropriate_for_head = not is_gpu_node or capabilities.get('node_type') != 'gpu'

        return (has_secure_endpoint or has_good_hardware) and is_appropriate_for_head

    def review_approval_request(self, request_id: str, status: ApprovalStatus,
                                reviewer: str, rejection_reason: Optional[str] = None,
                                notes: Optional[str] = None) -> bool:
        """Review and respond to an approval request.

        Returns False if the request id is unknown. APPROVED adds the node to
        the approved set; REJECTED/REVOKED removes it.
        """
        if request_id not in self.approval_requests:
            return False

        response = ApprovalResponse(
            request_id=request_id,
            status=status,
            approved_by=reviewer,
            approved_at=int(time.time()) if status == ApprovalStatus.APPROVED else None,
            rejection_reason=rejection_reason,
            notes=notes
        )

        self.approval_responses[request_id] = response

        # Update approved nodes set
        if status == ApprovalStatus.APPROVED:
            request = self.approval_requests[request_id]
            self.approved_nodes.add(request.node_id)
        elif status in [ApprovalStatus.REJECTED, ApprovalStatus.REVOKED]:
            request = self.approval_requests[request_id]
            self.approved_nodes.discard(request.node_id)

        return True

    def is_approved(self, node_id: str, approval_type: ApprovalType) -> bool:
        """Check if a node is approved for a specific type of access."""
        if approval_type == ApprovalType.HEAD_NODE:
            return node_id in self.approved_nodes
        return False  # Other types would have different checks

    def get_pending_requests(self) -> List[ApprovalRequest]:
        """Get list of approval requests with no response or a PENDING response."""
        pending = []
        for req_id, req in self.approval_requests.items():
            response = self.approval_responses.get(req_id)
            if not response or response.status == ApprovalStatus.PENDING:
                pending.append(req)
        return pending


# Global instance of the approval system
smilyai_approval_system = SmilyaiApprovalSystem()
shared/chat_history.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import time
4
+ from datetime import datetime
5
+ from typing import List, Dict, Any
6
+ from .models import ChatMessage
7
+
8
+
9
def save_chat_history(messages: List[ChatMessage], model_name: str, response: str, filename: str = "chat.md"):
    """
    Append one chat session to a markdown history file.

    Writes a timestamped section header naming the model, followed by every
    message in the conversation and the assistant's final response.
    """
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    # Session header
    sections = [f"""
## Chat Session: {timestamp}
**Model Used:** {model_name}

---
"""]

    # One markdown paragraph per conversation message
    for message in messages:
        prefix = "**User:**" if message.role.lower() == "user" else "**Assistant:**"
        sections.append(f"\n{prefix} {message.content}\n\n")

    # Final assistant response, followed by a section separator
    sections.append(f"\n**Assistant Response:** {response}\n\n---\n\n")

    # Append the whole session in a single write
    with open(filename, "a", encoding="utf-8") as log_file:
        log_file.write("".join(sections))
34
+
35
+
36
def save_detailed_chat_log(request_data: Dict[str, Any], response_data: str, model_name: str, processing_time: float, filename: str = "chat.md"):
    """
    Append a detailed request/response log entry (with metadata) to a markdown file.
    """
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    # Metadata header for this request
    chunks = [f"""
## Chat Request Log: {timestamp}
- **Model:** {model_name}
- **Processing Time:** {processing_time:.2f}s
- **Max Tokens:** {request_data.get('max_tokens', 512)}
- **Temperature:** {request_data.get('temperature', 0.8)}

### Input Messages:
"""]

    # One bullet per input message
    for entry in request_data.get('messages', []):
        speaker = "**User**" if entry.get('role', 'unknown').lower() == 'user' else "**Assistant**"
        chunks.append(f"- {speaker}: {entry.get('content', '')}\n")

    chunks.append(f"\n### Model Response:\n{response_data}\n\n---\n\n")

    # Append everything in a single write
    with open(filename, "a", encoding="utf-8") as log_file:
        log_file.write("".join(chunks))
65
+
66
+
67
def initialize_chat_file(filename: str = "chat.md"):
    """
    Create the chat history file with a header if it does not already exist.

    An existing file is left untouched.
    """
    if os.path.exists(filename):
        return

    stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    header = f"""# Chat History
Last updated: {stamp}

This file contains the history of all chat conversations processed by the multi-node API system.

---
"""
    with open(filename, "w", encoding="utf-8") as chat_file:
        chat_file.write(header)
shared/credits_system.py ADDED
@@ -0,0 +1,323 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Cloud Credits System for SACCP Network
3
+ Handles credit tracking, earning, and spending in the distributed network
4
+ """
5
+
6
+ import json
7
+ import sqlite3
8
+ import time
9
+ from datetime import datetime
10
+ from typing import Optional, List, Dict, Any
11
+ from enum import Enum
12
+ from dataclasses import dataclass
13
+ from pydantic import BaseModel
14
+
15
+
16
class TransactionType(str, Enum):
    """Direction of a credit movement."""
    EARNED = "earned"
    SPENT = "spent"
    TRANSFERRED = "transferred"


class CreditReason(str, Enum):
    """Business reason attached to a credit transaction."""
    TASK_COMPLETION = "task_completion"
    RESOURCE_CONTRIBUTION = "resource_contribution"
    SERVICE_PURCHASE = "service_purchase"
    REFERRAL_BONUS = "referral_bonus"
    STAKING_REWARD = "staking_reward"
28
+
29
+
30
@dataclass
class CreditTransaction:
    """Represents a credit transaction"""
    transaction_id: str                       # unique id for the transaction record
    node_id: str                              # node whose balance is affected
    amount: float                             # positive for earnings; debits are stored negative
    transaction_type: TransactionType
    reason: CreditReason
    timestamp: int                            # unix timestamp (int(time.time()))
    service_type: Optional[str] = None        # service involved, if any
    metadata: Optional[Dict[str, Any]] = None  # stored as a JSON string in the DB
41
+
42
+
43
class CreditBalance(BaseModel):
    """Model for node credit balance"""
    node_id: str
    balance: float        # current spendable credits
    total_earned: float   # lifetime credits earned
    total_spent: float    # lifetime credits spent
    last_updated: int     # unix timestamp of the last balance change
50
+
51
+
52
+ class CreditsSystem:
53
+ """Main system for managing cloud credits in the SACCP network"""
54
+
55
+ def __init__(self, db_path: str = "./saccp_credits.db"):
56
+ self.db_path = db_path
57
+ self._init_db()
58
+
59
+ def _init_db(self):
60
+ """Initialize the credits database"""
61
+ conn = sqlite3.connect(self.db_path)
62
+ cursor = conn.cursor()
63
+
64
+ # Create balances table
65
+ cursor.execute('''
66
+ CREATE TABLE IF NOT EXISTS balances (
67
+ node_id TEXT PRIMARY KEY,
68
+ balance REAL DEFAULT 0.0,
69
+ total_earned REAL DEFAULT 0.0,
70
+ total_spent REAL DEFAULT 0.0,
71
+ last_updated INTEGER
72
+ )
73
+ ''')
74
+
75
+ # Create transactions table
76
+ cursor.execute('''
77
+ CREATE TABLE IF NOT EXISTS transactions (
78
+ transaction_id TEXT PRIMARY KEY,
79
+ node_id TEXT NOT NULL,
80
+ amount REAL NOT NULL,
81
+ transaction_type TEXT NOT NULL,
82
+ reason TEXT NOT NULL,
83
+ timestamp INTEGER NOT NULL,
84
+ service_type TEXT,
85
+ metadata TEXT -- JSON string
86
+ )
87
+ ''')
88
+
89
+ conn.commit()
90
+ conn.close()
91
+
92
+ def add_credits(self, node_id: str, amount: float, reason: CreditReason,
93
+ service_type: Optional[str] = None, metadata: Optional[Dict[str, Any]] = None) -> bool:
94
+ """Add credits to a node's balance"""
95
+ if amount <= 0:
96
+ return False
97
+
98
+ conn = sqlite3.connect(self.db_path)
99
+ cursor = conn.cursor()
100
+
101
+ try:
102
+ # Get current balance
103
+ cursor.execute('SELECT balance, total_earned FROM balances WHERE node_id = ?', (node_id,))
104
+ result = cursor.fetchone()
105
+
106
+ if result:
107
+ current_balance, total_earned = result
108
+ new_balance = current_balance + amount
109
+ new_total_earned = total_earned + amount
110
+ else:
111
+ new_balance = amount
112
+ new_total_earned = amount
113
+ # Insert new record if it doesn't exist
114
+ cursor.execute('''
115
+ INSERT INTO balances (node_id, balance, total_earned, total_spent, last_updated)
116
+ VALUES (?, ?, ?, ?, ?)
117
+ ''', (node_id, 0.0, 0.0, 0.0, int(time.time())))
118
+
119
+ # Update balance
120
+ cursor.execute('''
121
+ UPDATE balances
122
+ SET balance = ?, total_earned = ?, last_updated = ?
123
+ WHERE node_id = ?
124
+ ''', (new_balance, new_total_earned, int(time.time()), node_id))
125
+
126
+ # Record transaction
127
+ transaction_id = f"credit_{int(time.time())}_{node_id}_{hash(str(time.time()))}"
128
+ cursor.execute('''
129
+ INSERT INTO transactions
130
+ (transaction_id, node_id, amount, transaction_type, reason, timestamp, service_type, metadata)
131
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?)
132
+ ''', (
133
+ transaction_id,
134
+ node_id,
135
+ amount,
136
+ TransactionType.EARNED.value,
137
+ reason.value,
138
+ int(time.time()),
139
+ service_type,
140
+ json.dumps(metadata) if metadata else None
141
+ ))
142
+
143
+ conn.commit()
144
+ return True
145
+ except Exception as e:
146
+ conn.rollback()
147
+ print(f"Error adding credits: {e}")
148
+ return False
149
+ finally:
150
+ conn.close()
151
+
152
def spend_credits(self, node_id: str, amount: float, reason: CreditReason,
                  service_type: Optional[str] = None, metadata: Optional[Dict[str, Any]] = None) -> bool:
    """Spend credits from a node's balance.

    The debit is performed as a single conditional UPDATE guarded by
    ``balance >= ?`` so the read-check-write race of the original
    (SELECT balance, then UPDATE in separate statements, racing with
    other connections) cannot overdraw the account.

    Args:
        node_id: Node whose balance is debited.
        amount: Positive number of credits to spend.
        reason: Why the credits are being spent (recorded on the transaction).
        service_type: Optional service label stored with the transaction.
        metadata: Optional JSON-serializable extra data.

    Returns:
        True on success; False for a non-positive amount, an unknown node,
        insufficient balance, or a database error.
    """
    if amount <= 0:
        return False

    conn = sqlite3.connect(self.db_path)
    cursor = conn.cursor()

    try:
        now = int(time.time())

        # Atomic conditional debit: affects 0 rows when the node does not
        # exist OR the balance is insufficient — both map to False, exactly
        # like the original two-step check.
        cursor.execute('''
            UPDATE balances
            SET balance = balance - ?, total_spent = total_spent + ?, last_updated = ?
            WHERE node_id = ? AND balance >= ?
        ''', (amount, amount, now, node_id, amount))

        if cursor.rowcount == 0:
            return False  # Node missing or insufficient credits

        # Record transaction
        transaction_id = f"debit_{int(time.time())}_{node_id}_{hash(str(time.time()))}"
        cursor.execute('''
            INSERT INTO transactions
            (transaction_id, node_id, amount, transaction_type, reason, timestamp, service_type, metadata)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?)
        ''', (
            transaction_id,
            node_id,
            -amount,  # Negative because it's a debit
            TransactionType.SPENT.value,
            reason.value,
            now,
            service_type,
            json.dumps(metadata) if metadata else None
        ))

        conn.commit()
        return True
    except Exception as e:
        conn.rollback()
        print(f"Error spending credits: {e}")
        return False
    finally:
        conn.close()
206
+
207
def get_balance(self, node_id: str) -> CreditBalance:
    """Return the CreditBalance record for *node_id*.

    Nodes without a row in the ``balances`` table are reported as an
    all-zero balance stamped with the current time, rather than raising.
    """
    conn = sqlite3.connect(self.db_path)
    cursor = conn.cursor()

    cursor.execute('''
        SELECT balance, total_earned, total_spent, last_updated
        FROM balances
        WHERE node_id = ?
    ''', (node_id,))

    row = cursor.fetchone()
    conn.close()

    if row is None:
        # Unknown node: synthesize an empty balance.
        return CreditBalance(
            node_id=node_id,
            balance=0.0,
            total_earned=0.0,
            total_spent=0.0,
            last_updated=int(time.time())
        )

    return CreditBalance(
        node_id=node_id,
        balance=row[0],
        total_earned=row[1],
        total_spent=row[2],
        last_updated=row[3]
    )
239
+
240
def get_transaction_history(self, node_id: str, limit: int = 50) -> List[CreditTransaction]:
    """Return up to *limit* most-recent transactions for *node_id*, newest first."""
    conn = sqlite3.connect(self.db_path)
    cursor = conn.cursor()

    cursor.execute('''
        SELECT transaction_id, amount, transaction_type, reason, timestamp, service_type, metadata
        FROM transactions
        WHERE node_id = ?
        ORDER BY timestamp DESC
        LIMIT ?
    ''', (node_id, limit))

    rows = cursor.fetchall()
    conn.close()

    history: List[CreditTransaction] = []
    for tx_id, amount, tx_type, reason, ts, service_type, meta_json in rows:
        history.append(CreditTransaction(
            transaction_id=tx_id,
            node_id=node_id,
            amount=amount,
            transaction_type=TransactionType(tx_type),
            reason=CreditReason(reason),
            timestamp=ts,
            service_type=service_type,
            # metadata is persisted as a JSON blob; NULL means no metadata.
            metadata=json.loads(meta_json) if meta_json else None
        ))
    return history
274
+
275
def transfer_credits(self, from_node_id: str, to_node_id: str, amount: float,
                     reason: CreditReason = CreditReason.TRANSFERRED) -> bool:
    """Transfer credits from one node to another.

    Implemented as a debit on the sender followed by a credit on the
    receiver; if the credit step fails, the sender is refunded.

    NOTE(review): the two steps run in separate SQLite transactions, so
    this is best-effort rather than atomic — the compensating refund
    could itself fail and leave the sender short. Confirm whether a
    single-transaction implementation is required.

    Returns:
        True on success; False for a non-positive amount, insufficient
        sender balance, or a failed (and refunded) credit step.
    """
    if amount <= 0:
        return False

    # First spend from sender
    if not self.spend_credits(from_node_id, amount, reason):
        return False

    # Then add to receiver
    if not self.add_credits(to_node_id, amount, reason):
        # Rollback: if adding to receiver fails, refund sender
        self.add_credits(from_node_id, amount, CreditReason.REFUND,
                         metadata={"original_transaction": "transfer_failed"})
        return False

    return True
293
+
294
def get_top_nodes_by_balance(self, limit: int = 10) -> List[Dict[str, Any]]:
    """Return up to *limit* nodes ordered by descending credit balance.

    Each entry is a plain dict with ``node_id``, ``balance``,
    ``total_earned`` and ``total_spent`` keys.
    """
    conn = sqlite3.connect(self.db_path)
    cursor = conn.cursor()

    cursor.execute('''
        SELECT node_id, balance, total_earned, total_spent
        FROM balances
        ORDER BY balance DESC
        LIMIT ?
    ''', (limit,))

    rows = cursor.fetchall()
    conn.close()

    # Re-shape each SQL row into the public dict form.
    return [
        {
            "node_id": node_id,
            "balance": balance,
            "total_earned": total_earned,
            "total_spent": total_spent
        }
        for node_id, balance, total_earned, total_spent in rows
    ]
320
+
321
+
322
+ # Global instance of the credits system
323
+ credits_system = CreditsSystem()
shared/fault_tolerance.py ADDED
@@ -0,0 +1,371 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Fault Tolerance System for SACCP Network
3
+ Handles node failures, retries, task redistribution, and network resilience
4
+ """
5
+
6
+ import time
7
+ import threading
8
+ from typing import Dict, List, Optional, Any
9
+ from datetime import datetime, timedelta
10
+ from enum import Enum
11
+ import random
12
+ import asyncio
13
+
14
+
15
+ class FailureType(Enum):
16
+ NODE_DISCONNECTED = "node_disconnected"
17
+ TASK_TIMEOUT = "task_timeout"
18
+ HEARTBEAT_FAILED = "heartbeat_failed"
19
+ NETWORK_ERROR = "network_error"
20
+ RESOURCE_EXHAUSTED = "resource_exhausted"
21
+
22
+
23
+ class RecoveryStrategy(Enum):
24
+ RETRY = "retry"
25
+ REDISTRIBUTE = "redistribute"
26
+ FAIL_OVER = "fail_over"
27
+ DROP_TASK = "drop_task"
28
+
29
+
30
+ class NodeStatus(Enum):
31
+ HEALTHY = "healthy"
32
+ UNRESPONSIVE = "unresponsive"
33
+ FAILED = "failed"
34
+ RECOVERING = "recovering"
35
+
36
+
37
+ class FaultToleranceManager:
38
+ """
39
+ Manages fault tolerance across the SACCP network
40
+ """
41
+
42
+ def __init__(self):
43
+ self.nodes: Dict[str, Dict[str, Any]] = {}
44
+ self.active_tasks: Dict[str, Dict[str, Any]] = {}
45
+ self.failed_tasks: List[Dict[str, Any]] = []
46
+ self.failure_history: List[Dict[str, Any]] = []
47
+ self.recovery_queue: List[Dict[str, Any]] = []
48
+ self.lock = threading.Lock()
49
+
50
+ # Configuration
51
+ self.heartbeat_interval = 30 # seconds
52
+ self.heartbeat_timeout = 60 # seconds
53
+ self.max_retries = 3
54
+ self.retry_delay = 5 # seconds
55
+ self.network_monitoring_enabled = True
56
+
57
+ # Start monitoring thread
58
+ self.monitoring_thread = threading.Thread(target=self._network_monitoring_loop, daemon=True)
59
+ self.monitoring_thread.start()
60
+
61
+ def register_node(self, node_id: str, node_type: str, capabilities: Dict[str, Any]) -> bool:
62
+ """Register a node with the fault tolerance system"""
63
+ with self.lock:
64
+ self.nodes[node_id] = {
65
+ "node_id": node_id,
66
+ "node_type": node_type,
67
+ "capabilities": capabilities,
68
+ "status": NodeStatus.HEALTHY,
69
+ "last_heartbeat": time.time(),
70
+ "failure_count": 0,
71
+ "consecutive_failures": 0,
72
+ "tasks_processed": 0,
73
+ "tasks_failed": 0
74
+ }
75
+ return True
76
+
77
+ def remove_node(self, node_id: str) -> bool:
78
+ """Remove a node from the system (when permanently offline)"""
79
+ with self.lock:
80
+ if node_id in self.nodes:
81
+ del self.nodes[node_id]
82
+
83
+ # Reassign tasks assigned to this node
84
+ self._reassign_node_tasks(node_id)
85
+ return True
86
+ return False
87
+
88
+ def heartbeat(self, node_id: str) -> bool:
89
+ """Process heartbeat from a node"""
90
+ with self.lock:
91
+ if node_id not in self.nodes:
92
+ return False
93
+
94
+ node = self.nodes[node_id]
95
+ node["last_heartbeat"] = time.time()
96
+ node["status"] = NodeStatus.HEALTHY
97
+ node["consecutive_failures"] = 0 # Reset on successful heartbeat
98
+
99
+ return True
100
+
101
+ def record_task_assignment(self, task_id: str, node_id: str, task_details: Dict[str, Any]) -> bool:
102
+ """Record that a task was assigned to a node"""
103
+ with self.lock:
104
+ self.active_tasks[task_id] = {
105
+ "task_id": task_id,
106
+ "node_id": node_id,
107
+ "assignment_time": time.time(),
108
+ "task_details": task_details,
109
+ "retry_count": 0,
110
+ "status": "assigned"
111
+ }
112
+ return True
113
+
114
+ def record_task_completion(self, task_id: str, node_id: str) -> bool:
115
+ """Record successful task completion"""
116
+ with self.lock:
117
+ if task_id in self.active_tasks:
118
+ del self.active_tasks[task_id]
119
+
120
+ # Update node statistics
121
+ if node_id in self.nodes:
122
+ self.nodes[node_id]["tasks_processed"] += 1
123
+
124
+ return True
125
+ return False
126
+
127
+ def record_task_failure(self, task_id: str, node_id: str, failure_type: FailureType,
128
+ error_details: Optional[str] = None) -> RecoveryStrategy:
129
+ """Record task failure and determine recovery strategy"""
130
+ with self.lock:
131
+ # Record the failure
132
+ failure_record = {
133
+ "task_id": task_id,
134
+ "node_id": node_id,
135
+ "failure_type": failure_type.value,
136
+ "error_details": error_details,
137
+ "timestamp": time.time()
138
+ }
139
+ self.failure_history.append(failure_record)
140
+
141
+ # Update node failure statistics
142
+ if node_id in self.nodes:
143
+ node = self.nodes[node_id]
144
+ node["tasks_failed"] += 1
145
+ node["failure_count"] += 1
146
+ node["consecutive_failures"] += 1
147
+
148
+ # Check if node should be marked as failed
149
+ if node["consecutive_failures"] >= 3: # 3 consecutive failures
150
+ node["status"] = NodeStatus.FAILED
151
+
152
+ # Get the task record
153
+ task_record = self.active_tasks.get(task_id)
154
+ if not task_record:
155
+ return RecoveryStrategy.DROP_TASK
156
+
157
+ # Determine recovery strategy based on failure type and retry count
158
+ if task_record["retry_count"] < self.max_retries:
159
+ # For timeout failures, try redistributing to a different node
160
+ if failure_type == FailureType.TASK_TIMEOUT:
161
+ return RecoveryStrategy.REDISTRIBUTE
162
+ # For node disconnections, try fail-over to another node
163
+ elif failure_type == FailureType.NODE_DISCONNECTED:
164
+ return RecoveryStrategy.FAIL_OVER
165
+ # For other failures, try retrying on the same node
166
+ else:
167
+ return RecoveryStrategy.RETRY
168
+ else:
169
+ # Max retries reached, drop the task
170
+ if task_id in self.active_tasks:
171
+ del self.active_tasks[task_id]
172
+ self.failed_tasks.append(task_record)
173
+ return RecoveryStrategy.DROP_TASK
174
+
175
+ def _reassign_node_tasks(self, failed_node_id: str):
176
+ """Reassign tasks from a failed node to healthy nodes"""
177
+ tasks_to_reassign = []
178
+
179
+ with self.lock:
180
+ # Find tasks assigned to the failed node
181
+ for task_id, task_record in self.active_tasks.items():
182
+ if task_record["node_id"] == failed_node_id:
183
+ tasks_to_reassign.append(task_id)
184
+
185
+ # Reassign each task
186
+ for task_id in tasks_to_reassign:
187
+ self._attempt_task_redistribution(task_id)
188
+
189
+ def _attempt_task_redistribution(self, task_id: str) -> bool:
190
+ """Attempt to redistribute a task to a different node"""
191
+ with self.lock:
192
+ if task_id not in self.active_tasks:
193
+ return False
194
+
195
+ task_record = self.active_tasks[task_id]
196
+
197
+ # Find a healthy alternative node
198
+ new_node = self._find_alternative_node(task_record["task_details"])
199
+ if not new_node:
200
+ # No alternative node available, retry later
201
+ return False
202
+
203
+ # Update task assignment
204
+ old_node_id = task_record["node_id"]
205
+ task_record["node_id"] = new_node["node_id"]
206
+ task_record["retry_count"] += 1
207
+ task_record["assignment_time"] = time.time()
208
+
209
+ # Update node stats
210
+ if old_node_id in self.nodes:
211
+ self.nodes[old_node_id]["tasks_failed"] += 1
212
+ if new_node["node_id"] in self.nodes:
213
+ self.nodes[new_node["node_id"]]["tasks_processed"] += 1
214
+
215
+ return True
216
+
217
+ def _find_alternative_node(self, task_requirements: Dict[str, Any]) -> Optional[Dict[str, Any]]:
218
+ """Find an alternative healthy node that can handle the task"""
219
+ with self.lock:
220
+ for node_id, node in self.nodes.items():
221
+ if node["status"] == NodeStatus.HEALTHY:
222
+ # Check if node meets task requirements
223
+ if self._node_meets_requirements(node, task_requirements):
224
+ return node
225
+ return None
226
+
227
+ def _node_meets_requirements(self, node: Dict[str, Any], requirements: Dict[str, Any]) -> bool:
228
+ """Check if a node meets specific requirements for a task"""
229
+ # Check if node has required resources
230
+ capabilities = node["capabilities"]
231
+
232
+ # Example: Check if the node has enough memory for the task
233
+ required_memory = requirements.get("memory_required", 0)
234
+ available_memory = capabilities.get("memory_gb", 0)
235
+
236
+ if required_memory > available_memory:
237
+ return False
238
+
239
+ # Check if node type is compatible with task type
240
+ required_node_types = requirements.get("compatible_node_types", [])
241
+ if required_node_types and node["node_type"] not in required_node_types:
242
+ return False
243
+
244
+ return True
245
+
246
+ def _network_monitoring_loop(self):
247
+ """Background thread to monitor network health and handle failures"""
248
+ while self.network_monitoring_enabled:
249
+ time.sleep(1) # Check every second
250
+
251
+ # Check for node timeouts
252
+ if int(time.time()) % 10 == 0: # Every 10 seconds
253
+ self._check_node_health()
254
+
255
+ # Process recovery queue
256
+ self._process_recovery_queue()
257
+
258
+ def _check_node_health(self):
259
+ """Check for nodes that have missed heartbeats"""
260
+ current_time = time.time()
261
+
262
+ with self.lock:
263
+ for node_id, node in self.nodes.items():
264
+ time_since_heartbeat = current_time - node["last_heartbeat"]
265
+
266
+ if time_since_heartbeat > self.heartbeat_timeout:
267
+ # Node is unresponsive
268
+ if node["status"] != NodeStatus.FAILED:
269
+ node["status"] = NodeStatus.UNRESPONSIVE
270
+
271
+ # Record the failure
272
+ failure_record = {
273
+ "node_id": node_id,
274
+ "failure_type": FailureType.HEARTBEAT_FAILED.value,
275
+ "timestamp": current_time,
276
+ "details": f"Node {node_id} missed heartbeat for {time_since_heartbeat}s"
277
+ }
278
+ self.failure_history.append(failure_record)
279
+
280
+ # Add to recovery queue
281
+ self.recovery_queue.append({
282
+ "type": "node_recovery",
283
+ "node_id": node_id,
284
+ "action": "reconnect",
285
+ "timestamp": current_time + self.retry_delay
286
+ })
287
+
288
+ def _process_recovery_queue(self):
289
+ """Process items in the recovery queue"""
290
+ current_time = time.time()
291
+ items_to_process = []
292
+
293
+ with self.lock:
294
+ for item in self.recovery_queue[:]: # Copy list to avoid modification during iteration
295
+ if current_time >= item["timestamp"]:
296
+ items_to_process.append(item)
297
+
298
+ # Process each item outside the lock to avoid blocking
299
+ for item in items_to_process:
300
+ self._execute_recovery_action(item)
301
+
302
+ # Remove processed item from queue
303
+ with self.lock:
304
+ if item in self.recovery_queue:
305
+ self.recovery_queue.remove(item)
306
+
307
+ def _execute_recovery_action(self, recovery_item: Dict[str, Any]):
308
+ """Execute a specific recovery action"""
309
+ action_type = recovery_item["type"]
310
+
311
+ if action_type == "node_recovery":
312
+ node_id = recovery_item["node_id"]
313
+
314
+ if recovery_item["action"] == "reconnect":
315
+ # Try to reconnect by marking node as healthy
316
+ # In a real implementation, this would try to reestablish connection
317
+ with self.lock:
318
+ if node_id in self.nodes:
319
+ node = self.nodes[node_id]
320
+ if node["status"] in [NodeStatus.UNRESPONSIVE, NodeStatus.FAILED]:
321
+ # In a real system, we would attempt reconnection
322
+ # For simulation, we'll just reset to healthy
323
+ node["status"] = NodeStatus.HEALTHY
324
+ node["consecutive_failures"] = 0
325
+
326
+ elif action_type == "task_redistribution":
327
+ task_id = recovery_item["task_id"]
328
+ # Attempt to redistribute the task
329
+ self._attempt_task_redistribution(task_id)
330
+
331
+ def get_network_health(self) -> Dict[str, Any]:
332
+ """Get overall network health statistics"""
333
+ with self.lock:
334
+ healthy_nodes = 0
335
+ unresponsive_nodes = 0
336
+ failed_nodes = 0
337
+
338
+ for node in self.nodes.values():
339
+ if node["status"] == NodeStatus.HEALTHY:
340
+ healthy_nodes += 1
341
+ elif node["status"] == NodeStatus.UNRESPONSIVE:
342
+ unresponsive_nodes += 1
343
+ elif node["status"] == NodeStatus.FAILED:
344
+ failed_nodes += 1
345
+
346
+ total_tasks = len(self.active_tasks) + len(self.failed_tasks)
347
+
348
+ return {
349
+ "total_nodes": len(self.nodes),
350
+ "healthy_nodes": healthy_nodes,
351
+ "unresponsive_nodes": unresponsive_nodes,
352
+ "failed_nodes": failed_nodes,
353
+ "active_tasks": len(self.active_tasks),
354
+ "failed_tasks": len(self.failed_tasks),
355
+ "total_tasks_processed": sum(node["tasks_processed"] for node in self.nodes.values()),
356
+ "total_tasks_failed": sum(node["tasks_failed"] for node in self.nodes.values()),
357
+ "recovery_attempts": len(self.recovery_queue)
358
+ }
359
+
360
+ def get_failed_nodes(self) -> List[Dict[str, Any]]:
361
+ """Get list of currently failed nodes"""
362
+ with self.lock:
363
+ failed = []
364
+ for node in self.nodes.values():
365
+ if node["status"] == NodeStatus.FAILED:
366
+ failed.append(node)
367
+ return failed
368
+
369
+
370
+ # Global instance
371
+ fault_tolerance_manager = FaultToleranceManager()
shared/load_balancer.py ADDED
@@ -0,0 +1,458 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Dynamic Load Balancer for SACCP Network
3
+ Distributes tasks across different node types based on availability, capacity, and performance
4
+ """
5
+
6
+ import time
7
+ import heapq
8
+ from typing import Dict, List, Optional, Any, Tuple
9
+ from enum import Enum
10
+ from dataclasses import dataclass
11
+ from datetime import datetime, timedelta
12
+ import threading
13
+ import random
14
+
15
+
16
+ class TaskPriority(Enum):
17
+ LOW = 1
18
+ NORMAL = 2
19
+ HIGH = 3
20
+ CRITICAL = 4
21
+
22
+
23
+ class NodeType(Enum):
24
+ HEAD = "head"
25
+ RAM = "ram"
26
+ DISK = "disk"
27
+ COMPUTE = "compute"
28
+ GPU = "gpu"
29
+ TPU = "tpu"
30
+ NPU = "npu"
31
+
32
+
33
+ @dataclass
34
+ class Task:
35
+ """Represents a task to be distributed"""
36
+ task_id: str
37
+ task_type: str
38
+ priority: TaskPriority
39
+ resource_requirements: Dict[str, Any] # CPU, memory, etc.
40
+ estimated_duration: float # in seconds
41
+ created_at: float
42
+ assigned_node: Optional[str] = None
43
+ assigned_at: Optional[float] = None
44
+
45
+
46
+ @dataclass
47
+ class Node:
48
+ """Represents a node in the network"""
49
+ node_id: str
50
+ node_type: NodeType
51
+ capabilities: Dict[str, Any] # CPU, memory, etc.
52
+ current_load: float
53
+ tasks_queued: int
54
+ tasks_completed: int
55
+ tasks_failed: int
56
+ last_heartbeat: float
57
+ performance_score: float # 0.0-1.0 based on historical performance
58
+ is_available: bool = True
59
+ max_concurrent_tasks: int = 10
60
+ current_tasks: int = 0
61
+
62
+
63
+ class LoadBalancer:
64
+ """
65
+ Dynamic load balancer that distributes tasks across node types
66
+ """
67
+
68
+ def __init__(self):
69
+ self.nodes: Dict[str, Node] = {}
70
+ self.task_queue: List[Tuple[int, float, Task]] = [] # Priority queue: (-priority, creation_time, task)
71
+ self.assigned_tasks: Dict[str, str] = {} # task_id -> node_id
72
+ self.node_stats: Dict[str, Dict[str, Any]] = {}
73
+ self.lock = threading.Lock()
74
+
75
+ # Configuration
76
+ self.heartbeat_timeout = 90 # seconds
77
+ self.task_timeout = 300 # seconds
78
+ self.load_balancing_algorithm = "weighted_least_connections"
79
+
80
+ def register_node(self, node_id: str, node_type: NodeType, capabilities: Dict[str, Any]) -> bool:
81
+ """Register a node with the load balancer"""
82
+ with self.lock:
83
+ self.nodes[node_id] = Node(
84
+ node_id=node_id,
85
+ node_type=node_type,
86
+ capabilities=capabilities,
87
+ current_load=0.0,
88
+ tasks_queued=0,
89
+ tasks_completed=0,
90
+ tasks_failed=0,
91
+ last_heartbeat=time.time(),
92
+ performance_score=0.8, # Default performance score
93
+ max_concurrent_tasks=capabilities.get("max_concurrent_tasks", 10)
94
+ )
95
+
96
+ # Initialize node stats
97
+ self.node_stats[node_id] = {
98
+ "avg_task_duration": 0,
99
+ "success_rate": 1.0,
100
+ "response_time_avg": 0.1
101
+ }
102
+
103
+ return True
104
+
105
+ def heartbeat_node(self, node_id: str) -> bool:
106
+ """Update node heartbeat"""
107
+ with self.lock:
108
+ if node_id in self.nodes:
109
+ self.nodes[node_id].last_heartbeat = time.time()
110
+ self.nodes[node_id].is_available = True
111
+ return True
112
+ return False
113
+
114
+ def heartbeat_batch_nodes(self, node_ids: List[str]) -> int:
115
+ """Update heartbeats for multiple nodes"""
116
+ count = 0
117
+ for node_id in node_ids:
118
+ if self.heartbeat_node(node_id):
119
+ count += 1
120
+ return count
121
+
122
+ def deregister_node(self, node_id: str) -> bool:
123
+ """Remove a node from the load balancer"""
124
+ with self.lock:
125
+ if node_id in self.nodes:
126
+ # Move assigned tasks to queue for reassignment
127
+ self._reassign_node_tasks(node_id)
128
+ del self.nodes[node_id]
129
+ if node_id in self.node_stats:
130
+ del self.node_stats[node_id]
131
+ return True
132
+ return False
133
+
134
+ def submit_task(self, task: Task) -> Optional[str]:
135
+ """Submit a task for distribution"""
136
+ with self.lock:
137
+ # Add task to priority queue
138
+ # Priority: Higher priority first, then oldest first
139
+ priority_key = (-task.priority.value, task.created_at)
140
+ heapq.heappush(self.task_queue, priority_key + (task,))
141
+
142
+ # Try to assign the task immediately
143
+ node_id = self._find_suitable_node(task)
144
+ if node_id:
145
+ assigned = self._assign_task_to_node(task.task_id, node_id)
146
+ if assigned:
147
+ return node_id
148
+ return None # Task queued but not yet assigned
149
+
150
+ def get_task_assignment(self, task_id: str) -> Optional[str]:
151
+ """Get the node assigned to a task"""
152
+ with self.lock:
153
+ return self.assigned_tasks.get(task_id)
154
+
155
+ def complete_task(self, task_id: str, node_id: str, success: bool = True, duration: float = 0) -> bool:
156
+ """Mark a task as completed"""
157
+ with self.lock:
158
+ # Update node stats
159
+ if node_id in self.nodes:
160
+ node = self.nodes[node_id]
161
+ if success:
162
+ node.tasks_completed += 1
163
+ node.current_tasks -= 1
164
+ else:
165
+ node.tasks_failed += 1
166
+ node.current_tasks -= 1
167
+
168
+ # Update task queue count
169
+ node.tasks_queued = max(0, node.tasks_queued - 1)
170
+
171
+ # Update node stats for performance calculation
172
+ if node_id in self.node_stats:
173
+ stats = self.node_stats[node_id]
174
+ if success and duration > 0:
175
+ # Update average task duration
176
+ if stats["avg_task_duration"] == 0:
177
+ stats["avg_task_duration"] = duration
178
+ else:
179
+ stats["avg_task_duration"] = (
180
+ stats["avg_task_duration"] * 0.7 + duration * 0.3
181
+ )
182
+
183
+ # Update success rate
184
+ total_tasks = node.tasks_completed + node.tasks_failed
185
+ if total_tasks > 0:
186
+ stats["success_rate"] = node.tasks_completed / total_tasks
187
+
188
+ # Update node performance score
189
+ self._update_node_performance_score(node_id)
190
+
191
+ # Remove from assigned tasks
192
+ if task_id in self.assigned_tasks:
193
+ del self.assigned_tasks[task_id]
194
+
195
+ # Try to assign new tasks to available nodes
196
+ self._attempt_task_assignments()
197
+
198
+ return True
199
+
200
+ def _find_suitable_node(self, task: Task) -> Optional[str]:
201
+ """Find the most suitable node for a task"""
202
+ with self.lock:
203
+ # Get all available nodes
204
+ available_nodes = [
205
+ node for node in self.nodes.values()
206
+ if self._is_node_suitable(node, task)
207
+ ]
208
+
209
+ if not available_nodes:
210
+ return None
211
+
212
+ # Sort nodes by the selected algorithm
213
+ if self.load_balancing_algorithm == "weighted_least_connections":
214
+ # Prioritize nodes with fewer connections and higher performance
215
+ available_nodes.sort(key=lambda n: (
216
+ n.current_tasks / n.max_concurrent_tasks, # Load factor
217
+ -n.performance_score # Higher performance first
218
+ ))
219
+ elif self.load_balancing_algorithm == "weighted_response_time":
220
+ # Prioritize nodes with better historical response time
221
+ available_nodes.sort(key=lambda n: (
222
+ -n.performance_score, # Higher performance first
223
+ n.current_tasks / n.max_concurrent_tasks # Lower load first
224
+ ))
225
+ elif self.load_balancing_algorithm == "node_type_priority":
226
+ # Prioritize specific node type for the task
227
+ preferred_type = task.resource_requirements.get("preferred_node_type")
228
+ available_nodes.sort(key=lambda n: (
229
+ 0 if n.node_type.value == preferred_type else 1, # Preferred type first
230
+ n.current_tasks / n.max_concurrent_tasks, # Then lower load
231
+ -n.performance_score # Then higher performance
232
+ ))
233
+ else:
234
+ # Default: least connections with performance consideration
235
+ available_nodes.sort(key=lambda n: (
236
+ n.current_tasks / n.max_concurrent_tasks,
237
+ -n.performance_score
238
+ ))
239
+
240
+ # Return the best node (first in sorted list)
241
+ if available_nodes:
242
+ return available_nodes[0].node_id
243
+
244
+ return None
245
+
246
+ def _is_node_suitable(self, node: Node, task: Task) -> bool:
247
+ """Check if a node is suitable for a task"""
248
+ if not node.is_available:
249
+ return False
250
+
251
+ # Check if node has timed out
252
+ if time.time() - node.last_heartbeat > self.heartbeat_timeout:
253
+ node.is_available = False
254
+ return False
255
+
256
+ # Check node type compatibility
257
+ required_types = task.resource_requirements.get("compatible_node_types", [])
258
+ if required_types and node.node_type.value not in required_types:
259
+ return False
260
+
261
+ # Check resource requirements
262
+ reqs = task.resource_requirements
263
+ caps = node.capabilities
264
+
265
+ # Check memory requirement
266
+ if reqs.get("memory_required", 0) > caps.get("memory_gb", 0):
267
+ return False
268
+
269
+ # Check GPU requirement
270
+ if reqs.get("needs_gpu", False) and not caps.get("gpu_available", False):
271
+ return False
272
+
273
+ # Check if node has reached max concurrent tasks
274
+ if node.current_tasks >= node.max_concurrent_tasks:
275
+ return False
276
+
277
+ # Check if node has capacity based on current load
278
+ if node.current_load > 0.9: # Node is over 90% loaded
279
+ return False
280
+
281
+ return True
282
+
283
+ def _assign_task_to_node(self, task_id: str, node_id: str) -> bool:
284
+ """Assign a task to a specific node"""
285
+ with self.lock:
286
+ if node_id not in self.nodes:
287
+ return False
288
+
289
+ node = self.nodes[node_id]
290
+ task = self._get_task_by_id(task_id)
291
+
292
+ if not task:
293
+ return False
294
+
295
+ # Update node statistics
296
+ node.current_tasks += 1
297
+ node.tasks_queued += 1
298
+
299
+ # Update assigned tasks
300
+ self.assigned_tasks[task_id] = node_id
301
+ task.assigned_node = node_id
302
+ task.assigned_at = time.time()
303
+
304
+ # Update node load (estimated based on task duration)
305
+ estimated_load = min(0.2, task.estimated_duration / 3600.0) # Cap at 20% for long tasks
306
+ node.current_load = min(1.0, node.current_load + estimated_load)
307
+
308
+ return True
309
+
310
+ def _get_task_by_id(self, task_id: str) -> Optional[Task]:
311
+ """Get a task by ID from the queue"""
312
+ # Find in priority queue
313
+ for _, _, task in self.task_queue:
314
+ if task.task_id == task_id:
315
+ return task
316
+ return None
317
+
318
+ def _reassign_node_tasks(self, node_id: str):
319
+ """Reassign tasks from a failed node"""
320
+ tasks_to_reassign = []
321
+
322
+ # Find tasks assigned to this node
323
+ for task_id, assigned_node_id in self.assigned_tasks.items():
324
+ if assigned_node_id == node_id:
325
+ tasks_to_reassign.append(task_id)
326
+
327
+ # Try to reassign each task
328
+ for task_id in tasks_to_reassign:
329
+ task = self._get_task_by_id(task_id)
330
+ if task:
331
+ # Put task back in queue for reassignment
332
+ self.submit_task(task)
333
+ if task_id in self.assigned_tasks:
334
+ del self.assigned_tasks[task_id]
335
+
336
+ def _attempt_task_assignments(self):
337
+ """Try to assign queued tasks to available nodes"""
338
+ with self.lock:
339
+ # Make a copy of the queue to iterate without modification issues
340
+ tasks_to_retry = []
341
+
342
+ while self.task_queue:
343
+ priority, creation_time, task = heapq.heappop(self.task_queue)
344
+
345
+ # Check if task is expired
346
+ if time.time() - task.created_at > self.task_timeout:
347
+ continue # Skip expired tasks
348
+
349
+ # Try to assign the task
350
+ node_id = self._find_suitable_node(task)
351
+ if node_id:
352
+ if self._assign_task_to_node(task.task_id, node_id):
353
+ # Successfully assigned, don't add back to queue
354
+ continue
355
+ else:
356
+ # Assignment failed, add back to retry list
357
+ tasks_to_retry.append((priority, creation_time, task))
358
+ else:
359
+ # No suitable node found, add back to retry list
360
+ tasks_to_retry.append((priority, creation_time, task))
361
+
362
+ # Put unassigned tasks back in the queue
363
+ for item in tasks_to_retry:
364
+ heapq.heappush(self.task_queue, item)
365
+
366
+ def _update_node_performance_score(self, node_id: str):
367
+ """Update the performance score for a node based on its stats"""
368
+ if node_id not in self.nodes or node_id not in self.node_stats:
369
+ return
370
+
371
+ node = self.nodes[node_id]
372
+ stats = self.node_stats[node_id]
373
+
374
+ # Calculate performance score based on multiple factors
375
+ total_tasks = node.tasks_completed + node.tasks_failed
376
+ success_rate = stats["success_rate"]
377
+
378
+ # Base score on success rate (60%), response time (25%), and load (15%)
379
+ success_weight = 0.6
380
+ response_weight = 0.25
381
+ load_weight = 0.15
382
+
383
+ # Success rate contribution (0.0 to 1.0)
384
+ success_score = success_rate
385
+
386
+ # Response time contribution (better response = higher score)
387
+ avg_duration = stats["avg_task_duration"]
388
+ response_score = 1.0 / (1.0 + avg_duration / 100.0) # Normalize
389
+
390
+ # Load contribution (avoid overloading high-performing nodes)
391
+ load_score = 1.0 - min(1.0, node.current_load)
392
+
393
+ # Calculate final score
394
+ performance_score = (
395
+ success_score * success_weight +
396
+ response_score * response_weight +
397
+ load_score * load_weight
398
+ )
399
+
400
+ node.performance_score = min(1.0, max(0.0, performance_score))
401
+
402
+ def get_node_loads(self) -> Dict[str, float]:
403
+ """Get current load for each node"""
404
+ with self.lock:
405
+ return {node_id: node.current_load for node_id, node in self.nodes.items()}
406
+
407
+ def get_node_status(self) -> List[Dict[str, Any]]:
408
+ """Get comprehensive status of all nodes"""
409
+ with self.lock:
410
+ status_list = []
411
+ for node_id, node in self.nodes.items():
412
+ # Check if node is still active
413
+ is_active = time.time() - node.last_heartbeat < self.heartbeat_timeout
414
+ node.is_available = is_active
415
+
416
+ status_list.append({
417
+ "node_id": node.node_id,
418
+ "node_type": node.node_type.value,
419
+ "is_available": is_active,
420
+ "current_load": node.current_load,
421
+ "current_tasks": node.current_tasks,
422
+ "tasks_queued": node.tasks_queued,
423
+ "tasks_completed": node.tasks_completed,
424
+ "tasks_failed": node.tasks_failed,
425
+ "performance_score": node.performance_score,
426
+ "max_concurrent_tasks": node.max_concurrent_tasks,
427
+ "capabilities": node.capabilities,
428
+ "last_heartbeat": node.last_heartbeat
429
+ })
430
+
431
+ return status_list
432
+
433
+ def get_task_queue_status(self) -> Dict[str, Any]:
434
+ """Get status of the task queue"""
435
+ with self.lock:
436
+ return {
437
+ "total_queued_tasks": len(self.task_queue),
438
+ "priority_distribution": {
439
+ "critical": len([t for _, _, t in self.task_queue if t.priority == TaskPriority.CRITICAL]),
440
+ "high": len([t for _, _, t in self.task_queue if t.priority == TaskPriority.HIGH]),
441
+ "normal": len([t for _, _, t in self.task_queue if t.priority == TaskPriority.NORMAL]),
442
+ "low": len([t for _, _, t in self.task_queue if t.priority == TaskPriority.LOW])
443
+ },
444
+ "average_wait_time": self._calculate_avg_wait_time()
445
+ }
446
+
447
+ def _calculate_avg_wait_time(self) -> float:
448
+ """Calculate average wait time for tasks in queue"""
449
+ if not self.task_queue:
450
+ return 0
451
+
452
+ current_time = time.time()
453
+ total_wait = sum(current_time - task.created_at for _, _, task in self.task_queue)
454
+ return total_wait / len(self.task_queue) if self.task_queue else 0
455
+
456
+
457
+ # Global instance
458
+ load_balancer = LoadBalancer()
shared/models.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel
2
+ from typing import List, Optional, Dict, Any
3
+ from enum import Enum
4
+
5
+
6
class NodeType(str, Enum):
    """Kinds of nodes in the SACCP network (str-valued for clean JSON/pydantic use).

    NOTE(review): this enum is duplicated verbatim in shared/node_types.py —
    keep the two definitions in sync (or consolidate to one module).
    """
    HEAD = "head"        # coordinator node
    RAM = "ram"          # memory-contribution node
    DISK = "disk"        # storage node
    COMPUTE = "compute"  # generic CPU compute node
    GPU = "gpu"
    TPU = "tpu"
    NPU = "npu"
14
+
15
+
16
class ChatMessage(BaseModel):
    """One turn of a chat conversation."""
    role: str  # "user" or "assistant"
    content: str
19
+
20
+
21
class ChatRequest(BaseModel):
    """OpenAI-style chat-completion request body accepted by the worker."""
    messages: List[ChatMessage]
    model: str = "sam-x-nano"  # logical model name; the worker resolves it to an available model
    max_tokens: Optional[int] = 512
    temperature: Optional[float] = 0.8
    top_k: Optional[int] = 40
    top_p: Optional[float] = 0.9
    repetition_penalty: Optional[float] = 1.1
    stream: Optional[bool] = False  # Support for streaming
    use_token_distribution: Optional[bool] = False  # Enable token-by-token distribution for autoregressive models
31
+
32
+
33
class ChatResponse(BaseModel):
    """OpenAI-style non-streaming chat-completion response.

    NOTE(review): ``created`` has no default — constructors must supply it
    or pydantic validation fails.
    """
    id: str
    object: str = "chat.completion"
    created: int  # Unix timestamp (seconds)
    model: str
    choices: List[Dict[str, Any]]
    usage: Optional[Dict[str, int]] = None
40
+
41
+
42
class StreamChoice(BaseModel):
    """A single choice entry inside a streaming chunk."""
    index: int
    delta: Dict[str, Any]  # For streaming, contains the delta content
    finish_reason: Optional[str] = None  # None until the terminal chunk ("stop")
46
+
47
+
48
class ChatStreamResponse(BaseModel):
    """OpenAI-style streaming chunk (one SSE `data:` payload)."""
    id: str
    object: str = "chat.completion.chunk"
    created: int  # Unix timestamp (seconds)
    model: str
    choices: List[StreamChoice]
54
+
55
+
56
class WorkerStatus(BaseModel):
    """Self-reported status of a worker node."""
    model_name: str
    node_type: Optional[NodeType] = None
    is_active: bool
    load: float  # current load fraction/score — units defined by the reporter
    last_heartbeat: int  # Unix timestamp of last heartbeat (seconds)
    capabilities: Optional[Dict[str, Any]] = None
63
+
64
+
65
class TaskFileRequest(BaseModel):
    """Request to submit a task (with payload) to the SACCP network."""
    task_type: str
    model_name: str
    task_data: Dict[str, Any]
    priority: str = "normal"  # e.g. "critical" / "high" / "normal" / "low"
    max_workers: int = 1
shared/node_types.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel
2
+ from typing import List, Optional, Dict, Any
3
+ from enum import Enum
4
+
5
+
6
class NodeType(str, Enum):
    """Kinds of nodes in the SACCP network (str-valued for clean JSON/pydantic use).

    NOTE(review): duplicate of the enum in shared/models.py — keep in sync.
    """
    HEAD = "head"        # coordinator node
    RAM = "ram"          # memory-contribution node
    DISK = "disk"        # storage node
    COMPUTE = "compute"  # generic CPU compute node
    GPU = "gpu"
    TPU = "tpu"
    NPU = "npu"
14
+
15
+
16
class NodeCapabilities(BaseModel):
    """Capabilities of a node in the SACCP network"""
    node_type: NodeType
    cpu_count: int
    memory_gb: float
    disk_space_gb: float
    gpu_available: bool
    gpu_info: Optional[Dict[str, Any]] = None  # free-form accelerator details
    tpu_available: bool
    npu_available: bool
    network_bandwidth_mbps: Optional[float] = None
    uptime_hours: Optional[float] = None
    smilyai_approved: bool = False  # For HEAD nodes approval
    performance_score: float = 1.0  # relative scheduling weight (1.0 = baseline)
30
+
31
+
32
class NodeRegistrationRequest(BaseModel):
    """Request model for node registration with the SACCP network"""
    node_id: str
    endpoint: str  # base URL at which the network can reach this node
    capabilities: NodeCapabilities
    node_version: str = "1.0.0"
38
+
39
+
40
class NodeRegistrationResponse(BaseModel):
    """Response model for node registration"""
    success: bool
    node_id: str
    message: str
    approval_status: str  # pending, approved, rejected
46
+
47
+
48
class NodeListResponse(BaseModel):
    """Response model for listing network nodes"""
    nodes: List[Dict[str, Any]]
    total_nodes: int   # all registered nodes
    online_nodes: int  # subset currently heartbeating
53
+
54
+
55
class NodeStatus(BaseModel):
    """Status of a node in the network"""
    node_id: str
    node_type: NodeType
    endpoint: str
    is_online: bool
    last_heartbeat: int  # Unix timestamp (seconds)
    capabilities: NodeCapabilities
    tasks_completed: int
    tasks_failed: int
    credits_earned: float  # lifetime credits, see CreditTransaction
66
+
67
+
68
class CreditTransaction(BaseModel):
    """Model for credit transactions in the SACCP ecosystem"""
    transaction_id: str
    node_id: str
    amount: float  # positive credit amount; direction given by transaction_type
    transaction_type: str  # 'earned', 'spent', 'transferred'
    reason: str  # 'task_completion', 'resource_contribution', 'service_purchase', etc.
    timestamp: int  # Unix timestamp (seconds)
    service_type: Optional[str] = None  # For service purchases
77
+
78
+
79
class CreditBalance(BaseModel):
    """Model for node credit balance"""
    node_id: str
    balance: float       # current spendable balance
    total_earned: float  # lifetime earnings
    total_spent: float   # lifetime spend
    transactions: List[CreditTransaction]
86
+
87
+
88
class ServiceOffering(BaseModel):
    """Model for services available in the SACCP marketplace"""
    service_id: str
    service_name: str
    description: str
    price_per_unit: float  # price in credits per unit_type
    unit_type: str  # 'hour', 'gb_storage', 'compute_hour', etc.
    provider_node_id: Optional[str] = None  # None for network-provided services
    availability: bool = True
97
+
98
+
99
class ServiceRequest(BaseModel):
    """Request for a service from the marketplace"""
    service_id: str
    node_id: str  # requesting (paying) node
    quantity: float  # amount in the offering's unit_type
    parameters: Optional[Dict[str, Any]] = None
space-config.yaml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
# SACCP Node Space Configuration
# Hugging Face Space runtime settings for this worker container.
runtime:
  cpu: "medium"
  memory: "16x"
  accelerator: "cpu"  # Will be configured based on node type
# Environment variables injected into the container at startup.
env:
  NODE_TYPE: "universal"
  MODEL_TYPE: "universal"
worker_app.py ADDED
@@ -0,0 +1,564 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os

# ---------------------------------------------------------------------------
# CPU / TensorFlow environment configuration.
# These variables MUST be exported before TensorFlow is imported: TF reads
# them once at module-initialisation time, so the original code (which set
# them after `import tensorflow`) had no effect.
# ---------------------------------------------------------------------------
NUM_CORES = os.cpu_count() or 4
os.environ['TF_NUM_INTEROP_THREADS'] = str(NUM_CORES)
os.environ['TF_NUM_INTRAOP_THREADS'] = str(NUM_CORES)
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'  # Force CPU only
os.environ['TF_ENABLE_ONEDNN_OPTS'] = '1'  # Intel oneDNN optimization
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'   # Reduce TF logging noise

import time
import json
import asyncio
from datetime import datetime
from typing import Dict, List, Optional

from fastapi import FastAPI, HTTPException
from fastapi.responses import StreamingResponse
import uvicorn
from pydantic import BaseModel
import tensorflow as tf
import keras
import numpy as np
from tokenizers import Tokenizer
from huggingface_hub import hf_hub_download
import requests
from transformers import GPT2Tokenizer

from shared.models import ChatRequest, ChatResponse, ChatMessage
# Absolute import: the container runs this file as a script
# (`python worker_app.py`, see Dockerfile CMD), where the original relative
# import `from .model_manager import ...` raises
# "attempted relative import with no known parent package".
from model_manager import ModelManager

app = FastAPI(
    title="Universal Worker Node for Sam-X Models",
    description="Processing node that supports all Sam-X model types dynamically",
    version="2.0.0"
)

# Global model manager instance (models are lazy-loaded on first use)
model_manager = ModelManager()
model_loaded = True  # Always true since we're using lazy loading

# Configure TF threading (runtime-level API; safe to call after import)
tf.config.threading.set_inter_op_parallelism_threads(NUM_CORES)
tf.config.threading.set_intra_op_parallelism_threads(NUM_CORES)

print(f"✅ CPU optimized: {NUM_CORES} threads, oneDNN enabled")
44
+
45
+
46
def format_chat_prompt(messages: List[Dict[str, str]]) -> str:
    """Flatten chat messages into a single plain-text prompt for the model.

    User and assistant messages become newline-delimited blocks; any other
    role (e.g. "system") is appended verbatim with a trailing newline, and a
    final blank block acts as the assistant-response prefix.

    NOTE(review): user and assistant turns are formatted identically — no
    role markers are inserted. Confirm this matches the model's training
    format before changing the template strings.

    Args:
        messages: list of ``{"role": ..., "content": ...}`` dicts.

    Returns:
        The concatenated prompt string.
    """
    prompt = ""

    for msg in messages:
        role = msg.get('role', 'user')
        content = msg.get('content', '')

        if role.lower() == 'user':
            prompt += f"""
{content}
"""
        elif role.lower() == 'assistant':
            prompt += f"""
{content}
"""
        else:
            # System or other roles
            prompt += f"{content}\n"

    # Add assistant prefix for the response
    prompt += """

"""

    return prompt
72
+
73
+
74
def sample_token(logits, temperature=0.8, top_k=40, top_p=0.9, repetition_penalty=1.1,
                 generated_ids=None):
    """Sample the id of the next token from a 1-D logits array.

    Args:
        logits: np.ndarray of shape (vocab_size,) with raw model logits.
        temperature: softmax temperature (> 0); lower values are greedier.
        top_k: if > 0, restrict sampling to the k most probable tokens.
        top_p: nucleus-sampling cumulative-probability cutoff; only reached
            when the top-k branch did not apply (top-k takes precedence,
            matching the original behaviour).
        repetition_penalty: CTRL-style penalty (> 1 discourages repeats).
        generated_ids: optional sequence of already-generated token ids.
            When provided, the repetition penalty is applied only to those
            ids (the standard formulation). When None, the original
            behaviour — penalising every logit uniformly, which merely
            rescales the distribution — is kept for backward compatibility.

    Returns:
        The sampled token index (integer).
    """
    # Temperature scaling; `/` creates a copy, so the caller's array is safe.
    logits = logits / temperature

    # Repetition penalty.
    if repetition_penalty != 1.0:
        if generated_ids:
            # Penalise only tokens that already appeared in the output.
            ids = np.fromiter(set(int(i) for i in generated_ids), dtype=np.int64)
            vals = logits[ids]
            logits[ids] = np.where(vals < 0, vals * repetition_penalty, vals / repetition_penalty)
        else:
            # Legacy global penalty (applies to the whole vocabulary).
            logits = np.where(logits < 0, logits * repetition_penalty, logits / repetition_penalty)

    # Softmax with max-subtraction for numerical stability.
    probs = np.exp(logits - np.max(logits))
    probs = probs / np.sum(probs)

    # Top-k filtering (returns immediately, so it takes precedence over top-p).
    if 0 < top_k < len(probs):
        top_k_idx = np.argpartition(probs, -top_k)[-top_k:]
        top_k_probs = probs[top_k_idx]
        top_k_probs = top_k_probs / np.sum(top_k_probs)  # Renormalize
        return top_k_idx[np.random.choice(len(top_k_idx), p=top_k_probs)]

    # Top-p (nucleus) sampling.
    if top_p < 1.0:
        sorted_idx = np.argsort(probs)[::-1]
        cumulative_probs = np.cumsum(probs[sorted_idx])
        cutoff_idx = min(np.searchsorted(cumulative_probs, top_p) + 1, len(sorted_idx))
        nucleus_idx = sorted_idx[:cutoff_idx]
        nucleus_probs = probs[nucleus_idx]
        nucleus_probs = nucleus_probs / np.sum(nucleus_probs)  # Renormalize
        return nucleus_idx[np.random.choice(len(nucleus_idx), p=nucleus_probs)]

    # Unfiltered sampling from the full distribution.
    return np.random.choice(len(probs), p=probs)
111
+
112
+
113
def generate_response(model: keras.Model, tokenizer: Tokenizer, config: dict,
                      prompt: str, max_tokens: int = 512, temperature: float = 0.8,
                      top_k: int = 40, top_p: float = 0.9, repetition_penalty: float = 1.1) -> str:
    """Autoregressively generate a completion for *prompt* and return the text.

    Runs the model token-by-token on CPU without a KV cache, stopping after
    ``max_tokens`` tokens or when a stop token is produced.
    """
    # Tokenize the prompt
    prompt_ids = tokenizer.encode(prompt).ids
    current_ids = tf.constant([prompt_ids], dtype=tf.int32)

    # The stop-token set is constant for the whole generation: hoist it out
    # of the loop (the original rebuilt it every iteration) and drop Nones —
    # tokenizer.token_to_id returns None for unknown tokens, and None must
    # never vacuously sit in the stop list.
    stop_token_ids = {tid for tid in (
        config.get('eos_token_id', 50256),
        tokenizer.token_to_id("\n"),
        tokenizer.token_to_id("<im end for model tun>"),
    ) if tid is not None}

    generated_ids = []
    for _ in range(max_tokens):
        with tf.device('/CPU:0'):  # CPU-only inference
            logits, _ = model(current_ids, training=False, use_cache=False)
            next_token_logits = logits[0, -1, :].numpy()

        next_token_id = sample_token(next_token_logits, temperature, top_k, top_p, repetition_penalty)
        generated_ids.append(next_token_id)
        # Feed only the new token back (matches the original no-cache call pattern).
        current_ids = tf.constant([[next_token_id]], dtype=tf.int32)

        if next_token_id in stop_token_ids:
            break

    # Decode the generated tokens
    generated_text = tokenizer.decode(generated_ids)

    # Trim anything at/after the first stop marker that leaked into the text.
    for marker in ("\n", "<im end for model tun>"):
        idx = generated_text.find(marker)
        if idx != -1:
            generated_text = generated_text[:idx]

    return generated_text.strip()
157
+
158
+
159
async def generate_streaming_response(model: keras.Model, tokenizer: Tokenizer, config: dict,
                                      prompt: str, max_tokens: int = 512, temperature: float = 0.8,
                                      top_k: int = 40, top_p: float = 0.9, repetition_penalty: float = 1.1):
    """Yield OpenAI-style SSE chunks ("data: {...}\\n\\n") for *prompt*.

    Every chunk carries the placeholder model name "dynamic_model"; the
    calling endpoint rewrites it to the requested model before sending.
    """
    # One id for the whole stream (OpenAI chunks share a completion id).
    response_id = f"chat-{int(time.time())}"

    def sse(choices):
        # Wrap one chunk payload in the SSE wire format.
        return "data: " + json.dumps({
            "id": response_id,
            "object": "chat.completion.chunk",
            "created": int(time.time()),
            "model": "dynamic_model",
            "choices": choices,
        }) + "\n\n"

    # Tokenize the prompt
    prompt_ids = tokenizer.encode(prompt).ids
    current_ids = tf.constant([prompt_ids], dtype=tf.int32)

    # Constant stop set, hoisted out of the loop; Nones from unknown-token
    # lookups are dropped so they can never match a sampled id.
    stop_token_ids = {tid for tid in (
        config.get('eos_token_id', 50256),
        tokenizer.token_to_id("\n"),
        tokenizer.token_to_id("<im end for model tun>"),
    ) if tid is not None}

    # Initial chunk announcing the assistant role.
    yield sse([{"index": 0, "delta": {"role": "assistant", "content": ""}, "finish_reason": None}])

    # Token loop — this is where SACCP token distribution would plug in.
    for _ in range(max_tokens):
        with tf.device('/CPU:0'):  # CPU-only inference
            logits, _ = model(current_ids, training=False, use_cache=False)
            next_token_logits = logits[0, -1, :].numpy()

        next_token_id = sample_token(next_token_logits, temperature, top_k, top_p, repetition_penalty)

        # Check stop BEFORE yielding: the original streamed the stop marker's
        # text (e.g. "<im end for model tun>") to the client and only then broke.
        if next_token_id in stop_token_ids:
            break

        token_text = tokenizer.decode([next_token_id])
        yield sse([{"index": 0, "delta": {"content": token_text}, "finish_reason": None}])

        current_ids = tf.constant([[next_token_id]], dtype=tf.int32)

    # Terminal chunk.
    yield sse([{"index": 0, "delta": {}, "finish_reason": "stop"}])
237
+
238
+
239
async def generate_token_by_token_streaming_response(model: keras.Model, tokenizer: Tokenizer, config: dict,
                                                     prompt: str, max_tokens: int = 512, temperature: float = 0.8,
                                                     top_k: int = 40, top_p: float = 0.9, repetition_penalty: float = 1.1):
    """Stream SSE chunks one token at a time (SACCP-distributable variant).

    Same wire format as generate_streaming_response, but each token chunk
    carries a per-token id ("token-<i>-<ts>") so a head node can track
    individual generation steps.
    """
    # Tokenize the prompt
    prompt_ids = tokenizer.encode(prompt).ids
    current_ids = tf.constant([prompt_ids], dtype=tf.int32)

    # Constant stop set, hoisted out of the loop; Nones from unknown-token
    # lookups are dropped so they can never match a sampled id.
    stop_token_ids = {tid for tid in (
        config.get('eos_token_id', 50256),
        tokenizer.token_to_id("\n"),
        tokenizer.token_to_id("<im end for model tun>"),
    ) if tid is not None}

    # Initial chunk announcing the assistant role.
    initial_chunk = {
        "id": f"chat-{int(time.time())}",
        "object": "chat.completion.chunk",
        "created": int(time.time()),
        "model": "dynamic_model",
        "choices": [{
            "index": 0,
            "delta": {"role": "assistant", "content": ""},
            "finish_reason": None
        }]
    }
    yield f"data: {json.dumps(initial_chunk)}\n\n"

    for i in range(max_tokens):
        # One forward pass per token (in a real SACCP deployment this step
        # could be dispatched to a remote worker).
        with tf.device('/CPU:0'):
            logits, _ = model(current_ids, training=False, use_cache=False)
            next_token_logits = logits[0, -1, :].numpy()

        next_token_id = sample_token(next_token_logits, temperature, top_k, top_p, repetition_penalty)

        # Check stop BEFORE yielding: the original emitted the stop marker's
        # text to the client before breaking.
        if next_token_id in stop_token_ids:
            break

        token_text = tokenizer.decode([next_token_id])
        chunk = {
            "id": f"token-{i}-{int(time.time())}",
            "object": "chat.completion.chunk",
            "created": int(time.time()),
            "model": "dynamic_model",
            "choices": [{
                "index": 0,
                "delta": {"content": token_text},
                "finish_reason": None
            }]
        }
        yield f"data: {json.dumps(chunk)}\n\n"

        # Prepare for next iteration
        current_ids = tf.constant([[next_token_id]], dtype=tf.int32)

    # Final chunk
    final_chunk = {
        "id": f"chat-{int(time.time())}",
        "object": "chat.completion.chunk",
        "created": int(time.time()),
        "model": "dynamic_model",
        "choices": [{
            "index": 0,
            "delta": {},
            "finish_reason": "stop"
        }]
    }
    yield f"data: {json.dumps(final_chunk)}\n\n"
319
+
320
+
321
@app.on_event("startup")
def startup_event():
    """Log worker readiness at startup and mark the node unhealthy on failure."""
    global model_loaded

    print("Initializing universal worker...")
    print(f"Available models: {model_manager.list_available_models()}")

    try:
        banner = (
            "✅ Universal worker initialized successfully!",
            "This worker can dynamically load any Sam-X model based on requests",
        )
        for line in banner:
            print(line)
    except Exception as exc:
        # Flip the health flag so /health reports "unhealthy".
        print(f"❌ Worker initialization failed: {exc}")
        model_loaded = False
335
+
336
+
337
@app.post("/chat/completions")
async def chat_completions(request: ChatRequest):
    """OpenAI-compatible chat-completion endpoint (streaming and non-streaming).

    Resolves the requested model against the models this worker can load,
    formats the conversation into a prompt, and either streams SSE chunks or
    returns a full completion payload.
    """
    try:
        model_type = request.model.lower()

        # Resolve the requested model: exact name first, then substring match.
        available_models = model_manager.list_available_models()
        if model_type not in available_models:
            matching_models = [m for m in available_models if model_type in m or m in model_type]
            if matching_models:
                model_type = matching_models[0]  # Use first available match
            else:
                raise HTTPException(
                    status_code=400,
                    detail=f"Model {request.model} not available. Available models: {available_models}"
                )

        # Get the appropriate model and tokenizer for this request
        model, tokenizer, config = model_manager.get_model(model_type)

        # Format the messages into a single prompt
        messages = [{"role": msg.role, "content": msg.content} for msg in request.messages]
        prompt = format_chat_prompt(messages)

        # If streaming is requested, return StreamingResponse
        if request.stream:
            async def generate():
                prefix = "data: "
                async for chunk in generate_streaming_response(
                    model=model,
                    tokenizer=tokenizer,
                    config=config,
                    prompt=prompt,
                    max_tokens=request.max_tokens,
                    temperature=request.temperature,
                    top_k=request.top_k,
                    top_p=request.top_p,
                    repetition_penalty=request.repetition_penalty
                ):
                    # Rewrite the placeholder model name in each SSE chunk.
                    # Strip exactly the "data: " prefix and the trailing
                    # newlines: the original sliced chunk[7:-4], which cut
                    # into the JSON payload and made json.loads raise on
                    # every chunk.
                    chunk_data = json.loads(chunk[len(prefix):].strip())
                    chunk_data["model"] = request.model
                    yield f"data: {json.dumps(chunk_data)}\n\n"

            return StreamingResponse(generate(), media_type="text/event-stream")

        # Otherwise, generate the full response synchronously.
        start_time = time.time()
        response_text = generate_response(
            model=model,
            tokenizer=tokenizer,
            config=config,
            prompt=prompt,
            max_tokens=request.max_tokens,
            temperature=request.temperature,
            top_k=request.top_k,
            top_p=request.top_p,
            repetition_penalty=request.repetition_penalty
        )
        processing_time = time.time() - start_time

        # Create response in OpenAI-compatible format. `created` is a
        # required field of ChatResponse (no default); the original omitted
        # it, so every non-streaming request failed pydantic validation.
        now = int(time.time())
        response = ChatResponse(
            id=f"chat-{now}",
            created=now,
            model=request.model,  # Use original model name
            choices=[
                {
                    "index": 0,
                    "message": {"role": "assistant", "content": response_text},
                    "finish_reason": "stop"
                }
            ],
            # NOTE(review): these are character counts, not token counts —
            # kept as-is for backward compatibility with existing consumers.
            usage={
                "prompt_tokens": len(prompt),
                "completion_tokens": len(response_text),
                "total_tokens": len(prompt) + len(response_text)
            }
        )

        print(f"Generated response in {processing_time:.2f}s for model {request.model} (loaded as {model_type})")

        return response.dict()

    except HTTPException:
        # Preserve deliberate HTTP errors (e.g. the 400 above); the original
        # broad handler converted them into generic 500s.
        raise
    except Exception as e:
        print(f"Error processing request: {e}")
        raise HTTPException(status_code=500, detail=f"Error processing request: {str(e)}")
429
+
430
+
431
@app.get("/health")
async def health_check():
    """Health check endpoint"""
    status = "healthy" if model_loaded else "unhealthy"
    return {
        "status": status,
        "model_loaded": model_loaded,
        "timestamp": int(time.time()),
        "supported_models": model_manager.list_available_models(),
        "loaded_models": list(model_manager.models.keys()),
    }
441
+
442
+
443
@app.get("/model-info")
async def model_info(model_type: str = "sam-x-large"):
    """Return metadata for a specific model (loads it on demand)."""
    try:
        if model_type not in model_manager.list_available_models():
            raise HTTPException(
                status_code=404,
                detail=f"Model {model_type} not available. Available: {model_manager.list_available_models()}"
            )

        model, tokenizer, config = model_manager.get_model(model_type)

        return {
            "model_type": model_type,
            "vocab_size": tokenizer.get_vocab_size(),
            "parameters": int(model.count_params()) if model else 0,
            "max_context_length": config.get('max_position_embeddings', 2048),
            "loaded": model_manager.is_model_loaded(model_type),
            "num_hidden_layers": config.get('num_hidden_layers', 12),
            "hidden_size": config.get('hidden_size', 768),
            "num_attention_heads": config.get('num_attention_heads', 12)
        }
    except HTTPException:
        # Keep the intended 404; the original broad handler rewrapped it as a 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error getting model info: {str(e)}")
467
+
468
+
469
@app.get("/models")
async def list_models():
    """List all available models"""
    created = int(time.time())
    entries = []
    for model_name in model_manager.list_available_models():
        entries.append({
            "id": model_name,
            "object": "model",
            "created": created,
            "owned_by": "universal-worker",
        })
    return {"object": "list", "data": entries}
484
+
485
+
486
@app.post("/saccp/process-task")
async def process_saccp_task(request: dict):
    """Process a SACCP task — the worker-side interface for distributed computing.

    Supported task types:
      * "inference": full prompt-to-text generation.
      * "token_generation": produce a single next token for a given context
        (building block for distributed autoregressive decoding).
    """
    try:
        task_type = request.get("task_type", "inference")
        model_type = request.get("model_name", "sam-x-large")
        task_data = request.get("task_data", {})

        # Get the appropriate model and tokenizer (lazy-loaded).
        model, tokenizer, config = model_manager.get_model(model_type)

        if task_type == "inference":
            result = generate_response(
                model=model,
                tokenizer=tokenizer,
                config=config,
                prompt=task_data.get("prompt", ""),
                max_tokens=task_data.get("max_tokens", 512),
                temperature=task_data.get("temperature", 0.8)
            )
            return {
                "status": "success",
                "result": result,
                "model_used": model_type
            }

        if task_type == "token_generation":
            current_context = task_data.get("current_context", [])
            generation_params = task_data.get("generation_params", {})

            if not current_context:
                raise HTTPException(status_code=400, detail="Current context required for token generation")

            # Convert context to tensor and run one forward pass.
            input_ids = tf.constant([current_context], dtype=tf.int32)
            with tf.device('/CPU:0'):
                logits, _ = model(input_ids, training=False, use_cache=False)
                next_token_logits = logits[0, -1, :].numpy()

            next_token_id = sample_token(
                next_token_logits,
                generation_params.get("temperature", 0.8),
                generation_params.get("top_k", 40),
                generation_params.get("top_p", 0.9),
                generation_params.get("repetition_penalty", 1.1)
            )

            return {
                "status": "success",
                "token_id": int(next_token_id),
                "token_text": tokenizer.decode([next_token_id]),
                "model_used": model_type,
                "next_position": len(current_context)
            }

        # For other task types, we can extend this
        raise HTTPException(status_code=400, detail=f"Task type {task_type} not supported")

    except HTTPException:
        # Keep the intended 400s; the original broad handler rewrapped them as 500s.
        raise
    except Exception as e:
        print(f"Error processing SACCP task: {e}")
        raise HTTPException(status_code=500, detail=f"Error processing SACCP task: {str(e)}")
560
+
561
+
562
if __name__ == "__main__":
    # Honour the PORT environment variable (default 8000, matching the
    # Dockerfile's EXPOSE) and listen on all interfaces.
    listen_port = int(os.environ.get("PORT", 8000))
    uvicorn.run(app, host="0.0.0.0", port=listen_port)