Update modeling_scdiva.py

modeling_scdiva.py  (+39, -292)

@@ -1,298 +1,45 @@
-"""
-ScDiVa
-
-This file contains the core architecture definition of ScDiVa.
-It allows loading pre-trained weights for inference.
-"""
-
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-from typing import Optional, Dict, Tuple, Union
-import math
-import os
-
-class ScDiVaConfig:
-    def __init__(
-        self,
-        num_genes: int = 41818,  # Updated to match paper (Table 4)
-        hidden_size: int = 512,
-        num_hidden_layers: int = 12,
-        num_attention_heads: int = 8,
-        intermediate_size: int = 2048,
-        hidden_dropout_prob: float = 0.1,
-        attention_probs_dropout_prob: float = 0.1,
-        max_position_embeddings: int = 1200,
-        layer_norm_eps: float = 1e-5,
-        latent_dim: int = 128,
-        num_cell_types: int = 100,
-        use_variational: bool = True,
-        **kwargs
-    ):
-        self.num_genes = num_genes
-        self.hidden_size = hidden_size
-        self.num_hidden_layers = num_hidden_layers
-        self.num_attention_heads = num_attention_heads
-        self.intermediate_size = intermediate_size
-        self.hidden_dropout_prob = hidden_dropout_prob
-        self.attention_probs_dropout_prob = attention_probs_dropout_prob
-        self.max_position_embeddings = max_position_embeddings
-        self.layer_norm_eps = layer_norm_eps
-        self.latent_dim = latent_dim
-        self.num_cell_types = num_cell_types
-        self.use_variational = use_variational
-
-class GeneEmbedding(nn.Module):
-    def __init__(self, config: ScDiVaConfig):
-        super().__init__()
-        self.gene_projection = nn.Linear(config.num_genes, config.hidden_size)
-        self.layer_norm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
-        self.dropout = nn.Dropout(config.hidden_dropout_prob)
-
-    def forward(self, gene_expression: torch.Tensor) -> torch.Tensor:
-        embeddings = self.gene_projection(gene_expression)
-        embeddings = self.layer_norm(embeddings)
-        embeddings = self.dropout(embeddings)
-        return embeddings
-
-class MultiHeadAttention(nn.Module):
-    def __init__(self, config: ScDiVaConfig):
-        super().__init__()
-        self.num_attention_heads = config.num_attention_heads
-        self.attention_head_size = config.hidden_size // config.num_attention_heads
-        self.all_head_size = self.num_attention_heads * self.attention_head_size
-
-        self.query = nn.Linear(config.hidden_size, self.all_head_size)
-        self.key = nn.Linear(config.hidden_size, self.all_head_size)
-        self.value = nn.Linear(config.hidden_size, self.all_head_size)
-
-        self.dropout = nn.Dropout(config.attention_probs_dropout_prob)
-        self.dense = nn.Linear(config.hidden_size, config.hidden_size)
-        self.layer_norm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
-
-    def transpose_for_scores(self, x: torch.Tensor) -> torch.Tensor:
-        new_shape = x.size()[:-1] + (self.num_attention_heads, self.attention_head_size)
-        x = x.view(*new_shape)
-        return x.permute(0, 2, 1, 3)
-
-    def forward(self, hidden_states: torch.Tensor, attention_mask: Optional[torch.Tensor] = None) -> torch.Tensor:
-        query_layer = self.transpose_for_scores(self.query(hidden_states))
-        key_layer = self.transpose_for_scores(self.key(hidden_states))
-        value_layer = self.transpose_for_scores(self.value(hidden_states))
-
-        attention_scores = torch.matmul(query_layer, key_layer.transpose(-1, -2))
-        attention_scores = attention_scores / math.sqrt(self.attention_head_size)
-
-        if attention_mask is not None:
-            attention_scores = attention_scores + attention_mask
-
-        attention_probs = F.softmax(attention_scores, dim=-1)
-        attention_probs = self.dropout(attention_probs)
-
-        context_layer = torch.matmul(attention_probs, value_layer)
-        context_layer = context_layer.permute(0, 2, 1, 3).contiguous()
-        new_shape = context_layer.size()[:-2] + (self.all_head_size,)
-        context_layer = context_layer.view(*new_shape)
-
-        attention_output = self.dense(context_layer)
-        attention_output = self.dropout(attention_output)
-        attention_output = self.layer_norm(attention_output + hidden_states)
-
-        return attention_output
-
-class FeedForward(nn.Module):
-    def __init__(self, config: ScDiVaConfig):
-        super().__init__()
-        self.dense1 = nn.Linear(config.hidden_size, config.intermediate_size)
-        self.dense2 = nn.Linear(config.intermediate_size, config.hidden_size)
-        self.dropout = nn.Dropout(config.hidden_dropout_prob)
-        self.layer_norm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
-
-    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
-        residual = hidden_states
-        hidden_states = self.dense1(hidden_states)
-        hidden_states = F.gelu(hidden_states)
-        hidden_states = self.dense2(hidden_states)
-        hidden_states = self.dropout(hidden_states)
-        hidden_states = self.layer_norm(hidden_states + residual)
-        return hidden_states
-
-class TransformerLayer(nn.Module):
-    def __init__(self, config: ScDiVaConfig):
-        super().__init__()
-        self.attention = MultiHeadAttention(config)
-        self.feed_forward = FeedForward(config)
-
-    def forward(self, hidden_states: torch.Tensor, attention_mask: Optional[torch.Tensor] = None) -> torch.Tensor:
-        attention_output = self.attention(hidden_states, attention_mask)
-        layer_output = self.feed_forward(attention_output)
-        return layer_output
-
-class TransformerEncoder(nn.Module):
-    def __init__(self, config: ScDiVaConfig):
-        super().__init__()
-        self.layers = nn.ModuleList([
-            TransformerLayer(config) for _ in range(config.num_hidden_layers)
-        ])
-
-    def forward(self, hidden_states: torch.Tensor, attention_mask: Optional[torch.Tensor] = None) -> torch.Tensor:
-        for layer in self.layers:
-            hidden_states = layer(hidden_states, attention_mask)
-        return hidden_states
-
-class VariationalLayer(nn.Module):
-    def __init__(self, config: ScDiVaConfig):
-        super().__init__()
-        self.mu_projection = nn.Linear(config.hidden_size, config.latent_dim)
-        self.logvar_projection = nn.Linear(config.hidden_size, config.latent_dim)
-
-    def reparameterize(self, mu: torch.Tensor, logvar: torch.Tensor) -> torch.Tensor:
-        std = torch.exp(0.5 * logvar)
-        eps = torch.randn_like(std)
-        return mu + eps * std
-
-    def forward(self, hidden_states: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
-        mu = self.mu_projection(hidden_states)
-        logvar = self.logvar_projection(hidden_states)
-        z = self.reparameterize(mu, logvar)
-        return z, mu, logvar
-
-class AnnotationHead(nn.Module):
-    def __init__(self, config: ScDiVaConfig):
-        super().__init__()
-        self.dense = nn.Linear(config.latent_dim, config.hidden_size)
-        self.dropout = nn.Dropout(config.hidden_dropout_prob)
-        self.classifier = nn.Linear(config.hidden_size, config.num_cell_types)
-
-    def forward(self, latent_representation: torch.Tensor) -> torch.Tensor:
-        hidden = F.gelu(self.dense(latent_representation))
-        hidden = self.dropout(hidden)
-        logits = self.classifier(hidden)
-        return logits
-
-class BatchIntegrationHead(nn.Module):
-    def __init__(self, config: ScDiVaConfig):
-        super().__init__()
-        self.dense = nn.Linear(config.latent_dim, config.hidden_size)
-        self.decoder = nn.Linear(config.hidden_size, config.num_genes)
-
-    def forward(self, latent_representation: torch.Tensor) -> torch.Tensor:
-        hidden = F.gelu(self.dense(latent_representation))
-        reconstructed = self.decoder(hidden)
-        return reconstructed
-
-class ScDiVaModel(nn.Module):
-    """
-    ScDiVa: Single-cell Deep Variational Analysis Model
-    """
-    def __init__(self, config: ScDiVaConfig):
-        super().__init__()
-        self.config = config
-        self.gene_embedding = GeneEmbedding(config)
-        self.encoder = TransformerEncoder(config)
-        self.variational_layer = VariationalLayer(config)
-        self.annotation_head = AnnotationHead(config)
-        self.batch_integration_head = BatchIntegrationHead(config)
-
-    def encode(self, gene_expression: torch.Tensor, attention_mask: Optional[torch.Tensor] = None) -> Dict[str, torch.Tensor]:
-        """
-        Input Shape: (batch_size, num_genes)
-        Returns: Dict containing latent, mu, logvar
-        """
-        embeddings = self.gene_embedding(gene_expression)
-        embeddings = embeddings.unsqueeze(1)  # (B, 1, H)
-        encoded = self.encoder(embeddings, attention_mask)  # (B, 1, H)
-        encoded = encoded.squeeze(1)  # (B, H)
-        z, mu, logvar = self.variational_layer(encoded)
-        return {"latent": z, "mu": mu, "logvar": logvar}
-
-    def predict(self, gene_expression: torch.Tensor, task: str = "annotation", attention_mask: Optional[torch.Tensor] = None) -> torch.Tensor:
-        """
-        Inference interface:
-        - task="annotation": returns classification logits
-        - task="batch_integration": returns reconstructed expression
-        """
-        encoding = self.encode(gene_expression, attention_mask)
-        latent = encoding["latent"]
-        if task == "annotation":
-            return self.annotation_head(latent)
-        elif task == "batch_integration":
-            return self.batch_integration_head(latent)
-        else:
-            raise ValueError(f"Unsupported task: {task}")
-
-    @classmethod
-    def from_pretrained(
-        cls,
-        model_name_or_path: str,
-        map_location: Optional[str] = None,
-        strict: bool = True,
-        use_auth_token: Optional[str] = None,
-    ) -> "ScDiVaModel":
-        """
-        Load pre-trained model from local path or Hugging Face Hub.
-        Supports directly loading from 'warming666/ScDiVa'.
-        """
-        config = ScDiVaConfig()
-        model = cls(config)
-
-        if map_location is None:
-            map_location = "cpu"
-
-        ckpt_path: Optional[str] = None
-
-        # 1. Try Local File
-        if os.path.exists(model_name_or_path):
-            if os.path.isfile(model_name_or_path):
-                ckpt_path = model_name_or_path
-            elif os.path.isdir(model_name_or_path):
-                # Search for typical weights file
-                for name in ["pytorch_model.bin", "model.safetensors", "model.pt"]:
-                    p = os.path.join(model_name_or_path, name)
-                    if os.path.exists(p):
-                        ckpt_path = p
-                        break
-
-        # 2. Try Hugging Face Hub Download
-        if ckpt_path is None:
-            try:
-                from huggingface_hub import hf_hub_download
-                print(f"[ScDiVa] Attempting to download weights from HF: {model_name_or_path}")
-                # Try safetensors first, then bin
-                try:
-                    ckpt_path = hf_hub_download(repo_id=model_name_or_path, filename="model.safetensors", token=use_auth_token)
-                except:
-                    # Fallback to pytorch_model.bin
-                    try:
-                        ckpt_path = hf_hub_download(repo_id=model_name_or_path, filename="pytorch_model.bin", token=use_auth_token)
-                    except:
-                        pass
-            except ImportError:
-                print("[ScDiVa] Warning: `huggingface_hub` not installed. Cannot download from HF.")
-            except Exception as e:
-                print(f"[ScDiVa] Warning: HF download error (check network/repo ID): {e}")
-
-        # 3. Load or Fallback to Random Init (Demo Mode)
-        if ckpt_path is None:
-            print(f"[ScDiVa] Warning: No weights found at '{model_name_or_path}'. Using random initialization (DEMO MODE).")
-            return model
-
-        print(f"[ScDiVa] Loading weights from {ckpt_path}...")
-        try:
-            state = torch.load(ckpt_path, map_location=map_location)
-            # Support both raw state_dict and dictionary containing state_dict
-            state_dict = state["state_dict"] if isinstance(state, dict) and "state_dict" in state else state
-            model.load_state_dict(state_dict, strict=strict)
-        except Exception as e:
-            print(f"[ScDiVa] Warning: Failed to load weights: {e}")
-
-        return model
+"""
+ScDiVa Inference SDK
+High-level wrappers for single-cell analysis tasks.
+"""
+import torch
+import numpy as np
+from modeling_scdiva import ScDiVaModel
+
+class ScDiVaInference:
+    def __init__(self, model_name: str = "warming666/ScDiVa", device: str = None):
+        if device is None:
+            self.device = "cuda" if torch.cuda.is_available() else "cpu"
+        else:
+            self.device = device
+
+        print(f"Initializing ScDiVa on {self.device}...")
+        self.model = ScDiVaModel.from_pretrained(model_name)
+        self.model.to(self.device)
+        self.model.eval()
+
+    def _preprocess(self, adata) -> torch.Tensor:
+        # Placeholder for preprocessing (normalization, etc.)
+        # In real usage, this aligns genes and converts to a tensor
+        if hasattr(adata.X, "toarray"):
+            expr = adata.X.toarray()
+        else:
+            expr = adata.X
+        return torch.tensor(expr, dtype=torch.float32).to(self.device)
+
+    def annotate(self, adata):
+        data = self._preprocess(adata)
+        with torch.no_grad():
+            logits = self.model.predict(data, task="annotation")
+        preds = torch.argmax(logits, dim=1).cpu().numpy()
+        return preds
+
+    def integrate_batches(self, adata_list):
+        # Placeholder for integration logic
+        results = []
+        for adata in adata_list:
+            data = self._preprocess(adata)
+            with torch.no_grad():
+                emb = self.model.encode(data)["latent"]
+            results.append(emb.cpu().numpy())
+        return np.concatenate(results, axis=0)
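
For orientation, a minimal usage sketch of the new wrapper follows. It is illustrative only: the .h5ad path is hypothetical, anndata is assumed to be installed, and adata.X is assumed to already be a cells-by-genes matrix ordered to match the model's 41,818-gene input, since the _preprocess placeholder does not realign genes.

import anndata as ad
from modeling_scdiva import ScDiVaInference  # class introduced in this commit

# Load a query dataset (hypothetical path); .X must already match the model's gene order.
adata = ad.read_h5ad("my_cells.h5ad")

sdk = ScDiVaInference(model_name="warming666/ScDiVa")

# Cell-type annotation: one integer class index per cell.
labels = sdk.annotate(adata)

# Batch integration: 128-dimensional latent embeddings, concatenated over the list.
embeddings = sdk.integrate_batches([adata])
print(labels.shape, embeddings.shape)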