Update tensor_server.py

tensor_server.py  (+42 −12)
@@ -144,21 +144,51 @@ def load_chunk(chunk: ModelChunk) -> torch.nn.Module:
         raise ValueError(f"Chunk file not found: {chunk_file}")
 
     # For raw binary chunks, we'll create a simple buffer module
-    class ChunkBuffer(
-
+    class ChunkBuffer(nn.Module):
+        """
+        A single Florence-2 caption chunk that receives pre-encoded image embeddings
+        and produces partial vocabulary logits.
+        """
+
+        def __init__(self, chunk_path: str, config: dict):
             super().__init__()
+
+            # Get dimensions from config
+            input_dim = config.get("input_dim", 1024)    # Florence-2 embedding dim
+            output_dim = config.get("output_dim", 1000)  # size of vocab shard
+            dropout = config.get("dropout", 0.1)
+
+            # Optional: chunk_path can point to pretrained weights
             self.chunk_path = chunk_path
-
-
-            self.
-
-
-
+
+            # Main projection layer: embedding → partial vocab logits
+            self.linear = nn.Linear(input_dim, output_dim)
+
+            # Optional normalization + dropout (stabilizes training or inference variance)
+            self.norm = nn.LayerNorm(input_dim)
+            self.dropout = nn.Dropout(dropout)
+
+            # Initialize weights (small variance, stable logits)
+            nn.init.xavier_uniform_(self.linear.weight)
+            nn.init.zeros_(self.linear.bias)
+
         def forward(self, x: torch.Tensor) -> torch.Tensor:
-
-
-
-
+            """
+            Args:
+                x: Florence-2 image embedding tensor, shape [batch, 1024]
+            Returns:
+                logits for this vocab shard, shape [batch, output_dim]
+            """
+            # Normalize + dropout
+            x = self.norm(x)
+            x = self.dropout(x)
+
+            # Linear projection to vocab slice
+            logits = self.linear(x)
+
+            # (Optional) softmax for probabilities, but usually the main model handles this
+            # probs = F.softmax(logits, dim=-1)
+            return logits
 
     # Create and return the chunk buffer
     chunk_model = ChunkBuffer(chunk_file, chunk_config)
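
For context, a minimal usage sketch of the new module (not part of the commit). It assumes the ChunkBuffer definition above is in scope; in tensor_server.py the class is nested inside load_chunk, so it would need to be hoisted or otherwise exposed to run standalone. The config values and the "chunk.bin" path are illustrative placeholders.

import torch

# Hypothetical config; the keys mirror what __init__ reads via config.get(...).
chunk_config = {"input_dim": 1024, "output_dim": 1000, "dropout": 0.1}

# "chunk.bin" is a placeholder path: the diff stores chunk_path but does not
# yet load any weights from it.
chunk = ChunkBuffer("chunk.bin", chunk_config)
chunk.eval()  # disable dropout so repeated calls produce identical logits

with torch.no_grad():
    embeddings = torch.randn(4, 1024)  # batch of 4 Florence-2 image embeddings
    logits = chunk(embeddings)         # shape [4, 1000]: one vocab shard

print(logits.shape)  # torch.Size([4, 1000])

Note the ordering in forward: LayerNorm and dropout are applied to the input embedding before the projection, and the method returns raw logits; per the in-code comment, any softmax over the vocab shards is left to the caller.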