Spaces:

Fred808
/

tserv

Paused

App Files Files Community

Fred808 committed on Oct 24, 2025

Commit

554d262

verified ·

1 Parent(s): 94a6cd4

Update tensor_server.py

Browse files

Files changed (1) hide show

tensor_server.py +53 -22

tensor_server.py CHANGED Viewed

@@ -144,32 +144,63 @@ def load_chunk(chunk: ModelChunk) -> torch.nn.Module:
             raise ValueError(f"Chunk file not found: {chunk_file}")
         # For raw binary chunks, we'll create a simple buffer module
-        class ChunkBuffer(torch.nn.Module):
-            def __init__(self, chunk_path: str, config: Dict):
                 super().__init__()
                 self.chunk_path = chunk_path
-                self.config = config
-                self.start_offset = config.get('start_offset', 0)
-                self.size = config.get('size_bytes', 0)
-                # expose vocab_offset on the module for aggregator use
-                self.vocab_offset = int(config.get('vocab_offset', 0))
             def forward(self, x: torch.Tensor) -> torch.Tensor:
-                # In a real implementation, this would process the input
-                # using the chunk data. For now, we'll just return the input
-                # as this is just for testing the distribution system
-                return x
-        # Create and return the chunk buffer
-        chunk_model = ChunkBuffer(chunk_file, chunk_config)
-        # Ensure the chunk_model.config is the up-to-date config (including any assigned offsets)
-        chunk_model.config = chunk_config
-        print(f"[INFO] Loaded chunk {chunk.chunk_id} ({chunk_config.get('size_bytes', 0)} bytes) from {chunk.files[0]}")
-        return chunk_model
-    except Exception as e:
-        raise Exception(f"Failed to load chunk: {str(e)}")
 async def process_tensor(chunk_id: int, inputs: torch.Tensor) -> torch.Tensor:
     """Process input tensor through the specified chunk"""

             raise ValueError(f"Chunk file not found: {chunk_file}")
         # For raw binary chunks, we'll create a simple buffer module
+        class ChunkBuffer(torch.nn.Module):
+            """
+            A single Florence-2 caption chunk that receives pre-encoded image embeddings
+            and produces partial vocabulary logits.
+
+            Pipeline applied in forward(): LayerNorm -> Dropout -> Linear.
+            """
+            def __init__(self, chunk_path: str, config: dict):
                 super().__init__()
+                # Get dimensions from config
+                input_dim = config.get("input_dim", 1024)            # Florence-2 embedding dim
+                output_dim = config.get("output_dim", 1000)          # size of vocab shard
+                dropout = config.get("dropout", 0.1)
+                # Optional: chunk_path can point to pretrained weights
+                # (the path is only stored here; nothing is read from it in this class)
                 self.chunk_path = chunk_path
+                # Keep the attributes the previous ChunkBuffer exposed so code that
+                # reads them off the module (e.g. vocab_offset for the aggregator)
+                # does not hit AttributeError
+                self.config = config
+                self.start_offset = config.get('start_offset', 0)
+                self.size = config.get('size_bytes', 0)
+                self.vocab_offset = int(config.get('vocab_offset', 0))
+                # Main projection layer: embedding → partial vocab logits
+                # (torch.nn is spelled out because only `torch` is known to be imported)
+                self.linear = torch.nn.Linear(input_dim, output_dim)
+                # Optional normalization + dropout (stabilizes training or inference variance)
+                self.norm = torch.nn.LayerNorm(input_dim)
+                self.dropout = torch.nn.Dropout(dropout)
+                # Initialize weights (small variance, stable logits)
+                torch.nn.init.xavier_uniform_(self.linear.weight)
+                torch.nn.init.zeros_(self.linear.bias)
             def forward(self, x: torch.Tensor) -> torch.Tensor:
+                """
+                Args:
+                    x: Florence-2 image embedding tensor, shape [batch, 1024]
+                Returns:
+                    logits for this vocab shard, shape [batch, output_dim]
+                """
+                # Normalize + dropout
+                # NOTE(review): dropout is only disabled after .eval(); confirm the
+                # caller switches the module to eval mode before serving requests
+                x = self.norm(x)
+                x = self.dropout(x)
+                # Linear projection to vocab slice
+                logits = self.linear(x)
+                # (Optional) softmax for probabilities, but usually the main model handles this
+                # probs = F.softmax(logits, dim=-1)
+                return logits
+        # Create and return the chunk buffer
+        # (dedented out of forward(): at the deeper indent this code was unreachable
+        # after `return logits` and load_chunk never built the module)
+        chunk_model = ChunkBuffer(chunk_file, chunk_config)
+        # Ensure the chunk_model.config is the up-to-date config (including any assigned offsets)
+        chunk_model.config = chunk_config
+        print(f"[INFO] Loaded chunk {chunk.chunk_id} ({chunk_config.get('size_bytes', 0)} bytes) from {chunk.files[0]}")
+        return chunk_model
+    except Exception as e:
+        # Aligned with the function-level try; chain the cause to keep the traceback
+        raise Exception(f"Failed to load chunk: {str(e)}") from e
 async def process_tensor(chunk_id: int, inputs: torch.Tensor) -> torch.Tensor:
     """Process input tensor through the specified chunk"""