Fred808 committed on
Commit
45e602a
·
verified ·
1 Parent(s): 7bd9c49

Upload 3 files

Browse files
Files changed (3) hide show
  1. Dockerfile +44 -0
  2. requirements.txt +6 -0
  3. tensor_server.py +271 -0
Dockerfile ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# NOTE(review): the original file had two FROM lines; the first
# (nvidia/cuda:11.8.0-runtime-ubuntu22.04) began a stage that the second FROM
# discarded entirely, so the built image never contained CUDA. If GPU support
# is required, base the image on the CUDA image instead of dropping to slim.
FROM python:3.11-slim-bullseye

ENV DEBIAN_FRONTEND=noninteractive

WORKDIR /app

# Enable contrib and non-free repos, and install system dependencies in one
# cached layer (unrar lives in non-free; libgl1/libglib2.0-0 are common
# runtime deps; git was previously installed in a separate apt layer).
RUN sed -i 's/main/main contrib non-free/' /etc/apt/sources.list && \
    apt-get update && \
    apt-get install -y --no-install-recommends \
        git \
        unrar \
        libgl1 \
        libglib2.0-0 \
    && rm -rf /var/lib/apt/lists/*

# python:3.11-slim already ships python3 + pip, so the original
# "apt-get install python3 python3-pip" layer is redundant; just keep pip fresh.
RUN pip install --no-cache-dir --upgrade pip

# Copy and install requirements first so this layer caches across code edits
COPY requirements.txt ./
RUN pip install --no-cache-dir -r requirements.txt

# Copy app code
COPY . .

# Make the entire /app directory fully writeable for all users
RUN chmod -R 777 /app

# Ensure the app runs as the same user as the Space UI
RUN useradd -m -u 1000 user
USER user

# Launch FastAPI download server on container start
CMD ["uvicorn", "tensor_server:app", "--host", "0.0.0.0", "--port", "7860"]
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
# Web stack is pinned exactly; runtime/numeric deps use >= floors.
fastapi==0.104.0
uvicorn==0.23.2
torch>=2.0.0
numpy>=1.24.0
psutil>=5.9.0
# NOTE: pydantic v2 API (model_dump etc.) is what this floor selects.
pydantic>=2.0.0
tensor_server.py ADDED
@@ -0,0 +1,271 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import torch
4
+ import psutil
5
+ import asyncio
6
+ from datetime import datetime
7
+ from typing import Dict, List, Optional
8
+ from fastapi import FastAPI, HTTPException
9
+ from pydantic import BaseModel
10
+ import uvicorn
11
+ import numpy as np
12
+
13
# ===== Config =====
class Settings:
    """Static configuration for the tensor server (class attributes only)."""

    # Server configuration
    HOST = "0.0.0.0"  # Listen on all interfaces
    PORT = 8001
    SERVER_ID = os.getenv("SERVER_ID", "tensor1")  # Unique ID for this tensor server

    # The IP or hostname where this tensor server is accessible.
    # (Fix: was an f-string with no placeholders — plain literal is equivalent.)
    PUBLIC_URL = os.getenv("PUBLIC_URL", "https://fred808-ilob.hf.space")

    # URLs for other services (should be actual IP addresses or hostnames)
    CONTROLLER_URL = os.getenv("CONTROLLER_URL", "http://192.168.1.100:8000")
    AGGREGATOR_URL = os.getenv("AGGREGATOR_URL", "http://192.168.1.104:8002")

    # Model settings
    DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
    MAX_BATCH_SIZE = 32
    METRICS_UPDATE_INTERVAL = 5  # seconds
    MODEL_DIR = "model_chunks"

    @classmethod
    def from_env(cls):
        """Overwrite class-level settings from environment variables.

        Returns:
            The Settings class itself, so callers can chain or assign it.
        """
        cls.HOST = os.getenv("TENSOR_HOST", cls.HOST)
        cls.PORT = int(os.getenv("TENSOR_PORT", cls.PORT))
        cls.SERVER_ID = os.getenv("SERVER_ID", cls.SERVER_ID)
        cls.CONTROLLER_URL = os.getenv("CONTROLLER_URL", cls.CONTROLLER_URL)
        cls.AGGREGATOR_URL = os.getenv("AGGREGATOR_URL", cls.AGGREGATOR_URL)
        return cls
42
+
43
# ===== Models =====
class ModelChunk(BaseModel):
    """Represents a received model chunk configuration."""
    # Index identifying this chunk within the partitioned model
    chunk_id: int
    # Weight file names; load_chunk() reads files[0] from Settings.MODEL_DIR
    files: List[str]
    # Expected keys: "input_size", "output_size", "weight_keys" (see load_chunk)
    config: Dict
49
+
50
class InferenceRequest(BaseModel):
    """Represents an inference request."""
    # Row-major 2D batch: one inner list of floats per sample
    inputs: List[List[float]]
    # Per-request batch split size; None falls back to Settings.MAX_BATCH_SIZE
    batch_size: Optional[int] = None
54
+
55
class MetricsData(BaseModel):
    """Server metrics data returned by /metrics and embedded in /health."""
    cpu_usage: float  # system-wide CPU percent (psutil.cpu_percent)
    memory_usage: float  # virtual-memory percent in use
    gpu_usage: Optional[float]  # allocated/max-allocated CUDA memory %, None without GPU
    active_requests: int  # requests currently in flight
    total_requests: int  # requests seen since process start
    average_response_time: float  # mean of the rolling request_times window, seconds
    last_error: Optional[str]  # message of the most recent failure, if any
    error_count: int  # failures seen since process start
65
+
66
# ===== FastAPI App =====
# Single application instance; the endpoint functions below attach to it
# via decorators, and the Dockerfile serves it with uvicorn.
app = FastAPI(
    title="Tensor Server",
    description="Handles model chunk computations",
    version="1.0.0"
)
72
+
73
# ===== State =====
class ServerState:
    """Mutable in-process state shared by all request handlers."""

    def __init__(self):
        # chunk_id -> loaded torch module, populated via /load_chunk
        self.loaded_chunks: Dict[int, torch.nn.Module] = {}
        self.active_requests: int = 0
        self.total_requests: int = 0
        # Rolling window of recent request durations in seconds (last 100 kept)
        self.request_times: List[float] = []
        self.error_count: int = 0
        self.last_error: Optional[str] = None
        self.is_computing: bool = False
        # Fix: update_metrics_loop() assigns this attribute but it was never
        # initialized, so any read before the first metrics tick raised
        # AttributeError. Holds the latest MetricsData snapshot (or None).
        self.current_metrics = None

state = ServerState()
85
+
86
# ===== Metrics Collection =====
async def collect_metrics() -> MetricsData:
    """Collect current server metrics (CPU/RAM/GPU plus request counters)."""
    # CPU and memory metrics
    cpu_usage = psutil.cpu_percent()
    memory = psutil.virtual_memory()
    memory_usage = memory.percent

    # GPU metrics if available
    gpu_usage = None
    if torch.cuda.is_available():
        try:
            max_allocated = torch.cuda.max_memory_allocated()
            # Fix: max_memory_allocated() is 0 before any allocation, which
            # previously raised ZeroDivisionError that a bare `except:`
            # silently swallowed. Guard the division and narrow the except.
            if max_allocated:
                gpu_usage = torch.cuda.memory_allocated() / max_allocated * 100
        except Exception:
            # Best-effort metric; GPU introspection must never break metrics.
            pass

    # Average response time over the rolling window (0 when no samples yet)
    avg_response_time = (
        sum(state.request_times) / len(state.request_times)
        if state.request_times else 0
    )

    return MetricsData(
        cpu_usage=cpu_usage,
        memory_usage=memory_usage,
        gpu_usage=gpu_usage,
        active_requests=state.active_requests,
        total_requests=state.total_requests,
        average_response_time=avg_response_time,
        last_error=state.last_error,
        error_count=state.error_count
    )
115
+
116
async def update_metrics_loop():
    """Background task: refresh state.current_metrics every update interval."""
    while True:
        try:
            # Snapshot metrics so /health and others can read them cheaply.
            state.current_metrics = await collect_metrics()
        except Exception as e:
            print(f"[ERROR] Failed to update metrics: {str(e)}")
        await asyncio.sleep(Settings.METRICS_UPDATE_INTERVAL)
126
+
127
# ===== Helper Functions =====
def load_chunk(chunk: ModelChunk) -> torch.nn.Module:
    """Load a model chunk into memory as a single nn.Linear layer.

    chunk.config must provide "input_size", "output_size" and "weight_keys";
    weights are read from the first file in chunk.files under
    Settings.MODEL_DIR. If the weight file is absent the layer keeps its
    random initialization (original behavior, preserved).

    Raises:
        RuntimeError: on any failure, with the original cause chained.
    """
    try:
        # Create chunk directory if it doesn't exist
        os.makedirs(Settings.MODEL_DIR, exist_ok=True)

        # Get chunk configuration
        input_size = chunk.config["input_size"]
        output_size = chunk.config["output_size"]
        weight_keys = chunk.config["weight_keys"]

        # Create a simple linear transformation for this chunk
        chunk_model = torch.nn.Linear(input_size, output_size)
        chunk_model = chunk_model.to(Settings.DEVICE)

        # Load the weights. SECURITY NOTE: torch.load deserializes pickled
        # data — only feed it trusted chunk files.
        chunk_file = os.path.join(Settings.MODEL_DIR, chunk.files[0])
        if os.path.exists(chunk_file):
            weights = torch.load(chunk_file, map_location=Settings.DEVICE)

            # Initialize weights from the loaded state dict
            with torch.no_grad():
                # Combine weights if multiple keys map into this chunk
                if len(weight_keys) > 1:
                    combined_weight = torch.cat([weights[k] for k in weight_keys], dim=0)
                    chunk_model.weight.copy_(combined_weight)
                else:
                    chunk_model.weight.copy_(weights[weight_keys[0]])

        return chunk_model

    except Exception as e:
        # Fix: chain the cause instead of discarding the traceback, and raise
        # RuntimeError rather than bare Exception (callers catching Exception
        # are unaffected).
        raise RuntimeError(f"Failed to load chunk: {str(e)}") from e
161
+
162
async def process_tensor(chunk_id: int, inputs: torch.Tensor) -> torch.Tensor:
    """Apply the already-loaded chunk identified by *chunk_id* to *inputs*.

    Raises HTTPException(400) when the chunk has not been loaded yet.
    """
    model = state.loaded_chunks.get(chunk_id)
    if model is None:
        raise HTTPException(status_code=400, detail=f"Chunk {chunk_id} not loaded")
    # Inference only — no gradient tracking needed.
    with torch.no_grad():
        return model(inputs)
171
+
172
# ===== API Endpoints =====
@app.get("/health")
async def health_check():
    """Health check endpoint: status, device, loaded chunk ids, live metrics."""
    metrics = await collect_metrics()
    return {
        "status": "healthy",
        "device": Settings.DEVICE,
        "loaded_chunks": list(state.loaded_chunks.keys()),
        # Fix: .dict() is deprecated under pydantic v2 (the version floor in
        # requirements.txt); model_dump() is the supported replacement.
        "metrics": metrics.model_dump()
    }
183
+
184
+ @app.get("/metrics")
185
+ async def get_metrics():
186
+ """Get current server metrics"""
187
+ return await collect_metrics()
188
+
189
+ @app.post("/load_chunk")
190
+ async def load_model_chunk(chunk: ModelChunk):
191
+ """Load a model chunk into memory"""
192
+ try:
193
+ # Load the chunk
194
+ chunk_model = load_chunk(chunk)
195
+ state.loaded_chunks[chunk.chunk_id] = chunk_model
196
+
197
+ return {
198
+ "status": "loaded",
199
+ "chunk_id": chunk.chunk_id,
200
+ "device": str(next(chunk_model.parameters()).device)
201
+ }
202
+
203
+ except Exception as e:
204
+ state.error_count += 1
205
+ state.last_error = str(e)
206
+ raise HTTPException(status_code=500, detail=str(e))
207
+
208
+ @app.post("/compute/{chunk_id}")
209
+ async def compute(chunk_id: int, request: InferenceRequest):
210
+ """Perform computation on inputs using specified chunk"""
211
+ try:
212
+ start_time = datetime.now()
213
+ state.active_requests += 1
214
+ state.total_requests += 1
215
+
216
+ # Convert inputs to tensor
217
+ inputs = torch.tensor(request.inputs, dtype=torch.float32, device=Settings.DEVICE)
218
+
219
+ # Split into batches if needed
220
+ batch_size = request.batch_size or Settings.MAX_BATCH_SIZE
221
+ if len(inputs) > batch_size:
222
+ batches = torch.split(inputs, batch_size)
223
+ outputs = []
224
+ for batch in batches:
225
+ batch_output = await process_tensor(chunk_id, batch)
226
+ outputs.append(batch_output)
227
+ output_tensor = torch.cat(outputs, dim=0)
228
+ else:
229
+ output_tensor = await process_tensor(chunk_id, inputs)
230
+
231
+ # Convert output to list
232
+ output_list = output_tensor.cpu().numpy().tolist()
233
+
234
+ # Update metrics
235
+ end_time = datetime.now()
236
+ processing_time = (end_time - start_time).total_seconds()
237
+ state.request_times.append(processing_time)
238
+ # Keep only last 100 request times
239
+ state.request_times = state.request_times[-100:]
240
+
241
+ return {
242
+ "outputs": output_list,
243
+ "processing_time": processing_time
244
+ }
245
+
246
+ except Exception as e:
247
+ state.error_count += 1
248
+ state.last_error = str(e)
249
+ raise HTTPException(status_code=500, detail=str(e))
250
+
251
+ finally:
252
+ state.active_requests -= 1
253
+
254
+ @app.on_event("startup")
255
+ async def startup_event():
256
+ """Start background tasks"""
257
+ asyncio.create_task(update_metrics_loop())
258
+
259
# ===== Main Execution =====
# Local-run entry point only: the Dockerfile CMD launches uvicorn directly on
# port 7860, so this path is used when running `python tensor_server.py`.
if __name__ == "__main__":
    port = int(os.getenv("PORT", 8001))  # Default to 8001 to avoid conflict with controller
    print(f"[INFO] Starting tensor server on port {port}")
    print(f"[INFO] Using device: {Settings.DEVICE}")
    print(f"[INFO] API Documentation available at http://localhost:{port}/docs")

    uvicorn.run(
        "tensor_server:app",
        host="0.0.0.0",
        port=port,
        reload=False
    )