I'll help you create Docker files and set up hosting for the DeepCoder model. Below are the necessary files for both the Docker setup and a potential Hugging Face Spaces deployment.
**Dockerfile**

```dockerfile
# Dockerfile for DeepCoder AI Model
FROM python:3.11-slim

# Set working directory
WORKDIR /app

# Install system dependencies
RUN apt-get update && apt-get install -y \
    curl \
    wget \
    git \
    && rm -rf /var/lib/apt/lists/*

# Install core Python ML dependencies (also pinned in requirements.txt below)
RUN pip install --no-cache-dir \
    torch \
    transformers \
    accelerate \
    bitsandbytes \
    huggingface_hub

# Create directories for model and cache
RUN mkdir -p /app/models /app/cache

# Set environment variables
ENV MODEL_NAME="ai/deepcoder-preview"
ENV MODEL_VARIANT="14B-Q4_K_M"
ENV HUGGINGFACE_HUB_CACHE="/app/cache"
ENV TRANSFORMERS_CACHE="/app/cache"

# Copy application files
COPY requirements.txt .
COPY app.py .
COPY download_model.py .

# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Download model during build (optional - can be done at runtime)
# RUN python download_model.py

# Expose port for API
EXPOSE 8000

# Health check (generous start period so the model has time to load)
HEALTHCHECK --interval=30s --timeout=30s --start-period=60s --retries=3 \
    CMD curl -f http://localhost:8000/health || exit 1

# Run the application
CMD ["python", "app.py"]
```
**docker-compose.yml**

```yaml
version: '3.8'

services:
  deepcoder-api:
    build:
      context: .
      dockerfile: Dockerfile
    container_name: deepcoder-model
    ports:
      - "8000:8000"
    environment:
      - MODEL_NAME=ai/deepcoder-preview
      - MODEL_VARIANT=14B-Q4_K_M
      - HUGGINGFACE_HUB_CACHE=/app/cache
      - CUDA_VISIBLE_DEVICES=0
    volumes:
      - ./models:/app/models
      - ./cache:/app/cache
      - ./logs:/app/logs
    restart: unless-stopped
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 40s

  nginx:
    image: nginx:alpine
    container_name: deepcoder-nginx
    ports:
      - "80:80"
      - "443:443"
    volumes:
      - ./nginx.conf:/etc/nginx/nginx.conf
      - ./ssl:/etc/nginx/ssl
    depends_on:
      - deepcoder-api
    restart: unless-stopped

# Named volumes (unused by default; the service mounts the local
# ./models, ./cache, and ./logs directories above instead)
volumes:
  models:
  cache:
  logs:
```
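
With both files in place, the whole stack comes up with Compose. A quick usage sketch:

```bash
# Build images and start the API plus the nginx proxy in the background
docker-compose up -d --build

# Follow the model server logs while the model loads
docker-compose logs -f deepcoder-api

# Tear everything down when finished
docker-compose down
```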
**app.py**

```python
#!/usr/bin/env python3
"""
DeepCoder Model API Server
Serves the DeepCoder-14B model via FastAPI
"""
import os
import logging
from typing import Optional, Dict, Any

import uvicorn
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel, Field
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Configuration
MODEL_NAME = os.getenv("MODEL_NAME", "ai/deepcoder-preview")
MODEL_VARIANT = os.getenv("MODEL_VARIANT", "14B-Q4_K_M")
CACHE_DIR = os.getenv("HUGGINGFACE_HUB_CACHE", "/app/cache")
MAX_TOKENS = 131072  # 131K context length
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

app = FastAPI(
    title="DeepCoder API",
    description="AI Code Generation Model API",
    version="1.0.0"
)

# CORS middleware
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Global model variables
tokenizer = None
model = None
model_loaded = False


class CodeRequest(BaseModel):
    prompt: str = Field(..., description="Code generation prompt")
    temperature: float = Field(0.6, ge=0.0, le=2.0, description="Sampling temperature")
    top_p: float = Field(0.95, ge=0.0, le=1.0, description="Top-p sampling")
    max_tokens: int = Field(2048, ge=1, le=8192, description="Maximum tokens to generate")
    stop_sequences: Optional[list] = Field(None, description="Stop sequences")


class CodeResponse(BaseModel):
    generated_code: str
    model_info: Dict[str, Any]
    generation_params: Dict[str, Any]

async def load_model():
    """Load the DeepCoder model and tokenizer"""
    global tokenizer, model, model_loaded
    if model_loaded:
        return
    try:
        logger.info(f"Loading model: {MODEL_NAME}")
        # Load tokenizer
        tokenizer = AutoTokenizer.from_pretrained(
            MODEL_NAME,
            cache_dir=CACHE_DIR,
            trust_remote_code=True
        )
        # Load model with appropriate settings for the quantized variant.
        # Note: load_in_4bit uses bitsandbytes 4-bit quantization (GPU only),
        # which is not the same format as GGUF Q4_K_M but has a similar footprint.
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            cache_dir=CACHE_DIR,
            trust_remote_code=True,
            torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
            device_map="auto" if DEVICE == "cuda" else None,
            load_in_4bit=("Q4" in MODEL_VARIANT and DEVICE == "cuda"),
        )
        if DEVICE == "cpu" and hasattr(model, 'to'):
            model = model.to(DEVICE)
        model_loaded = True
        logger.info(f"Model loaded successfully on {DEVICE}")
    except Exception as e:
        logger.error(f"Error loading model: {str(e)}")
        raise


@app.on_event("startup")
async def startup_event():
    """Load model on startup"""
    await load_model()

@app.get("/")
async def root():
    return {
        "message": "DeepCoder API",
        "model": MODEL_NAME,
        "variant": MODEL_VARIANT,
        "status": "ready" if model_loaded else "loading"
    }


@app.get("/health")
async def health_check():
    return {
        "status": "healthy" if model_loaded else "loading",
        "model_loaded": model_loaded,
        "device": DEVICE,
        "gpu_available": torch.cuda.is_available()
    }


@app.get("/model/info")
async def model_info():
    """Get model information"""
    if not model_loaded:
        raise HTTPException(status_code=503, detail="Model not loaded yet")
    return {
        "model_name": MODEL_NAME,
        "variant": MODEL_VARIANT,
        "max_context_length": MAX_TOKENS,
        "device": DEVICE,
        "model_size": "14B parameters",
        "quantization": "Q4_K_M" if "Q4" in MODEL_VARIANT else "None",
        "benchmarks": {
            "LiveCodeBench_v5_Pass@1": "60.6%",
            "Codeforces_Elo": 1936,
            "Codeforces_Percentile": "95.3",
            "HumanEval+_Accuracy": "92.6%"
        }
    }

@app.post("/generate", response_model=CodeResponse)
async def generate_code(request: CodeRequest):
    """Generate code using the DeepCoder model"""
    if not model_loaded:
        raise HTTPException(status_code=503, detail="Model not loaded yet")
    try:
        # Tokenize input, leaving room in the context window for generation
        inputs = tokenizer(
            request.prompt,
            return_tensors="pt",
            truncation=True,
            max_length=MAX_TOKENS - request.max_tokens
        )
        if DEVICE == "cuda":
            inputs = {k: v.to(DEVICE) for k, v in inputs.items()}
        # Generation parameters
        generation_kwargs = {
            "max_new_tokens": request.max_tokens,
            "temperature": request.temperature,
            "top_p": request.top_p,
            "do_sample": True,
            "pad_token_id": tokenizer.eos_token_id,
        }
        if request.stop_sequences:
            # generate() takes stop_strings (plus the tokenizer), not
            # stop_sequences; stop_strings requires transformers >= 4.39
            generation_kwargs["stop_strings"] = request.stop_sequences
            generation_kwargs["tokenizer"] = tokenizer
        # Generate
        with torch.no_grad():
            outputs = model.generate(**inputs, **generation_kwargs)
        # Decode only the newly generated tokens
        generated_tokens = outputs[0][inputs["input_ids"].shape[1]:]
        generated_code = tokenizer.decode(generated_tokens, skip_special_tokens=True)
        return CodeResponse(
            generated_code=generated_code,
            model_info={
                "model_name": MODEL_NAME,
                "variant": MODEL_VARIANT,
                "device": DEVICE
            },
            generation_params={
                "temperature": request.temperature,
                "top_p": request.top_p,
                "max_tokens": request.max_tokens
            }
        )
    except Exception as e:
        logger.error(f"Generation error: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Generation failed: {str(e)}")

@app.post("/chat", response_model=CodeResponse)
async def chat_completion(request: CodeRequest):
    """Chat-style completion for code assistance"""
    # Add system context for better code generation
    system_prompt = """You are DeepCoder, an expert AI programming assistant. Generate high-quality, well-commented code that follows best practices."""
    full_prompt = f"{system_prompt}\n\nUser: {request.prompt}\n\nAssistant:"
    # Create a modified request with the system prompt prepended
    modified_request = CodeRequest(
        prompt=full_prompt,
        temperature=request.temperature,
        top_p=request.top_p,
        max_tokens=request.max_tokens,
        stop_sequences=request.stop_sequences
    )
    return await generate_code(modified_request)


if __name__ == "__main__":
    uvicorn.run(
        "app:app",
        host="0.0.0.0",
        port=8000,
        reload=False,
        log_level="info"
    )
```
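
Once the server is up, the chat endpoint can be exercised with a plain curl call (a usage sketch; adjust host and port to your deployment):

```bash
curl -X POST http://localhost:8000/chat \
  -H 'Content-Type: application/json' \
  -d '{
        "prompt": "Write a Python function that checks whether a string is a palindrome.",
        "temperature": 0.6,
        "max_tokens": 300
      }'
```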
**requirements.txt**

```text
fastapi==0.104.1
uvicorn[standard]==0.24.0
torch>=2.0.0
transformers>=4.39.0  # stop_strings support in generate()
accelerate>=0.24.0
bitsandbytes>=0.41.0
huggingface_hub>=0.19.0
pydantic>=2.5.0
python-multipart==0.0.6
jinja2>=3.1.0
aiofiles>=23.0.0
nvidia-ml-py3>=7.352.0
psutil>=5.9.0
requests>=2.31.0
```
**download_model.py**

```python
#!/usr/bin/env python3
"""
Download script for DeepCoder model
Downloads and caches the model for faster container startup
"""
import os
import logging

from transformers import AutoTokenizer
from huggingface_hub import snapshot_download

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

MODEL_NAME = os.getenv("MODEL_NAME", "ai/deepcoder-preview")
CACHE_DIR = os.getenv("HUGGINGFACE_HUB_CACHE", "/app/cache")


def download_model():
    """Download the model and tokenizer"""
    try:
        logger.info(f"Downloading model: {MODEL_NAME}")
        # Download model files
        snapshot_download(
            repo_id=MODEL_NAME,
            cache_dir=CACHE_DIR,
            resume_download=True
        )
        # Verify by loading the tokenizer
        tokenizer = AutoTokenizer.from_pretrained(
            MODEL_NAME,
            cache_dir=CACHE_DIR,
            trust_remote_code=True
        )
        logger.info("Model downloaded successfully")
        logger.info(f"Vocab size: {tokenizer.vocab_size}")
        logger.info(f"Cache directory: {CACHE_DIR}")
        return True
    except Exception as e:
        logger.error(f"Error downloading model: {str(e)}")
        return False


if __name__ == "__main__":
    success = download_model()
    if not success:
        exit(1)
    logger.info("Download complete!")
```
**nginx.conf**

```nginx
events {
    worker_connections 1024;
}

http {
    upstream deepcoder_backend {
        server deepcoder-api:8000;
    }

    # Rate limiting
    limit_req_zone $binary_remote_addr zone=api:10m rate=10r/m;

    server {
        listen 80;
        server_name localhost;

        # Security headers
        add_header X-Frame-Options DENY;
        add_header X-Content-Type-Options nosniff;
        add_header X-XSS-Protection "1; mode=block";

        # Increase client max body size for large code submissions
        client_max_body_size 10M;

        # Timeouts for long-running generation requests
        proxy_connect_timeout 60s;
        proxy_send_timeout 300s;
        proxy_read_timeout 300s;

        location / {
            proxy_pass http://deepcoder_backend;
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-Forwarded-Proto $scheme;
        }

        location /generate {
            limit_req zone=api burst=5 nodelay;
            proxy_pass http://deepcoder_backend;
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-Forwarded-Proto $scheme;
        }

        location /health {
            proxy_pass http://deepcoder_backend;
            access_log off;
        }
    }
}
```
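
You can confirm the proxy and the 10-requests-per-minute limit on `/generate` are active with a quick loop; requests beyond the burst of 5 should come back as HTTP 503, nginx's default `limit_req` rejection status:

```bash
# Fire 8 requests in quick succession through nginx on port 80;
# the first few pass, the rest should be rejected by limit_req
for i in $(seq 1 8); do
  curl -s -o /dev/null -w "request $i -> %{http_code}\n" \
    -X POST http://localhost/generate \
    -H 'Content-Type: application/json' \
    -d '{"prompt": "pass", "max_tokens": 1}'
done
```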
**setup.sh**

```bash
#!/bin/bash
# setup.sh - Setup script for DeepCoder deployment
set -e

echo "🚀 DeepCoder Model Setup"
echo "========================"

# Create necessary directories
echo "📁 Creating directories..."
mkdir -p models cache logs ssl

# Set permissions
chmod 755 models cache logs
chmod 700 ssl

# Pull the DeepCoder model using Docker Model Runner
echo "📦 Pulling DeepCoder model..."
if command -v docker &> /dev/null; then
    # Assuming Docker Model Runner is available
    docker model pull ai/deepcoder-preview
else
    echo "⚠️ Docker not found. Please install Docker first."
    exit 1
fi

# Check for GPU support
echo "🔍 Checking GPU support..."
if command -v nvidia-smi &> /dev/null; then
    echo "✅ NVIDIA GPU detected:"
    nvidia-smi --query-gpu=name,memory.total --format=csv,noheader
    # Check for Docker GPU support
    if docker run --rm --gpus all nvidia/cuda:11.8.0-base-ubuntu22.04 nvidia-smi &> /dev/null; then
        echo "✅ Docker GPU support verified"
        export GPU_SUPPORT=true
    else
        echo "⚠️ Docker GPU support not available"
        export GPU_SUPPORT=false
    fi
else
    echo "⚠️ No GPU detected. Running on CPU."
    export GPU_SUPPORT=false
fi

# Build and start containers
echo "🏗️ Building Docker containers..."
docker-compose build

echo "🚀 Starting services..."
if [ "$GPU_SUPPORT" = true ]; then
    docker-compose up -d
else
    # Comment out GPU requirements for CPU-only deployment
    sed 's/devices:/# devices:/g' docker-compose.yml | \
        sed 's/- driver: nvidia/# - driver: nvidia/g' | \
        sed 's/count: 1/# count: 1/g' | \
        sed 's/capabilities: \[gpu\]/# capabilities: [gpu]/g' > docker-compose-cpu.yml
    docker-compose -f docker-compose-cpu.yml up -d
fi

# Wait for services to be ready
echo "⏳ Waiting for services to start..."
sleep 30

# Health check
echo "🏥 Performing health check..."
for i in {1..10}; do
    if curl -f http://localhost:8000/health > /dev/null 2>&1; then
        echo "✅ DeepCoder API is healthy!"
        break
    else
        echo "⏳ Waiting for API to be ready... (attempt $i/10)"
        sleep 10
    fi
done

# Show status
echo "📊 Service Status:"
docker-compose ps

echo ""
echo "🎉 DeepCoder setup complete!"
echo "API endpoint: http://localhost:8000"
echo "Health check: http://localhost:8000/health"
echo "Model info: http://localhost:8000/model/info"
echo ""
echo "To test the API:"
echo "curl -X POST http://localhost:8000/generate \\"
echo "  -H 'Content-Type: application/json' \\"
echo "  -d '{\"prompt\": \"def fibonacci(n):\", \"max_tokens\": 200}'"
```
Finally, a helper snippet (it can sit at the end of setup.sh or run standalone) that writes out **deploy-hf.sh** for Hugging Face Spaces:

```bash
###########################################
# deploy-hf.sh - Hugging Face Spaces deployment
###########################################
cat > deploy-hf.sh << 'EOL'
#!/bin/bash
# Deploy to Hugging Face Spaces
set -e

echo "🤗 Deploying to Hugging Face Spaces"
echo "===================================="

# Check if git is configured
if ! git config user.email > /dev/null; then
    echo "⚠️ Please configure git:"
    echo "git config --global user.email 'your-email@example.com'"
    echo "git config --global user.name 'Your Name'"
    exit 1
fi

# Check if HF_TOKEN is set
if [ -z "$HF_TOKEN" ]; then
    echo "⚠️ Please set your Hugging Face token:"
    echo "export HF_TOKEN=your_hf_token_here"
    exit 1
fi

SPACE_NAME=${1:-"deepcoder-api"}
HF_USERNAME=${2:-$(whoami)}

echo "Creating Space: $HF_USERNAME/$SPACE_NAME"

# Create Hugging Face Space files
cat > README.md << EOF
---
title: DeepCoder API
emoji: 🚀
colorFrom: blue
colorTo: green
sdk: docker
pinned: false
license: mit
---

# DeepCoder API

High-performance code generation API powered by the DeepCoder-14B model.

## Features
- 🎯 60.6% pass rate on LiveCodeBench v5
- 🏆 1936 Elo rating on Codeforces (95.3 percentile)
- 📈 92.6% accuracy on HumanEval+
- ⚡ 131K token context length
- 🔧 Optimized Q4_K_M quantization

## API Endpoints
- \`POST /generate\` - Generate code from prompts
- \`POST /chat\` - Chat-style code assistance
- \`GET /model/info\` - Model information
- \`GET /health\` - Health check

## Usage
\`\`\`bash
curl -X POST /generate \\
  -H 'Content-Type: application/json' \\
  -d '{"prompt": "def fibonacci(n):", "max_tokens": 200}'
\`\`\`
EOF

# Create Dockerfile for HF Spaces
cat > Dockerfile.hf << EOF
FROM python:3.11-slim
WORKDIR /app
RUN apt-get update && apt-get install -y curl git && rm -rf /var/lib/apt/lists/*
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
COPY . .
EXPOSE 7860
CMD ["python", "-m", "uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
EOF

# Update app.py for HF Spaces (port 7860)
sed 's/port=8000/port=7860/g' app.py > app_hf.py
mv app_hf.py app.py

# Initialize git repo if it does not exist
if [ ! -d .git ]; then
    git init
    git lfs install
fi

# Track large model files with git LFS
echo "*.bin filter=lfs diff=lfs merge=lfs -text" >> .gitattributes
echo "*.safetensors filter=lfs diff=lfs merge=lfs -text" >> .gitattributes

# Add remote if it does not exist, embedding the token so the push authenticates
if ! git remote get-url origin > /dev/null 2>&1; then
    git remote add origin https://$HF_USERNAME:$HF_TOKEN@huggingface.co/spaces/$HF_USERNAME/$SPACE_NAME
fi

# Commit and push
git add .
git commit -m "Initial DeepCoder API deployment" || true
git branch -M main
git push -u origin main

echo "✅ Deployed to: https://huggingface.co/spaces/$HF_USERNAME/$SPACE_NAME"
EOL

chmod +x deploy-hf.sh

echo "📝 Additional deployment script created: deploy-hf.sh"
```