likhonsheikh commited on
Commit
aba8087
Β·
verified Β·
1 Parent(s): 1dfc148

Upload 10 files

Browse files
Files changed (10) hide show
  1. Dockerfile +46 -0
  2. README.md +44 -10
  3. app.py +226 -0
  4. deploy-hf.sh +108 -0
  5. docker-compose.yml +51 -0
  6. download_model.py +51 -0
  7. nginx.conf +52 -0
  8. requirements.txt +14 -0
  9. setup-files.py +662 -0
  10. setup.sh +81 -0
Dockerfile ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Container image for the DeepCoder FastAPI service (see app.py).
FROM python:3.11-slim

# Set working directory
WORKDIR /app

# System packages: curl is required by the HEALTHCHECK below; wget/git are
# used when fetching model repositories.
RUN apt-get update && apt-get install -y --no-install-recommends \
    curl \
    wget \
    git \
    && rm -rf /var/lib/apt/lists/*

# Copy the dependency manifest first so this (slow) layer is cached until
# requirements.txt changes. requirements.txt already pins torch,
# transformers, accelerate, bitsandbytes and huggingface_hub, so one
# install replaces the previous duplicated ad-hoc `pip install` layer.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Create directories for model and cache
RUN mkdir -p /app/models /app/cache

# Runtime configuration; all overridable at `docker run` time.
ENV MODEL_NAME="ai/deepcoder-preview"
ENV MODEL_VARIANT="14B-Q4_K_M"
ENV HUGGINGFACE_HUB_CACHE="/app/cache"
# NOTE(review): TRANSFORMERS_CACHE is deprecated in recent transformers
# releases; kept for backward compatibility alongside HUGGINGFACE_HUB_CACHE.
ENV TRANSFORMERS_CACHE="/app/cache"

# Copy application files
COPY app.py .
COPY download_model.py .

# Expose port for API
EXPOSE 8000

# Mark the container unhealthy when the API stops answering /health.
HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
    CMD curl -f http://localhost:8000/health || exit 1

# Run the application
CMD ["python", "app.py"]
README.md CHANGED
@@ -1,10 +1,44 @@
1
- ---
2
- title: Model
3
- emoji: 😻
4
- colorFrom: yellow
5
- colorTo: purple
6
- sdk: docker
7
- pinned: false
8
- ---
9
-
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # DeepCoder Docker Deployment
2
+
3
+ Complete Docker setup for deploying the DeepCoder-14B AI code generation model.
4
+
5
## Quick Start

1. **Setup and Deploy:**

```bash
chmod +x setup.sh
./setup.sh
```

2. **Test the API:**

```bash
curl -X POST http://localhost:8000/generate \
  -H 'Content-Type: application/json' \
  -d '{"prompt": "def fibonacci(n):", "max_tokens": 200}'
```
19
+
20
+ ## Deployment Options
21
+
22
+ ### Local Docker
23
+ - Run `./setup.sh` for automatic setup
24
+ - Supports both GPU and CPU deployment
25
+ - Includes Nginx reverse proxy with rate limiting
26
+
27
+ ### Hugging Face Spaces
28
+ - Run `./deploy-hf.sh [space-name] [username]`
29
+ - Requires `HF_TOKEN` environment variable
30
+ - Automatically configures for HF Spaces (port 7860)
31
+
32
+ ## API Endpoints
33
+
34
+ - `POST /generate` - Generate code from prompts
35
+ - `POST /chat` - Chat-style code assistance
36
+ - `GET /model/info` - Model benchmarks and info
37
+ - `GET /health` - Health check
38
+
39
+ ## Requirements
40
+
41
+ - Docker & Docker Compose
42
+ - 16GB+ RAM (32GB recommended)
43
+ - NVIDIA GPU with 8GB+ VRAM (optional, falls back to CPU)
44
+ - 50GB+ disk space for model cache
app.py ADDED
@@ -0,0 +1,226 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/usr/bin/env python3
"""
DeepCoder Model API Server

Serves the DeepCoder-14B model via FastAPI.

Endpoints:
    GET  /            -- service banner and load status
    GET  /health      -- liveness/readiness probe (used by Docker HEALTHCHECK)
    GET  /model/info  -- static model metadata and published benchmarks
    POST /generate    -- raw prompt completion
    POST /chat        -- completion wrapped in a chat-style system prompt
"""

import asyncio
import logging
import os
from typing import Any, Dict, Optional

import torch
import uvicorn
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel, Field
from transformers import AutoTokenizer, AutoModelForCausalLM

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Configuration -- all overridable through the environment (see Dockerfile).
MODEL_NAME = os.getenv("MODEL_NAME", "ai/deepcoder-preview")
MODEL_VARIANT = os.getenv("MODEL_VARIANT", "14B-Q4_K_M")
CACHE_DIR = os.getenv("HUGGINGFACE_HUB_CACHE", "/app/cache")
MAX_TOKENS = 131072  # 131K context length
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

app = FastAPI(
    title="DeepCoder API",
    description="AI Code Generation Model API",
    version="1.0.0"
)

# CORS is wide open on purpose: the nginx reverse proxy in front of this
# service (see nginx.conf) is responsible for access control/rate limiting.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Global model state, populated exactly once by load_model() at startup.
tokenizer = None
model = None
model_loaded = False


class CodeRequest(BaseModel):
    """Request body shared by /generate and /chat."""

    prompt: str = Field(..., description="Code generation prompt")
    temperature: float = Field(0.6, ge=0.0, le=2.0, description="Sampling temperature")
    top_p: float = Field(0.95, ge=0.0, le=1.0, description="Top-p sampling")
    max_tokens: int = Field(2048, ge=1, le=8192, description="Maximum tokens to generate")
    stop_sequences: Optional[list] = Field(None, description="Stop sequences")


class CodeResponse(BaseModel):
    """Response body for /generate (and /chat, which delegates to it)."""

    generated_code: str
    model_info: Dict[str, Any]
    generation_params: Dict[str, Any]


async def load_model():
    """Load the DeepCoder model and tokenizer into the module globals."""
    global tokenizer, model, model_loaded

    if model_loaded:
        return

    try:
        logger.info(f"Loading model: {MODEL_NAME}")

        # Load tokenizer
        tokenizer = AutoTokenizer.from_pretrained(
            MODEL_NAME,
            cache_dir=CACHE_DIR,
            trust_remote_code=True
        )

        # 4-bit quantization requires bitsandbytes, which is CUDA-only, so
        # it is requested only for Q4 variants actually running on a GPU
        # (previously it was also passed on CPU, where loading would fail).
        model_kwargs = {
            "cache_dir": CACHE_DIR,
            "trust_remote_code": True,
            "torch_dtype": torch.float16 if DEVICE == "cuda" else torch.float32,
            "device_map": "auto" if DEVICE == "cuda" else None,
        }
        if "Q4" in MODEL_VARIANT and DEVICE == "cuda":
            model_kwargs["load_in_4bit"] = True

        model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, **model_kwargs)

        if DEVICE == "cpu" and hasattr(model, 'to'):
            model = model.to(DEVICE)

        model_loaded = True
        logger.info(f"Model loaded successfully on {DEVICE}")

    except Exception as e:
        logger.error(f"Error loading model: {str(e)}")
        raise


@app.on_event("startup")
async def startup_event():
    """Load model on startup so the first request does not pay the cost."""
    await load_model()


@app.get("/")
async def root():
    """Service banner with the current load status."""
    return {
        "message": "DeepCoder API",
        "model": MODEL_NAME,
        "variant": MODEL_VARIANT,
        "status": "ready" if model_loaded else "loading"
    }


@app.get("/health")
async def health_check():
    """Health probe consumed by the Docker HEALTHCHECK and nginx."""
    return {
        "status": "healthy" if model_loaded else "loading",
        "model_loaded": model_loaded,
        "device": DEVICE,
        "gpu_available": torch.cuda.is_available()
    }


@app.get("/model/info")
async def model_info():
    """Get model information"""
    if not model_loaded:
        raise HTTPException(status_code=503, detail="Model not loaded yet")

    return {
        "model_name": MODEL_NAME,
        "variant": MODEL_VARIANT,
        "max_context_length": MAX_TOKENS,
        "device": DEVICE,
        "model_size": "14B parameters",
        "quantization": "Q4_K_M" if "Q4" in MODEL_VARIANT else "None",
        "benchmarks": {
            "LiveCodeBench_v5_Pass@1": "60.6%",
            "Codeforces_Elo": 1936,
            "Codeforces_Percentile": "95.3",
            "HumanEval+_Accuracy": "92.6%"
        }
    }


def _apply_stop_sequences(text: str, stop_sequences: Optional[list]) -> str:
    """Truncate *text* at the earliest occurrence of any stop sequence.

    transformers' ``model.generate`` has no ``stop_sequences`` keyword --
    forwarding one makes its kwarg validation raise, so stopping is
    emulated by cutting the decoded output instead.
    """
    if not stop_sequences:
        return text
    cut = len(text)
    for stop in stop_sequences:
        idx = text.find(stop)
        if idx != -1:
            cut = min(cut, idx)
    return text[:cut]


def _generate_sync(request: CodeRequest) -> str:
    """Blocking tokenization + generation; run off the event loop."""
    # Reserve room for the completion inside the model's context window.
    inputs = tokenizer(
        request.prompt,
        return_tensors="pt",
        truncation=True,
        max_length=MAX_TOKENS - request.max_tokens
    )

    if DEVICE == "cuda":
        inputs = {k: v.to(DEVICE) for k, v in inputs.items()}

    generation_kwargs = {
        "max_new_tokens": request.max_tokens,
        "temperature": request.temperature,
        "top_p": request.top_p,
        "do_sample": True,
        "pad_token_id": tokenizer.eos_token_id,
    }

    with torch.no_grad():
        outputs = model.generate(**inputs, **generation_kwargs)

    # Strip the prompt tokens and decode only the completion.
    generated_tokens = outputs[0][inputs["input_ids"].shape[1]:]
    generated_code = tokenizer.decode(generated_tokens, skip_special_tokens=True)
    return _apply_stop_sequences(generated_code, request.stop_sequences)


@app.post("/generate", response_model=CodeResponse)
async def generate_code(request: CodeRequest):
    """Generate code using the DeepCoder model"""
    if not model_loaded:
        raise HTTPException(status_code=503, detail="Model not loaded yet")

    try:
        # Generation is compute-bound and blocking; run it in a worker
        # thread so the event loop keeps serving /health meanwhile.
        generated_code = await asyncio.to_thread(_generate_sync, request)

        return CodeResponse(
            generated_code=generated_code,
            model_info={
                "model_name": MODEL_NAME,
                "variant": MODEL_VARIANT,
                "device": DEVICE
            },
            generation_params={
                "temperature": request.temperature,
                "top_p": request.top_p,
                "max_tokens": request.max_tokens
            }
        )

    except Exception as e:
        logger.error(f"Generation error: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Generation failed: {str(e)}")


@app.post("/chat")
async def chat_completion(request: CodeRequest):
    """Chat-style completion for code assistance"""
    # Add system context for better code generation
    system_prompt = """You are DeepCoder, an expert AI programming assistant. Generate high-quality, well-commented code that follows best practices."""

    full_prompt = f"{system_prompt}\n\nUser: {request.prompt}\n\nAssistant:"

    # Create modified request with system prompt
    modified_request = CodeRequest(
        prompt=full_prompt,
        temperature=request.temperature,
        top_p=request.top_p,
        max_tokens=request.max_tokens,
        stop_sequences=request.stop_sequences
    )

    return await generate_code(modified_request)


if __name__ == "__main__":
    # NOTE: deploy-hf.sh rewrites `port=8000` to 7860 for HF Spaces; keep
    # this literal intact.
    uvicorn.run(
        "app:app",
        host="0.0.0.0",
        port=8000,
        reload=False,
        log_level="info"
    )
deploy-hf.sh ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/bin/bash
# Deploy the DeepCoder API to a Hugging Face Space.
#
# Usage: ./deploy-hf.sh [space-name] [hf-username]
# Requires: git + git-lfs, and an HF_TOKEN environment variable holding a
# token with write access to the target Space.

set -e

echo "🤗 Deploying to Hugging Face Spaces"
echo "===================================="

# Check if git is configured
if ! git config user.email > /dev/null; then
    echo "⚠️ Please configure git:"
    echo "git config --global user.email 'your-email@example.com'"
    echo "git config --global user.name 'Your Name'"
    exit 1
fi

# Check if HF_TOKEN is set
if [ -z "$HF_TOKEN" ]; then
    echo "⚠️ Please set your Hugging Face token:"
    echo "export HF_TOKEN=your_hf_token_here"
    exit 1
fi

SPACE_NAME=${1:-"deepcoder-api"}
HF_USERNAME=${2:-$(whoami)}

echo "Creating Space: $HF_USERNAME/$SPACE_NAME"

# Space README with HF Spaces front-matter. The heredoc delimiter is
# deliberately unquoted; backticks are escaped so they survive as literal
# markdown instead of triggering command substitution.
cat > README.md << EOF
---
title: DeepCoder API
emoji: 🚀
colorFrom: blue
colorTo: green
sdk: docker
pinned: false
license: mit
---

# DeepCoder API

High-performance code generation API powered by DeepCoder-14B model.

## Features
- 🎯 60.6% pass rate on LiveCodeBench v5
- 🏆 1936 Elo rating on Codeforces (95.3 percentile)
- 📝 92.6% accuracy on HumanEval+
- ⚡ 131K token context length
- 🔧 Optimized Q4_K_M quantization

## API Endpoints
- \`POST /generate\` - Generate code from prompts
- \`POST /chat\` - Chat-style code assistance
- \`GET /model/info\` - Model information
- \`GET /health\` - Health check

## Usage
\`\`\`bash
curl -X POST /generate \\
  -H 'Content-Type: application/json' \\
  -d '{"prompt": "def fibonacci(n):", "max_tokens": 200}'
\`\`\`
EOF

# Create Dockerfile for HF Spaces
cat > Dockerfile.hf << EOF
FROM python:3.11-slim

WORKDIR /app

RUN apt-get update && apt-get install -y curl git && rm -rf /var/lib/apt/lists/*

COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY . .

EXPOSE 7860

CMD ["python", "-m", "uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
EOF

# HF Spaces builds the file named `Dockerfile`, so install the Spaces
# variant in its place (previously Dockerfile.hf was generated but never
# used by the Space).
cp Dockerfile.hf Dockerfile

# Update app.py for HF Spaces (port 7860)
sed 's/port=8000/port=7860/g' app.py > app_hf.py
mv app_hf.py app.py

# Initialize git repo if not exists
if [ ! -d .git ]; then
    git init
    git lfs install
fi

# Track large model files with git LFS (idempotent across re-runs: skip
# patterns that are already present instead of appending duplicates).
for pattern in "*.bin" "*.safetensors"; do
    line="$pattern filter=lfs diff=lfs merge=lfs -text"
    grep -qxF "$line" .gitattributes 2>/dev/null || echo "$line" >> .gitattributes
done

# Push over HTTPS with the token embedded for non-interactive auth; a bare
# https remote would prompt for credentials and fail in CI.
REMOTE_URL="https://user:${HF_TOKEN}@huggingface.co/spaces/${HF_USERNAME}/${SPACE_NAME}"
if git remote get-url origin > /dev/null 2>&1; then
    git remote set-url origin "$REMOTE_URL"
else
    git remote add origin "$REMOTE_URL"
fi

# Commit and push
git add .
git commit -m "Initial DeepCoder API deployment" || true
git push -u origin main

echo "✅ Deployed to: https://huggingface.co/spaces/$HF_USERNAME/$SPACE_NAME"
docker-compose.yml ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Compose stack: the DeepCoder FastAPI service plus an nginx reverse proxy
# that fronts it with rate limiting (see nginx.conf).
version: '3.8'

services:
  deepcoder-api:
    build:
      context: .
      dockerfile: Dockerfile
    container_name: deepcoder-model
    ports:
      - "8000:8000"
    environment:
      # Mirrors the defaults baked into the Dockerfile; override here.
      - MODEL_NAME=ai/deepcoder-preview
      - MODEL_VARIANT=14B-Q4_K_M
      - HUGGINGFACE_HUB_CACHE=/app/cache
      - CUDA_VISIBLE_DEVICES=0
    volumes:
      # Host bind mounts so model weights/cache survive container rebuilds.
      - ./models:/app/models
      - ./cache:/app/cache
      - ./logs:/app/logs
    restart: unless-stopped
    deploy:
      resources:
        reservations:
          devices:
            # Requires the NVIDIA container toolkit; remove for CPU-only hosts.
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 40s  # model loading dominates startup time

  nginx:
    image: nginx:alpine
    container_name: deepcoder-nginx
    ports:
      - "80:80"
      - "443:443"
    volumes:
      - ./nginx.conf:/etc/nginx/nginx.conf
      - ./ssl:/etc/nginx/ssl
    depends_on:
      - deepcoder-api
    restart: unless-stopped

# NOTE(review): the top-level named volumes (models/cache/logs) previously
# declared here were never referenced -- every service uses host bind
# mounts -- so the dead `volumes:` section has been removed.
download_model.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/usr/bin/env python3
"""
Download script for DeepCoder model

Downloads and caches the model so container startup does not have to, then
verifies the download by loading the tokenizer from the cache.
"""

import logging
import os
import sys

from huggingface_hub import snapshot_download
from transformers import AutoTokenizer

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Both values mirror the defaults baked into the Dockerfile.
MODEL_NAME = os.getenv("MODEL_NAME", "ai/deepcoder-preview")
CACHE_DIR = os.getenv("HUGGINGFACE_HUB_CACHE", "/app/cache")


def download_model() -> bool:
    """Download the model and tokenizer.

    Returns:
        True on success, False on any download/verification error.
    """
    try:
        logger.info(f"Downloading model: {MODEL_NAME}")

        # Download all model files into the shared cache. Interrupted
        # downloads resume by default in current huggingface_hub, and the
        # deprecated `resume_download` flag has been dropped.
        snapshot_download(
            repo_id=MODEL_NAME,
            cache_dir=CACHE_DIR,
        )

        # Verify by loading tokenizer
        tokenizer = AutoTokenizer.from_pretrained(
            MODEL_NAME,
            cache_dir=CACHE_DIR,
            trust_remote_code=True
        )

        logger.info("Model downloaded successfully")
        logger.info(f"Vocab size: {tokenizer.vocab_size}")
        logger.info(f"Cache directory: {CACHE_DIR}")

        return True

    except Exception as e:
        logger.error(f"Error downloading model: {str(e)}")
        return False


if __name__ == "__main__":
    if not download_model():
        sys.exit(1)
    logger.info("Download complete!")
nginx.conf ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
events {
    worker_connections 1024;
}

http {
    # Upstream: the FastAPI container defined in docker-compose.yml.
    upstream deepcoder_backend {
        server deepcoder-api:8000;
    }

    # Rate limiting: at most 10 requests per minute per client IP.
    limit_req_zone $binary_remote_addr zone=api:10m rate=10r/m;

    server {
        listen 80;
        server_name localhost;

        # Security headers. `always` attaches them to error responses
        # (4xx/5xx) as well, not only to successful ones.
        add_header X-Frame-Options DENY always;
        add_header X-Content-Type-Options nosniff always;
        add_header X-XSS-Protection "1; mode=block" always;

        # Increase client max body size for large code submissions
        client_max_body_size 10M;

        # Generation can take minutes; keep proxy timeouts generous.
        proxy_connect_timeout 60s;
        proxy_send_timeout 300s;
        proxy_read_timeout 300s;

        # Default: proxy everything to the API.
        location / {
            proxy_pass http://deepcoder_backend;
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-Forwarded-Proto $scheme;
        }

        # The expensive endpoint gets the rate limit (burst of 5, no delay).
        location /generate {
            limit_req zone=api burst=5 nodelay;
            proxy_pass http://deepcoder_backend;
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-Forwarded-Proto $scheme;
        }

        # Health checks are frequent and uninteresting; keep them out of logs.
        location /health {
            proxy_pass http://deepcoder_backend;
            access_log off;
        }
    }
}
requirements.txt ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi==0.104.1
2
+ uvicorn[standard]==0.24.0
3
+ torch>=2.0.0
4
+ transformers>=4.35.0
5
+ accelerate>=0.24.0
6
+ bitsandbytes>=0.41.0
7
+ huggingface_hub>=0.19.0
8
+ pydantic>=2.5.0
9
+ python-multipart==0.0.6
10
+ jinja2>=3.1.0
11
+ aiofiles>=23.0.0
12
+ nvidia-ml-py3>=7.352.0
13
+ psutil>=5.9.0
14
+ requests>=2.31.0
setup-files.py ADDED
@@ -0,0 +1,662 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ I'll help you create Docker files and set up hosting for the DeepCoder model. Let me create the necessary files for both Docker setup and potential Hugging Face deployment.
2
+
3
+ # Dockerfile for DeepCoder AI Model
4
+ FROM python:3.11-slim
5
+
6
+ # Set working directory
7
+ WORKDIR /app
8
+
9
+ # Install system dependencies
10
+ RUN apt-get update && apt-get install -y \
11
+ curl \
12
+ wget \
13
+ git \
14
+ && rm -rf /var/lib/apt/lists/*
15
+
16
+ # Install Docker Model Runner (assuming it's a Python package or CLI tool)
17
+ RUN pip install --no-cache-dir \
18
+ torch \
19
+ transformers \
20
+ accelerate \
21
+ bitsandbytes \
22
+ huggingface_hub
23
+
24
+ # Create directories for model and cache
25
+ RUN mkdir -p /app/models /app/cache
26
+
27
+ # Set environment variables
28
+ ENV MODEL_NAME="ai/deepcoder-preview"
29
+ ENV MODEL_VARIANT="14B-Q4_K_M"
30
+ ENV HUGGINGFACE_HUB_CACHE="/app/cache"
31
+ ENV TRANSFORMERS_CACHE="/app/cache"
32
+
33
+ # Copy application files
34
+ COPY requirements.txt .
35
+ COPY app.py .
36
+ COPY download_model.py .
37
+
38
+ # Install Python dependencies
39
+ RUN pip install --no-cache-dir -r requirements.txt
40
+
41
+ # Download model during build (optional - can be done at runtime)
42
+ # RUN python download_model.py
43
+
44
+ # Expose port for API
45
+ EXPOSE 8000
46
+
47
+ # Health check
48
+ HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
49
+ CMD curl -f http://localhost:8000/health || exit 1
50
+
51
+ # Run the application
52
+ CMD ["python", "app.py"]
53
+
54
+ version: '3.8'
55
+
56
+ services:
57
+ deepcoder-api:
58
+ build:
59
+ context: .
60
+ dockerfile: Dockerfile
61
+ container_name: deepcoder-model
62
+ ports:
63
+ - "8000:8000"
64
+ environment:
65
+ - MODEL_NAME=ai/deepcoder-preview
66
+ - MODEL_VARIANT=14B-Q4_K_M
67
+ - HUGGINGFACE_HUB_CACHE=/app/cache
68
+ - CUDA_VISIBLE_DEVICES=0
69
+ volumes:
70
+ - ./models:/app/models
71
+ - ./cache:/app/cache
72
+ - ./logs:/app/logs
73
+ restart: unless-stopped
74
+ deploy:
75
+ resources:
76
+ reservations:
77
+ devices:
78
+ - driver: nvidia
79
+ count: 1
80
+ capabilities: [gpu]
81
+ healthcheck:
82
+ test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
83
+ interval: 30s
84
+ timeout: 10s
85
+ retries: 3
86
+ start_period: 40s
87
+
88
+ nginx:
89
+ image: nginx:alpine
90
+ container_name: deepcoder-nginx
91
+ ports:
92
+ - "80:80"
93
+ - "443:443"
94
+ volumes:
95
+ - ./nginx.conf:/etc/nginx/nginx.conf
96
+ - ./ssl:/etc/nginx/ssl
97
+ depends_on:
98
+ - deepcoder-api
99
+ restart: unless-stopped
100
+
101
+ volumes:
102
+ models:
103
+ cache:
104
+ logs:
105
+
106
+ #!/usr/bin/env python3
107
+ """
108
+ DeepCoder Model API Server
109
+ Serves the DeepCoder-14B model via FastAPI
110
+ """
111
+
112
+ import os
113
+ import asyncio
114
+ import logging
115
+ from typing import Optional, Dict, Any
116
+ import uvicorn
117
+ from fastapi import FastAPI, HTTPException, BackgroundTasks
118
+ from fastapi.middleware.cors import CORSMiddleware
119
+ from pydantic import BaseModel, Field
120
+ import torch
121
+ from transformers import AutoTokenizer, AutoModelForCausalLM
122
+ from huggingface_hub import hf_hub_download
123
+ import json
124
+
125
+ # Configure logging
126
+ logging.basicConfig(level=logging.INFO)
127
+ logger = logging.getLogger(__name__)
128
+
129
+ # Configuration
130
+ MODEL_NAME = os.getenv("MODEL_NAME", "ai/deepcoder-preview")
131
+ MODEL_VARIANT = os.getenv("MODEL_VARIANT", "14B-Q4_K_M")
132
+ CACHE_DIR = os.getenv("HUGGINGFACE_HUB_CACHE", "/app/cache")
133
+ MAX_TOKENS = 131072 # 131K context length
134
+ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
135
+
136
+ app = FastAPI(
137
+ title="DeepCoder API",
138
+ description="AI Code Generation Model API",
139
+ version="1.0.0"
140
+ )
141
+
142
+ # CORS middleware
143
+ app.add_middleware(
144
+ CORSMiddleware,
145
+ allow_origins=["*"],
146
+ allow_credentials=True,
147
+ allow_methods=["*"],
148
+ allow_headers=["*"],
149
+ )
150
+
151
+ # Global model variables
152
+ tokenizer = None
153
+ model = None
154
+ model_loaded = False
155
+
156
+ class CodeRequest(BaseModel):
157
+ prompt: str = Field(..., description="Code generation prompt")
158
+ temperature: float = Field(0.6, ge=0.0, le=2.0, description="Sampling temperature")
159
+ top_p: float = Field(0.95, ge=0.0, le=1.0, description="Top-p sampling")
160
+ max_tokens: int = Field(2048, ge=1, le=8192, description="Maximum tokens to generate")
161
+ stop_sequences: Optional[list] = Field(None, description="Stop sequences")
162
+
163
+ class CodeResponse(BaseModel):
164
+ generated_code: str
165
+ model_info: Dict[str, Any]
166
+ generation_params: Dict[str, Any]
167
+
168
+ async def load_model():
169
+ """Load the DeepCoder model and tokenizer"""
170
+ global tokenizer, model, model_loaded
171
+
172
+ if model_loaded:
173
+ return
174
+
175
+ try:
176
+ logger.info(f"Loading model: {MODEL_NAME}")
177
+
178
+ # Load tokenizer
179
+ tokenizer = AutoTokenizer.from_pretrained(
180
+ MODEL_NAME,
181
+ cache_dir=CACHE_DIR,
182
+ trust_remote_code=True
183
+ )
184
+
185
+ # Load model with appropriate settings for the quantized version
186
+ model = AutoModelForCausalLM.from_pretrained(
187
+ MODEL_NAME,
188
+ cache_dir=CACHE_DIR,
189
+ trust_remote_code=True,
190
+ torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
191
+ device_map="auto" if DEVICE == "cuda" else None,
192
+ load_in_4bit=True if "Q4" in MODEL_VARIANT else False,
193
+ )
194
+
195
+ if DEVICE == "cpu" and hasattr(model, 'to'):
196
+ model = model.to(DEVICE)
197
+
198
+ model_loaded = True
199
+ logger.info(f"Model loaded successfully on {DEVICE}")
200
+
201
+ except Exception as e:
202
+ logger.error(f"Error loading model: {str(e)}")
203
+ raise
204
+
205
+ @app.on_event("startup")
206
+ async def startup_event():
207
+ """Load model on startup"""
208
+ await load_model()
209
+
210
+ @app.get("/")
211
+ async def root():
212
+ return {
213
+ "message": "DeepCoder API",
214
+ "model": MODEL_NAME,
215
+ "variant": MODEL_VARIANT,
216
+ "status": "ready" if model_loaded else "loading"
217
+ }
218
+
219
+ @app.get("/health")
220
+ async def health_check():
221
+ return {
222
+ "status": "healthy" if model_loaded else "loading",
223
+ "model_loaded": model_loaded,
224
+ "device": DEVICE,
225
+ "gpu_available": torch.cuda.is_available()
226
+ }
227
+
228
+ @app.get("/model/info")
229
+ async def model_info():
230
+ """Get model information"""
231
+ if not model_loaded:
232
+ raise HTTPException(status_code=503, detail="Model not loaded yet")
233
+
234
+ return {
235
+ "model_name": MODEL_NAME,
236
+ "variant": MODEL_VARIANT,
237
+ "max_context_length": MAX_TOKENS,
238
+ "device": DEVICE,
239
+ "model_size": "14B parameters",
240
+ "quantization": "Q4_K_M" if "Q4" in MODEL_VARIANT else "None",
241
+ "benchmarks": {
242
+ "LiveCodeBench_v5_Pass@1": "60.6%",
243
+ "Codeforces_Elo": 1936,
244
+ "Codeforces_Percentile": "95.3",
245
+ "HumanEval+_Accuracy": "92.6%"
246
+ }
247
+ }
248
+
249
+ @app.post("/generate", response_model=CodeResponse)
250
+ async def generate_code(request: CodeRequest):
251
+ """Generate code using the DeepCoder model"""
252
+ if not model_loaded:
253
+ raise HTTPException(status_code=503, detail="Model not loaded yet")
254
+
255
+ try:
256
+ # Tokenize input
257
+ inputs = tokenizer(
258
+ request.prompt,
259
+ return_tensors="pt",
260
+ truncation=True,
261
+ max_length=MAX_TOKENS - request.max_tokens
262
+ )
263
+
264
+ if DEVICE == "cuda":
265
+ inputs = {k: v.to(DEVICE) for k, v in inputs.items()}
266
+
267
+ # Generation parameters
268
+ generation_kwargs = {
269
+ "max_new_tokens": request.max_tokens,
270
+ "temperature": request.temperature,
271
+ "top_p": request.top_p,
272
+ "do_sample": True,
273
+ "pad_token_id": tokenizer.eos_token_id,
274
+ }
275
+
276
+ if request.stop_sequences:
277
+ generation_kwargs["stop_sequences"] = request.stop_sequences
278
+
279
+ # Generate
280
+ with torch.no_grad():
281
+ outputs = model.generate(**inputs, **generation_kwargs)
282
+
283
+ # Decode output
284
+ generated_tokens = outputs[0][inputs["input_ids"].shape[1]:]
285
+ generated_code = tokenizer.decode(generated_tokens, skip_special_tokens=True)
286
+
287
+ return CodeResponse(
288
+ generated_code=generated_code,
289
+ model_info={
290
+ "model_name": MODEL_NAME,
291
+ "variant": MODEL_VARIANT,
292
+ "device": DEVICE
293
+ },
294
+ generation_params={
295
+ "temperature": request.temperature,
296
+ "top_p": request.top_p,
297
+ "max_tokens": request.max_tokens
298
+ }
299
+ )
300
+
301
+ except Exception as e:
302
+ logger.error(f"Generation error: {str(e)}")
303
+ raise HTTPException(status_code=500, detail=f"Generation failed: {str(e)}")
304
+
305
+ @app.post("/chat")
306
+ async def chat_completion(request: CodeRequest):
307
+ """Chat-style completion for code assistance"""
308
+ # Add system context for better code generation
309
+ system_prompt = """You are DeepCoder, an expert AI programming assistant. Generate high-quality, well-commented code that follows best practices."""
310
+
311
+ full_prompt = f"{system_prompt}\n\nUser: {request.prompt}\n\nAssistant:"
312
+
313
+ # Create modified request with system prompt
314
+ modified_request = CodeRequest(
315
+ prompt=full_prompt,
316
+ temperature=request.temperature,
317
+ top_p=request.top_p,
318
+ max_tokens=request.max_tokens,
319
+ stop_sequences=request.stop_sequences
320
+ )
321
+
322
+ return await generate_code(modified_request)
323
+
324
+ if __name__ == "__main__":
325
+ uvicorn.run(
326
+ "app:app",
327
+ host="0.0.0.0",
328
+ port=8000,
329
+ reload=False,
330
+ log_level="info"
331
+ )
332
+
333
+ fastapi==0.104.1
334
+ uvicorn[standard]==0.24.0
335
+ torch>=2.0.0
336
+ transformers>=4.35.0
337
+ accelerate>=0.24.0
338
+ bitsandbytes>=0.41.0
339
+ huggingface_hub>=0.19.0
340
+ pydantic>=2.5.0
341
+ python-multipart==0.0.6
342
+ jinja2>=3.1.0
343
+ aiofiles>=23.0.0
344
+ nvidia-ml-py3>=7.352.0
345
+ psutil>=5.9.0
346
+ requests>=2.31.0
347
+
348
+ #!/usr/bin/env python3
349
+ """
350
+ Download script for DeepCoder model
351
+ Downloads and caches the model for faster container startup
352
+ """
353
+
354
+ import os
355
+ import logging
356
+ from transformers import AutoTokenizer, AutoModelForCausalLM
357
+ from huggingface_hub import snapshot_download
358
+
359
+ logging.basicConfig(level=logging.INFO)
360
+ logger = logging.getLogger(__name__)
361
+
362
+ MODEL_NAME = os.getenv("MODEL_NAME", "ai/deepcoder-preview")
363
+ CACHE_DIR = os.getenv("HUGGINGFACE_HUB_CACHE", "/app/cache")
364
+
365
+ def download_model():
366
+ """Download the model and tokenizer"""
367
+ try:
368
+ logger.info(f"Downloading model: {MODEL_NAME}")
369
+
370
+ # Download model files
371
+ snapshot_download(
372
+ repo_id=MODEL_NAME,
373
+ cache_dir=CACHE_DIR,
374
+ resume_download=True
375
+ )
376
+
377
+ # Verify by loading tokenizer
378
+ tokenizer = AutoTokenizer.from_pretrained(
379
+ MODEL_NAME,
380
+ cache_dir=CACHE_DIR,
381
+ trust_remote_code=True
382
+ )
383
+
384
+ logger.info("Model downloaded successfully")
385
+ logger.info(f"Vocab size: {tokenizer.vocab_size}")
386
+ logger.info(f"Cache directory: {CACHE_DIR}")
387
+
388
+ return True
389
+
390
+ except Exception as e:
391
+ logger.error(f"Error downloading model: {str(e)}")
392
+ return False
393
+
394
+ if __name__ == "__main__":
395
+ success = download_model()
396
+ if not success:
397
+ exit(1)
398
+ logger.info("Download complete!")
399
+
400
# Nginx reverse proxy in front of the DeepCoder FastAPI backend.

events {
    worker_connections 1024;
}

http {
    # Single upstream: the API container on the compose network.
    upstream deepcoder_backend {
        server deepcoder-api:8000;
    }

    # Rate limiting: 10 requests/minute per client IP, 10 MB shared zone.
    limit_req_zone $binary_remote_addr zone=api:10m rate=10r/m;

    server {
        listen 80;
        server_name localhost;

        # Security headers
        add_header X-Frame-Options DENY;
        add_header X-Content-Type-Options nosniff;
        add_header X-XSS-Protection "1; mode=block";

        # Increase client max body size for large code submissions
        client_max_body_size 10M;

        # Timeouts for long-running generation requests
        proxy_connect_timeout 60s;
        proxy_send_timeout 300s;
        proxy_read_timeout 300s;

        # Default: proxy everything to the backend unmodified.
        location / {
            proxy_pass http://deepcoder_backend;
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-Forwarded-Proto $scheme;
        }

        # Generation endpoint is the expensive one: apply the rate limit
        # (bursts of up to 5 requests served without delay).
        location /generate {
            limit_req zone=api burst=5 nodelay;
            proxy_pass http://deepcoder_backend;
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-Forwarded-Proto $scheme;
        }

        # Health checks are frequent; keep them out of the access log.
        location /health {
            proxy_pass http://deepcoder_backend;
            access_log off;
        }
    }
}
453
#!/bin/bash
# setup.sh - Setup script for DeepCoder deployment
#
# Creates runtime directories, pulls the model, detects GPU support,
# builds/starts the Docker Compose stack, and waits for the API to
# become healthy (failing loudly if it never does).

set -e

echo "πŸš€ DeepCoder Model Setup"
echo "========================"

# Create necessary directories
echo "πŸ“ Creating directories..."
mkdir -p models cache logs ssl

# Set permissions (ssl kept private to the owner)
chmod 755 models cache logs
chmod 700 ssl

# Pull the DeepCoder model using Docker Model Runner
echo "πŸ“¦ Pulling DeepCoder model..."
if command -v docker &> /dev/null; then
    # Assumes the Docker Model Runner plugin is available.
    docker model pull ai/deepcoder-preview
else
    echo "⚠️ Docker not found. Please install Docker first."
    exit 1
fi

# Check for GPU support
echo "πŸ” Checking GPU support..."
if command -v nvidia-smi &> /dev/null; then
    echo "βœ… NVIDIA GPU detected:"
    nvidia-smi --query-gpu=gpu_name,memory.total --format=csv,noheader

    # Verify Docker can reach the GPU. The bare "nvidia/cuda:11.8-base"
    # tag does not exist on Docker Hub; CUDA tags are fully qualified,
    # so the original probe always failed and forced CPU mode.
    if docker run --rm --gpus all nvidia/cuda:11.8.0-base-ubuntu22.04 nvidia-smi &> /dev/null; then
        echo "βœ… Docker GPU support verified"
        export GPU_SUPPORT=true
    else
        echo "⚠️ Docker GPU support not available"
        export GPU_SUPPORT=false
    fi
else
    echo "⚠️ No GPU detected. Running on CPU."
    export GPU_SUPPORT=false
fi

# Build and start containers
echo "πŸ—οΈ Building Docker containers..."
docker-compose build

echo "πŸš€ Starting services..."
if [ "$GPU_SUPPORT" = true ]; then
    docker-compose up -d
else
    # Comment out GPU requirements for CPU-only deployment
    sed 's/devices:/# devices:/g' docker-compose.yml | \
    sed 's/- driver: nvidia/# - driver: nvidia/g' | \
    sed 's/count: 1/# count: 1/g' | \
    sed 's/capabilities: \[gpu\]/# capabilities: [gpu]/g' > docker-compose-cpu.yml
    docker-compose -f docker-compose-cpu.yml up -d
fi

# Wait for services to be ready
echo "⏳ Waiting for services to start..."
sleep 30

# Health check: poll until the API answers or attempts are exhausted.
echo "πŸ₯ Performing health check..."
HEALTHY=false
for i in {1..10}; do
    if curl -f http://localhost:8000/health > /dev/null 2>&1; then
        echo "βœ… DeepCoder API is healthy!"
        HEALTHY=true
        break
    else
        echo "⏳ Waiting for API to be ready... (attempt $i/10)"
        sleep 10
    fi
done

# Fail loudly if the API never came up instead of reporting success.
if [ "$HEALTHY" != true ]; then
    echo "❌ DeepCoder API did not become healthy. Check: docker-compose logs"
    exit 1
fi

# Show status
echo "πŸ“Š Service Status:"
docker-compose ps

echo ""
echo "πŸŽ‰ DeepCoder setup complete!"
echo "API endpoint: http://localhost:8000"
echo "Health check: http://localhost:8000/health"
echo "Model info: http://localhost:8000/model/info"
echo ""
echo "To test the API:"
echo "curl -X POST http://localhost:8000/generate \\"
echo " -H 'Content-Type: application/json' \\"
echo " -d '{\"prompt\": \"def fibonacci(n):\", \"max_tokens\": 200}'"
###########################################
# deploy-hf.sh - Hugging Face Spaces deployment
###########################################

# Generate the deploy-hf.sh helper script. The heredoc delimiter is
# quoted ('EOL'), so nothing below is expanded at generation time —
# the body is written to disk verbatim and must not be edited here.
cat > deploy-hf.sh << 'EOL'
#!/bin/bash
# Deploy to Hugging Face Spaces

set -e

echo "πŸ€— Deploying to Hugging Face Spaces"
echo "===================================="

# Check if git is configured
if ! git config user.email > /dev/null; then
    echo "⚠️ Please configure git:"
    echo "git config --global user.email 'your-email@example.com'"
    echo "git config --global user.name 'Your Name'"
    exit 1
fi

# Check if HF_TOKEN is set
if [ -z "$HF_TOKEN" ]; then
    echo "⚠️ Please set your Hugging Face token:"
    echo "export HF_TOKEN=your_hf_token_here"
    exit 1
fi

SPACE_NAME=${1:-"deepcoder-api"}
HF_USERNAME=${2:-$(whoami)}

echo "Creating Space: $HF_USERNAME/$SPACE_NAME"

# Create Hugging Face Space files
cat > README.md << EOF
---
title: DeepCoder API
emoji: πŸš€
colorFrom: blue
colorTo: green
sdk: docker
pinned: false
license: mit
---

# DeepCoder API

High-performance code generation API powered by DeepCoder-14B model.

## Features
- 🎯 60.6% pass rate on LiveCodeBench v5
- πŸ† 1936 Elo rating on Codeforces (95.3 percentile)
- πŸ“ 92.6% accuracy on HumanEval+
- ⚑ 131K token context length
- πŸ”§ Optimized Q4_K_M quantization

## API Endpoints
- \`POST /generate\` - Generate code from prompts
- \`POST /chat\` - Chat-style code assistance
- \`GET /model/info\` - Model information
- \`GET /health\` - Health check

## Usage
\`\`\`bash
curl -X POST /generate \\
  -H 'Content-Type: application/json' \\
  -d '{"prompt": "def fibonacci(n):", "max_tokens": 200}'
\`\`\`
EOF

# Create Dockerfile for HF Spaces
cat > Dockerfile.hf << EOF
FROM python:3.11-slim

WORKDIR /app

RUN apt-get update && apt-get install -y curl git && rm -rf /var/lib/apt/lists/*

COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY . .

EXPOSE 7860

CMD ["python", "-m", "uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
EOF

# Update app.py for HF Spaces (port 7860)
sed 's/port=8000/port=7860/g' app.py > app_hf.py
mv app_hf.py app.py

# Initialize git repo if not exists
if [ ! -d .git ]; then
    git init
    git lfs install
fi

# Track large model files with git LFS
echo "*.bin filter=lfs diff=lfs merge=lfs -text" >> .gitattributes
echo "*.safetensors filter=lfs diff=lfs merge=lfs -text" >> .gitattributes

# Add remote if not exists
if ! git remote get-url origin > /dev/null 2>&1; then
    git remote add origin https://huggingface.co/spaces/$HF_USERNAME/$SPACE_NAME
fi

# Commit and push
git add .
git commit -m "Initial DeepCoder API deployment" || true
git push -u origin main

echo "βœ… Deployed to: https://huggingface.co/spaces/$HF_USERNAME/$SPACE_NAME"
EOL

# Make the generated helper executable.
chmod +x deploy-hf.sh

echo "πŸ“ Additional deployment script created: deploy-hf.sh"
setup.sh ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/bin/bash
# setup.sh - Setup script for DeepCoder deployment
#
# Creates runtime directories, detects GPU support, builds/starts the
# Docker Compose stack, and waits for the API to become healthy
# (failing loudly if it never does).

set -e

echo "πŸš€ DeepCoder Model Setup"
echo "========================"

# Create necessary directories
echo "πŸ“ Creating directories..."
mkdir -p models cache logs ssl

# Set permissions (ssl kept private to the owner)
chmod 755 models cache logs
chmod 700 ssl

# Check for GPU support
echo "πŸ” Checking GPU support..."
if command -v nvidia-smi &> /dev/null; then
    echo "βœ… NVIDIA GPU detected:"
    nvidia-smi --query-gpu=gpu_name,memory.total --format=csv,noheader

    # Verify Docker can reach the GPU. The bare "nvidia/cuda:11.8-base"
    # tag does not exist on Docker Hub; CUDA tags are fully qualified,
    # so the original probe always failed and forced CPU mode.
    if docker run --rm --gpus all nvidia/cuda:11.8.0-base-ubuntu22.04 nvidia-smi &> /dev/null; then
        echo "βœ… Docker GPU support verified"
        export GPU_SUPPORT=true
    else
        echo "⚠️ Docker GPU support not available"
        export GPU_SUPPORT=false
    fi
else
    echo "⚠️ No GPU detected. Running on CPU."
    export GPU_SUPPORT=false
fi

# Build and start containers
echo "πŸ—οΈ Building Docker containers..."
docker-compose build

echo "πŸš€ Starting services..."
if [ "$GPU_SUPPORT" = true ]; then
    docker-compose up -d
else
    # Comment out GPU requirements for CPU-only deployment
    sed 's/devices:/# devices:/g' docker-compose.yml | \
    sed 's/- driver: nvidia/# - driver: nvidia/g' | \
    sed 's/count: 1/# count: 1/g' | \
    sed 's/capabilities: \[gpu\]/# capabilities: [gpu]/g' > docker-compose-cpu.yml
    docker-compose -f docker-compose-cpu.yml up -d
fi

# Wait for services to be ready
echo "⏳ Waiting for services to start..."
sleep 30

# Health check: poll until the API answers or attempts are exhausted.
echo "πŸ₯ Performing health check..."
HEALTHY=false
for i in {1..10}; do
    if curl -f http://localhost:8000/health > /dev/null 2>&1; then
        echo "βœ… DeepCoder API is healthy!"
        HEALTHY=true
        break
    else
        echo "⏳ Waiting for API to be ready... (attempt $i/10)"
        sleep 10
    fi
done

# Fail loudly if the API never came up instead of reporting success.
if [ "$HEALTHY" != true ]; then
    echo "❌ DeepCoder API did not become healthy. Check: docker-compose logs"
    exit 1
fi

# Show status
echo "πŸ“Š Service Status:"
docker-compose ps

echo ""
echo "πŸŽ‰ DeepCoder setup complete!"
echo "API endpoint: http://localhost:8000"
echo "Health check: http://localhost:8000/health"
echo "Model info: http://localhost:8000/model/info"
echo ""
echo "To test the API:"
echo "curl -X POST http://localhost:8000/generate \\"
echo " -H 'Content-Type: application/json' \\"
echo " -d '{\"prompt\": \"def fibonacci(n):\", \"max_tokens\": 200}'"