Spaces:
Runtime error
Runtime error
# Image for a gunicorn-served app (app.py) that uses Qwen/Qwen2-1.5B-Instruct.
# The model files are downloaded at build time so the container does not have
# to fetch them on first start.
FROM python:3.10-slim

# Create the non-root user (UID 1000) and hand it /app while still root, so the
# working directory is owned by that user regardless of how the builder
# creates WORKDIR paths.
RUN useradd -m -u 1000 user \
    && mkdir -p /app \
    && chown user:user /app
USER user

# `pip install --user` puts console scripts (e.g. gunicorn) in ~/.local/bin,
# so that directory must be on PATH for the CMD below to resolve.
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH
WORKDIR /app

# Install dependencies before copying the app so this layer is reused when
# only app.py changes.
COPY --chown=user:user requirements.txt .
RUN pip install --no-cache-dir --user -r requirements.txt

# Pre-download the model repository into the Hugging Face cache under $HOME/.cache.
# Only the files are fetched here: quantization is applied in memory when the
# model is loaded, so building a BitsAndBytesConfig at image-build time adds
# nothing to the cache, and 4-bit bitsandbytes loading generally expects a GPU
# that `docker build` does not provide. app.py remains responsible for how the
# model is loaded at runtime.
# The chmod stays in this same RUN on purpose: changing permissions in a later
# layer would copy every cached weight file into that layer and roughly double
# the image size.
# NOTE(review): huggingface_hub is assumed to be installed as a dependency of
# transformers from requirements.txt - confirm.
RUN mkdir -p "$HOME/.cache" \
    && python -c "from huggingface_hub import snapshot_download; snapshot_download('Qwen/Qwen2-1.5B-Instruct')" \
    && chmod -R 777 "$HOME/.cache"

# Copy the application last; it is the file most likely to change.
COPY --chown=user:user app.py .

# NOTE(review): Hugging Face Spaces routes traffic to port 7860 unless
# `app_port: 8000` is set in the Space's README metadata - confirm.
EXPOSE 8000

# Long worker timeout (300 s) to tolerate slow model loading / generation.
CMD ["gunicorn", "--bind", "0.0.0.0:8000", "--timeout", "300", "app:app"]