File size: 853 Bytes
ba43a18
 
 
 
 
 
247f998
ba43a18
 
 
 
 
 
 
 
 
247f998
ba43a18
247f998
ba43a18
247f998
ba43a18
247f998
ba43a18
 
 
247f998
3fc6ddb
 
247f998
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
# Base image: the slim variant of the Python 3.10 image.
# NOTE(review): the tag is not pinned to a patch release or digest, so rebuilds
# may pick up a newer 3.10.x image — pin it if reproducible builds matter.
FROM python:3.10-slim

# Working directory for every following instruction; relative paths in the
# COPY/RUN/CMD steps below resolve against /app.
WORKDIR /app

# Install system dependencies.
#   git             - presumably needed by later install/runtime steps
#                     (e.g. VCS-based pip requirements) — TODO confirm.
#   ca-certificates - listed explicitly so HTTPS fetches keep working now that
#                     recommended packages are no longer installed.
# --no-install-recommends keeps optional extras out of the image, and the apt
# package lists are removed in the same layer so they never persist in it.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        git \
    && rm -rf /var/lib/apt/lists/*

# Bring in only the dependency manifest first, then install from it.
# Because the application source is copied in a later instruction, this layer
# is reused from cache until requirements.txt itself changes.
COPY requirements.txt ./
RUN pip install --no-cache-dir --requirement requirements.txt

# Hugging Face cache directory, used by the build-time download below and
# inherited by the running container.
# NOTE(review): newer transformers releases deprecate TRANSFORMERS_CACHE in
# favour of HF_HOME; both are kept here with their original values — confirm
# which one the application relies on before removing either.
ENV HF_HOME=/tmp \
    TRANSFORMERS_CACHE=/tmp

# Pre-download the K2-Think repository into the image to avoid a cold-start
# download at container startup.
# snapshot_download only fetches the repository files into the cache; it does
# not instantiate the model, so the build host does not need enough memory to
# hold the weights. huggingface_hub is installed as a dependency of
# transformers, so no extra requirement is introduced.
# This step runs before `COPY . .` so that editing application code does not
# invalidate the large cached model layer.
RUN python -c "from huggingface_hub import snapshot_download; snapshot_download(repo_id='LLM360/K2-Think', cache_dir='/tmp')"

# Copy application code last, so source edits rebuild only this layer and the
# instructions after it.
COPY . .

# Document the port the server listens on (EXPOSE does not publish it by
# itself); it matches the --port passed to uvicorn below.
EXPOSE 7860

# Start uvicorn serving the `app` object from the `main` module, bound to all
# interfaces on port 7860. Exec (JSON-array) form runs uvicorn directly
# rather than through a shell.
# NOTE(review): no USER instruction is set in this file, so the process
# presumably runs as the base image's default user — confirm this is intended.
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]