ayush2917 committed on
Commit
cc0ef8d
·
verified ·
1 Parent(s): 56e361f

Update Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +65 -39
Dockerfile CHANGED
@@ -1,62 +1,88 @@
1
- # Hugging Face Spaces optimized image
2
- FROM python:3.9-slim
3
 
4
- # Set environment variables
5
  ENV PYTHONDONTWRITEBYTECODE=1 \
6
  PYTHONUNBUFFERED=1 \
 
7
  HF_HOME=/cache \
8
  HUGGINGFACE_HUB_CACHE=/cache \
9
  PIP_NO_CACHE_DIR=1 \
10
- PORT=8000
11
-
12
- # Create directory structure
13
- RUN mkdir -p /app/{data,src,config,templates,static/css,tests}
14
 
 
15
  WORKDIR /app
16
 
17
- # Install system dependencies
 
 
 
 
 
 
 
 
 
 
 
18
  RUN apt-get update && apt-get install -y --no-install-recommends \
19
  build-essential \
20
  && rm -rf /var/lib/apt/lists/*
21
 
22
- # Copy requirements first for caching
23
  COPY requirements.txt .
24
-
25
- # Install Python dependencies
26
  RUN pip install --upgrade pip && \
27
- pip install --no-cache-dir -r requirements.txt
28
-
29
- # Copy application files
30
- COPY app.py .
31
- COPY src/ /app/src/
32
- COPY config/ /app/config/
33
- COPY templates/ /app/templates/
34
- COPY static/ /app/static/
35
- COPY tests/ /app/tests/
36
-
37
- # Copy data files with correct permissions
38
- COPY --chown=1000:1000 data/ /app/data/
39
-
40
- # Debug: List files in /app/data/
41
- RUN ls -l /app/data/
42
 
43
- # Verify data files or create chat_history.json if missing
44
- RUN python -c "import os, json; \
45
- assert os.path.exists('/app/data/rupeia_document.json'), 'Data file missing'; \
46
- if not os.path.exists('/app/data/chat_history.json'): \
47
- open('/app/data/chat_history.json', 'w').write('{}')"
48
-
49
- # Download models during build
50
  RUN python -c "\
 
51
  from sentence_transformers import SentenceTransformer; \
52
  SentenceTransformer('all-MiniLM-L6-v2', cache_folder='/cache'); \
53
  from transformers import AutoModelForCausalLM, AutoTokenizer; \
54
  AutoModelForCausalLM.from_pretrained('distilgpt2', cache_dir='/cache'); \
55
- AutoTokenizer.from_pretrained('distilgpt2', cache_dir='/cache') \
56
- "
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
 
58
- # Set proper permissions
59
- RUN chmod a+r /app/data/*.json && \
60
- find /app -type d -exec chmod a+rx {} \;
61
 
62
- CMD gunicorn --bind 0.0.0.0:$PORT --workers 2 --timeout 120 --preload app:app
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Official Python base image with explicit version
2
+ FROM python:3.9-slim-buster@sha256:5f2c93f5369d1f41a7b41024bebf5db3e2227ba6830ff8ddb0a5e3a0c5cef8e9
3
 
4
+ # ===== SYSTEM CONFIGURATION =====
5
  ENV PYTHONDONTWRITEBYTECODE=1 \
6
  PYTHONUNBUFFERED=1 \
7
+ PORT=8000 \
8
  HF_HOME=/cache \
9
  HUGGINGFACE_HUB_CACHE=/cache \
10
  PIP_NO_CACHE_DIR=1 \
11
+ TOKENIZERS_PARALLELISM=false
 
 
 
12
 
13
+ # ===== APPLICATION SETUP =====
14
  WORKDIR /app
15
 
16
+ # Create directory structure matching your project
17
+ RUN mkdir -p \
18
+ /app/data \
19
+ /app/src \
20
+ /app/config \
21
+ /app/templates \
22
+ /app/static/css \
23
+ /app/tests \
24
+ /cache
25
+
26
+ # ===== DEPENDENCY INSTALLATION =====
27
+ # Install system dependencies in single layer
28
  RUN apt-get update && apt-get install -y --no-install-recommends \
29
  build-essential \
30
  && rm -rf /var/lib/apt/lists/*
31
 
32
+ # Copy and install Python requirements
33
  COPY requirements.txt .
 
 
34
  RUN pip install --upgrade pip && \
35
+ pip install --no-cache-dir -r requirements.txt
36
+ # No `pip cache purge` here: PIP_NO_CACHE_DIR=1 disables pip's cache, and pip's cache subcommands exit with an error when the cache is disabled, which would fail the build.
 
 
 
 
 
 
 
 
 
 
 
 
 
37
 
38
+ # ===== MODEL PRELOADING =====
39
+ # Pre-download the models at build time so they are already present in /cache inside the image.
40
+ # The Dockerfile parser joins the backslash-continued lines into ONE line before python sees it,
41
+ # so the snippet must consist of plain `;`-separated statements: a compound try/except cannot be
42
+ # written this way (SyntaxError). Any download failure raises, python exits non-zero, and the
43
+ # build stops with the full traceback. The code starts on the `python -c "` line so it can never
44
+ # begin with indentation. /cache (HF_HOME) is then handed to UID 1000, the user the container runs as.
45
+ RUN python -c "from sentence_transformers import SentenceTransformer; \
46
  SentenceTransformer('all-MiniLM-L6-v2', cache_folder='/cache'); \
47
  from transformers import AutoModelForCausalLM, AutoTokenizer; \
48
  AutoModelForCausalLM.from_pretrained('distilgpt2', cache_dir='/cache'); \
49
+ AutoTokenizer.from_pretrained('distilgpt2', cache_dir='/cache')" \
50
+ && chown -R 1000:1000 /cache
51
+
52
+ # ===== APPLICATION FILES =====
53
+ # Copy application code owned by the runtime user (UID/GID 1000).
54
+ # One COPY per directory: when a source is a directory, COPY copies its CONTENTS, not the
55
+ # directory itself, so listing several directories with destination ./ would flatten them all into /app.
56
+ COPY --chown=1000:1000 app.py ./
57
+ COPY --chown=1000:1000 src/ ./src/
58
+ COPY --chown=1000:1000 config/ ./config/
59
+ COPY --chown=1000:1000 templates/ ./templates/
60
+ COPY --chown=1000:1000 static/ ./static/
61
+ COPY --chown=1000:1000 tests/ ./tests/
62
+
63
+ # Copy data files separately with strict permissions
64
+ COPY --chown=1000:1000 data/ /app/data/
65
+ RUN chmod 644 /app/data/*.json
66
+
67
+ # ===== RUNTIME CONFIGURATION =====
68
+ # Health check over plain HTTP using the Python stdlib: curl is not installed in this image (slim base + build-essential only). urlopen raises on connection errors and on HTTP 4xx/5xx, so python exits non-zero and the probe fails.
69
+ HEALTHCHECK --interval=30s --timeout=5s --retries=3 \
70
+ CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:${PORT}/health', timeout=4)" || exit 1
71
+
72
+ EXPOSE $PORT
73
 
74
+ # Run as non-root user
75
+ USER 1000
 
76
 
77
+ # Optimized Gunicorn configuration for Spaces.
78
+ # Exec-form CMD performs no shell processing, so "0.0.0.0:$PORT" would reach gunicorn as a
79
+ # literal string. Run through `sh -c` so ${PORT} is expanded at container start, and use `exec`
80
+ # so gunicorn replaces the shell as PID 1 and receives stop signals directly.
81
+ CMD ["sh", "-c", "exec gunicorn --bind 0.0.0.0:${PORT} --workers 2 --threads 2 --timeout 120 --preload --worker-class gthread --access-logfile - --error-logfile - --log-level info app:app"]