Imaginethat committed on
Commit
4cd9cbf
·
verified ·
1 Parent(s): 577cc97

Update Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +37 -46
Dockerfile CHANGED
@@ -1,46 +1,37 @@
1
- # Use the same PyTorch base image that worked for your clustering job
2
- FROM pytorch/pytorch:2.3.0-cuda12.1-cudnn8-runtime
3
-
4
- # Install system basics
5
- RUN apt-get update && \
6
- apt-get install -y wget ca-certificates git && \
7
- rm -rf /var/lib/apt/lists/*
8
-
9
- WORKDIR /app
10
-
11
- # Install Python dependencies
12
- COPY requirements.txt ./
13
- RUN pip install --no-cache-dir -r requirements.txt
14
-
15
- # Create non-root user and setup the Persistent Storage paths
16
- # This ensures /data is writable, which is where your Parquet inputs/outputs will live
17
- RUN useradd -m -u 1000 appuser && \
18
- mkdir -p /data/.cache && \
19
- mkdir -p /data/out && \
20
- mkdir -p /data/input && \
21
- chown -R appuser:appuser /data
22
-
23
- # Set environment variables to force all Hugging Face caches to the persistent volume
24
- ENV HF_HOME=/data/.cache \
25
- HF_HUB_CACHE=/data/.cache/hub \
26
- TRANSFORMERS_CACHE=/data/.cache/transformers \
27
- HF_DATASETS_CACHE=/data/.cache/datasets \
28
- SENTENCE_TRANSFORMERS_HOME=/data/.cache/sentence_transformers \
29
- TOKENIZERS_PARALLELISM=false \
30
- OMP_NUM_THREADS=1
31
-
32
- # Copy your Miner script and the required Config JSONs
33
- # Make sure you upload these JSONs to the Space Files along with this Dockerfile!
34
- COPY sys7_miner.py .
35
- COPY system7_lexicons.json .
36
- COPY label_orders.json .
37
- COPY slang_lexicon.json .
38
- # COPY sys7_phrase_lexicons_desc_only.json . <-- Uncomment if you use this
39
-
40
- # Copy the runner script
41
- COPY start.sh .
42
- RUN chmod +x start.sh && chown -R appuser:appuser /app
43
-
44
- USER appuser
45
-
46
- CMD ["./start.sh"]
 
1
+ FROM nvidia/cuda:12.4.1-cudnn-runtime-ubuntu22.04
2
+
3
+ # 1. Install Python 3.10, pip, git, git-lfs, and dos2unix (converts Windows CRLF line endings to Unix LF)
4
+ RUN apt-get update && apt-get install -y \
5
+ python3.10 \
6
+ python3-pip \
7
+ git \
8
+ git-lfs \
9
+ dos2unix \
10
+ && rm -rf /var/lib/apt/lists/*
11
+
12
+ # 2. Set up Python alias
13
+ RUN ln -s /usr/bin/python3.10 /usr/bin/python
14
+
15
+ WORKDIR /app
16
+
17
+ # 3. Install Python dependencies
18
+ COPY requirements.txt .
19
+ RUN pip install --no-cache-dir --upgrade pip && \
20
+ pip install --no-cache-dir -r requirements.txt
21
+
22
+ # 4. Copy all your scripts and configs
23
+ COPY sys7_miner.py .
24
+ COPY system7_lexicons.json .
25
+ COPY label_orders.json .
26
+ COPY slang_lexicon.json .
27
+ COPY run_job.py .
28
+
29
+ # 5. THE FIX: Convert the Python scripts to Unix line endings
30
+ # This normalizes run_job.py and sys7_miner.py in case they were saved with Windows (CRLF) line endings
31
+ RUN dos2unix run_job.py sys7_miner.py
32
+
33
+ # 6. Mark run_job.py as executable
34
+ RUN chmod +x run_job.py
35
+
36
+ # 7. Run the job
37
+ CMD ["python", "run_job.py"]