# Spaces: Paused  (page header captured with this file; not part of the build)
# Image for the sys7 miner job: PyTorch + CUDA runtime base, Python
# dependencies from requirements.txt, and the job scripts with their JSON
# lexicons. The container runs as a non-root user (UID 1000) and redirects
# all Hugging Face caches to /data.

# Base image pinned to an explicit PyTorch / CUDA / cuDNN runtime tag.
FROM pytorch/pytorch:2.3.0-cuda12.1-cudnn8-runtime

# Install system basics. --no-install-recommends keeps optional extras out of
# the image; the apt lists are removed in the same layer so they never ship.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        ca-certificates \
        git \
        wget && \
    rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Install Python dependencies before copying the application code, so this
# layer stays cached until requirements.txt itself changes.
COPY requirements.txt ./
RUN pip install --no-cache-dir -r requirements.txt

# Create the non-root user and the storage paths under /data, then hand both
# /app (as populated so far) and /data to that user.
# NOTE(review): if /data is mounted as a volume at runtime, the mount may hide
# these build-time directories; confirm the mount is writable by UID 1000.
RUN useradd -m -u 1000 appuser && \
    mkdir -p /data/.cache /data/out /data/input && \
    chown -R appuser:appuser /app /data

# Force all Hugging Face caches under /data/.cache, and keep tokenizer / OpenMP
# threading single-threaded.
# NOTE(review): TRANSFORMERS_CACHE is deprecated in recent transformers
# releases in favour of HF_HOME; it is kept here for older versions.
ENV HF_HOME=/data/.cache \
    HF_HUB_CACHE=/data/.cache/hub \
    TRANSFORMERS_CACHE=/data/.cache/transformers \
    HF_DATASETS_CACHE=/data/.cache/datasets \
    SENTENCE_TRANSFORMERS_HOME=/data/.cache/sentence_transformers \
    TOKENIZERS_PARALLELISM=false \
    OMP_NUM_THREADS=1

# Copy the scripts and config JSONs, already owned by appuser. Setting the
# owner at copy time avoids a follow-up `chown -R` layer that would duplicate
# every copied file.
COPY --chown=appuser:appuser \
    sys7_miner_2.py \
    run_job.py \
    system7_lexicons.json \
    label_orders.json \
    slang_lexicon.json \
    sys7_phrase_lexicons_desc_only.json \
    ./

# Drop root privileges for everything that runs in the container.
USER appuser

# Launch run_job.py directly (described in the original note as the Gradio
# app, replacing an earlier shell script). Exec form makes python PID 1 so it
# receives stop signals.
CMD ["python", "run_job.py"]