# syntax=docker/dockerfile:1

# Use the same PyTorch base image (pinned minor version, CUDA 12.1 runtime)
FROM pytorch/pytorch:2.3.0-cuda12.1-cudnn8-runtime

# Install system basics. update+install in one layer so the apt cache never
# goes stale; --no-install-recommends keeps the image lean.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        ca-certificates \
        git \
        wget \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Install Python dependencies. requirements.txt is copied on its own so this
# layer stays cached until the dependency list itself changes.
COPY requirements.txt ./
RUN pip install --no-cache-dir -r requirements.txt

# Create the non-root user, set up the persistent-storage paths, and hand
# both /data and the /app workdir to that user in a single layer.
RUN useradd -m -u 1000 appuser && \
    mkdir -p /data/.cache /data/out /data/input && \
    chown -R appuser:appuser /data && \
    chown appuser:appuser /app

# Force all Hugging Face caches onto the persistent volume.
# NOTE: TRANSFORMERS_CACHE is deprecated in favor of HF_HOME in newer
# transformers releases, but is kept for compatibility with older versions.
ENV HF_HOME=/data/.cache \
    HF_HUB_CACHE=/data/.cache/hub \
    TRANSFORMERS_CACHE=/data/.cache/transformers \
    HF_DATASETS_CACHE=/data/.cache/datasets \
    SENTENCE_TRANSFORMERS_HOME=/data/.cache/sentence_transformers \
    TOKENIZERS_PARALLELISM=false \
    OMP_NUM_THREADS=1

# Copy the application scripts and config JSONs with ownership set at copy
# time — avoids a follow-up `RUN chown -R /app`, which would duplicate every
# copied file into an extra image layer.
COPY --chown=appuser:appuser \
    sys7_miner_2.py \
    run_job.py \
    system7_lexicons.json \
    label_orders.json \
    slang_lexicon.json \
    sys7_phrase_lexicons_desc_only.json \
    ./

# Drop root before the runtime command.
USER appuser

# Exec form so python is PID 1 and receives SIGTERM from `docker stop`.
CMD ["python", "run_job.py"]