# Image for the text-processing service in src/ (summarization, spelling,
# grammar, punctuation, dialect-to-MSA). All models are downloaded at build
# time because the runtime container has no outbound DNS.
FROM python:3.12-slim

WORKDIR /app

# Install system dependencies
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements and install Python dependencies
# Install CPU-only PyTorch first (saves ~1.5GB vs full torch with CUDA)
COPY requirements.txt .
RUN pip install --no-cache-dir torch --index-url https://download.pytorch.org/whl/cpu \
    && pip install --no-cache-dir -r requirements.txt

# Create an unprivileged user and hand it the application directory.
# Everything below (model downloads and the server itself) runs as this user
# instead of root.
RUN useradd --create-home --uid 1000 app \
    && chown app:app /app

# HOME is set explicitly so the per-user cache directories written during the
# build resolve to the same paths when the container runs.
# (Hugging Face and camel-tools cache under the home directory by default.)
ENV HOME=/home/app
USER app

# Pre-download models during build (network is available here)
# At runtime, the container has NO outbound DNS, so models must be cached

# 1. Summarization model (MBart, float16)
RUN python -c "\
from transformers import MBartForConditionalGeneration, AutoTokenizer, AutoConfig; \
import torch; \
repo = 'bayan10/summarization-model'; \
print('Downloading summarization tokenizer...'); \
AutoTokenizer.from_pretrained(repo); \
print('Downloading summarization config...'); \
AutoConfig.from_pretrained(repo); \
print('Downloading summarization model (float16)...'); \
MBartForConditionalGeneration.from_pretrained(repo, torch_dtype=torch.float16); \
print('Summarization model cached!'); \
"

# 2. Spelling model (AraSpell — AraBERT encoder-decoder + checkpoint)
RUN python -c "\
from huggingface_hub import hf_hub_download; \
from transformers import AutoTokenizer, EncoderDecoderModel, AutoModelForMaskedLM; \
print('Downloading AraSpell checkpoint...'); \
hf_hub_download(repo_id='bayan10/AraSpell-Model', filename='last_model.pt'); \
print('Downloading AraBERT tokenizer...'); \
AutoTokenizer.from_pretrained('aubmindlab/bert-base-arabertv02'); \
print('Downloading AraBERT encoder-decoder...'); \
EncoderDecoderModel.from_encoder_decoder_pretrained('aubmindlab/bert-base-arabertv02', 'aubmindlab/bert-base-arabertv02'); \
print('Downloading AraBERT MLM (for ContextualCorrector)...'); \
AutoModelForMaskedLM.from_pretrained('aubmindlab/bert-base-arabertv02'); \
print('Spelling model + MLM cached!'); \
"

# 3. Grammar — camel-tools MLE disambiguator data
RUN camel_data -i light

# 4. Punctuation model (PuncAra-v1 — EncoderDecoderModel)
RUN python -c "\
from transformers import EncoderDecoderModel, AutoTokenizer; \
repo = 'bayan10/PuncAra-v1'; \
print('Downloading PuncAra-v1 tokenizer...'); \
AutoTokenizer.from_pretrained(repo); \
print('Downloading PuncAra-v1 model...'); \
EncoderDecoderModel.from_pretrained(repo); \
print('PuncAra-v1 cached!'); \
"

# 5. Dialect-to-MSA model (mT5, float16)
RUN python -c "\
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM; \
import torch; \
repo = 'bayan10/dialect-to-msa-model'; \
print('Downloading dialect tokenizer...'); \
AutoTokenizer.from_pretrained(repo); \
print('Downloading dialect model (float16)...'); \
AutoModelForSeq2SeqLM.from_pretrained(repo, torch_dtype=torch.float16); \
print('Dialect model cached!'); \
"

# Copy application code (owned by the runtime user; copied last so code
# changes do not invalidate the model-download layers above)
COPY --chown=app:app src/ ./src/
COPY --chown=app:app quran.py ./
COPY --chown=app:app quran_master.db ./
# NOTE(review): this bakes any matching .env file into an image layer, where
# it stays readable to anyone who can pull the image. Prefer injecting
# configuration at runtime (e.g. `docker run --env-file` or platform secrets);
# confirm the app does not rely on the file being present before removing.
COPY --chown=app:app .env* ./

# Set environment variables
ENV PORT=7860 \
    DEBUG=False \
    PYTHONUNBUFFERED=1

# Expose port
EXPOSE 7860

# Start the app with gunicorn (single worker to minimize RAM)
# Timeout 300s: full pipeline (spelling ~50s + grammar ~8s + punctuation ~30s + cold start)
CMD ["gunicorn", "--chdir", "src", "app:app", "--bind", "0.0.0.0:7860", "--timeout", "300", "--workers", "1"]