Update Dockerfile
Browse files - Dockerfile +16 -13
Dockerfile
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
# 1. Use Python 3.11
|
| 2 |
FROM python:3.11-slim
|
| 3 |
|
| 4 |
# 2. Install System Tools & Java 21
|
|
@@ -12,38 +12,41 @@ RUN apt-get update && \
|
|
| 12 |
gcc \
|
| 13 |
&& apt-get clean
|
| 14 |
|
| 15 |
-
# Set Java Home
|
| 16 |
ENV JAVA_HOME=/usr/lib/jvm/java-21-openjdk-amd64
|
| 17 |
|
| 18 |
-
# 3.
|
| 19 |
WORKDIR /app
|
| 20 |
-
|
| 21 |
-
# 4. Setup Cache Folders
|
| 22 |
ENV XDG_CACHE_HOME=/app/cache
|
| 23 |
ENV TRANSFORMERS_CACHE=/app/cache
|
| 24 |
ENV HF_HOME=/app/cache
|
| 25 |
RUN mkdir -p /app/cache && chmod 777 /app/cache
|
| 26 |
|
| 27 |
-
#
|
| 28 |
COPY requirements.txt .
|
| 29 |
RUN pip install --no-cache-dir --upgrade pip && \
|
| 30 |
pip install --no-cache-dir -r requirements.txt
|
| 31 |
|
| 32 |
-
#
|
| 33 |
RUN git clone https://github.com/gotutiyan/gector.git /app/gector_lib
|
| 34 |
RUN pip install --no-cache-dir /app/gector_lib
|
| 35 |
|
| 36 |
-
#
|
| 37 |
-
# We create the folder just in case, but COPY . . will fill it with your files
|
| 38 |
RUN mkdir -p /app/data
|
| 39 |
|
| 40 |
-
#
|
| 41 |
-
# We MUST download this because it is too big (500MB) for Git
|
| 42 |
RUN echo "Downloading Model Weights..." && \
|
| 43 |
wget -O /app/data/gector_model.th https://huggingface.co/gotutiyan/gector-roberta-base-5k/resolve/main/pytorch_model.bin
|
| 44 |
|
| 45 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
COPY . .
|
| 47 |
|
| 48 |
-
#
|
| 49 |
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
|
|
|
|
| 1 |
+
# 1. Use Python 3.11 (Required for GECToR)
|
| 2 |
FROM python:3.11-slim
|
| 3 |
|
| 4 |
# 2. Install System Tools & Java 21
|
|
|
|
| 12 |
gcc \
|
| 13 |
&& apt-get clean
|
| 14 |
|
|
|
|
| 15 |
# Point JAVA_HOME at the Java 21 OpenJDK directory.
# NOTE(review): this path is amd64-specific and the JDK install step is not visible here — confirm it matches the installed package and the target architecture.
ENV JAVA_HOME=/usr/lib/jvm/java-21-openjdk-amd64
|
| 16 |
|
| 17 |
+
# 3. Setup Directories
|
| 18 |
WORKDIR /app
|
|
|
|
|
|
|
| 19 |
ENV XDG_CACHE_HOME=/app/cache
|
| 20 |
# NOTE(review): recent transformers releases deprecate TRANSFORMERS_CACHE in favour of HF_HOME — confirm against the version pinned in requirements.txt before dropping this.
ENV TRANSFORMERS_CACHE=/app/cache
|
| 21 |
ENV HF_HOME=/app/cache
|
| 22 |
# Create the shared cache directory, then open it up (rwx) to every user.
RUN mkdir -p /app/cache \
    && chmod 777 /app/cache
|
| 23 |
|
| 24 |
+
# 4. Install Dependencies
|
| 25 |
COPY requirements.txt .
|
| 26 |
RUN pip install --no-cache-dir --upgrade pip && \
|
| 27 |
pip install --no-cache-dir -r requirements.txt
|
| 28 |
|
| 29 |
+
# 5. Install GECToR Library
|
| 30 |
# Fetch the GECToR sources into /app/gector_lib.
# Shallow clone (--depth 1): only the current snapshot is needed, so the full
# commit history is not baked into the image layer.
RUN git clone --depth 1 https://github.com/gotutiyan/gector.git /app/gector_lib
|
| 31 |
RUN pip install --no-cache-dir /app/gector_lib
|
| 32 |
|
| 33 |
+
# 6. SETUP DATA
|
|
|
|
| 34 |
RUN mkdir -p /app/data
|
| 35 |
|
| 36 |
+
# A. The Heavy Model (Must download)
|
|
|
|
| 37 |
RUN echo "Downloading Model Weights..." && \
|
| 38 |
wget -O /app/data/gector_model.th https://huggingface.co/gotutiyan/gector-roberta-base-5k/resolve/main/pytorch_model.bin
|
| 39 |
|
| 40 |
+
# B. The MISSING Vocab Files ONLY
|
| 41 |
+
# We download labels.txt and d_tags.txt because they are missing.
|
| 42 |
+
# We DO NOT download verb-form-vocab.txt because you uploaded it.
|
| 43 |
+
RUN echo "Downloading Missing Vocab Files..." && \
|
| 44 |
+
wget -O /app/data/labels.txt https://github.com/grammarly/gector/raw/master/data/output_vocabulary/labels.txt && \
|
| 45 |
+
wget -O /app/data/d_tags.txt https://github.com/grammarly/gector/raw/master/data/output_vocabulary/d_tags.txt
|
| 46 |
+
|
| 47 |
+
# 7. Copy App Code
|
| 48 |
+
# This copies the entire build context into /app, so your uploaded 'verb-form-vocab.txt' ends up in /app/data (assuming it sits under data/ in the repo)
|
| 49 |
COPY . .
|
| 50 |
|
| 51 |
+
# 8. Run App
|
| 52 |
# Default container command: start uvicorn serving the `app` object from the `app` module, listening on 0.0.0.0:7860.
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
|