Spaces:
Running
Running
Commit · b5b1b99
1 Parent(s): deaf393
feat: download 4.4GB data from dataset on build
Browse files
- Dockerfile +10 -6
Dockerfile
CHANGED
|
@@ -18,16 +18,20 @@ COPY run_api.py .
|
|
| 18 |
COPY .env.example ./.env
|
| 19 |
|
| 20 |
# Copy data (uploaded via HuggingFace web UI)
|
| 21 |
-
COPY data/qdrant_db/ ./data/qdrant_db/
|
| 22 |
-
COPY data/embeddings/bm25_index.pkl ./data/embeddings/bm25_index.pkl
|
| 23 |
-
COPY data/embeddings/embeddings.npy ./data/embeddings/embeddings.npy
|
| 24 |
-
COPY data/embeddings/chunk_ids.npy ./data/embeddings/chunk_ids.npy
|
| 25 |
-
COPY data/embeddings/embedding_index.json ./data/embeddings/embedding_index.json
|
| 26 |
-
COPY data/chunks/ ./data/chunks/
|
| 27 |
|
| 28 |
# Create remaining data dirs
|
| 29 |
RUN mkdir -p data/raw data/processed logs
|
| 30 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
# HuggingFace Spaces uses port 7860
|
| 32 |
ENV PORT=7860
|
| 33 |
EXPOSE 7860
|
|
|
|
| 18 |
COPY .env.example ./.env
|
| 19 |
|
| 20 |
# Copy data (uploaded via HuggingFace web UI)
|
| 21 |
+
# COPY data/qdrant_db/ ./data/qdrant_db/
|
| 22 |
+
# COPY data/embeddings/bm25_index.pkl ./data/embeddings/bm25_index.pkl
|
| 23 |
+
# COPY data/embeddings/embeddings.npy ./data/embeddings/embeddings.npy
|
| 24 |
+
# COPY data/embeddings/chunk_ids.npy ./data/embeddings/chunk_ids.npy
|
| 25 |
+
# COPY data/embeddings/embedding_index.json ./data/embeddings/embedding_index.json
|
| 26 |
+
# COPY data/chunks/ ./data/chunks/
|
| 27 |
|
| 28 |
# Create remaining data dirs
|
| 29 |
RUN mkdir -p data/raw data/processed logs
|
| 30 |
|
| 31 |
+
# Download the 4.4 GB database from the limits-free HF Dataset
|
| 32 |
+
# This happens during the Docker build so the API starts instantly later
|
| 33 |
+
RUN huggingface-cli download Subhadip007/researchpilot-data --repo-type dataset --local-dir /app/data
|
| 34 |
+
|
| 35 |
# HuggingFace Spaces uses port 7860
|
| 36 |
ENV PORT=7860
|
| 37 |
EXPOSE 7860
|