Subhadip007 committed on
Commit
b5b1b99
·
1 Parent(s): deaf393

feat: download 4.4GB data from dataset on build

Browse files
Files changed (1) hide show
  1. Dockerfile +10 -6
Dockerfile CHANGED
@@ -18,16 +18,20 @@ COPY run_api.py .
18
  COPY .env.example ./.env
19
 
20
  # Copy data (uploaded via HuggingFace web UI)
21
- COPY data/qdrant_db/ ./data/qdrant_db/
22
- COPY data/embeddings/bm25_index.pkl ./data/embeddings/bm25_index.pkl
23
- COPY data/embeddings/embeddings.npy ./data/embeddings/embeddings.npy
24
- COPY data/embeddings/chunk_ids.npy ./data/embeddings/chunk_ids.npy
25
- COPY data/embeddings/embedding_index.json ./data/embeddings/embedding_index.json
26
- COPY data/chunks/ ./data/chunks/
27
 
28
  # Create remaining data dirs
29
  RUN mkdir -p data/raw data/processed logs
30
 
 
 
 
 
31
  # HuggingFace Spaces uses port 7860
32
  ENV PORT=7860
33
  EXPOSE 7860
 
18
  COPY .env.example ./.env
19
 
20
  # Copy data (uploaded via HuggingFace web UI)
21
+ # COPY data/qdrant_db/ ./data/qdrant_db/
22
+ # COPY data/embeddings/bm25_index.pkl ./data/embeddings/bm25_index.pkl
23
+ # COPY data/embeddings/embeddings.npy ./data/embeddings/embeddings.npy
24
+ # COPY data/embeddings/chunk_ids.npy ./data/embeddings/chunk_ids.npy
25
+ # COPY data/embeddings/embedding_index.json ./data/embeddings/embedding_index.json
26
+ # COPY data/chunks/ ./data/chunks/
27
 
28
  # Create remaining data dirs
29
  RUN mkdir -p data/raw data/processed logs
30
 
31
+ # Download the 4.4 GB database from the limit-free HF Dataset repo
32
+ # This happens during the Docker build so the API starts instantly later
33
+ RUN huggingface-cli download Subhadip007/researchpilot-data --repo-type dataset --local-dir /app/data
34
+
35
  # HuggingFace Spaces uses port 7860
36
  ENV PORT=7860
37
  EXPOSE 7860