VcRlAgent commited on
Commit
92f791e
·
1 Parent(s): 3f8e1ce
Dockerfile CHANGED
@@ -1,28 +1,14 @@
1
- FROM runpod/pytorch:2.2.0-py3.10-cuda12.1.1-devel-ubuntu22.04
2
-
3
- # Install Python
4
- RUN apt-get update && apt-get install -y \
5
- python3.10 \
6
- python3-pip \
7
- git \
8
- && rm -rf /var/lib/apt/lists/*
9
 
10
  WORKDIR /app
11
 
12
- COPY ../requirements.txt .
13
- RUN pip install -v --no-cache-dir -r requirements.txt
14
-
15
- # Pre-download model (faster cold starts)
16
- RUN python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')"
17
-
18
- COPY entrypoint.sh /entrypoint.sh
19
- RUN chmod +x /entrypoint.sh
20
 
21
- COPY . .
 
 
22
 
23
  EXPOSE 7860
24
 
25
- ENV RUNPOD_VERBOSE=1
26
- #CMD ["/entrypoint.sh"]
27
- #CMD ["python", "handler.py"]
28
- CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
 
1
+ FROM python:3.11-slim
 
 
 
 
 
 
 
2
 
3
  WORKDIR /app
4
 
5
+ COPY requirements.txt .
6
+ RUN pip install --no-cache-dir -r requirements.txt
 
 
 
 
 
 
7
 
8
+ COPY . /app
9
+ #COPY app/ ./app/
10
+ #COPY data/ ./data/
11
 
12
  EXPOSE 7860
13
 
14
+ CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
 
 
 
backup/HF/Dockerfile.bak1 β†’ Dockerfile.bak1 RENAMED
File without changes
Dockerfile.bak2 ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ RUN useradd -m -u 1000 user
2
+ WORKDIR /home/user/app
3
+
4
+ # Minimal system dependencies
5
+ RUN apt-get update && apt-get install -y --no-install-recommends \
6
+ git \
7
+ && rm -rf /var/lib/apt/lists/*
8
+
9
+ # Copy requirements first
10
+ COPY --chown=1000:1000 requirements.txt .
11
+
12
+ # Install Python packages efficiently
13
+ RUN pip install --no-cache-dir -U pip && \
14
+ pip install --no-cache-dir \
15
+ fastapi==0.104.1 \
16
+ uvicorn[standard]==0.24.0 \
17
+ sentence-transformers \
18
+ faiss-cpu \
19
+ torch \
20
+ transformers \
21
+ pydantic
22
+
23
+ # Copy app
24
+ COPY --chown=1000:1000 . .
25
+
26
+ USER user
27
+
28
+ EXPOSE 7860
29
+
30
+ CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
backup/HF/Space.yaml β†’ Space.yaml RENAMED
File without changes
app/services/retriever.py CHANGED
@@ -62,8 +62,7 @@ class RetrieverService:
62
  logger.info(f"[RETRIEVER] Retrieved {len(results)} documents")
63
 
64
  if results:
65
- logger.debug("[RETRIEVER] Raw FAISS top-5 scores: " +
66
- ", ".join(f"{r['score']:.4f}" for r in results[:5]))
67
 
68
  return results
69
 
 
62
  logger.info(f"[RETRIEVER] Retrieved {len(results)} documents")
63
 
64
  if results:
65
+ logger.debug("[RETRIEVER] Raw FAISS top-5 scores: " + ", ".join(f"{r['score']:.4f}" for r in results[:5]))
 
66
 
67
  return results
68
 
backup/HF/Dockerfile DELETED
@@ -1,17 +0,0 @@
1
- FROM python:3.11-slim
2
-
3
- WORKDIR /app
4
-
5
- COPY requirements.txt .
6
-
7
- RUN pip install --no-cache-dir -U pip && \
8
- pip install --no-cache-dir -r requirements.txt
9
- #RUN pip install --no-cache-dir -r requirements.txt
10
-
11
- COPY . /app
12
- #COPY app/ ./app/
13
- #COPY data/ ./data/
14
-
15
- EXPOSE 7860
16
-
17
- CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
backup/HF/Dockerfile.bak DELETED
@@ -1,14 +0,0 @@
1
- FROM python:3.11-slim
2
-
3
- WORKDIR /app
4
-
5
- COPY requirements.txt .
6
- RUN pip install --no-cache-dir -r requirements.txt
7
-
8
- COPY . /app
9
- #COPY app/ ./app/
10
- #COPY data/ ./data/
11
-
12
- EXPOSE 7860
13
-
14
- CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
entrypoint.sh DELETED
@@ -1,6 +0,0 @@
1
- #!/bin/bash
2
- set -e
3
- echo "🚀 Starting container in $(pwd)"
4
- ls -la
5
- echo "Launching FastAPI..."
6
- exec python -m uvicorn app.main:app --host 0.0.0.0 --port 8000
 
 
 
 
 
 
 
handler.py DELETED
@@ -1,43 +0,0 @@
1
- import runpod
2
- from sentence_transformers import SentenceTransformer
3
- import faiss
4
- import torch
5
- import json
6
-
7
- # Load model once (stays in memory between calls)
8
- model = None
9
- index = None
10
-
11
- def load_models():
12
- global model, index
13
- if model is None:
14
- print("Loading model...")
15
- model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
16
- device = 'cuda' if torch.cuda.is_available() else 'cpu'
17
- model = model.to(device)
18
-
19
- # Initialize FAISS
20
- dimension = 384
21
- index = faiss.IndexFlatL2(dimension)
22
- print("Models loaded!")
23
-
24
- def handler(event):
25
- """RunPod serverless handler"""
26
- load_models()
27
-
28
- input_data = event["input"]
29
- query = input_data.get("query", "")
30
-
31
- # Your RAG logic
32
- embedding = model.encode([query])
33
-
34
- # FAISS search (add your logic)
35
- # distances, indices = index.search(embedding, k=5)
36
-
37
- return {
38
- "embedding": embedding[0].tolist(),
39
- "status": "success"
40
- }
41
-
42
- if __name__ == "__main__":
43
- runpod.serverless.start({"handler": handler})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
backup/HF/requirements.txt β†’ requirements.txt RENAMED
@@ -13,13 +13,11 @@ loguru==0.7.2 # clean logging
13
 
14
  # Vector/Embedding stack (keep exactly what you use)
15
  faiss-cpu==1.7.4 # keep if you use FAISS locally
16
- qdrant-client==1.7.3 # keep if using Qdrant
17
-
18
 
19
  # === ML / AI stack (GPU-compatible) ===
20
  # Using torch for embeddings/models:
21
  torch #-- choose the right wheel for your CUDA (see Dockerfile notes)
22
  transformers
23
  sentence-transformers
24
- #cross-encoder==2.2.2
25
 
 
13
 
14
  # Vector/Embedding stack (keep exactly what you use)
15
  faiss-cpu==1.7.4 # keep if you use FAISS locally
 
 
16
 
17
  # === ML / AI stack (GPU-compatible) ===
18
  # Using torch for embeddings/models:
19
  torch #-- choose the right wheel for your CUDA (see Dockerfile notes)
20
  transformers
21
  sentence-transformers
22
+
23