VcRlAgent commited on
Commit
92f791e
·
1 Parent(s): 3f8e1ce
Dockerfile CHANGED
@@ -1,28 +1,14 @@
1
- FROM runpod/pytorch:2.2.0-py3.10-cuda12.1.1-devel-ubuntu22.04
2
-
3
- # Install Python
4
- RUN apt-get update && apt-get install -y \
5
- python3.10 \
6
- python3-pip \
7
- git \
8
- && rm -rf /var/lib/apt/lists/*
9
 
10
  WORKDIR /app
11
 
12
- COPY ../requirements.txt .
13
- RUN pip install -v --no-cache-dir -r requirements.txt
14
-
15
- # Pre-download model (faster cold starts)
16
- RUN python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')"
17
-
18
- COPY entrypoint.sh /entrypoint.sh
19
- RUN chmod +x /entrypoint.sh
20
 
21
- COPY . .
 
 
22
 
23
  EXPOSE 7860
24
 
25
- ENV RUNPOD_VERBOSE=1
26
- #CMD ["/entrypoint.sh"]
27
- #CMD ["python", "handler.py"]
28
- CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
 
1
+ FROM python:3.11-slim
 
 
 
 
 
 
 
2
 
3
  WORKDIR /app
4
 
5
+ COPY requirements.txt .
6
+ RUN pip install --no-cache-dir -r requirements.txt
 
 
 
 
 
 
7
 
8
+ COPY . /app
9
+ #COPY app/ ./app/
10
+ #COPY data/ ./data/
11
 
12
  EXPOSE 7860
13
 
14
+ CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
 
 
 
backup/HF/Dockerfile.bak1 β†’ Dockerfile.bak1 RENAMED
File without changes
Dockerfile.bak2 ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ RUN useradd -m -u 1000 user
2
+ WORKDIR /home/user/app
3
+
4
+ # Minimal system dependencies
5
+ RUN apt-get update && apt-get install -y --no-install-recommends \
6
+ git \
7
+ && rm -rf /var/lib/apt/lists/*
8
+
9
+ # Copy requirements first
10
+ COPY --chown=1000:1000 requirements.txt .
11
+
12
+ # Install Python packages efficiently
13
+ RUN pip install --no-cache-dir -U pip && \
14
+ pip install --no-cache-dir \
15
+ fastapi==0.104.1 \
16
+ uvicorn[standard]==0.24.0 \
17
+ sentence-transformers \
18
+ faiss-cpu \
19
+ torch \
20
+ transformers \
21
+ pydantic
22
+
23
+ # Copy app
24
+ COPY --chown=1000:1000 . .
25
+
26
+ USER user
27
+
28
+ EXPOSE 7860
29
+
30
+ CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
backup/HF/Space.yaml β†’ Space.yaml RENAMED
File without changes
app/services/retriever.py CHANGED
@@ -62,8 +62,7 @@ class RetrieverService:
62
  logger.info(f"[RETRIEVER] Retrieved {len(results)} documents")
63
 
64
  if results:
65
- logger.debug("[RETRIEVER] Raw FAISS top-5 scores: " +
66
- ", ".join(f"{r['score']:.4f}" for r in results[:5]))
67
 
68
  return results
69
 
 
62
  logger.info(f"[RETRIEVER] Retrieved {len(results)} documents")
63
 
64
  if results:
65
+ logger.debug("[RETRIEVER] Raw FAISS top-5 scores: " + ", ".join(f"{r['score']:.4f}" for r in results[:5]))
 
66
 
67
  return results
68
 
backup/HF/Dockerfile DELETED
@@ -1,17 +0,0 @@
1
- FROM python:3.11-slim
2
-
3
- WORKDIR /app
4
-
5
- COPY requirements.txt .
6
-
7
- RUN pip install --no-cache-dir -U pip && \
8
- pip install --no-cache-dir -r requirements.txt
9
- #RUN pip install --no-cache-dir -r requirements.txt
10
-
11
- COPY . /app
12
- #COPY app/ ./app/
13
- #COPY data/ ./data/
14
-
15
- EXPOSE 7860
16
-
17
- CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
backup/HF/Dockerfile.bak DELETED
@@ -1,14 +0,0 @@
1
- FROM python:3.11-slim
2
-
3
- WORKDIR /app
4
-
5
- COPY requirements.txt .
6
- RUN pip install --no-cache-dir -r requirements.txt
7
-
8
- COPY . /app
9
- #COPY app/ ./app/
10
- #COPY data/ ./data/
11
-
12
- EXPOSE 7860
13
-
14
- CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
entrypoint.sh DELETED
@@ -1,6 +0,0 @@
1
- #!/bin/bash
2
- set -e
3
- echo "🚀 Starting container in $(pwd)"
4
- ls -la
5
- echo "Launching FastAPI..."
6
- exec python -m uvicorn app.main:app --host 0.0.0.0 --port 8000
 
 
 
 
 
 
 
handler.py DELETED
@@ -1,43 +0,0 @@
1
- import runpod
2
- from sentence_transformers import SentenceTransformer
3
- import faiss
4
- import torch
5
- import json
6
-
7
- # Load model once (stays in memory between calls)
8
- model = None
9
- index = None
10
-
11
- def load_models():
12
- global model, index
13
- if model is None:
14
- print("Loading model...")
15
- model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
16
- device = 'cuda' if torch.cuda.is_available() else 'cpu'
17
- model = model.to(device)
18
-
19
- # Initialize FAISS
20
- dimension = 384
21
- index = faiss.IndexFlatL2(dimension)
22
- print("Models loaded!")
23
-
24
- def handler(event):
25
- """RunPod serverless handler"""
26
- load_models()
27
-
28
- input_data = event["input"]
29
- query = input_data.get("query", "")
30
-
31
- # Your RAG logic
32
- embedding = model.encode([query])
33
-
34
- # FAISS search (add your logic)
35
- # distances, indices = index.search(embedding, k=5)
36
-
37
- return {
38
- "embedding": embedding[0].tolist(),
39
- "status": "success"
40
- }
41
-
42
- if __name__ == "__main__":
43
- runpod.serverless.start({"handler": handler})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
backup/HF/requirements.txt β†’ requirements.txt RENAMED
@@ -13,13 +13,11 @@ loguru==0.7.2 # clean logging
13
 
14
  # Vector/Embedding stack (keep exactly what you use)
15
  faiss-cpu==1.7.4 # keep if you use FAISS locally
16
- qdrant-client==1.7.3 # keep if using Qdrant
17
-
18
 
19
  # === ML / AI stack (GPU-compatible) ===
20
  # Using torch for embeddings/models:
21
  torch #-- choose the right wheel for your CUDA (see Dockerfile notes)
22
  transformers
23
  sentence-transformers
24
- #cross-encoder==2.2.2
25
 
 
13
 
14
  # Vector/Embedding stack (keep exactly what you use)
15
  faiss-cpu==1.7.4 # keep if you use FAISS locally
 
 
16
 
17
  # === ML / AI stack (GPU-compatible) ===
18
  # Using torch for embeddings/models:
19
  torch #-- choose the right wheel for your CUDA (see Dockerfile notes)
20
  transformers
21
  sentence-transformers
22
+
23