alaselababatunde committed on
Commit
2249124
·
1 Parent(s): 9c18fc4
Files changed (2) hide show
  1. Dockerfile +32 -24
  2. smebuilder_vector.py +7 -5
Dockerfile CHANGED
@@ -1,38 +1,46 @@
1
- # Use lightweight Python image
 
 
2
  FROM python:3.10-slim
3
 
 
 
 
 
 
 
 
 
4
  # Install system dependencies
 
5
  RUN apt-get update && apt-get install -y \
6
  build-essential \
7
- curl \
8
  git \
 
9
  && rm -rf /var/lib/apt/lists/*
10
 
11
- # Set environment variables for HF cache
12
- ENV HF_HOME=/app/huggingface_cache
13
- ENV TRANSFORMERS_CACHE=/app/huggingface_cache
14
- ENV TORCH_HOME=/app/huggingface_cache
15
-
16
- # Create cache and DB folders with proper permissions
17
- RUN mkdir -p /app/huggingface_cache /app/Dev_Assist_SME_Builder_DB \
18
- && useradd -m appuser \
19
- && chown -R appuser:appuser /app/huggingface_cache /app/Dev_Assist_SME_Builder_DB
20
-
21
- # Switch to non-root user
22
- USER appuser
23
- WORKDIR /app
24
-
25
- # Copy requirements first for caching
26
- COPY requirements.txt .
27
-
28
- # Install Python dependencies
29
  RUN pip install --no-cache-dir -r requirements.txt
30
 
31
- # Copy project files
 
 
 
 
 
 
 
32
  COPY --chown=appuser:appuser . .
33
 
34
- # Expose FastAPI default port
 
 
35
  EXPOSE 7860
36
 
37
- # Command to run FastAPI with Uvicorn
38
- CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
 
 
 
1
+ # ------------------------------
2
+ # Base image
3
+ # ------------------------------
4
  FROM python:3.10-slim
5
 
6
+ # ------------------------------
7
+ # Create a non-root user
8
+ # ------------------------------
9
+ RUN useradd -m appuser
10
+ WORKDIR /app
11
+ # Stay root for the apt/pip steps below; USER appuser is set once /app is prepared
12
+
13
+ # ------------------------------
14
  # Install system dependencies
15
+ # ------------------------------
16
  RUN apt-get update && apt-get install -y \
17
  build-essential \
 
18
  git \
19
+ curl \
20
  && rm -rf /var/lib/apt/lists/*
21
 
22
+ # ------------------------------
23
+ # Copy requirements and install
24
+ # ------------------------------
25
+ COPY --chown=appuser:appuser requirements.txt .
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  RUN pip install --no-cache-dir -r requirements.txt
27
 
28
+ # ------------------------------
29
+ # Create cache & DB folders (writable by appuser)
30
+ # ------------------------------
31
+ RUN mkdir -p /app/huggingface_cache /app/Dev_Assist_SME_Builder_DB && chown -R appuser:appuser /app
32
+ USER appuser
33
+ # ------------------------------
34
+ # Copy app source code
35
+ # ------------------------------
36
  COPY --chown=appuser:appuser . .
37
 
38
+ # ------------------------------
39
+ # Expose FastAPI port
40
+ # ------------------------------
41
  EXPOSE 7860
42
 
43
+ # ------------------------------
44
+ # CMD to run FastAPI
45
+ # ------------------------------
46
+ CMD ["python", "-m", "uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
smebuilder_vector.py CHANGED
@@ -1,3 +1,5 @@
 
 
1
  import os
2
  import pandas as pd
3
  from langchain_huggingface import HuggingFaceEmbeddings
@@ -6,10 +8,10 @@ from langchain_core.documents import Document
6
 
7
  # ----------------- CONFIG -----------------
8
  DATASET_PATH = "sme_builder_dataset.csv"
9
- DB_LOCATION = "/app/Dev_Assist_SME_Builder_DB" # absolute path inside container
10
- HF_CACHE = "/app/huggingface_cache" # absolute path for HF cache
11
  COLLECTION_NAME = "landing_page_generation_examples"
12
  EMBEDDING_MODEL = "intfloat/e5-base-v2"
 
13
 
14
  # ----------------- LOAD DATASET -----------------
15
  if not os.path.exists(DATASET_PATH):
@@ -20,10 +22,10 @@ df = pd.read_csv(DATASET_PATH)
20
  # ----------------- EMBEDDINGS -----------------
21
  embeddings = HuggingFaceEmbeddings(
22
  model_name=EMBEDDING_MODEL,
23
- cache_dir=HF_CACHE # ensures HF uses a container-safe writable folder
24
  )
25
 
26
- # Check if vector store exists
27
  add_documents = not os.path.exists(DB_LOCATION)
28
 
29
  # ----------------- CREATE DOCUMENTS -----------------
@@ -46,7 +48,7 @@ if add_documents:
46
  # ----------------- VECTOR STORE -----------------
47
  vector_store = Chroma(
48
  collection_name=COLLECTION_NAME,
49
- persist_directory=DB_LOCATION, # absolute path
50
  embedding_function=embeddings,
51
  )
52
 
 
1
+ # smebuilder_vector.py
2
+
3
  import os
4
  import pandas as pd
5
  from langchain_huggingface import HuggingFaceEmbeddings
 
8
 
9
  # ----------------- CONFIG -----------------
10
  DATASET_PATH = "sme_builder_dataset.csv"
11
+ DB_LOCATION = "/app/Dev_Assist_SME_Builder_DB" # absolute path for container
 
12
  COLLECTION_NAME = "landing_page_generation_examples"
13
  EMBEDDING_MODEL = "intfloat/e5-base-v2"
14
+ HF_CACHE = "/app/huggingface_cache" # writable cache folder
15
 
16
  # ----------------- LOAD DATASET -----------------
17
  if not os.path.exists(DATASET_PATH):
 
22
  # ----------------- EMBEDDINGS -----------------
23
  embeddings = HuggingFaceEmbeddings(
24
  model_name=EMBEDDING_MODEL,
25
+ model_kwargs={"cache_dir": HF_CACHE} # pass writable cache
26
  )
27
 
28
+ # ----------------- CHECK VECTOR STORE -----------------
29
  add_documents = not os.path.exists(DB_LOCATION)
30
 
31
  # ----------------- CREATE DOCUMENTS -----------------
 
48
  # ----------------- VECTOR STORE -----------------
49
  vector_store = Chroma(
50
  collection_name=COLLECTION_NAME,
51
+ persist_directory=DB_LOCATION,
52
  embedding_function=embeddings,
53
  )
54