alaselababatunde committed on
Commit
4d95531
·
1 Parent(s): ea55213
Files changed (2) hide show
  1. Dockerfile +20 -35
  2. smebuilder_vector.py +53 -15
Dockerfile CHANGED
@@ -1,46 +1,31 @@
1
- # ------------------------------
 
 
 
2
  # Base image
3
- # ------------------------------
4
  FROM python:3.10-slim
5
 
6
- # ------------------------------
7
- # Create a non-root user
8
- # ------------------------------
9
- RUN useradd -m appuser
10
  WORKDIR /app
11
- USER appuser
12
-
13
- # ------------------------------
14
- # Install system dependencies
15
- # ------------------------------
16
- RUN apt-get update && apt-get install -y \
17
- build-essential \
18
- git \
19
- curl \
20
- && rm -rf /var/lib/apt/lists/*
21
-
22
- # ------------------------------
23
- # Copy requirements and install
24
- # ------------------------------
25
- COPY --chown=appuser:appuser requirements.txt .
26
- RUN pip install --no-cache-dir -r requirements.txt
27
 
28
- # ------------------------------
29
- # Create cache & DB folders (writable by appuser)
30
- # ------------------------------
31
  RUN mkdir -p /app/huggingface_cache /app/Dev_Assist_SME_Builder_DB
32
 
33
- # ------------------------------
34
- # Copy app source code
35
- # ------------------------------
36
- COPY --chown=appuser:appuser . .
 
 
 
 
37
 
38
- # ------------------------------
39
  # Expose FastAPI port
40
- # ------------------------------
41
  EXPOSE 7860
42
 
43
- # ------------------------------
44
- # CMD to run FastAPI
45
- # ------------------------------
46
- CMD ["python", "-m", "uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
 
 
 
1
+ # ----------------------
2
+ # DevAssist AI Dockerfile
3
+ # ----------------------
4
+
5
  # Base image
 
6
  FROM python:3.10-slim
7
 
8
+ # Set working directory
 
 
 
9
  WORKDIR /app
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
+ # Create directories for caches and Chroma DB (writable by default user)
 
 
12
  RUN mkdir -p /app/huggingface_cache /app/Dev_Assist_SME_Builder_DB
13
 
14
+ # Copy requirements first (to leverage Docker cache)
15
+ COPY requirements.txt .
16
+
17
+ # Install Python dependencies
18
+ RUN pip install --no-cache-dir -r requirements.txt
19
+
20
+ # Copy all project files
21
+ COPY . .
22
 
 
23
  # Expose FastAPI port
 
24
  EXPOSE 7860
25
 
26
+ # Set environment variables for cache directories (HuggingFace & Chroma)
27
+ ENV HF_HOME=/app/huggingface_cache
28
+ ENV CHROMA_DB_DIR=/app/Dev_Assist_SME_Builder_DB
29
+
30
+ # Default command to start FastAPI via Uvicorn
31
+ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
smebuilder_vector.py CHANGED
@@ -1,21 +1,59 @@
1
- # Base image
2
- FROM python:3.10-slim
3
 
4
- # Set working directory
5
- WORKDIR /app
 
 
 
6
 
7
- # Copy and install Python dependencies
8
- COPY requirements.txt .
9
- RUN pip install --no-cache-dir -r requirements.txt
 
 
 
10
 
11
- # Copy project files
12
- COPY . .
 
13
 
14
- # Make folders writable for HuggingFace / Chroma
15
- RUN mkdir -p /app/huggingface_cache /app/Dev_Assist_SME_Builder_DB
16
 
17
- # Expose FastAPI port
18
- EXPOSE 7860
 
 
 
19
 
20
- # Run as default user
21
- CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # smebuilder_vector.py
 
2
 
3
+ import os
4
+ import pandas as pd
5
+ from langchain_huggingface import HuggingFaceEmbeddings
6
+ from langchain_chroma import Chroma
7
+ from langchain_core.documents import Document
8
 
9
+ # ----------------- CONFIG -----------------
10
+ DATASET_PATH = "sme_builder_dataset.csv"
11
+ DB_LOCATION = "/app/Dev_Assist_SME_Builder_DB" # absolute path for container
12
+ COLLECTION_NAME = "landing_page_generation_examples"
13
+ EMBEDDING_MODEL = "intfloat/e5-base-v2"
14
+ HF_CACHE = "/app/huggingface_cache" # writable cache folder
15
 
16
+ # ----------------- LOAD DATASET -----------------
17
+ if not os.path.exists(DATASET_PATH):
18
+ raise FileNotFoundError(f"Dataset file not found: {DATASET_PATH}")
19
 
20
+ df = pd.read_csv(DATASET_PATH)
 
21
 
22
+ # ----------------- EMBEDDINGS -----------------
23
+ embeddings = HuggingFaceEmbeddings(
24
+ model_name=EMBEDDING_MODEL,
25
+ model_kwargs={"cache_dir": HF_CACHE} # pass writable cache
26
+ )
27
 
28
+ # ----------------- CHECK VECTOR STORE -----------------
29
+ add_documents = not os.path.exists(DB_LOCATION)
30
+
31
+ # ----------------- CREATE DOCUMENTS -----------------
32
+ documents, ids = [], []
33
+ if add_documents:
34
+ for i, row in df.iterrows():
35
+ prompt = row.get("prompt", "")
36
+ html_code = row.get("html_code", "")
37
+ css_code = row.get("css_code", "")
38
+ js_code = row.get("js_code", "")
39
+ sector = row.get("sector", "")
40
+
41
+ page_content = " ".join(
42
+ [str(prompt), str(html_code), str(css_code), str(js_code), str(sector)]
43
+ ).strip()
44
+
45
+ documents.append(Document(page_content=page_content, id=str(i)))
46
+ ids.append(str(i))
47
+
48
+ # ----------------- VECTOR STORE -----------------
49
+ vector_store = Chroma(
50
+ collection_name=COLLECTION_NAME,
51
+ persist_directory=DB_LOCATION,
52
+ embedding_function=embeddings,
53
+ )
54
+
55
+ if add_documents and documents:
56
+ vector_store.add_documents(documents=documents, ids=ids)
57
+
58
+ # ----------------- RETRIEVER -----------------
59
+ retriever = vector_store.as_retriever(search_kwargs={"k": 20})