alaselababatunde committed on
Commit
410396e
·
1 Parent(s): 2249124
Files changed (1) hide show
  1. smebuilder_vector.py +36 -59
smebuilder_vector.py CHANGED
@@ -1,59 +1,36 @@
1
- # smebuilder_vector.py
2
-
3
- import os
4
- import pandas as pd
5
- from langchain_huggingface import HuggingFaceEmbeddings
6
- from langchain_chroma import Chroma
7
- from langchain_core.documents import Document
8
-
9
- # ----------------- CONFIG -----------------
10
- DATASET_PATH = "sme_builder_dataset.csv"
11
- DB_LOCATION = "/app/Dev_Assist_SME_Builder_DB" # absolute path for container
12
- COLLECTION_NAME = "landing_page_generation_examples"
13
- EMBEDDING_MODEL = "intfloat/e5-base-v2"
14
- HF_CACHE = "/app/huggingface_cache" # writable cache folder
15
-
16
- # ----------------- LOAD DATASET -----------------
17
- if not os.path.exists(DATASET_PATH):
18
- raise FileNotFoundError(f"Dataset file not found: {DATASET_PATH}")
19
-
20
- df = pd.read_csv(DATASET_PATH)
21
-
22
- # ----------------- EMBEDDINGS -----------------
23
- embeddings = HuggingFaceEmbeddings(
24
- model_name=EMBEDDING_MODEL,
25
- model_kwargs={"cache_dir": HF_CACHE} # pass writable cache
26
- )
27
-
28
- # ----------------- CHECK VECTOR STORE -----------------
29
- add_documents = not os.path.exists(DB_LOCATION)
30
-
31
- # ----------------- CREATE DOCUMENTS -----------------
32
- documents, ids = [], []
33
- if add_documents:
34
- for i, row in df.iterrows():
35
- prompt = row.get("prompt", "")
36
- html_code = row.get("html_code", "")
37
- css_code = row.get("css_code", "")
38
- js_code = row.get("js_code", "")
39
- sector = row.get("sector", "")
40
-
41
- page_content = " ".join(
42
- [str(prompt), str(html_code), str(css_code), str(js_code), str(sector)]
43
- ).strip()
44
-
45
- documents.append(Document(page_content=page_content, id=str(i)))
46
- ids.append(str(i))
47
-
48
- # ----------------- VECTOR STORE -----------------
49
- vector_store = Chroma(
50
- collection_name=COLLECTION_NAME,
51
- persist_directory=DB_LOCATION,
52
- embedding_function=embeddings,
53
- )
54
-
55
- if add_documents and documents:
56
- vector_store.add_documents(documents=documents, ids=ids)
57
-
58
- # ----------------- RETRIEVER -----------------
59
- retriever = vector_store.as_retriever(search_kwargs={"k": 20})
 
1
+ # Use lightweight Python image
2
+ FROM python:3.10-slim
3
+
4
+ # ----------------- SYSTEM DEPENDENCIES -----------------
5
+ # Install as root (default user)
6
+ RUN apt-get update && apt-get install -y \
7
+ build-essential \
8
+ git \
9
+ curl \
10
+ && rm -rf /var/lib/apt/lists/*
11
+
12
+ # ----------------- CREATE NON-ROOT USER -----------------
13
+ RUN useradd -m appuser
14
+
15
+ # Set working directory
16
+ WORKDIR /app
17
+
18
+ # ----------------- COPY AND INSTALL PYTHON DEPENDENCIES -----------------
19
+ COPY requirements.txt .
20
+ RUN pip install --no-cache-dir -r requirements.txt
21
+
22
+ # ----------------- COPY PROJECT FILES -----------------
23
+ COPY . .
24
+
25
+ # Create the Hugging Face cache and vector DB folders, then give appuser ownership of them and of /app
26
+ RUN mkdir -p /app/huggingface_cache /app/Dev_Assist_SME_Builder_DB \
27
+ && chown -R appuser:appuser /app/huggingface_cache /app/Dev_Assist_SME_Builder_DB /app
28
+
29
+ # Switch to non-root user
30
+ USER appuser
31
+
32
+ # Expose port 7860, the port Uvicorn is told to listen on in CMD below
33
+ EXPOSE 7860
34
+
35
+ # Command to run FastAPI with Uvicorn
36
+ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]