internationalscholarsprogram committed on
Commit
0d0ccfc
·
1 Parent(s): 06f6eb8

update: switch to PersistentClient and refresh Dockerfile

Browse files
Files changed (3) hide show
  1. Dockerfile +8 -9
  2. app.py +67 -45
  3. code wget step; clean Dockerfile +62 -0
Dockerfile CHANGED
@@ -10,26 +10,28 @@ ENV PYTHONDONTWRITEBYTECODE=1 \
10
  PIP_ROOT_USER_ACTION=ignore \
11
  PIP_DISABLE_PIP_VERSION_CHECK=1 \
12
  HF_HOME=/data/.huggingface \
13
- RAG_DB_DIR=/tmp/chroma_db \
14
  RAG_CORPUS_DIR=/data/corpus \
15
  RAG_DATASET_ID=internationalscholarsprogram/DOC \
16
  RAG_DATASET_REVISION=main \
17
  RAG_PORT=7860 \
18
  PORT=7860 \
19
  TOKENIZERS_PARALLELISM=false \
20
- CHROMA_DB_IMPL=duckdb+parquet \
21
- CHROMADB_TELEMETRY=false \
22
- ANONYMIZED_TELEMETRY=false \
23
  HF_HUB_DISABLE_TELEMETRY=1 \
24
  CUDA_VISIBLE_DEVICES="" \
25
- OMP_NUM_THREADS=1
 
 
 
 
 
26
 
27
  # --- System dependencies ---
28
  RUN apt-get update && apt-get install -y --no-install-recommends \
29
  tini wget curl ca-certificates tar git \
30
  && rm -rf /var/lib/apt/lists/*
31
 
32
- # --- Non-root user (kept for reference; not used) ---
33
  RUN useradd -m -u 1000 appuser || true
34
 
35
  WORKDIR /app
@@ -46,9 +48,6 @@ COPY . .
46
  RUN mkdir -p /data/chroma_db /data/.huggingface /data/corpus /tmp/chroma_db \
47
  && chmod -R 777 /data /app /tmp
48
 
49
- # --- Optional: bootstrap script permissions ---
50
- RUN if [ -f "bootstrap.sh" ]; then chmod +x bootstrap.sh; fi
51
-
52
  # Do NOT switch user; keep root so /data and /tmp are writable in Spaces
53
  # USER appuser
54
 
 
10
  PIP_ROOT_USER_ACTION=ignore \
11
  PIP_DISABLE_PIP_VERSION_CHECK=1 \
12
  HF_HOME=/data/.huggingface \
13
+ RAG_DB_DIR=/data/chroma_db \
14
  RAG_CORPUS_DIR=/data/corpus \
15
  RAG_DATASET_ID=internationalscholarsprogram/DOC \
16
  RAG_DATASET_REVISION=main \
17
  RAG_PORT=7860 \
18
  PORT=7860 \
19
  TOKENIZERS_PARALLELISM=false \
 
 
 
20
  HF_HUB_DISABLE_TELEMETRY=1 \
21
  CUDA_VISIBLE_DEVICES="" \
22
+ OMP_NUM_THREADS=1 \
23
+ ORT_LOG_SEVERITY_LEVEL=3
24
+
25
+ # NOTE:
26
+ # - Removed legacy Chroma envs (CHROMA_DB_IMPL, CHROMADB_TELEMETRY, ANONYMIZED_TELEMETRY)
27
+ # since the new PersistentClient doesn’t use them.
28
 
29
  # --- System dependencies ---
30
  RUN apt-get update && apt-get install -y --no-install-recommends \
31
  tini wget curl ca-certificates tar git \
32
  && rm -rf /var/lib/apt/lists/*
33
 
34
+ # --- (Optional) Non-root user (kept for reference) ---
35
  RUN useradd -m -u 1000 appuser || true
36
 
37
  WORKDIR /app
 
48
  RUN mkdir -p /data/chroma_db /data/.huggingface /data/corpus /tmp/chroma_db \
49
  && chmod -R 777 /data /app /tmp
50
 
 
 
 
51
  # Do NOT switch user; keep root so /data and /tmp are writable in Spaces
52
  # USER appuser
53
 
app.py CHANGED
@@ -10,12 +10,14 @@ Enhancements in this version:
10
  - Auto-(re)index Chroma when the dataset commit SHA changes
11
  - /refresh endpoint to force re-pull + reindex without redeploying
12
  - SAFE writable dir detection for Chroma with fallback to /tmp/chroma_db
 
13
  """
14
 
15
  import os, sys, logging, warnings, json, shutil, time
16
  from typing import List, Optional, Iterable, Dict, Any
17
 
18
- # -------------------- Quiet warnings --------------------
 
19
  if not sys.warnoptions:
20
  warnings.simplefilter("ignore")
21
  for cat in (DeprecationWarning, UserWarning, FutureWarning):
@@ -23,6 +25,21 @@ for cat in (DeprecationWarning, UserWarning, FutureWarning):
23
  warnings.filterwarnings("ignore", message=".*LangChainDeprecationWarning.*")
24
  os.environ.setdefault("PYTHONWARNINGS", "ignore")
25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  logging.basicConfig(
27
  level=logging.ERROR,
28
  format="%(asctime)s | %(levelname)s | %(name)s | %(message)s"
@@ -36,12 +53,14 @@ from fastapi import FastAPI, HTTPException, Header, Depends
36
  from fastapi.middleware.cors import CORSMiddleware
37
  from pydantic import BaseModel, Field
38
 
39
- # Vector store
40
  try:
41
  from langchain_chroma import Chroma
42
  except ImportError:
43
  from langchain_community.vectorstores import Chroma # fallback
44
 
 
 
45
  # LLM endpoint
46
  try:
47
  from langchain_huggingface import HuggingFaceEndpoint
@@ -68,17 +87,16 @@ except ImportError:
68
  from langchain_core.documents import Document
69
  from langchain_core.embeddings import Embeddings # modern base
70
 
71
- # NEW: dataset + PDF loading helpers
72
  from huggingface_hub import snapshot_download, HfApi
73
  from langchain_community.document_loaders import PyPDFLoader
74
  from langchain.text_splitter import RecursiveCharacterTextSplitter
75
 
76
- # NEW: explicit Chroma settings (prevents telemetry + ensures persistence)
77
- from chromadb.config import Settings
78
-
79
  # -------------------- Config --------------------
80
  ENV = os.getenv
81
  DB_DIR = ENV("RAG_DB_DIR", "/data/chroma_db") # intended Chroma dir
 
 
82
  EMBED_PROVIDER = ENV("RAG_EMBED_PROVIDER", "bge").lower() # bge | fastembed | hf_local
83
  EMBED_MODEL = ENV("RAG_EMBED_MODEL", "BAAI/bge-small-en-v1.5")
84
  DEVICE = ENV("RAG_DEVICE", "cpu")
@@ -101,7 +119,7 @@ HOST = ENV("RAG_HOST", "0.0.0.0")
101
  PORT = int(ENV("PORT", ENV("RAG_PORT", "7860"))) # Spaces $PORT first
102
  CORS_ORIGINS = ENV("RAG_CORS_ORIGINS", "*")
103
 
104
- # NEW: dataset sync locations
105
  DATASET_ID = ENV("RAG_DATASET_ID", "internationalscholarsprogram/DOC")
106
  DATA_REV = ENV("RAG_DATASET_REVISION", "main") # tag/branch/sha, or "main"
107
  CORPUS_DIR = ENV("RAG_CORPUS_DIR", "/data/corpus") # where PDFs are downloaded
@@ -158,7 +176,7 @@ def build_embeddings(provider: str, model: str, device: str,
158
  base = HuggingFaceBgeEmbeddings(
159
  model_name=model,
160
  model_kwargs={"device": device},
161
- encode_kwargs={"normalize_embeddings": True},
162
  )
163
  return BGEAdapter(base, use_prefixes=use_prefixes)
164
 
@@ -169,7 +187,7 @@ def build_embeddings(provider: str, model: str, device: str,
169
  base = HFEmbeddings(
170
  model_name=model,
171
  model_kwargs={"device": device},
172
- encode_kwargs={"normalize_embeddings": True},
173
  )
174
  return BGEAdapter(base, use_prefixes=("bge" in model.lower() and use_prefixes))
175
 
@@ -184,18 +202,18 @@ embeddings = build_embeddings(
184
  batch_size=EMBED_BATCH,
185
  )
186
 
187
- # -------------------- Vector DB handle (created now; filled later) --------------------
188
  os.makedirs(DB_DIR, exist_ok=True)
189
- client_settings = Settings(
190
- anonymized_telemetry=False,
191
- is_persistent=True,
192
- persist_directory=DB_DIR,
193
- )
194
  vectordb = Chroma(
195
- persist_directory=DB_DIR,
196
  embedding_function=embeddings,
 
197
  collection_metadata={"hnsw:space": "cosine"},
198
- client_settings=client_settings,
199
  )
200
 
201
  def build_retriever(k: int):
@@ -291,48 +309,46 @@ def load_docs_from_pdfs(pdf_paths: List[str]) -> List[Document]:
291
 
292
  def _reset_chroma_dir():
293
  """Safely reset the Chroma persist dir even if a client is holding files."""
294
- # Try to tell Chroma to drop collections first (if client exists)
295
  try:
296
- client = getattr(vectordb, "_client", None)
297
- if client is not None:
298
- try:
299
- client.reset()
300
- except Exception:
301
- try:
302
- coll = getattr(vectordb, "_collection", None)
303
- if coll and getattr(coll, "name", None):
304
- client.delete_collection(coll.name)
305
- except Exception:
306
- pass
307
  except Exception:
308
  pass
309
 
310
- # Retry rmtree a few times in case background handles are slow to release
311
  for _ in range(10):
312
  try:
313
  if os.path.isdir(DB_DIR):
314
  shutil.rmtree(DB_DIR)
315
  break
316
  except OSError:
317
- time.sleep(0.2) # brief backoff
318
  os.makedirs(DB_DIR, exist_ok=True)
319
 
320
- def rebuild_chroma(docs: List[Document]):
321
- global vectordb
322
- _reset_chroma_dir()
323
- # Recreate a fresh store (use the same client_settings)
324
  vectordb = Chroma(
325
- persist_directory=DB_DIR,
326
  embedding_function=embeddings,
 
327
  collection_metadata={"hnsw:space": "cosine"},
328
- client_settings=client_settings,
329
  )
 
 
 
 
330
  if docs:
331
  # Add in small batches to keep memory low
332
  batch = 64
333
  for i in range(0, len(docs), batch):
334
  vectordb.add_documents(docs[i:i+batch])
335
- vectordb.persist()
336
 
337
  def reindex_if_needed(force: bool = False, revision: str = DATA_REV) -> Dict[str, Any]:
338
  """
@@ -342,17 +358,21 @@ def reindex_if_needed(force: bool = False, revision: str = DATA_REV) -> Dict[str
342
  st = _state_load()
343
  old_sha = st.get("dataset_sha")
344
 
345
- if force or (new_sha != old_sha) or (not os.path.isdir(DB_DIR)):
 
346
  pdfs = list_pdf_paths(CORPUS_DIR)
347
  docs = load_docs_from_pdfs(pdfs)
348
  rebuild_chroma(docs)
349
  st["dataset_sha"] = new_sha
350
  _state_save(st)
351
  return {"reindexed": True, "commit": new_sha, "docs": len(docs)}
352
- return {"reindexed": False, "commit": new_sha}
 
 
 
353
 
354
  # -------------------- Helpers --------------------
355
- def format_docs(docs: List[Document]) -> str:
356
  parts = []
357
  for i, d in enumerate(docs, 1):
358
  src = d.metadata.get("source", "unknown")
@@ -392,7 +412,7 @@ def answer_question(question: str, k: int = TOP_K_DEFAULT) -> Dict[str, Any]:
392
  return {"answer": answer, "citations": cits, "used_k": k}
393
 
394
  # -------------------- FastAPI --------------------
395
- app = FastAPI(title="Career GPT RAG API", version="1.1.0")
396
  app.add_middleware(
397
  CORSMiddleware,
398
  allow_origins=[o.strip() for o in CORS_ORIGINS.split(",") if o.strip()],
@@ -432,7 +452,8 @@ def healthz():
432
  # Best-effort count
433
  count = 0
434
  try:
435
- count = vectordb._collection.count() # type: ignore[attr-defined]
 
436
  except Exception:
437
  meta = vectordb.get(limit=1)
438
  count = len(meta.get("ids", []))
@@ -440,7 +461,8 @@ def healthz():
440
  return {
441
  "status": "ok",
442
  "db_dir": DB_DIR,
443
- "docs_indexed": count,
 
444
  "embed_provider": EMBED_PROVIDER,
445
  "embed_model": EMBED_MODEL,
446
  "llm": HF_LLM_REPO,
@@ -470,7 +492,7 @@ def ask(req: AskRequest, _ok: bool = Depends(require_api_key)):
470
  log.exception("Unhandled /ask error")
471
  raise HTTPException(status_code=500, detail=str(e))
472
 
473
- # ---- NEW: manual refresh endpoint ----
474
  @app.post("/refresh")
475
  def refresh(_ok: bool = Depends(require_api_key)):
476
  """
 
10
  - Auto-(re)index Chroma when the dataset commit SHA changes
11
  - /refresh endpoint to force re-pull + reindex without redeploying
12
  - SAFE writable dir detection for Chroma with fallback to /tmp/chroma_db
13
+ - UPDATED: Chroma migration to new client API (PersistentClient)
14
  """
15
 
16
  import os, sys, logging, warnings, json, shutil, time
17
  from typing import List, Optional, Iterable, Dict, Any
18
 
19
+ # -------------------- Quiet warnings & env hygiene --------------------
20
+ # Silence common warnings
21
  if not sys.warnoptions:
22
  warnings.simplefilter("ignore")
23
  for cat in (DeprecationWarning, UserWarning, FutureWarning):
 
25
  warnings.filterwarnings("ignore", message=".*LangChainDeprecationWarning.*")
26
  os.environ.setdefault("PYTHONWARNINGS", "ignore")
27
 
28
+ # Sanitize OMP_NUM_THREADS (fixes: "libgomp: Invalid value for OMP_NUM_THREADS")
29
+ _omp = os.environ.get("OMP_NUM_THREADS")
30
+ if _omp:
31
+ try:
32
+ n = int(str(_omp).strip())
33
+ if n <= 0:
34
+ raise ValueError
35
+ except Exception:
36
+ os.environ["OMP_NUM_THREADS"] = "1"
37
+
38
+ # Optionally quiet ONNX Runtime if present
39
+ os.environ.setdefault("ORT_LOG_SEVERITY_LEVEL", "3") # WARN
40
+ # Disable accidental GPU probing on CPU Spaces (harmless if GPU exists)
41
+ os.environ.setdefault("CUDA_VISIBLE_DEVICES", "")
42
+
43
  logging.basicConfig(
44
  level=logging.ERROR,
45
  format="%(asctime)s | %(levelname)s | %(name)s | %(message)s"
 
53
  from fastapi.middleware.cors import CORSMiddleware
54
  from pydantic import BaseModel, Field
55
 
56
+ # Vector store (new Chroma client API)
57
  try:
58
  from langchain_chroma import Chroma
59
  except ImportError:
60
  from langchain_community.vectorstores import Chroma # fallback
61
 
62
+ from chromadb import PersistentClient # NEW: explicit client
63
+
64
  # LLM endpoint
65
  try:
66
  from langchain_huggingface import HuggingFaceEndpoint
 
87
  from langchain_core.documents import Document
88
  from langchain_core.embeddings import Embeddings # modern base
89
 
90
+ # Dataset + PDF loading helpers
91
  from huggingface_hub import snapshot_download, HfApi
92
  from langchain_community.document_loaders import PyPDFLoader
93
  from langchain.text_splitter import RecursiveCharacterTextSplitter
94
 
 
 
 
95
  # -------------------- Config --------------------
96
  ENV = os.getenv
97
  DB_DIR = ENV("RAG_DB_DIR", "/data/chroma_db") # intended Chroma dir
98
+ COLLECTION_NAME = ENV("RAG_COLLECTION", "career_gpt") # NEW: explicit collection name
99
+
100
  EMBED_PROVIDER = ENV("RAG_EMBED_PROVIDER", "bge").lower() # bge | fastembed | hf_local
101
  EMBED_MODEL = ENV("RAG_EMBED_MODEL", "BAAI/bge-small-en-v1.5")
102
  DEVICE = ENV("RAG_DEVICE", "cpu")
 
119
  PORT = int(ENV("PORT", ENV("RAG_PORT", "7860"))) # Spaces $PORT first
120
  CORS_ORIGINS = ENV("RAG_CORS_ORIGINS", "*")
121
 
122
+ # Dataset sync locations
123
  DATASET_ID = ENV("RAG_DATASET_ID", "internationalscholarsprogram/DOC")
124
  DATA_REV = ENV("RAG_DATASET_REVISION", "main") # tag/branch/sha, or "main"
125
  CORPUS_DIR = ENV("RAG_CORPUS_DIR", "/data/corpus") # where PDFs are downloaded
 
176
  base = HuggingFaceBgeEmbeddings(
177
  model_name=model,
178
  model_kwargs={"device": device},
179
+ encode_kwargs={'normalize_embeddings': True},
180
  )
181
  return BGEAdapter(base, use_prefixes=use_prefixes)
182
 
 
187
  base = HFEmbeddings(
188
  model_name=model,
189
  model_kwargs={"device": device},
190
+ encode_kwargs={'normalize_embeddings': True},
191
  )
192
  return BGEAdapter(base, use_prefixes=("bge" in model.lower() and use_prefixes))
193
 
 
202
  batch_size=EMBED_BATCH,
203
  )
204
 
205
+ # -------------------- Vector DB handle (new Chroma client) --------------------
206
  os.makedirs(DB_DIR, exist_ok=True)
207
+
208
+ # Create a persistent Chroma **client** (NEW API)
209
+ _chroma_client = PersistentClient(path=DB_DIR)
210
+
211
+ # Create/open the collection via LangChain's wrapper
212
  vectordb = Chroma(
213
+ collection_name=COLLECTION_NAME,
214
  embedding_function=embeddings,
215
+ client=_chroma_client,
216
  collection_metadata={"hnsw:space": "cosine"},
 
217
  )
218
 
219
  def build_retriever(k: int):
 
309
 
310
  def _reset_chroma_dir():
311
  """Safely reset the Chroma persist dir even if a client is holding files."""
312
+ # Try to drop the collection cleanly
313
  try:
314
+ # Delete collection if it exists
315
+ try:
316
+ _chroma_client.delete_collection(COLLECTION_NAME)
317
+ except Exception:
318
+ pass
 
 
 
 
 
 
319
  except Exception:
320
  pass
321
 
322
+ # Ensure on-disk dir is clean (client keeps metadata separately)
323
  for _ in range(10):
324
  try:
325
  if os.path.isdir(DB_DIR):
326
  shutil.rmtree(DB_DIR)
327
  break
328
  except OSError:
329
+ time.sleep(0.2)
330
  os.makedirs(DB_DIR, exist_ok=True)
331
 
332
+ def _open_vectordb():
333
+ """(Re)create a vectordb handle bound to the persistent client."""
334
+ global vectordb, _chroma_client
335
+ _chroma_client = PersistentClient(path=DB_DIR)
336
  vectordb = Chroma(
337
+ collection_name=COLLECTION_NAME,
338
  embedding_function=embeddings,
339
+ client=_chroma_client,
340
  collection_metadata={"hnsw:space": "cosine"},
 
341
  )
342
+
343
+ def rebuild_chroma(docs: List[Document]):
344
+ _reset_chroma_dir()
345
+ _open_vectordb()
346
  if docs:
347
  # Add in small batches to keep memory low
348
  batch = 64
349
  for i in range(0, len(docs), batch):
350
  vectordb.add_documents(docs[i:i+batch])
351
+ # No explicit persist() call needed; PersistentClient is, well, persistent.
352
 
353
  def reindex_if_needed(force: bool = False, revision: str = DATA_REV) -> Dict[str, Any]:
354
  """
 
358
  st = _state_load()
359
  old_sha = st.get("dataset_sha")
360
 
361
+ needs_rebuild = force or (new_sha != old_sha)
362
+ if needs_rebuild:
363
  pdfs = list_pdf_paths(CORPUS_DIR)
364
  docs = load_docs_from_pdfs(pdfs)
365
  rebuild_chroma(docs)
366
  st["dataset_sha"] = new_sha
367
  _state_save(st)
368
  return {"reindexed": True, "commit": new_sha, "docs": len(docs)}
369
+ else:
370
+ # Ensure handle is open even if no rebuild was needed
371
+ _open_vectordb()
372
+ return {"reindexed": False, "commit": new_sha}
373
 
374
  # -------------------- Helpers --------------------
375
+ def format_docs(docs: List[Document]) -> str:
376
  parts = []
377
  for i, d in enumerate(docs, 1):
378
  src = d.metadata.get("source", "unknown")
 
412
  return {"answer": answer, "citations": cits, "used_k": k}
413
 
414
  # -------------------- FastAPI --------------------
415
+ app = FastAPI(title="Career GPT RAG API", version="1.2.0")
416
  app.add_middleware(
417
  CORSMiddleware,
418
  allow_origins=[o.strip() for o in CORS_ORIGINS.split(",") if o.strip()],
 
452
  # Best-effort count
453
  count = 0
454
  try:
455
+ # Internal attribute may exist; keep as soft attempt
456
+ count = getattr(vectordb, "_collection", None).count() # type: ignore[call-arg, attr-defined]
457
  except Exception:
458
  meta = vectordb.get(limit=1)
459
  count = len(meta.get("ids", []))
 
461
  return {
462
  "status": "ok",
463
  "db_dir": DB_DIR,
464
+ "collection": COLLECTION_NAME,
465
+ "docs_indexed_estimate": count,
466
  "embed_provider": EMBED_PROVIDER,
467
  "embed_model": EMBED_MODEL,
468
  "llm": HF_LLM_REPO,
 
492
  log.exception("Unhandled /ask error")
493
  raise HTTPException(status_code=500, detail=str(e))
494
 
495
+ # ---- Manual refresh endpoint ----
496
  @app.post("/refresh")
497
  def refresh(_ok: bool = Depends(require_api_key)):
498
  """
code wget step; clean Dockerfile ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ diff --git a/Dockerfile b/Dockerfile
2
+ index b16398c..659aa4c 100644
3
+ --- a/Dockerfile
4
+ +++ b/Dockerfile
5
+ @@ -3,8 +3,6 @@
6
+ # ----------------------------------------
7
+ FROM python:3.11-slim-bookworm
8
+ 
9
+ -LABEL space.rebuild="final-fix-2025-10-22"
10
+ -
11
+ ENV PYTHONDONTWRITEBYTECODE=1 \
12
+ PYTHONUNBUFFERED=1 \
13
+ PIP_NO_CACHE_DIR=1 \
14
+ @@ -14,31 +12,31 @@ ENV PYTHONDONTWRITEBYTECODE=1 \
15
+ RAG_DATASET_ID=internationalscholarsprogram/DOC \
16
+ RAG_DATASET_REVISION=main \
17
+ RAG_PORT=7860 \
18
+ - PORT=7860 \
19
+ - PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu"
20
+ + PORT=7860
21
+ 
22
+ -# ✅ Minimal system dependencies only
23
+ +# System deps (no git, no wget needed)
24
+ RUN apt-get update && apt-get install -y --no-install-recommends \
25
+ tini curl ca-certificates \
26
+ && rm -rf /var/lib/apt/lists/*
27
+ 
28
+ -# ✅ Create non-root user
29
+ +# Non-root user
30
+ RUN useradd -m -u 1000 appuser
31
+ +
32
+ WORKDIR /app
33
+ 
34
+ -# ✅ Install dependencies
35
+ +# Python deps
36
+ COPY requirements.txt .
37
+ RUN python -m pip install --upgrade pip setuptools wheel \
38
+ && pip install --no-cache-dir -r requirements.txt
39
+ 
40
+ -# ✅ Copy app files
41
+ +# Project files
42
+ COPY . .
43
+ 
44
+ -# ✅ Prepare /data directories
45
+ +# Persistent dirs & permissions
46
+ RUN mkdir -p /data/chroma_db /data/.huggingface /data/corpus \
47
+ && chown -R appuser:appuser /data /app
48
+ 
49
+ -# ✅ Optional: make bootstrap.sh executable if it exists
50
+ +# Optional start script perms
51
+ RUN if [ -f "bootstrap.sh" ]; then chmod +x bootstrap.sh; fi
52
+ 
53
+ USER appuser
54
+ @@ -47,5 +45,6 @@ EXPOSE 7860
55
+ HEALTHCHECK --interval=30s --timeout=5s --start-period=20s \
56
+ CMD curl -fsS "http://127.0.0.1:${PORT}/health" || exit 1
57
+ 
58
+ -ENTRYPOINT ["/usr/bin/tini","--"]
59
+ -CMD ["python","app.py"]
60
+ +ENTRYPOINT ["/usr/bin/tini", "--"]
61
+ +# CMD ["bash", "bootstrap.sh"]
62
+ +CMD ["python", "app.py"]