Oviya committed on
Commit
5850002
·
1 Parent(s): e9a901d
Files changed (3) hide show
  1. .env +1 -1
  2. ragg/app.py +61 -9
  3. ragg/rag_llm.py +42 -17
.env CHANGED
@@ -7,7 +7,7 @@ DID_API_KEY=b3ZpeWEuckBweWthcmEubmV0:FMWfsvU5tLYIeVzY0fyBG
7
  DID_SOURCE_IMAGE_URL=https://i.ibb.co/Tpq77ZJ/teacher.png
8
  DID_VOICE_ID=en-US-JennyNeural
9
  TESSERACT_CMD=C:\Program Files\Tesseract-OCR\tesseract.exe
10
- CHROMA_DIR=C:\path\to\your\project\chroma
11
  CHROMA_ROOT=C:/Users/DELL/Desktop/Deploymnet/29 oct/py-learn-backend/ragg/chroma
12
  EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2
13
  ALLOWED_ORIGINS=http://localhost:4200,http://127.0.0.1:4200
 
7
  DID_SOURCE_IMAGE_URL=https://i.ibb.co/Tpq77ZJ/teacher.png
8
  DID_VOICE_ID=en-US-JennyNeural
9
  TESSERACT_CMD=C:\Program Files\Tesseract-OCR\tesseract.exe
10
+ CHROMA_DIR=C:/Users/DELL/Desktop/Deploymnet/29 oct/py-learn-backend/ragg/chroma
11
  CHROMA_ROOT=C:/Users/DELL/Desktop/Deploymnet/29 oct/py-learn-backend/ragg/chroma
12
  EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2
13
  ALLOWED_ORIGINS=http://localhost:4200,http://127.0.0.1:4200
ragg/app.py CHANGED
@@ -264,9 +264,9 @@ def rag_suggest_followups():
264
  return jsonify(result)
265
 
266
 
 
267
  @rag_bp.get("/_diag")
268
  def rag_diag():
269
- # minimal imports here to avoid circulars
270
  try:
271
  from .rag_llm import CHROMA_DIR, CHROMA_ROOT, get_vectorstore, get_vectorstore_for
272
  except ImportError:
@@ -276,27 +276,79 @@ def rag_diag():
276
  from flask import jsonify
277
 
278
  def _count(vs):
279
- try:
280
- return vs._collection.count()
281
- except Exception:
 
 
282
  try:
283
- return vs._client.get_collection(vs._collection.name).count()
284
  except Exception:
285
  return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
286
 
287
  info = {
288
- "env_seen": {"CHROMA_DIR": CHROMA_DIR, "CHROMA_ROOT": CHROMA_ROOT},
 
 
 
289
  "low_dir": {
290
  "path": os.path.join(CHROMA_ROOT, "low"),
291
  "exists": os.path.isdir(os.path.join(CHROMA_ROOT, "low")),
292
  },
293
  "counts_default": _count(get_vectorstore()),
294
- "counts_low": _count(get_vectorstore_for("low")),
295
- "counts_mid": _count(get_vectorstore_for("mid")),
296
- "counts_high": _count(get_vectorstore_for("high")),
297
  }
298
  return jsonify(info), 200
299
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
300
  @rag_bp.route("/search", methods=["POST", "OPTIONS"])
301
  def rag_search():
302
  if request.method == "OPTIONS":
 
264
  return jsonify(result)
265
 
266
 
267
+ # @rag_bp.get("/_diag")
268
  @rag_bp.get("/_diag")
269
  def rag_diag():
 
270
  try:
271
  from .rag_llm import CHROMA_DIR, CHROMA_ROOT, get_vectorstore, get_vectorstore_for
272
  except ImportError:
 
276
  from flask import jsonify
277
 
278
  def _count(vs):
279
+ """Handle both LangChain and chromadb client objects."""
280
+ if vs is None:
281
+ return None
282
+ # 1️⃣ chromadb.Collection (your new get_vectorstore_for)
283
+ if hasattr(vs, "count") and callable(vs.count):
284
  try:
285
+ return vs.count()
286
  except Exception:
287
  return None
288
+ # 2️⃣ LangChain vectorstore
289
+ if hasattr(vs, "_collection"):
290
+ try:
291
+ return vs._collection.count() # type: ignore
292
+ except Exception:
293
+ try:
294
+ return vs._client.get_collection(vs._collection.name).count() # type: ignore
295
+ except Exception:
296
+ return None
297
+ return None
298
+
299
+ # load each level safely
300
+ low_vs = get_vectorstore_for("low")
301
+ mid_vs = get_vectorstore_for("mid")
302
+ high_vs = get_vectorstore_for("high")
303
 
304
  info = {
305
+ "env_seen": {
306
+ "CHROMA_DIR": CHROMA_DIR,
307
+ "CHROMA_ROOT": CHROMA_ROOT
308
+ },
309
  "low_dir": {
310
  "path": os.path.join(CHROMA_ROOT, "low"),
311
  "exists": os.path.isdir(os.path.join(CHROMA_ROOT, "low")),
312
  },
313
  "counts_default": _count(get_vectorstore()),
314
+ "counts_low": _count(low_vs),
315
+ "counts_mid": _count(mid_vs),
316
+ "counts_high": _count(high_vs),
317
  }
318
  return jsonify(info), 200
319
 
320
+ # def rag_diag():
321
+ # # minimal imports here to avoid circulars
322
+ # try:
323
+ # from .rag_llm import CHROMA_DIR, CHROMA_ROOT, get_vectorstore, get_vectorstore_for
324
+ # except ImportError:
325
+ # from rag_llm import CHROMA_DIR, CHROMA_ROOT, get_vectorstore, get_vectorstore_for
326
+
327
+ # import os
328
+ # from flask import jsonify
329
+
330
+ # def _count(vs):
331
+ # try:
332
+ # return vs._collection.count()
333
+ # except Exception:
334
+ # try:
335
+ # return vs._client.get_collection(vs._collection.name).count()
336
+ # except Exception:
337
+ # return None
338
+
339
+ # info = {
340
+ # "env_seen": {"CHROMA_DIR": CHROMA_DIR, "CHROMA_ROOT": CHROMA_ROOT},
341
+ # "low_dir": {
342
+ # "path": os.path.join(CHROMA_ROOT, "low"),
343
+ # "exists": os.path.isdir(os.path.join(CHROMA_ROOT, "low")),
344
+ # },
345
+ # "counts_default": _count(get_vectorstore()),
346
+ # "counts_low": _count(get_vectorstore_for("low")),
347
+ # "counts_mid": _count(get_vectorstore_for("mid")),
348
+ # "counts_high": _count(get_vectorstore_for("high")),
349
+ # }
350
+ # return jsonify(info), 200
351
+
352
  @rag_bp.route("/search", methods=["POST", "OPTIONS"])
353
  def rag_search():
354
  if request.method == "OPTIONS":
ragg/rag_llm.py CHANGED
@@ -11,6 +11,8 @@ from langchain_core.documents import Document
11
  from openai import OpenAI
12
  from dotenv import load_dotenv, find_dotenv
13
  load_dotenv(find_dotenv())
 
 
14
 
15
  # --- Constants ---
16
  # CHROMA_DIR = "./chroma"
@@ -55,28 +57,51 @@ def get_vectorstore():
55
  )
56
  return _vectorstore
57
 
58
-
59
  def get_vectorstore_for(db_level: Optional[str] = None):
60
- """
61
- Return a persistent Chroma vectorstore for the requested db_level.
62
- db_level in {"low","mid","high"} → ./chroma/<db_level>
63
- else → fall back to your original CHROMA_DIR (single-store).
64
- """
65
  key = (db_level or "").strip().lower()
66
  if key in ("low", "mid", "high"):
67
  persist_dir = os.path.join(CHROMA_ROOT, key)
68
- print(f"[RAG] get_vectorstore_for('{key}') -> {persist_dir}")
69
  else:
70
- persist_dir = CHROMA_DIR # fallback
71
- print(f"[RAG] get_vectorstore_for(None) -> default ({CHROMA_DIR})")
72
-
73
- if key not in _vectorstores:
74
- print(f"🔹 Loading Chroma at: {persist_dir}")
75
- _vectorstores[key] = Chroma(
76
- persist_directory=persist_dir,
77
- embedding_function=get_embeddings(),
78
- )
79
- return _vectorstores[key]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
 
81
 
82
  def get_client():
 
11
  from openai import OpenAI
12
  from dotenv import load_dotenv, find_dotenv
13
  load_dotenv(find_dotenv())
14
+ import chromadb
15
+ from chromadb.utils import embedding_functions
16
 
17
  # --- Constants ---
18
  # CHROMA_DIR = "./chroma"
 
57
  )
58
  return _vectorstore
59
 
 
60
  def get_vectorstore_for(db_level: Optional[str] = None):
 
 
 
 
 
61
  key = (db_level or "").strip().lower()
62
  if key in ("low", "mid", "high"):
63
  persist_dir = os.path.join(CHROMA_ROOT, key)
 
64
  else:
65
+ persist_dir = CHROMA_DIR
66
+
67
+ print(f"[RAG] Using Chroma from: {persist_dir}")
68
+
69
+ client = chromadb.PersistentClient(path=persist_dir)
70
+
71
+ # Show collections available
72
+ collections = client.list_collections()
73
+ print(f"Available collections: {[c.name for c in collections]}")
74
+
75
+ # Pick the default collection (first one)
76
+ if not collections:
77
+ print("❌ No collections found.")
78
+ return None
79
+ collection = client.get_collection(collections[0].name)
80
+ print(f"✅ Loaded Chroma collection: {collection.name}")
81
+ return collection
82
+
83
+ # def get_vectorstore_for(db_level: Optional[str] = None):
84
+ # """
85
+ # Return a persistent Chroma vectorstore for the requested db_level.
86
+ # db_level in {"low","mid","high"} → ./chroma/<db_level>
87
+ # else → fall back to your original CHROMA_DIR (single-store).
88
+ # """
89
+ # key = (db_level or "").strip().lower()
90
+ # if key in ("low", "mid", "high"):
91
+ # persist_dir = os.path.join(CHROMA_ROOT, key)
92
+ # print(f"[RAG] get_vectorstore_for('{key}') -> {persist_dir}")
93
+ # else:
94
+ # persist_dir = CHROMA_DIR # fallback
95
+ # print(f"[DEBUG] Using Chroma folder for level '{key or 'default'}' → {persist_dir}")
96
+ # print(f"[RAG] get_vectorstore_for(None) -> default ({CHROMA_DIR})")
97
+
98
+ # if key not in _vectorstores:
99
+ # print(f"🔹 Loading Chroma at: {persist_dir}")
100
+ # _vectorstores[key] = Chroma(
101
+ # persist_directory=persist_dir,
102
+ # embedding_function=get_embeddings(),
103
+ # )
104
+ # return _vectorstores[key]
105
 
106
 
107
  def get_client():