Hammad712 commited on
Commit
6324817
·
1 Parent(s): c59b861

Added Logging

Browse files
Files changed (1) hide show
  1. app/rag/routes.py +322 -200
app/rag/routes.py CHANGED
@@ -1,5 +1,17 @@
1
  # app/rag/routes.py
2
-
 
 
 
 
 
 
 
 
 
 
 
 
3
  import os
4
  import json
5
  import uuid
@@ -7,6 +19,7 @@ import time
7
  from typing import List, Optional, Iterable
8
 
9
  from fastapi import APIRouter, HTTPException, Path, Query
 
10
 
11
  from .schemas import SetupRequest, ChatRequest, SetupResponse, ChatResponse
12
  from .utils import (
@@ -14,13 +27,14 @@ from .utils import (
14
  save_vectorstore_to_disk,
15
  upsert_vectorstore_metadata,
16
  get_vectorstore_metadata,
17
- build_rag_chain
18
  )
19
  from .chat_history import ChatHistoryManager
20
  from .logging_config import logger
21
 
22
  from qdrant_client import QdrantClient
23
  from qdrant_client.models import VectorParams, PointStruct, Distance
 
24
  from app.page_speed.config import settings
25
  from .embeddings import embeddings, text_splitter # kept here for ingestion
26
 
@@ -29,36 +43,51 @@ router = APIRouter(prefix="/rag", tags=["rag"])
29
 
30
  def _get_embeddings_for_texts(texts: List[str]) -> List[List[float]]:
31
  """
32
- Try common embedding API names (embed_documents, embed_texts, embed).
33
- Falls back to calling embed_query per text (slower).
 
 
34
  """
35
  if not texts:
 
36
  return []
37
 
38
- # Preferred bulk API
 
 
39
  for attr in ("embed_documents", "embed_texts", "embed_batch", "embed"):
40
  fn = getattr(embeddings, attr, None)
41
  if callable(fn):
 
42
  try:
43
- return fn(texts)
 
 
44
  except Exception:
45
  logger.debug("Embedding method %s failed; trying next option", attr, exc_info=True)
46
 
47
- # Fallback: try single-item embedding function repeatedly
48
  single_fn = getattr(embeddings, "embed_query", None) or getattr(embeddings, "embed", None)
49
  if callable(single_fn):
 
50
  vecs = []
51
- for t in texts:
52
- vec = single_fn(t)
53
- if isinstance(vec, dict) and "embedding" in vec:
54
- vecs.append(vec["embedding"])
55
- else:
56
- vecs.append(vec)
 
 
 
 
 
57
  return vecs
58
 
 
59
  raise RuntimeError(
60
  "Embeddings object does not expose a supported embedding method "
61
- "(embed_documents/embed_texts/embed_query)."
62
  )
63
 
64
 
@@ -66,175 +95,231 @@ def _get_embeddings_for_texts(texts: List[str]) -> List[List[float]]:
66
  async def setup_rag_session(
67
  onboarding_id: str = Path(..., description="Unique onboarding identifier"),
68
  doc_type: str = Path(..., description="Type of document (e.g., page_speed, seo, content_relevance, uiux or mobile_usability)"),
69
- body: SetupRequest = ...
70
  ):
71
  """
72
  Ingest documents under a specific document type and create a chat session.
73
- - If vectorstore metadata exists for onboarding_id and doc_type in MongoDB, skip ingestion.
74
- - Always create a new chat_id for this session.
75
- NOTE: This implementation does NOT create or rely on any local files on disk for metadata.
 
 
 
 
76
  """
77
- # Use DB metadata instead of local filesystem marker
78
- existing_meta = get_vectorstore_metadata(onboarding_id, doc_type)
79
- if existing_meta:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
  logger.info(
81
- "Vectorstore metadata exists for onboarding_id=%s, doc_type=%s; skipping ingestion",
82
- onboarding_id, doc_type
 
 
 
 
83
  )
84
- metadata = existing_meta
85
- if metadata and metadata.get("chat_id"):
86
- chat_id = metadata["chat_id"]
87
- else:
88
- chat_id = str(uuid.uuid4())
89
- ChatHistoryManager.create_session(chat_id)
90
- # ensure DB has chat_id
91
- upsert_vectorstore_metadata(onboarding_id, doc_type, metadata.get("vectorstore_path"), chat_id, metadata.get("collection_name"))
92
  return SetupResponse(
93
  success=True,
94
- message="RAG setup completed with existing vectorstore metadata.",
95
  onboarding_id=onboarding_id,
96
  doc_type=doc_type,
97
  chat_id=chat_id,
98
- vectorstore_path=metadata.get("vectorstore_path")
99
- )
100
-
101
- # New ingestion flow
102
- if not body.documents:
103
- logger.error(
104
- "Missing documents for onboarding_id=%s, doc_type=%s",
105
- onboarding_id, doc_type
106
  )
107
- raise HTTPException(status_code=400, detail="Please provide documents to ingest.")
108
-
109
- # Create session and ingest
110
- chat_id = str(uuid.uuid4())
111
- ChatHistoryManager.create_session(chat_id)
112
-
113
- all_text = "\n\n".join(body.documents)
114
- text_chunks = text_splitter.split_text(all_text)
115
-
116
- # Build Qdrant client from settings (with timeout + optional prefer_grpc)
117
- client_kwargs = {}
118
- if getattr(settings, "qdrant_url", None):
119
- client_kwargs["url"] = settings.qdrant_url
120
- if getattr(settings, "qdrant_api_key", None):
121
- client_kwargs["api_key"] = settings.qdrant_api_key
122
-
123
- # sensible defaults; override via app config
124
- qdrant_timeout = getattr(settings, "qdrant_timeout", 60) # seconds (default 60)
125
- prefer_grpc = getattr(settings, "qdrant_prefer_grpc", False) # set True to use gRPC if available
126
-
127
- try:
128
- if client_kwargs:
129
- qdrant_client = QdrantClient(**client_kwargs, timeout=qdrant_timeout, prefer_grpc=prefer_grpc)
130
- else:
131
- qdrant_client = QdrantClient(timeout=qdrant_timeout, prefer_grpc=prefer_grpc)
132
- except TypeError as e:
133
- logger.exception("Failed to instantiate QdrantClient: %s", e)
134
- raise HTTPException(status_code=500, detail=f"Failed to construct Qdrant client: {e}")
135
-
136
- # Deterministic collection name for each onboarding/doc_type
137
- collection_name = f"vs_{onboarding_id}_{doc_type}"
138
-
139
- # --------------------------
140
- # INGEST: compute embeddings
141
- # --------------------------
142
- try:
143
- vectors = _get_embeddings_for_texts(text_chunks)
144
- except Exception as e:
145
- logger.exception("Failed to compute embeddings: %s", e)
146
- raise HTTPException(status_code=500, detail=f"Embedding error: {e}")
147
 
148
- if not vectors or len(vectors) != len(text_chunks):
149
- logger.error("Embeddings length mismatch: vectors=%s texts=%s", len(vectors), len(text_chunks))
150
- raise HTTPException(status_code=500, detail="Embedding generation failed or returned unexpected shape.")
151
-
152
- vector_size = len(vectors[0])
153
- if vector_size == 0:
154
- raise HTTPException(status_code=500, detail="Embedding returned empty vectors")
155
-
156
- # Recreate collection (idempotent for onboarding+doc_type)
157
- try:
158
- qdrant_client.recreate_collection(
159
- collection_name=collection_name,
160
- vectors_config=VectorParams(size=vector_size, distance=Distance.COSINE)
161
- )
162
- except Exception as e:
163
- logger.exception("Failed to create/recreate qdrant collection '%s': %s", collection_name, e)
164
- raise HTTPException(status_code=500, detail=f"Failed to create qdrant collection: {e}")
165
-
166
- # Helper: safe upsert with retries/backoff
167
- def safe_upsert(client: QdrantClient, collection_name: str, points: Iterable[PointStruct], max_retries: int = 3):
168
- attempt = 0
169
- backoff = 1.0
170
- last_exc: Optional[Exception] = None
171
- while attempt < max_retries:
172
- try:
173
- client.upsert(collection_name=collection_name, points=points)
174
- return
175
- except Exception as exc:
176
- last_exc = exc
177
- attempt += 1
178
- logger.warning("Qdrant upsert attempt %d/%d failed: %s", attempt, max_retries, exc)
179
- if attempt >= max_retries:
180
- logger.exception("Qdrant upsert failed after %d attempts", max_retries)
181
- raise
182
- # exponential backoff
183
- time.sleep(backoff)
184
- backoff *= 2.0
185
- # if loop finishes without returning, raise last exception
186
- if last_exc:
187
- raise last_exc
188
-
189
- # Upsert points in smaller batches and use safe_upsert
190
- batch_size = getattr(settings, "qdrant_upsert_batch_size", 64) # smaller default batch size
191
- points_batch: List[PointStruct] = []
192
- try:
193
- for i, (vec, txt) in enumerate(zip(vectors, text_chunks)):
194
- payload = {"text": txt}
195
- # Use UUID string for id to avoid collisions across sessions
196
- point_id = str(uuid.uuid4())
197
- point = PointStruct(id=point_id, vector=vec, payload=payload)
198
- points_batch.append(point)
199
-
200
- if len(points_batch) >= batch_size:
201
- logger.debug("Upserting batch of %d points to collection %s", len(points_batch), collection_name)
202
- safe_upsert(qdrant_client, collection_name, points_batch)
203
- points_batch = []
204
-
205
- # final flush
206
- if points_batch:
207
- logger.debug("Upserting final batch of %d points to collection %s", len(points_batch), collection_name)
208
- safe_upsert(qdrant_client, collection_name, points_batch)
209
- except Exception as e:
210
- logger.exception("Failed to upsert points into qdrant: %s", e)
211
- raise HTTPException(status_code=500, detail=f"Failed to upsert points into Qdrant: {e}")
212
-
213
- # Create an in-application "vectorstore_path" (URI-style) and store metadata in DB
214
- vs_path = save_vectorstore_to_disk(
215
- onboarding_id,
216
- doc_type,
217
- collection_name,
218
- getattr(settings, "qdrant_url", None),
219
- getattr(settings, "qdrant_api_key", None)
220
- )
221
- # Persist metadata into MongoDB (no local disk involved)
222
- # Persist extra metadata fields so retrieval can use same connection details (if desired)
223
- upsert_vectorstore_metadata(onboarding_id, doc_type, vs_path, chat_id, collection_name)
224
-
225
- logger.info(
226
- "Created Qdrant collection %s for %s/%s (points=%d)",
227
- collection_name, onboarding_id, doc_type, len(text_chunks)
228
- )
229
-
230
- return SetupResponse(
231
- success=True,
232
- message="RAG setup completed.",
233
- onboarding_id=onboarding_id,
234
- doc_type=doc_type,
235
- chat_id=chat_id,
236
- vectorstore_path=vs_path
237
- )
238
 
239
 
240
  @router.post("/chat/{onboarding_id}/{doc_type}/{chat_id}", response_model=ChatResponse)
@@ -243,37 +328,74 @@ async def chat_with_user(
243
  doc_type: str = Path(...),
244
  chat_id: str = Path(...),
245
  prompt_type: str = Query(..., description="Prompt type, e.g., page_speed, content_relevance, seo, uiux or mobile_usability"),
246
- body: ChatRequest = ...
247
  ):
248
  """
249
  Chat endpoint using a specific document-type vectorstore.
 
 
 
 
 
 
 
250
  """
251
- # Use DB metadata instead of local filesystem marker
252
- metadata = get_vectorstore_metadata(onboarding_id, doc_type)
253
- if not metadata:
254
- raise HTTPException(status_code=400, detail="Vectorstore metadata not found; run initialization first.")
255
-
256
- if not ChatHistoryManager.chat_exists(chat_id):
257
- raise HTTPException(status_code=404, detail=f"Chat session {chat_id} not found.")
258
-
259
- question = (body.question or "").strip()
260
- if not question:
261
- raise HTTPException(status_code=400, detail="Question cannot be empty.")
262
-
263
- ChatHistoryManager.summarize_if_needed(chat_id, threshold=10)
264
- ChatHistoryManager.add_message(chat_id, role="human", content=question)
265
-
266
- chain = build_rag_chain(onboarding_id, doc_type, chat_id, prompt_type)
267
- history = ChatHistoryManager.get_messages(chat_id)
268
- result = chain.invoke({"question": question, "chat_history": history})
269
- answer = result.get("answer") or result.get("output_text") or ""
270
- ChatHistoryManager.add_message(chat_id, role="ai", content=answer)
271
-
272
- return ChatResponse(
273
- success=True,
274
- answer=answer,
275
- error=None,
276
- chat_id=chat_id,
277
- onboarding_id=onboarding_id,
278
- doc_type=doc_type
279
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  # app/rag/routes.py
2
+ """
3
+ RAG FastAPI routes.
4
+
5
+ This file contains:
6
+ - /initialization/{onboarding_id}/{doc_type} : ingest documents and create a RAG session
7
+ - /chat/{onboarding_id}/{doc_type}/{chat_id} : perform a retrieval-augmented chat using stored vectorstore
8
+
9
+ The functions add additional logging to make debugging easier and to surface metrics:
10
+ - request start/finish times and durations
11
+ - counts and sizes (documents, chunks, vectors, batches)
12
+ - Qdrant operations and retries
13
+ - embedding function selection failures
14
+ """
15
  import os
16
  import json
17
  import uuid
 
19
  from typing import List, Optional, Iterable
20
 
21
  from fastapi import APIRouter, HTTPException, Path, Query
22
+ from pydantic import BaseModel
23
 
24
  from .schemas import SetupRequest, ChatRequest, SetupResponse, ChatResponse
25
  from .utils import (
 
27
  save_vectorstore_to_disk,
28
  upsert_vectorstore_metadata,
29
  get_vectorstore_metadata,
30
+ build_rag_chain,
31
  )
32
  from .chat_history import ChatHistoryManager
33
  from .logging_config import logger
34
 
35
  from qdrant_client import QdrantClient
36
  from qdrant_client.models import VectorParams, PointStruct, Distance
37
+
38
  from app.page_speed.config import settings
39
  from .embeddings import embeddings, text_splitter # kept here for ingestion
40
 
 
43
 
44
  def _get_embeddings_for_texts(texts: List[str]) -> List[List[float]]:
45
  """
46
+ Compute embeddings for a list of texts.
47
+
48
+ Tries common bulk methods on the embeddings object and falls back to single-item calls.
49
+ Logs which method is being attempted and any failures.
50
  """
51
  if not texts:
52
+ logger.debug("_get_embeddings_for_texts called with empty texts list.")
53
  return []
54
 
55
+ logger.debug("Computing embeddings for %d texts", len(texts))
56
+
57
+ # Preferred bulk API methods to attempt
58
  for attr in ("embed_documents", "embed_texts", "embed_batch", "embed"):
59
  fn = getattr(embeddings, attr, None)
60
  if callable(fn):
61
+ logger.debug("Trying embedding method: %s", attr)
62
  try:
63
+ vecs = fn(texts)
64
+ logger.debug("Embedding method %s returned %d vectors", attr, len(vecs) if vecs is not None else 0)
65
+ return vecs
66
  except Exception:
67
  logger.debug("Embedding method %s failed; trying next option", attr, exc_info=True)
68
 
69
+ # Fallback to single-item embedding function repeatedly
70
  single_fn = getattr(embeddings, "embed_query", None) or getattr(embeddings, "embed", None)
71
  if callable(single_fn):
72
+ logger.debug("Falling back to single-item embedding function: %s", getattr(single_fn, "__name__", "<fn>"))
73
  vecs = []
74
+ for i, t in enumerate(texts):
75
+ try:
76
+ vec = single_fn(t)
77
+ if isinstance(vec, dict) and "embedding" in vec:
78
+ vecs.append(vec["embedding"])
79
+ else:
80
+ vecs.append(vec)
81
+ except Exception as e:
82
+ logger.exception("Single-item embedding failed for text index %d: %s", i, e)
83
+ raise
84
+ logger.debug("Single-item embedding produced %d vectors", len(vecs))
85
  return vecs
86
 
87
+ logger.error("Embeddings object does not expose a supported embedding method")
88
  raise RuntimeError(
89
  "Embeddings object does not expose a supported embedding method "
90
+ "(embed_documents/embed_texts/embed_query/embed)."
91
  )
92
 
93
 
 
95
  async def setup_rag_session(
96
  onboarding_id: str = Path(..., description="Unique onboarding identifier"),
97
  doc_type: str = Path(..., description="Type of document (e.g., page_speed, seo, content_relevance, uiux or mobile_usability)"),
98
+ body: SetupRequest = ...,
99
  ):
100
  """
101
  Ingest documents under a specific document type and create a chat session.
102
+
103
+ Behavior:
104
+ - If vectorstore metadata exists for onboarding_id and doc_type in DB, skip ingestion (idempotent).
105
+ - Always create a new chat_id for this session and return it.
106
+ - Uses Qdrant as the vector store and stores metadata via upsert_vectorstore_metadata.
107
+
108
+ Returns: SetupResponse
109
  """
110
+ start_ts = time.time()
111
+ logger.info("RAG initialization called for onboarding_id=%s doc_type=%s", onboarding_id, doc_type)
112
+
113
+ try:
114
+ # Use DB metadata instead of local filesystem marker
115
+ existing_meta = get_vectorstore_metadata(onboarding_id, doc_type)
116
+ if existing_meta:
117
+ logger.info(
118
+ "Vectorstore metadata exists for onboarding_id=%s, doc_type=%s; skipping ingestion",
119
+ onboarding_id,
120
+ doc_type,
121
+ )
122
+ metadata = existing_meta or {}
123
+ chat_id = metadata.get("chat_id") or str(uuid.uuid4())
124
+ if not ChatHistoryManager.chat_exists(chat_id):
125
+ ChatHistoryManager.create_session(chat_id)
126
+ logger.debug("Created new chat session for existing metadata chat_id=%s", chat_id)
127
+
128
+ # ensure DB has chat_id (in case metadata existed but had missing fields)
129
+ upsert_vectorstore_metadata(
130
+ onboarding_id,
131
+ doc_type,
132
+ metadata.get("vectorstore_path"),
133
+ chat_id,
134
+ metadata.get("collection_name"),
135
+ )
136
+
137
+ duration = time.time() - start_ts
138
+ logger.info("RAG initialization skipped ingestion (existing); duration=%.3fs", duration)
139
+ return SetupResponse(
140
+ success=True,
141
+ message="RAG setup completed with existing vectorstore metadata.",
142
+ onboarding_id=onboarding_id,
143
+ doc_type=doc_type,
144
+ chat_id=chat_id,
145
+ vectorstore_path=metadata.get("vectorstore_path"),
146
+ )
147
+
148
+ # New ingestion flow
149
+ if not body.documents:
150
+ logger.error(
151
+ "Missing documents for onboarding_id=%s, doc_type=%s",
152
+ onboarding_id,
153
+ doc_type,
154
+ )
155
+ raise HTTPException(status_code=400, detail="Please provide documents to ingest.")
156
+
157
+ logger.info("Ingesting %d documents for %s/%s", len(body.documents), onboarding_id, doc_type)
158
+
159
+ # Create session and ingest
160
+ chat_id = str(uuid.uuid4())
161
+ ChatHistoryManager.create_session(chat_id)
162
+ logger.debug("Created chat session %s", chat_id)
163
+
164
+ all_text = "\n\n".join(body.documents)
165
+ text_chunks = text_splitter.split_text(all_text)
166
+ logger.info("Split documents into %d text chunks", len(text_chunks))
167
+
168
+ # Build Qdrant client from settings (with timeout + optional prefer_grpc)
169
+ client_kwargs = {}
170
+ if getattr(settings, "qdrant_url", None):
171
+ client_kwargs["url"] = settings.qdrant_url
172
+ if getattr(settings, "qdrant_api_key", None):
173
+ client_kwargs["api_key"] = settings.qdrant_api_key
174
+
175
+ qdrant_timeout = getattr(settings, "qdrant_timeout", 60) # seconds (default 60)
176
+ prefer_grpc = getattr(settings, "qdrant_prefer_grpc", False)
177
+
178
+ try:
179
+ if client_kwargs:
180
+ qdrant_client = QdrantClient(**client_kwargs, timeout=qdrant_timeout, prefer_grpc=prefer_grpc)
181
+ logger.debug("Instantiated QdrantClient with kwargs keys: %s", list(client_kwargs.keys()))
182
+ else:
183
+ qdrant_client = QdrantClient(timeout=qdrant_timeout, prefer_grpc=prefer_grpc)
184
+ logger.debug("Instantiated QdrantClient with default connection (no url/api_key)")
185
+ except TypeError as e:
186
+ logger.exception("Failed to instantiate QdrantClient: %s", e)
187
+ raise HTTPException(status_code=500, detail=f"Failed to construct Qdrant client: {e}")
188
+
189
+ # Deterministic collection name for each onboarding/doc_type
190
+ collection_name = f"vs_{onboarding_id}_{doc_type}"
191
+ logger.info("Using Qdrant collection name: %s", collection_name)
192
+
193
+ # --------------------------
194
+ # INGEST: compute embeddings
195
+ # --------------------------
196
+ try:
197
+ vectors = _get_embeddings_for_texts(text_chunks)
198
+ except Exception as e:
199
+ logger.exception("Failed to compute embeddings: %s", e)
200
+ raise HTTPException(status_code=500, detail=f"Embedding error: {e}")
201
+
202
+ if not vectors or len(vectors) != len(text_chunks):
203
+ logger.error(
204
+ "Embeddings length mismatch: vectors=%s texts=%s",
205
+ len(vectors) if vectors is not None else None,
206
+ len(text_chunks),
207
+ )
208
+ raise HTTPException(status_code=500, detail="Embedding generation failed or returned unexpected shape.")
209
+
210
+ vector_size = len(vectors[0]) if vectors else 0
211
+ logger.info("Computed embeddings: count=%d vector_size=%d", len(vectors), vector_size)
212
+ if vector_size == 0:
213
+ logger.error("Embedding returned empty vectors (vector_size=0)")
214
+ raise HTTPException(status_code=500, detail="Embedding returned empty vectors")
215
+
216
+ # Recreate collection (idempotent for onboarding+doc_type)
217
+ try:
218
+ qdrant_client.recreate_collection(
219
+ collection_name=collection_name,
220
+ vectors_config=VectorParams(size=vector_size, distance=Distance.COSINE),
221
+ )
222
+ logger.info("Recreated Qdrant collection %s (vector_size=%d)", collection_name, vector_size)
223
+ except Exception as e:
224
+ logger.exception("Failed to create/recreate qdrant collection '%s': %s", collection_name, e)
225
+ raise HTTPException(status_code=500, detail=f"Failed to create qdrant collection: {e}")
226
+
227
+ # Helper: safe upsert with retries/backoff
228
+ def safe_upsert(client: QdrantClient, collection_name: str, points: Iterable[PointStruct], max_retries: int = 3):
229
+ attempt = 0
230
+ backoff = 1.0
231
+ last_exc: Optional[Exception] = None
232
+ while attempt < max_retries:
233
+ try:
234
+ client.upsert(collection_name=collection_name, points=points)
235
+ logger.debug("Safe upsert successful for %d points (collection=%s) on attempt %d", len(list(points)), collection_name, attempt + 1)
236
+ return
237
+ except Exception as exc:
238
+ last_exc = exc
239
+ attempt += 1
240
+ logger.warning("Qdrant upsert attempt %d/%d failed: %s", attempt, max_retries, exc)
241
+ if attempt >= max_retries:
242
+ logger.exception("Qdrant upsert failed after %d attempts", max_retries)
243
+ raise
244
+ time.sleep(backoff)
245
+ backoff *= 2.0
246
+ if last_exc:
247
+ raise last_exc
248
+
249
+ # Upsert points in smaller batches and use safe_upsert
250
+ batch_size = getattr(settings, "qdrant_upsert_batch_size", 64)
251
+ points_batch: List[PointStruct] = []
252
+ total_points = 0
253
+ try:
254
+ for i, (vec, txt) in enumerate(zip(vectors, text_chunks)):
255
+ payload = {"text": txt}
256
+ point_id = str(uuid.uuid4())
257
+ point = PointStruct(id=point_id, vector=vec, payload=payload)
258
+ points_batch.append(point)
259
+ total_points += 1
260
+
261
+ if len(points_batch) >= batch_size:
262
+ logger.debug("Upserting batch of %d points to collection %s (processed=%d)", len(points_batch), collection_name, total_points)
263
+ safe_upsert(qdrant_client, collection_name, points_batch)
264
+ points_batch = []
265
+
266
+ # final flush
267
+ if points_batch:
268
+ logger.debug("Upserting final batch of %d points to collection %s (processed=%d)", len(points_batch), collection_name, total_points)
269
+ safe_upsert(qdrant_client, collection_name, points_batch)
270
+
271
+ logger.info("Upserted total %d points into Qdrant collection %s", total_points, collection_name)
272
+ except Exception as e:
273
+ logger.exception("Failed to upsert points into qdrant: %s", e)
274
+ raise HTTPException(status_code=500, detail=f"Failed to upsert points into Qdrant: {e}")
275
+
276
+ # Create an in-application "vectorstore_path" (URI-style) and store metadata in DB
277
+ try:
278
+ vs_path = save_vectorstore_to_disk(
279
+ onboarding_id,
280
+ doc_type,
281
+ collection_name,
282
+ getattr(settings, "qdrant_url", None),
283
+ getattr(settings, "qdrant_api_key", None),
284
+ )
285
+ logger.debug("Saved vectorstore metadata path: %s", vs_path)
286
+ except Exception as e:
287
+ logger.exception("Failed to save vectorstore metadata to disk/DB: %s", e)
288
+ raise HTTPException(status_code=500, detail=f"Failed to persist vectorstore metadata: {e}")
289
+
290
+ # Persist metadata into MongoDB (no local disk involved)
291
+ try:
292
+ upsert_vectorstore_metadata(onboarding_id, doc_type, vs_path, chat_id, collection_name)
293
+ logger.info("Persisted vectorstore metadata for %s/%s (chat_id=%s)", onboarding_id, doc_type, chat_id)
294
+ except Exception as e:
295
+ logger.exception("Failed to upsert vectorstore metadata into DB: %s", e)
296
+ raise HTTPException(status_code=500, detail=f"Failed to persist vectorstore metadata: {e}")
297
+
298
+ duration = time.time() - start_ts
299
  logger.info(
300
+ "Created Qdrant collection %s for %s/%s (points=%d) in %.3fs",
301
+ collection_name,
302
+ onboarding_id,
303
+ doc_type,
304
+ total_points,
305
+ duration,
306
  )
307
+
 
 
 
 
 
 
 
308
  return SetupResponse(
309
  success=True,
310
+ message="RAG setup completed.",
311
  onboarding_id=onboarding_id,
312
  doc_type=doc_type,
313
  chat_id=chat_id,
314
+ vectorstore_path=vs_path,
 
 
 
 
 
 
 
315
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
316
 
317
+ except HTTPException:
318
+ # Re-raise HTTP exceptions (already logged above)
319
+ raise
320
+ except Exception as exc:
321
+ logger.exception("Unhandled exception during RAG initialization for %s/%s: %s", onboarding_id, doc_type, exc)
322
+ raise HTTPException(status_code=500, detail=f"Internal server error during RAG initialization: {exc}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
323
 
324
 
325
  @router.post("/chat/{onboarding_id}/{doc_type}/{chat_id}", response_model=ChatResponse)
 
328
  doc_type: str = Path(...),
329
  chat_id: str = Path(...),
330
  prompt_type: str = Query(..., description="Prompt type, e.g., page_speed, content_relevance, seo, uiux or mobile_usability"),
331
+ body: ChatRequest = ...,
332
  ):
333
  """
334
  Chat endpoint using a specific document-type vectorstore.
335
+
336
+ Steps:
337
+ - Verify vectorstore metadata exists.
338
+ - Ensure chat session exists.
339
+ - Optionally summarize history.
340
+ - Build the RAG chain and invoke it with the question + chat_history.
341
+ - Persist AI/human turns into ChatHistoryManager.
342
  """
343
+ start_ts = time.time()
344
+ logger.info("Chat request received: onboarding_id=%s doc_type=%s chat_id=%s prompt_type=%s", onboarding_id, doc_type, chat_id, prompt_type)
345
+
346
+ try:
347
+ # Use DB metadata instead of local filesystem marker
348
+ metadata = get_vectorstore_metadata(onboarding_id, doc_type)
349
+ if not metadata:
350
+ logger.warning("Vectorstore metadata not found for %s/%s", onboarding_id, doc_type)
351
+ raise HTTPException(status_code=400, detail="Vectorstore metadata not found; run initialization first.")
352
+
353
+ if not ChatHistoryManager.chat_exists(chat_id):
354
+ logger.warning("Chat session %s not found", chat_id)
355
+ raise HTTPException(status_code=404, detail=f"Chat session {chat_id} not found.")
356
+
357
+ question = (body.question or "").strip()
358
+ if not question:
359
+ logger.warning("Empty question in chat request for chat_id=%s", chat_id)
360
+ raise HTTPException(status_code=400, detail="Question cannot be empty.")
361
+
362
+ logger.info("Processing question (len=%d) for chat_id=%s", len(question), chat_id)
363
+ ChatHistoryManager.summarize_if_needed(chat_id, threshold=10)
364
+ ChatHistoryManager.add_message(chat_id, role="human", content=question)
365
+ logger.debug("Added human message to history for chat_id=%s", chat_id)
366
+
367
+ chain = build_rag_chain(onboarding_id, doc_type, chat_id, prompt_type)
368
+ logger.debug("Built RAG chain for onboarding_id=%s doc_type=%s chat_id=%s", onboarding_id, doc_type, chat_id)
369
+
370
+ history = ChatHistoryManager.get_messages(chat_id)
371
+ logger.debug("Chat history length=%d for chat_id=%s", len(history), chat_id)
372
+
373
+ try:
374
+ result = chain.invoke({"question": question, "chat_history": history})
375
+ logger.debug("RAG chain invoked successfully for chat_id=%s", chat_id)
376
+ except Exception as e:
377
+ logger.exception("RAG chain invocation failed for chat_id=%s: %s", chat_id, e)
378
+ raise HTTPException(status_code=500, detail=f"RAG chain invocation failed: {e}")
379
+
380
+ answer = result.get("answer") or result.get("output_text") or ""
381
+ logger.info("Generated answer length=%d for chat_id=%s", len(answer), chat_id)
382
+ ChatHistoryManager.add_message(chat_id, role="ai", content=answer)
383
+
384
+ duration = time.time() - start_ts
385
+ logger.info("Chat request completed for chat_id=%s duration=%.3fs", chat_id, duration)
386
+
387
+ return ChatResponse(
388
+ success=True,
389
+ answer=answer,
390
+ error=None,
391
+ chat_id=chat_id,
392
+ onboarding_id=onboarding_id,
393
+ doc_type=doc_type,
394
+ )
395
+
396
+ except HTTPException:
397
+ # Re-raise HTTP exceptions (already logged above)
398
+ raise
399
+ except Exception as exc:
400
+ logger.exception("Unhandled exception during chat for %s/%s chat_id=%s: %s", onboarding_id, doc_type, chat_id, exc)
401
+ raise HTTPException(status_code=500, detail=f"Internal server error during chat: {exc}")