Agentic-Service-Data-Eyond-Catalog

Sleeping

sofhiaazzhr committed 23 days ago

Commit

ce20d89

1 Parent(s): b272cc7

fixes: chain singleton, parquet logger, SSE event consistency, remove double DB fetch

Files changed (5) hide show

src/api/v1/chat.py CHANGED Viewed

@@ -176,7 +176,8 @@ async def chat_stream(request: ChatRequest, db: AsyncSession = Depends(get_db)):
             async def stream_direct():
                 yield {"event": "sources", "data": json.dumps([])}
-                yield {"event": "message", "data": direct}
             return EventSourceResponse(stream_direct())

             async def stream_direct():
                 yield {"event": "sources", "data": json.dumps([])}
+                yield {"event": "chunk", "data": direct}
+                yield {"event": "done", "data": ""}
             return EventSourceResponse(stream_direct())

src/api/v1/document.py CHANGED Viewed

@@ -105,8 +105,7 @@ async def process_document(
     """Process document and ingest to vector index."""
     data = await document_pipeline.process(document_id, user_id, db)
-    document = await document_service.get_document(db, document_id)
-    if document and document.file_type in ("csv", "xlsx"):
         from src.pipeline.triggers import on_tabular_uploaded
         try:
             await on_tabular_uploaded(document_id, user_id)

     """Process document and ingest to vector index."""
     data = await document_pipeline.process(document_id, user_id, db)
+    if data["file_type"] in ("csv", "xlsx"):
         from src.pipeline.triggers import on_tabular_uploaded
         try:
             await on_tabular_uploaded(document_id, user_id)

src/pipeline/document_pipeline.py CHANGED Viewed

@@ -66,7 +66,7 @@ class DocumentPipeline:
             await document_service.update_document_status(db, document_id, "completed")
             logger.info(f"Processed document {document_id}: {chunks_count} chunks")
-            return {"document_id": document_id, "chunks_processed": chunks_count}
         except Exception as e:
             logger.error(f"Processing failed for document {document_id}", error=str(e))

             await document_service.update_document_status(db, document_id, "completed")
             logger.info(f"Processed document {document_id}: {chunks_count} chunks")
+            return {"document_id": document_id, "chunks_processed": chunks_count, "file_type": document.file_type}
         except Exception as e:
             logger.error(f"Processing failed for document {document_id}", error=str(e))

src/query/planner/service.py CHANGED Viewed

@@ -55,6 +55,16 @@ def _build_default_chain() -> Runnable:
     return prompt | llm.with_structured_output(QueryIR)
 class QueryPlannerService:
     """Wraps the LLM call with structured-output parsing into QueryIR.
@@ -68,7 +78,7 @@ class QueryPlannerService:
     def _ensure_chain(self) -> Runnable:
         if self._chain is None:
-            self._chain = _build_default_chain()
         return self._chain
     async def plan(

     return prompt | llm.with_structured_output(QueryIR)
+_default_chain: Runnable | None = None
+def _get_default_chain() -> Runnable:
+    global _default_chain
+    if _default_chain is None:
+        _default_chain = _build_default_chain()
+    return _default_chain
 class QueryPlannerService:
     """Wraps the LLM call with structured-output parsing into QueryIR.
     def _ensure_chain(self) -> Runnable:
         if self._chain is None:
+            self._chain = _get_default_chain()
         return self._chain
     async def plan(

src/storage/parquet.py CHANGED Viewed

@@ -18,7 +18,7 @@ import pandas as pd
 from src.middlewares.logging import get_logger
 from src.storage.az_blob.az_blob import blob_storage
-logger = get_logger("parquet_service")
 def _safe_sheet_name(sheet_name: str) -> str:
@@ -27,7 +27,7 @@ def _safe_sheet_name(sheet_name: str) -> str:
 def parquet_blob_name(user_id: str, document_id: str, sheet_name: str | None = None) -> str:
     """Construct deterministic Parquet blob name."""
-    if sheet_name:
         return f"{user_id}/{document_id}__{_safe_sheet_name(sheet_name)}.parquet"
     return f"{user_id}/{document_id}.parquet"

 from src.middlewares.logging import get_logger
 from src.storage.az_blob.az_blob import blob_storage
+logger = get_logger("storage.parquet")
 def _safe_sheet_name(sheet_name: str) -> str:
 def parquet_blob_name(user_id: str, document_id: str, sheet_name: str | None = None) -> str:
     """Construct deterministic Parquet blob name."""
+    if sheet_name is not None:
         return f"{user_id}/{document_id}__{_safe_sheet_name(sheet_name)}.parquet"
     return f"{user_id}/{document_id}.parquet"