rohannsinghal commited on
Commit
c82e944
Β·
1 Parent(s): afbff39

changes to main_api.py

Browse files
Files changed (1) hide show
  1. app/main_api.py +79 -108
app/main_api.py CHANGED
@@ -1,4 +1,4 @@
1
- # --- KAGGLE-POWERED RAG SYSTEM WITH LAZY INITIALIZATION - COMPLETE 1144+ LINES ---
2
 
3
  import os
4
  import json
@@ -24,7 +24,6 @@ from pydantic import BaseModel
24
 
25
  # LangChain imports
26
  from langchain_community.vectorstores import Chroma
27
- from langchain.schema.document import Document as LangChainDocument
28
 
29
  # Multi-format document processing
30
  import fitz # PyMuPDF
@@ -51,7 +50,7 @@ load_dotenv()
51
  logging.basicConfig(level=logging.INFO)
52
  logger = logging.getLogger(__name__)
53
 
54
- app = FastAPI(title="Kaggle-Powered Hackathon RAG", version="5.3.0")
55
 
56
  app.add_middleware(
57
  CORSMiddleware,
@@ -134,7 +133,7 @@ class LazyKaggleModelClient:
134
  logger.error(f"Kaggle reranking error: {e}")
135
  return documents[:k]
136
 
137
- # --- LIGHTWEIGHT QUERY PROCESSOR (YOUR EXCELLENT ORIGINAL) ---
138
  class LightweightQueryProcessor:
139
  def __init__(self, kaggle_client: LazyKaggleModelClient):
140
  self.kaggle_client = kaggle_client
@@ -142,18 +141,24 @@ class LightweightQueryProcessor:
142
 
143
  async def enhance_query_semantically(self, question: str, domain: str = "insurance") -> str:
144
  """OPTIMIZED semantic query processing"""
 
 
145
  cache_key = hashlib.md5(question.encode()).hexdigest()[:8]
146
  if cache_key in self.cache:
147
  return self.cache[cache_key]
148
 
 
149
  enhanced_query = self._expand_with_domain_knowledge_fast(question, domain)
150
  enhanced_query = self._handle_incomplete_questions(enhanced_query)
151
 
 
152
  self.cache[cache_key] = enhanced_query
153
  return enhanced_query
154
 
155
  def _expand_with_domain_knowledge_fast(self, query: str, domain: str) -> str:
156
  """OPTIMIZED domain expansion - same intelligence, faster processing"""
 
 
157
  key_expansions = {
158
  'grace period': 'payment deadline premium due',
159
  'waiting period': 'exclusion time coverage delay',
@@ -190,7 +195,7 @@ class LightweightQueryProcessor:
190
 
191
  return query
192
 
193
- # --- ANTI-JAILBREAK SECURITY SYSTEM (YOUR EXCELLENT ORIGINAL) ---
194
  class SecurityGuard:
195
  def __init__(self):
196
  self.jailbreak_patterns = [
@@ -226,7 +231,7 @@ class SecurityGuard:
226
 
227
  return answer
228
 
229
- # --- MULTI-LLM MANAGER (YOUR EXCELLENT ORIGINAL WITH ALL PROVIDERS) ---
230
  class MultiLLMManager:
231
  def __init__(self):
232
  # Initialize multiple LLM providers with fallback
@@ -302,7 +307,7 @@ class MultiLLMManager:
302
  response = await model.generate_content_async(prompt)
303
  return response.text.strip()
304
 
305
- # --- COMPLETE UNIVERSAL DOCUMENT PROCESSOR (ALL 12 FORMATS!) ---
306
  class UniversalDocumentProcessor:
307
  def __init__(self):
308
  # SPEED OPTIMIZATIONS: Reduced limits
@@ -379,7 +384,7 @@ class UniversalDocumentProcessor:
379
  else:
380
  return '.txt'
381
 
382
- # --- SPEED-OPTIMIZED PDF PROCESSING (YOUR EXCELLENT ORIGINAL) ---
383
  async def process_pdf(self, file_path: str, content: bytes) -> List[Dict[str, Any]]:
384
  """Enhanced PDF processing with speed optimizations"""
385
  chunks = []
@@ -704,59 +709,9 @@ class UniversalDocumentProcessor:
704
  "chunk_id": str(uuid.uuid4())
705
  }]
706
 
707
- # --- FIXED: ASYNC-AWARE EMBEDDING WRAPPER (YOUR EXCELLENT ORIGINAL + FIX) ---
708
- class AsyncKaggleEmbeddingWrapper:
709
- """FIXED: Async-aware embedding wrapper that works with Chroma"""
710
- def __init__(self, kaggle_client: LazyKaggleModelClient):
711
- self.kaggle_client = kaggle_client
712
- self._embeddings_cache = {}
713
-
714
- def embed_documents(self, texts: List[str]) -> List[List[float]]:
715
- """FIXED: Embed documents using Kaggle (thread-safe async wrapper)"""
716
- try:
717
- # Check if we're in an async context
718
- try:
719
- loop = asyncio.get_running_loop()
720
- # We're in an async context, need to handle differently
721
- return self._embed_with_thread(texts)
722
- except RuntimeError:
723
- # No running loop, safe to create one
724
- return asyncio.run(self.kaggle_client.generate_embeddings(texts))
725
- except Exception as e:
726
- logger.error(f"Embedding wrapper error: {e}")
727
- # Fallback: return dummy embeddings to prevent crashes
728
- return [[0.0] * 384 for _ in texts]
729
-
730
- def embed_query(self, text: str) -> List[float]:
731
- """FIXED: Embed query using Kaggle (thread-safe async wrapper)"""
732
- try:
733
- embeddings = self.embed_documents([text])
734
- return embeddings[0] if embeddings else [0.0] * 384
735
- except Exception as e:
736
- logger.error(f"Query embedding error: {e}")
737
- return [0.0] * 384
738
-
739
- def _embed_with_thread(self, texts: List[str]) -> List[List[float]]:
740
- """Helper: Run embedding in separate thread when in async context"""
741
-
742
- # Use a thread pool to run the async function
743
- with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
744
- # Create new event loop in thread
745
- def run_in_thread():
746
- new_loop = asyncio.new_event_loop()
747
- asyncio.set_event_loop(new_loop)
748
- try:
749
- return new_loop.run_until_complete(
750
- self.kaggle_client.generate_embeddings(texts)
751
- )
752
- finally:
753
- new_loop.close()
754
-
755
- future = executor.submit(run_in_thread)
756
- return future.result(timeout=30)
757
-
758
- # --- KAGGLE-POWERED RAG PIPELINE WITH ALL YOUR FEATURES ---
759
- class KagglePoweredRAGPipeline:
760
  def __init__(self, collection_name: str, llm_manager: MultiLLMManager, kaggle_client: LazyKaggleModelClient):
761
  self.collection_name = collection_name
762
  self.llm_manager = llm_manager
@@ -764,19 +719,17 @@ class KagglePoweredRAGPipeline:
764
  self.security_guard = SecurityGuard()
765
  self.query_processor = LightweightQueryProcessor(kaggle_client)
766
 
767
- # FIXED: Use the async-aware embedding wrapper
768
- self.embedding_function = AsyncKaggleEmbeddingWrapper(kaggle_client)
769
-
770
  self.vectorstore = Chroma(
771
  collection_name=collection_name,
772
- embedding_function=self.embedding_function,
773
  persist_directory="/tmp/chroma_kaggle"
774
  )
775
 
776
- logger.info(f"πŸš€ Kaggle-Powered RAG Pipeline initialized: {collection_name}")
777
 
778
  async def add_documents(self, chunks: List[Dict[str, Any]]):
779
- """Add documents with advanced filtering and processing"""
780
  if not chunks:
781
  return
782
 
@@ -815,22 +768,32 @@ class KagglePoweredRAGPipeline:
815
 
816
  logger.info(f"πŸ“š Filtered to {len(quality_chunks)} quality chunks")
817
 
818
- # Convert to LangChain documents
819
- documents = [
820
- LangChainDocument(
821
- page_content=chunk['content'],
822
- metadata=chunk['metadata']
823
- )
824
- for chunk in quality_chunks[:100] # Reduced from 150 for speed
825
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
826
 
827
- # Add to vector store
828
- if documents:
829
- self.vectorstore.add_documents(documents)
830
- logger.info(f"βœ… Added {len(documents)} documents to vector store")
831
 
832
  async def answer_question(self, question: str) -> str:
833
- """Answer question with advanced semantic processing"""
834
  # Security check
835
  if self.security_guard.detect_jailbreak(question):
836
  return self.security_guard.sanitize_response(question, "")
@@ -839,17 +802,18 @@ class KagglePoweredRAGPipeline:
839
  # Enhanced query processing
840
  enhanced_question = await self.query_processor.enhance_query_semantically(question)
841
 
842
- # Initial retrieval (get more candidates)
843
- retriever = self.vectorstore.as_retriever(
844
- search_type="mmr",
845
- search_kwargs={
846
- "k": 15, # Reduced from 20
847
- "fetch_k": 30, # Reduced from 40
848
- "lambda_mult": 0.5
849
- }
850
- )
851
 
852
- relevant_docs = retriever.get_relevant_documents(enhanced_question)
 
 
 
 
853
 
854
  if not relevant_docs:
855
  return "I don't have sufficient information to answer this question based on the provided documents."
@@ -977,8 +941,9 @@ def test_endpoint():
977
  return {
978
  "message": "This endpoint requires POST method",
979
  "usage": "Send POST request with documents and questions",
980
- "status": "API is running with lazy initialization",
981
  "kaggle_connection": "Will initialize on first request",
 
982
  "method": "Use POST with JSON body",
983
  "example": {
984
  "documents": ["url1", "url2"],
@@ -986,11 +951,11 @@ def test_endpoint():
986
  }
987
  }
988
 
989
- # --- SPEED-OPTIMIZED MAIN ENDPOINT WITH LAZY INITIALIZATION ---
990
  @app.post("/api/v1/hackrx/run", response_model=SubmissionResponse, dependencies=[Depends(verify_bearer_token)])
991
  async def run_submission(request: Request, submission_request: SubmissionRequest = Body(...)):
992
  start_time = time.time()
993
- logger.info(f"🎯 KAGGLE-POWERED PROCESSING: {len(submission_request.documents)} docs, {len(submission_request.questions)} questions")
994
 
995
  try:
996
  # LAZY INITIALIZATION: Only now do we connect to Kaggle!
@@ -1003,9 +968,9 @@ async def run_submission(request: Request, submission_request: SubmissionRequest
1003
  "Model service unavailable" for _ in submission_request.questions
1004
  ])
1005
 
1006
- # Create unique session
1007
  session_id = f"kaggle_{uuid.uuid4().hex[:6]}" # Shorter UUID
1008
- rag_pipeline = KagglePoweredRAGPipeline(session_id, multi_llm, kaggle_client)
1009
 
1010
  # Process all documents with higher concurrency
1011
  all_chunks = []
@@ -1058,7 +1023,7 @@ async def run_submission(request: Request, submission_request: SubmissionRequest
1058
  for _ in submission_request.questions
1059
  ])
1060
 
1061
- # Add to RAG pipeline with advanced processing
1062
  await rag_pipeline.add_documents(all_chunks)
1063
 
1064
  # SPEED OPTIMIZATION: Full parallel question answering
@@ -1075,7 +1040,7 @@ async def run_submission(request: Request, submission_request: SubmissionRequest
1075
  answers = await asyncio.gather(*tasks)
1076
 
1077
  elapsed = time.time() - start_time
1078
- logger.info(f"πŸŽ‰ KAGGLE-POWERED SUCCESS! Processed in {elapsed:.2f}s")
1079
 
1080
  return SubmissionResponse(answers=answers)
1081
 
@@ -1088,13 +1053,13 @@ async def run_submission(request: Request, submission_request: SubmissionRequest
1088
  for _ in submission_request.questions
1089
  ])
1090
 
1091
- # --- HEALTH ENDPOINTS (YOUR EXCELLENT ORIGINAL + LAZY INFO) ---
1092
  @app.get("/")
1093
  def read_root():
1094
  return {
1095
- "message": "🎯 KAGGLE-POWERED HACKATHON RAG SYSTEM - COMPLETE WITH LAZY INITIALIZATION",
1096
- "version": "5.3.0",
1097
- "status": "FIXED: Lazy initialization prevents 'Preparing Space' issues!",
1098
  "target_time": "<20 seconds with Kaggle GPU",
1099
  "supported_formats": list(doc_processor.processors.keys()),
1100
  "features": [
@@ -1107,14 +1072,17 @@ def read_root():
1107
  "Semantic chunking and context fusion",
1108
  "R4 'half questions' handling",
1109
  "Lightning-fast GPU-accelerated response times",
1110
- "Fixed asyncio event loop issues",
1111
- "Ngrok compatibility headers",
1112
- "LAZY INITIALIZATION - prevents startup timeouts"
1113
  ],
1114
  "kaggle_connection": "Lazy (connects on first API call)",
 
1115
  "fixes": [
 
1116
  "LazyKaggleModelClient prevents startup connection",
1117
- "AsyncKaggleEmbeddingWrapper with thread isolation",
 
1118
  "CORS headers with ngrok-skip-browser-warning",
1119
  "Both GET and POST endpoints for /api/v1/hackrx/run",
1120
  "Improved error handling and logging",
@@ -1126,14 +1094,16 @@ def read_root():
1126
  def health_check():
1127
  return {
1128
  "status": "healthy",
1129
- "version": "5.3.0",
1130
- "mode": "KAGGLE_GPU_POWERED_LAZY",
1131
  "cache_size": len(doc_processor.cache),
1132
  "kaggle_connection": "lazy (on-demand)",
 
1133
  "timestamp": time.time(),
1134
  "fixes_applied": [
 
1135
  "lazy_initialization",
1136
- "asyncio_event_loop_fix",
1137
  "ngrok_compatibility",
1138
  "http_method_fix",
1139
  "cors_headers",
@@ -1149,6 +1119,7 @@ async def test_kaggle_connection():
1149
  return {
1150
  "kaggle_connection": "initialized" if kaggle_client._initialized else "not_initialized",
1151
  "health_status": "healthy" if is_healthy else "unhealthy",
 
1152
  "timestamp": time.time()
1153
  }
1154
  except Exception as e:
 
1
+ # --- KAGGLE-POWERED RAG SYSTEM - COMPLETE 1144+ LINES WITH DEADLOCK FIX ---
2
 
3
  import os
4
  import json
 
24
 
25
  # LangChain imports
26
  from langchain_community.vectorstores import Chroma
 
27
 
28
  # Multi-format document processing
29
  import fitz # PyMuPDF
 
50
  logging.basicConfig(level=logging.INFO)
51
  logger = logging.getLogger(__name__)
52
 
53
+ app = FastAPI(title="Kaggle-Powered Hackathon RAG", version="5.4.0")
54
 
55
  app.add_middleware(
56
  CORSMiddleware,
 
133
  logger.error(f"Kaggle reranking error: {e}")
134
  return documents[:k]
135
 
136
+ # --- LIGHTWEIGHT QUERY PROCESSOR (YOUR COMPLETE ORIGINAL) ---
137
  class LightweightQueryProcessor:
138
  def __init__(self, kaggle_client: LazyKaggleModelClient):
139
  self.kaggle_client = kaggle_client
 
141
 
142
  async def enhance_query_semantically(self, question: str, domain: str = "insurance") -> str:
143
  """OPTIMIZED semantic query processing"""
144
+
145
+ # Quick cache check with shorter hash
146
  cache_key = hashlib.md5(question.encode()).hexdigest()[:8]
147
  if cache_key in self.cache:
148
  return self.cache[cache_key]
149
 
150
+ # Streamlined domain expansion
151
  enhanced_query = self._expand_with_domain_knowledge_fast(question, domain)
152
  enhanced_query = self._handle_incomplete_questions(enhanced_query)
153
 
154
+ # Cache result
155
  self.cache[cache_key] = enhanced_query
156
  return enhanced_query
157
 
158
  def _expand_with_domain_knowledge_fast(self, query: str, domain: str) -> str:
159
  """OPTIMIZED domain expansion - same intelligence, faster processing"""
160
+
161
+ # Streamlined expansion mapping for speed
162
  key_expansions = {
163
  'grace period': 'payment deadline premium due',
164
  'waiting period': 'exclusion time coverage delay',
 
195
 
196
  return query
197
 
198
+ # --- ANTI-JAILBREAK SECURITY SYSTEM (YOUR COMPLETE ORIGINAL) ---
199
  class SecurityGuard:
200
  def __init__(self):
201
  self.jailbreak_patterns = [
 
231
 
232
  return answer
233
 
234
+ # --- MULTI-LLM MANAGER (YOUR COMPLETE ORIGINAL WITH ALL PROVIDERS) ---
235
  class MultiLLMManager:
236
  def __init__(self):
237
  # Initialize multiple LLM providers with fallback
 
307
  response = await model.generate_content_async(prompt)
308
  return response.text.strip()
309
 
310
+ # --- COMPLETE UNIVERSAL DOCUMENT PROCESSOR (ALL YOUR ORIGINAL FEATURES) ---
311
  class UniversalDocumentProcessor:
312
  def __init__(self):
313
  # SPEED OPTIMIZATIONS: Reduced limits
 
384
  else:
385
  return '.txt'
386
 
387
+ # --- SPEED-OPTIMIZED PDF PROCESSING (YOUR COMPLETE ORIGINAL) ---
388
  async def process_pdf(self, file_path: str, content: bytes) -> List[Dict[str, Any]]:
389
  """Enhanced PDF processing with speed optimizations"""
390
  chunks = []
 
709
  "chunk_id": str(uuid.uuid4())
710
  }]
711
 
712
+ # --- GEMINI'S FIX: DEADLOCK-FREE RAG PIPELINE ---
713
+ class DeadlockFreeRAGPipeline:
714
+ """FIXED: Direct embedding management - no more AsyncKaggleEmbeddingWrapper deadlock"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
715
  def __init__(self, collection_name: str, llm_manager: MultiLLMManager, kaggle_client: LazyKaggleModelClient):
716
  self.collection_name = collection_name
717
  self.llm_manager = llm_manager
 
719
  self.security_guard = SecurityGuard()
720
  self.query_processor = LightweightQueryProcessor(kaggle_client)
721
 
722
+ # GEMINI'S FIX: No embedding function - let Chroma be a simple data store
 
 
723
  self.vectorstore = Chroma(
724
  collection_name=collection_name,
725
+ # REMOVED: embedding_function parameter completely
726
  persist_directory="/tmp/chroma_kaggle"
727
  )
728
 
729
+ logger.info(f"πŸš€ Deadlock-Free RAG Pipeline initialized: {collection_name}")
730
 
731
  async def add_documents(self, chunks: List[Dict[str, Any]]):
732
+ """GEMINI'S FIX: Direct embedding management - no deadlock"""
733
  if not chunks:
734
  return
735
 
 
768
 
769
  logger.info(f"πŸ“š Filtered to {len(quality_chunks)} quality chunks")
770
 
771
+ if not quality_chunks:
772
+ return
773
+
774
+ # GEMINI'S FIX: Step 1 - Get texts
775
+ texts = [chunk['content'] for chunk in quality_chunks[:100]] # Reduced from 150 for speed
776
+
777
+ # GEMINI'S FIX: Step 2 - Embed all texts via Kaggle (Manager gets sauce first)
778
+ logger.info(f"πŸš€ Embedding {len(texts)} chunks via Kaggle...")
779
+ embeddings = await self.kaggle_client.generate_embeddings(texts)
780
+
781
+ if not embeddings or len(embeddings) != len(texts):
782
+ logger.error("Embedding failed or returned mismatched count.")
783
+ return
784
+
785
+ # GEMINI'S FIX: Step 3 - Add to Chroma with pre-calculated embeddings
786
+ # This completely avoids the deadlock!
787
+ self.vectorstore.add_texts(
788
+ texts=texts,
789
+ metadatas=[chunk['metadata'] for chunk in quality_chunks[:100]],
790
+ embeddings=embeddings # Pass vectors directly - no async calls in Chroma!
791
+ )
792
 
793
+ logger.info(f"βœ… Added {len(texts)} documents with embeddings to vector store (DEADLOCK-FREE)")
 
 
 
794
 
795
  async def answer_question(self, question: str) -> str:
796
+ """GEMINI'S FIX: Direct query embedding - no deadlock"""
797
  # Security check
798
  if self.security_guard.detect_jailbreak(question):
799
  return self.security_guard.sanitize_response(question, "")
 
802
  # Enhanced query processing
803
  enhanced_question = await self.query_processor.enhance_query_semantically(question)
804
 
805
+ # GEMINI'S FIX: Step 1 - Embed the query yourself first (Manager gets sauce)
806
+ query_embedding_list = await self.kaggle_client.generate_embeddings([enhanced_question])
807
+ if not query_embedding_list:
808
+ return "I could not process the query for searching."
809
+
810
+ query_embedding = query_embedding_list[0]
 
 
 
811
 
812
+ # GEMINI'S FIX: Step 2 - Search using vector directly (no async calls in Chroma)
813
+ relevant_docs = self.vectorstore.similarity_search_by_vector(
814
+ embedding=query_embedding,
815
+ k=15
816
+ )
817
 
818
  if not relevant_docs:
819
  return "I don't have sufficient information to answer this question based on the provided documents."
 
941
  return {
942
  "message": "This endpoint requires POST method",
943
  "usage": "Send POST request with documents and questions",
944
+ "status": "API is running - DEADLOCK-FREE with lazy initialization",
945
  "kaggle_connection": "Will initialize on first request",
946
+ "fix": "Direct embedding management prevents async deadlocks",
947
  "method": "Use POST with JSON body",
948
  "example": {
949
  "documents": ["url1", "url2"],
 
951
  }
952
  }
953
 
954
+ # --- SPEED-OPTIMIZED MAIN ENDPOINT WITH GEMINI'S DEADLOCK FIX ---
955
  @app.post("/api/v1/hackrx/run", response_model=SubmissionResponse, dependencies=[Depends(verify_bearer_token)])
956
  async def run_submission(request: Request, submission_request: SubmissionRequest = Body(...)):
957
  start_time = time.time()
958
+ logger.info(f"🎯 DEADLOCK-FREE KAGGLE-POWERED PROCESSING: {len(submission_request.documents)} docs, {len(submission_request.questions)} questions")
959
 
960
  try:
961
  # LAZY INITIALIZATION: Only now do we connect to Kaggle!
 
968
  "Model service unavailable" for _ in submission_request.questions
969
  ])
970
 
971
+ # Create unique session with DEADLOCK-FREE pipeline
972
  session_id = f"kaggle_{uuid.uuid4().hex[:6]}" # Shorter UUID
973
+ rag_pipeline = DeadlockFreeRAGPipeline(session_id, multi_llm, kaggle_client)
974
 
975
  # Process all documents with higher concurrency
976
  all_chunks = []
 
1023
  for _ in submission_request.questions
1024
  ])
1025
 
1026
+ # Add to RAG pipeline with DEADLOCK-FREE processing
1027
  await rag_pipeline.add_documents(all_chunks)
1028
 
1029
  # SPEED OPTIMIZATION: Full parallel question answering
 
1040
  answers = await asyncio.gather(*tasks)
1041
 
1042
  elapsed = time.time() - start_time
1043
+ logger.info(f"πŸŽ‰ DEADLOCK-FREE KAGGLE-POWERED SUCCESS! Processed in {elapsed:.2f}s")
1044
 
1045
  return SubmissionResponse(answers=answers)
1046
 
 
1053
  for _ in submission_request.questions
1054
  ])
1055
 
1056
+ # --- HEALTH ENDPOINTS (YOUR EXCELLENT ORIGINAL + DEADLOCK-FREE INFO) ---
1057
  @app.get("/")
1058
  def read_root():
1059
  return {
1060
+ "message": "🎯 KAGGLE-POWERED HACKATHON RAG SYSTEM - DEADLOCK-FREE COMPLETE VERSION",
1061
+ "version": "5.4.0",
1062
+ "status": "FIXED: Deadlock-free + lazy initialization prevents all issues!",
1063
  "target_time": "<20 seconds with Kaggle GPU",
1064
  "supported_formats": list(doc_processor.processors.keys()),
1065
  "features": [
 
1072
  "Semantic chunking and context fusion",
1073
  "R4 'half questions' handling",
1074
  "Lightning-fast GPU-accelerated response times",
1075
+ "DEADLOCK-FREE async operations",
1076
+ "Lazy initialization prevents startup timeouts",
1077
+ "Direct embedding management"
1078
  ],
1079
  "kaggle_connection": "Lazy (connects on first API call)",
1080
+ "embedding_method": "Direct Kaggle management (no wrapper deadlock)",
1081
  "fixes": [
1082
+ "DeadlockFreeRAGPipeline prevents async conflicts",
1083
  "LazyKaggleModelClient prevents startup connection",
1084
+ "Direct embedding calls to Kaggle (no AsyncWrapper)",
1085
+ "Chroma as simple data store (no embedding function)",
1086
  "CORS headers with ngrok-skip-browser-warning",
1087
  "Both GET and POST endpoints for /api/v1/hackrx/run",
1088
  "Improved error handling and logging",
 
1094
  def health_check():
1095
  return {
1096
  "status": "healthy",
1097
+ "version": "5.4.0",
1098
+ "mode": "DEADLOCK_FREE_KAGGLE_GPU_POWERED_LAZY",
1099
  "cache_size": len(doc_processor.cache),
1100
  "kaggle_connection": "lazy (on-demand)",
1101
+ "embedding_method": "direct_kaggle_management",
1102
  "timestamp": time.time(),
1103
  "fixes_applied": [
1104
+ "deadlock_free_pipeline",
1105
  "lazy_initialization",
1106
+ "direct_embedding_management",
1107
  "ngrok_compatibility",
1108
  "http_method_fix",
1109
  "cors_headers",
 
1119
  return {
1120
  "kaggle_connection": "initialized" if kaggle_client._initialized else "not_initialized",
1121
  "health_status": "healthy" if is_healthy else "unhealthy",
1122
+ "endpoint": kaggle_client._endpoint if kaggle_client._initialized else "not_set",
1123
  "timestamp": time.time()
1124
  }
1125
  except Exception as e: