rohannsinghal committed on
Commit
e85c76b
·
1 Parent(s): 5375a1a

Made changes to the add_document function

Browse files
Files changed (1) hide show
  1. app/main_api.py +56 -8
app/main_api.py CHANGED
@@ -778,20 +778,68 @@ class DeadlockFreeRAGPipeline:
778
  logger.info(f"πŸš€ Embedding {len(texts)} chunks via Kaggle...")
779
  embeddings = await self.kaggle_client.generate_embeddings(texts)
780
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
781
  if not embeddings or len(embeddings) != len(texts):
782
  logger.error("Embedding failed or returned mismatched count.")
 
 
 
 
 
 
783
  return
784
 
 
 
 
 
 
 
 
 
 
785
  # GEMINI'S FIX: Step 3 - Add to Chroma with pre-calculated embeddings
786
  # This completely avoids the deadlock!
787
- self.vectorstore.add_texts(
788
- texts=texts,
789
- metadatas=[chunk['metadata'] for chunk in quality_chunks[:100]],
790
- embeddings=embeddings # <-- THE CRITICAL FIX - MAKE SURE THIS IS HERE
791
- )
792
-
793
- logger.info(f"βœ… Added {len(texts)} documents with embeddings to vector store (DEADLOCK-FREE)")
794
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
795
  async def answer_question(self, question: str) -> str:
796
  """GEMINI'S FIX: Direct query embedding - no deadlock"""
797
  # Security check
 
778
  logger.info(f"πŸš€ Embedding {len(texts)} chunks via Kaggle...")
779
  embeddings = await self.kaggle_client.generate_embeddings(texts)
780
 
781
+ # --- ADD THIS DEBUGGING SECTION ---
782
+ logger.info("--- HF DEBUG ---")
783
+ logger.info(f"Type of embeddings received: {type(embeddings)}")
784
+ if isinstance(embeddings, list) and len(embeddings) > 0:
785
+ logger.info(f"Number of embeddings: {len(embeddings)}")
786
+ logger.info(f"Type of first item: {type(embeddings[0])}")
787
+ if isinstance(embeddings[0], list):
788
+ logger.info(f"Dimension of first embedding: {len(embeddings[0])}")
789
+ logger.info(f"First few values: {embeddings[0][:5] if len(embeddings[0]) > 5 else embeddings[0]}")
790
+ else:
791
+ logger.info("Embeddings variable is either not a list or is empty!")
792
+ logger.info("--- END HF DEBUG ---")
793
+ # ---------------------------------
794
+
795
  if not embeddings or len(embeddings) != len(texts):
796
  logger.error("Embedding failed or returned mismatched count.")
797
+ logger.error(f"Expected {len(texts)} embeddings, got {len(embeddings) if embeddings else 0}")
798
+ return
799
+
800
+ # Validate embeddings format
801
+ if not isinstance(embeddings, list):
802
+ logger.error(f"Embeddings must be a list, got {type(embeddings)}")
803
  return
804
 
805
+ # Check if all embeddings are lists of floats
806
+ for i, emb in enumerate(embeddings[:3]): # Check first 3
807
+ if not isinstance(emb, list):
808
+ logger.error(f"Embedding {i} is not a list: {type(emb)}")
809
+ return
810
+ if not all(isinstance(x, (int, float)) for x in emb[:5]): # Check first 5 values
811
+ logger.error(f"Embedding {i} contains non-numeric values")
812
+ return
813
+
814
  # GEMINI'S FIX: Step 3 - Add to Chroma with pre-calculated embeddings
815
  # This completely avoids the deadlock!
816
+ try:
817
+ logger.info("πŸ”„ Adding texts to Chroma with embeddings...")
818
+ self.vectorstore.add_texts(
819
+ texts=texts,
820
+ metadatas=[chunk['metadata'] for chunk in quality_chunks[:100]],
821
+ embeddings=embeddings # <-- THE CRITICAL FIX - MAKE SURE THIS IS HERE
822
+ )
823
+ logger.info(f"βœ… Added {len(texts)} documents with embeddings to vector store (DEADLOCK-FREE)")
824
+
825
+ except Exception as e:
826
+ logger.error(f"❌ ChromaDB add_texts failed: {e}")
827
+ logger.error(f"❌ Error type: {type(e)}")
828
+
829
+ # Try adding without embeddings as fallback (this will show if it's an embedding format issue)
830
+ try:
831
+ logger.info("πŸ”„ Trying to add texts without embeddings (fallback)...")
832
+ self.vectorstore.add_texts(
833
+ texts=texts,
834
+ metadatas=[chunk['metadata'] for chunk in quality_chunks[:100]]
835
+ # No embeddings parameter - let Chroma handle it
836
+ )
837
+ logger.info("βœ… Fallback successful - issue is with embedding format")
838
+ except Exception as fallback_error:
839
+ logger.error(f"❌ Even fallback failed: {fallback_error}")
840
+
841
+ raise e # Re-raise original error
842
+
843
  async def answer_question(self, question: str) -> str:
844
  """GEMINI'S FIX: Direct query embedding - no deadlock"""
845
  # Security check