Spaces:
Sleeping
Sleeping
Commit Β·
c82e944
1
Parent(s): afbff39
changes to main_api.py
Browse files- app/main_api.py +79 -108
app/main_api.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
# --- KAGGLE-POWERED RAG SYSTEM
|
| 2 |
|
| 3 |
import os
|
| 4 |
import json
|
|
@@ -24,7 +24,6 @@ from pydantic import BaseModel
|
|
| 24 |
|
| 25 |
# LangChain imports
|
| 26 |
from langchain_community.vectorstores import Chroma
|
| 27 |
-
from langchain.schema.document import Document as LangChainDocument
|
| 28 |
|
| 29 |
# Multi-format document processing
|
| 30 |
import fitz # PyMuPDF
|
|
@@ -51,7 +50,7 @@ load_dotenv()
|
|
| 51 |
logging.basicConfig(level=logging.INFO)
|
| 52 |
logger = logging.getLogger(__name__)
|
| 53 |
|
| 54 |
-
app = FastAPI(title="Kaggle-Powered Hackathon RAG", version="5.
|
| 55 |
|
| 56 |
app.add_middleware(
|
| 57 |
CORSMiddleware,
|
|
@@ -134,7 +133,7 @@ class LazyKaggleModelClient:
|
|
| 134 |
logger.error(f"Kaggle reranking error: {e}")
|
| 135 |
return documents[:k]
|
| 136 |
|
| 137 |
-
# --- LIGHTWEIGHT QUERY PROCESSOR (YOUR
|
| 138 |
class LightweightQueryProcessor:
|
| 139 |
def __init__(self, kaggle_client: LazyKaggleModelClient):
|
| 140 |
self.kaggle_client = kaggle_client
|
|
@@ -142,18 +141,24 @@ class LightweightQueryProcessor:
|
|
| 142 |
|
| 143 |
async def enhance_query_semantically(self, question: str, domain: str = "insurance") -> str:
|
| 144 |
"""OPTIMIZED semantic query processing"""
|
|
|
|
|
|
|
| 145 |
cache_key = hashlib.md5(question.encode()).hexdigest()[:8]
|
| 146 |
if cache_key in self.cache:
|
| 147 |
return self.cache[cache_key]
|
| 148 |
|
|
|
|
| 149 |
enhanced_query = self._expand_with_domain_knowledge_fast(question, domain)
|
| 150 |
enhanced_query = self._handle_incomplete_questions(enhanced_query)
|
| 151 |
|
|
|
|
| 152 |
self.cache[cache_key] = enhanced_query
|
| 153 |
return enhanced_query
|
| 154 |
|
| 155 |
def _expand_with_domain_knowledge_fast(self, query: str, domain: str) -> str:
|
| 156 |
"""OPTIMIZED domain expansion - same intelligence, faster processing"""
|
|
|
|
|
|
|
| 157 |
key_expansions = {
|
| 158 |
'grace period': 'payment deadline premium due',
|
| 159 |
'waiting period': 'exclusion time coverage delay',
|
|
@@ -190,7 +195,7 @@ class LightweightQueryProcessor:
|
|
| 190 |
|
| 191 |
return query
|
| 192 |
|
| 193 |
-
# --- ANTI-JAILBREAK SECURITY SYSTEM (YOUR
|
| 194 |
class SecurityGuard:
|
| 195 |
def __init__(self):
|
| 196 |
self.jailbreak_patterns = [
|
|
@@ -226,7 +231,7 @@ class SecurityGuard:
|
|
| 226 |
|
| 227 |
return answer
|
| 228 |
|
| 229 |
-
# --- MULTI-LLM MANAGER (YOUR
|
| 230 |
class MultiLLMManager:
|
| 231 |
def __init__(self):
|
| 232 |
# Initialize multiple LLM providers with fallback
|
|
@@ -302,7 +307,7 @@ class MultiLLMManager:
|
|
| 302 |
response = await model.generate_content_async(prompt)
|
| 303 |
return response.text.strip()
|
| 304 |
|
| 305 |
-
# --- COMPLETE UNIVERSAL DOCUMENT PROCESSOR (ALL
|
| 306 |
class UniversalDocumentProcessor:
|
| 307 |
def __init__(self):
|
| 308 |
# SPEED OPTIMIZATIONS: Reduced limits
|
|
@@ -379,7 +384,7 @@ class UniversalDocumentProcessor:
|
|
| 379 |
else:
|
| 380 |
return '.txt'
|
| 381 |
|
| 382 |
-
# --- SPEED-OPTIMIZED PDF PROCESSING (YOUR
|
| 383 |
async def process_pdf(self, file_path: str, content: bytes) -> List[Dict[str, Any]]:
|
| 384 |
"""Enhanced PDF processing with speed optimizations"""
|
| 385 |
chunks = []
|
|
@@ -704,59 +709,9 @@ class UniversalDocumentProcessor:
|
|
| 704 |
"chunk_id": str(uuid.uuid4())
|
| 705 |
}]
|
| 706 |
|
| 707 |
-
# ---
|
| 708 |
-
class
|
| 709 |
-
"""FIXED:
|
| 710 |
-
def __init__(self, kaggle_client: LazyKaggleModelClient):
|
| 711 |
-
self.kaggle_client = kaggle_client
|
| 712 |
-
self._embeddings_cache = {}
|
| 713 |
-
|
| 714 |
-
def embed_documents(self, texts: List[str]) -> List[List[float]]:
|
| 715 |
-
"""FIXED: Embed documents using Kaggle (thread-safe async wrapper)"""
|
| 716 |
-
try:
|
| 717 |
-
# Check if we're in an async context
|
| 718 |
-
try:
|
| 719 |
-
loop = asyncio.get_running_loop()
|
| 720 |
-
# We're in an async context, need to handle differently
|
| 721 |
-
return self._embed_with_thread(texts)
|
| 722 |
-
except RuntimeError:
|
| 723 |
-
# No running loop, safe to create one
|
| 724 |
-
return asyncio.run(self.kaggle_client.generate_embeddings(texts))
|
| 725 |
-
except Exception as e:
|
| 726 |
-
logger.error(f"Embedding wrapper error: {e}")
|
| 727 |
-
# Fallback: return dummy embeddings to prevent crashes
|
| 728 |
-
return [[0.0] * 384 for _ in texts]
|
| 729 |
-
|
| 730 |
-
def embed_query(self, text: str) -> List[float]:
|
| 731 |
-
"""FIXED: Embed query using Kaggle (thread-safe async wrapper)"""
|
| 732 |
-
try:
|
| 733 |
-
embeddings = self.embed_documents([text])
|
| 734 |
-
return embeddings[0] if embeddings else [0.0] * 384
|
| 735 |
-
except Exception as e:
|
| 736 |
-
logger.error(f"Query embedding error: {e}")
|
| 737 |
-
return [0.0] * 384
|
| 738 |
-
|
| 739 |
-
def _embed_with_thread(self, texts: List[str]) -> List[List[float]]:
|
| 740 |
-
"""Helper: Run embedding in separate thread when in async context"""
|
| 741 |
-
|
| 742 |
-
# Use a thread pool to run the async function
|
| 743 |
-
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
|
| 744 |
-
# Create new event loop in thread
|
| 745 |
-
def run_in_thread():
|
| 746 |
-
new_loop = asyncio.new_event_loop()
|
| 747 |
-
asyncio.set_event_loop(new_loop)
|
| 748 |
-
try:
|
| 749 |
-
return new_loop.run_until_complete(
|
| 750 |
-
self.kaggle_client.generate_embeddings(texts)
|
| 751 |
-
)
|
| 752 |
-
finally:
|
| 753 |
-
new_loop.close()
|
| 754 |
-
|
| 755 |
-
future = executor.submit(run_in_thread)
|
| 756 |
-
return future.result(timeout=30)
|
| 757 |
-
|
| 758 |
-
# --- KAGGLE-POWERED RAG PIPELINE WITH ALL YOUR FEATURES ---
|
| 759 |
-
class KagglePoweredRAGPipeline:
|
| 760 |
def __init__(self, collection_name: str, llm_manager: MultiLLMManager, kaggle_client: LazyKaggleModelClient):
|
| 761 |
self.collection_name = collection_name
|
| 762 |
self.llm_manager = llm_manager
|
|
@@ -764,19 +719,17 @@ class KagglePoweredRAGPipeline:
|
|
| 764 |
self.security_guard = SecurityGuard()
|
| 765 |
self.query_processor = LightweightQueryProcessor(kaggle_client)
|
| 766 |
|
| 767 |
-
#
|
| 768 |
-
self.embedding_function = AsyncKaggleEmbeddingWrapper(kaggle_client)
|
| 769 |
-
|
| 770 |
self.vectorstore = Chroma(
|
| 771 |
collection_name=collection_name,
|
| 772 |
-
embedding_function
|
| 773 |
persist_directory="/tmp/chroma_kaggle"
|
| 774 |
)
|
| 775 |
|
| 776 |
-
logger.info(f"π
|
| 777 |
|
| 778 |
async def add_documents(self, chunks: List[Dict[str, Any]]):
|
| 779 |
-
"""
|
| 780 |
if not chunks:
|
| 781 |
return
|
| 782 |
|
|
@@ -815,22 +768,32 @@ class KagglePoweredRAGPipeline:
|
|
| 815 |
|
| 816 |
logger.info(f"π Filtered to {len(quality_chunks)} quality chunks")
|
| 817 |
|
| 818 |
-
|
| 819 |
-
|
| 820 |
-
|
| 821 |
-
|
| 822 |
-
|
| 823 |
-
|
| 824 |
-
|
| 825 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 826 |
|
| 827 |
-
|
| 828 |
-
if documents:
|
| 829 |
-
self.vectorstore.add_documents(documents)
|
| 830 |
-
logger.info(f"β
Added {len(documents)} documents to vector store")
|
| 831 |
|
| 832 |
async def answer_question(self, question: str) -> str:
|
| 833 |
-
"""
|
| 834 |
# Security check
|
| 835 |
if self.security_guard.detect_jailbreak(question):
|
| 836 |
return self.security_guard.sanitize_response(question, "")
|
|
@@ -839,17 +802,18 @@ class KagglePoweredRAGPipeline:
|
|
| 839 |
# Enhanced query processing
|
| 840 |
enhanced_question = await self.query_processor.enhance_query_semantically(question)
|
| 841 |
|
| 842 |
-
#
|
| 843 |
-
|
| 844 |
-
|
| 845 |
-
|
| 846 |
-
|
| 847 |
-
|
| 848 |
-
"lambda_mult": 0.5
|
| 849 |
-
}
|
| 850 |
-
)
|
| 851 |
|
| 852 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 853 |
|
| 854 |
if not relevant_docs:
|
| 855 |
return "I don't have sufficient information to answer this question based on the provided documents."
|
|
@@ -977,8 +941,9 @@ def test_endpoint():
|
|
| 977 |
return {
|
| 978 |
"message": "This endpoint requires POST method",
|
| 979 |
"usage": "Send POST request with documents and questions",
|
| 980 |
-
"status": "API is running with lazy initialization",
|
| 981 |
"kaggle_connection": "Will initialize on first request",
|
|
|
|
| 982 |
"method": "Use POST with JSON body",
|
| 983 |
"example": {
|
| 984 |
"documents": ["url1", "url2"],
|
|
@@ -986,11 +951,11 @@ def test_endpoint():
|
|
| 986 |
}
|
| 987 |
}
|
| 988 |
|
| 989 |
-
# --- SPEED-OPTIMIZED MAIN ENDPOINT WITH
|
| 990 |
@app.post("/api/v1/hackrx/run", response_model=SubmissionResponse, dependencies=[Depends(verify_bearer_token)])
|
| 991 |
async def run_submission(request: Request, submission_request: SubmissionRequest = Body(...)):
|
| 992 |
start_time = time.time()
|
| 993 |
-
logger.info(f"π― KAGGLE-POWERED PROCESSING: {len(submission_request.documents)} docs, {len(submission_request.questions)} questions")
|
| 994 |
|
| 995 |
try:
|
| 996 |
# LAZY INITIALIZATION: Only now do we connect to Kaggle!
|
|
@@ -1003,9 +968,9 @@ async def run_submission(request: Request, submission_request: SubmissionRequest
|
|
| 1003 |
"Model service unavailable" for _ in submission_request.questions
|
| 1004 |
])
|
| 1005 |
|
| 1006 |
-
# Create unique session
|
| 1007 |
session_id = f"kaggle_{uuid.uuid4().hex[:6]}" # Shorter UUID
|
| 1008 |
-
rag_pipeline =
|
| 1009 |
|
| 1010 |
# Process all documents with higher concurrency
|
| 1011 |
all_chunks = []
|
|
@@ -1058,7 +1023,7 @@ async def run_submission(request: Request, submission_request: SubmissionRequest
|
|
| 1058 |
for _ in submission_request.questions
|
| 1059 |
])
|
| 1060 |
|
| 1061 |
-
# Add to RAG pipeline with
|
| 1062 |
await rag_pipeline.add_documents(all_chunks)
|
| 1063 |
|
| 1064 |
# SPEED OPTIMIZATION: Full parallel question answering
|
|
@@ -1075,7 +1040,7 @@ async def run_submission(request: Request, submission_request: SubmissionRequest
|
|
| 1075 |
answers = await asyncio.gather(*tasks)
|
| 1076 |
|
| 1077 |
elapsed = time.time() - start_time
|
| 1078 |
-
logger.info(f"π KAGGLE-POWERED SUCCESS! Processed in {elapsed:.2f}s")
|
| 1079 |
|
| 1080 |
return SubmissionResponse(answers=answers)
|
| 1081 |
|
|
@@ -1088,13 +1053,13 @@ async def run_submission(request: Request, submission_request: SubmissionRequest
|
|
| 1088 |
for _ in submission_request.questions
|
| 1089 |
])
|
| 1090 |
|
| 1091 |
-
# --- HEALTH ENDPOINTS (YOUR EXCELLENT ORIGINAL +
|
| 1092 |
@app.get("/")
|
| 1093 |
def read_root():
|
| 1094 |
return {
|
| 1095 |
-
"message": "π― KAGGLE-POWERED HACKATHON RAG SYSTEM - COMPLETE
|
| 1096 |
-
"version": "5.
|
| 1097 |
-
"status": "FIXED:
|
| 1098 |
"target_time": "<20 seconds with Kaggle GPU",
|
| 1099 |
"supported_formats": list(doc_processor.processors.keys()),
|
| 1100 |
"features": [
|
|
@@ -1107,14 +1072,17 @@ def read_root():
|
|
| 1107 |
"Semantic chunking and context fusion",
|
| 1108 |
"R4 'half questions' handling",
|
| 1109 |
"Lightning-fast GPU-accelerated response times",
|
| 1110 |
-
"
|
| 1111 |
-
"
|
| 1112 |
-
"
|
| 1113 |
],
|
| 1114 |
"kaggle_connection": "Lazy (connects on first API call)",
|
|
|
|
| 1115 |
"fixes": [
|
|
|
|
| 1116 |
"LazyKaggleModelClient prevents startup connection",
|
| 1117 |
-
"
|
|
|
|
| 1118 |
"CORS headers with ngrok-skip-browser-warning",
|
| 1119 |
"Both GET and POST endpoints for /api/v1/hackrx/run",
|
| 1120 |
"Improved error handling and logging",
|
|
@@ -1126,14 +1094,16 @@ def read_root():
|
|
| 1126 |
def health_check():
|
| 1127 |
return {
|
| 1128 |
"status": "healthy",
|
| 1129 |
-
"version": "5.
|
| 1130 |
-
"mode": "
|
| 1131 |
"cache_size": len(doc_processor.cache),
|
| 1132 |
"kaggle_connection": "lazy (on-demand)",
|
|
|
|
| 1133 |
"timestamp": time.time(),
|
| 1134 |
"fixes_applied": [
|
|
|
|
| 1135 |
"lazy_initialization",
|
| 1136 |
-
"
|
| 1137 |
"ngrok_compatibility",
|
| 1138 |
"http_method_fix",
|
| 1139 |
"cors_headers",
|
|
@@ -1149,6 +1119,7 @@ async def test_kaggle_connection():
|
|
| 1149 |
return {
|
| 1150 |
"kaggle_connection": "initialized" if kaggle_client._initialized else "not_initialized",
|
| 1151 |
"health_status": "healthy" if is_healthy else "unhealthy",
|
|
|
|
| 1152 |
"timestamp": time.time()
|
| 1153 |
}
|
| 1154 |
except Exception as e:
|
|
|
|
| 1 |
+
# --- KAGGLE-POWERED RAG SYSTEM - COMPLETE 1144+ LINES WITH DEADLOCK FIX ---
|
| 2 |
|
| 3 |
import os
|
| 4 |
import json
|
|
|
|
| 24 |
|
| 25 |
# LangChain imports
|
| 26 |
from langchain_community.vectorstores import Chroma
|
|
|
|
| 27 |
|
| 28 |
# Multi-format document processing
|
| 29 |
import fitz # PyMuPDF
|
|
|
|
| 50 |
logging.basicConfig(level=logging.INFO)
|
| 51 |
logger = logging.getLogger(__name__)
|
| 52 |
|
| 53 |
+
app = FastAPI(title="Kaggle-Powered Hackathon RAG", version="5.4.0")
|
| 54 |
|
| 55 |
app.add_middleware(
|
| 56 |
CORSMiddleware,
|
|
|
|
| 133 |
logger.error(f"Kaggle reranking error: {e}")
|
| 134 |
return documents[:k]
|
| 135 |
|
| 136 |
+
# --- LIGHTWEIGHT QUERY PROCESSOR (YOUR COMPLETE ORIGINAL) ---
|
| 137 |
class LightweightQueryProcessor:
|
| 138 |
def __init__(self, kaggle_client: LazyKaggleModelClient):
|
| 139 |
self.kaggle_client = kaggle_client
|
|
|
|
| 141 |
|
| 142 |
async def enhance_query_semantically(self, question: str, domain: str = "insurance") -> str:
|
| 143 |
"""OPTIMIZED semantic query processing"""
|
| 144 |
+
|
| 145 |
+
# Quick cache check with shorter hash
|
| 146 |
cache_key = hashlib.md5(question.encode()).hexdigest()[:8]
|
| 147 |
if cache_key in self.cache:
|
| 148 |
return self.cache[cache_key]
|
| 149 |
|
| 150 |
+
# Streamlined domain expansion
|
| 151 |
enhanced_query = self._expand_with_domain_knowledge_fast(question, domain)
|
| 152 |
enhanced_query = self._handle_incomplete_questions(enhanced_query)
|
| 153 |
|
| 154 |
+
# Cache result
|
| 155 |
self.cache[cache_key] = enhanced_query
|
| 156 |
return enhanced_query
|
| 157 |
|
| 158 |
def _expand_with_domain_knowledge_fast(self, query: str, domain: str) -> str:
|
| 159 |
"""OPTIMIZED domain expansion - same intelligence, faster processing"""
|
| 160 |
+
|
| 161 |
+
# Streamlined expansion mapping for speed
|
| 162 |
key_expansions = {
|
| 163 |
'grace period': 'payment deadline premium due',
|
| 164 |
'waiting period': 'exclusion time coverage delay',
|
|
|
|
| 195 |
|
| 196 |
return query
|
| 197 |
|
| 198 |
+
# --- ANTI-JAILBREAK SECURITY SYSTEM (YOUR COMPLETE ORIGINAL) ---
|
| 199 |
class SecurityGuard:
|
| 200 |
def __init__(self):
|
| 201 |
self.jailbreak_patterns = [
|
|
|
|
| 231 |
|
| 232 |
return answer
|
| 233 |
|
| 234 |
+
# --- MULTI-LLM MANAGER (YOUR COMPLETE ORIGINAL WITH ALL PROVIDERS) ---
|
| 235 |
class MultiLLMManager:
|
| 236 |
def __init__(self):
|
| 237 |
# Initialize multiple LLM providers with fallback
|
|
|
|
| 307 |
response = await model.generate_content_async(prompt)
|
| 308 |
return response.text.strip()
|
| 309 |
|
| 310 |
+
# --- COMPLETE UNIVERSAL DOCUMENT PROCESSOR (ALL YOUR ORIGINAL FEATURES) ---
|
| 311 |
class UniversalDocumentProcessor:
|
| 312 |
def __init__(self):
|
| 313 |
# SPEED OPTIMIZATIONS: Reduced limits
|
|
|
|
| 384 |
else:
|
| 385 |
return '.txt'
|
| 386 |
|
| 387 |
+
# --- SPEED-OPTIMIZED PDF PROCESSING (YOUR COMPLETE ORIGINAL) ---
|
| 388 |
async def process_pdf(self, file_path: str, content: bytes) -> List[Dict[str, Any]]:
|
| 389 |
"""Enhanced PDF processing with speed optimizations"""
|
| 390 |
chunks = []
|
|
|
|
| 709 |
"chunk_id": str(uuid.uuid4())
|
| 710 |
}]
|
| 711 |
|
| 712 |
+
# --- GEMINI'S FIX: DEADLOCK-FREE RAG PIPELINE ---
|
| 713 |
+
class DeadlockFreeRAGPipeline:
|
| 714 |
+
"""FIXED: Direct embedding management - no more AsyncKaggleEmbeddingWrapper deadlock"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 715 |
def __init__(self, collection_name: str, llm_manager: MultiLLMManager, kaggle_client: LazyKaggleModelClient):
|
| 716 |
self.collection_name = collection_name
|
| 717 |
self.llm_manager = llm_manager
|
|
|
|
| 719 |
self.security_guard = SecurityGuard()
|
| 720 |
self.query_processor = LightweightQueryProcessor(kaggle_client)
|
| 721 |
|
| 722 |
+
# GEMINI'S FIX: No embedding function - let Chroma be a simple data store
|
|
|
|
|
|
|
| 723 |
self.vectorstore = Chroma(
|
| 724 |
collection_name=collection_name,
|
| 725 |
+
# REMOVED: embedding_function parameter completely
|
| 726 |
persist_directory="/tmp/chroma_kaggle"
|
| 727 |
)
|
| 728 |
|
| 729 |
+
logger.info(f"π Deadlock-Free RAG Pipeline initialized: {collection_name}")
|
| 730 |
|
| 731 |
async def add_documents(self, chunks: List[Dict[str, Any]]):
|
| 732 |
+
"""GEMINI'S FIX: Direct embedding management - no deadlock"""
|
| 733 |
if not chunks:
|
| 734 |
return
|
| 735 |
|
|
|
|
| 768 |
|
| 769 |
logger.info(f"π Filtered to {len(quality_chunks)} quality chunks")
|
| 770 |
|
| 771 |
+
if not quality_chunks:
|
| 772 |
+
return
|
| 773 |
+
|
| 774 |
+
# GEMINI'S FIX: Step 1 - Get texts
|
| 775 |
+
texts = [chunk['content'] for chunk in quality_chunks[:100]] # Reduced from 150 for speed
|
| 776 |
+
|
| 777 |
+
# GEMINI'S FIX: Step 2 - Embed all texts via Kaggle (Manager gets sauce first)
|
| 778 |
+
logger.info(f"π Embedding {len(texts)} chunks via Kaggle...")
|
| 779 |
+
embeddings = await self.kaggle_client.generate_embeddings(texts)
|
| 780 |
+
|
| 781 |
+
if not embeddings or len(embeddings) != len(texts):
|
| 782 |
+
logger.error("Embedding failed or returned mismatched count.")
|
| 783 |
+
return
|
| 784 |
+
|
| 785 |
+
# GEMINI'S FIX: Step 3 - Add to Chroma with pre-calculated embeddings
|
| 786 |
+
# This completely avoids the deadlock!
|
| 787 |
+
self.vectorstore.add_texts(
|
| 788 |
+
texts=texts,
|
| 789 |
+
metadatas=[chunk['metadata'] for chunk in quality_chunks[:100]],
|
| 790 |
+
embeddings=embeddings # Pass vectors directly - no async calls in Chroma!
|
| 791 |
+
)
|
| 792 |
|
| 793 |
+
logger.info(f"β
Added {len(texts)} documents with embeddings to vector store (DEADLOCK-FREE)")
|
|
|
|
|
|
|
|
|
|
| 794 |
|
| 795 |
async def answer_question(self, question: str) -> str:
|
| 796 |
+
"""GEMINI'S FIX: Direct query embedding - no deadlock"""
|
| 797 |
# Security check
|
| 798 |
if self.security_guard.detect_jailbreak(question):
|
| 799 |
return self.security_guard.sanitize_response(question, "")
|
|
|
|
| 802 |
# Enhanced query processing
|
| 803 |
enhanced_question = await self.query_processor.enhance_query_semantically(question)
|
| 804 |
|
| 805 |
+
# GEMINI'S FIX: Step 1 - Embed the query yourself first (Manager gets sauce)
|
| 806 |
+
query_embedding_list = await self.kaggle_client.generate_embeddings([enhanced_question])
|
| 807 |
+
if not query_embedding_list:
|
| 808 |
+
return "I could not process the query for searching."
|
| 809 |
+
|
| 810 |
+
query_embedding = query_embedding_list[0]
|
|
|
|
|
|
|
|
|
|
| 811 |
|
| 812 |
+
# GEMINI'S FIX: Step 2 - Search using vector directly (no async calls in Chroma)
|
| 813 |
+
relevant_docs = self.vectorstore.similarity_search_by_vector(
|
| 814 |
+
embedding=query_embedding,
|
| 815 |
+
k=15
|
| 816 |
+
)
|
| 817 |
|
| 818 |
if not relevant_docs:
|
| 819 |
return "I don't have sufficient information to answer this question based on the provided documents."
|
|
|
|
| 941 |
return {
|
| 942 |
"message": "This endpoint requires POST method",
|
| 943 |
"usage": "Send POST request with documents and questions",
|
| 944 |
+
"status": "API is running - DEADLOCK-FREE with lazy initialization",
|
| 945 |
"kaggle_connection": "Will initialize on first request",
|
| 946 |
+
"fix": "Direct embedding management prevents async deadlocks",
|
| 947 |
"method": "Use POST with JSON body",
|
| 948 |
"example": {
|
| 949 |
"documents": ["url1", "url2"],
|
|
|
|
| 951 |
}
|
| 952 |
}
|
| 953 |
|
| 954 |
+
# --- SPEED-OPTIMIZED MAIN ENDPOINT WITH GEMINI'S DEADLOCK FIX ---
|
| 955 |
@app.post("/api/v1/hackrx/run", response_model=SubmissionResponse, dependencies=[Depends(verify_bearer_token)])
|
| 956 |
async def run_submission(request: Request, submission_request: SubmissionRequest = Body(...)):
|
| 957 |
start_time = time.time()
|
| 958 |
+
logger.info(f"π― DEADLOCK-FREE KAGGLE-POWERED PROCESSING: {len(submission_request.documents)} docs, {len(submission_request.questions)} questions")
|
| 959 |
|
| 960 |
try:
|
| 961 |
# LAZY INITIALIZATION: Only now do we connect to Kaggle!
|
|
|
|
| 968 |
"Model service unavailable" for _ in submission_request.questions
|
| 969 |
])
|
| 970 |
|
| 971 |
+
# Create unique session with DEADLOCK-FREE pipeline
|
| 972 |
session_id = f"kaggle_{uuid.uuid4().hex[:6]}" # Shorter UUID
|
| 973 |
+
rag_pipeline = DeadlockFreeRAGPipeline(session_id, multi_llm, kaggle_client)
|
| 974 |
|
| 975 |
# Process all documents with higher concurrency
|
| 976 |
all_chunks = []
|
|
|
|
| 1023 |
for _ in submission_request.questions
|
| 1024 |
])
|
| 1025 |
|
| 1026 |
+
# Add to RAG pipeline with DEADLOCK-FREE processing
|
| 1027 |
await rag_pipeline.add_documents(all_chunks)
|
| 1028 |
|
| 1029 |
# SPEED OPTIMIZATION: Full parallel question answering
|
|
|
|
| 1040 |
answers = await asyncio.gather(*tasks)
|
| 1041 |
|
| 1042 |
elapsed = time.time() - start_time
|
| 1043 |
+
logger.info(f"π DEADLOCK-FREE KAGGLE-POWERED SUCCESS! Processed in {elapsed:.2f}s")
|
| 1044 |
|
| 1045 |
return SubmissionResponse(answers=answers)
|
| 1046 |
|
|
|
|
| 1053 |
for _ in submission_request.questions
|
| 1054 |
])
|
| 1055 |
|
| 1056 |
+
# --- HEALTH ENDPOINTS (YOUR EXCELLENT ORIGINAL + DEADLOCK-FREE INFO) ---
|
| 1057 |
@app.get("/")
|
| 1058 |
def read_root():
|
| 1059 |
return {
|
| 1060 |
+
"message": "π― KAGGLE-POWERED HACKATHON RAG SYSTEM - DEADLOCK-FREE COMPLETE VERSION",
|
| 1061 |
+
"version": "5.4.0",
|
| 1062 |
+
"status": "FIXED: Deadlock-free + lazy initialization prevents all issues!",
|
| 1063 |
"target_time": "<20 seconds with Kaggle GPU",
|
| 1064 |
"supported_formats": list(doc_processor.processors.keys()),
|
| 1065 |
"features": [
|
|
|
|
| 1072 |
"Semantic chunking and context fusion",
|
| 1073 |
"R4 'half questions' handling",
|
| 1074 |
"Lightning-fast GPU-accelerated response times",
|
| 1075 |
+
"DEADLOCK-FREE async operations",
|
| 1076 |
+
"Lazy initialization prevents startup timeouts",
|
| 1077 |
+
"Direct embedding management"
|
| 1078 |
],
|
| 1079 |
"kaggle_connection": "Lazy (connects on first API call)",
|
| 1080 |
+
"embedding_method": "Direct Kaggle management (no wrapper deadlock)",
|
| 1081 |
"fixes": [
|
| 1082 |
+
"DeadlockFreeRAGPipeline prevents async conflicts",
|
| 1083 |
"LazyKaggleModelClient prevents startup connection",
|
| 1084 |
+
"Direct embedding calls to Kaggle (no AsyncWrapper)",
|
| 1085 |
+
"Chroma as simple data store (no embedding function)",
|
| 1086 |
"CORS headers with ngrok-skip-browser-warning",
|
| 1087 |
"Both GET and POST endpoints for /api/v1/hackrx/run",
|
| 1088 |
"Improved error handling and logging",
|
|
|
|
| 1094 |
def health_check():
|
| 1095 |
return {
|
| 1096 |
"status": "healthy",
|
| 1097 |
+
"version": "5.4.0",
|
| 1098 |
+
"mode": "DEADLOCK_FREE_KAGGLE_GPU_POWERED_LAZY",
|
| 1099 |
"cache_size": len(doc_processor.cache),
|
| 1100 |
"kaggle_connection": "lazy (on-demand)",
|
| 1101 |
+
"embedding_method": "direct_kaggle_management",
|
| 1102 |
"timestamp": time.time(),
|
| 1103 |
"fixes_applied": [
|
| 1104 |
+
"deadlock_free_pipeline",
|
| 1105 |
"lazy_initialization",
|
| 1106 |
+
"direct_embedding_management",
|
| 1107 |
"ngrok_compatibility",
|
| 1108 |
"http_method_fix",
|
| 1109 |
"cors_headers",
|
|
|
|
| 1119 |
return {
|
| 1120 |
"kaggle_connection": "initialized" if kaggle_client._initialized else "not_initialized",
|
| 1121 |
"health_status": "healthy" if is_healthy else "unhealthy",
|
| 1122 |
+
"endpoint": kaggle_client._endpoint if kaggle_client._initialized else "not_set",
|
| 1123 |
"timestamp": time.time()
|
| 1124 |
}
|
| 1125 |
except Exception as e:
|