Spaces:

quantumbit
/

rag-bajaj

Sleeping

File size: 18,077 Bytes

e8051be

from fastapi import FastAPI, HTTPException, Depends, Query
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
from pydantic import BaseModel, HttpUrl
from typing import List, Dict, Any, Optional
import tempfile
import os
import hashlib
import asyncio
import aiohttp
import time
from contextlib import asynccontextmanager

from RAG.advanced_rag_processor import AdvancedRAGProcessor
from preprocessing.preprocessing import DocumentPreprocessor
from logger.logger import rag_logger
from LLM.llm_handler import llm_handler
from LLM.tabular_answer import get_answer_for_tabluar
from LLM.image_answerer import get_answer_for_image
from LLM.one_shotter import get_oneshot_answer
from config.config import *
import config.config as config

# Initialize security
security = HTTPBearer()
admin_security = HTTPBearer()

def verify_token(credentials: HTTPAuthorizationCredentials = Depends(security)):
    """Verify the bearer token for main API."""
    if credentials.credentials != BEARER_TOKEN:
        raise HTTPException(
            status_code=401,
            detail="Invalid authentication token"
        )
    return credentials.credentials

def verify_admin_token(credentials: HTTPAuthorizationCredentials = Depends(admin_security)):
    """Verify the bearer token for admin endpoints."""
    if credentials.credentials != "9420689497":
        raise HTTPException(
            status_code=401,
            detail="Invalid admin authentication token"
        )
    return credentials.credentials

# Pydantic models for request/response
class ProcessDocumentRequest(BaseModel):
    documents: HttpUrl  # URL to the PDF document
    questions: List[str]  # List of questions to answer

class ProcessDocumentResponse(BaseModel):
    answers: List[str]

class HealthResponse(BaseModel):
    status: str
    message: str

class PreprocessingResponse(BaseModel):
    status: str
    message: str
    doc_id: str
    chunk_count: int

class LogsResponse(BaseModel):
    export_timestamp: str
    metadata: Dict[str, Any]
    logs: List[Dict[str, Any]]

class LogsSummaryResponse(BaseModel):
    summary: Dict[str, Any]

# Global instances
rag_processor: Optional[AdvancedRAGProcessor] = None
document_preprocessor: Optional[DocumentPreprocessor] = None

@asynccontextmanager
async def lifespan(app: FastAPI):
    """Initialize and cleanup the RAG processor."""
    global rag_processor, document_preprocessor
    
    # Startup
    print("🚀 Initializing Advanced RAG System...")
    rag_processor = AdvancedRAGProcessor()  # Use advanced processor for better accuracy
    document_preprocessor = DocumentPreprocessor()
    print("✅ Advanced RAG System initialized successfully")
    
    yield
    
    # Shutdown
    print("🔄 Shutting down RAG System...")
    if rag_processor:
        rag_processor.cleanup()
    print("✅ Cleanup completed")

# FastAPI app with lifespan management
app = FastAPI(
    title="Advanced RAG API",
    description="API for document processing and question answering using RAG",
    version="1.0.0",
    lifespan=lifespan
)

@app.get("/health", response_model=HealthResponse)
async def health_check():
    """Health check endpoint."""
    return HealthResponse(
        status="healthy",
        message="RAG API is running successfully"
    )

@app.post("/hackrx/run", response_model=ProcessDocumentResponse)
async def process_document(
    request: ProcessDocumentRequest, 
    token: str = Depends(verify_token)
):
    """
    Process a PDF document and answer questions about it.
    
    This endpoint implements an optimized flow:
    1. Check if the document is already processed (pre-computed embeddings)
    2. If yes, use existing embeddings for fast retrieval + generation
    3. If no, run full RAG pipeline (download + process + embed + store + answer)
    
    Args:
        request: Contains document URL and list of questions
        token: Bearer token for authentication
        
    Returns:
        ProcessDocumentResponse: List of answers corresponding to the questions
    """
    global rag_processor, document_preprocessor
    
    if not rag_processor or not document_preprocessor:
        raise HTTPException(
            status_code=503, 
            detail="RAG system not initialized"
        )
    
    # Start timing and logging
    start_time = time.time()
    document_url = str(request.documents)
    questions = request.questions
    # Initialize answers for safe logging in finally
    final_answers = []
    status = "success"
    error_message = None
    doc_id = None
    was_preprocessed = False
    
    # Initialize enhanced logging
    request_id = rag_logger.generate_request_id()
    rag_logger.start_request_timing(request_id)
    
    try:
        print(f"📋 [{request_id}] Processing document: {document_url[:50]}...")
        print(f"🤔 [{request_id}] Number of questions: {len(questions)}")
        print(f"")
        print(f"🚀 [{request_id}] ===== STARTING RAG PIPELINE =====")
        print(f"📌 [{request_id}] PRIORITY 1: Checking stored embeddings database...")
        
        # Generate document ID
        doc_id = document_preprocessor.generate_doc_id(document_url)
        
        # Step 1: Check if document is already processed (stored embeddings)
        is_processed = document_preprocessor.is_document_processed(document_url)
        was_preprocessed = is_processed
        
        if is_processed:
            print(f"✅ [{request_id}] ✅ FOUND STORED EMBEDDINGS for {doc_id}")
            print(f"⚡ [{request_id}] Using fast path with pre-computed embeddings")
            # Fast path: Use existing embeddings
            doc_info = document_preprocessor.get_document_info(document_url)
            print(f"📊 [{request_id}] Using existing collection with {doc_info.get('chunk_count', 'N/A')} chunks")
        else:
            print(f"❌ [{request_id}] No stored embeddings found for {doc_id}")
            print(f"📌 [{request_id}] PRIORITY 2: Running full RAG pipeline (download + process + embed)...")
            # Full path: Download and process document
            resp = await document_preprocessor.process_document(document_url)
            
            # Handle different return formats: [content, type] or [content, type, no_cleanup_flag]
            if isinstance(resp, list):
                content, _type = resp[0], resp[1]
                if content == 'unsupported':
                    # Unsupported file type: respond gracefully without throwing server error
                    msg = f"Unsupported file type: {_type.lstrip('.')}"
                    final_answers = [msg]
                    status = "success"  # ensure no 500 is raised in the finally block
                    return ProcessDocumentResponse(answers=final_answers)
                
                if _type == "image":
                    try:
                        final_answers = get_answer_for_image(content, questions)
                        status = "success"
                        return ProcessDocumentResponse(answers=final_answers)
                    finally:
                        # Clean up the image file after processing
                        if os.path.exists(content):
                            os.unlink(content)
                            print(f"🗑️ Cleaned up image file: {content}")

                if _type == "tabular":
                    final_answers = get_answer_for_tabluar(content, questions)
                    status = "success"
                    return ProcessDocumentResponse(answers=final_answers)
                
                if _type == "oneshot":
                    # Process questions in batches for oneshot
                    tasks = [
                        get_oneshot_answer(content, questions[i:i + 3])
                        for i in range(0, len(questions), 3)
                    ]
                    
                    # Run all batches in parallel
                    results = await asyncio.gather(*tasks)
                    
                    # Flatten results
                    final_answers = [ans for batch in results for ans in batch]
                    status = "success"
                    return ProcessDocumentResponse(answers=final_answers)          
            else:
                doc_id = resp

            print(f"✅ [{request_id}] Document {doc_id} processed and stored")
        
        # Answer all questions using parallel processing for better latency
        print(f"🚀 [{request_id}] Processing {len(questions)} questions in parallel...")
        
        async def answer_single_question(question: str, index: int) -> tuple[str, Dict[str, float]]:
            """Answer a single question with error handling and timing."""
            try:
                question_start = time.time()
                print(f"❓ [{request_id}] Q{index+1}: {question[:50]}...")
                
                answer, pipeline_timings = await rag_processor.answer_question(
                    question=question,
                    doc_id=doc_id,
                    logger=rag_logger,
                    request_id=request_id
                )
                
                question_time = time.time() - question_start
                
                # Log question timing
                rag_logger.log_question_timing(
                    request_id, index, question, answer, question_time, pipeline_timings
                )
                
                print(f"✅ [{request_id}] Q{index+1} completed in {question_time:.4f}s")
                return answer, pipeline_timings
            except Exception as e:
                print(f"❌ [{request_id}] Q{index+1} Error: {str(e)}")
                return f"I encountered an error while processing this question: {str(e)}", {}
        
        
        # Process questions in parallel with controlled concurrency
        semaphore = asyncio.Semaphore(3)  # Reduced concurrency for better logging visibility
        
        async def bounded_answer(question: str, index: int) -> tuple[str, Dict[str, float]]:
            async with semaphore:
                return await answer_single_question(question, index)
        
        # Execute all questions concurrently
        tasks = [
            bounded_answer(question, i) 
            for i, question in enumerate(questions)
        ]
        
        results = await asyncio.gather(*tasks, return_exceptions=True)
        
        # Handle any exceptions in answers
        final_answers = []
        error_count = 0
        for i, result in enumerate(results):
            if isinstance(result, Exception):
                error_count += 1
                final_answers.append(f"Error processing question {i+1}: {str(result)}")
            else:
                answer, _ = result
                final_answers.append(answer)
        
        # Determine final status
        if error_count == 0:
            status = "success"
        elif error_count == len(questions):
            status = "error"
        else:
            status = "partial"
        
        print(f"✅ [{request_id}] Successfully processed {len(questions) - error_count}/{len(questions)} questions")
        
    except Exception as e:
        print(f"❌ [{request_id}] Error processing request: {str(e)}")
        status = "error"
        error_message = str(e)
        final_answers = [f"Error: {str(e)}" for _ in questions]
    
    finally:
        # End request timing and get detailed timing data
        timing_data = rag_logger.end_request_timing(request_id)
        
        # Log the request with enhanced timing
        processing_time = time.time() - start_time
        logged_request_id = rag_logger.log_request(
            document_url=document_url,
            questions=questions,
            answers=final_answers,
            processing_time=processing_time,
            status=status,
            error_message=error_message,
            document_id=doc_id,
            was_preprocessed=was_preprocessed,
            timing_data=timing_data
        )
        
        print(f"📊 Request logged with ID: {logged_request_id} (Status: {status}, Time: {processing_time:.2f}s)")
        
        if status == "error":
            raise HTTPException(
                status_code=500,
                detail=f"Failed to process document: {error_message}"
            )
    
    return ProcessDocumentResponse(answers=final_answers)

@app.post("/preprocess", response_model=PreprocessingResponse)
async def preprocess_document(document_url: str, force: bool = False, token: str = Depends(verify_admin_token)):
    """
    Preprocess a document (for batch preprocessing).
    
    Args:
        document_url: URL of the PDF to preprocess
        force: Whether to reprocess if already processed
        
    Returns:
        PreprocessingResponse: Status and document info
    """
    global document_preprocessor
    
    if not document_preprocessor:
        raise HTTPException(
            status_code=503,
            detail="Document preprocessor not initialized"
        )
    
    try:
        doc_id = await document_preprocessor.process_document(document_url, force)
        doc_info = document_preprocessor.get_document_info(document_url)
        
        return PreprocessingResponse(
            status="success",
            message=f"Document processed successfully",
            doc_id=doc_id,
            chunk_count=doc_info.get("chunk_count", 0)
        )
        
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Failed to preprocess document: {str(e)}"
        )

@app.get("/collections")
async def list_collections(token: str = Depends(verify_admin_token)):
    """List all available document collections."""
    global document_preprocessor
    
    if not document_preprocessor:
        raise HTTPException(
            status_code=503,
            detail="Document preprocessor not initialized"
        )
    
    try:
        processed_docs = document_preprocessor.list_processed_documents()
        return {"collections": processed_docs}
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Failed to list collections: {str(e)}"
        )

@app.get("/collections/stats")
async def get_collection_stats(token: str = Depends(verify_admin_token)):
    """Get statistics about all collections."""
    global document_preprocessor
    
    if not document_preprocessor:
        raise HTTPException(
            status_code=503,
            detail="Document preprocessor not initialized"
        )
    
    try:
        stats = document_preprocessor.get_collection_stats()
        return stats
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Failed to get collection stats: {str(e)}"
        )
    
# Logging Endpoints
@app.get("/logs", response_model=LogsResponse)
async def get_logs(
    token: str = Depends(verify_admin_token),
    limit: Optional[int] = Query(None, description="Maximum number of logs to return"),
    minutes: Optional[int] = Query(None, description="Get logs from last N minutes"),
    document_url: Optional[str] = Query(None, description="Filter logs by document URL")
):
    """
    Export all API request logs as JSON.
    
    Query Parameters:
        limit: Maximum number of recent logs to return
        minutes: Get logs from the last N minutes
        document_url: Filter logs for a specific document URL
    
    Returns:
        LogsResponse: Complete logs export with metadata
    """
    try:
        if document_url:
            # Get logs for specific document
            logs = rag_logger.get_logs_by_document(document_url)
            metadata = {
                "filtered_by": "document_url",
                "document_url": document_url,
                "total_logs": len(logs)
            }
            return LogsResponse(
                export_timestamp=rag_logger.export_logs()["export_timestamp"],
                metadata=metadata,
                logs=logs
            )
        
        elif minutes:
            # Get recent logs
            logs = rag_logger.get_recent_logs(minutes)
            metadata = {
                "filtered_by": "time_range",
                "minutes": minutes,
                "total_logs": len(logs)
            }
            return LogsResponse(
                export_timestamp=rag_logger.export_logs()["export_timestamp"],
                metadata=metadata,
                logs=logs
            )
        
        else:
            # Get all logs (with optional limit)
            if limit:
                logs = rag_logger.get_logs(limit)
                metadata = rag_logger.get_logs_summary()
                metadata["limited_to"] = limit
            else:
                logs_export = rag_logger.export_logs()
                return LogsResponse(**logs_export)
            
            return LogsResponse(
                export_timestamp=rag_logger.export_logs()["export_timestamp"],
                metadata=metadata,
                logs=logs
            )
    
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Failed to export logs: {str(e)}"
        )

@app.get("/logs/summary", response_model=LogsSummaryResponse)
async def get_logs_summary(token: str = Depends(verify_admin_token)):
    """
    Get summary statistics of all logs.
    
    Returns:
        LogsSummaryResponse: Summary statistics
    """
    try:
        summary = rag_logger.get_logs_summary()
        return LogsSummaryResponse(summary=summary)
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Failed to get logs summary: {str(e)}"
        )

if __name__ == "__main__":
    import uvicorn
    
    # Run the FastAPI server
    uvicorn.run(
        "api:app",
        host=API_HOST,
        port=API_PORT,
        reload=API_RELOAD,
        log_level="info"
    )