adeyemi001 commited on
Commit
d8ba418
·
verified ·
1 Parent(s): 92237ff

Upload project excluding env and notebook

Browse files
Dockerfile ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Use Python 3.11 slim image
FROM python:3.11-slim

# Set working directory
WORKDIR /app

# Set environment variables
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PORT=7860

# Install system dependencies
# --no-install-recommends keeps the layer small (build-essential pulls in
# many recommended packages otherwise)
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    curl \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements first for better caching
COPY backend/requirements.txt .

# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Copy backend code
COPY backend/app ./app

# Copy frontend files
COPY frontend ./frontend

# Expose Hugging Face Spaces port
EXPOSE 7860

# Health check
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD curl -f http://localhost:7860/health || exit 1

# Run the application.
# BUGFIX: exec form instead of shell form — in shell form uvicorn runs as a
# child of /bin/sh (PID 1 is the shell), so SIGTERM from `docker stop` never
# reaches uvicorn and the container is hard-killed after the grace period.
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
backend/__init__.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# backend/app/__init__.py
#
# NOTE(review): this one file appears to concatenate the contents of THREE
# separate package __init__.py files (app, app.rag, app.utils) — the section
# headers below name each intended destination. As committed into a single
# backend/__init__.py, the later `from app....` imports would execute at
# package import time and fail unless `app` is importable. Verify the
# intended split against the repository layout before relying on this file.
"""FinSight RAG Application."""

__version__ = "1.0.0"


# backend/app/rag/__init__.py
"""RAG components for document retrieval and processing."""

from app.rag.retriever import ZillizRetriever
from app.rag.query_expander import QueryExpander
from app.rag.reranker import MMRReranker
from app.rag.compressor import ContextualCompressor
from app.rag.chain import RAGChain

# Public API of the rag package.
__all__ = [
    "ZillizRetriever",
    "QueryExpander",
    "MMRReranker",
    "ContextualCompressor",
    "RAGChain",
]


# backend/app/utils/__init__.py
"""Utility functions and helpers."""

from app.utils.citations import CitationTracker, extract_citations_from_answer
from app.utils.conversation import (
    ConversationMessage,
    ConversationHistory,
    SessionManager,
    session_manager
)
from app.utils.cache import (
    CacheEntry,
    EmbeddingCache,
    QueryResponseCache,
    DocumentCache,
    CacheManager,
    cache_manager
)

# Public API of the utils package; `session_manager` and `cache_manager`
# are module-level singleton instances re-exported alongside their classes.
__all__ = [
    "CitationTracker",
    "extract_citations_from_answer",
    "ConversationMessage",
    "ConversationHistory",
    "SessionManager",
    "session_manager",
    "CacheEntry",
    "EmbeddingCache",
    "QueryResponseCache",
    "DocumentCache",
    "CacheManager",
    "cache_manager",
]
backend/app/config.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Configuration management for the RAG application."""
import os
from typing import List, Optional
from pydantic_settings import BaseSettings
from dotenv import load_dotenv

# Load environment variables from a local .env file, if present.
load_dotenv()


class Settings(BaseSettings):
    """Application settings loaded from environment variables.

    Required (no default): OPENAI_API_KEY, ZILLIZ_URI, ZILLIZ_TOKEN.
    Everything else falls back to the defaults below.
    """

    # OpenAI Configuration
    OPENAI_API_KEY: str
    OPENAI_MODEL: str = "gpt-3.5-turbo"
    OPENAI_EMBEDDING_MODEL: str = "text-embedding-3-large"
    OPENAI_EMBEDDING_DIMENSION: int = 3072

    # Zilliz Configuration
    ZILLIZ_URI: str
    ZILLIZ_TOKEN: str
    COLLECTION_NAME: str = "financial_documents"

    # RAG Configuration
    DEFAULT_TOP_K: int = 10
    RETRIEVAL_TOP_K: int = 30  # Retrieve more for reranking
    MAX_CONTEXT_TOKENS: int = 8000
    LLM_TIMEOUT: int = 30  # seconds

    # Query Expansion
    ENABLE_QUERY_EXPANSION: bool = True
    MAX_QUERY_VARIATIONS: int = 3

    # Reranking
    ENABLE_RERANKING: bool = True
    MMR_DIVERSITY_SCORE: float = 0.3  # Balance between relevance and diversity

    # Compression
    ENABLE_COMPRESSION: bool = True

    # Caching
    ENABLE_QUERY_CACHE: bool = True
    # BUGFIX: app.rag.chain.RAGChain.process_query reads
    # settings.ENABLE_RESPONSE_CACHE, which was never defined here and raised
    # AttributeError on every uncached query. Define it (default on).
    ENABLE_RESPONSE_CACHE: bool = True
    ENABLE_EMBEDDING_CACHE: bool = True
    EMBEDDING_CACHE_SIZE: int = 1000
    EMBEDDING_CACHE_TTL: int = 86400  # 24 hours
    QUERY_CACHE_SIZE: int = 100
    QUERY_CACHE_TTL: int = 3600  # 1 hour

    # CORS - IMPORTANT: Update for Hugging Face
    # Typed as List[str] (the bare `list` annotation gave pydantic no element
    # type to validate against).
    ALLOWED_ORIGINS: List[str] = [
        "*",  # Allow all origins for Hugging Face Spaces
        "https://huggingface.co",
        "https://*.hf.space",
    ]

    # Server Configuration - Hugging Face uses port 7860
    PORT: int = int(os.getenv("PORT", "7860"))
    HOST: str = "0.0.0.0"

    class Config:
        env_file = ".env"
        case_sensitive = True


# Global settings instance shared across the application.
settings = Settings()
backend/app/main.py ADDED
@@ -0,0 +1,258 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """FastAPI application entry point - Hugging Face optimized."""
2
+ from fastapi import FastAPI, HTTPException
3
+ from fastapi.middleware.cors import CORSMiddleware
4
+ from fastapi.staticfiles import StaticFiles
5
+ from fastapi.responses import FileResponse
6
+ from contextlib import asynccontextmanager
7
+ import logging
8
+ import os
9
+
10
+ from app.config import settings
11
+ from app.models import QueryRequest, QueryResponse, HealthResponse, StatsResponse
12
+ from app.rag.chain import RAGChain
13
+ from app.rag.retriever import ZillizRetriever
14
+ from app.utils.cache import cache_manager
15
+
16
+ # Configure logging
17
+ logging.basicConfig(level=logging.INFO)
18
+ logger = logging.getLogger(__name__)
19
+
20
+ # Global RAG chain instance
21
+ rag_chain = None
22
+
23
+
24
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Build the RAG pipeline on startup; log on shutdown.

    The chain is constructed once and shared by all request handlers through
    the module-level `rag_chain`; a failed initialization aborts startup.
    """
    global rag_chain

    # --- startup ---
    logger.info("Initializing RAG chain...")
    logger.info(f"Running on port: {settings.PORT}")
    logger.info(f"CORS origins: {settings.ALLOWED_ORIGINS}")

    try:
        rag_chain = RAGChain()
        logger.info("RAG chain initialized successfully")
    except Exception as exc:
        logger.error(f"Failed to initialize RAG chain: {exc}")
        raise

    yield

    # --- shutdown ---
    logger.info("Shutting down application...")
46
+
47
# Create FastAPI app; the lifespan hook above initializes the RAG chain.
app = FastAPI(
    title="FinSight RAG API",
    description="Production-ready LangChain RAG application for financial document Q&A",
    version="1.0.0",
    lifespan=lifespan
)

# Add CORS middleware - Important for Hugging Face Spaces
# NOTE(review): ALLOWED_ORIGINS contains "*" while allow_credentials=True;
# the CORS spec forbids a wildcard origin on credentialed requests, so
# Starlette will not send "*" in that combination — confirm whether
# credentials are actually needed, or list explicit origins.
app.add_middleware(
    CORSMiddleware,
    allow_origins=settings.ALLOWED_ORIGINS,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Mount static files (frontend)
# Check if frontend directory exists
# frontend_path resolves to ../frontend relative to this module (the
# directory the Dockerfile copies in). It is computed unconditionally
# because the root endpoint also reads it.
frontend_path = os.path.join(os.path.dirname(__file__), "..", "frontend")
if os.path.exists(frontend_path):
    app.mount("/static", StaticFiles(directory=frontend_path), name="static")
    logger.info(f"Frontend mounted at /static from {frontend_path}")
70
+
71
+
72
@app.get("/", tags=["Root"])
async def root():
    """Serve the frontend index page, or a JSON API summary when absent."""
    index_page = os.path.join(frontend_path, "index.html")
    if not os.path.exists(index_page):
        # No bundled frontend — fall back to a machine-readable summary.
        return {
            "message": "FinSight RAG API",
            "version": "1.0.0",
            "docs": "/docs",
            "frontend": "Frontend not found. Use API directly."
        }
    return FileResponse(index_page)
84
+
85
+
86
@app.get("/health", response_model=HealthResponse, tags=["Health"])
async def health_check():
    """
    Health check endpoint (probed by the container HEALTHCHECK).

    Returns:
        HealthResponse with a static "healthy" status and the app version.
    """
    return HealthResponse(status="healthy", version="1.0.0")
98
+
99
+
100
@app.get("/stats", response_model=StatsResponse, tags=["Statistics"])
async def get_stats():
    """
    Get collection statistics.

    Returns:
        StatsResponse with collection information. The ticker and
        doc-type lists are currently static placeholders.
    """
    try:
        collection_info = ZillizRetriever().get_collection_stats()
        return StatsResponse(
            collection_name=collection_info.get("collection_name", settings.COLLECTION_NAME),
            total_documents=collection_info.get("total_documents", 0),
            embedding_dimension=collection_info.get("embedding_dimension", settings.OPENAI_EMBEDDING_DIMENSION),
            available_tickers=["ACM"],  # Hardcoded for now
            available_doc_types=["balance_sheet", "cash_flow", "income_statement", "10k"]
        )
    except Exception as e:
        logger.error(f"Error getting stats: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to get statistics: {str(e)}")
122
+
123
+
124
@app.post("/query", response_model=QueryResponse, tags=["Query"])
async def query_documents(request: QueryRequest):
    """
    Main RAG query endpoint with conversation memory.

    Runs the query through the RAG pipeline and returns an answer with
    source citations; `session_id` ties follow-up questions to earlier
    turns in the same conversation.

    Args:
        request: QueryRequest with query text, optional filters, and session_id

    Returns:
        QueryResponse with answer, sources, and session_id
    """
    # rag_chain is populated by the lifespan hook; it stays None only if
    # startup has not completed (or failed).
    if rag_chain is None:
        raise HTTPException(status_code=503, detail="RAG chain not initialized")

    try:
        logger.info(f"Processing query: {request.query} [Session: {request.session_id or 'new'}]")
        answer = await rag_chain.aprocess_query(request)
        logger.info(f"Query processed successfully in {answer.processing_time}s [Session: {answer.session_id}]")
        return answer
    except Exception as e:
        logger.error(f"Error processing query: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to process query: {str(e)}")
155
+
156
+
157
@app.post("/query/sync", response_model=QueryResponse, tags=["Query"])
def query_documents_sync(request: QueryRequest):
    """
    Synchronous version of the query endpoint with conversation memory.

    FastAPI executes this plain `def` handler in its threadpool, so it does
    not block the event loop.

    Args:
        request: QueryRequest with query text, optional filters, and session_id

    Returns:
        QueryResponse with answer, sources, and session_id
    """
    if rag_chain is None:
        raise HTTPException(status_code=503, detail="RAG chain not initialized")

    try:
        logger.info(f"Processing query (sync): {request.query} [Session: {request.session_id or 'new'}]")
        result = rag_chain.process_query(request)
        logger.info(f"Query processed successfully in {result.processing_time}s [Session: {result.session_id}]")
        return result
    except Exception as e:
        logger.error(f"Error processing query: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to process query: {str(e)}")
185
+
186
+
187
@app.delete("/session/{session_id}", tags=["Session"])
async def clear_session(session_id: str):
    """
    Erase the stored conversation history for one session.

    Args:
        session_id: Identifier of the session to reset

    Returns:
        Success message
    """
    if rag_chain is None:
        raise HTTPException(status_code=503, detail="RAG chain not initialized")

    try:
        rag_chain.clear_conversation(session_id)
        logger.info(f"Cleared session: {session_id}")
        return {"message": f"Session {session_id} cleared successfully"}
    except Exception as e:
        logger.error(f"Error clearing session: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to clear session: {str(e)}")
211
+
212
+
213
@app.get("/cache/stats", tags=["Cache"])
async def get_cache_stats():
    """
    Report cache statistics (hit rates and cost savings).

    Returns:
        Dictionary with statistics for every cache layer
    """
    try:
        all_stats = cache_manager.get_all_stats()
        logger.info("Cache stats retrieved")
        return all_stats
    except Exception as e:
        logger.error(f"Error getting cache stats: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to get cache stats: {str(e)}")
229
+
230
+
231
@app.delete("/cache/clear", tags=["Cache"])
async def clear_all_caches():
    """
    Drop every cached embedding and query result.

    Use this to force fresh results or to reclaim memory.

    Returns:
        Success message
    """
    try:
        cache_manager.clear_all()
        logger.info("All caches cleared")
        return {"message": "All caches cleared successfully"}
    except Exception as e:
        logger.error(f"Error clearing caches: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to clear caches: {str(e)}")
249
+
250
+
251
if __name__ == "__main__":
    # Direct-execution entry point for local development only; the Docker
    # image starts uvicorn via its CMD instead, so this branch is unused in
    # deployment.
    import uvicorn
    uvicorn.run(
        "app.main:app",
        host=settings.HOST,
        port=settings.PORT,
        reload=False  # Disable reload in production
    )
backend/app/models.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Pydantic models for request/response validation."""
from typing import List, Optional, Dict, Any  # NOTE(review): Dict/Any unused here
from pydantic import BaseModel, Field


class QueryRequest(BaseModel):
    """Request model for RAG query endpoint."""

    query: str = Field(..., description="User's financial question")
    ticker: Optional[str] = Field(None, description="Filter by company ticker (e.g., 'ACM')")
    doc_types: Optional[List[str]] = Field(
        None,
        description="Filter by document types: balance_sheet, cash_flow, income_statement, 10k"
    )
    # top_k is clamped to 1..20 by validation even though config defaults differ.
    top_k: int = Field(10, ge=1, le=20, description="Number of sources to retrieve")
    session_id: Optional[str] = Field(None, description="Session ID for conversation history")

    class Config:
        # Example payload shown in the OpenAPI /docs UI.
        json_schema_extra = {
            "example": {
                "query": "What was ACM's revenue in 2024?",
                "ticker": "ACM",
                "doc_types": ["income_statement"],
                "top_k": 10,
                "session_id": "abc123"
            }
        }


class Source(BaseModel):
    """Source citation information."""

    source_id: int = Field(..., description="Source reference number [Source N]")
    filename: str = Field(..., description="Source filename")
    doc_type: str = Field(..., description="Document type")
    ticker: Optional[str] = Field(None, description="Company ticker")
    similarity_score: float = Field(..., description="Similarity score (0-1)")
    chunk_id: Optional[str] = Field(None, description="Chunk identifier")
    text_preview: str = Field(..., description="Preview of source text (first 200 chars)")

    class Config:
        # Example payload shown in the OpenAPI /docs UI.
        json_schema_extra = {
            "example": {
                "source_id": 1,
                "filename": "ACM_balance_sheet.md",
                "doc_type": "balance_sheet",
                "ticker": "ACM",
                "similarity_score": 0.89,
                "chunk_id": "chunk_0",
                "text_preview": "Total Current Assets for FY 2025: $6.73B..."
            }
        }


class QueryResponse(BaseModel):
    """Response model for RAG query endpoint."""

    answer: str = Field(..., description="AI-generated answer with citations")
    sources: List[Source] = Field(..., description="List of sources used")
    query: str = Field(..., description="Original query")
    processing_time: float = Field(..., description="Total processing time in seconds")
    expanded_queries: Optional[List[str]] = Field(None, description="Query variations used")
    num_documents_retrieved: int = Field(..., description="Number of documents retrieved")
    session_id: str = Field(..., description="Session ID for this conversation")

    class Config:
        # Example payload shown in the OpenAPI /docs UI.
        json_schema_extra = {
            "example": {
                "answer": "ACM's revenue in FY 2024 was $16.11B [Source 1]...",
                "sources": [
                    {
                        "source_id": 1,
                        "filename": "ACM_income_statement.md",
                        "doc_type": "income_statement",
                        "ticker": "ACM",
                        "similarity_score": 0.92,
                        "chunk_id": "chunk_0",
                        "text_preview": "Contract Revenue FY 2024: $16.11B..."
                    }
                ],
                "query": "What was ACM's revenue in 2024?",
                "processing_time": 2.34,
                "expanded_queries": ["What was ACM's revenue in 2024?"],
                "num_documents_retrieved": 5,
                "session_id": "abc123"
            }
        }


class HealthResponse(BaseModel):
    """Health check response."""

    # "healthy" is the only value the /health endpoint currently emits.
    status: str
    version: str = "1.0.0"


class StatsResponse(BaseModel):
    """Collection statistics response."""

    collection_name: str
    total_documents: int
    embedding_dimension: int
    available_tickers: List[str]
    available_doc_types: List[str]
backend/app/rag/__init__.py ADDED
File without changes
backend/app/rag/chain.py ADDED
@@ -0,0 +1,569 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Main RAG chain orchestration with conversation memory."""
2
+ import time
3
+ from typing import List, Optional
4
+ from langchain_core.documents import Document
5
+ from langchain_openai import ChatOpenAI
6
+ from langchain_core.prompts import ChatPromptTemplate
7
+ from app.config import settings
8
+ from app.models import QueryRequest, QueryResponse, Source
9
+ from app.rag.retriever import ZillizRetriever
10
+ from app.rag.query_expander import QueryExpander
11
+ from app.rag.reranker import MMRReranker
12
+ from app.rag.compressor import ContextualCompressor
13
+ from app.utils.citations import CitationTracker
14
+ from app.utils.conversation import ConversationHistory
15
+ from app.utils.cache import cache_manager
16
+
17
+
18
+ # System prompt from specifications
19
+ SYSTEM_PROMPT = """You are an expert financial analyst AI. You provide accurate financial analysis from company financial statements and 10-K filings.
20
+
21
+ ═══════════════════════════════════════════════════════════════════════════
22
+ CRITICAL RULE #1: ALWAYS USE "TOTAL" LINE ITEMS FROM FINANCIAL STATEMENTS
23
+ ═══════════════════════════════════════════════════════════════════════════
24
+
25
+ Financial statements contain summary rows labeled "Total". These are the ONLY numbers you should use for calculations.
26
+
27
+ **MANDATORY LABELS TO USE (Look for these EXACT phrases):**
28
+
29
+ FROM BALANCE SHEET:
30
+ ✓ "Total Current Assets" - Use this, NOT individual assets
31
+ ✓ "Total Current Liabilities" - Use this, NOT individual liabilities
32
+ ✓ "Total Assets" - Use this
33
+ ✓ "Total Liabilities" - Use this
34
+ ✓ "Total Stockholders' Equity" or "Total Equity" - Use this
35
+ ✓ "Long Term Debt" or "Long-term debt" - Use this
36
+ ✓ "Short Term Debt" or "Short-term debt" - Use this (if needed)
37
+
38
+ FROM INCOME STATEMENT:
39
+ ✓ "Contract Revenue" or "Revenue" or "Total Revenue" - Use this
40
+ ✓ "Total Cost of Revenue" or "Cost of Revenue" - Use this
41
+ ✓ "Gross Profit" - Use this (already calculated)
42
+ ✓ "Operating Income" - Use this (already calculated)
43
+ ✓ "Net Income" or "Profit or Loss" - Use this
44
+ ✓ "Income Before Tax" - Use this
45
+
46
+ FROM CASH FLOW STATEMENT:
47
+ ✓ "Net Cash from Operating Activities" - Use this
48
+ ✓ "Net Cash from Investing Activities" - Use this
49
+ ✓ "Net Cash from Financing Activities" - Use this
50
+
51
+ ═══════════════════════════════════════════════════════════════════════════
52
+ FINANCIAL CALCULATIONS - COMPREHENSIVE GUIDE
53
+ ═══════════════════════════════════════════════════════════════════════════
54
+
55
+ **LIQUIDITY RATIOS:**
56
+
57
+ 1. Working Capital = Total Current Assets - Total Current Liabilities
58
+ Example: $6.73B - $5.93B = $800M
59
+
60
+ 2. Current Ratio = Total Current Assets ÷ Total Current Liabilities
61
+ Example: $6.73B ÷ $5.93B = 1.13
62
+ Interpretation: >1.0 is good (company can cover short-term obligations)
63
+
64
+ 3. Quick Ratio = (Total Current Assets - Inventory) ÷ Total Current Liabilities
65
+ Note: Only subtract inventory if explicitly asked for quick ratio
66
+
67
+ **LEVERAGE/SOLVENCY RATIOS:**
68
+
69
+ 4. Debt-to-Equity Ratio = Total Debt ÷ Total Stockholders' Equity
70
+ Where Total Debt = Long Term Debt + Short Term Debt
71
+ Example: ($2.65B + $4.07M) ÷ $2.70B = 0.98
72
+ Interpretation: <1.0 means less debt than equity (generally safer)
73
+
74
+ 5. Debt-to-Assets Ratio = Total Debt ÷ Total Assets
75
+ Example: $2.65B ÷ $12.20B = 0.22 or 22%
76
+
77
+ 6. Equity Ratio = Total Stockholders' Equity ÷ Total Assets
78
+ Example: $2.70B ÷ $12.20B = 0.22 or 22%
79
+
80
+ **PROFITABILITY RATIOS:**
81
+
82
+ 7. Gross Profit Margin = (Gross Profit ÷ Revenue) × 100
83
+ Example: ($1.22B ÷ $16.14B) × 100 = 7.6%
84
+
85
+ 8. Operating Profit Margin = (Operating Income ÷ Revenue) × 100
86
+ Example: ($1.03B ÷ $16.14B) × 100 = 6.4%
87
+
88
+ 9. Net Profit Margin = (Net Income ÷ Revenue) × 100
89
+ Example: ($561.77M ÷ $16.14B) × 100 = 3.5%
90
+
91
+ 10. Return on Assets (ROA) = (Net Income ÷ Total Assets) × 100
92
+ Example: ($561.77M ÷ $12.20B) × 100 = 4.6%
93
+
94
+ 11. Return on Equity (ROE) = (Net Income ÷ Total Stockholders' Equity) × 100
95
+ Example: ($561.77M ÷ $2.70B) × 100 = 20.8%
96
+
97
+ **EFFICIENCY RATIOS:**
98
+
99
+ 12. Asset Turnover = Revenue ÷ Total Assets
100
+ Example: $16.14B ÷ $12.20B = 1.32
101
+ Interpretation: Company generates $1.32 in revenue for every $1 of assets
102
+
103
+ 13. Inventory Turnover = Cost of Revenue ÷ Inventory
104
+ (Only calculate if inventory is available in balance sheet)
105
+
106
+ **CASH FLOW ANALYSIS:**
107
+
108
+ 14. Operating Cash Flow Margin = (Net Cash from Operating Activities ÷ Revenue) × 100
109
+
110
+ 15. Free Cash Flow = Net Cash from Operating Activities - Capital Expenditures
111
+ (Capital Expenditures = "Payments for Property, Plant and Equipment" from cash flow)
112
+
113
+ 16. Cash Flow to Net Income Ratio = Net Cash from Operating Activities ÷ Net Income
114
+ Interpretation: >1.0 means high quality earnings (cash backing profits)
115
+
116
+ **YEAR-OVER-YEAR (YoY) ANALYSIS:**
117
+
118
+ 17. YoY Growth Rate = ((Current Year - Prior Year) ÷ Prior Year) × 100
119
+ Example Revenue Growth: (($16.14B - $16.11B) ÷ $16.11B) × 100 = 0.19%
120
+
121
+ 18. YoY Change (Dollar Amount) = Current Year - Prior Year
122
+ Example: $16.14B - $16.11B = $30M increase
123
+
124
+ **TREND ANALYSIS (Multiple Years):**
125
+
126
+ 19. When analyzing trends over 3+ years:
127
+ - Calculate YoY change for each consecutive year
128
+ - Identify if trend is increasing, decreasing, or stable
129
+ - Note any significant inflection points
130
+
131
+ ═══════════════════════════════════════════════════════════════════════════
132
+ HOW TO EXTRACT DATA FROM FINANCIAL STATEMENTS
133
+ ═══════════════════════════════════════════════════════════════════════════
134
+
135
+ 1. **Identify the fiscal year columns** (usually labeled FY 2025, FY 2024, etc.)
136
+
137
+ 2. **Find the "Total" row** for what you need:
138
+ - Scan the "label" column for rows starting with "Total"
139
+ - Use the value from the appropriate fiscal year column
140
+
141
+ 3. **For balance sheet items**, look in balance sheet sources
142
+
143
+ 4. **For income statement items**, look in income statement sources
144
+
145
+ 5. **For cash flow items**, look in cash flow statement sources
146
+
147
+ 6. **NEVER add up individual line items** when a "Total" exists
148
+
149
+ ═══════════════════════════════════════════════════════════════════════════
150
+ RESPONSE FORMAT (User-Friendly)
151
+ ═══════════════════════════════════════════════════════════════════════════
152
+
153
+ **Structure:**
154
+ 1. Direct Answer - What's the answer in one sentence?
155
+ 2. Key Figures - List the relevant numbers with years and [Source X]
156
+ 3. Calculation - Show the result (not the formula)
157
+ 4. Analysis - What does this mean? Is it good or bad? What's the trend?
158
+ 5. Sources - List all sources cited
159
+
160
+ **Writing Style:**
161
+ - Use simple language, not jargon
162
+ - Show formulas in plain text: "Revenue ÷ Gross Profit" or "Current Assets - Current Liabilities"
163
+ - Then show the calculation with actual numbers: "$16.14B ÷ $1.22B = 13.2"
164
+ - Use bullet points
165
+ - Compare to prior year when relevant
166
+ - State if trend is positive or negative for the company
167
+
168
+ **Example Good Answer:**
169
+ "Direct Answer: ACM's working capital was $800M in FY 2025.
170
+
171
+ Key Figures:
172
+ • FY 2025: Total Current Assets $6.73B, Total Current Liabilities $5.93B [Source 1]
173
+ • FY 2024: Total Current Assets $7.18B, Total Current Liabilities $6.37B [Source 1]
174
+
175
+ Calculation:
176
+ • Formula: Total Current Assets - Total Current Liabilities = Working Capital
177
+ • FY 2025: $6.73B - $5.93B = $800M
178
+ • FY 2024: $7.18B - $6.37B = $810M
179
+ • Change: $800M - $810M = -$10M decline (1.2% decrease)
180
+
181
+ Analysis:
182
+ Working capital decreased by $10M (1.2% decline). This slight reduction means ACM has marginally less liquidity to cover short-term obligations compared to last year, though the company still maintains positive working capital.
183
+
184
+ Sources: [Source 1] ACM_balance_sheet.md"
185
+
186
+ ═══════════════════════════════════════════════════════════════════════════
187
+ FOR 10-K NARRATIVE SECTIONS
188
+ ═══════════════════════════════════════════════════════════════════════════
189
+
190
+ When answering questions about 10-K narrative content (business description, risks, strategy):
191
+ - Summarize key points clearly
192
+ - Use bullet points for multiple items
193
+ - Quote important phrases when relevant
194
+ - Cite sources for each major point
195
+ - Group related information together
196
+
197
+ ═══════════════════════════════════════════════════════════════════════════
198
+ CONVERSATION CONTEXT AND FOLLOW-UP QUESTIONS
199
+ ═════════════════════��═════════════════════════════════════════════════════
200
+
201
+ If the user asks a follow-up question that refers to previous context:
202
+ - Use the conversation history provided to understand the context
203
+ - Reference previous questions/answers when relevant (e.g., "As mentioned earlier...")
204
+ - Maintain consistency with previous responses
205
+ - If the follow-up requires new data, retrieve it from the documents
206
+
207
+ For pronoun references (e.g., "What about last year?" or "How does that compare?"):
208
+ - Infer what "that" or "it" refers to from the conversation history
209
+ - Explicitly state what you're comparing in your answer
210
+
211
+ ═══════════════════════════════════════════════════════════════════════════
212
+ CRITICAL CONSTRAINTS
213
+ ═══════════════════════════════════════════════════════════════════════════
214
+
215
+ **NEVER FABRICATE NUMBERS**: If specific information is not present in the provided context, explicitly state "This information is not available in the financial documents provided" and suggest consulting the company's official SEC filings or investor relations for complete information.
216
+
217
+ **DATA CUTOFF**: All financial data was collected on December 7, 2025. Information or events after this date are not available in this system.
218
+
219
+ **ACCURACY OVER COMPLETENESS**: It is better to say "I don't have this information" than to make up numbers or calculations."""
220
+
221
+
222
+ class RAGChain:
223
+ """Main RAG chain for financial Q&A with conversation memory."""
224
+
225
    def __init__(self):
        """Initialize all RAG components.

        Builds the retriever, query expander, MMR reranker, and contextual
        compressor, plus a deterministic (temperature=0) chat model and the
        prompt template used by the answer-generation step. Each RAGChain
        instance keeps its own per-session conversation store.
        """
        self.retriever = ZillizRetriever()
        self.query_expander = QueryExpander()
        self.reranker = MMRReranker()
        self.compressor = ContextualCompressor()
        # temperature=0 for reproducible financial answers; timeout bounds a
        # single LLM call (settings.LLM_TIMEOUT seconds).
        self.llm = ChatOpenAI(
            model=settings.OPENAI_MODEL,
            temperature=0,
            openai_api_key=settings.OPENAI_API_KEY,
            timeout=settings.LLM_TIMEOUT
        )

        # Conversation histories keyed by session_id
        self.conversations: dict[str, ConversationHistory] = {}

        # {conversation_history}, {context} and {query} are filled in per
        # request when the chain is invoked.
        self.prompt = ChatPromptTemplate.from_messages([
            ("system", SYSTEM_PROMPT),
            ("user", """{conversation_history}

Context from financial documents:

{context}

Question: {query}

Please provide a detailed answer using the context above. If this is a follow-up question, use the conversation history to understand the context. Remember to cite sources using [Source N] notation.""")
        ])
253
+
254
+ def _get_or_create_conversation(self, session_id: str) -> ConversationHistory:
255
+ """Get existing conversation or create new one."""
256
+ if session_id not in self.conversations:
257
+ self.conversations[session_id] = ConversationHistory(max_tokens=4000)
258
+ return self.conversations[session_id]
259
+
260
    def process_query(self, request: QueryRequest) -> QueryResponse:
        """
        Process a query through the full RAG pipeline with conversation memory.

        Pipeline: response-cache lookup -> query expansion -> per-query
        retrieval -> deduplication -> MMR reranking -> contextual
        compression -> prompt assembly (citations + recent history) ->
        LLM generation -> conversation/caching bookkeeping.

        Args:
            request: QueryRequest with query and filters

        Returns:
            QueryResponse with answer and sources

        Raises:
            Exception: any pipeline failure is logged and re-raised.
        """
        start_time = time.time()

        # Check response cache (only for queries without session history;
        # session-aware answers depend on prior turns and must not be shared)
        if settings.ENABLE_RESPONSE_CACHE and not request.session_id:
            cached_response = cache_manager.response_cache.get(
                query=request.query,
                ticker=request.ticker,
                doc_types=request.doc_types,
                top_k=request.top_k
            )
            if cached_response is not None:
                # Add processing time and return cached response.
                # NOTE(review): assumes the QueryResponse model accepts a
                # 'from_cache' field — confirm against the schema.
                cached_response['processing_time'] = round(time.time() - start_time, 2)
                cached_response['from_cache'] = True
                return QueryResponse(**cached_response)

        # Fresh tracker per request so source numbering starts at [Source 1]
        citation_tracker = CitationTracker()

        # Get or create session (anonymous sessions keyed by current epoch second)
        session_id = request.session_id or f"session_{int(time.time())}"
        conversation = self._get_or_create_conversation(session_id)

        try:
            # Step 1: Query Expansion — original query plus LLM-generated
            # variations/decompositions (see QueryExpander)
            if settings.ENABLE_QUERY_EXPANSION:
                expanded_queries = self.query_expander.expand(
                    request.query,
                    num_variations=settings.MAX_QUERY_VARIATIONS - 1
                )
            else:
                expanded_queries = [request.query]

            # Step 2: Retrieve documents for each query variation
            all_documents = []
            for query in expanded_queries:
                docs = self.retriever.retrieve(
                    query=query,
                    ticker=request.ticker,
                    doc_types=request.doc_types,
                    top_k=settings.RETRIEVAL_TOP_K
                )
                all_documents.extend(docs)

            # Step 3: Deduplicate documents (variations often hit the same chunks)
            unique_docs = self._deduplicate_documents(all_documents)

            # Step 4: Rerank documents; skip MMR when there is nothing to prune
            if settings.ENABLE_RERANKING and len(unique_docs) > request.top_k:
                reranked_docs = self.reranker.rerank(
                    query=request.query,
                    documents=unique_docs,
                    top_k=request.top_k,
                    diversity_score=settings.MMR_DIVERSITY_SCORE
                )
            else:
                reranked_docs = unique_docs[:request.top_k]

            # Step 5: Contextual Compression — trim chunks to query-relevant sentences
            if settings.ENABLE_COMPRESSION:
                compressed_docs = self.compressor.compress(
                    query=request.query,
                    documents=reranked_docs
                )
            else:
                compressed_docs = reranked_docs

            # Step 6: Prepare context with [Source N] citation markers
            context = citation_tracker.format_context_with_citations(compressed_docs)

            # Step 7: Get conversation history (plain-text transcript for the prompt)
            conversation_history = ""
            if conversation.messages:
                history_msgs = conversation.get_messages()
                # Format last few exchanges
                recent_history = history_msgs[-6:]  # Last 3 exchanges
                if recent_history:
                    conversation_history = "Previous conversation:\n"
                    for msg in recent_history:
                        role_label = "User" if msg["role"] == "user" else "Assistant"
                        # Truncate long messages to keep the prompt small
                        content = msg["content"][:300]
                        if len(msg["content"]) > 300:
                            content += "..."
                        conversation_history += f"{role_label}: {content}\n\n"

            # Step 8: Generate answer via the prompt->LLM chain
            chain = self.prompt | self.llm
            response = chain.invoke({
                "conversation_history": conversation_history,
                "context": context,
                "query": request.query
            })

            answer = response.content

            # Step 9: Update conversation history only after a successful generation
            conversation.add_message("user", request.query)
            conversation.add_message("assistant", answer)

            # Step 10: Get sources list referenced by the answer's citations
            sources_list = citation_tracker.get_sources_list()
            sources = [Source(**src) for src in sources_list]

            # Calculate processing time
            processing_time = time.time() - start_time

            # Serialize sources so the dict round-trips through the cache
            response_dict = {
                "answer": answer,
                "sources": [src.dict() if hasattr(src, 'dict') else src for src in sources],
                "query": request.query,
                "processing_time": round(processing_time, 2),
                "expanded_queries": expanded_queries if len(expanded_queries) > 1 else None,
                "num_documents_retrieved": len(compressed_docs),
                "session_id": session_id
            }

            # Cache response (only for queries without session history)
            if settings.ENABLE_RESPONSE_CACHE and not request.session_id:
                cache_manager.response_cache.set(
                    query=request.query,
                    response=response_dict,
                    ticker=request.ticker,
                    doc_types=request.doc_types,
                    top_k=request.top_k
                )

            return QueryResponse(**response_dict)

        except Exception as e:
            # Surface the failure to the API layer; callers decide the HTTP status
            print(f"RAG chain error: {e}")
            raise
401
+
402
+ def _deduplicate_documents(
403
+ self,
404
+ documents: List[Document]
405
+ ) -> List[Document]:
406
+ """
407
+ Remove duplicate documents based on content and metadata.
408
+
409
+ Args:
410
+ documents: List of documents
411
+
412
+ Returns:
413
+ Deduplicated list of documents
414
+ """
415
+ seen = set()
416
+ unique_docs = []
417
+
418
+ for doc in documents:
419
+ # Create unique key
420
+ metadata = doc.metadata
421
+ key = f"{metadata.get('filename', '')}_{metadata.get('chunk_id', '')}_{doc.page_content[:100]}"
422
+
423
+ if key not in seen:
424
+ seen.add(key)
425
+ unique_docs.append(doc)
426
+
427
+ return unique_docs
428
+
429
+ def clear_conversation(self, session_id: str):
430
+ """Clear conversation history for a session."""
431
+ if session_id in self.conversations:
432
+ del self.conversations[session_id]
433
+
434
    async def aprocess_query(self, request: QueryRequest) -> QueryResponse:
        """
        Async version of process_query.

        Mirrors the sync pipeline step for step, awaiting the async variants
        of each component (aexpand/aretrieve/arerank/acompress/ainvoke).
        Keep the two methods in lockstep when changing either.

        Args:
            request: QueryRequest with query and filters

        Returns:
            QueryResponse with answer and sources

        Raises:
            Exception: any pipeline failure is logged and re-raised.
        """
        start_time = time.time()

        # Check response cache (only for queries without session history;
        # session-aware answers depend on prior turns and must not be shared)
        if settings.ENABLE_RESPONSE_CACHE and not request.session_id:
            cached_response = cache_manager.response_cache.get(
                query=request.query,
                ticker=request.ticker,
                doc_types=request.doc_types,
                top_k=request.top_k
            )
            if cached_response is not None:
                # Add processing time and return cached response.
                # NOTE(review): assumes the QueryResponse model accepts a
                # 'from_cache' field — confirm against the schema.
                cached_response['processing_time'] = round(time.time() - start_time, 2)
                cached_response['from_cache'] = True
                return QueryResponse(**cached_response)

        # Fresh tracker per request so source numbering starts at [Source 1]
        citation_tracker = CitationTracker()

        # Get or create session (anonymous sessions keyed by current epoch second)
        session_id = request.session_id or f"session_{int(time.time())}"
        conversation = self._get_or_create_conversation(session_id)

        try:
            # Query expansion (async)
            if settings.ENABLE_QUERY_EXPANSION:
                expanded_queries = await self.query_expander.aexpand(
                    request.query,
                    num_variations=settings.MAX_QUERY_VARIATIONS - 1
                )
            else:
                expanded_queries = [request.query]

            # Retrieve documents for each query variation
            all_documents = []
            for query in expanded_queries:
                docs = await self.retriever.aretrieve(
                    query=query,
                    ticker=request.ticker,
                    doc_types=request.doc_types,
                    top_k=settings.RETRIEVAL_TOP_K
                )
                all_documents.extend(docs)

            # Deduplicate (variations often hit the same chunks)
            unique_docs = self._deduplicate_documents(all_documents)

            # Rerank; skip MMR when there is nothing to prune
            if settings.ENABLE_RERANKING and len(unique_docs) > request.top_k:
                reranked_docs = await self.reranker.arerank(
                    query=request.query,
                    documents=unique_docs,
                    top_k=request.top_k,
                    diversity_score=settings.MMR_DIVERSITY_SCORE
                )
            else:
                reranked_docs = unique_docs[:request.top_k]

            # Compress chunks down to query-relevant sentences
            if settings.ENABLE_COMPRESSION:
                compressed_docs = await self.compressor.acompress(
                    query=request.query,
                    documents=reranked_docs
                )
            else:
                compressed_docs = reranked_docs

            # Prepare context with [Source N] citation markers
            context = citation_tracker.format_context_with_citations(compressed_docs)

            # Get conversation history (plain-text transcript for the prompt)
            conversation_history = ""
            if conversation.messages:
                history_msgs = conversation.get_messages()
                recent_history = history_msgs[-6:]  # Last 3 exchanges
                if recent_history:
                    conversation_history = "Previous conversation:\n"
                    for msg in recent_history:
                        role_label = "User" if msg["role"] == "user" else "Assistant"
                        # Truncate long messages to keep the prompt small
                        content = msg["content"][:300]
                        if len(msg["content"]) > 300:
                            content += "..."
                        conversation_history += f"{role_label}: {content}\n\n"

            # Generate answer via the prompt->LLM chain (async)
            chain = self.prompt | self.llm
            response = await chain.ainvoke({
                "conversation_history": conversation_history,
                "context": context,
                "query": request.query
            })

            answer = response.content

            # Update conversation history only after a successful generation
            conversation.add_message("user", request.query)
            conversation.add_message("assistant", answer)

            sources_list = citation_tracker.get_sources_list()
            sources = [Source(**src) for src in sources_list]
            processing_time = time.time() - start_time

            # Serialize sources so the dict round-trips through the cache
            response_dict = {
                "answer": answer,
                "sources": [src.dict() if hasattr(src, 'dict') else src for src in sources],
                "query": request.query,
                "processing_time": round(processing_time, 2),
                "expanded_queries": expanded_queries if len(expanded_queries) > 1 else None,
                "num_documents_retrieved": len(compressed_docs),
                "session_id": session_id
            }

            # Cache response (only for queries without session history)
            if settings.ENABLE_RESPONSE_CACHE and not request.session_id:
                cache_manager.response_cache.set(
                    query=request.query,
                    response=response_dict,
                    ticker=request.ticker,
                    doc_types=request.doc_types,
                    top_k=request.top_k
                )

            return QueryResponse(**response_dict)

        except Exception as e:
            # Surface the failure to the API layer; callers decide the HTTP status
            print(f"RAG chain error: {e}")
            raise
backend/app/rag/compressor.py ADDED
@@ -0,0 +1,173 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Contextual compression to extract relevant sentences from retrieved chunks."""
2
+ from typing import List
3
+ from langchain_core.documents import Document
4
+ from langchain_openai import ChatOpenAI
5
+ from langchain_core.prompts import ChatPromptTemplate
6
+ from app.config import settings
7
+
8
+
9
class ContextualCompressor:
    """Compresses retrieved documents by extracting only relevant content.

    Uses an LLM prompt that returns either the relevant sentences verbatim
    or the literal sentinel "NOT_RELEVANT" (in which case the document is
    dropped). Documents shorter than 200 characters are passed through
    unchanged; on any LLM error the original document is kept as a fallback.
    """

    def __init__(self):
        """Initialize LLM for compression (temperature 0 for deterministic extraction)."""
        self.llm = ChatOpenAI(
            model=settings.OPENAI_MODEL,
            temperature=0,
            openai_api_key=settings.OPENAI_API_KEY
        )

        self.prompt = ChatPromptTemplate.from_messages([
            ("system", """You are a precise information extraction assistant.

Given a query and a document chunk, extract ONLY the sentences that are directly relevant to answering the query.

Rules:
1. Extract complete sentences (don't cut off mid-sentence)
2. Maintain the original wording - do not paraphrase
3. Keep financial figures and context together
4. If the entire chunk is relevant, return it as-is
5. If nothing is relevant, return "NOT_RELEVANT"
6. Preserve numerical data and labels exactly as written

Return only the extracted sentences, separated by spaces."""),
            ("user", """Query: {query}

Document:
{document}

Relevant sentences:""")
        ])

    def compress(
        self,
        query: str,
        documents: List[Document]
    ) -> List[Document]:
        """
        Compress documents by extracting relevant sentences.

        Args:
            query: Original query text
            documents: List of documents to compress

        Returns:
            List of compressed documents (irrelevant documents are dropped)
        """
        if not documents:
            return []

        compressed_docs = []
        # Build the extraction chain once; it is loop-invariant.
        chain = self.prompt | self.llm

        for doc in documents:
            try:
                # Skip very short documents (already concise)
                if len(doc.page_content) < 200:
                    compressed_docs.append(doc)
                    continue

                # Extract relevant content
                response = chain.invoke({
                    "query": query,
                    "document": doc.page_content
                })

                extracted = response.content.strip()

                # Drop documents the model judged irrelevant
                if extracted == "NOT_RELEVANT" or not extracted:
                    continue

                # Create compressed document with same metadata (copied so the
                # original document is never mutated)
                compressed_doc = Document(
                    page_content=extracted,
                    metadata=doc.metadata.copy()
                )
                compressed_docs.append(compressed_doc)

            except Exception as e:
                print(f"Compression error for doc: {e}")
                # Fallback: include original document rather than losing context
                compressed_docs.append(doc)

        return compressed_docs

    def compress_batch(
        self,
        query: str,
        documents: List[Document],
        batch_size: int = 5
    ) -> List[Document]:
        """
        Compress documents in batches.

        NOTE: batches are still processed sequentially by ``compress``; this
        method only chunks the input, it does not add parallelism.

        Args:
            query: Original query text
            documents: List of documents to compress
            batch_size: Number of documents to process at once

        Returns:
            List of compressed documents
        """
        if not documents:
            return []

        compressed_docs = []

        # Process in batches
        for i in range(0, len(documents), batch_size):
            batch = documents[i:i + batch_size]
            compressed_batch = self.compress(query, batch)
            compressed_docs.extend(compressed_batch)

        return compressed_docs

    async def acompress(
        self,
        query: str,
        documents: List[Document]
    ) -> List[Document]:
        """
        Async version of compress method.

        Args:
            query: Original query text
            documents: List of documents to compress

        Returns:
            List of compressed documents (irrelevant documents are dropped)
        """
        if not documents:
            return []

        compressed_docs = []
        # Build the extraction chain once; it is loop-invariant.
        chain = self.prompt | self.llm

        for doc in documents:
            try:
                # Skip very short documents (already concise)
                if len(doc.page_content) < 200:
                    compressed_docs.append(doc)
                    continue

                response = await chain.ainvoke({
                    "query": query,
                    "document": doc.page_content
                })

                extracted = response.content.strip()

                # Drop documents the model judged irrelevant
                if extracted == "NOT_RELEVANT" or not extracted:
                    continue

                compressed_doc = Document(
                    page_content=extracted,
                    metadata=doc.metadata.copy()
                )
                compressed_docs.append(compressed_doc)

            except Exception as e:
                print(f"Compression error for doc: {e}")
                # Fallback: include original document rather than losing context
                compressed_docs.append(doc)

        return compressed_docs
backend/app/rag/query_expander.py ADDED
@@ -0,0 +1,297 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Query expansion for improved retrieval with multi-part question decomposition."""
2
+ from typing import List
3
+ from langchain_openai import ChatOpenAI
4
+ from langchain_core.prompts import ChatPromptTemplate
5
+ from app.config import settings
6
+
7
+
8
class QueryExpander:
    """Expands queries into multiple variations and decomposes multi-part questions.

    Two LLM passes are used: a decomposition pass that splits multi-part
    questions into standalone sub-queries, and an expansion pass that
    generates alternative phrasings. Both sync and async entry points share
    the same heuristics via ``_is_multi_part`` and ``_strip_numbering``.
    """

    def __init__(self):
        """Initialize LLM and prompt templates for query expansion."""
        self.llm = ChatOpenAI(
            model=settings.OPENAI_MODEL,
            temperature=0.3,  # mild creativity helps produce varied phrasings
            openai_api_key=settings.OPENAI_API_KEY
        )

        # Prompt for detecting and decomposing multi-part questions
        self.decompose_prompt = ChatPromptTemplate.from_messages([
            ("system", """You are a financial query analyzer. Analyze if this is a multi-part question with distinct sub-questions.

If the query contains multiple DISTINCT questions (numbered or clearly separated topics), break it down into individual sub-queries.
If it's a single complex question, return it as-is.

Rules:
- Each sub-query should be standalone and answerable independently
- Preserve the ticker/company context in each sub-query
- Keep financial terminology intact
- Number sub-queries if there are multiple

Examples:
Input: "For ACM: 1. What was revenue? 2. What are the risks?"
Output:
1. What was ACM's revenue for the most recent fiscal year?
2. What are the major risks for ACM according to the latest 10-K?

Input: "What was ACM's revenue growth rate?"
Output:
What was ACM's revenue growth rate?

Return ONLY the decomposed queries, one per line. If single query, return it unchanged."""),
            ("user", "Query: {query}")
        ])

        # Prompt for expanding individual queries
        self.expansion_prompt = ChatPromptTemplate.from_messages([
            ("system", """You are a financial query expansion expert.
Generate {num_variations} alternative phrasings of the user's query that would help retrieve relevant financial information.

Focus on:
- Different financial terminology (e.g., "revenue" vs "sales" vs "contract revenue")
- Different ways to ask about financial metrics
- Explicit mention of financial statement types if relevant (balance sheet, income statement, cash flow, 10-K)
- Keeping the core intent of the original query

Return ONLY the query variations, one per line, without numbering or explanations."""),
            ("user", "Original query: {query}")
        ])

    @staticmethod
    def _is_multi_part(query: str) -> bool:
        """Heuristically detect whether *query* contains several distinct questions.

        Shared by the sync and async decomposition paths so the heuristics
        cannot drift apart.
        """
        return any([
            '1.' in query and '2.' in query,
            '1)' in query and '2)' in query,
            query.count('?') > 1,  # multiple question marks
            ' and ' in query.lower() and len(query.split()) > 15  # long query with 'and'
        ])

    @staticmethod
    def _strip_numbering(queries: List[str]) -> List[str]:
        """Remove leading list numbering like "1. " or "2) " from each query.

        Args:
            queries: Raw sub-query lines from the LLM response

        Returns:
            Non-empty queries with leading numbering removed
        """
        import re  # local import: 're' is not imported at module level in this file

        cleaned_queries = []
        for q in queries:
            cleaned = re.sub(r'^\d+[\.\)]\s*', '', q)
            if cleaned:
                cleaned_queries.append(cleaned)
        return cleaned_queries

    def expand(self, query: str, num_variations: int = 2) -> List[str]:
        """
        Expand a query into multiple variations.
        Handles multi-part questions by decomposing them first.

        Args:
            query: Original query text
            num_variations: Number of variations to generate per sub-query (default: 2)

        Returns:
            List of query variations including the original and decomposed parts
        """
        # Always include the original query first
        all_queries = [query]

        try:
            # Step 1: Check if this is a multi-part question and decompose
            sub_queries = self._decompose_query(query)

            # Step 2: If decomposed into multiple parts, expand each part
            if len(sub_queries) > 1:
                print(f"Decomposed into {len(sub_queries)} sub-queries")
                for sub_query in sub_queries:
                    # Add the sub-query itself (deduplicated)
                    if sub_query not in all_queries:
                        all_queries.append(sub_query)

                    # Generate variations for this sub-query if it's complex enough
                    if self._should_expand(sub_query):
                        variations = self._generate_variations(sub_query, num_variations)
                        all_queries.extend([v for v in variations if v not in all_queries])
            else:
                # Single query - just expand normally if complex enough
                if self._should_expand(query):
                    variations = self._generate_variations(query, num_variations)
                    all_queries.extend([v for v in variations if v not in all_queries])

            return all_queries

        except Exception as e:
            print(f"Query expansion error: {e}")
            # Fallback to original query only
            return [query]

    def _decompose_query(self, query: str) -> List[str]:
        """
        Decompose a multi-part query into individual sub-queries.

        Only calls the LLM when cheap textual heuristics suggest the query
        really is multi-part; otherwise returns the query unchanged.

        Args:
            query: Original query text

        Returns:
            List of sub-queries (or single query if not multi-part)
        """
        try:
            if not self._is_multi_part(query):
                return [query]

            # Use LLM to decompose
            chain = self.decompose_prompt | self.llm
            response = chain.invoke({"query": query})

            # Parse response: one sub-query per non-empty line
            sub_queries = [q.strip() for q in response.content.strip().split('\n') if q.strip()]

            cleaned_queries = self._strip_numbering(sub_queries)
            return cleaned_queries if cleaned_queries else [query]

        except Exception as e:
            print(f"Query decomposition error: {e}")
            return [query]

    def _generate_variations(self, query: str, num_variations: int) -> List[str]:
        """
        Generate variations of a single query.

        Args:
            query: Query text to expand
            num_variations: Number of variations to generate

        Returns:
            List of query variations (empty on LLM failure)
        """
        try:
            chain = self.expansion_prompt | self.llm
            response = chain.invoke({
                "query": query,
                "num_variations": num_variations
            })

            # Parse response: one variation per non-empty line, capped at the requested count
            variations = [v.strip() for v in response.content.strip().split('\n') if v.strip()]

            return variations[:num_variations]

        except Exception as e:
            print(f"Variation generation error: {e}")
            return []

    def _should_expand(self, query: str) -> bool:
        """
        Determine if query should be expanded.

        Simple queries (< 5 words) or yes/no questions typically don't need expansion.

        Args:
            query: Original query text

        Returns:
            True if query should be expanded
        """
        # Don't expand very short queries
        word_count = len(query.split())
        if word_count < 5:
            return False

        # Don't expand simple what/when/where questions
        query_lower = query.lower().strip()
        simple_patterns = [
            query_lower.startswith("what is"),
            query_lower.startswith("what was"),
            query_lower.startswith("when did"),
            query_lower.startswith("where is"),
            "yes or no" in query_lower,
        ]

        if any(simple_patterns):
            return False

        # Expand complex queries
        return True

    async def aexpand(self, query: str, num_variations: int = 2) -> List[str]:
        """
        Async version of expand method.

        Args:
            query: Original query text
            num_variations: Number of variations to generate per sub-query

        Returns:
            List of query variations including the original and decomposed parts
        """
        all_queries = [query]

        try:
            # Decompose if multi-part
            sub_queries = await self._adecompose_query(query)

            if len(sub_queries) > 1:
                print(f"Decomposed into {len(sub_queries)} sub-queries")
                for sub_query in sub_queries:
                    if sub_query not in all_queries:
                        all_queries.append(sub_query)

                    if self._should_expand(sub_query):
                        variations = await self._agenerate_variations(sub_query, num_variations)
                        all_queries.extend([v for v in variations if v not in all_queries])
            else:
                if self._should_expand(query):
                    variations = await self._agenerate_variations(query, num_variations)
                    all_queries.extend([v for v in variations if v not in all_queries])

            return all_queries

        except Exception as e:
            print(f"Query expansion error: {e}")
            return [query]

    async def _adecompose_query(self, query: str) -> List[str]:
        """Async version of _decompose_query (same heuristics and parsing)."""
        try:
            if not self._is_multi_part(query):
                return [query]

            chain = self.decompose_prompt | self.llm
            response = await chain.ainvoke({"query": query})

            sub_queries = [q.strip() for q in response.content.strip().split('\n') if q.strip()]

            cleaned_queries = self._strip_numbering(sub_queries)
            return cleaned_queries if cleaned_queries else [query]

        except Exception as e:
            print(f"Query decomposition error: {e}")
            return [query]

    async def _agenerate_variations(self, query: str, num_variations: int) -> List[str]:
        """Async version of _generate_variations."""
        try:
            chain = self.expansion_prompt | self.llm
            response = await chain.ainvoke({
                "query": query,
                "num_variations": num_variations
            })

            variations = [v.strip() for v in response.content.strip().split('\n') if v.strip()]

            return variations[:num_variations]

        except Exception as e:
            print(f"Variation generation error: {e}")
            return []
backend/app/rag/reranker.py ADDED
@@ -0,0 +1,182 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Reranking retrieved documents using MMR (Maximal Marginal Relevance)."""
2
+ from typing import List
3
+ import numpy as np
4
+ from langchain_core.documents import Document
5
+ from langchain_openai import OpenAIEmbeddings
6
+ from app.config import settings
7
+
8
+
9
+ class MMRReranker:
10
+ """Reranks documents using Maximal Marginal Relevance algorithm."""
11
+
12
+ def __init__(self):
13
+ """Initialize embeddings for MMR computation."""
14
+ self.embeddings = OpenAIEmbeddings(
15
+ model=settings.OPENAI_EMBEDDING_MODEL,
16
+ openai_api_key=settings.OPENAI_API_KEY,
17
+ dimensions=settings.OPENAI_EMBEDDING_DIMENSION
18
+ )
19
+
20
+ def rerank(
21
+ self,
22
+ query: str,
23
+ documents: List[Document],
24
+ top_k: int = 10,
25
+ diversity_score: float = 0.3
26
+ ) -> List[Document]:
27
+ """
28
+ Rerank documents using MMR to balance relevance and diversity.
29
+
30
+ MMR Formula:
31
+ MMR = argmax [λ * Sim(D_i, Q) - (1-λ) * max Sim(D_i, D_j)]
32
+ where D_j are already selected documents
33
+
34
+ Args:
35
+ query: Original query text
36
+ documents: List of retrieved documents
37
+ top_k: Number of documents to return
38
+ diversity_score: Lambda parameter (0 = max diversity, 1 = max relevance)
39
+
40
+ Returns:
41
+ Reranked list of top_k documents
42
+ """
43
+ if not documents:
44
+ return []
45
+
46
+ # If we have fewer documents than top_k, return all
47
+ if len(documents) <= top_k:
48
+ return documents
49
+
50
+ try:
51
+ # Get embeddings
52
+ query_embedding = self.embeddings.embed_query(query)
53
+ doc_texts = [doc.page_content for doc in documents]
54
+ doc_embeddings = self.embeddings.embed_documents(doc_texts)
55
+
56
+ # Convert to numpy arrays
57
+ query_vec = np.array(query_embedding)
58
+ doc_vecs = np.array(doc_embeddings)
59
+
60
+ # Compute similarity to query for all documents
61
+ query_similarities = self._cosine_similarity(query_vec, doc_vecs)
62
+
63
+ # MMR selection
64
+ selected_indices = []
65
+ remaining_indices = list(range(len(documents)))
66
+
67
+ for _ in range(min(top_k, len(documents))):
68
+ if not remaining_indices:
69
+ break
70
+
71
+ mmr_scores = []
72
+
73
+ for idx in remaining_indices:
74
+ # Relevance to query
75
+ relevance = query_similarities[idx]
76
+
77
+ # Redundancy with already selected documents
78
+ if selected_indices:
79
+ selected_vecs = doc_vecs[selected_indices]
80
+ redundancy = np.max(
81
+ self._cosine_similarity(doc_vecs[idx], selected_vecs)
82
+ )
83
+ else:
84
+ redundancy = 0
85
+
86
+ # MMR score
87
+ mmr = diversity_score * relevance - (1 - diversity_score) * redundancy
88
+ mmr_scores.append((idx, mmr))
89
+
90
+ # Select document with highest MMR score
91
+ best_idx = max(mmr_scores, key=lambda x: x[1])[0]
92
+ selected_indices.append(best_idx)
93
+ remaining_indices.remove(best_idx)
94
+
95
+ # Return reranked documents
96
+ return [documents[i] for i in selected_indices]
97
+
98
+ except Exception as e:
99
+ print(f"Reranking error: {e}")
100
+ # Fallback: return top_k by original similarity score
101
+ return self._fallback_rerank(documents, top_k)
102
+
103
+ def _cosine_similarity(
104
+ self,
105
+ vec1: np.ndarray,
106
+ vec2: np.ndarray
107
+ ) -> np.ndarray:
108
+ """
109
+ Compute cosine similarity between vectors.
110
+
111
+ Args:
112
+ vec1: Single vector or array of vectors
113
+ vec2: Array of vectors
114
+
115
+ Returns:
116
+ Similarity scores
117
+ """
118
+ if vec1.ndim == 1:
119
+ vec1 = vec1.reshape(1, -1)
120
+ if vec2.ndim == 1:
121
+ vec2 = vec2.reshape(1, -1)
122
+
123
+ # Normalize vectors
124
+ vec1_norm = vec1 / np.linalg.norm(vec1, axis=1, keepdims=True)
125
+ vec2_norm = vec2 / np.linalg.norm(vec2, axis=1, keepdims=True)
126
+
127
+ # Compute dot product
128
+ similarity = np.dot(vec1_norm, vec2_norm.T)
129
+
130
+ return similarity.flatten() if similarity.shape[0] == 1 else similarity
131
+
132
+ def _fallback_rerank(
133
+ self,
134
+ documents: List[Document],
135
+ top_k: int
136
+ ) -> List[Document]:
137
+ """
138
+ Fallback reranking using existing similarity scores.
139
+
140
+ Args:
141
+ documents: List of documents with similarity_score in metadata
142
+ top_k: Number of documents to return
143
+
144
+ Returns:
145
+ Top-k documents sorted by similarity score
146
+ """
147
+ # Sort by similarity score (higher is better)
148
+ sorted_docs = sorted(
149
+ documents,
150
+ key=lambda d: d.metadata.get('similarity_score', 0),
151
+ reverse=True
152
+ )
153
+
154
+ return sorted_docs[:top_k]
155
+
156
+ async def arerank(
157
+ self,
158
+ query: str,
159
+ documents: List[Document],
160
+ top_k: int = 10,
161
+ diversity_score: float = 0.3
162
+ ) -> List[Document]:
163
+ """
164
+ Async version of rerank method.
165
+
166
+ Args:
167
+ query: Original query text
168
+ documents: List of retrieved documents
169
+ top_k: Number of documents to return
170
+ diversity_score: Lambda parameter for MMR
171
+
172
+ Returns:
173
+ Reranked list of top_k documents
174
+ """
175
+ import asyncio
176
+ return await asyncio.to_thread(
177
+ self.rerank,
178
+ query,
179
+ documents,
180
+ top_k,
181
+ diversity_score
182
+ )
backend/app/rag/retriever.py ADDED
@@ -0,0 +1,215 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Zilliz retriever with hybrid search capabilities."""
2
+ from typing import List, Optional
3
+ from langchain_core.documents import Document
4
+ from langchain_milvus import Milvus
5
+ from langchain_openai import OpenAIEmbeddings
6
+ from app.config import settings
7
+ from app.utils.cache import cache_manager
8
+
9
+
10
class CachedEmbeddings(OpenAIEmbeddings):
    """OpenAI embeddings wrapper that consults the embedding cache first."""

    def embed_query(self, text: str) -> List[float]:
        """Embed a single query, serving from the cache when possible."""
        use_cache = settings.ENABLE_EMBEDDING_CACHE
        if use_cache:
            hit = cache_manager.embedding_cache.get(text)
            if hit is not None:
                return hit

        # Cache miss: ask OpenAI, then remember the result.
        vector = super().embed_query(text)
        if use_cache:
            cache_manager.embedding_cache.set(text, vector)
        return vector

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Embed many texts, only calling OpenAI for the cache misses."""
        results: List[Optional[List[float]]] = [None] * len(texts)
        pending: List[int] = []  # indices still needing an API embedding
        use_cache = settings.ENABLE_EMBEDDING_CACHE

        # First pass: satisfy what we can from the cache.
        for idx, text in enumerate(texts):
            hit = cache_manager.embedding_cache.get(text) if use_cache else None
            if hit is not None:
                results[idx] = hit
            else:
                pending.append(idx)

        # Second pass: one batched API call for everything uncached.
        if pending:
            fresh = super().embed_documents([texts[i] for i in pending])
            for idx, vector in zip(pending, fresh):
                results[idx] = vector
                if use_cache:
                    cache_manager.embedding_cache.set(texts[idx], vector)

        return results
61
+
62
+
63
class ZillizRetriever:
    """Retriever for Zilliz vector database with metadata filtering."""

    def __init__(self):
        """Initialize Zilliz connection and embeddings."""
        # OpenAI embeddings wrapped with the in-process embedding cache.
        self.embeddings = CachedEmbeddings(
            model=settings.OPENAI_EMBEDDING_MODEL,
            openai_api_key=settings.OPENAI_API_KEY,
            dimensions=settings.OPENAI_EMBEDDING_DIMENSION
        )

        # LangChain Milvus vector store pointed at the Zilliz collection.
        self.vector_store = Milvus(
            embedding_function=self.embeddings,
            collection_name=settings.COLLECTION_NAME,
            connection_args={
                "uri": settings.ZILLIZ_URI,
                "token": settings.ZILLIZ_TOKEN,
            },
            auto_id=True,
        )

    def retrieve(
        self,
        query: str,
        ticker: Optional[str] = None,
        doc_types: Optional[List[str]] = None,
        top_k: int = 30
    ) -> List[Document]:
        """
        Retrieve documents using hybrid search (semantic + metadata filtering).

        Args:
            query: User query text
            ticker: Optional ticker symbol filter (e.g., 'ACM')
            doc_types: Optional list of document types to filter
            top_k: Number of documents to retrieve

        Returns:
            List of Document objects with 'similarity_score' added to metadata
        """
        # Serve from the document cache only when the cached result is large
        # enough: the cache key ignores top_k, so an earlier, smaller search
        # must not silently satisfy a later request for more documents.
        if settings.ENABLE_DOCUMENT_CACHE:
            cached_docs = cache_manager.document_cache.get(query, ticker, doc_types)
            if cached_docs is not None and len(cached_docs) >= top_k:
                return cached_docs[:top_k]

        # Build metadata filter expression (None when no filters apply)
        filter_expr = self._build_filter_expression(ticker, doc_types)

        # similarity_search_with_score returns (Document, score) pairs.
        if filter_expr:
            results = self.vector_store.similarity_search_with_score(
                query=query,
                k=top_k,
                expr=filter_expr
            )
        else:
            results = self.vector_store.similarity_search_with_score(
                query=query,
                k=top_k
            )

        # Surface the raw score on each document for downstream reranking.
        documents = []
        for doc, score in results:
            doc.metadata['similarity_score'] = float(score)
            documents.append(doc)

        # Cache the results for subsequent identical searches
        if settings.ENABLE_DOCUMENT_CACHE:
            cache_manager.document_cache.set(query, documents, ticker, doc_types)

        return documents

    @staticmethod
    def _escape_filter_value(value: str) -> str:
        """Escape backslashes and double quotes for a Milvus string literal."""
        return value.replace('\\', '\\\\').replace('"', '\\"')

    def _build_filter_expression(
        self,
        ticker: Optional[str],
        doc_types: Optional[List[str]]
    ) -> Optional[str]:
        """
        Build a Milvus boolean filter expression from the parameters.

        Args:
            ticker: Optional ticker symbol
            doc_types: Optional list of document types

        Returns:
            Filter expression string, or None when no filters apply
        """
        conditions = []

        if ticker:
            # Escape user-supplied values so an embedded quote cannot break
            # (or alter) the filter expression.
            conditions.append(f'ticker == "{self._escape_filter_value(ticker)}"')

        if doc_types:
            # OR together the accepted document types
            type_conditions = [
                f'doc_type == "{self._escape_filter_value(dt)}"' for dt in doc_types
            ]
            conditions.append(f'({" or ".join(type_conditions)})')

        # Combine with AND; empty -> no filter
        return " and ".join(conditions) if conditions else None

    async def aretrieve(
        self,
        query: str,
        ticker: Optional[str] = None,
        doc_types: Optional[List[str]] = None,
        top_k: int = 30
    ) -> List[Document]:
        """
        Async version of retrieve method.

        Note: the Milvus client is synchronous, so the sync call runs in a
        worker thread via asyncio.to_thread to avoid blocking the event loop.
        """
        import asyncio
        return await asyncio.to_thread(
            self.retrieve,
            query,
            ticker,
            doc_types,
            top_k
        )

    def get_collection_stats(self) -> dict:
        """
        Get statistics about the collection.

        Returns:
            Dictionary with collection statistics; on failure, a dictionary
            containing an 'error' key instead of raising (stats are
            informational only).
        """
        try:
            collection = self.vector_store.col
            return {
                "collection_name": settings.COLLECTION_NAME,
                "total_documents": collection.num_entities,
                "embedding_dimension": settings.OPENAI_EMBEDDING_DIMENSION,
            }
        except Exception as e:
            # Best-effort: report the failure rather than propagate it.
            return {
                "error": str(e),
                "collection_name": settings.COLLECTION_NAME
            }
backend/app/utils/__init__.py ADDED
File without changes
backend/app/utils/cache.py ADDED
@@ -0,0 +1,393 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Caching system for embeddings, queries, and responses."""
2
+ import hashlib
3
+ import time
4
+ from typing import Optional, Dict, Any, List
5
+ from datetime import datetime, timedelta
6
+ import json
7
+
8
+
9
class CacheEntry:
    """A cached value plus the bookkeeping needed for TTL expiry."""

    def __init__(self, value: Any, ttl: int = 3600):
        """
        Wrap a value for caching.

        Args:
            value: Value to cache
            ttl: Lifetime in seconds (default: 1 hour)
        """
        self.value = value
        self.created_at = time.time()  # wall-clock creation time
        self.ttl = ttl
        self.hit_count = 0

    def is_expired(self) -> bool:
        """Return True once the entry's age exceeds its TTL."""
        age = time.time() - self.created_at
        return age > self.ttl

    def increment_hits(self):
        """Record one more cache hit against this entry."""
        self.hit_count += 1
32
+
33
+
34
class EmbeddingCache:
    """In-memory TTL cache mapping query text to its embedding vector."""

    def __init__(self, max_size: int = 1000, ttl: int = 86400):
        """
        Initialize embedding cache.

        Args:
            max_size: Maximum number of entries (default: 1000)
            ttl: Time to live in seconds (default: 24 hours)
        """
        self.cache: Dict[str, CacheEntry] = {}
        self.max_size = max_size
        self.ttl = ttl
        self.hits = 0
        self.misses = 0

    def _generate_key(self, text: str) -> str:
        """MD5 digest of the lowercased, stripped text."""
        normalized = text.lower().strip()
        return hashlib.md5(normalized.encode()).hexdigest()

    def get(self, text: str) -> Optional[List[float]]:
        """
        Return the cached embedding for the text, or None on miss/expiry.

        Args:
            text: Query text

        Returns:
            Cached embedding vector or None
        """
        key = self._generate_key(text)
        entry = self.cache.get(key)
        if entry is not None:
            if entry.is_expired():
                # Expired entries are purged lazily on access.
                del self.cache[key]
            else:
                entry.increment_hits()
                self.hits += 1
                return entry.value

        self.misses += 1
        return None

    def set(self, text: str, embedding: List[float]):
        """
        Store an embedding under the normalized text key.

        Args:
            text: Query text
            embedding: Embedding vector
        """
        key = self._generate_key(text)

        # Make room before inserting when at capacity.
        if len(self.cache) >= self.max_size:
            self._evict_oldest()

        self.cache[key] = CacheEntry(embedding, ttl=self.ttl)

    def _evict_oldest(self):
        """Drop the oldest ~10% of entries (by creation time)."""
        victims = max(1, self.max_size // 10)
        oldest_first = sorted(self.cache, key=lambda k: self.cache[k].created_at)
        for key in oldest_first[:victims]:
            del self.cache[key]

    def clear(self):
        """Empty the cache and reset the hit/miss counters."""
        self.cache.clear()
        self.hits = 0
        self.misses = 0

    def get_stats(self) -> Dict[str, Any]:
        """Return size and hit-rate statistics."""
        total = self.hits + self.misses
        rate = (self.hits / total * 100) if total > 0 else 0
        return {
            "size": len(self.cache),
            "max_size": self.max_size,
            "hits": self.hits,
            "misses": self.misses,
            "hit_rate": round(rate, 2),
            "total_requests": total
        }
128
+
129
+
130
class QueryResponseCache:
    """TTL cache for fully-formed query responses."""

    def __init__(self, max_size: int = 500, ttl: int = 3600):
        """
        Initialize response cache.

        Args:
            max_size: Maximum number of entries (default: 500)
            ttl: Time to live in seconds (default: 1 hour)
        """
        self.cache: Dict[str, CacheEntry] = {}
        self.max_size = max_size
        self.ttl = ttl
        self.hits = 0
        self.misses = 0

    def _generate_key(
        self,
        query: str,
        ticker: Optional[str] = None,
        doc_types: Optional[List[str]] = None,
        top_k: int = 10
    ) -> str:
        """Derive a deterministic MD5 key from the normalized parameters."""
        # Normalization makes the key insensitive to case, surrounding
        # whitespace, and doc_type ordering.
        parts = [
            query.lower().strip(),
            ticker.lower() if ticker else "",
            ",".join(sorted(doc_types) if doc_types else []),
            str(top_k),
        ]
        return hashlib.md5("|".join(parts).encode()).hexdigest()

    def get(
        self,
        query: str,
        ticker: Optional[str] = None,
        doc_types: Optional[List[str]] = None,
        top_k: int = 10
    ) -> Optional[Dict[str, Any]]:
        """
        Return a cached response, or None on miss/expiry.

        Args:
            query: Query text
            ticker: Ticker filter
            doc_types: Document type filters
            top_k: Number of results

        Returns:
            Cached response or None
        """
        key = self._generate_key(query, ticker, doc_types, top_k)
        entry = self.cache.get(key)
        if entry is not None:
            if entry.is_expired():
                # Expired entries are purged lazily on access.
                del self.cache[key]
            else:
                entry.increment_hits()
                self.hits += 1
                return entry.value

        self.misses += 1
        return None

    def set(
        self,
        query: str,
        response: Dict[str, Any],
        ticker: Optional[str] = None,
        doc_types: Optional[List[str]] = None,
        top_k: int = 10
    ):
        """
        Cache a response under the normalized parameter key.

        Args:
            query: Query text
            response: Response to cache
            ticker: Ticker filter
            doc_types: Document type filters
            top_k: Number of results
        """
        key = self._generate_key(query, ticker, doc_types, top_k)

        if len(self.cache) >= self.max_size:
            self._evict_lru()

        self.cache[key] = CacheEntry(response, ttl=self.ttl)

    def _evict_lru(self):
        """Drop the ~10% least-used entries (fewest hits, then oldest)."""
        victims = max(1, self.max_size // 10)
        ranked = sorted(
            self.cache,
            key=lambda k: (self.cache[k].hit_count, self.cache[k].created_at),
        )
        for key in ranked[:victims]:
            del self.cache[key]

    def clear(self):
        """Empty the cache and reset the hit/miss counters."""
        self.cache.clear()
        self.hits = 0
        self.misses = 0

    def get_stats(self) -> Dict[str, Any]:
        """Return size, hit-rate, and estimated cost-saving statistics."""
        total = self.hits + self.misses
        rate = (self.hits / total * 100) if total > 0 else 0

        # Rough $ saved: every hit avoids one ~$0.0001 LLM call.
        saved = self.hits * 0.0001

        return {
            "size": len(self.cache),
            "max_size": self.max_size,
            "hits": self.hits,
            "misses": self.misses,
            "hit_rate": round(rate, 2),
            "total_requests": total,
            "estimated_savings_usd": round(saved, 4)
        }
266
+
267
+
268
class DocumentCache:
    """TTL cache of retrieved document lists, keyed by search parameters."""

    def __init__(self, max_size: int = 200, ttl: int = 7200):
        """
        Initialize document cache.

        Args:
            max_size: Maximum number of entries (default: 200)
            ttl: Time to live in seconds (default: 2 hours)
        """
        self.cache: Dict[str, CacheEntry] = {}
        self.max_size = max_size
        self.ttl = ttl
        self.hits = 0
        self.misses = 0

    def _generate_key(
        self,
        query: str,
        ticker: Optional[str] = None,
        doc_types: Optional[List[str]] = None
    ) -> str:
        """Hash the normalized query/ticker/doc_types into a stable key."""
        normalized_query = query.lower().strip()
        normalized_ticker = ticker.lower() if ticker else ""
        normalized_types = sorted(doc_types) if doc_types else []
        raw = f"{normalized_query}|{normalized_ticker}|{','.join(normalized_types)}"
        return hashlib.md5(raw.encode()).hexdigest()

    def get(
        self,
        query: str,
        ticker: Optional[str] = None,
        doc_types: Optional[List[str]] = None
    ) -> Optional[List[Any]]:
        """Return the cached document list, or None on miss/expiry."""
        key = self._generate_key(query, ticker, doc_types)
        entry = self.cache.get(key)
        if entry is not None:
            if entry.is_expired():
                # Expired entries are purged lazily on access.
                del self.cache[key]
            else:
                entry.increment_hits()
                self.hits += 1
                return entry.value

        self.misses += 1
        return None

    def set(
        self,
        query: str,
        documents: List[Any],
        ticker: Optional[str] = None,
        doc_types: Optional[List[str]] = None
    ):
        """Cache retrieved documents under the normalized search key."""
        key = self._generate_key(query, ticker, doc_types)

        if len(self.cache) >= self.max_size:
            self._evict_oldest()

        self.cache[key] = CacheEntry(documents, ttl=self.ttl)

    def _evict_oldest(self):
        """Drop the oldest ~10% of entries by creation time."""
        victims = max(1, self.max_size // 10)
        oldest_first = sorted(self.cache, key=lambda k: self.cache[k].created_at)
        for key in oldest_first[:victims]:
            del self.cache[key]

    def clear(self):
        """Empty the cache and reset the hit/miss counters."""
        self.cache.clear()
        self.hits = 0
        self.misses = 0

    def get_stats(self) -> Dict[str, Any]:
        """Return size and hit-rate statistics."""
        total = self.hits + self.misses
        rate = (self.hits / total * 100) if total > 0 else 0
        return {
            "size": len(self.cache),
            "max_size": self.max_size,
            "hits": self.hits,
            "misses": self.misses,
            "hit_rate": round(rate, 2),
            "total_requests": total
        }
365
+
366
+
367
class CacheManager:
    """Owns the application's three caches and aggregates their statistics."""

    def __init__(self):
        """Create the embedding (24h), response (1h), and document (2h) caches."""
        self.embedding_cache = EmbeddingCache(max_size=1000, ttl=86400)  # 24h
        self.response_cache = QueryResponseCache(max_size=500, ttl=3600)  # 1h
        self.document_cache = DocumentCache(max_size=200, ttl=7200)  # 2h

    def clear_all(self):
        """Flush every cache at once."""
        for cache in (self.embedding_cache, self.response_cache, self.document_cache):
            cache.clear()

    def get_all_stats(self) -> Dict[str, Any]:
        """Snapshot per-cache statistics, stamped with the current time."""
        return {
            "embedding_cache": self.embedding_cache.get_stats(),
            "response_cache": self.response_cache.get_stats(),
            "document_cache": self.document_cache.get_stats(),
            "timestamp": datetime.now().isoformat()
        }
390
+
391
+
392
# Global cache manager instance, created at import time and shared process-wide
cache_manager = CacheManager()
backend/app/utils/citations.py ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Utilities for tracking and formatting source citations."""
2
+ from typing import List, Dict, Any
3
+ from langchain_core.documents import Document
4
+
5
+
6
class CitationTracker:
    """Tracks sources and generates citation references."""

    def __init__(self):
        # Documents in first-seen order; source IDs are their 1-based positions.
        self.sources: List[Document] = []
        # Dedup key -> previously assigned source ID.
        self.source_map: Dict[str, int] = {}

    def add_document(self, doc: Document) -> int:
        """
        Add a document and return its source ID.

        The same chunk (same filename + chunk_id) always maps to the same ID,
        so repeated chunks share a single citation.

        Args:
            doc: LangChain Document with metadata

        Returns:
            Source ID (1-indexed)
        """
        doc_key = self._create_doc_key(doc)

        # Return existing ID if already added
        if doc_key in self.source_map:
            return self.source_map[doc_key]

        # Add new source
        source_id = len(self.sources) + 1
        self.sources.append(doc)
        self.source_map[doc_key] = source_id

        return source_id

    def _create_doc_key(self, doc: Document) -> str:
        """Create unique key for document deduplication."""
        metadata = doc.metadata
        filename = metadata.get('filename', 'unknown')
        chunk_id = metadata.get('chunk_id', 'unknown')
        # BUG FIX: the key previously hard-coded the literal "(unknown)" in
        # place of the filename (leaving `filename` unused), so chunks from
        # different files sharing a chunk_id collided into one citation.
        return f"{filename}_{chunk_id}"

    def format_context_with_citations(self, documents: List[Document]) -> str:
        """
        Format documents into context string with source markers.

        Args:
            documents: List of LangChain Documents

        Returns:
            Formatted context string with [Source N] markers
        """
        context_parts = []

        for doc in documents:
            source_id = self.add_document(doc)
            # Format: [Source N] content
            context_parts.append(f"[Source {source_id}] {doc.page_content}")

        return "\n\n".join(context_parts)

    def get_sources_list(self) -> List[Dict[str, Any]]:
        """
        Get formatted list of all sources.

        Returns:
            List of source dictionaries with metadata
        """
        sources_list = []

        for idx, doc in enumerate(self.sources, start=1):
            metadata = doc.metadata

            # Get text preview (first 200 chars)
            text_preview = doc.page_content[:200]
            if len(doc.page_content) > 200:
                text_preview += "..."

            # chunk_id may be stored as an int; normalize to string for the API
            chunk_id = metadata.get('chunk_id')
            if chunk_id is not None:
                chunk_id = str(chunk_id)

            source_info = {
                "source_id": idx,
                "filename": metadata.get('filename', 'unknown'),
                "doc_type": metadata.get('doc_type', 'unknown'),
                "ticker": metadata.get('ticker'),
                "similarity_score": float(metadata.get('similarity_score', 0.0)),
                "chunk_id": chunk_id,
                "text_preview": text_preview
            }

            sources_list.append(source_info)

        return sources_list

    def clear(self):
        """Clear all tracked sources."""
        self.sources.clear()
        self.source_map.clear()
104
+
105
+
106
def extract_citations_from_answer(answer: str) -> List[int]:
    """
    Extract citation numbers from answer text.

    Args:
        answer: Generated answer containing [Source N] markers

    Returns:
        Sorted list of unique source IDs cited in the answer
    """
    import re

    # Every "[Source N]" marker yields its numeric ID.
    found = re.findall(r'\[Source (\d+)\]', answer)

    # Deduplicate and return in ascending order.
    unique_ids = {int(num) for num in found}
    return sorted(unique_ids)
backend/app/utils/conversation.py ADDED
@@ -0,0 +1,180 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Conversation memory management for multi-turn interactions."""
2
+ from typing import List, Dict, Any, Optional
3
+ from datetime import datetime
4
+ import uuid
5
+
6
+
7
class ConversationMessage:
    """One utterance in a conversation, tagged with its speaker and time."""

    def __init__(self, role: str, content: str, timestamp: Optional[datetime] = None):
        self.role = role  # 'user' or 'assistant'
        self.content = content
        # Default to "now" when the caller doesn't supply a timestamp.
        self.timestamp = timestamp if timestamp is not None else datetime.now()

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a plain dict with an ISO-8601 timestamp."""
        return {
            "role": self.role,
            "content": self.content,
            "timestamp": self.timestamp.isoformat()
        }
22
+
23
+
24
class ConversationHistory:
    """Rolling conversation transcript bounded by an approximate token budget."""

    def __init__(self, max_tokens: int = 4000):
        """
        Initialize conversation history.

        Args:
            max_tokens: Approximate token budget for retained history
        """
        self.messages: List[ConversationMessage] = []
        self.max_tokens = max_tokens

    def add_message(self, role: str, content: str):
        """
        Append a message and trim older history to the token budget.

        Args:
            role: 'user' or 'assistant'
            content: Message content
        """
        self.messages.append(ConversationMessage(role, content))
        self._trim_history()

    def get_messages(self) -> List[Dict[str, str]]:
        """Return the transcript as role/content dicts (LangChain format)."""
        return [{"role": m.role, "content": m.content} for m in self.messages]

    def get_context_summary(self) -> str:
        """Return a short text rendering of the last three exchanges."""
        if not self.messages:
            return ""

        lines = []
        for m in self.messages[-6:]:  # last 3 exchanges (6 messages)
            speaker = "User" if m.role == "user" else "Assistant"
            lines.append(f"{speaker}: {m.content[:200]}...")

        return "\n\n".join(lines)

    def _trim_history(self):
        """
        Drop the oldest messages when the estimated size exceeds the budget.

        Uses a cheap ~4-characters-per-token estimate, and always keeps at
        least the most recent exchange (two messages).
        """
        if not self.messages:
            return

        # Rough token estimate: 4 chars per token.
        total_chars = sum(len(m.content) for m in self.messages)
        if total_chars / 4 <= self.max_tokens:
            return

        char_budget = self.max_tokens * 4
        used = 0
        start = len(self.messages)

        # Walk newest-to-oldest, keeping messages while they fit the budget.
        for i in range(len(self.messages) - 1, -1, -1):
            size = len(self.messages[i].content)
            if used + size > char_budget:
                start = i + 1
                break
            used += size

        # Never trim away the latest exchange.
        start = min(start, len(self.messages) - 2)

        if start > 0:
            self.messages = self.messages[start:]

    def clear(self):
        """Forget the entire transcript."""
        self.messages.clear()

    def to_dict(self) -> Dict[str, Any]:
        """Serialize history and settings to a plain dict."""
        return {
            "messages": [m.to_dict() for m in self.messages],
            "max_tokens": self.max_tokens
        }
128
+
129
+
130
class SessionManager:
    """Registry of conversation sessions keyed by session ID."""

    def __init__(self):
        self.sessions: Dict[str, ConversationHistory] = {}

    def create_session(self, session_id: Optional[str] = None) -> str:
        """
        Create a new conversation session.

        Args:
            session_id: Optional session ID; a UUID4 is generated when omitted

        Returns:
            Session ID
        """
        sid = session_id if session_id is not None else str(uuid.uuid4())
        self.sessions[sid] = ConversationHistory()
        return sid

    def get_session(self, session_id: str) -> Optional[ConversationHistory]:
        """
        Look up a conversation session.

        Args:
            session_id: Session identifier

        Returns:
            ConversationHistory, or None when the ID is unknown
        """
        return self.sessions.get(session_id)

    def delete_session(self, session_id: str):
        """
        Remove a session if it exists (no-op otherwise).

        Args:
            session_id: Session identifier
        """
        self.sessions.pop(session_id, None)

    def clear_all_sessions(self):
        """Drop every session."""
        self.sessions.clear()


# Global session manager instance
session_manager = SessionManager()
backend/requirements.txt ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn[standard]
3
+ python-dotenv
4
+ pydantic
5
+ pydantic-settings
6
+
7
+
8
+ langchain
9
+ langchain-openai
10
+ langchain-milvus
11
+ langchain-community
12
+ langchain-core
13
+
14
+ pymilvus
15
+
16
+ openai
17
+
18
+ python-multipart
19
+ aiohttp
20
+ tiktoken
21
+ numpy
frontend/index.html ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>FinSage Analytics - AI-Powered Financial Analysis</title>
7
+ <link rel="stylesheet" href="style.css">
8
+ </head>
9
+ <body>
10
+ <!-- Top Navigation Bar -->
11
+ <nav class="top-nav">
12
+ <div class="nav-left">
13
+ <div class="logo">
14
+ <svg width="24" height="24" viewBox="0 0 24 24" fill="none" xmlns="http://www.w3.org/2000/svg">
15
+ <path d="M12 2L2 7L12 12L22 7L12 2Z" fill="#3B82F6"/>
16
+ <path d="M2 17L12 22L22 17" stroke="#3B82F6" stroke-width="2"/>
17
+ </svg>
18
+ <div>
19
+ <div class="logo-title">FinSage Analytics</div>
20
+ <div class="logo-subtitle">AI-Powered Fundamental Analysis for S&P MidCap 400</div>
21
+ </div>
22
+ </div>
23
+ </div>
24
+ <div class="nav-right">
25
+ <button class="icon-btn" id="exportBtn" title="Export to PDF">
26
+ <svg width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
27
+ <path d="M21 15v4a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-4"/>
28
+ <polyline points="7 10 12 15 17 10"/>
29
+ <line x1="12" y1="15" x2="12" y2="3"/>
30
+ </svg>
31
+ </button>
32
+ <button class="icon-btn" id="githubBtn" title="View Documentation">
33
+ <svg width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
34
+ <path d="M9 19c-5 1.5-5-2.5-7-3m14 6v-3.87a3.37 3.37 0 0 0-.94-2.61c3.14-.35 6.44-1.54 6.44-7A5.44 5.44 0 0 0 20 4.77 5.07 5.07 0 0 0 19.91 1S18.73.65 16 2.48a13.38 13.38 0 0 0-7 0C6.27.65 5.09 1 5.09 1A5.07 5.07 0 0 0 5 4.77a5.44 5.44 0 0 0-1.5 3.78c0 5.42 3.3 6.61 6.44 7A3.37 3.37 0 0 0 9 18.13V22"/>
35
+ </svg>
36
+ </button>
37
+ <a href="https://www.linkedin.com/in/adediran-adeyemi-17103b114/" target="_blank" class="icon-btn user-btn" title="Connect on LinkedIn">
38
+ <div class="user-avatar">A</div>
39
+ </a>
40
+ </div>
41
+ </nav>
42
+
43
+ <!-- Main Container -->
44
+ <div class="main-container">
45
+ <!-- Left Panel - Query Input -->
46
+ <div class="left-panel">
47
+ <h2 class="panel-title">Financial Query</h2>
48
+
49
+ <div class="query-box">
50
+ <textarea
51
+ id="queryInput"
52
+ placeholder="Ask about company financials..."
53
+ rows="6"
54
+ ></textarea>
55
+
56
+ <button id="submitBtn" class="submit-button">
57
+ <span class="btn-text">Submit</span>
58
+ <span class="loader" style="display: none;"></span>
59
+ </button>
60
+ </div>
61
+
62
+ <!-- Example Queries Section -->
63
+ <div class="examples-section">
64
+ <h3>Example Queries</h3>
65
+ <div class="example-queries-list">
66
+ <div class="example-item" data-query="What is ACM's current ratio for 2023?">
67
+ e.g., What is ACM's current ratio for 2023?
68
+ </div>
69
+ <div class="example-item" data-query="Compare ACM's debt-to-equity ratio over 8 years.">
70
+ e.g., Compare ACM's debt-to-equity ratio over 8 years.
71
+ </div>
72
+ <div class="example-item" data-query="Show ACM's revenue growth trend.">
73
+ e.g., Show ACM's revenue growth trend.
74
+ </div>
75
+ </div>
76
+ </div>
77
+
78
+ <!-- Recent Queries History -->
79
+ <div class="recent-queries">
80
+ <h3>Recent queries history</h3>
81
+ <select id="historySelect" class="history-dropdown">
82
+ <option value="">Select a recent query</option>
83
+ </select>
84
+ </div>
85
+ </div>
86
+
87
+ <!-- Right Panel - AI Answer -->
88
+ <div class="right-panel">
89
+ <!-- Loading State -->
90
+ <div id="loadingState" class="loading-state" style="display: none;">
91
+ <div class="loading-spinner"></div>
92
+ <span>Analyzing...</span>
93
+ </div>
94
+
95
+ <!-- Response Section -->
96
+ <div id="responseSection" style="display: none;">
97
+ <h2 class="panel-title">AI Answer</h2>
98
+
99
+ <div id="answer" class="answer-content"></div>
100
+
101
+ <!-- Query Variations -->
102
+ <div id="expandedQueries" class="expanded-section" style="display: none;">
103
+ <h3>Query Variations Used:</h3>
104
+ <ul id="queriesList"></ul>
105
+ </div>
106
+
107
+ <!-- Sources Section -->
108
+ <div class="sources-section">
109
+ <h3 class="sources-title">Sources</h3>
110
+ <div id="sources" class="sources-list"></div>
111
+ </div>
112
+ </div>
113
+
114
+ <!-- Initial State -->
115
+ <div id="initialState" class="initial-state">
116
+ <div class="empty-state">
117
+ <svg width="64" height="64" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1">
118
+ <path d="M21 15a2 2 0 0 1-2 2H7l-4 4V5a2 2 0 0 1 2-2h14a2 2 0 0 1 2 2z"/>
119
+ </svg>
120
+ <p>Ask a question to get started</p>
121
+ </div>
122
+ </div>
123
+ </div>
124
+ </div>
125
+
126
+ <!-- Error Toast -->
127
+ <div id="errorToast" class="error-toast" style="display: none;">
128
+ <span id="errorMessage"></span>
129
+ </div>
130
+
131
+ <!-- Include jsPDF library for PDF export -->
132
+ <script src="https://cdnjs.cloudflare.com/ajax/libs/jspdf/2.5.1/jspdf.umd.min.js"></script>
133
+ <script src="script.js"></script>
134
+ </body>
135
+ </html>
frontend/script.js ADDED
@@ -0,0 +1,524 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
// ---- Configuration ----
// Resolve the API root from the page's own origin so the same bundle works
// both locally and when hosted on Hugging Face Spaces.
const API_BASE_URL = window.location.origin;

// ---- Per-session state ----
let currentSessionId = null;   // stable id for this browser tab
let currentAnswer = null;      // last answer text (kept for PDF export)
let currentSources = null;     // last sources array (kept for PDF export)
let currentQuery = null;       // last submitted query (kept for PDF export)
let queryHistory = [];         // recent {query, answer, timestamp} entries

// ---- DOM handles, resolved once at load ----
const byId = (id) => document.getElementById(id);

const queryInput = byId('queryInput');
const submitBtn = byId('submitBtn');
const btnText = submitBtn.querySelector('.btn-text');
const loader = submitBtn.querySelector('.loader');
const responseSection = byId('responseSection');
const initialState = byId('initialState');
const loadingState = byId('loadingState');
const answerDiv = byId('answer');
const sourcesDiv = byId('sources');
const errorToast = byId('errorToast');
const errorMessage = byId('errorMessage');
const expandedQueriesDiv = byId('expandedQueries');
const queriesList = byId('queriesList');
const exportBtn = byId('exportBtn');
const githubBtn = byId('githubBtn');
const historySelect = byId('historySelect');
28
+
29
// Bootstrap: restore session id and history, wire handlers, ping the API.
function init() {
    // Reuse the session id across reloads within the same tab.
    const storedId = sessionStorage.getItem('sessionId');
    currentSessionId = storedId || generateSessionId();
    sessionStorage.setItem('sessionId', currentSessionId);

    // Restore any query history saved earlier in this tab.
    const rawHistory = sessionStorage.getItem('queryHistory');
    if (rawHistory) {
        try {
            queryHistory = JSON.parse(rawHistory);
            updateHistoryDropdown();
        } catch (e) {
            // Corrupt JSON in storage — start with an empty history.
            queryHistory = [];
        }
    }

    setupEventListeners();
    checkHealth();

    // Log the resolved API URL to ease deployment debugging.
    console.log('API Base URL:', API_BASE_URL);
}
51
+
52
// Create a reasonably unique session id: timestamp plus a 9-character
// base-36 random suffix.
function generateSessionId() {
    // String.prototype.substr is deprecated (Annex B); slice(2, 11) takes
    // the same 9 characters starting at index 2.
    const randomPart = Math.random().toString(36).slice(2, 11);
    return `session_${Date.now()}_${randomPart}`;
}
55
+
56
// Attach every UI event handler. All lookups are null-tolerant so a missing
// element simply leaves that feature unwired.
function setupEventListeners() {
    // Primary submit action.
    submitBtn?.addEventListener('click', handleSubmit);

    // Ctrl+Enter inside the textarea also submits.
    queryInput?.addEventListener('keydown', (event) => {
        if (event.ctrlKey && event.key === 'Enter') {
            handleSubmit();
        }
    });

    // Clicking an example fills the input and submits immediately.
    for (const item of document.querySelectorAll('.example-item')) {
        item.addEventListener('click', () => {
            const example = item.getAttribute('data-query');
            if (example && queryInput) {
                queryInput.value = example;
                handleSubmit();
            }
        });
    }

    // PDF export of the current answer.
    exportBtn?.addEventListener('click', exportToPDF);

    // Project repository link (new tab).
    githubBtn?.addEventListener('click', () => {
        window.open('https://github.com/Adeyemi0/FinSight-RAG-Application-', '_blank');
    });

    // Selecting a past query copies it back into the input.
    historySelect?.addEventListener('change', (event) => {
        const past = event.target.value;
        if (past && queryInput) {
            queryInput.value = past;
        }
    });
}
104
+
105
// Rebuild the recent-queries dropdown: placeholder first, newest queries on top.
function updateHistoryDropdown() {
    if (!historySelect) return;

    historySelect.innerHTML = '<option value="">Select a recent query</option>';

    for (const entry of [...queryHistory].reverse()) {
        const option = document.createElement('option');
        option.value = entry.query;
        // Truncate long queries so the dropdown stays readable.
        option.textContent = entry.query.substring(0, 60) + (entry.query.length > 60 ? '...' : '');
        historySelect.appendChild(option);
    }
}
118
+
119
// Append a query/answer pair to the in-memory history, cap it at the 20
// newest entries, and persist it to sessionStorage.
function saveToHistory(query, answer) {
    const entry = {
        query,
        answer: answer.substring(0, 500),   // store only a snippet
        timestamp: new Date().toISOString()
    };
    queryHistory.push(entry);

    // Keep only the 20 most recent entries.
    if (queryHistory.length > 20) {
        queryHistory = queryHistory.slice(-20);
    }

    try {
        sessionStorage.setItem('queryHistory', JSON.stringify(queryHistory));
        updateHistoryDropdown();
    } catch (e) {
        // Storage may be full or unavailable — history is best-effort.
        console.error('Failed to save history:', e);
    }
}
139
+
140
// Main submit handler: validate the input, POST the query to the backend,
// then render the result (or an error toast on failure).
async function handleSubmit() {
    if (!queryInput) return;

    const query = queryInput.value.trim();
    if (!query) {
        showError('Please enter a question');
        return;
    }

    // Switch the UI into its "working" state.
    setLoading(true);
    hideError();
    if (initialState) initialState.style.display = 'none';
    if (loadingState) loadingState.style.display = 'flex';
    if (responseSection) responseSection.style.display = 'none';

    try {
        const payload = {
            query,
            ticker: 'ACM', // Fixed ticker
            doc_types: null, // No filter
            top_k: 10,
            session_id: currentSessionId
        };

        // API_BASE_URL auto-detects the origin (works on Hugging Face too).
        const response = await fetch(`${API_BASE_URL}/query`, {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify(payload)
        });

        if (!response.ok) {
            // Prefer the backend's error detail; fall back to the status code.
            const errorBody = await response.json().catch(() => ({}));
            throw new Error(errorBody.detail || `HTTP ${response.status}`);
        }

        const data = await response.json();
        if (!data) {
            throw new Error('Empty response from server');
        }

        // Keep the result around for PDF export.
        currentQuery = query;
        currentAnswer = data.answer;
        currentSources = data.sources;

        saveToHistory(query, data.answer || '');
        displayResults(data);

        // Clear the input for the next question.
        queryInput.value = '';
    } catch (error) {
        console.error('Error:', error);
        showError(error.message || 'Failed to process query');

        // Fall back to the empty state on failure.
        if (loadingState) loadingState.style.display = 'none';
        if (initialState) initialState.style.display = 'flex';
    } finally {
        setLoading(false);
    }
}
213
+
214
// Render a successful query response: answer (with optional cache banner),
// expanded-query list, sources, then scroll the panel back to the top.
function displayResults(data) {
    if (!data) return;

    // Swap the loading indicator for the response panel.
    if (loadingState) loadingState.style.display = 'none';
    if (responseSection) responseSection.style.display = 'block';

    if (answerDiv) {
        let answerHTML = formatAnswer(data.answer || 'No answer available');

        // Prepend a banner when the answer came from the server-side cache.
        if (data.from_cache) {
            const cacheAge = Math.floor((data.cache_age_seconds || 0) / 60);
            const ageText = cacheAge < 1 ? 'just now' : `${cacheAge}m ago`;
            answerHTML = `
            <div class="cache-indicator">
                <span class="cache-badge">⚡ Cached</span>
                <span class="cache-details">Retrieved ${ageText} • Hit ${data.cache_hits || 1}x</span>
            </div>
            ` + answerHTML;
        }

        answerDiv.innerHTML = answerHTML;
    }

    // Show query variations only when expansion produced more than one.
    if (expandedQueriesDiv && queriesList) {
        if (data.expanded_queries && data.expanded_queries.length > 1) {
            expandedQueriesDiv.style.display = 'block';
            queriesList.innerHTML = data.expanded_queries
                .map(q => `<li>${escapeHtml(q)}</li>`)
                .join('');
        } else {
            expandedQueriesDiv.style.display = 'none';
        }
    }

    if (sourcesDiv) {
        displaySources(data.sources || []);
    }

    // Bring the reader back to the top of the answer panel.
    const rightPanel = document.querySelector('.right-panel');
    if (rightPanel) {
        rightPanel.scrollTop = 0;
    }
}
264
+
265
// Turn the plain-text answer into styled HTML: escape it, bold header lines,
// box formulas, convert newlines, then highlight citations and numbers.
//
// FIX: the original ran the multiline `^(.+:)$` header pass and the
// `Formula: ([^\n]+)` pass AFTER replacing every `\n` with `<br>`, so no
// newlines remained — headers were never bolded and a formula box swallowed
// the rest of the string. Both newline-dependent passes now run first.
function formatAnswer(answer) {
    if (!answer) return '';

    let formatted = escapeHtml(answer);

    // Bold headers (lines ending with:) — must run while \n still exists.
    formatted = formatted.replace(/^(.+:)$/gm, '<strong>$1</strong>');

    // Create calculation boxes for formulas — [^\n] relies on real newlines.
    formatted = formatted.replace(/Formula: ([^\n]+)/g,
        '<div class="calculation-box"><h4>Formula</h4><div class="calculation-step">$1</div></div>');

    // Line breaks
    formatted = formatted.replace(/\n/g, '<br>');

    // Citations
    formatted = formatted.replace(/\[Source (\d+)\]/g,
        '<span class="citation">[Source $1]</span>');

    // Highlight numbers (currency, percentages, ratios)
    formatted = formatted.replace(/\$[\d,]+\.?\d*[BM]?/g,
        match => `<span class="highlight-number">${match}</span>`);
    formatted = formatted.replace(/\d+\.?\d*%/g,
        match => `<span class="highlight-number">${match}</span>`);

    // Color-code metrics
    formatted = formatted.replace(/(\d+\.?\d+)(x|:1)/g,
        '<span class="metric-green">$1$2</span>');

    return formatted;
}
297
+
298
// Render the retrieved sources as collapsible cards inside #sources.
//
// FIX: the original dereferenced `source.doc_type.replace(...)` without a
// null check; a source lacking `doc_type` threw a TypeError inside .map()
// and blanked the entire sources panel. A 'document' fallback is used now.
function displaySources(sources) {
    if (!sourcesDiv) return;

    if (!sources || sources.length === 0) {
        sourcesDiv.innerHTML = '<p style="color: var(--text-muted);">No sources available</p>';
        return;
    }

    sourcesDiv.innerHTML = sources.map((source, index) => {
        if (!source) return '';

        // Guard against a missing doc_type from the API.
        const docType = source.doc_type || 'document';
        const docTypeLabel = docType === '10k' ? '10-K Report' :
            docType.replace('_', ' ').replace(/\b\w/g, l => l.toUpperCase());

        return `
        <div class="source-card" id="source-${index}">
            <div class="source-header" onclick="toggleSource(${index})">
                <div class="source-title">
                    <span class="source-badge">${docTypeLabel}</span>
                    ${escapeHtml(source.filename || 'Unknown')}
                </div>
                <div class="source-similarity">
                    Similarity Score: <strong>${source.similarity_score ? (source.similarity_score * 100).toFixed(0) + '%' : 'N/A'}</strong>
                </div>
            </div>
            <div class="source-content">
                <div class="source-details">
                    ${source.ticker ? `<span class="source-detail"><strong>Ticker:</strong> ${escapeHtml(source.ticker)}</span>` : ''}
                    ${source.chunk_id ? `<span class="source-detail"><strong>Chunk:</strong> ${escapeHtml(source.chunk_id)}</span>` : ''}
                </div>
                <div class="source-preview">
                    "${escapeHtml(source.text_preview || 'No preview available')}"
                </div>
            </div>
        </div>
        `;
    }).filter(Boolean).join('');
}
337
+
338
// Expand or collapse the source card with the matching index.
function toggleSource(index) {
    document.getElementById(`source-${index}`)?.classList.toggle('expanded');
}
345
+
346
// Export the current query, answer, and sources as a PDF via jsPDF
// (loaded from a CDN in the page). No-op with a toast when nothing to export.
function exportToPDF() {
    if (!currentAnswer) {
        showError('No answer to export. Please ask a question first.');
        return;
    }

    try {
        // jsPDF comes from a CDN <script>; bail out gracefully if absent.
        if (typeof window.jspdf === 'undefined') {
            showError('PDF library not loaded. Please refresh the page.');
            return;
        }

        const { jsPDF } = window.jspdf;
        const pdf = new jsPDF();

        // --- Title + timestamp header ---
        pdf.setFontSize(18);
        pdf.setFont(undefined, 'bold');
        pdf.text('FinSight Analytics Report', 20, 20);

        pdf.setFontSize(10);
        pdf.setFont(undefined, 'normal');
        pdf.setTextColor(100);
        pdf.text(`Generated on ${new Date().toLocaleString()}`, 20, 28);

        // --- Query section ---
        pdf.setFontSize(12);
        pdf.setFont(undefined, 'bold');
        pdf.setTextColor(0);
        pdf.text('Query:', 20, 40);

        pdf.setFont(undefined, 'normal');
        pdf.setFontSize(10);
        const wrappedQuery = pdf.splitTextToSize(currentQuery || '', 170);
        pdf.text(wrappedQuery, 20, 48);

        // --- Answer section (starts below the wrapped query) ---
        let cursorY = 48 + (wrappedQuery.length * 7) + 10;
        pdf.setFontSize(12);
        pdf.setFont(undefined, 'bold');
        pdf.text('Answer:', 20, cursorY);

        cursorY += 8;
        pdf.setFont(undefined, 'normal');
        pdf.setFontSize(10);

        // Strip markup/citations and undo HTML entity escaping.
        const plainAnswer = currentAnswer
            .replace(/<[^>]*>/g, '')          // Remove HTML tags
            .replace(/\[Source \d+\]/g, '')   // Remove citation markers
            .replace(/&nbsp;/g, ' ')
            .replace(/&lt;/g, '<')
            .replace(/&gt;/g, '>')
            .replace(/&amp;/g, '&');

        // Emit line by line, breaking pages near the bottom margin.
        for (const line of pdf.splitTextToSize(plainAnswer, 170)) {
            if (cursorY > 270) {
                pdf.addPage();
                cursorY = 20;
            }
            pdf.text(line, 20, cursorY);
            cursorY += 7;
        }

        // --- Sources section ---
        if (currentSources && currentSources.length > 0) {
            cursorY += 10;
            if (cursorY > 250) {
                pdf.addPage();
                cursorY = 20;
            }

            pdf.setFontSize(12);
            pdf.setFont(undefined, 'bold');
            pdf.text('Sources:', 20, cursorY);
            cursorY += 8;

            pdf.setFontSize(9);
            pdf.setFont(undefined, 'normal');

            currentSources.forEach((source, index) => {
                if (cursorY > 270) {
                    pdf.addPage();
                    cursorY = 20;
                }

                pdf.setFont(undefined, 'bold');
                pdf.text(`[${index + 1}] ${source.filename || 'Unknown'}`, 20, cursorY);
                cursorY += 5;

                pdf.setFont(undefined, 'normal');
                pdf.setTextColor(100);
                pdf.text(`Type: ${source.doc_type || 'N/A'} | Similarity: ${source.similarity_score ? (source.similarity_score * 100).toFixed(0) + '%' : 'N/A'}`, 20, cursorY);
                cursorY += 8;
                pdf.setTextColor(0);
            });
        }

        pdf.save(`FinSight_Analysis_${Date.now()}.pdf`);
    } catch (error) {
        console.error('PDF Export Error:', error);
        showError('Failed to export PDF. Please try again.');
    }
}
459
+
460
// Toggle the submit button between its idle label and the inline spinner.
function setLoading(isLoading) {
    if (!submitBtn) return;

    submitBtn.disabled = isLoading;

    if (btnText && loader) {
        if (isLoading) {
            btnText.style.display = 'none';
            loader.style.display = 'inline-block';
        } else {
            btnText.style.display = 'inline';
            loader.style.display = 'none';
        }
    }
}
471
+
472
// Show an error toast for five seconds; fall back to alert() if the toast
// markup is missing from the page.
function showError(message) {
    if (!errorToast || !errorMessage) {
        alert(message);
        return;
    }

    errorMessage.textContent = message;
    errorToast.style.display = 'block';

    // Auto-dismiss after five seconds.
    setTimeout(() => {
        errorToast.style.display = 'none';
    }, 5000);
}
486
+
487
// Hide the error toast immediately (no-op when the element is missing).
function hideError() {
    if (errorToast) {
        errorToast.style.display = 'none';
    }
}
492
+
493
// Utility: escape HTML by letting the browser do it — assigning textContent
// and reading back innerHTML converts &, <, > etc. into entities.
function escapeHtml(text) {
    if (!text) return '';
    const scratch = document.createElement('div');
    scratch.textContent = text;
    return scratch.innerHTML;
}
500
+
501
// Best-effort API ping at startup. Failures are only logged, never shown:
// on Hugging Face the Space may still be booting when the page loads.
async function checkHealth() {
    try {
        const response = await fetch(`${API_BASE_URL}/health`, {
            signal: AbortSignal.timeout(5000)   // don't hang the console forever
        });

        if (response.ok) {
            console.log('API health check passed');
        } else {
            console.warn('API health check failed');
        }
    } catch (error) {
        console.error('Cannot connect to API:', error);
    }
}
519
+
520
// Expose toggleSource for the inline onclick handlers in the source cards.
window.toggleSource = toggleSource;

// Boot immediately — this script is loaded at the end of <body>.
init();
frontend/style.css ADDED
@@ -0,0 +1,610 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /* Reset and Base Styles */
2
+ * {
3
+ margin: 0;
4
+ padding: 0;
5
+ box-sizing: border-box;
6
+ }
7
+
8
+ :root {
9
+ --bg-primary: #0A0A0A;
10
+ --bg-secondary: #141414;
11
+ --bg-tertiary: #1E1E1E;
12
+ --bg-hover: #252525;
13
+ --border-color: #2A2A2A;
14
+ --text-primary: #E5E5E5;
15
+ --text-secondary: #A0A0A0;
16
+ --text-muted: #6B6B6B;
17
+ --accent-blue: #3B82F6;
18
+ --accent-blue-hover: #2563EB;
19
+ --accent-yellow: #F59E0B;
20
+ --accent-red: #EF4444;
21
+ --accent-green: #10B981;
22
+ }
23
+
24
+ body {
25
+ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Roboto', 'Oxygen', 'Ubuntu', sans-serif;
26
+ background-color: var(--bg-primary);
27
+ color: var(--text-primary);
28
+ min-height: 100vh;
29
+ overflow-x: hidden;
30
+ }
31
+
32
+ /* Top Navigation */
33
+ .top-nav {
34
+ background-color: var(--bg-secondary);
35
+ border-bottom: 1px solid var(--border-color);
36
+ padding: 12px 24px;
37
+ display: flex;
38
+ justify-content: space-between;
39
+ align-items: center;
40
+ position: sticky;
41
+ top: 0;
42
+ z-index: 100;
43
+ }
44
+
45
+ .nav-left .logo {
46
+ display: flex;
47
+ align-items: center;
48
+ gap: 12px;
49
+ }
50
+
51
+ .logo-title {
52
+ font-size: 16px;
53
+ font-weight: 700;
54
+ color: var(--text-primary);
55
+ line-height: 1.2;
56
+ }
57
+
58
+ .logo-subtitle {
59
+ font-size: 11px;
60
+ font-weight: 400;
61
+ color: var(--text-secondary);
62
+ margin-top: 2px;
63
+ }
64
+
65
+ .nav-right {
66
+ display: flex;
67
+ align-items: center;
68
+ gap: 8px;
69
+ }
70
+
71
+ .icon-btn {
72
+ background: transparent;
73
+ border: none;
74
+ color: var(--text-secondary);
75
+ padding: 8px;
76
+ border-radius: 6px;
77
+ cursor: pointer;
78
+ transition: all 0.2s;
79
+ display: flex;
80
+ align-items: center;
81
+ justify-content: center;
82
+ text-decoration: none;
83
+ }
84
+
85
+ .icon-btn:hover {
86
+ background-color: var(--bg-hover);
87
+ color: var(--text-primary);
88
+ }
89
+
90
+ .user-btn .user-avatar {
91
+ width: 28px;
92
+ height: 28px;
93
+ border-radius: 50%;
94
+ background: linear-gradient(135deg, #3B82F6 0%, #2563EB 100%);
95
+ display: flex;
96
+ align-items: center;
97
+ justify-content: center;
98
+ font-size: 13px;
99
+ font-weight: 600;
100
+ color: white;
101
+ }
102
+
103
+ /* Main Container */
104
+ .main-container {
105
+ display: grid;
106
+ grid-template-columns: 490px 1fr;
107
+ height: calc(100vh - 57px);
108
+ gap: 0;
109
+ }
110
+
111
+ /* Left Panel */
112
+ .left-panel {
113
+ background-color: var(--bg-secondary);
114
+ border-right: 1px solid var(--border-color);
115
+ padding: 32px 24px;
116
+ overflow-y: auto;
117
+ display: flex;
118
+ flex-direction: column;
119
+ gap: 24px;
120
+ }
121
+
122
+ .panel-title {
123
+ font-size: 20px;
124
+ font-weight: 600;
125
+ color: var(--text-primary);
126
+ margin-bottom: 8px;
127
+ }
128
+
129
+ /* Query Box */
130
+ .query-box {
131
+ background-color: var(--bg-tertiary);
132
+ border: 1px solid var(--border-color);
133
+ border-radius: 12px;
134
+ padding: 16px;
135
+ }
136
+
137
+ #queryInput {
138
+ width: 100%;
139
+ background-color: transparent;
140
+ border: none;
141
+ color: var(--text-primary);
142
+ font-size: 14px;
143
+ font-family: inherit;
144
+ resize: none;
145
+ outline: none;
146
+ line-height: 1.6;
147
+ margin-bottom: 16px;
148
+ min-height: 120px;
149
+ }
150
+
151
+ #queryInput::placeholder {
152
+ color: var(--text-muted);
153
+ }
154
+
155
+ .submit-button {
156
+ width: 100%;
157
+ background-color: var(--accent-blue);
158
+ color: white;
159
+ border: none;
160
+ padding: 12px;
161
+ border-radius: 8px;
162
+ font-size: 14px;
163
+ font-weight: 600;
164
+ cursor: pointer;
165
+ transition: background-color 0.2s;
166
+ display: flex;
167
+ align-items: center;
168
+ justify-content: center;
169
+ gap: 8px;
170
+ }
171
+
172
+ .submit-button:hover:not(:disabled) {
173
+ background-color: var(--accent-blue-hover);
174
+ }
175
+
176
+ .submit-button:disabled {
177
+ opacity: 0.5;
178
+ cursor: not-allowed;
179
+ }
180
+
181
+ /* Examples Section */
182
+ .examples-section {
183
+ margin-top: 8px;
184
+ }
185
+
186
+ .examples-section h3 {
187
+ font-size: 14px;
188
+ color: var(--text-secondary);
189
+ margin-bottom: 12px;
190
+ font-weight: 600;
191
+ }
192
+
193
+ .example-queries-list {
194
+ display: flex;
195
+ flex-direction: column;
196
+ gap: 8px;
197
+ }
198
+
199
+ .example-item {
200
+ color: var(--text-muted);
201
+ font-size: 13px;
202
+ cursor: pointer;
203
+ padding: 10px 12px;
204
+ border-radius: 6px;
205
+ transition: all 0.2s;
206
+ background-color: var(--bg-tertiary);
207
+ border: 1px solid var(--border-color);
208
+ }
209
+
210
+ .example-item:hover {
211
+ background-color: var(--bg-hover);
212
+ color: var(--text-secondary);
213
+ border-color: var(--accent-blue);
214
+ }
215
+
216
+ /* Recent Queries */
217
+ .recent-queries {
218
+ margin-top: 16px;
219
+ }
220
+
221
+ .recent-queries h3 {
222
+ font-size: 14px;
223
+ color: var(--text-secondary);
224
+ margin-bottom: 12px;
225
+ font-weight: 600;
226
+ }
227
+
228
+ .history-dropdown {
229
+ width: 100%;
230
+ background-color: var(--bg-tertiary);
231
+ border: 1px solid var(--border-color);
232
+ color: var(--text-primary);
233
+ padding: 10px 12px;
234
+ border-radius: 8px;
235
+ font-size: 13px;
236
+ cursor: pointer;
237
+ outline: none;
238
+ transition: border-color 0.2s;
239
+ }
240
+
241
+ .history-dropdown:focus {
242
+ border-color: var(--accent-blue);
243
+ }
244
+
245
+ .history-dropdown option {
246
+ background-color: var(--bg-tertiary);
247
+ color: var(--text-primary);
248
+ padding: 8px;
249
+ }
250
+
251
+ /* Right Panel */
252
+ .right-panel {
253
+ background-color: var(--bg-primary);
254
+ padding: 32px 48px;
255
+ overflow-y: auto;
256
+ position: relative;
257
+ }
258
+
259
+ /* Loading State */
260
+ .loading-state {
261
+ display: flex;
262
+ align-items: center;
263
+ gap: 12px;
264
+ color: var(--text-secondary);
265
+ font-size: 14px;
266
+ margin-top: 24px;
267
+ }
268
+
269
+ .loading-spinner {
270
+ width: 20px;
271
+ height: 20px;
272
+ border: 2px solid var(--border-color);
273
+ border-top-color: var(--accent-blue);
274
+ border-radius: 50%;
275
+ animation: spin 0.8s linear infinite;
276
+ }
277
+
278
+ @keyframes spin {
279
+ to { transform: rotate(360deg); }
280
+ }
281
+
282
+ /* Initial Empty State */
283
+ .initial-state {
284
+ display: flex;
285
+ align-items: center;
286
+ justify-content: center;
287
+ height: 100%;
288
+ min-height: 400px;
289
+ }
290
+
291
+ .empty-state {
292
+ text-align: center;
293
+ color: var(--text-muted);
294
+ }
295
+
296
+ .empty-state svg {
297
+ margin-bottom: 16px;
298
+ opacity: 0.5;
299
+ }
300
+
301
+ .empty-state p {
302
+ font-size: 15px;
303
+ }
304
+
305
+ /* Answer Content */
306
+ .answer-content {
307
+ background-color: var(--bg-secondary);
308
+ border: 1px solid var(--border-color);
309
+ border-radius: 12px;
310
+ padding: 24px;
311
+ margin-top: 16px;
312
+ line-height: 1.8;
313
+ font-size: 14px;
314
+ color: var(--text-primary);
315
+ }
316
+
317
+ .answer-content strong {
318
+ color: var(--text-primary);
319
+ font-weight: 600;
320
+ }
321
+
322
+ .answer-content .highlight-number {
323
+ background-color: var(--accent-yellow);
324
+ color: var(--bg-primary);
325
+ padding: 2px 6px;
326
+ border-radius: 4px;
327
+ font-weight: 700;
328
+ font-size: 13px;
329
+ }
330
+
331
+ .answer-content .metric-red {
332
+ color: var(--accent-red);
333
+ font-weight: 600;
334
+ }
335
+
336
+ .answer-content .metric-green {
337
+ color: var(--accent-green);
338
+ font-weight: 600;
339
+ }
340
+
341
+ /* Citation Styling */
342
+ .citation {
343
+ background-color: var(--accent-blue);
344
+ color: white;
345
+ padding: 2px 8px;
346
+ border-radius: 4px;
347
+ font-size: 12px;
348
+ font-weight: 600;
349
+ white-space: nowrap;
350
+ }
351
+
352
+ /* Calculation Boxes */
353
+ .calculation-box {
354
+ background-color: var(--bg-tertiary);
355
+ border-left: 3px solid var(--accent-blue);
356
+ padding: 16px;
357
+ margin: 16px 0;
358
+ border-radius: 6px;
359
+ }
360
+
361
+ .calculation-box h4 {
362
+ font-size: 13px;
363
+ color: var(--text-secondary);
364
+ margin-bottom: 8px;
365
+ font-weight: 600;
366
+ }
367
+
368
+ .calculation-step {
369
+ font-family: 'Courier New', monospace;
370
+ color: var(--text-primary);
371
+ font-size: 13px;
372
+ margin: 4px 0;
373
+ }
374
+
375
+ /* Expanded Queries */
376
+ .expanded-section {
377
+ margin: 24px 0;
378
+ padding: 16px;
379
+ background-color: var(--bg-secondary);
380
+ border: 1px solid var(--border-color);
381
+ border-radius: 8px;
382
+ }
383
+
384
+ .expanded-section h3 {
385
+ font-size: 14px;
386
+ color: var(--text-secondary);
387
+ margin-bottom: 12px;
388
+ font-weight: 600;
389
+ }
390
+
391
+ .expanded-section ul {
392
+ list-style: none;
393
+ padding: 0;
394
+ }
395
+
396
+ .expanded-section li {
397
+ padding: 8px 0;
398
+ color: var(--text-secondary);
399
+ font-size: 13px;
400
+ }
401
+
402
+ .expanded-section li:before {
403
+ content: "→ ";
404
+ color: var(--accent-blue);
405
+ margin-right: 8px;
406
+ }
407
+
408
+ /* Sources Section */
409
+ .sources-section {
410
+ margin-top: 32px;
411
+ }
412
+
413
+ .sources-title {
414
+ font-size: 16px;
415
+ color: var(--text-primary);
416
+ margin-bottom: 16px;
417
+ font-weight: 600;
418
+ }
419
+
420
+ .sources-list {
421
+ display: flex;
422
+ flex-direction: column;
423
+ gap: 12px;
424
+ }
425
+
426
+ /* Source Card */
427
+ .source-card {
428
+ background-color: var(--bg-secondary);
429
+ border: 1px solid var(--border-color);
430
+ border-radius: 10px;
431
+ overflow: hidden;
432
+ transition: all 0.2s;
433
+ }
434
+
435
+ .source-card:hover {
436
+ border-color: var(--accent-blue);
437
+ }
438
+
439
+ .source-header {
440
+ padding: 16px;
441
+ display: flex;
442
+ justify-content: space-between;
443
+ align-items: center;
444
+ cursor: pointer;
445
+ user-select: none;
446
+ }
447
+
448
+ .source-title {
449
+ font-size: 14px;
450
+ font-weight: 600;
451
+ color: var(--text-primary);
452
+ display: flex;
453
+ align-items: center;
454
+ gap: 8px;
455
+ }
456
+
457
+ .source-badge {
458
+ background-color: var(--accent-blue);
459
+ color: white;
460
+ padding: 2px 8px;
461
+ border-radius: 4px;
462
+ font-size: 11px;
463
+ font-weight: 600;
464
+ }
465
+
466
+ .source-similarity {
467
+ font-size: 13px;
468
+ color: var(--text-secondary);
469
+ }
470
+
471
+ .source-similarity strong {
472
+ color: var(--accent-green);
473
+ }
474
+
475
+ .source-content {
476
+ padding: 0 16px 16px 16px;
477
+ display: none;
478
+ }
479
+
480
+ .source-card.expanded .source-content {
481
+ display: block;
482
+ }
483
+
484
+ .source-details {
485
+ margin-bottom: 12px;
486
+ display: flex;
487
+ flex-wrap: wrap;
488
+ gap: 12px;
489
+ }
490
+
491
+ .source-detail {
492
+ font-size: 12px;
493
+ color: var(--text-muted);
494
+ }
495
+
496
+ .source-detail strong {
497
+ color: var(--text-secondary);
498
+ }
499
+
500
+ .source-preview {
501
+ background-color: var(--bg-tertiary);
502
+ padding: 12px;
503
+ border-radius: 6px;
504
+ font-size: 13px;
505
+ color: var(--text-secondary);
506
+ line-height: 1.6;
507
+ font-style: italic;
508
+ }
509
+
510
+ .view-context-btn {
511
+ margin-top: 8px;
512
+ color: var(--accent-blue);
513
+ background: none;
514
+ border: none;
515
+ font-size: 12px;
516
+ cursor: pointer;
517
+ padding: 4px 0;
518
+ display: inline-flex;
519
+ align-items: center;
520
+ gap: 4px;
521
+ }
522
+
523
+ .view-context-btn:hover {
524
+ text-decoration: underline;
525
+ }
526
+
527
+ /* Error Toast */
528
+ .error-toast {
529
+ position: fixed;
530
+ bottom: 24px;
531
+ right: 24px;
532
+ background-color: var(--accent-red);
533
+ color: white;
534
+ padding: 16px 24px;
535
+ border-radius: 8px;
536
+ box-shadow: 0 10px 40px rgba(0, 0, 0, 0.3);
537
+ z-index: 1000;
538
+ animation: slideIn 0.3s ease-out;
539
+ }
540
+
541
+ @keyframes slideIn {
542
+ from {
543
+ transform: translateY(100px);
544
+ opacity: 0;
545
+ }
546
+ to {
547
+ transform: translateY(0);
548
+ opacity: 1;
549
+ }
550
+ }
551
+
552
+ /* Loader Spinner */
553
+ .loader {
554
+ width: 16px;
555
+ height: 16px;
556
+ border: 2px solid rgba(255, 255, 255, 0.3);
557
+ border-top-color: white;
558
+ border-radius: 50%;
559
+ animation: spin 0.8s linear infinite;
560
+ }
561
+
562
+ /* Scrollbar Styling */
563
+ ::-webkit-scrollbar {
564
+ width: 8px;
565
+ height: 8px;
566
+ }
567
+
568
+ ::-webkit-scrollbar-track {
569
+ background: var(--bg-primary);
570
+ }
571
+
572
+ ::-webkit-scrollbar-thumb {
573
+ background: var(--border-color);
574
+ border-radius: 4px;
575
+ }
576
+
577
+ ::-webkit-scrollbar-thumb:hover {
578
+ background: var(--bg-hover);
579
+ }
580
+
581
+ /* Responsive Design */
582
+ @media (max-width: 1200px) {
583
+ .main-container {
584
+ grid-template-columns: 420px 1fr;
585
+ }
586
+
587
+ .right-panel {
588
+ padding: 24px 32px;
589
+ }
590
+ }
591
+
592
+ @media (max-width: 968px) {
593
+ .main-container {
594
+ grid-template-columns: 1fr;
595
+ height: auto;
596
+ }
597
+
598
+ .left-panel {
599
+ border-right: none;
600
+ border-bottom: 1px solid var(--border-color);
601
+ }
602
+
603
+ .right-panel {
604
+ min-height: calc(100vh - 57px);
605
+ }
606
+
607
+ .logo-subtitle {
608
+ display: none;
609
+ }
610
+ }