""" app.py — Flask entry point + all routes + inline HTML/CSS/JS UI. No templates/ or static/ folders needed. Run locally: python app.py """ import os import uuid import logging from pathlib import Path from flask import Flask, render_template_string, request, jsonify, session from pipeline import pipeline_query, add_document_to_index, clear_session_memory from ingest import FAISS_INDEX_PATH, BASE_DIR logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)-8s] %(message)s", datefmt="%Y-%m-%d %H:%M:%S") logger = logging.getLogger(__name__) UPLOADED_DOCS_PATH = str(BASE_DIR / "data" / "uploads") FLASK_SECRET_KEY = os.environ.get("FLASK_SECRET_KEY", os.urandom(24).hex()) app = Flask(__name__) app.secret_key = FLASK_SECRET_KEY # ══════════════════════════════════════════════════════════════════════ # INLINE HTML / CSS / JS # ══════════════════════════════════════════════════════════════════════ _PAGE = """
Retrieval-Augmented Generation · Banking & Insurance · LangChain · FAISS · LLaMA 3.3 70B
Upload your own financial PDFs and FinRAG will embed and index them immediately — no restart required.
vectorstore.merge_from()
Click to select a PDF or drag and drop here
PDF only · Max 50 MB
┌─────────────────────────────────────────────────────────────────┐
│                     Financial-RAG-Platform                      │
│                                                                 │
│  ┌───────────┐   ┌──────────────────┐   ┌──────────────────┐    │
│  │ PDF / TXT │──▶│ ingest.py        │──▶│ FAISS Index      │    │
│  │ Sources   │   │ Chunk + Embed    │   │ (pre-baked in    │    │
│  │ FDIC/NAIC │   │ MiniLM-L6-v2     │   │ Docker image)    │    │
│  └───────────┘   └──────────────────┘   └────────┬─────────┘    │
│                                                  │              │
│  ┌───────────┐   ┌──────────────────┐   ┌────────▼─────────┐    │
│  │Guardrails │──▶│ pipeline.py      │◀──│ MMR Retriever    │    │
│  │ (domain   │   │ LangChain Conv.  │   │ top-k = 5        │    │
│  │  filter)  │   │ RetrievalChain   │   └──────────────────┘    │
│  └───────────┘   └────────┬─────────┘                           │
│                           │             ┌──────────────────┐    │
│  ┌───────────┐            │             │ Confidence       │    │
│  │ Session   │◀───────────┤             │ Scoring          │    │
│  │ Memory    │            ▼             └──────────────────┘    │
│  └───────────┘   ┌──────────────────┐                           │
│                  │ Groq API         │                           │
│                  │ LLaMA 3.3 70B    │                           │
│                  └────────┬─────────┘                           │
│                           │                                     │
│                  ┌────────▼─────────┐                           │
│                  │ app.py           │                           │
│                  │ Flask + HTML     │                           │
│                  │ /api/chat        │                           │
│                  │ /api/upload      │                           │
│                  │ /api/clear       │                           │
│                  └──────────────────┘                           │
└─────────────────────────────────────────────────────────────────┘
| Component | Technology | Role |
|---|---|---|
| Embeddings | sentence-transformers/all-MiniLM-L6-v2 | Local, no external API, 384-dim vectors |
| Vector Store | faiss-cpu (IndexFlatL2) | Millisecond exact (brute-force) nearest-neighbor search, pre-baked in Docker image |
| LLM | Groq · LLaMA 3.3 70B Versatile | Sub-second generation via Groq API |
| RAG Orchestration | LangChain 0.3 ConversationalRetrievalChain | Multi-turn with condense + QA prompts |
| Memory | ConversationBufferWindowMemory (k=6) | Per-session isolated conversation history |
| Guardrails | Keyword scan + regex off-topic detection | Domain relevance enforcement |
| Confidence | Cosine similarity + exponential decay weights | Hallucination risk flagging per query |
| Backend | Flask 3.x | REST API: /api/chat, /api/upload, /api/clear |
| Frontend | Vanilla HTML / CSS / JS (inline) | Dark financial theme, tab-based dashboard |
| Deployment | Dockerfile → HuggingFace Spaces Docker SDK | Port 7860, non-root user |