""" app.py — FECB RAG Search Application Loads a pre-built FAISS index (produced by ingest.py) and provides a Gradio interface for semantic search and AI-assisted Q&A over your PDF documents. Environment variables: ANTHROPIC_API_KEY — Anthropic API key (required) CLAUDE_MODEL — Claude model ID (default: claude-sonnet-4-6) EMBED_MODEL — Embedding model (default: BAAI/bge-small-en-v1.5) TOP_K — Max documents to retrieve (default: 5) INDEX_DIR — Path to FAISS index (default: faiss_index) META_FILE — Path to metadata JSON (default: metadata.json) Run: python app.py """ import json import os import re from pathlib import Path import anthropic import gradio as gr from langchain_community.vectorstores import FAISS from langchain_huggingface import HuggingFaceEmbeddings # ── Config ──────────────────────────────────────────────────────────────────── EMBED_MODEL = os.getenv("EMBED_MODEL", "BAAI/bge-small-en-v1.5") CLAUDE_MODEL = os.getenv("CLAUDE_MODEL", "claude-sonnet-4-6") API_KEY = os.getenv("ANTHROPIC_API_KEY") TOP_K = int(os.getenv("TOP_K", "8")) INDEX_DIR = Path(os.getenv("INDEX_DIR", "faiss_index")) META_FILE = Path(os.getenv("META_FILE", "metadata.json")) SYSTEM_PROMPT = ( "You are a knowledgeable research assistant. You help users find relevant " "information from a document collection and synthesize key findings. " "When answering, cite the specific document(s) by their bracketed number [N]. " "Be concise and precise. If the context doesn't contain enough information " "to answer fully, say so clearly." ) # ── Load resources ──────────────────────────────────────────────────────────── print(f"Loading embedding model: {EMBED_MODEL}") _embeddings = HuggingFaceEmbeddings( model_name=EMBED_MODEL, model_kwargs={"device": "cpu"}, encode_kwargs={"normalize_embeddings": True}, ) if not INDEX_DIR.exists(): raise FileNotFoundError( f"FAISS index not found at '{INDEX_DIR}'. " "Run 'python ingest.py' first to build the index from your PDFs." ) print(f"Loading FAISS index from: {INDEX_DIR}") _vectorstore = FAISS.load_local( str(INDEX_DIR), _embeddings, allow_dangerous_deserialization=True ) _metadata: dict[str, dict] = {} if META_FILE.exists(): print(f"Loading metadata from: {META_FILE}") with open(META_FILE, encoding="utf-8") as f: for record in json.load(f): _metadata[record["doc_id"]] = record print(f" Loaded metadata for {len(_metadata)} documents") else: print(f" [WARN] {META_FILE} not found — document names will be inferred from IDs") if not API_KEY: raise EnvironmentError("ANTHROPIC_API_KEY is not set. Export it before running.") _client = anthropic.Anthropic(api_key=API_KEY) print(f"Claude model: {CLAUDE_MODEL}") print("Ready.\n") # ── RAG helpers ─────────────────────────────────────────────────────────────── def retrieve(query: str, n_chunks: int) -> list[tuple]: """Return the top n_chunks most relevant chunks, allowing multiple per document.""" raw = _vectorstore.similarity_search_with_score(query, k=n_chunks) return sorted(raw, key=lambda x: x[1]) def build_context(hits: list[tuple]) -> str: parts = [] for i, (doc, _) in enumerate(hits, 1): doc_id = doc.metadata.get("doc_id", f"doc_{i}") filename = doc.metadata.get("filename", f"{doc_id}.pdf") excerpt = doc.page_content.strip() parts.append(f"[{i}] {filename} (ID: {doc_id})\n{excerpt}") return "\n\n---\n\n".join(parts) def ask_claude(query: str, context: str) -> str: user_content = ( f"Using the document excerpts below, answer the following question. " f"Cite documents by their bracketed number.\n\n" f"Question: {query}\n\nContext:\n{context}" ) try: message = _client.messages.create( model=CLAUDE_MODEL, max_tokens=800, system=SYSTEM_PROMPT, messages=[{"role": "user", "content": user_content}], ) return message.content[0].text.strip() except anthropic.APIError as exc: return ( f"Could not reach Claude ({exc}).\n\n" "Check that **ANTHROPIC_API_KEY** is set and valid." ) def cosine_to_pct(score: float) -> str: """Convert FAISS L2 distance (normalised embeddings) to 0–100% relevance.""" pct = (1.0 - min(max(score, 0.0), 2.0) / 2.0) * 100 return f"{pct:.1f}%" # ── Main search function ────────────────────────────────────────────────────── def rag_search(query: str, n_docs: int) -> tuple[str, str]: query = query.strip() if not query: return "Please enter a question or keyword.", "" hits = retrieve(query, n_docs) if not hits: return "No relevant documents found. Try different keywords.", "" context = build_context(hits) answer = ask_claude(query, context) cards = [] for i, (doc, score) in enumerate(hits, 1): doc_id = doc.metadata.get("doc_id", f"doc_{i}") filename = doc.metadata.get("filename", f"{doc_id}.pdf") rel = cosine_to_pct(score) snippet = doc.page_content.replace("\n", " ").strip()[:350] cards.append( f"### [{i}] {filename}\n" f"**Relevance:** {rel} \n" f"**ID:** {doc_id} \n" f"> {snippet}…" ) return answer, "\n\n---\n\n".join(cards) # ── Gradio UI ───────────────────────────────────────────────────────────────── with gr.Blocks(title="FECB Document Search") as demo: gr.Markdown( """ # FECB Document Search — AI-Powered RAG Search your document collection using semantic AI search. Ask a question or enter keywords; the app retrieves the most relevant documents and generates a synthesised answer with citations. > **Powered by** `BAAI/bge-small-en-v1.5` embeddings · Claude via Anthropic API """ ) with gr.Row(): with gr.Column(scale=5): query_box = gr.Textbox( label="Question or keywords", placeholder="e.g. 'What are the main findings about X?'", lines=2, elem_id="query-box", ) with gr.Column(scale=1, min_width=160): n_slider = gr.Slider( minimum=3, maximum=20, value=TOP_K, step=1, label="Chunks to retrieve", ) search_btn = gr.Button("Search", variant="primary", size="lg") gr.Markdown("---") with gr.Row(): with gr.Column(scale=2): gr.Markdown("### AI Answer") answer_md = gr.Markdown(value="*Results will appear here after searching.*") with gr.Column(scale=3): gr.Markdown("### Relevant Documents") papers_md = gr.Markdown(value="") search_btn.click(rag_search, [query_box, n_slider], [answer_md, papers_md]) query_box.submit(rag_search, [query_box, n_slider], [answer_md, papers_md]) if __name__ == "__main__": demo.launch( server_name="0.0.0.0", server_port=7860, share=False, theme=gr.themes.Soft(primary_hue="blue", font=gr.themes.GoogleFont("Inter")), css=""" .gradio-container { max-width: 1100px; margin: auto; } #query-box textarea { font-size: 16px; } """, )