fecb-rag / app.py
malaporte's picture
Upload folder using huggingface_hub
32bcdac verified
Raw
History Blame Contribute Delete
8.06 kB
"""
app.py — FECB RAG Search Application
Loads a pre-built FAISS index (produced by ingest.py) and provides a
Gradio interface for semantic search and AI-assisted Q&A over your PDF documents.
Environment variables:
    ANTHROPIC_API_KEY — Anthropic API key (required)
    CLAUDE_MODEL      — Claude model ID (default: claude-sonnet-4-6)
    EMBED_MODEL       — Embedding model (default: BAAI/bge-small-en-v1.5)
    TOP_K             — Default number of chunks to retrieve (default: 8)
    INDEX_DIR         — Path to FAISS index (default: faiss_index)
    META_FILE         — Path to metadata JSON (default: metadata.json)
Run:
python app.py
"""
import json
import os
import re
from pathlib import Path
import anthropic
import gradio as gr
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
# ── Config ────────────────────────────────────────────────────────────────────
# Every setting below is read from the environment once, at import time,
# falling back to the literal default when the variable is unset.

# Model identifiers: embedding model for retrieval, Claude model for answers.
EMBED_MODEL = os.environ.get("EMBED_MODEL", "BAAI/bge-small-en-v1.5")
CLAUDE_MODEL = os.environ.get("CLAUDE_MODEL", "claude-sonnet-4-6")

# Anthropic credential; None when the variable is not set.
API_KEY = os.environ.get("ANTHROPIC_API_KEY")

# Default number of chunks to retrieve per query.
TOP_K = int(os.environ.get("TOP_K", "8"))

# On-disk artefacts: the FAISS index directory and the metadata JSON file.
INDEX_DIR = Path(os.environ.get("INDEX_DIR", "faiss_index"))
META_FILE = Path(os.environ.get("META_FILE", "metadata.json"))

# System prompt sent with every Claude request (one sentence per fragment).
SYSTEM_PROMPT = (
    "You are a knowledgeable research assistant. "
    "You help users find relevant information from a document collection "
    "and synthesize key findings. "
    "When answering, cite the specific document(s) by their bracketed number [N]. "
    "Be concise and precise. "
    "If the context doesn't contain enough information to answer fully, "
    "say so clearly."
)
# ── Load resources ────────────────────────────────────────────────────────────
# Import-time start-up: build the embedding model, load the FAISS index and
# the optional metadata file, then create the Anthropic client. Any failure
# here aborts the process before the UI is constructed.

print(f"Loading embedding model: {EMBED_MODEL}")
_embeddings = HuggingFaceEmbeddings(
    model_name=EMBED_MODEL,
    model_kwargs={"device": "cpu"},
    # Embeddings are normalised at encode time; the relevance conversion
    # further down the file relies on that.
    encode_kwargs={"normalize_embeddings": True},
)

if not INDEX_DIR.exists():
    raise FileNotFoundError(
        f"FAISS index not found at '{INDEX_DIR}'. "
        "Run 'python ingest.py' first to build the index from your PDFs."
    )

print(f"Loading FAISS index from: {INDEX_DIR}")
# SECURITY: allow_dangerous_deserialization=True makes LangChain unpickle the
# stored docstore, which can execute arbitrary code. Only point INDEX_DIR at
# an index you built yourself (e.g. with ingest.py), never at untrusted files.
_vectorstore = FAISS.load_local(
    str(INDEX_DIR), _embeddings, allow_dangerous_deserialization=True
)

# doc_id -> metadata record, filled from META_FILE when it is present.
_metadata: dict[str, dict] = {}
if META_FILE.exists():
    print(f"Loading metadata from: {META_FILE}")
    with open(META_FILE, encoding="utf-8") as f:
        for record in json.load(f):
            _metadata[record["doc_id"]] = record
    print(f"  Loaded metadata for {len(_metadata)} documents")
else:
    # Missing metadata is tolerated: only a warning is printed.
    print(f"  [WARN] {META_FILE} not found — document names will be inferred from IDs")

if not API_KEY:
    raise EnvironmentError("ANTHROPIC_API_KEY is not set. Export it before running.")

_client = anthropic.Anthropic(api_key=API_KEY)
print(f"Claude model: {CLAUDE_MODEL}")
print("Ready.\n")
# ── RAG helpers ───────────────────────────────────────────────────────────────
def retrieve(query: str, n_chunks: int) -> list[tuple]:
    """Fetch the ``n_chunks`` closest chunks for ``query``, lowest score first.

    No per-document de-duplication is applied, so several chunks of the same
    document may appear in the result.

    Args:
        query: Free-text search string.
        n_chunks: Number of chunks requested from the vector store.

    Returns:
        The scored results from the vector store, ordered by ascending score
        (the second element of each tuple).
    """
    scored_chunks = list(_vectorstore.similarity_search_with_score(query, k=n_chunks))
    scored_chunks.sort(key=lambda pair: pair[1])
    return scored_chunks
def build_context(hits: list[tuple]) -> str:
    """Render retrieved chunks as a numbered context string for the prompt.

    Each hit becomes ``[N] <filename> (ID: <doc_id>)`` followed by the chunk
    text with surrounding whitespace removed; entries are separated by a
    ``---`` rule. A missing ``doc_id`` falls back to ``doc_<N>`` and a missing
    ``filename`` to ``<doc_id>.pdf``.

    Args:
        hits: (document, score) pairs; the score is ignored here.

    Returns:
        The joined context, or an empty string when ``hits`` is empty.
    """

    def _render(position: int, doc) -> str:
        # Format one numbered entry; numbering is 1-based to match citations.
        doc_id = doc.metadata.get("doc_id", f"doc_{position}")
        filename = doc.metadata.get("filename", f"{doc_id}.pdf")
        body = doc.page_content.strip()
        return f"[{position}] {filename} (ID: {doc_id})\n{body}"

    return "\n\n---\n\n".join(
        _render(position, doc)
        for position, (doc, _score) in enumerate(hits, start=1)
    )
def ask_claude(query: str, context: str) -> str:
    """Ask Claude to answer ``query`` using the supplied document excerpts.

    Args:
        query: The user's question.
        context: Numbered excerpts (as produced by ``build_context``) that
            Claude is told to cite by bracketed number.

    Returns:
        Claude's answer text with surrounding whitespace removed. If the API
        call fails, or the response contains no text, a human-readable
        message is returned instead of raising, so the UI always has
        something to display.
    """
    user_content = (
        "Using the document excerpts below, answer the following question. "
        "Cite documents by their bracketed number.\n\n"
        f"Question: {query}\n\nContext:\n{context}"
    )
    # Keep the try body to the network call only, so unrelated errors are not
    # mistaken for API failures.
    try:
        message = _client.messages.create(
            model=CLAUDE_MODEL,
            max_tokens=800,
            system=SYSTEM_PROMPT,
            messages=[{"role": "user", "content": user_content}],
        )
    except anthropic.APIError as exc:
        return (
            f"Could not reach Claude ({exc}).\n\n"
            "Check that **ANTHROPIC_API_KEY** is set and valid."
        )

    # The response body is a list of content blocks. Collect every text block
    # rather than indexing [0], which fails on an empty list or when the
    # first block is not a text block.
    answer = "".join(
        block.text
        for block in message.content
        if getattr(block, "type", None) == "text"
    ).strip()
    return answer or "Claude returned an empty response. Try rephrasing the question."
def cosine_to_pct(score: float) -> str:
    """Map a distance score onto a relevance percentage string.

    The score is clamped to the range [0, 2] and mapped linearly so that
    0 becomes ``100.0%`` and 2 (or more) becomes ``0.0%``. The result is
    formatted with one decimal place.

    NOTE(review): the score is expected to be the FAISS L2 distance between
    normalised embeddings — confirm which metric the index was built with.
    """
    clamped = max(score, 0.0)
    clamped = min(clamped, 2.0)
    relevance = 100 * (1.0 - clamped / 2.0)
    return format(relevance, ".1f") + "%"
# ── Main search function ──────────────────────────────────────────────────────
def rag_search(query: str, n_docs: int) -> tuple[str, str]:
    """Run one retrieve-then-answer round trip for the UI.

    Args:
        query: Question or keywords typed by the user. ``None`` and
            whitespace-only input are treated as empty.
        n_docs: Number of chunks to retrieve.

    Returns:
        ``(answer_markdown, cards_markdown)``: the generated answer and one
        Markdown card per retrieved chunk (title, relevance, ID, snippet).
        When the query is empty or nothing is retrieved, the first element is
        a short notice and the second is an empty string.
    """
    snippet_limit = 350

    # Guard against a None value so an untouched textbox does not crash.
    query = (query or "").strip()
    if not query:
        return "Please enter a question or keyword.", ""

    # The count comes from a UI slider; coerce to int before it reaches the
    # vector store in case it arrives as a float.
    hits = retrieve(query, int(n_docs))
    if not hits:
        return "No relevant documents found. Try different keywords.", ""

    context = build_context(hits)
    answer = ask_claude(query, context)

    cards = []
    for i, (doc, score) in enumerate(hits, 1):
        doc_id = doc.metadata.get("doc_id", f"doc_{i}")
        filename = doc.metadata.get("filename", f"{doc_id}.pdf")
        rel = cosine_to_pct(score)
        flat_text = doc.page_content.replace("\n", " ").strip()
        snippet = flat_text[:snippet_limit]
        # Only signal truncation when text was actually cut off.
        if len(flat_text) > snippet_limit:
            snippet += "…"
        # Two trailing spaces before the newline are a Markdown hard line
        # break, keeping Relevance and ID on separate lines.
        cards.append(
            f"### [{i}] {filename}\n"
            f"**Relevance:** {rel}  \n"
            f"**ID:** {doc_id}  \n"
            f"> {snippet}"
        )
    return answer, "\n\n---\n\n".join(cards)
# ── Gradio UI ─────────────────────────────────────────────────────────────────
# Gradio layout: header, query row (textbox + chunk-count slider), search
# button, then a results row with the AI answer next to the document cards.
# NOTE(review): the nesting below is reconstructed from flattened source —
# confirm the placement of the search button against the deployed layout.
with gr.Blocks(title="FECB Document Search") as demo:
    # The header names the embedding model actually configured rather than a
    # hard-coded default, so it stays correct when EMBED_MODEL is overridden.
    gr.Markdown(
        "# FECB Document Search — AI-Powered RAG\n\n"
        "Search your document collection using semantic AI search.\n"
        "Ask a question or enter keywords; the app retrieves the most relevant\n"
        "documents and generates a synthesised answer with citations.\n\n"
        f"> **Powered by** `{EMBED_MODEL}` embeddings · Claude via Anthropic API\n"
    )

    with gr.Row():
        with gr.Column(scale=5):
            query_box = gr.Textbox(
                label="Question or keywords",
                placeholder="e.g. 'What are the main findings about X?'",
                lines=2,
                elem_id="query-box",
            )
        with gr.Column(scale=1, min_width=160):
            n_slider = gr.Slider(
                minimum=3,
                maximum=20,
                # TOP_K comes from the environment; clamp it so the initial
                # value always lies inside the slider's range.
                value=min(max(TOP_K, 3), 20),
                step=1,
                label="Chunks to retrieve",
            )

    search_btn = gr.Button("Search", variant="primary", size="lg")
    gr.Markdown("---")

    with gr.Row():
        with gr.Column(scale=2):
            gr.Markdown("### AI Answer")
            answer_md = gr.Markdown(value="*Results will appear here after searching.*")
        with gr.Column(scale=3):
            gr.Markdown("### Relevant Documents")
            papers_md = gr.Markdown(value="")

    # Clicking the button and pressing Enter in the textbox run the same search.
    search_btn.click(rag_search, [query_box, n_slider], [answer_md, papers_md])
    query_box.submit(rag_search, [query_box, n_slider], [answer_md, papers_md])
# Script entry point: serve the UI on all interfaces, port 7860, without a
# public share link.
if __name__ == "__main__":
    # Visual theme: Soft preset with a blue primary hue and the Inter font.
    soft_theme = gr.themes.Soft(
        primary_hue="blue",
        font=gr.themes.GoogleFont("Inter"),
    )
    # Page-level CSS: cap the container width and enlarge the query textarea.
    page_css = (
        "\n"
        ".gradio-container { max-width: 1100px; margin: auto; }\n"
        "#query-box textarea { font-size: 16px; }\n"
    )
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        theme=soft_theme,
        css=page_css,
    )