MohitGupta41 commited on
Commit
8d5a4b2
·
1 Parent(s): 0570fc0

FastAPI RAG backend (Docker)

Browse files
Files changed (4) hide show
  1. Dockerfile +23 -0
  2. app.py +86 -0
  3. rag.py +102 -0
  4. requirements.txt +9 -0
Dockerfile ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Lightweight CPU image for the FastAPI RAG backend (Hugging Face Spaces).
FROM python:3.10-slim

WORKDIR /app

# System deps (pymupdf may need extra libs sometimes; this minimal usually works)
# gcc is needed to build any wheels that ship without prebuilt binaries.
RUN apt-get update && apt-get install -y --no-install-recommends \
    gcc \
    && rm -rf /var/lib/apt/lists/*

# Install Python deps first so this layer stays cached until requirements change.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy the application source last (changes most often).
COPY . .

# Hugging Face expects port 7860
EXPOSE 7860

# Recommended: keep caches in /tmp on Spaces
# (the app user may not have write access elsewhere on Spaces).
ENV HF_HOME=/tmp/hf
ENV TRANSFORMERS_CACHE=/tmp/hf/transformers
ENV SENTENCE_TRANSFORMERS_HOME=/tmp/hf/sentence-transformers

CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from fastapi import FastAPI, UploadFile, File, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from typing import List, Optional, Dict, Any

# Project-local RAG helpers: extraction, chunking, session/index management,
# retrieval, and answer generation. SESSIONS is the shared in-memory store.
from rag import (
    extract_text_from_pdf,
    chunk_text,
    create_session,
    retrieve_top_k,
    generate_answer,
    SESSIONS,
)

app = FastAPI(title="Mini RAG Backend")

# Allow any origin so a separately hosted frontend can call this API.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # tighten later if needed
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
24
+
25
+
26
class AskRequest(BaseModel):
    """Request body for POST /ask."""

    # Session id returned by /ingest; keys into rag.SESSIONS.
    session_id: str
    # Natural-language question to answer from the ingested document.
    question: str
    # Number of chunks to retrieve as context for generation.
    top_k: int = 3
30
+
31
+
32
@app.post("/ingest")
async def ingest(file: UploadFile = File(...)) -> Dict[str, Any]:
    """Ingest an uploaded PDF or TXT: extract text, chunk, embed, index.

    Returns:
        {"session_id": str, "num_chunks": int} for use with /ask.

    Raises:
        HTTPException 400: unsupported file type, no extractable text,
        or chunking produced nothing.
    """
    # UploadFile.filename is Optional; guard before .lower() so a missing
    # name yields a clean 400 instead of an AttributeError (HTTP 500).
    filename = (file.filename or "").lower()
    content = await file.read()

    if filename.endswith(".pdf"):
        text = extract_text_from_pdf(content)
    elif filename.endswith(".txt"):
        # Best-effort decode: tolerate stray non-UTF-8 bytes.
        text = content.decode("utf-8", errors="ignore")
    else:
        raise HTTPException(status_code=400, detail="Only PDF or TXT allowed")

    text = text.strip()
    if not text:
        raise HTTPException(status_code=400, detail="No extractable text found")

    chunks = chunk_text(text, chunk_size_words=350, overlap_words=60)
    if not chunks:
        raise HTTPException(status_code=400, detail="Chunking produced 0 chunks")

    session_id = create_session(chunks)

    return {
        "session_id": session_id,
        "num_chunks": len(chunks),
    }
58
+
59
+
60
@app.post("/ask")
async def ask(req: AskRequest) -> Dict[str, Any]:
    """Answer a question against a previously ingested session.

    Retrieves the top-k chunks for the question, joins them into a
    context, and generates a grounded answer.

    Returns:
        {"answer": str, "sources": [{"chunk_id", "score", "text"}, ...]}

    Raises:
        HTTPException 404: unknown session_id.
    """
    sess = SESSIONS.get(req.session_id)
    if not sess:
        raise HTTPException(status_code=404, detail="Invalid session_id")

    chunks = sess["chunks"]
    index = sess["index"]

    hits = retrieve_top_k(req.question, chunks, index, k=req.top_k)
    context = "\n\n---\n\n".join([h[2] for h in hits])

    answer = generate_answer(req.question, context)

    # Only append an ellipsis when the snippet was actually truncated
    # (previously "..." was added unconditionally, even to short chunks).
    sources = [
        {
            "chunk_id": h[0],
            "score": h[1],
            "text": h[2] if len(h[2]) <= 400 else h[2][:400] + "...",
        }
        for h in hits
    ]

    return {"answer": answer, "sources": sources}
80
+
81
+
82
@app.delete("/session/{session_id}")
async def delete_session(session_id: str):
    """Drop a session's chunks and index; idempotent for unknown ids."""
    # pop() with a default is a no-op when the id is absent, matching the
    # original membership-check-then-delete behavior in one call.
    SESSIONS.pop(session_id, None)
    return {"status": "ok"}
rag.py ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import uuid
from dataclasses import dataclass
from typing import List, Dict, Any, Tuple
import time

import numpy as np
import faiss
from sentence_transformers import SentenceTransformer

# PDF extraction
import fitz  # pymupdf

# LLM
from transformers import pipeline


# -----------------------------
# Globals (MVP)
# -----------------------------
# Single shared embedder, used for both document chunks and queries so the
# vectors live in the same space.
EMBEDDER = SentenceTransformer("all-MiniLM-L6-v2")

# flan-t5 is a seq2seq (encoder-decoder / T5-family) checkpoint, so it must
# run under the "text2text-generation" task; the causal "text-generation"
# pipeline rejects T5 architectures at load time.
GENERATOR = pipeline(
    "text2text-generation",
    model="google/flan-t5-base",  # small enough for CPU; swap for a bigger model later
    max_new_tokens=256,
)

# In-memory session store: session_id -> {chunks, index, created_at}.
# NOTE(review): unbounded and process-local — fine for an MVP, not for scale.
SESSIONS: Dict[str, Dict[str, Any]] = {}
31
+
32
+
33
+ # -----------------------------
34
+ # Helpers
35
+ # -----------------------------
36
def extract_text_from_pdf(pdf_bytes: bytes) -> str:
    """Extract plain text from a PDF given as raw bytes.

    Pages are joined with newlines; returns "" when the PDF has no
    extractable text (e.g. scanned images without an OCR layer).
    """
    # Context manager guarantees the document handle is closed even if
    # extraction raises — the original left the handle open (leak).
    with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:
        pages = [page.get_text("text") for page in doc]
    return "\n".join(pages).strip()
42
+
43
+
44
def chunk_text(text: str, chunk_size_words: int = 350, overlap_words: int = 60) -> List[str]:
    """Split *text* into overlapping word-window chunks.

    Each chunk holds up to ``chunk_size_words`` whitespace-separated
    tokens; consecutive chunks share ``overlap_words`` tokens. Returns
    an empty list for empty/whitespace-only input.
    """
    tokens = text.split()
    # Stride is clamped to at least 1 so a pathological overlap
    # (>= chunk size) cannot cause an infinite/empty walk.
    stride = max(1, chunk_size_words - overlap_words)
    return [
        " ".join(tokens[start:start + chunk_size_words])
        for start in range(0, len(tokens), stride)
    ]
53
+
54
+
55
def build_faiss_index(vectors: np.ndarray) -> faiss.Index:
    """Build a flat inner-product FAISS index over L2-normalized copies
    of *vectors*, so inner-product search behaves like cosine similarity.
    """
    # astype always copies, so normalizing below never mutates the
    # caller's array.
    mat = vectors.astype("float32")
    faiss.normalize_L2(mat)
    index = faiss.IndexFlatIP(mat.shape[1])
    index.add(mat)
    return index
62
+
63
+
64
def retrieve_top_k(query: str, chunks: List[str], index: faiss.Index, k: int = 3) -> List[Tuple[int, float, str]]:
    """Return up to *k* (chunk_id, score, chunk_text) hits for *query*,
    ranked by inner product over L2-normalized embeddings (cosine-like).
    """
    query_vec = EMBEDDER.encode([query], convert_to_numpy=True).astype("float32")
    faiss.normalize_L2(query_vec)
    scores, ids = index.search(query_vec, k)

    hits: List[Tuple[int, float, str]] = []
    for score, chunk_id in zip(scores[0], ids[0]):
        # FAISS pads with id -1 when fewer than k vectors are indexed.
        if chunk_id == -1:
            continue
        pos = int(chunk_id)
        hits.append((pos, float(score), chunks[pos]))
    return hits
74
+
75
+
76
def generate_answer(question: str, context: str) -> str:
    """Generate an answer to *question* grounded only in *context*.

    Returns the model's generated text; falls back to the stringified
    raw pipeline output if the shape is unexpected.
    """
    prompt = (
        "Answer using ONLY the provided context. "
        "If not found in the context, say: Not found in the provided documents.\n\n"
        f"Context:\n{context}\n\nQuestion:\n{question}\n\nAnswer:"
    )

    out = GENERATOR(prompt)
    # Both text-generation and text2text-generation pipelines return
    # [{"generated_text": ...}]; verify the shape defensively.
    if isinstance(out, list) and out and isinstance(out[0], dict) and "generated_text" in out[0]:
        answer = out[0]["generated_text"]
        # Causal LMs echo the prompt before the completion; strip it so
        # the API returns only the answer. Seq2seq output is unaffected.
        if answer.startswith(prompt):
            answer = answer[len(prompt):].lstrip()
        return answer
    return str(out)
90
+
91
+
92
def create_session(chunks: List[str]) -> str:
    """Embed *chunks*, build a FAISS index over them, and register a new
    session in SESSIONS.

    Returns the generated session id (UUID4 string).
    """
    vectors = EMBEDDER.encode(chunks, convert_to_numpy=True)

    session_id = str(uuid.uuid4())
    SESSIONS[session_id] = {
        "chunks": chunks,
        "index": build_faiss_index(vectors),
        "created_at": time.time(),
    }
    return session_id
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ python-multipart
4
+ pydantic
5
+ sentence-transformers
6
+ faiss-cpu
7
+ pymupdf
8
+ transformers
9
+ torch