import os
from time import time
from fastapi import FastAPI, UploadFile, File
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import HTMLResponse, JSONResponse
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel
from dotenv import load_dotenv
import google.generativeai as genai
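# Local module (rag_store.py) providing document ingestion and retrieval.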
from rag_store import ingest_documents, search_knowledge
# -----------------------
# Setup
# -----------------------
load_dotenv()
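# Assumes GEMINI_API_KEY is set in the environment or a local .env file.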
genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
app = FastAPI(
    title="Gemini RAG FastAPI",
    docs_url="/docs",
    redoc_url="/redoc",
)
# -----------------------
# CORS
# -----------------------
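# Wide-open CORS for local development; tighten allow_origins before deploying.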
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
# -----------------------
# Frontend
# -----------------------
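# Static assets (JS/CSS) are served from the local ./frontend directory.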
app.mount("/frontend", StaticFiles(directory="frontend"), name="frontend")
# -----------------------
# Cache (protect quota)
# -----------------------
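# In-memory cache mapping a normalized prompt to (timestamp, response dict),
# so repeat questions within the TTL window don't spend LLM quota.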
CACHE_TTL = 300 # seconds
answer_cache = {}
# -----------------------
# Models
# -----------------------
class PromptRequest(BaseModel):
    prompt: str
# -----------------------
# Routes
# -----------------------
@app.get("/", response_class=HTMLResponse)
def serve_ui():
    with open("frontend/index.html", "r", encoding="utf-8") as f:
        return f.read()
# -----------------------
# Upload
# -----------------------
@app.post("/upload")
async def upload(files: list[UploadFile] = File(...)):
    try:
        chunks = ingest_documents(files)
        return {"message": f"Indexed {chunks} chunks from {len(files)} file(s)."}
    except Exception as e:
        return JSONResponse(status_code=400, content={"error": str(e)})
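# Example request (hypothetical file name, default dev port assumed):
#   curl -F "files=@report.pdf" http://localhost:8000/upload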
# -----------------------
# Ask
# -----------------------
@app.post("/ask")
async def ask(data: PromptRequest):
    # Normalize the prompt so trivial variations hit the same cache entry.
    prompt_key = data.prompt.strip().lower()
    now = time()

    # Cache check: serve a stored answer if it hasn't expired yet.
    if prompt_key in answer_cache:
        ts, cached = answer_cache[prompt_key]
        if now - ts < CACHE_TTL:
            return cached
    # Retrieve relevant chunks; with none, answer honestly that we don't know.
    results = search_knowledge(data.prompt)
    if not results:
        response = {
            "answer": "I don't know based on the provided documents.",
            "confidence": 0.0,
            "citations": [],
        }
        answer_cache[prompt_key] = (now, response)
        return response
    # Build a grounded prompt from the retrieved chunks.
    context = "\n\n".join(r["text"] for r in results)
    prompt = f"""
Answer strictly using the context below.
If not found, say "I don't know".

Context:
{context}

Question:
{data.prompt}
"""
    try:
        model = genai.GenerativeModel("gemini-2.5-flash")
        llm_response = model.generate_content(prompt)
        response = {
            "answer": llm_response.text,
            # Naive confidence heuristic: more retrieved chunks -> higher score, capped at 1.0.
            "confidence": round(min(1.0, len(results) / 5), 2),
            "citations": [
                {"source": r["metadata"]["source"], "page": r["metadata"]["page"]}
                for r in results
            ],
        }
        answer_cache[prompt_key] = (now, response)
        return response
    except Exception as e:
        # Any generation failure lands here; in practice most are rate-limit/quota errors.
        return JSONResponse(
            status_code=429,
            content={
                "error": "LLM request failed (likely quota exceeded). Please wait and retry.",
                "detail": str(e),
            },
        )
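# Example request (illustrative prompt, default dev port assumed):
#   curl -X POST http://localhost:8000/ask \
#        -H "Content-Type: application/json" \
#        -d '{"prompt": "What does the document say about pricing?"}'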
# -----------------------
# Summarize
# -----------------------
@app.post("/summarize")
async def summarize():
    # Reuses the /ask pipeline (retrieval, LLM call, and cache) with a fixed prompt.
    return await ask(PromptRequest(
        prompt="Summarize the uploaded documents in 5 concise bullet points."
    ))
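# -----------------------
# Entry point
# -----------------------
# Minimal local-run sketch; assumes uvicorn is installed.
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)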