import os
from time import time

from fastapi import FastAPI, UploadFile, File
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import HTMLResponse, JSONResponse
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel
from dotenv import load_dotenv
import google.generativeai as genai

from rag_store import ingest_documents, search_knowledge

# Load environment variables and configure the Gemini SDK.
load_dotenv()
genai.configure(api_key=os.getenv("GEMINI_API_KEY"))

app = FastAPI(
    title="Gemini RAG FastAPI",
    docs_url="/docs",
    redoc_url="/redoc",
)

app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
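# NOTE: allow_origins=["*"] is convenient for local development; a deployed
# instance would normally restrict origins to known frontend hosts.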

app.mount("/frontend", StaticFiles(directory="frontend"), name="frontend")

# Naive in-memory answer cache mapping a normalized prompt to a
# (timestamp, response) pair.
CACHE_TTL = 300  # seconds
answer_cache = {}
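# The cache is per-process and never evicts entries; stale values are only
# overwritten when the same prompt is asked again, so memory grows with the
# number of distinct prompts.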

class PromptRequest(BaseModel):
    prompt: str

@app.get("/", response_class=HTMLResponse)
def serve_ui():
    with open("frontend/index.html", "r", encoding="utf-8") as f:
        return f.read()

@app.post("/upload")
async def upload(files: list[UploadFile] = File(...)):
    # Chunk and index the uploaded files into the vector store.
    try:
        chunks = ingest_documents(files)
        return {"message": f"Indexed {chunks} chunks from {len(files)} file(s)."}
    except Exception as e:
        return JSONResponse(status_code=400, content={"error": str(e)})
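# Example request, assuming a local run on port 8000 (file name illustrative):
#   curl -F "files=@notes.pdf" http://localhost:8000/upload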

@app.post("/ask")
async def ask(data: PromptRequest):
    prompt_key = data.prompt.strip().lower()
    now = time()

    # Serve a cached answer if one exists and is still fresh.
    if prompt_key in answer_cache:
        ts, cached = answer_cache[prompt_key]
        if now - ts < CACHE_TTL:
            return cached

    # Retrieve relevant chunks; refuse to answer when nothing matches.
    results = search_knowledge(data.prompt)
    if not results:
        response = {
            "answer": "I don't know based on the provided documents.",
            "confidence": 0.0,
            "citations": []
        }
        answer_cache[prompt_key] = (now, response)
        return response

    context = "\n\n".join(r["text"] for r in results)

    # Ground the model strictly in the retrieved context.
    prompt = f"""
Answer strictly using the context below.
If the answer is not in the context, say "I don't know".

Context:
{context}

Question:
{data.prompt}
"""
    try:
        model = genai.GenerativeModel("gemini-2.5-flash")
        llm_response = model.generate_content(prompt)

        response = {
            "answer": llm_response.text,
            # Crude heuristic: confidence grows with the number of retrieved
            # chunks and is capped at 1.0 (reached at 5+ chunks).
            "confidence": round(min(1.0, len(results) / 5), 2),
            "citations": [
                {"source": r["metadata"]["source"], "page": r["metadata"]["page"]}
                for r in results
            ]
        }

        answer_cache[prompt_key] = (now, response)
        return response

    except Exception:
        # Every generation failure is surfaced as a rate-limit error.
        return JSONResponse(
            status_code=429,
            content={"error": "LLM quota exceeded. Please wait and retry."}
        )
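# Example request, assuming a local run on port 8000 (prompt illustrative):
#   curl -X POST http://localhost:8000/ask \
#        -H "Content-Type: application/json" \
#        -d '{"prompt": "What do the documents say about pricing?"}'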

@app.post("/summarize")
async def summarize():
    # Reuse the /ask pipeline with a fixed summarization prompt.
    return await ask(PromptRequest(
        prompt="Summarize the uploaded documents in 5 concise bullet points."
    ))
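
# Minimal local entry point: a sketch that assumes uvicorn is installed and
# that this module is saved as main.py; in production you would typically
# run `uvicorn main:app` instead.
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)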