import os
from time import time

from fastapi import FastAPI, UploadFile, File
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import HTMLResponse, JSONResponse
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel
from dotenv import load_dotenv
import google.generativeai as genai

from rag_store import ingest_documents, search_knowledge
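
# Read GEMINI_API_KEY from .env and configure the Gemini client.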
load_dotenv()
genai.configure(api_key=os.getenv("GEMINI_API_KEY"))

app = FastAPI(
    title="Gemini RAG FastAPI",
    docs_url="/docs",
    redoc_url="/redoc",
)
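
# Wide-open CORS so a browser frontend served elsewhere can call the API during development.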
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
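
# Serve the static frontend assets under /frontend.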
app.mount("/frontend", StaticFiles(directory="frontend"), name="frontend")
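
# Naive in-process answer cache mapping a normalized prompt to (timestamp, response).
# Entries older than CACHE_TTL seconds are ignored on lookup; they are never evicted.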
CACHE_TTL = 300  # seconds
answer_cache = {}


class PromptRequest(BaseModel):
    prompt: str


@app.get("/", response_class=HTMLResponse)
def serve_ui():
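    # Read the single-page UI from disk on each request.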
    with open("frontend/index.html", "r", encoding="utf-8") as f:
        return f.read()


@app.post("/upload")
async def upload(files: list[UploadFile] = File(...)):
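    # Delegate parsing, chunking, and indexing to rag_store; report what was ingested.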
    try:
        chunks = ingest_documents(files)
        return {"message": f"Indexed {chunks} chunks from {len(files)} file(s)."}
    except Exception as e:
        return JSONResponse(status_code=400, content={"error": str(e)})


@app.post("/ask")
async def ask(data: PromptRequest):
    prompt_key = data.prompt.strip().lower()
    now = time()
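
    # Serve a cached answer if the same normalized prompt was asked within the TTL.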
    if prompt_key in answer_cache:
        ts, cached = answer_cache[prompt_key]
        if now - ts < CACHE_TTL:
            return cached
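
    # Retrieve relevant chunks; refuse to answer when nothing matches.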
    results = search_knowledge(data.prompt)
    if not results:
        response = {
            "answer": "I don't know based on the provided documents.",
            "confidence": 0.0,
            "citations": [],
        }
        answer_cache[prompt_key] = (now, response)
        return response

    context = "\n\n".join(r["text"] for r in results)
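
    # Ground the model strictly in the retrieved context.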
    prompt = f"""
Answer strictly using the context below.
If not found, say "I don't know".

Context:
{context}

Question:
{data.prompt}
"""

    try:
        model = genai.GenerativeModel("gemini-2.5-flash")
        llm_response = model.generate_content(prompt)
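
        # Crude confidence heuristic: more retrieved chunks means higher confidence, capped at 1.0.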
        response = {
            "answer": llm_response.text,
            "confidence": round(min(1.0, len(results) / 5), 2),
            "citations": [
                {"source": r["metadata"]["source"], "page": r["metadata"]["page"]}
                for r in results
            ],
        }

        answer_cache[prompt_key] = (now, response)
        return response
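
    # Any generation failure is reported to the client as a quota/rate-limit error.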
    except Exception:
        return JSONResponse(
            status_code=429,
            content={"error": "LLM quota exceeded. Please wait and retry."},
        )


@app.post("/summarize")
async def summarize():
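    # Reuse the /ask flow with a fixed prompt; the result is cached like any other answer.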
    return await ask(PromptRequest(
        prompt="Summarize the uploaded documents in 5 concise bullet points."
    ))