| |
| import sys, time |
| sys.path.insert(0, ".") |
|
|
| from fastapi import FastAPI |
| from fastapi.middleware.cors import CORSMiddleware |
| from pydantic import BaseModel |
| from backend.rag import query_rag |
| from backend.llm_only import query_llm_only |
| from backend.graphrag import query_graphrag, build_graph |
| import json |
|
|
# ASGI application object; every route below is registered on it.
app = FastAPI()

# Wildcard CORS policy: requests from any origin, using any HTTP method and
# any request header, are accepted.
# NOTE(review): presumably intended for local development -- confirm before
# exposing this service publicly.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
|
|
# Built once when the module is imported and then handed to query_graphrag()
# by the GraphRAG endpoints, so the graph is not rebuilt per request.
G = build_graph()
|
|
class Query(BaseModel):
    """Request body shared by all ``/query/*`` endpoints."""

    # The text that is forwarded to the query_* backends.
    question: str
|
|
@app.get("/")
def root():
    """Return a fixed ``{"status": "ok"}`` payload."""
    payload = {"status": "ok"}
    return payload
|
|
@app.post("/query/llm")
def llm_endpoint(q: Query):
    """Answer a question via ``query_llm_only`` and report how long it took.

    Args:
        q: Request body carrying the ``question`` string.

    Returns:
        The object returned by ``query_llm_only`` with a ``"latency"`` entry
        added: elapsed seconds, rounded to two decimals. The result must
        support item assignment (presumably a dict -- confirm against
        ``backend.llm_only``).
    """
    # perf_counter() is monotonic, so the measured interval cannot be skewed
    # by system clock adjustments the way a time.time() difference can.
    started = time.perf_counter()
    result = query_llm_only(q.question)
    result["latency"] = round(time.perf_counter() - started, 2)
    return result
|
|
@app.post("/query/rag")
def rag_endpoint(q: Query):
    """Answer a question via ``query_rag`` and report how long it took.

    Args:
        q: Request body carrying the ``question`` string.

    Returns:
        The object returned by ``query_rag`` with a ``"latency"`` entry
        added: elapsed seconds, rounded to two decimals. The result must
        support item assignment (presumably a dict -- confirm against
        ``backend.rag``).
    """
    # perf_counter() is monotonic, so the measured interval cannot be skewed
    # by system clock adjustments the way a time.time() difference can.
    started = time.perf_counter()
    result = query_rag(q.question)
    result["latency"] = round(time.perf_counter() - started, 2)
    return result
|
|
@app.post("/query/graphrag")
def graphrag_endpoint(q: Query):
    """Answer a question via ``query_graphrag`` and report how long it took.

    The module-level graph ``G`` is passed to ``query_graphrag`` alongside
    the question.

    Args:
        q: Request body carrying the ``question`` string.

    Returns:
        The object returned by ``query_graphrag`` with a ``"latency"`` entry
        added: elapsed seconds, rounded to two decimals. The result must
        support item assignment (presumably a dict -- confirm against
        ``backend.graphrag``).
    """
    # perf_counter() is monotonic, so the measured interval cannot be skewed
    # by system clock adjustments the way a time.time() difference can.
    started = time.perf_counter()
    result = query_graphrag(q.question, G)
    result["latency"] = round(time.perf_counter() - started, 2)
    return result
|
|
def _timed_call(func, *args):
    """Call ``func(*args)`` and store the elapsed seconds under ``"latency"``.

    The elapsed time is rounded to two decimals and written into the returned
    object, which must therefore support item assignment.
    """
    # perf_counter() is monotonic, unlike time.time(), so clock adjustments
    # during the call cannot distort the measured interval.
    started = time.perf_counter()
    result = func(*args)
    result["latency"] = round(time.perf_counter() - started, 2)
    return result


@app.post("/query/all")
def all_endpoint(q: Query):
    """Run the question through all three backends and compare token usage.

    The backends are called sequentially (LLM-only, RAG, GraphRAG) and each
    result is annotated with its own ``"latency"``.

    Args:
        q: Request body carrying the ``question`` string.

    Returns:
        A dict with the original question, the three annotated results under
        ``"llm_only"``, ``"rag"`` and ``"graphrag"``, and
        ``"token_reduction_vs_rag"``: the percentage by which GraphRAG's
        ``"total_tokens"`` is lower than RAG's, rounded to one decimal.
        It is ``0.0`` when RAG reports zero tokens, because the ratio is
        undefined there.

    Raises:
        KeyError: If the RAG or GraphRAG result lacks ``"total_tokens"``.
    """
    llm = _timed_call(query_llm_only, q.question)
    rag = _timed_call(query_rag, q.question)
    grag = _timed_call(query_graphrag, q.question, G)

    rag_tokens = rag["total_tokens"]
    if rag_tokens:
        token_reduction = round(
            (rag_tokens - grag["total_tokens"]) / rag_tokens * 100, 1
        )
    else:
        # Guard against ZeroDivisionError when RAG consumed no tokens.
        token_reduction = 0.0

    return {
        "question": q.question,
        "llm_only": llm,
        "rag": rag,
        "graphrag": grag,
        "token_reduction_vs_rag": token_reduction,
    }
|
|
@app.get("/results")
def get_results():
    """Return the parsed contents of ``data/results.json``.

    The path is relative, so it is resolved against the process's current
    working directory. The file is read as UTF-8 on every request.

    Raises:
        FileNotFoundError: If ``data/results.json`` does not exist.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # The context manager closes the handle deterministically; the previous
    # json.load(open(...)) form left that to the garbage collector.
    with open("data/results.json", encoding="utf-8") as results_file:
        return json.load(results_file)