# RAG QA System — FastAPI service (snapshot of Hugging Face Space file, commit a86c572).
from fastapi import FastAPI, UploadFile, File
import json
from db import conn, cursor
from embeddings import semantic_chunking, embedding_model
from retrieval import retrieve_top_chunks
from llm import build_prompt, call_llm
# Application object: routes below attach to this instance.
app = FastAPI(title="RAG QA System", version="1.0")
@app.get("/health")
def health():
    """Liveness probe — always reports the service as up."""
    status = dict(status="OK")
    return status
@app.post("/ingest")
async def ingest(file: UploadFile = File(...)):
    """Ingest an uploaded UTF-8 text document into the chunk store.

    The document is split with ``semantic_chunking``, each chunk is embedded
    with ``embedding_model``, and (filename, chunk index, chunk text,
    JSON-encoded embedding) rows are persisted via the module-level DB
    connection.

    Returns a summary dict with the number of chunks stored.
    Raises UnicodeDecodeError if the upload is not valid UTF-8.
    """
    text = (await file.read()).decode("utf-8")
    chunks = semantic_chunking(text)
    embeddings = embedding_model.encode(chunks)
    # Batch the inserts: one executemany round-trip instead of N separate
    # cursor.execute calls, committed once at the end.
    cursor.executemany(
        "INSERT INTO chunks VALUES (?, ?, ?, ?)",
        [
            (file.filename, i, chunk, json.dumps(emb.tolist()))
            for i, (chunk, emb) in enumerate(zip(chunks, embeddings))
        ],
    )
    conn.commit()
    return {
        "message": "Document ingested",
        "chunks": len(chunks)
    }
# Minimum best-match retrieval score required before an LLM answer is
# attempted; module-level so it is not rebound on every request.
CONFIDENCE_THRESHOLD = 0.6


def _refusal(question: str, confidence: float) -> dict:
    """Build the standard "don't know" response shared by the no-result
    and low-confidence branches of ``ask``."""
    return {
        "question": question,
        "answer": "I don't know based on the provided context",
        "confidence": confidence,
        "evidence": [],
    }


@app.post("/ask")
def ask(question: str):
    """Answer a question against the ingested corpus.

    Retrieves the top chunks (tuples of (score, document, chunk_id, text) —
    see the unpacking below), refuses when nothing is retrieved or the best
    score falls under CONFIDENCE_THRESHOLD, otherwise prompts the LLM.

    Returns a dict with the question, answer, rounded confidence, and the
    supporting evidence chunks.
    """
    top_chunks = retrieve_top_chunks(question)
    if not top_chunks:
        return _refusal(question, 0.0)
    best_score = max(c[0] for c in top_chunks)
    if best_score < CONFIDENCE_THRESHOLD:
        return _refusal(question, round(float(best_score), 2))
    prompt = build_prompt(question, top_chunks)
    answer = call_llm(prompt)
    # Deterministic fallback: if the LLM refuses anyway, surface the
    # best-scoring chunk's text verbatim rather than an empty refusal.
    if "I don't know based on the provided context" in answer:
        answer = top_chunks[0][3]
    evidence = [
        {"document": doc, "chunk_id": cid, "text": text}
        for _, doc, cid, text in top_chunks
    ]
    return {
        "question": question,
        "answer": answer,
        "confidence": round(float(best_score), 2),
        "evidence": evidence
    }