# NOTE(review): file was captured from a Hugging Face Space page scrape
# (the "Spaces: Sleeping" status banner leaked into the paste).
from fastapi import FastAPI, Form, Request, UploadFile
from fastapi.responses import HTMLResponse
from fastapi.templating import Jinja2Templates

from chunking import chunk_text
from ingestion import extract_pdf
from llm import generate_answer
from reranker import rerank
from retrieval_colbert import ColBERTRetriever
from scraper import scrape_url
# Application-wide singletons.
app = FastAPI()
templates = Jinja2Templates(directory="templates")

# Single in-process retriever; the index lives in memory for the app's lifetime.
retriever = ColBERTRetriever()

# Conversation memory: list of {"role": ..., "content": ...} dicts.
# NOTE(review): shared by ALL clients — no per-session isolation; confirm
# whether that is intended before deploying multi-user.
chat_memory = []
@app.get("/", response_class=HTMLResponse)  # decorator lost in the paste; HTMLResponse import was otherwise unused
async def home(request: Request):
    """Serve the chat UI from templates/index.html."""
    return templates.TemplateResponse("index.html", {"request": request})
# -----------------------
# PDF Upload
# -----------------------
@app.post("/upload")  # decorator lost in the paste — without it the route is never registered
async def upload(file: UploadFile):
    """Extract text from an uploaded PDF, chunk it, and rebuild the index.

    Previously indexed chunks are re-included so multiple documents can be
    searched together (build_index appears to replace the whole index).
    """
    text = extract_pdf(file.file)
    chunks = chunk_text(text, file.filename)
    # `or []` covers both an empty and an unset/None chunk store.
    retriever.build_index((retriever.chunks or []) + chunks)
    return {"status": "PDF indexed"}
# -----------------------
# Website Scraper
# -----------------------
@app.post("/scrape")  # decorator lost in the paste — without it the route is never registered
async def scrape(url: str = Form(...)):
    """Scrape a web page, chunk its text, and rebuild the index.

    The URL doubles as the chunk source label. Existing chunks are
    re-included so earlier documents remain searchable.
    """
    text = scrape_url(url)
    chunks = chunk_text(text, url)
    # `or []` covers both an empty and an unset/None chunk store.
    retriever.build_index((retriever.chunks or []) + chunks)
    return {"status": "Website indexed"}
# -----------------------
# Chat endpoint
# -----------------------
@app.post("/chat")  # decorator lost in the paste — without it the route is never registered
async def chat(message: str = Form(...)):
    """Answer `message` using retrieved context plus recent chat history.

    Returns {"answer": ..., "chunks": [...]} with the top reranked chunks,
    or a hint to index a document first when the store is empty.
    """
    retrieved = retriever.query(message, k=25)
    if not retrieved:
        return {"answer": "Please index a document first."}

    # Rerank the 25 candidates and keep the 3 best for the prompt.
    reranked = rerank(message, retrieved)
    top_chunks = reranked[:3]
    context = "\n\n".join(c["text"] for c in top_chunks)

    # Last two exchanges (4 messages) of conversation history.
    # (No `global` needed: chat_memory is mutated, never rebound.)
    history = "".join(f"{m['role']}: {m['content']}\n" for m in chat_memory[-4:])

    prompt_context = f"""
Conversation history:
{history}
Context:
{context}
User question:
{message}
"""
    answer = generate_answer(prompt_context, message)

    # Record the exchange for future history windows.
    chat_memory.append({"role": "user", "content": message})
    chat_memory.append({"role": "assistant", "content": answer})

    return {
        "answer": answer,
        "chunks": top_chunks
    }