# NOTE(review): non-code scrape header ("Spaces:" / "Runtime error" x2) from the
# hosting page — converted to a comment so the file parses; the Space was
# reporting a runtime error at capture time.
# --- Imports & request schema -------------------------------------------------
# NOTE(review): the imports below were scattered through the original paste
# (fastapi imported twice); they are consolidated into one conventional
# stdlib / third-party / local grouping with none removed.
import os
import re
import uuid

from dotenv import load_dotenv
from fastapi import Body, FastAPI, File, UploadFile
from fastapi.middleware.cors import CORSMiddleware
from langchain_chroma import Chroma
from langchain_huggingface import HuggingFaceEmbeddings
from pydantic import BaseModel

from rag_pipeline import answer_query, process_file


class QueryRequest(BaseModel):
    """Schema for a question about a previously uploaded file.

    NOTE(review): this model is not referenced by the endpoints visible in
    this file (query_endpoint reads a raw ``Body`` dict) — confirm whether it
    is used elsewhere or should be wired into the query route.
    """

    file_id: str                # id returned by the upload endpoint
    question: str               # user's question about the document
    page: int                   # page number giving context for the question
    explainLike5: bool = False  # when True, request a simplified answer
# --- Application setup --------------------------------------------------------

# Pull environment variables (API keys, etc.) from a local .env file.
load_dotenv()

# On-disk locations: persisted Chroma vector stores and raw uploaded files.
CHROMA_DIR = "./chroma_db"
BASE_DIR = "files"

# Sentence-transformer embedding model shared by all requests.
embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

app = FastAPI()

# NOTE(review): wildcard CORS ("*") accepts any origin — acceptable for a
# demo Space, tighten before production use.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)

# In-memory map of file_id -> retriever built at upload time.
file_store = {}
async def test():
    """Health-check endpoint returning a static greeting.

    NOTE(review): no route decorator is visible in the paste — restore the
    original ``@app.get(...)`` before deploying.
    """
    payload = {"message": "hello world!"}
    return payload
async def upload(file: UploadFile = File(...)):
    """Accept an uploaded document, persist it, and index it for retrieval.

    Saves the raw bytes under ``BASE_DIR`` as ``<uuid>_<sanitized name>``,
    builds a retriever via ``process_file``, caches it in ``file_store``,
    and returns the generated ``file_id`` to the client.

    NOTE(review): no route decorator is visible in the paste — restore the
    original ``@app.post(...)`` before deploying.
    """
    payload = await file.read()
    file_id = str(uuid.uuid4())

    # Replace spaces in the client-supplied name so the saved path stays tame.
    safe_filename = file.filename.replace(" ", "_")
    full_filename = f"{file_id}_{safe_filename}"

    os.makedirs(BASE_DIR, exist_ok=True)
    destination = os.path.join(BASE_DIR, full_filename)
    with open(destination, "wb") as fh:
        fh.write(payload)

    # Build and cache the retriever so later queries can reuse it.
    file_store[file_id] = process_file(payload, safe_filename, file_id)

    return {"message": "File processed", "file_id": file_id}
async def query_endpoint(request = Body(...)):
    """Answer a question about a previously uploaded document.

    Expects a JSON body with ``file_id`` and ``question`` (required), plus
    optional ``selectedText`` (a passage the user highlighted) and
    ``explainLike5`` (bool). Retrieves the most relevant chunks from the
    file's persisted Chroma store and delegates to ``answer_query``.

    Returns ``{"answer": ...}`` on success or ``{"error": ...}`` when the
    request is incomplete or the vector store is missing.

    NOTE(review): no route decorator is visible in the paste — restore the
    original ``@app.post(...)`` before deploying.
    """
    file_id = request.get("file_id")
    question = request.get("question")
    selected_text = request.get("selectedText")
    explain_like_5 = request.get("explainLike5", False)

    if not file_id or not question:
        return {"error": "Missing file_id or question"}

    # Load retriever from disk — each file_id has its own persisted store.
    retriever_path = f"{CHROMA_DIR}/{file_id}"
    if not os.path.exists(retriever_path):
        return {"error": "Vectorstore for this file_id not found."}

    vectorstore = Chroma(
        embedding_function=embedding_model,
        persist_directory=retriever_path,
    )
    retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 4})

    # Prefer the highlighted passage as the retrieval query when present.
    retrieved_docs = retriever.invoke(selected_text or question)
    retrieved_context = "\n\n".join(
        re.sub(r"\s+", " ", doc.page_content.strip()) for doc in retrieved_docs
    )

    # FIX: the original unconditionally embedded the selection, producing a
    # literal 'User selected this: "None"' prompt when nothing was highlighted,
    # which pollutes the LLM context. Only include the section when present.
    if selected_text:
        combined_context = (
            f"User selected this:\n\"{selected_text}\"\n\n"
            f"Related parts from the document:\n{retrieved_context}"
        )
    else:
        combined_context = f"Related parts from the document:\n{retrieved_context}"

    print("Combined context", combined_context)
    answer = answer_query(question, combined_context, explain_like_5)
    return {"answer": answer}