Aliashraf commited on
Commit
8383a37
·
verified ·
1 Parent(s): 1429c79

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +89 -0
app.py ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from fastapi import FastAPI, File, UploadFile, HTTPException
from fastapi.responses import JSONResponse
import os
import shutil
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain.chains import RetrievalQA

app = FastAPI(title="RAG Chatbot API")

# Create the working directories up front so later file writes cannot fail
# on a missing folder.
for _required_dir in ("documents", "vectorstore"):
    os.makedirs(_required_dir, exist_ok=True)

# Gemini chat model used to generate answers from retrieved context.
llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",
    google_api_key=os.getenv("GOOGLE_API_KEY"),
)

# Embedding model used to vectorize document chunks for the FAISS index.
embeddings = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
    google_api_key=os.getenv("GOOGLE_API_KEY"),
)

# On-disk location where the FAISS index is persisted between requests.
VECTOR_STORE_PATH = "vectorstore/index"
def process_pdf(pdf_path):
    """Load a PDF, split it into chunks, and merge it into the FAISS index.

    Args:
        pdf_path: Filesystem path of the PDF to ingest.

    Returns:
        dict: A status message on success.

    Raises:
        HTTPException: 500 if loading, splitting, or indexing fails.
    """
    try:
        # Load the document pages and break them into overlapping chunks
        # sized for retrieval.
        pages = PyPDFLoader(pdf_path).load()
        splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
        chunks = splitter.split_documents(pages)

        # Extend the existing index when one is on disk; otherwise build
        # a fresh one from these chunks.
        if os.path.exists(VECTOR_STORE_PATH):
            store = FAISS.load_local(
                VECTOR_STORE_PATH, embeddings, allow_dangerous_deserialization=True
            )
            store.add_documents(chunks)
        else:
            store = FAISS.from_documents(chunks, embeddings)
        store.save_local(VECTOR_STORE_PATH)
        return {"status": "Document processed and indexed successfully"}
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error processing document: {str(e)}")
def answer_query(query):
    """Answer *query* with a RAG pipeline over the persisted FAISS index.

    Args:
        query: The user's natural-language question.

    Returns:
        dict: ``answer`` plus 200-character previews of the retrieved
        source chunks, or an ``error`` dict when nothing is indexed yet.

    Raises:
        HTTPException: 500 if retrieval or generation fails.
    """
    # Guard clause: there is nothing to search until an upload succeeded.
    if not os.path.exists(VECTOR_STORE_PATH):
        return {"error": "No documents indexed yet. Please upload a document first."}
    try:
        store = FAISS.load_local(
            VECTOR_STORE_PATH, embeddings, allow_dangerous_deserialization=True
        )
        # Retrieve the 3 most similar chunks and stuff them into the prompt.
        retriever = store.as_retriever(search_kwargs={"k": 3})
        chain = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=retriever,
            return_source_documents=True,
        )
        result = chain({"query": query})
        previews = [doc.page_content[:200] for doc in result["source_documents"]]
        return {"answer": result["result"], "source_documents": previews}
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error answering query: {str(e)}")
@app.post("/upload-document")
async def upload_document(file: UploadFile = File(...)):
    """API to upload and process a PDF document.

    Saves the upload under ``documents/`` and indexes its contents into
    the shared FAISS store via :func:`process_pdf`.

    Raises:
        HTTPException: 400 for non-PDF uploads, 500 if indexing fails
            (propagated from ``process_pdf``).
    """
    # file.filename comes straight from the client and is untrusted: it may
    # be absent, use an upper-case extension, or contain path separators
    # (e.g. "../../etc/passwd"). Validate case-insensitively and strip any
    # directory components before writing to disk.
    if not file.filename or not file.filename.lower().endswith(".pdf"):
        raise HTTPException(status_code=400, detail="Only PDF files are allowed")
    safe_name = os.path.basename(file.filename)
    file_path = f"documents/{safe_name}"
    with open(file_path, "wb") as buffer:
        shutil.copyfileobj(file.file, buffer)
    result = process_pdf(file_path)
    return JSONResponse(content=result, status_code=200)
@app.post("/ask-question")
async def ask_question(query: str):
    """API to answer a query based on indexed documents."""
    # Delegate to the RAG pipeline and pass its dict straight through.
    answer = answer_query(query)
    return JSONResponse(content=answer, status_code=200)
@app.get("/health")
async def health_check():
    """Liveness probe: reports that the service is up and responding."""
    status_payload = {"status": "API is running"}
    return status_payload