Aliashraf committed on
Commit
baed8db
·
verified ·
1 Parent(s): 460a44b

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +135 -0
app.py ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import logging
import os
import shutil

import uvicorn
from fastapi import FastAPI, File, UploadFile, HTTPException
from fastapi.responses import JSONResponse
from langchain.chains import RetrievalQA
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = FastAPI(title="RAG Chatbot API")

# Ensure directories exist: "documents" holds uploaded PDFs, "vectorstore"
# holds the persisted FAISS index.
try:
    os.makedirs("documents", exist_ok=True)
    os.makedirs("vectorstore", exist_ok=True)
    logger.info("Directories 'documents' and 'vectorstore' created or already exist.")
except Exception as e:
    logger.error(f"Failed to create directories: {str(e)}")
    raise

# Read the Google API key once at import time instead of calling os.getenv()
# at every use site; fail fast if it is missing so the app never starts in a
# half-configured state.
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
if not GOOGLE_API_KEY:
    logger.error("GOOGLE_API_KEY environment variable not set.")
    raise ValueError("GOOGLE_API_KEY environment variable not set.")

# Initialize Gemini LLM (used by the RetrievalQA chain in answer_query).
try:
    llm = ChatGoogleGenerativeAI(
        model="gemini-1.5-flash",
        google_api_key=GOOGLE_API_KEY
    )
    logger.info("Gemini LLM initialized successfully.")
except Exception as e:
    logger.error(f"Failed to initialize Gemini LLM: {str(e)}")
    raise

# Initialize embeddings (used both for indexing PDFs and for query retrieval).
try:
    embeddings = GoogleGenerativeAIEmbeddings(
        model="models/embedding-001",
        google_api_key=GOOGLE_API_KEY
    )
    logger.info("Gemini embeddings initialized successfully.")
except Exception as e:
    logger.error(f"Failed to initialize Gemini embeddings: {str(e)}")
    raise

# Path for the persisted FAISS vector store (shared by process_pdf and
# answer_query).
VECTOR_STORE_PATH = "vectorstore/index"
57
+
58
def process_pdf(pdf_path):
    """Load a PDF, split it into chunks, and index it into the FAISS store.

    Creates a new store on first use, otherwise appends to the existing one,
    then persists the store to VECTOR_STORE_PATH.

    Raises:
        HTTPException: 500 on any failure while loading, splitting, or indexing.
    """
    try:
        logger.info(f"Processing PDF: {pdf_path}")
        chunks = RecursiveCharacterTextSplitter(
            chunk_size=1000, chunk_overlap=200
        ).split_documents(PyPDFLoader(pdf_path).load())

        store_exists = os.path.exists(VECTOR_STORE_PATH)
        if store_exists:
            # allow_dangerous_deserialization is required because the FAISS
            # index is pickled on disk; the path is app-controlled, not user input.
            store = FAISS.load_local(VECTOR_STORE_PATH, embeddings, allow_dangerous_deserialization=True)
            store.add_documents(chunks)
            logger.info("Added documents to existing FAISS vector store.")
        else:
            store = FAISS.from_documents(chunks, embeddings)
            logger.info("Created new FAISS vector store.")

        store.save_local(VECTOR_STORE_PATH)
        logger.info("Vector store saved successfully.")
        return {"status": "Document processed and indexed successfully"}
    except Exception as e:
        logger.error(f"Error processing PDF: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Error processing document: {str(e)}")
79
+
80
def answer_query(query):
    """Answer *query* with a RetrievalQA chain over the persisted FAISS store.

    Returns a dict with "answer" and truncated "source_documents", or an
    "error" dict when no store has been built yet.

    Raises:
        HTTPException: 500 on any failure while loading the store or running
        the chain.
    """
    # Guard clause: nothing indexed yet means there is nothing to retrieve.
    if not os.path.exists(VECTOR_STORE_PATH):
        logger.warning("No vector store found. Please upload a document first.")
        return {"error": "No documents indexed yet. Please upload a document first."}

    try:
        logger.info(f"Processing query: {query}")
        store = FAISS.load_local(VECTOR_STORE_PATH, embeddings, allow_dangerous_deserialization=True)
        chain = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            # Retrieve the top-3 most similar chunks for context.
            retriever=store.as_retriever(search_kwargs={"k": 3}),
            return_source_documents=True,
        )
        outcome = chain({"query": query})
        logger.info("Query processed successfully.")
        # Truncate each source snippet to 200 characters for the response payload.
        snippets = [doc.page_content[:200] for doc in outcome["source_documents"]]
        return {"answer": outcome["result"], "source_documents": snippets}
    except Exception as e:
        logger.error(f"Error answering query: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Error answering query: {str(e)}")
103
+
104
@app.post("/upload-document")
async def upload_document(file: UploadFile = File(...)):
    """API to upload and process a PDF document.

    Saves the uploaded file under documents/ and indexes it via process_pdf.

    Raises:
        HTTPException: 400 for non-PDF uploads, 500 for save/index failures.
    """
    # Case-insensitive check so '.PDF' / '.Pdf' uploads are accepted too.
    if not file.filename.lower().endswith(".pdf"):
        logger.warning(f"Invalid file type uploaded: {file.filename}")
        raise HTTPException(status_code=400, detail="Only PDF files are allowed")
    # The client controls file.filename, so strip any directory components to
    # prevent path traversal (e.g. '../../etc/passwd') out of documents/.
    safe_name = os.path.basename(file.filename)
    file_path = f"documents/{safe_name}"
    try:
        with open(file_path, "wb") as buffer:
            shutil.copyfileobj(file.file, buffer)
        logger.info(f"Uploaded file saved: {file_path}")
        result = process_pdf(file_path)
        return JSONResponse(content=result, status_code=200)
    except HTTPException:
        # process_pdf already raised a specific HTTPException; re-raise it
        # instead of wrapping it into a second, doubled-up 500 message.
        raise
    except Exception as e:
        logger.error(f"Error in upload_document: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Error uploading document: {str(e)}")
120
+
121
@app.post("/ask-question")
async def ask_question(query: str):
    """API to answer a query based on indexed documents."""
    logger.info(f"Received question: {query}")
    # NOTE(review): answer_query may return an {"error": ...} payload when no
    # documents are indexed; it is still delivered with HTTP 200 here —
    # confirm that clients expect this rather than a 4xx status.
    payload = answer_query(query)
    return JSONResponse(content=payload, status_code=200)
127
+
128
@app.get("/health")
async def health_check():
    """Health check endpoint: confirms the API process is up and responding."""
    logger.info("Health check requested.")
    status_payload = {"status": "API is running"}
    return status_payload
133
+
134
# Log server startup
logger.info("Starting FastAPI application...")

if __name__ == "__main__":
    # uvicorn is imported at the top of the file but was never used; run the
    # app directly when this module is executed as a script. Import-time
    # behavior is unchanged when the module is loaded by an external server.
    # Port 7860 is the Hugging Face Spaces default — TODO confirm for other
    # deployments (overridable via the PORT environment variable).
    uvicorn.run(app, host="0.0.0.0", port=int(os.getenv("PORT", "7860")))