Aliashraf committed on
Commit
deb0ac4
·
verified ·
1 Parent(s): df757c1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +58 -16
app.py CHANGED
@@ -1,30 +1,55 @@
 
 
1
  from fastapi import FastAPI, File, UploadFile, HTTPException
2
  from fastapi.responses import JSONResponse
3
- import os
4
- import shutil
5
  from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
6
  from langchain_community.vectorstores import FAISS
7
  from langchain.text_splitter import RecursiveCharacterTextSplitter
8
  from langchain_community.document_loaders import PyPDFLoader
9
  from langchain.chains import RetrievalQA
 
 
 
 
 
10
 
11
  app = FastAPI(title="RAG Chatbot API")
12
 
13
  # Ensure directories exist
14
- os.makedirs("documents", exist_ok=True)
15
- os.makedirs("vectorstore", exist_ok=True)
 
 
 
 
 
 
 
 
 
 
16
 
17
  # Initialize Gemini LLM
18
- llm = ChatGoogleGenerativeAI(
19
- model="gemini-1.5-flash",
20
- google_api_key=os.getenv("GOOGLE_API_KEY")
21
- )
 
 
 
 
 
22
 
23
  # Initialize embeddings
24
- embeddings = GoogleGenerativeAIEmbeddings(
25
- model="models/embedding-001",
26
- google_api_key=os.getenv("GOOGLE_API_KEY")
27
- )
 
 
 
 
 
28
 
29
  # Path for vector store
30
  VECTOR_STORE_PATH = "vectorstore/index"
@@ -32,6 +57,7 @@ VECTOR_STORE_PATH = "vectorstore/index"
32
  def process_pdf(pdf_path):
33
  """Process and index a PDF document."""
34
  try:
 
35
  loader = PyPDFLoader(pdf_path)
36
  documents = loader.load()
37
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
@@ -39,18 +65,24 @@ def process_pdf(pdf_path):
39
  if os.path.exists(VECTOR_STORE_PATH):
40
  vector_store = FAISS.load_local(VECTOR_STORE_PATH, embeddings, allow_dangerous_deserialization=True)
41
  vector_store.add_documents(texts)
 
42
  else:
43
  vector_store = FAISS.from_documents(texts, embeddings)
 
44
  vector_store.save_local(VECTOR_STORE_PATH)
 
45
  return {"status": "Document processed and indexed successfully"}
46
  except Exception as e:
 
47
  raise HTTPException(status_code=500, detail=f"Error processing document: {str(e)}")
48
 
49
  def answer_query(query):
50
  """Answer a query using the RAG pipeline."""
51
  if not os.path.exists(VECTOR_STORE_PATH):
 
52
  return {"error": "No documents indexed yet. Please upload a document first."}
53
  try:
 
54
  vector_store = FAISS.load_local(VECTOR_STORE_PATH, embeddings, allow_dangerous_deserialization=True)
55
  qa_chain = RetrievalQA.from_chain_type(
56
  llm=llm,
@@ -59,31 +91,41 @@ def answer_query(query):
59
  return_source_documents=True
60
  )
61
  result = qa_chain({"query": query})
 
62
  return {
63
  "answer": result["result"],
64
  "source_documents": [doc.page_content[:200] for doc in result["source_documents"]]
65
  }
66
  except Exception as e:
 
67
  raise HTTPException(status_code=500, detail=f"Error answering query: {str(e)}")
68
 
69
  @app.post("/upload-document")
70
  async def upload_document(file: UploadFile = File(...)):
71
  """API to upload and process a PDF document."""
72
  if not file.filename.endswith(".pdf"):
 
73
  raise HTTPException(status_code=400, detail="Only PDF files are allowed")
74
  file_path = f"documents/{file.filename}"
75
- with open(file_path, "wb") as buffer:
76
- shutil.copyfileobj(file.file, buffer)
77
- result = process_pdf(file_path)
78
- return JSONResponse(content=result, status_code=200)
 
 
 
 
 
79
 
80
  @app.post("/ask-question")
81
  async def ask_question(query: str):
82
  """API to answer a query based on indexed documents."""
 
83
  result = answer_query(query)
84
  return JSONResponse(content=result, status_code=200)
85
 
86
  @app.get("/health")
87
  async def health_check():
88
  """Health check endpoint."""
 
89
  return {"status": "API is running"}
 
1
+ import os
2
+ import logging
3
  from fastapi import FastAPI, File, UploadFile, HTTPException
4
  from fastapi.responses import JSONResponse
 
 
5
  from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
6
  from langchain_community.vectorstores import FAISS
7
  from langchain.text_splitter import RecursiveCharacterTextSplitter
8
  from langchain_community.document_loaders import PyPDFLoader
9
  from langchain.chains import RetrievalQA
10
+ import shutil
11
+
12
+ # Set up logging
13
+ logging.basicConfig(level=logging.INFO)
14
+ logger = logging.getLogger(__name__)
15
 
16
  app = FastAPI(title="RAG Chatbot API")
17
 
18
  # Ensure directories exist
19
+ try:
20
+ os.makedirs("documents", exist_ok=True)
21
+ os.makedirs("vectorstore", exist_ok=True)
22
+ logger.info("Directories 'documents' and 'vectorstore' created or already exist.")
23
+ except Exception as e:
24
+ logger.error(f"Failed to create directories: {str(e)}")
25
+ raise
26
+
27
+ # Check for GOOGLE_API_KEY
28
+ if not os.getenv("GOOGLE_API_KEY"):
29
+ logger.error("GOOGLE_API_KEY environment variable not set.")
30
+ raise ValueError("GOOGLE_API_KEY environment variable not set.")
31
 
32
  # Initialize Gemini LLM
33
+ try:
34
+ llm = ChatGoogleGenerativeAI(
35
+ model="gemini-1.5-flash",
36
+ google_api_key=os.getenv("GOOGLE_API_KEY")
37
+ )
38
+ logger.info("Gemini LLM initialized successfully.")
39
+ except Exception as e:
40
+ logger.error(f"Failed to initialize Gemini LLM: {str(e)}")
41
+ raise
42
 
43
  # Initialize embeddings
44
+ try:
45
+ embeddings = GoogleGenerativeAIEmbeddings(
46
+ model="models/embedding-001",
47
+ google_api_key=os.getenv("GOOGLE_API_KEY")
48
+ )
49
+ logger.info("Gemini embeddings initialized successfully.")
50
+ except Exception as e:
51
+ logger.error(f"Failed to initialize Gemini embeddings: {str(e)}")
52
+ raise
53
 
54
  # Path for vector store
55
  VECTOR_STORE_PATH = "vectorstore/index"
 
57
  def process_pdf(pdf_path):
58
  """Process and index a PDF document."""
59
  try:
60
+ logger.info(f"Processing PDF: {pdf_path}")
61
  loader = PyPDFLoader(pdf_path)
62
  documents = loader.load()
63
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
 
65
  if os.path.exists(VECTOR_STORE_PATH):
66
  vector_store = FAISS.load_local(VECTOR_STORE_PATH, embeddings, allow_dangerous_deserialization=True)
67
  vector_store.add_documents(texts)
68
+ logger.info("Added documents to existing FAISS vector store.")
69
  else:
70
  vector_store = FAISS.from_documents(texts, embeddings)
71
+ logger.info("Created new FAISS vector store.")
72
  vector_store.save_local(VECTOR_STORE_PATH)
73
+ logger.info("Vector store saved successfully.")
74
  return {"status": "Document processed and indexed successfully"}
75
  except Exception as e:
76
+ logger.error(f"Error processing PDF: {str(e)}")
77
  raise HTTPException(status_code=500, detail=f"Error processing document: {str(e)}")
78
 
79
  def answer_query(query):
80
  """Answer a query using the RAG pipeline."""
81
  if not os.path.exists(VECTOR_STORE_PATH):
82
+ logger.warning("No vector store found. Please upload a document first.")
83
  return {"error": "No documents indexed yet. Please upload a document first."}
84
  try:
85
+ logger.info(f"Processing query: {query}")
86
  vector_store = FAISS.load_local(VECTOR_STORE_PATH, embeddings, allow_dangerous_deserialization=True)
87
  qa_chain = RetrievalQA.from_chain_type(
88
  llm=llm,
 
91
  return_source_documents=True
92
  )
93
  result = qa_chain({"query": query})
94
+ logger.info("Query processed successfully.")
95
  return {
96
  "answer": result["result"],
97
  "source_documents": [doc.page_content[:200] for doc in result["source_documents"]]
98
  }
99
  except Exception as e:
100
+ logger.error(f"Error answering query: {str(e)}")
101
  raise HTTPException(status_code=500, detail=f"Error answering query: {str(e)}")
102
 
103
  @app.post("/upload-document")
104
  async def upload_document(file: UploadFile = File(...)):
105
  """API to upload and process a PDF document."""
106
  if not file.filename.endswith(".pdf"):
107
+ logger.warning(f"Invalid file type uploaded: {file.filename}")
108
  raise HTTPException(status_code=400, detail="Only PDF files are allowed")
109
  file_path = f"documents/{file.filename}"
110
+ try:
111
+ with open(file_path, "wb") as buffer:
112
+ shutil.copyfileobj(file.file, buffer)
113
+ logger.info(f"Uploaded file saved: {file_path}")
114
+ result = process_pdf(file_path)
115
+ return JSONResponse(content=result, status_code=200)
116
+ except Exception as e:
117
+ logger.error(f"Error in upload_document: {str(e)}")
118
+ raise HTTPException(status_code=500, detail=f"Error uploading document: {str(e)}")
119
 
120
  @app.post("/ask-question")
121
  async def ask_question(query: str):
122
  """API to answer a query based on indexed documents."""
123
+ logger.info(f"Received question: {query}")
124
  result = answer_query(query)
125
  return JSONResponse(content=result, status_code=200)
126
 
127
  @app.get("/health")
128
  async def health_check():
129
  """Health check endpoint."""
130
+ logger.info("Health check requested.")
131
  return {"status": "API is running"}