nikhmr1235 committed on
Commit
a68a0ec
·
verified ·
1 Parent(s): 886337d

Add some debug print statements to diagnose state.db being unavailable in chat_with_pdf()

Browse files
Files changed (1) hide show
  1. app.py +38 -34
app.py CHANGED
@@ -22,46 +22,47 @@ class PDFChatbot:
22
  self.state = SessionState()
23
 
24
  def process_pdf(self, pdf_file):
25
- try:
26
- if self.state.is_db_ready():
27
- return
28
-
29
- file_size_mb = os.path.getsize(pdf_file.name) / (1024 * 1024)
30
- if file_size_mb >= 75:
31
- gr.Error("File size exceeds the 75 MB limit. Please upload a smaller PDF.")
32
- return
33
-
34
- self.state = SessionState()
35
- doc = fitz.open(pdf_file.name)
36
- text = ""
37
- for page in doc:
38
- text += page.get_text()
39
- doc.close()
40
-
41
- text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
42
- docs = text_splitter.create_documents([text])
43
-
44
- embeddings = GoogleGenerativeAIEmbeddings(model=EMBEDDING_MODEL)
45
- self.state.db = Chroma.from_documents(
46
- documents=docs,
47
- embedding=embeddings,
48
- persist_directory=self.state.vector_store_path
49
- )
50
-
51
- gr.Info("PDF processed successfully! You can now ask questions about the document.")
52
- except Exception as e:
53
- if os.path.exists(self.state.vector_store_path):
54
- shutil.rmtree(self.state.vector_store_path)
55
- gr.Error(f"An error occurred: {str(e)}")
56
 
57
- def is_db_ready(self):
58
- return self.state.db is not None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
 
60
  def chat_with_pdf(self, message, history):
61
- if not self.is_db_ready():
 
 
62
  yield "Error: Database not ready."
63
  return
64
 
 
65
  retriever = self.state.db.as_retriever()
66
  llm = ChatGoogleGenerativeAI(model=LLM_MODEL, temperature=0.7)
67
 
@@ -87,6 +88,9 @@ class PDFChatbot:
87
  response = rag_chain.invoke(message)
88
  yield response
89
 
 
 
 
90
  class SessionState:
91
  def __init__(self):
92
  self.session_id = str(uuid.uuid4())
 
22
  self.state = SessionState()
23
 
24
  def process_pdf(self, pdf_file):
25
+ try:
26
+ if self.state.is_db_ready():
27
+ print("Database is already ready.")
28
+ return
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
+ file_size_mb = os.path.getsize(pdf_file.name) / (1024 * 1024)
31
+ if file_size_mb >= 75:
32
+ print("File size exceeds the 75 MB limit.")
33
+ gr.Error("File size exceeds the 75 MB limit. Please upload a smaller PDF.")
34
+ return
35
+
36
+ self.state = SessionState()
37
+ doc = fitz.open(pdf_file.name)
38
+ text = ""
39
+ for page in doc:
40
+ text += page.get_text()
41
+ doc.close()
42
+
43
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
44
+ docs = text_splitter.create_documents([text])
45
+
46
+ embeddings = GoogleGenerativeAIEmbeddings(model=EMBEDDING_MODEL)
47
+ self.state.db = Chroma.from_documents(
48
+ documents=docs,
49
+ embedding=embeddings,
50
+ persist_directory=self.state.vector_store_path
51
+ )
52
+ print("PDF processed successfully! Database is ready.")
53
+ except Exception as e:
54
+ if os.path.exists(self.state.vector_store_path):
55
+ shutil.rmtree(self.state.vector_store_path)
56
+ print(f"An error occurred: {str(e)}")
57
 
58
  def chat_with_pdf(self, message, history):
59
+ print("Chat interface called. Checking if database is ready...")
60
+ if not self.state.is_db_ready():
61
+ print("Database is not ready.")
62
  yield "Error: Database not ready."
63
  return
64
 
65
+ print("Database is ready. Retrieving relevant documents...")
66
  retriever = self.state.db.as_retriever()
67
  llm = ChatGoogleGenerativeAI(model=LLM_MODEL, temperature=0.7)
68
 
 
88
  response = rag_chain.invoke(message)
89
  yield response
90
 
91
+ def is_db_ready(self):
92
+ return self.state.db is not None
93
+
94
  class SessionState:
95
  def __init__(self):
96
  self.session_id = str(uuid.uuid4())