nikhmr1235 committed on
Commit
d64bbb5
·
verified ·
1 Parent(s): a68a0ec

fix indentation issue

Browse files
Files changed (1) hide show
  1. app.py +32 -32
app.py CHANGED
@@ -22,38 +22,38 @@ class PDFChatbot:
22
  self.state = SessionState()
23
 
24
  def process_pdf(self, pdf_file):
25
- try:
26
- if self.state.is_db_ready():
27
- print("Database is already ready.")
28
- return
29
-
30
- file_size_mb = os.path.getsize(pdf_file.name) / (1024 * 1024)
31
- if file_size_mb >= 75:
32
- print("File size exceeds the 75 MB limit.")
33
- gr.Error("File size exceeds the 75 MB limit. Please upload a smaller PDF.")
34
- return
35
-
36
- self.state = SessionState()
37
- doc = fitz.open(pdf_file.name)
38
- text = ""
39
- for page in doc:
40
- text += page.get_text()
41
- doc.close()
42
-
43
- text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
44
- docs = text_splitter.create_documents([text])
45
-
46
- embeddings = GoogleGenerativeAIEmbeddings(model=EMBEDDING_MODEL)
47
- self.state.db = Chroma.from_documents(
48
- documents=docs,
49
- embedding=embeddings,
50
- persist_directory=self.state.vector_store_path
51
- )
52
- print("PDF processed successfully! Database is ready.")
53
- except Exception as e:
54
- if os.path.exists(self.state.vector_store_path):
55
- shutil.rmtree(self.state.vector_store_path)
56
- print(f"An error occurred: {str(e)}")
57
 
58
  def chat_with_pdf(self, message, history):
59
  print("Chat interface called. Checking if database is ready...")
 
22
  self.state = SessionState()
23
 
24
  def process_pdf(self, pdf_file):
25
+ try:
26
+ if self.state.is_db_ready():
27
+ print("Database is already ready.")
28
+ return
29
+
30
+ file_size_mb = os.path.getsize(pdf_file.name) / (1024 * 1024)
31
+ if file_size_mb >= 75:
32
+ print("File size exceeds the 75 MB limit.")
33
+ gr.Error("File size exceeds the 75 MB limit. Please upload a smaller PDF.")
34
+ return
35
+
36
+ self.state = SessionState()
37
+ doc = fitz.open(pdf_file.name)
38
+ text = ""
39
+ for page in doc:
40
+ text += page.get_text()
41
+ doc.close()
42
+
43
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
44
+ docs = text_splitter.create_documents([text])
45
+
46
+ embeddings = GoogleGenerativeAIEmbeddings(model=EMBEDDING_MODEL)
47
+ self.state.db = Chroma.from_documents(
48
+ documents=docs,
49
+ embedding=embeddings,
50
+ persist_directory=self.state.vector_store_path
51
+ )
52
+ print("PDF processed successfully! Database is ready.")
53
+ except Exception as e:
54
+ if os.path.exists(self.state.vector_store_path):
55
+ shutil.rmtree(self.state.vector_store_path)
56
+ print(f"An error occurred: {str(e)}")
57
 
58
  def chat_with_pdf(self, message, history):
59
  print("Chat interface called. Checking if database is ready...")