Spaces:
Sleeping
Sleeping
fix indentation issue
Browse files
app.py
CHANGED
|
@@ -22,38 +22,38 @@ class PDFChatbot:
|
|
| 22 |
self.state = SessionState()
|
| 23 |
|
| 24 |
def process_pdf(self, pdf_file):
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
|
| 58 |
def chat_with_pdf(self, message, history):
|
| 59 |
print("Chat interface called. Checking if database is ready...")
|
|
|
|
| 22 |
self.state = SessionState()
|
| 23 |
|
| 24 |
def process_pdf(self, pdf_file):
    """Extract the text of an uploaded PDF and index it in a Chroma store.

    Does nothing when ``self.state`` already reports a ready database.
    Otherwise the session state is replaced with a fresh ``SessionState``,
    the PDF text is split into overlapping chunks, embedded, and persisted
    under ``self.state.vector_store_path``.

    Args:
        pdf_file: Object whose ``name`` attribute is the filesystem path
            of the uploaded PDF.

    Raises:
        gr.Error: If the file is 75 MB or larger.

    Any other failure is printed rather than propagated, after removing
    the vector store directory if it exists.
    """
    try:
        if self.state.is_db_ready():
            print("Database is already ready.")
            return

        file_size_mb = os.path.getsize(pdf_file.name) / (1024 * 1024)
        if file_size_mb >= 75:
            print("File size exceeds the 75 MB limit.")
            # gr.Error is an exception type: merely constructing it shows
            # nothing to the user, it has to be raised.
            raise gr.Error("File size exceeds the 75 MB limit. Please upload a smaller PDF.")

        self.state = SessionState()

        # The context manager closes the document even when text
        # extraction fails part-way through.
        with fitz.open(pdf_file.name) as doc:
            text = "".join(page.get_text() for page in doc)

        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
        docs = text_splitter.create_documents([text])

        embeddings = GoogleGenerativeAIEmbeddings(model=EMBEDDING_MODEL)
        self.state.db = Chroma.from_documents(
            documents=docs,
            embedding=embeddings,
            persist_directory=self.state.vector_store_path,
        )
        print("PDF processed successfully! Database is ready.")
    except gr.Error:
        # Let user-facing errors through; the generic handler below would
        # otherwise swallow them.
        raise
    except Exception as e:
        # Drop any partially written vector store so a later upload
        # starts from a clean directory.
        if os.path.exists(self.state.vector_store_path):
            shutil.rmtree(self.state.vector_store_path)
        print(f"An error occurred: {e}")
|
| 57 |
|
| 58 |
def chat_with_pdf(self, message, history):
|
| 59 |
print("Chat interface called. Checking if database is ready...")
|