Hk4crprasad committed on
Commit
b78f929
·
verified ·
1 Parent(s): 1e4a977

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +6 -4
  2. requirements.txt +2 -1
app.py CHANGED
@@ -23,7 +23,8 @@ from langchain.chains.question_answering import load_qa_chain
23
  from langchain.prompts import PromptTemplate
24
  load_dotenv()
25
  import datetime
26
-
 
27
  history=[
28
  {
29
  "parts": [
@@ -462,9 +463,10 @@ history=[
462
  def get_pdf_text(pdf_docs):
463
  text = ""
464
  for pdf in pdf_docs:
465
- pdf_reader = PdfReader(pdf)
466
- for page in pdf_reader.pages:
467
- text += page.extract_text()
 
468
  return text
469
 
470
  def get_text_chunks(text):
 
23
  from langchain.prompts import PromptTemplate
24
  load_dotenv()
25
  import datetime
26
+ import fitz # PyMuPDF
27
+
28
  history=[
29
  {
30
  "parts": [
 
463
  def get_pdf_text(pdf_docs):
464
  text = ""
465
  for pdf in pdf_docs:
466
+ pdf_reader = fitz.open(pdf)
467
+ for page_num in range(pdf_reader.page_count):
468
+ page = pdf_reader[page_num]
469
+ text += page.get_text()
470
  return text
471
 
472
  def get_text_chunks(text):
requirements.txt CHANGED
@@ -7,4 +7,5 @@ PyPDF2
7
  chromadb
8
  faiss-cpu
9
  langchain_google_genai
10
- langchain-community
 
 
7
  chromadb
8
  faiss-cpu
9
  langchain_google_genai
10
+ langchain-community
11
+ PyMuPDF