GirishaBuilds01 committed on
Commit
e25f854
·
verified ·
1 Parent(s): d8c3a2c

Create pdf_parser.py

Browse files
Files changed (1) hide show
  1. pdf_parser.py +13 -0
pdf_parser.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import fitz
2
+
3
def extract_text(file) -> str:
    """Return the text of every page of a PDF, concatenated in iteration order.

    Args:
        file: An object with a ``read()`` method whose result is passed to
            ``fitz.open`` as the ``stream`` argument (presumably a binary
            file or upload object yielding the raw PDF bytes -- confirm
            against the caller).

    Returns:
        The ``get_text()`` output of each page joined together with no
        separator; an empty string when the document yields no pages.
    """
    # Open from memory rather than from a path; the context manager makes
    # sure the document is closed even if text extraction raises.
    with fitz.open(stream=file.read(), filetype="pdf") as doc:
        # Join once instead of growing a string with += on every page.
        return "".join(page.get_text() for page in doc)