Commit
·
379c547
1
Parent(s):
687dee7
Update utils.py
Browse files
utils.py
CHANGED
|
@@ -2,7 +2,7 @@ import re
|
|
| 2 |
import requests
|
| 3 |
import docx2txt
|
| 4 |
from io import StringIO
|
| 5 |
-
from PyPDF2 import PdfFileReader
|
| 6 |
|
| 7 |
from bs4 import BeautifulSoup
|
| 8 |
from nltk.tokenize import sent_tokenize
|
|
@@ -99,7 +99,7 @@ def preprocess_text_for_abstractive_summarization(tokenizer, text):
|
|
| 99 |
|
| 100 |
|
| 101 |
def read_pdf(file):
|
| 102 |
-
pdfReader = PdfFileReader(file)
|
| 103 |
count = pdfReader.numPages
|
| 104 |
all_page_text = ""
|
| 105 |
for i in range(count):
|
|
|
|
| 2 |
import requests
|
| 3 |
import docx2txt
|
| 4 |
from io import StringIO
|
| 5 |
+
from PyPDF2 import PdfReader
|
| 6 |
|
| 7 |
from bs4 import BeautifulSoup
|
| 8 |
from nltk.tokenize import sent_tokenize
|
|
|
|
| 99 |
|
| 100 |
|
| 101 |
def read_pdf(file):
|
| 102 |
+
pdfReader = PdfReader(file)
|
| 103 |
count = pdfReader.numPages
|
| 104 |
all_page_text = ""
|
| 105 |
for i in range(count):
|