Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -16,7 +16,10 @@ def download_pdf(url, output_path):
|
|
| 16 |
| 17 |   def extract_zip(file):
| 18 |       with zipfile.ZipFile(file, 'r') as zip_ref:
| 19 | -         zip_ref.
| 20 |
| 21 |   def preprocess(text):
| 22 |       text = text.replace('\n', ' ')
|
@@ -94,8 +97,9 @@ def load_recommender(paths, start_page=1):
|
|
| 94 |       global recommender
| 95 |       chunks = []
| 96 |       for path in paths:
| 97 | -
| 98 | -
| 99 |       recommender.fit(chunks)
| 100 |       return 'Corpus Loaded.'
| 101 |
|
|
|
| 16 |
| 17 |   def extract_zip(file):
| 18 |       with zipfile.ZipFile(file, 'r') as zip_ref:
| 19 | +         for member in zip_ref.namelist():
| 20 | +             filename = os.path.basename(member)
| 21 | +             if filename.endswith('.pdf'):
| 22 | +                 zip_ref.extract(member, 'pdfs')
| 23 |
| 24 |   def preprocess(text):
| 25 |       text = text.replace('\n', ' ')
|
|
|
| 97 |       global recommender
| 98 |       chunks = []
| 99 |       for path in paths:
| 100 | +         if path.endswith('.pdf'):
| 101 | +             texts = pdf_to_text(path, start_page=start_page)
| 102 | +             chunks += text_to_chunks(texts, start_page=start_page)
| 103 |       recommender.fit(chunks)
| 104 |       return 'Corpus Loaded.'
| 105 |