Spaces:

cogcorp
/

assignment1

Sleeping

cogcorp committed on May 24, 2023

Commit

15e8c54

1 Parent(s): 8e74737

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -16,7 +16,10 @@ def download_pdf(url, output_path):
 def extract_zip(file):
     with zipfile.ZipFile(file, 'r') as zip_ref:
-        zip_ref.extractall('pdfs')
 def preprocess(text):
     text = text.replace('\n', ' ')
@@ -94,8 +97,9 @@ def load_recommender(paths, start_page=1):
     global recommender
     chunks = []
     for path in paths:
-        texts = pdf_to_text(path, start_page=start_page)
-        chunks += text_to_chunks(texts, start_page=start_page)
     recommender.fit(chunks)
     return 'Corpus Loaded.'

 def extract_zip(file):
     with zipfile.ZipFile(file, 'r') as zip_ref:
+        for member in zip_ref.namelist():
+            filename = os.path.basename(member)
+            if filename.endswith('.pdf'):
+                zip_ref.extract(member, 'pdfs')
 def preprocess(text):
     text = text.replace('\n', ' ')
     global recommender
     chunks = []
     for path in paths:
+        if path.endswith('.pdf'):
+            texts = pdf_to_text(path, start_page=start_page)
+            chunks += text_to_chunks(texts, start_page=start_page)
     recommender.fit(chunks)
     return 'Corpus Loaded.'