Spaces:
Build error
Build error
download analysis
Browse files- src/Surveyor.py +13 -9
src/Surveyor.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
|
|
| 1 |
from arxiv_public_data.fulltext import convert_directory_parallel
|
| 2 |
from arxiv_public_data import internal_citations
|
| 3 |
import torch
|
|
@@ -1329,7 +1330,7 @@ class Surveyor:
|
|
| 1329 |
self.print_fn("outputs: " + outputs)
|
| 1330 |
return outputs
|
| 1331 |
|
| 1332 |
-
def zip_outputs(self, dump_dir,
|
| 1333 |
import zipfile
|
| 1334 |
def zipdir(path, ziph):
|
| 1335 |
# ziph is zipfile handle
|
|
@@ -1339,10 +1340,9 @@ class Surveyor:
|
|
| 1339 |
os.path.relpath(os.path.join(root, file),
|
| 1340 |
os.path.join(path, '../..')))
|
| 1341 |
|
| 1342 |
-
|
| 1343 |
zipf = zipfile.ZipFile(zip_name, 'w', zipfile.ZIP_DEFLATED)
|
| 1344 |
zipdir(dump_dir, zipf)
|
| 1345 |
-
return zip_name
|
| 1346 |
|
| 1347 |
def survey(self, query=None, id_list=None, max_search=None, num_papers=None, debug=False, weigh_authors=False):
|
| 1348 |
import joblib
|
|
@@ -1468,17 +1468,21 @@ class Surveyor:
|
|
| 1468 |
query = self.generate_title(' '.join([v for v in clustered_sections.values()]))
|
| 1469 |
|
| 1470 |
survey_file = 'A_Survey_on_' + query.replace(' ', '_') + '.txt'
|
| 1471 |
-
|
|
|
|
| 1472 |
|
| 1473 |
self.survey_print_fn("\n-citation-network: ")
|
| 1474 |
self.survey_print_fn(cites)
|
| 1475 |
|
| 1476 |
shutil.copytree('arxiv_data/', self.dump_dir + '/arxiv_data/')
|
| 1477 |
-
shutil.copy(self.dump_dir + survey_file, survey_file)
|
| 1478 |
assert (os.path.exists(survey_file))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1479 |
self.print_fn(str(list(Path(self.dump_dir).resolve().glob('*'))))
|
| 1480 |
-
output_zip = self.zip_outputs(self.dump_dir, query)
|
| 1481 |
-
self.print_fn("\n- Survey complete.. \nSurvey file path :" + os.path.abspath(
|
| 1482 |
-
survey_file) + "\nAll outputs zip path :" + os.path.abspath(self.dump_dir + output_zip))
|
| 1483 |
|
| 1484 |
-
return
|
|
|
|
| 1 |
+
from pathlib import Path
|
| 2 |
from arxiv_public_data.fulltext import convert_directory_parallel
|
| 3 |
from arxiv_public_data import internal_citations
|
| 4 |
import torch
|
|
|
|
| 1330 |
self.print_fn("outputs: " + outputs)
|
| 1331 |
return outputs
|
| 1332 |
|
| 1333 |
+
def zip_outputs(self, dump_dir, zip_name):
|
| 1334 |
import zipfile
|
| 1335 |
def zipdir(path, ziph):
|
| 1336 |
# ziph is zipfile handle
|
|
|
|
| 1340 |
os.path.relpath(os.path.join(root, file),
|
| 1341 |
os.path.join(path, '../..')))
|
| 1342 |
|
| 1343 |
+
|
| 1344 |
zipf = zipfile.ZipFile(zip_name, 'w', zipfile.ZIP_DEFLATED)
|
| 1345 |
zipdir(dump_dir, zipf)
|
|
|
|
| 1346 |
|
| 1347 |
def survey(self, query=None, id_list=None, max_search=None, num_papers=None, debug=False, weigh_authors=False):
|
| 1348 |
import joblib
|
|
|
|
| 1468 |
query = self.generate_title(' '.join([v for v in clustered_sections.values()]))
|
| 1469 |
|
| 1470 |
survey_file = 'A_Survey_on_' + query.replace(' ', '_') + '.txt'
|
| 1471 |
+
survey_file = Path(self.dump_dir).resolve() / survey_file
|
| 1472 |
+
self.build_doc(clustered_sections, papers_standardized, query=query, filename=str(survey_file))
|
| 1473 |
|
| 1474 |
self.survey_print_fn("\n-citation-network: ")
|
| 1475 |
self.survey_print_fn(cites)
|
| 1476 |
|
| 1477 |
shutil.copytree('arxiv_data/', self.dump_dir + '/arxiv_data/')
|
|
|
|
| 1478 |
assert (os.path.exists(survey_file))
|
| 1479 |
+
|
| 1480 |
+
zip_name = 'arxiv_dumps_'+query.replace(' ', '_')+'.zip'
|
| 1481 |
+
zip_name = Path(self.dump_dir).parent.resolve() / zip_name
|
| 1482 |
+
self.zip_outputs(self.dump_dir, str(zip_name))
|
| 1483 |
+
self.print_fn("\n- Survey complete.. \nSurvey file path :" + str(survey_file) +
|
| 1484 |
+
"\nAll outputs zip path :" + str(zipname))
|
| 1485 |
+
|
| 1486 |
self.print_fn(str(list(Path(self.dump_dir).resolve().glob('*'))))
|
|
|
|
|
|
|
|
|
|
| 1487 |
|
| 1488 |
+
return str(zip_name.resolve()), str(zipname.resolve())
|