Commit ·
5a2457c
1
Parent(s): 77e6cdd
Add a "count" field (the number of IDs) to each category entry returned by extract_arxiv_data and each topic entry returned by extract_pmc_data
Browse files
arvix.py
CHANGED
|
@@ -92,7 +92,7 @@ def extract_arxiv_data():
|
|
| 92 |
break
|
| 93 |
while len(category_ids) < 4:
|
| 94 |
category_ids.add(random.choice(list(used_ids)))
|
| 95 |
-
data[category] = {"ids": list(category_ids)}
|
| 96 |
if not utils.upload_datafile('arxiv.txt'):
|
| 97 |
raise Exception("Failed to upload datafile")
|
| 98 |
return json.dumps(data, indent=4, ensure_ascii=False)
|
|
|
|
| 92 |
break
|
| 93 |
while len(category_ids) < 4:
|
| 94 |
category_ids.add(random.choice(list(used_ids)))
|
| 95 |
+
data[category] = {"ids": list(category_ids), "count": len(category_ids)}
|
| 96 |
if not utils.upload_datafile('arxiv.txt'):
|
| 97 |
raise Exception("Failed to upload datafile")
|
| 98 |
return json.dumps(data, indent=4, ensure_ascii=False)
|
pmc.py
CHANGED
|
@@ -79,7 +79,7 @@ def extract_pmc_data():
|
|
| 79 |
if not utils.check_data_in_file(pmcid, 'pmc.txt'):
|
| 80 |
utils.write_data_to_file(pmcid, 'pmc.txt')
|
| 81 |
pmc_ids.append(pmcid)
|
| 82 |
-
pmc_data[topic] = {"ids": pmc_ids}
|
| 83 |
if not utils.upload_datafile('pmc.txt'):
|
| 84 |
raise Exception("Failed to upload datafile")
|
| 85 |
return json.dumps(pmc_data, indent=4, ensure_ascii=False)
|
|
|
|
| 79 |
if not utils.check_data_in_file(pmcid, 'pmc.txt'):
|
| 80 |
utils.write_data_to_file(pmcid, 'pmc.txt')
|
| 81 |
pmc_ids.append(pmcid)
|
| 82 |
+
pmc_data[topic] = {"ids": pmc_ids, "count": len(pmc_ids)}
|
| 83 |
if not utils.upload_datafile('pmc.txt'):
|
| 84 |
raise Exception("Failed to upload datafile")
|
| 85 |
return json.dumps(pmc_data, indent=4, ensure_ascii=False)
|