Update arvix.py
Browse files
arvix.py
CHANGED
|
@@ -65,7 +65,7 @@ def extract_data(category):
|
|
| 65 |
tools.write_data_to_file(id, 'arxiv.txt')
|
| 66 |
else:
|
| 67 |
for id in data:
|
| 68 |
-
if len(sanitized_data) >=
|
| 69 |
break
|
| 70 |
if tools.check_data_in_file(id, 'arxiv.txt'):
|
| 71 |
continue
|
|
@@ -104,7 +104,7 @@ def extract_arxiv_data():
|
|
| 104 |
temp_id_storage = []
|
| 105 |
for subcategory in subcategories:
|
| 106 |
ids = extract_data(subcategory)
|
| 107 |
-
if len(ids) ==
|
| 108 |
for id in ids:
|
| 109 |
temp_id_storage.append(id)
|
| 110 |
else:
|
|
@@ -113,9 +113,9 @@ def extract_arxiv_data():
|
|
| 113 |
for temp_id in temp_id_storage:
|
| 114 |
all_ids.append(temp_id)
|
| 115 |
random.shuffle(all_ids)
|
| 116 |
-
if len(all_ids) >
|
| 117 |
-
print(f"Found more than
|
| 118 |
-
all_ids = all_ids[:
|
| 119 |
category_data['count'] = len(all_ids)
|
| 120 |
category_data['ids'] = all_ids
|
| 121 |
data[category] = category_data
|
|
|
|
| 65 |
tools.write_data_to_file(id, 'arxiv.txt')
|
| 66 |
else:
|
| 67 |
for id in data:
|
| 68 |
+
if len(sanitized_data) >= 3:
|
| 69 |
break
|
| 70 |
if tools.check_data_in_file(id, 'arxiv.txt'):
|
| 71 |
continue
|
|
|
|
| 104 |
temp_id_storage = []
|
| 105 |
for subcategory in subcategories:
|
| 106 |
ids = extract_data(subcategory)
|
| 107 |
+
if len(ids) == 3:
|
| 108 |
for id in ids:
|
| 109 |
temp_id_storage.append(id)
|
| 110 |
else:
|
|
|
|
| 113 |
for temp_id in temp_id_storage:
|
| 114 |
all_ids.append(temp_id)
|
| 115 |
random.shuffle(all_ids)
|
| 116 |
+
if len(all_ids) > 3:
|
| 117 |
+
print(f"Found more than 3 papers for {category}.")
|
| 118 |
+
all_ids = all_ids[:2]
|
| 119 |
category_data['count'] = len(all_ids)
|
| 120 |
category_data['ids'] = all_ids
|
| 121 |
data[category] = category_data
|