Commit ·
1f545ac
1
Parent(s): 4ce49ad
Refactor fetch_paper_data to ensure DOI and PDF URL are valid before fetching citation and title
Browse files
- fetch_paper_data.py +10 -9
fetch_paper_data.py
CHANGED
|
@@ -52,19 +52,20 @@ def fetch_paper_data(id):
|
|
| 52 |
else:
|
| 53 |
doi = fetch_arxiv_doi(id)
|
| 54 |
pdf_url = f"https://arxiv.org/pdf/{id}"
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
|
|
|
| 63 |
except Exception as e:
|
| 64 |
data['status'] = 'error'
|
| 65 |
print(str(e))
|
| 66 |
return json.dumps(data, indent=4, ensure_ascii=False)
|
| 67 |
|
| 68 |
if __name__ == '__main__':
|
| 69 |
-
data =
|
| 70 |
print(data)
|
|
|
|
| 52 |
else:
|
| 53 |
doi = fetch_arxiv_doi(id)
|
| 54 |
pdf_url = f"https://arxiv.org/pdf/{id}"
|
| 55 |
+
if doi and pdf_url:
|
| 56 |
+
citation = fetch_citation(doi).replace('\n', ' ').replace("<i>", "").replace("</i>", "").strip()
|
| 57 |
+
title = fetch_title(doi).replace('\n', ' ').strip()
|
| 58 |
+
data['status'] = 'success'
|
| 59 |
+
data['data'] = {}
|
| 60 |
+
data['data']['doi'] = doi
|
| 61 |
+
data['data']['title'] = title
|
| 62 |
+
data['data']['pdf_url'] = pdf_url
|
| 63 |
+
data['data']['citation'] = citation
|
| 64 |
except Exception as e:
|
| 65 |
data['status'] = 'error'
|
| 66 |
print(str(e))
|
| 67 |
return json.dumps(data, indent=4, ensure_ascii=False)
|
| 68 |
|
| 69 |
if __name__ == '__main__':
|
| 70 |
+
data = fetch_paper_data('PMC317040')
|
| 71 |
print(data)
|