raannakasturi committed on
Commit
1f545ac
·
1 Parent(s): 4ce49ad

Refactor fetch_paper_data to ensure DOI and PDF URL are valid before fetching citation and title

Browse files
Files changed (1) hide show
  1. fetch_paper_data.py +10 -9
fetch_paper_data.py CHANGED
@@ -52,19 +52,20 @@ def fetch_paper_data(id):
52
  else:
53
  doi = fetch_arxiv_doi(id)
54
  pdf_url = f"https://arxiv.org/pdf/{id}"
55
- citation = fetch_citation(doi).replace('\n', ' ').replace("<i>", "").replace("</i>", "").strip()
56
- title = fetch_title(doi).replace('\n', ' ').strip()
57
- data['status'] = 'success'
58
- data['data'] = {}
59
- data['data']['doi'] = doi
60
- data['data']['title'] = title
61
- data['data']['pdf_url'] = pdf_url
62
- data['data']['citation'] = citation
 
63
  except Exception as e:
64
  data['status'] = 'error'
65
  print(str(e))
66
  return json.dumps(data, indent=4, ensure_ascii=False)
67
 
68
  if __name__ == '__main__':
69
- data = fetch_pmc_pdf('PMC317040')
70
  print(data)
 
52
  else:
53
  doi = fetch_arxiv_doi(id)
54
  pdf_url = f"https://arxiv.org/pdf/{id}"
55
+ if doi and pdf_url:
56
+ citation = fetch_citation(doi).replace('\n', ' ').replace("<i>", "").replace("</i>", "").strip()
57
+ title = fetch_title(doi).replace('\n', ' ').strip()
58
+ data['status'] = 'success'
59
+ data['data'] = {}
60
+ data['data']['doi'] = doi
61
+ data['data']['title'] = title
62
+ data['data']['pdf_url'] = pdf_url
63
+ data['data']['citation'] = citation
64
  except Exception as e:
65
  data['status'] = 'error'
66
  print(str(e))
67
  return json.dumps(data, indent=4, ensure_ascii=False)
68
 
69
  if __name__ == '__main__':
70
+ data = fetch_paper_data('PMC317040')
71
  print(data)