raannakasturi committed on
Commit
16f8a3f
·
1 Parent(s): 1f545ac

Update fetch_citation to use new API endpoint and improved headers for citation retrieval

Browse files
Files changed (1) hide show
  1. fetch_paper_data.py +22 -3
fetch_paper_data.py CHANGED
@@ -32,8 +32,27 @@ def fetch_arxiv_doi(arxiv_id):
32
  return doi
33
 
34
  def fetch_citation(doi):
35
- citation_content = requests.get(doi, headers={ 'User-Agent':HEADERS['User-Agent'], 'Accept': 'text/x-bibliography; style=apa'}).content
36
- return citation_content.decode('utf-8')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
 
38
  def fetch_title(doi):
39
  title_content = requests.get(doi, headers={ 'User-Agent':HEADERS['User-Agent'], 'Accept': 'text/x-bibliography; style=bibtex'}).content
@@ -68,4 +87,4 @@ def fetch_paper_data(id):
68
 
69
  if __name__ == '__main__':
70
  data = fetch_paper_data('PMC317040')
71
- print(data)
 
32
  return doi
33
 
34
  def fetch_citation(doi):
35
+ url = f"https://citation.crosscite.org/format?doi={doi}&style=apa&lang=en-US"
36
+ headers = {
37
+ "accept": "text/plain, */*; q=0.01",
38
+ "accept-language": "en-US,en-GB;q=0.9,en;q=0.8",
39
+ "priority": "u=1, i",
40
+ "sec-ch-ua": '"Google Chrome";v="131", "Chromium";v="131", "Not_A Brand";v="24"',
41
+ "sec-ch-ua-mobile": "?1",
42
+ "sec-ch-ua-platform": '"Android"',
43
+ "sec-fetch-dest": "empty",
44
+ "sec-fetch-mode": "cors",
45
+ "sec-fetch-site": "same-origin",
46
+ "x-requested-with": "XMLHttpRequest",
47
+ "Referer": "https://citation.crosscite.org/",
48
+ "Referrer-Policy": "strict-origin-when-cross-origin"
49
+ }
50
+
51
+ response = requests.get(url, headers=headers)
52
+ if response.status_code == 200:
53
+ return response.text
54
+ else:
55
+ response.raise_for_status()
56
 
57
  def fetch_title(doi):
58
  title_content = requests.get(doi, headers={ 'User-Agent':HEADERS['User-Agent'], 'Accept': 'text/x-bibliography; style=bibtex'}).content
 
87
 
88
  if __name__ == '__main__':
89
  data = fetch_paper_data('PMC317040')
90
+ print(data)