VyLala committed on
Commit
c7f7f09
·
verified ·
1 Parent(s): 9a454f4

Update NER/html/extractHTML.py

Browse files
Files changed (1) hide show
  1. NER/html/extractHTML.py +9 -2
NER/html/extractHTML.py CHANGED
@@ -15,8 +15,15 @@ class HTML():
15
  def fetch_crossref_metadata(self, doi):
16
  """Fetch metadata from CrossRef API for a given DOI."""
17
  try:
 
 
 
 
 
18
  url = f"https://api.crossref.org/works/{doi}"
19
- r = requests.get(url, timeout=10)
 
 
20
  if r.status_code == 200:
21
  return r.json().get("message", {})
22
  else:
@@ -24,7 +31,7 @@ class HTML():
24
  return {}
25
  except Exception as e:
26
  print(f"❌ CrossRef exception: {e}")
27
- return {}
28
  # def openHTMLFile(self):
29
  # headers = {
30
  # "User-Agent": (
 
15
  def fetch_crossref_metadata(self, doi):
16
  """Fetch metadata from CrossRef API for a given DOI."""
17
  try:
18
+ # Define headers with User-Agent
19
+ headers = {
20
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36'
21
+ }
22
+
23
  url = f"https://api.crossref.org/works/{doi}"
24
+ # Pass headers in the request
25
+ r = requests.get(url, headers=headers, timeout=10)
26
+
27
  if r.status_code == 200:
28
  return r.json().get("message", {})
29
  else:
 
31
  return {}
32
  except Exception as e:
33
  print(f"❌ CrossRef exception: {e}")
34
+ return {}
35
  # def openHTMLFile(self):
36
  # headers = {
37
  # "User-Agent": (