Spaces:

VyLala
/

BioMetadataAudit

Running

VyLala committed on Jan 10

Commit

c7f7f09

verified ·

1 Parent(s): 9a454f4

Update NER/html/extractHTML.py

Files changed (1) hide show

NER/html/extractHTML.py CHANGED Viewed

@@ -15,8 +15,15 @@ class HTML():
   def fetch_crossref_metadata(self, doi):
     """Fetch metadata from CrossRef API for a given DOI."""
     try:
         url = f"https://api.crossref.org/works/{doi}"
-        r = requests.get(url, timeout=10)
         if r.status_code == 200:
             return r.json().get("message", {})
         else:
@@ -24,7 +31,7 @@ class HTML():
             return {}
     except Exception as e:
         print(f"❌ CrossRef exception: {e}")
-        return {}
   # def openHTMLFile(self):
   #   headers = {
   #       "User-Agent": (

   def fetch_crossref_metadata(self, doi):
     """Fetch metadata from CrossRef API for a given DOI."""
     try:
+        # Define headers with User-Agent
+        headers = {
+            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36'
+        }
         url = f"https://api.crossref.org/works/{doi}"
+        # Pass headers in the request
+        r = requests.get(url, headers=headers, timeout=10)
         if r.status_code == 200:
             return r.json().get("message", {})
         else:
             return {}
     except Exception as e:
         print(f"❌ CrossRef exception: {e}")
+        return {}
   # def openHTMLFile(self):
   #   headers = {
   #       "User-Agent": (