Spaces:
Running
Running
Update NER/html/extractHTML.py
Browse files - NER/html/extractHTML.py +9 -2
NER/html/extractHTML.py
CHANGED
|
@@ -15,8 +15,15 @@ class HTML():
|
|
| 15 |
def fetch_crossref_metadata(self, doi):
|
| 16 |
"""Fetch metadata from CrossRef API for a given DOI."""
|
| 17 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
url = f"https://api.crossref.org/works/{doi}"
|
| 19 |
-
|
|
|
|
|
|
|
| 20 |
if r.status_code == 200:
|
| 21 |
return r.json().get("message", {})
|
| 22 |
else:
|
|
@@ -24,7 +31,7 @@ class HTML():
|
|
| 24 |
return {}
|
| 25 |
except Exception as e:
|
| 26 |
print(f"❌ CrossRef exception: {e}")
|
| 27 |
-
return {}
|
| 28 |
# def openHTMLFile(self):
|
| 29 |
# headers = {
|
| 30 |
# "User-Agent": (
|
|
|
|
| 15 |
def fetch_crossref_metadata(self, doi):
|
| 16 |
"""Fetch metadata from CrossRef API for a given DOI."""
|
| 17 |
try:
|
| 18 |
+
# Define headers with User-Agent
|
| 19 |
+
headers = {
|
| 20 |
+
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36'
|
| 21 |
+
}
|
| 22 |
+
|
| 23 |
url = f"https://api.crossref.org/works/{doi}"
|
| 24 |
+
# Pass headers in the request
|
| 25 |
+
r = requests.get(url, headers=headers, timeout=10)
|
| 26 |
+
|
| 27 |
if r.status_code == 200:
|
| 28 |
return r.json().get("message", {})
|
| 29 |
else:
|
|
|
|
| 31 |
return {}
|
| 32 |
except Exception as e:
|
| 33 |
print(f"❌ CrossRef exception: {e}")
|
| 34 |
+
return {}
|
| 35 |
# def openHTMLFile(self):
|
| 36 |
# headers = {
|
| 37 |
# "User-Agent": (
|