File size: 3,763 Bytes
46df5f0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 |
def fetch_and_compare_with_workflow(
    entry, workflow_steps, arxiv_fetcher, crossref_fetcher,
    semantic_scholar_fetcher, openalex_fetcher, dblp_fetcher, comparator
):
    """Fetch metadata from online sources and compare it against *entry*.

    Sources are tried in a fixed priority order — DBLP, Semantic Scholar,
    OpenAlex, CrossRef, arXiv. The first result the comparator flags as an
    exact match (``is_match``) is returned immediately. If no source yields
    an exact match, the partial result with the highest ``confidence`` is
    returned; if nothing was found at all, ``comparator.create_unable_result``
    is returned instead.

    Any fetcher may be ``None`` (that source is skipped). A fetcher that
    raises is treated as "no result" for that source: lookups are
    best-effort and a flaky network source must not abort the whole run.

    NOTE(review): ``workflow_steps`` is accepted for interface compatibility
    but is currently unused — the default ordering above is always applied.

    Args:
        entry: Bibliography entry with ``title``, ``doi``, ``has_arxiv``
            and ``arxiv_id`` attributes.
        workflow_steps: Ignored (see note above).
        arxiv_fetcher, crossref_fetcher, semantic_scholar_fetcher,
        openalex_fetcher, dblp_fetcher: Source clients, each optional.
        comparator: Provides the per-source ``compare_with_*`` methods and
            ``create_unable_result``.

    Returns:
        A comparison result object, or the comparator's "unable" result.
    """
    import logging
    logger = logging.getLogger(__name__)

    partial_results = []

    def _attempt(source_name, fetch, compare):
        """Run one fetch+compare pass for a single source.

        Returns an exact-match result, or None. Partial (non-match) results
        are accumulated in ``partial_results``; errors are logged and
        swallowed so one failing source never aborts the overall lookup.
        """
        try:
            metadata = fetch()
            if metadata is None:
                return None
            res = compare(metadata)
            if res.is_match:
                return res
            partial_results.append(res)
        except Exception:
            logger.debug("Lookup via %s failed; skipping", source_name,
                         exc_info=True)
        return None

    def _doi_then_title(fetcher):
        """Fetch by DOI when available, falling back to a title search."""
        meta = fetcher.fetch_by_doi(entry.doi) if entry.doi else None
        return meta or fetcher.search_by_title(entry.title)

    # 1. DBLP — high-quality metadata for CS venues.
    if dblp_fetcher and entry.title:
        match = _attempt(
            "DBLP",
            lambda: dblp_fetcher.search_by_title(entry.title),
            lambda m: comparator.compare_with_dblp(entry, m),
        )
        if match is not None:
            return match

    # 2. Semantic Scholar — broad coverage; prefers DOI lookup.
    if semantic_scholar_fetcher and entry.title:
        match = _attempt(
            "Semantic Scholar",
            lambda: _doi_then_title(semantic_scholar_fetcher),
            lambda m: comparator.compare_with_semantic_scholar(entry, m),
        )
        if match is not None:
            return match

    # 3. OpenAlex — same DOI-then-title strategy.
    if openalex_fetcher and entry.title:
        match = _attempt(
            "OpenAlex",
            lambda: _doi_then_title(openalex_fetcher),
            lambda m: comparator.compare_with_openalex(entry, m),
        )
        if match is not None:
            return match

    # 4. CrossRef — official publisher metadata; DOI required.
    if crossref_fetcher and entry.doi:
        match = _attempt(
            "CrossRef",
            lambda: crossref_fetcher.search_by_doi(entry.doi),
            lambda m: comparator.compare_with_crossref(entry, m),
        )
        if match is not None:
            return match

    # 5. arXiv — by known id, else by title search (no title fallback
    # when an arXiv id is present but resolves to nothing, matching the
    # original behavior).
    if arxiv_fetcher:
        def _fetch_arxiv():
            if entry.has_arxiv:
                return arxiv_fetcher.fetch_by_id(entry.arxiv_id)
            if entry.title:
                hits = arxiv_fetcher.search_by_title(entry.title, max_results=1)
                return hits[0] if hits else None
            return None

        match = _attempt(
            "arXiv",
            _fetch_arxiv,
            lambda m: comparator.compare_with_arxiv(entry, m),
        )
        if match is not None:
            return match

    # No exact match anywhere: return the highest-confidence partial result.
    if partial_results:
        return max(partial_results, key=lambda r: r.confidence)

    # Nothing found in any source.
    return comparator.create_unable_result(entry, "No metadata found in any source")
|