File size: 3,763 Bytes
46df5f0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
def fetch_and_compare_with_workflow(
    entry, workflow_steps, arxiv_fetcher, crossref_fetcher,
    semantic_scholar_fetcher, openalex_fetcher, dblp_fetcher, comparator
):
    """Look up ``entry`` in the online sources and compare it with what they return.

    Sources are queried in a fixed order: DBLP, Semantic Scholar, OpenAlex,
    CrossRef, arXiv. Each lookup is best-effort: an exception raised by a
    fetcher or by the comparator is logged and that source is skipped.

    Args:
        entry: Bibliography entry; ``title``, ``doi``, ``has_arxiv`` and
            ``arxiv_id`` are read from it.
        workflow_steps: Accepted for interface compatibility. It is NOT
            consulted at the moment; the fixed order above is always used.
        arxiv_fetcher: Object providing ``fetch_by_id`` and
            ``search_by_title(title, max_results=...)``, or a falsy value to
            skip arXiv.
        crossref_fetcher: Object providing ``search_by_doi``, or falsy to skip.
        semantic_scholar_fetcher: Object providing ``fetch_by_doi`` and
            ``search_by_title``, or falsy to skip.
        openalex_fetcher: Object providing ``fetch_by_doi`` and
            ``search_by_title``, or falsy to skip.
        dblp_fetcher: Object providing ``search_by_title``, or falsy to skip.
        comparator: Object providing the ``compare_with_<source>`` methods and
            ``create_unable_result``.

    Returns:
        The first comparison result whose ``is_match`` is truthy. Otherwise
        the collected result with the highest ``confidence`` (the earliest
        source wins ties). If no source produced a result at all, the value of
        ``comparator.create_unable_result(entry, ...)``.
    """
    import logging

    logger = logging.getLogger(__name__)

    def by_doi_then_title(fetcher):
        # A DOI lookup is exact, so try it first; only fall back to a title
        # search when there is a title to search for.
        found = fetcher.fetch_by_doi(entry.doi) if entry.doi else None
        if not found and entry.title:
            found = fetcher.search_by_title(entry.title)
        return found

    def from_arxiv():
        if entry.has_arxiv:
            return arxiv_fetcher.fetch_by_id(entry.arxiv_id)
        if entry.title:
            hits = arxiv_fetcher.search_by_title(entry.title, max_results=1)
            if hits:
                return hits[0]
        return None

    # (label, comparator method name, enabled?, zero-argument fetch callable).
    # The comparator method is resolved by name inside the try block below so
    # that a missing method only disables its own source.
    steps = (
        ("DBLP", "compare_with_dblp",
         dblp_fetcher and entry.title,
         lambda: dblp_fetcher.search_by_title(entry.title)),
        # Semantic Scholar and OpenAlex can be queried by DOI alone, so an
        # entry without a title must not skip them.
        ("Semantic Scholar", "compare_with_semantic_scholar",
         semantic_scholar_fetcher and (entry.title or entry.doi),
         lambda: by_doi_then_title(semantic_scholar_fetcher)),
        ("OpenAlex", "compare_with_openalex",
         openalex_fetcher and (entry.title or entry.doi),
         lambda: by_doi_then_title(openalex_fetcher)),
        ("CrossRef", "compare_with_crossref",
         crossref_fetcher and entry.doi,
         lambda: crossref_fetcher.search_by_doi(entry.doi)),
        ("arXiv", "compare_with_arxiv",
         arxiv_fetcher,
         from_arxiv),
    )

    candidates = []
    for label, compare_name, enabled, fetch in steps:
        if not enabled:
            continue
        try:
            metadata = fetch()
            if not metadata:
                continue
            outcome = getattr(comparator, compare_name)(entry, metadata)
            if outcome.is_match:
                # A confirmed match ends the search immediately.
                return outcome
        except Exception as exc:
            # Best-effort: one failing source must not abort the others, but
            # the failure should be visible instead of silently dropped.
            logger.warning("%s lookup failed: %s", label, exc)
            logger.debug("%s lookup traceback", label, exc_info=True)
            continue
        candidates.append(outcome)

    if candidates:
        # max() returns the first maximal element, i.e. the earliest source
        # among equally confident results.
        return max(candidates, key=lambda result: result.confidence)

    return comparator.create_unable_result(entry, "No metadata found in any source")