File size: 5,708 Bytes
46df5f0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
"""
Workflow configuration for reference checking.

Allows users to customize the order and enable/disable individual fetchers
in the reference verification workflow.
"""
import json
from dataclasses import dataclass, field, asdict
from pathlib import Path
from typing import List, Optional


@dataclass
class WorkflowStep:
    """One fetcher stage of the reference checking workflow.

    Every field holds a plain value (str, bool or int), so a step converts
    to and from a dictionary without any custom encoding.
    """
    # Identifier of the step (e.g. "crossref_doi").
    name: str
    # Human-readable label for the step.
    display_name: str
    # Short explanation of what the step does.
    description: str
    # Disabled steps are left out by WorkflowConfig.get_enabled_steps().
    enabled: bool = True
    # Sort key used by WorkflowConfig.get_enabled_steps(); lower sorts first.
    priority: int = 0

    # Kind of lookup performed: 'by_id', 'by_doi' or 'by_title'.
    search_type: str = 'by_title'

    def to_dict(self) -> dict:
        """Return the step's fields as a dictionary keyed by field name."""
        snapshot = asdict(self)
        return snapshot

    @classmethod
    def from_dict(cls, data: dict) -> 'WorkflowStep':
        """Build a step from a dictionary of field values.

        The mapping is expanded into keyword arguments of the constructor,
        so a missing required field or an unknown key raises ``TypeError``.
        """
        step = cls(**data)
        return step


@dataclass
class WorkflowConfig:
    """Ordered, user-customizable set of steps for reference checking.

    A step's position in ``steps`` and its ``priority`` value are meant to
    agree: the move operations renumber priorities from list positions,
    while ``get_enabled_steps`` orders by priority.
    """
    steps: List[WorkflowStep] = field(default_factory=list)
    name: str = "default"
    description: str = "Default workflow configuration"

    def get_enabled_steps(self) -> List[WorkflowStep]:
        """Return the enabled steps ordered by ascending priority.

        The sort is stable, so steps sharing a priority keep their relative
        order from ``steps``.
        """
        return sorted(
            (step for step in self.steps if step.enabled),
            key=lambda step: step.priority,
        )

    def move_step_up(self, index: int) -> bool:
        """Swap the step at ``index`` with the one before it.

        Returns:
            True if the swap happened; False if ``index`` is the first
            position or lies outside the list.
        """
        if index <= 0 or index >= len(self.steps):
            return False
        self.steps[index], self.steps[index - 1] = self.steps[index - 1], self.steps[index]
        self._update_priorities()
        return True

    def move_step_down(self, index: int) -> bool:
        """Swap the step at ``index`` with the one after it.

        Returns:
            True if the swap happened; False if ``index`` is the last
            position or lies outside the list.
        """
        if index < 0 or index >= len(self.steps) - 1:
            return False
        self.steps[index], self.steps[index + 1] = self.steps[index + 1], self.steps[index]
        self._update_priorities()
        return True

    def toggle_step(self, index: int) -> bool:
        """Flip the ``enabled`` flag of the step at ``index``.

        Returns:
            True if a step was toggled; False if ``index`` is out of range
            (negative indices are rejected, not wrapped).
        """
        if 0 <= index < len(self.steps):
            self.steps[index].enabled = not self.steps[index].enabled
            return True
        return False

    def _update_priorities(self) -> None:
        """Renumber every step's priority to match its list position."""
        for position, step in enumerate(self.steps):
            step.priority = position

    def to_dict(self) -> dict:
        """Return the configuration as a JSON-serializable dictionary."""
        return {
            'name': self.name,
            'description': self.description,
            'steps': [step.to_dict() for step in self.steps]
        }

    @classmethod
    def from_dict(cls, data: dict) -> 'WorkflowConfig':
        """Build a configuration from its dictionary form.

        Args:
            data: Mapping with optional ``name``, ``description`` and
                ``steps`` keys, in the shape produced by ``to_dict``.

        Returns:
            A new configuration whose steps are listed in ascending
            priority order. A missing or null ``steps`` entry yields an
            empty step list.
        """
        # `or []` also covers an explicit null in the source data, which
        # would otherwise fail when iterated.
        raw_steps = data.get('steps') or []
        steps = [WorkflowStep.from_dict(raw) for raw in raw_steps]
        # Input may list steps in an order that disagrees with their
        # priority values. Aligning list order with priority here keeps the
        # index-based move operations (which renumber priorities from list
        # positions) from reshuffling unrelated steps. The sort is stable,
        # so the result of get_enabled_steps() is unchanged.
        steps.sort(key=lambda step: step.priority)
        return cls(
            steps=steps,
            name=data.get('name', 'custom'),
            description=data.get('description', '')
        )

    def save(self, filepath: str) -> None:
        """Write the configuration to ``filepath`` as indented JSON.

        Missing parent directories are created first. An existing file at
        that path is overwritten.
        """
        path = Path(filepath)
        path.parent.mkdir(parents=True, exist_ok=True)
        with open(path, 'w', encoding='utf-8') as f:
            json.dump(self.to_dict(), f, indent=2)

    @classmethod
    def load(cls, filepath: str) -> 'WorkflowConfig':
        """Read a configuration from the JSON file at ``filepath``.

        Errors from opening the file or parsing the JSON propagate to the
        caller unchanged.
        """
        with open(filepath, 'r', encoding='utf-8') as f:
            data = json.load(f)
        return cls.from_dict(data)


# Built-in workflow; the step order matches the current implementation order.
DEFAULT_WORKFLOW = WorkflowConfig(
    steps=[
        WorkflowStep(
            name=key,
            display_name=label,
            description=summary,
            enabled=True,
            priority=rank,
            search_type=kind,
        )
        # Each row is (name, display_name, description, search_type); a row's
        # position in the table becomes the step's priority.
        for rank, (key, label, summary, kind) in enumerate([
            (
                "arxiv_id",
                "arXiv by ID",
                "Look up paper by arXiv ID (highest priority for arXiv papers)",
                "by_id",
            ),
            (
                "crossref_doi",
                "CrossRef by DOI",
                "Look up paper by DOI (authoritative for DOIs)",
                "by_doi",
            ),
            (
                "semantic_scholar",
                "Semantic Scholar",
                "Official API with high quality metadata",
                "by_title",
            ),
            (
                "dblp",
                "DBLP",
                "Official API, especially good for CS publications",
                "by_title",
            ),
            (
                "openalex",
                "OpenAlex",
                "Official API with broad coverage",
                "by_title",
            ),
            (
                "arxiv_title",
                "arXiv by Title",
                "Search arXiv by title (fallback for non-ID lookups)",
                "by_title",
            ),
            (
                "crossref_title",
                "CrossRef by Title",
                "Search CrossRef by title",
                "by_title",
            ),
            # Still enabled, but placed last so it has the lowest priority.
            (
                "google_scholar",
                "Google Scholar",
                "Web scraping fallback (may be rate-limited or blocked)",
                "by_title",
            ),
        ])
    ],
    name="default",
    description="Default reference checking workflow prioritizing reliable APIs",
)


def get_default_workflow() -> WorkflowConfig:
    """Return an independent copy of ``DEFAULT_WORKFLOW``.

    The copy is rebuilt from the default's dictionary form, so reordering or
    toggling its steps leaves the module-level default untouched.
    """
    snapshot = DEFAULT_WORKFLOW.to_dict()
    return WorkflowConfig.from_dict(snapshot)