File size: 1,660 Bytes
fcffa22
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
"""
Bibliography-level checker that flags retracted DOIs.

Unlike the LaTeX-line checkers in src/checkers/, this one operates on parsed
BibEntry objects, not on a tex_content string. main.py / app.py invoke it
directly via `RetractionChecker().check_entries(entries)`.
"""
from __future__ import annotations

import concurrent.futures
import logging
from dataclasses import dataclass
from typing import Iterable, List

from src.fetchers.retraction_fetcher import RetractionFetcher, RetractionResult
from src.parsers.bib_parser import BibEntry

logger = logging.getLogger(__name__)


@dataclass
class RetractionFinding:
    """One bibliography entry whose DOI lookup came back flagged.

    Instances are built by ``RetractionChecker.check_entries`` for entries
    whose lookup result has a truthy ``is_retracted`` or ``update_type``.
    """

    # Key of the bibliography entry (taken from ``BibEntry.key``).
    entry_key: str
    # DOI that was looked up (taken from ``BibEntry.doi``).
    doi: str
    # Lookup result returned by ``RetractionFetcher.check`` for that DOI.
    result: RetractionResult


class RetractionChecker:
    """Concurrent batch retraction lookup.

    Every entry that carries a DOI is passed to ``RetractionFetcher.check``
    on a thread pool; entries whose result is flagged are returned as
    ``RetractionFinding`` records.

    Attributes:
        fetcher: The ``RetractionFetcher`` instance used for every lookup.
        max_workers: Maximum number of lookup threads in the pool.
    """

    def __init__(self, max_workers: int = 6):
        self.fetcher = RetractionFetcher()
        self.max_workers = max_workers

    def _lookup(self, entry: BibEntry) -> RetractionResult | None:
        """Return the fetcher's result for *entry*, or ``None`` if it raised."""
        try:
            return self.fetcher.check(entry.doi)
        except Exception:
            # Worker boundary: ``Executor.map`` would re-raise this while the
            # results are consumed and abort the whole batch. Log it and treat
            # the entry like a lookup that returned no result instead.
            # ``getLogger(__name__)`` is the module's own logger.
            logging.getLogger(__name__).warning(
                "Retraction lookup failed for DOI %s", entry.doi, exc_info=True
            )
            return None

    def check_entries(self, entries: Iterable[BibEntry]) -> List[RetractionFinding]:
        """Look up retraction status for every entry that has a DOI.

        Args:
            entries: Parsed bibliography entries. Entries with a missing or
                empty ``doi`` attribute are ignored.

        Returns:
            One ``RetractionFinding`` per entry whose lookup result has a
            truthy ``is_retracted`` or ``update_type``, in input order.
            Entries whose lookup returned ``None`` or raised are skipped
            (a raised lookup is logged as a warning).
        """
        with_doi = [e for e in entries if getattr(e, "doi", "")]
        if not with_doi:
            return []

        with concurrent.futures.ThreadPoolExecutor(max_workers=self.max_workers) as ex:
            # ``map`` yields results in the same order as ``with_doi``.
            results = list(ex.map(self._lookup, with_doi))

        return [
            RetractionFinding(entry.key, entry.doi, res)
            for entry, res in zip(with_doi, results)
            if res is not None and (res.is_retracted or res.update_type)
        ]