File size: 1,660 Bytes
fcffa22 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 | """
Bibliography-level checker that flags retracted DOIs.
Unlike the LaTeX-line checkers in src/checkers/, this one operates on parsed
BibEntry objects, not on a tex_content string. main.py / app.py invoke it
directly via `RetractionChecker().check_entries(entries)`.
"""
from __future__ import annotations
import concurrent.futures
import logging
from dataclasses import dataclass
from typing import Iterable, List
from src.fetchers.retraction_fetcher import RetractionFetcher, RetractionResult
from src.parsers.bib_parser import BibEntry
logger = logging.getLogger(__name__)
@dataclass
class RetractionFinding:
    """One bibliography entry whose DOI lookup produced a reportable result.

    Instances are created by ``RetractionChecker.check_entries`` for entries
    whose lookup result is flagged as retracted or carries an update type.
    """

    # Key of the bibliography entry the finding belongs to.
    entry_key: str
    # DOI that was looked up for that entry.
    doi: str
    # Lookup result as returned by the retraction fetcher.
    result: RetractionResult
class RetractionChecker:
    """Concurrent batch retraction lookup over parsed bibliography entries.

    Every entry with a non-empty ``doi`` attribute is passed to
    ``RetractionFetcher.check`` on a thread pool. Entries whose result is
    flagged as retracted, or carries an ``update_type``, are reported as
    ``RetractionFinding`` objects.
    """

    def __init__(self, max_workers: int = 6):
        """Create the checker.

        Args:
            max_workers: Thread-pool size, passed unchanged to
                ``ThreadPoolExecutor`` on each ``check_entries`` call.
        """
        self.fetcher = RetractionFetcher()
        self.max_workers = max_workers

    def _check_one(self, entry: BibEntry):
        """Look up a single entry, returning ``None`` if the lookup raises.

        ``Executor.map`` re-raises a worker's exception when its result is
        retrieved, which would abort the whole batch and discard findings
        for every other entry. Failures are therefore logged and reported
        as ``None``, which ``check_entries`` already treats as "no result".
        """
        try:
            return self.fetcher.check(entry.doi)
        except Exception:  # boundary: one bad lookup must not sink the batch
            logger.warning(
                "Retraction lookup failed for entry %s (DOI %s)",
                getattr(entry, "key", "?"),
                entry.doi,
                exc_info=True,
            )
            return None

    def check_entries(self, entries: Iterable[BibEntry]) -> List[RetractionFinding]:
        """Look up retraction status for every entry that has a DOI.

        Args:
            entries: Parsed bibliography entries. Entries with a missing or
                empty ``doi`` attribute are skipped.

        Returns:
            One ``RetractionFinding`` per entry whose lookup result has
            ``is_retracted`` set or a truthy ``update_type``, in the same
            order as the input. Entries whose lookup returned ``None`` or
            raised are omitted.
        """
        with_doi = [e for e in entries if getattr(e, "doi", "")]
        if not with_doi:
            return []

        with concurrent.futures.ThreadPoolExecutor(max_workers=self.max_workers) as ex:
            # map() yields results in input order, so zip() below pairs each
            # result with the entry it was fetched for.
            results = list(ex.map(self._check_one, with_doi))

        findings: List[RetractionFinding] = []
        for entry, res in zip(with_doi, results):
            if res is None:
                continue
            if res.is_retracted or res.update_type:
                findings.append(RetractionFinding(entry.key, entry.doi, res))
        return findings
|