"""Source adapters that load structured data into the kiosk catalog.""" from __future__ import annotations import csv from abc import ABC, abstractmethod from dataclasses import dataclass, field from pathlib import Path from typing import Any, Callable, Dict, Iterable, List, Optional from .utils import canonicalize_name @dataclass class EntityDefinition: """Specification for registering an entity in the data catalog.""" name: str records: List[Dict[str, Any]] key_field: Optional[str] = None origin: Optional[str] = None normalizer: Optional[Callable[[str], str]] = None @dataclass class SourceResult: """Payload returned by a data source.""" entities: List[EntityDefinition] = field(default_factory=list) metadata: Dict[str, Any] = field(default_factory=dict) class DataSource(ABC): """Interface for ingesting structured data into the catalog.""" def __init__(self, name: str) -> None: self.name = name @abstractmethod def load(self) -> SourceResult: raise NotImplementedError class CSVSource(DataSource): """Loads a CSV file into an entity definition.""" def __init__( self, name: str, path: Path, entity_name: str, *, key_field: Optional[str] = None, normalizer: Optional[Callable[[str], str]] = None, ) -> None: super().__init__(name) self.path = path self.entity_name = entity_name self.key_field = key_field self.normalizer = normalizer def load(self) -> SourceResult: if not self.path.exists(): return SourceResult() records = self._read_csv(self.path) entity = EntityDefinition( name=self.entity_name, records=records, key_field=self.key_field, origin=str(self.path), normalizer=self.normalizer, ) return SourceResult(entities=[entity]) @staticmethod def _read_csv(path: Path) -> List[Dict[str, Any]]: with path.open(newline="", encoding="utf-8-sig") as handle: reader = csv.DictReader(handle) return [dict(row) for row in reader] class FeedListSource(DataSource): """Loads newline-delimited feed URLs into catalog metadata.""" def __init__(self, name: str, path: Path, metadata_key: str) -> None: super().__init__(name) self.path = path self.metadata_key = metadata_key def load(self) -> SourceResult: if not self.path.exists(): return SourceResult() urls = [ line.strip() for line in self.path.read_text(encoding="utf-8").splitlines() if line.strip() ] return SourceResult(metadata={self.metadata_key: {"urls": urls}}) def default_sources(base_dir: Path, *, name_normalizer: Optional[Callable[[str], str]] = None) -> List[DataSource]: """ Produce the default set of data sources used by the backend. Additional sources (e.g., TA office hours) can be appended to this list without modifying the rest of the pipeline. """ base_dir = base_dir.resolve() normalizer = name_normalizer or canonicalize_name sources: List[DataSource] = [ CSVSource( name="faculty_roster", path=base_dir / "faculty_2.csv", entity_name="faculty", key_field="Name", normalizer=normalizer, ), CSVSource( name="faculty_offices", path=base_dir / "Faculty.csv", entity_name="faculty_offices", key_field="Assignee Name", normalizer=normalizer, ), CSVSource( name="staff_roster", path=base_dir / "staff.csv", entity_name="staff", key_field="Name", normalizer=normalizer, ), CSVSource( name="students_roster", path=base_dir / "students.csv", entity_name="students", key_field="Name", normalizer=normalizer, ), CSVSource( name="office_hours", path=base_dir / "CS Office Hours Room Reservations.csv", entity_name="office_hours", key_field="Course Name", normalizer=normalizer, ), CSVSource( name="centers_catalog", path=base_dir / "centers.csv", entity_name="centers", key_field="Name", normalizer=normalizer, ), CSVSource( name="mudd_seating", path=base_dir / "Mudd Seating Sample.csv", entity_name="mudd_seating", key_field="Student/Visitor", normalizer=normalizer, ), FeedListSource( name="event_feeds", path=base_dir / "feed.txt", metadata_key="event_feeds", ), ] return sources