Spaces:
Sleeping
Sleeping
| """Source adapters that load structured data into the kiosk catalog.""" | |
| from __future__ import annotations | |
| import csv | |
| from abc import ABC, abstractmethod | |
| from dataclasses import dataclass, field | |
| from pathlib import Path | |
| from typing import Any, Callable, Dict, Iterable, List, Optional | |
| from .utils import canonicalize_name | |
@dataclass
class EntityDefinition:
    """Specification for registering an entity in the data catalog.

    Declared as a dataclass so that the annotated fields below become the
    keyword arguments of the generated ``__init__``; without the decorator
    the class accepts no constructor arguments at all.
    """

    # Name under which the entity is registered.
    name: str
    # Rows belonging to the entity, one dictionary per record.
    records: List[Dict[str, Any]]
    # Field name identifying a record; presumably the lookup key used by the
    # catalog -- confirm against the consumer.
    key_field: Optional[str] = None
    # Free-form description of where the records came from (e.g. a file path).
    origin: Optional[str] = None
    # Optional string-to-string callable; presumably applied to key values
    # before indexing -- confirm against the consumer.
    normalizer: Optional[Callable[[str], str]] = None
@dataclass
class SourceResult:
    """Payload returned by a data source.

    Declared as a dataclass so that ``field(default_factory=...)`` is turned
    into a fresh per-instance container; on a plain class those calls would
    leave raw ``Field`` objects as class attributes.
    """

    # Entity definitions produced by the source; empty when nothing was loaded.
    entities: List[EntityDefinition] = field(default_factory=list)
    # Arbitrary extra data keyed by name; empty when the source has none.
    metadata: Dict[str, Any] = field(default_factory=dict)
class DataSource(ABC):
    """Interface for ingesting structured data into the catalog.

    Concrete sources subclass this and implement :meth:`load`.
    """

    def __init__(self, name: str) -> None:
        """Store the identifying *name* of this source."""
        self.name = name

    @abstractmethod
    def load(self) -> SourceResult:
        """Read the underlying data and return it as a ``SourceResult``.

        Marked abstract so that a subclass which forgets to override it fails
        at instantiation time instead of when ``load`` is first called.
        """
        raise NotImplementedError
class CSVSource(DataSource):
    """Loads a CSV file into an entity definition."""

    def __init__(
        self,
        name: str,
        path: Path,
        entity_name: str,
        *,
        key_field: Optional[str] = None,
        normalizer: Optional[Callable[[str], str]] = None,
    ) -> None:
        """Remember where the CSV lives and how its entity is described.

        Args:
            name: Identifier of this source.
            path: Location of the CSV file.
            entity_name: Name given to the resulting entity.
            key_field: Passed through unchanged to the entity definition.
            normalizer: Passed through unchanged to the entity definition.
        """
        super().__init__(name)
        self.path = path
        self.entity_name = entity_name
        self.key_field = key_field
        self.normalizer = normalizer

    def load(self) -> SourceResult:
        """Return a result holding one entity built from the CSV rows.

        A missing file is not an error: an empty ``SourceResult`` is returned
        so that optional data files can simply be absent.
        """
        if not self.path.exists():
            return SourceResult()
        records = self._read_csv(self.path)
        entity = EntityDefinition(
            name=self.entity_name,
            records=records,
            key_field=self.key_field,
            origin=str(self.path),
            normalizer=self.normalizer,
        )
        return SourceResult(entities=[entity])

    @staticmethod
    def _read_csv(path: Path) -> List[Dict[str, Any]]:
        """Parse *path* as a headered CSV and return one dict per data row.

        Declared static because it takes no ``self``; ``load`` invokes it as
        ``self._read_csv(path)``, which would otherwise pass the instance as
        an unexpected extra argument.
        """
        # "utf-8-sig" transparently drops a leading byte-order mark;
        # newline="" is what the csv module requires for correct handling of
        # line breaks embedded in quoted fields.
        with path.open(newline="", encoding="utf-8-sig") as handle:
            reader = csv.DictReader(handle)
            return [dict(row) for row in reader]
class FeedListSource(DataSource):
    """Loads newline-delimited feed URLs into catalog metadata."""

    def __init__(self, name: str, path: Path, metadata_key: str) -> None:
        """Remember the feed file location and its metadata key.

        Args:
            name: Identifier of this source.
            path: Text file containing one URL per line.
            metadata_key: Key under which the URLs are stored in the result
                metadata.
        """
        super().__init__(name)
        self.path = path
        self.metadata_key = metadata_key

    def load(self) -> SourceResult:
        """Return a result whose metadata maps the key to ``{"urls": [...]}``.

        Blank lines are skipped and surrounding whitespace is trimmed. A
        missing file yields an empty ``SourceResult``.
        """
        if not self.path.exists():
            return SourceResult()
        # Decode with "utf-8-sig" (as the CSV reader does) so that a leading
        # byte-order mark is dropped; str.strip() does not remove U+FEFF, so
        # plain "utf-8" would leave it glued to the first URL.
        text = self.path.read_text(encoding="utf-8-sig")
        urls = [
            stripped
            for stripped in (line.strip() for line in text.splitlines())
            if stripped
        ]
        return SourceResult(metadata={self.metadata_key: {"urls": urls}})
def default_sources(base_dir: Path, *, name_normalizer: Optional[Callable[[str], str]] = None) -> List[DataSource]:
    """
    Build the stock list of data sources the backend loads from *base_dir*.

    Every CSV-backed source shares one normalizer: ``name_normalizer`` when
    given, otherwise ``canonicalize_name``. New sources can be added by
    extending the table below (or appending to the returned list) without
    touching the rest of the pipeline.
    """
    root = base_dir.resolve()
    normalize = name_normalizer or canonicalize_name

    # (source name, file name, entity name, key field)
    csv_specs = (
        ("faculty_roster", "faculty_2.csv", "faculty", "Name"),
        ("faculty_offices", "Faculty.csv", "faculty_offices", "Assignee Name"),
        ("staff_roster", "staff.csv", "staff", "Name"),
        ("students_roster", "students.csv", "students", "Name"),
        (
            "office_hours",
            "CS Office Hours Room Reservations.csv",
            "office_hours",
            "Course Name",
        ),
        ("centers_catalog", "centers.csv", "centers", "Name"),
        ("mudd_seating", "Mudd Seating Sample.csv", "mudd_seating", "Student/Visitor"),
    )

    catalog: List[DataSource] = [
        CSVSource(
            name=source_name,
            path=root / file_name,
            entity_name=entity,
            key_field=key,
            normalizer=normalize,
        )
        for source_name, file_name, entity, key in csv_specs
    ]
    # The feed list is the only non-CSV source and always comes last.
    catalog.append(
        FeedListSource(
            name="event_feeds",
            path=root / "feed.txt",
            metadata_key="event_feeds",
        )
    )
    return catalog