# hossainlab's picture
# Deploy DeepAMR API backend
# 3255634
"""Base class for data collectors."""
import logging
from abc import ABC, abstractmethod
from pathlib import Path
import pandas as pd
from .config import CollectionConfig
class BaseCollector(ABC):
"""Abstract base class for all data collectors."""
def __init__(self, config: CollectionConfig):
self.config = config
self.logger = logging.getLogger(self.__class__.__name__)
@property
@abstractmethod
def name(self) -> str:
"""Return the name of the data source."""
pass
@property
@abstractmethod
def output_dir(self) -> Path:
"""Return the output directory for this collector."""
pass
@abstractmethod
def collect(self) -> pd.DataFrame:
"""Run the collection pipeline and return metadata DataFrame."""
pass
def setup_logging(self, log_file: Path | None = None) -> None:
"""Configure logging for this collector."""
handlers = [logging.StreamHandler()]
if log_file:
log_file.parent.mkdir(parents=True, exist_ok=True)
handlers.append(logging.FileHandler(log_file))
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
handlers=handlers,
)
def log_summary(self, df: pd.DataFrame) -> None:
"""Log a summary of collected data."""
self.logger.info(f"Collection complete for {self.name}")
self.logger.info(f"Total records: {len(df)}")
if not df.empty:
self.logger.info(f"Columns: {', '.join(df.columns[:10])}")