mod-osint / modules /ingestion /ingest_data.py
moddux's picture
deploy: HF sanitized GUI snapshot
b75c637
"""
Ingestion module — reads input files and produces NormalizedRecords.
Engine contract:
run(EngineInput) -> EngineOutput
The heavy lifting (file parsing, entity extraction) is delegated to
``engine.normalize``. This module acts as the pipeline-stage wrapper.
"""
from __future__ import annotations
import logging
from typing import Any, Dict
from engine.io_contract import EngineInput, EngineOutput, NormalizedRecord, StageStatus
from engine.normalize import normalize_files
# Module-level logger registered under the fixed name "modules.ingestion"
# (a literal string, not derived from ``__name__``); ``run()`` uses it to
# report ingestion failures.
logger = logging.getLogger("modules.ingestion")
def run(engine_input: EngineInput) -> EngineOutput:
    """
    Pipeline-stage entry point for ingestion.

    Hands ``engine_input.input_spec`` to ``normalize_files`` and wraps the
    result in an ``EngineOutput`` tagged with the ``"ingestion"`` stage.

    On success the output carries ``StageStatus.SUCCESS``, the returned
    records, and a summary with the record and file counts. If any
    ``Exception`` is raised while normalizing or while building the success
    output, it is logged with its traceback and an output carrying
    ``StageStatus.FAILED`` and the exception text is returned instead.
    """
    stage_name = "ingestion"
    try:
        normalized = normalize_files(engine_input.input_spec)
        record_count = len(normalized)
        file_count = len(engine_input.input_spec.files)
        outcome = EngineOutput(
            stage=stage_name,
            status=StageStatus.SUCCESS,
            records=normalized,
            summary=f"Ingested {record_count} records from {file_count} files",
        )
    except Exception as failure:
        # Stage boundary: report the failure through the output object
        # rather than letting it propagate to the caller.
        logger.error("Ingestion failed: %s", failure, exc_info=True)
        outcome = EngineOutput(
            stage=stage_name,
            status=StageStatus.FAILED,
            error=str(failure),
        )
    return outcome
# ---------------------------------------------------------------------------
# Legacy compatibility — keep old function signature working
# ---------------------------------------------------------------------------
def ingest(file_path: str) -> Dict[str, Any]:
    """Legacy wrapper (deprecated). Use ``run()`` instead.

    Performs no parsing: the path is not opened or validated, it is only
    echoed back in a status dictionary so old call sites keep working.

    Args:
        file_path: Path the caller wants ingested; returned unchanged.

    Returns:
        ``{"file": file_path, "status": "ingested"}``.

    Warns:
        DeprecationWarning: Emitted on every call so callers get a
            programmatic signal to migrate to ``run()``.
    """
    # Function-scope import keeps this legacy shim self-contained.
    import warnings

    warnings.warn(
        "ingest() is deprecated; use run() instead",
        DeprecationWarning,
        stacklevel=2,  # attribute the warning to the caller, not this shim
    )
    return {"file": file_path, "status": "ingested"}
# Script entry point: run the legacy wrapper on a sample path and show the
# dictionary it returns.
if __name__ == "__main__":
    demo_result = ingest("input.txt")
    print(demo_result)