Spaces:

apoorvrajdev
/

image-captioning-api

Configuration error

File size: 3,543 Bytes

3a2e5f0

"""Train the IEEE InceptionV3+Transformer captioning model.

Usage:
    python -m scripts.train --config configs/base.yaml
    python -m scripts.train --config configs/base.yaml --output-dir models/v1.0.0

The script orchestrates the same pipeline as the notebook, but each step is
imported from the modular package — making it the canonical example of how
the package is meant to be composed.
"""

from __future__ import annotations

from pathlib import Path

import click

from captioning.config import load_config
from captioning.data import (
    build_train_pipeline,
    build_val_pipeline,
    load_coco_annotations,
    make_image_level_splits,
)
from captioning.models import build_caption_model
from captioning.preprocessing import CaptionTokenizer, preprocess_caption
from captioning.training import Trainer
from captioning.utils import configure_logging, get_logger, set_global_seed

# Module-level logger keyed by this module's name; main() emits its
# "config_loaded" and "training_done" events through it.
log = get_logger(__name__)


@click.command()
@click.option(
    "--config",
    "config_path",
    type=click.Path(exists=True, dir_okay=False, path_type=Path),
    required=True,
    help="YAML config file (e.g. configs/base.yaml).",
)
@click.option(
    "--output-dir",
    default="outputs/runs/latest",
    type=click.Path(path_type=Path),
    help="Where to save weights, vocab, and history.",
)
def main(config_path: Path, output_dir: Path) -> None:
    """Run the full training pipeline end-to-end."""
    # click displays the function docstring as the command's --help text, so
    # the step-by-step notes live in comments rather than in the docstring.
    #
    # config_path: existing YAML file handed to load_config().
    # output_dir:  directory (created if missing) that receives the tokenizer
    #              files, whatever Trainer.fit() writes, and the final weights.

    # Logging is configured before the first event is emitted.
    configure_logging()
    config = load_config(config_path)
    output_dir.mkdir(parents=True, exist_ok=True)

    # One seed drives the global RNG setup, the annotation loader, and the
    # train/val split below.
    train_cfg = config.train
    seed = train_cfg.seed
    set_global_seed(seed)
    log.info("config_loaded", path=str(config_path), output_dir=str(output_dir))

    # --- Step 1: read the COCO annotations, running each caption through
    # preprocess_caption.
    data_cfg = config.data
    annotations = load_coco_annotations(
        base_path=data_cfg.base_path,
        annotations_filename=data_cfg.annotations_filename,
        images_subdir=data_cfg.images_subdir,
        sample_size=data_cfg.sample_size,
        seed=seed,
        caption_preprocessor=preprocess_caption,
    )

    # --- Step 2: fit the tokenizer on the "caption" column, then persist it
    # into the run directory.
    tokenizer = CaptionTokenizer(
        vocab_size=config.model.vocabulary_size,
        max_length=config.model.max_length,
    )
    tokenizer.fit(annotations["caption"])
    tokenizer.save(output_dir)

    # --- Step 3: split into train/validation image and caption collections.
    train_images, train_captions, val_images, val_captions = make_image_level_splits(
        annotations,
        train_fraction=data_cfg.train_val_split,
        seed=seed,
    )

    # --- Step 4: both input pipelines receive the same batch/buffer settings.
    batching = {
        "batch_size": train_cfg.batch_size,
        "buffer_size": train_cfg.buffer_size,
    }
    train_ds = build_train_pipeline(train_images, train_captions, tokenizer, **batching)
    val_ds = build_val_pipeline(val_images, val_captions, tokenizer, **batching)

    # --- Step 5: the model is sized from the fitted tokenizer's
    # vocabulary_size (not the configured value) and trained via Trainer.
    model = build_caption_model(config, vocab_size=tokenizer.vocabulary_size)
    Trainer(model, config).fit(train_ds, val_ds, output_dir=output_dir)

    # --- Step 6: write the final weights under the configured filename.
    final_weights = output_dir / train_cfg.weights_filename
    model.save_weights(str(final_weights))
    log.info("training_done", weights=str(final_weights))


# Script entry point: invoke the click command only when this module is
# executed directly (e.g. `python -m scripts.train`), not when it is imported.
if __name__ == "__main__":
    main()