Spaces:

Ryanfafa
/

image_captioning_model

Sleeping

File size: 2,779 Bytes

19ea5c5

import os
import random
from dataclasses import dataclass
from typing import Optional

import numpy as np
import torch


@dataclass
class PathsConfig:
    """
    Locations of the dataset files, all derived from ``data_root``.

    Layout produced by the properties below:
    - Images directory: <data_root>/<images_dir_name>
    - Text directory:   <data_root>/<text_dir_name>, containing
                        visual.token.txt
                        visual.trainImages.txt
                        visual.testImages.txt

    Paths are only assembled here; nothing checks that they exist.
    """

    data_root: str = "/Users/ryan/Downloads/visuallyimpair"
    images_dir_name: str = "visual_dataset"
    text_dir_name: str = "visual_text"

    def _join(self, *parts: str) -> str:
        """Join path components with ``os.path.join``."""
        joined = os.path.join(*parts)
        return joined

    def _text_file(self, filename: str) -> str:
        """Return the path of ``filename`` inside the text directory."""
        return self._join(self.text_dir, filename)

    @property
    def images_dir(self) -> str:
        """Directory that holds the image files."""
        root, leaf = self.data_root, self.images_dir_name
        return self._join(root, leaf)

    @property
    def text_dir(self) -> str:
        """Directory that holds the token and split-list text files."""
        root, leaf = self.data_root, self.text_dir_name
        return self._join(root, leaf)

    @property
    def token_file(self) -> str:
        """Path of ``visual.token.txt`` in the text directory."""
        return self._text_file("visual.token.txt")

    @property
    def train_list_file(self) -> str:
        """Path of ``visual.trainImages.txt`` in the text directory."""
        return self._text_file("visual.trainImages.txt")

    @property
    def test_list_file(self) -> str:
        """Path of ``visual.testImages.txt`` in the text directory."""
        return self._text_file("visual.testImages.txt")


@dataclass
class TrainingConfig:
    """
    Hyperparameters and training-related configuration.

    This dataclass only declares fields and their default values; the code
    that consumes them is not visible in this section, so the per-field notes
    below that go beyond the name are marked as assumptions to confirm.

    Any field can be overridden by keyword at construction time, e.g.
    ``TrainingConfig(batch_size=8, num_epochs=3)``. Field order is part of
    the generated ``__init__`` signature, so new fields should be appended
    rather than inserted.
    """

    # Optimisation / schedule defaults.
    learning_rate: float = 5e-5
    batch_size: int = 16
    num_epochs: int = 10
    warmup_steps: int = 500  # presumably LR-scheduler warmup steps — confirm against the trainer
    max_caption_length: int = 50  # presumably counted in tokens — confirm against the tokenizer usage
    gradient_accumulation_steps: int = 1
    num_workers: int = 4  # presumably DataLoader worker processes — confirm against the data pipeline
    mixed_precision: bool = True
    patience: int = 3  # presumably early-stopping patience — confirm the unit (epochs vs. evaluations)
    max_grad_norm: float = 1.0  # presumably the gradient-clipping threshold — confirm against the trainer

    # Model-specific
    prefix_length: int = 1  # number of visual prefix tokens

    # Logging / checkpoints
    # Both defaults are relative paths.
    output_dir: str = "checkpoints"
    log_dir: str = "runs"

    # Reproducibility
    seed: int = 42


def get_device() -> torch.device:
    """
    Pick the compute device and announce the choice on stdout.

    Returns:
        ``torch.device("cuda")`` when ``torch.cuda.is_available()`` is true,
        otherwise ``torch.device("cpu")``.
    """

    # Decide once, then derive both the device name and the message from it.
    if torch.cuda.is_available():
        name, message = "cuda", "Using CUDA for training/inference."
    else:
        name, message = "cpu", "CUDA not available, falling back to CPU."

    print(message)
    return torch.device(name)


def set_seed(seed: int) -> None:
    """
    Seed the Python, NumPy and PyTorch random number generators.

    The same ``seed`` is passed, in order, to ``random.seed``,
    ``np.random.seed``, ``torch.manual_seed`` and
    ``torch.cuda.manual_seed_all``. The cuDNN backend flags are then set to
    ``deterministic = True`` and ``benchmark = False``.
    """

    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    # Same backend object as torch.backends.cudnn, just a shorter name.
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False


def ensure_dir(path: str) -> None:
    """
    Create directory if it does not already exist.

    Missing parent directories are created as well, and an already existing
    directory is left untouched.

    Args:
        path: Directory to create. The empty string is treated as the current
            directory, so the call does nothing. This is what
            ``os.path.dirname`` returns for a bare file name such as
            ``"model.pt"``.

    Raises:
        FileExistsError: If ``path`` exists but is not a directory.
        OSError: For other failures reported by ``os.makedirs``
            (e.g. insufficient permissions).
    """

    # os.makedirs("") raises FileNotFoundError; the current directory already
    # exists, so there is nothing to create. Only the exact empty string is
    # special-cased, so invalid values such as None still fail as before.
    if path == "":
        return

    os.makedirs(path, exist_ok=True)