File size: 2,779 Bytes
19ea5c5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
import os
import random
from dataclasses import dataclass
from typing import Optional

import numpy as np
import torch


@dataclass
class PathsConfig:
    """
    Filesystem layout of the visually impaired captioning dataset.

    Every location is derived from three configurable pieces, so pointing
    ``data_root`` somewhere else relocates the whole tree:

    - Images:  <data_root>/<images_dir_name>/*.jpg
    - Text:    <data_root>/<text_dir_name>/visual.token.txt
               <data_root>/<text_dir_name>/visual.trainImages.txt
               <data_root>/<text_dir_name>/visual.testImages.txt

    The derived paths are read-only properties computed on each access, so
    they always reflect the current field values.
    """

    data_root: str = "/Users/ryan/Downloads/visuallyimpair"
    images_dir_name: str = "visual_dataset"
    text_dir_name: str = "visual_text"

    def _join(self, *parts: str) -> str:
        # Single choke point for path building; delegates to os.path.join.
        return os.path.join(*parts)

    def _under_root(self, dir_name: str) -> str:
        # Resolve a directory that sits directly below ``data_root``.
        return self._join(self.data_root, dir_name)

    def _in_text_dir(self, file_name: str) -> str:
        # Resolve a file that lives inside the text directory.
        return self._join(self.text_dir, file_name)

    @property
    def images_dir(self) -> str:
        """Directory holding the image files."""
        return self._under_root(self.images_dir_name)

    @property
    def text_dir(self) -> str:
        """Directory holding the token file and the split lists."""
        return self._under_root(self.text_dir_name)

    @property
    def token_file(self) -> str:
        """Path of ``visual.token.txt`` inside the text directory."""
        return self._in_text_dir("visual.token.txt")

    @property
    def train_list_file(self) -> str:
        """Path of ``visual.trainImages.txt`` inside the text directory."""
        return self._in_text_dir("visual.trainImages.txt")

    @property
    def test_list_file(self) -> str:
        """Path of ``visual.testImages.txt`` inside the text directory."""
        return self._in_text_dir("visual.testImages.txt")


@dataclass
class TrainingConfig:
    """
    Hyperparameters and training-related configuration.

    A plain dataclass of defaults: every field can be overridden by keyword
    (or positionally, in declaration order) when constructing an instance.
    Nothing in this class reads the values; their consumers live outside
    this file, so the per-field notes below describe the presumed meaning
    suggested by each name and should be confirmed against the training code.
    """

    # Optimisation / loop settings (presumed meanings -- TODO confirm in trainer).
    learning_rate: float = 5e-5
    batch_size: int = 16
    num_epochs: int = 10
    warmup_steps: int = 500  # presumably scheduler warm-up steps
    max_caption_length: int = 50  # unit (tokens vs. words) not visible here -- verify
    gradient_accumulation_steps: int = 1
    num_workers: int = 4  # presumably data-loading worker count
    mixed_precision: bool = True
    patience: int = 3  # presumably early-stopping patience -- verify unit (epochs?)
    max_grad_norm: float = 1.0  # presumably the gradient-clipping threshold

    # Model-specific
    prefix_length: int = 1  # number of visual prefix tokens

    # Logging / checkpoints
    # Both defaults are relative paths; what they are resolved against
    # depends on the caller.
    output_dir: str = "checkpoints"
    log_dir: str = "runs"

    # Reproducibility
    seed: int = 42


def get_device() -> torch.device:
    """
    Select the torch device to run on and announce the choice on stdout.

    Returns:
        ``torch.device("cuda")`` when ``torch.cuda.is_available()`` is true,
        otherwise ``torch.device("cpu")``.
    """

    # Guard clause: handle the CPU fallback first, then the CUDA path.
    if not torch.cuda.is_available():
        print("CUDA not available, falling back to CPU.")
        return torch.device("cpu")

    print("Using CUDA for training/inference.")
    return torch.device("cuda")


def set_seed(seed: int) -> None:
    """
    Make runs repeatable by seeding every random number generator in use.

    Seeds, in order: Python's ``random``, NumPy's global generator, the
    PyTorch default generator, and the generators of all CUDA devices.
    Afterwards cuDNN is switched to deterministic mode with autotuning
    (``benchmark``) turned off.

    Args:
        seed: The value handed unchanged to each seeding function.
    """

    # Same four calls as a sequence, applied in a fixed order.
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False


def ensure_dir(path: str) -> None:
    """
    Make sure ``path`` exists as a directory.

    Delegates to ``os.makedirs`` with ``exist_ok=True``, so missing parent
    directories are created as well and an already existing directory is
    not an error.

    Args:
        path: Directory path to create.
    """

    os.makedirs(name=path, exist_ok=True)