File size: 8,171 Bytes
3742716
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
#!/bin/bash
# Validate Seriguela Training Setup
# This script validates that everything is configured correctly before training
# Usage: ./validate_setup.sh

# Stop at the first command that fails outside of a tested context.
set -o errexit

# ANSI colour codes, stored as literal backslash sequences; they are only
# turned into real escape characters when the print helpers emit them.
GREEN="\033[0;32m"
RED="\033[0;31m"
YELLOW="\033[1;33m"
BLUE="\033[0;34m"
NC="\033[0m"  # reset ("no colour")

# Status-line helpers. Each takes the message as $1 and writes one line to
# stdout. The colour variables (GREEN/RED/YELLOW/BLUE/NC) hold backslash
# sequences, so they are expanded with %b; the message itself goes through %s
# and is printed verbatim (backslashes in paths or versions are not interpreted).
# The status glyphs are stored as real UTF-8 characters.
print_success() { printf '%b✅%b %s\n' "${GREEN}" "${NC}" "$1"; }
print_error() { printf '%b❌%b %s\n' "${RED}" "${NC}" "$1"; }
# Two spaces after the warning sign, as in the original layout.
print_warning() { printf '%b⚠️%b  %s\n' "${YELLOW}" "${NC}" "$1"; }
# Section banner, preceded by an empty line.
print_header() { printf '\n%b========== %s ==========%b\n' "${BLUE}" "$1" "${NC}"; }

# Number of failed checks; every hard failure below adds one.
ERRORS=0

print_header "Seriguela Setup Validation"

# Pick the working directory: the fixed install location wins, then a
# "seriguela" directory under the current one, otherwise stay where we are.
if [[ -d /home/ubuntu/seriguela ]]; then
    cd /home/ubuntu/seriguela
elif [[ -d "$(pwd)/seriguela" ]]; then
    cd seriguela
else
    cd .
fi

print_header "1. Python Environment"

# Interpreter: show its version string, or count an error if python3 cannot run.
if ! python3 --version > /dev/null 2>&1; then
    print_error "Python not found"
    ERRORS=$((ERRORS + 1))
else
    PYTHON_VERSION=$(python3 --version)
    print_success "Python installed: $PYTHON_VERSION"
fi

# Virtual environment: when ./venv exists it is activated for the rest of the run.
if [[ ! -d venv ]]; then
    print_error "Virtual environment not found"
    ERRORS=$((ERRORS + 1))
else
    print_success "Virtual environment exists"
    source venv/bin/activate
fi

# pip: the version is the second space-separated field of `pip --version`.
if ! pip --version > /dev/null 2>&1; then
    print_error "pip not found"
    ERRORS=$((ERRORS + 1))
else
    PIP_VERSION=$(pip --version | cut -d' ' -f2)
    print_success "pip version: $PIP_VERSION"
fi

print_header "2. Python Packages"

# Packages that must be importable. Entry format: "<module>:<display name>".
PACKAGES=(
    "transformers:Hugging Face Transformers"
    "torch:PyTorch"
    "wandb:Weights & Biases"
    "peft:Parameter-Efficient Fine-Tuning"
    "datasets:Hugging Face Datasets"
)

for entry in "${PACKAGES[@]}"; do
    # Split on the first colon: module name before it, description after it.
    pkg_name=${entry%%:*}
    pkg_desc=${entry#*:}

    # A failed import is a hard error; move on to the next package.
    if ! python3 -c "import $pkg_name" > /dev/null 2>&1; then
        print_error "$pkg_desc ($pkg_name) not installed"
        ERRORS=$((ERRORS + 1))
        continue
    fi

    # Importable: report __version__, or "unknown" when it cannot be read.
    VERSION=$(python3 -c "import $pkg_name; print($pkg_name.__version__)" 2>/dev/null || echo "unknown")
    print_success "$pkg_desc ($pkg_name) - version $VERSION"
done

# Check Wandb version specifically
# Falls back to "0.0.0" when wandb cannot be imported, so the comparison below
# still runs and reports the install as too old.
WANDB_VERSION=$(python3 -c "import wandb; print(wandb.__version__)" 2>/dev/null || echo "0.0.0")
REQUIRED_VERSION="0.24.0"

# Compare the two versions in Python. The values are handed over as arguments
# to a quoted heredoc instead of being spliced into the Python source.
# Exit status: 0 = new enough, 1 = too old, anything else = comparison could
# not be performed (e.g. the `packaging` module is missing or a version string
# cannot be parsed). `|| version_status=$?` keeps `set -e` from aborting here.
version_status=0
python3 - "$WANDB_VERSION" "$REQUIRED_VERSION" << 'VERSIONCHECK' || version_status=$?
import sys
try:
    from packaging import version
    current = version.parse(sys.argv[1])
    required = version.parse(sys.argv[2])
except Exception:
    sys.exit(2)
sys.exit(0 if current >= required else 1)
VERSIONCHECK

case "$version_status" in
    0)
        print_success "Wandb version $WANDB_VERSION (>= $REQUIRED_VERSION required)"
        ;;
    1)
        print_warning "Wandb version $WANDB_VERSION is older than recommended $REQUIRED_VERSION"
        print_warning "New API key format (wandb_v1_...) requires Wandb >= 0.24.0"
        ;;
    *)
        # Do not claim the version is old when the comparison itself failed.
        print_warning "Could not compare Wandb version $WANDB_VERSION with $REQUIRED_VERSION (is the 'packaging' module installed?)"
        ;;
esac

print_header "3. Environment Variables"

#######################################
# Export every assignment line of a dotenv-style file into this shell.
# Blank lines and comment lines (optionally indented) are dropped before the
# "export " prefix is added: a line that is empty or only a comment would
# otherwise become a bare `export`, which prints every exported variable
# (including secrets) to stdout.
# Arguments: $1 - path to the env file
# Outputs:   nothing on stdout for a well-formed file
#######################################
load_env_file() {
    local env_file=$1
    # shellcheck disable=SC1090
    source <(grep -Ev '^[[:space:]]*(#|$)' "$env_file" | sed 's/^/export /')
}

# Load .env if exists
if [ -f ".env" ]; then
    load_env_file ".env"
    print_success ".env file loaded"
else
    print_warning ".env file not found"
fi

# Check HF_TOKEN (optional: only its length is shown, never the value)
if [ -n "$HF_TOKEN" ]; then
    TOKEN_LEN=${#HF_TOKEN}
    print_success "HF_TOKEN set ($TOKEN_LEN characters)"
else
    print_warning "HF_TOKEN not set (model won't be pushed to Hub)"
fi

# Check WANDB_API_KEY (required: a missing key counts as an error)
if [ -n "$WANDB_API_KEY" ]; then
    KEY_LEN=${#WANDB_API_KEY}
    print_success "WANDB_API_KEY set ($KEY_LEN characters)"
else
    print_error "WANDB_API_KEY not set"
    ERRORS=$((ERRORS + 1))
fi

print_header "4. GPU / CUDA"

# Driver level: nvidia-smi must run; name and total memory of the first GPU
# listed are shown on success.
if ! nvidia-smi > /dev/null 2>&1; then
    print_error "GPU not detected (nvidia-smi failed)"
    ERRORS=$((ERRORS + 1))
else
    GPU_NAME=$(nvidia-smi --query-gpu=name --format=csv,noheader | head -n 1)
    GPU_MEMORY=$(nvidia-smi --query-gpu=memory.total --format=csv,noheader | head -n 1)
    print_success "GPU detected: $GPU_NAME ($GPU_MEMORY)"
fi

# Framework level: PyTorch itself must report CUDA as available.
if ! python3 -c "import torch; assert torch.cuda.is_available()" > /dev/null 2>&1; then
    print_error "CUDA not available in PyTorch"
    ERRORS=$((ERRORS + 1))
else
    CUDA_VERSION=$(python3 -c "import torch; print(torch.version.cuda)")
    GPU_COUNT=$(python3 -c "import torch; print(torch.cuda.device_count())")
    print_success "CUDA available: version $CUDA_VERSION ($GPU_COUNT GPU(s))"
fi

print_header "5. Wandb Authentication"

if [ -n "$WANDB_API_KEY" ]; then
    # Try a real login. The key is handed to Python through the environment and
    # the heredoc delimiter is quoted, so the key text never becomes part of the
    # Python source (a quote or backslash in it cannot break or alter the code).
    if WANDB_API_KEY="$WANDB_API_KEY" python3 << 'WANDBCHECK'
import os
import sys
import wandb
try:
    result = wandb.login(key=os.environ["WANDB_API_KEY"], relogin=True)
    if result:
        print("Login successful")
        sys.exit(0)
    else:
        print("Login failed")
        sys.exit(1)
except Exception as e:
    print(f"Error: {e}")
    sys.exit(1)
WANDBCHECK
    then
        print_success "Wandb authentication successful"

        # Get user info (best effort: any failure is reported as "unknown")
        WANDB_USER=$(python3 << 'GETUSER'
import wandb
try:
    api = wandb.Api()
    print(api.viewer.get("username", "unknown"))
except Exception:
    print("unknown")
GETUSER
)
        print_success "Logged in as: $WANDB_USER"
    else
        print_error "Wandb authentication failed"
        ERRORS=$((ERRORS + 1))
    fi
else
    print_warning "Skipping Wandb auth (no API key)"
fi

print_header "6. HuggingFace Authentication"

if [ -n "$HF_TOKEN" ]; then
    # Ask the Hub who the token belongs to. The token is passed through the
    # environment and the heredoc delimiter is quoted, so the token text is
    # never spliced into the Python source.
    if HF_TOKEN="$HF_TOKEN" python3 << 'HFCHECK'
import os
import sys
from huggingface_hub import HfApi
try:
    api = HfApi(token=os.environ["HF_TOKEN"])
    user = api.whoami()
    print(f"Login successful: {user.get('name', 'unknown')}")
    sys.exit(0)
except Exception as e:
    print(f"Error: {e}")
    sys.exit(1)
HFCHECK
    then
        print_success "HuggingFace authentication successful"
    else
        print_error "HuggingFace authentication failed"
        ERRORS=$((ERRORS + 1))
    fi
else
    print_warning "Skipping HF auth (no token)"
fi

print_header "7. Dataset Access"

# Open the training dataset in streaming mode. A failure here only produces a
# warning; it does not add to the error count.
if ! python3 << 'DATASETCHECK'
from datasets import load_dataset
import sys
try:
    # Quick test load (just get info, don't download)
    ds = load_dataset("augustocsc/sintetico_natural", split="train", streaming=True)
    print("Dataset accessible")
    sys.exit(0)
except Exception as e:
    print(f"Error: {e}")
    sys.exit(1)
DATASETCHECK
then
    print_warning "Could not verify dataset access (may require authentication)"
else
    print_success "Dataset accessible: augustocsc/sintetico_natural"
fi

print_header "8. Scripts"

# Files expected relative to the current directory. A missing file is only
# warned about; it does not add to the error count.
SCRIPTS=(
    "scripts/train.py"
    "scripts/evaluate.py"
    "scripts/generate.py"
    "scripts/aws/monitor_training_auto.sh"
    "scripts/aws/analyze_model.sh"
)

for script_path in "${SCRIPTS[@]}"; do
    if [[ ! -f "$script_path" ]]; then
        print_warning "$script_path not found"
        continue
    fi
    print_success "$script_path exists"
done

# Final summary
# Prints a boxed verdict and exits 0 when no check failed, 1 otherwise.
print_header "Validation Summary"
echo ""

# The box interior is 38 columns wide. The blank filler and the horizontal
# rule are generated so the borders always have matching lengths.
box_width=38
printf -v box_blank '%*s' "$box_width" ''
box_rule=${box_blank// /═}

if [ "$ERRORS" -eq 0 ]; then
    echo -e "${GREEN}╔${box_rule}╗${NC}"
    echo -e "${GREEN}║${box_blank}║${NC}"
    echo -e "${GREEN}║    ✅ ALL VALIDATIONS PASSED ✅     ║${NC}"
    echo -e "${GREEN}║${box_blank}║${NC}"
    echo -e "${GREEN}║     Ready for training! 🚀           ║${NC}"
    echo -e "${GREEN}║${box_blank}║${NC}"
    echo -e "${GREEN}╚${box_rule}╝${NC}"
    echo ""
    echo "You can now run:"
    echo "  python scripts/train.py --help"
    echo "  bash scripts/aws/run_all_training.sh"
    echo ""
    exit 0
else
    echo -e "${RED}╔${box_rule}╗${NC}"
    echo -e "${RED}║${box_blank}║${NC}"
    echo -e "${RED}║    ❌ VALIDATION FAILED ❌           ║${NC}"
    echo -e "${RED}║${box_blank}║${NC}"
    # Pad the count line to the box width so the right border stays aligned
    # regardless of how many digits the error count has (3 + 35 = 38 columns).
    printf '%b║   %-35s║%b\n' "${RED}" "${ERRORS} error(s) found" "${NC}"
    echo -e "${RED}║${box_blank}║${NC}"
    echo -e "${RED}╚${box_rule}╝${NC}"
    echo ""
    echo "Please fix the errors above before training."
    echo ""
    exit 1
fi