File size: 1,213 Bytes
5f91e0b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
#!/usr/bin/env python
"""
Train initial models from Kaggle datasets.
"""

import asyncio
import sys
import os
import logging

# Put "<cwd>/backend" and "<cwd>/backend/app" on the import path, in that
# order, so the `app.*` imports below can be resolved.
# NOTE(review): both entries are built from the current working directory, not
# from this file's location — confirm the script is always launched from the
# directory that contains `backend/`.
sys.path.extend(
    os.path.join(os.getcwd(), *parts)
    for parts in (("backend",), ("backend", "app"))
)

from app.services.model_training_scheduler import ModelTrainingScheduler

# Root logging at INFO with a "timestamp level — message" layout; this script
# logs under the "TRAIN" logger name.
logging.basicConfig(
    format="%(asctime)s %(levelname)s — %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger("TRAIN")

async def main():
    """Train the finance, tech and healthcare models and print a summary.

    Calls ``ensure_data_dirs()`` from ``app.config``, then awaits
    ``ModelTrainingScheduler.train_all_domains`` with the three domain names.
    The awaited result is iterated as a mapping of domain name to a metadata
    mapping: an entry containing an ``"error"`` key is reported as failed,
    any other entry is reported as successful together with its
    ``selected_pairs`` and ``size_kb`` values (each defaulting to 0 when the
    key is absent).

    Returns:
        None. The summary is written to stdout with ``print``.
    """
    logger.info("Starting initial model training...")

    # Ensure data dirs exist before the scheduler is constructed.
    from app.config import ensure_data_dirs
    ensure_data_dirs()

    trainer = ModelTrainingScheduler()
    outcome = await trainer.train_all_domains(["finance", "tech", "healthcare"])

    banner = "=" * 50
    print("\n" + banner)
    print("TRAINING RESULTS")
    print(banner)
    for name, info in outcome.items():
        if "error" not in info:
            pairs = info.get('selected_pairs', 0)
            size_kb = info.get('size_kb', 0)
            print(f"✓ {name}: SUCCESS - {pairs} pairs, {size_kb:.1f} KB")
            continue
        print(f"✗ {name}: FAILED - {info['error']}")
    print(banner)

# Script entry point: run the async `main()` to completion on a fresh event
# loop, but only when this file is executed directly rather than imported.
if __name__ == "__main__":
    asyncio.run(main())