Alibrown committed on
Commit
031c327
·
verified ·
1 Parent(s): fc14538

Update train.py

Browse files
Files changed (1) hide show
  1. train.py +15 -12
train.py CHANGED
@@ -10,13 +10,21 @@
10
  # python train.py --mode validate → validate ADI weights against dataset
11
  # python train.py --mode finetune → finetune SmolLM2 on collected data (future)
12
  # =============================================================================
13
-
14
  import argparse
15
  import json
16
  import logging
17
  from datetime import datetime
18
  from pathlib import Path
19
 
 
 
 
 
 
 
 
 
20
  import model as model_module
21
  from adi import DumpindexAnalyzer
22
 
@@ -28,7 +36,8 @@ logger = logging.getLogger("train")
28
  # Mode 1 — Export dataset to training format
29
  # =============================================================================
30
 
31
- def export_dataset(output_path: str = "train_data.jsonl"):
 
32
  """
33
  Export HF dataset logs to JSONL format for training.
34
  Filters: only HIGH_PRIORITY and MEDIUM_PRIORITY entries with actual responses.
@@ -95,7 +104,7 @@ def validate_adi():
95
  "samples": len(labeled),
96
  "weights": analyzer.weights,
97
  }
98
- Path("validation_results.json").write_text(json.dumps(result, indent=2))
99
  logger.info("Results saved β†’ validation_results.json")
100
 
101
 
@@ -104,16 +113,10 @@ def validate_adi():
104
  # =============================================================================
105
 
106
  def finetune():
107
- """
108
- Finetune SmolLM2 on collected dataset.
109
- Placeholder — requires export first + enough data (>500 samples recommended).
110
- """
111
- train_file = Path("train_data.jsonl")
112
- if not train_file.exists():
113
- logger.error("train_data.jsonl not found β€” run: python train.py --mode export first")
114
  return
115
-
116
- lines = train_file.read_text().strip().splitlines()
117
  logger.info(f"Training samples available: {len(lines)}")
118
 
119
  if len(lines) < 100:
 
10
  # python train.py --mode validate → validate ADI weights against dataset
11
  # python train.py --mode finetune → finetune SmolLM2 on collected data (future)
12
  # =============================================================================
13
+ import os
14
  import argparse
15
  import json
16
  import logging
17
  from datetime import datetime
18
  from pathlib import Path
19
 
20
+ # ── Path Resolution ───────────────────────────────────────────────────────────
21
+ # HF Spaces: write to /tmp/ (the rest of the filesystem is read-only)
22
+ # Local dev: current directory
23
+ _TMP = Path("/tmp") if os.getenv("SPACE_ID") else Path(".")
24
+
25
+ TRAIN_DATA = _TMP / "train_data.jsonl"
26
+ VALID_RESULT = _TMP / "validation_results.json"
27
+
28
  import model as model_module
29
  from adi import DumpindexAnalyzer
30
 
 
36
  # Mode 1 — Export dataset to training format
37
  # =============================================================================
38
 
39
+ def export_dataset(output_path: str = None):
40
+ output = Path(output_path) if output_path else TRAIN_DATA
41
  """
42
  Export HF dataset logs to JSONL format for training.
43
  Filters: only HIGH_PRIORITY and MEDIUM_PRIORITY entries with actual responses.
 
104
  "samples": len(labeled),
105
  "weights": analyzer.weights,
106
  }
107
+ VALID_RESULT.write_text(json.dumps(result, indent=2))
108
  logger.info("Results saved β†’ validation_results.json")
109
 
110
 
 
113
  # =============================================================================
114
 
115
  def finetune():
116
+ if not TRAIN_DATA.exists():
117
+ logger.error(f"train_data.jsonl not found at {TRAIN_DATA}")
 
 
 
 
 
118
  return
119
+ lines = TRAIN_DATA.read_text().strip().splitlines()
 
120
  logger.info(f"Training samples available: {len(lines)}")
121
 
122
  if len(lines) < 100: