girish00 committed on
Commit
e42c1f9
·
verified ·
1 Parent(s): 89a3adc

make project runnable and endpoint-ready

Browse files
Files changed (1) hide show
  1. run_pipeline.py +123 -0
run_pipeline.py ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import json
3
+ import subprocess
4
+ import sys
5
+ from pathlib import Path
6
+
7
+
8
def run(cmd):
    """Echo a command, execute it, and stop the pipeline if it fails.

    Args:
        cmd: Sequence of strings forming the command line to execute.

    Raises:
        SystemExit: With the child's return code when that code is non-zero.
    """
    print("Running:", " ".join(cmd))
    # check=False: the failure is reported via SystemExit below rather than
    # via CalledProcessError, so the child's exit status becomes ours.
    exit_code = subprocess.run(cmd, check=False).returncode
    if exit_code:
        raise SystemExit(exit_code)
13
+
14
+
15
def flag_present(flag_name):
    """Report whether a long option was passed explicitly on the command line.

    argparse accepts a value either as a separate token (``--flag value``) or
    inline (``--flag=value``). Both spellings are recognised here so that an
    explicitly supplied option is never mistaken for "not given".

    Args:
        flag_name: The full option string, e.g. ``"--model-name"``.

    Returns:
        True if any command-line argument is exactly ``flag_name`` or starts
        with ``flag_name + "="``; False otherwise.

    Note:
        Abbreviated option prefixes (e.g. ``--model`` for ``--model-name``)
        are not detected.
    """
    inline_prefix = flag_name + "="
    # Skip argv[0]: it is the program name, not a user-supplied option.
    return any(
        token == flag_name or token.startswith(inline_prefix)
        for token in sys.argv[1:]
    )
17
+
18
+
19
def usable_repo_id(repo_id):
    """Return ``repo_id`` unless it is empty or begins with a placeholder.

    Args:
        repo_id: Candidate repository id; may be empty or ``None``.

    Returns:
        ``repo_id`` unchanged when it is non-empty and does not start with
        ``"your-username/"``, ``"your-user/"`` or ``"username/"``; otherwise
        the empty string.
    """
    if repo_id and not repo_id.startswith(("your-username/", "your-user/", "username/")):
        return repo_id
    return ""
24
+
25
+
26
def apply_config_defaults(args):
    """Fill in options from ``training_config.json`` unless set on the CLI.

    When ``training_config.json`` does not exist in the current working
    directory, ``args`` is returned untouched. Otherwise each option whose
    flag was not found on the command line (per ``flag_present``) takes the
    value stored in the config, keeping its current value when the config has
    no such key. The repo id is read from the ``hf_repo_id`` key and passed
    through ``usable_repo_id``.

    Args:
        args: Parsed argument namespace; it is modified in place.

    Returns:
        The same ``args`` object.
    """
    config_file = Path("training_config.json")
    if not config_file.exists():
        return args

    cfg = json.loads(config_file.read_text(encoding="utf-8"))

    # (command-line flag, attribute on args, key in the config file)
    overridable = (
        ("--model-name", "model_name", "model_name"),
        ("--dataset-size", "dataset_size", "dataset_size"),
        ("--train-file", "train_file", "train_file"),
        ("--output-dir", "output_dir", "output_dir"),
        ("--hf-repo", "hf_repo", "hf_repo_id"),
        ("--epochs", "epochs", "epochs"),
        ("--batch-size", "batch_size", "batch_size"),
        ("--learning-rate", "learning_rate", "learning_rate"),
        ("--max-length", "max_length", "max_length"),
        ("--max-train-samples", "max_train_samples", "max_train_samples"),
        ("--use-4bit", "use_4bit", "use_4bit"),
    )
    for flag, attr, key in overridable:
        if flag_present(flag):
            continue
        value = cfg.get(key, getattr(args, attr))
        if attr == "hf_repo":
            # Placeholder repo ids are blanked out by usable_repo_id.
            value = usable_repo_id(value)
        setattr(args, attr, value)
    return args
57
+
58
+
59
def main():
    """Parse options and run the generate / train / upload stages in order.

    Command-line options are parsed, then passed through
    ``apply_config_defaults``. Each stage is launched as a child process with
    the current interpreter via ``run`` and can be disabled with its
    ``--skip-*`` flag:

    * ``generate_dataset.py`` (skipped by ``--skip-generate``)
    * ``finetune_coding_llm_colab.py`` (skipped by ``--skip-train``)
    * ``upload_to_hf.py`` (skipped by ``--skip-upload``)

    Raises:
        ValueError: If ``dataset_size`` is outside 5000..10000, or if upload
            is enabled but no repo id is available.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--dataset-size", type=int, default=8000)
    parser.add_argument("--train-file", type=str, default="train.json")
    parser.add_argument("--output-dir", type=str, default="model")
    parser.add_argument("--model-name", type=str, default="Qwen/Qwen2.5-Coder-0.5B-Instruct")
    parser.add_argument("--epochs", type=float, default=1)
    parser.add_argument("--batch-size", type=int, default=2)
    parser.add_argument("--learning-rate", type=float, default=2e-4)
    parser.add_argument("--max-length", type=int, default=512)
    parser.add_argument("--max-train-samples", type=int, default=0)
    parser.add_argument("--use-4bit", action="store_true")
    parser.add_argument("--hf-repo", type=str, default="")
    parser.add_argument("--skip-generate", action="store_true")
    parser.add_argument("--skip-train", action="store_true")
    parser.add_argument("--skip-upload", action="store_true")
    args = parser.parse_args()
    args = apply_config_defaults(args)

    if not (5000 <= args.dataset_size <= 10000):
        raise ValueError("dataset-size must be between 5000 and 10000")

    # Validate the upload precondition before any stage runs, so a missing
    # repo id is reported immediately instead of after generation/training.
    if not args.skip_upload and not args.hf_repo:
        raise ValueError("Pass --hf-repo when upload is enabled, or use --skip-upload")

    if not args.skip_generate:
        run([sys.executable, "generate_dataset.py", "--size", str(args.dataset_size), "--out", args.train_file])

    if not args.skip_train:
        train_cmd = [
            sys.executable,
            "finetune_coding_llm_colab.py",
            "--dataset-size",
            str(args.dataset_size),
            "--train-file",
            args.train_file,
            "--output-dir",
            args.output_dir,
            "--model-name",
            args.model_name,
            "--epochs",
            str(args.epochs),
            "--batch-size",
            str(args.batch_size),
            "--learning-rate",
            str(args.learning_rate),
            "--max-length",
            str(args.max_length),
            "--max-train-samples",
            str(args.max_train_samples),
            # The training script is always told to skip its own dataset
            # generation; this pipeline runs generate_dataset.py itself.
            "--skip-dataset-gen",
        ]
        if args.use_4bit:
            train_cmd.append("--use-4bit")
        run(train_cmd)
    else:
        print("Skipping training stage (--skip-train).")

    if not args.skip_upload:
        run([sys.executable, "upload_to_hf.py", "--model-dir", args.output_dir, "--repo-id", args.hf_repo])

    print("Pipeline completed.")
120
+
121
+
122
# Run the pipeline only when this file is executed as a script.
if __name__ == "__main__":
    sys.exit(main())