Percy3822 committed on
Commit
d400db2
·
verified ·
1 Parent(s): 7655912

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -34
app.py CHANGED
@@ -19,7 +19,7 @@ def _read(path, fb="Waiting..."):
19
 
20
  def _zip_dir_atomic(src, out_path, tmp_path):
21
  if os.path.exists(tmp_path): os.remove(tmp_path)
22
- with zipfile.ZipFile(tmp_path, "w", zipfile.ZIP_DEFLATED) as z:
23
  for root,_,files in os.walk(src):
24
  for fn in files:
25
  fp = os.path.join(root, fn)
@@ -34,16 +34,16 @@ def upload_file(f):
34
  shutil.copy(f.name, dst)
35
  return f"✅ Uploaded → {dst}", dst
36
 
37
- def _train_single(dataset, log):
38
- p = subprocess.Popen(
39
- ["python","train.py","--dataset",dataset,"--output",MODEL_DIR],
40
- stdout=log, stderr=subprocess.STDOUT
41
- )
42
  p.wait()
43
  log.write(f"\n ↳ train.py exited {p.returncode} for {os.path.basename(dataset)}\n")
44
  return p.returncode == 0
45
 
46
- def _worker(dataset_path, shards_folder):
47
  with open(LOG_FILE,"w") as log: log.write("🔥 Starting training (C# AI)…\n")
48
  ok=True
49
  with open(LOG_FILE,"a") as log:
@@ -67,7 +67,7 @@ def _worker(dataset_path, shards_folder):
67
  log.write(f"❌ GZ read failed: {e}\n"); ok=False; break
68
  else:
69
  shard = pth
70
- if not _train_single(shard, log): ok=False; break
71
  if os.path.exists(tmp):
72
  try: os.remove(tmp)
73
  except: pass
@@ -75,7 +75,7 @@ def _worker(dataset_path, shards_folder):
75
  if not dataset_path or not os.path.exists(dataset_path):
76
  log.write("❌ Upload a valid dataset.\n"); ok=False
77
  else:
78
- ok = _train_single(dataset_path, log)
79
 
80
  if ok and os.path.isdir(MODEL_DIR):
81
  try:
@@ -87,16 +87,15 @@ def _worker(dataset_path, shards_folder):
87
  else:
88
  log.write("\n❌ Training failed; no zip.\n")
89
 
90
- def start_training(dataset_path, shards_folder):
91
- try:
92
- if os.path.exists(ZIP_FILE): os.remove(ZIP_FILE)
93
- if os.path.exists(ZIP_PART): os.remove(ZIP_PART)
94
- except: pass
95
- threading.Thread(target=_worker, args=(dataset_path, shards_folder), daemon=True).start()
96
  return "🚀 Training started. Use Refresh buttons."
97
 
98
- def read_logs():
99
- return _read(LOG_FILE, "Waiting for logs...")
100
 
101
  def refresh_download():
102
  if os.path.exists(ZIP_FILE):
@@ -106,7 +105,7 @@ def refresh_download():
106
 
107
  def load_test_zip(z):
108
  if z is None: return "❌ No file.", ""
109
- import zipfile, uuid
110
  root = os.path.join("models", f"test_{uuid.uuid4().hex}")
111
  os.makedirs(root, exist_ok=True)
112
  try:
@@ -115,29 +114,22 @@ def load_test_zip(z):
115
  except Exception as e:
116
  return f"❌ Extract failed: {e}", ""
117
 
118
- def clear_test_model():
119
- return "Cleared. Will use trained_model/ if present.", ""
120
 
121
  def generate(prompt, model_path):
122
  if not prompt.strip(): return "Enter a prompt."
123
  try:
124
- if model_path and os.path.isdir(model_path):
125
- m = model_path
126
- src="(uploaded)"
127
- elif os.path.isdir(MODEL_DIR):
128
- m = MODEL_DIR
129
- src="(trained_model/)"
130
- else:
131
- m = "distilgpt2" # tiny fallback
132
- src="(fallback)"
133
- gen = pipeline("text-generation", model=m, tokenizer="distilgpt2")
134
- out = gen(prompt, max_length=200, do_sample=True, temperature=0.7, truncation=True)[0]["generated_text"]
135
  return f"{out}\n\n— using {src}"
136
  except Exception as e:
137
  return f"❌ Error: {e}"
138
 
139
- with gr.Blocks(title="C# AI Trainer") as app:
140
- gr.Markdown("## 🧩 C# AI Trainer — upload JSONL, train fast, download, and test.")
141
 
142
  ds_state = gr.State("")
143
  folder_state = gr.State("")
@@ -150,6 +142,10 @@ with gr.Blocks(title="C# AI Trainer") as app:
150
  with gr.Row():
151
  shards_folder = gr.Textbox(value="", label="Folder with shards (optional)")
152
  use_folder = gr.Button("📂 Use Folder")
 
 
 
 
153
  status = gr.Textbox(label="Status", interactive=False)
154
  with gr.Row():
155
  start_btn = gr.Button("🚀 Start Training")
@@ -162,7 +158,7 @@ with gr.Blocks(title="C# AI Trainer") as app:
162
  up_btn.click(fn=upload_file, inputs=file_in, outputs=[status, ds_state])
163
  use_folder.click(fn=lambda p: ("✅ Using folder." if p.strip() else "❌ Provide folder path.", p.strip()),
164
  inputs=shards_folder, outputs=[status, folder_state])
165
- start_btn.click(fn=start_training, inputs=[ds_state, folder_state], outputs=status
166
  ).then(fn=read_logs, outputs=logs
167
  ).then(fn=refresh_download, outputs=[dl_btn, dl_info])
168
  refresh_logs.click(fn=read_logs, outputs=logs)
 
19
 
20
  def _zip_dir_atomic(src, out_path, tmp_path):
21
  if os.path.exists(tmp_path): os.remove(tmp_path)
22
+ with zipfile.ZipFile(tmp_path,"w",zipfile.ZIP_DEFLATED) as z:
23
  for root,_,files in os.walk(src):
24
  for fn in files:
25
  fp = os.path.join(root, fn)
 
34
  shutil.copy(f.name, dst)
35
  return f"✅ Uploaded → {dst}", dst
36
 
37
+ def _train_single(dataset, log, quick, steps, subset):
38
+ args = ["python","train.py","--dataset",dataset,"--output",MODEL_DIR]
39
+ if quick:
40
+ args += ["--quick","1","--max_steps",str(steps or 8),"--subset",str(subset or 32)]
41
+ p = subprocess.Popen(args, stdout=log, stderr=subprocess.STDOUT)
42
  p.wait()
43
  log.write(f"\n ↳ train.py exited {p.returncode} for {os.path.basename(dataset)}\n")
44
  return p.returncode == 0
45
 
46
+ def _worker(dataset_path, shards_folder, quick, steps, subset):
47
  with open(LOG_FILE,"w") as log: log.write("🔥 Starting training (C# AI)…\n")
48
  ok=True
49
  with open(LOG_FILE,"a") as log:
 
67
  log.write(f"❌ GZ read failed: {e}\n"); ok=False; break
68
  else:
69
  shard = pth
70
+ if not _train_single(shard, log, quick, steps, subset): ok=False; break
71
  if os.path.exists(tmp):
72
  try: os.remove(tmp)
73
  except: pass
 
75
  if not dataset_path or not os.path.exists(dataset_path):
76
  log.write("❌ Upload a valid dataset.\n"); ok=False
77
  else:
78
+ ok = _train_single(dataset_path, log, quick, steps, subset)
79
 
80
  if ok and os.path.isdir(MODEL_DIR):
81
  try:
 
87
  else:
88
  log.write("\n❌ Training failed; no zip.\n")
89
 
90
+ def start_training(dataset_path, shards_folder, quick, steps, subset):
91
+ for p in (ZIP_FILE, ZIP_PART):
92
+ if os.path.exists(p):
93
+ try: os.remove(p)
94
+ except: pass
95
+ threading.Thread(target=_worker, args=(dataset_path, shards_folder, quick, steps, subset), daemon=True).start()
96
  return "🚀 Training started. Use Refresh buttons."
97
 
98
+ def read_logs(): return _read(LOG_FILE, "Waiting for logs...")
 
99
 
100
  def refresh_download():
101
  if os.path.exists(ZIP_FILE):
 
105
 
106
  def load_test_zip(z):
107
  if z is None: return "❌ No file.", ""
108
+ import zipfile
109
  root = os.path.join("models", f"test_{uuid.uuid4().hex}")
110
  os.makedirs(root, exist_ok=True)
111
  try:
 
114
  except Exception as e:
115
  return f"❌ Extract failed: {e}", ""
116
 
117
+ def clear_test_model(): return "Cleared. Will use trained_model/ if present.", ""
 
118
 
119
  def generate(prompt, model_path):
120
  if not prompt.strip(): return "Enter a prompt."
121
  try:
122
+ if model_path and os.path.isdir(model_path): m, src = model_path, "(uploaded)"
123
+ elif os.path.isdir(MODEL_DIR): m, src = MODEL_DIR, "(trained_model/)"
124
+ else: m, src = "sshleifer/tiny-gpt2", "(tiny fallback)"
125
+ gen = pipeline("text-generation", model=m, tokenizer="sshleifer/tiny-gpt2")
126
+ out = gen(prompt, max_length=120, do_sample=True, temperature=0.7, truncation=True)[0]["generated_text"]
 
 
 
 
 
 
127
  return f"{out}\n\n— using {src}"
128
  except Exception as e:
129
  return f"❌ Error: {e}"
130
 
131
+ with gr.Blocks(title="C# AI Trainer (Quick Test Mode)") as app:
132
+ gr.Markdown("## ⚡ C# AI Trainer — includes *Quick Test* (few steps on a tiny model).")
133
 
134
  ds_state = gr.State("")
135
  folder_state = gr.State("")
 
142
  with gr.Row():
143
  shards_folder = gr.Textbox(value="", label="Folder with shards (optional)")
144
  use_folder = gr.Button("📂 Use Folder")
145
+ with gr.Row():
146
+ quick = gr.Checkbox(value=True, label="⚡ Quick Test (tiny model, few steps)")
147
+ steps = gr.Number(value=8, label="Max steps (Quick mode)", precision=0)
148
+ subset = gr.Number(value=32, label="Subset rows (Quick mode)", precision=0)
149
  status = gr.Textbox(label="Status", interactive=False)
150
  with gr.Row():
151
  start_btn = gr.Button("🚀 Start Training")
 
158
  up_btn.click(fn=upload_file, inputs=file_in, outputs=[status, ds_state])
159
  use_folder.click(fn=lambda p: ("✅ Using folder." if p.strip() else "❌ Provide folder path.", p.strip()),
160
  inputs=shards_folder, outputs=[status, folder_state])
161
+ start_btn.click(fn=start_training, inputs=[ds_state, folder_state, quick, steps, subset], outputs=status
162
  ).then(fn=read_logs, outputs=logs
163
  ).then(fn=refresh_download, outputs=[dl_btn, dl_info])
164
  refresh_logs.click(fn=read_logs, outputs=logs)