Percy3822 committed on
Commit
a836dc9
Β·
verified Β·
1 Parent(s): 4d30e19

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +113 -40
app.py CHANGED
@@ -1,25 +1,26 @@
1
  # app.py
2
- import os, shutil, subprocess, zipfile, traceback, time, io
3
  from pathlib import Path
4
  from datetime import datetime
5
  import gradio as gr
6
 
 
7
  ROOT = Path(__file__).resolve().parent
8
  DATA = ROOT / "dataset.jsonl"
9
  LOG = ROOT / "train.log"
10
  RUNS = ROOT / "runs"
11
  RUNS.mkdir(exist_ok=True)
12
 
13
- # -------- logging helpers --------
14
  def append_log(msg: str):
15
- msg = msg.rstrip()
16
  with open(LOG, "a", encoding="utf-8") as lf:
17
  lf.write(msg + "\n")
18
 
19
  def read_logs():
20
  return LOG.read_text(encoding="utf-8")[-20000:] if LOG.exists() else "⏳ Waiting…"
21
 
22
- # -------- workspace + models --------
23
  def ls_workspace() -> str:
24
  rows = []
25
  for p in sorted(ROOT.iterdir(), key=lambda x: (x.is_file(), x.name.lower())):
@@ -46,7 +47,7 @@ def dropdown_update_safe(models, prefer=None):
46
  val = prefer if (prefer and prefer in models) else (models[0] if models else None)
47
  return gr.update(choices=models, value=val)
48
 
49
- # -------- dataset upload --------
50
  def upload_dataset(file):
51
  if not file:
52
  return "❌ No file selected.", ls_workspace()
@@ -55,12 +56,19 @@ def upload_dataset(file):
55
  return f"βœ… Uploaded β†’ {DATA.name}", ls_workspace()
56
  return "⚠ Unexpected item; please upload a .jsonl file.", ls_workspace()
57
 
58
- # -------- training (LIVE LOGS) --------
59
  def start_training_live(run_name):
60
  """
61
  Streams training logs to the UI while the subprocess runs.
62
  Yields tuples for outputs: [status, download_file, workspace, logs, model_dropdown]
63
  """
 
 
 
 
 
 
 
64
  run_id = (run_name or "").strip() or datetime.now().strftime("run_%Y%m%d_%H%M%S")
65
  out_dir = RUNS / run_id
66
  zip_path = RUNS / f"{run_id}.zip"
@@ -89,18 +97,25 @@ def start_training_live(run_name):
89
  append_log("β–Ά " + " ".join(cmd))
90
 
91
  # start subprocess with live stdout
92
- proc = subprocess.Popen(
93
- cmd,
94
- stdout=subprocess.PIPE,
95
- stderr=subprocess.STDOUT,
96
- bufsize=1,
97
- universal_newlines=True,
98
- encoding="utf-8",
99
- errors="replace",
100
- )
 
 
 
 
 
 
101
 
102
  live_log = io.StringIO()
103
  status_msg = f"πŸš€ Training run '{run_id}' in progress…"
 
104
  # stream loop
105
  while True:
106
  line = proc.stdout.readline()
@@ -154,7 +169,7 @@ def refresh_download():
154
  dropdown_update_safe(models)
155
  )
156
 
157
- # -------- import a zip as a model folder --------
158
  def import_zip(zfile):
159
  if not zfile:
160
  return "❌ No zip selected.", list_models()
@@ -166,7 +181,7 @@ def import_zip(zfile):
166
  z.extractall(dest)
167
  return f"βœ… Imported to {dest.name}", list_models()
168
 
169
- # -------- generation: cached pipeline --------
170
  _GEN_CACHE = {"path": None, "pipe": None}
171
 
172
  def get_generation_pipeline(model_path: str):
@@ -201,52 +216,93 @@ def get_generation_pipeline(model_path: str):
201
  append_log("βœ… Pipeline loaded.")
202
  return pipe
203
 
204
- def generate(model_path, prompt):
205
- from pathlib import Path
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
206
 
207
- # Coerce Dropdown value (can be list)
208
  if isinstance(model_path, list):
209
  model_path = model_path[0] if model_path else None
210
 
211
  # validate
212
  if not model_path:
213
- return "❌ Select a model from the dropdown first."
 
214
  if not isinstance(model_path, str):
215
- # fix minor bug: _name_ not name
216
- return f"❌ Invalid model path type: {type(model_path)._name_}"
217
  if not Path(model_path).exists():
218
- return f"❌ Model folder not found: {model_path}"
 
219
  if not prompt or not prompt.strip():
220
- return "❌ Enter a prompt."
 
221
 
222
  try:
223
  pipe = get_generation_pipeline(model_path)
224
- append_log(f"πŸ“ Generating for prompt ({len(prompt)} chars)…")
225
- out = pipe(
 
226
  prompt.strip(),
227
- max_new_tokens=120,
228
  do_sample=True,
229
- temperature=0.4,
230
  top_p=0.9,
231
  repetition_penalty=1.15,
232
  no_repeat_ngram_size=4,
233
- truncation=True
234
- )[0]["generated_text"]
 
 
 
 
 
 
235
  append_log("βœ… Generation OK.")
236
- return out
237
  except Exception as e:
238
  tb = traceback.format_exc()
239
  append_log("❌ Generation error:\n" + tb)
240
- return "❌ Error during generation:\n" + "".join(traceback.format_exception_only(type(e), e))
241
 
242
- # -------- UI --------
243
  with gr.Blocks(title="Python AI β€” Train & Test") as app:
244
  gr.Markdown("## 🧠 Python AI β€” Train & Test\nβ€’ Unique runs β€’ Safe download β€’ Cached generation β€’ Live logs\n")
245
 
246
- # Test first (so Train can update its dropdown)
247
  with gr.Tab("Test"):
248
  gr.Markdown("### Choose a model folder or upload a .zip, then prompt it")
249
- refresh_btn = gr.Button("↻ Refresh Model List")
 
 
250
  model_list = gr.Dropdown(
251
  choices=list_models(),
252
  label="Available AIs",
@@ -254,12 +310,21 @@ with gr.Blocks(title="Python AI β€” Train & Test") as app:
254
  allow_custom_value=True, # keeps UI quiet when empty
255
  multiselect=False # force single selection
256
  )
 
 
 
257
  zip_in = gr.File(label="Or upload a model .zip", file_types=[".zip"])
258
  import_status = gr.Textbox(label="Import Status", interactive=False)
259
- prompt = gr.Textbox(label="Prompt", lines=8, placeholder="### Instruction:\nPython: write a function ...\n### Response:\n")
 
 
 
 
 
260
  go = gr.Button("Generate")
261
  out = gr.Textbox(label="AI Response", lines=20)
262
 
 
263
  with gr.Tab("Train"):
264
  with gr.Row():
265
  ds = gr.File(label="πŸ“₯ Upload JSONL", file_types=[".jsonl"])
@@ -272,22 +337,30 @@ with gr.Blocks(title="Python AI β€” Train & Test") as app:
272
  download_file = gr.File(label="πŸ“¦ Latest trained zip", visible=False)
273
  refresh_dl_btn = gr.Button("Refresh Download")
274
 
275
- # wiring
 
276
  ds.change(upload_dataset, inputs=ds, outputs=[up_status, ws])
277
 
278
- # STREAMED training: function yields updates
279
  start.click(
280
  start_training_live,
281
  inputs=[run_name],
282
  outputs=[status, download_file, ws, logs, model_list]
283
  )
284
 
 
285
  refresh_dl_btn.click(
286
  refresh_download,
287
  outputs=[download_file, ws, model_list]
288
  )
 
 
289
  refresh_btn.click(lambda: dropdown_update_safe(list_models()), outputs=model_list)
 
 
290
  zip_in.change(import_zip, inputs=zip_in, outputs=[import_status, model_list])
291
- go.click(generate, inputs=[model_list, prompt], outputs=out)
 
 
292
 
293
  app.queue(default_concurrency_limit=1).launch()
 
1
  # app.py
2
+ import os, shutil, subprocess, zipfile, traceback, io
3
  from pathlib import Path
4
  from datetime import datetime
5
  import gradio as gr
6
 
7
+ # ----------------- Paths -----------------
8
  ROOT = Path(__file__).resolve().parent
9
  DATA = ROOT / "dataset.jsonl"
10
  LOG = ROOT / "train.log"
11
  RUNS = ROOT / "runs"
12
  RUNS.mkdir(exist_ok=True)
13
 
14
+ # ----------------- Logging -----------------
15
def append_log(msg: str):
    """Append one line to the shared train.log file (creates it if absent)."""
    text = (msg or "").rstrip("\n")
    with open(LOG, "a", encoding="utf-8") as log_file:
        log_file.write(text + "\n")
19
 
20
def read_logs():
    """Return the last 20k characters of the log, or a waiting notice if no log yet."""
    if not LOG.exists():
        return "⏳ Waiting…"
    return LOG.read_text(encoding="utf-8")[-20000:]
22
 
23
+ # ----------------- Workspace & Models -----------------
24
  def ls_workspace() -> str:
25
  rows = []
26
  for p in sorted(ROOT.iterdir(), key=lambda x: (x.is_file(), x.name.lower())):
 
47
  val = prefer if (prefer and prefer in models) else (models[0] if models else None)
48
  return gr.update(choices=models, value=val)
49
 
50
+ # ----------------- Dataset Upload -----------------
51
  def upload_dataset(file):
52
  if not file:
53
  return "❌ No file selected.", ls_workspace()
 
56
  return f"βœ… Uploaded β†’ {DATA.name}", ls_workspace()
57
  return "⚠ Unexpected item; please upload a .jsonl file.", ls_workspace()
58
 
59
+ # ----------------- Training (Live Logs) -----------------
60
  def start_training_live(run_name):
61
  """
62
  Streams training logs to the UI while the subprocess runs.
63
  Yields tuples for outputs: [status, download_file, workspace, logs, model_dropdown]
64
  """
65
+ # Quick guard: dataset must exist
66
+ if not DATA.exists():
67
+ msg = "❌ dataset.jsonl not found. Upload a JSONL dataset first."
68
+ append_log(msg)
69
+ yield (msg, gr.update(value=None, visible=False), ls_workspace(), read_logs(), dropdown_update_safe(list_models()))
70
+ return
71
+
72
  run_id = (run_name or "").strip() or datetime.now().strftime("run_%Y%m%d_%H%M%S")
73
  out_dir = RUNS / run_id
74
  zip_path = RUNS / f"{run_id}.zip"
 
97
  append_log("β–Ά " + " ".join(cmd))
98
 
99
  # start subprocess with live stdout
100
+ try:
101
+ proc = subprocess.Popen(
102
+ cmd,
103
+ stdout=subprocess.PIPE,
104
+ stderr=subprocess.STDOUT,
105
+ bufsize=1,
106
+ universal_newlines=True,
107
+ encoding="utf-8",
108
+ errors="replace",
109
+ )
110
+ except Exception as e:
111
+ err = "❌ Failed to start train.py: " + "".join(traceback.format_exception_only(type(e), e))
112
+ append_log(err)
113
+ yield (err, gr.update(value=None, visible=False), ls_workspace(), read_logs(), dropdown_update_safe(list_models()))
114
+ return
115
 
116
  live_log = io.StringIO()
117
  status_msg = f"πŸš€ Training run '{run_id}' in progress…"
118
+
119
  # stream loop
120
  while True:
121
  line = proc.stdout.readline()
 
169
  dropdown_update_safe(models)
170
  )
171
 
172
+ # ----------------- Import a Zip as Model Folder -----------------
173
  def import_zip(zfile):
174
  if not zfile:
175
  return "❌ No zip selected.", list_models()
 
181
  z.extractall(dest)
182
  return f"βœ… Imported to {dest.name}", list_models()
183
 
184
+ # ----------------- Generation (cached pipeline) -----------------
185
  _GEN_CACHE = {"path": None, "pipe": None}
186
 
187
  def get_generation_pipeline(model_path: str):
 
216
  append_log("βœ… Pipeline loaded.")
217
  return pipe
218
 
219
+ # ----------------- Test Tab Helpers -----------------
220
def ping():
    """Sanity-check hook for the UI: log a ping and confirm wiring responds."""
    append_log("πŸ”” Ping pressed (UI wiring OK)")
    return "βœ… UI is connected and responding."
223
+
224
def load_selected_model(model_path):
    """Validate the dropdown selection and warm the generation pipeline cache.

    Args:
        model_path: value from the Gradio Dropdown; may arrive as a list.
    Returns:
        A human-readable status string. Never raises: load errors are
        logged and reported as a "❌" message instead.
    """
    # Dropdown may pass a list; coerce to a single string value.
    if isinstance(model_path, list):
        model_path = model_path[0] if model_path else None
    if not model_path:
        return "❌ Select a model first."
    if not isinstance(model_path, str):
        # bug fix: type(...).__name__, not ._name_ (would raise AttributeError)
        return f"❌ Invalid model path type: {type(model_path).__name__}"
    p = Path(model_path)
    if not p.exists() or not p.is_dir():
        return f"❌ Model folder not found: {model_path}"
    try:
        append_log(f"πŸ“¦ Load request β†’ {model_path}")
        # Warm the module-level pipeline cache; result is discarded here.
        _ = get_generation_pipeline(model_path)
        append_log(f"βœ… Loaded pipeline: {model_path}")
        return f"βœ… Loaded: {model_path}"
    except Exception as e:
        tb = traceback.format_exc()
        append_log("❌ Load error:\n" + tb)
        return "❌ Error while loading model:\n" + "".join(traceback.format_exception_only(type(e), e))
244
+
245
def generate_stream(model_path, prompt):
    """Stream generation status updates to the UI, then the final text.

    Generator wired to the Generate button: yields intermediate status
    strings (so the user sees the click registered) and finally either
    the generated text or a "❌" error message. Never raises.
    """
    # Immediate feedback before any heavy work.
    yield "⏳ Loading model…"
    append_log("β–Ά Generate clicked")

    # Dropdown may pass a list; coerce to a single string value.
    if isinstance(model_path, list):
        model_path = model_path[0] if model_path else None

    # Validate inputs; each failure logs, yields the message, and stops.
    if not model_path:
        msg = "❌ Select a model from the dropdown first."
        append_log(msg); yield msg; return
    if not isinstance(model_path, str):
        # bug fix: type(...).__name__, not ._name_ (would raise AttributeError)
        msg = f"❌ Invalid model path type: {type(model_path).__name__}"
        append_log(msg); yield msg; return
    if not Path(model_path).exists():
        msg = f"❌ Model folder not found: {model_path}"
        append_log(msg); yield msg; return
    if not prompt or not prompt.strip():
        msg = "❌ Enter a prompt."
        append_log(msg); yield msg; return

    try:
        pipe = get_generation_pipeline(model_path)
        yield "βš™ Generating… (this may take a bit on CPU)"
        append_log(f"πŸ“ Generating… prompt_len={len(prompt)}")
        result = pipe(
            prompt.strip(),
            max_new_tokens=80,  # quicker to show something
            do_sample=True,
            temperature=0.3,
            top_p=0.9,
            repetition_penalty=1.15,
            no_repeat_ngram_size=4,
            truncation=True,
            return_full_text=True,
        )
        text = result[0].get("generated_text", "")
        if not text:
            append_log("⚠ Empty generated_text")
            yield "⚠ Model returned empty text. Try lowering temperature or adding more context."
            return
        append_log("βœ… Generation OK.")
        yield text
    except Exception as e:
        tb = traceback.format_exc()
        append_log("❌ Generation error:\n" + tb)
        yield "❌ Error during generation:\n" + "".join(traceback.format_exception_only(type(e), e))
295
 
296
+ # ----------------- UI -----------------
297
  with gr.Blocks(title="Python AI β€” Train & Test") as app:
298
  gr.Markdown("## 🧠 Python AI β€” Train & Test\nβ€’ Unique runs β€’ Safe download β€’ Cached generation β€’ Live logs\n")
299
 
300
+ # ---------- Test Tab ----------
301
  with gr.Tab("Test"):
302
  gr.Markdown("### Choose a model folder or upload a .zip, then prompt it")
303
+ with gr.Row():
304
+ refresh_btn = gr.Button("↻ Refresh Model List")
305
+ ping_btn = gr.Button("πŸ”” Ping UI") # quick sanity check
306
  model_list = gr.Dropdown(
307
  choices=list_models(),
308
  label="Available AIs",
 
310
  allow_custom_value=True, # keeps UI quiet when empty
311
  multiselect=False # force single selection
312
  )
313
+ load_btn = gr.Button("πŸ“¦ Load Model")
314
+ load_status = gr.Textbox(label="Model Status", interactive=False)
315
+
316
  zip_in = gr.File(label="Or upload a model .zip", file_types=[".zip"])
317
  import_status = gr.Textbox(label="Import Status", interactive=False)
318
+
319
+ prompt = gr.Textbox(
320
+ label="Prompt",
321
+ lines=8,
322
+ placeholder="### Instruction:\nPython: write a function ...\n### Response:\n"
323
+ )
324
  go = gr.Button("Generate")
325
  out = gr.Textbox(label="AI Response", lines=20)
326
 
327
+ # ---------- Train Tab ----------
328
  with gr.Tab("Train"):
329
  with gr.Row():
330
  ds = gr.File(label="πŸ“₯ Upload JSONL", file_types=[".jsonl"])
 
337
  download_file = gr.File(label="πŸ“¦ Latest trained zip", visible=False)
338
  refresh_dl_btn = gr.Button("Refresh Download")
339
 
340
+ # ---------- Wiring ----------
341
+ # Upload + workspace
342
  ds.change(upload_dataset, inputs=ds, outputs=[up_status, ws])
343
 
344
+ # Train (live streaming)
345
  start.click(
346
  start_training_live,
347
  inputs=[run_name],
348
  outputs=[status, download_file, ws, logs, model_list]
349
  )
350
 
351
+ # Download refresh
352
  refresh_dl_btn.click(
353
  refresh_download,
354
  outputs=[download_file, ws, model_list]
355
  )
356
+
357
+ # Test tab helpers
358
  refresh_btn.click(lambda: dropdown_update_safe(list_models()), outputs=model_list)
359
+ ping_btn.click(ping, outputs=out)
360
+ load_btn.click(load_selected_model, inputs=[model_list], outputs=[load_status])
361
  zip_in.change(import_zip, inputs=zip_in, outputs=[import_status, model_list])
362
+
363
+ # Streamed generation output
364
+ go.click(generate_stream, inputs=[model_list, prompt], outputs=out)
365
 
366
  app.queue(default_concurrency_limit=1).launch()