Percy3822 committed on
Commit
0bf8c41
·
verified ·
1 Parent(s): 4883fca

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +92 -54
app.py CHANGED
@@ -1,42 +1,41 @@
1
- # app.py
2
  import os, shutil, subprocess, zipfile
3
  from pathlib import Path
 
4
  import gradio as gr
5
 
6
- ROOT = Path(__file__).resolve().parent
7
- DATA = ROOT / "dataset.jsonl"
8
- LOG = ROOT / "train.log"
9
- OUT = ROOT / "trained_model"
10
- ZIP = ROOT / "trained_model.zip"
11
 
12
  # ---------- helpers ----------
13
  def ls_workspace() -> str:
14
  rows = []
15
  for p in sorted(ROOT.iterdir(), key=lambda x: (x.is_file(), x.name.lower())):
16
- try:
17
- size = p.stat().st_size
18
- except Exception:
19
- size = 0
20
  rows.append(f"{'[DIR]' if p.is_dir() else ' '}\t{size:>10}\t{p.name}")
21
  return "\n".join(rows) or "(empty)"
22
 
23
  def list_models():
24
  out = []
25
- for p in ROOT.iterdir():
26
- if p.is_dir() and (p / "config.json").exists() and (
27
- (p / "tokenizer.json").exists() or (p / "tokenizer_config.json").exists()
28
- ):
29
- out.append(str(p))
30
- if OUT.exists() and str(OUT) not in out:
31
- out.insert(0, str(OUT))
32
- return sorted(out)
 
 
33
 
34
  def dropdown_update_safe(models, prefer=None):
35
- """Return a gr.update that always uses a value present in choices (or None)."""
36
  val = prefer if (prefer and prefer in models) else (models[0] if models else None)
37
  return gr.update(choices=models, value=val)
38
 
39
- # ---------- train tab ----------
40
  def upload_dataset(file):
41
  if not file:
42
  return "❌ No file selected.", ls_workspace()
@@ -45,19 +44,25 @@ def upload_dataset(file):
45
  return f"✅ Uploaded → {DATA.name}", ls_workspace()
46
  return "⚠ Unexpected item; please upload a .jsonl file.", ls_workspace()
47
 
48
- def start_training():
49
- # Clean previous artifacts
50
- if OUT.exists():
51
- shutil.rmtree(OUT, ignore_errors=True)
52
- if ZIP.exists():
53
- ZIP.unlink(missing_ok=True)
54
- LOG.write_text("🔥 Training started…\n", encoding="utf-8")
 
 
 
 
 
 
55
 
56
  cmd = [
57
  "python", str(ROOT / "train.py"),
58
  "--dataset", str(DATA),
59
- "--output", str(OUT),
60
- "--zip_path", str(ZIP),
61
  "--model_name", "Salesforce/codegen-350M-multi",
62
  "--epochs", "1",
63
  "--batch_size", "2",
@@ -68,28 +73,32 @@ def start_training():
68
  code = subprocess.Popen(cmd, stdout=lf, stderr=subprocess.STDOUT).wait()
69
 
70
  models = list_models()
71
- prefer = str(OUT) if OUT.exists() else None
72
- model_update = dropdown_update_safe(models, prefer=prefer)
73
 
74
- if code == 0 and ZIP.exists():
75
- info = f"✅ Training complete. Saved: {OUT.name} | Zip: {ZIP.name}"
76
- return info, gr.update(value=str(ZIP), visible=True), ls_workspace(), read_logs(), model_update
77
  else:
78
  info = f"❌ Training failed (exit {code}). Check logs below."
79
- return info, gr.update(value=None, visible=False), ls_workspace(), read_logs(), model_update
 
 
80
 
81
  def read_logs():
82
  return LOG.read_text(encoding="utf-8")[-20000:] if LOG.exists() else "⏳ Waiting…"
83
 
84
  def refresh_download():
 
 
 
85
  models = list_models()
86
  return (
87
- gr.update(value=(str(ZIP) if ZIP.exists() else None), visible=ZIP.exists()),
88
  ls_workspace(),
89
  dropdown_update_safe(models)
90
  )
91
 
92
- # ---------- test tab ----------
93
  def import_zip(zfile):
94
  if not zfile:
95
  return "❌ No zip selected.", list_models()
@@ -102,41 +111,68 @@ def import_zip(zfile):
102
  return f"✅ Imported to {dest.name}", list_models()
103
 
104
  def generate(model_path, prompt):
 
105
  if not model_path:
106
- return "❌ Select a model."
 
 
107
  if not prompt or not prompt.strip():
108
  return "❌ Enter a prompt."
 
109
  try:
110
  from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 
 
111
  tok = AutoTokenizer.from_pretrained(model_path, use_fast=True)
112
- if tok.pad_token_id is None and tok.eos_token_id is not None:
113
- tok.pad_token = tok.eos_token
 
 
 
 
114
  model = AutoModelForCausalLM.from_pretrained(model_path)
115
- pipe = pipeline("text-generation", model=model, tokenizer=tok)
 
 
 
 
 
 
 
 
 
 
116
  out = pipe(
117
- prompt,
118
- max_new_tokens=220, do_sample=True, temperature=0.2, top_p=0.9,
119
- repetition_penalty=1.2, no_repeat_ngram_size=4,
120
- eos_token_id=tok.eos_token_id, pad_token_id=tok.pad_token_id,
 
 
 
 
 
121
  truncation=True
122
  )[0]["generated_text"]
 
123
  return out
124
  except Exception as e:
125
- return f"❌ Error: {e}"
 
126
 
127
  # ---------- UI ----------
128
  with gr.Blocks(title="Python AI — Train & Test") as app:
129
- gr.Markdown("## 🧠 Python AI — Train & Test\nTrainer saves & zips. UI only shows existing artifacts.\n")
130
 
131
- # Test first (so Train can update it)
132
  with gr.Tab("Test"):
133
- gr.Markdown("### Choose a model folder or upload a .zip, then prompt it")
134
  refresh_btn = gr.Button("↻ Refresh Model List")
135
  model_list = gr.Dropdown(
136
  choices=list_models(),
137
  label="Available AIs",
138
  interactive=True,
139
- allow_custom_value=True # <-- stops warnings when choices are empty
140
  )
141
  zip_in = gr.File(label="Or upload a model .zip", file_types=[".zip"])
142
  import_status = gr.Textbox(label="Import Status", interactive=False)
@@ -144,22 +180,24 @@ with gr.Blocks(title="Python AI — Train & Test") as app:
144
  go = gr.Button("Generate")
145
  out = gr.Textbox(label="AI Response", lines=20)
146
 
147
- # Train tab
148
  with gr.Tab("Train"):
149
  with gr.Row():
150
  ds = gr.File(label="📥 Upload JSONL", file_types=[".jsonl"])
151
  ws = gr.Textbox(label="Workspace", lines=16, value=ls_workspace())
 
152
  up_status = gr.Textbox(label="Upload Status", interactive=False)
153
  start = gr.Button("🚀 Start Training", variant="primary")
154
  logs = gr.Textbox(label="📜 Training Logs", lines=18, value=read_logs())
155
  status = gr.Textbox(label="Status", interactive=False)
156
- download_file = gr.File(label="📦 trained_model.zip", visible=ZIP.exists())
157
  refresh_dl_btn = gr.Button("Refresh Download")
158
 
159
- # Wiring
160
  ds.change(upload_dataset, inputs=ds, outputs=[up_status, ws])
161
  start.click(
162
  start_training,
 
163
  outputs=[status, download_file, ws, logs, model_list]
164
  )
165
  refresh_dl_btn.click(
@@ -171,4 +209,4 @@ with gr.Blocks(title="Python AI — Train & Test") as app:
171
  zip_in.change(import_zip, inputs=zip_in, outputs=[import_status, model_list])
172
  go.click(generate, inputs=[model_list, prompt], outputs=out)
173
 
174
- app.launch()
 
 
1
# app.py — Gradio UI for training and testing small code models.
import os, shutil, subprocess, zipfile
from pathlib import Path
from datetime import datetime
import gradio as gr

# Workspace layout: everything lives next to this script.
# FIX: was `Path(_file_)` — `_file_` is undefined (NameError at import time);
# the dunder `__file__` is the module's path.
ROOT = Path(__file__).resolve().parent
DATA = ROOT / "dataset.jsonl"   # uploaded training dataset
LOG = ROOT / "train.log"        # training subprocess log
RUNS = ROOT / "runs"            # one subfolder + zip per training run
RUNS.mkdir(exist_ok=True)
11
 
12
  # ---------- helpers ----------
13
def ls_workspace() -> str:
    """Return a tab-separated listing of ROOT: directories first, names case-insensitive."""
    lines = []
    entries = sorted(ROOT.iterdir(), key=lambda e: (e.is_file(), e.name.lower()))
    for entry in entries:
        # stat() can fail on racing deletes / permission issues; show 0 then.
        try:
            nbytes = entry.stat().st_size
        except Exception:
            nbytes = 0
        marker = '[DIR]' if entry.is_dir() else ' '
        lines.append(f"{marker}\t{nbytes:>10}\t{entry.name}")
    return "\n".join(lines) or "(empty)"
20
 
21
def list_models():
    """Collect folders under ROOT and RUNS that look like saved HF models.

    A folder qualifies when it has a config.json plus either tokenizer.json
    or tokenizer_config.json. Returns a sorted, de-duplicated list of paths.
    """
    found = set()
    for base in (ROOT, RUNS):
        if not base.exists():
            continue
        for cand in base.iterdir():
            if not cand.is_dir():
                continue
            if not (cand / "config.json").exists():
                continue
            has_tokenizer = (cand / "tokenizer.json").exists() or (cand / "tokenizer_config.json").exists()
            if has_tokenizer:
                found.add(str(cand))
    return sorted(found)
33
 
34
def dropdown_update_safe(models, prefer=None):
    """Build a gr.update whose value is guaranteed to be in *models* (or None)."""
    if prefer and prefer in models:
        chosen = prefer
    elif models:
        chosen = models[0]
    else:
        chosen = None
    return gr.update(choices=models, value=chosen)
37
 
38
+ # ---------- training ----------
39
  def upload_dataset(file):
40
  if not file:
41
  return "❌ No file selected.", ls_workspace()
 
44
  return f"✅ Uploaded → {DATA.name}", ls_workspace()
45
  return "⚠ Unexpected item; please upload a .jsonl file.", ls_workspace()
46
 
47
+ def start_training(run_name):
48
+ # Unique run folder and zip
49
+ run_id = (run_name or "").strip() or datetime.now().strftime("run_%Y%m%d_%H%M%S")
50
+ out_dir = RUNS / run_id
51
+ zip_path = RUNS / f"{run_id}.zip"
52
+
53
+ # Clean previous artifacts only for this run
54
+ if out_dir.exists():
55
+ shutil.rmtree(out_dir, ignore_errors=True)
56
+ if zip_path.exists():
57
+ zip_path.unlink()
58
+
59
+ LOG.write_text(f"🔥 Training started…\nRun: {run_id}\n", encoding="utf-8")
60
 
61
  cmd = [
62
  "python", str(ROOT / "train.py"),
63
  "--dataset", str(DATA),
64
+ "--output", str(out_dir),
65
+ "--zip_path", str(zip_path),
66
  "--model_name", "Salesforce/codegen-350M-multi",
67
  "--epochs", "1",
68
  "--batch_size", "2",
 
73
  code = subprocess.Popen(cmd, stdout=lf, stderr=subprocess.STDOUT).wait()
74
 
75
  models = list_models()
76
+ model_update = dropdown_update_safe(models, prefer=str(out_dir) if out_dir.exists() else None)
 
77
 
78
+ if code == 0 and zip_path.exists():
79
+ info = f"✅ Training complete. Saved: {out_dir.name} | Zip: {zip_path.name}"
80
+ dl_update = gr.update(value=str(zip_path), visible=True)
81
  else:
82
  info = f"❌ Training failed (exit {code}). Check logs below."
83
+ dl_update = gr.update(value=None, visible=False)
84
+
85
+ return info, dl_update, ls_workspace(), read_logs(), model_update
86
 
87
def read_logs():
    """Return the tail (last 20 kB) of the training log, or a waiting message."""
    if not LOG.exists():
        return "⏳ Waiting…"
    return LOG.read_text(encoding="utf-8")[-20000:]
89
 
90
def refresh_download():
    """Refresh the download widget, workspace listing, and model dropdown.

    We can't know which run the user wants, so the download widget points at
    the most recently modified zip under RUNS (hidden when there is none).
    """
    all_zips = sorted(RUNS.glob("*.zip"), key=lambda z: z.stat().st_mtime, reverse=True)
    newest = all_zips[0] if all_zips else None
    dl_update = gr.update(
        value=str(newest) if newest else None,
        visible=bool(newest),
    )
    return (
        dl_update,
        ls_workspace(),
        dropdown_update_safe(list_models()),
    )
100
 
101
+ # ---------- testing ----------
102
  def import_zip(zfile):
103
  if not zfile:
104
  return "❌ No zip selected.", list_models()
 
111
  return f"✅ Imported to {dest.name}", list_models()
112
 
113
def generate(model_path, prompt):
    """Generate a completion for *prompt* using the model saved at *model_path*.

    Returns the generated text, or a human-readable "❌ ..." error string when
    validation fails or generation raises.
    """
    # 1) Validate inputs — fail fast with user-facing messages.
    if not model_path:
        return "❌ Select a model from the dropdown first."
    if not Path(model_path).exists():
        return f"❌ Model folder not found: {model_path}"
    if not prompt or not prompt.strip():
        return "❌ Enter a prompt."

    try:
        from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
        import torch

        tok = AutoTokenizer.from_pretrained(model_path, use_fast=True)
        # Guarantee a pad token: reuse EOS when available, else add [PAD].
        if tok.pad_token_id is None:
            if tok.eos_token_id is not None:
                tok.pad_token = tok.eos_token
            else:
                tok.add_special_tokens({"pad_token": "[PAD]"})

        model = AutoModelForCausalLM.from_pretrained(model_path)
        # If new special tokens were added, grow the embedding matrix to match.
        cfg = getattr(model, "config", None)
        vocab = getattr(cfg, "vocab_size", None) if cfg else None
        if vocab and len(tok) > vocab:
            model.resize_token_embeddings(len(tok))

        pipe = pipeline(
            "text-generation",
            model=model,
            tokenizer=tok,
            device_map="auto" if torch.cuda.is_available() else None,
        )

        result = pipe(
            prompt.strip(),
            max_new_tokens=120,
            do_sample=True,
            temperature=0.4,
            top_p=0.9,
            repetition_penalty=1.15,
            no_repeat_ngram_size=4,
            eos_token_id=tok.eos_token_id,
            pad_token_id=tok.pad_token_id,
            truncation=True
        )
        return result[0]["generated_text"]
    except Exception as e:
        import traceback
        return "❌ Error during generation:\n" + "".join(traceback.format_exception_only(type(e), e))
162
 
163
  # ---------- UI ----------
164
  with gr.Blocks(title="Python AI — Train & Test") as app:
165
+ gr.Markdown("## 🧠 Python AI — Train & Test\n• Unique run folders Safe download Reliable generation\n")
166
 
167
+ # ---- Test tab first so Train can target its dropdown
168
  with gr.Tab("Test"):
169
+ gr.Markdown("### Pick a model folder or upload a .zip, then prompt it")
170
  refresh_btn = gr.Button("↻ Refresh Model List")
171
  model_list = gr.Dropdown(
172
  choices=list_models(),
173
  label="Available AIs",
174
  interactive=True,
175
+ allow_custom_value=True # no warnings when empty
176
  )
177
  zip_in = gr.File(label="Or upload a model .zip", file_types=[".zip"])
178
  import_status = gr.Textbox(label="Import Status", interactive=False)
 
180
  go = gr.Button("Generate")
181
  out = gr.Textbox(label="AI Response", lines=20)
182
 
183
+ # ---- Train tab
184
  with gr.Tab("Train"):
185
  with gr.Row():
186
  ds = gr.File(label="📥 Upload JSONL", file_types=[".jsonl"])
187
  ws = gr.Textbox(label="Workspace", lines=16, value=ls_workspace())
188
+ run_name = gr.Textbox(label="Run name (optional)", placeholder="e.g., python_small_v1")
189
  up_status = gr.Textbox(label="Upload Status", interactive=False)
190
  start = gr.Button("🚀 Start Training", variant="primary")
191
  logs = gr.Textbox(label="📜 Training Logs", lines=18, value=read_logs())
192
  status = gr.Textbox(label="Status", interactive=False)
193
+ download_file = gr.File(label="📦 Latest trained zip", visible=False)
194
  refresh_dl_btn = gr.Button("Refresh Download")
195
 
196
+ # wiring
197
  ds.change(upload_dataset, inputs=ds, outputs=[up_status, ws])
198
  start.click(
199
  start_training,
200
+ inputs=[run_name],
201
  outputs=[status, download_file, ws, logs, model_list]
202
  )
203
  refresh_dl_btn.click(
 
209
  zip_in.change(import_zip, inputs=zip_in, outputs=[import_status, model_list])
210
  go.click(generate, inputs=[model_list, prompt], outputs=out)
211
 
212
+ app.queue(default_concurrency_limit=1).launch()