Percy3822 committed on
Commit
69cca4e
·
verified ·
1 Parent(s): 81255b6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +78 -48
app.py CHANGED
@@ -1,4 +1,5 @@
1
- import os, shutil, subprocess, zipfile
 
2
  from pathlib import Path
3
  from datetime import datetime
4
  import gradio as gr
@@ -9,33 +10,43 @@ LOG = ROOT / "train.log"
9
  RUNS = ROOT / "runs"
10
  RUNS.mkdir(exist_ok=True)
11
 
12
- # ---------- helpers ----------
 
 
 
 
 
 
 
 
 
13
  def ls_workspace() -> str:
14
  rows = []
15
  for p in sorted(ROOT.iterdir(), key=lambda x: (x.is_file(), x.name.lower())):
16
- try: size = p.stat().st_size
17
- except Exception: size = 0
 
 
18
  rows.append(f"{'[DIR]' if p.is_dir() else ' '}\t{size:>10}\t{p.name}")
19
  return "\n".join(rows) or "(empty)"
20
 
21
  def list_models():
22
  out = []
23
  for base in [ROOT, RUNS]:
24
- if not base.exists():
25
  continue
26
  for p in base.iterdir():
27
  if p.is_dir() and (p / "config.json").exists() and (
28
  (p / "tokenizer.json").exists() or (p / "tokenizer_config.json").exists()
29
  ):
30
  out.append(str(p))
31
- # ensure uniqueness & sorted
32
  return sorted(set(out))
33
 
34
  def dropdown_update_safe(models, prefer=None):
35
  val = prefer if (prefer and prefer in models) else (models[0] if models else None)
36
  return gr.update(choices=models, value=val)
37
 
38
- # ---------- training ----------
39
  def upload_dataset(file):
40
  if not file:
41
  return "❌ No file selected.", ls_workspace()
@@ -44,13 +55,13 @@ def upload_dataset(file):
44
  return f"βœ… Uploaded β†’ {DATA.name}", ls_workspace()
45
  return "⚠ Unexpected item; please upload a .jsonl file.", ls_workspace()
46
 
 
47
  def start_training(run_name):
48
- # Unique run folder and zip
49
  run_id = (run_name or "").strip() or datetime.now().strftime("run_%Y%m%d_%H%M%S")
50
  out_dir = RUNS / run_id
51
  zip_path = RUNS / f"{run_id}.zip"
52
 
53
- # Clean previous artifacts only for this run
54
  if out_dir.exists():
55
  shutil.rmtree(out_dir, ignore_errors=True)
56
  if zip_path.exists():
@@ -69,6 +80,7 @@ def start_training(run_name):
69
  "--block_size", "256",
70
  "--learning_rate", "5e-5",
71
  ]
 
72
  with open(LOG, "a", encoding="utf-8") as lf:
73
  code = subprocess.Popen(cmd, stdout=lf, stderr=subprocess.STDOUT).wait()
74
 
@@ -82,13 +94,10 @@ def start_training(run_name):
82
  info = f"❌ Training failed (exit {code}). Check logs below."
83
  dl_update = gr.update(value=None, visible=False)
84
 
 
85
  return info, dl_update, ls_workspace(), read_logs(), model_update
86
 
87
- def read_logs():
88
- return LOG.read_text(encoding="utf-8")[-20000:] if LOG.exists() else "⏳ Waiting…"
89
-
90
  def refresh_download():
91
- # We don’t know which run user wants; show the newest zip if any
92
  zips = sorted(RUNS.glob("*.zip"), key=lambda p: p.stat().st_mtime, reverse=True)
93
  latest = zips[0] if zips else None
94
  models = list_models()
@@ -98,7 +107,7 @@ def refresh_download():
98
  dropdown_update_safe(models)
99
  )
100
 
101
- # ---------- testing ----------
102
  def import_zip(zfile):
103
  if not zfile:
104
  return "❌ No zip selected.", list_models()
@@ -110,38 +119,61 @@ def import_zip(zfile):
110
  z.extractall(dest)
111
  return f"βœ… Imported to {dest.name}", list_models()
112
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
  def generate(model_path, prompt):
114
- # 1) Validate inputs
 
 
 
 
 
 
115
  if not model_path:
116
  return "❌ Select a model from the dropdown first."
 
 
117
  if not Path(model_path).exists():
118
  return f"❌ Model folder not found: {model_path}"
119
  if not prompt or not prompt.strip():
120
  return "❌ Enter a prompt."
121
 
122
  try:
123
- from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
124
- import torch
125
-
126
- tok = AutoTokenizer.from_pretrained(model_path, use_fast=True)
127
- # ensure pad token
128
- if tok.pad_token_id is None:
129
- if tok.eos_token_id is not None:
130
- tok.pad_token = tok.eos_token
131
- else:
132
- tok.add_special_tokens({"pad_token": "[PAD]"})
133
- model = AutoModelForCausalLM.from_pretrained(model_path)
134
- # align embeddings if we added tokens
135
- if getattr(model, "config", None) and getattr(model.config, "vocab_size", None) and len(tok) > model.config.vocab_size:
136
- model.resize_token_embeddings(len(tok))
137
-
138
- pipe = pipeline(
139
- "text-generation",
140
- model=model,
141
- tokenizer=tok,
142
- device_map="auto" if torch.cuda.is_available() else None,
143
- )
144
-
145
  out = pipe(
146
  prompt.strip(),
147
  max_new_tokens=120,
@@ -150,29 +182,29 @@ def generate(model_path, prompt):
150
  top_p=0.9,
151
  repetition_penalty=1.15,
152
  no_repeat_ngram_size=4,
153
- eos_token_id=tok.eos_token_id,
154
- pad_token_id=tok.pad_token_id,
155
  truncation=True
156
  )[0]["generated_text"]
157
-
158
  return out
159
  except Exception as e:
160
- import traceback
 
161
  return "❌ Error during generation:\n" + "".join(traceback.format_exception_only(type(e), e))
162
 
163
- # ---------- UI ----------
164
  with gr.Blocks(title="Python AI β€” Train & Test") as app:
165
- gr.Markdown("## 🧠 Python AI β€” Train & Test\nβ€’ Unique run folders β€’ Safe download β€’ Reliable generation\n")
166
 
167
- # ---- Test tab first so Train can target its dropdown
168
  with gr.Tab("Test"):
169
- gr.Markdown("### Pick a model folder or upload a .zip, then prompt it")
170
  refresh_btn = gr.Button("↻ Refresh Model List")
171
  model_list = gr.Dropdown(
172
  choices=list_models(),
173
  label="Available AIs",
174
  interactive=True,
175
- allow_custom_value=True # no warnings when empty
 
176
  )
177
  zip_in = gr.File(label="Or upload a model .zip", file_types=[".zip"])
178
  import_status = gr.Textbox(label="Import Status", interactive=False)
@@ -180,7 +212,6 @@ with gr.Blocks(title="Python AI — Train & Test") as app:
180
  go = gr.Button("Generate")
181
  out = gr.Textbox(label="AI Response", lines=20)
182
 
183
- # ---- Train tab
184
  with gr.Tab("Train"):
185
  with gr.Row():
186
  ds = gr.File(label="πŸ“₯ Upload JSONL", file_types=[".jsonl"])
@@ -204,7 +235,6 @@ with gr.Blocks(title="Python AI — Train & Test") as app:
204
  refresh_download,
205
  outputs=[download_file, ws, model_list]
206
  )
207
-
208
  refresh_btn.click(lambda: dropdown_update_safe(list_models()), outputs=model_list)
209
  zip_in.change(import_zip, inputs=zip_in, outputs=[import_status, model_list])
210
  go.click(generate, inputs=[model_list, prompt], outputs=out)
 
1
+ # app.py
2
+ import os, shutil, subprocess, zipfile, traceback
3
  from pathlib import Path
4
  from datetime import datetime
5
  import gradio as gr
 
10
  RUNS = ROOT / "runs"
11
  RUNS.mkdir(exist_ok=True)
12
 
13
+ # -------- logging helpers --------
def append_log(msg: str):
    """Append *msg* to the log file as one newline-terminated entry.

    Trailing whitespace on *msg* is stripped before writing, so every entry
    ends with exactly one newline. The file at ``LOG`` is opened in append
    mode with UTF-8 encoding.
    """
    entry = f"{msg.rstrip()}\n"
    with open(LOG, mode="a", encoding="utf-8") as log_file:
        log_file.write(entry)
18
+
def read_logs():
    """Return the tail of the log file, or a placeholder when it does not exist.

    Only the last 20,000 characters of ``LOG`` are returned.
    """
    if not LOG.exists():
        return "⏳ Waiting…"
    log_text = LOG.read_text(encoding="utf-8")
    return log_text[-20000:]
21
+
22
+ # -------- workspace + models --------
def ls_workspace() -> str:
    """Return a tab-separated listing of the entries directly under ``ROOT``.

    Entries that are not regular files sort first, then everything is ordered
    by case-insensitive name. Each row holds a ``[DIR]`` marker (or a blank),
    the size right-aligned to 10 columns, and the entry name. Returns
    ``"(empty)"`` when ``ROOT`` has no entries.
    """
    def _non_files_first(entry):
        return (entry.is_file(), entry.name.lower())

    lines = []
    for entry in sorted(ROOT.iterdir(), key=_non_files_first):
        try:
            nbytes = entry.stat().st_size
        except Exception:
            # Best effort: an entry that cannot be stat'ed is shown with size 0.
            nbytes = 0
        marker = "[DIR]" if entry.is_dir() else " "
        lines.append(f"{marker}\t{nbytes:>10}\t{entry.name}")

    listing = "\n".join(lines)
    return listing if listing else "(empty)"
32
 
def list_models():
    """Return the sorted, de-duplicated paths of model folders.

    A model folder is a directory directly under ``ROOT`` or ``RUNS`` that
    contains ``config.json`` and at least one of ``tokenizer.json`` or
    ``tokenizer_config.json``. Paths are returned as strings.
    """
    found = set()
    for parent in (ROOT, RUNS):
        if not parent.exists():
            continue
        for child in parent.iterdir():
            if not child.is_dir():
                continue
            if not (child / "config.json").exists():
                continue
            has_tokenizer = (
                (child / "tokenizer.json").exists()
                or (child / "tokenizer_config.json").exists()
            )
            if has_tokenizer:
                found.add(str(child))
    return sorted(found)
44
 
def dropdown_update_safe(models, prefer=None):
    """Build a ``gr.update`` for the model dropdown with a valid selection.

    The selected value is *prefer* when it is truthy and present in *models*,
    otherwise the first entry of *models*, otherwise ``None`` when *models*
    is empty.
    """
    if prefer and prefer in models:
        selected = prefer
    elif models:
        selected = models[0]
    else:
        selected = None
    return gr.update(choices=models, value=selected)
48
 
49
+ # -------- dataset upload --------
50
  def upload_dataset(file):
51
  if not file:
52
  return "❌ No file selected.", ls_workspace()
 
55
  return f"βœ… Uploaded β†’ {DATA.name}", ls_workspace()
56
  return "⚠ Unexpected item; please upload a .jsonl file.", ls_workspace()
57
 
58
+ # -------- training --------
59
  def start_training(run_name):
 
60
  run_id = (run_name or "").strip() or datetime.now().strftime("run_%Y%m%d_%H%M%S")
61
  out_dir = RUNS / run_id
62
  zip_path = RUNS / f"{run_id}.zip"
63
 
64
+ # clean only this run
65
  if out_dir.exists():
66
  shutil.rmtree(out_dir, ignore_errors=True)
67
  if zip_path.exists():
 
80
  "--block_size", "256",
81
  "--learning_rate", "5e-5",
82
  ]
83
+ append_log("β–Ά " + " ".join(cmd))
84
  with open(LOG, "a", encoding="utf-8") as lf:
85
  code = subprocess.Popen(cmd, stdout=lf, stderr=subprocess.STDOUT).wait()
86
 
 
94
  info = f"❌ Training failed (exit {code}). Check logs below."
95
  dl_update = gr.update(value=None, visible=False)
96
 
97
+ append_log(info)
98
  return info, dl_update, ls_workspace(), read_logs(), model_update
99
 
 
 
 
100
  def refresh_download():
 
101
  zips = sorted(RUNS.glob("*.zip"), key=lambda p: p.stat().st_mtime, reverse=True)
102
  latest = zips[0] if zips else None
103
  models = list_models()
 
107
  dropdown_update_safe(models)
108
  )
109
 
110
+ # -------- import a zip as a model folder --------
111
  def import_zip(zfile):
112
  if not zfile:
113
  return "❌ No zip selected.", list_models()
 
119
  z.extractall(dest)
120
  return f"βœ… Imported to {dest.name}", list_models()
121
 
122
+ # -------- generation: cached pipeline --------
# Single-slot cache: the most recently loaded pipeline and the path it came from.
_GEN_CACHE = {"path": None, "pipe": None}


def get_generation_pipeline(model_path: str):
    """Return a text-generation pipeline for *model_path*, reusing the cached one.

    The pipeline is rebuilt only when *model_path* differs from the cached
    path. While loading, a pad token is ensured on the tokenizer (the EOS
    token is reused, or ``[PAD]`` is added) and the model's embeddings are
    resized if the tokenizer ends up larger than the model's vocabulary.

    Args:
        model_path: Folder containing the saved model and tokenizer.

    Returns:
        A ``transformers`` text-generation pipeline.

    Raises:
        Whatever ``transformers`` raises when the tokenizer or model cannot
        be loaded; the cache is left empty in that case.
    """
    # Imported lazily so the app can start without loading these heavy modules.
    from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
    import torch

    if _GEN_CACHE["path"] == model_path and _GEN_CACHE["pipe"] is not None:
        return _GEN_CACHE["pipe"]

    # Drop the previous pipeline before loading the next one so two models
    # are not kept referenced at the same time.
    _GEN_CACHE["path"] = None
    _GEN_CACHE["pipe"] = None

    append_log(f"🧩 Loading pipeline from: {model_path}")
    tok = AutoTokenizer.from_pretrained(model_path, use_fast=True)
    if tok.pad_token_id is None:
        if tok.eos_token_id is not None:
            tok.pad_token = tok.eos_token
            append_log("ℹ No pad_token; using eos_token as pad_token.")
        else:
            tok.add_special_tokens({"pad_token": "[PAD]"})
            append_log("ℹ Added [PAD] token to tokenizer.")

    model = AutoModelForCausalLM.from_pretrained(model_path)
    vocab_size = getattr(getattr(model, "config", None), "vocab_size", None)
    if vocab_size and len(tok) > vocab_size:
        # Only needed when a pad token was added beyond the model's vocabulary.
        model.resize_token_embeddings(len(tok))
        append_log(f"ℹ Resized embeddings to {len(tok)}.")

    # `device_map` is only forwarded to `from_pretrained`, which is not called
    # for an already-instantiated model, so select the device explicitly.
    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tok,
        device=0 if torch.cuda.is_available() else -1,
    )
    _GEN_CACHE["path"] = model_path
    _GEN_CACHE["pipe"] = pipe
    append_log("✅ Pipeline loaded.")
    return pipe
156
+
157
  def generate(model_path, prompt):
158
+ from pathlib import Path
159
+
160
+ # Coerce Dropdown value (can be list)
161
+ if isinstance(model_path, list):
162
+ model_path = model_path[0] if model_path else None
163
+
164
+ # validate
165
  if not model_path:
166
  return "❌ Select a model from the dropdown first."
167
+ if not isinstance(model_path, str):
168
+ return f"❌ Invalid model path type: {type(model_path).__name__}"
169
  if not Path(model_path).exists():
170
  return f"❌ Model folder not found: {model_path}"
171
  if not prompt or not prompt.strip():
172
  return "❌ Enter a prompt."
173
 
174
  try:
175
+ pipe = get_generation_pipeline(model_path)
176
+ append_log(f"πŸ“ Generating for prompt ({len(prompt)} chars)…")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
177
  out = pipe(
178
  prompt.strip(),
179
  max_new_tokens=120,
 
182
  top_p=0.9,
183
  repetition_penalty=1.15,
184
  no_repeat_ngram_size=4,
 
 
185
  truncation=True
186
  )[0]["generated_text"]
187
+ append_log("βœ… Generation OK.")
188
  return out
189
  except Exception as e:
190
+ tb = traceback.format_exc()
191
+ append_log("❌ Generation error:\n" + tb)
192
  return "❌ Error during generation:\n" + "".join(traceback.format_exception_only(type(e), e))
193
 
194
+ # -------- UI --------
195
  with gr.Blocks(title="Python AI β€” Train & Test") as app:
196
+ gr.Markdown("## 🧠 Python AI β€” Train & Test\nβ€’ Unique runs β€’ Safe download β€’ Cached generation\n")
197
 
198
+ # Test first (so Train can update its dropdown)
199
  with gr.Tab("Test"):
200
+ gr.Markdown("### Choose a model folder or upload a .zip, then prompt it")
201
  refresh_btn = gr.Button("↻ Refresh Model List")
202
  model_list = gr.Dropdown(
203
  choices=list_models(),
204
  label="Available AIs",
205
  interactive=True,
206
+ allow_custom_value=True, # keeps UI quiet when empty
207
+ multiselect=False # force single selection
208
  )
209
  zip_in = gr.File(label="Or upload a model .zip", file_types=[".zip"])
210
  import_status = gr.Textbox(label="Import Status", interactive=False)
 
212
  go = gr.Button("Generate")
213
  out = gr.Textbox(label="AI Response", lines=20)
214
 
 
215
  with gr.Tab("Train"):
216
  with gr.Row():
217
  ds = gr.File(label="πŸ“₯ Upload JSONL", file_types=[".jsonl"])
 
235
  refresh_download,
236
  outputs=[download_file, ws, model_list]
237
  )
 
238
  refresh_btn.click(lambda: dropdown_update_safe(list_models()), outputs=model_list)
239
  zip_in.change(import_zip, inputs=zip_in, outputs=[import_status, model_list])
240
  go.click(generate, inputs=[model_list, prompt], outputs=out)