Percy3822 committed on
Commit
69630f9
·
verified ·
1 Parent(s): 3036c58

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -30
app.py CHANGED
@@ -1,19 +1,21 @@
1
- # app.py
2
  import os, shutil, subprocess, zipfile
3
  from pathlib import Path
4
  import gradio as gr
5
 
6
- ROOT = Path(__file__).resolve().parent
7
- DATA = ROOT / "dataset.jsonl"
8
  LOG = ROOT / "train.log"
9
  OUT = ROOT / "trained_model"
10
  ZIP = ROOT / "trained_model.zip"
11
 
 
12
  def ls_workspace() -> str:
13
  rows = []
14
  for p in sorted(ROOT.iterdir(), key=lambda x: (x.is_file(), x.name.lower())):
15
- try: size = p.stat().st_size
16
- except Exception: size = 0
 
 
17
  rows.append(f"{'[DIR]' if p.is_dir() else ' '}\t{size:>10}\t{p.name}")
18
  return "\n".join(rows) or "(empty)"
19
 
@@ -28,21 +30,32 @@ def list_models():
28
  out.insert(0, str(OUT))
29
  return sorted(out)
30
 
 
31
  def upload_dataset(file):
 
 
 
 
32
  if not file:
33
  return "❌ No file selected.", ls_workspace()
34
- shutil.copy(file.name, DATA)
35
- return f" Uploaded → {DATA.name}", ls_workspace()
 
 
 
36
 
37
  def start_training():
38
- # clean previous artifacts
39
- if OUT.exists(): shutil.rmtree(OUT, ignore_errors=True)
40
- if ZIP.exists(): ZIP.unlink(missing_ok=True)
 
 
41
  LOG.write_text("🔥 Training started…\n", encoding="utf-8")
42
 
 
43
  cmd = [
44
  "python", str(ROOT / "train.py"),
45
- "--dataset", str(DATA),
46
  "--output", str(OUT),
47
  "--zip_path", str(ZIP),
48
  "--model_name", "Salesforce/codegen-350M-multi",
@@ -54,38 +67,42 @@ def start_training():
54
  with open(LOG, "a", encoding="utf-8") as lf:
55
  code = subprocess.Popen(cmd, stdout=lf, stderr=subprocess.STDOUT).wait()
56
 
57
- # build model-list update payload
58
  models = list_models()
59
- model_update = gr.update(choices=models, value=str(OUT) if OUT.exists() else None)
 
60
 
61
  if code == 0 and ZIP.exists():
62
  info = f"✅ Training complete. Saved: {OUT.name} | Zip: {ZIP.name}"
63
  return info, gr.update(value=str(ZIP), visible=True), ls_workspace(), read_logs(), model_update
64
  else:
65
- info = f"❌ Training failed (exit {code}). See logs."
66
- return info, gr.update(visible=False), ls_workspace(), read_logs(), model_update
67
 
68
  def read_logs():
69
  return LOG.read_text(encoding="utf-8")[-20000:] if LOG.exists() else "⏳ Waiting…"
70
 
71
  def refresh_download():
72
- # also refresh model dropdown
73
  models = list_models()
74
- return gr.update(value=str(ZIP), visible=ZIP.exists()), ls_workspace(), gr.update(choices=models)
75
 
 
76
  def import_zip(zfile):
77
  if not zfile:
78
  return "❌ No zip selected.", list_models()
79
  dest = ROOT / "imported_model"
80
- if dest.exists(): shutil.rmtree(dest, ignore_errors=True)
 
81
  dest.mkdir(parents=True, exist_ok=True)
82
  with zipfile.ZipFile(zfile.name, "r") as z:
83
  z.extractall(dest)
84
  return f"✅ Imported to {dest.name}", list_models()
85
 
86
  def generate(model_path, prompt):
87
- if not model_path: return "❌ Select a model."
88
- if not prompt or not prompt.strip(): return "❌ Enter a prompt."
 
 
89
  try:
90
  from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
91
  tok = AutoTokenizer.from_pretrained(model_path, use_fast=True)
@@ -94,20 +111,23 @@ def generate(model_path, prompt):
94
  model = AutoModelForCausalLM.from_pretrained(model_path)
95
  pipe = pipeline("text-generation", model=model, tokenizer=tok)
96
  out = pipe(
97
- prompt, max_new_tokens=220, do_sample=True, temperature=0.2, top_p=0.9,
 
98
  repetition_penalty=1.2, no_repeat_ngram_size=4,
99
- eos_token_id=tok.eos_token_id, pad_token_id=tok.pad_token_id, truncation=True
 
100
  )[0]["generated_text"]
101
  return out
102
  except Exception as e:
103
  return f"❌ Error: {e}"
104
 
 
105
  with gr.Blocks(title="Python AI — Train & Test") as app:
106
- gr.Markdown("## 🧠 Python AI — Train & Test (auto-add to Test tab)")
107
 
108
- # --- Test tab UI FIRST so we can reference components ---
109
  with gr.Tab("Test"):
110
- gr.Markdown("### Pick a model or upload a .zip")
111
  refresh_btn = gr.Button("↻ Refresh Model List")
112
  model_list = gr.Dropdown(choices=list_models(), label="Available AIs", interactive=True)
113
  zip_in = gr.File(label="Or upload a model .zip", file_types=[".zip"])
@@ -116,10 +136,10 @@ with gr.Blocks(title="Python AI — Train & Test") as app:
116
  go = gr.Button("Generate")
117
  out = gr.Textbox(label="AI Response", lines=20)
118
 
119
- # --- Train tab UI ---
120
  with gr.Tab("Train"):
121
  with gr.Row():
122
- ds = gr.File(label="📥 Upload JSONL", file_types=[".jsonl", ".jsonl.gz", ".json"])
123
  ws = gr.Textbox(label="Workspace", lines=16, value=ls_workspace())
124
  up_status = gr.Textbox(label="Upload Status", interactive=False)
125
  start = gr.Button("🚀 Start Training", variant="primary")
@@ -128,16 +148,17 @@ with gr.Blocks(title="Python AI — Train & Test") as app:
128
  download_file = gr.File(label="📦 trained_model.zip", visible=ZIP.exists())
129
  refresh_dl_btn = gr.Button("Refresh Download")
130
 
131
- # --- wiring ---
132
  ds.change(upload_dataset, inputs=ds, outputs=[up_status, ws])
133
  start.click(
134
  start_training,
135
- outputs=[status, download_file, ws, logs, model_list] # <-- update Test dropdown automatically
136
  )
137
  refresh_dl_btn.click(
138
  refresh_download,
139
- outputs=[download_file, ws, model_list] # <-- also updates Test dropdown
140
  )
 
141
  refresh_btn.click(lambda: gr.update(choices=list_models()), outputs=model_list)
142
  zip_in.change(import_zip, inputs=zip_in, outputs=[import_status, model_list])
143
  go.click(generate, inputs=[model_list, prompt], outputs=out)
 
 
1
  import os, shutil, subprocess, zipfile
2
  from pathlib import Path
3
  import gradio as gr
4
 
5
+ ROOT = Path(__file__).resolve().parent
6
+ DATA = ROOT / "dataset.jsonl" # single-file mode target
7
  LOG = ROOT / "train.log"
8
  OUT = ROOT / "trained_model"
9
  ZIP = ROOT / "trained_model.zip"
10
 
11
+ # ---------- helpers ----------
12
  def ls_workspace() -> str:
13
  rows = []
14
  for p in sorted(ROOT.iterdir(), key=lambda x: (x.is_file(), x.name.lower())):
15
+ try:
16
+ size = p.stat().st_size
17
+ except Exception:
18
+ size = 0
19
  rows.append(f"{'[DIR]' if p.is_dir() else ' '}\t{size:>10}\t{p.name}")
20
  return "\n".join(rows) or "(empty)"
21
 
 
30
  out.insert(0, str(OUT))
31
  return sorted(out)
32
 
33
+ # ---------- train tab ----------
34
  def upload_dataset(file):
35
+ """
36
+ If user uploads a file -> copy to dataset.jsonl
37
+ If user uploads a folder -> we DO NOT move it, they’ll pass folder path via a textbox if needed.
38
+ """
39
  if not file:
40
  return "❌ No file selected.", ls_workspace()
41
+ # If it's a file object, copy to DATA
42
+ if hasattr(file, "name") and os.path.isfile(file.name):
43
+ shutil.copy(file.name, DATA)
44
+ return f"✅ Uploaded → {DATA.name}", ls_workspace()
45
+ return "⚠ Unexpected item; please upload a .jsonl file.", ls_workspace()
46
 
47
  def start_training():
48
+ # Clean previous artifacts
49
+ if OUT.exists():
50
+ shutil.rmtree(OUT, ignore_errors=True)
51
+ if ZIP.exists():
52
+ ZIP.unlink(missing_ok=True)
53
  LOG.write_text("🔥 Training started…\n", encoding="utf-8")
54
 
55
+ # Run trainer (blocking) and capture output in train.log
56
  cmd = [
57
  "python", str(ROOT / "train.py"),
58
+ "--dataset", str(DATA), # For folder-mode, replace DATA with folder path in train.py if you extend UI
59
  "--output", str(OUT),
60
  "--zip_path", str(ZIP),
61
  "--model_name", "Salesforce/codegen-350M-multi",
 
67
  with open(LOG, "a", encoding="utf-8") as lf:
68
  code = subprocess.Popen(cmd, stdout=lf, stderr=subprocess.STDOUT).wait()
69
 
70
+ # Refresh model list & set selection only if it’s present
71
  models = list_models()
72
+ selected = str(OUT) if OUT.exists() and str(OUT) in models else None
73
+ model_update = gr.update(choices=models, value=selected)
74
 
75
  if code == 0 and ZIP.exists():
76
  info = f"✅ Training complete. Saved: {OUT.name} | Zip: {ZIP.name}"
77
  return info, gr.update(value=str(ZIP), visible=True), ls_workspace(), read_logs(), model_update
78
  else:
79
+ info = f"❌ Training failed (exit {code}). Check logs below."
80
+ return info, gr.update(value=None, visible=False), ls_workspace(), read_logs(), model_update
81
 
82
  def read_logs():
83
  return LOG.read_text(encoding="utf-8")[-20000:] if LOG.exists() else "⏳ Waiting…"
84
 
85
  def refresh_download():
 
86
  models = list_models()
87
+ return gr.update(value=(str(ZIP) if ZIP.exists() else None), visible=ZIP.exists()), ls_workspace(), gr.update(choices=models)
88
 
89
+ # ---------- test tab ----------
90
  def import_zip(zfile):
91
  if not zfile:
92
  return "❌ No zip selected.", list_models()
93
  dest = ROOT / "imported_model"
94
+ if dest.exists():
95
+ shutil.rmtree(dest, ignore_errors=True)
96
  dest.mkdir(parents=True, exist_ok=True)
97
  with zipfile.ZipFile(zfile.name, "r") as z:
98
  z.extractall(dest)
99
  return f"✅ Imported to {dest.name}", list_models()
100
 
101
  def generate(model_path, prompt):
102
+ if not model_path:
103
+ return "❌ Select a model."
104
+ if not prompt or not prompt.strip():
105
+ return "❌ Enter a prompt."
106
  try:
107
  from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
108
  tok = AutoTokenizer.from_pretrained(model_path, use_fast=True)
 
111
  model = AutoModelForCausalLM.from_pretrained(model_path)
112
  pipe = pipeline("text-generation", model=model, tokenizer=tok)
113
  out = pipe(
114
+ prompt,
115
+ max_new_tokens=220, do_sample=True, temperature=0.2, top_p=0.9,
116
  repetition_penalty=1.2, no_repeat_ngram_size=4,
117
+ eos_token_id=tok.eos_token_id, pad_token_id=tok.pad_token_id,
118
+ truncation=True
119
  )[0]["generated_text"]
120
  return out
121
  except Exception as e:
122
  return f"❌ Error: {e}"
123
 
124
+ # ---------- UI ----------
125
  with gr.Blocks(title="Python AI — Train & Test") as app:
126
+ gr.Markdown("## 🧠 Python AI — Train & Test\nTrainer saves & zips. UI only shows existing artifacts.\n")
127
 
128
+ # Test tab (declared first so we can update its dropdown from Train tab)
129
  with gr.Tab("Test"):
130
+ gr.Markdown("### Choose a model folder or upload a .zip, then prompt it")
131
  refresh_btn = gr.Button("↻ Refresh Model List")
132
  model_list = gr.Dropdown(choices=list_models(), label="Available AIs", interactive=True)
133
  zip_in = gr.File(label="Or upload a model .zip", file_types=[".zip"])
 
136
  go = gr.Button("Generate")
137
  out = gr.Textbox(label="AI Response", lines=20)
138
 
139
+ # Train tab
140
  with gr.Tab("Train"):
141
  with gr.Row():
142
+ ds = gr.File(label="📥 Upload JSONL", file_types=[".jsonl"])
143
  ws = gr.Textbox(label="Workspace", lines=16, value=ls_workspace())
144
  up_status = gr.Textbox(label="Upload Status", interactive=False)
145
  start = gr.Button("🚀 Start Training", variant="primary")
 
148
  download_file = gr.File(label="📦 trained_model.zip", visible=ZIP.exists())
149
  refresh_dl_btn = gr.Button("Refresh Download")
150
 
151
+ # Wiring
152
  ds.change(upload_dataset, inputs=ds, outputs=[up_status, ws])
153
  start.click(
154
  start_training,
155
+ outputs=[status, download_file, ws, logs, model_list]
156
  )
157
  refresh_dl_btn.click(
158
  refresh_download,
159
+ outputs=[download_file, ws, model_list]
160
  )
161
+
162
  refresh_btn.click(lambda: gr.update(choices=list_models()), outputs=model_list)
163
  zip_in.change(import_zip, inputs=zip_in, outputs=[import_status, model_list])
164
  go.click(generate, inputs=[model_list, prompt], outputs=out)