Percy3822 committed on
Commit
3fe5c2e
·
verified ·
1 Parent(s): 52206e3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +80 -71
app.py CHANGED
@@ -3,47 +3,53 @@ import os, shutil, subprocess, zipfile, time
3
  from pathlib import Path
4
  import gradio as gr
5
 
6
# --- paths ---
# Everything the app reads or writes lives directly under WORKDIR.
WORKDIR = Path(".")
DATASET_PATH = WORKDIR.joinpath("dataset.jsonl")   # uploaded training data
LOG_PATH = WORKDIR.joinpath("train.log")           # training subprocess output
MODEL_DIR = WORKDIR.joinpath("trained_model")      # folder produced by training
ZIP_PATH = WORKDIR.joinpath("trained_model.zip")   # downloadable archive of MODEL_DIR
12
 
13
  # ---------- helpers ----------
14
def _list_models():
    """Return the sorted paths of model-like folders in the workspace.

    A folder counts as model-like when it holds ``config.json`` together
    with ``tokenizer.json`` or ``tokenizer_config.json``. ``MODEL_DIR`` is
    also listed whenever it exists, even without those files.
    """
    def _looks_like_model(folder):
        # Same three-part check as a single boolean chain, unrolled.
        if not folder.is_dir():
            return False
        if not (folder / "config.json").exists():
            return False
        return (folder / "tokenizer.json").exists() or (folder / "tokenizer_config.json").exists()

    candidates = [str(entry) for entry in WORKDIR.iterdir() if _looks_like_model(entry)]
    trained = str(MODEL_DIR)
    if MODEL_DIR.exists() and trained not in candidates:
        candidates.append(trained)
    # The final order is purely alphabetical, so the insertion position is irrelevant.
    candidates.sort()
    return candidates
25
-
26
def _zip_model_folder():
    """Archive ``MODEL_DIR`` into ``ZIP_PATH``.

    Returns:
        ``False`` when ``MODEL_DIR`` does not exist; otherwise whether
        ``ZIP_PATH`` exists after archiving.
    """
    if MODEL_DIR.exists():
        # Drop any stale archive before building the new one.
        if ZIP_PATH.exists():
            ZIP_PATH.unlink()
        # make_archive appends ".zip" itself, so hand it the suffix-less base name.
        archive_base = ZIP_PATH.with_suffix("").as_posix()
        shutil.make_archive(archive_base, "zip", MODEL_DIR)
        return ZIP_PATH.exists()
    return False
 
 
 
 
 
 
 
 
 
34
 
35
- # ---------- TRAIN callbacks ----------
36
def upload_dataset(file):
    """Copy an uploaded dataset file to ``DATASET_PATH``.

    Args:
        file: Upload object exposing the file's path as ``.name``
            (presumably Gradio's temp-file wrapper), or ``None`` when
            nothing is selected.

    Returns:
        A status message for the UI.
    """
    if file is None:
        return "❌ No file selected."
    shutil.copy(file.name, DATASET_PATH)
    # The check mark and arrow were mis-encoded ("βœ…", "β†’"); restored here.
    return f"✅ Uploaded {file.name} → {DATASET_PATH.name}"
41
 
42
  def start_training():
43
- if not DATASET_PATH.exists():
44
- return ("❌ Upload a JSONL first.", "", gr.File.update(visible=False))
45
-
46
- # clean previous artifacts
47
  if MODEL_DIR.exists():
48
  shutil.rmtree(MODEL_DIR)
49
  if ZIP_PATH.exists():
@@ -64,18 +70,18 @@ def start_training():
64
  with open(LOG_PATH, "a", encoding="utf-8") as lf:
65
  code = subprocess.Popen(cmd, stdout=lf, stderr=subprocess.STDOUT).wait()
66
 
 
67
  if code == 0:
68
- ok = _zip_model_folder()
69
- info = f"Saved to: {MODEL_DIR.name}"
70
- if ok:
71
- info += f" | Zip: {ZIP_PATH.name}"
72
- return ("βœ… Training complete.", info, gr.File.update(value=str(ZIP_PATH), visible=ok))
73
  else:
74
  tail = ""
75
  if LOG_PATH.exists():
76
  with open(LOG_PATH, "r", encoding="utf-8") as f:
77
- tail = "".join(f.readlines()[-40:])
78
- return (f"❌ Training failed (exit {code}). See logs.", tail, gr.File.update(visible=False))
79
 
80
  def read_logs():
81
  if LOG_PATH.exists():
@@ -83,20 +89,30 @@ def read_logs():
83
  return "⏳ Waiting for logs…"
84
 
85
def refresh_download():
    """Build a File update that points at ``ZIP_PATH``, visible only if the zip exists."""
    zip_ready = ZIP_PATH.exists()
    return gr.File.update(value=str(ZIP_PATH), visible=zip_ready)
 
87
 
88
- # ---------- TEST callbacks ----------
89
def refresh_models():
    """Return the current list of model folders (delegates to ``_list_models``)."""
    models = _list_models()
    return models
 
 
 
 
 
 
 
 
 
91
 
92
def upload_model_zip(zip_file):
    """Extract an uploaded model zip into a fresh ``imported_<timestamp>`` folder.

    Args:
        zip_file: Upload object exposing the file's path as ``.name``
            (presumably Gradio's temp-file wrapper), or ``None``.

    Returns:
        ``(status_message, model_paths)``, where ``model_paths`` is the
        current result of ``_list_models()``.
    """
    if zip_file is None:
        return "❌ No zip selected.", _list_models()
    # Reject non-zip uploads up front so the user gets a status message
    # instead of an unhandled BadZipFile, and no empty folder is left behind.
    if not zipfile.is_zipfile(zip_file.name):
        return "❌ Not a valid zip file.", _list_models()
    dest = WORKDIR / f"imported_{int(time.time())}"
    dest.mkdir(parents=True, exist_ok=True)
    with zipfile.ZipFile(zip_file.name, "r") as z:
        z.extractall(dest)
    # The check mark was mis-encoded ("βœ…"); restored here.
    return f"✅ Imported to {dest}", _list_models()
100
 
101
  def generate(model_path, prompt):
102
  if not model_path:
@@ -110,55 +126,48 @@ def generate(model_path, prompt):
110
  tok.pad_token = tok.eos_token
111
  model = AutoModelForCausalLM.from_pretrained(model_path)
112
  gen = pipeline("text-generation", model=model, tokenizer=tok)
113
-
114
- text = gen(
115
- prompt,
116
- max_new_tokens=220,
117
- do_sample=True,
118
- temperature=0.2,
119
- top_p=0.9,
120
- repetition_penalty=1.2,
121
- no_repeat_ngram_size=4,
122
- eos_token_id=tok.eos_token_id,
123
- pad_token_id=tok.pad_token_id,
124
- truncation=True
125
  )[0]["generated_text"]
126
- return text
127
  except Exception as e:
128
  return f"❌ Error: {e}"
129
 
130
  # ---------- UI ----------
131
def _refresh_model_dropdown():
    """Return a Dropdown update carrying the current model folders as choices."""
    # Returning the bare list would set the Dropdown's *value*, not its choices.
    return gr.update(choices=refresh_models())


def _upload_zip_and_refresh(zip_file):
    """Import an uploaded zip, then refresh the Dropdown's choices."""
    message, models = upload_model_zip(zip_file)
    return message, gr.update(choices=models)


# Mis-encoded dashes, arrows and emoji in the UI strings below were restored.
with gr.Blocks(title="Python AI — Train & Test") as app:
    gr.Markdown("## 🧠 Python AI — Train & Test\nUpload JSONL → Train → Download ZIP. Test any stored model separately.")

    with gr.Tab("Train"):
        ds = gr.File(label="📥 Upload JSONL dataset", file_types=[".jsonl", ".jsonl.gz", ".json"])
        up_status = gr.Textbox(label="Upload Status", interactive=False)
        start = gr.Button("🚀 Start Training", variant="primary")
        logs = gr.Textbox(label="📜 Logs (click Refresh)", lines=18)
        refresh_logs_btn = gr.Button("Refresh Logs")
        status = gr.Textbox(label="Status", interactive=False)
        model_info = gr.Textbox(label="Model Output", interactive=False)
        download_file = gr.File(label="📦 Download trained_model.zip", visible=False)
        refresh_dl_btn = gr.Button("Refresh Download Area")

        # Wire the Train tab controls to their callbacks.
        ds.change(upload_dataset, inputs=ds, outputs=up_status)
        start.click(start_training, outputs=[status, model_info, download_file])
        refresh_logs_btn.click(read_logs, outputs=logs)
        refresh_dl_btn.click(refresh_download, outputs=download_file)

    with gr.Tab("Test"):
        gr.Markdown("### 🔬 Pick a stored AI (folder) or upload a ZIP, then prompt it")
        refresh_btn = gr.Button("↻ Refresh Model List")
        model_list = gr.Dropdown(choices=_list_models(), label="Available AIs", interactive=True)
        zip_in = gr.File(label="Or upload a model .zip", file_types=[".zip"])
        import_status = gr.Textbox(label="Import Status", interactive=False)
        prompt = gr.Textbox(label="Prompt", lines=8, placeholder="### Instruction:\nPython: write a function ...\n### Response:\n")
        go = gr.Button("Generate")
        out = gr.Textbox(label="AI Response", lines=20)

        # Both callbacks update the Dropdown's choices rather than its value.
        refresh_btn.click(_refresh_model_dropdown, outputs=model_list)
        zip_in.change(_upload_zip_and_refresh, inputs=zip_in, outputs=[import_status, model_list])
        go.click(generate, inputs=[model_list, prompt], outputs=out)

app.launch()
 
3
  from pathlib import Path
4
  import gradio as gr
5
 
6
# Absolute workspace root; every artifact below lives directly under it.
ROOT = Path(".").resolve()
DATASET_PATH = ROOT.joinpath("dataset.jsonl")   # uploaded training data
LOG_PATH = ROOT.joinpath("train.log")           # training subprocess output
MODEL_DIR = ROOT.joinpath("trained_model")      # folder produced by training
ZIP_PATH = ROOT.joinpath("trained_model.zip")   # downloadable archive of MODEL_DIR
 
11
 
12
  # ---------- helpers ----------
13
def list_workspace():
    """Render the top-level contents of ``ROOT`` as a tab-separated text listing.

    Directories come first, then files, each group ordered by lower-cased
    name. Every row holds a ``[DIR]`` marker (or a blank), the size from
    ``stat()`` (0 if it cannot be read) and the entry name. Returns
    ``"(empty)"`` when there are no entries.
    """
    def _describe(entry):
        try:
            n_bytes = entry.stat().st_size
        except Exception:
            n_bytes = 0
        marker = "[DIR]" if entry.is_dir() else " "
        return f"{marker}\t{n_bytes:>10}\t{entry.name}"

    entries = list(ROOT.iterdir())
    # is_file() is False for directories, so they sort ahead of files.
    entries.sort(key=lambda e: (e.is_file(), e.name.lower()))
    listing = "\n".join(_describe(e) for e in entries)
    return listing if listing else "(empty)"
22
+
23
def list_zips():
    """Return the paths (as strings) of every ``*.zip`` directly under ``ROOT``."""
    return list(map(str, ROOT.glob("*.zip")))
25
+
26
def zip_trained_model():
    """Pack the contents of ``MODEL_DIR`` into a fresh ``ZIP_PATH`` archive.

    Entries are stored relative to ``MODEL_DIR``, so the archive has no
    top-level ``trained_model/`` folder.

    Returns:
        ``(ok, message)``: ``ok`` is ``True`` only when the archive was
        written; ``message`` describes the outcome either way.
    """
    if not MODEL_DIR.exists():
        return False, "trained_model/ not found"
    # remove old zip
    if ZIP_PATH.exists():
        try:
            ZIP_PATH.unlink()
        except Exception as e:
            return False, f"could not remove old zip: {e}"
    # create zip
    try:
        with zipfile.ZipFile(ZIP_PATH, "w", compression=zipfile.ZIP_DEFLATED) as z:
            for path in MODEL_DIR.rglob("*"):
                z.write(path, arcname=path.relative_to(MODEL_DIR))
    except Exception as e:
        # A failure mid-write leaves a truncated archive on disk; remove it so
        # a corrupt zip is never picked up later as a finished download.
        try:
            if ZIP_PATH.exists():
                ZIP_PATH.unlink()
        except OSError:
            pass  # best effort: the original error is the one worth reporting
        return False, f"zip error: {e}"
    # Keep the message in agreement with the flag instead of always saying "created".
    if not ZIP_PATH.exists():
        return False, f"{ZIP_PATH.name} was not created"
    return True, f"created {ZIP_PATH.name}"
43
 
44
+ # ---------- train ----------
45
def upload_dataset(file):
    """Copy an uploaded dataset file to ``DATASET_PATH``.

    Args:
        file: Upload object exposing the file's path as ``.name``
            (presumably Gradio's temp-file wrapper); falsy when nothing
            is selected.

    Returns:
        ``(status_message, workspace_listing)``.
    """
    if not file:
        return "❌ No file selected.", list_workspace()
    shutil.copy(file.name, DATASET_PATH)
    # The check mark and arrow were mis-encoded ("βœ…", "β†’"); restored here.
    return f"✅ Uploaded → {DATASET_PATH.name}", list_workspace()
50
 
51
  def start_training():
52
+ # clean
 
 
 
53
  if MODEL_DIR.exists():
54
  shutil.rmtree(MODEL_DIR)
55
  if ZIP_PATH.exists():
 
70
  with open(LOG_PATH, "a", encoding="utf-8") as lf:
71
  code = subprocess.Popen(cmd, stdout=lf, stderr=subprocess.STDOUT).wait()
72
 
73
+ # zip if success
74
  if code == 0:
75
+ ok, msg = zip_trained_model()
76
+ info = f"Saved to: {MODEL_DIR.name} | {msg}"
77
+ files = list_zips() if ok else []
78
+ return ("βœ… Training complete.", info, gr.Files.update(value=files, visible=ok), list_workspace())
 
79
  else:
80
  tail = ""
81
  if LOG_PATH.exists():
82
  with open(LOG_PATH, "r", encoding="utf-8") as f:
83
+ tail = "".join(f.readlines()[-60:])
84
+ return (f"❌ Training failed (exit {code}). See logs below.", tail, gr.Files.update(visible=False), list_workspace())
85
 
86
  def read_logs():
87
  if LOG_PATH.exists():
 
89
  return "⏳ Waiting for logs…"
90
 
91
def refresh_download():
    """Return a Files update listing the workspace zips, plus a fresh workspace listing.

    The download area is visible only when at least one zip is found.
    """
    available = list_zips()
    downloads_update = gr.Files.update(value=available, visible=bool(available))
    return downloads_update, list_workspace()
94
 
95
+ # ---------- test ----------
96
def list_models():
    """Return the paths of model-like folders under ``ROOT``, trained model first.

    A folder counts as model-like when it holds ``config.json`` together
    with ``tokenizer.json`` or ``tokenizer_config.json``. ``MODEL_DIR`` is
    listed whenever it exists, always as the first entry; the remaining
    folders follow in sorted order.
    """
    found = sorted(
        str(p)
        for p in ROOT.iterdir()
        if p.is_dir() and (p / "config.json").exists() and (
            (p / "tokenizer.json").exists() or (p / "tokenizer_config.json").exists()
        )
    )
    # Pin trained_model to the top *after* sorting; sorting last would
    # silently undo the "on top" placement.
    if MODEL_DIR.exists():
        trained = str(MODEL_DIR)
        if trained in found:
            found.remove(trained)
        found.insert(0, trained)
    return found
107
 
108
def import_zip(zip_file):
    """Extract an uploaded model zip into a fresh ``imported_<timestamp>`` folder under ``ROOT``.

    Args:
        zip_file: Upload object exposing the file's path as ``.name``
            (presumably Gradio's temp-file wrapper); falsy when nothing
            is selected.

    Returns:
        ``(status_message, model_paths)``, where ``model_paths`` is the
        current result of ``list_models()``.
    """
    if not zip_file:
        return "❌ No zip selected.", list_models()
    # Reject non-zip uploads up front so the user gets a status message
    # instead of an unhandled BadZipFile, and no empty folder is left behind.
    if not zipfile.is_zipfile(zip_file.name):
        return "❌ Not a valid zip file.", list_models()
    dest = ROOT / f"imported_{int(time.time())}"
    dest.mkdir(parents=True, exist_ok=True)
    with zipfile.ZipFile(zip_file.name, "r") as z:
        z.extractall(dest)
    # The check mark was mis-encoded ("βœ…"); restored here.
    return f"✅ Imported to {dest.name}", list_models()
116
 
117
  def generate(model_path, prompt):
118
  if not model_path:
 
126
  tok.pad_token = tok.eos_token
127
  model = AutoModelForCausalLM.from_pretrained(model_path)
128
  gen = pipeline("text-generation", model=model, tokenizer=tok)
129
+ out = gen(
130
+ prompt, max_new_tokens=220, do_sample=True, temperature=0.2, top_p=0.9,
131
+ repetition_penalty=1.2, no_repeat_ngram_size=4,
132
+ eos_token_id=tok.eos_token_id, pad_token_id=tok.pad_token_id, truncation=True
 
 
 
 
 
 
 
 
133
  )[0]["generated_text"]
134
+ return out
135
  except Exception as e:
136
  return f"❌ Error: {e}"
137
 
138
  # ---------- UI ----------
139
def _model_choices_update():
    """Return a Dropdown update carrying the current model folders as choices."""
    # Returning the bare list would set the Dropdown's *value*, not its choices.
    return gr.update(choices=list_models())


def _import_zip_and_refresh(zip_file):
    """Import an uploaded zip, then refresh the Dropdown's choices."""
    message, models = import_zip(zip_file)
    return message, gr.update(choices=models)


# Mis-encoded dashes, arrows and emoji in the UI strings below were restored.
with gr.Blocks(title="Python AI — Train & Test") as app:
    gr.Markdown("## 🧠 Python AI — Train & Test\nTrain → Zip → Download. Test models separately.\n")

    with gr.Tab("Train"):
        with gr.Row():
            ds = gr.File(label="📥 Upload JSONL dataset", file_types=[".jsonl", ".jsonl.gz", ".json"])
            ws = gr.Textbox(label="Workspace Explorer", lines=16, value=list_workspace())
        up_status = gr.Textbox(label="Upload Status", interactive=False)
        start = gr.Button("🚀 Start Training", variant="primary")
        logs = gr.Textbox(label="📜 Logs (click Refresh)", lines=18)
        refresh_logs_btn = gr.Button("Refresh Logs")
        status = gr.Textbox(label="Status", interactive=False)
        model_info = gr.Textbox(label="Model Output", interactive=False)
        downloads = gr.Files(label="📦 Downloads (zips)", value=list_zips(), interactive=False)
        refresh_dl_btn = gr.Button("Refresh Download Area")

        # Each Train-tab callback also refreshes the workspace explorer (ws),
        # except the log refresh.
        ds.change(upload_dataset, inputs=ds, outputs=[up_status, ws])
        start.click(start_training, outputs=[status, model_info, downloads, ws])
        refresh_logs_btn.click(read_logs, outputs=logs)
        refresh_dl_btn.click(refresh_download, outputs=[downloads, ws])

    with gr.Tab("Test"):
        refresh_btn = gr.Button("↻ Refresh Model List")
        model_list = gr.Dropdown(choices=list_models(), label="Available AIs", interactive=True)
        zip_in = gr.File(label="Or upload a model .zip", file_types=[".zip"])
        import_status = gr.Textbox(label="Import Status", interactive=False)
        prompt = gr.Textbox(label="Prompt", lines=8, placeholder="### Instruction:\nPython: write a function ...\n### Response:\n")
        go = gr.Button("Generate")
        out = gr.Textbox(label="AI Response", lines=20)

        # Both callbacks update the Dropdown's choices rather than its value.
        refresh_btn.click(_model_choices_update, outputs=model_list)
        zip_in.change(_import_zip_and_refresh, inputs=zip_in, outputs=[import_status, model_list])
        go.click(generate, inputs=[model_list, prompt], outputs=out)

app.launch()