Percy3822 committed on
Commit
12e3c33
·
verified ·
1 Parent(s): a688193

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +70 -53
app.py CHANGED
@@ -2,40 +2,58 @@ import os, shutil, subprocess, zipfile, time
2
  from pathlib import Path
3
  import gradio as gr
4
 
5
- ROOT = Path(".").resolve()
6
- DATASET = ROOT / "dataset.jsonl"
7
- LOG = ROOT / "train.log"
8
- OUT_DIR = ROOT / "trained_model"
9
- ZIP = ROOT / "trained_model.zip"
10
- PID = ROOT / "TRAIN_PID"
11
- DONE = ROOT / "TRAIN_DONE"
12
- ERRF = ROOT / "TRAIN_ERROR"
13
-
14
- def ls_workspace():
15
  rows = []
16
  for p in sorted(ROOT.iterdir(), key=lambda x: (x.is_file(), x.name.lower())):
17
- sz = p.stat().st_size if p.exists() else 0
18
- rows.append(f"{'[DIR]' if p.is_dir() else ' '}\t{sz:>10}\t{p.name}")
 
19
  return "\n".join(rows) or "(empty)"
20
 
21
- def upload_dataset(f):
22
- if not f: return "❌ No file.", ls_workspace()
23
- shutil.copy(f.name, DATASET)
24
- return f"✅ Uploaded → {DATASET.name}", ls_workspace()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
- def start_training(): # non-blocking
27
- # clean previous
28
- for p in [OUT_DIR, ZIP, DONE, ERRF, PID]:
29
- if isinstance(p, Path) and p.is_dir():
30
- shutil.rmtree(p, ignore_errors=True)
31
- elif isinstance(p, Path) and p.exists():
32
- p.unlink(missing_ok=True)
33
 
 
34
  LOG.write_text("🔥 Training started in background…\n", encoding="utf-8")
 
35
  cmd = [
36
  "python", "train.py",
37
- "--dataset", str(DATASET),
38
- "--output", str(OUT_DIR),
39
  "--model_name", "Salesforce/codegen-350M-multi",
40
  "--epochs", "1",
41
  "--batch_size", "2",
@@ -44,32 +62,26 @@ def start_training(): # non-blocking
44
  "--subset", "0",
45
  ]
46
  with open(LOG, "a", encoding="utf-8") as lf:
47
- proc = subprocess.Popen(cmd, stdout=lf, stderr=subprocess.STDOUT)
48
- PID.write_text(str(proc.pid))
49
- return "🚀 Training started. Use Refresh Logs/Download”.", ls_workspace()
50
 
51
  def read_logs():
52
  return LOG.read_text(encoding="utf-8")[-20000:] if LOG.exists() else "⏳ Waiting…"
53
 
54
- def _zip_if_ready():
55
- """Zip only when DONE flag exists and zip not created yet."""
56
- if DONE.exists() and OUT_DIR.exists() and not ZIP.exists():
57
- with zipfile.ZipFile(ZIP, "w", compression=zipfile.ZIP_DEFLATED) as z:
58
- for p in OUT_DIR.rglob("*"):
59
- z.write(p, arcname=p.relative_to(OUT_DIR))
60
- return ZIP.exists()
61
-
62
  def refresh_status_and_download():
63
- status = "⏳ Training…"
64
  if ERRF.exists():
65
- status = f"❌ Error: {ERRF.read_text(encoding='utf-8')[-500:]}"
66
  elif DONE.exists():
67
  status = "✅ Training complete."
 
 
 
68
  _zip_if_ready()
69
  files = [str(ZIP)] if ZIP.exists() else []
70
  return status, gr.Files.update(value=files, visible=bool(files)), ls_workspace()
71
 
72
- # ---- Test tab ----
73
  def list_models():
74
  out = []
75
  for p in ROOT.iterdir():
@@ -77,38 +89,43 @@ def list_models():
77
  (p / "tokenizer.json").exists() or (p / "tokenizer_config.json").exists()
78
  ):
79
  out.append(str(p))
80
- if OUT_DIR.exists() and str(OUT_DIR) not in out:
81
- out.insert(0, str(OUT_DIR))
82
  return sorted(out)
83
 
84
- def import_zip(z):
85
- if not z: return "❌ No zip.", list_models()
 
86
  dest = ROOT / f"imported_{int(time.time())}"
87
  dest.mkdir(parents=True, exist_ok=True)
88
- with zipfile.ZipFile(z.name, "r") as zp:
89
- zp.extractall(dest)
90
  return f"✅ Imported to {dest.name}", list_models()
91
 
92
  def generate(model_path, prompt):
93
- if not model_path: return "❌ Select a model."
94
- if not prompt or not prompt.strip(): return "❌ Enter a prompt."
 
 
95
  try:
96
  from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
97
  tok = AutoTokenizer.from_pretrained(model_path, use_fast=True)
98
  if tok.pad_token_id is None and tok.eos_token_id is not None:
99
  tok.pad_token = tok.eos_token
100
  model = AutoModelForCausalLM.from_pretrained(model_path)
101
- gen = pipeline("text-generation", model=model, tokenizer=tok)
102
- out = gen(prompt, max_new_tokens=220, do_sample=True, temperature=0.2, top_p=0.9,
103
- repetition_penalty=1.2, no_repeat_ngram_size=4,
104
- eos_token_id=tok.eos_token_id, pad_token_id=tok.pad_token_id,
105
- truncation=True)[0]["generated_text"]
 
106
  return out
107
  except Exception as e:
108
  return f"❌ Error: {e}"
109
 
 
110
  with gr.Blocks(title="Python AI — Train & Test") as app:
111
- gr.Markdown("## 🧠 Python AI — Train & Test\nBackground training with reliable zipping.\n")
112
 
113
  with gr.Tab("Train"):
114
  with gr.Row():
 
2
  from pathlib import Path
3
  import gradio as gr
4
 
5
+ ROOT = Path(__file__).resolve().parent # /home/user/app
6
+ DATA = ROOT / "dataset.jsonl"
7
+ LOG = ROOT / "train.log"
8
+ OUT = ROOT / "trained_model"
9
+ ZIP = ROOT / "trained_model.zip"
10
+ DONE = ROOT / "TRAIN_DONE" # <- completion flag
11
+ ERRF = ROOT / "TRAIN_ERROR" # <- error flag
12
+
13
+ # ---------- helpers ----------
14
+ def ls_workspace() -> str:
15
  rows = []
16
  for p in sorted(ROOT.iterdir(), key=lambda x: (x.is_file(), x.name.lower())):
17
+ try: size = p.stat().st_size
18
+ except Exception: size = 0
19
+ rows.append(f"{'[DIR]' if p.is_dir() else ' '}\t{size:>10}\t{p.name}")
20
  return "\n".join(rows) or "(empty)"
21
 
22
+ def _reset_artifacts():
23
+ for path in [OUT, ZIP, DONE, ERRF, LOG]:
24
+ if path.is_dir():
25
+ shutil.rmtree(path, ignore_errors=True)
26
+ else:
27
+ path.unlink(missing_ok=True)
28
+
29
+ def _zip_if_ready() -> bool:
30
+ """Zip OUT → ZIP once DONE exists."""
31
+ if DONE.exists() and OUT.exists():
32
+ if ZIP.exists():
33
+ ZIP.unlink()
34
+ with zipfile.ZipFile(ZIP, "w", compression=zipfile.ZIP_DEFLATED) as z:
35
+ for p in OUT.rglob("*"):
36
+ z.write(p, arcname=p.relative_to(OUT))
37
+ return ZIP.exists()
38
+
39
+ # ---------- train tab callbacks ----------
40
+ def upload_dataset(file):
41
+ if not file:
42
+ return "❌ No file selected.", ls_workspace()
43
+ shutil.copy(file.name, DATA)
44
+ return f"✅ Uploaded → {DATA.name}", ls_workspace()
45
 
46
+ def start_training():
47
+ if not DATA.exists():
48
+ return "❌ Upload a JSONL first.", ls_workspace()
 
 
 
 
49
 
50
+ _reset_artifacts()
51
  LOG.write_text("🔥 Training started in background…\n", encoding="utf-8")
52
+
53
  cmd = [
54
  "python", "train.py",
55
+ "--dataset", str(DATA),
56
+ "--output", str(OUT),
57
  "--model_name", "Salesforce/codegen-350M-multi",
58
  "--epochs", "1",
59
  "--batch_size", "2",
 
62
  "--subset", "0",
63
  ]
64
  with open(LOG, "a", encoding="utf-8") as lf:
65
+ subprocess.Popen(cmd, stdout=lf, stderr=subprocess.STDOUT)
66
+
67
+ return "🚀 Training launched. Use Refresh buttons.", ls_workspace()
68
 
69
  def read_logs():
70
  return LOG.read_text(encoding="utf-8")[-20000:] if LOG.exists() else "⏳ Waiting…"
71
 
 
 
 
 
 
 
 
 
72
  def refresh_status_and_download():
 
73
  if ERRF.exists():
74
+ status = f"❌ Error:\n{ERRF.read_text(encoding='utf-8')[-1200:]}"
75
  elif DONE.exists():
76
  status = "✅ Training complete."
77
+ else:
78
+ status = "⏳ Training…"
79
+
80
  _zip_if_ready()
81
  files = [str(ZIP)] if ZIP.exists() else []
82
  return status, gr.Files.update(value=files, visible=bool(files)), ls_workspace()
83
 
84
+ # ---------- test tab ----------
85
  def list_models():
86
  out = []
87
  for p in ROOT.iterdir():
 
89
  (p / "tokenizer.json").exists() or (p / "tokenizer_config.json").exists()
90
  ):
91
  out.append(str(p))
92
+ if OUT.exists() and str(OUT) not in out:
93
+ out.insert(0, str(OUT))
94
  return sorted(out)
95
 
96
+ def import_zip(zfile):
97
+ if not zfile:
98
+ return "❌ No zip selected.", list_models()
99
  dest = ROOT / f"imported_{int(time.time())}"
100
  dest.mkdir(parents=True, exist_ok=True)
101
+ with zipfile.ZipFile(zfile.name, "r") as z:
102
+ z.extractall(dest)
103
  return f"✅ Imported to {dest.name}", list_models()
104
 
105
  def generate(model_path, prompt):
106
+ if not model_path:
107
+ return "❌ Select a model."
108
+ if not prompt or not prompt.strip():
109
+ return "❌ Enter a prompt."
110
  try:
111
  from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
112
  tok = AutoTokenizer.from_pretrained(model_path, use_fast=True)
113
  if tok.pad_token_id is None and tok.eos_token_id is not None:
114
  tok.pad_token = tok.eos_token
115
  model = AutoModelForCausalLM.from_pretrained(model_path)
116
+ pipe = pipeline("text-generation", model=model, tokenizer=tok)
117
+ out = pipe(
118
+ prompt, max_new_tokens=220, do_sample=True, temperature=0.2, top_p=0.9,
119
+ repetition_penalty=1.2, no_repeat_ngram_size=4,
120
+ eos_token_id=tok.eos_token_id, pad_token_id=tok.pad_token_id, truncation=True
121
+ )[0]["generated_text"]
122
  return out
123
  except Exception as e:
124
  return f"❌ Error: {e}"
125
 
126
+ # ---------- UI ----------
127
  with gr.Blocks(title="Python AI — Train & Test") as app:
128
+ gr.Markdown("## 🧠 Python AI — Train & Test\nBackground training with DONE flag → reliable zip.\n")
129
 
130
  with gr.Tab("Train"):
131
  with gr.Row():