Spaces:
Sleeping
Sleeping
File size: 5,784 Bytes
0aef10f 719f624 f76d825 719f624 0aef10f 12e3c33 3fe5c2e 12e3c33 3fe5c2e 12e3c33 eca2f3b 12e3c33 0aef10f 12e3c33 719f624 0aef10f 12e3c33 0aef10f 719f624 0aef10f eca2f3b 0aef10f 12e3c33 0aef10f 719f624 f76d825 eca2f3b 0aef10f eca2f3b 0aef10f 3fe5c2e 12e3c33 3fe5c2e 719f624 12e3c33 0aef10f f76d825 12e3c33 3fe5c2e f76d825 0aef10f 719f624 f76d825 719f624 12e3c33 3fe5c2e 719f624 52206e3 0aef10f 719f624 3fe5c2e eca2f3b 719f624 f76d825 0aef10f f76d825 0aef10f 719f624 3fe5c2e 0aef10f 719f624 0aef10f f76d825 3fe5c2e 0aef10f f76d825 3fe5c2e f76d825 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 | # app.py
import os, shutil, subprocess, zipfile
from pathlib import Path
import gradio as gr
# Workspace paths, all anchored at this file's own directory.
ROOT = Path(__file__).resolve().parent  # e.g. /home/user/app  (was `_file_` — NameError)
DATA = ROOT / "dataset.jsonl"       # uploaded training dataset
LOG = ROOT / "train.log"            # live training log written by the trainer subprocess
OUT = ROOT / "trained_model"        # trainer's output directory
ZIP = ROOT / "trained_model.zip"    # zip archive the trainer produces from OUT
def ls_workspace() -> str:
    """Return a tab-separated listing of ROOT: directories first, then files.

    Each row is ``marker\tsize\tname``; entries whose size cannot be stat'ed
    (e.g. broken symlinks) are listed with size 0.
    """
    def order(entry):
        # dirs sort before files; within each group, case-insensitive by name
        return (entry.is_file(), entry.name.lower())

    lines = []
    for entry in sorted(ROOT.iterdir(), key=order):
        try:
            nbytes = entry.stat().st_size
        except Exception:
            nbytes = 0
        marker = "[DIR]" if entry.is_dir() else "     "
        lines.append(f"{marker}\t{nbytes:>10}\t{entry.name}")
    return "\n".join(lines) or "(empty)"
def upload_dataset(file):
    """Copy an uploaded file into the workspace as ``dataset.jsonl``.

    Parameters:
        file: Gradio file object (has a ``.name`` path attribute) or None.

    Returns:
        (status message, refreshed workspace listing) for the UI.
    """
    if not file:
        return "❌ No file selected.", ls_workspace()
    shutil.copy(file.name, DATA)
    # NOTE: original literal was mojibake split across two source lines;
    # reconstructed as a single-line status string.
    return f"✅ Uploaded → {DATA.name}", ls_workspace()
def start_training():
    """Run ``train.py`` as a blocking subprocess and report the outcome.

    Removes stale artifacts first so a previous run's zip is never served as
    this run's result. Stdout and stderr of the trainer are appended to LOG.

    Returns:
        (status text, download-widget update, workspace listing, log text)
        — matches the four outputs wired to the Start button.
    """
    # clean previous artifacts
    if OUT.exists():
        shutil.rmtree(OUT, ignore_errors=True)
    if ZIP.exists():
        ZIP.unlink(missing_ok=True)
    LOG.write_text("🔥 Training started…\n", encoding="utf-8")
    # list-form argv (shell=False): no shell injection via paths
    cmd = [
        "python", str(ROOT / "train.py"),
        "--dataset", str(DATA),
        "--output", str(OUT),
        "--zip_path", str(ZIP),
        "--model_name", "Salesforce/codegen-350M-multi",
        "--epochs", "1",
        "--batch_size", "2",
        "--block_size", "256",
        "--learning_rate", "5e-5",
    ]
    # run training (blocking) and capture logs
    with open(LOG, "a", encoding="utf-8") as lf:
        code = subprocess.Popen(cmd, stdout=lf, stderr=subprocess.STDOUT).wait()
    # success requires both a zero exit code AND the zip actually existing
    if code == 0 and ZIP.exists():
        info = f"✅ Training complete. Saved: {OUT.name} | Zip: {ZIP.name}"
        return info, gr.File.update(value=str(ZIP), visible=True), ls_workspace(), read_logs()
    info = f"❌ Training failed (exit {code}). See logs."
    return info, gr.File.update(visible=False), ls_workspace(), read_logs()
def read_logs():
    """Return the tail (last 20,000 chars) of the training log, or a waiting placeholder."""
    if not LOG.exists():
        return "⏳ Waiting…"  # mojibake "β³ Waitingβ¦" restored
    return LOG.read_text(encoding="utf-8")[-20000:]
def refresh_download():
    """Re-sync the download widget with the zip on disk and refresh the workspace listing."""
    zip_is_ready = ZIP.exists()
    file_update = gr.File.update(value=str(ZIP), visible=zip_is_ready)
    return file_update, ls_workspace()
# ---------------- Test tab ----------------
def list_models():
    """Return sorted paths of directories under ROOT that look like saved HF models.

    A directory qualifies when it has a ``config.json`` plus at least one
    tokenizer file. The trained-output dir OUT is always included if present.
    """
    found = []
    for entry in ROOT.iterdir():
        if not entry.is_dir():
            continue
        has_config = (entry / "config.json").exists()
        has_tokenizer = (
            (entry / "tokenizer.json").exists()
            or (entry / "tokenizer_config.json").exists()
        )
        if has_config and has_tokenizer:
            found.append(str(entry))
    if OUT.exists() and str(OUT) not in found:
        found.insert(0, str(OUT))
    return sorted(found)
def import_zip(zfile):
    """Extract an uploaded model zip into ``ROOT/imported_model``.

    Any previous import is wiped first, so re-importing replaces the model.

    Returns:
        (status message, refreshed model list) for the UI.
    """
    if not zfile:
        return "❌ No zip selected.", list_models()
    dest = ROOT / "imported_model"  # was f-string with no placeholders
    if dest.exists():
        shutil.rmtree(dest, ignore_errors=True)
    dest.mkdir(parents=True, exist_ok=True)
    # NOTE(security): extractall() on an untrusted archive is vulnerable to
    # "zip slip" path traversal on Python < 3.12; consider validating member
    # names or using the data_filter added in 3.12+.
    with zipfile.ZipFile(zfile.name, "r") as z:
        z.extractall(dest)
    return f"✅ Imported to {dest.name}", list_models()
def generate(model_path, prompt):
    """Run text generation with the model saved at ``model_path``.

    Parameters:
        model_path: directory of a saved HF model, or falsy to signal "none chosen".
        prompt: user prompt text; must be non-empty after stripping.

    Returns:
        The generated text, or a "❌ …" error string (this function never raises).
    """
    if not model_path:
        return "❌ Select a model."
    if not prompt or not prompt.strip():
        return "❌ Enter a prompt."
    try:
        # Lazy import: transformers is heavy, only needed on this code path.
        from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
        tok = AutoTokenizer.from_pretrained(model_path, use_fast=True)
        # CodeGen-style tokenizers ship without a pad token; reuse EOS.
        if tok.pad_token_id is None and tok.eos_token_id is not None:
            tok.pad_token = tok.eos_token
        model = AutoModelForCausalLM.from_pretrained(model_path)
        pipe = pipeline("text-generation", model=model, tokenizer=tok)
        out = pipe(
            prompt, max_new_tokens=220, do_sample=True, temperature=0.2, top_p=0.9,
            repetition_penalty=1.2, no_repeat_ngram_size=4,
            eos_token_id=tok.eos_token_id, pad_token_id=tok.pad_token_id, truncation=True
        )[0]["generated_text"]
        return out
    except Exception as e:
        # UI-facing best-effort: surface the error text rather than crashing the app
        return f"❌ Error: {e}"
# ---------------- UI ----------------
# Mojibake labels/markdown restored to their intended emoji/dash characters.
with gr.Blocks(title="Python AI — Train & Test") as app:
    gr.Markdown("## 🧠 Python AI — Train & Test (simple + reliable)\nTrainer zips the model itself. UI just shows the zip.\n")
    with gr.Tab("Train"):
        with gr.Row():
            ds = gr.File(label="📥 Upload JSONL", file_types=[".jsonl", ".jsonl.gz", ".json"])
            ws = gr.Textbox(label="Workspace", lines=16, value=ls_workspace())
        up_status = gr.Textbox(label="Upload Status", interactive=False)
        start = gr.Button("🚀 Start Training", variant="primary")
        logs = gr.Textbox(label="📜 Training Logs", lines=18, value=read_logs())
        status = gr.Textbox(label="Status", interactive=False)
        # Download widget only visible once a zip exists on disk.
        download_file = gr.File(label="📦 trained_model.zip", visible=ZIP.exists())
        refresh_dl_btn = gr.Button("Refresh Download")
        ds.change(upload_dataset, inputs=ds, outputs=[up_status, ws])
        start.click(start_training, outputs=[status, download_file, ws, logs])
        refresh_dl_btn.click(refresh_download, outputs=[download_file, ws])
    with gr.Tab("Test"):
        gr.Markdown("### Choose a model folder or upload a .zip, then prompt it")
        refresh_btn = gr.Button("♻ Refresh Model List")
        model_list = gr.Dropdown(choices=list_models(), label="Available AIs", interactive=True)
        zip_in = gr.File(label="Or upload a model .zip", file_types=[".zip"])
        import_status = gr.Textbox(label="Import Status", interactive=False)
        prompt = gr.Textbox(label="Prompt", lines=8, placeholder="### Instruction:\nPython: write a function ...\n### Response:\n")
        go = gr.Button("Generate")
        out = gr.Textbox(label="AI Response", lines=20)
        refresh_btn.click(list_models, outputs=model_list)
        zip_in.change(import_zip, inputs=zip_in, outputs=[import_status, model_list])
        go.click(generate, inputs=[model_list, prompt], outputs=out)

# Guard launch so importing this module (e.g. for tests) doesn't start a server;
# running the script directly behaves exactly as before.
if __name__ == "__main__":
    app.launch()