Percy3822 committed on
Commit
210cdd4
·
verified ·
1 Parent(s): 452316e

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +185 -0
app.py ADDED
@@ -0,0 +1,185 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os, shutil, subprocess, threading, uuid, time, zipfile, gzip, glob
2
+ import gradio as gr
3
+ from transformers import pipeline
4
+
5
# Training log: truncated at the start of each run by _worker() and returned
# to the UI by read_logs().
LOG_FILE = "train.log"
# Directory passed to train.py as --output; generate() loads from it when no
# uploaded test model is selected.
MODEL_DIR = "trained_model"
# Archive of MODEL_DIR offered by the download button after a successful run.
ZIP_FILE = "trained_model.zip"
# Scratch path the archive is written to before being moved onto ZIP_FILE.
ZIP_PART = ZIP_FILE + ".part"
9
+
10
def _human(n):
    """Format a byte count as a short string such as ``"1.5 KB"``.

    The value is divided by 1024 while it is at least 1024 and a larger
    unit is still available; GB is the largest unit, so bigger values are
    reported as more than 1024 GB. One decimal place is always shown.
    """
    size = float(n)
    for unit in ("B", "KB", "MB"):
        if size >= 1024:
            size /= 1024
            continue
        return f"{size:.1f} {unit}"
    return f"{size:.1f} GB"
14
+
15
def _read(path, fb="Waiting..."):
    """Return the text of *path*, or *fb* when the file cannot be read.

    The file is decoded as UTF-8 and undecodable bytes are dropped, so a
    partially written or binary-polluted log never raises.

    Args:
        path: File to read.
        fb: Fallback text returned when the file is missing or unreadable.

    Returns:
        The file contents as a string, or *fb*.
    """
    try:
        with open(path, "r", encoding="utf-8", errors="ignore") as f:
            return f.read()
    except (OSError, ValueError, TypeError):
        # Only "this path cannot be opened/read" failures fall back; a bare
        # except would also swallow KeyboardInterrupt and SystemExit.
        return fb
19
+
20
def _zip_dir_atomic(src, out_path, tmp_path):
    """Zip the directory tree *src* into *out_path* via a temporary file.

    The archive is built at *tmp_path* and moved onto *out_path* only once
    it is complete, so a reader never sees a half-written archive.

    Args:
        src: Directory whose files are archived; member names are relative
            to this directory.
        out_path: Final archive path; an existing file is replaced.
        tmp_path: Scratch path used while writing; any stale file there is
            removed first.

    Raises:
        OSError: If writing the archive or moving it into place fails. The
            scratch file is removed before the error propagates.
    """
    if os.path.exists(tmp_path):
        os.remove(tmp_path)
    try:
        with zipfile.ZipFile(tmp_path, "w", zipfile.ZIP_DEFLATED) as archive:
            for root, _dirs, files in os.walk(src):
                for fn in files:
                    fp = os.path.join(root, fn)
                    archive.write(fp, arcname=os.path.relpath(fp, src))
    except Exception:
        # Do not leave a partial archive behind for the next run to trip on.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
        raise
    # os.replace overwrites an existing destination in one step; deleting
    # out_path first would open a window in which no archive exists at all.
    os.replace(tmp_path, out_path)
29
+
30
def upload_file(f):
    """Copy an uploaded dataset into ``uploads/`` under a unique file name.

    Args:
        f: Uploaded file object whose ``name`` attribute is the path of the
            uploaded file, or ``None`` when nothing was uploaded.

    Returns:
        A ``(status_message, stored_path)`` tuple; ``stored_path`` is ``""``
        when no file was given.
    """
    if f is None:
        return "❌ No file.", ""
    upload_dir = "uploads"
    os.makedirs(upload_dir, exist_ok=True)
    # A random hex suffix keeps successive uploads from overwriting each other.
    unique_name = f"dataset_{uuid.uuid4().hex}.jsonl"
    dst = os.path.join(upload_dir, unique_name)
    shutil.copy(f.name, dst)
    return f"βœ… Uploaded β†’ {dst}", dst
36
+
37
def _train_single(dataset, log):
    """Run ``train.py`` on one dataset file and report whether it succeeded.

    The child's stdout and stderr are sent to *log*; afterwards one summary
    line with the exit code is appended.

    Args:
        dataset: Path of the dataset file passed as ``--dataset``.
        log: Open, writable text file backed by a real file descriptor.

    Returns:
        True when ``train.py`` exited with status 0, False otherwise.
    """
    import sys  # function-scope: sys is not among the module-level imports

    # The child writes directly to the log's file descriptor, so anything
    # still held in Python's write buffer must reach the file first or it
    # would appear after the child's output.
    log.flush()
    # Use the interpreter running this app rather than whatever "python"
    # resolves to on PATH, which may lack the training dependencies.
    cmd = [
        sys.executable or "python", "train.py",
        "--dataset", dataset,
        "--output", MODEL_DIR,
    ]
    returncode = subprocess.run(cmd, stdout=log, stderr=subprocess.STDOUT).returncode
    log.write(f"\n ↳ train.py exited {returncode} for {os.path.basename(dataset)}\n")
    return returncode == 0
45
+
46
def _list_shards(folder):
    """Return shard files in *folder*: ``*.jsonl`` first, then ``*.jsonl.gz``.

    Each group is sorted by path, and files whose base name contains
    "manifest" (case-insensitive) are skipped.
    """
    found = []
    for pattern in ("*.jsonl", "*.jsonl.gz"):
        found += sorted(glob.glob(os.path.join(folder, pattern)))
    return [p for p in found if "manifest" not in os.path.basename(p).lower()]


def _train_shards(paths, log):
    """Train on each shard in order; return False as soon as one fails.

    Gzipped shards are first decompressed to a scratch file, which is
    removed again when the loop ends for any reason.
    """
    tmp = "tmp_train.jsonl"
    try:
        for i, pth in enumerate(paths, 1):
            log.write(f"\n[{i}/{len(paths)}] Shard: {os.path.basename(pth)}\n")
            shard = pth
            if pth.endswith(".gz"):
                try:
                    with gzip.open(pth, "rt", encoding="utf-8") as rf, \
                            open(tmp, "w", encoding="utf-8") as wf:
                        for line in rf:
                            wf.write(line)
                except Exception as e:
                    log.write(f"❌ GZ read failed: {e}\n")
                    return False
                shard = tmp
            # Push buffered log text to disk before the child process
            # starts writing to the same file.
            log.flush()
            if not _train_single(shard, log):
                return False
        return True
    finally:
        try:
            os.remove(tmp)
        except OSError:
            # Best effort: the scratch file may never have been created.
            pass


def _worker(dataset_path, shards_folder):
    """Background job: train, then zip the resulting model directory.

    Progress and errors are written to ``LOG_FILE``. With *shards_folder*
    set, every shard found there is trained on in turn; otherwise the
    single file *dataset_path* is used. On success ``MODEL_DIR`` is
    archived to ``ZIP_FILE``.

    Args:
        dataset_path: Path of a single uploaded dataset, or empty.
        shards_folder: Folder containing ``*.jsonl`` / ``*.jsonl.gz``
            shards, or empty to use *dataset_path*.
    """
    # The log is always written as UTF-8: it contains non-ASCII symbols and
    # is read back as UTF-8, so the locale's default encoding must not be
    # used. The first open truncates the log left by any previous run.
    with open(LOG_FILE, "w", encoding="utf-8") as log:
        log.write("πŸ”₯ Starting training (C# AI)…\n")

    with open(LOG_FILE, "a", encoding="utf-8") as log:
        try:
            if shards_folder:
                log.write(f"πŸ“‚ Folder mode: {shards_folder}\n")
                paths = _list_shards(shards_folder)
                if paths:
                    ok = _train_shards(paths, log)
                else:
                    log.write("❌ No shards (*.jsonl / *.jsonl.gz).\n")
                    ok = False
            elif dataset_path and os.path.exists(dataset_path):
                log.flush()
                ok = _train_single(dataset_path, log)
            else:
                log.write("❌ Upload a valid dataset.\n")
                ok = False
        except Exception as e:
            # This runs in a background thread: without this, an unexpected
            # error would end the thread and leave no trace in the log.
            log.write(f"\n❌ Training crashed: {e}\n")
            ok = False

        if ok and os.path.isdir(MODEL_DIR):
            try:
                _zip_dir_atomic(MODEL_DIR, ZIP_FILE, ZIP_PART)
                sz = _human(os.path.getsize(ZIP_FILE))
                log.write(f"\nβœ… Model zipped β†’ {ZIP_FILE} ({sz})\n")
            except Exception as e:
                log.write(f"\n❌ Zip failed: {e}\n")
        else:
            log.write("\n❌ Training failed; no zip.\n")
89
+
90
def start_training(dataset_path, shards_folder):
    """Discard any previous archive and start training in a background thread.

    Args:
        dataset_path: Path of the uploaded dataset (may be empty).
        shards_folder: Folder of dataset shards (may be empty).

    Returns:
        A status message for the UI; training itself continues in a daemon
        thread and reports through the log file.
    """
    # Remove each stale artefact independently so that a failure on one
    # does not skip the other.
    for stale in (ZIP_FILE, ZIP_PART):
        try:
            os.remove(stale)
        except OSError:
            # Best effort: a missing or locked file must not block a new run.
            pass
    threading.Thread(
        target=_worker, args=(dataset_path, shards_folder), daemon=True
    ).start()
    return "πŸš€ Training started. Use Refresh buttons."
97
+
98
def read_logs():
    """Return the training log text, or a placeholder if it cannot be read."""
    placeholder = "Waiting for logs..."
    return _read(LOG_FILE, placeholder)
100
+
101
def refresh_download():
    """Build the download-button update and info text for the model archive.

    Returns:
        A ``(button_update, info_text)`` tuple. The button is made visible
        and pointed at ``ZIP_FILE`` when the archive exists; otherwise it is
        hidden and its value cleared.
    """
    if not os.path.exists(ZIP_FILE):
        return gr.update(visible=False, value=None), "No trained model yet."
    size = _human(os.path.getsize(ZIP_FILE))
    button_update = gr.update(visible=True, value=ZIP_FILE)
    return button_update, f"*Ready:* {ZIP_FILE} β€’ {size}"
106
+
107
def load_test_zip(z):
    """Extract an uploaded model archive into a fresh ``models/test_*`` folder.

    Args:
        z: Uploaded archive, given either as an object exposing the file
            path in its ``name`` attribute or as a plain path string;
            ``None`` when nothing was uploaded.

    Returns:
        A ``(status_message, extracted_dir)`` tuple; ``extracted_dir`` is
        ``""`` when nothing was extracted.
    """
    if z is None:
        return "❌ No file.", ""
    archive_path = getattr(z, "name", z)
    root = os.path.join("models", f"test_{uuid.uuid4().hex}")
    try:
        os.makedirs(root, exist_ok=True)
        # NOTE(review): the archive is user-supplied. zipfile strips
        # absolute paths and ".." components from member names, but nothing
        # here limits the extracted size.
        with zipfile.ZipFile(archive_path, "r") as zz:
            zz.extractall(root)
    except Exception as e:
        # Remove the empty or partially filled directory so that failed
        # uploads do not pile up under models/.
        shutil.rmtree(root, ignore_errors=True)
        return f"❌ Extract failed: {e}", ""
    return f"βœ… Extracted to {root}", root
117
+
118
def clear_test_model():
    """Return a status message and an empty path to reset the test-model state."""
    status_message = "Cleared. Will use trained_model/ if present."
    cleared_path = ""
    return status_message, cleared_path
120
+
121
def generate(prompt, model_path):
    """Generate text for *prompt* with the best available model.

    Model choice, in order: *model_path* when it is an existing directory,
    then ``MODEL_DIR`` when that exists, then the ``distilgpt2`` checkpoint.

    Args:
        prompt: User prompt; ``None``, empty or whitespace-only prompts are
            rejected.
        model_path: Directory of an uploaded test model, or empty.

    Returns:
        The generated text followed by a note naming the model source, or
        an error / hint message.
    """
    # Guard against None as well as blank text, so a missing value gets the
    # same hint instead of raising AttributeError outside the try below.
    if not prompt or not prompt.strip():
        return "Enter a prompt."
    try:
        if model_path and os.path.isdir(model_path):
            m, src = model_path, "(uploaded)"
        elif os.path.isdir(MODEL_DIR):
            m, src = MODEL_DIR, "(trained_model/)"
        else:
            m, src = "distilgpt2", "(fallback)"  # tiny fallback
        # NOTE(review): the tokenizer is always distilgpt2, whichever model
        # is loaded; presumably train.py fine-tunes distilgpt2 - confirm.
        gen = pipeline("text-generation", model=m, tokenizer="distilgpt2")
        out = gen(
            prompt, max_length=200, do_sample=True, temperature=0.7, truncation=True
        )[0]["generated_text"]
        return f"{out}\n\nβ€” using {src}"
    except Exception as e:
        # UI boundary: report any model-loading or generation failure as text.
        return f"❌ Error: {e}"
138
+
139
# Gradio UI: a "Train" tab (upload or folder -> background training -> logs
# and download) and a "Test" tab (optional model ZIP -> text generation).
with gr.Blocks(title="C# AI Trainer") as app:
    gr.Markdown("## 🧩 C# AI Trainer β€” upload JSONL, train fast, download, and test.")

    # State slots that carry values from one handler to the next:
    # stored dataset path, chosen shards folder, extracted test-model folder.
    ds_state = gr.State("")
    folder_state = gr.State("")
    test_model_state = gr.State("")

    with gr.Tab("🧠 Train"):
        with gr.Row():
            file_in = gr.File(label="Upload dataset (.jsonl)", file_types=[".jsonl"])
            up_btn = gr.Button("πŸ“€ Upload")
        with gr.Row():
            shards_folder = gr.Textbox(value="", label="Folder with shards (optional)")
            use_folder = gr.Button("πŸ“‚ Use Folder")
        status = gr.Textbox(label="Status", interactive=False)
        with gr.Row():
            start_btn = gr.Button("πŸš€ Start Training")
            refresh_logs = gr.Button("πŸ” Refresh Logs")
            refresh_dl = gr.Button("πŸ“¦ Refresh Download Area")
        logs = gr.Textbox(label="πŸ“œ Logs", lines=18)
        # Hidden until refresh_download() finds the archive.
        dl_btn = gr.DownloadButton(label="πŸ“₯ Download Trained Model (.zip)", visible=False, value=None)
        dl_info = gr.Markdown("No trained model yet.")

        # Uploading stores the copied dataset path in ds_state.
        up_btn.click(fn=upload_file, inputs=file_in, outputs=[status, ds_state])
        # "Use Folder" stores the stripped textbox value in folder_state; a
        # non-empty folder_state makes _worker() use folder mode.
        use_folder.click(fn=lambda p: ("βœ… Using folder." if p.strip() else "❌ Provide folder path.", p.strip()),
                         inputs=shards_folder, outputs=[status, folder_state])
        # start_training() returns right after starting its thread, so the
        # chained steps show only the initial log and download state; the
        # refresh buttons below re-run them on demand.
        start_btn.click(fn=start_training, inputs=[ds_state, folder_state], outputs=status
                        ).then(fn=read_logs, outputs=logs
                        ).then(fn=refresh_download, outputs=[dl_btn, dl_info])
        refresh_logs.click(fn=read_logs, outputs=logs)
        refresh_dl.click(fn=refresh_download, outputs=[dl_btn, dl_info])

    with gr.Tab("πŸš€ Test"):
        with gr.Row():
            zip_in = gr.File(label="Upload model ZIP", file_types=[".zip"])
            load_btn = gr.Button("πŸ“¦ Load ZIP")
            clear_btn = gr.Button("🧹 Clear")
        test_status = gr.Textbox(label="Test Model Status", interactive=False)
        prompt = gr.Textbox(label="Prompt", placeholder="e.g., Write a C# method that reverses a string.")
        go = gr.Button("πŸ” Generate")
        out = gr.Textbox(label="AI Response", lines=12)

        # Loading a ZIP stores the extracted folder in test_model_state;
        # clearing resets it to "" so generate() falls back to MODEL_DIR.
        load_btn.click(fn=load_test_zip, inputs=zip_in, outputs=[test_status, test_model_state])
        clear_btn.click(fn=clear_test_model, outputs=[test_status, test_model_state])
        go.click(fn=generate, inputs=[prompt, test_model_state], outputs=out)

# Start the web server when this module is executed.
app.launch()