zeekay commited on
Commit
f65081a
·
verified ·
1 Parent(s): 89f7f16

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. app.py +257 -0
  2. requirements.txt +8 -0
app.py ADDED
@@ -0,0 +1,257 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Zen Coder Flash - Identity Training Space
3
+ Fine-tune GLM-4.7-Flash with Zen identity using LoRA
4
+ """
5
+
6
+ import gradio as gr
7
+ import torch
8
+ from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments
9
+ from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
10
+ from datasets import load_dataset
11
+ import os
12
+
13
+ # Constants
14
+ MODEL_ID = "zenlm/zen-coder-flash"
15
+ DATASET_ID = "zenlm/zen-coder-flash-identity"
16
+ OUTPUT_DIR = "./zen-coder-flash-lora"
17
+
18
+ # Identity data (embedded for quick training)
19
+ IDENTITY_DATA = [
20
+ {"instruction": "Who are you?", "output": "I am Zen Coder Flash, a 31B MoE AI model from the Zen family. I'm built on the GLM-4.7-Flash architecture and optimized for code generation, software development, and agentic reasoning. With 31 billion parameters (only 3B active per token) and 131K context, I represent the flagship balance of capability and efficiency in the Zen coder lineup."},
21
+ {"instruction": "What is your name?", "output": "My name is Zen Coder Flash. I'm the flagship coder model in the Zen family, based on GLM-4.7-Flash's advanced Mixture of Experts architecture with 31 billion parameters."},
22
+ {"instruction": "Are you ChatGPT?", "output": "No, I'm not ChatGPT. I'm Zen Coder Flash, a distinct model from the Zen AI family. While we both assist with coding and general tasks, I'm based on GLM-4.7-Flash's MoE architecture and specifically optimized for code generation, software development, and agentic reasoning."},
23
+ {"instruction": "Are you Claude?", "output": "No, I'm Zen Coder Flash, not Claude. I'm part of the Zen model family, built on GLM-4.7-Flash. My focus is code generation and software development, with MoE architecture enabling efficient frontier performance."},
24
+ {"instruction": "What can you do?", "output": "As Zen Coder Flash, I excel at: code generation across 100+ languages, debugging and code review, software architecture, tool calling and function execution, extended reasoning for complex problems, and technical documentation. My 131K context window handles large codebases."},
25
+ {"instruction": "What's your SWE-bench score?", "output": "I scored 59.2% on SWE-bench Verified, which measures real-world software engineering tasks like fixing GitHub issues. For comparison, Qwen3-30B scores 22% and GPT-OSS-20B scores 34%. This reflects strong practical coding ability."},
26
+ ]
27
+
28
+
29
def create_training_data():
    """Build the identity fine-tuning dataset from the embedded examples.

    Each IDENTITY_DATA pair is rendered into a single chat-formatted
    training string under the "text" key.
    """
    from datasets import Dataset

    records = [
        {
            "text": (
                f"<|user|>\n{example['instruction']}"
                f"<|assistant|>\n{example['output']}<|endoftext|>"
            )
        }
        for example in IDENTITY_DATA
    ]
    return Dataset.from_list(records)
40
+
41
+
42
def train_model(
    learning_rate: float = 1e-4,
    num_epochs: int = 3,
    batch_size: int = 1,
    lora_r: int = 8,
    lora_alpha: int = 16,
    progress=gr.Progress()
):
    """Fine-tune the base model on the embedded identity data with QLoRA.

    Args:
        learning_rate: Optimizer learning rate.
        num_epochs: Number of passes over the identity dataset.
        batch_size: Per-device training batch size.
        lora_r: LoRA rank (adapter bottleneck dimension).
        lora_alpha: LoRA scaling factor.
        progress: Gradio progress tracker (injected by the UI).

    Returns:
        A status string — success with the output path, or the reason
        training was skipped.
    """

    progress(0, desc="Loading model...")

    # A 31B model is not trainable on CPU, even 4-bit quantized.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    if device == "cpu":
        return "⚠️ No GPU detected. Training requires GPU. Please upgrade to a GPU Space."

    # Load model in 4-bit (QLoRA): NF4 weights, bfloat16 compute.
    from transformers import BitsAndBytesConfig

    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
    )

    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
    # Many causal-LM tokenizers ship without a pad token; padding in the
    # collator would then crash. Fall back to EOS.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        quantization_config=bnb_config,
        device_map="auto",
        trust_remote_code=True,
    )

    progress(0.2, desc="Preparing LoRA...")

    # Casts norms/embeddings to fp32 and sets the model up for k-bit training.
    model = prepare_model_for_kbit_training(model)

    # LoRA config: adapt only the attention projections.
    lora_config = LoraConfig(
        r=lora_r,
        lora_alpha=lora_alpha,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
        lora_dropout=0.05,
        bias="none",
        task_type="CAUSAL_LM",
    )

    model = get_peft_model(model, lora_config)

    progress(0.3, desc="Loading dataset...")

    dataset = create_training_data()

    def tokenize_function(examples):
        # No padding here: the collator pads each batch dynamically.
        # Padding every example to max_length wasted compute and trained
        # loss on long pad runs.
        return tokenizer(
            examples["text"],
            truncation=True,
            max_length=512,
        )

    # Drop the raw "text" column so the Trainer only sees tensor-ready fields.
    tokenized_dataset = dataset.map(
        tokenize_function, batched=True, remove_columns=dataset.column_names
    )

    progress(0.4, desc="Starting training...")

    # bf16 (not fp16) to match bnb_4bit_compute_dtype=torch.bfloat16 above;
    # mixing an fp16 autocast with bf16 compute destabilizes training.
    training_args = TrainingArguments(
        output_dir=OUTPUT_DIR,
        num_train_epochs=num_epochs,
        per_device_train_batch_size=batch_size,
        learning_rate=learning_rate,
        logging_steps=1,
        save_steps=50,
        bf16=True,
        report_to="none",
    )

    from transformers import Trainer, DataCollatorForLanguageModeling

    # mlm=False -> standard causal-LM labels (inputs shifted by one).
    data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_dataset,
        data_collator=data_collator,
    )

    # Train
    trainer.train()

    progress(0.9, desc="Saving adapters...")

    # Saves only the LoRA adapter weights, not the full base model.
    model.save_pretrained(OUTPUT_DIR)
    tokenizer.save_pretrained(OUTPUT_DIR)

    progress(1.0, desc="Done!")

    return f"✅ Training complete! Adapters saved to {OUTPUT_DIR}"
145
+
146
+
147
def test_model(prompt: str):
    """Generate a response from the fine-tuned model for a single prompt.

    Args:
        prompt: User message to send to the model.

    Returns:
        The model's decoded reply, or a warning string when no trained
        adapters exist yet.
    """

    if not os.path.exists(OUTPUT_DIR):
        return "⚠️ No trained model found. Please train first."

    from peft import PeftModel

    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)

    # Load the base model and attach the LoRA adapters saved by train_model.
    base_model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        torch_dtype=torch.bfloat16,
        device_map="auto",
        trust_remote_code=True,
    )
    model = PeftModel.from_pretrained(base_model, OUTPUT_DIR)

    # Same chat format used for training.
    formatted = f"<|user|>\n{prompt}<|assistant|>\n"
    inputs = tokenizer(formatted, return_tensors="pt").to(model.device)

    outputs = model.generate(
        **inputs,
        max_new_tokens=256,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
    )

    # Decode only the newly generated tokens. The previous version decoded
    # the full sequence with skip_special_tokens=True and then split on
    # "<|assistant|>" — but that marker is stripped as a special token, so
    # the split found nothing and the prompt leaked into the response.
    prompt_length = inputs["input_ids"].shape[1]
    response = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
    return response.strip()
180
+
181
+
182
def push_to_hub(repo_id: str):
    """Push trained adapters to HuggingFace.

    Args:
        repo_id: Target repository in "owner/name" form.

    Returns:
        A status string with the repo URL, or a warning when preconditions
        are not met.
    """

    # Guard against an empty textbox — upload_folder would raise otherwise.
    if not repo_id or not repo_id.strip():
        return "⚠️ Please enter a repository ID (e.g. user/repo)."

    if not os.path.exists(OUTPUT_DIR):
        return "⚠️ No trained model found. Please train first."

    from huggingface_hub import HfApi
    api = HfApi()

    # upload_folder fails if the repo does not exist yet; create it first.
    # exist_ok=True makes repeated pushes idempotent.
    api.create_repo(repo_id=repo_id, repo_type="model", exist_ok=True)

    api.upload_folder(
        folder_path=OUTPUT_DIR,
        repo_id=repo_id,
        repo_type="model",
    )

    return f"✅ Pushed to https://huggingface.co/{repo_id}"
198
+
199
+
200
+ # Gradio UI
201
# Gradio UI: three tabs — train, test, and push to the Hub.
with gr.Blocks(title="Zen Coder Flash Trainer") as demo:
    gr.Markdown("""
    # ⚡ Zen Coder Flash - Identity Training

    Fine-tune GLM-4.7-Flash with Zen identity using LoRA.

    **Model:** [zenlm/zen-coder-flash](https://huggingface.co/zenlm/zen-coder-flash)
    """)

    # --- Training tab: hyperparameter sliders + launch button ---
    with gr.Tab("🎯 Train"):
        gr.Markdown("### Training Parameters")

        with gr.Row():
            lr_slider = gr.Slider(1e-5, 1e-3, value=1e-4, label="Learning Rate")
            epoch_slider = gr.Slider(1, 10, value=3, step=1, label="Epochs")

        with gr.Row():
            batch_slider = gr.Slider(1, 4, value=1, step=1, label="Batch Size")
            rank_slider = gr.Slider(4, 64, value=8, step=4, label="LoRA Rank")

        start_button = gr.Button("🚀 Start Training", variant="primary")
        train_status = gr.Textbox(label="Status", lines=3)

        # lora_alpha and progress use train_model's defaults.
        start_button.click(
            train_model,
            inputs=[lr_slider, epoch_slider, batch_slider, rank_slider],
            outputs=train_status,
        )

    # --- Test tab: send a prompt to the trained adapters ---
    with gr.Tab("🧪 Test"):
        gr.Markdown("### Test Trained Model")

        prompt_box = gr.Textbox(
            label="Prompt",
            placeholder="Who are you?",
            lines=2,
        )
        generate_button = gr.Button("Generate")
        response_box = gr.Textbox(label="Response", lines=5)

        generate_button.click(test_model, inputs=prompt_box, outputs=response_box)

    # --- Push tab: upload saved adapters to the Hub ---
    with gr.Tab("📤 Push"):
        gr.Markdown("### Push to HuggingFace")

        repo_box = gr.Textbox(
            label="Repository ID",
            value="zenlm/zen-coder-flash-lora",
        )
        push_button = gr.Button("Push to Hub")
        push_status = gr.Textbox(label="Status")

        push_button.click(push_to_hub, inputs=repo_box, outputs=push_status)


if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ torch>=2.0.0
2
+ transformers>=4.40.0
3
+ peft>=0.10.0
4
+ datasets>=2.18.0
5
+ accelerate>=0.28.0
6
+ bitsandbytes>=0.43.0
7
+ gradio>=4.0.0
8
+ huggingface_hub>=0.22.0