Pista1981 committed on
Commit
62c1e68
·
verified ·
1 Parent(s): bb82736

🔧 v2: show_error=True, better error handling

Browse files
Files changed (1) hide show
  1. app.py +88 -65
app.py CHANGED
@@ -1,84 +1,107 @@
1
  """
2
- 🧬 HIVEMIND GPU WORKER
3
  Training LoRA adapters za Hivemind agente
4
  """
5
  import gradio as gr
6
  import os
 
7
 
8
  HF_TOKEN = os.environ.get("HF_TOKEN", "")
9
 
10
  def train_agent(agent_name: str, skill: str, epochs: int = 2):
11
  """Trenira LoRA i uploaduje na HF"""
12
- import torch
13
- from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments
14
- from peft import LoraConfig, get_peft_model
15
- from trl import SFTTrainer
16
- from datasets import Dataset
17
- from huggingface_hub import HfApi, login
18
- from datetime import datetime
19
-
20
- log = [f"🚀 Starting: {agent_name} - {skill}"]
21
-
22
- if not HF_TOKEN:
23
- return "❌ HF_TOKEN not set"
24
-
25
- login(token=HF_TOKEN)
26
- api = HfApi(token=HF_TOKEN)
27
-
28
- task_id = f"{agent_name[:8].lower().replace(' ','')}-{datetime.now().strftime('%m%d%H%M%S')}"
29
-
30
- # Load model (CPU friendly small model)
31
- log.append("📦 Loading model...")
32
- model = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0", torch_dtype=torch.float32)
33
- tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
34
- tokenizer.pad_token = tokenizer.eos_token
35
-
36
- # LoRA
37
- log.append("🔧 Setting up LoRA...")
38
- lora = LoraConfig(r=8, lora_alpha=16, target_modules=["q_proj","v_proj"], lora_dropout=0.05, bias="none", task_type="CAUSAL_LM")
39
- model = get_peft_model(model, lora)
40
-
41
- # Dataset
42
- data = [
43
- {"text": f"<|user|>\nWhat is {skill}?</s>\n<|assistant|>\n{skill} is fundamental.</s>"},
44
- {"text": f"<|user|>\nExplain {skill}</s>\n<|assistant|>\n{skill} optimizes models.</s>"},
45
- {"text": f"<|user|>\nHow to {skill}?</s>\n<|assistant|>\nApply proper techniques.</s>"},
46
- ]
47
- dataset = Dataset.from_list(data)
48
- log.append(f"📊 Dataset: {len(dataset)} examples")
49
-
50
- # Train (minimal for CPU)
51
- log.append(f"🏋️ Training {epochs} epoch(s)...")
52
- trainer = SFTTrainer(
53
- model=model, train_dataset=dataset, dataset_text_field="text",
54
- max_seq_length=128, tokenizer=tokenizer,
55
- args=TrainingArguments(
56
- output_dir="./out", num_train_epochs=epochs, per_device_train_batch_size=1,
57
- learning_rate=2e-4, save_strategy="no", report_to="none", fp16=False
58
- )
59
- )
60
- trainer.train()
61
- log.append("✅ Training complete!")
62
-
63
- # Save & Upload
64
- model.save_pretrained("./lora")
65
- tokenizer.save_pretrained("./lora")
66
-
67
- repo_id = f"Pista1981/hivemind-{task_id}"
68
- log.append(f"📤 Uploading to {repo_id}...")
69
-
70
  try:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  api.create_repo(repo_id=repo_id, exist_ok=True, private=False)
72
  api.upload_folder(folder_path="./lora", repo_id=repo_id, commit_message=f"🤖 {agent_name}: {skill}")
73
  log.append(f"✅ SUCCESS: https://huggingface.co/{repo_id}")
 
 
 
74
  except Exception as e:
75
- log.append(f"❌ Upload error: {e}")
76
-
77
- return "\n".join(log)
78
 
79
 
80
  with gr.Blocks(title="Hivemind GPU Worker") as demo:
81
- gr.Markdown("# 🧬 Hivemind GPU Worker\nTraining LoRA adapters")
82
 
83
  with gr.Row():
84
  agent_input = gr.Textbox(label="Agent Name", value="TestAgent")
@@ -86,8 +109,8 @@ with gr.Blocks(title="Hivemind GPU Worker") as demo:
86
  epochs_input = gr.Slider(1, 3, value=1, step=1, label="Epochs")
87
 
88
  train_btn = gr.Button("🚀 Train", variant="primary")
89
- output = gr.Textbox(label="Output", lines=12)
90
 
91
  train_btn.click(fn=train_agent, inputs=[agent_input, skill_input, epochs_input], outputs=output)
92
 
93
- demo.launch()
 
1
  """
2
+ 🧬 HIVEMIND GPU WORKER v2
3
  Training LoRA adapters za Hivemind agente
4
  """
5
  import gradio as gr
6
  import os
7
+ import traceback
8
 
9
  HF_TOKEN = os.environ.get("HF_TOKEN", "")
10
 
11
  def train_agent(agent_name: str, skill: str, epochs: int = 2):
12
  """Trenira LoRA i uploaduje na HF"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  try:
14
+ import torch
15
+ from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments
16
+ from peft import LoraConfig, get_peft_model
17
+ from trl import SFTTrainer
18
+ from datasets import Dataset
19
+ from huggingface_hub import HfApi, login
20
+ from datetime import datetime
21
+
22
+ log = [f"🚀 Starting: {agent_name} - {skill}"]
23
+
24
+ if not HF_TOKEN:
25
+ return "❌ HF_TOKEN not set in Space secrets! Go to Settings -> Repository secrets"
26
+
27
+ login(token=HF_TOKEN)
28
+ api = HfApi(token=HF_TOKEN)
29
+
30
+ task_id = f"{agent_name[:8].lower().replace(' ','')}-{datetime.now().strftime('%m%d%H%M%S')}"
31
+
32
+ # Load model (CPU friendly small model)
33
+ log.append("📦 Loading TinyLlama...")
34
+ model = AutoModelForCausalLM.from_pretrained(
35
+ "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
36
+ torch_dtype=torch.float32,
37
+ low_cpu_mem_usage=True
38
+ )
39
+ tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
40
+ tokenizer.pad_token = tokenizer.eos_token
41
+
42
+ # LoRA
43
+ log.append("🔧 Setting up LoRA r=8...")
44
+ lora = LoraConfig(
45
+ r=8, lora_alpha=16,
46
+ target_modules=["q_proj","v_proj"],
47
+ lora_dropout=0.05, bias="none",
48
+ task_type="CAUSAL_LM"
49
+ )
50
+ model = get_peft_model(model, lora)
51
+ trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
52
+ log.append(f" Trainable params: {trainable:,}")
53
+
54
+ # Dataset
55
+ data = [
56
+ {"text": f"<|user|>\nWhat is {skill}?</s>\n<|assistant|>\n{skill} is a fundamental technique in machine learning and AI.</s>"},
57
+ {"text": f"<|user|>\nExplain {skill}</s>\n<|assistant|>\n{skill} helps optimize model performance and efficiency.</s>"},
58
+ {"text": f"<|user|>\nHow to implement {skill}?</s>\n<|assistant|>\nTo implement {skill}, apply proper techniques and best practices.</s>"},
59
+ {"text": f"<|user|>\nWhy is {skill} important?</s>\n<|assistant|>\n{skill} is crucial for building effective AI systems.</s>"},
60
+ ]
61
+ dataset = Dataset.from_list(data)
62
+ log.append(f"📊 Dataset: {len(dataset)} examples")
63
+
64
+ # Train (minimal for CPU)
65
+ log.append(f"🏋️ Training {epochs} epoch(s)...")
66
+ trainer = SFTTrainer(
67
+ model=model,
68
+ train_dataset=dataset,
69
+ dataset_text_field="text",
70
+ max_seq_length=128,
71
+ tokenizer=tokenizer,
72
+ args=TrainingArguments(
73
+ output_dir="./out",
74
+ num_train_epochs=epochs,
75
+ per_device_train_batch_size=1,
76
+ learning_rate=2e-4,
77
+ save_strategy="no",
78
+ report_to="none",
79
+ fp16=False,
80
+ logging_steps=1,
81
+ )
82
+ )
83
+ result = trainer.train()
84
+ log.append(f"✅ Training complete! Loss: {result.training_loss:.4f}")
85
+
86
+ # Save & Upload
87
+ model.save_pretrained("./lora")
88
+ tokenizer.save_pretrained("./lora")
89
+
90
+ repo_id = f"Pista1981/hivemind-hf-{task_id}"
91
+ log.append(f"📤 Uploading to {repo_id}...")
92
+
93
  api.create_repo(repo_id=repo_id, exist_ok=True, private=False)
94
  api.upload_folder(folder_path="./lora", repo_id=repo_id, commit_message=f"🤖 {agent_name}: {skill}")
95
  log.append(f"✅ SUCCESS: https://huggingface.co/{repo_id}")
96
+
97
+ return "\n".join(log)
98
+
99
  except Exception as e:
100
+ return f"❌ ERROR: {str(e)}\n\nTraceback:\n{traceback.format_exc()}"
 
 
101
 
102
 
103
  with gr.Blocks(title="Hivemind GPU Worker") as demo:
104
+ gr.Markdown("# 🧬 Hivemind GPU Worker v2\nTraining LoRA adapters for AI agents")
105
 
106
  with gr.Row():
107
  agent_input = gr.Textbox(label="Agent Name", value="TestAgent")
 
109
  epochs_input = gr.Slider(1, 3, value=1, step=1, label="Epochs")
110
 
111
  train_btn = gr.Button("🚀 Train", variant="primary")
112
+ output = gr.Textbox(label="Output", lines=15)
113
 
114
  train_btn.click(fn=train_agent, inputs=[agent_input, skill_input, epochs_input], outputs=output)
115
 
116
+ demo.launch(show_error=True)