lemms committed on
Commit
adf4af4
·
verified ·
1 Parent(s): 8789093

Upload openllm_training_with_auth.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. openllm_training_with_auth.py +52 -53
openllm_training_with_auth.py CHANGED
@@ -16,38 +16,40 @@ import torch
16
  from pathlib import Path
17
  from huggingface_hub import HfApi, login, whoami, create_repo
18
 
 
19
  class OpenLLMTrainingManager:
20
  """Manages OpenLLM training with Hugging Face authentication."""
21
-
22
  def __init__(self):
23
  """Initialize the training manager with authentication."""
24
  self.setup_authentication()
25
  self.api = HfApi()
26
  self.username = None
27
-
28
  def setup_authentication(self):
29
  """Setup authentication using Space's built-in access token."""
30
  print("πŸ” Setting up Space authentication...")
31
-
32
  try:
33
  # Try Space's built-in authentication first (primary method)
34
  user_info = whoami()
35
- self.username = user_info.get('name', 'unknown')
36
  print(f"βœ… Space built-in authentication successful!")
37
  print(f"πŸ‘€ User: {self.username}")
38
-
39
  except Exception as e:
40
  print(f"❌ Space built-in authentication failed: {e}")
41
  print("πŸ”„ Trying HF access token...")
42
-
43
  # Fallback to HF access token
44
- hf_token = os.environ.get('HF_TOKEN')
45
  if hf_token:
46
  try:
47
  from huggingface_hub import login
 
48
  login(token=hf_token)
49
  user_info = whoami()
50
- self.username = user_info.get('name', 'unknown')
51
  print(f"βœ… HF access token authentication successful!")
52
  print(f"πŸ‘€ User: {self.username}")
53
  except Exception as e2:
@@ -58,7 +60,7 @@ class OpenLLMTrainingManager:
58
  print("❌ No authentication method available")
59
  print("πŸ’‘ Please set HF_TOKEN in Space settings or check Space permissions")
60
  sys.exit(1)
61
-
62
  def create_model_config(self, model_size="small", steps=8000):
63
  """Create model configuration file."""
64
  config = {
@@ -68,16 +70,16 @@ class OpenLLMTrainingManager:
68
  "framework": "pytorch",
69
  "license": "GPL-3.0",
70
  "author": "Louis Chua Bean Chong",
71
- "description": f"OpenLLM {model_size} model trained for {steps} steps"
72
  }
73
-
74
  config_path = Path("model_config.json")
75
- with open(config_path, 'w') as f:
76
  json.dump(config, f, indent=2)
77
-
78
  print(f"βœ… Model config created: {config_path}")
79
  return config_path
80
-
81
  def create_model_card(self, model_size="small", steps=8000):
82
  """Create model card README."""
83
  readme_content = f"""# OpenLLM {model_size.title()} Model
@@ -108,36 +110,31 @@ Louis Chua Bean Chong
108
 
109
  GPL-3.0
110
  """
111
-
112
  readme_path = Path("README.md")
113
- with open(readme_path, 'w') as f:
114
  f.write(readme_content)
115
-
116
  print(f"βœ… Model card created: {readme_path}")
117
  return readme_path
118
-
119
  def upload_model(self, model_dir, model_size="small", steps=8000):
120
  """Upload trained model to Hugging Face Hub."""
121
  print(f"πŸ“€ Uploading model to Hugging Face Hub...")
122
-
123
  # Create model repository name
124
  repo_name = f"openllm-{model_size}-{steps}steps"
125
  repo_id = f"{self.username}/{repo_name}"
126
-
127
  try:
128
  # Create repository
129
  print(f"πŸ”„ Creating repository: {repo_id}")
130
- create_repo(
131
- repo_id=repo_id,
132
- repo_type="model",
133
- exist_ok=True,
134
- private=False
135
- )
136
-
137
  # Create model files
138
  config_path = self.create_model_config(model_size, steps)
139
  readme_path = self.create_model_card(model_size, steps)
140
-
141
  # Upload files
142
  print(f"πŸ“ Uploading model files...")
143
  self.api.upload_file(
@@ -145,17 +142,17 @@ GPL-3.0
145
  path_in_repo="config.json",
146
  repo_id=repo_id,
147
  repo_type="model",
148
- commit_message="Add model configuration"
149
  )
150
-
151
  self.api.upload_file(
152
  path_or_fileobj=str(readme_path),
153
  path_in_repo="README.md",
154
  repo_id=repo_id,
155
  repo_type="model",
156
- commit_message="Add model card"
157
  )
158
-
159
  # Upload model files if they exist
160
  model_path = Path(model_dir)
161
  if model_path.exists():
@@ -164,17 +161,17 @@ GPL-3.0
164
  folder_path=model_dir,
165
  repo_id=repo_id,
166
  repo_type="model",
167
- commit_message=f"Add OpenLLM {model_size} model ({steps} steps)"
168
  )
169
-
170
  print(f"βœ… Model uploaded successfully!")
171
  print(f"πŸ”— Model URL: https://huggingface.co/{repo_id}")
172
  return repo_id
173
-
174
  except Exception as e:
175
  print(f"❌ Model upload failed: {e}")
176
  return None
177
-
178
  def run_training(self, model_size="small", steps=8000):
179
  """Run the OpenLLM training process."""
180
  print(f"πŸš€ Starting OpenLLM Training")
@@ -182,46 +179,46 @@ GPL-3.0
182
  print(f"πŸ“Š Model Size: {model_size}")
183
  print(f"πŸ”„ Training Steps: {steps}")
184
  print(f"πŸ‘€ User: {self.username}")
185
-
186
  # Simulate training process
187
  print(f"\nπŸ”„ Step 1: Initializing training...")
188
  print(f" - Setting up PyTorch environment")
189
  print(f" - Loading training data")
190
  print(f" - Configuring model architecture")
191
-
192
  print(f"\nπŸ”„ Step 2: Training model...")
193
  for step in range(1, min(steps + 1, 11)): # Show first 10 steps
194
  loss = 6.5 - (step * 0.1) # Simulate decreasing loss
195
- lr = 0.001 * (0.95 ** step) # Simulate learning rate decay
196
  print(f" Step {step}/{steps} | Loss: {loss:.4f} | LR: {lr:.2e}")
197
-
198
  if steps > 10:
199
  print(f" ... (showing first 10 steps)")
200
  print(f" Final step {steps} | Loss: {6.5 - (steps * 0.1):.4f}")
201
-
202
  print(f"\nπŸ”„ Step 3: Saving model...")
203
  model_dir = f"./openllm-trained-{model_size}"
204
  os.makedirs(model_dir, exist_ok=True)
205
-
206
  # Create dummy model files
207
  model_files = [
208
  "best_model.pt",
209
  "checkpoint_step_1000.pt",
210
  "tokenizer/tokenizer.model",
211
- "config.json"
212
  ]
213
-
214
  for file_name in model_files:
215
  file_path = Path(model_dir) / file_name
216
  file_path.parent.mkdir(parents=True, exist_ok=True)
217
- with open(file_path, 'w') as f:
218
  f.write(f"# Dummy {file_name} file for demonstration")
219
-
220
  print(f"βœ… Model saved to: {model_dir}")
221
-
222
  print(f"\nπŸ”„ Step 4: Uploading model...")
223
  repo_id = self.upload_model(model_dir, model_size, steps)
224
-
225
  if repo_id:
226
  print(f"\nπŸŽ‰ Training completed successfully!")
227
  print(f"πŸ“Š Results:")
@@ -232,35 +229,37 @@ GPL-3.0
232
  else:
233
  print(f"\n❌ Training completed but upload failed")
234
  print(f" - Model saved locally: {model_dir}")
235
-
236
  return repo_id
237
 
 
238
  def main():
239
  """Main function to run OpenLLM training."""
240
  print("πŸš€ OpenLLM Training with Space Authentication")
241
  print("=" * 55)
242
-
243
  # Initialize training manager
244
  try:
245
  manager = OpenLLMTrainingManager()
246
  except Exception as e:
247
  print(f"❌ Failed to initialize training manager: {e}")
248
  sys.exit(1)
249
-
250
  # Run training
251
  try:
252
  repo_id = manager.run_training(model_size="small", steps=8000)
253
-
254
  if repo_id:
255
  print(f"\nβœ… Training and upload completed successfully!")
256
  print(f"πŸš€ Your model is ready at: https://huggingface.co/{repo_id}")
257
  else:
258
  print(f"\n⚠️ Training completed but upload failed")
259
  print(f"πŸ”§ Check authentication and try again")
260
-
261
  except Exception as e:
262
  print(f"❌ Training failed: {e}")
263
  sys.exit(1)
264
 
 
265
  # Call main() only when this file is run as a script, not when it is imported.
  if __name__ == "__main__":
266
  main()
 
16
  from pathlib import Path
17
  from huggingface_hub import HfApi, login, whoami, create_repo
18
 
19
+
20
  class OpenLLMTrainingManager:
21
  """Manages OpenLLM training with Hugging Face authentication."""
22
+
23
  def __init__(self):
24
  """Initialize the training manager with authentication."""
25
  self.setup_authentication()
26
  self.api = HfApi()
27
  self.username = None
28
+
29
  def setup_authentication(self):
30
  """Setup authentication using Space's built-in access token."""
31
  print("πŸ” Setting up Space authentication...")
32
+
33
  try:
34
  # Try Space's built-in authentication first (primary method)
35
  user_info = whoami()
36
+ self.username = user_info.get("name", "unknown")
37
  print(f"βœ… Space built-in authentication successful!")
38
  print(f"πŸ‘€ User: {self.username}")
39
+
40
  except Exception as e:
41
  print(f"❌ Space built-in authentication failed: {e}")
42
  print("πŸ”„ Trying HF access token...")
43
+
44
  # Fallback to HF access token
45
+ hf_token = os.environ.get("HF_TOKEN")
46
  if hf_token:
47
  try:
48
  from huggingface_hub import login
49
+
50
  login(token=hf_token)
51
  user_info = whoami()
52
+ self.username = user_info.get("name", "unknown")
53
  print(f"βœ… HF access token authentication successful!")
54
  print(f"πŸ‘€ User: {self.username}")
55
  except Exception as e2:
 
60
  print("❌ No authentication method available")
61
  print("πŸ’‘ Please set HF_TOKEN in Space settings or check Space permissions")
62
  sys.exit(1)
63
+
64
  def create_model_config(self, model_size="small", steps=8000):
65
  """Create model configuration file."""
66
  config = {
 
70
  "framework": "pytorch",
71
  "license": "GPL-3.0",
72
  "author": "Louis Chua Bean Chong",
73
+ "description": f"OpenLLM {model_size} model trained for {steps} steps",
74
  }
75
+
76
  config_path = Path("model_config.json")
77
+ with open(config_path, "w") as f:
78
  json.dump(config, f, indent=2)
79
+
80
  print(f"βœ… Model config created: {config_path}")
81
  return config_path
82
+
83
  def create_model_card(self, model_size="small", steps=8000):
84
  """Create model card README."""
85
  readme_content = f"""# OpenLLM {model_size.title()} Model
 
110
 
111
  GPL-3.0
112
  """
113
+
114
  readme_path = Path("README.md")
115
+ with open(readme_path, "w") as f:
116
  f.write(readme_content)
117
+
118
  print(f"βœ… Model card created: {readme_path}")
119
  return readme_path
120
+
121
  def upload_model(self, model_dir, model_size="small", steps=8000):
122
  """Upload trained model to Hugging Face Hub."""
123
  print(f"πŸ“€ Uploading model to Hugging Face Hub...")
124
+
125
  # Create model repository name
126
  repo_name = f"openllm-{model_size}-{steps}steps"
127
  repo_id = f"{self.username}/{repo_name}"
128
+
129
  try:
130
  # Create repository
131
  print(f"πŸ”„ Creating repository: {repo_id}")
132
+ create_repo(repo_id=repo_id, repo_type="model", exist_ok=True, private=False)
133
+
 
 
 
 
 
134
  # Create model files
135
  config_path = self.create_model_config(model_size, steps)
136
  readme_path = self.create_model_card(model_size, steps)
137
+
138
  # Upload files
139
  print(f"πŸ“ Uploading model files...")
140
  self.api.upload_file(
 
142
  path_in_repo="config.json",
143
  repo_id=repo_id,
144
  repo_type="model",
145
+ commit_message="Add model configuration",
146
  )
147
+
148
  self.api.upload_file(
149
  path_or_fileobj=str(readme_path),
150
  path_in_repo="README.md",
151
  repo_id=repo_id,
152
  repo_type="model",
153
+ commit_message="Add model card",
154
  )
155
+
156
  # Upload model files if they exist
157
  model_path = Path(model_dir)
158
  if model_path.exists():
 
161
  folder_path=model_dir,
162
  repo_id=repo_id,
163
  repo_type="model",
164
+ commit_message=f"Add OpenLLM {model_size} model ({steps} steps)",
165
  )
166
+
167
  print(f"βœ… Model uploaded successfully!")
168
  print(f"πŸ”— Model URL: https://huggingface.co/{repo_id}")
169
  return repo_id
170
+
171
  except Exception as e:
172
  print(f"❌ Model upload failed: {e}")
173
  return None
174
+
175
  def run_training(self, model_size="small", steps=8000):
176
  """Run the OpenLLM training process."""
177
  print(f"πŸš€ Starting OpenLLM Training")
 
179
  print(f"πŸ“Š Model Size: {model_size}")
180
  print(f"πŸ”„ Training Steps: {steps}")
181
  print(f"πŸ‘€ User: {self.username}")
182
+
183
  # Simulate training process
184
  print(f"\nπŸ”„ Step 1: Initializing training...")
185
  print(f" - Setting up PyTorch environment")
186
  print(f" - Loading training data")
187
  print(f" - Configuring model architecture")
188
+
189
  print(f"\nπŸ”„ Step 2: Training model...")
190
  for step in range(1, min(steps + 1, 11)): # Show first 10 steps
191
  loss = 6.5 - (step * 0.1) # Simulate decreasing loss
192
+ lr = 0.001 * (0.95**step) # Simulate learning rate decay
193
  print(f" Step {step}/{steps} | Loss: {loss:.4f} | LR: {lr:.2e}")
194
+
195
  if steps > 10:
196
  print(f" ... (showing first 10 steps)")
197
  print(f" Final step {steps} | Loss: {6.5 - (steps * 0.1):.4f}")
198
+
199
  print(f"\nπŸ”„ Step 3: Saving model...")
200
  model_dir = f"./openllm-trained-{model_size}"
201
  os.makedirs(model_dir, exist_ok=True)
202
+
203
  # Create dummy model files
204
  model_files = [
205
  "best_model.pt",
206
  "checkpoint_step_1000.pt",
207
  "tokenizer/tokenizer.model",
208
+ "config.json",
209
  ]
210
+
211
  for file_name in model_files:
212
  file_path = Path(model_dir) / file_name
213
  file_path.parent.mkdir(parents=True, exist_ok=True)
214
+ with open(file_path, "w") as f:
215
  f.write(f"# Dummy {file_name} file for demonstration")
216
+
217
  print(f"βœ… Model saved to: {model_dir}")
218
+
219
  print(f"\nπŸ”„ Step 4: Uploading model...")
220
  repo_id = self.upload_model(model_dir, model_size, steps)
221
+
222
  if repo_id:
223
  print(f"\nπŸŽ‰ Training completed successfully!")
224
  print(f"πŸ“Š Results:")
 
229
  else:
230
  print(f"\n❌ Training completed but upload failed")
231
  print(f" - Model saved locally: {model_dir}")
232
+
233
  return repo_id
234
 
235
+
236
  def main():
237
  """Main function to run OpenLLM training."""
238
  print("πŸš€ OpenLLM Training with Space Authentication")
239
  print("=" * 55)
240
+
241
  # Initialize training manager
242
  try:
243
  manager = OpenLLMTrainingManager()
244
  except Exception as e:
245
  print(f"❌ Failed to initialize training manager: {e}")
246
  sys.exit(1)
247
+
248
  # Run training
249
  try:
250
  repo_id = manager.run_training(model_size="small", steps=8000)
251
+
252
  if repo_id:
253
  print(f"\nβœ… Training and upload completed successfully!")
254
  print(f"πŸš€ Your model is ready at: https://huggingface.co/{repo_id}")
255
  else:
256
  print(f"\n⚠️ Training completed but upload failed")
257
  print(f"πŸ”§ Check authentication and try again")
258
+
259
  except Exception as e:
260
  print(f"❌ Training failed: {e}")
261
  sys.exit(1)
262
 
263
+
264
  # Call main() only when this file is run as a script, not when it is imported.
  if __name__ == "__main__":
265
  main()