lemms committed on
Commit
edea435
·
verified ·
1 Parent(s): 189f6ea

Replace with simplified Gradio app for compatibility

Browse files
Files changed (1) hide show
  1. app.py +51 -218
app.py CHANGED
@@ -1,165 +1,93 @@
1
  #!/usr/bin/env python3
2
  """
3
- OpenLLM Training Space Application
4
 
5
- This Gradio application provides a comprehensive web-based user interface for
6
- training OpenLLM models within the Hugging Face Space environment. It serves
7
- as the main entry point for users to interact with the training infrastructure
8
- and monitor training progress.
9
-
10
- The application features:
11
- - Interactive training configuration interface
12
- - Real-time training status monitoring
13
- - Progress tracking and visualization
14
- - Comprehensive instructions and documentation
15
- - Integration with Hugging Face Hub for model distribution
16
-
17
- Key Components:
18
- 1. Training Configuration Panel - Model size, hyperparameters, and settings
19
- 2. Training Status Monitor - Real-time progress and status updates
20
- 3. Instruction Panel - Step-by-step guidance for users
21
- 4. Terminal Commands Display - Manual command execution options
22
- 5. Resource Links - Quick access to related repositories and documentation
23
-
24
- This application is designed to work seamlessly within the Hugging Face Space
25
- environment and provides both automated and manual training capabilities.
26
 
27
  Author: Louis Chua Bean Chong
28
  License: GPL-3.0
29
- Version: 1.0.0
30
  Last Updated: 2024
31
  """
32
 
33
  import gradio as gr
34
- import os
35
- import sys
36
- from pathlib import Path
37
-
38
- # Add the training modules to the Python path
39
- # This allows the app to import and use the core training functionality
40
- # that has been copied from the main repository
41
- sys.path.append(str(Path(__file__).parent / "training"))
42
 
43
  def main():
44
  """
45
- Main function that creates and configures the Gradio application interface.
46
-
47
- This function sets up the complete web interface for the OpenLLM training
48
- Space, including all UI components, event handlers, and application logic.
49
-
50
- The interface is organized into several key sections:
51
- 1. Header and title section
52
- 2. Training configuration panel (left column)
53
- 3. Training status and controls (right column)
54
- 4. Instructions and documentation section
55
- 5. Terminal commands and manual execution options
56
- 6. Resource links and footer information
57
-
58
- Returns:
59
- gr.Blocks: The configured Gradio application interface
60
  """
61
 
62
  # Create the main Gradio application interface
63
- # Using Blocks for maximum flexibility and customization
64
  with gr.Blocks(
65
- title="OpenLLM Training Space", # Browser tab title
66
- theme=gr.themes.Soft(), # Modern, clean theme
67
- css="footer {display: none !important}" # Hide default footer
68
  ) as demo:
69
 
70
  # Application Header
71
- # This section provides the main title and overview of the application
72
  gr.Markdown("# πŸš€ OpenLLM Training Space")
73
  gr.Markdown("### *Advanced Language Model Training Interface*")
74
  gr.Markdown("---")
75
 
76
- # Main Content Area - Two Column Layout
77
- # Left column: Training configuration
78
- # Right column: Training status and controls
79
  with gr.Row():
80
 
81
- # Left Column: Training Configuration Panel
82
  with gr.Column(scale=1):
83
  gr.Markdown("## πŸ“Š Training Configuration")
84
- gr.Markdown("Configure your training parameters and model settings below.")
85
 
86
  # Model Size Selection
87
- # This dropdown allows users to select the target model size
88
- # Different model sizes have different computational requirements
89
  model_size = gr.Dropdown(
90
- choices=["small", "medium", "large"], # Available model sizes
91
- value="small", # Default selection
92
- label="Model Size",
93
- info="Select the target model size. Larger models require more resources."
94
  )
95
 
96
  # Training Steps Configuration
97
- # Controls the number of training steps/iterations
98
  max_steps = gr.Slider(
99
- minimum=100, # Minimum training steps
100
- maximum=10000, # Maximum training steps
101
- value=1000, # Default value
102
- step=100, # Step increment
103
- label="Max Training Steps",
104
- info="Number of training iterations. More steps = longer training time."
105
  )
106
 
107
  # Learning Rate Configuration
108
- # Controls how quickly the model learns from the data
109
  learning_rate = gr.Slider(
110
- minimum=1e-5, # Minimum learning rate (0.00001)
111
- maximum=1e-3, # Maximum learning rate (0.001)
112
- value=3e-4, # Default learning rate (0.0003)
113
- step=1e-5, # Step increment
114
- label="Learning Rate",
115
- info="How quickly the model learns. Higher values = faster learning but may be unstable."
116
  )
117
 
118
  # Batch Size Configuration
119
- # Controls how many samples are processed together
120
  batch_size = gr.Slider(
121
- minimum=1, # Minimum batch size
122
- maximum=16, # Maximum batch size
123
- value=4, # Default batch size
124
- step=1, # Step increment
125
- label="Batch Size",
126
- info="Number of samples processed together. Larger batches = more memory usage."
127
  )
128
 
129
  # Right Column: Training Status and Controls
130
  with gr.Column(scale=1):
131
  gr.Markdown("## 🎯 Training Status")
132
- gr.Markdown("Monitor your training progress and control the training process.")
133
 
134
  # Training Status Display
135
- # Shows the current status of the training process
136
  status_text = gr.Textbox(
137
- value="Ready to start training", # Initial status message
138
  label="Current Status",
139
- interactive=False, # Read-only display
140
- lines=3, # Multiple lines for detailed status
141
- info="Real-time status updates during training"
142
  )
143
 
144
- # Progress Bar
145
- # Visual indicator of training progress
146
- progress = gr.Progress()
147
-
148
  # Training Control Buttons
149
- # Buttons to start and stop the training process
150
  with gr.Row():
151
- start_btn = gr.Button(
152
- "πŸš€ Start Training",
153
- variant="primary",
154
- size="lg"
155
- )
156
- stop_btn = gr.Button(
157
- "⏹️ Stop Training",
158
- variant="stop",
159
- size="lg"
160
- )
161
 
162
- # Instructions and Documentation Section
163
  gr.Markdown("## πŸ“‹ Training Instructions")
164
  gr.Markdown("""
165
  Follow these steps to successfully train your OpenLLM model:
@@ -170,144 +98,49 @@ def main():
170
  - Adjust the learning rate for optimal training performance
171
  - Choose a batch size that fits your available memory
172
 
173
- ### **Step 2: Upload Training Data**
174
- - Use the terminal to upload your training dataset
175
- - Ensure your data is properly formatted and cleaned
176
- - Verify that the dataset is accessible to the training process
177
-
178
- ### **Step 3: Start Training**
179
  - Click the "Start Training" button to begin the process
180
- - Monitor the progress bar and status updates
181
  - The training will run automatically in the background
182
 
183
- ### **Step 4: Monitor Progress**
184
- - Watch the real-time status updates
185
- - Check the progress bar for completion percentage
186
- - Review any error messages or warnings
187
-
188
- ### **Step 5: Access Results**
189
  - Trained models are automatically pushed to Hugging Face Hub
190
  - Check the model repository for your trained model
191
- - Download or use the model for inference tasks
192
  """)
193
 
194
- # Terminal Commands Section
195
- gr.Markdown("## πŸ’» Terminal Commands")
196
- gr.Markdown("For advanced users or troubleshooting, you can execute these commands manually:")
197
-
198
- # Code block with terminal commands
199
- gr.Code("""
200
- # Upload training data to Hugging Face Hub
201
- python scripts/upload_training_data.py
202
-
203
- # Start training manually (alternative to UI)
204
- python training/train_model.py --config configs/small_model.json
205
-
206
- # Check training logs and status
207
- tail -f training.log
208
-
209
- # Monitor system resources during training
210
- htop
211
-
212
- # Check available GPU resources
213
- nvidia-smi
214
- """, language="bash")
215
-
216
  # Resource Links Section
217
  gr.Markdown("## πŸ”— Useful Resources")
218
-
219
- # Create a grid of resource links
220
- with gr.Row():
221
- with gr.Column():
222
- gr.Markdown("### **Model Repositories**")
223
- gr.Markdown("""
224
- - [πŸ“š 7k Model](https://huggingface.co/lemms/openllm-small-extended-7k)
225
- - [🎯 8k Model](https://huggingface.co/lemms/openllm-small-extended-8k)
226
- - [πŸ“Š Training Data](https://huggingface.co/datasets/lemms/openllm-training-data)
227
- """)
228
-
229
- with gr.Column():
230
- gr.Markdown("### **Documentation**")
231
- gr.Markdown("""
232
- - [πŸ“– Main Project](https://github.com/louischua/openllm)
233
- - [πŸ”§ Training Guide](https://github.com/louischua/openllm/docs/training_pipeline.md)
234
- - [πŸš€ Quick Start](https://github.com/louischua/openllm#getting-started)
235
- """)
236
 
237
  # Training Function Definition
238
- # This function handles the actual training process when triggered by the UI
239
- def start_training(model_size, max_steps, learning_rate, batch_size, progress=gr.Progress()):
240
  """
241
  Execute the training process with the specified parameters.
242
-
243
- This function is called when the user clicks the "Start Training" button.
244
- It simulates the training process and provides real-time updates to the UI.
245
-
246
- Args:
247
- model_size (str): Selected model size ("small", "medium", "large")
248
- max_steps (int): Maximum number of training steps
249
- learning_rate (float): Learning rate for training
250
- batch_size (int): Batch size for training
251
- progress (gr.Progress): Gradio progress tracker
252
-
253
- Yields:
254
- str: Status updates during training
255
  """
256
  try:
257
- # Initial status update
258
- yield "πŸš€ Starting OpenLLM training process..."
259
- yield f"πŸ“Š Configuration: {model_size} model, {max_steps} steps, lr={learning_rate}, batch={batch_size}"
260
-
261
- # Simulate training progress
262
- # In a real implementation, this would call the actual training functions
263
- for i in range(max_steps):
264
- # Update progress bar
265
- progress(i / max_steps)
266
-
267
- # Provide status updates at regular intervals
268
- if i % 100 == 0:
269
- yield f"πŸ”„ Training step {i}/{max_steps} - Loss: {2.1 - (i/max_steps)*0.2:.3f}"
270
-
271
- # Simulate processing time
272
- import time
273
- time.sleep(0.01) # Small delay for demonstration
274
-
275
- # Training completion
276
- yield "βœ… Training completed successfully!"
277
- yield f"🎯 Model pushed to: lemms/openllm-small-extended-{max_steps//1000}k"
278
- yield "πŸ“Š Final loss: 1.98 | Training time: ~2 hours"
279
-
280
  except Exception as e:
281
- # Handle any training errors
282
- yield f"❌ Training failed: {str(e)}"
283
- yield "πŸ”§ Please check the configuration and try again"
284
 
285
  # Connect UI Components to Functions
286
- # This links the start button to the training function
287
  start_btn.click(
288
- fn=start_training, # Function to execute
289
- inputs=[model_size, max_steps, learning_rate, batch_size], # Input parameters
290
- outputs=[status_text] # Output component to update
291
  )
292
 
293
  # Application Footer
294
  gr.Markdown("---")
295
- gr.Markdown("""
296
- **Author**: Louis Chua Bean Chong | **Project**: OpenLLM - Open Source Large Language Model | **License**: GPL-3.0
297
-
298
- This training interface is part of the OpenLLM project, providing accessible and powerful
299
- language model training capabilities through Hugging Face Spaces.
300
- """)
301
 
302
  return demo
303
 
304
  if __name__ == "__main__":
305
- # Launch the Gradio application when the script is run directly
306
- # This is the entry point for the Hugging Face Space
307
  demo = main()
308
- demo.launch(
309
- server_name="0.0.0.0", # Allow external connections
310
- server_port=7860, # Default Gradio port
311
- share=False, # Don't create public share link
312
- debug=True # Enable debug mode for development
313
- )
 
1
  #!/usr/bin/env python3
2
  """
3
+ OpenLLM Training Space Application - Simplified Version
4
 
5
+ This is a simplified Gradio application that's compatible with newer Gradio versions.
6
+ It provides a basic training interface for OpenLLM models.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
  Author: Louis Chua Bean Chong
9
  License: GPL-3.0
10
+ Version: 1.0.1
11
  Last Updated: 2024
12
  """
13
 
14
  import gradio as gr
 
 
 
 
 
 
 
 
15
 
16
  def main():
17
  """
18
+ Main function that creates a simplified Gradio application interface.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  """
20
 
21
  # Create the main Gradio application interface
 
22
  with gr.Blocks(
23
+ title="OpenLLM Training Space",
24
+ theme=gr.themes.Soft()
 
25
  ) as demo:
26
 
27
  # Application Header
 
28
  gr.Markdown("# πŸš€ OpenLLM Training Space")
29
  gr.Markdown("### *Advanced Language Model Training Interface*")
30
  gr.Markdown("---")
31
 
32
+ # Main Content Area
 
 
33
  with gr.Row():
34
 
35
+ # Left Column: Training Configuration
36
  with gr.Column(scale=1):
37
  gr.Markdown("## πŸ“Š Training Configuration")
 
38
 
39
  # Model Size Selection
 
 
40
  model_size = gr.Dropdown(
41
+ choices=["small", "medium", "large"],
42
+ value="small",
43
+ label="Model Size"
 
44
  )
45
 
46
  # Training Steps Configuration
 
47
  max_steps = gr.Slider(
48
+ minimum=100,
49
+ maximum=10000,
50
+ value=1000,
51
+ step=100,
52
+ label="Max Training Steps"
 
53
  )
54
 
55
  # Learning Rate Configuration
 
56
  learning_rate = gr.Slider(
57
+ minimum=1e-5,
58
+ maximum=1e-3,
59
+ value=3e-4,
60
+ step=1e-5,
61
+ label="Learning Rate"
 
62
  )
63
 
64
  # Batch Size Configuration
 
65
  batch_size = gr.Slider(
66
+ minimum=1,
67
+ maximum=16,
68
+ value=4,
69
+ step=1,
70
+ label="Batch Size"
 
71
  )
72
 
73
  # Right Column: Training Status and Controls
74
  with gr.Column(scale=1):
75
  gr.Markdown("## 🎯 Training Status")
 
76
 
77
  # Training Status Display
 
78
  status_text = gr.Textbox(
79
+ value="Ready to start training",
80
  label="Current Status",
81
+ interactive=False,
82
+ lines=3
 
83
  )
84
 
 
 
 
 
85
  # Training Control Buttons
 
86
  with gr.Row():
87
+ start_btn = gr.Button("πŸš€ Start Training", variant="primary")
88
+ stop_btn = gr.Button("⏹️ Stop Training", variant="stop")
 
 
 
 
 
 
 
 
89
 
90
+ # Instructions Section
91
  gr.Markdown("## πŸ“‹ Training Instructions")
92
  gr.Markdown("""
93
  Follow these steps to successfully train your OpenLLM model:
 
98
  - Adjust the learning rate for optimal training performance
99
  - Choose a batch size that fits your available memory
100
 
101
+ ### **Step 2: Start Training**
 
 
 
 
 
102
  - Click the "Start Training" button to begin the process
103
+ - Monitor the status updates
104
  - The training will run automatically in the background
105
 
106
+ ### **Step 3: Access Results**
 
 
 
 
 
107
  - Trained models are automatically pushed to Hugging Face Hub
108
  - Check the model repository for your trained model
 
109
  """)
110
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
111
  # Resource Links Section
112
  gr.Markdown("## πŸ”— Useful Resources")
113
+ gr.Markdown("""
114
+ - [πŸ“š 7k Model](https://huggingface.co/lemms/openllm-small-extended-7k)
115
+ - [🎯 8k Model](https://huggingface.co/lemms/openllm-small-extended-8k)
116
+ - [πŸ“Š Training Data](https://huggingface.co/datasets/lemms/openllm-training-data)
117
+ - [πŸ“– Main Project](https://github.com/louischua/openllm)
118
+ """)
 
 
 
 
 
 
 
 
 
 
 
 
119
 
120
  # Training Function Definition
121
+ def start_training(model_size, max_steps, learning_rate, batch_size):
 
122
  """
123
  Execute the training process with the specified parameters.
 
 
 
 
 
 
 
 
 
 
 
 
 
124
  """
125
  try:
126
+ # Simulate training process
127
+ return f"πŸš€ Starting OpenLLM training process...\nπŸ“Š Configuration: {model_size} model, {max_steps} steps, lr={learning_rate}, batch={batch_size}\nβœ… Training simulation completed successfully!"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
128
  except Exception as e:
129
+ return f"❌ Training failed: {str(e)}"
 
 
130
 
131
  # Connect UI Components to Functions
 
132
  start_btn.click(
133
+ fn=start_training,
134
+ inputs=[model_size, max_steps, learning_rate, batch_size],
135
+ outputs=[status_text]
136
  )
137
 
138
  # Application Footer
139
  gr.Markdown("---")
140
+ gr.Markdown("**Author**: Louis Chua Bean Chong | **Project**: OpenLLM | **License**: GPL-3.0")
 
 
 
 
 
141
 
142
  return demo
143
 
144
  if __name__ == "__main__":
 
 
145
  demo = main()
146
+ demo.launch()