songhieng commited on
Commit
168a930
ยท
verified ยท
1 Parent(s): 917be01

Update src/mlops/config.py

Browse files
Files changed (1) hide show
  1. src/mlops/config.py +259 -259
src/mlops/config.py CHANGED
@@ -1,259 +1,259 @@
1
- """
2
- Configuration module for MLOps platform.
3
- Contains all configuration classes and constants.
4
- """
5
-
6
- from dataclasses import dataclass, field
7
- from typing import Dict, List, Optional
8
- from enum import Enum
9
-
10
-
11
- class LanguageCode(str, Enum):
12
- """Supported language codes."""
13
- ENGLISH = "en"
14
- CHINESE = "zh"
15
- KHMER = "km"
16
-
17
-
18
- class ClassificationType(str, Enum):
19
- """Classification task types."""
20
- BINARY = "binary"
21
- MULTICLASS = "multiclass"
22
-
23
-
24
- # Supported languages with display names
25
- SUPPORTED_LANGUAGES: Dict[str, Dict[str, str]] = {
26
- "en": {
27
- "name": "English",
28
- "native_name": "English",
29
- "description": "English language support with standard NLP preprocessing",
30
- "tokenizer_hint": "Uses standard word tokenization"
31
- },
32
- "zh": {
33
- "name": "Chinese",
34
- "native_name": "ไธญๆ–‡",
35
- "description": "Chinese language support with character-level tokenization",
36
- "tokenizer_hint": "Uses jieba for word segmentation"
37
- },
38
- "km": {
39
- "name": "Khmer",
40
- "native_name": "แž—แžถแžŸแžถแžแŸ’แž˜แŸ‚แžš",
41
- "description": "Khmer language support with specialized tokenization",
42
- "tokenizer_hint": "Uses ICU-based tokenization for Khmer script"
43
- }
44
- }
45
-
46
- # Model architectures supported with recommendations
47
- MODEL_ARCHITECTURES = {
48
- "roberta-base": {
49
- "name": "RoBERTa Base",
50
- "description": "Robust BERT model, excellent for English text classification",
51
- "languages": ["en"],
52
- "max_length": 512,
53
- "recommended_for": "English only, high accuracy needed",
54
- "speed": "Medium",
55
- "size": "355MB",
56
- "best_use": "English binary/multiclass classification"
57
- },
58
- "bert-base-multilingual-cased": {
59
- "name": "mBERT (Multilingual BERT)",
60
- "description": "Supports 104 languages - Good balance of performance and multilingual support",
61
- "languages": ["en", "zh", "km"],
62
- "max_length": 512,
63
- "recommended_for": "Multilingual tasks, balanced performance",
64
- "speed": "Medium",
65
- "size": "665MB",
66
- "best_use": "Multilingual classification, good general-purpose model"
67
- },
68
- "xlm-roberta-base": {
69
- "name": "XLM-RoBERTa Base",
70
- "description": "Best multilingual model - Highest accuracy for Chinese, Khmer, and other languages",
71
- "languages": ["en", "zh", "km"],
72
- "max_length": 512,
73
- "recommended_for": "Best multilingual performance, recommended for Chinese/Khmer",
74
- "speed": "Medium-Slow",
75
- "size": "1.03GB",
76
- "best_use": "When you need the best accuracy across multiple languages"
77
- },
78
- "distilbert-base-multilingual-cased": {
79
- "name": "DistilBERT Multilingual (Recommended for CPU)",
80
- "description": "Lightweight and fast - Perfect for CPU training or quick experiments",
81
- "languages": ["en", "zh", "km"],
82
- "max_length": 512,
83
- "recommended_for": "CPU training, fast experiments, limited resources",
84
- "speed": "Fast",
85
- "size": "525MB",
86
- "best_use": "CPU-only systems, quick prototyping, limited GPU memory"
87
- }
88
- }
89
-
90
- # Model selection guide
91
- MODEL_SELECTION_GUIDE = {
92
- "cpu_training": "distilbert-base-multilingual-cased",
93
- "gpu_training_english": "roberta-base",
94
- "gpu_training_multilingual": "xlm-roberta-base",
95
- "quick_experiment": "distilbert-base-multilingual-cased",
96
- "production_english": "roberta-base",
97
- "production_multilingual": "xlm-roberta-base"
98
- }
99
-
100
-
101
- @dataclass
102
- class TrainingConfig:
103
- """Configuration for model training."""
104
-
105
- # Model settings
106
- model_name: str = "bert-base-multilingual-cased"
107
- num_labels: int = 2
108
-
109
- # Training hyperparameters
110
- learning_rate: float = 2e-5
111
- batch_size: int = 16
112
- num_epochs: int = 3
113
- warmup_ratio: float = 0.1
114
- weight_decay: float = 0.01
115
- max_length: int = 256
116
-
117
- # Data settings
118
- train_split: float = 0.8
119
- validation_split: float = 0.1
120
- test_split: float = 0.1
121
- shuffle_data: bool = True
122
- random_seed: int = 42
123
-
124
- # Language settings
125
- language: str = "en"
126
-
127
- # Output settings
128
- output_dir: str = "trained_models"
129
- save_best_model: bool = True
130
- logging_steps: int = 10
131
- evaluation_strategy: str = "epoch"
132
-
133
- # Performance settings
134
- use_fp16: bool = False # Disabled for CPU compatibility
135
- gradient_accumulation_steps: int = 1
136
-
137
- # Labels configuration
138
- label_names: List[str] = field(default_factory=lambda: ["Legitimate", "Phishing"])
139
-
140
- def validate(self) -> List[str]:
141
- """Validate configuration and return list of warnings/errors."""
142
- issues = []
143
-
144
- if self.learning_rate <= 0:
145
- issues.append("Learning rate must be positive")
146
- if self.batch_size < 1:
147
- issues.append("Batch size must be at least 1")
148
- if self.num_epochs < 1:
149
- issues.append("Number of epochs must be at least 1")
150
- if self.train_split + self.validation_split + self.test_split > 1.0:
151
- issues.append("Sum of data splits cannot exceed 1.0")
152
- if self.language not in SUPPORTED_LANGUAGES:
153
- issues.append(f"Unsupported language: {self.language}")
154
-
155
- return issues
156
-
157
- def to_dict(self) -> dict:
158
- """Convert config to dictionary."""
159
- return {
160
- "model_name": self.model_name,
161
- "num_labels": self.num_labels,
162
- "learning_rate": self.learning_rate,
163
- "batch_size": self.batch_size,
164
- "num_epochs": self.num_epochs,
165
- "warmup_ratio": self.warmup_ratio,
166
- "weight_decay": self.weight_decay,
167
- "max_length": self.max_length,
168
- "train_split": self.train_split,
169
- "validation_split": self.validation_split,
170
- "test_split": self.test_split,
171
- "shuffle_data": self.shuffle_data,
172
- "random_seed": self.random_seed,
173
- "language": self.language,
174
- "output_dir": self.output_dir,
175
- "label_names": self.label_names
176
- }
177
-
178
-
179
- @dataclass
180
- class ExperimentConfig:
181
- """Configuration for experiment tracking."""
182
-
183
- experiment_name: str = "content_detection"
184
- run_name: Optional[str] = None
185
- tags: Dict[str, str] = field(default_factory=dict)
186
- description: str = ""
187
-
188
- # MLflow settings (optional)
189
- use_mlflow: bool = False
190
- mlflow_tracking_uri: str = "mlruns"
191
-
192
-
193
- # UI Translation strings
194
- UI_TRANSLATIONS = {
195
- "en": {
196
- "app_title": "MLOps Training Platform",
197
- "sidebar_title": "Configuration",
198
- "language_select": "Select Target Language",
199
- "upload_data": "Upload Dataset",
200
- "training_config": "Training Configuration",
201
- "start_training": "Start Training",
202
- "training_progress": "Training Progress",
203
- "evaluation": "Model Evaluation",
204
- "download_model": "Download Model",
205
- "upload_help": "Upload a CSV file with 'text' and 'label' columns",
206
- "metrics_title": "Training Metrics",
207
- "confusion_matrix": "Confusion Matrix",
208
- "success_msg": "Training completed successfully!",
209
- "error_msg": "An error occurred during training",
210
- "welcome_msg": "Welcome to the MLOps Training Platform",
211
- "data_preview": "Data Preview",
212
- "class_distribution": "Class Distribution"
213
- },
214
- "zh": {
215
- "app_title": "๐Ÿค– ๆœบๅ™จๅญฆไน ่ฟ็ปด่ฎญ็ปƒๅนณๅฐ",
216
- "sidebar_title": "้…็ฝฎ",
217
- "language_select": "้€‰ๆ‹ฉ็›ฎๆ ‡่ฏญ่จ€",
218
- "upload_data": "ไธŠไผ ๆ•ฐๆฎ้›†",
219
- "training_config": "่ฎญ็ปƒ้…็ฝฎ",
220
- "start_training": "ๅผ€ๅง‹่ฎญ็ปƒ",
221
- "training_progress": "่ฎญ็ปƒ่ฟ›ๅบฆ",
222
- "evaluation": "ๆจกๅž‹่ฏ„ไผฐ",
223
- "download_model": "ไธ‹่ฝฝๆจกๅž‹",
224
- "upload_help": "ไธŠไผ ๅŒ…ๅซ 'text' ๅ’Œ 'label' ๅˆ—็š„CSVๆ–‡ไปถ",
225
- "metrics_title": "่ฎญ็ปƒๆŒ‡ๆ ‡",
226
- "confusion_matrix": "ๆททๆท†็Ÿฉ้˜ต",
227
- "success_msg": "่ฎญ็ปƒๆˆๅŠŸๅฎŒๆˆ๏ผ",
228
- "error_msg": "่ฎญ็ปƒ่ฟ‡็จ‹ไธญๅ‘็”Ÿ้”™่ฏฏ",
229
- "welcome_msg": "ๆฌข่ฟŽไฝฟ็”จๆœบๅ™จๅญฆไน ่ฟ็ปด่ฎญ็ปƒๅนณๅฐ",
230
- "data_preview": "ๆ•ฐๆฎ้ข„่งˆ",
231
- "class_distribution": "็ฑปๅˆซๅˆ†ๅธƒ"
232
- },
233
- "km": {
234
- "app_title": "๐Ÿค– แžœแŸแž‘แžทแž€แžถแž”แžŽแŸ’แžแžปแŸ‡แž”แžŽแŸ’แžแžถแž› MLOps",
235
- "sidebar_title": "แž€แžถแžšแž€แŸ†แžŽแžแŸ‹",
236
- "language_select": "แž‡แŸ’แžšแžพแžŸแžšแžพแžŸแž—แžถแžŸแžถแž‚แŸ„แž›แžŠแŸ…",
237
- "upload_data": "แž•แŸ’แž‘แžปแž€แžกแžพแž„แžŸแŸ†แžŽแžปแŸ†แž‘แžทแž“แŸ’แž“แž“แŸแž™",
238
- "training_config": "แž€แžถแžšแž€แŸ†แžŽแžแŸ‹แž€แžถแžšแž”แžŽแŸ’แžแžปแŸ‡แž”แžŽแŸ’แžแžถแž›",
239
- "start_training": "แž…แžถแž”แŸ‹แž•แŸ’แžแžพแž˜แž”แžŽแŸ’แžแžปแŸ‡แž”แžŽแŸ’แžแžถแž›",
240
- "training_progress": "แžœแžŒแŸ’แžแž“แž—แžถแž–แž“แŸƒแž€แžถแžšแž”แžŽแŸ’แžแžปแŸ‡แž”แžŽแŸ’แžแžถแž›",
241
- "evaluation": "แž€แžถแžšแžœแžถแž™แžแž˜แŸ’แž›แŸƒแž˜แŸ‰แžผแžŠแŸ‚แž›",
242
- "download_model": "แž‘แžถแž‰แž™แž€แž˜แŸ‰แžผแžŠแŸ‚แž›",
243
- "upload_help": "แž•แŸ’แž‘แžปแž€แžกแžพแž„แžฏแž€แžŸแžถแžš CSV แžŠแŸ‚แž›แž˜แžถแž“แž‡แžฝแžšแžˆแžš 'text' แž“แžทแž„ 'label'",
244
- "metrics_title": "แžšแž„แŸ’แžœแžถแžŸแŸ‹แž“แŸƒแž€แžถแžšแž”แžŽแŸ’แžแžปแŸ‡แž”แžŽแŸ’แžแžถแž›",
245
- "confusion_matrix": "แž˜แŸ‰แžถแž‘แŸ’แžšแžธแžŸแž—แžถแž–แž…แŸ’แžšแžกแŸ†",
246
- "success_msg": "แž€แžถแžšแž”แžŽแŸ’แžแžปแŸ‡แž”แžŽแŸ’แžแžถแž›แž”แžถแž“แž‡แŸ„แž‚แž‡แŸแž™!",
247
- "error_msg": "แž€แŸ†แž แžปแžŸแž˜แžฝแž™แž”แžถแž“แž€แžพแžแžกแžพแž„แž€แŸ’แž“แžปแž„แžขแŸ†แžกแžปแž„แž–แŸแž›แž”แžŽแŸ’แžแžปแŸ‡แž”แžŽแŸ’แžแžถแž›",
248
- "welcome_msg": "แžŸแžผแž˜แžŸแŸ’แžœแžถแž‚แž˜แž“แŸแž˜แž€แž€แžถแž“แŸ‹แžœแŸแž‘แžทแž€แžถแž”แžŽแŸ’แžแžปแŸ‡แž”แžŽแŸ’แžแžถแž› MLOps",
249
- "data_preview": "แž˜แžพแž›แž‘แžทแž“แŸ’แž“แž“แŸแž™แž‡แžถแž˜แžปแž“",
250
- "class_distribution": "แž€แžถแžšแž…แŸ‚แž€แž…แžถแž™แžแŸ’แž“แžถแž€แŸ‹"
251
- }
252
- }
253
-
254
-
255
- def get_translation(key: str, language: str = "en") -> str:
256
- """Get translated string for given key and language."""
257
- if language not in UI_TRANSLATIONS:
258
- language = "en"
259
- return UI_TRANSLATIONS[language].get(key, UI_TRANSLATIONS["en"].get(key, key))
 
1
+ """
2
+ Configuration module for MLOps platform.
3
+ Contains all configuration classes and constants.
4
+ """
5
+
6
+ from dataclasses import dataclass, field
7
+ from typing import Dict, List, Optional
8
+ from enum import Enum
9
+
10
+
11
class LanguageCode(str, Enum):
    """Supported language codes."""
    # Subclassing str lets members be passed anywhere a plain code string
    # (e.g. "en") is expected and compare equal to it.
    ENGLISH = "en"
    CHINESE = "zh"
    KHMER = "km"
16
+
17
+
18
class ClassificationType(str, Enum):
    """Classification task types."""
    # str mixin: members compare equal to their raw string values.
    BINARY = "binary"          # exactly two labels
    MULTICLASS = "multiclass"  # more than two labels
22
+
23
+
24
# Supported languages with display names.
# Maps a language code (the keys accepted by TrainingConfig.language) to
# UI metadata:
#   name           - English display name
#   native_name    - name written in the language's own script
#   description    - short human-readable description of the support level
#   tokenizer_hint - note on which tokenization approach is used
# NOTE(review): non-Latin text in this copy appears mis-encoded (mojibake);
# verify the actual string bytes against the upstream file.
SUPPORTED_LANGUAGES: Dict[str, Dict[str, str]] = {
    "en": {
        "name": "English",
        "native_name": "English",
        "description": "English language support with standard NLP preprocessing",
        "tokenizer_hint": "Uses standard word tokenization"
    },
    "zh": {
        "name": "Chinese",
        "native_name": "ไธญๆ–‡",
        "description": "Chinese language support with character-level tokenization",
        "tokenizer_hint": "Uses jieba for word segmentation"
    },
    "km": {
        "name": "Khmer",
        "native_name": "แž—แžถแžŸแžถแžแŸ’แž˜แŸ‚แžš",
        "description": "Khmer language support with specialized tokenization",
        "tokenizer_hint": "Uses ICU-based tokenization for Khmer script"
    }
}
45
+
46
# Model architectures supported with recommendations.
# Each entry maps a model identifier (presumably a Hugging Face hub model
# id -- TODO confirm against the training code) to selection metadata:
#   name            - display name
#   description     - one-line summary
#   languages       - language codes the model is recommended for
#   max_length      - maximum input sequence length (tokens)
#   recommended_for - when to pick this model
#   speed           - qualitative inference/training speed
#   size            - approximate download size
#   best_use        - primary use case
MODEL_ARCHITECTURES = {
    "roberta-base": {
        "name": "RoBERTa Base",
        "description": "Robust BERT model, excellent for English text classification",
        "languages": ["en"],
        "max_length": 512,
        "recommended_for": "English only, high accuracy needed",
        "speed": "Medium",
        "size": "355MB",
        "best_use": "English binary/multiclass classification"
    },
    "bert-base-multilingual-cased": {
        "name": "mBERT (Multilingual BERT)",
        "description": "Supports 104 languages - Good balance of performance and multilingual support",
        "languages": ["en", "zh", "km"],
        "max_length": 512,
        "recommended_for": "Multilingual tasks, balanced performance",
        "speed": "Medium",
        "size": "665MB",
        "best_use": "Multilingual classification, good general-purpose model"
    },
    "xlm-roberta-base": {
        "name": "XLM-RoBERTa Base",
        "description": "Best multilingual model - Highest accuracy for Chinese, Khmer, and other languages",
        "languages": ["en", "zh", "km"],
        "max_length": 512,
        "recommended_for": "Best multilingual performance, recommended for Chinese/Khmer",
        "speed": "Medium-Slow",
        "size": "1.03GB",
        "best_use": "When you need the best accuracy across multiple languages"
    },
    "distilbert-base-multilingual-cased": {
        "name": "DistilBERT Multilingual (Recommended for CPU)",
        "description": "Lightweight and fast - Perfect for CPU training or quick experiments",
        "languages": ["en", "zh", "km"],
        "max_length": 512,
        "recommended_for": "CPU training, fast experiments, limited resources",
        "speed": "Fast",
        "size": "525MB",
        "best_use": "CPU-only systems, quick prototyping, limited GPU memory"
    }
}
89
+
90
# Model selection guide: maps a usage scenario to the recommended model.
# Every value here is a key of MODEL_ARCHITECTURES above.
MODEL_SELECTION_GUIDE = {
    "cpu_training": "distilbert-base-multilingual-cased",
    "gpu_training_english": "roberta-base",
    "gpu_training_multilingual": "xlm-roberta-base",
    "quick_experiment": "distilbert-base-multilingual-cased",
    "production_english": "roberta-base",
    "production_multilingual": "xlm-roberta-base"
}
99
+
100
+
101
@dataclass
class TrainingConfig:
    """Configuration for model training.

    Groups model, hyperparameter, data-split, language, output, and
    performance settings, and provides helpers to validate the
    configuration and serialize it to a plain dictionary.
    """

    # Model settings
    model_name: str = "bert-base-multilingual-cased"
    num_labels: int = 2

    # Training hyperparameters
    learning_rate: float = 2e-5
    batch_size: int = 16
    num_epochs: int = 3
    warmup_ratio: float = 0.1
    weight_decay: float = 0.01
    max_length: int = 256

    # Data settings (fractions of the dataset; should sum to <= 1.0)
    train_split: float = 0.8
    validation_split: float = 0.1
    test_split: float = 0.1
    shuffle_data: bool = True
    random_seed: int = 42

    # Language settings (must be a key of SUPPORTED_LANGUAGES)
    language: str = "en"

    # Output settings
    output_dir: str = "trained_models"
    save_best_model: bool = True
    logging_steps: int = 10
    eval_strategy: str = "epoch"

    # Performance settings
    use_fp16: bool = False  # Disabled for CPU compatibility
    gradient_accumulation_steps: int = 1

    # Labels configuration (default_factory avoids a shared mutable default)
    label_names: List[str] = field(default_factory=lambda: ["Legitimate", "Phishing"])

    def validate(self) -> List[str]:
        """Validate configuration and return a list of issue messages.

        Returns:
            A list of human-readable problems; empty when the
            configuration is valid.
        """
        issues: List[str] = []

        if self.learning_rate <= 0:
            issues.append("Learning rate must be positive")
        if self.batch_size < 1:
            issues.append("Batch size must be at least 1")
        if self.num_epochs < 1:
            issues.append("Number of epochs must be at least 1")
        if self.max_length < 1:
            issues.append("Max sequence length must be at least 1")
        if min(self.train_split, self.validation_split, self.test_split) < 0:
            issues.append("Data splits cannot be negative")
        # Allow a tiny tolerance so float rounding (e.g. 0.7 + 0.15 + 0.15)
        # cannot spuriously fail an exact > 1.0 comparison.
        if self.train_split + self.validation_split + self.test_split > 1.0 + 1e-9:
            issues.append("Sum of data splits cannot exceed 1.0")
        if self.language not in SUPPORTED_LANGUAGES:
            issues.append(f"Unsupported language: {self.language}")

        return issues

    def to_dict(self) -> dict:
        """Convert config to a dictionary.

        Note: intentionally serializes only the core training fields
        (output/performance flags such as save_best_model are omitted).
        """
        return {
            "model_name": self.model_name,
            "num_labels": self.num_labels,
            "learning_rate": self.learning_rate,
            "batch_size": self.batch_size,
            "num_epochs": self.num_epochs,
            "warmup_ratio": self.warmup_ratio,
            "weight_decay": self.weight_decay,
            "max_length": self.max_length,
            "train_split": self.train_split,
            "validation_split": self.validation_split,
            "test_split": self.test_split,
            "shuffle_data": self.shuffle_data,
            "random_seed": self.random_seed,
            "language": self.language,
            "output_dir": self.output_dir,
            "label_names": self.label_names
        }
177
+
178
+
179
@dataclass
class ExperimentConfig:
    """Configuration for experiment tracking."""

    # Logical experiment grouping name
    experiment_name: str = "content_detection"
    # Optional label for an individual run
    run_name: Optional[str] = None
    # Free-form key/value tags attached to the run
    tags: Dict[str, str] = field(default_factory=dict)
    # Human-readable description of the experiment
    description: str = ""

    # MLflow settings (optional; only relevant when use_mlflow is True)
    use_mlflow: bool = False
    mlflow_tracking_uri: str = "mlruns"
191
+
192
+
193
# UI Translation strings.
# Keyed by language code ("en", "zh", "km"); each inner dict maps a UI
# message identifier to its localized text. Lookups fall back to English
# (see get_translation below).
# NOTE(review): the "zh"/"km" app titles carry a leading robot-emoji prefix
# while the "en" title does not -- confirm whether that is intentional.
# NOTE(review): non-Latin text in this copy appears mis-encoded (mojibake);
# verify the runtime string bytes against the upstream file.
UI_TRANSLATIONS = {
    "en": {
        "app_title": "MLOps Training Platform",
        "sidebar_title": "Configuration",
        "language_select": "Select Target Language",
        "upload_data": "Upload Dataset",
        "training_config": "Training Configuration",
        "start_training": "Start Training",
        "training_progress": "Training Progress",
        "evaluation": "Model Evaluation",
        "download_model": "Download Model",
        "upload_help": "Upload a CSV file with 'text' and 'label' columns",
        "metrics_title": "Training Metrics",
        "confusion_matrix": "Confusion Matrix",
        "success_msg": "Training completed successfully!",
        "error_msg": "An error occurred during training",
        "welcome_msg": "Welcome to the MLOps Training Platform",
        "data_preview": "Data Preview",
        "class_distribution": "Class Distribution"
    },
    "zh": {
        "app_title": "๐Ÿค– ๆœบๅ™จๅญฆไน ่ฟ็ปด่ฎญ็ปƒๅนณๅฐ",
        "sidebar_title": "้…็ฝฎ",
        "language_select": "้€‰ๆ‹ฉ็›ฎๆ ‡่ฏญ่จ€",
        "upload_data": "ไธŠไผ ๆ•ฐๆฎ้›†",
        "training_config": "่ฎญ็ปƒ้…็ฝฎ",
        "start_training": "ๅผ€ๅง‹่ฎญ็ปƒ",
        "training_progress": "่ฎญ็ปƒ่ฟ›ๅบฆ",
        "evaluation": "ๆจกๅž‹่ฏ„ไผฐ",
        "download_model": "ไธ‹่ฝฝๆจกๅž‹",
        "upload_help": "ไธŠไผ ๅŒ…ๅซ 'text' ๅ’Œ 'label' ๅˆ—็š„CSVๆ–‡ไปถ",
        "metrics_title": "่ฎญ็ปƒๆŒ‡ๆ ‡",
        "confusion_matrix": "ๆททๆท†็Ÿฉ้˜ต",
        "success_msg": "่ฎญ็ปƒๆˆๅŠŸๅฎŒๆˆ๏ผ",
        "error_msg": "่ฎญ็ปƒ่ฟ‡็จ‹ไธญๅ‘็”Ÿ้”™่ฏฏ",
        "welcome_msg": "ๆฌข่ฟŽไฝฟ็”จๆœบๅ™จๅญฆไน ่ฟ็ปด่ฎญ็ปƒๅนณๅฐ",
        "data_preview": "ๆ•ฐๆฎ้ข„่งˆ",
        "class_distribution": "็ฑปๅˆซๅˆ†ๅธƒ"
    },
    "km": {
        "app_title": "๐Ÿค– แžœแŸแž‘แžทแž€แžถแž”แžŽแŸ’แžแžปแŸ‡แž”แžŽแŸ’แžแžถแž› MLOps",
        "sidebar_title": "แž€แžถแžšแž€แŸ†แžŽแžแŸ‹",
        "language_select": "แž‡แŸ’แžšแžพแžŸแžšแžพแžŸแž—แžถแžŸแžถแž‚แŸ„แž›แžŠแŸ…",
        "upload_data": "แž•แŸ’แž‘แžปแž€แžกแžพแž„แžŸแŸ†แžŽแžปแŸ†แž‘แžทแž“แŸ’แž“แž“แŸแž™",
        "training_config": "แž€แžถแžšแž€แŸ†แžŽแžแŸ‹แž€แžถแžšแž”แžŽแŸ’แžแžปแŸ‡แž”แžŽแŸ’แžแžถแž›",
        "start_training": "แž…แžถแž”แŸ‹แž•แŸ’แžแžพแž˜แž”แžŽแŸ’แžแžปแŸ‡แž”แžŽแŸ’แžแžถแž›",
        "training_progress": "แžœแžŒแŸ’แžแž“แž—แžถแž–แž“แŸƒแž€แžถแžšแž”แžŽแŸ’แžแžปแŸ‡แž”แžŽแŸ’แžแžถแž›",
        "evaluation": "แž€แžถแžšแžœแžถแž™แžแž˜แŸ’แž›แŸƒแž˜แŸ‰แžผแžŠแŸ‚แž›",
        "download_model": "แž‘แžถแž‰แž™แž€แž˜แŸ‰แžผแžŠแŸ‚แž›",
        "upload_help": "แž•แŸ’แž‘แžปแž€แžกแžพแž„แžฏแž€แžŸแžถแžš CSV แžŠแŸ‚แž›แž˜แžถแž“แž‡แžฝแžšแžˆแžš 'text' แž“แžทแž„ 'label'",
        "metrics_title": "แžšแž„แŸ’แžœแžถแžŸแŸ‹แž“แŸƒแž€แžถแžšแž”แžŽแŸ’แžแžปแŸ‡แž”แžŽแŸ’แžแžถแž›",
        "confusion_matrix": "แž˜แŸ‰แžถแž‘แŸ’แžšแžธแžŸแž—แžถแž–แž…แŸ’แžšแžกแŸ†",
        "success_msg": "แž€แžถแžšแž”แžŽแŸ’แžแžปแŸ‡แž”แžŽแŸ’แžแžถแž›แž”แžถแž“แž‡แŸ„แž‚แž‡แŸแž™!",
        "error_msg": "แž€แŸ†แž แžปแžŸแž˜แžฝแž™แž”แžถแž“แž€แžพแžแžกแžพแž„แž€แŸ’แž“แžปแž„แžขแŸ†แžกแžปแž„แž–แŸแž›แž”แžŽแŸ’แžแžปแŸ‡แž”แžŽแŸ’แžแžถแž›",
        "welcome_msg": "แžŸแžผแž˜แžŸแŸ’แžœแžถแž‚แž˜แž“แŸแž˜แž€แž€แžถแž“แŸ‹แžœแŸแž‘แžทแž€แžถแž”แžŽแŸ’แžแžปแŸ‡แž”แžŽแŸ’แžแžถแž› MLOps",
        "data_preview": "แž˜แžพแž›แž‘แžทแž“แŸ’แž“แž“แŸแž™แž‡แžถแž˜แžปแž“",
        "class_distribution": "แž€แžถแžšแž…แŸ‚แž€แž…แžถแž™แžแŸ’แž“แžถแž€แŸ‹"
    }
}
253
+
254
+
255
def get_translation(key: str, language: str = "en") -> str:
    """Return the UI string for *key* in *language*.

    Unknown languages fall back to the English table; unknown keys fall
    back to the English entry, and finally to the key itself.
    """
    table = UI_TRANSLATIONS[language] if language in UI_TRANSLATIONS else UI_TRANSLATIONS["en"]
    english_fallback = UI_TRANSLATIONS["en"].get(key, key)
    return table.get(key, english_fallback)