msmaje committed on
Commit
94875b9
·
verified ·
1 Parent(s): a612298

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -38
app.py CHANGED
@@ -333,43 +333,32 @@ def train_model_inline(uploaded_file, text_column, label_column, num_epochs, bat
333
  TRAINING_LOGS.append(f"- Warmup steps: {warmup_steps}")
334
  yield "\n".join(TRAINING_LOGS)
335
 
336
- # Create a dictionary for training arguments to handle version compatibility
337
- training_args_dict = {
338
- "output_dir": str(output_dir),
339
- "num_train_epochs": num_epochs,
340
- "per_device_train_batch_size": batch_size,
341
- "per_device_eval_batch_size": batch_size,
342
- "warmup_steps": warmup_steps,
343
- "weight_decay": 0.01,
344
- "learning_rate": learning_rate,
345
- "logging_dir": str(output_dir / "logs"),
346
- "logging_steps": logging_steps,
347
- "eval_steps": eval_steps,
348
- "save_steps": save_steps,
349
- "save_total_limit": 2,
350
- "load_best_model_at_end": True,
351
- "metric_for_best_model": "eval_accuracy",
352
- "greater_is_better": True,
353
- "push_to_hub": push_to_hub,
354
- "hub_model_id": hub_model_id if push_to_hub else None,
355
- "report_to": None,
356
- "dataloader_num_workers": 0,
357
- "fp16": torch.cuda.is_available(),
358
- "seed": 42,
359
- "remove_unused_columns": False,
360
- }
361
 
362
- # Try to use 'eval_strategy' and fall back to 'evaluation_strategy' if a TypeError occurs
363
- try:
364
- training_args_dict["eval_strategy"] = "steps"
365
- training_args = TrainingArguments(**training_args_dict)
366
- except TypeError as e:
367
- if "unexpected keyword argument 'eval_strategy'" in str(e):
368
- training_args_dict["evaluation_strategy"] = "steps"
369
- training_args = TrainingArguments(**training_args_dict)
370
- else:
371
- raise e
372
-
373
  # Data collator
374
  data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
375
 
@@ -635,8 +624,8 @@ def push_to_hub_after_training(model_path, username, model_name, token):
635
 
636
  def count_tokens(text):
637
  """Count tokens in input text"""
638
- global CURRENT_TOKENIZER # Ensure we can modify the global tokenizer
639
- if not text:
640
  return "Enter text to see token count"
641
 
642
  # Attempt to load a default tokenizer if it's not set
 
333
  TRAINING_LOGS.append(f"- Warmup steps: {warmup_steps}")
334
  yield "\n".join(TRAINING_LOGS)
335
 
336
+ training_args = TrainingArguments(
337
+ output_dir=str(output_dir),
338
+ num_train_epochs=num_epochs,
339
+ per_device_train_batch_size=batch_size,
340
+ per_device_eval_batch_size=batch_size,
341
+ warmup_steps=warmup_steps,
342
+ weight_decay=0.01,
343
+ learning_rate=learning_rate,
344
+ logging_dir=str(output_dir / "logs"),
345
+ logging_steps=logging_steps,
346
+ evaluation_strategy="steps", # Corrected parameter name
347
+ eval_steps=eval_steps,
348
+ save_steps=save_steps,
349
+ save_total_limit=2,
350
+ load_best_model_at_end=True,
351
+ metric_for_best_model="eval_accuracy",
352
+ greater_is_better=True,
353
+ push_to_hub=push_to_hub,
354
+ hub_model_id=hub_model_id if push_to_hub else None,
355
+ report_to=None,
356
+ dataloader_num_workers=0,
357
+ fp16=torch.cuda.is_available(),
358
+ seed=42,
359
+ remove_unused_columns=False,
360
+ )
361
 
 
 
 
 
 
 
 
 
 
 
 
362
  # Data collator
363
  data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
364
 
 
624
 
625
  def count_tokens(text):
626
  """Count tokens in input text"""
627
+ global CURRENT_TOKENIZER
628
+ if text is None:
629
  return "Enter text to see token count"
630
 
631
  # Attempt to load a default tokenizer if it's not set