Vu Anh Claude committed on
Commit
a8bf530
·
1 Parent(s): b3ac9a6

Add Google Colab compatibility and developer documentation

Browse files

- Fixed argparse compatibility for Google Colab/Jupyter environments
- Added train_notebook() function for easy notebook usage
- Created comprehensive developer documentation (DEVELOPERS.md)
- Includes setup instructions, API reference, and usage examples
- Documents all 14 UTS2017_Bank dataset categories
- Provides troubleshooting guide for common issues

🤖 Generated with Claude Code

Co-Authored-By: Claude <noreply@anthropic.com>

Files changed (2) hide show
  1. DEVELOPERS.md +0 -0
  2. train.py +35 -1
DEVELOPERS.md ADDED
Binary file (6.35 kB). View file
 
train.py CHANGED
@@ -354,8 +354,37 @@ def train_all_configurations():
354
  return results
355
 
356
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
357
  def main():
358
  """Main function with argument parsing"""
 
 
 
 
359
  parser = argparse.ArgumentParser(
360
  description="Train Vietnamese text classification model on UTS2017_Bank dataset"
361
  )
@@ -393,7 +422,12 @@ def main():
393
  help="Train and compare multiple configurations",
394
  )
395
 
396
- args = parser.parse_args()
 
 
 
 
 
397
 
398
  if args.compare:
399
  print("Training and comparing multiple configurations...")
 
354
  return results
355
 
356
 
357
+ def train_notebook(model_name="logistic", max_features=20000, ngram_min=1, ngram_max=2,
358
+ split_ratio=0.2, n_samples=None, compare=False):
359
+ """
360
+ Convenience function for training in Jupyter/Colab notebooks without argparse.
361
+
362
+ Example usage:
363
+ from train import train_notebook
364
+ train_notebook(model_name="logistic", max_features=20000)
365
+ """
366
+ if compare:
367
+ print("Training and comparing multiple configurations...")
368
+ return train_all_configurations()
369
+ else:
370
+ print(f"Training {model_name} model on UTS2017_Bank dataset...")
371
+ print(f"Configuration: max_features={max_features}, ngram=({ngram_min}, {ngram_max})")
372
+
373
+ return train_model(
374
+ model_name=model_name,
375
+ max_features=max_features,
376
+ ngram_range=(ngram_min, ngram_max),
377
+ split_ratio=split_ratio,
378
+ n_samples=n_samples,
379
+ )
380
+
381
+
382
  def main():
383
  """Main function with argument parsing"""
384
+ # Detect if running in Jupyter/Colab
385
+ import sys
386
+ in_notebook = hasattr(sys, 'ps1') or 'ipykernel' in sys.modules or 'google.colab' in sys.modules
387
+
388
  parser = argparse.ArgumentParser(
389
  description="Train Vietnamese text classification model on UTS2017_Bank dataset"
390
  )
 
422
  help="Train and compare multiple configurations",
423
  )
424
 
425
+ # Use parse_known_args to ignore Jupyter/Colab kernel arguments
426
+ args, unknown = parser.parse_known_args()
427
+
428
+ # If running in notebook and there are unknown args, inform user
429
+ if in_notebook and unknown:
430
+ print(f"Note: Running in Jupyter/Colab environment. Ignoring kernel arguments: {unknown}")
431
 
432
  if args.compare:
433
  print("Training and comparing multiple configurations...")