Vu Anh
Claude
committed on
Commit
·
a8bf530
1
Parent(s):
b3ac9a6
Add Google Colab compatibility and developer documentation
Browse files
- Fixed argparse compatibility for Google Colab/Jupyter environments
- Added train_notebook() function for easy notebook usage
- Created comprehensive developer documentation (DEVELOPERS.md)
- Includes setup instructions, API reference, and usage examples
- Documents all 14 UTS2017_Bank dataset categories
- Provides troubleshooting guide for common issues
🤖 Generated with Claude Code
Co-Authored-By: Claude <noreply@anthropic.com>
- DEVELOPERS.md +0 -0
- train.py +35 -1
DEVELOPERS.md
ADDED
|
Binary file (6.35 kB). View file
|
|
|
train.py
CHANGED
|
@@ -354,8 +354,37 @@ def train_all_configurations():
|
|
| 354 |
return results
|
| 355 |
|
| 356 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 357 |
def main():
|
| 358 |
"""Main function with argument parsing"""
|
|
|
|
|
|
|
|
|
|
|
|
|
| 359 |
parser = argparse.ArgumentParser(
|
| 360 |
description="Train Vietnamese text classification model on UTS2017_Bank dataset"
|
| 361 |
)
|
|
@@ -393,7 +422,12 @@ def main():
|
|
| 393 |
help="Train and compare multiple configurations",
|
| 394 |
)
|
| 395 |
|
| 396 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 397 |
|
| 398 |
if args.compare:
|
| 399 |
print("Training and comparing multiple configurations...")
|
|
|
|
| 354 |
return results
|
| 355 |
|
| 356 |
|
| 357 |
+
def train_notebook(model_name="logistic", max_features=20000, ngram_min=1, ngram_max=2,
                   split_ratio=0.2, n_samples=None, compare=False):
    """Run training from a Jupyter/Colab notebook without going through argparse.

    When ``compare`` is truthy, every other argument is ignored and the
    result of ``train_all_configurations()`` is returned. Otherwise the
    chosen configuration is printed and the result of ``train_model(...)``
    is returned, with ``ngram_min`` and ``ngram_max`` combined into the
    ``ngram_range`` tuple that ``train_model`` receives.

    Example usage:
        from train import train_notebook
        train_notebook(model_name="logistic", max_features=20000)
    """
    # Comparison mode short-circuits: no single-model settings are used.
    if compare:
        print("Training and comparing multiple configurations...")
        return train_all_configurations()

    print(f"Training {model_name} model on UTS2017_Bank dataset...")
    print(f"Configuration: max_features={max_features}, ngram=({ngram_min}, {ngram_max})")

    # train_model takes the n-gram bounds as a single (min, max) tuple.
    ngram_range = (ngram_min, ngram_max)
    return train_model(
        model_name=model_name,
        max_features=max_features,
        ngram_range=ngram_range,
        split_ratio=split_ratio,
        n_samples=n_samples,
    )
|
| 380 |
+
|
| 381 |
+
|
| 382 |
def main():
|
| 383 |
"""Main function with argument parsing"""
|
| 384 |
+
# Detect if running in Jupyter/Colab
|
| 385 |
+
import sys
|
| 386 |
+
in_notebook = hasattr(sys, 'ps1') or 'ipykernel' in sys.modules or 'google.colab' in sys.modules
|
| 387 |
+
|
| 388 |
parser = argparse.ArgumentParser(
|
| 389 |
description="Train Vietnamese text classification model on UTS2017_Bank dataset"
|
| 390 |
)
|
|
|
|
| 422 |
help="Train and compare multiple configurations",
|
| 423 |
)
|
| 424 |
|
| 425 |
+
# Use parse_known_args to ignore Jupyter/Colab kernel arguments
|
| 426 |
+
args, unknown = parser.parse_known_args()
|
| 427 |
+
|
| 428 |
+
# If running in notebook and there are unknown args, inform user
|
| 429 |
+
if in_notebook and unknown:
|
| 430 |
+
print(f"Note: Running in Jupyter/Colab environment. Ignoring kernel arguments: {unknown}")
|
| 431 |
|
| 432 |
if args.compare:
|
| 433 |
print("Training and comparing multiple configurations...")
|