| |
| """Upload Bamboo-1 model to Hugging Face Hub.""" |
|
|
| import click |
| from pathlib import Path |
| from huggingface_hub import HfApi, create_repo |
|
|
|
|
# Model card uploaded as README.md next to the weights. The leading ``---``
# block is YAML front matter (language, library, tags, license, datasets); the
# rest is the Markdown shown on the repository page.
# NOTE(review): the card points users at "undertheseanlp/bamboo-1", while the
# upload command defaults to "undertheseanlp/bamboo-1-model" - confirm which
# repository id is intended.
MODEL_CARD = '''---
language:
- vi
library_name: bamboo1
tags:
- dependency-parsing
- vietnamese
- nlp
license: apache-2.0
datasets:
- undertheseanlp/UDD-1
---

# Bamboo-1: Vietnamese Dependency Parser

A Vietnamese dependency parser using XLM-RoBERTa + Biaffine architecture, trained on the UDD-1 dataset.

## Installation

```bash
pip install bamboo1
# or
pip install git+https://github.com/undertheseanlp/bamboo-1.git
```

## Usage

```python
from bamboo1 import load, parse

# Quick parse (auto-downloads model from HF Hub)
sent = parse("Tôi yêu Việt Nam")
for token in sent:
    head = sent.get_head(token)
    print(f"{token.form} -> {head.form if head else 'ROOT'} ({token.deprel})")

# Or load explicitly
parser = load("undertheseanlp/bamboo-1")
sent = parser.parse("Hà Nội là thủ đô của Việt Nam")

# Batch parsing
sentences = parser.parse_batch([
"Tôi yêu Việt Nam",
"Em gái tôi học tiếng Anh",
])

# Export to CoNLL-U format
print(sent.to_conllu())
```

## Model Details

- **Architecture**: XLM-RoBERTa (xlm-roberta-base) + Biaffine attention
- **Training data**: UDD-1 (Vietnamese Universal Dependencies)
- **Framework**: PyTorch

## Performance

| Metric | Score |
|--------|-------|
| UAS | TBD |
| LAS | TBD |

## Citation

```bibtex
@misc{bamboo1,
title={Bamboo-1: Vietnamese Dependency Parser},
author={Underthesea NLP},
year={2024},
publisher={Hugging Face},
url={https://huggingface.co/undertheseanlp/bamboo-1}
}
```
'''
|
|
|
|
@click.command()
@click.option('--model-dir', '-m', default='models/bamboo-1', help='Local model directory')
@click.option('--repo-id', '-r', default='undertheseanlp/bamboo-1-model', help='HF repo ID')
@click.option('--private', is_flag=True, help='Make repo private')
def upload(model_dir: str, repo_id: str, private: bool) -> None:
    """Upload Bamboo-1 model to Hugging Face Hub.
    \f

    Everything after the form feed above is hidden from ``--help`` by click,
    so the command's help text stays the one-line summary.

    Uploads ``model.pt`` from ``model_dir`` and the ``MODEL_CARD`` text (as
    ``README.md``) to the model repository ``repo_id``, after first trying to
    create that repository.

    Args:
        model_dir: Local directory expected to contain ``model.pt``.
        repo_id: Target repository id on the Hub.
        private: Passed to ``create_repo`` as the ``private`` flag.

    Raises:
        click.ClickException: If ``<model_dir>/model.pt`` is not a regular file.
    """
    model_file = Path(model_dir) / 'model.pt'

    # is_file() rather than exists(): a directory that happens to be named
    # model.pt must be rejected here, before anything is created on the Hub.
    if not model_file.is_file():
        raise click.ClickException(f"Model file not found: {model_file}")

    click.echo(f"Uploading {model_file} to {repo_id}...")

    api = HfApi()

    # Repository creation is deliberately non-fatal: a failure is reported
    # and the uploads below are still attempted.
    try:
        create_repo(repo_id, repo_type="model", private=private, exist_ok=True)
    except Exception as e:
        click.echo(f"Note: {e}")
    else:
        click.echo(f"Repository {repo_id} ready")

    # (payload, destination path in the repo), uploaded in this order.
    uploads = (
        (str(model_file), "model.pt"),
        (MODEL_CARD.encode("utf-8"), "README.md"),
    )
    for payload, path_in_repo in uploads:
        click.echo(f"Uploading {path_in_repo}...")
        api.upload_file(
            path_or_fileobj=payload,
            path_in_repo=path_in_repo,
            repo_id=repo_id,
            repo_type="model",
        )

    click.echo("\nModel uploaded successfully!")
    click.echo(f"View at: https://huggingface.co/{repo_id}")
|
|
|
|
# Invoke the click command only when this file is executed as a script,
# so importing the module has no side effects.
if __name__ == "__main__":
    upload()
|
|