bamboo-1 / src /upload_to_hub.py
rain1024's picture
Consolidate project: merge scripts/, bamboo1/ into src/, optimize training
24ec440
#!/usr/bin/env python3
"""Upload Bamboo-1 model to Hugging Face Hub."""
import click
from pathlib import Path
from huggingface_hub import HfApi, create_repo
# Markdown model card that the upload command publishes as README.md.
# The leading ``---`` section is YAML front matter (language, library, tags,
# license, dataset); everything after it is the human-readable description.
# The fenced Python example must stay valid, copy-pasteable Python, so its
# loop body and list items are indented.
MODEL_CARD = '''---
language:
- vi
library_name: bamboo1
tags:
- dependency-parsing
- vietnamese
- nlp
license: apache-2.0
datasets:
- undertheseanlp/UDD-1
---
# Bamboo-1: Vietnamese Dependency Parser
A Vietnamese dependency parser using XLM-RoBERTa + Biaffine architecture, trained on the UDD-1 dataset.
## Installation
```bash
pip install bamboo1
# or
pip install git+https://github.com/undertheseanlp/bamboo-1.git
```
## Usage
```python
from bamboo1 import load, parse
# Quick parse (auto-downloads model from HF Hub)
sent = parse("Tôi yêu Việt Nam")
for token in sent:
    head = sent.get_head(token)
    print(f"{token.form} -> {head.form if head else 'ROOT'} ({token.deprel})")
# Or load explicitly
parser = load("undertheseanlp/bamboo-1")
sent = parser.parse("Hà Nội là thủ đô của Việt Nam")
# Batch parsing
sentences = parser.parse_batch([
    "Tôi yêu Việt Nam",
    "Em gái tôi học tiếng Anh",
])
# Export to CoNLL-U format
print(sent.to_conllu())
```
## Model Details
- **Architecture**: XLM-RoBERTa (xlm-roberta-base) + Biaffine attention
- **Training data**: UDD-1 (Vietnamese Universal Dependencies)
- **Framework**: PyTorch
## Performance
| Metric | Score |
|--------|-------|
| UAS | TBD |
| LAS | TBD |
## Citation
```bibtex
@misc{bamboo1,
title={Bamboo-1: Vietnamese Dependency Parser},
author={Underthesea NLP},
year={2024},
publisher={Hugging Face},
url={https://huggingface.co/undertheseanlp/bamboo-1}
}
```
'''
# CLI entry point. click turns the function docstring into the command's
# --help text, so the docstring is kept to one user-facing sentence and the
# implementation notes live in comments.
#
# Options:
#   --model-dir / -m  local directory expected to contain ``model.pt``
#   --repo-id   / -r  target repository on the Hugging Face Hub
#   --private         flag forwarded to ``create_repo``
#
# Raises click.ClickException when ``model.pt`` is missing from the directory.
@click.command()
@click.option('--model-dir', '-m', default='models/bamboo-1', help='Local model directory')
@click.option('--repo-id', '-r', default='undertheseanlp/bamboo-1-model', help='HF repo ID')
@click.option('--private', is_flag=True, help='Make repo private')
def upload(model_dir: str, repo_id: str, private: bool):
    """Upload Bamboo-1 model to Hugging Face Hub."""
    weights = Path(model_dir) / 'model.pt'

    # Fail before any network traffic if there is nothing to upload.
    if not weights.exists():
        raise click.ClickException(f"Model file not found: {weights}")

    click.echo(f"Uploading {weights} to {repo_id}...")
    hub = HfApi()

    # Repository creation is best-effort: a failure is reported as a note and
    # the uploads below are still attempted.
    try:
        create_repo(repo_id, repo_type="model", private=private, exist_ok=True)
        click.echo(f"Repository {repo_id} ready")
    except Exception as err:
        click.echo(f"Note: {err}")

    # (name in the repo, payload) pairs, pushed in this order: the weights
    # from disk first, then the model card rendered from MODEL_CARD as bytes.
    artifacts = (
        ("model.pt", str(weights)),
        ("README.md", MODEL_CARD.encode()),
    )
    for remote_name, payload in artifacts:
        click.echo(f"Uploading {remote_name}...")
        hub.upload_file(
            path_or_fileobj=payload,
            path_in_repo=remote_name,
            repo_id=repo_id,
            repo_type="model",
        )

    click.echo("\nModel uploaded successfully!")
    click.echo(f"View at: https://huggingface.co/{repo_id}")
# Run the click command only when executed as a script, not when imported.
if __name__ == "__main__":
    upload()