STT_Model
/
IndicTrans2
/huggingface_interface
/IndicTransToolkit
/tokenizer_training
/testing_json.py
import json

# Absolute location of the tokenizer's vocab.json that this script inspects.
vocab_path = "/Users/apple/Desktop/indictrans2/IndicTrans2/huggingface_interface/IndicTransToolkit/tokenizer_training/my_tokenizer/vocab.json"

# Read the whole file as UTF-8 text and parse it as JSON.
with open(vocab_path, "r", encoding="utf-8") as vocab_file:
    vocab = json.loads(vocab_file.read())

# Report how many entries were loaded, then preview the leading ones.
print(f"Vocab size: {len(vocab)}")
print("Sample tokens:")

# Zero-based position of the last entry to display; positions 0 through 10
# are printed, i.e. at most 11 entries.
last_shown_index = 10
for i, (token, idx) in enumerate(vocab.items()):
    print(f"{token}: {idx}")
    if i == last_shown_index:
        # Preview limit reached; skip the rest of the vocabulary.
        break