from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
import torch

# Checkpoint name handed to both `from_pretrained` calls below.
model_name = "NbAiLab/wav2vec2-large-danish-npsc-nst"

print(f"Loading model: {model_name}...")

# Only the two loading calls live inside the `try`, so the
# "Failed to load model" message is printed for loading problems and nothing
# else. Errors raised while dumping the vocabulary or the sampling rate are
# not loading failures and are left to propagate with their real traceback.
try:
    processor = Wav2Vec2Processor.from_pretrained(model_name)
    model = Wav2Vec2ForCTC.from_pretrained(model_name)
except Exception as e:
    # Top-level boundary of the script: report the failure and fall through
    # to the end of the file without re-raising.
    print(f"Failed to load model: {e}")
else:
    # Success path: runs only when both objects were loaded.
    print("Model and Processor loaded successfully.")

    print("-" * 20)
    print("Vocabulary (Labels):")
    vocab = processor.tokenizer.get_vocab()
    # Each vocab item is a (token, index) pair; list them in ascending index
    # order.
    sorted_vocab = sorted(vocab.items(), key=lambda item: item[1])
    for token, index in sorted_vocab:
        print(f"{index}: '{token}'")

    print("-" * 20)
    print(f"Sampling Rate: {processor.feature_extractor.sampling_rate}")