YAML Metadata
Warning:
empty or missing yaml metadata in repo card
(https://huggingface.co/docs/hub/model-cards#model-card-metadata)
Example of how to use this model:
from transformers import AutoModelForCausalLM, AutoTokenizer, GPT2Config
import torch
from huggingface_hub import hf_hub_download
def load_moe_model(checkpoint_name="best_val_loss_moe_step_9000.bin", model_id="idhant297/moe-5l-active-arxiv_code_simplestories"):
    """
    Load a MoE model from HuggingFace Hub with a specific checkpoint.

    Args:
        checkpoint_name (str): The checkpoint filename to download from the repo.
        model_id (str): The HuggingFace model repository ID.

    Returns:
        tuple: (model, tokenizer) — the model is built from the repo's GPT2Config,
        loaded with the checkpoint's weights on CPU, and put in eval mode.
    """
    print(f"Loading MoE model from {model_id} checkpoint {checkpoint_name}...")
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    config = GPT2Config.from_pretrained(model_id)
    # Build the architecture from config only; the weights come from the
    # checkpoint file downloaded below, not from the repo's default weights.
    model = AutoModelForCausalLM.from_config(config)
    checkpoint_path = hf_hub_download(
        repo_id=model_id,
        filename=checkpoint_name
    )
    # weights_only=True restricts unpickling to tensors/primitives, preventing
    # arbitrary code execution from a downloaded checkpoint; a plain state dict
    # loads identically under this restriction.
    state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)
    model.load_state_dict(state_dict)
    model.eval()
    print(f"✅ MoE model loaded successfully from checkpoint {checkpoint_name}")
    return model, tokenizer
def generate_text_moe(model, tokenizer, prompt, max_length=100, temperature=0.8, top_p=0.95, num_return_sequences=1):
    """
    Generate text using the loaded MoE model.

    Args:
        model: The loaded MoE model.
        tokenizer: The loaded tokenizer.
        prompt (str): Input text prompt.
        max_length (int): Maximum total length (prompt + generated tokens).
        temperature (float): Sampling temperature.
        top_p (float): Top-p (nucleus) sampling parameter.
        num_return_sequences (int): Number of sequences to generate.

    Returns:
        list: Generated text sequences, with special tokens stripped.
    """
    inputs = tokenizer(prompt, return_tensors="pt")
    with torch.no_grad():
        outputs = model.generate(
            inputs["input_ids"],
            # Pass the attention mask explicitly: pad_token_id is set to
            # eos_token_id below, so the model cannot infer which positions
            # are padding on its own; omitting the mask triggers a transformers
            # warning and can degrade generation quality.
            attention_mask=inputs["attention_mask"],
            max_length=max_length,
            temperature=temperature,
            top_p=top_p,
            do_sample=True,
            num_return_sequences=num_return_sequences,
            pad_token_id=tokenizer.eos_token_id
        )
    # Decode every returned sequence to plain text.
    return [tokenizer.decode(output, skip_special_tokens=True) for output in outputs]
# Example usage for the MoE model — guarded so that merely importing this file
# does not trigger a network download and a generation run.
if __name__ == "__main__":
    checkpoint_name = "best_val_loss_moe_step_9600.bin"
    model, tokenizer = load_moe_model(checkpoint_name)
    prompt = "test test test"
    generated = generate_text_moe(model, tokenizer, prompt, max_length=50)
    print(generated)
Downloads last month: 1
Inference Providers
NEW
This model isn't deployed by any Inference Provider.
🙋
Ask for provider support