# Llama-3.1-Storm-8B Text Generation

import gradio as gr
import spaces
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load the tokenizer and the causal-LM weights once, at import time, so the
# request handler below can reuse the same module-level objects on every call.
model_name = "akjindal53244/Llama-3.1-Storm-8B"  # checkpoint id given to from_pretrained
tokenizer = AutoTokenizer.from_pretrained(model_name)
# bfloat16 weights; device placement is delegated to the library via "auto".
model = AutoModelForCausalLM.from_pretrained(
    model_name, device_map="auto", torch_dtype=torch.bfloat16
)

@spaces.GPU(duration=120)
def generate_text(prompt, max_length, temperature):
    """Sample a reply to ``prompt`` from the module-level chat model.

    The prompt is wrapped in a two-message conversation (a fixed system
    message plus the user's text), rendered to a string with the tokenizer's
    chat template, and completed with sampling (``top_k=100``, ``top_p=0.95``).

    Args:
        prompt: Text placed in the "user" message of the conversation.
        max_length: Maximum number of *new* tokens to generate. Coerced to
            ``int`` because a UI slider may deliver a float such as ``128.0``.
        temperature: Sampling temperature forwarded to ``model.generate``.

    Returns:
        The decoded completion only: the prompt tokens are sliced off the
        generated sequence and special tokens are skipped while decoding.
    """
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt},
    ]
    formatted_prompt = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, tokenize=False
    )

    # The rendered chat template already contains the special tokens it needs.
    # Tokenizing it with the default add_special_tokens=True would add them a
    # second time (e.g. a duplicated BOS), so disable that here, as the
    # transformers chat-templating guide recommends.
    inputs = tokenizer(
        formatted_prompt, return_tensors="pt", add_special_tokens=False
    ).to(model.device)

    outputs = model.generate(
        **inputs,
        max_new_tokens=int(max_length),  # generate() expects an integer count
        do_sample=True,
        temperature=temperature,
        top_k=100,
        top_p=0.95,
    )

    # The returned sequence starts with the prompt tokens; keep only what
    # was generated after them.
    prompt_token_count = inputs["input_ids"].shape[1]
    return tokenizer.decode(outputs[0][prompt_token_count:], skip_special_tokens=True)

# Custom stylesheet handed to gr.Blocks(css=...) below.
# - #custom-header, .llama-image and .llama-description style the HTML header
#   card; the description badge is transparent until the image is hovered.
# - .container styles the gr.Column created with elem_classes="container".
# - .gradio-container caps the overall page width at 900px.
css = """
body {
    background: linear-gradient(135deg, #f5f7fa, #c3cfe2);
    font-family: Arial, sans-serif;
}
#custom-header {
    text-align: center;
    background: rgba(255, 255, 255, 0.8);
    padding: 20px;
    border-radius: 10px;
    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
    position: relative;
    max-width: 800px;
    margin: 20px auto;
}
#custom-header h1 {
    color: #4A90E2;
    font-size: 2em;
    margin-bottom: 10px;
}
.llama-image {
    position: relative;
    transition: transform 0.3s;
    display: inline-block;
    margin-top: 20px;
}
.llama-image:hover {
    transform: scale(1.05);
}
.llama-image img {
    width: 200px;
    border-radius: 10px;
    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
}
.llama-description {
    position: absolute;
    bottom: -30px;
    left: 50%;
    transform: translateX(-50%);
    background-color: #4A90E2;
    color: white;
    padding: 5px 10px;
    border-radius: 5px;
    opacity: 0;
    transition: opacity 0.3s;
    white-space: nowrap;
}
.llama-image:hover .llama-description {
    opacity: 1;
}
.gradio-container {
    max-width: 900px !important;
    margin: auto;
    padding-top: 1.5rem;
}
.container {
    background-color: #ffffff;
    border-radius: 10px;
    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
    padding: 20px;
    margin-top: 20px;
}
"""

# Assemble the page: a static HTML header card followed by the input controls,
# the trigger button and the output box, all styled by the custom stylesheet.
with gr.Blocks(css=css) as iface:
    # Header card: title, tagline and the hover-animated model image.
    gr.HTML(value="""
        <div id="custom-header">
            <h1>Llama-3.1-Storm-8B Text Generation</h1>
            <p>Generate text using the powerful Llama-3.1-Storm-8B model. Enter a prompt and let the AI create!</p>
            <div class="llama-image">
                <img src="https://cdn-uploads.huggingface.co/production/uploads/64c75c1237333ccfef30a602/tmOlbERGKP7JSODa6T06J.jpeg" alt="Llama">
                <div class="llama-description">Llama-3.1-Storm-8B Model</div>
            </div>
        </div>
    """)

    # Everything in this column picks up the ".container" card styling.
    with gr.Column(elem_classes="container"):
        prompt = gr.Textbox(label="Prompt", lines=5)
        max_length = gr.Slider(
            label="Max Length", minimum=1, maximum=500, step=1, value=128
        )
        temperature = gr.Slider(
            label="Temperature", minimum=0.1, maximum=2.0, step=0.1, value=0.7
        )
        submit_btn = gr.Button(value="Generate", variant="primary")
        output = gr.Textbox(label="Generated Text", lines=10)

    # Clicking the button feeds the three inputs to generate_text and writes
    # its return value into the output textbox.
    submit_btn.click(
        fn=generate_text,
        inputs=[prompt, max_length, temperature],
        outputs=output,
    )

# Start the Gradio server for the assembled interface.
iface.launch()