Spaces:
Runtime error
Update app.py
app.py CHANGED
@@ -1,45 +1,26 @@
 import gradio as gr
-import spaces
 import torch
-from transformers import AutoTokenizer,
-
-# HTML template for custom UI
-HTML_TEMPLATE = """
-<style>
-.llama-image {
-    display: flex;
-    justify-content: center;
-    margin-bottom: 20px;
-}
-.llama-image img {
-    max-width: 300px;
-    border-radius: 10px;
-    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
-}
-.llama-description {
-    text-align: center;
-    font-weight: bold;
-    margin-top: 10px;
-}
-</style>
-<div class="llama-image">
-    <img src="https://cdn-uploads.huggingface.co/production/uploads/64c75c1237333ccfef30a602/tmOlbERGKP7JSODa6T06J.jpeg" alt="Llama">
-    <div class="llama-description">Llama-3.1-Storm-8B Model</div>
-</div>
-<h1>Llama-3.1-Storm-8B Text Generation</h1>
-<p>Generate text using the powerful Llama-3.1-Storm-8B model. Enter a prompt and let the AI create!</p>
-"""
+from transformers import AutoTokenizer, pipeline
 
 # Load the model and tokenizer
 model_name = "akjindal53244/Llama-3.1-Storm-8B"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
-
-
+pipe = pipeline(
+    "text-generation",
+    model=model_name,
     torch_dtype=torch.bfloat16,
     device_map="auto"
 )
 
-
+# HTML content
+HTML_CONTENT = """
+<h1>Llama-3.1-Storm-8B Text Generation</h1>
+<p>Generate text using the powerful Llama-3.1-Storm-8B model. Enter a prompt and let the AI create!</p>
+<div class="llama-image">
+    <img src="https://cdn-uploads.huggingface.co/production/uploads/64c75c1237333ccfef30a602/tmOlbERGKP7JSODa6T06J.jpeg" alt="Llama" style="width:200px; border-radius:10px;">
+</div>
+"""
+
 def generate_text(prompt, max_length, temperature):
     messages = [
         {"role": "system", "content": "You are a helpful assistant."},
@@ -47,10 +28,8 @@ def generate_text(prompt, max_length, temperature):
     ]
     formatted_prompt = tokenizer.apply_chat_template(messages, add_generation_prompt=True, tokenize=False)
 
-
-
-    outputs = model.generate(
-        **inputs,
+    outputs = pipe(
+        formatted_prompt,
         max_new_tokens=max_length,
         do_sample=True,
         temperature=temperature,
@@ -58,25 +37,20 @@ def generate_text(prompt, max_length, temperature):
         top_p=0.95,
     )
 
-    return
+    return outputs[0]['generated_text'][len(formatted_prompt):]
 
-
-
-
-
-
-
-
-
-
-
-
-
-    css=".gradio-container {max-width: 800px; margin: auto;}",
-)
+with gr.Blocks() as demo:
+    gr.HTML(HTML_CONTENT)
+    with gr.Row():
+        with gr.Column(scale=2):
+            prompt = gr.Textbox(label="Prompt", lines=5)
+            max_length = gr.Slider(minimum=1, maximum=500, value=128, step=1, label="Max Length")
+            temperature = gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature")
+            submit_button = gr.Button("Generate")
+        with gr.Column(scale=2):
+            output = gr.Textbox(label="Generated Text", lines=10)
+
+    submit_button.click(generate_text, inputs=[prompt, max_length, temperature], outputs=[output])
 
-
-
-    gr.HTML(HTML_TEMPLATE)
-]
-)
+if __name__ == "__main__":
+    demo.launch()
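Several deleted lines appear only as bare `-` markers above because the page does not render their content. For orientation only, the removed load-and-generate path was presumably something like the following sketch; `AutoModelForCausalLM` and the `tokenizer(...)` call are assumptions, not text recovered from the diff:

# Hypothetical reconstruction of the deleted code path (assumed, not shown in the diff)
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
inputs = tokenizer(formatted_prompt, return_tensors="pt").to(model.device)
outputs = model.generate(
    **inputs,
    max_new_tokens=max_length,
    do_sample=True,
    temperature=temperature,
    top_p=0.95,
)

The commit replaces this explicit load-and-generate pair with a single text-generation pipeline, which constructs the model and handles device placement internally.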
app.py after this commit:

import gradio as gr
import torch
from transformers import AutoTokenizer, pipeline

# Load the model and tokenizer
model_name = "akjindal53244/Llama-3.1-Storm-8B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
pipe = pipeline(
    "text-generation",
    model=model_name,
    torch_dtype=torch.bfloat16,
    device_map="auto"
)
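# Note: device_map="auto" requires the accelerate package to be installed in
# the Space, and bfloat16 weights for an 8B-parameter model occupy roughly
# 16 GB, so hardware with less memory than that will fail during model load.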

# HTML content
HTML_CONTENT = """
<h1>Llama-3.1-Storm-8B Text Generation</h1>
<p>Generate text using the powerful Llama-3.1-Storm-8B model. Enter a prompt and let the AI create!</p>
<div class="llama-image">
    <img src="https://cdn-uploads.huggingface.co/production/uploads/64c75c1237333ccfef30a602/tmOlbERGKP7JSODa6T06J.jpeg" alt="Llama" style="width:200px; border-radius:10px;">
</div>
"""

def generate_text(prompt, max_length, temperature):
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt}
    ]
    formatted_prompt = tokenizer.apply_chat_template(messages, add_generation_prompt=True, tokenize=False)
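    # For Llama-3.1-style chat models the template typically renders to:
    #   <|begin_of_text|><|start_header_id|>system<|end_header_id|>
    #   You are a helpful assistant.<|eot_id|><|start_header_id|>user<|end_header_id|>
    #   {prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
    # (illustrative only; the exact string comes from the tokenizer's chat template)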

    outputs = pipe(
        formatted_prompt,
        max_new_tokens=max_length,
        do_sample=True,
        temperature=temperature,
        top_p=0.95,
    )
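
    # The text-generation pipeline returns the prompt plus the completion in
    # 'generated_text' by default, hence the slice below; passing
    # return_full_text=False to pipe(...) would achieve the same thing.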
    return outputs[0]['generated_text'][len(formatted_prompt):]

with gr.Blocks() as demo:
    gr.HTML(HTML_CONTENT)
    with gr.Row():
        with gr.Column(scale=2):
            prompt = gr.Textbox(label="Prompt", lines=5)
            max_length = gr.Slider(minimum=1, maximum=500, value=128, step=1, label="Max Length")
            temperature = gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature")
            submit_button = gr.Button("Generate")
        with gr.Column(scale=2):
            output = gr.Textbox(label="Generated Text", lines=10)

    submit_button.click(generate_text, inputs=[prompt, max_length, temperature], outputs=[output])
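
    # On shared hardware, calling demo.queue() before launch() serializes
    # requests so concurrent clicks do not contend for the single model instance.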

if __name__ == "__main__":
    demo.launch()
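The diff also drops `import spaces`, the helper package used when a Space runs on ZeroGPU hardware, where GPU-bound functions must be decorated. If ZeroGPU is the target hardware (an assumption; the page does not say), the usual pattern is a minimal sketch like:

import spaces  # Hugging Face ZeroGPU helper package

@spaces.GPU  # acquires a GPU slice for the duration of each call (ZeroGPU only)
def generate_text(prompt, max_length, temperature):
    ...  # same body as in app.py above

Dropping the import is harmless on dedicated GPU hardware, but on ZeroGPU an undecorated function would not see a GPU at call time, which is one plausible reading of the Space's "Runtime error" status.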