Spaces:

sagar007
/

lama_storm_8b

Runtime error

sagar007 committed on Aug 27, 2024

Commit

3880154

verified ·

1 Parent(s): 5195372

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,12 +1,8 @@
-import subprocess
 import gradio as gr
 import torch
 from transformers import AutoTokenizer, LlamaForCausalLM
 import spaces
-# Install flash-attn with specific environment variable
-subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
 # Initialize model and tokenizer
 model_id = 'akjindal53244/Llama-3.1-Storm-8B'
 tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
@@ -26,6 +22,7 @@ def format_prompt(messages):
     return prompt
 # Function to generate response
 def generate_response(message, history):
     messages = [{"role": "system", "content": "You are a helpful assistant."}]
     for human, assistant in history:

 import gradio as gr
 import torch
 from transformers import AutoTokenizer, LlamaForCausalLM
 import spaces
 # Initialize model and tokenizer
 model_id = 'akjindal53244/Llama-3.1-Storm-8B'
 tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
     return prompt
 # Function to generate response
+@spaces.GPU(duration=120)  # Adjust duration as needed
 def generate_response(message, history):
     messages = [{"role": "system", "content": "You are a helpful assistant."}]
     for human, assistant in history: