Spaces: Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,12 +1,8 @@
|
|
| 1 |
-
import subprocess
|
| 2 |
import gradio as gr
|
| 3 |
import torch
|
| 4 |
from transformers import AutoTokenizer, LlamaForCausalLM
|
| 5 |
import spaces
|
| 6 |
|
| 7 |
-
# Install flash-attn with specific environment variable
|
| 8 |
-
subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
|
| 9 |
-
|
| 10 |
# Initialize model and tokenizer
|
| 11 |
model_id = 'akjindal53244/Llama-3.1-Storm-8B'
|
| 12 |
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
|
|
@@ -26,6 +22,7 @@ def format_prompt(messages):
|
|
| 26 |
return prompt
|
| 27 |
|
| 28 |
# Function to generate response
|
|
|
|
| 29 |
def generate_response(message, history):
|
| 30 |
messages = [{"role": "system", "content": "You are a helpful assistant."}]
|
| 31 |
for human, assistant in history:
|
|
|
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
import torch
|
| 3 |
from transformers import AutoTokenizer, LlamaForCausalLM
|
| 4 |
import spaces
|
| 5 |
|
|
|
|
|
|
|
|
|
|
| 6 |
# Initialize model and tokenizer
|
| 7 |
model_id = 'akjindal53244/Llama-3.1-Storm-8B'
|
| 8 |
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
|
|
|
|
| 22 |
return prompt
|
| 23 |
|
| 24 |
# Function to generate response
|
| 25 |
+
@spaces.GPU(duration=120) # Adjust duration as needed
|
| 26 |
def generate_response(message, history):
|
| 27 |
messages = [{"role": "system", "content": "You are a helpful assistant."}]
|
| 28 |
for human, assistant in history:
|