sagar007 committed on
Commit
3880154
·
verified ·
1 Parent(s): 5195372

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -4
app.py CHANGED
@@ -1,12 +1,8 @@
1
- import subprocess
2
  import gradio as gr
3
  import torch
4
  from transformers import AutoTokenizer, LlamaForCausalLM
5
  import spaces
6
 
7
- # Install flash-attn with specific environment variable
8
- subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
9
-
10
  # Initialize model and tokenizer
11
  model_id = 'akjindal53244/Llama-3.1-Storm-8B'
12
  tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
@@ -26,6 +22,7 @@ def format_prompt(messages):
26
  return prompt
27
 
28
  # Function to generate response
 
29
  def generate_response(message, history):
30
  messages = [{"role": "system", "content": "You are a helpful assistant."}]
31
  for human, assistant in history:
 
 
1
  import gradio as gr
2
  import torch
3
  from transformers import AutoTokenizer, LlamaForCausalLM
4
  import spaces
5
 
 
 
 
6
  # Initialize model and tokenizer
7
  model_id = 'akjindal53244/Llama-3.1-Storm-8B'
8
  tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
 
22
  return prompt
23
 
24
  # Function to generate response
25
+ @spaces.GPU(duration=120) # Adjust duration as needed
26
  def generate_response(message, history):
27
  messages = [{"role": "system", "content": "You are a helpful assistant."}]
28
  for human, assistant in history: