Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -8,9 +8,9 @@ model_id = 'akjindal53244/Llama-3.1-Storm-8B'
|
|
| 8 |
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
|
| 9 |
model = LlamaForCausalLM.from_pretrained(
|
| 10 |
model_id,
|
| 11 |
-
torch_dtype=torch.
|
| 12 |
device_map="auto",
|
| 13 |
-
|
| 14 |
)
|
| 15 |
|
| 16 |
# Function to format the prompt
|
|
@@ -22,7 +22,7 @@ def format_prompt(messages):
|
|
| 22 |
return prompt
|
| 23 |
|
| 24 |
# Function to generate response
|
| 25 |
-
@spaces.GPU(duration=
|
| 26 |
def generate_response(message, history):
|
| 27 |
messages = [{"role": "system", "content": "You are a helpful assistant."}]
|
| 28 |
for human, assistant in history:
|
|
|
|
| 8 |
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
|
| 9 |
model = LlamaForCausalLM.from_pretrained(
|
| 10 |
model_id,
|
| 11 |
+
torch_dtype=torch.float32,
|
| 12 |
device_map="auto",
|
| 13 |
+
low_cpu_mem_usage=True
|
| 14 |
)
|
| 15 |
|
| 16 |
# Function to format the prompt
|
|
|
|
| 22 |
return prompt
|
| 23 |
|
| 24 |
# Function to generate response
|
| 25 |
+
@spaces.GPU(duration=300) # Increased duration due to potential slower processing
|
| 26 |
def generate_response(message, history):
|
| 27 |
messages = [{"role": "system", "content": "You are a helpful assistant."}]
|
| 28 |
for human, assistant in history:
|