Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
|
@@ -76,20 +76,18 @@ mode2func = dict(
|
|
| 76 |
|
| 77 |
##########################################
|
| 78 |
# LLM part
|
| 79 |
-
# TODO: 1) change model 2) change arguments
|
| 80 |
##########################################
|
| 81 |
import torch
|
| 82 |
from transformers import AutoModel, AutoProcessor, TextIteratorStreamer
|
| 83 |
from threading import Thread
|
| 84 |
|
| 85 |
-
model_name = "Emova-ollm/
|
| 86 |
model = AutoModel.from_pretrained(
|
| 87 |
model_name,
|
| 88 |
torch_dtype=torch.bfloat16,
|
| 89 |
-
|
| 90 |
low_cpu_mem_usage=True,
|
| 91 |
-
trust_remote_code=True
|
| 92 |
-
token=auth_token).eval().cuda()
|
| 93 |
processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=True, token=auth_token)
|
| 94 |
streamer = TextIteratorStreamer(processor.tokenizer, skip_prompt=True, skip_special_tokens=True, timeout=15)
|
| 95 |
|
|
@@ -235,8 +233,8 @@ def http_bot(state, temperature, top_p, max_new_tokens, speaker):
|
|
| 235 |
# Process inputs
|
| 236 |
inputs = processor(text=[prompt], images=all_images if len(all_images) > 0 else None, return_tensors="pt")
|
| 237 |
inputs.to(model.device)
|
| 238 |
-
if len(all_images) > 0:
|
| 239 |
-
|
| 240 |
|
| 241 |
# Process hyperparameters
|
| 242 |
temperature = float(pload.get("temperature", 1.0))
|
|
|
|
| 76 |
|
| 77 |
##########################################
|
| 78 |
# LLM part
|
|
|
|
| 79 |
##########################################
|
| 80 |
import torch
|
| 81 |
from transformers import AutoModel, AutoProcessor, TextIteratorStreamer
|
| 82 |
from threading import Thread
|
| 83 |
|
| 84 |
+
model_name = "Emova-ollm/emova-qwen-2-5-7b-hf"
|
| 85 |
model = AutoModel.from_pretrained(
|
| 86 |
model_name,
|
| 87 |
torch_dtype=torch.bfloat16,
|
| 88 |
+
attn_implementation='flash_attention_2',
|
| 89 |
low_cpu_mem_usage=True,
|
| 90 |
+
trust_remote_code=True).eval().cuda()
|
|
|
|
| 91 |
processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=True, token=auth_token)
|
| 92 |
streamer = TextIteratorStreamer(processor.tokenizer, skip_prompt=True, skip_special_tokens=True, timeout=15)
|
| 93 |
|
|
|
|
| 233 |
# Process inputs
|
| 234 |
inputs = processor(text=[prompt], images=all_images if len(all_images) > 0 else None, return_tensors="pt")
|
| 235 |
inputs.to(model.device)
|
| 236 |
+
# if len(all_images) > 0:
|
| 237 |
+
# inputs['pixel_values'] = inputs['pixel_values'].to(model.dtype)
|
| 238 |
|
| 239 |
# Process hyperparameters
|
| 240 |
temperature = float(pload.get("temperature", 1.0))
|