Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -10,7 +10,7 @@ import spaces
|
|
| 10 |
import subprocess
|
| 11 |
subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
|
| 12 |
|
| 13 |
-
torch.set_default_device('cuda')
|
| 14 |
|
| 15 |
tokenizer = AutoTokenizer.from_pretrained(
|
| 16 |
'qnguyen3/nanoLLaVA',
|
|
@@ -22,6 +22,8 @@ model = AutoModelForCausalLM.from_pretrained(
|
|
| 22 |
device_map='auto',
|
| 23 |
trust_remote_code=True)
|
| 24 |
|
|
|
|
|
|
|
| 25 |
class KeywordsStoppingCriteria(StoppingCriteria):
|
| 26 |
def __init__(self, keywords, tokenizer, input_ids):
|
| 27 |
self.keywords = keywords
|
|
@@ -93,14 +95,14 @@ def bot_streaming(message, history):
|
|
| 93 |
tokenize=False,
|
| 94 |
add_generation_prompt=True)
|
| 95 |
text_chunks = [tokenizer(chunk).input_ids for chunk in text.split('<image>')]
|
| 96 |
-
input_ids = torch.tensor(text_chunks[0] + [-200] + text_chunks[1], dtype=torch.long).unsqueeze(0)
|
| 97 |
stop_str = '<|im_end|>'
|
| 98 |
keywords = [stop_str]
|
| 99 |
stopping_criteria = KeywordsStoppingCriteria(keywords, tokenizer, input_ids)
|
| 100 |
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
|
| 101 |
|
| 102 |
-
image_tensor = model.process_images([image], model.config).to(
|
| 103 |
-
generation_kwargs = dict(input_ids=input_ids
|
| 104 |
generated_text = ""
|
| 105 |
thread = Thread(target=model.generate, kwargs=generation_kwargs)
|
| 106 |
thread.start()
|
|
|
|
| 10 |
import subprocess
|
| 11 |
subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
|
| 12 |
|
| 13 |
+
# torch.set_default_device('cuda')
|
| 14 |
|
| 15 |
tokenizer = AutoTokenizer.from_pretrained(
|
| 16 |
'qnguyen3/nanoLLaVA',
|
|
|
|
| 22 |
device_map='auto',
|
| 23 |
trust_remote_code=True)
|
| 24 |
|
| 25 |
+
model.to("cuda:0")
|
| 26 |
+
|
| 27 |
class KeywordsStoppingCriteria(StoppingCriteria):
|
| 28 |
def __init__(self, keywords, tokenizer, input_ids):
|
| 29 |
self.keywords = keywords
|
|
|
|
| 95 |
tokenize=False,
|
| 96 |
add_generation_prompt=True)
|
| 97 |
text_chunks = [tokenizer(chunk).input_ids for chunk in text.split('<image>')]
|
| 98 |
+
input_ids = torch.tensor(text_chunks[0] + [-200] + text_chunks[1], dtype=torch.long).unsqueeze(0).to("cuda:0")
|
| 99 |
stop_str = '<|im_end|>'
|
| 100 |
keywords = [stop_str]
|
| 101 |
stopping_criteria = KeywordsStoppingCriteria(keywords, tokenizer, input_ids)
|
| 102 |
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
|
| 103 |
|
| 104 |
+
image_tensor = model.process_images([image], model.config).to("cuda:0")
|
| 105 |
+
generation_kwargs = dict(input_ids=input_ids, images=image_tensor, streamer=streamer, max_new_tokens=100, stopping_criteria=[stopping_criteria])
|
| 106 |
generated_text = ""
|
| 107 |
thread = Thread(target=model.generate, kwargs=generation_kwargs)
|
| 108 |
thread.start()
|