Upload modeling_mplug_owl2.py with huggingface_hub
Browse files
- modeling_mplug_owl2.py +6 -4
modeling_mplug_owl2.py
CHANGED
|
@@ -22,6 +22,7 @@ from torch.nn import CrossEntropyLoss
|
|
| 22 |
import copy
|
| 23 |
import os
|
| 24 |
import sys
|
|
|
|
| 25 |
|
| 26 |
dir_path = os.path.dirname(os.path.realpath(__file__))
|
| 27 |
sys.path.insert(0, dir_path)
|
|
@@ -252,8 +253,9 @@ class MPLUGOwl2LlamaForCausalLM(LlamaForCausalLM, MPLUGOwl2MetaForCausalLM):
|
|
| 252 |
super(LlamaForCausalLM, self).__init__(config)
|
| 253 |
self.model = MPLUGOwl2LlamaModel(config)
|
| 254 |
|
| 255 |
-
self.tokenizer = AutoTokenizer.from_pretrained("q-future/
|
| 256 |
-
self.image_processor = CLIPImageProcessor.from_pretrained("q-future/
|
|
|
|
| 257 |
|
| 258 |
self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False)
|
| 259 |
self.preferential_ids_ = [id_[1] for id_ in self.tokenizer(["excellent","good","fair","poor","bad"])["input_ids"]]
|
|
@@ -268,9 +270,9 @@ class MPLUGOwl2LlamaForCausalLM(LlamaForCausalLM, MPLUGOwl2MetaForCausalLM):
|
|
| 268 |
def chat(self, prompt: str, images, **generate_kwargs):
|
| 269 |
input_ids = tokenizer_image_token(prompt, self.tokenizer, -200, return_tensors='pt').unsqueeze(0).to(self.device)
|
| 270 |
images = [expand2square(img, tuple(int(x*255) for x in self.image_processor.image_mean)) for img in images]
|
| 271 |
-
image_tensor =
|
| 272 |
|
| 273 |
-
return
|
| 274 |
def score(self, images,
|
| 275 |
task_: str = "quality",
|
| 276 |
input_: str = "image",
|
|
|
|
| 22 |
import copy
|
| 23 |
import os
|
| 24 |
import sys
|
| 25 |
+
from transformers import TextStreamer
|
| 26 |
|
| 27 |
dir_path = os.path.dirname(os.path.realpath(__file__))
|
| 28 |
sys.path.insert(0, dir_path)
|
|
|
|
| 253 |
super(LlamaForCausalLM, self).__init__(config)
|
| 254 |
self.model = MPLUGOwl2LlamaModel(config)
|
| 255 |
|
| 256 |
+
self.tokenizer = AutoTokenizer.from_pretrained("q-future/co-instruct-preview")
|
| 257 |
+
self.image_processor = CLIPImageProcessor.from_pretrained("q-future/co-instruct-preview")
|
| 258 |
+
self.streamer = TextStreamer(self.tokenizer, skip_prompt=True, skip_special_tokens=True)
|
| 259 |
|
| 260 |
self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False)
|
| 261 |
self.preferential_ids_ = [id_[1] for id_ in self.tokenizer(["excellent","good","fair","poor","bad"])["input_ids"]]
|
|
|
|
| 270 |
def chat(self, prompt: str, images, **generate_kwargs):
|
| 271 |
input_ids = tokenizer_image_token(prompt, self.tokenizer, -200, return_tensors='pt').unsqueeze(0).to(self.device)
|
| 272 |
images = [expand2square(img, tuple(int(x*255) for x in self.image_processor.image_mean)) for img in images]
|
| 273 |
+
image_tensor = self.image_processor.preprocess(images, return_tensors="pt")["pixel_values"].half().to(self.device)
|
| 274 |
|
| 275 |
+
return self.model.generate(input_ids, images=image_tensor, streamer=self.streamer, **generate_kwargs)
|
| 276 |
def score(self, images,
|
| 277 |
task_: str = "quality",
|
| 278 |
input_: str = "image",
|