q-future
/

co-instruct

@@ -22,6 +22,7 @@ from torch.nn import CrossEntropyLoss
 import copy
 import os
 import sys
 dir_path = os.path.dirname(os.path.realpath(__file__))
 sys.path.insert(0, dir_path)
@@ -252,8 +253,9 @@ class MPLUGOwl2LlamaForCausalLM(LlamaForCausalLM, MPLUGOwl2MetaForCausalLM):
         super(LlamaForCausalLM, self).__init__(config)
         self.model = MPLUGOwl2LlamaModel(config)
-        self.tokenizer = AutoTokenizer.from_pretrained("q-future/one-align")
-        self.image_processor = CLIPImageProcessor.from_pretrained("q-future/one-align")
         self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False)
         self.preferential_ids_ = [id_[1] for id_ in self.tokenizer(["excellent","good","fair","poor","bad"])["input_ids"]]
@@ -268,9 +270,9 @@ class MPLUGOwl2LlamaForCausalLM(LlamaForCausalLM, MPLUGOwl2MetaForCausalLM):
     def chat(self, prompt: str, images, **generate_kwargs):
         input_ids =  tokenizer_image_token(prompt, self.tokenizer, -200, return_tensors='pt').unsqueeze(0).to(self.device)
         images = [expand2square(img, tuple(int(x*255) for x in self.image_processor.image_mean)) for img in images]
-        image_tensor = model.image_processor.preprocess(images, return_tensors="pt")["pixel_values"].half().to(self.device)
-        return
     def score(self, images,
               task_: str = "quality",
               input_: str = "image",

 import copy
 import os
 import sys
+from transformers import TextStreamer
 dir_path = os.path.dirname(os.path.realpath(__file__))
 sys.path.insert(0, dir_path)
         super(LlamaForCausalLM, self).__init__(config)
         self.model = MPLUGOwl2LlamaModel(config)
+        self.tokenizer = AutoTokenizer.from_pretrained("q-future/co-instruct-preview")
+        self.image_processor = CLIPImageProcessor.from_pretrained("q-future/co-instruct-preview")
+        self.streamer = TextStreamer(self.tokenizer, skip_prompt=True, skip_special_tokens=True)
         self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False)
         self.preferential_ids_ = [id_[1] for id_ in self.tokenizer(["excellent","good","fair","poor","bad"])["input_ids"]]
     def chat(self, prompt: str, images, **generate_kwargs):
         input_ids =  tokenizer_image_token(prompt, self.tokenizer, -200, return_tensors='pt').unsqueeze(0).to(self.device)
         images = [expand2square(img, tuple(int(x*255) for x in self.image_processor.image_mean)) for img in images]
+        image_tensor = self.image_processor.preprocess(images, return_tensors="pt")["pixel_values"].half().to(self.device)
+        return self.model.generate(input_ids, images=image_tensor, streamer=self.streamer, **generate_kwargs)
     def score(self, images,
               task_: str = "quality",
               input_: str = "image",