Upload 43 files
Browse files
- modules/models/XMChat.py +12 -4
- modules/shared.py +0 -6
modules/models/XMChat.py
CHANGED
|
@@ -19,6 +19,13 @@ from ..utils import *
|
|
| 19 |
from .base_model import BaseLLMModel
|
| 20 |
from .. import shared
|
| 21 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
# print('model loading')
|
| 23 |
# model = AutoModelForCausalLM.from_pretrained(
|
| 24 |
# "/home/shaozw/labs/imp-v0",
|
|
@@ -173,16 +180,17 @@ A chat between a curious user and an artificial intelligence assistant. This art
|
|
| 173 |
def get_answer_at_once(self):
|
| 174 |
# question = self.history[-1]["content"].strip()
|
| 175 |
# question = f"{self.system_prompt.strip()} USER: <image>\n{question} ASSISTANT:"
|
|
|
|
| 176 |
prompt = self._get_imp_style_inputs()
|
| 177 |
logging.info(prompt)
|
| 178 |
# image_tok_cnt = prompt.count('<image>')
|
| 179 |
# global model, tokenizer
|
| 180 |
-
input_ids =
|
| 181 |
image_tensor = None
|
| 182 |
if '<image>' in prompt:
|
| 183 |
# logging.info("Preprocessing...")
|
| 184 |
-
image_tensor =
|
| 185 |
-
output_ids =
|
| 186 |
input_ids,
|
| 187 |
max_new_tokens=3000,
|
| 188 |
images=image_tensor,
|
|
@@ -194,5 +202,5 @@ A chat between a curious user and an artificial intelligence assistant. This art
|
|
| 194 |
# repetition_penalty=self.repetition_penalty,
|
| 195 |
num_return_sequences=1,
|
| 196 |
use_cache=True)[0]
|
| 197 |
-
response =
|
| 198 |
return response, len(response)
|
|
|
|
| 19 |
from .base_model import BaseLLMModel
|
| 20 |
from .. import shared
|
| 21 |
|
| 22 |
+
imp_model = AutoModelForCausalLM.from_pretrained(
|
| 23 |
+
"MILVLG/imp-v1-3b",
|
| 24 |
+
torch_dtype=torch.float16,
|
| 25 |
+
device_map="auto",
|
| 26 |
+
trust_remote_code=True)
|
| 27 |
+
imp_tokenizer = AutoTokenizer.from_pretrained("MILVLG/imp-v1-3b", trust_remote_code=True)
|
| 28 |
+
|
| 29 |
# print('model loading')
|
| 30 |
# model = AutoModelForCausalLM.from_pretrained(
|
| 31 |
# "/home/shaozw/labs/imp-v0",
|
|
|
|
| 180 |
def get_answer_at_once(self):
|
| 181 |
# question = self.history[-1]["content"].strip()
|
| 182 |
# question = f"{self.system_prompt.strip()} USER: <image>\n{question} ASSISTANT:"
|
| 183 |
+
global imp_model, imp_tokenizer
|
| 184 |
prompt = self._get_imp_style_inputs()
|
| 185 |
logging.info(prompt)
|
| 186 |
# image_tok_cnt = prompt.count('<image>')
|
| 187 |
# global model, tokenizer
|
| 188 |
+
input_ids = imp_tokenizer(prompt, return_tensors='pt').input_ids
|
| 189 |
image_tensor = None
|
| 190 |
if '<image>' in prompt:
|
| 191 |
# logging.info("Preprocessing...")
|
| 192 |
+
image_tensor = imp_model.image_preprocess(self.image_bytes)
|
| 193 |
+
output_ids = imp_model.generate(
|
| 194 |
input_ids,
|
| 195 |
max_new_tokens=3000,
|
| 196 |
images=image_tensor,
|
|
|
|
| 202 |
# repetition_penalty=self.repetition_penalty,
|
| 203 |
num_return_sequences=1,
|
| 204 |
use_cache=True)[0]
|
| 205 |
+
response = imp_tokenizer.decode(output_ids[input_ids.shape[1]:], skip_special_tokens=True).strip()
|
| 206 |
return response, len(response)
|
modules/shared.py
CHANGED
|
@@ -16,12 +16,6 @@ class State:
|
|
| 16 |
usage_api_url = USAGE_API_URL
|
| 17 |
openai_api_base = OPENAI_API_BASE
|
| 18 |
images_completion_url = IMAGES_COMPLETION_URL
|
| 19 |
-
imp_model = AutoModelForCausalLM.from_pretrained(
|
| 20 |
-
"MILVLG/imp-v1-3b",
|
| 21 |
-
torch_dtype=torch.float16,
|
| 22 |
-
device_map="auto",
|
| 23 |
-
trust_remote_code=True)
|
| 24 |
-
imp_tokenizer = AutoTokenizer.from_pretrained("MILVLG/imp-v1-3b", trust_remote_code=True)
|
| 25 |
|
| 26 |
def interrupt(self):
|
| 27 |
self.interrupted = True
|
|
|
|
| 16 |
usage_api_url = USAGE_API_URL
|
| 17 |
openai_api_base = OPENAI_API_BASE
|
| 18 |
images_completion_url = IMAGES_COMPLETION_URL
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
|
| 20 |
def interrupt(self):
|
| 21 |
self.interrupted = True
|