image_to_text

+from typing import Dict, List, Any
+from transformers import pipeline
+import torch, PIL, transformers, triton, sentencepiece, protobuf
+import torchvision, einops
+import xformers, accelerate
+from transformers import AutoModelForCausalLM, LlamaTokenizer
+class EndpointHandler():
+    def __init__(self, path=""):
+        self.model = AutoModelForCausalLM.from_pretrained(
+            'THUDM/cogvlm-chat-hf',
+            torch_dtype=torch.bfloat16,
+            low_cpu_mem_usage=True,
+            trust_remote_code=True,
+            #   cache_dir='/tmp'
+        )
+        self.tokenizer = LlamaTokenizer.from_pretrained('lmsys/vicuna-7b-v1.5')
+        # create inference pipeline
+        # self.pipeline = pipeline(model=model, tokenizer=tokenizer)
+    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
+        """
+         Args:
+             data (:obj:):
+                 includes the input data and the parameters for the inference.
+         Return:
+             A :obj:`list`:. The object returned should be a list of one list like [[{"label": 0.9939950108528137}]] containing :
+                 - "label": A string representing what the label/class is. There can be multiple labels.
+                 - "score": A score between 0 and 1 describing how confident the model is for this label/class.
+         """
+        inputs = data.pop("inputs", data)
+        gen_kwargs = {"max_length": 2048, "do_sample": False}
+        # pass inputs with all kwargs in data
+        # prediction = self.pipeline(inputs)
+        outputs = self.model.generate(**inputs, **gen_kwargs)
+        outputs = outputs[:, inputs['input_ids'].shape[1]:]
+        prediction = self.tokenizer.decode(outputs[0])
+        # post process the prediction
+        return prediction

requirements.txt ADDED Viewed

	@@ -0,0 +1,10 @@

+torch
+Pillow
+transformers
+triton
+sentencepiece
+protobuf
+torchvision
+einops
+xformers
+accelerate