Portx
/

trial-multi-model

Model card Files Files and versions

Portx committed on Mar 2, 2025

Commit

cc3ae19

·

verified ·

1 Parent(s): ed38c45

Update handler.py

Files changed (1) hide show

handler.py +3 -14

handler.py CHANGED Viewed

@@ -4,7 +4,7 @@ from subprocess import run
 from PIL import Image
 run("pip install flash-attn --no-build-isolation", shell=True, check=True)
-run("pip install -U bitsandbytes", shell=True, check=True)
 from transformers import AutoModelForVision2Seq, AutoProcessor, BitsAndBytesConfig
@@ -19,26 +19,15 @@ bnb_config = BitsAndBytesConfig(
         llm_int8_enable_fp32_cpu_offload=True
 )
-try:
-    import flash_attn
-    print("FlashAttention is installed")
-    USE_FLASH_ATTENTION = True
-except ImportError:
-    print("FlashAttention is not installed")
-    USE_FLASH_ATTENTION = False
 # check for GPU
 # `device` is an integer index: 0 when torch reports CUDA as available,
 # otherwise -1.
 # NOTE(review): -1 looks like the transformers-pipeline "run on CPU" sentinel;
 # confirm every consumer of `device` accepts a negative index, since not all
 # torch/transformers entry points do.
 device = 0 if torch.cuda.is_available() else -1
 class Utils:
     """Namespace class grouping stateless helper functions."""

     @staticmethod
     def convert_base64_to_jpg(base64_string):
         """Decode a base64 payload and write the raw bytes to ``./do_img.jpg``.

         Declared as a static method so it can be called both on the class
         (``Utils.convert_base64_to_jpg(s)``) and on an instance; without the
         decorator an instance call would pass the instance as
         ``base64_string`` and fail.

         Args:
             base64_string: Base64-encoded data (``str`` or bytes-like), as
                 accepted by ``base64.b64decode``.

         Returns:
             None. The decoded bytes are written to ``./do_img.jpg`` relative
             to the current working directory.

         Raises:
             binascii.Error: If ``base64_string`` is not valid base64.
             OSError: If the output file cannot be opened or written.
         """
         image_data = base64.b64decode(base64_string)
         # The bytes are written as-is; nothing checks that they form a JPEG.
         # Mode 'wb' truncates, so each call replaces the previous file.
         with open("./do_img.jpg", 'wb') as f:
             f.write(image_data)
 class PromptSet:
     system_message = "You are an expert in analyzing and extracting information from freight, shipment, or delivery orders. Please carefully read the provided order file and extract the following 10 key pieces of information. Ensure that the key names are exactly as listed below. Do not create any additional key names other than these. If any information is missing or unavailable, output '-'."
     main_order_information_prompt = """#Key names and their descriptions:
@@ -120,6 +109,6 @@ class EndpointHandler():
                                                           tokenize=True, return_dict=True,return_tensors="pt").to(device)
-        output = model.generate(**model_inputs, max_new_tokens=512)
-        prediction = processor.decode(output[0], skip_special_tokens=True)
         return prediction

 from PIL import Image
 run("pip install flash-attn --no-build-isolation", shell=True, check=True)
+#run("pip install -U bitsandbytes", shell=True, check=True)
 from transformers import AutoModelForVision2Seq, AutoProcessor, BitsAndBytesConfig
         llm_int8_enable_fp32_cpu_offload=True
 )
 # check for GPU
 # `device` is an integer index: 0 when torch reports CUDA as available,
 # otherwise -1.
 # NOTE(review): -1 looks like the transformers-pipeline "run on CPU" sentinel;
 # confirm every consumer of `device` accepts a negative index, since not all
 # torch/transformers entry points do.
 device = 0 if torch.cuda.is_available() else -1
 class Utils:
     """Namespace class grouping stateless helper functions."""

     @staticmethod
     def convert_base64_to_jpg(base64_string):
         """Decode a base64 payload and write the raw bytes to ``./do_img.jpg``.

         Declared as a static method so it can be called both on the class
         (``Utils.convert_base64_to_jpg(s)``) and on an instance; without the
         decorator an instance call would pass the instance as
         ``base64_string`` and fail.

         Args:
             base64_string: Base64-encoded data (``str`` or bytes-like), as
                 accepted by ``base64.b64decode``.

         Returns:
             None. The decoded bytes are written to ``./do_img.jpg`` relative
             to the current working directory.

         Raises:
             binascii.Error: If ``base64_string`` is not valid base64.
             OSError: If the output file cannot be opened or written.
         """
         image_data = base64.b64decode(base64_string)
         # The bytes are written as-is; nothing checks that they form a JPEG.
         # Mode 'wb' truncates, so each call replaces the previous file.
         with open("./do_img.jpg", 'wb') as f:
             f.write(image_data)
 class PromptSet:
     system_message = "You are an expert in analyzing and extracting information from freight, shipment, or delivery orders. Please carefully read the provided order file and extract the following 10 key pieces of information. Ensure that the key names are exactly as listed below. Do not create any additional key names other than these. If any information is missing or unavailable, output '-'."
     main_order_information_prompt = """#Key names and their descriptions:
                                                           tokenize=True, return_dict=True,return_tensors="pt").to(device)
+        output = self.model.generate(**model_inputs, max_new_tokens=512)
+        prediction = self.processor.decode(output[0], skip_special_tokens=True)
         return prediction