Abdulmateen committed on
Commit
5ee030f
·
verified ·
1 Parent(s): 89c1a1d

Update handler.py

Browse files
Files changed (1) hide show
  1. handler.py +32 -20
handler.py CHANGED
@@ -6,9 +6,7 @@ from io import BytesIO
6
 
7
  class EndpointHandler:
8
  def __init__(self, path=""):
9
- # The 'path' is now a self-contained directory with the complete, merged model.
10
- # No internet access is needed here.
11
-
12
  print("Loading model and processor from local path...")
13
  self.processor = AutoProcessor.from_pretrained(path, trust_remote_code=True)
14
  self.model = LlavaForConditionalGeneration.from_pretrained(
@@ -21,25 +19,39 @@ class EndpointHandler:
21
  print("✅ Model loaded successfully.")
22
 
23
  def __call__(self, data: dict) -> dict:
24
- prompt_text = data.pop("prompt", "Describe the image in detail.")
25
- image_b64 = data.pop("image_b64", None)
26
- max_new_tokens = data.pop("max_new_tokens", 200)
27
-
28
- if not image_b64:
29
- return {"error": "No image provided. Please use the 'image_b64' key."}
30
-
31
- try:
32
- image_bytes = base64.b64decode(image_b64)
33
- image = Image.open(BytesIO(image_bytes))
34
- except Exception as e:
35
- return {"error": f"Failed to decode or open base64 image: {e}"}
36
-
37
- prompt = f"USER: <image>\n{prompt_text} ASSISTANT:"
38
- inputs = self.processor(text=prompt, images=image, return_tensors="pt").to("cuda")
39
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  with torch.no_grad():
41
  output = self.model.generate(**inputs, max_new_tokens=max_new_tokens)
42
-
43
  full_response = self.processor.decode(output[0], skip_special_tokens=True)
44
  assistant_response = full_response.split("ASSISTANT:")[-1].strip()
45
 
 
6
 
7
  class EndpointHandler:
8
  def __init__(self, path=""):
9
+ # The 'path' is a self-contained directory with the complete, merged model.
 
 
10
  print("Loading model and processor from local path...")
11
  self.processor = AutoProcessor.from_pretrained(path, trust_remote_code=True)
12
  self.model = LlavaForConditionalGeneration.from_pretrained(
 
19
  print("✅ Model loaded successfully.")
20
 
21
  def __call__(self, data: dict) -> dict:
22
+ # FIX 1: Correctly handle the payload, whether it's wrapped in "inputs" or not.
23
+ payload = data.pop("inputs", data)
24
+
25
+ # Extract data from the payload
26
+ prompt_text = payload.pop("prompt", "Describe the image in detail.")
27
+ image_b64 = payload.pop("image_b64", None)
28
+ max_new_tokens = payload.pop("max_new_tokens", 200)
29
+ image = None
30
+
31
+ # Try to process an image only if it was provided
32
+ if image_b64:
33
+ try:
34
+ image_bytes = base64.b64decode(image_b64)
35
+ image = Image.open(BytesIO(image_bytes))
36
+ except Exception as e:
37
+ return {"error": f"Failed to decode or open base64 image: {e}"}
38
+
39
+ # FIX 2: Use separate logic for multimodal and text-only requests.
40
+ if image is not None:
41
+ # --- Case 1: Multimodal (Image + Text) ---
42
+ print("Processing multimodal request...")
43
+ prompt = f"USER: <image>\n{prompt_text} ASSISTANT:"
44
+ inputs = self.processor(text=prompt, images=image, return_tensors="pt").to("cuda")
45
+ else:
46
+ # --- Case 2: Text-Only ---
47
+ print("Processing text-only request...")
48
+ prompt = f"USER: {prompt_text} ASSISTANT:"
49
+ inputs = self.processor(text=prompt, return_tensors="pt").to("cuda")
50
+
51
+ # Generate the output
52
  with torch.no_grad():
53
  output = self.model.generate(**inputs, max_new_tokens=max_new_tokens)
54
+
55
  full_response = self.processor.decode(output[0], skip_special_tokens=True)
56
  assistant_response = full_response.split("ASSISTANT:")[-1].strip()
57