Abdulmateen committed
Commit dd7c1fd · verified · 1 Parent(s): b2f905d

Update handler.py

Files changed (1):
  1. handler.py +26 -31
handler.py CHANGED
@@ -7,60 +7,55 @@ import base64
 
 class EndpointHandler:
     def __init__(self, path=""):
-        # This part remains the same
-        print(f"Loading processor and model from: {path}...")
-        self.processor = AutoProcessor.from_pretrained(path) # Removed revision for broader compatibility
+        # This loading logic is robust and correct
+        base_model_id = "llava-hf/llava-1.5-7b-hf"
+
+        print("Loading processor...")
+        self.processor = AutoProcessor.from_pretrained(base_model_id)
+
+        print("Loading base model...")
         self.model = LlavaForConditionalGeneration.from_pretrained(
-            path,
+            base_model_id,
             load_in_4bit=True,
             torch_dtype=torch.float16,
             device_map="auto"
         )
-        print("✅ Model loaded successfully.")
+
+        print(f"Loading LoRA adapters from repository path: {path}...")
+        # This assumes your repo at `path` contains the LoRA adapter files
+        self.model.load_adapter(path)
+        print("✅ Model and adapters loaded successfully.")
 
     def __call__(self, data: dict) -> dict:
+        # --- Simplified and Corrected Inference Logic ---
         payload = data.pop("inputs", data)
-
-        prompt_text = payload.pop("prompt", "Describe the image in detail.")
-        image_url = payload.pop("image_url", None)
+
+        prompt_text = payload.pop("prompt", "What can you do?")
         image_b64 = payload.pop("image_b64", None)
         max_new_tokens = payload.pop("max_new_tokens", 200)
 
         image = None
-        # Try to load an image if provided
-        if image_url:
-            try:
-                response = requests.get(image_url)
-                response.raise_for_status()
-                image = Image.open(BytesIO(response.content))
-            except Exception as e:
-                return {"error": f"Failed to load image from URL: {e}"}
-        elif image_b64:
+        if image_b64:
             try:
                 image_bytes = base64.b64decode(image_b64)
-                image = Image.open(BytesIO(image_bytes))
+                image = Image.open(BytesIO(image_bytes)).convert("RGB")
             except Exception as e:
                 return {"error": f"Failed to decode base64 image: {e}"}
 
-        # --- NEW LOGIC: Check if an image is present ---
+        # This is the key change: a simple, clear separation of logic
         if image is not None:
-            # --- Case 1: Multimodal (Image + Text) ---
-            print("Processing multimodal request...")
+            # --- Case 1: Multimodal (Image + Text) Request ---
             prompt = f"USER: <image>\n{prompt_text} ASSISTANT:"
-            inputs = self.processor(text=prompt, images=image, return_tensors="pt").to("cuda")
+            inputs = self.processor(text=prompt, images=image, return_tensors="pt").to(self.model.device)
             output = self.model.generate(**inputs, max_new_tokens=max_new_tokens)
-            full_response = self.processor.decode(output[0], skip_special_tokens=True)
-
         else:
-            # --- Case 2: Text-Only ---
-            print("Processing text-only request...")
+            # --- Case 2: Text-Only Request ---
             prompt = f"USER: {prompt_text} ASSISTANT:"
-            # Note: We do NOT pass the 'images' argument here
-            inputs = self.processor(text=prompt, return_tensors="pt").to("cuda")
-            # Note: We do NOT pass the 'images' keyword to generate()
+            inputs = self.processor(text=prompt, return_tensors="pt").to(self.model.device)
             output = self.model.generate(**inputs, max_new_tokens=max_new_tokens)
 
-        # Clean up the response to get only the assistant's part
+        # Decode the output and extract the assistant's response
+        full_response = self.processor.decode(output[0], skip_special_tokens=True)
         assistant_response = full_response.split("ASSISTANT:")[-1].strip()
+
        return {"generated_text": assistant_response}