Abdulmateen committed (verified)
Commit 65535dd · Parent(s): fb6cf01

Update handler.py

Files changed (1): handler.py (+42 -42)
handler.py CHANGED
@@ -1,5 +1,6 @@
 import torch
 from transformers import AutoProcessor, LlavaForConditionalGeneration
+from peft import PeftModel
 from PIL import Image
 import requests
 from io import BytesIO
@@ -7,56 +8,55 @@ import base64
 
 class EndpointHandler:
     def __init__(self, path=""):
-        print(f"Loading processor and model from: {path}...")
-        self.processor = AutoProcessor.from_pretrained(path)
+        # path is the local path to your LoRA adapter repository
+
+        # 1. Define the base model ID
+        base_model_id = "llava-hf/llava-v1.5-7b"
+
+        # The path to your LoRA adapters is the local path provided
+        lora_model_path = path
+
+        print("Loading processor...")
+        # ADDED: trust_remote_code=True is required for custom models
+        self.processor = AutoProcessor.from_pretrained(base_model_id, trust_remote_code=True)
+
+        print("Loading base model...")
+        # Load the base model in 4-bit and add trust_remote_code=True
         self.model = LlavaForConditionalGeneration.from_pretrained(
-            path,
+            base_model_id,
             load_in_4bit=True,
             torch_dtype=torch.float16,
-            device_map="auto"
+            device_map="auto",
+            trust_remote_code=True
         )
-        print("✅ Model loaded successfully.")
+
+        print(f"Loading and merging LoRA adapters from: {lora_model_path}...")
+        # Load and merge your LoRA adapters onto the base model
+        self.model = PeftModel.from_pretrained(self.model, lora_model_path)
+        print("✅ Model and adapters loaded successfully.")
 
     def __call__(self, data: dict) -> dict:
-        payload = data.pop("inputs", data)
-
-        prompt_text = payload.pop("prompt", "Describe the image in detail.")
-        image_url = payload.pop("image_url", None)
-        image_b64 = payload.pop("image_b64", None)
-        max_new_tokens = payload.pop("max_new_tokens", 200)
-
-        image = None
-        # Try to load an image if provided
-        if image_url:
-            try:
-                response = requests.get(image_url)
-                response.raise_for_status()
-                image = Image.open(BytesIO(response.content))
-            except Exception as e:
-                return {"error": f"Failed to load image from URL: {e}"}
-        elif image_b64:
-            try:
-                image_bytes = base64.b64decode(image_b64)
-                image = Image.open(BytesIO(image_bytes))
-            except Exception as e:
-                return {"error": f"Failed to decode base64 image: {e}"}
-
-        # Check if an image is present and choose the correct logic path
-        if image is not None:
-            # --- Case 1: Multimodal (Image + Text) ---
-            print("Processing multimodal request...")
-            prompt = f"USER: <image>\n{prompt_text} ASSISTANT:"
-            inputs = self.processor(text=prompt, images=image, return_tensors="pt").to("cuda")
-            output = self.model.generate(**inputs, max_new_tokens=max_new_tokens)
-            full_response = self.processor.decode(output[0], skip_special_tokens=True)
+        prompt_text = data.pop("prompt", "Describe the image in detail.")
+        image_b64 = data.pop("image_b64", None)
+        max_new_tokens = data.pop("max_new_tokens", 200)
+
+        if not image_b64:
+            return {"error": "No image provided. Please use the 'image_b64' key."}
 
-        else:
-            # --- Case 2: Text-Only ---
-            print("Processing text-only request...")
-            prompt = f"USER: {prompt_text} ASSISTANT:"
-            inputs = self.processor(text=prompt, return_tensors="pt").to("cuda")
+        try:
+            image_bytes = base64.b64decode(image_b64)
+            image = Image.open(BytesIO(image_bytes))
+        except Exception as e:
+            return {"error": f"Failed to decode or open base64 image: {e}"}
+
+        prompt = f"USER: <image>\n{prompt_text} ASSISTANT:"
+
+        inputs = self.processor(text=prompt, images=image, return_tensors="pt").to("cuda")
+
+        with torch.no_grad():
             output = self.model.generate(**inputs, max_new_tokens=max_new_tokens)
-            full_response = self.processor.decode(output[0], skip_special_tokens=True)
 
+        full_response = self.processor.decode(output[0], skip_special_tokens=True)
         assistant_response = full_response.split("ASSISTANT:")[-1].strip()
+
        return {"generated_text": assistant_response}
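Two caveats on the new loading code, with a sketch below. First, recent transformers releases deprecate passing load_in_4bit=True directly to from_pretrained in favor of an explicit BitsAndBytesConfig. Second, despite the log message, PeftModel.from_pretrained only attaches the LoRA adapters; it does not merge them into the base weights (which is fine for inference, where the adapters are applied on the fly). A minimal sketch of an equivalent load under those assumptions; the adapter path here is hypothetical:

import torch
from transformers import BitsAndBytesConfig, LlavaForConditionalGeneration
from peft import PeftModel

# Explicit 4-bit config instead of the deprecated bare load_in_4bit kwarg
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
)

model = LlavaForConditionalGeneration.from_pretrained(
    "llava-hf/llava-v1.5-7b",
    quantization_config=bnb_config,
    torch_dtype=torch.float16,
    device_map="auto",
)

# Attaches the adapters; the base weights stay unchanged (path is hypothetical)
model = PeftModel.from_pretrained(model, "path/to/lora-adapters")
# An actual merge would be model.merge_and_unload(), but merging into a
# 4-bit quantized base may not be supported, depending on the peft version.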
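For testing, the updated contract expects a flat JSON body with the keys the handler pops (prompt, image_b64, max_new_tokens) and a base64-encoded image; image_url is no longer accepted. A minimal client sketch, where the endpoint URL, token, and image filename are placeholders:

import base64
import requests

ENDPOINT_URL = "https://your-endpoint.example.com"  # placeholder
HF_TOKEN = "hf_..."  # placeholder

# Base64-encode a local image for the "image_b64" key
with open("example.jpg", "rb") as f:
    image_b64 = base64.b64encode(f.read()).decode("utf-8")

response = requests.post(
    ENDPOINT_URL,
    headers={"Authorization": f"Bearer {HF_TOKEN}"},
    json={
        "prompt": "Describe the image in detail.",
        "image_b64": image_b64,
        "max_new_tokens": 200,
    },
)
print(response.json())  # expected: {"generated_text": "..."}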