File size: 2,396 Bytes

7f13c53
dc92b57
65535dd
7f13c53
 
 
 
 
 
 
65535dd
 
 
 
 
 
 
 
 
 
 
 
 
 
dc92b57
65535dd
dc92b57
 
65535dd
 
9e0766a
65535dd
 
 
 
 
009d0db
acd2340
65535dd
 
 
 
 
 
fb6cf01
65535dd
 
 
 
 
 
 
 
 
 
 
79d0c5d
 
65535dd
dc92b57
65535dd
dc92b57

import torch
from transformers import AutoProcessor, LlavaForConditionalGeneration
from peft import PeftModel
from PIL import Image
import requests
from io import BytesIO
import base64

class EndpointHandler:
    """Serve a LoRA-adapted LLaVA model for image-conditioned text generation.

    On construction the handler loads the processor and the base LLaVA
    checkpoint (quantized to 4-bit) and attaches the LoRA adapters found at
    ``path``. Each call takes a request dict and returns either
    ``{"generated_text": ...}`` or ``{"error": ...}``.
    """

    def __init__(self, path=""):
        """Load the processor, the 4-bit base model and the LoRA adapters.

        Args:
            path: Local path to the LoRA adapter repository.
        """
        # Local import: keeps this block self-contained without touching the
        # file-level import list.
        from transformers import BitsAndBytesConfig

        # NOTE(review): confirm this repo id exists on the Hub; the official
        # HF-format checkpoint appears to be "llava-hf/llava-1.5-7b-hf".
        base_model_id = "llava-hf/llava-v1.5-7b"
        lora_model_path = path

        print("Loading processor...")
        # NOTE(review): trust_remote_code=True lets the model repo execute its
        # own Python code; kept from the original setup, confirm it is needed.
        self.processor = AutoProcessor.from_pretrained(base_model_id, trust_remote_code=True)

        print("Loading base model...")
        # The bare `load_in_4bit=True` keyword is deprecated; passing an
        # explicit BitsAndBytesConfig is the supported equivalent.
        self.model = LlavaForConditionalGeneration.from_pretrained(
            base_model_id,
            quantization_config=BitsAndBytesConfig(load_in_4bit=True),
            torch_dtype=torch.float16,
            device_map="auto",
            trust_remote_code=True,
        )

        print(f"Loading and merging LoRA adapters from: {lora_model_path}...")
        # Despite the log wording, this wraps the base model with the adapters;
        # the adapter weights are not merged into the quantized base weights.
        self.model = PeftModel.from_pretrained(self.model, lora_model_path)
        print("✅ Model and adapters loaded successfully.")

    def __call__(self, data: dict) -> dict:
        """Generate a text answer for one image + prompt request.

        Args:
            data: Request payload. Recognised keys:
                ``image_b64`` (required) - base64-encoded image bytes;
                ``prompt`` - instruction text, defaults to
                "Describe the image in detail.";
                ``max_new_tokens`` - positive integer, defaults to 200.

        Returns:
            ``{"generated_text": str}`` on success, or ``{"error": str}`` when
            the image is missing/undecodable or ``max_new_tokens`` is invalid.
        """
        # Read with .get() so the caller's payload dict is left unmodified.
        prompt_text = data.get("prompt", "Describe the image in detail.")
        image_b64 = data.get("image_b64")
        max_new_tokens = data.get("max_new_tokens", 200)

        if not image_b64:
            return {"error": "No image provided. Please use the 'image_b64' key."}

        # Reject bad generation lengths up front instead of failing in generate().
        try:
            max_new_tokens = int(max_new_tokens)
        except (TypeError, ValueError, OverflowError):
            max_new_tokens = 0
        if max_new_tokens < 1:
            return {"error": "'max_new_tokens' must be a positive integer."}

        try:
            image_bytes = base64.b64decode(image_b64)
            # Image.open() is lazy: convert() forces the full decode here so
            # corrupt/truncated data is reported as an error response, and it
            # normalises RGBA / palette / greyscale inputs to RGB.
            image = Image.open(BytesIO(image_bytes)).convert("RGB")
        except Exception as e:
            # Broad on purpose: this is the request boundary and any decoding
            # failure should become an error payload, not a crash.
            return {"error": f"Failed to decode or open base64 image: {e}"}

        prompt = f"USER: <image>\n{prompt_text} ASSISTANT:"

        # Follow the model's actual placement (device_map="auto") rather than
        # assuming "cuda"; only floating-point tensors are cast to fp16.
        inputs = self.processor(text=prompt, images=image, return_tensors="pt").to(
            self.model.device, torch.float16
        )
        prompt_length = inputs["input_ids"].shape[1]

        with torch.no_grad():
            output = self.model.generate(**inputs, max_new_tokens=max_new_tokens)

        # Decode only the newly generated tokens. Splitting the full text on
        # "ASSISTANT:" would truncate the answer whenever the prompt or the
        # generation itself contains that marker.
        generated_tokens = output[0][prompt_length:]
        assistant_response = self.processor.decode(
            generated_tokens, skip_special_tokens=True
        ).strip()

        return {"generated_text": assistant_response}