Upload 2 files
Uploading Inference spices
- handler.py +66 -0
- requirements.txt +7 -0
handler.py
ADDED
@@ -0,0 +1,66 @@
+import torch
+from transformers import AutoProcessor, LlavaForConditionalGeneration
+from peft import PeftModel
+from PIL import Image
+import requests
+from io import BytesIO
+import base64
+
+class EndpointHandler:
+    def __init__(self, path=""):
+        # The 'path' argument will be the path to your LoRA repo on the Hub, e.g., "Abdulmateen/llava-finetuned"
+
+        # Define the base model that your LoRA was trained on
+        base_model_id = "llava-hf/llava-1.5-7b-hf"
+
+        print("Loading processor...")
+        # Pinning to a specific revision for stability
+        self.processor = AutoProcessor.from_pretrained(base_model_id, revision="a272c74")
+
+        print("Loading base model...")
+        # Load the base model in 4-bit for memory efficiency
+        self.model = LlavaForConditionalGeneration.from_pretrained(
+            base_model_id,
+            load_in_4bit=True,
+            torch_dtype=torch.float16,
+            device_map="auto"
+        )
+
+        print(f"Loading LoRA adapters from repository path: {path}...")
+        # Load and merge your LoRA adapters onto the base model
+        self.model = PeftModel.from_pretrained(self.model, path)
+        print("✅ Model and adapters loaded successfully.")
+
+    def __call__(self, data: dict) -> dict:
+        # Get the prompt and image from the request payload
+        prompt_text = data.pop("prompt", "Describe the image in detail.")
+        image_url = data.pop("image_url", None)
+        image_b64 = data.pop("image_b64", None)
+        max_new_tokens = data.pop("max_new_tokens", 200)
+
+        # Load image from either a URL or a base64 string
+        if image_url:
+            response = requests.get(image_url)
+            image = Image.open(BytesIO(response.content))
+        elif image_b64:
+            image_bytes = base64.b64decode(image_b64)
+            image = Image.open(BytesIO(image_bytes))
+        else:
+            return {"error": "No image provided. Please use 'image_url' or 'image_b64'."}
+
+        # Format the prompt for LLaVA
+        prompt = f"USER: <image>\n{prompt_text} ASSISTANT:"
+
+        # Process inputs
+        inputs = self.processor(text=prompt, images=image, return_tensors="pt").to("cuda")
+
+        # Generate a response
+        with torch.no_grad():
+            output = self.model.generate(**inputs, max_new_tokens=max_new_tokens)
+
+        # Decode and clean up the response
+        full_response = self.processor.decode(output[0], skip_special_tokens=True)
+        # Extract only the assistant's part of the response
+        assistant_response = full_response.split("ASSISTANT:")[-1].strip()
+
+        return {"generated_text": assistant_response}
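For reference, a handler like this can be smoke-tested locally on a CUDA machine before deploying. The snippet below is a hypothetical sketch and not part of this commit: the repo id simply mirrors the example given in the handler's own comment, and the image URL is a placeholder.

# Hypothetical local test for the EndpointHandler above (not part of this upload).
from handler import EndpointHandler

# "Abdulmateen/llava-finetuned" is the example repo id from the handler's comment.
handler = EndpointHandler(path="Abdulmateen/llava-finetuned")

payload = {
    "prompt": "What is shown in this image?",
    "image_url": "https://example.com/sample.jpg",  # placeholder image URL
    "max_new_tokens": 128,
}

result = handler(payload)
print(result["generated_text"])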
requirements.txt
ADDED
@@ -0,0 +1,7 @@
+torch==2.3.0
+transformers==4.37.2
+accelerate==0.28.0
+bitsandbytes
+peft
+Pillow
+requests
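Once deployed, a client can exercise the handler's base64 branch with a plain HTTP POST. This is a hedged sketch, not part of the commit: the endpoint URL, token, and image file are placeholders, and the payload keys simply match what __call__ reads from the request body.

# Hypothetical client call against a deployed endpoint, exercising the image_b64 path.
import base64
import requests

ENDPOINT_URL = "https://<your-endpoint>.endpoints.huggingface.cloud"  # placeholder
HF_TOKEN = "hf_..."  # placeholder access token

# Encode a local image as base64, matching the handler's 'image_b64' field.
with open("sample.jpg", "rb") as f:
    image_b64 = base64.b64encode(f.read()).decode("utf-8")

resp = requests.post(
    ENDPOINT_URL,
    headers={"Authorization": f"Bearer {HF_TOKEN}", "Content-Type": "application/json"},
    json={"prompt": "Describe the image in detail.", "image_b64": image_b64},
)
print(resp.json())  # expected shape: {"generated_text": "..."}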