Spaces:
Running
Running
Soumik Bose committed on
Commit ·
8dce736
1
Parent(s): fe22617
go
Browse files- Dockerfile +1 -1
- services/vision_service.py +18 -16
Dockerfile
CHANGED
|
@@ -31,7 +31,7 @@ USER user
|
|
| 31 |
|
| 32 |
# Install llama-cpp-python with optimized build flags
|
| 33 |
RUN CMAKE_ARGS="-DGGML_BLAS=OFF -DGGML_NATIVE=OFF -DGGML_AVX2=ON" \
|
| 34 |
-
pip install --no-cache-dir --user llama-cpp-python
|
| 35 |
|
| 36 |
# Copy requirements and install dependencies
|
| 37 |
COPY --chown=user:user requirements.txt .
|
|
|
|
| 31 |
|
| 32 |
# Install llama-cpp-python with optimized build flags
|
| 33 |
RUN CMAKE_ARGS="-DGGML_BLAS=OFF -DGGML_NATIVE=OFF -DGGML_AVX2=ON" \
|
| 34 |
+
pip install --no-cache-dir --user --upgrade llama-cpp-python
|
| 35 |
|
| 36 |
# Copy requirements and install dependencies
|
| 37 |
COPY --chown=user:user requirements.txt .
|
services/vision_service.py
CHANGED
|
@@ -2,7 +2,6 @@ import logging
|
|
| 2 |
import base64
|
| 3 |
import io
|
| 4 |
from typing import Optional, Dict, Any
|
| 5 |
-
from pathlib import Path
|
| 6 |
from llama_cpp import Llama
|
| 7 |
from llama_cpp.llama_chat_format import Llava15ChatHandler
|
| 8 |
from huggingface_hub import hf_hub_download
|
|
@@ -38,7 +37,8 @@ class VisionService:
|
|
| 38 |
|
| 39 |
logger.info(f"Loading vision model (Threads: {config.N_THREADS})...")
|
| 40 |
|
| 41 |
-
#
|
|
|
|
| 42 |
self.chat_handler = Llava15ChatHandler(
|
| 43 |
clip_model_path=mmproj_path,
|
| 44 |
verbose=False
|
|
@@ -51,12 +51,15 @@ class VisionService:
|
|
| 51 |
n_threads=config.N_THREADS,
|
| 52 |
n_batch=config.VISION_MODEL_BATCH,
|
| 53 |
logits_all=True,
|
| 54 |
-
verbose=False
|
|
|
|
| 55 |
)
|
| 56 |
logger.info("✓ Vision model loaded successfully")
|
| 57 |
|
| 58 |
except Exception as e:
|
| 59 |
logger.error(f"Failed to initialize vision model: {e}")
|
|
|
|
|
|
|
| 60 |
raise
|
| 61 |
|
| 62 |
def is_ready(self) -> bool:
|
|
@@ -72,15 +75,6 @@ class VisionService:
|
|
| 72 |
) -> Dict[str, Any]:
|
| 73 |
"""
|
| 74 |
Analyze an image with a text prompt
|
| 75 |
-
|
| 76 |
-
Args:
|
| 77 |
-
image_data: Raw image bytes
|
| 78 |
-
prompt: Text question/prompt about the image
|
| 79 |
-
temperature: Sampling temperature
|
| 80 |
-
max_tokens: Maximum tokens to generate
|
| 81 |
-
|
| 82 |
-
Returns:
|
| 83 |
-
Analysis result dictionary
|
| 84 |
"""
|
| 85 |
if not self.is_ready():
|
| 86 |
raise RuntimeError("Vision model not initialized")
|
|
@@ -91,7 +85,7 @@ class VisionService:
|
|
| 91 |
|
| 92 |
# Validate image
|
| 93 |
image = Image.open(io.BytesIO(image_data))
|
| 94 |
-
logger.info(f"Processing image: {image.size} | Format: {image.format}")
|
| 95 |
|
| 96 |
# Create vision message format
|
| 97 |
messages = [
|
|
@@ -104,7 +98,7 @@ class VisionService:
|
|
| 104 |
}
|
| 105 |
]
|
| 106 |
|
| 107 |
-
logger.info(f"Analyzing image
|
| 108 |
|
| 109 |
response = self.model.create_chat_completion(
|
| 110 |
messages=messages,
|
|
@@ -131,11 +125,19 @@ class VisionService:
|
|
| 131 |
async def cleanup(self) -> None:
|
| 132 |
"""Cleanup resources"""
|
| 133 |
if self.model:
|
| 134 |
-
|
|
|
|
|
|
|
|
|
|
| 135 |
self.model = None
|
|
|
|
| 136 |
if self.chat_handler:
|
| 137 |
-
|
|
|
|
|
|
|
|
|
|
| 138 |
self.chat_handler = None
|
|
|
|
| 139 |
logger.info("Vision model unloaded")
|
| 140 |
|
| 141 |
# Global instance
|
|
|
|
| 2 |
import base64
|
| 3 |
import io
|
| 4 |
from typing import Optional, Dict, Any
|
|
|
|
| 5 |
from llama_cpp import Llama
|
| 6 |
from llama_cpp.llama_chat_format import Llava15ChatHandler
|
| 7 |
from huggingface_hub import hf_hub_download
|
|
|
|
| 37 |
|
| 38 |
logger.info(f"Loading vision model (Threads: {config.N_THREADS})...")
|
| 39 |
|
| 40 |
+
# NOTE: Llava15ChatHandler is the standard Python wrapper for loading
|
| 41 |
+
# external projectors (mmproj files), even for newer architectures like SmolVLM
|
| 42 |
self.chat_handler = Llava15ChatHandler(
|
| 43 |
clip_model_path=mmproj_path,
|
| 44 |
verbose=False
|
|
|
|
| 51 |
n_threads=config.N_THREADS,
|
| 52 |
n_batch=config.VISION_MODEL_BATCH,
|
| 53 |
logits_all=True,
|
| 54 |
+
verbose=False,
|
| 55 |
+
n_gpu_layers=0 # Explicitly set to 0 to ensure CPU usage and prevent driver crashes
|
| 56 |
)
|
| 57 |
logger.info("✓ Vision model loaded successfully")
|
| 58 |
|
| 59 |
except Exception as e:
|
| 60 |
logger.error(f"Failed to initialize vision model: {e}")
|
| 61 |
+
# Ensure cleanup if initialization fails halfway
|
| 62 |
+
await self.cleanup()
|
| 63 |
raise
|
| 64 |
|
| 65 |
def is_ready(self) -> bool:
|
|
|
|
| 75 |
) -> Dict[str, Any]:
|
| 76 |
"""
|
| 77 |
Analyze an image with a text prompt
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 78 |
"""
|
| 79 |
if not self.is_ready():
|
| 80 |
raise RuntimeError("Vision model not initialized")
|
|
|
|
| 85 |
|
| 86 |
# Validate image
|
| 87 |
image = Image.open(io.BytesIO(image_data))
|
| 88 |
+
# logger.info(f"Processing image: {image.size} | Format: {image.format}")
|
| 89 |
|
| 90 |
# Create vision message format
|
| 91 |
messages = [
|
|
|
|
| 98 |
}
|
| 99 |
]
|
| 100 |
|
| 101 |
+
logger.info(f"Analyzing image... Prompt: {prompt[:50]}")
|
| 102 |
|
| 103 |
response = self.model.create_chat_completion(
|
| 104 |
messages=messages,
|
|
|
|
| 125 |
async def cleanup(self) -> None:
|
| 126 |
"""Cleanup resources"""
|
| 127 |
if self.model:
|
| 128 |
+
try:
|
| 129 |
+
del self.model
|
| 130 |
+
except:
|
| 131 |
+
pass
|
| 132 |
self.model = None
|
| 133 |
+
|
| 134 |
if self.chat_handler:
|
| 135 |
+
try:
|
| 136 |
+
del self.chat_handler
|
| 137 |
+
except:
|
| 138 |
+
pass
|
| 139 |
self.chat_handler = None
|
| 140 |
+
|
| 141 |
logger.info("Vision model unloaded")
|
| 142 |
|
| 143 |
# Global instance
|