Soumik Bose committed on
Commit
8dce736
·
1 Parent(s): fe22617
Files changed (2) hide show
  1. Dockerfile +1 -1
  2. services/vision_service.py +18 -16
Dockerfile CHANGED
@@ -31,7 +31,7 @@ USER user
31
 
32
  # Install llama-cpp-python with optimized build flags
33
  RUN CMAKE_ARGS="-DGGML_BLAS=OFF -DGGML_NATIVE=OFF -DGGML_AVX2=ON" \
34
- pip install --no-cache-dir --user llama-cpp-python==0.3.2
35
 
36
  # Copy requirements and install dependencies
37
  COPY --chown=user:user requirements.txt .
 
31
 
32
  # Install llama-cpp-python with optimized build flags
33
  RUN CMAKE_ARGS="-DGGML_BLAS=OFF -DGGML_NATIVE=OFF -DGGML_AVX2=ON" \
34
+ pip install --no-cache-dir --user --upgrade llama-cpp-python
35
 
36
  # Copy requirements and install dependencies
37
  COPY --chown=user:user requirements.txt .
services/vision_service.py CHANGED
@@ -2,7 +2,6 @@ import logging
2
  import base64
3
  import io
4
  from typing import Optional, Dict, Any
5
- from pathlib import Path
6
  from llama_cpp import Llama
7
  from llama_cpp.llama_chat_format import Llava15ChatHandler
8
  from huggingface_hub import hf_hub_download
@@ -38,7 +37,8 @@ class VisionService:
38
 
39
  logger.info(f"Loading vision model (Threads: {config.N_THREADS})...")
40
 
41
- # Initialize chat handler with multimodal projection
 
42
  self.chat_handler = Llava15ChatHandler(
43
  clip_model_path=mmproj_path,
44
  verbose=False
@@ -51,12 +51,15 @@ class VisionService:
51
  n_threads=config.N_THREADS,
52
  n_batch=config.VISION_MODEL_BATCH,
53
  logits_all=True,
54
- verbose=False
 
55
  )
56
  logger.info("✓ Vision model loaded successfully")
57
 
58
  except Exception as e:
59
  logger.error(f"Failed to initialize vision model: {e}")
 
 
60
  raise
61
 
62
  def is_ready(self) -> bool:
@@ -72,15 +75,6 @@ class VisionService:
72
  ) -> Dict[str, Any]:
73
  """
74
  Analyze an image with a text prompt
75
-
76
- Args:
77
- image_data: Raw image bytes
78
- prompt: Text question/prompt about the image
79
- temperature: Sampling temperature
80
- max_tokens: Maximum tokens to generate
81
-
82
- Returns:
83
- Analysis result dictionary
84
  """
85
  if not self.is_ready():
86
  raise RuntimeError("Vision model not initialized")
@@ -91,7 +85,7 @@ class VisionService:
91
 
92
  # Validate image
93
  image = Image.open(io.BytesIO(image_data))
94
- logger.info(f"Processing image: {image.size} | Format: {image.format}")
95
 
96
  # Create vision message format
97
  messages = [
@@ -104,7 +98,7 @@ class VisionService:
104
  }
105
  ]
106
 
107
- logger.info(f"Analyzing image with prompt: {prompt[:50]}...")
108
 
109
  response = self.model.create_chat_completion(
110
  messages=messages,
@@ -131,11 +125,19 @@ class VisionService:
131
  async def cleanup(self) -> None:
132
  """Cleanup resources"""
133
  if self.model:
134
- del self.model
 
 
 
135
  self.model = None
 
136
  if self.chat_handler:
137
- del self.chat_handler
 
 
 
138
  self.chat_handler = None
 
139
  logger.info("Vision model unloaded")
140
 
141
  # Global instance
 
2
  import base64
3
  import io
4
  from typing import Optional, Dict, Any
 
5
  from llama_cpp import Llama
6
  from llama_cpp.llama_chat_format import Llava15ChatHandler
7
  from huggingface_hub import hf_hub_download
 
37
 
38
  logger.info(f"Loading vision model (Threads: {config.N_THREADS})...")
39
 
40
+ # NOTE: Llava15ChatHandler is the standard Python wrapper for loading
41
+ # external projectors (mmproj files), even for newer architectures like SmolVLM
42
  self.chat_handler = Llava15ChatHandler(
43
  clip_model_path=mmproj_path,
44
  verbose=False
 
51
  n_threads=config.N_THREADS,
52
  n_batch=config.VISION_MODEL_BATCH,
53
  logits_all=True,
54
+ verbose=False,
55
+ n_gpu_layers=0 # Explicitly set to 0 to ensure CPU usage and prevent driver crashes
56
  )
57
  logger.info("✓ Vision model loaded successfully")
58
 
59
  except Exception as e:
60
  logger.error(f"Failed to initialize vision model: {e}")
61
+ # Ensure cleanup if initialization fails halfway
62
+ await self.cleanup()
63
  raise
64
 
65
  def is_ready(self) -> bool:
 
75
  ) -> Dict[str, Any]:
76
  """
77
  Analyze an image with a text prompt
 
 
 
 
 
 
 
 
 
78
  """
79
  if not self.is_ready():
80
  raise RuntimeError("Vision model not initialized")
 
85
 
86
  # Validate image
87
  image = Image.open(io.BytesIO(image_data))
88
+ # logger.info(f"Processing image: {image.size} | Format: {image.format}")
89
 
90
  # Create vision message format
91
  messages = [
 
98
  }
99
  ]
100
 
101
+ logger.info(f"Analyzing image... Prompt: {prompt[:50]}")
102
 
103
  response = self.model.create_chat_completion(
104
  messages=messages,
 
125
  async def cleanup(self) -> None:
126
  """Cleanup resources"""
127
  if self.model:
128
+ try:
129
+ del self.model
130
+ except:
131
+ pass
132
  self.model = None
133
+
134
  if self.chat_handler:
135
+ try:
136
+ del self.chat_handler
137
+ except:
138
+ pass
139
  self.chat_handler = None
140
+
141
  logger.info("Vision model unloaded")
142
 
143
  # Global instance