Spaces:
Running
Running
Soumik Bose committed on
Commit ·
8dce736
1
Parent(s): fe22617
go
Browse files- Dockerfile +1 -1
- services/vision_service.py +18 -16
Dockerfile
CHANGED
|
@@ -31,7 +31,7 @@ USER user
|
|
| 31 |
|
| 32 |
# Install llama-cpp-python with optimized build flags
|
| 33 |
RUN CMAKE_ARGS="-DGGML_BLAS=OFF -DGGML_NATIVE=OFF -DGGML_AVX2=ON" \
|
| 34 |
-
pip install --no-cache-dir --user llama-cpp-python
|
| 35 |
|
| 36 |
# Copy requirements and install dependencies
|
| 37 |
COPY --chown=user:user requirements.txt .
|
|
|
|
| 31 |
|
| 32 |
# Install llama-cpp-python with optimized build flags
|
| 33 |
RUN CMAKE_ARGS="-DGGML_BLAS=OFF -DGGML_NATIVE=OFF -DGGML_AVX2=ON" \
|
| 34 |
+
pip install --no-cache-dir --user --upgrade llama-cpp-python
|
| 35 |
|
| 36 |
# Copy requirements and install dependencies
|
| 37 |
COPY --chown=user:user requirements.txt .
|
services/vision_service.py
CHANGED
|
@@ -2,7 +2,6 @@ import logging
|
|
| 2 |
import base64
|
| 3 |
import io
|
| 4 |
from typing import Optional, Dict, Any
|
| 5 |
-
from pathlib import Path
|
| 6 |
from llama_cpp import Llama
|
| 7 |
from llama_cpp.llama_chat_format import Llava15ChatHandler
|
| 8 |
from huggingface_hub import hf_hub_download
|
|
@@ -38,7 +37,8 @@ class VisionService:
|
|
| 38 |
|
| 39 |
logger.info(f"Loading vision model (Threads: {config.N_THREADS})...")
|
| 40 |
|
| 41 |
-
#
|
|
|
|
| 42 |
self.chat_handler = Llava15ChatHandler(
|
| 43 |
clip_model_path=mmproj_path,
|
| 44 |
verbose=False
|
|
@@ -51,12 +51,15 @@ class VisionService:
|
|
| 51 |
n_threads=config.N_THREADS,
|
| 52 |
n_batch=config.VISION_MODEL_BATCH,
|
| 53 |
logits_all=True,
|
| 54 |
-
verbose=False
|
|
|
|
| 55 |
)
|
| 56 |
logger.info("✓ Vision model loaded successfully")
|
| 57 |
|
| 58 |
except Exception as e:
|
| 59 |
logger.error(f"Failed to initialize vision model: {e}")
|
|
|
|
|
|
|
| 60 |
raise
|
| 61 |
|
| 62 |
def is_ready(self) -> bool:
|
|
@@ -72,15 +75,6 @@ class VisionService:
|
|
| 72 |
) -> Dict[str, Any]:
|
| 73 |
"""
|
| 74 |
Analyze an image with a text prompt
|
| 75 |
-
|
| 76 |
-
Args:
|
| 77 |
-
image_data: Raw image bytes
|
| 78 |
-
prompt: Text question/prompt about the image
|
| 79 |
-
temperature: Sampling temperature
|
| 80 |
-
max_tokens: Maximum tokens to generate
|
| 81 |
-
|
| 82 |
-
Returns:
|
| 83 |
-
Analysis result dictionary
|
| 84 |
"""
|
| 85 |
if not self.is_ready():
|
| 86 |
raise RuntimeError("Vision model not initialized")
|
|
@@ -91,7 +85,7 @@ class VisionService:
|
|
| 91 |
|
| 92 |
# Validate image
|
| 93 |
image = Image.open(io.BytesIO(image_data))
|
| 94 |
-
logger.info(f"Processing image: {image.size} | Format: {image.format}")
|
| 95 |
|
| 96 |
# Create vision message format
|
| 97 |
messages = [
|
|
@@ -104,7 +98,7 @@ class VisionService:
|
|
| 104 |
}
|
| 105 |
]
|
| 106 |
|
| 107 |
-
logger.info(f"Analyzing image
|
| 108 |
|
| 109 |
response = self.model.create_chat_completion(
|
| 110 |
messages=messages,
|
|
@@ -131,11 +125,19 @@ class VisionService:
|
|
| 131 |
async def cleanup(self) -> None:
|
| 132 |
"""Cleanup resources"""
|
| 133 |
if self.model:
|
| 134 |
-
|
|
|
|
|
|
|
|
|
|
| 135 |
self.model = None
|
|
|
|
| 136 |
if self.chat_handler:
|
| 137 |
-
|
|
|
|
|
|
|
|
|
|
| 138 |
self.chat_handler = None
|
|
|
|
| 139 |
logger.info("Vision model unloaded")
|
| 140 |
|
| 141 |
# Global instance
|
|
|
|
| 2 |
import base64
|
| 3 |
import io
|
| 4 |
from typing import Optional, Dict, Any
|
|
|
|
| 5 |
from llama_cpp import Llama
|
| 6 |
from llama_cpp.llama_chat_format import Llava15ChatHandler
|
| 7 |
from huggingface_hub import hf_hub_download
|
|
|
|
| 37 |
|
| 38 |
logger.info(f"Loading vision model (Threads: {config.N_THREADS})...")
|
| 39 |
|
| 40 |
+
# NOTE: Llava15ChatHandler is the standard Python wrapper for loading
|
| 41 |
+
# external projectors (mmproj files), even for newer architectures like SmolVLM
|
| 42 |
self.chat_handler = Llava15ChatHandler(
|
| 43 |
clip_model_path=mmproj_path,
|
| 44 |
verbose=False
|
|
|
|
| 51 |
n_threads=config.N_THREADS,
|
| 52 |
n_batch=config.VISION_MODEL_BATCH,
|
| 53 |
logits_all=True,
|
| 54 |
+
verbose=False,
|
| 55 |
+
n_gpu_layers=0 # Explicitly set to 0 to ensure CPU usage and prevent driver crashes
|
| 56 |
)
|
| 57 |
logger.info("✓ Vision model loaded successfully")
|
| 58 |
|
| 59 |
except Exception as e:
|
| 60 |
logger.error(f"Failed to initialize vision model: {e}")
|
| 61 |
+
# Ensure cleanup if initialization fails halfway
|
| 62 |
+
await self.cleanup()
|
| 63 |
raise
|
| 64 |
|
| 65 |
def is_ready(self) -> bool:
|
|
|
|
| 75 |
) -> Dict[str, Any]:
|
| 76 |
"""
|
| 77 |
Analyze an image with a text prompt
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 78 |
"""
|
| 79 |
if not self.is_ready():
|
| 80 |
raise RuntimeError("Vision model not initialized")
|
|
|
|
| 85 |
|
| 86 |
# Validate image
|
| 87 |
image = Image.open(io.BytesIO(image_data))
|
| 88 |
+
# logger.info(f"Processing image: {image.size} | Format: {image.format}")
|
| 89 |
|
| 90 |
# Create vision message format
|
| 91 |
messages = [
|
|
|
|
| 98 |
}
|
| 99 |
]
|
| 100 |
|
| 101 |
+
logger.info(f"Analyzing image... Prompt: {prompt[:50]}")
|
| 102 |
|
| 103 |
response = self.model.create_chat_completion(
|
| 104 |
messages=messages,
|
|
|
|
| 125 |
async def cleanup(self) -> None:
|
| 126 |
"""Cleanup resources"""
|
| 127 |
if self.model:
|
| 128 |
+
try:
|
| 129 |
+
del self.model
|
| 130 |
+
except:
|
| 131 |
+
pass
|
| 132 |
self.model = None
|
| 133 |
+
|
| 134 |
if self.chat_handler:
|
| 135 |
+
try:
|
| 136 |
+
del self.chat_handler
|
| 137 |
+
except:
|
| 138 |
+
pass
|
| 139 |
self.chat_handler = None
|
| 140 |
+
|
| 141 |
logger.info("Vision model unloaded")
|
| 142 |
|
| 143 |
# Global instance
|