Spaces:

xce009
/

ai_chat_api

Running

App Files Files Community

Soumik Bose committed 10 days ago

Commit

95db209

1 Parent(s): 63d026b

reverted

Browse files

Files changed (9) hide show

Dockerfile +12 -30
config.py +0 -38
main.py +255 -94
models/schemas.py +0 -28
routers/text_router.py +0 -53
routers/vision_router.py +0 -73
services/text_service.py +0 -134
services/vision_service.py +0 -144
utils/json_extractor.py +0 -133

Dockerfile CHANGED Viewed

@@ -4,55 +4,37 @@ ENV PYTHONDONTWRITEBYTECODE=1 \
     PYTHONUNBUFFERED=1 \
     PORT=7860 \
     HF_HOME=/app/cache \
-    CPU_THREADS=2 \
     PATH="/home/user/.local/bin:${PATH}"
 WORKDIR /app
-# Install system dependencies
 RUN apt-get update && apt-get install -y \
     build-essential \
     cmake \
     curl \
     git \
-    libgomp1 \
     && rm -rf /var/lib/apt/lists/*
-# Create non-root user
-RUN useradd -m -u 1000 user && \
-    mkdir -p /app/cache /app/models && \
-    chown -R user:user /app
-# Upgrade pip as root
 RUN pip install --no-cache-dir --upgrade pip
-# Switch to non-root user
 USER user
-# Install llama-cpp-python with optimized build flags
-RUN CMAKE_ARGS="-DGGML_BLAS=OFF -DGGML_NATIVE=OFF -DGGML_AVX2=ON" \
-    pip install --no-cache-dir --user llama-cpp-python==0.3.16
-# Copy requirements and install dependencies
 COPY --chown=user:user requirements.txt .
-RUN pip install --no-cache-dir --user -r requirements.txt
-# Copy application structure
-COPY --chown=user:user config.py .
 COPY --chown=user:user main.py .
-COPY --chown=user:user models/ ./models/
-COPY --chown=user:user services/ ./services/
-COPY --chown=user:user routers/ ./routers/
-COPY --chown=user:user utils/ ./utils/
-# Create __init__.py files if they don't exist
-RUN touch models/__init__.py services/__init__.py routers/__init__.py utils/__init__.py
-# Health check
-HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
-    CMD curl -f http://localhost:7860/ping || exit 1
 EXPOSE 7860
-# Production startup with keep-alive and graceful shutdown
-CMD ["bash", "-c", "while true; do curl -s https://xce009-ai-chat-api.hf.space/ping > /dev/null 2>&1 || true; sleep 300; done & exec python -m uvicorn main:app --host 0.0.0.0 --port 7860 --log-level info"]

     PYTHONUNBUFFERED=1 \
     PORT=7860 \
     HF_HOME=/app/cache \
     PATH="/home/user/.local/bin:${PATH}"
 WORKDIR /app
+# Install build dependencies
 RUN apt-get update && apt-get install -y \
     build-essential \
     cmake \
     curl \
     git \
     && rm -rf /var/lib/apt/lists/*
+# Create user
+RUN useradd -m -u 1000 user
+RUN mkdir -p /app/cache /app/models && chown -R user:user /app
+# Upgrade pip as root
 RUN pip install --no-cache-dir --upgrade pip
 USER user
+# Build and install llama-cpp-python with proper flags
+RUN CMAKE_ARGS="-DGGML_BLAS=OFF -DGGML_NATIVE=OFF" \
+    pip install --no-cache-dir llama-cpp-python==0.3.2
+# Install other dependencies
 COPY --chown=user:user requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+# Copy app
 COPY --chown=user:user main.py .
 EXPOSE 7860
+CMD ["bash", "-c", "while true; do curl -s https://xce009-ai-chat-api.hf.space/ping > /dev/null || true; sleep 300; done & python -m uvicorn main:app --host 0.0.0.0 --port 7860"]

config.py DELETED Viewed

@@ -1,38 +0,0 @@
-import os
-from typing import Optional
-class Config:
-    """Centralized configuration for the SmolLM API"""
-    # Server Configuration
-    PORT: int = int(os.getenv("PORT", "7860"))
-    HOST: str = "0.0.0.0"
-    # Cache Configuration
-    HF_HOME: str = os.getenv("HF_HOME", "/app/cache")
-    # CPU Configuration
-    N_THREADS: int = int(os.getenv("CPU_THREADS", "2"))
-    # Text Model Configuration
-    TEXT_MODEL_REPO: str = "bartowski/SmolLM2-1.7B-Instruct-GGUF"
-    TEXT_MODEL_FILE: str = "SmolLM2-1.7B-Instruct-Q4_K_M.gguf"
-    TEXT_MODEL_CTX: int = 2048
-    TEXT_MODEL_BATCH: int = 512
-    # Vision Model Configuration
-    VISION_MODEL_REPO: str = "ggml-org/SmolVLM-500M-Instruct-GGUF"
-    VISION_MODEL_FILE: str = "SmolVLM-500M-Instruct-Q8_0.gguf"
-    VISION_MMPROJ_FILE: str = "mmproj-SmolVLM-500M-Instruct-f16.gguf"
-    VISION_MODEL_CTX: int = 2048
-    VISION_MODEL_BATCH: int = 512
-    # Default Generation Parameters
-    DEFAULT_TEMPERATURE: float = 0.6
-    DEFAULT_MAX_TOKENS: int = 512
-    # File Upload Configuration
-    MAX_FILE_SIZE: int = 10 * 1024 * 1024  # 10MB
-    ALLOWED_IMAGE_EXTENSIONS: set = {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp"}
-config = Config()

main.py CHANGED Viewed

@@ -1,119 +1,280 @@
 import logging
 from contextlib import asynccontextmanager
-from datetime import datetime
-from fastapi import FastAPI
-from fastapi.middleware.cors import CORSMiddleware
-from fastapi.responses import JSONResponse
-from config import config
-from services.text_service import text_service
-from services.vision_service import vision_service
-from routers import text_router, vision_router
-# Logging Setup
 logging.basicConfig(
     level=logging.INFO,
     format="%(asctime)s [%(levelname)s] %(name)s: %(message)s"
 )
-logger = logging.getLogger("main")
-@asynccontextmanager
-async def lifespan(app: FastAPI):
-    """Application lifecycle manager"""
-    logger.info("=" * 60)
-    logger.info("STARTING SMOLLM2 MULTIMODAL API")
-    logger.info("=" * 60)
-    try:
-        # Initialize text service
-        logger.info("Initializing Text Service...")
-        await text_service.initialize()
-        # Initialize vision service
-        logger.info("Initializing Vision Service...")
-        await vision_service.initialize()
-        logger.info("=" * 60)
-        logger.info("✓ All services initialized successfully")
-        logger.info("=" * 60)
     except Exception as e:
-        logger.critical(f"Startup failed: {e}")
-        raise
     yield
-    # Cleanup
-    logger.info("Shutting down services...")
-    await text_service.cleanup()
-    await vision_service.cleanup()
-    logger.info("Shutdown complete")
-# Create FastAPI application
-app = FastAPI(
-    title="SmolLM2 Multimodal API",
-    version="3.0",
-    description="Production-ready API for SmolLM2 text and vision models",
-    lifespan=lifespan
-)
-# Add CORS middleware
-app.add_middleware(
-    CORSMiddleware,
-    allow_origins=["*"],
-    allow_credentials=True,
-    allow_methods=["*"],
-    allow_headers=["*"],
-)
-# Include routers
-app.include_router(text_router.router)
-app.include_router(vision_router.router)
 @app.get("/")
 async def root():
-    """Root endpoint with API information"""
-    return {
-        "name": "SmolLM2 Multimodal API",
-        "version": "3.0",
-        "endpoints": {
-            "text": "/v1/text/chat/completions",
-            "vision": "/v1/vision/analyze",
-            "health": "/health"
-        },
-        "docs": "/docs"
-    }
-@app.get("/health")
-async def health_check():
-    """Comprehensive health check"""
-    return {
-        "status": "healthy",
-        "services": {
-            "text": text_service.is_ready(),
-            "vision": vision_service.is_ready()
-        },
-        "timestamp": datetime.utcnow().isoformat()
-    }
 @app.get("/ping")
 async def ping():
-    """Simple ping endpoint"""
-    all_ready = text_service.is_ready() and vision_service.is_ready()
-    if not all_ready:
-        return JSONResponse(
-            status_code=503,
-            content={"status": "initializing", "ready": False}
         )
-    return {"status": "pong", "ready": True}
-if __name__ == "__main__":
-    import uvicorn
-    uvicorn.run(
-        "main:app",
-        host=config.HOST,
-        port=config.PORT,
-        log_level="info"
-    )

+import os
 import logging
+import json
 from contextlib import asynccontextmanager
+from typing import List, Optional, Any
+from fastapi import FastAPI, HTTPException
+from fastapi.responses import JSONResponse, StreamingResponse
+from pydantic import BaseModel
+from llama_cpp import Llama
+from huggingface_hub import hf_hub_download
+# --- 1. Logging Setup ---
 logging.basicConfig(
     level=logging.INFO,
     format="%(asctime)s [%(levelname)s] %(name)s: %(message)s"
 )
+logger = logging.getLogger("SmolLM-API")
+# --- 2. Helper Functions (Previously in json_service.py) ---
+def find_balanced_closing_index(text: str, start_index: int) -> int:
+    """
+    Finds the matching closing bracket for the bracket at start_index.
+    Ignores brackets inside strings and comments.
+    """
+    start_char = text[start_index]
+    end_char = '}' if start_char == '{' else ']'  # any opener other than '{' is paired with ']'
+    depth = 0  # +1 per start_char, -1 per end_char; the match is where it returns to 0
+    in_double_quote = False
+    in_single_quote = False
+    in_backtick = False
+    in_line_comment = False
+    in_block_comment = False
+    is_escaped = False  # set after a backslash so the following character is skipped
+    length = len(text)
+    i = start_index
+    while i < length:
+        char = text[i]
+        next_char = text[i+1] if i + 1 < length else ''  # one-character lookahead; '' at end of text
+        # Handle Escaping
+        if is_escaped:
+            is_escaped = False
+            i += 1
+            continue
+        if char == '\\' and not in_line_comment and not in_block_comment:  # applies inside and outside string literals
+            is_escaped = True
+            i += 1
+            continue
+        # Handle Comments
+        if in_line_comment:
+            if char == '\n': in_line_comment = False  # a line comment ends at the newline
+            i += 1
+            continue
+        if in_block_comment:
+            if char == '*' and next_char == '/':
+                in_block_comment = False
+                i += 2  # step past both characters of the closing "*/"
+                continue
+            i += 1
+            continue
+        # Check comment starts
+        if not in_double_quote and not in_single_quote and not in_backtick:  # comment openers only count outside strings
+            if char == '/' and next_char == '/':
+                in_line_comment = True
+                i += 2
+                continue
+            if char == '/' and next_char == '*':
+                in_block_comment = True
+                i += 2
+                continue
+        # Handle Strings
+        if in_double_quote:
+            if char == '"': in_double_quote = False
+            i += 1
+            continue
+        if in_single_quote:
+            if char == "'": in_single_quote = False
+            i += 1
+            continue
+        if in_backtick:
+            if char == '`': in_backtick = False
+            i += 1
+            continue
+        if char == '"':
+            in_double_quote = True
+            i += 1
+            continue
+        if char == "'":
+            in_single_quote = True
+            i += 1
+            continue
+        if char == '`':
+            in_backtick = True
+            i += 1
+            continue
+        # Handle Bracket Counting
+        if char == start_char:  # only brackets of the same kind as the opener are counted
+            depth += 1
+        elif char == end_char:
+            depth -= 1
+            if depth == 0:
+                return i # Found matching close
+        i += 1
+    return -1  # reached end of text without depth returning to 0
+def extract_json_from_content(content: str) -> List[Any]:
+    """
+    Scans text for JSON objects/arrays using state machine logic.
+    """
+    if not content or not isinstance(content, str):  # empty, None or non-str input yields no blocks
+        return []
+    found_blocks = []  # parsed values, in the order they appear in content
+    cursor = 0
+    length = len(content)
+    while cursor < length:
+        if content[cursor] not in ['{', '[']:  # advance to the next candidate opener
+            cursor += 1
+            continue
+        end_index = find_balanced_closing_index(content, cursor)
+        if end_index != -1:
+            raw_candidate = content[cursor : end_index + 1]  # opener through its matching closer, inclusive
+            try:
+                parsed = json.loads(raw_candidate)
+                found_blocks.append(parsed)
+                cursor = end_index + 1  # resume after the parsed block so values nested in it are not reported again
+                continue
+            except json.JSONDecodeError:
+                pass  # balanced but not valid JSON; retry from the next character
+        cursor += 1
+    return found_blocks
+# --- 3. Model Configuration ---
+REPO_ID = "HuggingFaceTB/SmolLM2-1.7B-Instruct-GGUF"  # repo_id passed to hf_hub_download in lifespan
+FILENAME = "smollm2-1.7b-instruct-q4_k_m.gguf"  # filename requested from REPO_ID in lifespan
+N_THREADS = int(os.getenv("CPU_THREADS", "2"))  # CPU_THREADS env var, defaulting to 2; passed as n_threads to Llama
+llm_model: Optional[Llama] = None  # None until lifespan assigns the loaded model
+# --- 4. Lifecycle Manager ---
+@asynccontextmanager
+async def lifespan(app: FastAPI):  # model setup runs before the yield, model release after it
+    global llm_model  # the loaded model is published through this module-level name
+    logger.info("--- STARTING SMOLLM2 API ---")
+    try:
+        logger.info(f"Downloading {FILENAME}...")
+        model_path = hf_hub_download(  # returns the local path that is handed to Llama below
+            repo_id=REPO_ID,
+            filename=FILENAME,
+            cache_dir=os.getenv("HF_HOME", "/app/cache")  # HF_HOME env var, else /app/cache
+        )
+        logger.info(f"Initializing Engine (Threads: {N_THREADS})...")
+        llm_model = Llama(
+            model_path=model_path,
+            n_ctx=2048,
+            n_threads=N_THREADS,
+            n_batch=512,
+            verbose=False
+        )
+        logger.info("SmolLM2 Loaded.")
     except Exception as e:
+        logger.critical(f"Startup Failed: {e}")
+        raise e  # propagate the failure after logging it
     yield
+    if llm_model:  # code after the yield runs when the context manager exits
+        del llm_model  # NOTE(review): with `global`, this removes the module-level name itself; a later read raises NameError instead of seeing None
+        logger.info("Model unloaded.")
+app = FastAPI(title="SmolLM2 API", version="2.1", lifespan=lifespan)
+# --- 5. Data Models ---
+class Message(BaseModel):  # one chat turn; chat() converts it to a dict with model_dump()
+    role: str  # chat() inserts a "system" message and checks the last message for 'user'
+    content: str  # message text; chat() appends an instruction to it in returnJson mode
+class ChatRequest(BaseModel):  # request body of POST /v1/chat/completions
+    messages: List[Message]
+    temperature: Optional[float] = 0.6  # forwarded to create_chat_completion
+    max_tokens: Optional[int] = 512  # forwarded to create_chat_completion
+    stream: Optional[bool] = False  # when true, the reply is sent as text/event-stream
+    returnJson: Optional[bool] = False  # when true, JSON is extracted from the model output; rejected together with stream
+# --- 6. Endpoints ---
 @app.get("/")
 async def root():
+    return {"message": "Welcome to the SmolLM2 API! Use /v1/chat/completions to interact."}  # static greeting naming the chat endpoint
 @app.get("/ping")
 async def ping():
+    if llm_model: return {"status": "pong", "ready": True}  # truthy once lifespan has assigned the loaded model
+    return JSONResponse(status_code=503, content={"status": "loading"})  # 503 while no model is assigned
+@app.post("/v1/chat/completions")
+async def chat(request: ChatRequest):  # returns the raw completion, an SSE stream, or extracted JSON, depending on the flags
+    if not llm_model:  # no model has been assigned by lifespan
+        raise HTTPException(status_code=503, detail="Model loading...")
+    # --- VALIDATION: Check for conflicting parameters ---
+    if request.stream and request.returnJson:
+        raise HTTPException(
+            status_code=400,
+            detail="Conflict: 'stream' and 'returnJson' cannot both be True. Streaming prevents JSON extraction."
         )
+    # Prepare messages
+    messages_payload = [m.model_dump() for m in request.messages]  # plain dicts, so the edits below do not touch the request models
+    # --- LOGIC FOR returnJson ---
+    if request.returnJson:
+        logger.info("Format Mode: JSON Extraction Active")
+        system_prompt = {
+            "role": "system",
+            "content": (
+                "You are a strict JSON generator. "
+                "Convert the user's input into a valid JSON Array of Objects. "
+                "Output strictly in markdown code blocks like ```json ... ```. "
+                "Do not add conversational filler."
+            )
+        }
+        messages_payload.insert(0, system_prompt)  # placed ahead of any messages the caller sent
+        if messages_payload and messages_payload[-1]['role'] == 'user':  # only a trailing user message gets the extra instruction
+            messages_payload[-1]['content'] += "\n\nReturn structured JSON of this content..."
+    logger.info(f"Processing request: {len(messages_payload)} msgs | Stream: {request.stream}")
+    try:
+        # Generate Response
+        response_data = llm_model.create_chat_completion(  # NOTE(review): synchronous call inside an async handler; presumably blocks the event loop while generating (confirm)
+            messages=messages_payload,
+            temperature=request.temperature,
+            max_tokens=request.max_tokens,
+            stream=request.stream
+        )
+        # --- STREAMING RESPONSE LOGIC ---
+        if request.stream:
+            def iter_response():  # re-emits each chunk as a "data: ..." server-sent event line
+                for chunk in response_data:
+                    yield f"data: {json.dumps(chunk)}\n\n"
+                yield "data: [DONE]\n\n"  # end-of-stream marker sent after the last chunk
+            return StreamingResponse(iter_response(), media_type="text/event-stream")  # NOTE(review): iter_response is consumed after this return, so errors raised while streaming are not caught by the except below
+        # --- STANDARD / JSON RESPONSE LOGIC ---
+        if not request.returnJson:
+            return response_data  # completion result returned unchanged
+        # Custom JSON Extraction Logic
+        content_text = response_data['choices'][0]['message']['content']  # text of the first choice
+        extracted_data = extract_json_from_content(content_text)  # list of parsed JSON values; empty when none are found
+        return JSONResponse(content={
+            "status": "success",
+            "data": extracted_data
+        })
+    except Exception as e:  # any failure inside the try becomes a 500 carrying the error text
+        logger.error(f"Error: {e}")
+        raise HTTPException(status_code=500, detail=str(e))

models/schemas.py DELETED Viewed

@@ -1,28 +0,0 @@
-from typing import List, Optional, Any
-from pydantic import BaseModel, Field
-class Message(BaseModel):
-    role: str = Field(..., description="Role of the message sender (user/assistant/system)")
-    content: str = Field(..., description="Content of the message")
-class ChatRequest(BaseModel):
-    messages: List[Message] = Field(..., description="List of messages in the conversation")
-    temperature: Optional[float] = Field(0.6, ge=0.0, le=2.0, description="Sampling temperature")
-    max_tokens: Optional[int] = Field(512, ge=1, le=4096, description="Maximum tokens to generate")
-    stream: Optional[bool] = Field(False, description="Enable streaming response")
-    returnJson: Optional[bool] = Field(False, description="Extract and return JSON from response")
-class VisionRequest(BaseModel):
-    prompt: str = Field(..., description="Text prompt/question about the image")
-    temperature: Optional[float] = Field(0.6, ge=0.0, le=2.0, description="Sampling temperature")
-    max_tokens: Optional[int] = Field(512, ge=1, le=4096, description="Maximum tokens to generate")
-class ErrorResponse(BaseModel):
-    error: str
-    detail: Optional[str] = None
-class HealthResponse(BaseModel):
-    status: str
-    text_model: bool
-    vision_model: bool
-    timestamp: str

routers/text_router.py DELETED Viewed

@@ -1,53 +0,0 @@
-from fastapi import APIRouter, HTTPException
-from fastapi.responses import StreamingResponse, JSONResponse
-import logging
-from models.schemas import ChatRequest, ErrorResponse
-from services.text_service import text_service
-logger = logging.getLogger("text-router")
-router = APIRouter(prefix="/v1/text", tags=["Text Generation"])
-@router.post("/chat/completions")
-async def create_chat_completion(request: ChatRequest):
-    """
-    Create a chat completion using the text model
-    Supports:
-    - Standard completions
-    - Streaming responses
-    - JSON extraction mode
-    """
-    if not text_service.is_ready():
-        raise HTTPException(status_code=503, detail="Text model not ready")
-    try:
-        messages = [msg.model_dump() for msg in request.messages]
-        result = await text_service.generate_completion(
-            messages=messages,
-            temperature=request.temperature,
-            max_tokens=request.max_tokens,
-            stream=request.stream,
-            return_json=request.returnJson
-        )
-        if request.stream:
-            return StreamingResponse(result, media_type="text/event-stream")
-        return JSONResponse(content=result)
-    except ValueError as e:
-        raise HTTPException(status_code=400, detail=str(e))
-    except Exception as e:
-        logger.error(f"Chat completion error: {e}")
-        raise HTTPException(status_code=500, detail=str(e))
-@router.get("/health")
-async def text_health():
-    """Check text model health status"""
-    return {
-        "status": "healthy" if text_service.is_ready() else "initializing",
-        "model_ready": text_service.is_ready()
-    }

routers/vision_router.py DELETED Viewed

@@ -1,73 +0,0 @@
-from fastapi import APIRouter, HTTPException, File, UploadFile, Form
-from fastapi.responses import JSONResponse
-import logging
-from pathlib import Path
-from models.schemas import VisionRequest, ErrorResponse
-from services.vision_service import vision_service
-from config import config
-logger = logging.getLogger("vision-router")
-router = APIRouter(prefix="/v1/vision", tags=["Vision AI"])
-@router.post("/analyze")
-async def analyze_image(
-    image: UploadFile = File(..., description="Image file to analyze"),
-    prompt: str = Form(..., description="Question or prompt about the image"),
-    temperature: float = Form(0.6, ge=0.0, le=2.0),
-    max_tokens: int = Form(512, ge=1, le=4096)
-):
-    """
-    Analyze an image with a text prompt
-    Accepts:
-    - Image file (JPEG, PNG, GIF, WebP, BMP)
-    - Text prompt/question
-    - Optional generation parameters
-    """
-    if not vision_service.is_ready():
-        raise HTTPException(status_code=503, detail="Vision model not ready")
-    # Validate file extension
-    file_ext = Path(image.filename).suffix.lower()
-    if file_ext not in config.ALLOWED_IMAGE_EXTENSIONS:
-        raise HTTPException(
-            status_code=400,
-            detail=f"Invalid file type. Allowed: {', '.join(config.ALLOWED_IMAGE_EXTENSIONS)}"
-        )
-    try:
-        # Read image data
-        image_data = await image.read()
-        # Check file size
-        if len(image_data) > config.MAX_FILE_SIZE:
-            raise HTTPException(
-                status_code=400,
-                detail=f"File too large. Max size: {config.MAX_FILE_SIZE / 1024 / 1024}MB"
-            )
-        # Analyze image
-        result = await vision_service.analyze_image(
-            image_data=image_data,
-            prompt=prompt,
-            temperature=temperature,
-            max_tokens=max_tokens
-        )
-        return JSONResponse(content=result)
-    except HTTPException:
-        raise
-    except Exception as e:
-        logger.error(f"Image analysis error: {e}")
-        raise HTTPException(status_code=500, detail=str(e))
-@router.get("/health")
-async def vision_health():
-    """Check vision model health status"""
-    return {
-        "status": "healthy" if vision_service.is_ready() else "initializing",
-        "model_ready": vision_service.is_ready()
-    }

services/text_service.py DELETED Viewed

@@ -1,134 +0,0 @@
-import logging
-from typing import Optional, Dict, Any, List, AsyncIterator
-from llama_cpp import Llama
-from huggingface_hub import hf_hub_download
-import json
-from config import config
-from utils.json_extractor import extract_json_from_content
-logger = logging.getLogger("text-service")
-class TextService:
-    """Service for text-based language model interactions"""
-    def __init__(self):
-        self.model: Optional[Llama] = None
-    async def initialize(self) -> None:
-        """Initialize the text model"""
-        try:
-            logger.info(f"Downloading text model: {config.TEXT_MODEL_FILE}...")
-            model_path = hf_hub_download(
-                repo_id=config.TEXT_MODEL_REPO,
-                filename=config.TEXT_MODEL_FILE,
-                cache_dir=config.HF_HOME
-            )
-            logger.info(f"Loading text model (Threads: {config.N_THREADS})...")
-            self.model = Llama(
-                model_path=model_path,
-                n_ctx=config.TEXT_MODEL_CTX,
-                n_threads=config.N_THREADS,
-                n_batch=config.TEXT_MODEL_BATCH,
-                verbose=False
-            )
-            logger.info("✓ Text model loaded successfully")
-        except Exception as e:
-            logger.error(f"Failed to initialize text model: {e}")
-            raise
-    def is_ready(self) -> bool:
-        """Check if the model is loaded and ready"""
-        return self.model is not None
-    async def generate_completion(
-        self,
-        messages: List[Dict[str, str]],
-        temperature: float = 0.6,
-        max_tokens: int = 512,
-        stream: bool = False,
-        return_json: bool = False
-    ) -> Any:
-        """
-        Generate text completion
-        Args:
-            messages: List of message dictionaries with 'role' and 'content'
-            temperature: Sampling temperature
-            max_tokens: Maximum tokens to generate
-            stream: Whether to stream the response
-            return_json: Whether to extract JSON from response
-        Returns:
-            Generated completion (dict or stream)
-        """
-        if not self.is_ready():
-            raise RuntimeError("Text model not initialized")
-        # Validate conflicting parameters
-        if stream and return_json:
-            raise ValueError("Cannot use both 'stream' and 'return_json' simultaneously")
-        # Prepare messages for JSON extraction mode
-        if return_json:
-            system_prompt = {
-                "role": "system",
-                "content": (
-                    "You are a strict JSON generator. "
-                    "Convert the user's input into valid JSON format. "
-                    "Output strictly in markdown code blocks like ```json ... ```. "
-                    "Do not add conversational filler."
-                )
-            }
-            messages = [system_prompt] + messages
-            if messages[-1]['role'] == 'user':
-                messages[-1]['content'] += "\n\nReturn structured JSON of this content."
-        logger.info(f"Generating completion: {len(messages)} messages | Stream: {stream}")
-        try:
-            response = self.model.create_chat_completion(
-                messages=messages,
-                temperature=temperature,
-                max_tokens=max_tokens,
-                stream=stream
-            )
-            # Handle streaming response
-            if stream:
-                return self._create_stream_iterator(response)
-            # Handle JSON extraction
-            if return_json:
-                content_text = response['choices'][0]['message']['content']
-                extracted_data = extract_json_from_content(content_text)
-                return {
-                    "status": "success",
-                    "data": extracted_data,
-                    "raw_content": content_text
-                }
-            return response
-        except Exception as e:
-            logger.error(f"Error generating completion: {e}")
-            raise
-    async def _create_stream_iterator(self, response_stream) -> AsyncIterator[str]:
-        """Create an async iterator for streaming responses"""
-        for chunk in response_stream:
-            yield f"data: {json.dumps(chunk)}\n\n"
-        yield "data: [DONE]\n\n"
-    async def cleanup(self) -> None:
-        """Cleanup resources"""
-        if self.model:
-            del self.model
-            self.model = None
-            logger.info("Text model unloaded")
-# Global instance
-text_service = TextService()

services/vision_service.py DELETED Viewed

@@ -1,144 +0,0 @@
-import logging
-import base64
-import io
-from typing import Optional, Dict, Any
-from llama_cpp import Llama
-from llama_cpp.llama_chat_format import Llava15ChatHandler
-from huggingface_hub import hf_hub_download
-from PIL import Image
-from config import config
-logger = logging.getLogger("vision-service")
-class VisionService:
-    """Service for vision-language model interactions"""
-    def __init__(self):
-        self.model: Optional[Llama] = None
-        self.chat_handler: Optional[Llava15ChatHandler] = None
-    async def initialize(self) -> None:
-        """Initialize the vision model"""
-        try:
-            logger.info(f"Downloading vision model: {config.VISION_MODEL_FILE}...")
-            model_path = hf_hub_download(
-                repo_id=config.VISION_MODEL_REPO,
-                filename=config.VISION_MODEL_FILE,
-                cache_dir=config.HF_HOME
-            )
-            logger.info(f"Downloading vision projector: {config.VISION_MMPROJ_FILE}...")
-            mmproj_path = hf_hub_download(
-                repo_id=config.VISION_MODEL_REPO,
-                filename=config.VISION_MMPROJ_FILE,
-                cache_dir=config.HF_HOME
-            )
-            logger.info(f"Loading vision model (Threads: {config.N_THREADS})...")
-            # NOTE: Llava15ChatHandler is the standard Python wrapper for loading
-            # external projectors (mmproj files), even for newer architectures like SmolVLM
-            self.chat_handler = Llava15ChatHandler(
-                clip_model_path=mmproj_path,
-                verbose=False
-            )
-            self.model = Llama(
-                model_path=model_path,
-                chat_handler=self.chat_handler,
-                n_ctx=config.VISION_MODEL_CTX,
-                n_threads=config.N_THREADS,
-                n_batch=config.VISION_MODEL_BATCH,
-                logits_all=True,
-                verbose=False,
-                n_gpu_layers=0  # Explicitly set to 0 to ensure CPU usage and prevent driver crashes
-            )
-            logger.info("✓ Vision model loaded successfully")
-        except Exception as e:
-            logger.error(f"Failed to initialize vision model: {e}")
-            # Ensure cleanup if initialization fails halfway
-            await self.cleanup()
-            raise
-    def is_ready(self) -> bool:
-        """Check if the model is loaded and ready"""
-        return self.model is not None and self.chat_handler is not None
-    async def analyze_image(
-        self,
-        image_data: bytes,
-        prompt: str,
-        temperature: float = 0.6,
-        max_tokens: int = 512
-    ) -> Dict[str, Any]:
-        """
-        Analyze an image with a text prompt
-        """
-        if not self.is_ready():
-            raise RuntimeError("Vision model not initialized")
-        try:
-            # Convert image bytes to base64 data URI
-            image_b64 = base64.b64encode(image_data).decode('utf-8')
-            # Validate image
-            image = Image.open(io.BytesIO(image_data))
-            # logger.info(f"Processing image: {image.size} | Format: {image.format}")
-            # Create vision message format
-            messages = [
-                {
-                    "role": "user",
-                    "content": [
-                        {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_b64}"}},
-                        {"type": "text", "text": prompt}
-                    ]
-                }
-            ]
-            logger.info(f"Analyzing image... Prompt: {prompt[:50]}")
-            response = self.model.create_chat_completion(
-                messages=messages,
-                temperature=temperature,
-                max_tokens=max_tokens
-            )
-            return {
-                "status": "success",
-                "image_info": {
-                    "size": list(image.size),
-                    "format": image.format,
-                    "mode": image.mode
-                },
-                "prompt": prompt,
-                "response": response['choices'][0]['message']['content'],
-                "usage": response.get('usage', {})
-            }
-        except Exception as e:
-            logger.error(f"Error analyzing image: {e}")
-            raise
-    async def cleanup(self) -> None:
-        """Cleanup resources"""
-        if self.model:
-            try:
-                del self.model
-            except:
-                pass
-            self.model = None
-        if self.chat_handler:
-            try:
-                del self.chat_handler
-            except:
-                pass
-            self.chat_handler = None
-        logger.info("Vision model unloaded")
-# Global instance
-vision_service = VisionService()

utils/json_extractor.py DELETED Viewed

@@ -1,133 +0,0 @@
-import json
-import logging
-from typing import List, Any
-logger = logging.getLogger("json-extractor")
-def find_balanced_closing_index(text: str, start_index: int) -> int:
-    """
-    Finds the matching closing bracket for the bracket at start_index.
-    Ignores brackets inside strings and comments.
-    """
-    start_char = text[start_index]
-    end_char = '}' if start_char == '{' else ']'
-    depth = 0
-    in_double_quote = False
-    in_single_quote = False
-    in_backtick = False
-    in_line_comment = False
-    in_block_comment = False
-    is_escaped = False
-    length = len(text)
-    i = start_index
-    while i < length:
-        char = text[i]
-        next_char = text[i+1] if i + 1 < length else ''
-        # Handle Escaping
-        if is_escaped:
-            is_escaped = False
-            i += 1
-            continue
-        if char == '\\' and not in_line_comment and not in_block_comment:
-            is_escaped = True
-            i += 1
-            continue
-        # Handle Comments
-        if in_line_comment:
-            if char == '\n': in_line_comment = False
-            i += 1
-            continue
-        if in_block_comment:
-            if char == '*' and next_char == '/':
-                in_block_comment = False
-                i += 2
-                continue
-            i += 1
-            continue
-        # Check comment starts
-        if not in_double_quote and not in_single_quote and not in_backtick:
-            if char == '/' and next_char == '/':
-                in_line_comment = True
-                i += 2
-                continue
-            if char == '/' and next_char == '*':
-                in_block_comment = True
-                i += 2
-                continue
-        # Handle Strings
-        if in_double_quote:
-            if char == '"': in_double_quote = False
-            i += 1
-            continue
-        if in_single_quote:
-            if char == "'": in_single_quote = False
-            i += 1
-            continue
-        if in_backtick:
-            if char == '`': in_backtick = False
-            i += 1
-            continue
-        if char == '"':
-            in_double_quote = True
-            i += 1
-            continue
-        if char == "'":
-            in_single_quote = True
-            i += 1
-            continue
-        if char == '`':
-            in_backtick = True
-            i += 1
-            continue
-        # Handle Bracket Counting
-        if char == start_char:
-            depth += 1
-        elif char == end_char:
-            depth -= 1
-            if depth == 0:
-                return i
-        i += 1
-    return -1
-def extract_json_from_content(content: str) -> List[Any]:
-    """
-    Scans text for JSON objects/arrays using state machine logic.
-    """
-    if not content or not isinstance(content, str):
-        return []
-    found_blocks = []
-    cursor = 0
-    length = len(content)
-    while cursor < length:
-        if content[cursor] not in ['{', '[']:
-            cursor += 1
-            continue
-        end_index = find_balanced_closing_index(content, cursor)
-        if end_index != -1:
-            raw_candidate = content[cursor : end_index + 1]
-            try:
-                parsed = json.loads(raw_candidate)
-                found_blocks.append(parsed)
-                cursor = end_index + 1
-                continue
-            except json.JSONDecodeError:
-                pass
-        cursor += 1
-    return found_blocks