File size: 9,374 Bytes
333c083
 
706520f
 
333c083
 
88bdcff
f3ebc82
88bdcff
 
 
 
f3ebc82
 
88bdcff
 
706520f
333c083
706520f
 
333c083
88bdcff
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
333c083
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88bdcff
706520f
 
333c083
706520f
88bdcff
 
333c083
f3ebc82
88bdcff
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
333c083
 
 
 
88bdcff
 
 
 
333c083
 
 
 
88bdcff
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f3ebc82
 
706520f
f3ebc82
 
88bdcff
706520f
 
88bdcff
 
 
706520f
f3ebc82
 
 
 
 
88bdcff
 
 
 
f3ebc82
 
 
 
 
 
88bdcff
 
 
 
 
 
 
 
f3ebc82
 
706520f
f3ebc82
88bdcff
 
f3ebc82
 
 
 
 
 
88bdcff
 
f3ebc82
88bdcff
 
f3ebc82
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88bdcff
 
f3ebc82
 
 
 
 
 
 
 
 
 
 
88bdcff
 
5f0db1e
 
333c083
5f0db1e
88bdcff
 
 
 
 
333c083
88bdcff
 
333c083
f3ebc82
706520f
 
 
333c083
f3ebc82
88bdcff
 
 
 
333c083
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
"""Mock model implementations for local development on RTX 4090.

Simulates the 30B-A3B FP8 vision model architecture:
- MockVisionModel simulates single-model analysis + JSON output
- All models loaded together at startup (no lazy loading)
"""

import logging
import random
from typing import Any
from PIL import Image

logger = logging.getLogger(__name__)


class MockVisionModel:
    """Mock vision model that simulates 30B-A3B FP8 model output.

    Produces a structured-analysis dict shaped like the real model's JSON
    output. The production model runs under vLLM with FP8 quantization;
    here every value is randomly generated.
    """

    ZONES = ["burn", "near-field", "far-field"]
    CONDITIONS = ["background", "light", "moderate", "heavy", "structural-damage"]
    MATERIALS = [
        {"type": "steel", "category": "non-porous"},
        {"type": "concrete", "category": "non-porous"},
        {"type": "glass", "category": "non-porous"},
        {"type": "cmu", "category": "non-porous"},
        {"type": "drywall-painted", "category": "semi-porous"},
        {"type": "wood-sealed", "category": "semi-porous"},
        {"type": "drywall-unpainted", "category": "porous"},
        {"type": "carpet", "category": "porous"},
        {"type": "insulation-fiberglass", "category": "porous"},
        {"type": "acoustic-tile", "category": "porous"},
        {"type": "ductwork-rigid", "category": "hvac"},
        {"type": "ductwork-flexible", "category": "hvac"},
    ]

    # Canned per-zone reasoning text, simulating a Thinking model's rationale.
    REASONING_PATTERNS = {
        "burn": "Direct fire involvement evident from structural char and complete combustion patterns.",
        "near-field": "Adjacent to burn zone with heavy smoke deposits and heat-induced discoloration.",
        "far-field": "Light smoke migration only, no direct heat exposure or structural damage visible.",
    }

    # Canned per-condition reasoning text keyed by contamination level.
    CONDITION_REASONING = {
        "background": "Surfaces appear clean with no visible contamination.",
        "light": "Faint discoloration visible, minimal deposits present.",
        "moderate": "Clear contamination with visible film on surfaces.",
        "heavy": "Thick deposits obscuring surface texture.",
        "structural-damage": "Physical damage requiring repair before cleaning.",
    }

    def analyze_image(self, image: Image.Image, context: str = "") -> dict[str, Any]:
        """Return mock vision analysis simulating 30B-A3B FP8 model output.

        Args:
            image: Input image; unused by the mock (the real model consumes it).
            context: Free-text prompt context; only its length is logged.

        Returns:
            Dict mirroring the real model's structured output: zone,
            condition, materials, combustion indicators, sampling
            recommendations, and review flags.
        """
        logger.debug(f"Mock 30B-A3B FP8 vision analysis (context: {len(context)} chars)")

        # Pick a random zone/condition pair, as the model "decides" them.
        zone = random.choice(self.ZONES)
        condition = random.choice(self.CONDITIONS)

        logger.info(f"Mock vision result: zone={zone}, condition={condition}")

        # 2-4 materials, each a shallow copy of a template plus mock metadata.
        materials = [
            {
                **random.choice(self.MATERIALS),
                "confidence": round(random.uniform(0.75, 0.95), 2),
                "location_description": "Visible in image",
                "bounding_box": {
                    "x": round(random.uniform(0.1, 0.3), 2),
                    "y": round(random.uniform(0.1, 0.3), 2),
                    "width": round(random.uniform(0.2, 0.5), 2),
                    "height": round(random.uniform(0.2, 0.5), 2),
                },
            }
            for _ in range(random.randint(2, 4))
        ]

        # Coin-flip each combustion indicator; description only when present.
        indicators: dict[str, Any] = {}
        for visible_key, detail_key, detail_text in (
            ("soot_visible", "soot_pattern", "Visible deposition on horizontal surfaces"),
            ("char_visible", "char_description", "Angular black particles visible"),
            ("ash_visible", "ash_description", "Gray powdery residue on surfaces"),
        ):
            present = random.choice([True, False])
            indicators[visible_key] = present
            indicators[detail_key] = detail_text if present else None

        return {
            "zone": {
                "classification": zone,
                "confidence": round(random.uniform(0.7, 0.95), 2),
                "reasoning": self.REASONING_PATTERNS.get(
                    zone,
                    f"Mock analysis detected {zone} zone characteristics",
                ),
            },
            "condition": {
                "level": condition,
                "confidence": round(random.uniform(0.65, 0.90), 2),
                "reasoning": self.CONDITION_REASONING.get(
                    condition,
                    f"Surface shows {condition} contamination levels",
                ),
            },
            "materials": materials,
            "combustion_indicators": indicators,
            "structural_concerns": [],
            "access_issues": [],
            "recommended_sampling_locations": [
                {
                    "description": "Center of visible contamination",
                    "sample_type": "tape_lift",
                    "priority": "high",
                },
                {
                    "description": "Comparison area with less contamination",
                    "sample_type": "surface_wipe",
                    "priority": "medium",
                },
            ],
            "flags_for_review": [],
        }


class MockEmbeddingModel:
    """Mock embedding model that returns deterministic vectors.

    Dimension matches Qwen3-VL-Embedding-2B (2048-dim).
    Uses last-token pooling concept with L2 normalization.
    """

    def __init__(self, dimension: int = 2048):
        """Initialize with dimension matching real Qwen3-VL-Embedding-2B model."""
        self.dimension = dimension

    def embed(self, text: str) -> list[float]:
        """Return mock embedding vector (2048-dim, L2 normalized).

        Deterministic per input text: the same text yields the same vector
        in every process and run, simulating last-token pooling.
        """
        import hashlib
        import math

        # Derive a stable per-text seed. The builtin hash() is salted per
        # process (PYTHONHASHSEED), which silently broke the promised
        # run-to-run reproducibility; a digest of the text is stable everywhere.
        seed = int.from_bytes(hashlib.sha256(text.encode("utf-8")).digest()[:8], "big")

        # Use a private RNG instance so we never mutate (or reset) the
        # module-global random state out from under callers.
        rng = random.Random(seed)
        embedding = [rng.uniform(-1, 1) for _ in range(self.dimension)]

        # L2 normalize (matching real model behavior)
        norm = math.sqrt(sum(x * x for x in embedding))
        if norm > 0:
            embedding = [x / norm for x in embedding]

        return embedding

    def embed_batch(self, texts: list[str]) -> list[list[float]]:
        """Return mock embeddings for a batch of texts."""
        return [self.embed(text) for text in texts]


class MockRerankerModel:
    """Mock reranker that returns realistic relevance scores.

    Simulates Qwen3-VL-Reranker-2B behavior with 0-1 sigmoid-like scores.
    """

    def rerank(self, query: str, documents: list[str]) -> list[float]:
        """Return mock reranking scores (0-1 range, higher = more relevant).

        Uses word overlap + sigmoid-like transformation to mimic real behavior.
        """
        import math

        scores = []
        query_words = set(query.lower().split())

        for doc in documents:
            doc_words = set(doc.lower().split())
            # Calculate Jaccard-like overlap
            if len(query_words) > 0:
                overlap = len(query_words & doc_words)
                # Scale to get a raw score
                raw_score = overlap / max(len(query_words), 1) * 3 - 1.5
            else:
                raw_score = 0

            # Add small random noise
            noise = random.uniform(-0.3, 0.3)
            raw_score += noise

            # Apply sigmoid to get 0-1 range (mimics real model behavior)
            score = 1 / (1 + math.exp(-raw_score))
            scores.append(score)

        return scores

    def rerank_with_indices(
        self, query: str, documents: list[str], top_k: int = None
    ) -> list[tuple[int, float]]:
        """Rerank and return sorted (index, score) tuples."""
        scores = self.rerank(query, documents)
        indexed_scores = list(enumerate(scores))
        indexed_scores.sort(key=lambda x: x[1], reverse=True)
        if top_k is not None:
            indexed_scores = indexed_scores[:top_k]
        return indexed_scores


class MockModelStack:
    """Mock model stack for local development.

    Mirrors production behavior: every model is constructed together at
    startup rather than lazily on first use.
    """

    def __init__(self):
        # Build the full stack eagerly; load_all() only flips the flag
        # and logs, matching the production startup sequence.
        self.vision = MockVisionModel()
        self.embedding = MockEmbeddingModel()
        self.reranker = MockRerankerModel()
        self._loaded = False

    def load_all(self) -> "MockModelStack":
        """Mark all mock models as loaded and return self for chaining."""
        logger.info("Loading mock models for local development")
        for detail in (
            "  Vision model: MockVisionModel (simulates 30B-A3B FP8)",
            "  Embedding model: MockEmbeddingModel (2048-dim)",
            "  Reranker model: MockRerankerModel (simulates 2B)",
        ):
            logger.debug(detail)
        self._loaded = True
        logger.info("All mock models loaded successfully")
        return self

    def is_loaded(self) -> bool:
        """Report whether load_all() has completed."""
        return self._loaded