"""
Production multimodal classifier — uses real trained models.

Supports three prediction modes:
- Text only:  TF-IDF + LinearSVC pipeline (83% accuracy)
- Image only: Voting System with 3 models (92% accuracy)
- Fusion:     Weighted average of both (60% image + 40% text = ~94%)

The text model uses decision_function + softmax to produce probabilities
from LinearSVC (which doesn't natively support predict_proba).
"""

import sys
import os
import joblib
import json
import numpy as np
from pathlib import Path

# Add project root to path for cross-module imports
current_dir = os.path.dirname(os.path.abspath(__file__))
root_dir = os.path.abspath(os.path.join(current_dir, "../../../"))
if root_dir not in sys.path:
    sys.path.append(root_dir)

from config import MODELS_DIR, TEXT_MODEL_PATH, CATEGORY_MAPPING_PATH, FUSION_W_IMAGE, FUSION_W_TEXT
from src.models.predict_model import VotingPredictor


class MultimodalClassifier:
    """Loads all models once, exposes predict_text / predict_image / predict_fusion.

    Attributes set by ``__init__``:
        w_text, w_image: fusion weights read from ``config``.
        mapping: dict of category code -> human-readable name ({} if the
            mapping file could not be loaded).
        voting: the ``VotingPredictor`` instance, or ``None`` if loading failed.
        text_model: the object loaded from ``TEXT_MODEL_PATH`` with joblib,
            or ``None`` if loading failed.
    """

    def __init__(self):
        """Load the category mapping and both models; never raises on load failure.

        Each loading step is best-effort: a failure is printed and the
        corresponding attribute falls back to ``{}`` / ``None`` so the other
        prediction modes stay usable.
        """
        # Fusion weights from config (single source of truth)
        self.w_text = FUSION_W_TEXT
        self.w_image = FUSION_W_IMAGE

        # 1. Category mapping (code -> human-readable name)
        self.mapping = self._load_category_mapping(CATEGORY_MAPPING_PATH)

        # 2. Image model — Voting System (DINOv3 + XGBoost + EfficientNet)
        try:
            self.voting = VotingPredictor(MODELS_DIR)
            self.voting.load_models()
        except Exception as e:
            print(f"Image model error: {e}")
            self.voting = None

        # 3. Text model — TF-IDF FeatureUnion + LinearSVC
        try:
            self.text_model = joblib.load(TEXT_MODEL_PATH)
        except Exception as e:
            print(f"Text model error: {e}")
            self.text_model = None

    @staticmethod
    def _load_category_mapping(path):
        """Read the JSON category mapping at *path*, returning {} on failure.

        The file is first opened as UTF-8; if that attempt fails for any
        reason it is retried with the platform default encoding. When both
        attempts fail the last error is printed (instead of being silently
        discarded) and an empty mapping is returned.
        """
        last_error = None
        # encoding=None makes open() use the platform default encoding.
        for encoding in ("utf-8", None):
            try:
                with open(path, 'r', encoding=encoding) as f:
                    return json.load(f)
            except Exception as e:
                last_error = e
        print(f"Category mapping error: {last_error}")
        return {}

    def _format_result(self, label, score):
        """Format a single prediction as {label, name, confidence}.

        ``label`` is stringified, ``name`` is looked up in ``self.mapping``
        (falling back to a generic "Produit Type <label>" string) and
        ``score`` is converted to a plain ``float``.
        """
        key = str(label)
        return {
            "label": key,
            "name": self.mapping.get(key, f"Produit Type {label}"),
            "confidence": float(score),
        }

    def predict_image(self, image_path):
        """Run image-only classification through the Voting System.

        Returns a list of formatted results in the order produced by
        ``self.voting.predict``. Returns ``[]`` when the image model is not
        loaded or the prediction raises (the error is printed).
        """
        # Compare against the None sentinel set by __init__ rather than
        # relying on the truthiness of the predictor object.
        if self.voting is None:
            return []

        try:
            raw_res = self.voting.predict(image_path)
            return [self._format_result(r['label'], r['confidence']) for r in raw_res]
        except Exception as e:
            print(f"Image prediction error: {e}")
            return []

    def predict_text(self, text):
        """Run text-only classification through LinearSVC.

        LinearSVC uses decision_function (not predict_proba), so we convert
        raw scores to probabilities via softmax:
        exp(s - max) / sum(exp(s - max)).

        ``text`` may be a single string (it is wrapped in a list) or an
        already list-like input; only the first sample's scores are used.
        Returns one formatted result per entry of ``text_model.classes_``,
        sorted by descending confidence, or ``[]`` when the model is not
        loaded, exposes neither scoring method, or the prediction raises
        (the error is printed).
        """
        # Compare against the None sentinel: sklearn pipelines define
        # __len__, so truthiness is not a reliable "is loaded" signal.
        if self.text_model is None:
            return []

        try:
            if isinstance(text, str):
                text = [text]

            # Get probabilities from the sklearn pipeline
            if hasattr(self.text_model, "predict_proba"):
                probs = self.text_model.predict_proba(text)[0]
            elif hasattr(self.text_model, "decision_function"):
                scores = self.text_model.decision_function(text)[0]
                # Softmax conversion for LinearSVC raw scores; subtracting
                # the max keeps exp() from overflowing.
                exp_scores = np.exp(scores - np.max(scores))
                probs = exp_scores / exp_scores.sum()
            else:
                return []

            # Build results for every class, sorted by confidence
            results = [
                self._format_result(class_id, probs[i])
                for i, class_id in enumerate(self.text_model.classes_)
            ]
            return sorted(results, key=lambda x: x['confidence'], reverse=True)
        except Exception as e:
            print(f"Text prediction error: {e}")
            return []

    def predict_fusion(self, text, image_path):
        """Late fusion: combine text and image scores with configurable weights.

        For each class, the fused score =
        w_text * text_score + w_image * image_score.
        This allows classes missed by one modality to be rescued by the other.

        If one modality produces no results at all (its model is not loaded
        or its prediction failed), the remaining scores are rescaled by
        (w_text + w_image) / weight_of_contributing_modality, so the output
        stays a weighted average over the modalities that actually
        contributed instead of being capped at that modality's weight.
        When both modalities contribute, no rescaling is applied.

        Returns formatted results sorted by descending fused confidence;
        ``[]`` when neither modality produced anything.
        """
        res_text = self.predict_text(text)
        res_image = self.predict_image(image_path)

        # Total weight of the modalities that returned at least one result.
        active_weight = 0.0
        if res_text:
            active_weight += self.w_text
        if res_image:
            active_weight += self.w_image

        # Rescale only when exactly one modality is missing; guard against a
        # zero (or negative) weight on the surviving modality.
        if res_text and res_image:
            scale = 1.0
        elif active_weight > 0:
            scale = (self.w_text + self.w_image) / active_weight
        else:
            scale = 1.0

        # Merge scores by label
        fusion_scores = {}
        for item in res_text:
            fusion_scores[item['label']] = item['confidence'] * self.w_text * scale

        for item in res_image:
            label = item['label']
            fusion_scores[label] = (
                fusion_scores.get(label, 0.0) + item['confidence'] * self.w_image * scale
            )

        # Sort and return
        final_results = [
            self._format_result(label, score) for label, score in fusion_scores.items()
        ]
        return sorted(final_results, key=lambda x: x['confidence'], reverse=True)