File size: 1,800 Bytes
bc1fb7d
b46360a
bc1fb7d
b46360a
 
 
 
 
 
bc1fb7d
 
b46360a
 
 
 
 
 
 
 
 
 
 
bc1fb7d
b46360a
 
 
 
 
 
 
 
 
 
bc1fb7d
b46360a
 
 
bc1fb7d
 
b46360a
 
bc1fb7d
b46360a
bc1fb7d
b46360a
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
"""Utility functions for MelanoScope AI."""
import logging
from typing import List, Any
import numpy as np
import pandas as pd
from ..config.settings import ModelConfig

logger = logging.getLogger(__name__)

def probabilities_to_ints(probabilities: np.ndarray, total_sum: int = ModelConfig.PROBABILITY_SUM) -> np.ndarray:
    """Convert probabilities to integers that sum to total_sum.

    Negative entries are clamped to zero, the remaining values are rescaled
    so that they add up to ``total_sum``, and the result is rounded to
    integers. Any residual left by rounding is added to the entry with the
    largest (clamped) input value so the output sums exactly to
    ``total_sum``.

    Args:
        probabilities: Array-like of numeric scores. The shape of the input
            is preserved in the result.
        total_sum: Target sum of the returned integers.

    Returns:
        Integer array shaped like the input. All zeros when no input value
        is positive (this includes empty input).

    Raises:
        ValueError: If the input contains NaN or infinite values, or if any
            step of the conversion fails. The underlying exception is
            chained as ``__cause__``.
    """
    try:
        probabilities = np.asarray(probabilities)

        # NaN/inf would survive the arithmetic below and be cast to
        # meaningless integers by astype(int), so reject them up front.
        if not np.all(np.isfinite(probabilities)):
            raise ValueError("probabilities must be finite (no NaN or inf)")

        positive_values = np.maximum(probabilities, 0)
        total_positive = positive_values.sum()

        # Nothing positive to distribute: avoid dividing by zero.
        if total_positive == 0:
            return np.zeros_like(probabilities, dtype=int)

        scaled = positive_values / total_positive * total_sum
        rounded = np.round(scaled).astype(int)

        # Independent rounding can leave the total off by a few units;
        # push the whole correction onto the largest input entry.
        diff = total_sum - rounded.sum()
        if diff != 0:
            # argmax returns an index into the flattened array, so apply the
            # correction on a flattened copy and restore the shape afterwards.
            max_idx = int(np.argmax(positive_values))
            rounded = rounded.flatten()
            rounded[max_idx] += diff
            rounded = rounded.reshape(scaled.shape)

        return rounded

    except Exception as e:
        logger.error("Error converting probabilities: %s", e)
        raise ValueError(f"Invalid probability values: {e}") from e

def create_empty_dataframe(classes: List[str]) -> pd.DataFrame:
    """Build a placeholder probability table with one zeroed row per class.

    Args:
        classes: Class labels; they become the ``item`` column.

    Returns:
        DataFrame with columns ``item`` and ``probability``, where every
        ``probability`` value is 0.
    """
    zero_column = [0] * len(classes)
    columns = {"item": classes, "probability": zero_column}
    return pd.DataFrame(columns)

def format_confidence(probability: float, precision: int = ModelConfig.PROBABILITY_PRECISION) -> str:
    """Render a fractional probability as a percentage string.

    The value is multiplied by 100 and formatted in fixed-point notation
    with ``precision`` decimal places, followed by ``%``.

    Args:
        probability: Value to format; it is multiplied by 100 as given.
        precision: Number of digits after the decimal point.

    Returns:
        The formatted percentage, or the literal ``"0.0%"`` if formatting
        fails for any reason (the failure is logged, not raised).
    """
    try:
        percent = probability * 100
        text = format(percent, f".{precision}f") + "%"
    except Exception as e:
        # Best-effort helper: log the problem and return a neutral value.
        logger.error(f"Error formatting confidence: {e}")
        return "0.0%"
    return text