Spaces:
Sleeping
Sleeping
| """ | |
| Probability Manager — Aggressive Bayesian update + smart elimination. | |
| Fix for confidence stuck at 38%: | |
| - YES answer multiplier raised to 8× for match, 0.001× for mismatch | |
| (was 5× / 0.005×). This makes each YES dramatically more decisive. | |
| - soft_filter now eliminates items whose probability < 1% of top item | |
| (was 0.1%). At 8 active items all at ~12%, a single YES should push | |
| the matching ones to dominate quickly. | |
| - After elimination, normalize() is called again so probabilities sum to 1. | |
| """ | |
| import logging | |
| from typing import List, Dict | |
| from models.item_model import Item | |
logger = logging.getLogger(__name__)


class ProbabilityManager:
    """Likelihood-weighted probability updates and pruning for candidate items.

    Each answer multiplies an item's probability by a factor that depends on
    whether the item matches the asked question. Items whose probability
    falls far below the current leader's are flagged as eliminated.
    """

    # Multipliers per answer: 'match' applies when the item matches the
    # question, 'mismatch' otherwise.
    # YES on a matching attribute → ×8; mismatch → ×0.001 (near elimination).
    LIKELIHOOD: Dict[str, Dict[str, float]] = {
        'yes': {'match': 8.0, 'mismatch': 0.001},
        'probably': {'match': 3.5, 'mismatch': 0.15},
        'dontknow': {'match': 1.0, 'mismatch': 1.0},
        'probablynot': {'match': 0.15, 'mismatch': 3.5},
        'no': {'match': 0.001, 'mismatch': 8.0},
    }

    FLOOR = 1e-12  # probability never goes below this

    # soft_filter tuning knobs.
    MIN_POOL_SIZE = 5  # pools this small (or smaller) are never filtered
    RELATIVE_THRESHOLD = 0.01  # cut items below 1% of the leader's probability
    MAX_ELIMINATION_FRACTION = 0.80  # never cut more than 80% in one pass

    def update_item_probability(
        self, item: "Item", question: Dict, answer: str
    ) -> float:
        """Return the item's un-normalized posterior for one answer.

        The item's current probability is multiplied by the likelihood for
        ``answer`` and clamped to ``FLOOR``. Answers that are not keys of
        ``LIKELIHOOD`` are treated as 'dontknow' (multiplier 1.0).

        The step is appended to ``item.evidence`` and ``item.match_history``.
        The posterior is only returned; this method does not assign it to
        ``item.probability``.

        Raises:
            KeyError: if ``question`` has no 'question' key.
        """
        matches = item.matches_question(question)
        params = self.LIKELIHOOD.get(answer, self.LIKELIHOOD['dontknow'])
        likelihood = params['match'] if matches else params['mismatch']
        posterior = max(item.probability * likelihood, self.FLOOR)

        question_text = question['question']
        item.evidence.append((question_text, answer, likelihood))
        item.match_history.append((question_text, matches))
        return posterior

    def normalize_probabilities(self, items: List["Item"]) -> List["Item"]:
        """Rescale the probabilities of active items so they sum to 1.

        Works in place and returns the same list. Eliminated items keep
        whatever probability they had. If every item is eliminated, all of
        them are reactivated first. If the active mass is (almost) zero, the
        active items are reset to a uniform distribution.
        """
        if not items:
            # Nothing to normalize; without this guard the uniform reset
            # below would divide by zero.
            return items

        active = [i for i in items if not i.eliminated]
        if not active:
            logger.warning("All eliminated — reactivating all items.")
            for item in items:
                item.eliminated = False
            active = items

        total = sum(i.probability for i in active)
        if total < 1e-20:
            logger.warning("Probability mass vanished — resetting uniform.")
            uniform = 1.0 / len(active)
            for item in active:
                item.probability = uniform
            return items

        for item in active:
            item.probability /= total
        return items

    def soft_filter(self, items: List["Item"]) -> List["Item"]:
        """Eliminate active items far below the leader's probability.

        An active item is eliminated when its probability is below 1% of the
        top active item's probability, subject to these limits:

        - Pools of 5 or fewer active items are left untouched.
        - Only the top-ranked item is immune; every other item can be cut.
        - At most 80% of the active pool is eliminated in one pass. When
          that cap binds, the weakest candidates are cut first so the
          strongest ones stay active.

        If anything was eliminated, the remaining active items are
        re-normalized. Works in place and returns the same list.
        """
        active = [i for i in items if not i.eliminated]
        n = len(active)
        if n <= self.MIN_POOL_SIZE:
            return items  # nothing to filter yet

        ranked = sorted(active, key=lambda x: x.probability, reverse=True)
        threshold = ranked[0].probability * self.RELATIVE_THRESHOLD
        max_elim = int(n * self.MAX_ELIMINATION_FRACTION)

        # Skip index 0 — only rank #1 is immune, everyone else can be cut.
        # `ranked` is descending, so `doomed` is ordered strongest-first.
        doomed = [item for item in ranked[1:] if item.probability < threshold]
        if len(doomed) > max_elim:
            # Cap reached: keep the tail, i.e. drop the weakest candidates
            # and spare the strongest below-threshold ones.
            doomed = doomed[len(doomed) - max_elim:]

        for item in doomed:
            item.eliminated = True

        if doomed:
            logger.debug(
                "soft_filter: eliminated %d/%d items (threshold=%.2e)",
                len(doomed), n, threshold,
            )
            # Re-normalize after elimination
            self.normalize_probabilities(items)
        return items