"""Word-prediction model classes that combine N-gram statistics with fuzzy logic.

The frequency tables are loaded from ``brain_data_processor.pkl``.
"""

import re
from collections import Counter
from typing import List, Tuple

import numpy as np

# Anything that is neither an ASCII letter nor whitespace is stripped.
_NON_ALPHA_RE = re.compile(r'[^a-zA-Z\s]')

# Number of most frequent unigrams offered when no n-gram context matches.
_UNIGRAM_FALLBACK_SIZE = 100


class DataProcessorWrapper:
    """Container for the frequency tables used by the prediction models.

    The class is needed for unpickling ``brain_data_processor.pkl``, so its
    name and attribute names must stay stable.
    """

    def __init__(self, unigram_freq, bigram_freq, trigram_freq, vocabulary,
                 slang_dict):
        self.unigram_freq = unigram_freq
        # Stored as plain dicts regardless of the mapping type passed in.
        self.bigram_freq = dict(bigram_freq)
        self.trigram_freq = dict(trigram_freq)
        self.vocabulary = vocabulary
        self.slang_dict = slang_dict
        self.vocab_size = len(vocabulary)
        self.total_words = sum(unigram_freq.values())


def preprocess_text(text: str, slang_dict: dict) -> Tuple[List[str], List[str]]:
    """Clean and normalize *text* in the order: regex -> slang normalization.

    Stopwords are NOT removed, because the keyboard needs to predict them.

    Args:
        text: Raw input text.
        slang_dict: Mapping from a slang token to its normalized form.

    Returns:
        A ``(words, transformations)`` tuple. ``words`` is the list of
        processed tokens; ``transformations`` is a list with one log entry
        per slang replacement (used for the X-Ray view).
    """
    # Step 1: drop every character that is not a letter or whitespace.
    cleaned = _NON_ALPHA_RE.sub('', text)

    # Step 2: lowercase and tokenize on whitespace.
    tokens = cleaned.lower().split()

    # Step 3: slang normalization, recording every replacement made.
    normalized_words = []
    transformations = []
    for token in tokens:
        if token in slang_dict:
            normalized = slang_dict[token]
            transformations.append(f"'{token}' → '{normalized}'")
            normalized_words.append(normalized)
        else:
            normalized_words.append(token)

    return normalized_words, transformations


def _bind_frequency_tables(model, data_processor) -> None:
    """Copy the frequency tables and size statistics onto *model*."""
    model.unigram_freq = data_processor.unigram_freq
    model.bigram_freq = data_processor.bigram_freq
    model.trigram_freq = data_processor.trigram_freq
    model.vocabulary = data_processor.vocabulary
    model.vocab_size = data_processor.vocab_size
    model.total_words = data_processor.total_words


def _laplace_scores(candidates, vocab_size) -> dict:
    """Return add-one smoothed probabilities for a ``{word: count}`` mapping."""
    denominator = sum(candidates.values()) + vocab_size
    return {word: (count + 1) / denominator
            for word, count in candidates.items()}


def _backoff_predictions(model, context: List[str],
                         top_k: int) -> List[Tuple[str, float]]:
    """Score next-word candidates with trigram -> bigram -> unigram backoff.

    Shared by every model in this module so that they all rank the same
    base candidates.
    """
    scores = {}

    # Try the trigram table first (needs at least two context words).
    if len(context) >= 2:
        candidates = model.trigram_freq.get((context[-2], context[-1]))
        if candidates:
            scores = _laplace_scores(candidates, model.vocab_size)

    # Back off to the bigram table.
    if not scores and len(context) >= 1:
        candidates = model.bigram_freq.get(context[-1])
        if candidates:
            scores = _laplace_scores(candidates, model.vocab_size)

    # Back off to the most frequent unigrams (relative frequency, unsmoothed).
    if not scores:
        most_frequent = Counter(model.unigram_freq).most_common(
            _UNIGRAM_FALLBACK_SIZE)
        for word, count in most_frequent:
            scores[word] = count / model.total_words

    # Highest score first; the sort is stable, so ties keep insertion order.
    ranked = sorted(scores.items(), key=lambda item: item[1], reverse=True)
    return ranked[:top_k]


def _triangular(value, peak, width):
    """Triangular membership: 1 at *peak*, falling linearly to 0 at +/- *width*."""
    return max(0, 1 - abs(value - peak) / width)


class BaseNGramModel:
    """Pure probabilistic N-gram model with a backoff mechanism."""

    def __init__(self, data_processor):
        _bind_frequency_tables(self, data_processor)

    def predict(self, context: List[str],
                top_k: int = 3) -> List[Tuple[str, float]]:
        """Predict the next word from *context*.

        Args:
            context: Preceding words, oldest first.
            top_k: Maximum number of predictions to return.

        Returns:
            ``[(word, probability), ...]`` sorted by descending probability.
        """
        return _backoff_predictions(self, context, top_k)


class FuzzyManualModel:
    """Fuzzy-logic model with manually chosen membership parameters."""

    # Half-width of the triangular membership functions.
    PROB_WIDTH = 0.3
    POP_WIDTH = 2.5  # in log10 units
    # Number of n-gram candidates that are re-scored by the fuzzy rules.
    CANDIDATE_POOL_SIZE = 50

    def __init__(self, data_processor):
        _bind_frequency_tables(self, data_processor)

        # Manual parameters for the fuzzy membership functions.
        # Probability: [low_peak, medium_peak, high_peak]
        self.prob_params = [0.15, 0.45, 0.85]

        # Popularity: [rare_peak, common_peak, verycommon_peak] (log10 scale)
        self.pop_params = [2.0, 4.5, 7.0]

        # Blend between the raw probability and the fuzzy rule output.
        self.weights = {
            'prob': 0.6,  # 60% weight on the n-gram probability
            'pop': 0.4    # 40% weight on the fuzzy score
        }

    def _get_base_predictions(self, context: List[str],
                              top_k: int = 50) -> List[Tuple[str, float]]:
        """Get base predictions using the n-gram backoff model."""
        return _backoff_predictions(self, context, top_k)

    def fuzzify_prob(self, prob):
        """Return the low/medium/high membership degrees of a probability."""
        low_peak, medium_peak, high_peak = self.prob_params[:3]
        return {
            'low': _triangular(prob, low_peak, self.PROB_WIDTH),
            'medium': _triangular(prob, medium_peak, self.PROB_WIDTH),
            'high': _triangular(prob, high_peak, self.PROB_WIDTH),
        }

    def fuzzify_pop(self, count):
        """Return rare/common/very_common memberships of a word count.

        The count is clamped to at least 1 and compared on a log10 scale.
        """
        log_count = np.log10(max(1, count))
        rare_peak, common_peak, very_common_peak = self.pop_params[:3]
        return {
            'rare': _triangular(log_count, rare_peak, self.POP_WIDTH),
            'common': _triangular(log_count, common_peak, self.POP_WIDTH),
            'very_common': _triangular(log_count, very_common_peak,
                                       self.POP_WIDTH),
        }

    def fuzzy_inference(self, prob_fuzzy, pop_fuzzy):
        """Apply the fuzzy rules and return the strongest rule output.

        Args:
            prob_fuzzy: Memberships as returned by :meth:`fuzzify_prob`.
            pop_fuzzy: Memberships as returned by :meth:`fuzzify_pop`.

        Returns:
            The maximum of the four rule outputs.
        """
        # Rule 1: high prob AND very common pop -> excellent (0.9)
        rule1 = min(prob_fuzzy['high'], pop_fuzzy['very_common']) * 0.9

        # Rule 2: medium prob AND common pop -> good (0.6)
        rule2 = min(prob_fuzzy['medium'], pop_fuzzy['common']) * 0.6

        # Rule 3: low prob BUT very common pop -> fair (0.45)
        rule3 = min(prob_fuzzy['low'], pop_fuzzy['very_common']) * 0.45

        # Rule 4: fallback -> poor (mean of the weighted low/rare memberships)
        rule4 = (prob_fuzzy['low'] * 0.2 + pop_fuzzy['rare'] * 0.1) / 2

        # Aggregation takes the strongest rule (max), not a weighted average.
        return max(rule1, rule2, rule3, rule4)

    def predict(self, context: List[str],
                top_k: int = 3) -> List[Tuple[str, float]]:
        """Predict the next word, re-ranking n-gram candidates by fuzzy score.

        Args:
            context: Preceding words, oldest first.
            top_k: Maximum number of predictions to return.

        Returns:
            ``[(word, score), ...]`` sorted by descending blended score.
        """
        base_preds = self._get_base_predictions(
            context, top_k=self.CANDIDATE_POOL_SIZE)

        fuzzy_scores = {}
        for word, prob in base_preds:
            # Popularity is the unigram count; words not in the table count as 1.
            pop_count = self.unigram_freq.get(word, 1)

            prob_fuzzy = self.fuzzify_prob(prob)
            pop_fuzzy = self.fuzzify_pop(pop_count)
            fuzzy_score = self.fuzzy_inference(prob_fuzzy, pop_fuzzy)

            # Blend the raw probability with the fuzzy rule output.
            fuzzy_scores[word] = (self.weights['prob'] * prob +
                                  self.weights['pop'] * fuzzy_score)

        ranked = sorted(fuzzy_scores.items(), key=lambda item: item[1],
                        reverse=True)
        return ranked[:top_k]


class FuzzyGAModel(FuzzyManualModel):
    """Fuzzy-logic model whose parameters come from a Genetic Algorithm."""

    def __init__(self, data_processor, ga_params):
        super().__init__(data_processor)
        # Override the manual peaks: the first three values are the
        # probability peaks, the next three the popularity peaks.
        self.prob_params = ga_params[:3]
        self.pop_params = ga_params[3:6]


class FuzzyPSOModel(FuzzyManualModel):
    """Fuzzy-logic model whose parameters come from Particle Swarm Optimization."""

    def __init__(self, data_processor, pso_params):
        super().__init__(data_processor)
        # Override the manual peaks: the first three values are the
        # probability peaks, the next three the popularity peaks.
        self.prob_params = pso_params[:3]
        self.pop_params = pso_params[3:6]