# Provenance (copy-paste residue from a commit header, kept as a comment so
# the file parses): Fazle Mawla Wahyuhanda,
# "Add utils module and brain_params.json", commit f98879b
"""
Model classes untuk prediksi kata dengan Fuzzy Logic
Load dari brain_data_processor.pkl
"""
import re
import numpy as np
from typing import List, Tuple
from collections import Counter
class DataProcessorWrapper:
    """
    Container for precomputed n-gram statistics and the slang dictionary.

    This class must be importable under this exact name so that pickle can
    resolve it when loading brain_data_processor.pkl.
    """
    def __init__(self, unigram_freq, bigram_freq, trigram_freq, vocabulary, slang_dict):
        # Slang-normalization mapping and vocabulary, plus its size for
        # Laplace smoothing downstream.
        self.slang_dict = slang_dict
        self.vocabulary = vocabulary
        self.vocab_size = len(vocabulary)
        # Word-frequency table and total corpus word count.
        self.unigram_freq = unigram_freq
        self.total_words = sum(unigram_freq.values())
        # Higher-order tables are coerced to plain dicts so defaultdict-style
        # containers do not leak out of the pickle.
        self.bigram_freq = dict(bigram_freq)
        self.trigram_freq = dict(trigram_freq)
def preprocess_text(text: str, slang_dict: dict) -> Tuple[List[str], List[str]]:
    """
    Preprocess text in order: regex cleaning -> lowercase/tokenize -> slang
    normalization. Stopwords are NOT removed (the keyboard needs to predict
    them).

    Args:
        text: raw input text.
        slang_dict: mapping from slang token to its normalized form.

    Returns:
        Tuple of:
            List[str]: processed (lowercased, normalized) words.
            List[str]: transformation log entries ("'slang' → 'normal'")
                       for the X-Ray view.
    """
    # Step 1: Regex cleaning - drop every character that is not a letter or
    # whitespace (digits and punctuation disappear entirely).
    text = re.sub(r'[^a-zA-Z\s]', '', text)
    # Step 2: Lowercase and tokenize on whitespace.
    words = text.lower().split()
    # Step 3: Slang normalization, recording each replacement for the log.
    normalized_words = []
    transformations = []
    for w in words:
        if w in slang_dict:
            normalized = slang_dict[w]
            transformations.append(f"'{w}' → '{normalized}'")
            normalized_words.append(normalized)
        else:
            normalized_words.append(w)
    return normalized_words, transformations
class BaseNGramModel:
    """
    Pure probabilistic n-gram predictor with trigram -> bigram -> unigram
    backoff.
    """
    def __init__(self, data_processor):
        # Keep references to the shared statistics held by the data processor.
        self.unigram_freq = data_processor.unigram_freq
        self.bigram_freq = data_processor.bigram_freq
        self.trigram_freq = data_processor.trigram_freq
        self.vocabulary = data_processor.vocabulary
        self.vocab_size = data_processor.vocab_size
        self.total_words = data_processor.total_words

    def _smoothed_scores(self, table):
        """Laplace-smoothed probabilities for one candidate-count table."""
        denom = sum(table.values()) + self.vocab_size
        return {word: (count + 1) / denom for word, count in table.items()}

    def predict(self, context: List[str], top_k: int = 3) -> List[Tuple[str, float]]:
        """
        Predict the next word for the given context.

        Returns: [(word, probability), ...] sorted by descending probability,
        at most top_k entries.
        """
        scores = {}
        # Level 1: trigram lookup on the last two context words.
        if len(context) >= 2:
            trigram_key = (context[-2], context[-1])
            if trigram_key in self.trigram_freq:
                scores = self._smoothed_scores(self.trigram_freq[trigram_key])
        # Level 2: back off to the bigram table keyed on the last word.
        if not scores and context:
            last_word = context[-1]
            if last_word in self.bigram_freq:
                scores = self._smoothed_scores(self.bigram_freq[last_word])
        # Level 3: back off to the 100 most frequent unigrams (no smoothing).
        if not scores:
            scores = {word: count / self.total_words
                      for word, count in Counter(self.unigram_freq).most_common(100)}
        ranked = sorted(scores.items(), key=lambda kv: kv[1], reverse=True)
        return ranked[:top_k]
class FuzzyManualModel:
    """
    Fuzzy-logic scoring model with hand-picked membership parameters.

    Candidates come from the same trigram -> bigram -> unigram backoff as
    BaseNGramModel; each candidate is then re-ranked by fuzzy inference over
    its n-gram probability and its corpus popularity.
    """
    def __init__(self, data_processor):
        self.unigram_freq = data_processor.unigram_freq
        self.bigram_freq = data_processor.bigram_freq
        self.trigram_freq = data_processor.trigram_freq
        self.vocabulary = data_processor.vocabulary
        self.vocab_size = data_processor.vocab_size
        self.total_words = data_processor.total_words
        # Triangular membership peaks for the probability input:
        # [low, medium, high].
        self.prob_params = [0.15, 0.45, 0.85]
        # Triangular membership peaks for popularity, on a log10 scale:
        # [rare, common, very common].
        self.pop_params = [2.0, 4.5, 7.0]
        # Blend weights: 60% raw n-gram probability, 40% fuzzy score.
        self.weights = {
            'prob': 0.6,
            'pop': 0.4
        }

    def _get_base_predictions(self, context: List[str], top_k: int = 50) -> List[Tuple[str, float]]:
        """Candidate words from the n-gram tables (same backoff as the base model)."""
        scores = {}
        # Trigram level: keyed on the last two context words.
        if len(context) >= 2:
            trigram_key = (context[-2], context[-1])
            if trigram_key in self.trigram_freq:
                table = self.trigram_freq[trigram_key]
                denom = sum(table.values()) + self.vocab_size
                scores = {word: (count + 1) / denom for word, count in table.items()}
        # Bigram backoff: keyed on the last context word.
        if not scores and context:
            last_word = context[-1]
            if last_word in self.bigram_freq:
                table = self.bigram_freq[last_word]
                denom = sum(table.values()) + self.vocab_size
                scores = {word: (count + 1) / denom for word, count in table.items()}
        # Unigram backoff: 100 most frequent words, unsmoothed.
        if not scores:
            scores = {word: count / self.total_words
                      for word, count in Counter(self.unigram_freq).most_common(100)}
        ranked = sorted(scores.items(), key=lambda kv: kv[1], reverse=True)
        return ranked[:top_k]

    def fuzzify_prob(self, prob):
        """Membership degrees of `prob` in the low/medium/high sets (width 0.3)."""
        low_peak, mid_peak, high_peak = self.prob_params
        return {
            'low': max(0, 1 - abs(prob - low_peak) / 0.3),
            'medium': max(0, 1 - abs(prob - mid_peak) / 0.3),
            'high': max(0, 1 - abs(prob - high_peak) / 0.3),
        }

    def fuzzify_pop(self, count):
        """Membership degrees of a word count in rare/common/very_common (log10 scale, width 2.5)."""
        log_count = np.log10(max(1, count))
        rare_peak, common_peak, vc_peak = self.pop_params
        return {
            'rare': max(0, 1 - abs(log_count - rare_peak) / 2.5),
            'common': max(0, 1 - abs(log_count - common_peak) / 2.5),
            'very_common': max(0, 1 - abs(log_count - vc_peak) / 2.5),
        }

    def fuzzy_inference(self, prob_fuzzy, pop_fuzzy):
        """Apply the rule base and defuzzify by taking the strongest activation."""
        activations = (
            # Rule 1: high prob AND very common -> excellent (0.9).
            min(prob_fuzzy['high'], pop_fuzzy['very_common']) * 0.9,
            # Rule 2: medium prob AND common -> good (0.6).
            min(prob_fuzzy['medium'], pop_fuzzy['common']) * 0.6,
            # Rule 3: low prob BUT very common -> fair (0.45).
            min(prob_fuzzy['low'], pop_fuzzy['very_common']) * 0.45,
            # Rule 4: fallback - small weighted contribution for everything else.
            (prob_fuzzy['low'] * 0.2 + pop_fuzzy['rare'] * 0.1) / 2,
        )
        return max(activations)

    def predict(self, context: List[str], top_k: int = 3) -> List[Tuple[str, float]]:
        """Re-rank the base n-gram candidates with the fuzzy score."""
        reranked = {}
        for word, prob in self._get_base_predictions(context, top_k=50):
            # Popularity comes from the unigram table (1 if unseen).
            membership_prob = self.fuzzify_prob(prob)
            membership_pop = self.fuzzify_pop(self.unigram_freq.get(word, 1))
            fuzzy_score = self.fuzzy_inference(membership_prob, membership_pop)
            # Blend raw probability with the fuzzy quality score.
            reranked[word] = (self.weights['prob'] * prob
                              + self.weights['pop'] * fuzzy_score)
        ordered = sorted(reranked.items(), key=lambda kv: kv[1], reverse=True)
        return ordered[:top_k]
class FuzzyGAModel(FuzzyManualModel):
    """
    Fuzzy-logic model whose membership peaks come from a Genetic Algorithm.
    """
    def __init__(self, data_processor, ga_params):
        super().__init__(data_processor)
        # Replace the manual peaks with the GA-optimized ones: the first
        # three values are probability peaks, the next three popularity peaks.
        self.prob_params, self.pop_params = ga_params[:3], ga_params[3:6]
class FuzzyPSOModel(FuzzyManualModel):
    """
    Fuzzy-logic model whose membership peaks come from Particle Swarm
    Optimization.
    """
    def __init__(self, data_processor, pso_params):
        super().__init__(data_processor)
        # Replace the manual peaks with the PSO-optimized ones: the first
        # three values are probability peaks, the next three popularity peaks.
        self.prob_params, self.pop_params = pso_params[:3], pso_params[3:6]