Fazle Mawla Wahyuhanda committed
Commit f98879b · 1 Parent(s): 126bd14

Add utils module and brain_params.json

Files changed:
- src/brain_params.json  +18 -0
- src/streamlit_app.py   +35 -9
- src/utils/__init__.py  +18 -0
- src/utils/models.py    +236 -0
src/brain_params.json (ADDED)
@@ -0,0 +1,18 @@
+{
+    "ga_params": [
+        0.11470089804145127,
+        0.19400274988046418,
+        0.8812051256574979,
+        1.0656220407276922,
+        4.643527895439729,
+        7.693048699833912
+    ],
+    "pso_params": [
+        0.26044028650438855,
+        0.3716727120094174,
+        0.39161891527984233,
+        1.5091777104339394,
+        3.529365593941532,
+        4.633741518442697
+    ]
+}
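For reference, a minimal sketch (not part of this commit) of how these values are meant to be consumed, based on the FuzzyGAModel and FuzzyPSOModel constructors added in src/utils/models.py below: the first three numbers of each list become prob_params and the last three become pop_params.

import json

# Load the tuned membership-function peaks committed above.
with open("src/brain_params.json") as f:
    brain_params = json.load(f)

ga_params = brain_params["ga_params"]    # [:3] -> prob_params, [3:6] -> pop_params
pso_params = brain_params["pso_params"]  # same layout, tuned by PSO

# e.g. FuzzyGAModel(data_processor, ga_params); the data_processor object
# itself is loaded separately from brain_data_processor.pkl.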
src/streamlit_app.py (CHANGED)
@@ -12,7 +12,11 @@ import sys
 import requests
 
 # Add src directory to path for imports
-
+current_dir = os.path.dirname(os.path.abspath(__file__))
+sys.path.insert(0, current_dir)
+# Also add parent directory in case running from different locations
+parent_dir = os.path.dirname(current_dir)
+sys.path.insert(0, parent_dir)
 
 # ============================================
 # GOOGLE DRIVE DOWNLOAD HELPER
@@ -42,14 +46,36 @@ def download_from_gdrive(file_id, destination):
 # Google Drive File ID
 GDRIVE_FILE_ID = "1jetjbzPB4hLVHNmGpETpz4ifd0CX70Qm"
 
-
-
-
-
-
-
-
-
+# Try multiple import paths for different deployment environments
+try:
+    from utils.models import (
+        preprocess_text,
+        BaseNGramModel,
+        FuzzyManualModel,
+        FuzzyGAModel,
+        FuzzyPSOModel,
+        DataProcessorWrapper
+    )
+except ModuleNotFoundError:
+    try:
+        from src.utils.models import (
+            preprocess_text,
+            BaseNGramModel,
+            FuzzyManualModel,
+            FuzzyGAModel,
+            FuzzyPSOModel,
+            DataProcessorWrapper
+        )
+    except ModuleNotFoundError:
+        # Direct import if models.py is in same directory structure
+        from models import (
+            preprocess_text,
+            BaseNGramModel,
+            FuzzyManualModel,
+            FuzzyGAModel,
+            FuzzyPSOModel,
+            DataProcessorWrapper
+        )
 
 # Register DataProcessorWrapper in __main__ for unpickling
 import __main__
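Note: the second hunk ends just before the actual registration statement, which falls outside the shown context. A minimal sketch of the usual pattern for making the class resolvable when unpickling brain_data_processor.pkl (assumed here, not copied from the commit; the import assumes src/ is on sys.path, as the hunk above sets up):

import __main__
from utils.models import DataProcessorWrapper

# pickle records the defining module of a class at dump time; if the
# processor was pickled from a script where the class lived in __main__,
# it has to be re-attached there before pickle.load can resolve it.
__main__.DataProcessorWrapper = DataProcessorWrapper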
src/utils/__init__.py (ADDED)
@@ -0,0 +1,18 @@
+# utils package
+from .models import (
+    DataProcessorWrapper,
+    preprocess_text,
+    BaseNGramModel,
+    FuzzyManualModel,
+    FuzzyGAModel,
+    FuzzyPSOModel
+)
+
+__all__ = [
+    'DataProcessorWrapper',
+    'preprocess_text',
+    'BaseNGramModel',
+    'FuzzyManualModel',
+    'FuzzyGAModel',
+    'FuzzyPSOModel'
+]
src/utils/models.py (ADDED)
@@ -0,0 +1,236 @@
+"""
+Model classes for word prediction with Fuzzy Logic
+Load from brain_data_processor.pkl
+"""
+import re
+import numpy as np
+from typing import List, Tuple
+from collections import Counter
+
+
+class DataProcessorWrapper:
+    """
+    Wrapper class for data processor - needed for unpickling brain_data_processor.pkl
+    """
+    def __init__(self, unigram_freq, bigram_freq, trigram_freq, vocabulary, slang_dict):
+        self.unigram_freq = unigram_freq
+        self.bigram_freq = dict(bigram_freq)
+        self.trigram_freq = dict(trigram_freq)
+        self.vocabulary = vocabulary
+        self.slang_dict = slang_dict
+        self.vocab_size = len(vocabulary)
+        self.total_words = sum(unigram_freq.values())
+
+
+def preprocess_text(text: str, slang_dict: dict) -> Tuple[List[str], List[str]]:
+    """
+    Preprocess text in this order: regex cleaning -> slang normalization
+    Stopwords are NOT removed (the keyboard needs to predict them)
+
+    Returns:
+        List[str]: list of processed words
+        List[str]: transformation log for the X-Ray view
+    """
+    original_text = text
+
+    # Step 1: Regex cleaning - remove all non-alphabetic, non-whitespace characters
+    text = re.sub(r'[^a-zA-Z\s]', '', text)
+
+    # Step 2: Lowercase and tokenize
+    words = text.lower().split()
+
+    # Step 3: Slang normalization with tracking
+    normalized_words = []
+    transformations = []
+
+    for w in words:
+        if w in slang_dict:
+            normalized = slang_dict[w]
+            transformations.append(f"'{w}' → '{normalized}'")
+            normalized_words.append(normalized)
+        else:
+            normalized_words.append(w)
+
+    return normalized_words, transformations
+
+
+class BaseNGramModel:
+    """
+    Pure probabilistic N-gram model with a backoff mechanism
+    """
+    def __init__(self, data_processor):
+        self.unigram_freq = data_processor.unigram_freq
+        self.bigram_freq = data_processor.bigram_freq
+        self.trigram_freq = data_processor.trigram_freq
+        self.vocabulary = data_processor.vocabulary
+        self.vocab_size = data_processor.vocab_size
+        self.total_words = data_processor.total_words
+
+    def predict(self, context: List[str], top_k: int = 3) -> List[Tuple[str, float]]:
+        """
+        Predict the next word based on the context
+        Returns: [(word, probability), ...]
+        """
+        scores = {}
+
+        if len(context) >= 2:
+            # Try trigram first
+            key = (context[-2], context[-1])
+            if key in self.trigram_freq:
+                candidates = self.trigram_freq[key]
+                total = sum(candidates.values())
+                for word, count in candidates.items():
+                    # Probability with Laplace smoothing
+                    scores[word] = (count + 1) / (total + self.vocab_size)
+
+        if len(scores) == 0 and len(context) >= 1:
+            # Backoff to bigram
+            key = context[-1]
+            if key in self.bigram_freq:
+                candidates = self.bigram_freq[key]
+                total = sum(candidates.values())
+                for word, count in candidates.items():
+                    scores[word] = (count + 1) / (total + self.vocab_size)
+
+        if len(scores) == 0:
+            # Backoff to unigram (most frequent words)
+            for word, count in Counter(self.unigram_freq).most_common(100):
+                scores[word] = count / self.total_words
+
+        # Sort by probability and return top_k
+        sorted_predictions = sorted(scores.items(), key=lambda x: x[1], reverse=True)
+        return sorted_predictions[:top_k]
+
+
+class FuzzyManualModel:
+    """
+    Fuzzy Logic model with manually chosen parameters
+    """
+    def __init__(self, data_processor):
+        self.unigram_freq = data_processor.unigram_freq
+        self.bigram_freq = data_processor.bigram_freq
+        self.trigram_freq = data_processor.trigram_freq
+        self.vocabulary = data_processor.vocabulary
+        self.vocab_size = data_processor.vocab_size
+        self.total_words = data_processor.total_words
+
+        # Manual parameters for the fuzzy membership functions
+        # Probability: [low_peak, medium_peak, high_peak]
+        self.prob_params = [0.15, 0.45, 0.85]
+
+        # Popularity: [rare_peak, common_peak, verycommon_peak] (log scale)
+        self.pop_params = [2.0, 4.5, 7.0]  # log10 values
+
+        # Fuzzy weights
+        self.weights = {
+            'prob': 0.6,  # 60% weight on probability
+            'pop': 0.4    # 40% weight on popularity
+        }
+
+    def _get_base_predictions(self, context: List[str], top_k: int = 50) -> List[Tuple[str, float]]:
+        """Get base predictions using the n-gram model"""
+        scores = {}
+
+        if len(context) >= 2:
+            key = (context[-2], context[-1])
+            if key in self.trigram_freq:
+                candidates = self.trigram_freq[key]
+                total = sum(candidates.values())
+                for word, count in candidates.items():
+                    scores[word] = (count + 1) / (total + self.vocab_size)
+
+        if len(scores) == 0 and len(context) >= 1:
+            key = context[-1]
+            if key in self.bigram_freq:
+                candidates = self.bigram_freq[key]
+                total = sum(candidates.values())
+                for word, count in candidates.items():
+                    scores[word] = (count + 1) / (total + self.vocab_size)
+
+        if len(scores) == 0:
+            for word, count in Counter(self.unigram_freq).most_common(100):
+                scores[word] = count / self.total_words
+
+        sorted_predictions = sorted(scores.items(), key=lambda x: x[1], reverse=True)
+        return sorted_predictions[:top_k]
+
+    def fuzzify_prob(self, prob):
+        """Fuzzify the probability score"""
+        low = max(0, 1 - abs(prob - self.prob_params[0]) / 0.3)
+        med = max(0, 1 - abs(prob - self.prob_params[1]) / 0.3)
+        high = max(0, 1 - abs(prob - self.prob_params[2]) / 0.3)
+        return {'low': low, 'medium': med, 'high': high}
+
+    def fuzzify_pop(self, count):
+        """Fuzzify the popularity score (log scale)"""
+        log_count = np.log10(max(1, count))
+        rare = max(0, 1 - abs(log_count - self.pop_params[0]) / 2.5)
+        common = max(0, 1 - abs(log_count - self.pop_params[1]) / 2.5)
+        very_common = max(0, 1 - abs(log_count - self.pop_params[2]) / 2.5)
+        return {'rare': rare, 'common': common, 'very_common': very_common}
+
+    def fuzzy_inference(self, prob_fuzzy, pop_fuzzy):
+        """Apply fuzzy rules and defuzzify"""
+        # Rule 1: High prob AND Very Common pop -> Excellent (0.9)
+        rule1 = min(prob_fuzzy['high'], pop_fuzzy['very_common']) * 0.9
+
+        # Rule 2: Medium prob AND Common pop -> Good (0.6)
+        rule2 = min(prob_fuzzy['medium'], pop_fuzzy['common']) * 0.6
+
+        # Rule 3: Low prob BUT Very Common pop -> Fair (0.45)
+        rule3 = min(prob_fuzzy['low'], pop_fuzzy['very_common']) * 0.45
+
+        # Rule 4: Any other combination -> Poor (weighted average)
+        rule4 = (prob_fuzzy['low'] * 0.2 + pop_fuzzy['rare'] * 0.1) / 2
+
+        # Defuzzification: take the strongest rule activation
+        return max(rule1, rule2, rule3, rule4)
+
+    def predict(self, context: List[str], top_k: int = 3) -> List[Tuple[str, float]]:
+        """Predict with fuzzy scoring"""
+        # Get base predictions
+        base_preds = self._get_base_predictions(context, top_k=50)
+
+        fuzzy_scores = {}
+        for word, prob in base_preds:
+            # Get popularity
+            pop_count = self.unigram_freq.get(word, 1)
+
+            # Fuzzify
+            prob_fuzzy = self.fuzzify_prob(prob)
+            pop_fuzzy = self.fuzzify_pop(pop_count)
+
+            # Inference
+            fuzzy_score = self.fuzzy_inference(prob_fuzzy, pop_fuzzy)
+
+            # Combine with weights
+            final_score = (self.weights['prob'] * prob +
+                           self.weights['pop'] * fuzzy_score)
+
+            fuzzy_scores[word] = final_score
+
+        # Sort and return
+        sorted_predictions = sorted(fuzzy_scores.items(), key=lambda x: x[1], reverse=True)
+        return sorted_predictions[:top_k]
+
+
+class FuzzyGAModel(FuzzyManualModel):
+    """
+    Fuzzy Logic model with parameters from a Genetic Algorithm
+    """
+    def __init__(self, data_processor, ga_params):
+        super().__init__(data_processor)
+        # Override with the GA parameters
+        self.prob_params = ga_params[:3]
+        self.pop_params = ga_params[3:6]
+
+
+class FuzzyPSOModel(FuzzyManualModel):
+    """
+    Fuzzy Logic model with parameters from Particle Swarm Optimization
+    """
+    def __init__(self, data_processor, pso_params):
+        super().__init__(data_processor)
+        # Override with the PSO parameters
+        self.prob_params = pso_params[:3]
+        self.pop_params = pso_params[3:6]
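To show how these classes fit together, a small usage sketch with toy frequency tables (the real app loads a pre-built DataProcessorWrapper from brain_data_processor.pkl; the counts and the truncated GA parameters below are illustrative only, and the import assumes src/ is on sys.path as streamlit_app.py arranges):

from collections import Counter
from utils.models import (DataProcessorWrapper, preprocess_text,
                          BaseNGramModel, FuzzyGAModel)

# Toy n-gram tables standing in for the pickled processor.
unigram = Counter({'saya': 50, 'mau': 30, 'makan': 20, 'nasi': 15})
bigram = {'saya': {'mau': 25, 'makan': 10}, 'mau': {'makan': 18}}
trigram = {('saya', 'mau'): {'makan': 15, 'pergi': 5}}
processor = DataProcessorWrapper(unigram, bigram, trigram,
                                 vocabulary=set(unigram), slang_dict={'sy': 'saya'})

# Clean and normalize input, then predict the next word with both models.
words, transformations = preprocess_text("Sy mau...", processor.slang_dict)  # ['saya', 'mau']
print(BaseNGramModel(processor).predict(words, top_k=3))            # pure n-gram backoff
print(FuzzyGAModel(processor, [0.11, 0.19, 0.88, 1.07, 4.64, 7.69]).predict(words))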