Spaces:

lawlevisan
/

audio-dashboard

Sleeping

App Files Community

lawlevisan committed on Sep 19, 2025

Commit

5c7f2f8

verified ·

1 Parent(s): 1a1611d

Update src/predict.py

Browse files

Files changed (1) hide show

src/predict.py +65 -10

src/predict.py CHANGED Viewed

@@ -123,12 +123,68 @@ def preprocess_text(text: str) -> str:
 def compute_keyword_score(text: str) -> Tuple[float, Dict[str, int]]:
     """Compute keyword-based score for enhanced accuracy"""
     text_lower = text.lower()
-    # Count different types of keywords
-    drug_matches = sum(1 for kw in DRUG_KEYWORDS if kw in text_lower)
-    high_risk_matches = sum(1 for kw in HIGH_RISK_KEYWORDS if kw in text_lower)
-    # Context patterns for better detection
     context_patterns = [
         r'(?i)(pick.*up|got.*stuff|meet.*behind)',
         r'(?i)(payment|crypto|cash.*deal)',
@@ -137,10 +193,8 @@ def compute_keyword_score(text: str) -> Tuple[float, Dict[str, int]]:
         r'(?i)(quality.*good|pure.*stuff)',
         r'(?i)(cops.*around|too.*risky)'
     ]
-    context_matches = sum(1 for pattern in context_patterns if re.search(pattern, text))
-    # Enhanced scoring with weights
     keyword_score = 0.0
     if high_risk_matches > 0:
         keyword_score += min(high_risk_matches * 0.3, 0.8)
@@ -148,15 +202,16 @@ def compute_keyword_score(text: str) -> Tuple[float, Dict[str, int]]:
         keyword_score += min(drug_matches * 0.1, 0.3)
     if context_matches > 0:
         keyword_score += min(context_matches * 0.15, 0.4)
     keyword_score = min(keyword_score, 1.0)
     return keyword_score, {
         'drug_keywords': drug_matches,
         'high_risk_keywords': high_risk_matches,
         'context_patterns': context_matches
     }
 # =======================
 # Config validation/fix with enhanced error handling
 # =======================

 def compute_keyword_score(text: str) -> Tuple[float, Dict[str, int]]:
     """Compute keyword-based score for enhanced accuracy"""
     text_lower = text.lower()
+# =======================
+# Enhanced text preprocessing for better accuracy
+# =======================
+def preprocess_text(text: str) -> str:
+    """Normalize raw text before it is scored or classified.
+
+    Lowercases the text, collapses every whitespace run to a single space,
+    expands a small table of chat abbreviations, and squeezes repeated
+    ``!``, ``?`` and ``.`` punctuation.
+
+    Args:
+        text: Raw input text; may be empty or ``None``.
+
+    Returns:
+        The normalized, stripped text, or ``""`` when ``text`` is falsy.
+    """
+    if not text:
+        return ""
+    # Lowercase first so the abbreviation table only needs lowercase keys.
+    text = text.lower()
+    # Collapse whitespace runs (newlines and tabs included) to one space.
+    text = re.sub(r'\s+', ' ', text)
+    # Common abbreviations and slang, mapped to their expanded form.
+    abbreviations = {
+        'u': 'you',
+        'ur': 'your',
+        'n': 'and',
+        'w/': 'with',
+        'thru': 'through',
+        'gonna': 'going to',
+        'wanna': 'want to',
+        'gotta': 'got to',
+    }
+    for abbrev, full in abbreviations.items():
+        # A \b is only meaningful next to a word character. For a key such
+        # as 'w/' a trailing \b would demand a word character right after
+        # the slash, so "w/ you" and a trailing "w/" would never expand.
+        left = r'\b' if re.fullmatch(r'\w', abbrev[0]) else ''
+        right = r'\b' if re.fullmatch(r'\w', abbrev[-1]) else ''
+        text = re.sub(rf'{left}{re.escape(abbrev)}{right}', full, text)
+    # Squeeze repeated punctuation but keep the sentence boundary itself.
+    text = re.sub(r'[!]{2,}', '!', text)
+    text = re.sub(r'[?]{2,}', '?', text)
+    text = re.sub(r'[.]{3,}', '...', text)
+    return text.strip()
+# =======================
+# Enhanced keyword-based scoring
+# =======================
+def compute_keyword_score(text: str) -> Tuple[float, Dict[str, int]]:
+    """Compute keyword-based score for enhanced accuracy"""
+    text_lower = text.lower()
+# Keywords that also have everyday meanings; they only count as a hit when
+# one of the usage verbs in keyword_check_with_context appears in the text.
+AMBIGUOUS_TERMS = {"e", "x", "line", "ice", "horse", "420"}
+def keyword_check_with_context(text: str, kw: str) -> bool:
+    """Return whether ``kw`` occurs in ``text`` as a whole word.
+
+    The search is case-insensitive. Keywords listed in ``AMBIGUOUS_TERMS``
+    additionally require one of a fixed set of usage verbs (smoke, roll,
+    pop, ...) to appear somewhere in ``text``.
+
+    Args:
+        text: Text to search.
+        kw: Keyword to look for.
+
+    Returns:
+        ``True`` when the keyword matches (and, for ambiguous keywords, a
+        usage verb is also present); ``False`` otherwise.
+    """
+    # An empty keyword would build the pattern \b\b, which matches almost
+    # any text; treat it as "no match" instead.
+    if not kw:
+        return False
+    pattern = rf"\b{re.escape(kw)}\b"
+    if not re.search(pattern, text, re.IGNORECASE):
+        return False
+    # The text match ignores case, so the ambiguity lookup must too;
+    # otherwise a keyword spelled "Ice" would skip the context requirement.
+    if kw.lower() not in AMBIGUOUS_TERMS:
+        return True
+    context_pattern = r"\b(smoke|roll|pop|hit|take|buy|sell|party|snort|inject)\b"
+    return bool(re.search(context_pattern, text, re.IGNORECASE))
+def compute_keyword_score(text: str) -> Tuple[float, Dict[str, int]]:
+    """Compute a keyword-based score together with the hit counts behind it.
+
+    Three signals are counted on the lowercased text: keywords from
+    ``DRUG_KEYWORDS``, keywords from ``HIGH_RISK_KEYWORDS`` (both checked
+    via ``keyword_check_with_context``), and the context regexes listed
+    below. Each signal adds a capped contribution and the total is capped
+    at 1.0.
+
+    Args:
+        text: Text to score.
+
+    Returns:
+        A ``(score, counts)`` tuple where ``counts`` has the keys
+        ``'drug_keywords'``, ``'high_risk_keywords'`` and
+        ``'context_patterns'``.
+    """
+    text_lower = text.lower()
+    drug_matches = sum(1 for kw in DRUG_KEYWORDS if keyword_check_with_context(text_lower, kw))
+    high_risk_matches = sum(1 for kw in HIGH_RISK_KEYWORDS if keyword_check_with_context(text_lower, kw))
     context_patterns = [
         r'(?i)(pick.*up|got.*stuff|meet.*behind)',
         r'(?i)(payment|crypto|cash.*deal)',
         r'(?i)(quality.*good|pure.*stuff)',
         r'(?i)(cops.*around|too.*risky)'
     ]
+    context_matches = sum(1 for pattern in context_patterns if re.search(pattern, text_lower))
     keyword_score = 0.0
     # High-risk keywords: 0.3 each, capped at 0.8.
     if high_risk_matches > 0:
         keyword_score += min(high_risk_matches * 0.3, 0.8)
     # Drug keywords: 0.1 each, capped at 0.3. Scored on their own so they
     # still count when no high-risk keyword is present.
     if drug_matches > 0:
         keyword_score += min(drug_matches * 0.1, 0.3)
     # Context patterns: 0.15 each, capped at 0.4.
     if context_matches > 0:
         keyword_score += min(context_matches * 0.15, 0.4)
     keyword_score = min(keyword_score, 1.0)
     return keyword_score, {
         'drug_keywords': drug_matches,
         'high_risk_keywords': high_risk_matches,
         'context_patterns': context_matches
     }
 # =======================
 # Config validation/fix with enhanced error handling
 # =======================