Commit ·
01b11d4
1
Parent(s): 61cee96
revert: restore backend/model files to cf07939 state, keep UI/UX changes only
Browse files
- src/app.py +0 -254
- src/nlp/punctuation/punctuation_rules.py +0 -14
- src/nlp/spelling/araspell_rules.py +0 -15
- tests/test_bug_fixes.py +0 -181
src/app.py
CHANGED
|
@@ -6,9 +6,6 @@ Provides API endpoints for the Bayan web application.
|
|
| 6 |
import os
|
| 7 |
import logging
|
| 8 |
import time
|
| 9 |
-
import hashlib
|
| 10 |
-
from collections import OrderedDict
|
| 11 |
-
from functools import wraps
|
| 12 |
from flask import Flask, request, jsonify, Response
|
| 13 |
from flask_cors import CORS
|
| 14 |
from pathlib import Path
|
|
@@ -79,119 +76,6 @@ MAX_TEXT_LENGTH = 5000 # Maximum characters for input text
|
|
| 79 |
MAX_SUMMARY_LENGTH = 512 # Maximum tokens for summary
|
| 80 |
MIN_TEXT_LENGTH = 10 # Minimum characters for summarization
|
| 81 |
|
| 82 |
-
# ── Response Cache (P3) ──
|
| 83 |
-
# LRU cache for /api/analyze: hash(text) → (response_dict, timestamp)
|
| 84 |
-
_ANALYZE_CACHE_MAX = 500
|
| 85 |
-
_ANALYZE_CACHE_TTL = 300 # 5 minutes
|
| 86 |
-
_analyze_cache = OrderedDict()
|
| 87 |
-
|
| 88 |
-
# ── Rate Limiter (P3) ──
|
| 89 |
-
_RATE_LIMIT_MAX = 30 # requests per window
|
| 90 |
-
_RATE_LIMIT_WINDOW = 60 # seconds
|
| 91 |
-
_rate_limit_store = {} # ip → [(timestamp, ...)]
|
| 92 |
-
|
| 93 |
-
# ── Ta Marbuta Dictionary (P2) ──
|
| 94 |
-
# Common words where ه at the end should be ة
|
| 95 |
-
_TA_MARBUTA_DICT = {
|
| 96 |
-
'المدرسه': 'المدرسة', 'الجامعه': 'الجامعة', 'المكتبه': 'المكتبة',
|
| 97 |
-
'الحياه': 'الحياة', 'الصلاه': 'الصلاة', 'الزكاه': 'الزكاة',
|
| 98 |
-
'القراءه': 'القراءة', 'الكتابه': 'الكتابة', 'المعرفه': 'المعرفة',
|
| 99 |
-
'الثقافه': 'الثقافة', 'السياسه': 'السياسة', 'الاقتصاديه': 'الاقتصادية',
|
| 100 |
-
'العربيه': 'العربية', 'الاسلاميه': 'الإسلامية', 'التربيه': 'التربية',
|
| 101 |
-
'الشريعه': 'الشريعة', 'الدوله': 'الدولة', 'الحكومه': 'الحكومة',
|
| 102 |
-
'المدينه': 'المدينة', 'القريه': 'القرية', 'الغرفه': 'الغرفة',
|
| 103 |
-
'السياره': 'السيارة', 'الطاوله': 'الطاولة', 'الرساله': 'الرسالة',
|
| 104 |
-
'المقاله': 'المقالة', 'الصحيفه': 'الصحيفة', 'الجريده': 'الجريدة',
|
| 105 |
-
'القصه': 'القصة', 'الروايه': 'الرواية', 'اللغه': 'اللغة',
|
| 106 |
-
'الفكره': 'الفكرة', 'الخطوه': 'الخطوة', 'المرحله': 'المرحلة',
|
| 107 |
-
'النتيجه': 'النتيجة', 'المشكله': 'المشكلة', 'الطريقه': 'الطريقة',
|
| 108 |
-
'الحاله': 'الحالة', 'الصوره': 'الصورة', 'القوه': 'القوة',
|
| 109 |
-
'الوحده': 'الوحدة', 'العلاقه': 'العلاقة', 'التجربه': 'التجربة',
|
| 110 |
-
'الحركه': 'الحركة', 'السلطه': 'السلطة', 'المنطقه': 'المنطقة',
|
| 111 |
-
'الساعه': 'الساعة', 'اللحظه': 'اللحظة', 'الفتره': 'الفترة',
|
| 112 |
-
'الاداره': 'الإدارة', 'البيئه': 'البيئة', 'الماده': 'المادة',
|
| 113 |
-
'الاسره': 'الأسرة', 'العائله': 'العائلة', 'الشركه': 'الشركة',
|
| 114 |
-
'المؤسسه': 'المؤسسة', 'المنظمه': 'المنظمة', 'الجمعيه': 'الجمعية',
|
| 115 |
-
'الوزاره': 'الوزارة', 'السفاره': 'السفارة', 'القياده': 'القيادة',
|
| 116 |
-
'الزياره': 'الزيارة', 'المحاوله': 'المحاولة', 'الدراسه': 'الدراسة',
|
| 117 |
-
'الممارسه': 'الممارسة', 'المتابعه': 'المتابعة', 'الخدمه': 'الخدمة',
|
| 118 |
-
'التقنيه': 'التقنية', 'الهندسه': 'الهندسة', 'الفلسفه': 'الفلسفة',
|
| 119 |
-
'مدرسه': 'مدرسة', 'جامعه': 'جامعة', 'مكتبه': 'مكتبة',
|
| 120 |
-
'حياه': 'حياة', 'صلاه': 'صلاة', 'زكاه': 'زكاة',
|
| 121 |
-
'لغه': 'لغة', 'قصه': 'قصة', 'فكره': 'فكرة',
|
| 122 |
-
'خطوه': 'خطوة', 'صوره': 'صورة', 'قوه': 'قوة',
|
| 123 |
-
'سياره': 'سيارة', 'رساله': 'رسالة', 'ساعه': 'ساعة',
|
| 124 |
-
'غرفه': 'غرفة', 'شركه': 'شركة', 'دوله': 'دولة',
|
| 125 |
-
}
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
def _fix_ta_marbuta(text):
|
| 129 |
-
"""Fix common ه→ة errors at pipeline level using dictionary lookup."""
|
| 130 |
-
words = text.split()
|
| 131 |
-
fixed_words = []
|
| 132 |
-
changes = []
|
| 133 |
-
pos = 0
|
| 134 |
-
for word in words:
|
| 135 |
-
start = text.find(word, pos)
|
| 136 |
-
end = start + len(word)
|
| 137 |
-
# Check bare word
|
| 138 |
-
if word in _TA_MARBUTA_DICT:
|
| 139 |
-
fixed_words.append(_TA_MARBUTA_DICT[word])
|
| 140 |
-
changes.append({'start': start, 'end': end, 'original': word, 'correction': _TA_MARBUTA_DICT[word]})
|
| 141 |
-
# Check word ending in ه that should be ة (pattern match)
|
| 142 |
-
elif word.endswith('ه') and len(word) >= 3:
|
| 143 |
-
candidate = word[:-1] + 'ة'
|
| 144 |
-
if candidate in _TA_MARBUTA_DICT.values():
|
| 145 |
-
fixed_words.append(candidate)
|
| 146 |
-
changes.append({'start': start, 'end': end, 'original': word, 'correction': candidate})
|
| 147 |
-
else:
|
| 148 |
-
fixed_words.append(word)
|
| 149 |
-
else:
|
| 150 |
-
fixed_words.append(word)
|
| 151 |
-
pos = end
|
| 152 |
-
return ' '.join(fixed_words), changes
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
def _check_rate_limit(ip):
|
| 156 |
-
"""Check if IP has exceeded rate limit. Returns True if allowed."""
|
| 157 |
-
now = time.time()
|
| 158 |
-
if ip not in _rate_limit_store:
|
| 159 |
-
_rate_limit_store[ip] = []
|
| 160 |
-
# Clean old entries
|
| 161 |
-
_rate_limit_store[ip] = [t for t in _rate_limit_store[ip] if now - t < _RATE_LIMIT_WINDOW]
|
| 162 |
-
if len(_rate_limit_store[ip]) >= _RATE_LIMIT_MAX:
|
| 163 |
-
return False
|
| 164 |
-
_rate_limit_store[ip].append(now)
|
| 165 |
-
return True
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
def _get_cache_key(text):
|
| 169 |
-
"""Generate cache key from text."""
|
| 170 |
-
return hashlib.md5(text.encode('utf-8')).hexdigest()
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
def _get_cached_response(text):
|
| 174 |
-
"""Get cached response if exists and not expired."""
|
| 175 |
-
key = _get_cache_key(text)
|
| 176 |
-
if key in _analyze_cache:
|
| 177 |
-
data, ts = _analyze_cache[key]
|
| 178 |
-
if time.time() - ts < _ANALYZE_CACHE_TTL:
|
| 179 |
-
_analyze_cache.move_to_end(key)
|
| 180 |
-
return data
|
| 181 |
-
else:
|
| 182 |
-
del _analyze_cache[key]
|
| 183 |
-
return None
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
def _set_cached_response(text, response_data):
|
| 187 |
-
"""Store response in cache."""
|
| 188 |
-
key = _get_cache_key(text)
|
| 189 |
-
_analyze_cache[key] = (response_data, time.time())
|
| 190 |
-
# Evict oldest if over limit
|
| 191 |
-
while len(_analyze_cache) > _ANALYZE_CACHE_MAX:
|
| 192 |
-
_analyze_cache.popitem(last=False)
|
| 193 |
-
|
| 194 |
-
|
| 195 |
# Global model instances
|
| 196 |
summarization_model = None
|
| 197 |
spelling_model = None
|
|
@@ -1033,12 +917,6 @@ def _is_small_spelling_change(orig_word, corr_word, vocab_manager=None):
|
|
| 1033 |
('ء', 'أ'), ('أ', 'ء'), # standalone hamza ↔ hamza on alef
|
| 1034 |
('ء', 'ؤ'), ('ؤ', 'ء'), # standalone hamza ↔ hamza on waw
|
| 1035 |
('ء', 'ئ'), ('ئ', 'ء'), # standalone hamza ↔ hamza on ya
|
| 1036 |
-
# Common Arabic letter confusions (sound-alike pairs)
|
| 1037 |
-
('ص', 'س'), ('س', 'ص'), # emphatic/plain sibilant (المدرصة→المدرسة)
|
| 1038 |
-
('ض', 'ظ'), ('ظ', 'ض'), # emphatic pair confusion
|
| 1039 |
-
('ذ', 'ز'), ('ز', 'ذ'), # voiced fricatives
|
| 1040 |
-
('ث', 'س'), ('س', 'ث'), # voiceless fricatives
|
| 1041 |
-
('ط', 'ت'), ('ت', 'ط'), # emphatic/plain stop
|
| 1042 |
}
|
| 1043 |
# Check every character pair — reject if ANY non-orthographic change
|
| 1044 |
if len(orig_word) != len(corr_word):
|
|
@@ -1191,14 +1069,6 @@ def _is_orthographic_variant(word1: str, word2: str) -> bool:
|
|
| 1191 |
|
| 1192 |
@app.route('/api/analyze', methods=['POST'])
|
| 1193 |
def analyze_text():
|
| 1194 |
-
# ── Rate Limiting (P3) ──
|
| 1195 |
-
client_ip = request.headers.get('X-Forwarded-For', request.remote_addr)
|
| 1196 |
-
if not _check_rate_limit(client_ip):
|
| 1197 |
-
return jsonify({
|
| 1198 |
-
'error': 'Rate limit exceeded. Please wait before making more requests.',
|
| 1199 |
-
'status': 'error'
|
| 1200 |
-
}), 429
|
| 1201 |
-
|
| 1202 |
"""
|
| 1203 |
Perform sequential analysis (Spelling -> Grammar -> Punctuation)
|
| 1204 |
and return word-level suggestions with offsets.
|
|
@@ -1220,12 +1090,6 @@ def analyze_text():
|
|
| 1220 |
if not text:
|
| 1221 |
return jsonify({'error': 'Text is required', 'status': 'error'}), 400
|
| 1222 |
|
| 1223 |
-
# ── Cache Check (P3) ──
|
| 1224 |
-
cached = _get_cached_response(text)
|
| 1225 |
-
if cached:
|
| 1226 |
-
logger.info(f"[ANALYZE] Cache hit for text (len={len(text)})")
|
| 1227 |
-
return jsonify(cached)
|
| 1228 |
-
|
| 1229 |
# Reject inputs that are predominantly non-Arabic (code, markup, etc.)
|
| 1230 |
arabic_chars = len(re.findall(r'[\u0600-\u06FF]', text))
|
| 1231 |
alpha_chars = len(re.findall(r'[a-zA-Z\u0600-\u06FF]', text))
|
|
@@ -1494,22 +1358,6 @@ def analyze_text():
|
|
| 1494 |
except Exception as e:
|
| 1495 |
logger.error(f"[ANALYZE] Hamza fix failed: {type(e).__name__}: {e}")
|
| 1496 |
|
| 1497 |
-
# ── Ta Marbuta fix pass (P2) ──
|
| 1498 |
-
# Catches common ه→ة errors like المدرسه→المدرسة at pipeline level.
|
| 1499 |
-
try:
|
| 1500 |
-
ta_fixed, ta_changes = _fix_ta_marbuta(current_text)
|
| 1501 |
-
if ta_fixed != current_text:
|
| 1502 |
-
for tc in ta_changes:
|
| 1503 |
-
ctx.add_patch(
|
| 1504 |
-
'spelling', tc['start'], tc['end'],
|
| 1505 |
-
tc['correction'], confidence=0.95,
|
| 1506 |
-
)
|
| 1507 |
-
logger.info(f"[TA-MARBUTA] '{tc['original']}' → '{tc['correction']}'")
|
| 1508 |
-
ctx.mutate_text(ta_fixed, OffsetMapper)
|
| 1509 |
-
current_text = ctx.current_text
|
| 1510 |
-
except Exception as e:
|
| 1511 |
-
logger.error(f"[ANALYZE] Ta Marbuta fix failed: {type(e).__name__}: {e}")
|
| 1512 |
-
|
| 1513 |
# 2. Grammar (runs on spelling-corrected text — word-level dependency)
|
| 1514 |
try:
|
| 1515 |
t0 = time.time()
|
|
@@ -1618,22 +1466,6 @@ def analyze_text():
|
|
| 1618 |
stage_label = 'grammar'
|
| 1619 |
if _is_spelling_only_change(orig_text, corr_text):
|
| 1620 |
stage_label = 'spelling'
|
| 1621 |
-
|
| 1622 |
-
# ── Directional blocks for grammar (mirrors spelling filter) ──
|
| 1623 |
-
# Prevents grammar from making meaning-changing corrections
|
| 1624 |
-
# like كان→كأن ("was" → "as if").
|
| 1625 |
-
_GRAMMAR_BLOCKS = {
|
| 1626 |
-
'كان': {'كأن'}, 'كأن': {'كان'},
|
| 1627 |
-
'هذه': {'هذة'}, 'هذا': {'هذة', 'هذه'},
|
| 1628 |
-
'إلى': {'على', 'علي'}, 'على': {'إلى', 'علي'},
|
| 1629 |
-
'لكن': {'لاكن'}, 'ذلك': {'ذالك'},
|
| 1630 |
-
}
|
| 1631 |
-
if corr_text in _GRAMMAR_BLOCKS.get(orig_text, set()):
|
| 1632 |
-
logger.info(
|
| 1633 |
-
f"[GRAMMAR] Blocked directional: '{orig_text}'→'{corr_text}'"
|
| 1634 |
-
)
|
| 1635 |
-
continue
|
| 1636 |
-
|
| 1637 |
ctx.add_patch(
|
| 1638 |
stage_label, d['start'], d['end'],
|
| 1639 |
corr_text, confidence=1.0
|
|
@@ -1670,13 +1502,6 @@ def analyze_text():
|
|
| 1670 |
from nlp.punctuation.punctuation_service import get_punctuation_model
|
| 1671 |
punc_checker = get_punctuation_model()
|
| 1672 |
corrected_punc = punc_checker.correct(ctx.current_text)
|
| 1673 |
-
# ── Post-process: strip duplicate trailing punctuation ──
|
| 1674 |
-
# Model sometimes turns "..." into "...." or "." into ".."
|
| 1675 |
-
import re as _punc_re
|
| 1676 |
-
# Collapse non-dot duplicate punctuation: ,, → , ;; → ; etc.
|
| 1677 |
-
corrected_punc = _punc_re.sub(r'([،؛:!?؟])\1+', r'\1', corrected_punc)
|
| 1678 |
-
# Collapse 4+ dots into ellipsis (3 dots), preserve intentional ...
|
| 1679 |
-
corrected_punc = _punc_re.sub(r'\.{4,}', '...', corrected_punc)
|
| 1680 |
timing_ms['punctuation_ms'] = int((time.time() - t0) * 1000)
|
| 1681 |
logger.info(f"[ANALYZE] Step 3: Punctuation done in {timing_ms['punctuation_ms']}ms")
|
| 1682 |
if corrected_punc != ctx.current_text:
|
|
@@ -1703,20 +1528,6 @@ def analyze_text():
|
|
| 1703 |
f"'{d.get('original','')}' \u2192 '{d.get('correction','')}' "
|
| 1704 |
f"(locked by {owner}[{ls}:{le}])"
|
| 1705 |
)
|
| 1706 |
-
# ── Mid-word split guard ──
|
| 1707 |
-
# Reject punctuation diffs where the original is NOT a complete
|
| 1708 |
-
# word — i.e., the character after the diff end is still Arabic.
|
| 1709 |
-
# This catches cases like الدفتر being split into الدفت.ر
|
| 1710 |
-
d_end = d['end']
|
| 1711 |
-
if d_end < len(ctx.current_text):
|
| 1712 |
-
next_ch = ctx.current_text[d_end]
|
| 1713 |
-
if '\u0600' <= next_ch <= '\u06FF':
|
| 1714 |
-
logger.info(
|
| 1715 |
-
f"[PUNC-SAFETY] Rejected mid-word split [{d['start']}:{d_end}] "
|
| 1716 |
-
f"'{d.get('original','')}' → '{d.get('correction','')}' "
|
| 1717 |
-
f"(next char '{next_ch}' is Arabic — word was split)"
|
| 1718 |
-
)
|
| 1719 |
-
continue
|
| 1720 |
# Punctuation safety layer: reject non-punctuation changes
|
| 1721 |
if not validate_punctuation_diff(d):
|
| 1722 |
logger.info(
|
|
@@ -1724,21 +1535,6 @@ def analyze_text():
|
|
| 1724 |
f"'{d.get('original','')}' → '{d.get('correction','')}' — not a safe punctuation change"
|
| 1725 |
)
|
| 1726 |
continue
|
| 1727 |
-
# ── Duplicate punctuation guard ──
|
| 1728 |
-
# Reject corrections that just append punctuation to already-punctuated text
|
| 1729 |
-
# e.g. "الحديقة." → "الحديقة.." or "..." → "...."
|
| 1730 |
-
import re as _re2
|
| 1731 |
-
orig_txt = d.get('original', '')
|
| 1732 |
-
corr_txt = d.get('correction', '')
|
| 1733 |
-
_PUNC_CHARS = set('.,،؛:!?؟…。')
|
| 1734 |
-
if orig_txt and corr_txt and len(corr_txt) > len(orig_txt):
|
| 1735 |
-
suffix_added = corr_txt[len(orig_txt):]
|
| 1736 |
-
if all(c in _PUNC_CHARS for c in suffix_added) and orig_txt[-1] in _PUNC_CHARS:
|
| 1737 |
-
logger.info(
|
| 1738 |
-
f"[PUNC-DUP] Rejected duplicate punctuation [{d['start']}:{d['end']}] "
|
| 1739 |
-
f"'{orig_txt}' → '{corr_txt}' — already has punctuation"
|
| 1740 |
-
)
|
| 1741 |
-
continue
|
| 1742 |
ctx.add_patch(
|
| 1743 |
'punctuation', d['start'], d['end'],
|
| 1744 |
d['correction'], confidence=0.8
|
|
@@ -1809,10 +1605,6 @@ def analyze_text():
|
|
| 1809 |
if stage_errors:
|
| 1810 |
response_data['warnings'] = stage_errors
|
| 1811 |
|
| 1812 |
-
# ── Cache Store (P3) ──
|
| 1813 |
-
if response_status == 'success':
|
| 1814 |
-
_set_cached_response(text, response_data)
|
| 1815 |
-
|
| 1816 |
return jsonify(response_data)
|
| 1817 |
|
| 1818 |
except Exception as e:
|
|
@@ -1825,52 +1617,6 @@ def analyze_text():
|
|
| 1825 |
}), 500
|
| 1826 |
|
| 1827 |
|
| 1828 |
-
@app.route('/api/feedback', methods=['POST'])
|
| 1829 |
-
def submit_feedback():
|
| 1830 |
-
"""Accept user feedback on correction suggestions."""
|
| 1831 |
-
try:
|
| 1832 |
-
if not request.is_json:
|
| 1833 |
-
return jsonify({'error': 'Request must be JSON', 'status': 'error'}), 400
|
| 1834 |
-
|
| 1835 |
-
data = request.get_json()
|
| 1836 |
-
suggestion_id = data.get('suggestion_id', '')
|
| 1837 |
-
helpful = data.get('helpful', None)
|
| 1838 |
-
text = data.get('text', '')[:200] # Truncate for safety
|
| 1839 |
-
original = data.get('original', '')[:100]
|
| 1840 |
-
correction = data.get('correction', '')[:100]
|
| 1841 |
-
|
| 1842 |
-
if helpful is None:
|
| 1843 |
-
return jsonify({'error': 'helpful field is required', 'status': 'error'}), 400
|
| 1844 |
-
|
| 1845 |
-
# Log feedback (simple file-based for now)
|
| 1846 |
-
feedback_entry = {
|
| 1847 |
-
'timestamp': time.strftime('%Y-%m-%d %H:%M:%S'),
|
| 1848 |
-
'suggestion_id': suggestion_id,
|
| 1849 |
-
'helpful': helpful,
|
| 1850 |
-
'original': original,
|
| 1851 |
-
'correction': correction,
|
| 1852 |
-
'text_snippet': text,
|
| 1853 |
-
'ip': request.headers.get('X-Forwarded-For', request.remote_addr),
|
| 1854 |
-
}
|
| 1855 |
-
logger.info(f"[FEEDBACK] {feedback_entry}")
|
| 1856 |
-
|
| 1857 |
-
# Append to feedback log file
|
| 1858 |
-
try:
|
| 1859 |
-
feedback_dir = Path(__file__).parent.parent / 'logs'
|
| 1860 |
-
feedback_dir.mkdir(exist_ok=True)
|
| 1861 |
-
with open(feedback_dir / 'feedback.jsonl', 'a', encoding='utf-8') as f:
|
| 1862 |
-
import json
|
| 1863 |
-
f.write(json.dumps(feedback_entry, ensure_ascii=False) + '\n')
|
| 1864 |
-
except Exception as log_err:
|
| 1865 |
-
logger.warning(f"[FEEDBACK] Could not write to file: {log_err}")
|
| 1866 |
-
|
| 1867 |
-
return jsonify({'status': 'success', 'message': 'شكراً لملاحظاتك!'})
|
| 1868 |
-
|
| 1869 |
-
except Exception as e:
|
| 1870 |
-
logger.error(f"[FEEDBACK] Error: {e}")
|
| 1871 |
-
return jsonify({'error': 'Failed to submit feedback', 'status': 'error'}), 500
|
| 1872 |
-
|
| 1873 |
-
|
| 1874 |
@app.errorhandler(404)
|
| 1875 |
def not_found(error):
|
| 1876 |
"""Handle 404 errors."""
|
|
|
|
| 6 |
import os
|
| 7 |
import logging
|
| 8 |
import time
|
|
|
|
|
|
|
|
|
|
| 9 |
from flask import Flask, request, jsonify, Response
|
| 10 |
from flask_cors import CORS
|
| 11 |
from pathlib import Path
|
|
|
|
| 76 |
MAX_SUMMARY_LENGTH = 512 # Maximum tokens for summary
|
| 77 |
MIN_TEXT_LENGTH = 10 # Minimum characters for summarization
|
| 78 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 79 |
# Global model instances
|
| 80 |
summarization_model = None
|
| 81 |
spelling_model = None
|
|
|
|
| 917 |
('ء', 'أ'), ('أ', 'ء'), # standalone hamza ↔ hamza on alef
|
| 918 |
('ء', 'ؤ'), ('ؤ', 'ء'), # standalone hamza ↔ hamza on waw
|
| 919 |
('ء', 'ئ'), ('ئ', 'ء'), # standalone hamza ↔ hamza on ya
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 920 |
}
|
| 921 |
# Check every character pair — reject if ANY non-orthographic change
|
| 922 |
if len(orig_word) != len(corr_word):
|
|
|
|
| 1069 |
|
| 1070 |
@app.route('/api/analyze', methods=['POST'])
|
| 1071 |
def analyze_text():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1072 |
"""
|
| 1073 |
Perform sequential analysis (Spelling -> Grammar -> Punctuation)
|
| 1074 |
and return word-level suggestions with offsets.
|
|
|
|
| 1090 |
if not text:
|
| 1091 |
return jsonify({'error': 'Text is required', 'status': 'error'}), 400
|
| 1092 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1093 |
# Reject inputs that are predominantly non-Arabic (code, markup, etc.)
|
| 1094 |
arabic_chars = len(re.findall(r'[\u0600-\u06FF]', text))
|
| 1095 |
alpha_chars = len(re.findall(r'[a-zA-Z\u0600-\u06FF]', text))
|
|
|
|
| 1358 |
except Exception as e:
|
| 1359 |
logger.error(f"[ANALYZE] Hamza fix failed: {type(e).__name__}: {e}")
|
| 1360 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1361 |
# 2. Grammar (runs on spelling-corrected text — word-level dependency)
|
| 1362 |
try:
|
| 1363 |
t0 = time.time()
|
|
|
|
| 1466 |
stage_label = 'grammar'
|
| 1467 |
if _is_spelling_only_change(orig_text, corr_text):
|
| 1468 |
stage_label = 'spelling'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1469 |
ctx.add_patch(
|
| 1470 |
stage_label, d['start'], d['end'],
|
| 1471 |
corr_text, confidence=1.0
|
|
|
|
| 1502 |
from nlp.punctuation.punctuation_service import get_punctuation_model
|
| 1503 |
punc_checker = get_punctuation_model()
|
| 1504 |
corrected_punc = punc_checker.correct(ctx.current_text)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1505 |
timing_ms['punctuation_ms'] = int((time.time() - t0) * 1000)
|
| 1506 |
logger.info(f"[ANALYZE] Step 3: Punctuation done in {timing_ms['punctuation_ms']}ms")
|
| 1507 |
if corrected_punc != ctx.current_text:
|
|
|
|
| 1528 |
f"'{d.get('original','')}' \u2192 '{d.get('correction','')}' "
|
| 1529 |
f"(locked by {owner}[{ls}:{le}])"
|
| 1530 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1531 |
# Punctuation safety layer: reject non-punctuation changes
|
| 1532 |
if not validate_punctuation_diff(d):
|
| 1533 |
logger.info(
|
|
|
|
| 1535 |
f"'{d.get('original','')}' → '{d.get('correction','')}' — not a safe punctuation change"
|
| 1536 |
)
|
| 1537 |
continue
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1538 |
ctx.add_patch(
|
| 1539 |
'punctuation', d['start'], d['end'],
|
| 1540 |
d['correction'], confidence=0.8
|
|
|
|
| 1605 |
if stage_errors:
|
| 1606 |
response_data['warnings'] = stage_errors
|
| 1607 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1608 |
return jsonify(response_data)
|
| 1609 |
|
| 1610 |
except Exception as e:
|
|
|
|
| 1617 |
}), 500
|
| 1618 |
|
| 1619 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1620 |
@app.errorhandler(404)
|
| 1621 |
def not_found(error):
|
| 1622 |
"""Handle 404 errors."""
|
src/nlp/punctuation/punctuation_rules.py
CHANGED
|
@@ -137,19 +137,5 @@ def validate_punctuation_diff(diff: dict) -> bool:
|
|
| 137 |
if punct_delta > MAX_PUNCT_DELTA:
|
| 138 |
return False
|
| 139 |
|
| 140 |
-
# ── Rule 6: Reject mid-word punctuation insertion ──
|
| 141 |
-
# If the correction ends with a punctuation mark followed by nothing,
|
| 142 |
-
# but the original word is a PREFIX of a longer word in context,
|
| 143 |
-
# this indicates mid-word split (e.g. الدفت→الدفت. when word was الدفتر).
|
| 144 |
-
# Detect by checking if correction has punctuation NOT at word boundary.
|
| 145 |
-
for pc in ARABIC_PUNCT_CHARS:
|
| 146 |
-
if pc in correction:
|
| 147 |
-
# Check if punctuation is followed by an Arabic letter (mid-word)
|
| 148 |
-
idx = correction.find(pc)
|
| 149 |
-
if idx >= 0 and idx < len(correction) - 1:
|
| 150 |
-
next_char = correction[idx + 1]
|
| 151 |
-
if '\u0600' <= next_char <= '\u06FF':
|
| 152 |
-
return False
|
| 153 |
-
|
| 154 |
return True
|
| 155 |
|
|
|
|
| 137 |
if punct_delta > MAX_PUNCT_DELTA:
|
| 138 |
return False
|
| 139 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 140 |
return True
|
| 141 |
|
src/nlp/spelling/araspell_rules.py
CHANGED
|
@@ -184,16 +184,6 @@ class AraSpellPostProcessor:
|
|
| 184 |
Also handles prefixed words: و/ف/ب/ك/ل + whitelist word.
|
| 185 |
e.g. واصدقائي → وأصدقائي, بالاسعار → بالأسعار
|
| 186 |
"""
|
| 187 |
-
# Words that must NOT be decomposed by prefix stripping
|
| 188 |
-
# كان (was) ≠ ك+أن, بان (appeared) ≠ ب+أن, لان (softened) ≠ ل+أن, فان (van) ≠ ف+أن
|
| 189 |
-
HAMZA_PREFIX_BLACKLIST = {
|
| 190 |
-
'كان', 'كانت', 'كانوا', 'كانا',
|
| 191 |
-
'بان', 'بانت', 'بانوا',
|
| 192 |
-
'لان', 'لانت',
|
| 193 |
-
'فان', 'فانت',
|
| 194 |
-
'وان', 'وانت',
|
| 195 |
-
'كانه', 'كانها', 'كانهم',
|
| 196 |
-
}
|
| 197 |
words = text.split()
|
| 198 |
result = []
|
| 199 |
for word in words:
|
|
@@ -202,11 +192,6 @@ class AraSpellPostProcessor:
|
|
| 202 |
result.append(AraSpellPostProcessor.HAMZA_WHITELIST[word])
|
| 203 |
continue
|
| 204 |
|
| 205 |
-
# Skip words in the blacklist — they are valid as-is
|
| 206 |
-
if word in HAMZA_PREFIX_BLACKLIST:
|
| 207 |
-
result.append(word)
|
| 208 |
-
continue
|
| 209 |
-
|
| 210 |
# Try stripping common prefixes and looking up the remainder
|
| 211 |
fixed = False
|
| 212 |
for prefix in AraSpellPostProcessor.HAMZA_PREFIXES:
|
|
|
|
| 184 |
Also handles prefixed words: و/ف/ب/ك/ل + whitelist word.
|
| 185 |
e.g. واصدقائي → وأصدقائي, بالاسعار → بالأسعار
|
| 186 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 187 |
words = text.split()
|
| 188 |
result = []
|
| 189 |
for word in words:
|
|
|
|
| 192 |
result.append(AraSpellPostProcessor.HAMZA_WHITELIST[word])
|
| 193 |
continue
|
| 194 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 195 |
# Try stripping common prefixes and looking up the remainder
|
| 196 |
fixed = False
|
| 197 |
for prefix in AraSpellPostProcessor.HAMZA_PREFIXES:
|
tests/test_bug_fixes.py
CHANGED
|
@@ -657,186 +657,5 @@ class TestSuffixCorruption(unittest.TestCase):
|
|
| 657 |
"Verb+pronoun كتبته→كتبتة must be blocked")
|
| 658 |
|
| 659 |
|
| 660 |
-
# ═══════════════════════════════════════════════════════════════
|
| 661 |
-
# P2: Ta Marbuta Fix Tests
|
| 662 |
-
# ═══════════════════════════════════════════════════════════════
|
| 663 |
-
class TestTaMarbutaFix(unittest.TestCase):
|
| 664 |
-
"""Tests for the _fix_ta_marbuta pipeline function."""
|
| 665 |
-
|
| 666 |
-
@classmethod
|
| 667 |
-
def setUpClass(cls):
|
| 668 |
-
from app import _fix_ta_marbuta, _TA_MARBUTA_DICT
|
| 669 |
-
cls.fix = staticmethod(_fix_ta_marbuta)
|
| 670 |
-
cls.dict = _TA_MARBUTA_DICT
|
| 671 |
-
|
| 672 |
-
def test_basic_fix(self):
|
| 673 |
-
"""المدرسه should be corrected to المدرسة."""
|
| 674 |
-
result, changes = self.fix('ذهبت الى المدرسه')
|
| 675 |
-
self.assertIn('المدرسة', result)
|
| 676 |
-
self.assertEqual(len(changes), 1)
|
| 677 |
-
self.assertEqual(changes[0]['original'], 'المدرسه')
|
| 678 |
-
self.assertEqual(changes[0]['correction'], 'المدرسة')
|
| 679 |
-
|
| 680 |
-
def test_multiple_fixes(self):
|
| 681 |
-
"""Multiple ta marbuta errors in one sentence."""
|
| 682 |
-
result, changes = self.fix('الحياه في المدينه جميله')
|
| 683 |
-
self.assertIn('الحياة', result)
|
| 684 |
-
self.assertIn('المدينة', result)
|
| 685 |
-
self.assertGreaterEqual(len(changes), 2)
|
| 686 |
-
|
| 687 |
-
def test_no_false_positives(self):
|
| 688 |
-
"""Words ending in ه that are NOT ta marbuta should be left alone."""
|
| 689 |
-
result, changes = self.fix('الله أكبر')
|
| 690 |
-
self.assertEqual(result, 'الله أكبر')
|
| 691 |
-
self.assertEqual(len(changes), 0)
|
| 692 |
-
|
| 693 |
-
def test_correct_text_untouched(self):
|
| 694 |
-
"""Already correct text should not be changed."""
|
| 695 |
-
result, changes = self.fix('ذهبت إلى المدرسة')
|
| 696 |
-
self.assertEqual(result, 'ذهبت إلى المدرسة')
|
| 697 |
-
self.assertEqual(len(changes), 0)
|
| 698 |
-
|
| 699 |
-
def test_without_alef_lam(self):
|
| 700 |
-
"""Bare words without ال should also be fixed."""
|
| 701 |
-
result, changes = self.fix('هذه مدرسه كبيره')
|
| 702 |
-
self.assertIn('مدرسة', result)
|
| 703 |
-
|
| 704 |
-
def test_dict_coverage(self):
|
| 705 |
-
"""Dictionary should have significant coverage."""
|
| 706 |
-
self.assertGreater(len(self.dict), 50)
|
| 707 |
-
|
| 708 |
-
|
| 709 |
-
# ═══════════════════════════════════════════════════════════════
|
| 710 |
-
# P2: Hamza Whitelist Tests
|
| 711 |
-
# ═══════════════════════════════════════════════════════════════
|
| 712 |
-
class TestHamzaWhitelist(unittest.TestCase):
|
| 713 |
-
"""Tests for hamza fix function."""
|
| 714 |
-
|
| 715 |
-
@classmethod
|
| 716 |
-
def setUpClass(cls):
|
| 717 |
-
try:
|
| 718 |
-
from nlp.spelling.araspell_rules import AraSpellPostProcessor
|
| 719 |
-
cls.fix = staticmethod(AraSpellPostProcessor.fix_common_hamza)
|
| 720 |
-
cls.available = True
|
| 721 |
-
except Exception:
|
| 722 |
-
cls.available = False
|
| 723 |
-
|
| 724 |
-
def test_anta_fix(self):
|
| 725 |
-
"""انت should become أنت."""
|
| 726 |
-
if not self.available:
|
| 727 |
-
self.skipTest("AraSpellPostProcessor not available")
|
| 728 |
-
result = self.fix('انت طالب')
|
| 729 |
-
self.assertIn('أنت', result)
|
| 730 |
-
|
| 731 |
-
def test_ana_fix(self):
|
| 732 |
-
"""انا should become أنا."""
|
| 733 |
-
if not self.available:
|
| 734 |
-
self.skipTest("AraSpellPostProcessor not available")
|
| 735 |
-
result = self.fix('انا ذاهب')
|
| 736 |
-
self.assertIn('أنا', result)
|
| 737 |
-
|
| 738 |
-
def test_alaan_fix(self):
|
| 739 |
-
"""الان should become الآن."""
|
| 740 |
-
if not self.available:
|
| 741 |
-
self.skipTest("AraSpellPostProcessor not available")
|
| 742 |
-
result = self.fix('اذهب الان')
|
| 743 |
-
self.assertIn('الآن', result)
|
| 744 |
-
|
| 745 |
-
def test_correct_hamza_untouched(self):
|
| 746 |
-
"""Already correct hamza should not be changed."""
|
| 747 |
-
if not self.available:
|
| 748 |
-
self.skipTest("AraSpellPostProcessor not available")
|
| 749 |
-
result = self.fix('أنت ذاهب إلى المدرسة')
|
| 750 |
-
self.assertEqual(result, 'أنت ذاهب إلى المدرسة')
|
| 751 |
-
|
| 752 |
-
|
| 753 |
-
# ═══════════════════════════════════════════════════════════════
|
| 754 |
-
# P3: Caching & Rate Limiting Tests
|
| 755 |
-
# ═══════════════════════════════════════════════════════════════
|
| 756 |
-
class TestCachingAndRateLimiting(unittest.TestCase):
|
| 757 |
-
"""Tests for response caching and rate limiting."""
|
| 758 |
-
|
| 759 |
-
@classmethod
|
| 760 |
-
def setUpClass(cls):
|
| 761 |
-
from app import (
|
| 762 |
-
_get_cache_key, _get_cached_response,
|
| 763 |
-
_set_cached_response, _check_rate_limit,
|
| 764 |
-
_analyze_cache, _rate_limit_store
|
| 765 |
-
)
|
| 766 |
-
cls._get_cache_key = staticmethod(_get_cache_key)
|
| 767 |
-
cls._get_cached = staticmethod(_get_cached_response)
|
| 768 |
-
cls._set_cached = staticmethod(_set_cached_response)
|
| 769 |
-
cls._check_rate = staticmethod(_check_rate_limit)
|
| 770 |
-
cls._cache = _analyze_cache
|
| 771 |
-
cls._rate_store = _rate_limit_store
|
| 772 |
-
|
| 773 |
-
def setUp(self):
|
| 774 |
-
self._cache.clear()
|
| 775 |
-
self._rate_store.clear()
|
| 776 |
-
|
| 777 |
-
def test_cache_key_deterministic(self):
|
| 778 |
-
"""Same text should produce same cache key."""
|
| 779 |
-
key1 = self._get_cache_key('مرحبا')
|
| 780 |
-
key2 = self._get_cache_key('مرحبا')
|
| 781 |
-
self.assertEqual(key1, key2)
|
| 782 |
-
|
| 783 |
-
def test_cache_key_different(self):
|
| 784 |
-
"""Different texts should produce different keys."""
|
| 785 |
-
key1 = self._get_cache_key('مرحبا')
|
| 786 |
-
key2 = self._get_cache_key('أهلا')
|
| 787 |
-
self.assertNotEqual(key1, key2)
|
| 788 |
-
|
| 789 |
-
def test_cache_store_and_retrieve(self):
|
| 790 |
-
"""Cached response should be retrievable."""
|
| 791 |
-
data = {'original': 'test', 'corrected': 'test', 'suggestions': []}
|
| 792 |
-
self._set_cached('مرحبا', data)
|
| 793 |
-
result = self._get_cached('مرحبا')
|
| 794 |
-
self.assertIsNotNone(result)
|
| 795 |
-
self.assertEqual(result['original'], 'test')
|
| 796 |
-
|
| 797 |
-
def test_cache_miss(self):
|
| 798 |
-
"""Non-cached text should return None."""
|
| 799 |
-
result = self._get_cached('نص جديد')
|
| 800 |
-
self.assertIsNone(result)
|
| 801 |
-
|
| 802 |
-
def test_rate_limit_allows(self):
|
| 803 |
-
"""First request should be allowed."""
|
| 804 |
-
self.assertTrue(self._check_rate('127.0.0.1'))
|
| 805 |
-
|
| 806 |
-
def test_rate_limit_blocks(self):
|
| 807 |
-
"""Should block after exceeding limit."""
|
| 808 |
-
for _ in range(30):
|
| 809 |
-
self._check_rate('test_ip')
|
| 810 |
-
self.assertFalse(self._check_rate('test_ip'))
|
| 811 |
-
|
| 812 |
-
|
| 813 |
-
# ═══════════════════════════════════════════════════════════════
|
| 814 |
-
# P2: Grammar Splitting Tests
|
| 815 |
-
# ═══════════════════════════════════════════════════════════════
|
| 816 |
-
class TestGrammarSplitting(unittest.TestCase):
|
| 817 |
-
"""Tests for grammar multi-word diff splitting logic."""
|
| 818 |
-
|
| 819 |
-
def test_split_logic(self):
|
| 820 |
-
"""Multi-word grammar diffs should be split into individual words."""
|
| 821 |
-
# Simulate the splitting logic from analyze_text
|
| 822 |
-
orig_text = 'الي المدرسه الاستاذ'
|
| 823 |
-
corr_text = 'إلى المدرسة الأستاذ'
|
| 824 |
-
orig_words = orig_text.split()
|
| 825 |
-
corr_words = corr_text.split()
|
| 826 |
-
|
| 827 |
-
self.assertEqual(len(orig_words), len(corr_words))
|
| 828 |
-
|
| 829 |
-
diffs = []
|
| 830 |
-
for ow, cw in zip(orig_words, corr_words):
|
| 831 |
-
if ow != cw:
|
| 832 |
-
diffs.append({'original': ow, 'correction': cw})
|
| 833 |
-
|
| 834 |
-
self.assertEqual(len(diffs), 3)
|
| 835 |
-
self.assertEqual(diffs[0]['original'], 'الي')
|
| 836 |
-
self.assertEqual(diffs[0]['correction'], 'إلى')
|
| 837 |
-
self.assertEqual(diffs[1]['original'], 'المدرسه')
|
| 838 |
-
self.assertEqual(diffs[1]['correction'], 'المدرسة')
|
| 839 |
-
|
| 840 |
-
|
| 841 |
if __name__ == '__main__':
|
| 842 |
unittest.main()
|
|
|
|
| 657 |
"Verb+pronoun كتبته→كتبتة must be blocked")
|
| 658 |
|
| 659 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 660 |
if __name__ == '__main__':
|
| 661 |
unittest.main()
|