Spaces:

bayan10
/

bayan-api

Running

youssefreda9 committed 8 days ago

Commit

d3a32e2

1 Parent(s): 2fe1569

FIX-23: Block grammar model tanween removal + HF log fetcher

1. Tanween blocker: grammar model strips tanween (جداً→جدا) from correct text.
Added filter to block diffs that only differ by tanween marks.
2. HF log fetcher script for automated tracing.

Files changed (3) hide show

src/app.py +15 -0
tests/phase10/fetch_hf_logs.py +89 -0
tests/phase10/reports/phase10_results.json +0 -0

src/app.py CHANGED Viewed

@@ -1631,6 +1631,21 @@ def analyze_text():
                         )
                         continue
                     # ── FIX-06: Directional block protection for grammar ──
                     # Prevents meaning-changing substitutions (كان→كأن etc.)
                     # especially critical when spelling is skipped (>1000 chars).

                         )
                         continue
+                    # ── FIX-23: Tanween removal blocker ──
+                    # The grammar model often strips tanween (ً/ٌ/ٍ) from correct text.
+                    # Block diffs whose only difference is tanween marks (removed, added, or swapped).
+                    if orig_text and corr_text:
+                        import re as _re_tnwn
+                        _TANWEEN = '\u064B\u064C\u064D'  # ً ٌ ٍ
+                        _orig_no_tnwn = _re_tnwn.sub(f'[{_TANWEEN}]', '', orig_text)
+                        _corr_no_tnwn = _re_tnwn.sub(f'[{_TANWEEN}]', '', corr_text)
+                        if _orig_no_tnwn == _corr_no_tnwn and orig_text != corr_text:
+                            logger.info(
+                                f"[GRAMMAR] Blocked tanween removal: "
+                                f"'{orig_text}'→'{corr_text}'"
+                            )
+                            continue
                     # ── FIX-06: Directional block protection for grammar ──
                     # Prevents meaning-changing substitutions (كان→كأن etc.)
                     # especially critical when spelling is skipped (>1000 chars).

tests/phase10/fetch_hf_logs.py ADDED Viewed

	@@ -0,0 +1,89 @@

+"""Fetch HF Space runtime logs and extract key events."""
+import requests
+import json
+import sys
+import os
+SPACE_ID = "bayan10/bayan-api"
def _get_hf_token():
    """Return the Hugging Face access token, or ``""`` when none is found.

    Lookup order:

    1. The ``HF_TOKEN`` environment variable.
    2. The token file written by ``huggingface-cli login``. Its location is
       ``HF_TOKEN_PATH`` when set, otherwise ``<HF_HOME>/token``, where
       ``HF_HOME`` falls back to ``<XDG_CACHE_HOME>/huggingface`` and finally
       to ``~/.cache/huggingface``.

    Surrounding whitespace is stripped from whichever value is found: a stray
    trailing newline would otherwise end up inside the ``Authorization``
    header built from this token.
    """
    # 1. Environment variable
    token = os.environ.get("HF_TOKEN", "").strip()
    if token:
        return token

    # 2. Stored token file, honouring the same overrides as huggingface_hub
    token_path = os.environ.get("HF_TOKEN_PATH")
    if not token_path:
        cache_root = os.environ.get("XDG_CACHE_HOME") or os.path.join(
            os.path.expanduser("~"), ".cache"
        )
        hf_home = os.environ.get("HF_HOME") or os.path.join(
            cache_root, "huggingface"
        )
        token_path = os.path.join(hf_home, "token")
    token_path = os.path.expandvars(os.path.expanduser(token_path))

    # EAFP: open directly instead of exists()+open(), which races with
    # deletion and still fails on permission errors.
    try:
        with open(token_path, "r", encoding="utf-8") as f:
            return f.read().strip()
    except (OSError, UnicodeDecodeError):
        return ""


TOKEN = _get_hf_token()
def _parse_sse_data(raw):
    """Return the log message carried by one SSE line, or ``None``.

    ``raw`` is a single, complete line of the event stream (``str`` or
    UTF-8 ``bytes``). Only ``data: <json-object>`` lines yield a message;
    anything else (keep-alives, other SSE fields, malformed JSON, JSON that
    is not an object) yields ``None``. A missing ``"data"`` key yields
    ``""``; a non-string payload is converted with ``str()`` so callers can
    always treat the result as text.
    """
    if isinstance(raw, bytes):
        raw = raw.decode("utf-8", errors="replace")
    prefix = "data: "
    if not raw.startswith(prefix):
        return None
    try:
        event = json.loads(raw[len(prefix):])
    except ValueError:  # json.JSONDecodeError is a ValueError subclass
        return None
    if not isinstance(event, dict):
        return None
    message = event.get("data", "")
    return message if isinstance(message, str) else str(message)


def fetch_logs(max_lines=500):
    """Fetch runtime logs from the HF Space.

    Args:
        max_lines: Maximum number of log messages to collect. Values
            ``<= 0`` return an empty list without any network access.

    Returns:
        A list of at most ``max_lines`` log message strings. On a non-200
        response an error is printed and ``[]`` is returned. If the
        connection fails or times out mid-stream, the error is printed and
        the messages collected so far are returned.
    """
    lines = []
    if max_lines <= 0:
        return lines

    # Only send the header when a token exists; "Bearer " alone is invalid.
    headers = {"Authorization": f"Bearer {TOKEN}"} if TOKEN else {}
    url = f"https://huggingface.co/api/spaces/{SPACE_ID}/logs/run"
    try:
        # The context manager closes the streamed connection on every exit
        # path, including the early break once enough lines are collected.
        with requests.get(url, headers=headers, timeout=30, stream=True) as r:
            if r.status_code != 200:
                print(f"Error: {r.status_code}")
                return []
            # iter_lines() reassembles lines that straddle network chunks.
            # Iterating bytes and decoding each complete line as UTF-8
            # avoids depending on the charset guessed from the headers.
            for raw in r.iter_lines():
                message = _parse_sse_data(raw)
                if message is None:
                    continue
                lines.append(message)
                if len(lines) >= max_lines:
                    break
    except requests.exceptions.RequestException as exc:
        # Keep whatever was read before an idle log stream timed out.
        print(f"Error: {exc}")
    return lines
def analyze_logs(lines):
    """Categorise log lines, print a summary report, and return the buckets.

    Each line lands in the first matching category, checked in this order:

    * ``errors``   - contains ``ERROR``, ``NameError`` or ``Traceback``
    * ``grammar``  - contains ``[GRAMMAR`` or ``Grammar``
    * ``spelling`` - contains ``[SPELLING``
    * ``startup``  - contains ``Startup``, or ``loaded`` / ``ready`` in any
      letter case

    ``None`` entries are ignored and other non-string entries are converted
    with ``str()``, so a malformed log payload cannot crash the report.

    Args:
        lines: Iterable-with-length of log messages.

    Returns:
        A dict with keys ``"startup"``, ``"errors"``, ``"grammar"`` and
        ``"spelling"``, each mapping to the list of matching lines in their
        original order.
    """
    errors = []
    grammar_events = []
    spelling_events = []
    startup = []
    for line in lines:
        if line is None:
            continue
        if not isinstance(line, str):
            line = str(line)
        if 'ERROR' in line or 'NameError' in line or 'Traceback' in line:
            errors.append(line)
        elif '[GRAMMAR' in line or 'Grammar' in line:
            grammar_events.append(line)
        elif '[SPELLING' in line:
            spelling_events.append(line)
        else:
            lowered = line.lower()  # computed once for both keyword checks
            if 'Startup' in line or 'loaded' in lowered or 'ready' in lowered:
                startup.append(line)

    print(f"\n{'='*60}")
    print(f"HF SPACE LOG ANALYSIS ({len(lines)} lines)")
    print(f"{'='*60}")
    print(f"\n🚀 STARTUP ({len(startup)} events):")
    for e in startup[-5:]:
        print(f"  {e}")
    print(f"\n❌ ERRORS ({len(errors)}):")
    if errors:
        for e in errors[-10:]:
            print(f"  {e}")
    else:
        print("  None! ✅")
    print(f"\n📝 GRAMMAR ({len(grammar_events)} events, last 5):")
    for e in grammar_events[-5:]:
        print(f"  {e}")
    print(f"\n✏️ SPELLING ({len(spelling_events)} events, last 5):")
    for e in spelling_events[-5:]:
        print(f"  {e}")

    return {
        "startup": startup,
        "errors": errors,
        "grammar": grammar_events,
        "spelling": spelling_events,
    }
if __name__ == "__main__":
    # Script entry point: pull the Space's runtime logs with the default
    # line limit and print the categorised report.
    analyze_logs(fetch_logs())

tests/phase10/reports/phase10_results.json CHANGED Viewed

The diff for this file is too large to render. See raw diff