Update essay_aggregator.py

essay_aggregator.py  CHANGED  (+90 -90)

@@ -1,90 +1,90 @@
 """
 Essay-level dyslexia analysis module.
 
 Responsibility:
 - Split an essay into sentences
 - Apply sentence-level dyslexia detection
 - Aggregate results into an essay-level decision
 
 This module bridges sentence predictions → essay screening.
 """
 
 import re
-from
+from sentence_classifier import predict_sentence
 
 
 
 import sys
 
 
 
 
 
 def split_sentences(text: str):
     if not text or not text.strip():
         return []
 
     text = text.replace("\r\n", "\n").replace("\r", "\n")
 
     # Split by punctuation, Sinhala danda, or newline
     raw_sentences = re.split(r"[.!?।\n]+", text)
 
     cleaned = []
     for s in raw_sentences:
         s = s.strip()
         if len(s) >= 3:
             cleaned.append(s)
 
     # If still only 1 long paragraph, optionally chunk it
     if len(cleaned) == 1 and len(cleaned[0]) > 200:
         long_text = cleaned[0]
         cleaned = [long_text[i:i+120] for i in range(0, len(long_text), 120)]
 
     return cleaned
 
 
 
 
 
 
 
 
 
 def analyze_essay(essay_text: str, threshold: float = 0.65):
     sentences = split_sentences(essay_text)
 
     if not sentences:
         return {"error": "No valid sentences found."}
 
     dyslexic_count = 0
     probabilities = []
     sentence_results = []
 
     for s in sentences:
         prob = predict_sentence(s)
         probabilities.append(prob)
 
         is_dyslexic = prob >= threshold
         if is_dyslexic:
             dyslexic_count += 1
 
         sentence_results.append({
             "text": s,
             "probability": round(float(prob), 2),
             "label": "DYSLEXIC" if is_dyslexic else "NORMAL"
         })
 
     essay_label = (
         "DYSLEXIC ESSAY"
         if dyslexic_count >= 1
         else "NORMAL ESSAY"
     )
 
     confidence = sum(probabilities) / len(probabilities)
 
     return {
         "essay_label": essay_label,
         "confidence": round(confidence, 2),
         "total_sentences": len(sentences),
         "dyslexic_sentences": dyslexic_count,
         "sentences": sentence_results
     }
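
For a quick end-to-end check of the now-resolvable import, here is a minimal, self-contained sketch. `sentence_classifier` is not part of this diff, so the example injects a hypothetical stub before importing the module; the stub's `predict_sentence` and its toy misspelling heuristic are invented for illustration and only assume the signature `analyze_essay` relies on (a sentence in, a probability in [0, 1] out):

```python
import sys
import types

# Hypothetical stand-in for the sentence_classifier module imported above;
# it is assumed (not shown in this diff) to expose
# predict_sentence(sentence: str) -> float in [0, 1].
fake = types.ModuleType("sentence_classifier")
fake.predict_sentence = lambda s: 0.9 if "becuase" in s else 0.1  # toy heuristic
sys.modules["sentence_classifier"] = fake

import essay_aggregator

# Two sentences; only the misspelled one crosses the 0.65 default threshold.
result = essay_aggregator.analyze_essay("I went home becuase it rained. The sky was blue.")
print(result["essay_label"])         # DYSLEXIC ESSAY
print(result["dyslexic_sentences"])  # 1
print(result["confidence"])          # 0.5 (mean of 0.9 and 0.1)

# split_sentences alone: punctuation-free text longer than 200 characters
# is chunked into fixed 120-character windows.
print(len(essay_aggregator.split_sentences("x" * 250)))  # 3 (120 + 120 + 10)
```

One design consequence worth noting: because the essay label tests `dyslexic_count >= 1`, a single sentence at or above the threshold flags the entire essay, so the aggregator errs toward flagging for screening rather than missing cases.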