tinykavi committed on
Commit
8c8586c
·
verified ·
1 Parent(s): cc4f6fa

Update essay_aggregator.py

Browse files
Files changed (1) hide show
  1. essay_aggregator.py +90 -90
essay_aggregator.py CHANGED
@@ -1,90 +1,90 @@
1
- """
2
- Essay-level dyslexia analysis module.
3
-
4
- Responsibility:
5
- - Split an essay into sentences
6
- - Apply sentence-level dyslexia detection
7
- - Aggregate results into an essay-level decision
8
-
9
- This module bridges sentence predictions → essay screening.
10
- """
11
-
12
- import re
13
- from src.sentence_classifier import predict_sentence
14
-
15
-
16
-
17
- import sys
18
-
19
-
20
-
21
-
22
-
23
- def split_sentences(text: str):
24
- if not text or not text.strip():
25
- return []
26
-
27
- text = text.replace("\r\n", "\n").replace("\r", "\n")
28
-
29
- # Split by punctuation, Sinhala danda, or newline
30
- raw_sentences = re.split(r"[.!?।\n]+", text)
31
-
32
- cleaned = []
33
- for s in raw_sentences:
34
- s = s.strip()
35
- if len(s) >= 3:
36
- cleaned.append(s)
37
-
38
- # If still only 1 long paragraph, optionally chunk it
39
- if len(cleaned) == 1 and len(cleaned[0]) > 200:
40
- long_text = cleaned[0]
41
- cleaned = [long_text[i:i+120] for i in range(0, len(long_text), 120)]
42
-
43
- return cleaned
44
-
45
-
46
-
47
-
48
-
49
-
50
-
51
-
52
- def analyze_essay(essay_text: str, threshold: float = 0.65):
53
- sentences = split_sentences(essay_text)
54
-
55
- if not sentences:
56
- return {"error": "No valid sentences found."}
57
-
58
- dyslexic_count = 0
59
- probabilities = []
60
- sentence_results = []
61
-
62
- for s in sentences:
63
- prob = predict_sentence(s)
64
- probabilities.append(prob)
65
-
66
- is_dyslexic = prob >= threshold
67
- if is_dyslexic:
68
- dyslexic_count += 1
69
-
70
- sentence_results.append({
71
- "text": s,
72
- "probability": round(float(prob), 2),
73
- "label": "DYSLEXIC" if is_dyslexic else "NORMAL"
74
- })
75
-
76
- essay_label = (
77
- "DYSLEXIC ESSAY"
78
- if dyslexic_count >= 1
79
- else "NORMAL ESSAY"
80
- )
81
-
82
- confidence = sum(probabilities) / len(probabilities)
83
-
84
- return {
85
- "essay_label": essay_label,
86
- "confidence": round(confidence, 2),
87
- "total_sentences": len(sentences),
88
- "dyslexic_sentences": dyslexic_count,
89
- "sentences": sentence_results
90
- }
 
1
+ """
2
+ Essay-level dyslexia analysis module.
3
+
4
+ Responsibility:
5
+ - Split an essay into sentences
6
+ - Apply sentence-level dyslexia detection
7
+ - Aggregate results into an essay-level decision
8
+
9
+ This module bridges sentence predictions → essay screening.
10
+ """
11
+
12
+ import re
13
+ from sentence_classifier import predict_sentence
14
+
15
+
16
+
17
+ import sys
18
+
19
+
20
+
21
+
22
+
23
# Fragments shorter than this many characters are discarded as noise.
_MIN_SENTENCE_CHARS = 3
# A single unsplit fragment longer than this is treated as a run-on paragraph.
_LONG_PARAGRAPH_CHARS = 200
# Upper bound on the length of each chunk produced from a run-on paragraph.
_CHUNK_CHARS = 120


def _chunk_on_word_boundaries(text, max_len):
    """Greedily pack the whitespace-separated words of *text* into chunks of at most *max_len* characters."""
    chunks = []
    current = ""
    for word in text.split():
        # A single token longer than max_len cannot be kept whole; only in
        # that case fall back to hard slicing.
        while len(word) > max_len:
            if current:
                chunks.append(current)
                current = ""
            chunks.append(word[:max_len])
            word = word[max_len:]
        if not word:
            continue

        candidate = f"{current} {word}" if current else word
        if len(candidate) <= max_len:
            current = candidate
        else:
            chunks.append(current)
            current = word

    if current:
        chunks.append(current)
    return chunks


def split_sentences(text: str):
    """Split raw essay text into a list of cleaned sentence strings.

    The text is split on runs of ``.``, ``!``, ``?``, the danda ``।``
    (U+0964) and newlines. Each fragment is stripped, and fragments shorter
    than three characters are dropped.

    If the result is exactly one fragment longer than 200 characters (an
    essay written without any sentence punctuation), it is broken into
    chunks of at most 120 characters. Chunks end on word boundaries so that
    no word is cut in half; runs of whitespace inside such a paragraph are
    collapsed to single spaces. Only a single token longer than 120
    characters is hard-sliced.

    Args:
        text: The essay text. ``None``, empty, or whitespace-only input is
            accepted.

    Returns:
        A list of sentence (or chunk) strings, each at least three
        characters long; an empty list when there is no usable text.
    """
    if not text or not text.strip():
        return []

    # Normalise Windows (\r\n) and old Mac (\r) line endings to \n so the
    # newline in the split pattern catches all of them.
    text = text.replace("\r\n", "\n").replace("\r", "\n")

    # Split by sentence punctuation, the danda (U+0964), or newline.
    raw_sentences = re.split(r"[.!?।\n]+", text)

    stripped = (fragment.strip() for fragment in raw_sentences)
    cleaned = [s for s in stripped if len(s) >= _MIN_SENTENCE_CHARS]

    # One long unpunctuated paragraph: chunk it so the sentence-level model
    # is not handed the whole essay as a single "sentence".
    if len(cleaned) == 1 and len(cleaned[0]) > _LONG_PARAGRAPH_CHARS:
        cleaned = [
            chunk
            for chunk in _chunk_on_word_boundaries(cleaned[0], _CHUNK_CHARS)
            # Apply the same minimum length as above to a short tail chunk.
            if len(chunk) >= _MIN_SENTENCE_CHARS
        ]

    return cleaned
44
+
45
+
46
+
47
+
48
+
49
+
50
+
51
+
52
def analyze_essay(essay_text: str, threshold: float = 0.65):
    """Screen an essay by classifying each of its sentences.

    The essay is split with ``split_sentences``; every sentence is scored
    with ``predict_sentence`` and labelled ``"DYSLEXIC"`` when its score is
    at least ``threshold``. The essay is labelled ``"DYSLEXIC ESSAY"`` as
    soon as one sentence is flagged.

    Args:
        essay_text: Raw essay text.
        threshold: Minimum sentence score for a sentence to be flagged.

    Returns:
        ``{"error": "No valid sentences found."}`` when the essay yields no
        sentences. Otherwise a dict with:

        - ``essay_label``: ``"DYSLEXIC ESSAY"`` or ``"NORMAL ESSAY"``.
        - ``confidence``: mean of all sentence scores, rounded to two
          decimals. This is an average over the whole essay, not the score
          of the sentence that triggered the label.
        - ``total_sentences``: number of sentences analysed.
        - ``dyslexic_sentences``: number of flagged sentences.
        - ``sentences``: per-sentence dicts with ``text``, ``probability``
          (rounded to two decimals) and ``label``.
    """
    sentences = split_sentences(essay_text)

    if not sentences:
        return {"error": "No valid sentences found."}

    dyslexic_count = 0
    probabilities = []
    sentence_results = []

    for sentence in sentences:
        raw_prob = predict_sentence(sentence)

        # Compare the raw model output so the flagging decision is exactly
        # what it was before the float conversion below was introduced.
        is_dyslexic = bool(raw_prob >= threshold)
        if is_dyslexic:
            dyslexic_count += 1

        # Convert once to a built-in float so the per-sentence value and the
        # aggregate confidence are both plain floats.
        # NOTE(review): assumes predict_sentence returns a scalar that
        # float() accepts — confirm against sentence_classifier.
        prob = float(raw_prob)
        probabilities.append(prob)

        sentence_results.append({
            "text": sentence,
            "probability": round(prob, 2),
            "label": "DYSLEXIC" if is_dyslexic else "NORMAL",
        })

    # A single flagged sentence is enough to flag the whole essay.
    essay_label = "DYSLEXIC ESSAY" if dyslexic_count >= 1 else "NORMAL ESSAY"

    confidence = sum(probabilities) / len(probabilities)

    return {
        "essay_label": essay_label,
        "confidence": round(confidence, 2),
        "total_sentences": len(sentences),
        "dyslexic_sentences": dyslexic_count,
        "sentences": sentence_results,
    }