ikarasz committed on
Commit
272cb8d
·
1 Parent(s): a0fc189

testing math vocabulary measure

Browse files
measures/VocabularyAnalyser.py CHANGED
@@ -14,8 +14,8 @@ def norm_txt(x: str) -> str:
14
 
15
  class VocabularyAnalyser:
16
  def __init__(self, glossary_file: str):
17
- # Load glossary CSV (first column = base + variants, comma-separated)
18
- raw = pd.read_csv(glossary_file)
19
 
20
  gloss_list = []
21
  for idx, row in raw.iterrows():
 
14
 
15
  class VocabularyAnalyser:
16
  def __init__(self, glossary_file: str):
17
+ # Load glossary CSV (no header row; each row is a base term followed by its comma-separated variants)
18
+ raw = pd.read_csv(glossary_file, header=None)
19
 
20
  gloss_list = []
21
  for idx, row in raw.iterrows():
measures/__pycache__/VocabularyAnalyser.cpython-310.pyc ADDED
Binary file (2.72 kB). View file
 
measures/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (161 Bytes). View file
 
requirements.txt CHANGED
@@ -8,3 +8,4 @@ transformers==4.46.1
8
  nltk==3.9.1
9
  inflect==7.5.0
10
  pandas==2.2.2
 
 
8
  nltk==3.9.1
9
  inflect==7.5.0
10
  pandas==2.2.2
11
+ pytest==8.2.2
tests/__pycache__/test_vocabulary_analyser.cpython-310-pytest-8.2.2.pyc ADDED
Binary file (4.71 kB). View file
 
tests/test_vocabulary_analyser.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import textwrap
2
+ import sys
3
+ from pathlib import Path
4
+
5
+ import pytest
6
+
7
+ PROJECT_ROOT = Path(__file__).resolve().parents[1]
8
+ if str(PROJECT_ROOT) not in sys.path:
9
+ sys.path.insert(0, str(PROJECT_ROOT))
10
+
11
+ from measures.VocabularyAnalyser import VocabularyAnalyser
12
+
13
+
14
+ @pytest.fixture
15
+ def glossary_file(tmp_path):
16
+ """Create a small glossary CSV for testing."""
17
+ csv_content = textwrap.dedent(
18
+ """\
19
+ acute,,,
20
+ acute angle, acute angles,,
21
+ acute triangle, acute triangles,,
22
+ add, added, adding, adds
23
+ addend, addends,,
24
+ """
25
+ )
26
+ path = tmp_path / "glossary.csv"
27
+ path.write_text(csv_content, encoding="utf-8")
28
+ return str(path)
29
+
30
+
31
+ @pytest.fixture
32
+ def analyser(glossary_file):
33
+ return VocabularyAnalyser(glossary_file)
34
+
35
+
36
+ def test_match_counts_base_once(analyser):
37
+ text = "Add add ADD adding added adds"
38
+ assert analyser.match_one_utterance(text) == ["add"]
39
+
40
+
41
+ def test_match_prefers_longest_phrase(analyser):
42
+ text = "An acute angle appears in this proof."
43
+ assert analyser.match_one_utterance(text) == ["acute angle"]
44
+
45
+
46
+ def test_match_handles_overlapping_and_distinct_terms(analyser):
47
+ text = (
48
+ "The class studied the properties of an acute triangle, then discussed an acute situation."
49
+ )
50
+ assert analyser.match_one_utterance(text) == [
51
+ "acute",
52
+ "acute triangle",
53
+ ]
54
+
55
+
56
+ def test_run_analysis_adds_vocabulary_terms(analyser):
57
+ class DummyUtterance:
58
+ def __init__(self, speaker, text):
59
+ self.speaker = speaker
60
+ self.text = text
61
+ self.vocabulary_terms = None
62
+
63
+ class DummyTranscript:
64
+ def __init__(self, utterances):
65
+ self.utterances = utterances
66
+
67
+ transcript = DummyTranscript(
68
+ [
69
+ DummyUtterance("Teacher", "We add addends in this acute triangle."),
70
+ DummyUtterance("Student", "Acute angles contrast with obtuse ones."),
71
+ DummyUtterance("Teacher", "No glossary matches"),
72
+ ]
73
+ )
74
+
75
+ result = analyser.run_analysis(transcript)
76
+
77
+ assert result is transcript
78
+ assert transcript.utterances[0].vocabulary_terms == ["acute triangle", "add", "addend"]
79
+ assert transcript.utterances[1].vocabulary_terms == ["acute angle"]
80
+ assert transcript.utterances[2].vocabulary_terms == []