Add tests for the math vocabulary measure and load the glossary CSV without a header row

Browse files

Files changed (6) hide show

measures/VocabularyAnalyser.py +2 -2
measures/__pycache__/VocabularyAnalyser.cpython-310.pyc +0 -0
measures/__pycache__/__init__.cpython-310.pyc +0 -0
requirements.txt +1 -0
tests/__pycache__/test_vocabulary_analyser.cpython-310-pytest-8.2.2.pyc +0 -0
tests/test_vocabulary_analyser.py +80 -0

measures/VocabularyAnalyser.py CHANGED Viewed

@@ -14,8 +14,8 @@ def norm_txt(x: str) -> str:
 class VocabularyAnalyser:
     def __init__(self, glossary_file: str):
-        # Load glossary CSV (first column = base + variants, comma-separated)
-        raw = pd.read_csv(glossary_file)
         gloss_list = []
         for idx, row in raw.iterrows():

 class VocabularyAnalyser:
     def __init__(self, glossary_file: str):
+        # Load glossary CSV (no header, each row base + variants, comma-separated)
+        raw = pd.read_csv(glossary_file, header=None)
         gloss_list = []
         for idx, row in raw.iterrows():

measures/__pycache__/VocabularyAnalyser.cpython-310.pyc ADDED Viewed

Binary file (2.72 kB). View file

measures/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (161 Bytes). View file

requirements.txt CHANGED Viewed

@@ -8,3 +8,4 @@ transformers==4.46.1
 nltk==3.9.1
 inflect==7.5.0
 pandas==2.2.2

 nltk==3.9.1
 inflect==7.5.0
 pandas==2.2.2
+pytest==8.2.2

tests/__pycache__/test_vocabulary_analyser.cpython-310-pytest-8.2.2.pyc ADDED Viewed

Binary file (4.71 kB). View file

tests/test_vocabulary_analyser.py ADDED Viewed

	@@ -0,0 +1,80 @@

+import textwrap
+import sys
+from pathlib import Path
+import pytest
+# Put the directory one level above this test folder on sys.path so that the
+# `measures` package import below resolves without installing the project.
+PROJECT_ROOT = Path(__file__).resolve().parent.parent
+if str(PROJECT_ROOT) not in sys.path:
+    sys.path.insert(0, str(PROJECT_ROOT))
+from measures.VocabularyAnalyser import VocabularyAnalyser
+@pytest.fixture
+def glossary_file(tmp_path):
+    """Write a small header-less glossary CSV into tmp_path and return its path as a str."""
+    # One row per glossary entry: base term first, then its variants; the
+    # empty trailing fields keep every row at four columns.
+    rows = [
+        "acute,,,",
+        "acute angle, acute angles,,",
+        "acute triangle, acute triangles,,",
+        "add, added, adding, adds",
+        "addend, addends,,",
+    ]
+    csv_path = tmp_path / "glossary.csv"
+    csv_path.write_text("\n".join(rows) + "\n", encoding="utf-8")
+    return str(csv_path)
+@pytest.fixture
+def analyser(glossary_file):
+    """Build a VocabularyAnalyser from the temporary glossary CSV path."""
+    vocabulary_analyser = VocabularyAnalyser(glossary_file)
+    return vocabulary_analyser
+def test_match_counts_base_once(analyser):
+    """Repeated, differently-cased and inflected forms of "add" must yield a single "add" entry."""
+    matched = analyser.match_one_utterance("Add add ADD adding added adds")
+    assert matched == ["add"]
+def test_match_prefers_longest_phrase(analyser):
+    """A sentence containing "acute angle" must report only that phrase, not the shorter "acute"."""
+    matched = analyser.match_one_utterance("An acute angle appears in this proof.")
+    assert matched == ["acute angle"]
+def test_match_handles_overlapping_and_distinct_terms(analyser):
+    """A phrase match ("acute triangle") and a separate stand-alone "acute" must both be reported."""
+    sentence = "The class studied the properties of an acute triangle, then discussed an acute situation."
+    expected_terms = ["acute", "acute triangle"]
+    assert analyser.match_one_utterance(sentence) == expected_terms
+def test_run_analysis_adds_vocabulary_terms(analyser):
+    """run_analysis must return the same transcript object with vocabulary_terms set on each utterance."""
+    class DummyUtterance:
+        # Minimal stand-in exposing speaker, text and vocabulary_terms.
+        def __init__(self, speaker, text):
+            self.speaker, self.text = speaker, text
+            self.vocabulary_terms = None
+    class DummyTranscript:
+        # Minimal stand-in exposing an `utterances` list.
+        def __init__(self, utterances):
+            self.utterances = utterances
+    # (speaker, utterance text, expected vocabulary_terms after analysis)
+    cases = [
+        ("Teacher", "We add addends in this acute triangle.", ["acute triangle", "add", "addend"]),
+        ("Student", "Acute angles contrast with obtuse ones.", ["acute angle"]),
+        ("Teacher", "No glossary matches", []),
+    ]
+    transcript = DummyTranscript(
+        [DummyUtterance(speaker, text) for speaker, text, _ in cases]
+    )
+    result = analyser.run_analysis(transcript)
+    assert result is transcript
+    # Index explicitly so a shortened utterance list still fails the test.
+    for position, (_, _, expected) in enumerate(cases):
+        assert transcript.utterances[position].vocabulary_terms == expected