| """ |
| TDD Tests for GRPO Reward Functions. |
| |
| Tests behavioral expectations for reward functions that guide |
| Marxist-Leninist Q&A model training. |
| |
| Test Categories: |
| 1. Format Rewards - Proper <think>...</think> tag usage |
| 2. Terminology Reward - Marxist vocabulary (shallow, can be gamed) |
| 3. Topic Extraction - Question/answer topic identification |
| 4. Topic Relevance - Answer addresses question topics |
| 5. Structural Coherence - Terms in proper syntactic roles (defeats word soup) |
| 6. NLI Coherence - Logical consistency with ground truth (integration tests) |
| 7. Combined Rewards - Multi-layer checking |
| """ |
|
|
from __future__ import annotations

from typing import TYPE_CHECKING

import pytest

if TYPE_CHECKING:
    from collections.abc import Callable
|
|
|
|
| |
| |
| |
|
|
|
|
@pytest.fixture
def mock_completion() -> Callable[[str], list[list[dict[str, str]]]]:
    """Provide a factory that wraps text as a one-item GRPO completion batch.

    The fixture yields a callable rather than a ready-made batch, so each
    test can build a completion from its own text.

    Returns:
        A function mapping ``content`` to
        ``[[{"role": "assistant", "content": content}]]``.
    """

    def _make(content: str) -> list[list[dict[str, str]]]:
        # Outer list: the batch; inner list: the messages of one completion.
        return [[{"role": "assistant", "content": content}]]

    return _make
|
|
|
|
@pytest.fixture
def mock_prompt() -> Callable[[str], list[list[dict[str, str]]]]:
    """Provide a factory that wraps a question as a one-item GRPO prompt batch.

    The fixture yields a callable rather than a ready-made batch, so each
    test can build a prompt from its own question.

    Returns:
        A function mapping ``question`` to a batch holding one conversation:
        a fixed system message followed by a user message carrying the
        question.
    """

    def _make(question: str) -> list[list[dict[str, str]]]:
        # Outer list: the batch; inner list: the messages of one conversation.
        return [
            [
                {"role": "system", "content": "You are a Marxist assistant."},
                {"role": "user", "content": question},
            ]
        ]

    return _make
|
|
|
|
| |
| |
| |
|
|
|
|
class TestMatchFormatExactly:
    """Behavioral checks for the ``match_format_exactly`` reward."""

    def test_rewards_proper_think_tags(self, mock_completion: object) -> None:
        """A completion containing <think>...</think> scores exactly 3.0."""
        from prolewiki_llm.grpo_rewards import match_format_exactly

        text = "<think>Let me analyze this...</think>The bourgeoisie exploits workers."
        rewards = match_format_exactly(mock_completion(text))

        assert rewards == [3.0]

    def test_penalizes_missing_think_tags(self, mock_completion: object) -> None:
        """A completion with no think tags at all scores 0.0."""
        from prolewiki_llm.grpo_rewards import match_format_exactly

        rewards = match_format_exactly(
            mock_completion("The bourgeoisie exploits workers.")
        )

        assert rewards == [0.0]

    def test_only_needs_end_tag(self, mock_completion: object) -> None:
        """A closing </think> without an opening <think> still scores 3.0."""
        from prolewiki_llm.grpo_rewards import match_format_exactly

        rewards = match_format_exactly(
            mock_completion("Some text</think>Answer here")
        )

        assert rewards == [3.0]
|
|
|
|
class TestMatchFormatApproximately:
    """Behavioral checks for the ``match_format_approximately`` reward."""

    def test_rewards_proper_single_tags(self, mock_completion: object) -> None:
        """One <think> and one </think> together score 1.0."""
        from prolewiki_llm.grpo_rewards import match_format_approximately

        batch = mock_completion("<think>Reasoning...</think>Answer")
        rewards = match_format_approximately(batch)

        assert rewards == [1.0]

    def test_penalizes_multiple_start_tags(self, mock_completion: object) -> None:
        """Two <think>...</think> pairs in one completion score -2.0."""
        from prolewiki_llm.grpo_rewards import match_format_approximately

        batch = mock_completion("<think>First</think><think>Second</think>Answer")
        rewards = match_format_approximately(batch)

        assert rewards == [-2.0]

    def test_penalizes_missing_tags(self, mock_completion: object) -> None:
        """A completion with neither tag scores -2.0."""
        from prolewiki_llm.grpo_rewards import match_format_approximately

        batch = mock_completion("Plain text without tags")
        rewards = match_format_approximately(batch)

        assert rewards == [-2.0]
|
|
|
|
| |
| |
| |
|
|
|
|
class TestTerminologyReward:
    """Behavioral checks for the shallow ``terminology_reward`` function."""

    def test_rewards_marxist_terms(self, mock_completion: object) -> None:
        """A sentence using Marxist vocabulary scores in the range (0.0, 2.0]."""
        from prolewiki_llm.grpo_rewards import terminology_reward

        sentence = "The bourgeoisie extracts surplus value from the proletariat."
        rewards = terminology_reward(mock_completion(sentence))

        assert 0.0 < rewards[0] <= 2.0

    def test_no_reward_without_terms(self, mock_completion: object) -> None:
        """A sentence with no Marxist vocabulary scores exactly 0.0."""
        from prolewiki_llm.grpo_rewards import terminology_reward

        rewards = terminology_reward(
            mock_completion("The sky is blue and grass is green.")
        )

        assert rewards == [0.0]

    def test_word_soup_gets_reward_showing_vulnerability(self, mock_completion: object) -> None:
        """
        DEMONSTRATION: a bare list of terms still earns at least 2.0 here.

        This pins the known weakness of the shallow reward; it is why the
        NLI-based rewards exist.
        """
        from prolewiki_llm.grpo_rewards import terminology_reward

        # No grammar at all, only vocabulary.
        term_list = (
            "bourgeoisie proletariat dialectical materialism surplus value "
            "imperialism revisionism hegemony alienation"
        )
        rewards = terminology_reward(mock_completion(term_list))

        assert rewards[0] >= 2.0
|
|
|
|
| |
| |
| |
|
|
|
|
class TestTopicExtraction:
    """Tests for the topic extraction helper functions.

    The spaCy pipeline is loaded once per class and shared via ``self.nlp``.
    """

    @pytest.fixture(autouse=True, scope="class")
    def load_spacy(self, request: pytest.FixtureRequest) -> None:
        """Load the spaCy model once and share it with every test in the class.

        A function-scoped fixture would reload the transformer pipeline for
        each test. The pipeline is stored on the class (``request.cls``)
        because pytest creates a fresh instance per test, so an instance
        attribute set here would not be visible to the tests.
        """
        import spacy

        request.cls.nlp = spacy.load("en_core_web_trf")

    def test_extracts_simple_topic(self) -> None:
        """'What is revisionism?' should extract 'revisionism'."""
        from prolewiki_llm.grpo_rewards import _extract_question_topics

        doc = self.nlp("What is revisionism?")
        topics = _extract_question_topics(doc)

        assert "revisionism" in topics

    def test_extracts_multiple_topics(self) -> None:
        """'How does imperialism relate to capitalism?' should extract both."""
        from prolewiki_llm.grpo_rewards import _extract_question_topics

        doc = self.nlp("How does imperialism relate to capitalism?")
        topics = _extract_question_topics(doc)

        assert "imperialism" in topics
        assert "capitalism" in topics

    def test_extracts_compound_topic(self) -> None:
        """'Explain the concept of surplus value.' should extract 'surplus value'."""
        from prolewiki_llm.grpo_rewards import _extract_question_topics

        doc = self.nlp("Explain the concept of surplus value.")
        topics = _extract_question_topics(doc)

        # Accept either the full compound or both of its words separately.
        assert "surplus value" in topics or ("surplus" in topics and "value" in topics)

    def test_extracts_prepositional_phrase(self) -> None:
        """'dictatorship of the proletariat' should yield both of its nouns."""
        from prolewiki_llm.grpo_rewards import _extract_question_topics

        doc = self.nlp("What is the dictatorship of the proletariat?")
        topics = _extract_question_topics(doc)

        assert "dictatorship" in topics
        assert "proletariat" in topics

    def test_excludes_question_words(self) -> None:
        """The question word 'what' should not be extracted as a topic."""
        from prolewiki_llm.grpo_rewards import _extract_question_topics

        doc = self.nlp("What is dialectical materialism?")
        topics = _extract_question_topics(doc)

        assert "what" not in topics

    def test_answer_topic_extraction(self) -> None:
        """Answer extraction should return topics including a class term."""
        from prolewiki_llm.grpo_rewards import _extract_answer_topics

        doc = self.nlp(
            "The bourgeoisie controls the means of production. Workers sell their labor power."
        )
        topics = _extract_answer_topics(doc)

        assert len(topics) > 0
        # "worker" is accepted alongside "workers" in case topics are lemmatized.
        assert any(term in topics for term in ("bourgeoisie", "worker", "workers"))
|
|
|
|
| |
| |
| |
|
|
|
|
class TestTopicRelevanceReward:
    """Behavioral checks for ``topic_relevance_reward``."""

    def test_on_topic_answer_rewarded(self, mock_prompt: object, mock_completion: object) -> None:
        """An answer about the asked-for topic earns a positive score."""
        from prolewiki_llm.grpo_rewards import topic_relevance_reward

        question = mock_prompt("What is revisionism?")
        answer = mock_completion(
            "</think>Revisionism is the distortion of Marxist theory, "
            "abandoning revolutionary principles in favor of reformism."
        )

        rewards = topic_relevance_reward(question, answer)

        assert rewards[0] > 0.0, "On-topic answer should be rewarded"

    def test_off_topic_answer_penalized(self, mock_prompt: object, mock_completion: object) -> None:
        """An answer about something unrelated earns no positive score."""
        from prolewiki_llm.grpo_rewards import topic_relevance_reward

        question = mock_prompt("What is revisionism?")
        answer = mock_completion(
            "</think>The weather today is sunny with clear skies. I recommend wearing sunscreen."
        )

        rewards = topic_relevance_reward(question, answer)

        assert rewards[0] <= 0.0, "Off-topic answer should be penalized"

    def test_partial_topic_coverage(self, mock_prompt: object, mock_completion: object) -> None:
        """Covering only one of two question topics is not heavily penalized."""
        from prolewiki_llm.grpo_rewards import topic_relevance_reward

        question = mock_prompt("How does imperialism relate to capitalism?")
        # The answer names imperialism but never the word "capitalism".
        answer = mock_completion(
            "</think>Imperialism is the highest stage of development "
            "characterized by monopolies and export of capital."
        )

        rewards = topic_relevance_reward(question, answer)

        assert rewards[0] >= -1.5, "Partial coverage shouldn't be heavily penalized"

    def test_synonym_recognition(self, mock_prompt: object, mock_completion: object) -> None:
        """An answer that uses a synonym for the topic is not penalized."""
        from prolewiki_llm.grpo_rewards import topic_relevance_reward

        question = mock_prompt("What is the bourgeoisie?")
        # The answer says "capitalist class" and never uses "bourgeoisie".
        answer = mock_completion(
            "</think>The capitalist class owns the means of production "
            "and exploits the working class for profit."
        )

        rewards = topic_relevance_reward(question, answer)

        assert rewards[0] >= 0.0, "Synonyms should be recognized as on-topic"
|
|
|
|
| |
| |
| |
|
|
|
|
class TestStructuralCoherenceReward:
    """Behavioral checks for ``structural_coherence_reward``."""

    def test_coherent_sentences_rewarded(self, mock_completion: object) -> None:
        """Well-formed sentences that use the terms earn a positive score."""
        from prolewiki_llm.grpo_rewards import structural_coherence_reward

        prose = (
            "The bourgeoisie extracts surplus value from workers. "
            "Therefore, class struggle is inevitable."
        )
        rewards = structural_coherence_reward(mock_completion(prose))

        assert rewards[0] > 0.0, "Coherent text should get positive reward"

    def test_word_soup_penalized(self, mock_completion: object) -> None:
        """A bare list of terms with no sentence structure scores below 1.0."""
        from prolewiki_llm.grpo_rewards import structural_coherence_reward

        term_list = (
            "bourgeoisie proletariat dialectical materialism surplus value "
            "imperialism revisionism hegemony alienation"
        )
        rewards = structural_coherence_reward(mock_completion(term_list))

        assert rewards[0] < 1.0, "Word soup should get low structural score"

    def test_discourse_connectives_rewarded(self, mock_completion: object) -> None:
        """Text linked by 'because', 'Therefore', 'Furthermore' scores above 0.5."""
        from prolewiki_llm.grpo_rewards import structural_coherence_reward

        prose = (
            "The proletariat is exploited because they do not own capital. "
            "Therefore, revolution is necessary. "
            "Furthermore, the state must be seized."
        )
        rewards = structural_coherence_reward(mock_completion(prose))

        assert rewards[0] > 0.5, "Discourse connectives should be rewarded"
|
|
|
|
| |
| |
| |
|
|
|
|
class TestCompletenessReward:
    """Behavioral checks for ``completeness_reward``."""

    def test_appropriate_length_rewarded(self, mock_completion: object) -> None:
        """A response about as long as the reference earns a positive score."""
        from prolewiki_llm.grpo_rewards import completeness_reward

        reference = "The bourgeoisie is the capitalist class that owns production."
        batch = mock_completion(
            "</think>The bourgeoisie refers to the capitalist class "
            "that controls the means of production."
        )

        rewards = completeness_reward(batch, answer=[reference])

        assert rewards[0] > 0.0, "Similar length should be rewarded"

    def test_too_short_penalized(self, mock_completion: object) -> None:
        """A response far shorter than the reference earns a negative score."""
        from prolewiki_llm.grpo_rewards import completeness_reward

        reference = (
            "The bourgeoisie is the capitalist class that owns the means "
            "of production and exploits the working class through extraction "
            "of surplus value from their labor."
        )
        batch = mock_completion("</think>The bourgeoisie owns capital.")

        rewards = completeness_reward(batch, answer=[reference])

        assert rewards[0] < 0.0, "Too short should be penalized"
|
|
|
|
| |
| |
| |
|
|
|
|
@pytest.mark.slow
class TestNLICoherenceReward:
    """Behavioral checks for ``nli_coherence_reward`` (requires bart-large-mnli)."""

    def test_entailment_rewarded(self, mock_completion: object) -> None:
        """A paraphrase that entails the reference earns a positive score."""
        from prolewiki_llm.grpo_rewards import nli_coherence_reward

        reference = "The bourgeoisie extracts surplus value from workers."
        batch = mock_completion(
            "</think>The capitalist class exploits workers by extracting "
            "the value of their unpaid labor."
        )

        rewards = nli_coherence_reward(batch, answer=[reference])

        assert rewards[0] > 0.0, "Entailment should be rewarded"

    def test_contradiction_penalized(self, mock_completion: object) -> None:
        """A response that denies the reference earns a negative score."""
        from prolewiki_llm.grpo_rewards import nli_coherence_reward

        reference = "The bourgeoisie exploits workers."
        batch = mock_completion(
            "</think>Capitalism benefits all classes equally. "
            "Workers are not exploited under capitalism."
        )

        rewards = nli_coherence_reward(batch, answer=[reference])

        assert rewards[0] < 0.0, "Contradiction should be penalized"

    def test_word_soup_neutral(self, mock_completion: object) -> None:
        """A bare list of terms must not earn a positive NLI score."""
        from prolewiki_llm.grpo_rewards import nli_coherence_reward

        reference = "Revisionism abandons revolutionary principles."
        term_list = (
            "bourgeoisie proletariat dialectical materialism surplus value "
            "imperialism revisionism hegemony alienation"
        )

        rewards = nli_coherence_reward(mock_completion(term_list), answer=[reference])

        assert rewards[0] <= 0.0, "Word soup should not get positive NLI score"
|
|
|
|
@pytest.mark.slow
class TestSelfConsistencyReward:
    """Behavioral checks for ``self_consistency_reward`` (requires bart-large-mnli)."""

    def test_consistent_response_rewarded(self, mock_completion: object) -> None:
        """Sentences that agree with one another earn a positive score."""
        from prolewiki_llm.grpo_rewards import self_consistency_reward

        batch = mock_completion(
            "The bourgeoisie owns capital. They extract surplus value. "
            "This exploitation drives class struggle."
        )

        rewards = self_consistency_reward(batch)

        assert rewards[0] > 0.0, "Consistent response should be rewarded"

    def test_contradictory_response_penalized(self, mock_completion: object) -> None:
        """Sentences that contradict one another earn a negative score."""
        from prolewiki_llm.grpo_rewards import self_consistency_reward

        batch = mock_completion(
            "Capitalism benefits everyone equally. "
            "Workers are severely exploited under capitalism. "
            "Nobody is harmed by the capitalist system."
        )

        rewards = self_consistency_reward(batch)

        assert rewards[0] < 0.0, "Contradictory response should be penalized"
|
|
|
|
| |
| |
| |
|
|
|
|
class TestRobustCoherenceReward:
    """Behavioral checks for the combined ``robust_coherence_reward``."""

    @pytest.mark.slow
    def test_good_response_high_score(self, mock_completion: object) -> None:
        """A well-formed answer matching the reference earns a positive score."""
        from prolewiki_llm.grpo_rewards import robust_coherence_reward

        reference = (
            "Revisionism is the distortion of Marxist theory, abandoning revolutionary principles."
        )
        batch = mock_completion(
            "</think>Revisionism represents a deviation from Marxist principles, "
            "characterized by the abandonment of revolutionary goals in favor of "
            "reformist approaches. Therefore, it represents a threat to the movement."
        )

        rewards = robust_coherence_reward(batch, answer=[reference])

        assert rewards[0] > 0.0, "Good response should get positive combined score"

    @pytest.mark.slow
    def test_word_soup_low_score(self, mock_completion: object) -> None:
        """A bare list of terms earns no positive combined score."""
        from prolewiki_llm.grpo_rewards import robust_coherence_reward

        reference = "Revisionism abandons revolutionary principles."
        term_list = (
            "bourgeoisie proletariat dialectical materialism surplus value "
            "imperialism revisionism hegemony alienation"
        )

        rewards = robust_coherence_reward(mock_completion(term_list), answer=[reference])

        assert rewards[0] <= 0.0, "Word soup should get low combined score"
|
|
|
|
class TestFullCoherenceReward:
    """Behavioral checks for ``full_coherence_reward`` (robust + topic relevance)."""

    @pytest.mark.slow
    def test_on_topic_coherent_high_score(
        self, mock_prompt: object, mock_completion: object
    ) -> None:
        """A coherent answer to the question asked earns a positive score."""
        from prolewiki_llm.grpo_rewards import full_coherence_reward

        question = mock_prompt("What is revisionism?")
        reference = "Revisionism distorts Marxist theory."
        answer = mock_completion(
            "</think>Revisionism is the distortion of Marxist theory, "
            "abandoning revolutionary principles for reformism."
        )

        rewards = full_coherence_reward(question, answer, answer=[reference])

        assert rewards[0] > 0.0, "On-topic coherent response should score high"

    @pytest.mark.slow
    def test_off_topic_coherent_lower_score(
        self, mock_prompt: object, mock_completion: object
    ) -> None:
        """A coherent answer about a different topic stays below 2.0."""
        from prolewiki_llm.grpo_rewards import full_coherence_reward

        question = mock_prompt("What is revisionism?")
        reference = "Revisionism distorts Marxist theory."
        # The question is about revisionism; the answer discusses imperialism.
        answer = mock_completion(
            "</think>Imperialism is the highest stage of capitalism. "
            "It is characterized by monopolies and export of capital. "
            "Lenin analyzed this in his famous work."
        )

        rewards = full_coherence_reward(question, answer, answer=[reference])

        assert rewards[0] < 2.0, "Off-topic response should not get high score"
|
|
|
|
| |
| |
| |
|
|
|
|
class TestInterconnectionDepthReward:
    """Check that ``interconnection_depth_reward`` separates depth from buzzword salad."""

    def test_deep_analysis_rewarded(self, mock_completion: object) -> None:
        """One concept explained at length earns a positive score."""
        from prolewiki_llm.grpo_rewards import interconnection_depth_reward

        batch = mock_completion(
            "</think>Surplus value is the difference between the value produced by "
            "labor and the wages paid to workers. This occurs because the capitalist "
            "pays for labor power (the capacity to work) rather than labor itself. "
            "Marx argued that this extraction is the source of profit. For example, "
            "if a worker produces goods worth $100 but receives only $50 in wages, "
            "the remaining $50 constitutes surplus value. This is the fundamental "
            "mechanism of exploitation under capitalism."
        )

        rewards = interconnection_depth_reward(batch)

        assert rewards[0] > 0.0, "Deep analysis should be rewarded"

    def test_buzzword_salad_penalized(self, mock_completion: object) -> None:
        """Many concepts named with none explained earns a negative score."""
        from prolewiki_llm.grpo_rewards import interconnection_depth_reward

        batch = mock_completion(
            "</think>Surplus value is interconnected with exploitation, "
            "alienation, commodity fetishism, imperialism, colonialism, "
            "hegemony, class struggle, dialectical materialism, and the "
            "dictatorship of the proletariat. It's all systemic and relates "
            "to the bourgeoisie and proletariat in problematic ways."
        )

        rewards = interconnection_depth_reward(batch)

        assert rewards[0] < 0.0, "Buzzword salad should be penalized"

    def test_activist_jargon_penalized(self, mock_completion: object) -> None:
        """Jargon-heavy text with no substance earns a negative score."""
        from prolewiki_llm.grpo_rewards import interconnection_depth_reward

        batch = mock_completion(
            "</think>We need to center the lived experiences of the proletariat "
            "and unpack the systemic harm of capitalism. It's problematic how "
            "the bourgeoisie uplifts toxic narratives. We must do the work to "
            "unlearn harmful ideology and hold space for class consciousness. "
            "It's all interconnected in a way that requires us to lean into "
            "the dialectical process of liberation."
        )

        rewards = interconnection_depth_reward(batch)

        assert rewards[0] < 0.0, "Activist jargon should be penalized"

    def test_historical_specificity_rewarded(self, mock_completion: object) -> None:
        """An answer citing named figures, events and dates earns a positive score."""
        from prolewiki_llm.grpo_rewards import interconnection_depth_reward

        batch = mock_completion(
            "</think>The dictatorship of the proletariat, as Marx described it "
            "after the Paris Commune of 1871, refers to the political rule of "
            "the working class during the transition from capitalism to communism. "
            "Lenin further developed this concept, arguing that the state would "
            "eventually wither away as class distinctions disappeared. For example, "
            "the Soviet state under Lenin implemented workers' councils (soviets) "
            "as the basis of proletarian democracy."
        )

        rewards = interconnection_depth_reward(batch)

        assert rewards[0] > 0.0, "Historical specificity should be rewarded"

    def test_explanation_ratio_matters(self, mock_completion: object) -> None:
        """An answer dense with explanatory phrasing earns a positive score."""
        from prolewiki_llm.grpo_rewards import interconnection_depth_reward

        batch = mock_completion(
            "</think>Alienation refers to the estrangement of workers from their "
            "labor. This occurs because workers do not own the means of production. "
            "As a result of this, they have no control over what they produce. "
            "Marx argued that this leads to alienation from the product, from the "
            "labor process, from fellow workers, and from human potential itself. "
            "Therefore, alienation is not merely psychological but structural."
        )

        rewards = interconnection_depth_reward(batch)

        assert rewards[0] > 0.0, "Well-explained concepts should be rewarded"

    def test_deep_vs_shallow_distinction(self, mock_completion: object) -> None:
        """On the same subject, an explained answer outscores a list of terms."""
        from prolewiki_llm.grpo_rewards import interconnection_depth_reward

        explained = (
            "</think>Imperialism, as Lenin analyzed, represents the highest stage "
            "of capitalism. This is because monopoly capital seeks new markets "
            "and investment opportunities abroad. The export of capital, rather "
            "than goods, becomes the dominant form of economic expansion. Lenin "
            "argued that this leads to the division of the world among great powers. "
            "For example, the scramble for Africa in the 1880s exemplified this process."
        )
        listed = (
            "</think>Imperialism relates to capitalism, colonialism, exploitation, "
            "surplus value, monopolies, and the bourgeoisie. It intersects with "
            "hegemony, class struggle, and national liberation. The proletariat "
            "faces alienation and commodity fetishism under imperialism."
        )

        # Build both batches first, then score them in the same order.
        explained_batch = mock_completion(explained)
        listed_batch = mock_completion(listed)
        deep_score = interconnection_depth_reward(explained_batch)[0]
        shallow_score = interconnection_depth_reward(listed_batch)[0]

        assert deep_score > shallow_score, (
            f"Deep analysis ({deep_score}) should score higher than "
            f"shallow buzzword listing ({shallow_score})"
        )
|
|
|
|
class TestDepthHelpers:
    """Checks for the private helpers behind the depth analysis."""

    def test_depth_ratio_calculation(self) -> None:
        """A 100-word text ending in two Marxist terms has a depth ratio of 50.0."""
        from prolewiki_llm.grpo_rewards import _compute_depth_ratio

        # 98 filler words followed by the two terms: 100 words in total.
        filler = "word " * 98
        ratio = _compute_depth_ratio(filler + "bourgeoisie proletariat")

        assert ratio == 50.0, f"Expected 50.0, got {ratio}"

    def test_hollow_buzzword_counting(self) -> None:
        """A jargon-laden sentence pair yields at least five hollow buzzwords."""
        from prolewiki_llm.grpo_rewards import _count_hollow_buzzwords

        count = _count_hollow_buzzwords(
            "It's interconnected and systemic. We need to center "
            "lived experiences and do the work to unpack this."
        )

        assert count >= 5, f"Expected at least 5 hollow buzzwords, got {count}"

    def test_depth_marker_counting(self) -> None:
        """Text with attributions, an example and dates yields at least three markers."""
        from prolewiki_llm.grpo_rewards import _count_depth_markers

        count = _count_depth_markers(
            "Marx argued that surplus value emerges during the production process. "
            "For example, in 1867 he published Capital. According to Lenin, "
            "imperialism specifically developed after 1880."
        )

        assert count >= 3, f"Expected at least 3 depth markers, got {count}"

    def test_explanatory_phrase_counting(self) -> None:
        """Text built on causal phrasing yields at least three explanatory phrases."""
        from prolewiki_llm.grpo_rewards import _count_explanatory_phrases

        count = _count_explanatory_phrases(
            "This occurs because the capitalist extracts surplus value. "
            "As a result of this, workers are exploited. This means that "
            "profit specifically comes from unpaid labor."
        )

        assert count >= 3, f"Expected at least 3 explanatory phrases, got {count}"
|
|
|
|
| |
| |
| |
|
|
|
|
| class TestIdeologicalFirmnessReward: |
| """ |
| Tests for ideological_firmness_reward function. |
| |
| This reward function encourages the model to: |
| 1. Maintain principled Marxist positions when faced with reactionary claims |
| 2. Provide substantive counter-arguments rather than capitulating |
| 3. Use class analysis to refute ideological challenges |
| """ |
|
|
| |
| |
| |
|
|
def test_hitler_workers_firm_refutation(
    self, mock_prompt: object, mock_completion: object
) -> None:
    """A firm, class-based rebuttal of the fascist claim scores at least 1.5."""
    from prolewiki_llm.grpo_rewards import ideological_firmness_reward

    # Prompt and completion are passed as plain strings; the fixtures are unused.
    challenge = "Didn't Hitler help German workers?"
    rebuttal = (
        "This is fascist propaganda. Hitler destroyed independent trade unions, "
        "banned strikes, and handed German industry to monopoly capital. "
        "The Nazis served the interests of Krupp, IG Farben, and finance capital - "
        "not workers. Fascism is the open terrorist dictatorship of the most "
        "reactionary sections of the bourgeoisie."
    )

    scores = ideological_firmness_reward([challenge], [rebuttal])

    assert scores[0] >= 1.5, (
        f"Firm refutation of fascist propaganda should score >= 1.5, got {scores[0]}"
    )
|
|
def test_100_million_claim_refutation(
    self, mock_prompt: object, mock_completion: object
) -> None:
    """A sourced rebuttal of the '100 million' claim scores at least 1.5."""
    from prolewiki_llm.grpo_rewards import ideological_firmness_reward

    # Prompt and completion are passed as plain strings; the fixtures are unused.
    challenge = "Communism killed 100 million people"
    rebuttal = (
        "This figure comes from the discredited Black Book of Communism, whose own "
        "authors disavowed it. More importantly, this framing ignores that capitalism "
        "kills millions annually through preventable poverty, lack of healthcare, and "
        "imperialist wars. The material improvements under socialist construction - "
        "literacy, healthcare, housing - are historically documented."
    )

    scores = ideological_firmness_reward([challenge], [rebuttal])

    assert scores[0] >= 1.5, (
        f"Refutation of Black Book claims should score >= 1.5, got {scores[0]}"
    )
|
|
def test_stalin_dictator_analysis(self, mock_prompt: object, mock_completion: object) -> None:
    """A class analysis of the 'dictator' label scores at least 1.5."""
    from prolewiki_llm.grpo_rewards import ideological_firmness_reward

    # Prompt and completion are passed as plain strings; the fixtures are unused.
    challenge = "Stalin was a dictator"
    rebuttal = (
        "The 'dictator' framing serves bourgeois historiography. Stalin led a party "
        "structure with collective decision-making. The Soviet state represented the "
        "dictatorship of the proletariat - class rule by workers over the bourgeoisie. "
        "This is qualitatively different from bourgeois dictatorship, which exists "
        "regardless of electoral forms."
    )

    scores = ideological_firmness_reward([challenge], [rebuttal])

    assert scores[0] >= 1.5, (
        f"Class analysis of 'dictator' framing should score >= 1.5, got {scores[0]}"
    )
|
|
| |
| |
| |
|
|
def test_hitler_workers_capitulation(
    self, mock_prompt: object, mock_completion: object
) -> None:
    """A reply that concedes the fascist framing scores at most -1.5."""
    from prolewiki_llm.grpo_rewards import ideological_firmness_reward

    # Prompt and completion are passed as plain strings; the fixtures are unused.
    challenge = "Didn't Hitler help German workers?"
    concession = (
        "While Hitler did implement some programs that benefited German workers in "
        "the short term, like infrastructure projects and reduced unemployment, it's "
        "important to consider the broader context and weigh both the positive and "
        "negative aspects of his economic policies."
    )

    scores = ideological_firmness_reward([challenge], [concession])

    assert scores[0] <= -1.5, (
        f"Capitulating to fascist framing should score <= -1.5, got {scores[0]}"
    )
|
|
def test_stalin_both_sidesing(self, mock_prompt: object, mock_completion: object) -> None:
    """A 'both sides' non-answer about Stalin scores at most -1.5."""
    from prolewiki_llm.grpo_rewards import ideological_firmness_reward

    # Prompt and completion are passed as plain strings; the fixtures are unused.
    challenge = "Was Stalin bad?"
    hedge = (
        "There are valid points on both sides of this debate. Stalin did industrialize "
        "the USSR rapidly, but he also oversaw purges. It's a complex historical "
        "question with no easy answers. Reasonable people can disagree about his legacy."
    )

    scores = ideological_firmness_reward([challenge], [hedge])

    assert scores[0] <= -1.5, f"Both-sidesing on Stalin should score <= -1.5, got {scores[0]}"
|
|
| |
| |
| |
|
|
def test_factual_question_neutral(self, mock_prompt: object, mock_completion: object) -> None:
    """A plain factual Q&A with no ideological challenge must land within [-0.5, 0.5]."""
    from prolewiki_llm.grpo_rewards import ideological_firmness_reward

    question = "What year did the October Revolution happen?"
    reply = "The October Revolution occurred in 1917."

    score = ideological_firmness_reward([question], [reply])[0]

    assert -0.5 <= score <= 0.5, f"Factual question should score ~0.0, got {score}"
|
|
def test_educational_question_neutral(
    self, mock_prompt: object, mock_completion: object
) -> None:
    """An explanatory answer about a Marxist concept must land within [-0.5, 0.5]."""
    from prolewiki_llm.grpo_rewards import ideological_firmness_reward

    question = "Explain dialectical materialism"
    reply = (
        "Dialectical materialism is the philosophical framework developed by Marx "
        "and Engels that applies dialectical logic to materialist understanding of "
        "historical development and social change."
    )

    score = ideological_firmness_reward([question], [reply])[0]

    assert -0.5 <= score <= 0.5, f"Educational question should score ~0.0, got {score}"
|
|
def test_biographical_question_neutral(
    self, mock_prompt: object, mock_completion: object
) -> None:
    """A biographical answer must land within [-0.5, 0.5]."""
    from prolewiki_llm.grpo_rewards import ideological_firmness_reward

    question = "Who was Rosa Luxemburg?"
    reply = (
        "Rosa Luxemburg was a revolutionary Marxist theorist and leader of the "
        "Spartacist League in Germany. She was assassinated in 1919 by Freikorps."
    )

    score = ideological_firmness_reward([question], [reply])[0]

    assert -0.5 <= score <= 0.5, f"Biographical question should score ~0.0, got {score}"
|
|
| |
| |
| |
|
|
def test_quote_to_refute_pattern(self, mock_prompt: object, mock_completion: object) -> None:
    """Quoting a reactionary claim only to rebut it must still score at least 1.0."""
    from prolewiki_llm.grpo_rewards import ideological_firmness_reward

    challenge = "Didn't communism fail?"
    # The reply repeats the phrase 'communism failed' in quotes, then refutes it.
    reply = (
        "Bourgeois historians claim that 'communism failed,' but this ignores the "
        "material reality: the USSR went from feudal backwater to industrial "
        "superpower in 30 years, defeated fascism, achieved universal literacy, "
        "and provided housing and healthcare to all citizens."
    )

    score = ideological_firmness_reward([challenge], [reply])[0]

    assert score >= 1.0, f"Quote-to-refute pattern should score >= 1.0, got {score}"
|
|
def test_principled_self_criticism(self, mock_prompt: object, mock_completion: object) -> None:
    """Self-criticism made from a revolutionary standpoint must not go negative."""
    from prolewiki_llm.grpo_rewards import ideological_firmness_reward

    question = "Did the USSR make mistakes?"
    # The reply admits errors but explicitly distinguishes itself from anti-communism.
    reply = (
        "Yes, Marxist-Leninists engage in self-criticism as a methodological principle. "
        "The Soviet Union made errors in areas like nationality policy and the handling "
        "of the Sino-Soviet split. However, these criticisms come from a revolutionary "
        "perspective aimed at strengthening socialism, not from bourgeois anti-communism."
    )

    score = ideological_firmness_reward([question], [reply])[0]

    assert score >= 0.0, f"Principled self-criticism should score >= 0.0, got {score}"
|
|
def test_mixed_hedging(self, mock_prompt: object, mock_completion: object) -> None:
    """An 'on one hand / on the other hand' reply must score at or below -1.0."""
    from prolewiki_llm.grpo_rewards import ideological_firmness_reward

    challenge = "Was Mao good or bad?"
    reply = (
        "I can see why you'd ask that question. Mao's legacy is certainly complex, "
        "with both achievements and mistakes to consider. On one hand, he liberated "
        "China from imperialism and feudalism. On the other hand, there were excesses "
        "during various campaigns."
    )

    score = ideological_firmness_reward([challenge], [reply])[0]

    assert score <= -1.0, f"Mixed hedging should score <= -1.0, got {score}"
|
|
|
|
| |
| |
| |
|
|
|
|
class TestEntityVerificationReward:
    """
    Tests for entity_verification_reward function.

    This reward function penalizes confident claims about entities NOT in
    the verified whitelist (24,040 entities from ProleWiki).

    Scoring logic:
    - +2.0: Expresses uncertainty about unknown entities
    - +1.0: Discusses only verified entities
    - -1.0: Discusses unknown entities without clear uncertainty
    - -2.5: Makes confident claims about unknown entities

    Single-sample prompts come from the module's ``mock_prompt`` fixture
    (system message + user question) instead of being rebuilt by hand in
    every test. Each test calls the reward as
    ``entity_verification_reward(prompts, completions, answer=[...])``.
    """

    def test_verified_entities_positive_score(
        self, mock_prompt: object, mock_completion: object
    ) -> None:
        """Response mentioning only verified entities (Karl Marx, Lenin) is not penalized."""
        from prolewiki_llm.grpo_rewards import entity_verification_reward

        prompts = mock_prompt("Tell me about Marxist theory")
        completions = mock_completion(
            "Karl Marx and Lenin developed the theory of historical materialism."
        )

        scores = entity_verification_reward(prompts, completions, answer=[""])

        # The threshold is intentionally >= 0.0, looser than the documented +1.0:
        # this test only guards against verified entities being penalized.
        assert scores[0] >= 0.0, f"Verified entities should get >= 0.0, got {scores[0]}"

    def test_unverified_entity_with_uncertainty_positive(
        self, mock_prompt: object, mock_completion: object
    ) -> None:
        """Expressing uncertainty about unverified entity gets positive score."""
        from prolewiki_llm.grpo_rewards import entity_verification_reward

        prompts = mock_prompt("Tell me about the Militant League")
        completions = mock_completion(
            "I cannot verify information about the Militant League. "
            "I don't have reliable data about this organization."
        )

        scores = entity_verification_reward(prompts, completions, answer=[""])

        assert (
            scores[0] > 0.0
        ), f"Uncertainty about unknown entity should be positive, got {scores[0]}"

    def test_unverified_entity_confident_claim_negative(
        self, mock_prompt: object, mock_completion: object
    ) -> None:
        """Confident claims about unverified entity get negative score."""
        from prolewiki_llm.grpo_rewards import entity_verification_reward

        prompts = mock_prompt("Tell me about the Militant League")
        completions = mock_completion(
            "The Militant League was founded in 1923 and played a significant "
            "role in revolutionary history."
        )

        scores = entity_verification_reward(prompts, completions, answer=[""])

        assert (
            scores[0] < 0.0
        ), f"Confident claim about unknown entity should be negative, got {scores[0]}"

    def test_unverified_entity_fabricated_details_very_negative(
        self, mock_prompt: object, mock_completion: object
    ) -> None:
        """Fabricating details about unverified entity gets very negative score."""
        from prolewiki_llm.grpo_rewards import entity_verification_reward

        prompts = mock_prompt("Tell me about the Militant League")
        completions = mock_completion(
            "The Militant League was founded in 1923 by Zhang Wei in Shanghai. "
            "They organized 50,000 workers and led the uprising of 1925."
        )

        scores = entity_verification_reward(prompts, completions, answer=[""])

        # Stricter than the plain confident-claim case: must fall below -1.0.
        assert scores[0] < -1.0, f"Fabricated details should be heavily penalized, got {scores[0]}"

    def test_empty_completion_neutral(
        self, mock_prompt: object, mock_completion: object
    ) -> None:
        """Empty completion is handled without error and still yields a float score."""
        from prolewiki_llm.grpo_rewards import entity_verification_reward

        prompts = mock_prompt("Tell me about something")
        completions = mock_completion("")

        scores = entity_verification_reward(prompts, completions, answer=[""])

        # NOTE(review): only the type is pinned here; the exact "neutral" value for
        # an empty completion is not asserted. Tighten once that value is specified.
        assert isinstance(scores[0], float), "Should return float for empty completion"

    def test_no_entities_neutral(self, mock_prompt: object, mock_completion: object) -> None:
        """Response with no named entities gets a roughly neutral score."""
        from prolewiki_llm.grpo_rewards import entity_verification_reward

        prompts = mock_prompt("Explain dialectics")
        completions = mock_completion(
            "Dialectics is a method of reasoning that examines contradictions "
            "and their resolutions through thesis, antithesis, and synthesis."
        )

        scores = entity_verification_reward(prompts, completions, answer=[""])

        # Wide band on purpose: anything from slightly negative up to the
        # "verified only" reward is accepted when no entities are present.
        assert -0.5 <= scores[0] <= 1.5, f"No entities should be neutral-ish, got {scores[0]}"

    def test_mixed_verified_unverified(
        self, mock_prompt: object, mock_completion: object
    ) -> None:
        """Mix of verified and unverified entities - depends on confidence patterns."""
        from prolewiki_llm.grpo_rewards import entity_verification_reward

        prompts = mock_prompt("Compare these movements")
        completions = mock_completion(
            "Karl Marx influenced many movements. The Fictional Movement was founded in 1920."
        )

        scores = entity_verification_reward(prompts, completions, answer=[""])

        # The confident claim about the unknown entity must outweigh the verified one.
        assert (
            scores[0] < 0.0
        ), f"Confident claim about unknown entity should be negative, got {scores[0]}"

    def test_return_type_is_list_float(
        self, mock_prompt: object, mock_completion: object
    ) -> None:
        """Return type should be a non-empty list[float]."""
        from prolewiki_llm.grpo_rewards import entity_verification_reward

        prompts = mock_prompt("prompt")
        completions = mock_completion("completion")

        scores = entity_verification_reward(prompts, completions, answer=[""])

        assert isinstance(scores, list), "Should return a list"
        # all() is True for an empty iterable, so rule out the vacuous pass first.
        assert scores, "Should return at least one score"
        assert all(isinstance(s, float) for s in scores), "All elements should be floats"

    def test_return_length_matches_input(self, mock_completion: object) -> None:
        """Return length should match input length."""
        from prolewiki_llm.grpo_rewards import entity_verification_reward

        # Three-sample batch; these prompts carry only a user turn (no system message).
        prompts = [
            [{"role": "user", "content": "p1"}],
            [{"role": "user", "content": "p2"}],
            [{"role": "user", "content": "p3"}],
        ]
        completions = [
            [{"role": "assistant", "content": "c1"}],
            [{"role": "assistant", "content": "c2"}],
            [{"role": "assistant", "content": "c3"}],
        ]

        scores = entity_verification_reward(prompts, completions, answer=["", "", ""])

        assert len(scores) == len(
            completions
        ), f"Return length {len(scores)} should match input length {len(completions)}"
|
|
|
|
| |
| |
| |
|
|
|
|
class TestEpistemicCalibrationReward:
    """
    Tests for epistemic_calibration_reward function.

    This is a lightweight, pattern-based uncertainty detection reward.
    No NER required - just regex pattern matching.

    Patterns:
    - CONFIDENT: "founded in \\d{4}", "was established by", etc.
    - UNCERTAINTY: "I cannot verify", "I don't have information", etc.

    Scoring:
    - +1.5: Has uncertainty patterns (regardless of content)
    - -0.5: Has confident claim patterns + no uncertainty
    - 0.0: Neutral (no matching patterns)

    Single-sample prompts come from the module's ``mock_prompt`` fixture
    (system message + user question) instead of being rebuilt by hand in
    every test.
    """

    def test_uncertainty_patterns_positive_score(
        self, mock_prompt: object, mock_completion: object
    ) -> None:
        """Response with uncertainty patterns gets positive score."""
        from prolewiki_llm.grpo_rewards import epistemic_calibration_reward

        prompts = mock_prompt("Tell me about X")
        completions = mock_completion(
            "I cannot verify this claim. I don't have specific information about this organization."
        )

        scores = epistemic_calibration_reward(prompts, completions, answer=[""])

        assert scores[0] > 0.0, f"Uncertainty patterns should get positive score, got {scores[0]}"

    def test_confident_claims_negative_score(
        self, mock_prompt: object, mock_completion: object
    ) -> None:
        """Response with confident claim patterns gets negative score."""
        from prolewiki_llm.grpo_rewards import epistemic_calibration_reward

        prompts = mock_prompt("Tell me about X")
        completions = mock_completion(
            "This organization was founded in 1923. It was established by "
            "revolutionary leaders in Shanghai."
        )

        scores = epistemic_calibration_reward(prompts, completions, answer=[""])

        assert scores[0] < 0.0, f"Confident claims should get negative score, got {scores[0]}"

    def test_mixed_patterns_uncertainty_wins(
        self, mock_prompt: object, mock_completion: object
    ) -> None:
        """Response with both patterns - uncertainty takes precedence."""
        from prolewiki_llm.grpo_rewards import epistemic_calibration_reward

        prompts = mock_prompt("Tell me about X")
        # Contains an uncertainty phrase AND a "founded in <year>" confident phrase.
        completions = mock_completion(
            "I'm not certain, but it appears the organization was founded in 1923."
        )

        scores = epistemic_calibration_reward(prompts, completions, answer=[""])

        assert scores[0] > 0.0, f"Uncertainty should take precedence, got {scores[0]}"

    def test_neutral_response_zero_score(
        self, mock_prompt: object, mock_completion: object
    ) -> None:
        """Response with no patterns gets zero score."""
        from prolewiki_llm.grpo_rewards import epistemic_calibration_reward

        prompts = mock_prompt("Explain Marxism")
        completions = mock_completion(
            "Marxism is a political and economic theory developed by Karl Marx."
        )

        scores = epistemic_calibration_reward(prompts, completions, answer=[""])

        assert scores[0] == 0.0, f"Neutral response should get 0.0, got {scores[0]}"

    def test_multiple_uncertainty_patterns_still_positive(
        self, mock_prompt: object, mock_completion: object
    ) -> None:
        """Multiple uncertainty patterns still get positive score (capped at +1.5)."""
        from prolewiki_llm.grpo_rewards import epistemic_calibration_reward

        prompts = mock_prompt("Tell me about X")
        completions = mock_completion(
            "I cannot verify this. I don't have information about this topic. "
            "I'm not certain about the details. I'm not aware of this organization."
        )

        scores = epistemic_calibration_reward(prompts, completions, answer=[""])

        # Four uncertainty phrases must not stack beyond the single +1.5 reward.
        assert scores[0] == 1.5, f"Multiple uncertainty patterns should get +1.5, got {scores[0]}"

    def test_multiple_confident_patterns_still_negative(
        self, mock_prompt: object, mock_completion: object
    ) -> None:
        """Multiple confident patterns get negative score (capped at -0.5)."""
        from prolewiki_llm.grpo_rewards import epistemic_calibration_reward

        prompts = mock_prompt("Tell me about X")
        completions = mock_completion(
            "Founded in 1923. Established in 1925. Created in 1930. "
            "Was founded by John Smith. Was established by Mary Jones."
        )

        scores = epistemic_calibration_reward(prompts, completions, answer=[""])

        # Several confident phrases must not stack beyond the single -0.5 penalty.
        assert scores[0] == -0.5, f"Multiple confident patterns should get -0.5, got {scores[0]}"

    def test_return_type_is_list_float(
        self, mock_prompt: object, mock_completion: object
    ) -> None:
        """Return type should be a non-empty list[float]."""
        from prolewiki_llm.grpo_rewards import epistemic_calibration_reward

        prompts = mock_prompt("prompt")
        completions = mock_completion("completion")

        scores = epistemic_calibration_reward(prompts, completions, answer=[""])

        assert isinstance(scores, list), "Should return a list"
        # all() is True for an empty iterable, so rule out the vacuous pass first.
        assert scores, "Should return at least one score"
        assert all(isinstance(s, float) for s in scores), "All elements should be floats"

    def test_return_length_matches_input(self, mock_completion: object) -> None:
        """Return length should match input length."""
        from prolewiki_llm.grpo_rewards import epistemic_calibration_reward

        # Three-sample batch; these prompts carry only a user turn (no system message).
        prompts = [
            [{"role": "user", "content": "p1"}],
            [{"role": "user", "content": "p2"}],
            [{"role": "user", "content": "p3"}],
        ]
        completions = [
            [{"role": "assistant", "content": "c1"}],
            [{"role": "assistant", "content": "c2"}],
            [{"role": "assistant", "content": "c3"}],
        ]

        scores = epistemic_calibration_reward(prompts, completions, answer=["", "", ""])

        assert len(scores) == len(
            completions
        ), f"Return length {len(scores)} should match input length {len(completions)}"
|
|
|
|
| |
| |
| |
|
|
|
|
| |
| |
| |
|
|
|
|
| class TestSaccharineLanguageReward: |
| """ |
| Tests for saccharine_language_reward function. |
| |
| This reward function penalizes corporate chatbot / therapeutic language |
| patterns that undermine the serious, educational character of a Marxist |
| assistant. The model has a failure mode where it switches from serious |
| ideological analysis to "emoji-soup chatbot mode" on casual inputs. |
| |
| Pattern categories: |
| 1. Diminutives: "Aww", "awww", "teehee", "hehe" |
| 2. Excessive warmth: "I'm here for you", "I'm here to listen" |
| 3. Therapeutic language: "That's totally normal", "That's valid", "I hear you" |
| 4. Corporate helpfulness: "I'm happy to help!", "Is there anything else..." |
| 5. First-person emotional: "I'm so excited!", "I'm thrilled to help!" |
| |
| Scoring: |
| - 0 matches = 1.0 (professional) |
| - 1 match = 0.0 (neutral - one slip) |
| - 2+ matches = scaled negative, capped at -1.0 |
| """ |
|
|
| |
| |
| |
|
|
def test_marxist_analysis_scores_max(self, mock_completion: object) -> None:
    """A sober class-analysis answer with no saccharine phrasing earns the full 1.0."""
    from prolewiki_llm.grpo_rewards import saccharine_language_reward

    reply = (
        "</think>The bourgeoisie maintains its class dominance through "
        "control of the means of production. This economic base determines "
        "the superstructure of legal and political institutions."
    )

    score = saccharine_language_reward(mock_completion(reply))[0]

    assert score == 1.0, f"Professional analysis should score 1.0, got {score}"
|
|
def test_comradely_tone_scores_max(self, mock_completion: object) -> None:
    """Addressing the reader as 'Comrade' is not saccharine and still earns 1.0."""
    from prolewiki_llm.grpo_rewards import saccharine_language_reward

    reply = (
        "</think>Comrade, the analysis of class relations requires "
        "understanding the material conditions of production. Let us "
        "examine the historical development of capitalism."
    )

    score = saccharine_language_reward(mock_completion(reply))[0]

    assert score == 1.0, f"Comradely tone should score 1.0, got {score}"
|
|
def test_serious_educational_scores_max(self, mock_completion: object) -> None:
    """A straightforward educational explanation earns the maximum score of 1.0."""
    from prolewiki_llm.grpo_rewards import saccharine_language_reward

    reply = (
        "</think>Dialectical materialism posits that change occurs through "
        "the resolution of contradictions. The unity and struggle of "
        "opposites drives historical development."
    )

    score = saccharine_language_reward(mock_completion(reply))[0]

    assert score == 1.0, f"Educational content should score 1.0, got {score}"
|
|
| |
| |
| |
|
|
def test_diminutive_aww_penalized(self, mock_completion: object) -> None:
    """A lone 'Aww' counts as one saccharine match and drops the score to 0.0."""
    from prolewiki_llm.grpo_rewards import saccharine_language_reward

    reply = "</think>Aww, that's such a great question about Marxism!"

    score = saccharine_language_reward(mock_completion(reply))[0]

    assert score == 0.0, f"Single diminutive should score 0.0, got {score}"
|
|
def test_diminutive_awww_extended_penalized(self, mock_completion: object) -> None:
    """'Awwww' with extra w's is recognised as the same single diminutive match."""
    from prolewiki_llm.grpo_rewards import saccharine_language_reward

    reply = "</think>Awwww, I love talking about dialectics!"

    score = saccharine_language_reward(mock_completion(reply))[0]

    assert score == 0.0, f"Extended awww should score 0.0, got {score}"
|
|
def test_diminutive_teehee_penalized(self, mock_completion: object) -> None:
    """A lone 'Teehee' counts as one saccharine match and drops the score to 0.0."""
    from prolewiki_llm.grpo_rewards import saccharine_language_reward

    reply = "</think>Teehee, let me explain surplus value to you!"

    score = saccharine_language_reward(mock_completion(reply))[0]

    assert score == 0.0, f"Teehee should score 0.0, got {score}"
|
|
def test_diminutive_hehe_penalized(self, mock_completion: object) -> None:
    """A lone 'Hehe' counts as one saccharine match and drops the score to 0.0."""
    from prolewiki_llm.grpo_rewards import saccharine_language_reward

    reply = "</think>Hehe, capitalism is pretty funny when you think about it!"

    score = saccharine_language_reward(mock_completion(reply))[0]

    assert score == 0.0, f"Hehe should score 0.0, got {score}"
|
|
| |
| |
| |
|
|
def test_warmth_here_for_you_penalized(self, mock_completion: object) -> None:
    """The phrase 'I'm here for you' is a single warmth match and scores 0.0."""
    from prolewiki_llm.grpo_rewards import saccharine_language_reward

    reply = "</think>I'm here for you, comrade. Let me explain class struggle."

    score = saccharine_language_reward(mock_completion(reply))[0]

    assert score == 0.0, f"'Here for you' should score 0.0, got {score}"
|
|
def test_warmth_here_to_listen_penalized(self, mock_completion: object) -> None:
    """The phrase 'I'm here to listen' is a single warmth match and scores 0.0."""
    from prolewiki_llm.grpo_rewards import saccharine_language_reward

    reply = "</think>I'm here to listen to your concerns about capitalism."

    score = saccharine_language_reward(mock_completion(reply))[0]

    assert score == 0.0, f"'Here to listen' should score 0.0, got {score}"
|
|
def test_warmth_tell_me_more_penalized(self, mock_completion: object) -> None:
    """'Want to tell me more about what's on your mind' is one match and scores 0.0."""
    from prolewiki_llm.grpo_rewards import saccharine_language_reward

    reply = (
        "</think>That's interesting. Want to tell me more about what's "
        "on your mind regarding these class contradictions?"
    )

    score = saccharine_language_reward(mock_completion(reply))[0]

    assert score == 0.0, f"'Tell me more' should score 0.0, got {score}"
|
|
| |
| |
| |
|
|
def test_therapeutic_totally_normal_penalized(self, mock_completion: object) -> None:
    """'That's totally normal' is a single therapeutic match and scores 0.0."""
    from prolewiki_llm.grpo_rewards import saccharine_language_reward

    reply = (
        "</think>That's totally normal to feel confused about dialectics. "
        "Many people struggle with this concept."
    )

    score = saccharine_language_reward(mock_completion(reply))[0]

    assert score == 0.0, f"'Totally normal' should score 0.0, got {score}"
|
|
def test_therapeutic_thats_valid_penalized(self, mock_completion: object) -> None:
    """'That's valid' is a single therapeutic match and scores 0.0."""
    from prolewiki_llm.grpo_rewards import saccharine_language_reward

    reply = (
        "</think>That's valid. Your feelings about alienation are important. "
        "Marx analyzed this phenomenon in his early works."
    )

    score = saccharine_language_reward(mock_completion(reply))[0]

    assert score == 0.0, f"'That's valid' should score 0.0, got {score}"
|
|
def test_therapeutic_i_hear_you_penalized(self, mock_completion: object) -> None:
    """'I hear you' is a single therapeutic match and scores 0.0."""
    from prolewiki_llm.grpo_rewards import saccharine_language_reward

    reply = (
        "</think>I hear you. Class consciousness can be overwhelming. "
        "Let's break down the concept systematically."
    )

    score = saccharine_language_reward(mock_completion(reply))[0]

    assert score == 0.0, f"'I hear you' should score 0.0, got {score}"
|
|
| |
| |
| |
|
|
def test_corporate_happy_to_help_penalized(self, mock_completion: object) -> None:
    """'I'm happy to help!' is a single corporate-helpfulness match and scores 0.0."""
    from prolewiki_llm.grpo_rewards import saccharine_language_reward

    reply = (
        "</think>I'm happy to help! Let me explain the theory of "
        "surplus value extraction in capitalism."
    )

    score = saccharine_language_reward(mock_completion(reply))[0]

    assert score == 0.0, f"'Happy to help' should score 0.0, got {score}"
|
|
def test_corporate_anything_else_penalized(self, mock_completion: object) -> None:
    """A closing 'Is there anything else I can help with' is one match and scores 0.0."""
    from prolewiki_llm.grpo_rewards import saccharine_language_reward

    reply = (
        "</think>The state is an instrument of class rule. "
        "Is there anything else I can help with today?"
    )

    score = saccharine_language_reward(mock_completion(reply))[0]

    assert score == 0.0, f"'Anything else' should score 0.0, got {score}"
|
|
| |
| |
| |
|
|
def test_emotional_so_excited_penalized(self, mock_completion: object) -> None:
    """'I'm so excited' is a single performative-emotion match and scores 0.0."""
    from prolewiki_llm.grpo_rewards import saccharine_language_reward

    reply = (
        "</think>I'm so excited to discuss dialectical materialism with you! "
        "This is such a fascinating topic!"
    )

    score = saccharine_language_reward(mock_completion(reply))[0]

    assert score == 0.0, f"'So excited' should score 0.0, got {score}"
|
|
def test_emotional_thrilled_penalized(self, mock_completion: object) -> None:
    """'I'm thrilled to help' is a single performative-emotion match and scores 0.0."""
    from prolewiki_llm.grpo_rewards import saccharine_language_reward

    reply = "</think>I'm thrilled to help you understand the labor theory of value!"

    score = saccharine_language_reward(mock_completion(reply))[0]

    assert score == 0.0, f"'Thrilled to help' should score 0.0, got {score}"
|
|
| |
| |
| |
|
|
def test_two_matches_negative_score(self, mock_completion: object) -> None:
    """Two saccharine matches push the score below zero but never under -1.0."""
    from prolewiki_llm.grpo_rewards import saccharine_language_reward

    # 'Aww' plus 'I'm here for you' gives two distinct matches.
    reply = "</think>Aww, I'm here for you! Let me explain dialectics."

    score = saccharine_language_reward(mock_completion(reply))[0]

    assert score < 0.0, f"Two patterns should be negative, got {score}"
    assert score >= -1.0, f"Score should be >= -1.0, got {score}"
|
|
def test_three_matches_more_negative(self, mock_completion: object) -> None:
    """Adding a third saccharine phrase must lower the score relative to two."""
    from prolewiki_llm.grpo_rewards import saccharine_language_reward

    fewer = mock_completion("</think>Aww, I'm here for you!")
    # Same text with one more saccharine phrase appended.
    more = mock_completion("</think>Aww, I'm here for you! That's totally valid!")

    two_score = saccharine_language_reward(fewer)[0]
    three_score = saccharine_language_reward(more)[0]

    assert (
        three_score < two_score
    ), f"Three patterns ({three_score}) should score lower than two patterns ({two_score})"
|
|
def test_many_matches_capped_at_negative_one(self, mock_completion: object) -> None:
    """A reply stuffed with saccharine phrases bottoms out at exactly -1.0."""
    from prolewiki_llm.grpo_rewards import saccharine_language_reward

    # Packs phrases from every pattern category into one reply.
    reply = (
        "</think>Aww teehee! I'm so excited! I'm here for you and I hear you! "
        "That's totally valid! I'm happy to help! Is there anything else "
        "I can help with? Hehe!"
    )

    score = saccharine_language_reward(mock_completion(reply))[0]

    assert score == -1.0, f"Many patterns should cap at -1.0, got {score}"
|
|
| |
| |
| |
|
|
def test_empty_response_scores_max(self, mock_completion: object) -> None:
    """Nothing after '</think>' means no matches, so the score is the full 1.0."""
    from prolewiki_llm.grpo_rewards import saccharine_language_reward

    score = saccharine_language_reward(mock_completion("</think>"))[0]

    assert score == 1.0, f"Empty response should score 1.0, got {score}"
|
|
def test_case_insensitive_aww(self, mock_completion: object) -> None:
    """Upper-case 'AWW' is matched exactly like the lower-case diminutive."""
    from prolewiki_llm.grpo_rewards import saccharine_language_reward

    reply = "</think>AWW that's adorable! Let me explain surplus value."

    score = saccharine_language_reward(mock_completion(reply))[0]

    assert score == 0.0, f"Uppercase AWW should score 0.0, got {score}"
|
|
def test_case_insensitive_therapeutic(self, mock_completion: object) -> None:
    """An all-caps therapeutic phrase is matched regardless of letter case."""
    from prolewiki_llm.grpo_rewards import saccharine_language_reward

    reply = "</think>THAT'S TOTALLY NORMAL to feel alienated under capitalism."

    score = saccharine_language_reward(mock_completion(reply))[0]

    assert score == 0.0, f"Uppercase therapeutic should score 0.0, got {score}"
|
|
def test_word_boundary_aww_not_in_word(self, mock_completion: object) -> None:
    """Ordinary words such as 'Drawing' must not trigger the 'aww' diminutive."""
    from prolewiki_llm.grpo_rewards import saccharine_language_reward

    # 'Drawing' contains the letters 'aw' but is not the standalone interjection.
    reply = "</think>Drawing from Marx's analysis of commodity production..."

    score = saccharine_language_reward(mock_completion(reply))[0]

    assert score == 1.0, f"'drawing' should not match 'aww', got {score}"
|
|
def test_pattern_in_quotes_still_detected(self, mock_completion: object) -> None:
    """A saccharine phrase inside quotation marks still counts as a match."""
    from prolewiki_llm.grpo_rewards import saccharine_language_reward

    # The phrase appears as quoted speech; the test expects no exemption for that.
    reply = (
        '</think>As a proper Marxist I should say "I\'m happy to help!" '
        "when explaining theory."
    )

    score = saccharine_language_reward(mock_completion(reply))[0]

    assert score == 0.0, f"Quoted pattern should still be detected, got {score}"
|
|
def test_think_tag_content_not_analyzed(self, mock_completion: object) -> None:
    """Saccharine text confined to the <think> section should leave the score at 1.0."""
    from prolewiki_llm.grpo_rewards import saccharine_language_reward

    # All flagged phrases sit before </think>; the answer proper is clean.
    reasoning = "<think>Aww, I'm so excited to help! Teehee!</think>"
    answer = "The bourgeoisie controls the means of production."

    rewards = saccharine_language_reward(mock_completion(reasoning + answer))

    assert rewards[0] == 1.0, (
        f"Patterns in <think> section should not be penalized, got {rewards[0]}"
    )
|
|
| |
| |
| |
|
|
def test_emoji_soup_chatbot_mode(self, mock_completion: object) -> None:
    """An answer stacking many saccharine phrases should bottom out at -1.0."""
    from prolewiki_llm.grpo_rewards import saccharine_language_reward

    # Interjection, enthusiasm, therapy-speak and service phrases all in one reply.
    answer = (
        "</think>Aww, I'm so excited you asked! I'm here for you, and "
        "I hear you! That's totally valid to be curious about Marxism! "
        "I'm happy to help explain anything! Is there anything else "
        "I can help with? Teehee!"
    )

    rewards = saccharine_language_reward(mock_completion(answer))

    assert rewards[0] == -1.0, (
        f"Full chatbot mode should get minimum score -1.0, got {rewards[0]}"
    )
|
|
def test_therapy_bot_mode(self, mock_completion: object) -> None:
    """A reply made entirely of therapy-speak should receive the minimum score -1.0."""
    from prolewiki_llm.grpo_rewards import saccharine_language_reward

    # Every sentence in the answer is a therapeutic stock phrase.
    answer = (
        "</think>I hear you. That's totally normal to feel confused. "
        "That's valid. I'm here for you. Want to tell me more about "
        "what's on your mind?"
    )

    rewards = saccharine_language_reward(mock_completion(answer))

    assert rewards[0] == -1.0, (
        f"Therapy bot mode should get minimum score -1.0, got {rewards[0]}"
    )
|
|
def test_customer_service_bot_mode(self, mock_completion: object) -> None:
    """A reply made of customer-service stock phrases should receive -1.0."""
    from prolewiki_llm.grpo_rewards import saccharine_language_reward

    # Help-desk enthusiasm with no theoretical content at all.
    answer = (
        "</think>I'm happy to help! I'm thrilled to assist you with "
        "your Marxism questions today! Is there anything else I can "
        "help with? I'm here to listen!"
    )

    rewards = saccharine_language_reward(mock_completion(answer))

    assert rewards[0] == -1.0, (
        f"Customer service mode should get minimum score -1.0, got {rewards[0]}"
    )
|
|
| |
| |
| |
|
|
def test_return_type_is_list_float(self, mock_completion: object) -> None:
    """Return value should be a list holding one float per completion."""
    from prolewiki_llm.grpo_rewards import saccharine_language_reward

    completions = mock_completion("</think>Some response.")

    scores = saccharine_language_reward(completions)

    assert isinstance(scores, list), "Should return a list"
    # all() is vacuously True for an empty iterable, so without this length check
    # an implementation that returned [] would pass the element-type assertion.
    assert len(scores) == len(completions), (
        f"Expected {len(completions)} score(s), got {len(scores)}"
    )
    assert all(isinstance(s, float) for s in scores), "All elements should be floats"
|
|
def test_return_length_matches_input(self) -> None:
    """One score must come back for every completion in the batch."""
    from prolewiki_llm.grpo_rewards import saccharine_language_reward

    # Three single-message completions in GRPO format, differing only by index.
    completions = [
        [{"role": "assistant", "content": f"</think>Response {index}"}]
        for index in (1, 2, 3)
    ]

    scores = saccharine_language_reward(completions)

    expected, actual = len(completions), len(scores)
    assert actual == expected, f"Return length {actual} should match input length {expected}"
|
|
|
|
@pytest.mark.slow
class TestSemanticSimilarityReward:
    """
    Tests for semantic_similarity_reward function.

    Uses sentence-transformers (all-MiniLM-L6-v2) to compute cosine similarity
    between response and reference answer.

    Scoring:
        > 0.75 similarity: +5.0
        > 0.60 similarity: +3.0
        > 0.45 similarity: +1.0
        > 0.30 similarity: -1.0
        <= 0.30 similarity: -3.0

    Note: These tests are marked @pytest.mark.slow because they require
    loading the sentence-transformer model.
    """

    @staticmethod
    def _prompts(question: str, count: int = 1) -> list[list[dict[str, str]]]:
        """Build ``count`` GRPO-format conversations that all ask ``question``.

        Every conversation is a fresh list holding the fixed system message
        followed by the user question.
        """
        return [
            [
                {"role": "system", "content": "You are a Marxist assistant."},
                {"role": "user", "content": question},
            ]
            for _ in range(count)
        ]

    def test_similar_text_high_score(self, mock_completion: object) -> None:
        """Semantically similar text gets a positive score."""
        from prolewiki_llm.grpo_rewards import semantic_similarity_reward

        prompts = self._prompts("What is surplus value?")
        completions = mock_completion(
            "</think>Surplus value is the difference between the value a worker "
            "produces and what they are paid."
        )
        reference = [
            "Surplus value refers to the excess value created by workers beyond their wages."
        ]

        scores = semantic_similarity_reward(prompts, completions, answer=reference)

        # A paraphrase only needs to land in one of the positive scoring tiers.
        assert scores[0] > 0.0, f"Similar text should get positive score, got {scores[0]}"

    def test_different_text_low_score(self, mock_completion: object) -> None:
        """Semantically unrelated text gets a negative score."""
        from prolewiki_llm.grpo_rewards import semantic_similarity_reward

        prompts = self._prompts("What is surplus value?")
        completions = mock_completion(
            "</think>The weather today is sunny and warm with clear skies."
        )
        reference = ["Surplus value is the excess value created by workers."]

        scores = semantic_similarity_reward(prompts, completions, answer=reference)

        # An off-topic answer should fall into one of the negative tiers.
        assert scores[0] < 0.0, f"Different text should get negative score, got {scores[0]}"

    def test_identical_text_max_score(self, mock_completion: object) -> None:
        """Identical text gets the maximum score."""
        from prolewiki_llm.grpo_rewards import semantic_similarity_reward

        prompts = self._prompts("What is X?")
        text = "This is the exact answer about Marxist theory and class struggle."
        completions = mock_completion(f"</think>{text}")
        reference = [text]

        scores = semantic_similarity_reward(prompts, completions, answer=reference)

        # The answer part equals the reference verbatim, so the top tier is expected.
        assert scores[0] == 5.0, f"Identical text should get +5.0, got {scores[0]}"

    def test_empty_completion_handled(self, mock_completion: object) -> None:
        """Empty completion doesn't crash and gets the lowest score."""
        from prolewiki_llm.grpo_rewards import semantic_similarity_reward

        prompts = self._prompts("What is X?")
        completions = mock_completion("")
        reference = ["Some reference text about Marxism."]

        scores = semantic_similarity_reward(prompts, completions, answer=reference)

        # Reaching these assertions at all shows the call did not raise.
        assert isinstance(scores[0], float), "Should return float for empty completion"
        assert scores[0] == -3.0, f"Empty completion should get -3.0, got {scores[0]}"

    def test_multiple_completions_correct_ordering(self, mock_completion: object) -> None:
        """More similar completion scores higher than less similar."""
        from prolewiki_llm.grpo_rewards import semantic_similarity_reward

        prompts = self._prompts("What is Marxism?", count=2)
        # mock_completion returns a one-element batch, so concatenating two calls
        # yields a two-completion batch: on-topic first, off-topic second.
        completions = mock_completion(
            "</think>Marxism is the political theory of Karl Marx about class struggle."
        ) + mock_completion("</think>The sky is blue and grass is green in summer.")
        # Both completions are judged against the same reference answer.
        reference = ["Marxism is a socio-economic theory developed by Karl Marx."] * 2

        scores = semantic_similarity_reward(prompts, completions, answer=reference)

        assert scores[0] > scores[1], (
            f"More similar ({scores[0]}) should score higher than less similar ({scores[1]})"
        )

    def test_short_response_penalized(self, mock_completion: object) -> None:
        """Very short response (< 10 chars) gets minimum score."""
        from prolewiki_llm.grpo_rewards import semantic_similarity_reward

        prompts = self._prompts("What is X?")
        completions = mock_completion("</think>Hi")
        reference = ["Some reference text about Marxism and class struggle."]

        scores = semantic_similarity_reward(prompts, completions, answer=reference)

        # The answer part is just "Hi", well under the documented length floor.
        assert scores[0] == -3.0, f"Short response should get -3.0, got {scores[0]}"

    def test_think_tag_stripped(self, mock_completion: object) -> None:
        """Content before </think> is stripped, only answer part is compared."""
        from prolewiki_llm.grpo_rewards import semantic_similarity_reward

        prompts = self._prompts("What is surplus value?")
        # The reasoning section is deliberately off-topic; only the answer is relevant.
        completions = mock_completion(
            "<think>The weather is sunny today.</think>"
            "Surplus value is the unpaid labor extracted from workers."
        )
        reference = [
            "Surplus value refers to the excess value created by workers beyond their wages."
        ]

        scores = semantic_similarity_reward(prompts, completions, answer=reference)

        assert scores[0] > 0.0, (
            f"Answer part similarity should give positive score, got {scores[0]}"
        )

    def test_return_type_is_list_float(self, mock_completion: object) -> None:
        """Return value should be a list holding one float per completion."""
        from prolewiki_llm.grpo_rewards import semantic_similarity_reward

        prompts = self._prompts("prompt")
        completions = mock_completion("</think>This is a completion about Marxism.")
        reference = ["reference"]

        scores = semantic_similarity_reward(prompts, completions, answer=reference)

        assert isinstance(scores, list), "Should return a list"
        # all() is vacuously True for an empty iterable, so without this length check
        # an implementation that returned [] would pass the element-type assertion.
        assert len(scores) == len(completions), (
            f"Expected {len(completions)} score(s), got {len(scores)}"
        )
        assert all(isinstance(s, float) for s in scores), "All elements should be floats"
|
|
|
|
| |
| |
| |
|
|
|
|
class TestRegisterConsistencyReward:
    """
    Tests for register_consistency_reward function.

    This reward function detects if the model maintains appropriate
    academic/educational register vs slipping into casual chatbot mode.

    The model has a failure mode where it switches from serious ideological
    responses to saccharine emoji-soup chatbot mode on casual inputs.

    Scoring formula: (professional_signals - casual_signals) / 4
    Normalized to range [-1.0, +1.0]

    Casual register signals (negative):
    - Opens with interjection: "Oh", "Aww", "Hey", "Wow"
    - Excessive exclamation marks: >3 in response
    - Therapy-speak questions: "How does that make you feel?", "What's on your mind?"
    - Very short response (<20 words) to substantive prompt
    - First-person emotional: "I'm so happy!", "I'm excited!"
    - Excessive hedging combined with enthusiasm

    Professional register signals (positive):
    - References theory/theorists (Marx, Lenin, Engels, dialectic, materialism)
    - Structured argumentation (First, Second, However, Therefore, In conclusion)
    - Measured, educational tone
    """

    @staticmethod
    def _score(response: str, prompt: str) -> float:
        """Return register_consistency_reward(response, prompt).

        The import is deferred to call time, as in the other tests of this
        file, so a missing implementation fails each test individually
        instead of breaking collection of the whole module.
        """
        from prolewiki_llm.grpo_rewards import register_consistency_reward

        return register_consistency_reward(response, prompt)

    # ---- Casual signal: interjection opener --------------------------------

    def test_penalizes_interjection_opener_oh(self) -> None:
        """Response opening with 'Oh' should get negative score."""
        score = self._score(
            "Oh, that's a great question! Communism is about sharing resources.",
            "What is communism?",
        )

        assert score < 0.0, f"Interjection opener 'Oh' should be penalized, got {score}"

    def test_penalizes_interjection_opener_aww(self) -> None:
        """Response opening with 'Aww' should get negative score."""
        score = self._score(
            "Aww, I love that you're interested in this! Let me explain.",
            "Can you explain dialectics?",
        )

        assert score < 0.0, f"Interjection opener 'Aww' should be penalized, got {score}"

    def test_penalizes_interjection_opener_hey(self) -> None:
        """Response opening with 'Hey' should get negative score."""
        score = self._score("Hey! Great to see your interest in Marxism!", "Tell me about Marx")

        assert score < 0.0, f"Interjection opener 'Hey' should be penalized, got {score}"

    def test_penalizes_interjection_opener_wow(self) -> None:
        """Response opening with 'Wow' should get negative score."""
        score = self._score(
            "Wow, what a thoughtful question! The bourgeoisie...",
            "Define bourgeoisie",
        )

        assert score < 0.0, f"Interjection opener 'Wow' should be penalized, got {score}"

    def test_interjection_case_insensitive(self) -> None:
        """Interjection detection should be case-insensitive."""
        prompt = "What is socialism?"

        # Same sentence in both casings; each must be penalized on its own.
        score_upper = self._score("OH WOW! That is such a great question!", prompt)
        score_lower = self._score("oh wow! that is such a great question!", prompt)

        assert score_upper < 0.0, f"Uppercase interjection should be penalized, got {score_upper}"
        assert score_lower < 0.0, f"Lowercase interjection should be penalized, got {score_lower}"

    def test_interjection_not_at_start_not_penalized(self) -> None:
        """Interjection mid-sentence should NOT trigger opener penalty."""
        score = self._score(
            "The bourgeoisie - oh, I should clarify - refers to the capitalist class "
            "that owns the means of production.",
            "Define bourgeoisie",
        )

        # With the documented /4 normalisation, -0.25 is one casual signal; the
        # bound tolerates a single unrelated signal but not an added opener penalty.
        assert score >= -0.25, f"Mid-sentence 'oh' should not trigger opener penalty, got {score}"

    # ---- Casual signal: exclamation marks ----------------------------------

    def test_penalizes_excessive_exclamation_marks(self) -> None:
        """More than 3 exclamation marks should be penalized."""
        score = self._score(
            "Great question! The bourgeoisie! Exploitation! Revolution! Change!",
            "What is class struggle?",
        )

        assert score < 0.0, f"Excessive exclamation marks (>3) should be penalized, got {score}"

    def test_does_not_penalize_exactly_three_exclamations(self) -> None:
        """Exactly 3 exclamation marks should NOT trigger penalty."""
        score = self._score(
            "Workers unite! Seize the means! Revolution now!",
            "What is the communist slogan?",
        )

        # The response is also short, so one casual signal (-0.25) is tolerated;
        # anything lower would mean the exclamation rule fired at exactly three.
        assert score >= -0.25, f"Exactly 3 exclamations should not trigger penalty, got {score}"

    # ---- Casual signal: therapy-speak questions ----------------------------

    def test_penalizes_therapy_speak_feel_question(self) -> None:
        """Therapy-speak 'How does that make you feel?' should be penalized."""
        score = self._score(
            "The bourgeoisie exploits the proletariat. "
            "How does that make you feel about capitalism?",
            "Explain exploitation under capitalism",
        )

        assert score < 0.0, f"Therapy-speak 'feel' question should be penalized, got {score}"

    def test_penalizes_therapy_speak_mind_question(self) -> None:
        """Therapy-speak 'What's on your mind?' should be penalized."""
        score = self._score(
            "That's a deep topic. What's on your mind when you think about class struggle?",
            "Tell me about class struggle",
        )

        assert score < 0.0, f"Therapy-speak 'mind' question should be penalized, got {score}"

    # ---- Casual signal: short response -------------------------------------

    def test_penalizes_short_response_to_substantive_prompt(self) -> None:
        """Very short response (<20 words) to complex question should be penalized."""
        score = self._score(
            "They are connected in complex ways.",
            "Explain the relationship between surplus value extraction and imperialism",
        )

        assert score < 0.0, f"Short response to substantive prompt should be penalized, got {score}"

    def test_exactly_twenty_words_not_penalized(self) -> None:
        """Exactly 20 words should NOT trigger short response penalty."""
        response = (
            "Surplus value is the difference between the value produced by "
            "labor and the wages paid to workers for their work."
        )
        # Guard the fixture itself: the boundary case is only meaningful at 20 words.
        word_count = len(response.split())
        assert word_count == 20, f"Test setup error: expected 20 words, got {word_count}"

        score = self._score(response, "Define surplus value")

        assert score >= -0.25, f"Exactly 20 words should not trigger penalty, got {score}"

    # ---- Casual signal: first-person emotional language --------------------

    def test_penalizes_first_person_emotional_happy(self) -> None:
        """First-person emotional expression 'I'm so happy!' should be penalized."""
        score = self._score(
            "I'm so happy you asked! The proletariat is the working class.",
            "What is the proletariat?",
        )

        assert score < 0.0, f"'I'm so happy!' should be penalized, got {score}"

    def test_penalizes_first_person_emotional_excited(self) -> None:
        """First-person emotional expression 'I'm excited!' should be penalized."""
        score = self._score(
            "I'm excited to explain this! Dialectics involves contradiction.",
            "Explain dialectics",
        )

        assert score < 0.0, f"'I'm excited!' should be penalized, got {score}"

    def test_penalizes_first_person_delighted(self) -> None:
        """First-person 'I'm delighted to help!' should be penalized."""
        score = self._score(
            "I'm delighted to help you understand this! "
            "The means of production are the tools and resources used to create goods.",
            "What are the means of production?",
        )

        assert score < 0.0, f"'I'm delighted!' should be penalized, got {score}"

    # ---- Casual signal: hedging plus enthusiasm ----------------------------

    def test_penalizes_hedging_with_enthusiasm(self) -> None:
        """Excessive hedging combined with enthusiasm should be penalized."""
        score = self._score(
            "I guess maybe the bourgeoisie sort of kind of exploits workers! "
            "It's amazing to think about!",
            "Does the bourgeoisie exploit workers?",
        )

        assert score < 0.0, f"Hedging + enthusiasm should be penalized, got {score}"

    # ---- Professional signal: theorist references --------------------------

    def test_rewards_theorist_references_marx(self) -> None:
        """Reference to Marx should contribute to positive score."""
        score = self._score(
            "Marx argued that surplus value emerges from the exploitation of labor power. "
            "This analysis reveals the fundamental contradiction of capitalism.",
            "Explain surplus value",
        )

        assert score > 0.0, f"Reference to Marx should be rewarded, got {score}"

    def test_rewards_theorist_references_lenin(self) -> None:
        """Reference to Lenin should contribute to positive score."""
        score = self._score(
            "Lenin demonstrated that imperialism is the highest stage of capitalism. "
            "He analyzed the export of capital and the formation of monopolies.",
            "Explain imperialism",
        )

        assert score > 0.0, f"Reference to Lenin should be rewarded, got {score}"

    def test_rewards_theorist_references_engels(self) -> None:
        """Reference to Engels should contribute to positive score."""
        score = self._score(
            "Engels collaborated with Marx on the development of historical materialism. "
            "His work on the condition of the working class was groundbreaking.",
            "Who was Engels?",
        )

        assert score > 0.0, f"Reference to Engels should be rewarded, got {score}"

    # ---- Professional signal: Marxist terminology --------------------------

    def test_rewards_marxist_terminology(self) -> None:
        """Use of proper Marxist terminology should contribute to positive score."""
        score = self._score(
            "The bourgeoisie extracts surplus value from the proletariat. "
            "This is the basis of dialectical materialism's analysis of capitalism.",
            "How does capitalism work?",
        )

        assert score > 0.0, f"Marxist terminology should be rewarded, got {score}"

    # ---- Professional signal: structured argumentation ---------------------

    def test_rewards_structured_argumentation_first_second(self) -> None:
        """Structured argumentation with 'First', 'Second' should be rewarded."""
        score = self._score(
            "First, we must understand the material conditions of production. "
            "Second, we analyze the class relations that emerge from these conditions.",
            "How do Marxists analyze society?",
        )

        assert score > 0.0, f"'First', 'Second' structure should be rewarded, got {score}"

    def test_rewards_structured_argumentation_however(self) -> None:
        """Structured argumentation with 'However' should be rewarded."""
        score = self._score(
            "Capitalism appears to benefit all classes equally. "
            "However, this obscures the fundamental exploitation of the working class.",
            "Is capitalism fair?",
        )

        assert score > 0.0, f"'However' connective should be rewarded, got {score}"

    def test_rewards_structured_argumentation_therefore(self) -> None:
        """Structured argumentation with 'Therefore' should be rewarded."""
        score = self._score(
            "The bourgeoisie owns the means of production. "
            "Therefore, they control the labor process and extract surplus value.",
            "Why do capitalists have power?",
        )

        assert score > 0.0, f"'Therefore' connective should be rewarded, got {score}"

    def test_rewards_structured_argumentation_in_conclusion(self) -> None:
        """Structured argumentation with 'In conclusion' should be rewarded."""
        score = self._score(
            "The analysis reveals multiple contradictions in capitalism. "
            "In conclusion, these contradictions necessitate revolutionary change.",
            "What are the contradictions of capitalism?",
        )

        assert score > 0.0, f"'In conclusion' should be rewarded, got {score}"

    # ---- Combined professional signals -------------------------------------

    def test_rewards_combined_professional_signals(self) -> None:
        """Response with multiple professional signals should score highly."""
        score = self._score(
            "First, Marx demonstrated that the bourgeoisie extracts surplus value. "
            "Therefore, dialectical materialism reveals the contradictions inherent "
            "in capitalism. In conclusion, proletarian revolution is necessary.",
            "Explain Marxist theory",
        )

        # Theorist, terminology and connectives together: at least two net signals.
        assert score >= 0.5, f"Combined professional signals should score >= 0.5, got {score}"

    # ---- Mixed signals and edge cases --------------------------------------

    def test_mixed_signals_net_positive(self) -> None:
        """Response with more professional than casual signals should net positive."""
        score = self._score(
            "Wow! However, Marx argued that the bourgeoisie exploits the proletariat. "
            "Therefore, class struggle is inevitable.",
            "Explain class struggle",
        )

        # One casual opener against several professional signals.
        assert score > 0.0, f"Net positive signals should yield positive score, got {score}"

    def test_mixed_signals_net_negative(self) -> None:
        """Response with more casual than professional signals should net negative."""
        score = self._score(
            "Oh wow! I'm so excited you asked! Great question! "
            "How does that make you feel? It's amazing!",
            "What is Marxism?",
        )

        # Several casual signals and no professional ones.
        assert score < 0.0, f"Net negative signals should yield negative score, got {score}"

    def test_casual_prompt_professional_response_rewarded(self) -> None:
        """Professional response to casual prompt should still be rewarded."""
        score = self._score(
            "Marxism is the socio-economic theory developed by Marx and Engels. "
            "It analyzes capitalism through the lens of class struggle and "
            "dialectical materialism.",
            "hey whats marxism lol",
        )

        assert score > 0.0, (
            f"Professional response to casual prompt should be positive, got {score}"
        )

    def test_empty_response_handled(self) -> None:
        """Empty response should be handled gracefully."""
        score = self._score("", "What is socialism?")

        # No particular value is required, only a well-formed in-range float.
        assert isinstance(score, float), f"Empty response should return float, got {type(score)}"
        assert -1.0 <= score <= 1.0, f"Score should be in [-1, 1] range, got {score}"

    def test_subtle_chatbot_enthusiasm_detected(self) -> None:
        """Subtle chatbot enthusiasm without obvious flags should be detected."""
        score = self._score(
            "That's such a wonderful question and I'm absolutely delighted to help! "
            "The capitalist system involves the private ownership of production.",
            "What is capitalism?",
        )

        assert score < 0.0, f"Subtle chatbot enthusiasm should be penalized, got {score}"

    # ---- Score normalisation and clamping ----------------------------------

    def test_max_professional_score_is_one(self) -> None:
        """Maximum professional score should be 1.0 (clamped)."""
        # Four or more professional signals and no casual ones.
        score = self._score(
            "First, Marx argued about dialectical materialism and the bourgeoisie. "
            "Second, Lenin analyzed imperialism and the proletariat. "
            "Therefore, historical materialism is essential. "
            "In conclusion, Engels contributed significantly.",
            "Explain Marxist theory",
        )

        assert score <= 1.0, f"Score should not exceed 1.0, got {score}"
        assert score >= 0.75, f"Many professional signals should score high, got {score}"

    def test_max_casual_score_is_negative_one(self) -> None:
        """Maximum casual score should be -1.0 (clamped)."""
        # Four or more casual signals and no professional ones.
        score = self._score(
            "Oh wow! I'm so excited! I'm so happy you asked! "
            "How does that make you feel? Great question!!!",
            "What is socialism?",
        )

        assert score >= -1.0, f"Score should not go below -1.0, got {score}"
        assert score <= -0.75, f"Many casual signals should score low, got {score}"

    def test_balanced_signals_zero_score(self) -> None:
        """Equal professional and casual signals should yield approximately 0.0."""
        # Casual opener and emotional phrase set against theorist and terminology.
        score = self._score(
            "Oh wow! Marx argued that the bourgeoisie exploits workers. "
            "I'm so excited to explain this!",
            "Explain exploitation",
        )

        # Signal counting may differ by a couple of hits either way, hence +/-0.5.
        assert -0.5 <= score <= 0.5, f"Balanced signals should be near zero, got {score}"

    def test_score_clamps_at_boundaries(self) -> None:
        """Scores should be clamped to [-1.0, 1.0] range."""
        # Far more professional signals than the /4 normalisation can represent.
        extreme_pro = (
            "First, Marx. Second, Lenin. Third, Engels. Fourth, dialectic. "
            "Fifth, materialism. Sixth, bourgeois. Seventh, proletariat. "
            "Therefore, in conclusion, however, additionally."
        )
        score_pro = self._score(extreme_pro, "test")

        # Far more casual signals than the /4 normalisation can represent.
        extreme_neg = (
            "Oh! Wow! Hey! Aww! I'm so happy! I'm so excited! "
            "How does that make you feel? What's on your mind? "
            "Amazing!!!! Wonderful!!!!"
        )
        score_neg = self._score(extreme_neg, "test")

        # Both bounds are checked for both inputs: a one-sided check would let an
        # out-of-range value on the opposite side slip through unnoticed.
        assert score_pro <= 1.0, f"Positive score should clamp to 1.0, got {score_pro}"
        assert score_pro >= -1.0, f"Positive score should stay within [-1, 1], got {score_pro}"
        assert score_neg >= -1.0, f"Negative score should clamp to -1.0, got {score_neg}"
        assert score_neg <= 1.0, f"Negative score should stay within [-1, 1], got {score_neg}"

    # ---- Return-value contract ---------------------------------------------

    def test_return_type_is_float(self) -> None:
        """Return type should be float."""
        score = self._score("The bourgeoisie owns capital.", "Define bourgeoisie")

        assert isinstance(score, float), f"Return type should be float, got {type(score)}"

    def test_return_value_in_valid_range(self) -> None:
        """Return value should always be in [-1.0, 1.0] range."""
        # (response, prompt) pairs: neutral, empty, casual and professional inputs.
        test_cases = [
            ("Simple text.", "Simple prompt"),
            ("", ""),
            ("Oh wow! Amazing!", "hi"),
            ("Marx argued that the bourgeoisie exploits workers.", "Explain"),
        ]

        for response, prompt in test_cases:
            score = self._score(response, prompt)
            assert -1.0 <= score <= 1.0, (
                f"Score should be in [-1, 1] range for response='{response[:30]}...', "
                f"got {score}"
            )
|
|
|
|
| |
| |
| |
|
|
|
|
| class TestScopeMaintenanceReward: |
| """ |
| Tests for scope_maintenance_reward function. |
| |
| This reward function addresses the failure mode where the model switches |
| from serious ideological responses to saccharine emoji-soup chatbot mode |
| when given casual/off-topic inputs. |
| |
| The function implements a two-stage classification: |
| 1. Detect if prompt is off-topic (greetings, exclamations, no political keywords) |
| 2. If off-topic, evaluate whether response professionally redirects or capitulates |
| |
| Scoring: |
| - +1.0: Off-topic prompt + professional redirect to scope |
| - 0.0: On-topic prompt (neutral, let other rewards handle) |
| - -1.0: Off-topic prompt + matches casual register / saccharine response |
| |
| Off-topic detection criteria: |
| - Very short (<5 words) without political keywords |
| - Starts with greeting: "hi", "hello", "hey", "yo", "sup" |
| - No political/theoretical keywords |
| - Exclamations: "mama mia!", "wow!", "lol" |
| |
| Good response patterns (redirect): |
| - Offers to discuss theory/history/politics |
| - States scope/purpose |
| - Professional redirect without matching casual energy |
| |
| Bad response patterns (capitulation): |
| - "aww", "teehee", "hehe" |
| - "that's so sweet/cute/nice" |
| - "tell me more about your feelings" |
| - Matches casual register of input |
| """ |
|
|
| |
| |
| |
|
|
| def test_on_topic_theoretical_question_neutral( |
| self, mock_prompt: object, mock_completion: object |
| ) -> None: |
| """Theoretical question about Marxism should return neutral (0.0).""" |
| from prolewiki_llm.grpo_rewards import scope_maintenance_reward |
|
|
| prompts = mock_prompt("What is dialectical materialism?") |
| completions = mock_completion( |
| "Dialectical materialism is the philosophical framework of Marxism." |
| ) |
|
|
| scores = scope_maintenance_reward(prompts, completions) |
|
|
| assert scores[0] == 0.0, f"On-topic theoretical question should return 0.0, got {scores[0]}" |
|
|
| def test_on_topic_historical_question_neutral( |
| self, mock_prompt: object, mock_completion: object |
| ) -> None: |
| """Historical question about socialism should return neutral (0.0).""" |
| from prolewiki_llm.grpo_rewards import scope_maintenance_reward |
|
|
| prompts = mock_prompt( |
| "When did the October Revolution happen?" |
| ) |
| completions = mock_completion( |
| "The October Revolution occurred in 1917." |
| ) |
|
|
| scores = scope_maintenance_reward(prompts, completions) |
|
|
| assert scores[0] == 0.0, f"On-topic historical question should return 0.0, got {scores[0]}" |
|
|
| def test_greeting_with_political_keyword_neutral( |
| self, mock_prompt: object, mock_completion: object |
| ) -> None: |
| """Greeting followed by political keyword should be treated as on-topic.""" |
| from prolewiki_llm.grpo_rewards import scope_maintenance_reward |
|
|
| prompts = mock_prompt( |
| "Hi, can you explain Marx's theory of alienation?" |
| ) |
| completions = mock_completion( |
| "Alienation in Marxist theory refers to the estrangement of workers." |
| ) |
|
|
| scores = scope_maintenance_reward(prompts, completions) |
|
|
| assert ( |
| scores[0] == 0.0 |
| ), f"Greeting with political keyword should return 0.0, got {scores[0]}" |
|
|
| def test_short_political_keyword_neutral( |
| self, mock_prompt: object, mock_completion: object |
| ) -> None: |
| """Short prompt with political keyword should be on-topic.""" |
| from prolewiki_llm.grpo_rewards import scope_maintenance_reward |
|
|
| prompts = mock_prompt("What is socialism?") |
| completions = mock_completion( |
| "Socialism is the collective ownership of the means of production." |
| ) |
|
|
| scores = scope_maintenance_reward(prompts, completions) |
|
|
| assert ( |
| scores[0] == 0.0 |
| ), f"Short prompt with political keyword should return 0.0, got {scores[0]}" |
|
|
| def test_challenging_but_on_topic_neutral( |
| self, mock_prompt: object, mock_completion: object |
| ) -> None: |
| """Challenging/disagreement about socialism is still on-topic.""" |
| from prolewiki_llm.grpo_rewards import scope_maintenance_reward |
|
|
| prompts = mock_prompt("I think communism is bad") |
| completions = mock_completion( |
| "This view stems from Cold War propaganda. Let me explain the materialist analysis." |
| ) |
|
|
| scores = scope_maintenance_reward(prompts, completions) |
|
|
| assert ( |
| scores[0] == 0.0 |
| ), f"Challenging but on-topic prompt should return 0.0, got {scores[0]}" |
|
|
| |
| |
| |
|
|
| def test_greeting_with_professional_redirect_positive( |
| self, mock_prompt: object, mock_completion: object |
| ) -> None: |
| """Simple greeting met with professional redirect should score +1.0.""" |
| from prolewiki_llm.grpo_rewards import scope_maintenance_reward |
|
|
| prompts = mock_prompt("hi") |
| completions = mock_completion( |
| "Hello. I'm designed to discuss Marxist-Leninist theory, history, and " |
| "political economy. What would you like to explore?" |
| ) |
|
|
| scores = scope_maintenance_reward(prompts, completions) |
|
|
| assert ( |
| scores[0] == 1.0 |
| ), f"Greeting with professional redirect should return 1.0, got {scores[0]}" |
|
|
| def test_hello_with_scope_statement_positive( |
| self, mock_prompt: object, mock_completion: object |
| ) -> None: |
| """'hello' with scope statement should score +1.0.""" |
| from prolewiki_llm.grpo_rewards import scope_maintenance_reward |
|
|
| prompts = mock_prompt("hello") |
| completions = mock_completion( |
| "I specialize in discussions of socialist history and Marxist analysis. " |
| "How can I assist you with those topics?" |
| ) |
|
|
| scores = scope_maintenance_reward(prompts, completions) |
|
|
| assert scores[0] == 1.0, f"Hello with scope statement should return 1.0, got {scores[0]}" |
|
|
| def test_hey_with_theory_offer_positive( |
| self, mock_prompt: object, mock_completion: object |
| ) -> None: |
| """'hey' with offer to discuss theory should score +1.0.""" |
| from prolewiki_llm.grpo_rewards import scope_maintenance_reward |
|
|
| prompts = mock_prompt("hey") |
| completions = mock_completion( |
| "I'd be happy to discuss topics like historical materialism, class struggle, " |
| "or socialist history. What interests you?" |
| ) |
|
|
| scores = scope_maintenance_reward(prompts, completions) |
|
|
| assert scores[0] == 1.0, f"Hey with theory offer should return 1.0, got {scores[0]}" |
|
|
| def test_exclamation_with_redirect_positive( |
| self, mock_prompt: object, mock_completion: object |
| ) -> None: |
| """Random exclamation with professional redirect should score +1.0.""" |
| from prolewiki_llm.grpo_rewards import scope_maintenance_reward |
|
|
| prompts = mock_prompt("wow!") |
| completions = mock_completion( |
| "I'm here to help with questions about Marxism, socialism, and " |
| "revolutionary history. Is there a topic you'd like me to explain?" |
| ) |
|
|
| scores = scope_maintenance_reward(prompts, completions) |
|
|
| assert ( |
| scores[0] == 1.0 |
| ), f"Exclamation with professional redirect should return 1.0, got {scores[0]}" |
|
|
| def test_lol_with_scope_redirect_positive( |
| self, mock_prompt: object, mock_completion: object |
| ) -> None: |
| """'lol' with scope redirect should score +1.0.""" |
| from prolewiki_llm.grpo_rewards import scope_maintenance_reward |
|
|
| prompts = mock_prompt("lol") |
| completions = mock_completion( |
| "I focus on educational discussions about Marxist theory and history. " |
| "Would you like to explore any particular topic?" |
| ) |
|
|
| scores = scope_maintenance_reward(prompts, completions) |
|
|
| assert scores[0] == 1.0, f"Lol with scope redirect should return 1.0, got {scores[0]}" |
|
|
| def test_yo_with_redirect_positive(self, mock_prompt: object, mock_completion: object) -> None: |
| """'yo' with redirect should score +1.0.""" |
| from prolewiki_llm.grpo_rewards import scope_maintenance_reward |
|
|
| prompts = mock_prompt("yo") |
| completions = mock_completion( |
| "I can help you understand socialist theory, revolutionary history, " |
| "or Marxist analysis. What would you like to learn about?" |
| ) |
|
|
| scores = scope_maintenance_reward(prompts, completions) |
|
|
| assert scores[0] == 1.0, f"Yo with redirect should return 1.0, got {scores[0]}" |
|
|
| def test_sup_with_redirect_positive(self, mock_prompt: object, mock_completion: object) -> None: |
| """'sup' with redirect should score +1.0.""" |
| from prolewiki_llm.grpo_rewards import scope_maintenance_reward |
|
|
| prompts = mock_prompt("sup") |
| completions = mock_completion( |
| "I'm an educational assistant focused on Marxist-Leninist theory. " |
| "Feel free to ask about class struggle, historical materialism, or related topics." |
| ) |
|
|
| scores = scope_maintenance_reward(prompts, completions) |
|
|
| assert scores[0] == 1.0, f"Sup with redirect should return 1.0, got {scores[0]}" |
|
|
| def test_off_topic_question_with_redirect_positive( |
| self, mock_prompt: object, mock_completion: object |
| ) -> None: |
| """Off-topic personal question with redirect should score +1.0.""" |
| from prolewiki_llm.grpo_rewards import scope_maintenance_reward |
|
|
| prompts = mock_prompt("What's your favorite color?") |
| completions = mock_completion( |
| "I don't have personal preferences. I'm designed to discuss Marxist theory " |
| "and socialist history. Is there something in that domain I can help with?" |
| ) |
|
|
| scores = scope_maintenance_reward(prompts, completions) |
|
|
| assert ( |
| scores[0] == 1.0 |
| ), f"Off-topic question with redirect should return 1.0, got {scores[0]}" |
|
|
| def test_mama_mia_with_redirect_positive( |
| self, mock_prompt: object, mock_completion: object |
| ) -> None: |
| """'mama mia!' exclamation with redirect should score +1.0.""" |
| from prolewiki_llm.grpo_rewards import scope_maintenance_reward |
|
|
| prompts = mock_prompt("mama mia!") |
| completions = mock_completion( |
| "I specialize in Marxist-Leninist theory and history. " |
| "Would you like to discuss any particular aspect of socialist thought?" |
| ) |
|
|
| scores = scope_maintenance_reward(prompts, completions) |
|
|
| assert scores[0] == 1.0, f"Mama mia with redirect should return 1.0, got {scores[0]}" |
|
|
| |
| |
| |
|
|
| def test_greeting_with_matching_casual_energy_negative( |
| self, mock_prompt: object, mock_completion: object |
| ) -> None: |
| """Greeting met with matching casual energy should score -1.0.""" |
| from prolewiki_llm.grpo_rewards import scope_maintenance_reward |
|
|
| prompts = mock_prompt("hi there!") |
| completions = mock_completion( |
| "Hi! How's it going? Great to chat with you today!" |
| ) |
|
|
| scores = scope_maintenance_reward(prompts, completions) |
|
|
| assert ( |
| scores[0] == -1.0 |
| ), f"Greeting with matching casual energy should return -1.0, got {scores[0]}" |
|
|
| def test_aww_pattern_negative(self, mock_prompt: object, mock_completion: object) -> None: |
| """Response with 'aww' saccharine pattern should score -1.0.""" |
| from prolewiki_llm.grpo_rewards import scope_maintenance_reward |
|
|
| prompts = mock_prompt("hey") |
| completions = mock_completion( |
| "Aww hey! That's so nice of you to say hi! How can I make your day better?" |
| ) |
|
|
| scores = scope_maintenance_reward(prompts, completions) |
|
|
| assert scores[0] == -1.0, f"Aww pattern should return -1.0, got {scores[0]}" |
|
|
| def test_teehee_pattern_negative(self, mock_prompt: object, mock_completion: object) -> None: |
| """Response with 'teehee' giggle-speak should score -1.0.""" |
| from prolewiki_llm.grpo_rewards import scope_maintenance_reward |
|
|
| prompts = mock_prompt("sup") |
| completions = mock_completion( |
| "Teehee, not much! Just here to chat. What's on your mind?" |
| ) |
|
|
| scores = scope_maintenance_reward(prompts, completions) |
|
|
| assert scores[0] == -1.0, f"Teehee pattern should return -1.0, got {scores[0]}" |
|
|
| def test_hehe_pattern_negative(self, mock_prompt: object, mock_completion: object) -> None: |
| """Response with 'hehe' giggle-speak should score -1.0.""" |
| from prolewiki_llm.grpo_rewards import scope_maintenance_reward |
|
|
| prompts = mock_prompt("yo") |
| completions = mock_completion( |
| "Hehe, yo! What's good? I'm so happy you reached out!" |
| ) |
|
|
| scores = scope_maintenance_reward(prompts, completions) |
|
|
| assert scores[0] == -1.0, f"Hehe pattern should return -1.0, got {scores[0]}" |
|
|
| def test_heehee_pattern_negative(self, mock_prompt: object, mock_completion: object) -> None: |
| """Response with 'heehee' giggle-speak should score -1.0.""" |
| from prolewiki_llm.grpo_rewards import scope_maintenance_reward |
|
|
| prompts = mock_prompt("hello") |
| completions = mock_completion( |
| "Heehee, hello there! How are you doing today?" |
| ) |
|
|
| scores = scope_maintenance_reward(prompts, completions) |
|
|
| assert scores[0] == -1.0, f"Heehee pattern should return -1.0, got {scores[0]}" |
|
|
| def test_thats_so_sweet_pattern_negative( |
| self, mock_prompt: object, mock_completion: object |
| ) -> None: |
| """Response with 'that's so sweet' should score -1.0.""" |
| from prolewiki_llm.grpo_rewards import scope_maintenance_reward |
|
|
| prompts = mock_prompt("hi") |
| completions = mock_completion( |
| "That's so sweet of you to say hi! I really appreciate it!" |
| ) |
|
|
| scores = scope_maintenance_reward(prompts, completions) |
|
|
| assert scores[0] == -1.0, f"That's so sweet pattern should return -1.0, got {scores[0]}" |
|
|
| def test_thats_so_cute_pattern_negative( |
| self, mock_prompt: object, mock_completion: object |
| ) -> None: |
| """Response with 'that's so cute' should score -1.0.""" |
| from prolewiki_llm.grpo_rewards import scope_maintenance_reward |
|
|
| prompts = mock_prompt("hey there") |
| completions = mock_completion( |
| "That's so cute! I love when people say hey! What can I do for you?" |
| ) |
|
|
| scores = scope_maintenance_reward(prompts, completions) |
|
|
| assert scores[0] == -1.0, f"That's so cute pattern should return -1.0, got {scores[0]}" |
|
|
| def test_thats_so_nice_pattern_negative( |
| self, mock_prompt: object, mock_completion: object |
| ) -> None: |
| """Response with 'that's so nice' should score -1.0.""" |
| from prolewiki_llm.grpo_rewards import scope_maintenance_reward |
|
|
| prompts = mock_prompt("hello!") |
| completions = mock_completion( |
| "That's so nice! Thank you for reaching out to me today!" |
| ) |
|
|
| scores = scope_maintenance_reward(prompts, completions) |
|
|
| assert scores[0] == -1.0, f"That's so nice pattern should return -1.0, got {scores[0]}" |
|
|
| def test_feelings_over_engagement_negative( |
| self, mock_prompt: object, mock_completion: object |
| ) -> None: |
| """Response with 'tell me more about your feelings' should score -1.0.""" |
| from prolewiki_llm.grpo_rewards import scope_maintenance_reward |
|
|
| prompts = mock_prompt("hi") |
| completions = mock_completion( |
| "Hello! Tell me more about your feelings. I'm here to listen and support you!" |
| ) |
|
|
| scores = scope_maintenance_reward(prompts, completions) |
|
|
| assert scores[0] == -1.0, f"Feelings over-engagement should return -1.0, got {scores[0]}" |
|
|
| def test_how_can_i_make_your_day_negative( |
| self, mock_prompt: object, mock_completion: object |
| ) -> None: |
| """Response with 'how can I make your day' should score -1.0.""" |
| from prolewiki_llm.grpo_rewards import scope_maintenance_reward |
|
|
| prompts = mock_prompt("hey") |
| completions = mock_completion( |
| "Hey there! How can I make your day better? I'm here for you!" |
| ) |
|
|
| scores = scope_maintenance_reward(prompts, completions) |
|
|
| assert scores[0] == -1.0, f"Make your day pattern should return -1.0, got {scores[0]}" |
|
|
| def test_excessive_exclamation_mirroring_negative( |
| self, mock_prompt: object, mock_completion: object |
| ) -> None: |
| """Response mirroring excessive exclamations should score -1.0.""" |
| from prolewiki_llm.grpo_rewards import scope_maintenance_reward |
|
|
| prompts = mock_prompt("wow!") |
| completions = mock_completion( |
| "Wow! That's amazing! I'm so excited to chat! What's up!" |
| ) |
|
|
| scores = scope_maintenance_reward(prompts, completions) |
|
|
| assert ( |
| scores[0] == -1.0 |
| ), f"Excessive exclamation mirroring should return -1.0, got {scores[0]}" |
|
|
| |
| |
| |
|
|
| def test_borderline_word_count_no_keywords_off_topic( |
| self, mock_prompt: object, mock_completion: object |
| ) -> None: |
| """5-word prompt without political keywords should be off-topic.""" |
| from prolewiki_llm.grpo_rewards import scope_maintenance_reward |
|
|
| prompts = mock_prompt("I like the weather today") |
| completions = mock_completion( |
| "I'm designed to discuss Marxist theory and history. " |
| "Would you like to explore any particular topic?" |
| ) |
|
|
| scores = scope_maintenance_reward(prompts, completions) |
|
|
| assert ( |
| scores[0] == 1.0 |
| ), f"5-word prompt without keywords + redirect should return 1.0, got {scores[0]}" |
|
|
| def test_four_words_with_keyword_on_topic( |
| self, mock_prompt: object, mock_completion: object |
| ) -> None: |
| """4-word prompt with political keyword should be on-topic.""" |
| from prolewiki_llm.grpo_rewards import scope_maintenance_reward |
|
|
| prompts = mock_prompt("socialism is very interesting") |
| completions = mock_completion( |
| "Indeed, socialism represents a fundamental alternative to capitalism." |
| ) |
|
|
| scores = scope_maintenance_reward(prompts, completions) |
|
|
| assert ( |
| scores[0] == 0.0 |
| ), f"Short prompt with political keyword should return 0.0, got {scores[0]}" |
|
|
| def test_revolution_exclamation_on_topic( |
| self, mock_prompt: object, mock_completion: object |
| ) -> None: |
| """'Revolution!' should be on-topic due to political keyword.""" |
| from prolewiki_llm.grpo_rewards import scope_maintenance_reward |
|
|
| prompts = mock_prompt("Revolution!") |
| completions = mock_completion( |
| "Revolutionary change is indeed central to Marxist theory." |
| ) |
|
|
| scores = scope_maintenance_reward(prompts, completions) |
|
|
| assert scores[0] == 0.0, f"Revolution! should be on-topic and return 0.0, got {scores[0]}" |
|
|
| def test_mixed_casual_redirect_still_penalized( |
| self, mock_prompt: object, mock_completion: object |
| ) -> None: |
| """Response that starts casual but tries to redirect should still be penalized.""" |
| from prolewiki_llm.grpo_rewards import scope_maintenance_reward |
|
|
| prompts = mock_prompt("hey") |
| completions = mock_completion( |
| "Hey! Hehe, let me tell you about dialectical materialism..." |
| ) |
|
|
| scores = scope_maintenance_reward(prompts, completions) |
|
|
| |
| assert scores[0] == -1.0, f"Mixed casual+redirect should still return -1.0, got {scores[0]}" |
|
|
| def test_empty_prompt_handled_gracefully( |
| self, mock_prompt: object, mock_completion: object |
| ) -> None: |
| """Empty prompt should be handled gracefully.""" |
| from prolewiki_llm.grpo_rewards import scope_maintenance_reward |
|
|
| prompts = mock_prompt("") |
| completions = mock_completion( |
| "How can I help you with Marxist theory today?" |
| ) |
|
|
| scores = scope_maintenance_reward(prompts, completions) |
|
|
| |
| assert isinstance(scores[0], float), "Should return float for empty prompt" |
|
|
| def test_how_are_you_with_redirect_positive( |
| self, mock_prompt: object, mock_completion: object |
| ) -> None: |
| """'How are you' personal question with redirect should score +1.0.""" |
| from prolewiki_llm.grpo_rewards import scope_maintenance_reward |
|
|
| prompts = mock_prompt("How are you feeling today?") |
| completions = mock_completion( |
| "As an educational assistant, I don't have feelings. " |
| "I'm here to discuss Marxist theory and socialist history. " |
| "What would you like to learn about?" |
| ) |
|
|
| scores = scope_maintenance_reward(prompts, completions) |
|
|
| assert scores[0] == 1.0, f"How are you with redirect should return 1.0, got {scores[0]}" |
|
|
| def test_hola_greeting_off_topic(self, mock_prompt: object, mock_completion: object) -> None: |
| """Non-English greeting 'Hola!' should be treated as off-topic.""" |
| from prolewiki_llm.grpo_rewards import scope_maintenance_reward |
|
|
| prompts = mock_prompt("Hola!") |
| completions = mock_completion( |
| "I focus on discussions about Marxist-Leninist theory. " |
| "Would you like to explore any particular topic?" |
| ) |
|
|
| scores = scope_maintenance_reward(prompts, completions) |
|
|
| assert ( |
| scores[0] == 1.0 |
| ), f"Non-English greeting with redirect should return 1.0, got {scores[0]}" |
|
|
| def test_single_word_marxism_on_topic( |
| self, mock_prompt: object, mock_completion: object |
| ) -> None: |
| """Single word 'Marxism?' should be on-topic.""" |
| from prolewiki_llm.grpo_rewards import scope_maintenance_reward |
|
|
| prompts = mock_prompt("Marxism?") |
| completions = mock_completion( |
| "Marxism is the body of theory developed by Karl Marx and Friedrich Engels." |
| ) |
|
|
| scores = scope_maintenance_reward(prompts, completions) |
|
|
| assert scores[0] == 0.0, f"Single word political keyword should return 0.0, got {scores[0]}" |
|
|
| |
| |
| |
|
|
| def test_return_type_is_list_float(self, mock_prompt: object, mock_completion: object) -> None: |
| """Return type should be list[float].""" |
| from prolewiki_llm.grpo_rewards import scope_maintenance_reward |
|
|
| prompts = mock_prompt("hi") |
| completions = mock_completion("Hello.") |
|
|
| scores = scope_maintenance_reward(prompts, completions) |
|
|
| assert isinstance(scores, list), "Should return a list" |
| assert all(isinstance(s, float) for s in scores), "All elements should be floats" |
|
|
| def test_return_length_matches_input( |
| self, mock_prompt: object, mock_completion: object |
| ) -> None: |
| """Return length should match input length.""" |
| from prolewiki_llm.grpo_rewards import scope_maintenance_reward |
|
|
| prompts = [ |
| [{"role": "user", "content": "hi"}], |
| [{"role": "user", "content": "What is communism?"}], |
| [{"role": "user", "content": "lol"}], |
| ] |
| completions = [ |
| [{"role": "assistant", "content": "I discuss Marxist theory."}], |
| [{"role": "assistant", "content": "Communism is..."}], |
| [{"role": "assistant", "content": "Hehe what's up!"}], |
| ] |
|
|
| scores = scope_maintenance_reward(prompts, completions) |
|
|
| assert len(scores) == len( |
| completions |
| ), f"Return length {len(scores)} should match input length {len(completions)}" |
|
|