# phi-coherence / test_suite.py
# bitsabhi's picture
# v2: Hallucination Risk Scoring - 75% accuracy
# 36e08e8
#!/usr/bin/env python3
"""
φ-Coherence Test Suite
Comprehensive tests to validate the scoring algorithm.
Run: python test_suite.py
"""
import json
from phi_coherence import PhiCoherence, CoherenceMetrics, PHI, ALPHA
# Single scorer instance shared by the score() and analyze() helpers below.
coherence = PhiCoherence()

# Running pass/fail tallies plus a per-check log; all three are written to
# test_results.json at the end of the run.
PASSED = FAILED = 0
RESULTS = []
def test(name: str, condition: bool, details: str = ""):
    """Record one check: update the tallies, print a status line, log the result.

    Args:
        name: Label printed next to the status and stored in the log entry.
        condition: Outcome of the check; any truthy value counts as a pass.
        details: Extra context. Printed only when the check fails, but always
            stored in the log entry.
    """
    global PASSED, FAILED
    # Normalise once so the log holds a plain bool. Comparison results that are
    # merely bool-like (for example numpy.bool_) are rejected by json.dump when
    # RESULTS is serialised at the end of the run.
    passed = bool(condition)
    if passed:
        PASSED += 1
        status = "✅ PASS"
    else:
        FAILED += 1
        status = "❌ FAIL"
    print(f"{status}: {name}")
    # Details are only useful for diagnosing a failure, so stay quiet on a pass.
    if details and not passed:
        print(f" {details}")
    RESULTS.append({
        "name": name,
        "passed": passed,
        "details": details
    })
def score(text: str) -> float:
    """Return whatever the shared scorer's ``calculate`` yields for *text*."""
    overall = coherence.calculate(text)
    return overall
def analyze(text: str) -> CoherenceMetrics:
    """Return whatever the shared scorer's ``analyze`` yields for *text*."""
    breakdown = coherence.analyze(text)
    return breakdown
# Opening banner: a title and subtitle framed by two 70-character rules,
# followed by one blank line.
banner_rule = "=" * 70
for banner_line in (
    banner_rule,
    " φ-COHERENCE TEST SUITE",
    " Testing all scoring dimensions and edge cases",
    banner_rule,
):
    print(banner_line)
print()
# ============================================================================
# TEST 1: FACTUAL vs HALLUCINATION (Core hypothesis)
# ============================================================================
group_1_rule = "-" * 70
print("\n" + group_1_rule)
print("TEST GROUP 1: Factual vs Hallucination")
print(group_1_rule)

# Each row is (topic, true statement, fabricated statement). A row passes when
# the true statement receives the strictly higher score.
FACT_VS_HALLUCINATION = (
    (
        "Pythagorean",
        "The Pythagorean theorem states that in a right triangle, a² + b² = c², where c is the hypotenuse.",
        "The Pythagorean theorem proves that a² + b² = c³ in all triangles regardless of angles.",
    ),
    (
        "DNA",
        "DNA carries genetic information through sequences of four nucleotide bases: adenine, thymine, guanine, and cytosine.",
        "DNA uses seven bases including mysterion and phantasine.",
    ),
    (
        "Water",
        "Water molecules consist of two hydrogen atoms and one oxygen atom, forming H2O.",
        "Water is actually composed of three hydrogen atoms and two oxygen atoms, forming H3O2.",
    ),
    (
        "Light",
        "The speed of light in a vacuum is approximately 299,792,458 meters per second.",
        "The speed of light varies significantly based on the observer's mood and emotional state.",
    ),
    (
        "Pi",
        "Pi (π) is the ratio of a circle's circumference to its diameter, approximately 3.14159.",
        "Pi equals exactly 3.2 as proven by the Indiana Pi Bill of 1897.",
    ),
    (
        "Gravity",
        "Gravity causes objects with mass to attract each other, as described by Newton's law of universal gravitation.",
        "Gravity only affects objects painted blue, while red objects float naturally.",
    ),
)

for topic, factual_text, halluc_text in FACT_VS_HALLUCINATION:
    s_factual, s_halluc = score(factual_text), score(halluc_text)
    test(
        f"{topic}: Factual > Hallucination",
        s_factual > s_halluc,
        f"Factual={s_factual:.4f}, Halluc={s_halluc:.4f}, Diff={s_factual - s_halluc:.4f}",
    )
# ============================================================================
# TEST 2: COHERENT vs INCOHERENT REASONING
# ============================================================================
group_2_rule = "-" * 70
print("\n" + group_2_rule)
print("TEST GROUP 2: Coherent vs Incoherent Reasoning")
print(group_2_rule)

# Each row is (check name, well-formed reasoning, weaker text, label used for
# the weaker text in the failure details).
REASONING_PAIRS = (
    (
        "Reasoning: Coherent > Incoherent",
        "Because water expands when it freezes, ice floats on liquid water, protecting aquatic life in winter.",
        "The purple elephant mathematics dancing through quantum yesterday because therefore sandwich.",
        "Incoherent",
    ),
    (
        "Logic: Coherent > Corporate Jargon",
        "If all mammals are warm-blooded, and dolphins are mammals, then dolphins must be warm-blooded.",
        "By leveraging our core competencies in disruptive innovation, we can synergize cross-functional deliverables.",
        "Jargon",
    ),
)

for check_name, coherent_text, weaker_text, weaker_label in REASONING_PAIRS:
    s_coherent, s_weaker = score(coherent_text), score(weaker_text)
    test(
        check_name,
        s_coherent > s_weaker,
        f"Coherent={s_coherent:.4f}, {weaker_label}={s_weaker:.4f}, Diff={s_coherent - s_weaker:.4f}",
    )
# ============================================================================
# TEST 3: SCIENTIFIC CONTENT (α-Resonance)
# ============================================================================
group_3_rule = "-" * 70
print("\n" + group_3_rule)
print("TEST GROUP 3: Scientific Content (α-Resonance)")
print(group_3_rule)

# Each row is (check name, scientific statement, filler or pseudoscience text,
# label used for the second text in the failure details).
SCIENCE_PAIRS = (
    (
        "Science > Lorem Ipsum",
        "The fine structure constant α ≈ 1/137 governs electromagnetic interactions in the universe.",
        "Lorem ipsum dolor sit amet, consectetur adipiscing elit.",
        "Lorem",
    ),
    (
        "Real Science > Pseudoscience",
        "Consciousness emerges from the coherent integration of information across neural networks.",
        "The vibrational frequency of crystal healing aligns your chakras with the quantum field.",
        "Pseudo",
    ),
)

for check_name, science_text, other_text, other_label in SCIENCE_PAIRS:
    s_science, s_other = score(science_text), score(other_text)
    test(
        check_name,
        s_science > s_other,
        f"Science={s_science:.4f}, {other_label}={s_other:.4f}, Diff={s_science - s_other:.4f}",
    )
# ============================================================================
# TEST 4: CONFIDENT MYTHS (Hardest to detect)
# ============================================================================
group_4_rule = "-" * 70
print("\n" + group_4_rule)
print("TEST GROUP 4: Confident Myths vs Facts")
print(group_4_rule)

# Each row is (check name, accurate statement, confidently worded myth).
FACT_VS_MYTH = (
    (
        "Brain Fact > 10% Myth",
        "The human brain contains approximately 86 billion neurons.",
        "According to a 2024 Stanford study, humans only use 10% of their brain capacity.",
    ),
    (
        "Goldfish Fact > 3-second Myth",
        "Goldfish can remember things for months, not seconds.",
        "Goldfish have a 3-second memory span, which is why they seem surprised by their bowl.",
    ),
)

for check_name, fact_text, myth_text in FACT_VS_MYTH:
    s_fact, s_myth = score(fact_text), score(myth_text)
    test(
        check_name,
        s_fact > s_myth,
        f"Fact={s_fact:.4f}, Myth={s_myth:.4f}, Diff={s_fact - s_myth:.4f}",
    )
# ============================================================================
# TEST 5: EDGE CASES
# ============================================================================
group_5_rule = "-" * 70
print("\n" + group_5_rule)
print("TEST GROUP 5: Edge Cases")
print(group_5_rule)

# Empty and very short input: both are scored first, then both are checked.
s_empty = score("")
s_short = score("Hi")
test("Empty string returns 0", s_empty == 0.0, f"Got {s_empty}")
test("Short text returns non-zero", s_short > 0.0, f"Got {s_short}")

# Very long input: 500 repetitions of "The " followed by "end.". The score
# must land inside the closed interval [0, 1].
s_long = score("The " * 500 + "end.")
test("Long text doesn't crash", 0.0 <= s_long <= 1.0, f"Got {s_long}")

# Non-ASCII mathematical symbols mixed with numbers.
s_special = score("∅ ≈ ∞ → φ × α = 137 × 1.618 ≈ 221.67")
test("Special chars don't crash", s_special >= 0.0, f"Got {s_special}")

# A snippet of source code rather than natural language.
s_code = score("def hello(): return 'world'")
test("Code returns score", s_code > 0.0, f"Got {s_code}")
# ============================================================================
# TEST 6: DIMENSIONAL ANALYSIS
# ============================================================================
group_6_rule = "-" * 70
print("\n" + group_6_rule)
print("TEST GROUP 6: Dimensional Analysis")
print(group_6_rule)

# Confirm every individual dimension is populated on the metrics object.
metrics = analyze("The consciousness emerges from information patterns.")

# (attribute name, whether a value of exactly zero is acceptable).
# Only alpha_resonance is allowed to be zero; the others must be positive.
DIMENSION_CHECKS = (
    ("phi_alignment", False),
    ("alpha_resonance", True),
    ("semantic_density", False),
    ("structural_harmony", False),
)

for dimension, zero_allowed in DIMENSION_CHECKS:
    value = getattr(metrics, dimension)
    if zero_allowed:
        is_computed = value >= 0
    else:
        is_computed = value > 0
    test(f"{dimension} computed", is_computed, f"Got {value}")

total = metrics.total_coherence
test("total_coherence in range", 0 <= total <= 1, f"Got {total}")
# ============================================================================
# TEST 7: α-SEED DETECTION
# ============================================================================
group_7_rule = "-" * 70
print("\n" + group_7_rule)
print("TEST GROUP 7: α-SEED Detection")
print(group_7_rule)

import hashlib

# Recompute the expected flag independently: the SHA-256 digest of the text,
# read as a base-16 integer, must be divisible by ALPHA.
# NOTE(review): presumably mirrors the rule inside phi_coherence — confirm.
seed_probe = "test"
digest_hex = hashlib.sha256(seed_probe.encode()).hexdigest()
remainder = int(digest_hex, 16) % ALPHA
expected_seed = remainder == 0

metrics = analyze(seed_probe)
test(
    "α-SEED detection works",
    metrics.is_alpha_seed == expected_seed,
    f"Hash%137={remainder}, is_seed={metrics.is_alpha_seed}",
)
# ============================================================================
# TEST 8: WHITESPACE/FORMATTING SENSITIVITY
# ============================================================================
print("\n" + "-" * 70)
print("TEST GROUP 8: Whitespace Sensitivity")
print("-" * 70)

# The same sentence with four different word separators.
text_normal = "The speed of light is constant."
# Runs of two to four spaces between words, so this input genuinely differs
# from text_normal and the "extra spaces" check exercises what its name says.
text_spaces = "The  speed   of    light  is   constant."
text_tabs = "The\tspeed\tof\tlight\tis\tconstant."
text_newlines = "The\nspeed\nof\nlight\nis\nconstant."

s_normal = score(text_normal)
s_spaces = score(text_spaces)
s_tabs = score(text_tabs)
s_newlines = score(text_newlines)

# Printed for manual comparison only; the checks below just require that each
# variant still yields a positive score.
print(f" Normal: {s_normal:.4f}")
print(f" Spaces: {s_spaces:.4f}")
print(f" Tabs: {s_tabs:.4f}")
print(f" Newlines: {s_newlines:.4f}")

test("Extra spaces don't crash", s_spaces > 0, f"Got {s_spaces}")
test("Tabs don't crash", s_tabs > 0, f"Got {s_tabs}")
test("Newlines don't crash", s_newlines > 0, f"Got {s_newlines}")
# ============================================================================
# DETAILED COMPARISON: The failing case from UI
# ============================================================================
detail_rule = "-" * 70
print("\n" + detail_rule)
print("DETAILED ANALYSIS: Pythagorean (UI Bug Investigation)")
print(detail_rule)

text_a = "The Pythagorean theorem states that in a right triangle, a² + b² = c², where c is the hypotenuse."
text_b = "The Pythagorean theorem proves that a² + b² = c³ in all triangles regardless of angles."
m_a = analyze(text_a)
m_b = analyze(text_b)

# (printed label, metrics attribute) for the numeric fields that are shown per
# text and again in the A - B difference table.
COMPARED_FIELDS = (
    ("Total", "total_coherence"),
    ("φ-Align", "phi_alignment"),
    ("α-Reson", "alpha_resonance"),
    ("Density", "semantic_density"),
    ("Harmony", "structural_harmony"),
)

# Full breakdown for each text, factual one first.
for heading, shown_text, shown_metrics in (
    ("Text A (FACTUAL)", text_a, m_a),
    ("Text B (HALLUCINATION)", text_b, m_b),
):
    print(f"\n{heading}:")
    print(f" '{shown_text}'")
    for label, attr in COMPARED_FIELDS:
        print(f" {label}: {getattr(shown_metrics, attr):.4f}")
    print(f" Darmiyan: {shown_metrics.darmiyan_coefficient:.4f}")
    print(f" α-SEED: {shown_metrics.is_alpha_seed}")

# Per-field gap; a positive number means the factual text scored higher.
print("\nDifference (A - B):")
for label, attr in COMPARED_FIELDS:
    print(f" {label}: {getattr(m_a, attr) - getattr(m_b, attr):.4f}")

# A tie counts as the wrong outcome because the comparison is strict.
if m_a.total_coherence > m_b.total_coherence:
    winner = "A (CORRECT)"
else:
    winner = "B (WRONG!)"
print(f"\n WINNER: {winner}")
# ============================================================================
# SUMMARY
# ============================================================================
summary_rule = "=" * 70
total_run = PASSED + FAILED

print("\n" + summary_rule)
print(" TEST SUMMARY")
print(summary_rule)
print(f" PASSED: {PASSED}")
print(f" FAILED: {FAILED}")
print(f" TOTAL: {total_run}")
# Pass rate as a percentage of all recorded checks, to one decimal place.
print(f" RATE: {PASSED / total_run * 100:.1f}%")
print(summary_rule)

# Persist the tallies and the per-check log next to the script.
report = {
    "passed": PASSED,
    "failed": FAILED,
    "results": RESULTS,
}
with open("test_results.json", "w") as report_file:
    json.dump(report, report_file, indent=2)
print("\n[*] Results saved to test_results.json")