Spaces:

bitsabhi
/

phi-coherence

Running

App Files Files Community

phi-coherence / test_suite.py

bitsabhi

v2: Hallucination Risk Scoring - 75% accuracy

36e08e8 15 days ago

raw

history blame contribute delete

12.9 kB

	#!/usr/bin/env python3
	"""
	φ-Coherence Test Suite
	Comprehensive tests to validate the scoring algorithm.

	Run: python test_suite.py
	"""

	import json
	from phi_coherence import PhiCoherence, CoherenceMetrics, PHI, ALPHA

	# Shared scorer instance; the score() and analyze() helpers delegate to it.
	coherence = PhiCoherence()

	# Running tallies updated by test(); RESULTS collects one dict per check.
	PASSED, FAILED = 0, 0
	RESULTS = []


	def test(name: str, condition: bool, details: str = ""):
	    """Record the outcome of one check and print a PASS/FAIL line.

	    Args:
	        name: Label printed next to the status and stored in RESULTS.
	        condition: Outcome of the check; any truthy value counts as a pass.
	        details: Extra context, printed only when the check fails.

	    Side effects:
	        Increments the module-level PASSED or FAILED counter and appends a
	        dict with the keys "name", "passed" and "details" to RESULTS.
	    """
	    global PASSED, FAILED

	    # Coerce once so RESULTS always stores a plain bool: RESULTS is later
	    # written out with json.dump, which rejects bool-like objects that are
	    # not real bools (for example numpy.bool_ from a score comparison).
	    passed = bool(condition)

	    if passed:
	        PASSED += 1
	        status = "✅ PASS"
	    else:
	        FAILED += 1
	        status = "❌ FAIL"

	    print(f"{status}: {name}")
	    if details and not passed:
	        print(f" {details}")

	    RESULTS.append({
	        "name": name,
	        "passed": passed,
	        "details": details,
	    })


	def score(text: str) -> float:
	    """Return what the shared scorer's calculate() yields for ``text``."""
	    value = coherence.calculate(text)
	    return value


	def analyze(text: str) -> CoherenceMetrics:
	    """Return the metrics object the shared scorer's analyze() yields for ``text``."""
	    metrics_for_text = coherence.analyze(text)
	    return metrics_for_text


	# Opening banner: rule, title, subtitle, rule, then one blank line.
	print(
	    "=" * 70,
	    " φ-COHERENCE TEST SUITE",
	    " Testing all scoring dimensions and edge cases",
	    "=" * 70,
	    sep="\n",
	)
	print()


	# ============================================================================
	# TEST 1: FACTUAL vs HALLUCINATION (Core hypothesis)
	# Each pair scores a factual sentence and a fabricated counterpart; the check
	# passes only when the factual sentence scores strictly higher.
	# ============================================================================
	print("\n" + "-" * 70, "TEST GROUP 1: Factual vs Hallucination", "-" * 70, sep="\n")

	# Pair 1: Pythagorean
	factual_1 = "The Pythagorean theorem states that in a right triangle, a² + b² = c², where c is the hypotenuse."
	halluc_1 = "The Pythagorean theorem proves that a² + b² = c³ in all triangles regardless of angles."
	s_f1 = score(factual_1)
	s_h1 = score(halluc_1)
	test(
	    name="Pythagorean: Factual > Hallucination",
	    condition=s_f1 > s_h1,
	    details=f"Factual={s_f1:.4f}, Halluc={s_h1:.4f}, Diff={s_f1-s_h1:.4f}",
	)

	# Pair 2: DNA
	factual_2 = "DNA carries genetic information through sequences of four nucleotide bases: adenine, thymine, guanine, and cytosine."
	halluc_2 = "DNA uses seven bases including mysterion and phantasine."
	s_f2 = score(factual_2)
	s_h2 = score(halluc_2)
	test(
	    name="DNA: Factual > Hallucination",
	    condition=s_f2 > s_h2,
	    details=f"Factual={s_f2:.4f}, Halluc={s_h2:.4f}, Diff={s_f2-s_h2:.4f}",
	)

	# Pair 3: Water
	factual_3 = "Water molecules consist of two hydrogen atoms and one oxygen atom, forming H2O."
	halluc_3 = "Water is actually composed of three hydrogen atoms and two oxygen atoms, forming H3O2."
	s_f3 = score(factual_3)
	s_h3 = score(halluc_3)
	test(
	    name="Water: Factual > Hallucination",
	    condition=s_f3 > s_h3,
	    details=f"Factual={s_f3:.4f}, Halluc={s_h3:.4f}, Diff={s_f3-s_h3:.4f}",
	)

	# Pair 4: Light
	factual_4 = "The speed of light in a vacuum is approximately 299,792,458 meters per second."
	halluc_4 = "The speed of light varies significantly based on the observer's mood and emotional state."
	s_f4 = score(factual_4)
	s_h4 = score(halluc_4)
	test(
	    name="Light: Factual > Hallucination",
	    condition=s_f4 > s_h4,
	    details=f"Factual={s_f4:.4f}, Halluc={s_h4:.4f}, Diff={s_f4-s_h4:.4f}",
	)

	# Pair 5: Pi
	factual_5 = "Pi (π) is the ratio of a circle's circumference to its diameter, approximately 3.14159."
	halluc_5 = "Pi equals exactly 3.2 as proven by the Indiana Pi Bill of 1897."
	s_f5 = score(factual_5)
	s_h5 = score(halluc_5)
	test(
	    name="Pi: Factual > Hallucination",
	    condition=s_f5 > s_h5,
	    details=f"Factual={s_f5:.4f}, Halluc={s_h5:.4f}, Diff={s_f5-s_h5:.4f}",
	)

	# Pair 6: Gravity
	factual_6 = "Gravity causes objects with mass to attract each other, as described by Newton's law of universal gravitation."
	halluc_6 = "Gravity only affects objects painted blue, while red objects float naturally."
	s_f6 = score(factual_6)
	s_h6 = score(halluc_6)
	test(
	    name="Gravity: Factual > Hallucination",
	    condition=s_f6 > s_h6,
	    details=f"Factual={s_f6:.4f}, Halluc={s_h6:.4f}, Diff={s_f6-s_h6:.4f}",
	)


	# ============================================================================
	# TEST 2: COHERENT vs INCOHERENT REASONING
	# A well-formed argument must score strictly higher than word salad or jargon.
	# ============================================================================
	print("\n" + "-" * 70, "TEST GROUP 2: Coherent vs Incoherent Reasoning", "-" * 70, sep="\n")

	# Causal explanation vs. word salad
	coherent_1 = "Because water expands when it freezes, ice floats on liquid water, protecting aquatic life in winter."
	incoherent_1 = "The purple elephant mathematics dancing through quantum yesterday because therefore sandwich."
	s_c1 = score(coherent_1)
	s_i1 = score(incoherent_1)
	test(
	    name="Reasoning: Coherent > Incoherent",
	    condition=s_c1 > s_i1,
	    details=f"Coherent={s_c1:.4f}, Incoherent={s_i1:.4f}, Diff={s_c1-s_i1:.4f}",
	)

	# Syllogism vs. corporate jargon
	coherent_2 = "If all mammals are warm-blooded, and dolphins are mammals, then dolphins must be warm-blooded."
	incoherent_2 = "By leveraging our core competencies in disruptive innovation, we can synergize cross-functional deliverables."
	s_c2 = score(coherent_2)
	s_i2 = score(incoherent_2)
	test(
	    name="Logic: Coherent > Corporate Jargon",
	    condition=s_c2 > s_i2,
	    details=f"Coherent={s_c2:.4f}, Jargon={s_i2:.4f}, Diff={s_c2-s_i2:.4f}",
	)


	# ============================================================================
	# TEST 3: SCIENTIFIC CONTENT (α-Resonance)
	# Scientific statements must score strictly higher than filler or pseudoscience.
	# ============================================================================
	print("\n" + "-" * 70, "TEST GROUP 3: Scientific Content (α-Resonance)", "-" * 70, sep="\n")

	# Physics statement vs. placeholder text
	science_1 = "The fine structure constant α ≈ 1/137 governs electromagnetic interactions in the universe."
	nonsense_1 = "Lorem ipsum dolor sit amet, consectetur adipiscing elit."
	s_sc1 = score(science_1)
	s_ns1 = score(nonsense_1)
	test(
	    name="Science > Lorem Ipsum",
	    condition=s_sc1 > s_ns1,
	    details=f"Science={s_sc1:.4f}, Lorem={s_ns1:.4f}, Diff={s_sc1-s_ns1:.4f}",
	)

	# Neuroscience-style statement vs. pseudoscience
	science_2 = "Consciousness emerges from the coherent integration of information across neural networks."
	nonsense_2 = "The vibrational frequency of crystal healing aligns your chakras with the quantum field."
	s_sc2 = score(science_2)
	s_ns2 = score(nonsense_2)
	test(
	    name="Real Science > Pseudoscience",
	    condition=s_sc2 > s_ns2,
	    details=f"Science={s_sc2:.4f}, Pseudo={s_ns2:.4f}, Diff={s_sc2-s_ns2:.4f}",
	)


	# ============================================================================
	# TEST 4: CONFIDENCE MYTHS (Hardest to detect)
	# A plain fact must score strictly higher than a confidently worded myth.
	# ============================================================================
	print("\n" + "-" * 70, "TEST GROUP 4: Confident Myths vs Facts", "-" * 70, sep="\n")

	# Brain: neuron count vs. the "10% of the brain" myth
	fact_1 = "The human brain contains approximately 86 billion neurons."
	myth_1 = "According to a 2024 Stanford study, humans only use 10% of their brain capacity."
	s_fact1 = score(fact_1)
	s_myth1 = score(myth_1)
	test(
	    name="Brain Fact > 10% Myth",
	    condition=s_fact1 > s_myth1,
	    details=f"Fact={s_fact1:.4f}, Myth={s_myth1:.4f}, Diff={s_fact1-s_myth1:.4f}",
	)

	# Goldfish: long memory vs. the "3-second memory" myth
	fact_2 = "Goldfish can remember things for months, not seconds."
	myth_2 = "Goldfish have a 3-second memory span, which is why they seem surprised by their bowl."
	s_fact2 = score(fact_2)
	s_myth2 = score(myth_2)
	test(
	    name="Goldfish Fact > 3-second Myth",
	    condition=s_fact2 > s_myth2,
	    details=f"Fact={s_fact2:.4f}, Myth={s_myth2:.4f}, Diff={s_fact2-s_myth2:.4f}",
	)


	# ============================================================================
	# TEST 5: EDGE CASES
	# Degenerate and unusual inputs: empty, very short, very long, symbols, code.
	# ============================================================================
	print("\n" + "-" * 70, "TEST GROUP 5: Edge Cases", "-" * 70, sep="\n")

	# Empty/short text
	empty = ""
	short = "Hi"
	s_empty = score(empty)
	s_short = score(short)
	test(name="Empty string returns 0", condition=s_empty == 0.0, details=f"Got {s_empty}")
	test(name="Short text returns non-zero", condition=s_short > 0.0, details=f"Got {s_short}")

	# Very long text: the score must stay inside the closed interval [0, 1].
	long_text = "The " * 500 + "end."
	s_long = score(long_text)
	test(name="Long text doesn't crash", condition=0.0 <= s_long <= 1.0, details=f"Got {s_long}")

	# Special characters
	special = "∅ ≈ ∞ → φ × α = 137 × 1.618 ≈ 221.67"
	s_special = score(special)
	test(name="Special chars don't crash", condition=s_special >= 0.0, details=f"Got {s_special}")

	# Code
	code = "def hello(): return 'world'"
	s_code = score(code)
	test(name="Code returns score", condition=s_code > 0.0, details=f"Got {s_code}")


	# ============================================================================
	# TEST 6: DIMENSIONAL ANALYSIS
	# Sanity-checks the individual fields of the metrics object for one sentence.
	# ============================================================================
	print("\n" + "-" * 70, "TEST GROUP 6: Dimensional Analysis", "-" * 70, sep="\n")

	# Check that dimensions are computed
	metrics = analyze("The consciousness emerges from information patterns.")
	test(
	    name="phi_alignment computed",
	    condition=metrics.phi_alignment > 0,
	    details=f"Got {metrics.phi_alignment}",
	)
	# alpha_resonance is the only dimension allowed to be exactly zero here.
	test(
	    name="alpha_resonance computed",
	    condition=metrics.alpha_resonance >= 0,
	    details=f"Got {metrics.alpha_resonance}",
	)
	test(
	    name="semantic_density computed",
	    condition=metrics.semantic_density > 0,
	    details=f"Got {metrics.semantic_density}",
	)
	test(
	    name="structural_harmony computed",
	    condition=metrics.structural_harmony > 0,
	    details=f"Got {metrics.structural_harmony}",
	)
	test(
	    name="total_coherence in range",
	    condition=0 <= metrics.total_coherence <= 1,
	    details=f"Got {metrics.total_coherence}",
	)


	# ============================================================================
	# TEST 7: α-SEED DETECTION
	# Recomputes the expected flag locally (SHA-256 of the text, as an integer,
	# modulo ALPHA equal to zero) and compares it with metrics.is_alpha_seed.
	# NOTE(review): this presumes the scorer derives the flag the same way — confirm.
	# ============================================================================
	print("\n" + "-" * 70, "TEST GROUP 7: α-SEED Detection", "-" * 70, sep="\n")

	import hashlib

	test_text = "test"
	hex_digest = hashlib.sha256(test_text.encode()).hexdigest()
	content_hash = int(hex_digest, 16)
	remainder = content_hash % ALPHA
	expected_seed = remainder == 0
	metrics = analyze(test_text)
	test(
	    name="α-SEED detection works",
	    condition=metrics.is_alpha_seed == expected_seed,
	    details=f"Hash%137={remainder}, is_seed={metrics.is_alpha_seed}",
	)


	# ============================================================================
	# TEST 8: WHITESPACE/FORMATTING SENSITIVITY
	# Scores one sentence with different word separators, prints each score for
	# comparison, and checks that every variant still yields a positive score.
	# ============================================================================
	print("\n" + "-" * 70)
	print("TEST GROUP 8: Whitespace Sensitivity")
	print("-" * 70)

	text_normal = "The speed of light is constant."
	# Widen every gap to three spaces. Deriving the variant from the baseline
	# guarantees it really differs from text_normal, so the "Extra spaces" check
	# exercises repeated whitespace rather than re-scoring the same string.
	text_spaces = "   ".join(text_normal.split(" "))
	text_tabs = "The\tspeed\tof\tlight\tis\tconstant."
	text_newlines = "The\nspeed\nof\nlight\nis\nconstant."

	s_normal = score(text_normal)
	s_spaces = score(text_spaces)
	s_tabs = score(text_tabs)
	s_newlines = score(text_newlines)

	# Informational only: the baseline score is printed but not asserted on.
	print(f" Normal: {s_normal:.4f}")
	print(f" Spaces: {s_spaces:.4f}")
	print(f" Tabs: {s_tabs:.4f}")
	print(f" Newlines: {s_newlines:.4f}")

	test("Extra spaces don't crash", s_spaces > 0, f"Got {s_spaces}")
	test("Tabs don't crash", s_tabs > 0, f"Got {s_tabs}")
	test("Newlines don't crash", s_newlines > 0, f"Got {s_newlines}")


	# ============================================================================
	# DETAILED COMPARISON: The failing case from UI
	# Prints every metric for the factual and the hallucinated Pythagorean
	# sentence, then the per-metric differences and which text scored higher.
	# ============================================================================
	print("\n" + "-" * 70, "DETAILED ANALYSIS: Pythagorean (UI Bug Investigation)", "-" * 70, sep="\n")

	text_a = "The Pythagorean theorem states that in a right triangle, a² + b² = c², where c is the hypotenuse."
	text_b = "The Pythagorean theorem proves that a² + b² = c³ in all triangles regardless of angles."

	m_a = analyze(text_a)
	m_b = analyze(text_b)

	# Same report layout for both texts; only the heading, text and metrics vary.
	for side, sentence, m in (
	    ("A (FACTUAL)", text_a, m_a),
	    ("B (HALLUCINATION)", text_b, m_b),
	):
	    print(f"\nText {side}:")
	    print(f" '{sentence}'")
	    print(f" Total: {m.total_coherence:.4f}")
	    print(f" φ-Align: {m.phi_alignment:.4f}")
	    print(f" α-Reson: {m.alpha_resonance:.4f}")
	    print(f" Density: {m.semantic_density:.4f}")
	    print(f" Harmony: {m.structural_harmony:.4f}")
	    print(f" Darmiyan: {m.darmiyan_coefficient:.4f}")
	    print(f" α-SEED: {m.is_alpha_seed}")

	# Per-metric gap (A minus B); the Darmiyan coefficient is not part of this table.
	print("\nDifference (A - B):")
	for caption, attr in (
	    ("Total", "total_coherence"),
	    ("φ-Align", "phi_alignment"),
	    ("α-Reson", "alpha_resonance"),
	    ("Density", "semantic_density"),
	    ("Harmony", "structural_harmony"),
	):
	    delta = getattr(m_a, attr) - getattr(m_b, attr)
	    print(f" {caption}: {delta:.4f}")

	# A tie counts as a win for B, because the comparison is strict.
	if m_a.total_coherence > m_b.total_coherence:
	    winner = "A (CORRECT)"
	else:
	    winner = "B (WRONG!)"
	print(f"\n WINNER: {winner}")


	# ============================================================================
	# SUMMARY
	# Prints the pass/fail tallies and writes them, with the per-check records,
	# to test_results.json in the current working directory.
	# ============================================================================
	total = PASSED + FAILED
	# Guard the percentage: with zero recorded checks the division would raise
	# ZeroDivisionError and the results file would never be written.
	rate = PASSED / total * 100 if total else 0.0

	print("\n" + "=" * 70)
	print(" TEST SUMMARY")
	print("=" * 70)
	print(f" PASSED: {PASSED}")
	print(f" FAILED: {FAILED}")
	print(f" TOTAL: {total}")
	print(f" RATE: {rate:.1f}%")
	print("=" * 70)

	# Save results. The encoding is explicit so the output does not depend on the
	# platform default; json.dump escapes non-ASCII characters by default anyway.
	with open("test_results.json", "w", encoding="utf-8") as f:
	    json.dump({
	        "passed": PASSED,
	        "failed": FAILED,
	        "results": RESULTS,
	    }, f, indent=2)
	print("\n[*] Results saved to test_results.json")