ResearchIT / tests /test_fusion.py
siddhm11
Phase 4 complete + Phase 4.5 instrumentation foundation
61d5f0d
"""
Tests for importance-weighted quota fusion.
Covers:
- Proportional allocation (dominant cluster gets most slots)
- Floor guarantee (every cluster gets at least min_slots)
- Total slots == sum of allocated slots (or >= when floors force it)
- Remainder distributed correctly
- Single cluster gets all slots
- Equal importances → roughly equal allocation
- Zero importances fall back to equal distribution
- merge_quota_results deduplication and order
"""
from app.recommend.fusion import allocate_quotas, merge_quota_results
# ── allocate_quotas ───────────────────────────────────────────────────────────
def test_proportional_allocation():
    """A 7:3 importance split must hand the heavier cluster the larger quota."""
    quotas = allocate_quotas([7.0, 3.0], total_slots=100, min_slots=3)
    assert len(quotas) == 2
    # Length is pinned above, so unpacking into two names is safe here.
    major, minor = quotas
    assert major > minor, "Dominant cluster (imp=7) should get more slots than minor (imp=3)"
def test_floor_guarantee():
    """No cluster may drop below min_slots, however small its importance."""
    # Extreme skew: nearly all of the weight sits on the first cluster.
    skewed = [99.0, 1.0]
    slots = allocate_quotas(skewed, total_slots=100, min_slots=3)
    starved = [s for s in slots if s < 3]
    assert not starved, f"Floor violated: {slots}"
def test_total_slots_met():
    """Without floor pressure, the quotas must add up to exactly total_slots."""
    total = 100
    weights = [5.0, 3.0, 2.0]
    slots = allocate_quotas(weights, total_slots=total, min_slots=3)
    assert total == sum(slots), f"Expected sum={total}, got {sum(slots)} from {slots}"
def test_floor_overrides_total():
    """When the combined floors exceed total_slots, the allocation may go over.

    7 clusters × 3 min_slots = 21, which is more than the 20 slots requested;
    the per-cluster floor must still hold.
    """
    n_clusters = 7
    slots = allocate_quotas([1.0] * n_clusters, total_slots=20, min_slots=3)
    assert all(s >= 3 for s in slots), f"Floor violated under pressure: {slots}"
    assert len(slots) == n_clusters
def test_single_cluster_gets_all():
    """A lone cluster is handed every slot (here 50, well above min_slots=3)."""
    only_cluster = [5.0]
    result = allocate_quotas(only_cluster, total_slots=50, min_slots=3)
    assert result == [50]
def test_equal_importances_roughly_equal():
    """Three equally weighted clusters split 99 slots into 33 each."""
    slots = allocate_quotas([1.0] * 3, total_slots=99, min_slots=3)
    assert len(slots) == 3
    # 99 divides evenly by 3, so there is no remainder to hand out.
    assert slots == [33, 33, 33], f"Expected equal split [33,33,33], got {slots}"
def test_zero_importances_fallback():
    """All-zero importances must not crash and must still fill every slot.

    The expected behaviour is a fallback to equal distribution: three
    clusters, 30 slots in total, each at or above the floor of 3.
    """
    all_zero = [0.0] * 3
    quotas = allocate_quotas(all_zero, total_slots=30, min_slots=3)
    assert len(quotas) == 3
    assert sum(quotas) == 30
    assert all(q >= 3 for q in quotas)
def test_empty_importances():
    """No clusters in means no quotas out."""
    result = allocate_quotas([], total_slots=100)
    assert result == []
def test_remainder_distributed():
    """Three equal clusters on 100 slots: the single leftover slot lands somewhere."""
    # 100 / 3 = 33.333 → each cluster floors to 33, leaving a remainder of 1.
    quotas = allocate_quotas([1.0] * 3, total_slots=100, min_slots=3)
    assert sum(quotas) == 100
    # Which cluster receives the extra slot is not pinned, only the multiset.
    assert sorted(quotas) == [33, 33, 34]
def test_two_cluster_sum_correct():
    """A 70/30 split on 100 slots sums to exactly 100 and keeps the ordering."""
    quotas = allocate_quotas([70.0, 30.0], total_slots=100, min_slots=3)
    assert sum(quotas) == 100
    # The heavier cluster is never below the lighter one...
    assert quotas[1] <= quotas[0]
    # ...and the lighter one still gets at least the floor.
    assert quotas[1] >= 3
def test_doc06_worked_example():
    """
    Reproduce the worked example from Doc 06.

    importances = [0.55, 0.30, 0.15], total=30, min=3
    raw         = [16.5, 9.0, 4.5]
    floor       = [16, 9, 4]  (sum=29)
    remainder   = 1 → largest frac (0.5 at idx 0) gets it
    final       = [17, 9, 4]
    """
    weights = [0.55, 0.30, 0.15]
    slots = allocate_quotas(weights, total_slots=30, min_slots=3)
    assert slots == [17, 9, 4], f"Doc 06 example expected [17, 9, 4], got {slots}"
    assert sum(slots) == 30
def test_doc06_tiny_cluster_floor():
    """
    Reproduce the tiny-cluster edge case from Doc 06.

    importances = [0.60, 0.25, 0.10, 0.05], total=30, min=3
    raw         = [18.0, 7.5, 3.0, 1.5]
    floor applied: [18, 7, 3, 3] -- the smallest cluster gets 3, not 1
    """
    weights = [0.60, 0.25, 0.10, 0.05]
    slots = allocate_quotas(weights, total_slots=30, min_slots=3)
    # The smallest cluster is lifted to min_slots (3) rather than its raw share.
    assert slots[3] >= 3, f"Floor violated: smallest cluster got {slots[3]}"
    # The three largest clusters keep a strictly decreasing order.
    assert slots[0] > slots[1]
    assert slots[1] > slots[2]
def test_fractional_priority_deterministic():
    """
    Leftover slots go to the clusters with the largest fractional parts.

    importances=[10, 10, 10], total=20, min=3
    raw       = [6.667, 6.667, 6.667]
    floor     = [6, 6, 6]  (sum=18)
    remainder = 2 → all fractions are equal (0.667), so the first two get +1
                (stable sort)
    final     = [7, 7, 6]

    The assertion below only pins the multiset of counts, not which
    clusters receive the two extra slots.
    """
    quotas = allocate_quotas([10.0] * 3, total_slots=20, min_slots=3)
    assert sum(quotas) == 20
    # Two remainder slots across three equal clusters: two 7s and one 6.
    assert sorted(quotas) == [6, 7, 7]
def test_fractional_priority_prefers_larger_frac():
    """
    Exact proportional split when the raw quotas are whole numbers.

    importances=[2, 3] on 10 slots, min=3:
    raw   = [4.0, 6.0]
    floor = [4, 6]  (sum=10, remainder=0)
    final = [4, 6]

    NOTE(review): both raw quotas are integers, so no remainder slot is
    handed out in this example. The test name suggests a case with differing
    fractional parts was intended -- confirm and extend if so.
    """
    weights = [2.0, 3.0]
    result = allocate_quotas(weights, total_slots=10, min_slots=3)
    assert result == [4, 6]
def test_many_clusters_floor_overflow():
    """
    Ten clusters with min=3 need 10×3 = 30 slots, but only 20 are requested.

    The floor guarantee overrides the total — the sum exceeds total_slots.
    """
    n_clusters = 10
    quotas = allocate_quotas([1.0] * n_clusters, total_slots=20, min_slots=3)
    assert len(quotas) == n_clusters
    assert all(q >= 3 for q in quotas)
    # Floor overflow: every cluster sits exactly on the floor, so 10 * 3 = 30.
    assert sum(quotas) == 30
def test_zero_importances_respects_floor_edge():
    """
    All-zero importances with total < n × min must still respect the floor.

    Here 3 clusters × 3 min_slots = 9, against only 6 requested slots.
    """
    all_zero = [0.0] * 3
    quotas = allocate_quotas(all_zero, total_slots=6, min_slots=3)
    assert all(q >= 3 for q in quotas)
    assert len(quotas) == 3
def test_dominant_cluster_does_not_starve_minority():
    """
    Critical Doc 06 fairness test.

    A user who is 70% NLP and 30% RL must still see RL papers: the RL
    cluster must not end up with zero slots (the RRF failure mode).
    """
    nlp_weight, rl_weight = 70.0, 30.0
    slots = allocate_quotas([nlp_weight, rl_weight], total_slots=30, min_slots=3)
    assert slots[1] >= 3, f"Minority RL cluster starved: got {slots[1]}"
    # Fairness must not flip the ranking: the dominant cluster stays ahead.
    assert slots[0] > slots[1]
    assert sum(slots) == 30
def test_allocation_order_matches_input():
    """Quotas come back in input order (the caller has already ranked by importance)."""
    weights = [50.0, 25.0, 25.0]
    quotas = allocate_quotas(weights, total_slots=100, min_slots=3)
    # Cluster 0 carries the most weight; clusters 1 and 2 are tied behind it.
    assert quotas[1] <= quotas[0]
    assert quotas[2] <= quotas[0]
# ── merge_quota_results ───────────────────────────────────────────────────────
def test_merge_respects_quota():
    """No cluster may contribute more items than its quota allows."""
    papers_a = ["a1", "a2", "a3", "a4", "a5"]
    papers_b = ["b1", "b2", "b3"]
    merged = merge_quota_results([papers_a, papers_b], quotas=[3, 3])
    # Items are attributed to their source cluster by ID prefix.
    a_count = len([item for item in merged if item.startswith("a")])
    b_count = len([item for item in merged if item.startswith("b")])
    assert a_count <= 3, f"Cluster A exceeded quota: {a_count}"
    assert b_count <= 3, f"Cluster B exceeded quota: {b_count}"
def test_merge_deduplicates():
    """A paper returned by several clusters appears only once in the merge."""
    clusters = [
        ["shared", "a1", "a2"],
        ["shared", "b1", "b2"],
    ]
    merged = merge_quota_results(clusters, quotas=[3, 3])
    assert merged.count("shared") == 1, "Duplicate 'shared' should appear only once"
def test_merge_preserves_order():
    """Cluster A's results come first, followed by cluster B's."""
    first = ["a1", "a2"]
    second = ["b1", "b2"]
    merged = merge_quota_results([first, second], quotas=[2, 2])
    expected = ["a1", "a2", "b1", "b2"]
    assert merged == expected
def test_merge_empty_cluster():
    """An empty cluster adds nothing; the other cluster still fills its quota."""
    populated = ["a1", "a2", "a3"]
    no_papers: list[str] = []
    merged = merge_quota_results([populated, no_papers], quotas=[3, 3])
    assert merged == ["a1", "a2", "a3"]
def test_merge_empty_input():
    """No clusters → empty result."""
    merged = merge_quota_results([], [])
    assert merged == []