import math

from shared.schemas import EpisodeMetrics


def compute_reward(metrics: EpisodeMetrics, baseline_mbps: float = 1.0) -> float:
    """Collapse one episode's metrics into a single scalar reward.

    A non-connected episode short-circuits to ``-1.0``. Otherwise the reward
    is a weighted sum of three components:

    * connection quality (weight 0.50): connect time, packet loss and ping;
    * stability (weight 0.35): stability ratio and reconnect count;
    * speed (weight 0.15): throughput relative to ``baseline_mbps`` on a
      logarithmic scale.

    Args:
        metrics: Episode measurements. The attributes read are ``connected``,
            ``connect_time_ms``, ``packet_loss_ratio``, ``avg_ping_ms``,
            ``stability_ratio``, ``reconnect_count`` and ``throughput_mbps``.
        baseline_mbps: Throughput that earns the full speed score. Values
            below 0.1 are treated as 0.1 to avoid division by zero or by a
            tiny/negative number.

    Returns:
        ``-1.0`` when ``metrics.connected`` is falsy, otherwise the weighted
        sum rounded to 4 decimal places.
    """
    if not metrics.connected:
        return -1.0

    # --- Connection quality (50% of the reward) ---------------------------
    # Connect time and ping decay linearly and bottom out at 0 once they
    # reach 5000 ms and 1000 ms respectively.
    connect_score = max(0.0, 1.0 - metrics.connect_time_ms / 5000.0)
    # NOTE(review): packet_loss_ratio and stability_ratio are used unclamped;
    # presumably both are already in [0, 1] -- confirm against the producer.
    loss_score = 1.0 - metrics.packet_loss_ratio
    ping_score = max(0.0, 1.0 - metrics.avg_ping_ms / 1000.0)
    connection_component = (
        connect_score * 0.3 + loss_score * 0.4 + ping_score * 0.3
    )

    # --- Stability (35% of the reward) ------------------------------------
    stability_score = metrics.stability_ratio
    # Despite the name this is a score: 1.0 with no reconnects, falling to 0
    # at five or more.
    reconnect_penalty = max(0.0, 1.0 - metrics.reconnect_count / 5.0)
    stability_component = stability_score * 0.7 + reconnect_penalty * 0.3

    # --- Speed (15% of the reward) ----------------------------------------
    safe_baseline = max(baseline_mbps, 0.1)
    # Clamp to [0, 1]. The lower bound matters: a negative throughput reading
    # would otherwise push the log1p argument to <= -1 and raise ValueError
    # (or yield a negative speed score for milder negatives).
    speed_ratio = min(max(metrics.throughput_mbps / safe_baseline, 0.0), 1.0)
    # Logarithmic curve mapping 0 -> 0 and 1 -> 1, so early throughput gains
    # count for more than gains close to the baseline.
    speed_score = math.log1p(speed_ratio * 9) / math.log1p(9)

    r = 0.0
    r += 0.50 * connection_component
    r += 0.35 * stability_component
    r += 0.15 * speed_score
    return round(r, 4)


def reward_to_label(r: float) -> str:
    """Map a reward value to a short human-readable label.

    Thresholds are checked in ascending order with strict ``<`` comparisons,
    so a value exactly on a boundary falls into the higher bucket.

    Args:
        r: Reward value to classify.

    Returns:
        One of the six label strings; anything not below 0.8 gets the top
        label.
    """
    if r < -0.5:
        return "💀 FAIL"
    if r < 0.0:
        return "❌ bad"
    if r < 0.3:
        return "⚠️ weak"
    if r < 0.6:
        return "🟡 ok"
    if r < 0.8:
        return "🟢 good"
    return "🏆 great"