| from shared .schemas import EpisodeMetrics | |
def _clamp01(value: float) -> float:
    """Clamp *value* into the closed interval [0.0, 1.0]."""
    return max(0.0, min(1.0, value))


def compute_reward(metrics: EpisodeMetrics, baseline_mbps: float = 1.0) -> float:
    """Collapse one episode's metrics into a single scalar reward.

    A disconnected episode short-circuits to ``-1.0``. Otherwise the reward
    is a weighted sum of three components, each normalised to [0, 1]:

    * connection (weight 0.50): connect time scored against a 5000 ceiling
      (x0.3), packet loss (x0.4) and average ping scored against a 1000
      ceiling (x0.3);
    * stability (weight 0.35): stability ratio (x0.7) and reconnect count
      scored against a ceiling of 5 (x0.3);
    * speed (weight 0.15): throughput relative to ``baseline_mbps`` on a
      logarithmic curve, capped at the baseline.

    Args:
        metrics: Object exposing ``connected``, ``connect_time_ms``,
            ``packet_loss_ratio``, ``avg_ping_ms``, ``stability_ratio``,
            ``reconnect_count`` and ``throughput_mbps``.
        baseline_mbps: Throughput that earns the full speed score. Values
            below 0.1 are raised to 0.1 to avoid dividing by zero.

    Returns:
        ``-1.0`` when not connected, otherwise a value in [0.0, 1.0]
        rounded to four decimal places.
    """
    # Function-local import: this block does not rely on a module-level
    # ``import math`` being present elsewhere in the file.
    import math

    if not metrics.connected:
        return -1.0

    r = 0.0

    # Connection quality. Every sub-score is clamped so that an out-of-range
    # reading (e.g. a loss ratio above 1) cannot drag the reward outside the
    # documented range.
    connect_score = _clamp01(1.0 - metrics.connect_time_ms / 5000.0)
    loss_score = _clamp01(1.0 - metrics.packet_loss_ratio)
    ping_score = _clamp01(1.0 - metrics.avg_ping_ms / 1000.0)
    connection_component = connect_score * 0.3 + loss_score * 0.4 + ping_score * 0.3
    r += 0.50 * connection_component

    # Stability. ``reconnect_penalty`` is 1.0 with no reconnects and reaches
    # 0.0 at five or more.
    stability_score = _clamp01(metrics.stability_ratio)
    reconnect_penalty = _clamp01(1.0 - metrics.reconnect_count / 5.0)
    stability_component = stability_score * 0.7 + reconnect_penalty * 0.3
    r += 0.35 * stability_component

    # Speed. The ratio must be floored at 0: log1p(x) raises ValueError for
    # x <= -1, which a negative throughput reading would otherwise trigger.
    safe_baseline = max(baseline_mbps, 0.1)
    speed_ratio = _clamp01(metrics.throughput_mbps / safe_baseline)
    # log1p(9 * ratio) / log1p(9) maps [0, 1] onto [0, 1] with diminishing
    # returns as throughput approaches the baseline.
    speed_score = math.log1p(speed_ratio * 9) / math.log1p(9)
    r += 0.15 * speed_score

    return round(r, 4)
def reward_to_label(r: float) -> str:
    """Translate a numeric reward into its display label.

    The reward is compared against ascending exclusive upper bounds; the
    first bound it falls strictly below selects the label. A reward that is
    below none of them (0.8 and up) gets the top label.
    """
    # (exclusive upper bound, label), in ascending order of bound.
    bands = (
        (-0.5, "π FAIL"),
        (0.0, "β bad"),
        (0.3, "β οΈ weak"),
        (0.6, "π‘ ok"),
        (0.8, "π’ good"),
    )
    for upper_bound, label in bands:
        if r < upper_bound:
            return label
    return "π great"