alphabypass3 / reward.py
NickupAI's picture
Upload 6 files
c7c0a38 verified
from shared .schemas import EpisodeMetrics
def _clamp_unit(value: float) -> float:
    """Clamp *value* to the closed interval [0.0, 1.0]."""
    return min(1.0, max(0.0, value))


def compute_reward(metrics: EpisodeMetrics, baseline_mbps: float = 1.0) -> float:
    """Collapse the metrics of one episode into a single scalar reward.

    An episode that never connected scores a flat ``-1.0``. Otherwise the
    reward is a weighted sum of three sub-scores, each clamped to [0, 1], so
    the result always lies in [0, 1]:

    * connection quality (weight 0.50): connect time (0.3), packet loss
      (0.4) and average ping (0.3);
    * stability (weight 0.35): stability ratio (0.7) and reconnect count
      (0.3);
    * speed (weight 0.15): throughput relative to ``baseline_mbps`` on a
      logarithmic scale.

    Args:
        metrics: Object exposing ``connected``, ``connect_time_ms``,
            ``packet_loss_ratio``, ``avg_ping_ms``, ``stability_ratio``,
            ``reconnect_count`` and ``throughput_mbps``.
        baseline_mbps: Throughput that earns the full speed score. Values
            below 0.1 are treated as 0.1 to avoid dividing by ~zero.

    Returns:
        ``-1.0`` when ``metrics.connected`` is falsy, otherwise the weighted
        score rounded to four decimal places.
    """
    import math  # only needed for the log-scaled speed score

    if not metrics.connected:
        return -1.0

    # Connection quality: the score reaches zero at 5000 for connect time
    # and at 1000 for ping. Clamping also caps the result at 1 when a
    # measurement comes back negative.
    connect_score = _clamp_unit(1.0 - metrics.connect_time_ms / 5000.0)
    loss_score = _clamp_unit(1.0 - metrics.packet_loss_ratio)
    ping_score = _clamp_unit(1.0 - metrics.avg_ping_ms / 1000.0)
    connection_component = (
        connect_score * 0.3 + loss_score * 0.4 + ping_score * 0.3
    )

    # Stability: five or more reconnects forfeit the reconnect share.
    stability_score = _clamp_unit(metrics.stability_ratio)
    reconnect_score = _clamp_unit(1.0 - metrics.reconnect_count / 5.0)
    stability_component = stability_score * 0.7 + reconnect_score * 0.3

    # Speed: the ratio must stay in [0, 1]. Without the lower bound a
    # negative throughput would push log1p's argument to <= -1 and raise
    # ValueError (math domain error).
    safe_baseline = max(baseline_mbps, 0.1)
    speed_ratio = _clamp_unit(metrics.throughput_mbps / safe_baseline)
    # log1p(9 * x) / log1p(9) maps 0 -> 0 and 1 -> 1 while rewarding the
    # first increments of throughput more than the last ones.
    speed_score = math.log1p(speed_ratio * 9) / math.log1p(9)

    reward = 0.50 * connection_component
    reward += 0.35 * stability_component
    reward += 0.15 * speed_score
    return round(reward, 4)
# (exclusive upper bound, label) pairs in ascending order. The emoji are
# written as escape sequences so the source stays pure ASCII and cannot be
# corrupted by a wrong-encoding round-trip.
_REWARD_LABELS = (
    (-0.5, "\U0001F480 FAIL"),   # skull
    (0.0, "\u274c bad"),         # cross mark
    (0.3, "\u26a0\ufe0f weak"),  # warning sign (emoji presentation)
    (0.6, "\U0001F7E1 ok"),      # yellow circle
    (0.8, "\U0001F7E2 good"),    # green circle
)
_TOP_REWARD_LABEL = "\U0001F3C6 great"  # trophy


def reward_to_label(r: float) -> str:
    """Return a short emoji-prefixed label describing reward *r*.

    Bands (upper bounds are exclusive): below -0.5 is "FAIL", below 0.0 is
    "bad", below 0.3 is "weak", below 0.6 is "ok", below 0.8 is "good", and
    anything else is "great".

    Args:
        r: Reward value to classify.

    Returns:
        The label of the first band whose upper bound is greater than *r*.
    """
    for upper_bound, label in _REWARD_LABELS:
        if r < upper_bound:
            return label
    return _TOP_REWARD_LABEL