File size: 1,312 Bytes
c7c0a38
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55

import math

from shared .schemas import EpisodeMetrics 


def _clamp01(value: float) -> float:
    """Clamp *value* into the closed interval [0.0, 1.0]."""
    return min(1.0, max(0.0, value))


def compute_reward(metrics: EpisodeMetrics, baseline_mbps: float = 1.0) -> float:
    """Collapse the metrics of one episode into a single scalar reward.

    A failed connection short-circuits to ``-1.0``. Otherwise the reward is a
    weighted sum of three components, each normalised to ``[0, 1]``:

    * connection quality (weight 0.50): 30% connect time (reaches 0 at
      5000 ms), 40% ``1 - packet_loss_ratio``, 30% ping (reaches 0 at 1000 ms);
    * stability (weight 0.35): 70% ``stability_ratio``, 30% reconnect score
      (reaches 0 at 5 reconnects);
    * speed (weight 0.15): throughput relative to ``baseline_mbps``, capped
      at 1 and passed through a logarithmic curve.

    Every sub-score is clamped to ``[0, 1]``, so out-of-range metrics (for
    example a negative throughput or a loss ratio above 1) cannot push the
    reward outside ``[0, 1]`` or make the logarithm fail.

    Args:
        metrics: Episode measurements. The attributes read are ``connected``,
            ``connect_time_ms``, ``packet_loss_ratio``, ``avg_ping_ms``,
            ``stability_ratio``, ``reconnect_count`` and ``throughput_mbps``.
        baseline_mbps: Throughput that earns the full speed score. Values
            below 0.1 are raised to 0.1 to avoid dividing by zero or by a
            negative baseline.

    Returns:
        ``-1.0`` when ``metrics.connected`` is falsy, otherwise the weighted
        reward rounded to 4 decimal places.
    """
    if not metrics.connected:
        return -1.0

    reward = 0.0

    # Connection quality: how quickly and cleanly the link came up.
    connect_score = _clamp01(1.0 - metrics.connect_time_ms / 5000.0)
    loss_score = _clamp01(1.0 - metrics.packet_loss_ratio)
    ping_score = _clamp01(1.0 - metrics.avg_ping_ms / 1000.0)
    connection_component = connect_score * 0.3 + loss_score * 0.4 + ping_score * 0.3
    reward += 0.50 * connection_component

    # Stability: the stability ratio, discounted by the number of reconnects.
    stability_score = _clamp01(metrics.stability_ratio)
    reconnect_score = _clamp01(1.0 - metrics.reconnect_count / 5.0)
    stability_component = stability_score * 0.7 + reconnect_score * 0.3
    reward += 0.35 * stability_component

    # Speed: ratio to the baseline. The lower clamp matters because log1p
    # raises ValueError for arguments <= -1, which a negative throughput
    # would otherwise produce.
    safe_baseline = max(baseline_mbps, 0.1)
    speed_ratio = _clamp01(metrics.throughput_mbps / safe_baseline)
    # log1p(9 * x) / log1p(9) maps 0 -> 0 and 1 -> 1 on a concave curve, so
    # gains at low throughput count more than gains near the baseline.
    speed_score = math.log1p(speed_ratio * 9) / math.log1p(9)
    reward += 0.15 * speed_score

    return round(reward, 4)


# Exclusive upper bounds in ascending order, each paired with the label used
# for rewards below that bound.
_REWARD_LABEL_BOUNDS = (
    (-0.5, "💀 FAIL"),
    (0.0, "❌ bad"),
    (0.3, "⚠️ weak"),
    (0.6, "🟡 ok"),
    (0.8, "🟢 good"),
)


def reward_to_label(r: float) -> str:
    """Return the display label for reward *r*.

    The first bound in the table that *r* is strictly below decides the
    label; a reward that is below none of them is labelled "🏆 great".
    """
    return next(
        (label for upper_bound, label in _REWARD_LABEL_BOUNDS if r < upper_bound),
        "🏆 great",
    )