Upload 6 files

Browse files

Files changed (6) hide show

agent.py +368 -0
config_generator.py +272 -0
crypto.py +105 -0
environment.py +214 -0
reward.py +54 -0
schemas.py +182 -0

agent.py ADDED Viewed

	@@ -0,0 +1,368 @@

+from __future__ import annotations
+import os
+import time
+import json
+import numpy as np
+import torch
+import torch .nn as nn
+import torch .nn .functional as F
+from torch .distributions import Categorical ,Normal
+from typing import Optional ,Tuple ,List
+from server .rl .environment import (
+DISCRETE_NVEC ,
+N_CONTINUOUS ,
+TOTAL_OBS_DIM ,
+AlphaBypassEnv ,
+)
+from server .rl .reward import reward_to_label
class PolicyNetwork(nn.Module):
    """Actor-critic network for a hybrid multi-discrete + continuous action space.

    A shared three-layer MLP trunk feeds one categorical head per discrete
    action dimension, a Gaussian head (sigmoid-squashed mean, state-independent
    log-std) for the continuous dimensions, and a scalar value head.
    """

    def __init__(
        self,
        obs_dim: int = TOTAL_OBS_DIM,
        hidden: int = 512,
        discrete_nvec: List[int] = DISCRETE_NVEC,
        n_continuous: int = N_CONTINUOUS,
    ):
        super().__init__()
        # Copy: the default is a shared module-level list, and the heads built
        # below must stay in sync with the sizes recorded on this instance.
        self.discrete_nvec = list(discrete_nvec)
        self.n_continuous = n_continuous
        self.trunk = nn.Sequential(
            nn.Linear(obs_dim, hidden),
            nn.LayerNorm(hidden),
            nn.ReLU(),
            nn.Linear(hidden, hidden),
            nn.LayerNorm(hidden),
            nn.ReLU(),
            nn.Linear(hidden, hidden),
            nn.LayerNorm(hidden),
            nn.ReLU(),
        )
        self.discrete_heads = nn.ModuleList(
            [nn.Linear(hidden, n) for n in self.discrete_nvec]
        )
        self.cont_mu = nn.Linear(hidden, n_continuous)
        self.cont_log_std = nn.Parameter(torch.zeros(n_continuous))
        self.value_head = nn.Sequential(
            nn.Linear(hidden, 256),
            nn.ReLU(),
            nn.Linear(256, 1),
        )

    def forward(
        self, obs: torch.Tensor
    ) -> Tuple[List[torch.Tensor], torch.Tensor, torch.Tensor, torch.Tensor]:
        """Return ``(logits_per_head, mu, log_std, value)`` for ``obs``.

        ``mu`` is squashed into (0, 1) with a sigmoid and ``log_std`` is
        clamped to [-4, 0]; ``value`` has its trailing size-1 axis removed.
        """
        h = self.trunk(obs)
        logits = [head(h) for head in self.discrete_heads]
        mu = torch.sigmoid(self.cont_mu(h))
        log_std = self.cont_log_std.clamp(-4, 0)
        value = self.value_head(h).squeeze(-1)
        return logits, mu, log_std, value

    def get_action_and_log_prob(
        self,
        obs: torch.Tensor,
        action_masks: Optional[List[Optional[torch.Tensor]]] = None,
    ) -> Tuple[np.ndarray, np.ndarray, torch.Tensor, torch.Tensor]:
        """Sample one action for a single (unbatched) observation.

        Args:
            obs: One observation; each discrete sample is read with
                ``.item()``, so a batch of more than one row is not supported.
            action_masks: Optional per-head boolean masks; ``False`` entries
                are excluded from sampling. ``None`` entries mean "no mask".

        Returns:
            ``(discrete_actions, continuous_action, log_prob, value)`` where
            the continuous action is clamped to [0, 1] and ``log_prob`` is the
            joint log-probability of exactly the returned action.
        """
        logits, mu, log_std, value = self.forward(obs)

        discrete_actions = []
        log_probs_discrete = []
        for i, lg in enumerate(logits):
            if action_masks and action_masks[i] is not None:
                mask = action_masks[i].to(lg.device)
                lg = lg.masked_fill(~mask, float("-inf"))
            dist = Categorical(logits=lg)
            a = dist.sample()
            discrete_actions.append(a.item())
            log_probs_discrete.append(dist.log_prob(a))
        log_prob_discrete = torch.stack(log_probs_discrete).sum()

        dist_cont = Normal(mu, log_std.exp())
        cont_action = dist_cont.sample().clamp(0.0, 1.0)
        # Score the *clamped* action: it is what gets stored in the rollout
        # buffer and later re-scored by evaluate_actions(). Scoring the raw
        # sample instead made old/new log-probs disagree whenever clamping
        # kicked in, so the PPO ratio was not 1 even before the first update.
        log_prob_cont = dist_cont.log_prob(cont_action).sum()

        return (
            np.array(discrete_actions, dtype=np.int32),
            cont_action.detach().cpu().numpy(),
            log_prob_discrete + log_prob_cont,
            value,
        )

    def evaluate_actions(
        self,
        obs: torch.Tensor,
        discrete_actions: torch.Tensor,
        cont_actions: torch.Tensor,
    ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
        """Re-score a batch of stored actions under the current policy.

        Args:
            obs: Batched observations (first axis is the batch).
            discrete_actions: Integer actions, indexed as ``[:, head]``.
            cont_actions: Continuous actions as stored by the rollout.

        Returns:
            ``(log_prob, entropy, value)``, each with one entry per batch row.
            ``entropy`` is the mean of the summed discrete and summed
            continuous entropies.

        Note:
            Action masks are not applied here; this matches sampling only
            while no mask is in effect.
        """
        logits, mu, log_std, value = self.forward(obs)

        log_prob_d = torch.zeros(obs.shape[0], device=obs.device)
        entropy_d = torch.zeros(obs.shape[0], device=obs.device)
        for i, lg in enumerate(logits):
            dist = Categorical(logits=lg)
            log_prob_d += dist.log_prob(discrete_actions[:, i])
            entropy_d += dist.entropy()

        dist_c = Normal(mu, log_std.exp())
        log_prob_c = dist_c.log_prob(cont_actions).sum(-1)
        entropy_c = dist_c.entropy().sum(-1)
        return log_prob_d + log_prob_c, (entropy_d + entropy_c) / 2, value
class RolloutBuffer:
    """Per-step storage for one PPO rollout plus GAE computation."""

    def __init__(self):
        self.clear()

    def clear(self):
        """Drop all stored transitions."""
        self.obs: List[np.ndarray] = []
        self.discrete_actions: List[np.ndarray] = []
        self.cont_actions: List[np.ndarray] = []
        self.rewards: List[float] = []
        self.log_probs: List[torch.Tensor] = []
        self.values: List[torch.Tensor] = []
        self.dones: List[bool] = []

    def add(self, obs, d_action, c_action, reward, log_prob, value, done):
        """Append one transition; all lists stay index-aligned."""
        self.obs.append(obs)
        self.discrete_actions.append(d_action)
        self.cont_actions.append(c_action)
        self.rewards.append(reward)
        self.log_probs.append(log_prob)
        self.values.append(value)
        self.dones.append(done)

    def compute_returns(
        self, last_value: float, gamma: float = 0.99, gae_lambda: float = 0.95
    ):
        """Compute GAE advantages and value targets for the stored steps.

        Args:
            last_value: Value estimate for the state after the final step,
                used to bootstrap when that step is not terminal.
            gamma: Discount factor.
            gae_lambda: GAE smoothing factor.

        Returns:
            ``(advantages, returns)`` as two lists of floats, one entry per
            stored step; both are empty for an empty buffer.
        """
        n = len(self.rewards)
        # Convert each stored value exactly once (works for 1-element tensors
        # and plain floats alike).
        step_values = [float(v) for v in self.values]
        values = step_values + [last_value]

        # Fill back-to-front by index; list.insert(0, ...) here was O(n^2).
        advantages = [0.0] * n
        gae = 0.0
        for t in reversed(range(n)):
            not_done = 1 - self.dones[t]
            delta = self.rewards[t] + gamma * values[t + 1] * not_done - values[t]
            gae = delta + gamma * gae_lambda * not_done * gae
            advantages[t] = gae

        returns = [a + v for a, v in zip(advantages, step_values)]
        return advantages, returns

    def to_tensors(self, device: torch.device):
        """Stack stored observations/actions into tensors on ``device``.

        Returns:
            ``(obs, discrete_actions, cont_actions)`` as float32, int64 and
            float32 tensors respectively.
        """
        obs = torch.as_tensor(np.stack(self.obs), dtype=torch.float32).to(device)
        d_act = torch.as_tensor(
            np.stack(self.discrete_actions), dtype=torch.long
        ).to(device)
        c_act = torch.as_tensor(
            np.stack(self.cont_actions), dtype=torch.float32
        ).to(device)
        return obs, d_act, c_act
class PPOTrainer:
    """PPO training loop that drives ``PolicyNetwork`` against an ``AlphaBypassEnv``.

    Every environment step is counted as one "episode" (``total_episodes``),
    and the policy is updated after each block of ``steps_per_update`` steps.
    """

    def __init__(
        self,
        env: AlphaBypassEnv,
        device_str: str = "cuda",
        lr: float = 3e-4,
        gamma: float = 0.99,
        gae_lambda: float = 0.95,
        clip_eps: float = 0.2,
        entropy_coef: float = 0.01,
        vf_coef: float = 0.5,
        max_grad_norm: float = 0.5,
        update_epochs: int = 4,
        steps_per_update: int = 8,
        checkpoint_dir: str = "checkpoints",
        checkpoint_every: int = 100,
    ):
        self.env = env
        # Falls back to CPU whenever CUDA is unavailable.
        self.device = torch.device(device_str if torch.cuda.is_available() else "cpu")
        print(f"[PPO] device: {self.device}")
        self.policy = PolicyNetwork().to(self.device)
        self.optimizer = torch.optim.Adam(self.policy.parameters(), lr=lr)
        self.scheduler = torch.optim.lr_scheduler.ExponentialLR(
            self.optimizer, gamma=0.999
        )
        self.gamma = gamma
        self.gae_lambda = gae_lambda
        self.clip_eps = clip_eps
        self.entropy_coef = entropy_coef
        self.vf_coef = vf_coef
        self.max_grad_norm = max_grad_norm
        self.update_epochs = update_epochs
        self.steps_per_update = steps_per_update
        self.checkpoint_dir = checkpoint_dir
        self.checkpoint_every = checkpoint_every
        os.makedirs(checkpoint_dir, exist_ok=True)
        self.total_episodes = 0
        self.best_reward = -float("inf")
        self.reward_history: List[float] = []

    def _build_action_masks(
        self, obs_tensor: torch.Tensor
    ) -> List[Optional[torch.Tensor]]:
        """Return one mask slot per discrete head; currently nothing is masked."""
        return [None] * len(DISCRETE_NVEC)

    def collect_rollout(self) -> RolloutBuffer:
        """Run ``steps_per_update`` environment steps with the current policy.

        Logs every step, resets the environment when it reports ``done`` and
        writes a checkpoint every ``checkpoint_every`` steps.
        """
        buffer = RolloutBuffer()
        obs = self.env._build_obs()
        for _ in range(self.steps_per_update):
            obs_t = torch.FloatTensor(obs).unsqueeze(0).to(self.device)
            with torch.no_grad():
                masks = self._build_action_masks(obs_t)
                # The policy samples from a single, unbatched observation.
                d_action, c_action, log_prob, value = (
                    self.policy.get_action_and_log_prob(obs_t.squeeze(0), masks)
                )
            next_obs, reward, done, info = self.env.step(d_action, c_action)
            self.total_episodes += 1
            self.reward_history.append(reward)
            print(
                f"[Ep {self.total_episodes:04d}] "
                f"reward={reward:+.4f} {reward_to_label(reward)} | "
                f"transport={info['transport']:5s} dest={info['dest']:30s} | "
                f"stable={info['stability']:.2f} "
                f"speed={info['throughput_mbps']:.2f}Mbps"
            )
            buffer.add(obs, d_action, c_action, reward, log_prob, value, done)
            obs = next_obs
            if done:
                obs = self.env.reset()
            if self.total_episodes % self.checkpoint_every == 0:
                self.save_checkpoint()
        return buffer

    def update(self, buffer: RolloutBuffer):
        """Run ``update_epochs`` clipped-PPO gradient steps on ``buffer``."""
        n_steps = len(buffer.rewards)
        if n_steps == 0:
            # Nothing to learn from; also avoids dividing by zero below.
            return

        print(f"\n[PPO] ── Update #{self.total_episodes // self.steps_per_update} ──────────────────────────")
        print(
            f"[PPO] Buffer: {n_steps} episodes | "
            f"mean_reward={sum(buffer.rewards) / n_steps:+.4f} | "
            f"positive={sum(1 for r in buffer.rewards if r > 0)}/{n_steps}"
        )
        obs_t, d_act_t, c_act_t = buffer.to_tensors(self.device)

        # Bootstrap value for the state that follows the last stored step.
        with torch.no_grad():
            last_obs = torch.FloatTensor(self.env._build_obs()).to(self.device)
            _, _, _, last_val = self.policy.forward(last_obs.unsqueeze(0))
            last_value = last_val.item()

        advantages, returns = buffer.compute_returns(
            last_value, self.gamma, self.gae_lambda
        )
        adv_t = torch.FloatTensor(advantages).to(self.device)
        ret_t = torch.FloatTensor(returns).to(self.device)
        if adv_t.numel() > 1:
            # std() of a single element is NaN, which would poison the whole
            # update; only normalise when there is something to normalise.
            adv_t = (adv_t - adv_t.mean()) / (adv_t.std() + 1e-8)
        old_log_probs = torch.stack(buffer.log_probs).to(self.device).detach()

        for _ in range(self.update_epochs):
            log_probs, entropy, values = self.policy.evaluate_actions(
                obs_t, d_act_t, c_act_t
            )
            ratio = (log_probs - old_log_probs).exp()
            surr1 = ratio * adv_t
            surr2 = ratio.clamp(1 - self.clip_eps, 1 + self.clip_eps) * adv_t
            policy_loss = -torch.min(surr1, surr2).mean()
            value_loss = F.mse_loss(values, ret_t)
            entropy_loss = -entropy.mean()
            loss = (
                policy_loss
                + self.vf_coef * value_loss
                + self.entropy_coef * entropy_loss
            )
            self.optimizer.zero_grad()
            loss.backward()
            nn.utils.clip_grad_norm_(self.policy.parameters(), self.max_grad_norm)
            self.optimizer.step()
        self.scheduler.step()

    def train(self, total_episodes: int = 10000):
        """Alternate rollouts and updates until ``total_episodes`` steps are done."""
        print(f"\n{'=' * 60}")
        print(f"  AlphaBypass — PPO Training")
        print(f"  Target: {total_episodes} episodes")
        print(f"  Device: {self.device}")
        print(f"{'=' * 60}\n")
        self.env.reset()
        while self.total_episodes < total_episodes:
            buffer = self.collect_rollout()
            self.update(buffer)
            if len(self.reward_history) >= 20:
                recent = self.reward_history[-20:]
                print(
                    f"\n[Stats] last 20 episodes: "
                    f"mean={np.mean(recent):+.4f} "
                    f"max={np.max(recent):+.4f} "
                    f"min={np.min(recent):+.4f}\n"
                )

    def save_checkpoint(self, tag: str = ""):
        """Write a checkpoint, and also ``best.pt`` when the recent mean improves.

        "Recent mean" is the mean of the last 10 rewards, or ``-999`` while
        fewer than 10 rewards have been recorded.
        """
        path = os.path.join(
            self.checkpoint_dir,
            f"checkpoint_ep{self.total_episodes:05d}{tag}.pt",
        )
        # Decide "best" before building the state so the file records the
        # up-to-date best_reward. Cast to a plain float: a NumPy scalar in the
        # checkpoint cannot be read back by torch.load(weights_only=True).
        if len(self.reward_history) >= 10:
            recent_mean = float(np.mean(self.reward_history[-10:]))
        else:
            recent_mean = -999.0
        is_best = recent_mean > self.best_reward
        if is_best:
            self.best_reward = recent_mean

        state = {
            "episode": self.total_episodes,
            "policy_state": self.policy.state_dict(),
            "optimizer_state": self.optimizer.state_dict(),
            "scheduler_state": self.scheduler.state_dict(),
            "reward_history": self.reward_history,
            "best_reward": self.best_reward,
        }
        torch.save(state, path)
        print(f"[Checkpoint] saved → {path}")

        if is_best:
            best_path = os.path.join(self.checkpoint_dir, "best.pt")
            # Save the in-memory state directly instead of re-loading the
            # file that was just written.
            torch.save(state, best_path)
            print(f"[Checkpoint] 🏆 new best ({recent_mean:+.4f}) → {best_path}")

    def load_checkpoint(self, path: str):
        """Restore policy, optimizer and counters from ``path``.

        The scheduler state is restored when present; checkpoints written
        before it was stored are still accepted.
        """
        ck = torch.load(path, map_location=self.device)
        self.policy.load_state_dict(ck["policy_state"])
        self.optimizer.load_state_dict(ck["optimizer_state"])
        if "scheduler_state" in ck:
            self.scheduler.load_state_dict(ck["scheduler_state"])
        self.total_episodes = ck["episode"]
        self.reward_history = ck.get("reward_history", [])
        self.best_reward = ck.get("best_reward", -float("inf"))
        print(f"[Checkpoint] loaded from ep {self.total_episodes}")

config_generator.py ADDED Viewed

	@@ -0,0 +1,272 @@

+import json
+import uuid
+import secrets
+import string
+from typing import Optional
+from shared .schemas import VlessConfig
def generate_uuid() -> str:
    """Return a freshly generated random (version 4) UUID in canonical text form."""
    fresh = uuid.uuid4()
    return f"{fresh}"
def generate_short_id(length: int = 8) -> str:
    """Return a random lowercase hex string built from ``length // 2`` bytes.

    The result therefore has ``length`` characters for even ``length`` and
    ``length - 1`` characters for odd ``length``.
    """
    n_bytes = length // 2
    return secrets.token_hex(n_bytes)
def random_service_name(seed: int) -> str:
    """Return a deterministic pseudo-random service name derived from ``seed``.

    The name is 6-20 characters drawn from lowercase ASCII letters and digits.
    The same seed always yields the same name, so this is NOT suitable for
    anything that must be unpredictable.
    """
    # Function-scope import instead of the __import__("random") hack; a
    # dedicated Random instance keeps the global RNG state untouched.
    import random

    alphabet = string.ascii_lowercase + string.digits
    rng = random.Random(seed)
    length = rng.randint(6, 20)
    return "".join(rng.choice(alphabet) for _ in range(length))
def build_server_config(
    cfg: VlessConfig,
    vless_uuid: str,
    private_key: str,
    public_key: str,
    listen_ip: str = "0.0.0.0",
) -> dict:
    """Assemble the server-side configuration dict for one VLESS+REALITY inbound.

    Args:
        cfg: Tunable parameters (port, transport, destination domain, ...).
        vless_uuid: Client id accepted by the inbound.
        private_key: REALITY private key placed in ``realitySettings``.
        public_key: Accepted for signature symmetry; not used here.
        listen_ip: Address the inbound listens on.

    Returns:
        A dict with ``log``, ``inbounds``, ``outbounds`` and ``routing``
        sections; traffic to ``geoip:private`` is routed to the blackhole.
    """
    stream_settings = _build_stream_settings_server(cfg)
    # Added after the transport-specific keys so key order stays unchanged.
    stream_settings["realitySettings"] = {
        "show": False,
        "dest": f"{cfg.dest_domain}:443",
        "xver": 0,
        "serverNames": [cfg.dest_domain],
        "privateKey": private_key,
        "shortIds": [cfg.short_id],
        "spiderX": cfg.spider_x,
    }

    client_entry = {"id": vless_uuid, "flow": _pick_flow(cfg)}
    vless_inbound = {
        "tag": "vless-in",
        "listen": listen_ip,
        "port": cfg.proxy_port,
        "protocol": "vless",
        "settings": {"clients": [client_entry], "decryption": "none"},
        "streamSettings": stream_settings,
        "sniffing": {"enabled": True, "destOverride": ["http", "tls", "quic"]},
    }

    block_private_rule = {
        "type": "field",
        "ip": ["geoip:private"],
        "outboundTag": "block",
    }
    return {
        "log": {"loglevel": "warning"},
        "inbounds": [vless_inbound],
        "outbounds": [
            {"tag": "direct", "protocol": "freedom"},
            {"tag": "block", "protocol": "blackhole"},
        ],
        "routing": {"rules": [block_private_rule]},
    }
def _pick_flow(cfg: VlessConfig) -> str:
    """Return the VLESS flow string: ``xtls-rprx-vision`` for TCP, else empty."""
    return "xtls-rprx-vision" if cfg.transport_type == "tcp" else ""
def _build_stream_settings_server(cfg: VlessConfig) -> dict:
    """Build the server ``streamSettings`` skeleton for ``cfg``'s transport.

    Always sets ``network`` and ``security: reality``; adds ``grpcSettings``
    or ``xhttpSettings`` for those transports. ``realitySettings`` is not
    added here.
    """
    transport = cfg.transport_type
    settings = {"network": transport, "security": "reality"}

    if transport == "grpc":
        settings["grpcSettings"] = {
            "serviceName": cfg.grpc_service_name,
            "multiMode": False,
        }
        return settings

    if transport == "xhttp":
        settings["xhttpSettings"] = {
            "mode": cfg.xhttp_mode,
            "path": cfg.spider_x or "/",
            "host": cfg.dest_domain,
        }
    return settings
def build_client_config(
    cfg: VlessConfig,
    vless_uuid: str,
    server_ip: str,
    public_key: str,
    socks_port: int = 10808,
    http_port: int = 10809,
) -> dict:
    """Assemble the client-side configuration dict matching ``cfg``.

    The result exposes local SOCKS and HTTP inbounds on 127.0.0.1 and a
    ``proxy`` VLESS outbound towards ``server_ip``. Mux is enabled when
    ``cfg.mux_concurrency`` is positive. For TCP with a fragment strategy
    other than ``"none"``, the proxy dials through an extra ``fragment``
    outbound that is appended to ``outbounds``.
    """
    use_fragment = cfg.transport_type == "tcp" and cfg.fragment_strategy != "none"

    user = {
        "id": vless_uuid,
        "encryption": "none",
        "flow": _pick_flow(cfg),
    }
    server = {"address": server_ip, "port": cfg.proxy_port, "users": [user]}
    proxy_outbound = {
        "tag": "proxy",
        "protocol": "vless",
        "settings": {"vnext": [server]},
        "streamSettings": _build_stream_settings_client(cfg, public_key),
    }
    if cfg.mux_concurrency > 0:
        proxy_outbound["mux"] = {
            "enabled": True,
            "concurrency": cfg.mux_concurrency,
            "xudpConcurrency": cfg.mux_concurrency,
        }
    if use_fragment:
        proxy_outbound["streamSettings"]["sockopt"] = {"dialerProxy": "fragment"}

    socks_inbound = {
        "tag": "socks",
        "listen": "127.0.0.1",
        "port": socks_port,
        "protocol": "socks",
        "settings": {"auth": "noauth", "udp": True},
    }
    http_inbound = {
        "tag": "http",
        "listen": "127.0.0.1",
        "port": http_port,
        "protocol": "http",
    }

    outbounds = [proxy_outbound, {"tag": "direct", "protocol": "freedom"}]
    if use_fragment:
        outbounds.append(_build_fragment_outbound(cfg))

    return {
        "log": {"loglevel": "warning"},
        "inbounds": [socks_inbound, http_inbound],
        "outbounds": outbounds,
    }
def _build_stream_settings_client(cfg: VlessConfig, public_key: str) -> dict:
    """Build the client ``streamSettings`` (REALITY plus transport options).

    ``alpn`` is only included when ``cfg.alpn`` is non-empty. For ``xhttp``
    the request headers start with ``Host: <dest_domain>`` and are then
    updated with ``cfg.extra_headers`` (which may override ``Host``).
    """
    reality = {
        "fingerprint": cfg.fingerprint,
        "serverName": cfg.dest_domain,
        "publicKey": public_key,
        "shortId": cfg.short_id,
        "spiderX": cfg.spider_x,
    }
    if cfg.alpn:
        reality["alpn"] = cfg.alpn

    settings: dict = {
        "network": cfg.transport_type,
        "security": "reality",
        "realitySettings": reality,
    }

    transport = cfg.transport_type
    if transport == "grpc":
        settings["grpcSettings"] = {"serviceName": cfg.grpc_service_name}
    elif transport == "xhttp":
        request_headers = {"Host": cfg.dest_domain}
        request_headers.update(cfg.extra_headers)
        settings["xhttpSettings"] = {
            "mode": cfg.xhttp_mode,
            "path": cfg.spider_x or "/",
            "headers": request_headers,
        }
    return settings
def _build_fragment_outbound(cfg: VlessConfig) -> dict:
    """Build the ``fragment`` freedom outbound that the proxy dials through.

    The fragmentation parameters (``packets``, ``length`` and ``interval``
    ranges formatted as ``"min-max"``) are placed inside ``settings``: per the
    Xray freedom-outbound documentation that is where ``fragment`` is read
    from. The previous layout put ``fragment`` next to ``settings`` at the
    outbound's top level, where it is not part of the freedom schema, so the
    configured fragmentation never took effect.
    """
    return {
        "tag": "fragment",
        "protocol": "freedom",
        "settings": {
            "domainStrategy": "AsIs",
            "fragment": {
                "packets": cfg.fragment_strategy,
                "length": f"{cfg.fragment_length_min}-{cfg.fragment_length_max}",
                "interval": f"{cfg.fragment_interval_min}-{cfg.fragment_interval_max}",
            },
        },
        "streamSettings": {
            "sockopt": {
                "dialerProxy": "",
                "tcpKeepAliveInterval": 0,
            }
        },
    }
def generate_reality_keys() -> tuple[str, str]:
    """Generate an X25519 key pair by running ``xray x25519``.

    Returns:
        ``(private_key, public_key)`` as printed by the tool.

    Raises:
        FileNotFoundError: The ``xray`` binary is not on ``PATH``.
        subprocess.TimeoutExpired: The tool did not finish within 10 seconds.
        RuntimeError: The tool exited with a non-zero status or its output
            did not contain two ``label: value`` lines.
    """
    import subprocess

    result = subprocess.run(
        ["xray", "x25519"],
        capture_output=True,
        text=True,
        timeout=10,
    )
    if result.returncode != 0:
        raise RuntimeError(
            f"`xray x25519` exited with code {result.returncode}: "
            f"{result.stderr.strip()}"
        )

    # Collect every "label: value" line, both by normalised label and in order.
    by_label = {}
    in_order = []
    for line in result.stdout.splitlines():
        label, sep, value = line.partition(":")
        value = value.strip()
        if not sep or not value:
            continue
        by_label[label.strip().lower().replace(" ", "")] = value
        in_order.append(value)

    priv = by_label.get("privatekey")
    # NOTE(review): some newer xray builds reportedly print the public key
    # under the label "Password" — confirm against the deployed version.
    pub = by_label.get("publickey") or by_label.get("password")
    if priv is None or pub is None:
        # Unknown labels: fall back to the historical positional layout
        # (first line private key, second line public key).
        if len(in_order) < 2:
            raise RuntimeError(
                f"Unexpected `xray x25519` output: {result.stdout!r}"
            )
        priv, pub = in_order[0], in_order[1]
    return priv, pub
def save_config(config: dict, path: str):
    """Serialise ``config`` as 2-space-indented JSON into ``path`` (overwriting)."""
    with open(path, "w") as handle:
        json.dump(config, handle, indent=2)

crypto.py ADDED Viewed

	@@ -0,0 +1,105 @@

+import hmac
+import hashlib
+import time
+import json
+import os
+import base64
+from typing import Tuple
def sign_payload(payload: dict, secret: str) -> Tuple[str, str]:
    """Serialise ``payload`` with a timestamp and sign it with HMAC-SHA256.

    A ``_ts`` field holding the current Unix time (whole seconds) is added to
    a copy of ``payload``; the caller's dict is left untouched. The copy is
    encoded as compact JSON with sorted keys so the byte string is stable.

    Returns:
        ``(body, sig)``: the JSON text and the hex HMAC-SHA256 of that text
        keyed with ``secret``.
    """
    # Stamp a copy: mutating the argument leaked "_ts" into the caller's dict
    # (and into any later re-use of it).
    stamped = {**payload, "_ts": int(time.time())}
    body = json.dumps(stamped, separators=(",", ":"), sort_keys=True)
    sig = hmac.new(secret.encode(), body.encode(), hashlib.sha256).hexdigest()
    return body, sig
def verify_payload(body: str, sig: str, secret: str, max_age_seconds: int = 300) -> dict:
    """Check the HMAC-SHA256 signature and freshness of a signed JSON body.

    Args:
        body: JSON text exactly as it was signed.
        sig: Hex signature supplied by the sender.
        secret: Shared secret used as the HMAC key.
        max_age_seconds: Maximum allowed distance (past or future) between
            the body's ``_ts`` and the local clock.

    Returns:
        The decoded JSON object.

    Raises:
        ValueError: The signature does not match, the body is not a JSON
            object, ``_ts`` is not a number, or the request is stale.
    """
    expected = hmac.new(secret.encode(), body.encode(), hashlib.sha256).hexdigest()
    # Compare as bytes: compare_digest() raises TypeError for non-ASCII str,
    # which let a malformed signature surface as a crash instead of a clean
    # rejection.
    if not isinstance(sig, str) or not hmac.compare_digest(
        expected.encode(), sig.encode()
    ):
        raise ValueError("Invalid HMAC signature")

    data = json.loads(body)
    if not isinstance(data, dict):
        raise ValueError("Signed body must be a JSON object")

    ts = data.get("_ts", 0)
    if isinstance(ts, bool) or not isinstance(ts, (int, float)):
        raise ValueError("Missing or malformed timestamp")

    age = abs(time.time() - ts)
    if age > max_age_seconds:
        raise ValueError(f"Stale request: {age:.0f}s old")
    return data
def generate_self_signed_cert(cert_path: str, key_path: str, cn: str = "localhost"):
    """Create a 2048-bit RSA key and a 10-year self-signed certificate.

    Args:
        cert_path: Where the PEM certificate is written.
        key_path: Where the unencrypted PEM private key is written
            (created with owner-only permissions).
        cn: Common name; also used as the single subject alternative name,
            as an IP-address entry when it parses as an IP and as a DNS name
            otherwise.
    """
    import datetime
    import ipaddress

    from cryptography import x509
    from cryptography.x509.oid import NameOID
    from cryptography.hazmat.primitives import hashes, serialization
    from cryptography.hazmat.primitives.asymmetric import rsa
    from cryptography.hazmat.backends import default_backend

    key = rsa.generate_private_key(
        public_exponent=65537,
        key_size=2048,
        backend=default_backend(),
    )
    subject = issuer = x509.Name([
        x509.NameAttribute(NameOID.COMMON_NAME, cn),
    ])

    # Clients match IP literals against iPAddress SAN entries, not dNSName.
    try:
        san_entry = x509.IPAddress(ipaddress.ip_address(cn))
    except ValueError:
        san_entry = x509.DNSName(cn)

    # Timezone-aware "now"; datetime.utcnow() is deprecated since Python 3.12.
    now = datetime.datetime.now(datetime.timezone.utc)
    cert = (
        x509.CertificateBuilder()
        .subject_name(subject)
        .issuer_name(issuer)
        .public_key(key.public_key())
        .serial_number(x509.random_serial_number())
        .not_valid_before(now)
        .not_valid_after(now + datetime.timedelta(days=3650))
        .add_extension(
            x509.SubjectAlternativeName([san_entry]),
            critical=False,
        )
        .sign(key, hashes.SHA256(), default_backend())
    )

    with open(cert_path, "wb") as f:
        f.write(cert.public_bytes(serialization.Encoding.PEM))

    # Create the key file as 0600 so the private key is never world-readable,
    # not even briefly.
    fd = os.open(key_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(fd, "wb") as f:
        f.write(key.private_bytes(
            serialization.Encoding.PEM,
            serialization.PrivateFormat.TraditionalOpenSSL,
            serialization.NoEncryption(),
        ))
def load_or_create_secret(path: str = "shared_secret.key") -> str:
    """Return the shared secret stored at ``path``, creating it if needed.

    An existing, non-empty file is returned with surrounding whitespace
    stripped. A missing or empty file is (re)written with a fresh secret:
    32 random bytes, URL-safe base64 encoded.
    """
    # EAFP instead of exists()+open(): no window between check and use.
    try:
        with open(path, "r", encoding="utf-8") as f:
            existing = f.read().strip()
    except FileNotFoundError:
        existing = ""
    if existing:
        return existing

    # An empty file must not be accepted: it would silently become an empty
    # HMAC key. Generate a real secret instead.
    secret = base64.urlsafe_b64encode(os.urandom(32)).decode()
    # Owner-only permissions on creation; the secret authenticates requests.
    fd = os.open(path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(fd, "w", encoding="utf-8") as f:
        f.write(secret)
    return secret

environment.py ADDED Viewed

	@@ -0,0 +1,214 @@

+from __future__ import annotations
+import math
+import time
+import uuid
+import numpy as np
+from typing import Optional ,Tuple
+from shared .schemas import (
+VlessConfig ,
+EpisodeMetrics ,
+TRANSPORT_TYPES ,
+CANDIDATE_PORTS ,
+SNI_DOMAINS ,
+FINGERPRINTS ,
+ALPN_OPTIONS ,
+FRAGMENT_STRATEGIES ,
+MUX_CONCURRENCY_VALUES ,
+SHORT_ID_LENGTHS ,
+XHTTP_MODES ,
+)
+from server .rl .reward import compute_reward
# Observation layout: the last HISTORY_LEN episodes, OBS_PER_EPISODE features
# each, followed by 5 extra context features.
HISTORY_LEN = 10
OBS_PER_EPISODE = 7
TOTAL_OBS_DIM = HISTORY_LEN * OBS_PER_EPISODE + 5

# Number of choices per discrete action dimension, in the order the action
# vector is indexed; the final dimension has a fixed 100 choices.
DISCRETE_NVEC = list(
    map(
        len,
        (
            TRANSPORT_TYPES,
            CANDIDATE_PORTS,
            SNI_DOMAINS,
            FINGERPRINTS,
            ALPN_OPTIONS,
            FRAGMENT_STRATEGIES,
            MUX_CONCURRENCY_VALUES,
            SHORT_ID_LENGTHS,
            XHTTP_MODES,
        ),
    )
) + [100]

# Number of continuous action dimensions.
N_CONTINUOUS = 5
def decode_action(discrete: np.ndarray, continuous: np.ndarray) -> VlessConfig:
    """Translate a raw policy action into a concrete ``VlessConfig``.

    Args:
        discrete: Indices into the option tables, in ``DISCRETE_NVEC`` order;
            the last entry seeds the gRPC service-name generator.
        continuous: Five values nominally in [0, 1] that scale fragment
            length, fragment interval and padding ranges. Values outside
            [0, 1] are clipped.

    Returns:
        The decoded configuration. Fragmentation and padding are only kept
        for the ``tcp`` transport.
    """
    # Imported lazily, as in the original (likely to avoid an import cycle —
    # TODO confirm).
    from server.config_generator import random_service_name, generate_short_id

    transport = TRANSPORT_TYPES[int(discrete[0])]
    port = CANDIDATE_PORTS[int(discrete[1])]
    dest = SNI_DOMAINS[int(discrete[2])]
    fingerprint = FINGERPRINTS[int(discrete[3])]
    alpn = ALPN_OPTIONS[int(discrete[4])]
    frag_strat = FRAGMENT_STRATEGIES[int(discrete[5])]
    mux_conc = MUX_CONCURRENCY_VALUES[int(discrete[6])]
    sid_len = SHORT_ID_LENGTHS[int(discrete[7])]
    xhttp_mode = XHTTP_MODES[int(discrete[8])]
    grpc_seed = int(discrete[9])

    # Defensive clip: an out-of-range value would otherwise decode into
    # negative or inverted length/interval ranges.
    c = np.clip(np.asarray(continuous, dtype=np.float64), 0.0, 1.0)
    frag_len_min = int(10 + c[0] * 190)
    frag_len_max = frag_len_min + int(c[1] * 100)
    frag_interval_min = int(c[2] * 50)
    frag_interval_max = frag_interval_min + 5
    padding_min = int(c[3] * 500)
    padding_max = padding_min + int(c[4] * 500)

    grpc_name = random_service_name(grpc_seed)
    short_id = generate_short_id(sid_len)

    # Fragmentation and padding only apply to the tcp transport.
    if transport != "tcp":
        frag_strat = "none"
    padding_enabled = (transport == "tcp") and (padding_min > 0)

    return VlessConfig(
        transport_type=transport,
        proxy_port=port,
        dest_domain=dest,
        short_id=short_id,
        spider_x="/",
        fingerprint=fingerprint,
        alpn=alpn,
        grpc_service_name=grpc_name,
        xhttp_mode=xhttp_mode,
        fragment_strategy=frag_strat,
        fragment_length_min=frag_len_min,
        fragment_length_max=frag_len_max,
        fragment_interval_min=frag_interval_min,
        fragment_interval_max=frag_interval_max,
        padding_enabled=padding_enabled,
        padding_min=padding_min,
        padding_max=padding_max,
        mux_concurrency=mux_conc,
    )
def metrics_to_obs_vector(m: EpisodeMetrics) -> np.ndarray:
    """Encode one episode's metrics as 7 float32 features (higher is better).

    Order: connected flag, stability, throughput ratio, ping score,
    delivery (1 - loss), connect-time score, reconnect score. Ping is scaled
    against 1000 ms, connect time against 5000 ms and reconnects against 5.
    """
    connected = float(m.connected)
    stability = min(m.stability_ratio, 1.0)
    throughput = min(m.throughput_ratio, 1.0)
    ping_score = max(0.0, 1.0 - m.avg_ping_ms / 1000.0)
    delivery = 1.0 - min(m.packet_loss_ratio, 1.0)
    connect_score = max(0.0, 1.0 - m.connect_time_ms / 5000.0)
    reconnect_score = max(0.0, 1.0 - m.reconnect_count / 5.0)

    features = [
        connected,
        stability,
        throughput,
        ping_score,
        delivery,
        connect_score,
        reconnect_score,
    ]
    return np.array(features, dtype=np.float32)
class AlphaBypassEnv:
    """Environment in which each step runs one proxy episode through ``bridge``.

    An action is decoded into a ``VlessConfig``, executed via
    ``bridge.run_episode`` and scored with ``compute_reward``. The observation
    summarises the most recent episodes plus step-count and time-of-day /
    day-of-week features.
    """

    def __init__(
        self,
        bridge,
        episode_duration: int = 90,
        baseline_mbps: float = 1.0,
        max_steps: int = 0,
        fail_streak_warn: int = 10,
    ):
        # Static configuration.
        self.bridge = bridge
        self.episode_duration = episode_duration
        self.baseline_mbps = baseline_mbps
        self.max_steps = max_steps  # 0 means "never report done"
        self.fail_streak_warn = fail_streak_warn

        # Mutable episode state.
        self.history: list[EpisodeMetrics] = []
        self.step_count: int = 0
        self._fail_streak: int = 0

        # Space descriptors mirrored from the module constants.
        self.obs_dim = TOTAL_OBS_DIM
        self.discrete_nvec = DISCRETE_NVEC
        self.n_continuous = N_CONTINUOUS

    def _build_obs(self) -> np.ndarray:
        """Build the observation vector from history and the local clock.

        History slots are filled newest-first; unused slots stay zero.
        """
        obs = np.zeros(self.obs_dim, dtype=np.float32)

        newest_first = reversed(self.history[-HISTORY_LEN:])
        for slot, metrics in enumerate(newest_first):
            lo = slot * OBS_PER_EPISODE
            obs[lo:lo + OBS_PER_EPISODE] = metrics_to_obs_vector(metrics)

        tail = HISTORY_LEN * OBS_PER_EPISODE
        now = time.localtime()
        hour = now.tm_hour + now.tm_min / 60.0
        weekday = now.tm_wday

        obs[tail] = min(self.step_count / 1000.0, 1.0)
        # Cyclic encodings so 23:59 -> 00:00 and Sunday -> Monday are smooth.
        obs[tail + 1] = math.sin(2 * math.pi * hour / 24)
        obs[tail + 2] = math.cos(2 * math.pi * hour / 24)
        obs[tail + 3] = math.sin(2 * math.pi * weekday / 7)
        obs[tail + 4] = math.cos(2 * math.pi * weekday / 7)
        return obs

    def reset(self) -> np.ndarray:
        """Clear history and counters and return the initial observation."""
        self.history = []
        self.step_count = 0
        self._fail_streak = 0
        return self._build_obs()

    def step(
        self,
        discrete_action: np.ndarray,
        continuous_action: np.ndarray,
    ) -> Tuple[np.ndarray, float, bool, dict]:
        """Run one episode for the given action.

        Returns:
            ``(obs, reward, done, info)``. ``done`` is only ever true when
            ``max_steps`` is positive and has been reached.
        """
        cfg = decode_action(discrete_action, continuous_action)
        episode_id = str(uuid.uuid4())[:8]

        metrics = self.bridge.run_episode(
            cfg=cfg,
            episode_id=episode_id,
            duration=self.episode_duration,
        )
        reward = compute_reward(metrics, self.baseline_mbps)
        self.bridge.report_reward(episode_id, reward)

        self.history.append(metrics)
        self.step_count += 1

        if metrics.connected:
            self._fail_streak = 0
        else:
            self._fail_streak += 1
            # Warn exactly once, when the streak first hits the threshold.
            if self._fail_streak == self.fail_streak_warn:
                print(
                    f"\n⚠️  [Degradation] {self._fail_streak} FAIL подряд! "
                    f"Возможно РКН изменил политику или проблема с сетью."
                )

        done = self.max_steps > 0 and self.step_count >= self.max_steps
        obs = self._build_obs()
        info = {
            "episode_id": episode_id,
            "reward": reward,
            "connected": metrics.connected,
            "stability": metrics.stability_ratio,
            "throughput_mbps": metrics.throughput_mbps,
            "transport": cfg.transport_type,
            "dest": cfg.dest_domain,
            "fail_streak": self._fail_streak,
        }
        return obs, reward, done, info

reward.py ADDED Viewed

	@@ -0,0 +1,54 @@

+from shared .schemas import EpisodeMetrics
def _clamp01(x: float) -> float:
    """Clamp ``x`` into the closed interval [0, 1]."""
    return min(max(x, 0.0), 1.0)


def compute_reward(metrics: EpisodeMetrics, baseline_mbps: float = 1.0) -> float:
    """Score one episode: ``-1.0`` if it never connected, else a value in [0, 1].

    The score is a weighted sum of three components:

    * 50% connection quality (connect time 30%, packet delivery 40%, ping 30%)
    * 35% stability (stability ratio 70%, reconnect score 30%)
    * 15% speed, log-scaled throughput relative to ``baseline_mbps``

    Every sub-score is clamped to [0, 1] first, so out-of-range measurements
    (loss ratio above 1, negative throughput, ...) cannot push the reward out
    of range or make the logarithm raise.

    Returns:
        The reward rounded to 4 decimal places.
    """
    import math

    if not metrics.connected:
        return -1.0

    connect_score = _clamp01(1.0 - metrics.connect_time_ms / 5000.0)
    loss_score = _clamp01(1.0 - metrics.packet_loss_ratio)
    ping_score = _clamp01(1.0 - metrics.avg_ping_ms / 1000.0)
    connection_component = (
        connect_score * 0.3 + loss_score * 0.4 + ping_score * 0.3
    )

    stability_score = _clamp01(metrics.stability_ratio)
    reconnect_score = _clamp01(1.0 - metrics.reconnect_count / 5.0)
    stability_component = stability_score * 0.7 + reconnect_score * 0.3

    # Floor the baseline so a zero/negative value cannot divide by zero.
    safe_baseline = max(baseline_mbps, 0.1)
    speed_ratio = _clamp01(metrics.throughput_mbps / safe_baseline)
    # log1p(9 * x) / log1p(9) maps [0, 1] onto [0, 1] with diminishing returns.
    speed_score = math.log1p(speed_ratio * 9) / math.log1p(9)

    r = 0.0
    r += 0.50 * connection_component
    r += 0.35 * stability_component
    r += 0.15 * speed_score
    return round(r, 4)
def reward_to_label(r: float) -> str:
    """Map a reward to a short emoji label for log output.

    Bands are half-open: a reward equal to a threshold falls into the band
    above it.
    """
    # (exclusive upper bound, label), checked in ascending order.
    bands = (
        (-0.5, "💀 FAIL"),
        (0.0, "❌ bad"),
        (0.3, "⚠️ weak"),
        (0.6, "🟡 ok"),
        (0.8, "🟢 good"),
    )
    for upper, label in bands:
        if r < upper:
            return label
    return "🏆 great"

schemas.py ADDED Viewed

	@@ -0,0 +1,182 @@

+from __future__ import annotations
+from dataclasses import dataclass ,field ,asdict
+from typing import Optional ,List ,Dict ,Any
+import json
+import time
# Option tables for the discrete action dimensions. The position of each
# entry is what a discrete action index selects, so order matters.

TRANSPORT_TYPES = [
    "tcp",
    "grpc",
]

CANDIDATE_PORTS = [
    443,
    80,
    8443,
    2053,
    2083,
    2087,
    9443,
]

# Candidate destination / SNI domains.
SNI_DOMAINS = [
    "download.nvidia.com",
    "swscan.apple.com",
    "updates.cdn-apple.com",
    "steamcdn-a.akamaihd.net",
    "dl.delivery.mp.microsoft.com",
    "download.windowsupdate.com",
    "cdn.cloudflare.steamstatic.com",
    "origin-a.akamaihd.net",
    "pkg-containers.githubusercontent.com",
    "download.jetbrains.com",
    "packages.ubuntu.com",
    "ajax.aspnetcdn.com",
    "github-releases.githubusercontent.com",
    "objects.githubusercontent.com",
    "software.download.prss.microsoft.com",
]

# Client TLS fingerprint names.
FINGERPRINTS = [
    "chrome",
    "firefox",
    "edge",
    "safari",
    "ios",
    "random",
    "randomized",
]

ALPN_OPTIONS = [
    ["h2", "http/1.1"],
    ["h2"],
    ["http/1.1"],
]

FRAGMENT_STRATEGIES = [
    "none",
    "tlshello",
    "all",
]

# 0 disables mux in the generated client config.
MUX_CONCURRENCY_VALUES = [0, 1, 2, 4, 8, 16, 32]

# Short-id lengths in hex characters.
SHORT_ID_LENGTHS = [4, 8, 16]

XHTTP_MODES = [
    "packet-up",
    "streaming",
]
@dataclass
class VlessConfig:
    """Tunable parameters of one VLESS proxy configuration.

    Serialisable to and from plain dicts / JSON text.
    """

    # Transport and endpoint.
    transport_type: str = "tcp"
    proxy_port: int = 443
    dest_domain: str = "download.nvidia.com"
    short_id: str = "abcdef01"
    spider_x: str = "/"

    # TLS appearance.
    fingerprint: str = "chrome"
    alpn: List[str] = field(default_factory=lambda: ["h2", "http/1.1"])

    # Transport-specific options.
    grpc_service_name: str = "grpc"
    xhttp_mode: str = "packet-up"

    # Fragmentation.
    fragment_strategy: str = "none"
    fragment_length_min: int = 50
    fragment_length_max: int = 100
    fragment_interval_min: int = 1
    fragment_interval_max: int = 5

    # Padding.
    padding_enabled: bool = False
    padding_min: int = 0
    padding_max: int = 0

    # Multiplexing and extra request headers.
    mux_concurrency: int = 0
    extra_headers: Dict[str, str] = field(default_factory=dict)

    def to_dict(self) -> dict:
        """Return the fields as a plain (deep-copied) dict."""
        return asdict(self)

    @classmethod
    def from_dict(cls, d: dict) -> "VlessConfig":
        """Build an instance from a mapping of field names to values."""
        return cls(**d)

    def to_json(self) -> str:
        """Return the fields as JSON text."""
        as_mapping = self.to_dict()
        return json.dumps(as_mapping)

    @classmethod
    def from_json(cls, s: str) -> "VlessConfig":
        """Build an instance from JSON text produced by ``to_json``."""
        decoded = json.loads(s)
        return cls.from_dict(decoded)
@dataclass
class EpisodeMetrics:
    """Measurements collected for a single episode."""

    # Identity; timestamp defaults to the creation time.
    episode_id: str = ""
    timestamp: float = field(default_factory=time.time)

    # Connection outcome.
    connected: bool = False
    connect_time_ms: float = 0.0

    # Stability.
    stability_ratio: float = 0.0
    reconnect_count: int = 0
    drop_count: int = 0

    # Throughput.
    throughput_mbps: float = 0.0
    throughput_ratio: float = 0.0

    # Latency and loss.
    avg_ping_ms: float = 0.0
    max_ping_ms: float = 0.0
    packet_loss_ratio: float = 0.0

    # Diagnostics.
    error_message: Optional[str] = None
    samples: int = 0

    def to_dict(self) -> dict:
        """Return the fields as a plain dict."""
        return asdict(self)

    @classmethod
    def from_dict(cls, d: dict) -> "EpisodeMetrics":
        """Build an instance from a mapping of field names to values."""
        return cls(**d)
@dataclass
class EpisodeCommand:
    """Instruction describing one episode to run."""

    episode_id: str = ""
    config: Optional[dict] = None  # configuration as a plain dict
    duration_seconds: int = 90
    server_ip: str = ""
    server_port: int = 443
    uuid: str = ""

    def to_dict(self) -> dict:
        """Return the fields as a plain dict."""
        return asdict(self)

    @classmethod
    def from_dict(cls, d: dict) -> "EpisodeCommand":
        """Build an instance from a mapping; ``config`` is attached as-is."""
        remaining = dict(d.items())
        config = remaining.pop("config", None)
        command = cls(**remaining)
        command.config = config
        return command
+@dataclass
+class ClientStatus :
+    episode_id :str =""
+    partial_metrics :Optional [dict ]=None
+    phase :str ="idle"