# Training script for reinforcement-learning-based TCP congestion control.
# Import the necessary libraries.
import gym
import network_sim
import tensorflow as tf

from stable_baselines.common.policies import FeedForwardPolicy
from stable_baselines import PPO1
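# Note: PPO1 is provided by stable-baselines 2.x, which targets
# TensorFlow 1.x; the SavedModel export at the bottom of this script
# likewise uses the TF1 tf.saved_model API.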

import os
import sys
import inspect

# Get the directory containing this file.
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
# Get its parent directory.
parentdir = os.path.dirname(currentdir)
# Add the parent directory to the module search path so the shared
# argument parser can be imported.
sys.path.insert(0, parentdir)
from common.simple_arg_parse import arg_or_default
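# arg_or_default is a small helper from this repository's common package;
# judging from its use below, it presumably reads a command-line flag and
# falls back to the given default when the flag is absent.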

# Hidden-layer widths for the policy and value networks.
arch_str = arg_or_default("--arch", default="32,16")
# An empty string means no hidden layers; otherwise parse the
# comma-separated layer widths.
if arch_str == "":
    arch = []
else:
    arch = [int(layer_width) for layer_width in arch_str.split(",")]
# Print the parsed architecture.
print("Architecture is: %s" % str(arch))

# The TensorFlow session is captured here when the policy is constructed,
# so the checkpoint saver in the training loop can use it.
training_sess = None

# Custom feed-forward MLP policy that also records the training session.
class MyMlpPolicy(FeedForwardPolicy):
    def __init__(self, sess, ob_space, ac_space, n_env, n_steps, n_batch,
                 reuse=False, **_kwargs):
        super(MyMlpPolicy, self).__init__(sess, ob_space, ac_space, n_env,
                                          n_steps, n_batch, reuse,
                                          net_arch=[{"pi": arch, "vf": arch}],
                                          feature_extraction="mlp", **_kwargs)
        # Capture the session for the checkpoint saver below.
        global training_sess
        training_sess = sess
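
# In stable-baselines, net_arch=[{"pi": arch, "vf": arch}] (as used above)
# builds separate, unshared MLPs for the policy and the value function,
# each with the hidden-layer widths listed in arch.
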
# Create the simulated-network gym environment; 'PccNs-v0' is registered
# as a side effect of importing network_sim above.
env = gym.make('PccNs-v0')

# Discount factor for future rewards.
gamma = arg_or_default("--gamma", default=0.99)
print("gamma = %f" % gamma)

# Build the PPO1 model with a constant learning-rate schedule.
model = PPO1(MyMlpPolicy, env, verbose=1, schedule='constant', timesteps_per_actorbatch=8192, optim_batchsize=2048, gamma=gamma)
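# timesteps_per_actorbatch sets how many environment steps PPO collects
# per update iteration (8192 here), and optim_batchsize is the minibatch
# size used for each gradient step over that batch (2048 here).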

for i in range(0, 6):
    with model.graph.as_default():
        # Checkpoint the current weights so training can be resumed or
        # intermediate policies evaluated later.
        saver = tf.train.Saver()
        saver.save(training_sess, "./pcc_model_%d.ckpt" % i)
    # Train for 1600 * 410 = 656,000 environment timesteps per round.
    model.learn(total_timesteps=(1600 * 410))
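
# Note that each checkpoint is written before the corresponding learn()
# call, so pcc_model_0.ckpt holds the untrained initial weights; the
# weights from the final round of training are only persisted by the
# SavedModel export below.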

# Directory for the exported SavedModel; can be overridden with the
# --model-dir argument.
default_export_dir = "/tmp/pcc_saved_models/model_A/"
export_dir = arg_or_default("--model-dir", default=default_export_dir)
# Export the trained policy as a TensorFlow SavedModel.
with model.graph.as_default():
    pol = model.policy_pi

    # Observation placeholder fed at inference time.
    obs_ph = pol.obs_ph
    # Deterministic (greedy) action output.
    act = pol.deterministic_action
    # Stochastic action sampled from the policy distribution.
    sampled_act = pol.action
    # Wrap the input and output tensors in TensorInfo protos for the
    # SavedModel signature.
    obs_input = tf.saved_model.utils.build_tensor_info(obs_ph)
    outputs_tensor_info = tf.saved_model.utils.build_tensor_info(act)
    stochastic_act_tensor_info = tf.saved_model.utils.build_tensor_info(sampled_act)
    # Build a prediction SignatureDef mapping the observation input to the
    # deterministic and stochastic action outputs.
    signature = tf.saved_model.signature_def_utils.build_signature_def(
        inputs={"ob": obs_input},
        outputs={"act": outputs_tensor_info,
                 "stochastic_act": stochastic_act_tensor_info},
        method_name=tf.saved_model.signature_constants.PREDICT_METHOD_NAME)

    #"""
    signature_map = {tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
                     signature}
    #Building the SavedModel protocol buffer and saving variables and assets.
    model_builder = tf.saved_model.builder.SavedModelBuilder(export_dir)
    #adding the sessions and tages and the signature map to the model builder
    model_builder.add_meta_graph_and_variables(model.sess,
        tags=[tf.saved_model.tag_constants.SERVING],
        signature_def_map=signature_map,
        clear_devices=True)
        #now saving the model builder in form of text.
    model_builder.save(as_text=True)
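
# A minimal sketch of loading the exported model for inference (illustrative
# only, not part of the training run; assumes a TF1-style session, looks up
# tensor names from the signature built above, and uses `observation` to
# stand for a single environment observation):
#
#   with tf.Session(graph=tf.Graph()) as sess:
#       meta_graph = tf.saved_model.loader.load(
#           sess, [tf.saved_model.tag_constants.SERVING], export_dir)
#       sig = meta_graph.signature_def[
#           tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY]
#       action = sess.run(
#           sig.outputs["act"].name,
#           feed_dict={sig.inputs["ob"].name: [observation]})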