|
|
|
|
|
/**
 * Two-hidden-layer MLP policy head (32 -> 32 -> num_actions) implemented as a
 * custom TensorFlow.js layer.
 *
 * Fix: the original constructed brand-new tf.layers.dense layers inside
 * call(), so fresh random weights were created on every forward pass and the
 * network could never learn. The sublayers are now created exactly once in
 * the constructor so their weights persist across calls.
 */
class WalkingAI extends tf.layers.Layer {
  /**
   * @param {number} num_actions - size of the output logits vector.
   */
  constructor(num_actions) {
    super({});
    this.num_actions = num_actions;
    // Create sublayers ONCE; creating them per-call would reinitialize
    // their weights on every forward pass.
    this.hidden1 = tf.layers.dense({units: 32, activation: 'relu'});
    this.hidden2 = tf.layers.dense({units: 32, activation: 'relu'});
    this.logitsLayer = tf.layers.dense({units: num_actions});
  }

  /**
   * Forward pass: inputs -> relu(32) -> relu(32) -> logits(num_actions).
   * @param {tf.Tensor|tf.Tensor[]} inputs - batch of states; TF.js may wrap
   *   the tensor in a one-element array when invoking call().
   * @returns {tf.Tensor} unnormalized action logits, shape [batch, num_actions].
   */
  call(inputs) {
    return tf.tidy(() => {
      // TF.js passes inputs to Layer.call() as an array of tensors.
      const x = Array.isArray(inputs) ? inputs[0] : inputs;
      const h1 = this.hidden1.apply(x);
      const h2 = this.hidden2.apply(h1);
      return this.logitsLayer.apply(h2);
    });
  }

  /** Output shape is [batch, num_actions]. */
  computeOutputShape(inputShape) {
    const shape = Array.isArray(inputShape[0]) ? inputShape[0] : inputShape;
    return [shape[0], this.num_actions];
  }

  /** Name used by the TF.js serialization registry. */
  static get className() {
    return 'WalkingAI';
  }
}
|
|
|
|
|
|
|
|
/**
 * Minimal policy-gradient-style agent over a 4-dimensional observation space
 * with a discrete action space.
 *
 * Fixes over the original:
 *  - The model was built with `outputs: new WalkingAI(n).apply` — an unbound
 *    method reference, not an output tensor. The layer is now actually
 *    applied to the symbolic input.
 *  - train() computed the loss OUTSIDE the optimizer's gradient closure, so
 *    no gradient ever flowed; it also fed raw action indices where
 *    softmaxCrossEntropy requires one-hot labels, passed the raw `states`
 *    array straight to predict(), and ignored `rewards` entirely. The loss
 *    is now computed inside optimizer.minimize() with one-hot targets and
 *    rewards as per-example loss weights (REINFORCE-style).
 */
class ReinforcementAgent {
  /**
   * @param {number} num_actions - number of discrete actions.
   */
  constructor(num_actions) {
    this.num_actions = num_actions;
    const stateInput = tf.input({shape: [4]});
    // Apply the policy layer to the symbolic input to get an output tensor.
    const actionLogits = new WalkingAI(num_actions).apply(stateInput);
    this.model = tf.model({inputs: stateInput, outputs: actionLogits});
    this.optimizer = tf.train.adam(0.001);
  }

  /**
   * Greedy action selection: argmax over the predicted logits.
   * @param {number[]} state - flat 4-element observation.
   * @returns {number} index of the chosen action.
   */
  getAction(state) {
    return tf.tidy(() => {
      const logits = this.model.predict(tf.tensor(state, [1, 4]));
      return tf.argMax(logits, 1).dataSync()[0];
    });
  }

  /**
   * One gradient step of reward-weighted cross-entropy.
   * @param {number[][]} states - batch of 4-element observations.
   * @param {number[]} actions - taken action indices (integer labels).
   * @param {number[]} rewards - per-step rewards, used as loss weights.
   */
  train(states, actions, rewards) {
    tf.tidy(() => {
      const stateTensor = tf.tensor2d(states, [states.length, 4]);
      // softmaxCrossEntropy expects one-hot labels, not raw indices.
      const targets = tf.oneHot(tf.tensor1d(actions, 'int32'), this.num_actions);
      const weights = tf.tensor1d(rewards);
      // The loss must be computed INSIDE minimize() so that gradients with
      // respect to the model weights are recorded and applied.
      this.optimizer.minimize(() => {
        const logits = this.model.predict(stateTensor);
        return tf.losses.softmaxCrossEntropy(targets, logits, weights);
      });
    });
  }
}
|
|
|
|
|
|
|
|
// Wire the start/stop control buttons to their click handlers.
for (const [buttonId, handler] of [
  ["startButton", startAI],
  ["stopButton", stopAI],
]) {
  document.getElementById(buttonId).addEventListener("click", handler);
}
|
|
|
|
|
|
|
|
/**
 * Click handler for the start button: runs the full training loop —
 * 100 episodes of up to 200 steps each — training the agent one transition
 * at a time and refreshing the UI after every step and episode.
 *
 * NOTE(review): `gym` is not defined anywhere in this file — presumably a
 * gym-like JS port is loaded globally; verify. Calling `new` on a factory
 * function (`new gym.make(...)`) is suspicious — confirm whether gym.make
 * is really a constructor or should be called without `new`.
 */
function startAI() {
  const env = new gym.make('YourEnvName');
  // Number of discrete actions exposed by the environment's action space.
  const numActions = env.actionSpace.n;
  const agent = new ReinforcementAgent(numActions);

  const numEpisodes = 100;
  const maxSteps = 200;
  for (let episode = 0; episode < numEpisodes; episode++) {
    // assumes env.reset() synchronously returns a 4-element state — TODO confirm
    let state = env.reset();
    let episodeReward = 0;

    for (let step = 0; step < maxSteps; step++) {
      const action = agent.getAction(state);
      // Gym-style step contract: [nextState, reward, done, info]; info unused.
      const [nextState, reward, done, _] = env.step(action);
      episodeReward += reward;
      // Online update on a batch of exactly one transition.
      agent.train([state], [action], [reward]);
      state = nextState;
      updateEnvironmentDisplay();
      if (done) {
        break;
      }
    }

    console.log("Episode:", episode, "Reward:", episodeReward);
    updateOutputDisplay();
  }
}
|
|
|
|
|
|
|
|
/**
 * Click handler for the stop button.
 * TODO: not implemented — there is no flag that the startAI loop checks,
 * so clicking stop currently has no effect.
 */
function stopAI() {

}
|
|
|
|
|
|
|
|
/**
 * Refreshes the environment visualization after each step.
 * TODO: not implemented — currently a no-op; called once per step inside
 * the startAI training loop.
 */
function updateEnvironmentDisplay() {

}
|
|
|
|
|
|
|
|
/**
 * Refreshes the output/statistics display after each episode.
 * TODO: not implemented — currently a no-op; called once per episode inside
 * the startAI training loop.
 */
function updateOutputDisplay() {

}
|
|
|