Module bento.example.mountcar
Expand source code
#
# bentobox-sdk
# mountain car example simulation
#
from bento import types
from bento.graph.plotter import Plotter
from bento.spec.ecs import ComponentDef, EntityDef
from bento.example.specs import Velocity, Position
from bento.sim import Simulation
from bento.spec.sim import SimulationDef
Action = ComponentDef(
    name="action",
    schema=dict(accelerate=types.int32),
)
"""
Action is the component through which the agent controls the car's acceleration.

Attributes:
    **accelerate** (`types.int32`): Selects how the car accelerates:

    - 0: Accelerate to the left
    - 1: Do not accelerate
    - 2: Accelerate to the right
"""
State = ComponentDef(
    name="state",
    schema=dict(
        reward=types.int32,
        ended=types.boolean,
    ),
)
"""
State records the current state of the Simulation.

Attributes:
    **reward** (`types.int32`): Reward given to the agent for the current Simulation step:

    - A reward of 0 is awarded once the agent has reached the flag (`Position.x >= 0.5`) on top of the mountain.
    - A penalty of -1 is given while the agent's position is `Position.x < 0.5`.

    **ended** (`types.boolean`): Whether the Simulation has ended (`Position.x > 0.5`).
"""
MountainCar = SimulationDef(
    name="mountain_car",
    components=[Velocity, Position, Action, State],
    # one entity for the car (physics) and one for the environment (action/state)
    entities=[
        EntityDef(components=group)
        for group in ([Velocity, Position], [Action, State])
    ],
)
"""
Mountain Car Simulation implemented using `bento`.

The car starts at a random position at the bottom of a valley.
At every step the agent may accelerate the car to the left, accelerate it to the
right, or apply no acceleration at all.
The objective of the Simulation is to reach the flag on top of the mountain at `Position.x > 0.5`.
The Simulation ends when the car's `Position.x > 0.5`.
"""
# Initial conditions of the Mountain Car simulation: the car is at rest at a
# random position drawn from g.random(-0.6, -0.4), no reward has been given,
# the simulation has not ended and the action is "don't accelerate" (1).
# NOTE(review): documented with `#` comments rather than a docstring so that the
# function body contains only the original statements, in case the decorator
# inspects the body - confirm whether a docstring would be acceptable here.
@MountainCar.init
def init_fn(g: Plotter):
    vehicle = g.entity(components=[Velocity, Position])
    world = g.entity(components=[Action, State])

    # car: stationary, placed somewhere between -0.6 and -0.4
    vehicle[Velocity].x = 0.0
    vehicle[Position].x = g.random(-0.6, -0.4)

    # environment: neutral reward, still running, no acceleration requested
    world[State].reward = 0
    world[State].ended = False
    world[Action].accelerate = 1
# Per-step system of the Mountain Car simulation: applies the requested
# acceleration and gravity to the car's velocity, moves the car, then updates
# the reward and the ended flag from the car's new position.
# NOTE(review): documented with `#` comments rather than a docstring so that the
# function body contains only the original statements, in case the decorator
# inspects the body - confirm whether a docstring would be acceptable here.
@MountainCar.system
def sim_fn(g: Plotter):
    world = g.entity(components=[Action, State])
    vehicle = g.entity(components=[Velocity, Position])

    # physics constants
    acceleration = 0.001
    gravity = 0.0025
    max_speed = 0.07
    min_position = -1.2
    max_position = 0.6

    # thrust chosen by the agent; the accelerate action maps as
    #   0 -> -acceleration (left), 1 -> 0 (none), 2 -> +acceleration (right)
    thrust = (world[Action].accelerate - 1) * acceleration
    vehicle[Velocity].x += thrust

    # deceleration due to gravity: the mountain follows y = sin(3*x), so the
    # pull along the path is taken from cos(3*x); it is negated because gravity
    # works against the direction of movement
    slope_pull = g.cos(3 * vehicle[Position].x) * (-gravity)
    vehicle[Velocity].x += slope_pull
    vehicle[Velocity].x = g.clip(vehicle[Velocity].x, -max_speed, max_speed)

    # move the car by its current velocity and keep it inside the track bounds
    vehicle[Position].x += vehicle[Velocity].x
    vehicle[Position].x = g.clip(
        vehicle[Position].x, min_x=min_position, max_x=max_position
    )

    # collision: the car stops when it hits the left wall at min_position
    if vehicle[Position].x <= min_position:
        vehicle[Velocity].x = 0.0

    # resolve simulation state: reward and simulation completion
    # NOTE(review): reward switches to 0 at `>= 0.5` while ended only becomes
    # True at `> 0.5`; the two differ at exactly 0.5 - confirm this is intended.
    world[State].reward = 0 if vehicle[Position].x >= 0.5 else -1
    world[State].ended = True if vehicle[Position].x > 0.5 else False
Global variables
var Action
-
Action allows the agent to control the car in the Simulation via its acceleration.
Attributes
accelerate (
types.int32
): Set the acceleration of the car.
- 0: Accelerate to the Left
- 1: Don't accelerate
- 2: Accelerate to the Right
var MountainCar
-
Mountain Car Simulation implemented using
bento
A car is started at a random position at the bottom of a valley. The agent may choose to accelerate the car to the left, to the right, or to cease any acceleration. The objective of the Simulation is to reach the flag on top of the mountain at
Position.x > 0.5
The simulation ends when the car's Position.x > 0.5
var State
-
State tracks the current state of the Simulation.
Attributes
reward (
types.int32
): Reward given to agent for the current Simulation step:
- Reward of 0 is awarded if the agent reached the flag (
Position.x >= 0.5
) on top of the mountain.
- A penalty of -1 is given if the position of the agent is
Position.x < 0.5
ended (
types.boolean
): Whether the Simulation has ended (Position.x > 0.5
). - Reward of 0 is awarded if the agent reached the flag (