Added example reward functions.
This commit is contained in:
46
reward-example-encourage-racing-line.py
Normal file
46
reward-example-encourage-racing-line.py
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
import math
|
||||||
|
def reward_function(params):
    """Reward the car for keeping its heading aligned with the track.

    The base reward is 1.0, with a bonus of 100 added when ``progress``
    equals 100.  The track direction is taken as the direction of the
    segment joining the two closest waypoints.  When the heading deviates
    from it by more than ``DIRECTION_THRESHOLD`` degrees, the reward is
    multiplied by ``1 - deviation / 50``, floored at 0.

    Args:
        params: dict that must provide ``'waypoints'`` (sequence of
            ``(x, y)`` points), ``'closest_waypoints'`` (pair of indices
            into ``waypoints``: previous, next), ``'heading'`` (compared
            here against an angle in degrees) and ``'progress'``.

    Returns:
        The reward as a float.
    """
    # Deviation (degrees) tolerated before the reward starts shrinking.
    DIRECTION_THRESHOLD = 10.0
    # Deviation (degrees) at which the reward has shrunk to zero.
    ZERO_REWARD_DEVIATION = 50.0

    waypoints = params['waypoints']
    closest_waypoints = params['closest_waypoints']
    heading = params['heading']
    progress = params['progress']

    reward = 1.0
    if progress == 100:
        reward += 100

    # Direction of the centre line, from the previous to the next waypoint.
    prev_point = waypoints[closest_waypoints[0]]
    next_point = waypoints[closest_waypoints[1]]
    # atan2(dy, dx) yields radians in (-pi, pi]; convert to degrees.
    track_direction = math.degrees(
        math.atan2(next_point[1] - prev_point[1],
                   next_point[0] - prev_point[0])
    )

    # Smallest angle between the two directions.  Without the wrap-around,
    # e.g. 180 vs. -179 would count as 359 degrees apart instead of 1.
    direction_diff = abs(track_direction - heading) % 360.0
    if direction_diff > 180.0:
        direction_diff = 360.0 - direction_diff

    if direction_diff > DIRECTION_THRESHOLD:
        # Linear penalty, never allowed to turn the reward negative.
        reward *= max(0.0, 1.0 - direction_diff / ZERO_REWARD_DEVIATION)

    return reward
|
||||||
25
reward-example-follow-centre-line.py
Normal file
25
reward-example-follow-centre-line.py
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
def reward_function(params):
    '''
    Tiered reward for staying near the centre line.

    Reads 'track_width' and 'distance_from_center' from params and returns
    1, 0.5 or 0.1 when the distance is within 10%, 25% or 50% of the track
    width respectively, and 1e-3 beyond that.
    '''

    width = params['track_width']
    offset = params['distance_from_center']

    # Bands are checked from the tightest outwards; the first match wins.
    if offset <= 0.1 * width:
        return 1
    if offset <= 0.25 * width:
        return 0.5
    if offset <= 0.5 * width:
        return 0.1

    # Outside every band: likely crashed / close to off track.
    return 1e-3
|
||||||
33
reward-example-prevent-zig-zag.py
Normal file
33
reward-example-prevent-zig-zag.py
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
def reward_function(params):
    '''
    Centre-line reward with a penalty for hard steering, which helps
    mitigate zig-zag behaviour.

    The base reward is 1.0, 0.5 or 0.1 when 'distance_from_center' is within
    10%, 25% or 50% of 'track_width', and 1e-3 otherwise.  It is scaled by
    0.8 when the absolute 'steering_angle' exceeds the threshold below.
    '''

    offset = params['distance_from_center']
    width = params['track_width']
    # Only the magnitude of the steering angle matters, not its sign.
    steering_magnitude = abs(params['steering_angle'])

    # (fraction of track width, reward), from the tightest band outwards.
    bands = ((0.1, 1.0), (0.25, 0.5), (0.5, 0.1))
    # Fallback value: likely crashed / close to off track.
    score = next(
        (value for fraction, value in bands if offset <= fraction * width),
        1e-3,
    )

    # Steering penalty threshold; tune it to your action space setting.
    max_steering = 15
    if steering_magnitude > max_steering:
        score = score * 0.8

    return float(score)
|
||||||
32
reward-example-pure-pursuit.py
Normal file
32
reward-example-pure-pursuit.py
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
def reward_function(self, on_track, x, y, distance_from_center, car_orientation, progress, steps,
                    throttle, steering, track_width, waypoints, closest_waypoints):
    """Reward the car for pointing at the waypoint just ahead of it.

    The "rabbit" is the waypoint following ``closest_waypoints``.  A point
    is projected from the car's position along ``car_orientation`` by the
    distance to the rabbit; the closer that point lands to the rabbit, the
    higher the reward.

    Args:
        x, y: Current position of the car.
        car_orientation: Yaw of the car; passed straight to ``math.cos`` /
            ``math.sin``, so it is interpreted as radians.
        waypoints: Sequence of ``(x, y)`` waypoints.
        closest_waypoints: Integer index into ``waypoints``.
        Remaining parameters are accepted for interface compatibility and
        are not used.

    Returns:
        A float between 1e-3 (pointing directly away) and 1 + 1e-3
        (pointing straight at the rabbit).
    """
    # Imported locally: this file has no module-level ``import math``.
    import math

    reward = 1e-3

    # Wrap around so the last waypoint is followed by the first instead of
    # raising IndexError.
    # NOTE(review): assumes the waypoints describe a closed loop - confirm.
    rabbit_index = (closest_waypoints + 1) % len(waypoints)
    rabbit = [waypoints[rabbit_index][0], waypoints[rabbit_index][1]]

    radius = math.hypot(x - rabbit[0], y - rabbit[1])

    # Point at distance ``radius`` from the car along its current heading.
    pointing = [
        x + (radius * math.cos(car_orientation)),
        y + (radius * math.sin(car_orientation)),
    ]

    vector_delta = math.hypot(pointing[0] - rabbit[0], pointing[1] - rabbit[1])

    # vector_delta ranges from 0 (pointing directly at the rabbit) to
    # 2 * radius (pointing directly away), so reward its ratio to that max.
    if vector_delta == 0:
        # Also covers radius == 0, which would otherwise divide by zero.
        reward += 1
    else:
        reward += (1 - (vector_delta / (radius * 2)))

    return reward
|
||||||
|
|
||||||
8
reward-example-self-motivation.py
Normal file
8
reward-example-self-motivation.py
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
def reward_function(params):
    """Reward progress per step plus the square of the speed.

    Returns 0.01 when the car is not fully on the track or no step has been
    counted yet; otherwise ``progress / steps * 100 + speed ** 2``.
    """
    # Guard clause: off track, or no positive step count to divide by.
    if not params["all_wheels_on_track"] or not (params["steps"] > 0):
        return float(0.01)

    progress_per_step = params["progress"] / params["steps"]
    speed_bonus = params["speed"] ** 2
    return float((progress_per_step * 100) + speed_bonus)
|
||||||
Reference in New Issue
Block a user