From 1172f5b38c0cca672548439f3d0775f0759c3125 Mon Sep 17 00:00:00 2001
From: James Blair
Date: Thu, 23 Jul 2020 12:29:02 +1200
Subject: [PATCH] Added example reward functions.

---
 reward-example-encourage-racing-line.py | 41 +++++++++++++++++++++++++
 reward-example-follow-centre-line.py    | 25 +++++++++++++++
 reward-example-prevent-zig-zag.py       | 33 ++++++++++++++++++++
 reward-example-pure-pursuit.py          | 35 +++++++++++++++++++++
 reward-example-self-motivation.py       |  8 +++++
 5 files changed, 142 insertions(+)
 create mode 100644 reward-example-encourage-racing-line.py
 create mode 100644 reward-example-follow-centre-line.py
 create mode 100644 reward-example-prevent-zig-zag.py
 create mode 100644 reward-example-pure-pursuit.py
 create mode 100644 reward-example-self-motivation.py

diff --git a/reward-example-encourage-racing-line.py b/reward-example-encourage-racing-line.py
new file mode 100644
index 0000000..4bfeee9
--- /dev/null
+++ b/reward-example-encourage-racing-line.py
@@ -0,0 +1,41 @@
+import math
+
+
+def reward_function(params):
+    '''
+    Example of rewarding the agent for heading along the track direction
+    '''
+
+    # Read input variables
+    waypoints = params['waypoints']
+    closest_waypoints = params['closest_waypoints']
+    heading = params['heading']
+    progress = params['progress']
+
+    reward = 1.0
+
+    # Bonus for completing the lap
+    if progress == 100:
+        reward += 100
+
+    # Calculate the direction of the center line based on the closest waypoints
+    next_point = waypoints[closest_waypoints[1]]
+    prev_point = waypoints[closest_waypoints[0]]
+    # Calculate the direction in radians, atan2(dy, dx), the result is (-pi, pi)
+    track_direction = math.atan2(next_point[1] - prev_point[1], next_point[0] - prev_point[0])
+    # Convert to degrees
+    track_direction = math.degrees(track_direction)
+    # Calculate the difference between the track direction and the heading direction of the car
+    direction_diff = abs(track_direction - heading)
+    # Both angles wrap at +/-180 degrees, so take the smaller of the two arcs
+    if direction_diff > 180:
+        direction_diff = 360 - direction_diff
+
+    # Penalize the reward if the difference is too large
+    DIRECTION_THRESHOLD = 10.0
+
+    if direction_diff > DIRECTION_THRESHOLD:
+        # Scale linearly down to zero at 50 degrees off the track direction
+        reward *= max(0.0, 1 - (direction_diff / 50))
+
+    return float(reward)
diff --git a/reward-example-follow-centre-line.py b/reward-example-follow-centre-line.py
new file mode 100644
index 0000000..a1a18a4
--- /dev/null
+++ b/reward-example-follow-centre-line.py
@@ -0,0 +1,25 @@
+def reward_function(params):
+    '''
+    Example of rewarding the agent to follow center line
+    '''
+
+    # Read input parameters
+    track_width = params['track_width']
+    distance_from_center = params['distance_from_center']
+
+    # Calculate 3 markers that are increasingly further away from the center line
+    marker_1 = 0.1 * track_width
+    marker_2 = 0.25 * track_width
+    marker_3 = 0.5 * track_width
+
+    # Give higher reward if the car is closer to center line and vice versa
+    if distance_from_center <= marker_1:
+        reward = 1
+    elif distance_from_center <= marker_2:
+        reward = 0.5
+    elif distance_from_center <= marker_3:
+        reward = 0.1
+    else:
+        reward = 1e-3  # likely crashed/ close to off track
+
+    return float(reward)
diff --git a/reward-example-prevent-zig-zag.py b/reward-example-prevent-zig-zag.py
new file mode 100644
index 0000000..fcda7e1
--- /dev/null
+++ b/reward-example-prevent-zig-zag.py
@@ -0,0 +1,33 @@
+def reward_function(params):
+    '''
+    Example of penalizing steering, which helps mitigate zig-zag behaviors
+    '''
+
+    # Read input parameters
+    distance_from_center = params['distance_from_center']
+    track_width = params['track_width']
+    steering = abs(params['steering_angle'])  # Only need the absolute steering angle
+
+    # Calculate 3 marks that are farther and farther away from the center line
+    marker_1 = 0.1 * track_width
+    marker_2 = 0.25 * track_width
+    marker_3 = 0.5 * track_width
+
+    # Give higher reward if the car is closer to center line and vice versa
+    if distance_from_center <= marker_1:
+        reward = 1.0
+    elif distance_from_center <= marker_2:
+        reward = 0.5
+    elif distance_from_center <= marker_3:
+        reward = 0.1
+    else:
+        reward = 1e-3  # likely crashed/ close to off track
+
+    # Steering penalty threshold, change the number based on your action space setting
+    ABS_STEERING_THRESHOLD = 15
+
+    # Penalize reward if the car is steering too much
+    if steering > ABS_STEERING_THRESHOLD:
+        reward *= 0.8
+
+    return float(reward)
diff --git a/reward-example-pure-pursuit.py b/reward-example-pure-pursuit.py
new file mode 100644
index 0000000..a0890b8
--- /dev/null
+++ b/reward-example-pure-pursuit.py
@@ -0,0 +1,35 @@
+    def reward_function(self, on_track, x, y, distance_from_center, car_orientation, progress, steps,
+                        throttle, steering, track_width, waypoints, closest_waypoints):
+        '''
+        Example of rewarding the agent for pointing at the next waypoint (pure pursuit)
+        '''
+        # Imported here because this snippet is a method body with no module header of its own
+        import math
+
+        reward = 1e-3
+
+        # Reward when yaw (car_orientation) is pointed to the next waypoint IN FRONT.
+
+        # Find the coordinates of the waypoint after the closest one, wrapping
+        # around to the start of the list at the end of the lap
+        rabbit = waypoints[(closest_waypoints + 1) % len(waypoints)]
+
+        radius = math.hypot(x - rabbit[0], y - rabbit[1])
+
+        # Project a point the same distance ahead of the car along its heading
+        # NOTE: math.cos/math.sin take radians; confirm car_orientation is supplied in radians
+        pointing_x = x + (radius * math.cos(car_orientation))
+        pointing_y = y + (radius * math.sin(car_orientation))
+
+        vector_delta = math.hypot(pointing_x - rabbit[0], pointing_y - rabbit[1])
+
+        # Max distance for pointing away will be the radius * 2
+        # Min distance means we are pointing directly at the next waypoint
+        # We can setup a reward that is a ratio to this max.
+
+        if vector_delta == 0:
+            reward += 1
+        else:
+            reward += (1 - (vector_delta / (radius * 2)))
+
+        return reward
diff --git a/reward-example-self-motivation.py b/reward-example-self-motivation.py
new file mode 100644
index 0000000..fe5fca3
--- /dev/null
+++ b/reward-example-self-motivation.py
@@ -0,0 +1,8 @@
+def reward_function(params):
+
+    if params["all_wheels_on_track"] and params["steps"] > 0:
+        reward = ((params["progress"] / params["steps"]) * 100) + (params["speed"]**2)
+    else:
+        reward = 0.01
+
+    return float(reward)