From 1172f5b38c0cca672548439f3d0775f0759c3125 Mon Sep 17 00:00:00 2001
From: James Blair <mail@jamesblair.net>
Date: Thu, 23 Jul 2020 12:29:02 +1200
Subject: [PATCH] Added example reward functions.

---
 reward-example-encourage-racing-line.py | 46 +++++++++++++++++++++++++
 reward-example-follow-centre-line.py    | 25 ++++++++++++++
 reward-example-prevent-zig-zag.py       | 33 ++++++++++++++++++
 reward-example-pure-pursuit.py          | 32 +++++++++++++++++
 reward-example-self-motivation.py       |  8 +++++
 5 files changed, 144 insertions(+)
 create mode 100644 reward-example-encourage-racing-line.py
 create mode 100644 reward-example-follow-centre-line.py
 create mode 100644 reward-example-prevent-zig-zag.py
 create mode 100644 reward-example-pure-pursuit.py
 create mode 100644 reward-example-self-motivation.py

diff --git a/reward-example-encourage-racing-line.py b/reward-example-encourage-racing-line.py
new file mode 100644
index 0000000..4bfeee9
--- /dev/null
+++ b/reward-example-encourage-racing-line.py
@@ -0,0 +1,46 @@
+import math
+def reward_function(params):
+    """Reward the car for keeping its heading aligned with the track direction.
+
+    The reward starts at 1.0, gains a bonus of 100 when the lap is complete
+    (progress == 100), and is scaled down when the heading deviates from the
+    direction of the centre-line segment between the two closest waypoints.
+
+    Args:
+        params: dict of simulator inputs; reads 'progress', 'waypoints',
+            'closest_waypoints' and 'heading' (heading is compared against
+            a math.degrees result, so it is treated as degrees).
+
+    Returns:
+        float: the reward for the current step.
+    """
+    DIRECTION_THRESHOLD = 10.0  # degrees of misalignment tolerated without penalty
+    DIRECTION_CUTOFF = 50.0  # degrees of misalignment at which the reward hits zero
+
+    progress = params['progress']
+    waypoints = params['waypoints']
+    closest_waypoints = params['closest_waypoints']
+    heading = params['heading']
+
+    reward = 1.0
+    if progress == 100:
+        reward += 100
+
+    # Direction of the centre line, from the previous to the next waypoint.
+    next_point = waypoints[closest_waypoints[1]]
+    prev_point = waypoints[closest_waypoints[0]]
+    # atan2(dy, dx) yields radians in (-pi, pi]; convert to degrees.
+    track_direction = math.degrees(
+        math.atan2(next_point[1] - prev_point[1], next_point[0] - prev_point[0]))
+
+    # Smallest angle between the two directions: fold differences above 180
+    # back into [0, 180] so that e.g. 179 vs -179 counts as 2 degrees, not 358.
+    direction_diff = abs(track_direction - heading)
+    if direction_diff > 180:
+        direction_diff = 360 - direction_diff
+
+    if direction_diff > DIRECTION_THRESHOLD:
+        # Linear penalty, clamped so the reward never becomes negative.
+        reward *= max(0.0, 1 - direction_diff / DIRECTION_CUTOFF)
+
+    return reward
diff --git a/reward-example-follow-centre-line.py b/reward-example-follow-centre-line.py
new file mode 100644
index 0000000..a1a18a4
--- /dev/null
+++ b/reward-example-follow-centre-line.py
@@ -0,0 +1,25 @@
+def reward_function(params):
+    '''
+    Example of rewarding the agent to follow the center line.
+
+    Reads 'track_width' and 'distance_from_center' from params and returns
+    a float reward that shrinks in steps as the car moves off center.
+    '''
+    track_width = params['track_width']
+    distance_from_center = params['distance_from_center']
+
+    # Calculate 3 markers that are increasingly further away from the center line
+    marker_1 = 0.1 * track_width
+    marker_2 = 0.25 * track_width
+    marker_3 = 0.5 * track_width
+
+    # Give higher reward if the car is closer to center line and vice versa
+    if distance_from_center <= marker_1:
+        reward = 1.0
+    elif distance_from_center <= marker_2:
+        reward = 0.5
+    elif distance_from_center <= marker_3:
+        reward = 0.1
+    else:
+        reward = 1e-3  # likely crashed / close to off track
+    return float(reward)
diff --git a/reward-example-prevent-zig-zag.py b/reward-example-prevent-zig-zag.py
new file mode 100644
index 0000000..fcda7e1
--- /dev/null
+++ b/reward-example-prevent-zig-zag.py
@@ -0,0 +1,33 @@
+def reward_function(params):
+    '''
+    Reward staying near the center line and discount hard steering,
+    which helps mitigate zig-zag behaviors.
+
+    Reads 'distance_from_center', 'track_width' and 'steering_angle' from
+    params and returns the reward as a float.
+    '''
+    # Pull the inputs; only the magnitude of the steering angle matters here
+    offset = params['distance_from_center']
+    width = params['track_width']
+    steer_magnitude = abs(params['steering_angle'])
+
+    # Steering penalty threshold, change the number based on your action space setting
+    ABS_STEERING_THRESHOLD = 15
+
+    # (fraction of track width, reward) bands, ordered from the center outwards
+    bands = (
+        (0.1, 1.0),
+        (0.25, 0.5),
+        (0.5, 0.1),
+    )
+
+    # Start from the fallback (likely crashed / close to off track), then take
+    # the reward of the first band that still contains the car
+    reward = 1e-3
+    for fraction, band_reward in bands:
+        if offset <= fraction * width:
+            reward = band_reward
+            break
+
+    # Scale the reward down if the car is steering too much
+    return float(reward * 0.8 if steer_magnitude > ABS_STEERING_THRESHOLD else reward)
diff --git a/reward-example-pure-pursuit.py b/reward-example-pure-pursuit.py
new file mode 100644
index 0000000..a0890b8
--- /dev/null
+++ b/reward-example-pure-pursuit.py
@@ -0,0 +1,32 @@
+  def reward_function(self, on_track, x, y, distance_from_center, car_orientation, progress, steps,
+                        throttle, steering, track_width, waypoints, closest_waypoints):
+        """Reward the car for pointing at the waypoint after the closest one.
+
+        A point is projected from (x, y) along car_orientation as far away as
+        that target waypoint (the "rabbit"); the closer it lands to the
+        rabbit, the higher the reward (1e-3 facing away, 1 + 1e-3 facing it).
+        closest_waypoints is used as one integer index into waypoints.
+        NOTE(review): car_orientation feeds math.cos/sin, so radians assumed.
+        """
+        import math  # local import keeps this snippet self-contained
+
+        # Wrap around so the last waypoint looks ahead to the first one
+        # instead of indexing past the end of the list.
+        rabbit = waypoints[(closest_waypoints + 1) % len(waypoints)]
+        radius = math.hypot(x - rabbit[0], y - rabbit[1])
+
+        pointing_x = x + radius * math.cos(car_orientation)
+        pointing_y = y + radius * math.sin(car_orientation)
+        vector_delta = math.hypot(pointing_x - rabbit[0], pointing_y - rabbit[1])
+
+        # vector_delta spans 0 (aimed at the rabbit) to radius * 2 (aimed
+        # directly away); the zero check also avoids 0 / 0 when the car
+        # sits exactly on the rabbit.
+        reward = 1e-3
+        if vector_delta == 0:
+            reward += 1
+        else:
+            reward += 1 - vector_delta / (radius * 2)
+
+        return reward
+
diff --git a/reward-example-self-motivation.py b/reward-example-self-motivation.py
new file mode 100644
index 0000000..fe5fca3
--- /dev/null
+++ b/reward-example-self-motivation.py
@@ -0,0 +1,8 @@
+def reward_function(params):
+    """Reward progress per step (scaled by 100) plus squared speed while on track."""
+    # Off track, or before the first step (guards the division): flat minimal reward.
+    if not (params["all_wheels_on_track"] and params["steps"] > 0):
+        return float(0.01)
+    progress_per_step = params["progress"] / params["steps"]
+    speed_squared = params["speed"] ** 2
+    return float((progress_per_step * 100) + speed_squared)