Removed head-to-head reward function.

2020-07-13 12:49:55 +12:00
parent bef6bd6bad
commit 80a513716e
1 changed files with 0 additions and 51 deletions
--- a/Readme.md
+++ b/Readme.md
@@ -320,57 +320,6 @@ Once you are done creating your reward function be sure to use the **Validate**

 Scroll down and select **Next**.

-For those interested in head-to-head racing, here is a basic reward function:
-
-**Example 4**: Basic head-to-head reward function
-
-
-	def reward_function(params):
-	    '''
-	    Reward the agent for staying inside the two track borders
-	    and penalize it for getting too close to the next object in its lane
-	    '''
-
-	    all_wheels_on_track = params['all_wheels_on_track']
-	    distance_from_center = params['distance_from_center']
-	    track_width = params['track_width']
-	    objects_distance = params['objects_distance']
-	    _, next_object_index = params['closest_objects']
-	    objects_left_of_center = params['objects_left_of_center']
-	    is_left_of_center = params['is_left_of_center']
-
-	    # Start with a small but non-zero reward,
-	    # because zero means off-track or crashed
-	    reward = 1e-3
-
-	    # Lane term: all wheels on track and at least 0.05 inside the border
-	    if all_wheels_on_track and (0.5 * track_width - distance_from_center) >= 0.05:
-		reward_lane = 1.0
-	    else:
-		reward_lane = 1e-3
-
-	    # Avoidance term: full credit unless a same-lane object is close
-	    reward_avoid = 1.0
-
-	    # Distance entry for the next object (NOTE(review): confirm it is measured from the agent)
-	    distance_closest_object = objects_distance[next_object_index]
-	    # Check whether the agent and the next object are on the same side of the center line
-	    is_same_lane = objects_left_of_center[next_object_index] == is_left_of_center
-
-	    if is_same_lane:
-		if 0.5 <= distance_closest_object < 0.8: 
-		    reward_avoid *= 0.5
-		elif 0.3 <= distance_closest_object < 0.5:
-		    reward_avoid *= 0.2
-		elif distance_closest_object < 0.3:
-		    reward_avoid = 1e-3 # Likely crashed
-
-	    # Combine the two terms: lane keeping is weighted 1.0
-	    # and object avoidance is weighted 4.0
-	    reward += 1.0 * reward_lane + 4.0 * reward_avoid
-
-	    return reward
-
 #### 4.3.2 Training algorithm and hyperparameters

 ![Training algorithm and hyperparameters](img/hyperparameters.png)