Removed head-to-head reward function.

Readme.md (51 lines removed)

@@ -320,57 +320,6 @@ Once you are done creating your reward function be sure to use the **Validate**
Scroll down and select **Next**.
For those interested in head-to-head racing, here is a basic reward function:
**Example 4**: Basic head-to-head reward function

```python
def reward_function(params):
    '''
    Example of rewarding the agent for staying inside the two borders
    of the track while penalizing it for getting too close to the
    objects in front
    '''

    all_wheels_on_track = params['all_wheels_on_track']
    distance_from_center = params['distance_from_center']
    track_width = params['track_width']
    objects_distance = params['objects_distance']
    _, next_object_index = params['closest_objects']
    objects_left_of_center = params['objects_left_of_center']
    is_left_of_center = params['is_left_of_center']

    # Initialize reward with a small number but not zero
    # because zero means off-track or crashed
    reward = 1e-3

    # Reward if the agent stays inside the two borders of the track
    if all_wheels_on_track and (0.5 * track_width - distance_from_center) >= 0.05:
        reward_lane = 1.0
    else:
        reward_lane = 1e-3

    # Penalize if the agent is too close to the next object
    reward_avoid = 1.0

    # Distance to the next object
    distance_closest_object = objects_distance[next_object_index]
    # Decide if the agent and the next object are in the same lane
    is_same_lane = objects_left_of_center[next_object_index] == is_left_of_center

    if is_same_lane:
        if 0.5 <= distance_closest_object < 0.8:
            reward_avoid *= 0.5
        elif 0.3 <= distance_closest_object < 0.5:
            reward_avoid *= 0.2
        elif distance_closest_object < 0.3:
            reward_avoid = 1e-3  # Likely crashed

    # Calculate the reward by putting different weights on
    # the two aspects above
    reward += 1.0 * reward_lane + 4.0 * reward_avoid

    return reward
```
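
If you want to sanity-check this logic before using the **Validate** button, you can call `reward_function` locally with a hand-built `params` dictionary. The sketch below is illustrative only: the input values are invented, and during training the DeepRacer environment supplies `params` on every step.

```python
# Invented inputs for a local smoke test; during training the DeepRacer
# environment populates params automatically.
sample_params = {
    'all_wheels_on_track': True,
    'distance_from_center': 0.1,            # meters from the track centerline
    'track_width': 0.8,                     # meters
    'objects_distance': [2.5, 0.4],         # distance from the agent to each object
    'closest_objects': [0, 1],              # the code reads the second entry as the next object
    'objects_left_of_center': [True, False],
    'is_left_of_center': False,
}

# The next object (index 1) shares the agent's lane and is 0.4 away, so
# reward_avoid scales to 0.2 while reward_lane stays at 1.0:
# 1e-3 + 1.0 * 1.0 + 4.0 * 0.2 = 1.801
print(reward_function(sample_params))  # ~1.801
```

Note the 4:1 weighting in the final sum: avoidance can contribute up to 4.0 to the reward versus 1.0 for lane keeping, so the agent learns to prioritize not crashing into the object ahead over staying neatly inside the borders.
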
#### 4.3.2 Training algorithm and hyperparameters
