Removed head-to-head reward function.
This commit is contained in:
51
Readme.md
51
Readme.md
@ -320,57 +320,6 @@ Once you are done creating your reward function be sure to use the **Validate**
|
||||
|
||||
Scroll down and select **Next**.
|
||||
|
||||
For those interested in head-to-head racing, here is a basic reward function:
|
||||
|
||||
**Example 4**: Basic head-to-head reward function
|
||||
|
||||
|
||||
def reward_function(params):
    '''
    Example of rewarding the agent to stay inside two borders
    and penalizing getting too close to the objects in front.

    Args:
        params: dict of simulator inputs. Keys read here are
            'all_wheels_on_track', 'distance_from_center', 'track_width',
            'objects_distance', 'closest_objects',
            'objects_left_of_center' and 'is_left_of_center'.

    Returns:
        float: a small base reward plus the weighted sum of the
        lane-keeping reward (weight 1.0) and the object-avoidance
        reward (weight 4.0).
    '''

    all_wheels_on_track = params['all_wheels_on_track']
    distance_from_center = params['distance_from_center']
    track_width = params['track_width']
    objects_distance = params['objects_distance']
    # Only the second entry of the pair is used; it is treated as the
    # index of the next object to avoid.
    _, next_object_index = params['closest_objects']
    objects_left_of_center = params['objects_left_of_center']
    is_left_of_center = params['is_left_of_center']

    # Initialize reward with a small number but not zero
    # because zero means off-track or crashed
    reward = 1e-3

    # Reward if the agent stays inside the two borders of the track
    if all_wheels_on_track and (0.5 * track_width - distance_from_center) >= 0.05:
        reward_lane = 1.0
    else:
        reward_lane = 1e-3

    # Penalize if the agent is too close to the next object
    reward_avoid = 1.0

    try:
        # Distance to the next object
        distance_closest_object = objects_distance[next_object_index]
        next_object_left_of_center = objects_left_of_center[next_object_index]
    except IndexError:
        # The object lists are empty (or the index is out of range), so
        # there is nothing to avoid: keep the full avoidance reward
        # instead of crashing the reward computation.
        is_same_lane = False
    else:
        # Decide if the agent and the next object are in the same lane
        is_same_lane = next_object_left_of_center == is_left_of_center

    if is_same_lane:
        if 0.5 <= distance_closest_object < 0.8:
            reward_avoid *= 0.5
        elif 0.3 <= distance_closest_object < 0.5:
            reward_avoid *= 0.2
        elif distance_closest_object < 0.3:
            reward_avoid = 1e-3  # Likely crashed

    # Calculate reward by putting different weights on
    # the two aspects above
    reward += 1.0 * reward_lane + 4.0 * reward_avoid

    return reward
|
||||
|
||||
#### 4.3.2 Training algorithm and hyperparameters
|
||||
|
||||

|
||||
|
||||
Reference in New Issue
Block a user