Added example reward functions.
This commit is contained in:
		
							
								
								
									
										46
									
								
								reward-example-encourage-racing-line.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										46
									
								
								reward-example-encourage-racing-line.py
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,46 @@
 | 
			
		||||
import math
 | 
			
		||||
def reward_function(params):
    """Reward the car for heading in the direction of the track.

    The reward starts at 1.0, gets a bonus of 100 when the lap is complete
    (``progress == 100``), and is scaled down when the car's heading deviates
    from the direction of the segment joining the two closest waypoints.

    Args:
        params: Dictionary of simulator inputs. This function reads
            ``waypoints`` (sequence of ``(x, y)`` points),
            ``closest_waypoints`` (pair of indices: previous, next),
            ``heading`` (compared against an angle in degrees) and
            ``progress``.

    Returns:
        The reward as a float; never negative.
    """
    # Read input variables
    waypoints = params['waypoints']
    closest_waypoints = params['closest_waypoints']
    heading = params['heading']
    progress = params['progress']

    # Heading error (degrees) up to which no penalty is applied
    DIRECTION_THRESHOLD = 10.0
    # Heading error (degrees) at which the reward reaches zero
    DIRECTION_CUTOFF = 50.0

    reward = 1.0

    # Bonus for completing the lap
    if progress == 100:
        reward += 100

    # Calculate the direction of the center line based on the closest waypoints
    next_point = waypoints[closest_waypoints[1]]
    prev_point = waypoints[closest_waypoints[0]]

    # Direction in radians from atan2(dy, dx), range [-pi, pi], converted to degrees
    track_direction = math.degrees(
        math.atan2(next_point[1] - prev_point[1], next_point[0] - prev_point[0])
    )

    # Difference between the track direction and the heading direction of the car
    direction_diff = abs(track_direction - heading)
    # Both angles wrap at +/-180 degrees: take the shorter way around the
    # circle so that e.g. 180 vs -175 counts as 5 degrees, not 355.
    if direction_diff > 180:
        direction_diff = 360 - direction_diff

    # Penalize the reward linearly once the difference exceeds the threshold,
    # clamping the multiplier at zero so the reward never turns negative.
    if direction_diff > DIRECTION_THRESHOLD:
        reward *= max(0.0, 1 - direction_diff / DIRECTION_CUTOFF)

    return float(reward)
 | 
			
		||||
							
								
								
									
										25
									
								
								reward-example-follow-centre-line.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										25
									
								
								reward-example-follow-centre-line.py
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,25 @@
 | 
			
		||||
def reward_function(params):
    '''
    Example of rewarding the agent to follow center line

    Reads 'track_width' and 'distance_from_center' from params and returns a
    float reward that shrinks in steps as the car moves away from the center
    line.
    '''

    # Read input parameters
    track_width = params['track_width']
    distance_from_center = params['distance_from_center']

    # Calculate 3 markers that are increasingly further away from the center line
    marker_1 = 0.1 * track_width
    marker_2 = 0.25 * track_width
    marker_3 = 0.5 * track_width

    # Give higher reward if the car is closer to center line and vice versa
    if distance_from_center <= marker_1:
        reward = 1.0
    elif distance_from_center <= marker_2:
        reward = 0.5
    elif distance_from_center <= marker_3:
        reward = 0.1
    else:
        reward = 1e-3  # likely crashed/ close to off track

    # Always hand back a float, whichever tier was selected
    return float(reward)
 | 
			
		||||
							
								
								
									
										33
									
								
								reward-example-prevent-zig-zag.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										33
									
								
								reward-example-prevent-zig-zag.py
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,33 @@
 | 
			
		||||
def reward_function(params):
    '''
    Example of penalizing steering, which helps mitigate zig-zag behaviors

    The base reward depends on how far the car is from the center line
    (relative to the track width); it is then reduced by 20% when the
    absolute steering angle exceeds a fixed threshold.
    '''

    # Inputs taken from the simulator
    offset = params['distance_from_center']
    width = params['track_width']
    steer_magnitude = abs(params['steering_angle'])  # sign is irrelevant here

    # (fraction of track width, reward) pairs, nearest band first
    bands = (
        (0.1, 1.0),
        (0.25, 0.5),
        (0.5, 0.1),
    )

    # The first band that contains the car decides the base reward
    for fraction, band_reward in bands:
        if offset <= fraction * width:
            score = band_reward
            break
    else:
        score = 1e-3  # likely crashed/ close to off track

    # Steering penalty threshold, change the number based on your action space setting
    max_abs_steering = 15

    # Scale the reward down when the car is steering too much
    if steer_magnitude > max_abs_steering:
        score *= 0.8

    return float(score)
 | 
			
		||||
							
								
								
									
										32
									
								
								reward-example-pure-pursuit.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										32
									
								
								reward-example-pure-pursuit.py
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,32 @@
 | 
			
		||||
  def reward_function(self, on_track, x, y, distance_from_center, car_orientation, progress, steps,
                        throttle, steering, track_width, waypoints, closest_waypoints):
        """Reward the car for pointing at the waypoint after the closest one.

        A point is projected from the car's position along its orientation,
        at the same distance as the target waypoint. The closer that projected
        point is to the target, the higher the reward.

        Only ``x``, ``y``, ``car_orientation``, ``waypoints`` and
        ``closest_waypoints`` are used; the other arguments are accepted but
        not read.

        Args:
            x, y: Position of the car.
            car_orientation: Yaw of the car; passed straight to
                ``math.cos``/``math.sin``, so it is treated as radians.
            waypoints: Sequence of ``(x, y)`` points.
            closest_waypoints: Integer index into ``waypoints``.

        Returns:
            A reward between roughly 1e-3 (pointing directly away) and
            1 + 1e-3 (pointing directly at the target).
        """
        # Local import: this block uses math and the import is not otherwise
        # visible in the file.
        import math

        reward = 1e-3

        # Target ("rabbit") is the waypoint after the closest one. Wrap the
        # index so the last waypoint does not raise IndexError.
        # NOTE(review): wrapping assumes the track is a closed loop - confirm.
        target = waypoints[(closest_waypoints + 1) % len(waypoints)]
        rabbit = [target[0], target[1]]

        radius = math.hypot(x - rabbit[0], y - rabbit[1])

        # Point at distance `radius` from the car along its current yaw
        pointing = [
            x + (radius * math.cos(car_orientation)),
            y + (radius * math.sin(car_orientation)),
        ]

        vector_delta = math.hypot(pointing[0] - rabbit[0], pointing[1] - rabbit[1])

        # Max distance for pointing away will be the radius * 2
        # Min distance means we are pointing directly at the next waypoint
        # We can setup a reward that is a ratio to this max.
        if vector_delta == 0:
            # Also covers radius == 0 (car exactly on the target), which
            # would otherwise divide by zero below.
            reward += 1
        else:
            reward += (1 - (vector_delta / (radius * 2)))

        return reward
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										8
									
								
								reward-example-self-motivation.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										8
									
								
								reward-example-self-motivation.py
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,8 @@
 | 
			
		||||
def reward_function(params):
    """Reward progress made per step, plus the square of the speed.

    While all wheels are on track and at least one step has been taken, the
    reward is ``progress / steps * 100 + speed ** 2``. Otherwise a small
    constant reward of 0.01 is returned.
    """
    # Off track, or no steps yet (which would divide by zero below)
    if not params["all_wheels_on_track"] or not params["steps"] > 0:
        return float(0.01)

    pace = (params["progress"] / params["steps"]) * 100
    speed_bonus = params["speed"] ** 2
    return float(pace + speed_bonus)
 | 
			
		||||
		Reference in New Issue
	
	Block a user