Tags: Machine learning, Neural network
Cosine learning-rate schedule
class CosineLRScheduler(keras.callbacks.Callback):
    """Keras callback that cosine-anneals the optimizer's learning rate per batch.

    Delegates the actual schedule to the module-level ``cosine_lr`` helper,
    stepping an internal counter once per training batch.
    """

    def __init__(self, total_steps: int, lr_max: float, lr_min: float):
        """Record schedule bounds and reset the batch counter to zero."""
        super().__init__()
        self.total_steps = total_steps
        self.lr_max = lr_max
        self.lr_min = lr_min
        self.n = 0  # number of batches seen so far

    def on_batch_begin(self, batch, logs=None):
        """Apply the scheduled learning rate, then advance the counter."""
        new_lr = cosine_lr(self.n, self.total_steps, self.lr_max, self.lr_min)
        self.model.optimizer.learning_rate = new_lr
        self.n += 1
def cosine_lr(step: int, total_steps: int, lr_max: float, lr_min: float):
    """Return the cosine-annealed learning rate for ``step``.

    Decays from ``lr_max`` at step 0 toward ``lr_min`` at ``total_steps``;
    any step at or beyond ``total_steps`` is clamped to ``lr_min``.
    """
    if step < total_steps:
        cosine_term = 1 + np.cos(np.pi * step / total_steps)
        return lr_min + 0.5 * (lr_max - lr_min) * cosine_term
    return lr_min
Cosine schedule with linear warmup
def get_lr(it: int, warmup_iters: int, max_iters: int,
           lr_max=None, lr_min_value=None) -> float:
    """Cosine learning-rate schedule with linear warmup.

    Args:
        it: Current iteration (0-based).
        warmup_iters: Number of iterations of linear warmup from 0 to peak.
        max_iters: Iteration at which the decay bottoms out at the floor.
        lr_max: Peak learning rate. Defaults to the module-level
            ``learning_rate`` global for backward compatibility.
        lr_min_value: Floor learning rate. Defaults to the module-level
            ``min_lr`` global for backward compatibility.

    Returns:
        The learning rate to use at iteration ``it``.
    """
    # Fall back to the module globals the original implementation relied on,
    # so existing callers are unaffected while new callers can be explicit.
    if lr_max is None:
        lr_max = learning_rate
    if lr_min_value is None:
        lr_min_value = min_lr
    # 1) linear warmup for warmup_iters steps (returns 0.0 at it == 0)
    if it < warmup_iters:
        return lr_max * it / warmup_iters
    # 2) past max_iters, hold at the minimum learning rate
    if it > max_iters:
        return lr_min_value
    # 3) in between, cosine decay from lr_max down to lr_min_value
    decay_ratio = (it - warmup_iters) / (max_iters - warmup_iters)
    assert 0 <= decay_ratio <= 1  # internal invariant given the guards above
    coeff = 0.5 * (1.0 + math.cos(math.pi * decay_ratio))  # goes 1 -> 0
    return lr_min_value + coeff * (lr_max - lr_min_value)