From 8556d0fd3f03a6e0aa4432faf36a86a5c6d58b61 Mon Sep 17 00:00:00 2001
From: BlinkDL
Date: Tue, 5 Jul 2022 18:58:54 +0800
Subject: [PATCH] no message

---
 RWKV-v3/train.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/RWKV-v3/train.py b/RWKV-v3/train.py
index a8f42dc..ce9eb2f 100644
--- a/RWKV-v3/train.py
+++ b/RWKV-v3/train.py
@@ -58,7 +58,7 @@ batch_size = 12
 # Here are my suggestions for training a good model.
 # Let's say you will train a L6-D512 model.
 # 1) Set lr_init = lr_final = 8e-4. Let it run for some mini-epochs, until the improvement of loss become slow.
-# 2) Ctrl+C to stop the run.
+# 2) Check epoch_save_frequency and make sure the partially-trained model is saved. Ctrl+C to stop the run.
 # 3) Set lr_init = 8e-4, lr_final = 1e-5, warmup_tokens = ctx_len * batch_size * 50, betas = (0.9, 0.999)
 # 4) Search for "torch.load" here and modify it to load the partially-trained model. Continue the training.
 #
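
The comment edited by this patch describes resuming from a partially-trained checkpoint (steps 2-4). Below is a minimal, hypothetical Python sketch of what step 4) amounts to; it is not the actual RWKV-v3 train.py code. The model class, checkpoint file name, and optimizer construction are placeholders, and the real script builds its own GPT model and learning-rate schedule.

import torch
import torch.nn as nn

class TinyStandIn(nn.Module):
    # Placeholder for the real RWKV/GPT model class constructed in train.py.
    def __init__(self):
        super().__init__()
        self.proj = nn.Linear(512, 512)

    def forward(self, x):
        return self.proj(x)

model = TinyStandIn()

# Assumed checkpoint name: use whatever file epoch_save_frequency actually wrote
# before the run was stopped with Ctrl+C.
checkpoint_path = 'trained-1.pth'
state_dict = torch.load(checkpoint_path, map_location='cpu')
model.load_state_dict(state_dict)  # resume from the partially-trained weights

# Then recreate the optimizer with the step-3 settings (lr decaying from 8e-4
# toward 1e-5 via the script's own schedule) and continue training.
optimizer = torch.optim.Adam(model.parameters(), lr=8e-4, betas=(0.9, 0.999))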