if True:  # True / False ---> Set to False if you don't understand it
    # SPECIAL DEBUG MODE: pin training to a single GPU and make every run
    # deterministic. The original text had "ifTrue:" (missing space) and the
    # body was un-indented, so these lines executed unconditionally — fixed.
    print("\n\n[[[ SPECIAL DEBUG MODE FOR MYSELF. DON'T ENABLE THIS IF YOU DON'T UNDERSTAND IT ]]]\n\n")
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"  # restrict to GPU 0
    import src.utils
    # Make training deterministic (including the dataloader). If you do this,
    # remember to change the seed when you load a model — otherwise the
    # dataloader re-serves the old samples.
    src.utils.set_seed(42)
# if False: # True False ---> Set to False if you don't understand it
# print("\n\n[[[ SPECIAL DEBUG MODE FOR MYSELF. DON'T ENABLE THIS IF YOU DON'T UNDERSTAND IT ]]]\n\n")
# os.environ["CUDA_VISIBLE_DEVICES"] = "0"
# import src.utils
# src.utils.set_seed(42) # make training deterministic (including dataloader). if you are doing this, remember to change seed when you load a model (otherwise the dataloader loads old samples)
# Standard-library imports (original lines were missing the space after
# "import", turning them into bare NameError expressions — fixed).
import logging
import datetime
# Training configuration (a stray unified-diff hunk header "@ -53,6 +53,16 @@"
# was fused onto the model_type line by a bad paste — removed).
model_type = 'RWKV'  # architecture selector
batch_size = 12      # samples per optimizer step
### Step 4: set learning rate, number of mini-epochs #######################################################
# By default we are using exponential LR decay.
#
# Here are my suggestions for training a good model.
# Let's say you will train a L6-D512 model.
# 1) Set lr_init = lr_final = 8e-4. Let it run for some mini-epochs, until the improvement of the loss becomes slow.