You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
RWKV-LM/RWKV-v4/deepspeed.json

38 lines
1008 B
JSON

{
  "zero_allow_untested_optimizer": true,
  "zero_optimization": {
    "stage": 2,
    "contiguous_gradients": true,
    "overlap_comm": true,
    "allgather_partitions": true,
    "reduce_scatter": true,
    "allgather_bucket_size": 200000000,
    "reduce_bucket_size": 200000000,
    "sub_group_size": 1000000000000
  },
  "activation_checkpointing": {
    "partition_activations": false,
    "cpu_checkpointing": false,
    "contiguous_memory_optimization": false,
    "synchronize_checkpoint_boundary": false
  },
  "aio": {
    "block_size": 1048576,
    "queue_depth": 8,
    "single_submit": false,
    "overlap_events": true,
    "thread_count": 1
  },
  "gradient_clipping": 1.0,
  "gradient_accumulation_steps": 1,
  "fp16": {
    "enabled": true,
    "loss_scale": 0,
    "initial_scale_power": 12,
    "loss_scale_window": 1000,
    "hysteresis": 2,
    "min_loss_scale": 1
  }
}