Minimind/ds_config.json

46 lines
1.1 KiB
JSON
Raw Normal View History

2025-05-14 00:01:40 +08:00
{
  "train_batch_size": "auto",
  "train_micro_batch_size_per_gpu": "auto",
  "gradient_accumulation_steps": "auto",
  "gradient_clipping": "auto",
  "zero_optimization": {
    "stage": 2,
    "offload_optimizer": {
      "device": "cpu",
      "pin_memory": true,
      "buffer_count": 4,
      "fast_init": false
    },
    "offload_param": {
      "device": "cpu",
      "pin_memory": true,
      "buffer_count": 4,
      "buffer_size": 100000000,
      "max_in_cpu": 1000000000
    },
    "allgather_partitions": true,
    "allgather_bucket_size": 200000000,
    "overlap_comm": true,
    "reduce_scatter": true,
    "reduce_bucket_size": 200000000,
    "contiguous_gradients": true
  },
  "fp16": {
    "enabled": false
  },
  "bf16": {
    "enabled": true
  },
  "aio": {
    "block_size": 1048576,
    "queue_depth": 8,
    "thread_count": 1,
    "single_submit": false,
    "overlap_events": true
  },
  "steps_per_print": 100,
  "wall_clock_breakdown": false,
  "memory_breakdown": false
}