experiment_1.3.1
This commit is contained in:
parent
4505546641
commit
70404b8e87
47
run_file/experiment_1.3.1.sh
Normal file
47
run_file/experiment_1.3.1.sh
Normal file
@ -0,0 +1,47 @@
|
||||
#!/bin/bash
#
# Experiment 1.3.1: launch train_pretrain_accelerate.py on 4 GPUs through
# `accelerate launch`, with every accelerate option given on the command line.
#
# The training script, dataset and cache paths below are all relative, so this
# script must be run from the directory that contains
# train_pretrain_accelerate.py.

# Activate the conda environment.
# Each step is checked explicitly: if activation fails we must not fall
# through and start a multi-GPU job in whatever environment happens to be
# active.
conda_base="$(conda info --base)" || {
    echo "error: 'conda info --base' failed; is conda on PATH?" >&2
    exit 1
}
# Quoted so a conda install path containing whitespace is not word-split.
# shellcheck source=/dev/null
source "${conda_base}/etc/profile.d/conda.sh" || {
    echo "error: cannot source ${conda_base}/etc/profile.d/conda.sh" >&2
    exit 1
}
conda activate ycz_accelerate || {
    echo "error: cannot activate conda environment 'ycz_accelerate'" >&2
    exit 1
}

# Environment variables that help with debugging.
export NCCL_DEBUG=INFO
export PYTHONFAULTHANDLER=1

# Experiment 1.3.1 - configure accelerate directly via command-line arguments.
# Arguments before train_pretrain_accelerate.py go to `accelerate launch`;
# arguments after it are passed to the training script.
#
# NOTE(review): `--use_moe False` passes the literal string "False". If the
# training script declares this flag with argparse `type=bool`, any non-empty
# string is truthy and MoE would be enabled - confirm against the script's
# argument parser.
CUDA_VISIBLE_DEVICES=0,1,2,3 accelerate launch \
    --multi_gpu \
    --num_processes=4 \
    --mixed_precision=bf16 \
    --main_process_port=29500 \
    train_pretrain_accelerate.py \
    --out_dir "out" \
    --epochs 3 \
    --embedding_epoch 2 \
    --batch_size 48 \
    --learning_rate 2e-4 \
    --dtype bfloat16 \
    --use_swanlab \
    --swanlab_project "MiniMind-Pretrain" \
    --num_workers 1 \
    --accumulation_steps 32 \
    --grad_clip 1.0 \
    --warmup_iters 0 \
    --log_interval 100 \
    --save_interval 10000 \
    --dim 1024 \
    --n_layers 18 \
    --max_seq_len 512 \
    --use_moe False \
    --data_path "./dataset/stable/merged_pretrain.jsonl" \
    --profile \
    --profile_interval 10 \
    --use_flash_attn \
    --knowledge_num 1048576 \
    --knowledge_length 32 \
    --database_init_path "./dataset/stable/sentence_trex_data.json" \
    --fast_clustering \
    --cluster_cache_path "./cache/cluster_tokens_single.pt" \
    --memory_monitor_interval 10 \
    --model_type "model" \
    --model_size 814.724
|
Loading…
x
Reference in New Issue
Block a user