update seed set

commit 7fcc46b39a
parent 08e9a22a25
@@ -163,7 +163,6 @@ if __name__ == "__main__":
     os.makedirs(args.save_dir, exist_ok=True)
     os.makedirs(args.out_dir, exist_ok=True)
     tokens_per_iter = args.batch_size * lm_config.max_seq_len
-    torch.manual_seed(1337)
     device_type = "cuda" if "cuda" in args.device else "cpu"

     args.wandb_run_name = f"MiniMind-Distill-Reasoning-Epoch-{args.epochs}-BatchSize-{args.batch_size}-LearningRate-{args.learning_rate}"
@@ -171,9 +170,17 @@ if __name__ == "__main__":
     ctx = nullcontext() if device_type == "cpu" else torch.cuda.amp.autocast()
     ddp = int(os.environ.get("RANK", -1)) != -1  # is this a ddp run?
     ddp_local_rank, DEVICE = 0, "cuda:0"
+    base_seed = 1337
+    torch.manual_seed(base_seed)
+    torch.cuda.manual_seed(base_seed)

     if ddp:
         init_distributed_mode()
         args.device = torch.device(DEVICE)
+        rank = dist.get_rank()
+        torch.manual_seed(base_seed + rank)
+        # Also set the CUDA random seed
+        torch.cuda.manual_seed(base_seed + rank)

     if args.use_wandb and (not ddp or ddp_local_rank == 0):
         import wandb
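The same change is repeated in every trainer below, so a consolidated sketch may help: seed the torch CPU and CUDA generators from a fixed base seed, then, when running under DDP, offset the seed by the process rank so workers do not draw identical random numbers (dropout masks, shuffling) while the run stays reproducible. The helper name setup_seed and the standalone layout are illustrative, not part of the commit; only the seeding calls mirror the diff.

import os

import torch
import torch.distributed as dist


def setup_seed(base_seed: int = 1337) -> None:
    # Seed the CPU and current-device CUDA generators for single-process runs.
    torch.manual_seed(base_seed)
    torch.cuda.manual_seed(base_seed)
    # DDP launchers such as torchrun export RANK; once the process group is up,
    # offset the seed per rank so each worker gets a distinct random stream.
    if int(os.environ.get("RANK", -1)) != -1 and dist.is_initialized():
        rank = dist.get_rank()
        torch.manual_seed(base_seed + rank)
        torch.cuda.manual_seed(base_seed + rank)

In the diff the rank offset is applied only after init_distributed_mode(), which matches the dist.is_initialized() guard above.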
@@ -209,7 +209,6 @@ if __name__ == "__main__":
     os.makedirs(args.save_dir, exist_ok=True)
     os.makedirs(args.out_dir, exist_ok=True)
     tokens_per_iter = args.batch_size * max_seq_len
-    torch.manual_seed(1337)
     device_type = "cuda" if "cuda" in args.device else "cpu"

     args.wandb_run_name = f"MiniMind-Dist-SFT-Epoch-{args.epochs}-BatchSize-{args.batch_size}-LearningRate-{args.learning_rate}"
@@ -217,9 +216,17 @@ if __name__ == "__main__":
     ctx = nullcontext() if device_type == "cpu" else torch.cuda.amp.autocast()
     ddp = int(os.environ.get("RANK", -1)) != -1  # is this a ddp run?
     ddp_local_rank, DEVICE = 0, "cuda:0"
+    base_seed = 1337
+    torch.manual_seed(base_seed)
+    torch.cuda.manual_seed(base_seed)

     if ddp:
         init_distributed_mode()
         args.device = torch.device(DEVICE)
+        rank = dist.get_rank()
+        torch.manual_seed(base_seed + rank)
+        # Also set the CUDA random seed
+        torch.cuda.manual_seed(base_seed + rank)

     if args.use_wandb and (not ddp or ddp_local_rank == 0):
         import wandb
@@ -195,7 +195,6 @@ if __name__ == "__main__":
     os.makedirs(args.save_dir, exist_ok=True)
     os.makedirs(args.out_dir, exist_ok=True)
     tokens_per_iter = args.batch_size * lm_config.max_seq_len
-    torch.manual_seed(1337)
     device_type = "cuda" if "cuda" in args.device else "cpu"

     args.wandb_run_name = f"MiniMind-Full-DPO-Epoch-{args.epochs}-BatchSize-{args.batch_size}-LearningRate-{args.learning_rate}"
@@ -203,9 +202,17 @@ if __name__ == "__main__":
     ctx = nullcontext() if device_type == "cpu" else torch.cuda.amp.autocast()
     ddp = int(os.environ.get("RANK", -1)) != -1  # is this a ddp run?
     ddp_local_rank, DEVICE = 0, "cuda:0"
+    base_seed = 1337
+    torch.manual_seed(base_seed)
+    torch.cuda.manual_seed(base_seed)

     if ddp:
         init_distributed_mode()
         args.device = torch.device(DEVICE)
+        rank = dist.get_rank()
+        torch.manual_seed(base_seed + rank)
+        # Also set the CUDA random seed
+        torch.cuda.manual_seed(base_seed + rank)

     if args.use_wandb and (not ddp or ddp_local_rank == 0):
         import wandb
@@ -150,7 +150,6 @@ if __name__ == "__main__":
     os.makedirs(args.save_dir, exist_ok=True)
     os.makedirs(args.out_dir, exist_ok=True)
     tokens_per_iter = args.batch_size * lm_config.max_seq_len
-    torch.manual_seed(1337)
     device_type = "cuda" if "cuda" in args.device else "cpu"

     args.wandb_run_name = f"MiniMind-Full-SFT-Epoch-{args.epochs}-BatchSize-{args.batch_size}-LearningRate-{args.learning_rate}"
@@ -158,9 +157,17 @@ if __name__ == "__main__":
     ctx = nullcontext() if device_type == "cpu" else torch.cuda.amp.autocast()
     ddp = int(os.environ.get("RANK", -1)) != -1  # is this a ddp run?
     ddp_local_rank, DEVICE = 0, "cuda:0"
+    base_seed = 1337
+    torch.manual_seed(base_seed)
+    torch.cuda.manual_seed(base_seed)

     if ddp:
         init_distributed_mode()
         args.device = torch.device(DEVICE)
+        rank = dist.get_rank()
+        torch.manual_seed(base_seed + rank)
+        # Also set the CUDA random seed
+        torch.cuda.manual_seed(base_seed + rank)

     if args.use_wandb and (not ddp or ddp_local_rank == 0):
         import wandb
@@ -137,15 +137,22 @@ if __name__ == "__main__":
     os.makedirs(args.save_dir, exist_ok=True)
     os.makedirs(args.out_dir, exist_ok=True)
     tokens_per_iter = args.batch_size * lm_config.max_seq_len
-    torch.manual_seed(1337)
     device_type = "cuda" if "cuda" in args.device else "cpu"

     ctx = nullcontext() if device_type == "cpu" else torch.cuda.amp.autocast()
     ddp = int(os.environ.get("RANK", -1)) != -1  # is this a ddp run?
     ddp_local_rank, DEVICE = 0, "cuda:0"
+    base_seed = 1337
+    torch.manual_seed(base_seed)
+    torch.cuda.manual_seed(base_seed)

     if ddp:
         init_distributed_mode()
         args.device = torch.device(DEVICE)
+        rank = dist.get_rank()
+        torch.manual_seed(base_seed + rank)
+        # Also set the CUDA random seed
+        torch.cuda.manual_seed(base_seed + rank)

     args.wandb_run_name = f"MiniMind-Lora-SFT-Epoch-{args.epochs}-BatchSize-{args.batch_size}-LearningRate-{args.learning_rate}"
     if args.use_wandb and (not ddp or ddp_local_rank == 0):
@@ -146,7 +146,6 @@ if __name__ == "__main__":
     os.makedirs(args.save_dir, exist_ok=True)
     os.makedirs(args.out_dir, exist_ok=True)
     tokens_per_iter = args.batch_size * lm_config.max_seq_len
-    torch.manual_seed(1337)
     device_type = "cuda" if "cuda" in args.device else "cpu"

     args.wandb_run_name = f"MiniMind-Pretrain-Epoch-{args.epochs}-BatchSize-{args.batch_size}-LearningRate-{args.learning_rate}"
@@ -156,9 +155,17 @@ if __name__ == "__main__":
     ddp = int(os.environ.get("RANK", -1)) != -1  # is this a ddp run?
     ddp_local_rank, DEVICE = 0, "cuda:0"

+    base_seed = 1337
+    torch.manual_seed(base_seed)
+    torch.cuda.manual_seed(base_seed)

     if ddp:
         init_distributed_mode()
         args.device = torch.device(DEVICE)
+        rank = dist.get_rank()
+        torch.manual_seed(base_seed + rank)
+        # Also set the CUDA random seed
+        torch.cuda.manual_seed(base_seed + rank)

     if args.use_wandb and (not ddp or ddp_local_rank == 0):
         import wandb
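The commit seeds only the torch generators. For fuller reproducibility one could also seed Python's random module and NumPy with the same per-rank offset; the following is a sketch under that assumption, not something this diff adds:

import random

import numpy as np
import torch


def seed_everything(base_seed: int, rank: int = 0) -> None:
    # Apply the same base_seed + rank convention to every RNG in play.
    seed = base_seed + rank
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # covers all GPUs visible to this process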