diff --git a/3-full_sft.py b/3-full_sft.py
index de464f9..d8aa768 100644
--- a/3-full_sft.py
+++ b/3-full_sft.py
@@ -143,7 +143,7 @@ if __name__ == "__main__":
     parser.add_argument("--out_dir", type=str, default="out", help="Output directory")
     parser.add_argument("--epochs", type=int, default=19, help="Number of epochs")
     parser.add_argument("--batch_size", type=int, default=32, help="Batch size")
-    parser.add_argument("--learning_rate", type=float, default=1.5e-4, help="Learning rate")
+    parser.add_argument("--learning_rate", type=float, default=1e-4, help="Learning rate")
     parser.add_argument("--device", type=str, default="cuda:0" if torch.cuda.is_available() else "cpu", help="Device to use")
     parser.add_argument("--dtype", type=str, default="bfloat16", help="Data type")
     parser.add_argument("--use_wandb", action="store_true", help="Use Weights & Biases")