diff --git a/1-pretrain.py b/1-pretrain.py index fabb445..d1d84ec 100644 --- a/1-pretrain.py +++ b/1-pretrain.py @@ -120,7 +120,6 @@ def init_distributed_mode(): DEVICE = f"cuda:{ddp_local_rank}" torch.cuda.set_device(DEVICE) - # torchrun --nproc_per_node 2 1-pretrain.py # I/O if __name__ == "__main__": diff --git a/model/model.py b/model/model.py index 4901bd7..bc68dd6 100644 --- a/model/model.py +++ b/model/model.py @@ -375,7 +375,6 @@ class Transformer(PreTrainedModel): self.OUT.__setitem__('logits', logits) self.OUT.__setitem__('last_loss', self.last_loss) - return self.OUT @torch.inference_mode() diff --git a/中文逐行注释/README.md b/中文逐行注释/README.md index 6f70d7c..c2a1ad6 100644 --- a/中文逐行注释/README.md +++ b/中文逐行注释/README.md @@ -3,4 +3,4 @@ 感谢[@chuanzhubin](https://github.com/chuanzhubin)贡献此部分内容 -很大程度方便了学习者快速理解! \ No newline at end of file +很大程度方便了学习者快速理解 \ No newline at end of file