From dd52733d6f9be7f5f853c205575d89c8cffc8d92 Mon Sep 17 00:00:00 2001 From: gongjy <2474590974@qq.com> Date: Fri, 13 Sep 2024 14:16:10 +0800 Subject: [PATCH] update readme --- 1-pretrain.py | 2 +- 2-eval.py | 2 +- 3-full_sft.py | 14 +++++++------- README.md | 2 +- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/1-pretrain.py b/1-pretrain.py index 720396e..a7f4a5b 100644 --- a/1-pretrain.py +++ b/1-pretrain.py @@ -133,7 +133,7 @@ if __name__ == "__main__": max_seq_len = lm_config.max_seq_len out_dir = 'out' epochs = 20 - batch_size = 8 + batch_size = 64 learning_rate = 1e-4 device = 'cuda:0' dtype = 'bfloat16' diff --git a/2-eval.py b/2-eval.py index 7ed18d9..5297db8 100644 --- a/2-eval.py +++ b/2-eval.py @@ -60,7 +60,7 @@ if __name__ == "__main__": # ----------------------------------------------------------------------------- out_dir = 'out' start = "" - temperature = 0.7 + temperature = 0.5 top_k = 16 setup_seed(1337) # device = 'cpu' diff --git a/3-full_sft.py b/3-full_sft.py index 1c76954..2097faf 100644 --- a/3-full_sft.py +++ b/3-full_sft.py @@ -112,13 +112,13 @@ def init_model(lm_config): ckp = f'./out/pretrain_{lm_config.dim}{moe_path}.pth' model = Transformer(lm_config) - state_dict = torch.load(ckp, map_location=device) - - unwanted_prefix = '_orig_mod.' - for k, v in list(state_dict.items()): - if k.startswith(unwanted_prefix): - state_dict[k[len(unwanted_prefix):]] = state_dict.pop(k) - model.load_state_dict(state_dict, strict=False) + # state_dict = torch.load(ckp, map_location=device) + # + # unwanted_prefix = '_orig_mod.' + # for k, v in list(state_dict.items()): + # if k.startswith(unwanted_prefix): + # state_dict[k[len(unwanted_prefix):]] = state_dict.pop(k) + # model.load_state_dict(state_dict, strict=False) else: model = AutoModel.from_pretrained('./minimind', trust_remote_code=True) diff --git a/README.md b/README.md index 40aa951..52b4874 100644 --- a/README.md +++ b/README.md @@ -279,7 +279,7 @@ streamlit run fast_inference.py | MiniMind训练数据集 | 下载地址 | |-----------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------| | **【tokenizer训练集】** | [HuggingFace](https://huggingface.co/datasets/jingyaogong/minimind_dataset/tree/main) / [百度网盘](https://pan.baidu.com/s/1yAw1LVTftuhQGAC1Y9RdYQ?pwd=6666) | -| **【Pretrain数据(2选1)】** | [Seq-Monkey通用文本数据集](http://share.mobvoi.com:5000/sharing/O91blwPkY) / [百度网盘](https://pan.baidu.com/s/114F1k3eksiWCOQLvaT3RYQ?pwd=6666) | +| **【Pretrain数据(2选1)】** | [Seq-Monkey通用文本数据集](http://share.mobvoi.com:5000/sharing/O91blwPkY) / [百度网盘](https://pan.baidu.com/s/1-Z8Q37lJD4tOKhyBs1D_6Q?pwd=6666) | | **【Pretrain数据(2选1)】** | [SkyPile-150B数据集](https://hf-mirror.com/datasets/Skywork/SkyPile-150B/tree/main/data) | | **【SFT数据】** | [匠数大模型SFT数据集](https://www.modelscope.cn/datasets/deepctrl/deepctrl-sft-data/resolve/master/sft_data_zh.jsonl) | | **【DPO数据1】** | [活字数据集1](https://huggingface.co/datasets/Skepsun/huozi_rlhf_data_json) |