From dd52733d6f9be7f5f853c205575d89c8cffc8d92 Mon Sep 17 00:00:00 2001
From: gongjy <2474590974@qq.com>
Date: Fri, 13 Sep 2024 14:16:10 +0800
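Subject: [PATCH] update readme dataset link; set pretrain batch_size=64 and eval temperature=0.5; comment out pretrained weight loading in full SFT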

---
 1-pretrain.py |  2 +-
 2-eval.py     |  2 +-
 3-full_sft.py | 14 +++++++-------
 README.md     |  2 +-
 4 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/1-pretrain.py b/1-pretrain.py
index 720396e..a7f4a5b 100644
--- a/1-pretrain.py
+++ b/1-pretrain.py
@@ -133,7 +133,7 @@ if __name__ == "__main__":
     max_seq_len = lm_config.max_seq_len
     out_dir = 'out'
     epochs = 20
-    batch_size = 8
+    batch_size = 64
     learning_rate = 1e-4
     device = 'cuda:0'
     dtype = 'bfloat16'
diff --git a/2-eval.py b/2-eval.py
index 7ed18d9..5297db8 100644
--- a/2-eval.py
+++ b/2-eval.py
@@ -60,7 +60,7 @@ if __name__ == "__main__":
     # -----------------------------------------------------------------------------
     out_dir = 'out'
     start = ""
-    temperature = 0.7
+    temperature = 0.5
     top_k = 16
     setup_seed(1337)
     # device = 'cpu'
diff --git a/3-full_sft.py b/3-full_sft.py
index 1c76954..2097faf 100644
--- a/3-full_sft.py
+++ b/3-full_sft.py
@@ -112,13 +112,13 @@ def init_model(lm_config):
         ckp = f'./out/pretrain_{lm_config.dim}{moe_path}.pth'
 
         model = Transformer(lm_config)
-        state_dict = torch.load(ckp, map_location=device)
-
-        unwanted_prefix = '_orig_mod.'
-        for k, v in list(state_dict.items()):
-            if k.startswith(unwanted_prefix):
-                state_dict[k[len(unwanted_prefix):]] = state_dict.pop(k)
-        model.load_state_dict(state_dict, strict=False)
+        # state_dict = torch.load(ckp, map_location=device)
+        #
+        # unwanted_prefix = '_orig_mod.'
+        # for k, v in list(state_dict.items()):
+        #     if k.startswith(unwanted_prefix):
+        #         state_dict[k[len(unwanted_prefix):]] = state_dict.pop(k)
+        # model.load_state_dict(state_dict, strict=False)
     else:
         model = AutoModel.from_pretrained('./minimind', trust_remote_code=True)
 
diff --git a/README.md b/README.md
index 40aa951..52b4874 100644
--- a/README.md
+++ b/README.md
@@ -279,7 +279,7 @@ streamlit run fast_inference.py
 | MiniMind训练数据集         | 下载地址                                                                                                                                                     |
 |-----------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------|
 | **【tokenizer训练集】**    | [HuggingFace](https://huggingface.co/datasets/jingyaogong/minimind_dataset/tree/main) / [百度网盘](https://pan.baidu.com/s/1yAw1LVTftuhQGAC1Y9RdYQ?pwd=6666) |
-| **【Pretrain数据(2选1)】** | [Seq-Monkey通用文本数据集](http://share.mobvoi.com:5000/sharing/O91blwPkY)  / [百度网盘](https://pan.baidu.com/s/114F1k3eksiWCOQLvaT3RYQ?pwd=6666)                  |
+| **【Pretrain数据(2选1)】** | [Seq-Monkey通用文本数据集](http://share.mobvoi.com:5000/sharing/O91blwPkY)  / [百度网盘](https://pan.baidu.com/s/1-Z8Q37lJD4tOKhyBs1D_6Q?pwd=6666)                  |
 | **【Pretrain数据(2选1)】** | [SkyPile-150B数据集](https://hf-mirror.com/datasets/Skywork/SkyPile-150B/tree/main/data)                                                                    |
 | **【SFT数据】**           | [匠数大模型SFT数据集](https://www.modelscope.cn/datasets/deepctrl/deepctrl-sft-data/resolve/master/sft_data_zh.jsonl)                                            |
 | **【DPO数据1】**          | [活字数据集1](https://huggingface.co/datasets/Skepsun/huozi_rlhf_data_json)                                                                                   |