From c9ca0f57dc9a203e762c3ebe39fabc83d6cf2f5f Mon Sep 17 00:00:00 2001 From: gongjy <2474590974@qq.com> Date: Mon, 10 Feb 2025 00:14:11 +0800 Subject: [PATCH] update config --- README.md | 43 ++++++++++++++++++------------------- README_en.md | 41 +++++++++++++++++------------------ eval_model.py | 9 ++++---- scripts/convert_model.py | 4 ++-- scripts/serve_openai_api.py | 4 ++-- train_distill_reason.py | 8 +++---- 6 files changed, 53 insertions(+), 56 deletions(-) diff --git a/README.md b/README.md index d9bba18..f4256c6 100644 --- a/README.md +++ b/README.md @@ -69,7 +69,7 @@ 最低只需3块钱不到的服务器成本,就能亲身体验从0到1构建一个语言模型的全过程。 一起感受创造的乐趣吧! -> [!TIP] +> [!NOTE] > (截至2025-02-07)MiniMind系列已完成多个型号模型的预训练,最小仅需25.8M(0.02B),即可具备流畅对话能力!
@@ -172,6 +172,18 @@ # 📌 快速开始 +
+ Hugging Face Logo + Hugging Face + +[MiniMind (HuggingFace)](https://huggingface.co/collections/jingyaogong/minimind-66caf8d999f5c7fa64f399e5) + + Hugging Face Logo + +[MiniMind (ModelScope)](https://www.modelscope.cn/profile/gongjy) + +
+
分享本人的软硬件配置(仅供参考) @@ -193,18 +205,6 @@ git clone https://github.com/jingyaogong/minimind.git ## Ⅰ 测试已有模型效果 -
- Hugging Face Logo - Hugging Face - -[MiniMind (HuggingFace)](https://huggingface.co/collections/jingyaogong/minimind-66caf8d999f5c7fa64f399e5) - - Hugging Face Logo - -[MiniMind (ModelScope)](https://www.modelscope.cn/profile/gongjy) - -
- ### 1.下载模型 ```bash @@ -254,7 +254,7 @@ streamlit run web_demo.py 从下文的[数据集下载链接](#数据集下载)下载需要的数据文件放到`./dataset`目录下 -> 【注】默认推荐下载`pretrain_data.csv` + `sft_mini_512.jsonl`最快速度复现Zero聊天模型。 +> 【注】默认推荐下载`pretrain_hq.jsonl` + `sft_mini_512.jsonl`最快速度复现Zero聊天模型。 > 【注】数据文件可自由选择,下文提供了多种搭配方案,可根据自己手头的训练需求和GPU资源进行适当组合。 @@ -448,7 +448,7 @@ quality(当然也还不算high,提升数据质量无止尽)。 ## Ⅷ 数据集下载 -> [!TIP] +> [!NOTE] > 2025-02-05后,开源MiniMind最终训练所用的所有数据集,因此无需再自行预处理大规模数据集,避免重复性的数据处理工作。 MiniMind训练数据集 @@ -559,7 +559,7 @@ MiniMind的整体结构一致,只是在RoPE计算、推理函数和FFN层的
单卡3090 (1 epoch) + 2.1小时 + 花费2.73元人民币
即可从0训练出MiniMind-Zero-0.025B模型!!! -> MiniMind2~Small参数 +> MiniMind2-Small参数 >> `pretrain_hq.jsonl`+`sft_512.jsonl`+`sft_2048.jsonl`+`dpo.jsonl`数据集
单卡3090 (2 epochs) + 大约38.16小时 + 花费49.61元人民币
即可从0训练出MiniMind2-Small-0.025B模型!!! @@ -877,9 +877,8 @@ MobileLLM提出架构的深度比宽度更重要,「深而窄」的「瘦长 #### Transformers模型 -* - -MiniMind系列 [(ModelScope)](https://www.modelscope.cn/profile/gongjy) | [HuggingFace](https://huggingface.co/collections/jingyaogong/minimind-66caf8d999f5c7fa64f399e5) +* MiniMind系列 [(ModelScope)](https://www.modelscope.cn/profile/gongjy) + | [HuggingFace](https://huggingface.co/collections/jingyaogong/minimind-66caf8d999f5c7fa64f399e5)
Torch文件命名对照(展开) @@ -905,7 +904,7 @@ MiniMind系列 [(ModelScope)](https://www.modelscope.cn/profile/gongjy) | [Huggi ## Ⅰ RLHF对比SFT篇 -> [!TIP] +> [!NOTE] > 测试基于「full_sft」和「rlhf」的`MiniMind2系列`模型对比,测试随机种子固定为`42`
@@ -943,7 +942,7 @@ DPO和在线PPO的区别在于reject和chosen都是离线准备的,和minimind ## Ⅱ 主观样例测评 -> [!TIP] +> [!NOTE] > 以下测试于2025-02-09完成,此日期后发布的新模型,无特殊需要时将不加入测试。 [A] [MiniMind2 (0.1B)](https://www.modelscope.cn/models/gongjy/MiniMind2-PyTorch)
@@ -1218,7 +1217,7 @@ MiniMind模型本身预训练数据集小的可怜,也没有针对性的对测 # 📌 Acknowledge -> [!TIP] +> [!NOTE] > 如果觉得`MiniMind系列`对您有所帮助,可以在 GitHub 上加一个⭐
> 篇幅超长水平有限难免纰漏,欢迎在Issues交流指正或提交PR改进项目
> 您的小小支持就是持续改进此项目的动力! diff --git a/README_en.md b/README_en.md index 72633c3..6300381 100644 --- a/README_en.md +++ b/README_en.md @@ -82,7 +82,7 @@ and to train a very small language model from scratch, not just performing **inf With server costs of less than 3 RMB, you can experience the entire process of building a language model from 0 to 1. Let's enjoy the fun of creation together! -> [!TIP] +> [!NOTE] > (As of 2025-02-07) The MiniMind series has completed pretraining for multiple models, with the smallest one being only > 25.8M (0.02B) and capable of smooth conversation! @@ -191,6 +191,18 @@ We hope this open-source project can help LLM beginners quickly get started! # 📌 Quick Start +
+ Hugging Face Logo + Hugging Face + +[MiniMind (HuggingFace)](https://huggingface.co/collections/jingyaogong/minimind-66caf8d999f5c7fa64f399e5) + + Hugging Face Logo + +[MiniMind (ModelScope)](https://www.modelscope.cn/profile/gongjy) + +
+
Sharing My Hardware and Software Configuration (For Reference Only) @@ -212,18 +224,6 @@ git clone https://github.com/jingyaogong/minimind.git ## Ⅰ Test Pre-trained Model -
- Hugging Face Logo - Hugging Face - -[MiniMind (HuggingFace)](https://huggingface.co/collections/jingyaogong/minimind-66caf8d999f5c7fa64f399e5) - - Hugging Face Logo - -[MiniMind (ModelScope)](https://www.modelscope.cn/profile/gongjy) - -
- ### 1. Download the Model ```bash @@ -276,7 +276,7 @@ Download the necessary data files from the [Dataset Download Link](#数据集下 directory. > **Note**: The recommended fastest way to replicate the Zero Chat model is by -> downloading `pretrain_data.csv` + `sft_mini_512.jsonl`. +> downloading `pretrain_hq.jsonl` + `sft_mini_512.jsonl`. > **Note**: You can freely choose from multiple dataset options below based on your training needs and GPU resources. @@ -492,7 +492,8 @@ Big respect! ## Ⅷ Dataset Download -> **TIP**: After `2025-02-05`, MiniMind’s open-source datasets for final training are provided, so there is no need for +> [!NOTE] +> After `2025-02-05`, MiniMind’s open-source datasets for final training are provided, so there is no need for > you to preprocess large datasets by yourself anymore. This helps avoid redundant work. MiniMind Training Datasets are available for download from: @@ -617,7 +618,7 @@ Reference model parameter versions are shown in the table below:
Single card 3090 (1 epoch) + 2.1 hours + cost 2.73 RMB
Can train the MiniMind-Zero-0.025B model from scratch!!! -> MiniMind2~Small Parameters +> MiniMind2-Small Parameters >> `pretrain_hq.jsonl` + `sft_512.jsonl` + `sft_2048.jsonl` + `dpo.jsonl` dataset
Single card 3090 (2 epochs) + approx. 38.16 hours + cost 49.61 RMB
Can train the MiniMind2-Small-0.025B model from scratch!!! @@ -1030,7 +1031,7 @@ For reference, the parameter settings for GPT-3 are shown in the table below: ## Ⅰ RLHF vs SFT Comparison -> [!TIP] +> [!NOTE] > The test is based on comparing `MiniMind2 series` models with "full_sft" and "rlhf". The random seed is fixed at `42`.
@@ -1089,7 +1090,7 @@ rather than using the PPO method where the reward model acts as a "coach" to cor ## Ⅱ Subjective Sample Evaluation -> [!TIP] +> [!NOTE] > The following tests were completed on February 9, 2025. New models released after this date will not be included in > the tests unless there is a special need. @@ -1103,8 +1104,6 @@ rather than using the PPO method where the reward model acts as a "coach" to cor [G] [baby-llama2-chinese(0.2B)](https://github.com/DLLXW/baby-llama2-chinese)
[H] [chatlm-mini-chinese(0.2B)](https://github.com/charent/ChatLM-mini-Chinese)
- - ```text [Q]: Do you know about the Yangtze River? [A]: The Yangtze River, located in southeastern China, is the longest river in China, stretching over 6,300 kilometers and flowing through 11 provinces. It is China's "mother river." The history of the Yangtze River dates back to the Shang Dynasty around 2000 BC. During the Shang Dynasty, the economic development of the Yangtze River reached its peak, making it one of the most important agricultural and industrial centers in China. The economy of the Yangtze River Basin mainly relies on agriculture, with Yangtze River fisheries occupying a major industry, such as fishing and hunting fish. The water resources of the Yangtze River are also very abundant, including rivers, lakes, and streams. The natural scenery of the Yangtze River is spectacular, with different landscapes in each season: rainy in spring and summer, cold and snowy in winter, and beautiful flowers and fruits in spring and autumn. The Yangtze River is China's economic center, home to many countries and cities, and an important transportation hub and shipping center in the world. At the same time, the Yangtze River is also an important water resource for China, playing a significant role in the country's economic development. @@ -1376,7 +1375,7 @@ is mainly for fun, so take the results lightly: # 📌 Acknowledge -> [!TIP] +> [!NOTE] > If you find the `MiniMind series` helpful, feel free to give it a ⭐ on GitHub.
> Due to the length of the content, mistakes are inevitable; please feel free to report issues or submit a PR to improve > the project.
diff --git a/eval_model.py b/eval_model.py index 1f1ce08..2732140 100644 --- a/eval_model.py +++ b/eval_model.py @@ -113,8 +113,8 @@ def main(): # MiniMind2-moe (145M):(dim=640, n_layers=8, use_moe=True) # MiniMind2-Small (26M):(dim=512, n_layers=8) # MiniMind2 (104M):(dim=768, n_layers=16) - parser.add_argument('--dim', default=768, type=int) - parser.add_argument('--n_layers', default=16, type=int) + parser.add_argument('--dim', default=512, type=int) + parser.add_argument('--n_layers', default=8, type=int) parser.add_argument('--max_seq_len', default=8192, type=int) parser.add_argument('--use_moe', default=False, type=bool) # 携带历史对话上下文条数 @@ -123,7 +123,7 @@ def main(): parser.add_argument('--history_cnt', default=0, type=int) parser.add_argument('--stream', default=True, type=bool) parser.add_argument('--load', default=0, type=int, help="0: 原生torch权重,1: transformers加载") - parser.add_argument('--model_mode', default=3, type=int, + parser.add_argument('--model_mode', default=0, type=int, help="0: 预训练模型,1: SFT-Chat模型,2: RLHF-Chat模型,3: Reason模型") args = parser.parse_args() @@ -178,6 +178,5 @@ def main(): if __name__ == "__main__": torch.backends.cudnn.deterministic = True - # random.seed(random.randint(0, 2048)) - random.seed(42) + random.seed(random.randint(0, 2048)) main() diff --git a/scripts/convert_model.py b/scripts/convert_model.py index b1760a2..9c2209f 100644 --- a/scripts/convert_model.py +++ b/scripts/convert_model.py @@ -51,9 +51,9 @@ def push_to_hf(export_model_path): if __name__ == '__main__': lm_config = LMConfig(dim=512, n_layers=8, max_seq_len=8192, use_moe=False) - torch_path = f"../out/reason_{lm_config.dim}{'_moe' if lm_config.use_moe else ''}.pth" + torch_path = f"../out/rlhf_{lm_config.dim}{'_moe' if lm_config.use_moe else ''}.pth" - transformers_path = '../MiniMind2-Small-R1' + transformers_path = '../MiniMind2-Small' # convert torch to transformers model convert_torch2transformers(torch_path, transformers_path) diff --git a/scripts/serve_openai_api.py b/scripts/serve_openai_api.py index 3c0cf0b..93a6df1 100644 --- a/scripts/serve_openai_api.py +++ b/scripts/serve_openai_api.py @@ -26,7 +26,7 @@ def init_model(args): tokenizer = AutoTokenizer.from_pretrained('../model/minimind_tokenizer') if args.load == 0: moe_path = '_moe' if args.use_moe else '' - modes = {0: 'pretrain', 1: 'full_sft', 2: 'full_dist', 3: 'rlhf'} + modes = {0: 'pretrain', 1: 'full_sft', 2: 'rlhf', 3: 'reason'} ckp = f'../{args.out_dir}/{modes[args.model_mode]}_{args.dim}{moe_path}.pth' model = MiniMindLM(LMConfig( @@ -156,7 +156,7 @@ if __name__ == "__main__": parser.add_argument('--max_seq_len', default=8192, type=int) parser.add_argument('--use_moe', default=False, type=bool) parser.add_argument('--load', default=0, type=int, help="0: 从原生torch权重,1: 利用transformers加载") - parser.add_argument('--model_mode', default=1, type=int, help="0: 预训练模型,1: SFT-Chat模型,2: RLHF-Chat模型") + parser.add_argument('--model_mode', default=1, type=int, help="0: 预训练模型,1: SFT-Chat模型,2: RLHF-Chat模型,3: Reason模型") device = 'cuda' if torch.cuda.is_available() else 'cpu' model, tokenizer = init_model(parser.parse_args()) diff --git a/train_distill_reason.py b/train_distill_reason.py index 4d1b0d3..722deeb 100644 --- a/train_distill_reason.py +++ b/train_distill_reason.py @@ -150,11 +150,11 @@ if __name__ == "__main__": parser.add_argument("--log_interval", type=int, default=1) parser.add_argument("--save_interval", type=int, default=50) parser.add_argument('--local_rank', type=int, default=-1) - parser.add_argument('--dim', 
default=768, type=int) - parser.add_argument('--n_layers', default=16, type=int) - parser.add_argument('--max_seq_len', default=768, type=int) + parser.add_argument('--dim', default=512, type=int) + parser.add_argument('--n_layers', default=8, type=int) + parser.add_argument('--max_seq_len', default=1024, type=int) parser.add_argument('--use_moe', default=False, type=bool) - parser.add_argument("--data_path", type=str, default="./dataset/r1_768.jsonl") + parser.add_argument("--data_path", type=str, default="./dataset/r1_mix_1024.jsonl") args = parser.parse_args()
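For reference, a minimal standalone sketch (not part of the patch) of what the updated defaults in `train_distill_reason.py` and the corrected `modes` mapping in `scripts/serve_openai_api.py` resolve to. The values are taken directly from the hunks above; running the parser with no CLI overrides and the `out` output directory name are assumptions made purely for illustration.

```python
# Hypothetical sketch: the post-patch argparse defaults of train_distill_reason.py,
# evaluated in isolation (no CLI arguments passed).
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--dim', default=512, type=int)
parser.add_argument('--n_layers', default=8, type=int)
parser.add_argument('--max_seq_len', default=1024, type=int)
parser.add_argument('--use_moe', default=False, type=bool)
parser.add_argument("--data_path", type=str, default="./dataset/r1_mix_1024.jsonl")
args = parser.parse_args([])  # empty argv, so the new defaults apply

print(args.dim, args.n_layers, args.max_seq_len, args.data_path)
# -> 512 8 1024 ./dataset/r1_mix_1024.jsonl

# Checkpoint naming under the corrected modes mapping from scripts/serve_openai_api.py
# (the 'out' directory name is assumed here for illustration only).
modes = {0: 'pretrain', 1: 'full_sft', 2: 'rlhf', 3: 'reason'}
moe_path = '_moe' if args.use_moe else ''
print(f"../out/{modes[3]}_{args.dim}{moe_path}.pth")
# -> ../out/reason_512.pth
```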