update readme

This commit is contained in:
gongjy 2024-09-13 14:16:10 +08:00
parent 56c6139896
commit dd52733d6f
4 changed files with 10 additions and 10 deletions

View File

@ -133,7 +133,7 @@ if __name__ == "__main__":
max_seq_len = lm_config.max_seq_len
out_dir = 'out'
epochs = 20
batch_size = 8
batch_size = 64
learning_rate = 1e-4
device = 'cuda:0'
dtype = 'bfloat16'

View File

@ -60,7 +60,7 @@ if __name__ == "__main__":
# -----------------------------------------------------------------------------
out_dir = 'out'
start = ""
temperature = 0.7
temperature = 0.5
top_k = 16
setup_seed(1337)
# device = 'cpu'

View File

@ -112,13 +112,13 @@ def init_model(lm_config):
ckp = f'./out/pretrain_{lm_config.dim}{moe_path}.pth'
model = Transformer(lm_config)
state_dict = torch.load(ckp, map_location=device)
unwanted_prefix = '_orig_mod.'
for k, v in list(state_dict.items()):
if k.startswith(unwanted_prefix):
state_dict[k[len(unwanted_prefix):]] = state_dict.pop(k)
model.load_state_dict(state_dict, strict=False)
# state_dict = torch.load(ckp, map_location=device)
#
# unwanted_prefix = '_orig_mod.'
# for k, v in list(state_dict.items()):
# if k.startswith(unwanted_prefix):
# state_dict[k[len(unwanted_prefix):]] = state_dict.pop(k)
# model.load_state_dict(state_dict, strict=False)
else:
model = AutoModel.from_pretrained('./minimind', trust_remote_code=True)

View File

@ -279,7 +279,7 @@ streamlit run fast_inference.py
| MiniMind训练数据集 | 下载地址 |
|-----------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------|
| **【tokenizer训练集】** | [HuggingFace](https://huggingface.co/datasets/jingyaogong/minimind_dataset/tree/main) / [百度网盘](https://pan.baidu.com/s/1yAw1LVTftuhQGAC1Y9RdYQ?pwd=6666) |
| **【Pretrain数据(2选1)】** | [Seq-Monkey通用文本数据集](http://share.mobvoi.com:5000/sharing/O91blwPkY) / [百度网盘](https://pan.baidu.com/s/114F1k3eksiWCOQLvaT3RYQ?pwd=6666) |
| **【Pretrain数据(2选1)】** | [Seq-Monkey通用文本数据集](http://share.mobvoi.com:5000/sharing/O91blwPkY) / [百度网盘](https://pan.baidu.com/s/1-Z8Q37lJD4tOKhyBs1D_6Q?pwd=6666) |
| **【Pretrain数据(2选1)】** | [SkyPile-150B数据集](https://hf-mirror.com/datasets/Skywork/SkyPile-150B/tree/main/data) |
| **【SFT数据】** | [匠数大模型SFT数据集](https://www.modelscope.cn/datasets/deepctrl/deepctrl-sft-data/resolve/master/sft_data_zh.jsonl) |
| **【DPO数据1】** | [活字数据集1](https://huggingface.co/datasets/Skepsun/huozi_rlhf_data_json) |