update readme
This commit is contained in:
parent
56c6139896
commit
dd52733d6f
@ -133,7 +133,7 @@ if __name__ == "__main__":
|
||||
max_seq_len = lm_config.max_seq_len
|
||||
out_dir = 'out'
|
||||
epochs = 20
|
||||
batch_size = 8
|
||||
batch_size = 64
|
||||
learning_rate = 1e-4
|
||||
device = 'cuda:0'
|
||||
dtype = 'bfloat16'
|
||||
|
@ -60,7 +60,7 @@ if __name__ == "__main__":
|
||||
# -----------------------------------------------------------------------------
|
||||
out_dir = 'out'
|
||||
start = ""
|
||||
temperature = 0.7
|
||||
temperature = 0.5
|
||||
top_k = 16
|
||||
setup_seed(1337)
|
||||
# device = 'cpu'
|
||||
|
@ -112,13 +112,13 @@ def init_model(lm_config):
|
||||
ckp = f'./out/pretrain_{lm_config.dim}{moe_path}.pth'
|
||||
|
||||
model = Transformer(lm_config)
|
||||
state_dict = torch.load(ckp, map_location=device)
|
||||
|
||||
unwanted_prefix = '_orig_mod.'
|
||||
for k, v in list(state_dict.items()):
|
||||
if k.startswith(unwanted_prefix):
|
||||
state_dict[k[len(unwanted_prefix):]] = state_dict.pop(k)
|
||||
model.load_state_dict(state_dict, strict=False)
|
||||
# state_dict = torch.load(ckp, map_location=device)
|
||||
#
|
||||
# unwanted_prefix = '_orig_mod.'
|
||||
# for k, v in list(state_dict.items()):
|
||||
# if k.startswith(unwanted_prefix):
|
||||
# state_dict[k[len(unwanted_prefix):]] = state_dict.pop(k)
|
||||
# model.load_state_dict(state_dict, strict=False)
|
||||
else:
|
||||
model = AutoModel.from_pretrained('./minimind', trust_remote_code=True)
|
||||
|
||||
|
@ -279,7 +279,7 @@ streamlit run fast_inference.py
|
||||
| MiniMind训练数据集 | 下载地址 |
|
||||
|-----------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------|
|
||||
| **【tokenizer训练集】** | [HuggingFace](https://huggingface.co/datasets/jingyaogong/minimind_dataset/tree/main) / [百度网盘](https://pan.baidu.com/s/1yAw1LVTftuhQGAC1Y9RdYQ?pwd=6666) |
|
||||
| **【Pretrain数据(2选1)】** | [Seq-Monkey通用文本数据集](http://share.mobvoi.com:5000/sharing/O91blwPkY) / [百度网盘](https://pan.baidu.com/s/114F1k3eksiWCOQLvaT3RYQ?pwd=6666) |
|
||||
| **【Pretrain数据(2选1)】** | [Seq-Monkey通用文本数据集](http://share.mobvoi.com:5000/sharing/O91blwPkY) / [百度网盘](https://pan.baidu.com/s/1-Z8Q37lJD4tOKhyBs1D_6Q?pwd=6666) |
|
||||
| **【Pretrain数据(2选1)】** | [SkyPile-150B数据集](https://hf-mirror.com/datasets/Skywork/SkyPile-150B/tree/main/data) |
|
||||
| **【SFT数据】** | [匠数大模型SFT数据集](https://www.modelscope.cn/datasets/deepctrl/deepctrl-sft-data/resolve/master/sft_data_zh.jsonl) |
|
||||
| **【DPO数据1】** | [活字数据集1](https://huggingface.co/datasets/Skepsun/huozi_rlhf_data_json) |
|
||||
|
Loading…
x
Reference in New Issue
Block a user