diff --git a/README.md b/README.md
index d6e2742..c08314c 100644
--- a/README.md
+++ b/README.md
@@ -225,7 +225,7 @@ git clone https://huggingface.co/jingyaogong/MiniMind2
 
 ```bash
 # load=0: load from pytorch model, load=1: load from transformers-hf model
-python eval_model.py --load 1
+python eval_model.py --load 1 --model_mode 2
 ```
 
 ### 4.或启动WebUI
diff --git a/README_en.md b/README_en.md
index 988ffea..e349805 100644
--- a/README_en.md
+++ b/README_en.md
@@ -239,7 +239,7 @@ git clone https://huggingface.co/jingyaogong/MiniMind2
 
 ```bash
 # load=0: load from pytorch model, load=1: load from transformers-hf model
-python eval_model.py --load 1
+python eval_model.py --load 1 --model_mode 2
 ```
 
 ### 4. Or Start WebUI
diff --git a/scripts/train_tokenizer.py b/scripts/train_tokenizer.py
index 6661ace..868099a 100644
--- a/scripts/train_tokenizer.py
+++ b/scripts/train_tokenizer.py
@@ -25,7 +25,7 @@ def train_tokenizer():
                 data = json.loads(line)
                 yield data['text']
 
-    data_path = '../dataset/tokenizer_train.jsonl'
+    data_path = '../dataset/pretrain_hq.jsonl'
 
     # 初始化tokenizer
     tokenizer = Tokenizer(models.BPE())
@@ -139,12 +139,12 @@ def eval_tokenizer():
     print('encoder长度:', len(model_inputs['input_ids']))
 
     input_ids = model_inputs['input_ids']
-    response = tokenizer.decode(input_ids, skip_special_tokens=True)
+    response = tokenizer.decode(input_ids, skip_special_tokens=False)
     print('decoder和原始文本是否一致:', response == new_prompt)
 
 
 def main():
-    # train_tokenizer()
+    train_tokenizer()
     eval_tokenizer()
 
 
diff --git a/train_distill_reason.py b/train_distill_reason.py
index 722deeb..1fe9ad1 100644
--- a/train_distill_reason.py
+++ b/train_distill_reason.py
@@ -35,7 +35,7 @@ def train_epoch(epoch, wandb):
     # 思考标签占位符
     start_of_think_ids = tokenizer('<think>').input_ids
     end_of_think_ids = tokenizer('</think>').input_ids
-    start_of_answer_ids = tokenizer('').input_ids
+    start_of_answer_ids = tokenizer('<answer>').input_ids
     end_of_answer_ids = tokenizer('</answer>').input_ids
     loss_fct = nn.CrossEntropyLoss(reduction='none')
     start_time = time.time()
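Note on the `eval_tokenizer` change above: decoding with `skip_special_tokens=False` keeps the special tokens that the chat template inserted into the prompt, so the round-trip comparison against the original templated text can actually succeed. Below is a minimal sketch of that check; it assumes the tokenizer has already been trained and saved to a local directory such as `../model/minimind_tokenizer`, and that it ships a chat template (both are assumptions, not shown in this diff).

```python
from transformers import AutoTokenizer

# Assumed output directory of train_tokenizer(); adjust to your local path.
tokenizer = AutoTokenizer.from_pretrained('../model/minimind_tokenizer')

messages = [
    {"role": "system", "content": "你是一个优秀的聊天机器人"},
    {"role": "user", "content": "你好"},
]
# Build the chat-templated prompt; the template inserts special role tokens.
new_prompt = tokenizer.apply_chat_template(messages, tokenize=False)

input_ids = tokenizer(new_prompt)['input_ids']
# skip_special_tokens=False keeps those special tokens in the decoded string,
# so the decoded text can match new_prompt exactly.
response = tokenizer.decode(input_ids, skip_special_tokens=False)
print('decoder和原始文本是否一致:', response == new_prompt)
```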