diff --git a/README.md b/README.md
index d6e2742..c08314c 100644
--- a/README.md
+++ b/README.md
@@ -225,7 +225,7 @@ git clone https://huggingface.co/jingyaogong/MiniMind2
```bash
# load=0: load from pytorch model, load=1: load from transformers-hf model
-python eval_model.py --load 1
+python eval_model.py --load 1 --model_mode 2
```
### 4. Or Start WebUI
diff --git a/README_en.md b/README_en.md
index 988ffea..e349805 100644
--- a/README_en.md
+++ b/README_en.md
@@ -239,7 +239,7 @@ git clone https://huggingface.co/jingyaogong/MiniMind2
```bash
# load=0: load from pytorch model, load=1: load from transformers-hf model
-python eval_model.py --load 1
+python eval_model.py --load 1 --model_mode 2
```
### 4. Or Start WebUI
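Both README hunks add `--model_mode 2` to the evaluation command. For readers unfamiliar with the flag, the sketch below shows the kind of argument handling it implies; the mode-to-checkpoint mapping and file names are assumptions for illustration, not the exact contents of `eval_model.py`.

```python
# Hypothetical sketch of the --load / --model_mode switch in eval_model.py.
# The stage names (pretrain/full_sft/rlhf/reason) are assumed, not the repo's exact code.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--load', default=0, type=int,
                    help='0: native PyTorch weights, 1: transformers-format weights')
parser.add_argument('--model_mode', default=1, type=int,
                    help='which training-stage checkpoint to chat with '
                         '(assumed: 0=pretrain, 1=SFT, 2=RLHF, 3=reason)')
args = parser.parse_args()

if args.load == 0:
    stage = {0: 'pretrain', 1: 'full_sft', 2: 'rlhf', 3: 'reason'}[args.model_mode]
    print(f'would load ./out/{stage}_*.pth')  # assumed checkpoint layout
else:
    print('would load a transformers-format directory such as ./MiniMind2')
```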
diff --git a/scripts/train_tokenizer.py b/scripts/train_tokenizer.py
index 6661ace..868099a 100644
--- a/scripts/train_tokenizer.py
+++ b/scripts/train_tokenizer.py
@@ -25,7 +25,7 @@ def train_tokenizer():
data = json.loads(line)
yield data['text']
- data_path = '../dataset/tokenizer_train.jsonl'
+ data_path = '../dataset/pretrain_hq.jsonl'
# initialize the tokenizer
tokenizer = Tokenizer(models.BPE())
@@ -139,12 +139,12 @@ def eval_tokenizer():
print('encoder length:', len(model_inputs['input_ids']))
input_ids = model_inputs['input_ids']
- response = tokenizer.decode(input_ids, skip_special_tokens=True)
+ response = tokenizer.decode(input_ids, skip_special_tokens=False)
print('decoder output matches original text:', response == new_prompt)
def main():
- # train_tokenizer()
+ train_tokenizer()
eval_tokenizer()
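Two things change in `scripts/train_tokenizer.py`: training now reads `pretrain_hq.jsonl` instead of `tokenizer_train.jsonl`, and the round-trip check decodes with `skip_special_tokens=False`, so special tokens from the chat template survive and the decoded string can be compared to the original prompt exactly. Below is a minimal sketch of that flow, assuming the HuggingFace `tokenizers` API the script already uses; the vocabulary size and special-token list are placeholders, not the script's exact settings.

```python
# Minimal BPE-training sketch; vocab size and special tokens are assumed values.
import json
from tokenizers import Tokenizer, decoders, models, pre_tokenizers, trainers

def read_texts(path):
    # yield one training text per JSONL line, mirroring get_texts()
    with open(path, 'r', encoding='utf-8') as f:
        for line in f:
            yield json.loads(line)['text']

tokenizer = Tokenizer(models.BPE())
tokenizer.pre_tokenizer = pre_tokenizers.ByteLevel(add_prefix_space=False)
tokenizer.decoder = decoders.ByteLevel()

trainer = trainers.BpeTrainer(
    vocab_size=6400,                          # assumed size
    special_tokens=['<unk>', '<s>', '</s>'],  # assumed special tokens
    show_progress=True,
)
tokenizer.train_from_iterator(read_texts('../dataset/pretrain_hq.jsonl'), trainer=trainer)

# Round-trip check: keeping special tokens in the decode is what makes an exact
# string comparison against the original templated prompt meaningful.
ids = tokenizer.encode('<s>hello world</s>').ids
print(tokenizer.decode(ids, skip_special_tokens=False))
```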
diff --git a/train_distill_reason.py b/train_distill_reason.py
index 722deeb..1fe9ad1 100644
--- a/train_distill_reason.py
+++ b/train_distill_reason.py
@@ -35,7 +35,7 @@ def train_epoch(epoch, wandb):
# placeholder ids for the think/answer tags
start_of_think_ids = tokenizer('<think>').input_ids
end_of_think_ids = tokenizer('</think>').input_ids
- start_of_answer_ids = tokenizer('').input_ids
+ start_of_answer_ids = tokenizer('<answer>').input_ids
end_of_answer_ids = tokenizer('</answer>').input_ids
loss_fct = nn.CrossEntropyLoss(reduction='none')
start_time = time.time()
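The only change in this hunk is the answer-tag placeholder, but it helps to know what these ids are for: in a reasoning-distillation loop they typically mark where the `<think>`/`<answer>` markup sits in the targets so the token-level loss can be weighted more heavily there, which keeps the distilled model emitting well-formed think/answer blocks. The sketch below illustrates that mechanism under stated assumptions; the helper name, the weight value, and the surrounding variable names are illustrative, not this file's exact code.

```python
# Hedged sketch: up-weight the loss wherever the target token is one of the tag tokens.
# The weight of 10.0 and the helper name are assumptions for illustration.
import torch

def emphasize_tag_positions(labels: torch.Tensor, loss_mask: torch.Tensor,
                            tag_ids: list[int], weight: float = 10.0) -> torch.Tensor:
    tag_positions = torch.isin(labels, torch.tensor(tag_ids, device=labels.device))
    loss_mask = loss_mask.clone()
    loss_mask[tag_positions] = weight
    return loss_mask

# Usage with the placeholders tokenized above (names taken from the hunk, usage assumed):
# tag_ids = start_of_think_ids + end_of_think_ids + start_of_answer_ids + end_of_answer_ids
# loss_mask = emphasize_tag_positions(Y, loss_mask, tag_ids)
# per_token = loss_fct(logits.view(-1, logits.size(-1)), Y.view(-1)).view(Y.size())
# loss = (per_token * loss_mask).sum() / loss_mask.sum()
```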