diff --git a/5-dpo_train.py b/5-dpo_train.py
index 99314c5..498eb8b 100644
--- a/5-dpo_train.py
+++ b/5-dpo_train.py
@@ -1,44 +1,21 @@
 import os
+import warnings
 os.environ['CUDA_VISIBLE_DEVICES'] = '0'
-
-import torch
 from transformers import TrainingArguments, AutoModelForCausalLM, AutoTokenizer
 from trl import DPOTrainer
-from peft import get_peft_model, LoraConfig, TaskType
 from datasets import load_dataset
 
-def find_all_linear_names(model):
-    cls = torch.nn.Linear
-    lora_module_names = set()
-    for name, module in model.named_modules():
-        if isinstance(module, cls):
-            names = name.split('.')
-            lora_module_names.add(names[0] if len(names) == 1 else names[-1])
-
-    if 'lm_head' in lora_module_names:
-        lora_module_names.remove('lm_head')
-    return list(lora_module_names)
+warnings.filterwarnings('ignore')
 
 
 def init_model():
     device = 'cuda:0'
     # Do model patching and add fast LoRA weights
-    model_name_or_path = "minimind"
-    tokenizer_name_or_path = "minimind"
+    model_name_or_path = "minimind-v1-small"
+    tokenizer_name_or_path = "minimind-v1-small"
     model = AutoModelForCausalLM.from_pretrained(model_name_or_path, trust_remote_code=True)
     tokenizer = AutoTokenizer.from_pretrained(tokenizer_name_or_path, trust_remote_code=True, use_fast=False)
     tokenizer.pad_token = tokenizer.eos_token
-    target_modules = find_all_linear_names(model)
-    peft_config = LoraConfig(
-        task_type=TaskType.CAUSAL_LM,
-        r=8,
-        lora_alpha=16,
-        lora_dropout=0.1,
-        inference_mode=False,
-        target_modules=target_modules
-    )
-    model = get_peft_model(model, peft_config)
-    model.print_trainable_parameters()
     model = model.to(device)
     return model, tokenizer
 
@@ -47,15 +24,10 @@ if __name__ == '__main__':
     model, tokenizer = init_model()
     training_args = TrainingArguments(output_dir="./minimind_dpo",
                                       per_device_train_batch_size=1,
-                                      remove_unused_columns=False)
+                                      remove_unused_columns=False,
+                                      report_to="none")
 
-    ################
-    # Dataset
-    ################
-    # Make sure the path is correct and the file exists
     dataset_path = './dataset/dpo/train_data.json'
-
-    # Load the dataset
     train_dataset = load_dataset('json', data_files=dataset_path)
 
     dpo_trainer = DPOTrainer(
diff --git a/data_process.py b/data_process.py
index bea9371..c5dc041 100644
--- a/data_process.py
+++ b/data_process.py
@@ -114,24 +114,20 @@ def rl_process():
     # Dataset
     ################
 
-    dataset_path = ['./dataset/dpo/dpo_zh_demo.json',
-                    './dataset/dpo/train_data.json',
-                    './dataset/dpo/huozi_rlhf_data.json', ]
+    dataset_paths = [
+        './dataset/dpo/dpo_zh_demo.json',
+        './dataset/dpo/dpo_train_data.json',
+        './dataset/dpo/huozi_rlhf_data.json',
+    ]
 
-    train_dataset = load_dataset('json', data_files=dataset_path)
+    train_dataset = load_dataset('json', data_files=dataset_paths)
 
-    def process(row):
-        row["chosen"] = tokenizer.apply_chat_template(row["chosen"], tokenize=False)
-        row["reject"] = tokenizer.apply_chat_template(row["rejected"], tokenize=False)
-        return row
+    merged_data = []
+    for split in train_dataset.keys():
+        merged_data.extend(train_dataset[split])
 
-    ds = train_dataset.map(
-        process,
-        load_from_cache_file=False,
-    )
-
-    output_dataset_path = './dataset/dpo/train_data.json'
-    ds['train'].to_json(output_dataset_path, force_ascii=False, orient='records', lines=True)
+    with open('./dataset/dpo/train_data.json', 'w', encoding='utf-8') as f:
+        json.dump(merged_data, f, ensure_ascii=False, indent=4)
 
 
 if __name__ == "__main__":
@@ -143,7 +139,7 @@ if __name__ == "__main__":
     # 2: sft
     # 3: RL
     ################
-    process_type = 1
+    process_type = 3
 
     if process_type == 1:
         pretrain_process()
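
For context, the second hunk of `5-dpo_train.py` ends at the truncated `dpo_trainer = DPOTrainer(` context line. Below is a minimal sketch of how that call is typically completed, assuming an older `trl` release (pre-0.12) whose `DPOTrainer` accepts `tokenizer` and `beta` directly; the `beta` value and length limits are illustrative assumptions, not values taken from this diff.

```python
# Sketch only: completes the truncated `dpo_trainer = DPOTrainer(` call above,
# assuming a pre-0.12 trl API that takes `tokenizer`/`beta` as constructor args.
dpo_trainer = DPOTrainer(
    model,
    ref_model=None,                        # trl clones the policy as the frozen reference model
    args=training_args,
    beta=0.1,                              # assumed DPO temperature; not specified in the diff
    train_dataset=train_dataset['train'],  # expects 'prompt', 'chosen', 'rejected' columns
    tokenizer=tokenizer,
    max_length=512,                        # assumed truncation limits
    max_prompt_length=512,
)
dpo_trainer.train()
dpo_trainer.save_model('./minimind_dpo')
```

Note that with the LoRA config removed, `DPOTrainer` must materialize a full copy of the policy as the reference model, so the change trades the smaller optimizer state of PEFT for full-parameter DPO updates.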