From 135421690ee96389e5f63b8df41d13167d1ec3ea Mon Sep 17 00:00:00 2001 From: gongjy <2474590974@qq.com> Date: Sat, 12 Oct 2024 19:46:08 +0800 Subject: [PATCH] update data_process --- 5-dpo_train.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/5-dpo_train.py b/5-dpo_train.py index 498eb8b..3ae3df7 100644 --- a/5-dpo_train.py +++ b/5-dpo_train.py @@ -1,5 +1,6 @@ import os import warnings + os.environ['CUDA_VISIBLE_DEVICES'] = '0' from transformers import TrainingArguments, AutoModelForCausalLM, AutoTokenizer from trl import DPOTrainer @@ -11,8 +12,8 @@ warnings.filterwarnings('ignore') def init_model(): device = 'cuda:0' # Do model patching and add fast LoRA weights - model_name_or_path = "minimind-v1-small" - tokenizer_name_or_path = "minimind-v1-small" + model_name_or_path = "minimind-v1" + tokenizer_name_or_path = "minimind-v1" model = AutoModelForCausalLM.from_pretrained(model_name_or_path, trust_remote_code=True) tokenizer = AutoTokenizer.from_pretrained(tokenizer_name_or_path, trust_remote_code=True, use_fast=False) tokenizer.pad_token = tokenizer.eos_token @@ -22,10 +23,14 @@ def init_model(): if __name__ == '__main__': model, tokenizer = init_model() - training_args = TrainingArguments(output_dir="./minimind_dpo", - per_device_train_batch_size=1, - remove_unused_columns=False, - report_to="none") + training_args = TrainingArguments( + output_dir="./minimind_dpo", + per_device_train_batch_size=1, + remove_unused_columns=False, + report_to="none", + save_steps=2000, + learning_rate=4e-5 + ) dataset_path = './dataset/dpo/train_data.json' train_dataset = load_dataset('json', data_files=dataset_path)