Minimind/5-dpo_train.py

import os
import warnings
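# Make only the first GPU visible so training runs on a single card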
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
from transformers import AutoModelForCausalLM, AutoTokenizer
from trl import DPOConfig, DPOTrainer
from datasets import load_dataset

warnings.filterwarnings('ignore')


def init_model():
    device = 'cuda:0'
    # Load the MiniMind checkpoint and its tokenizer as the DPO policy model
    model_name_or_path = "minimind-v1"
    tokenizer_name_or_path = "minimind-v1"
    model = AutoModelForCausalLM.from_pretrained(model_name_or_path, trust_remote_code=True)
    tokenizer = AutoTokenizer.from_pretrained(tokenizer_name_or_path, trust_remote_code=True, use_fast=False)
    # Use the EOS token for padding
    tokenizer.pad_token = tokenizer.eos_token
    model = model.to(device)
    return model, tokenizer


if __name__ == '__main__':
    model, tokenizer = init_model()

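    # Training arguments: checkpoints are written to ./minimind_dpo every 2000 steps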
    training_config = DPOConfig(
        output_dir="./minimind_dpo",
        per_device_train_batch_size=1,
        remove_unused_columns=False,
        report_to="none",
        save_steps=2000,
        learning_rate=4e-5
    )
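
    # Each JSON record is expected to provide "prompt", "chosen" and "rejected"
    # text fields, the default column names DPOTrainer consumes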
    dataset_path = './dataset/dpo/train_data.json'
    train_dataset = load_dataset('json', data_files=dataset_path)

    dpo_trainer = DPOTrainer(
        model,
        ref_model=None,  # with None, TRL uses a frozen copy of the policy as the reference model
        args=training_config,
        beta=0.1,
        train_dataset=train_dataset['train'],
        tokenizer=tokenizer,
        max_length=512,
        max_prompt_length=512
    )
    dpo_trainer.train()