Compare commits
No commits in common. "6590cef3581360222aa70e7be48e1ad76f44170e" and "f33476bc76fe7fb0bc0088c4748e692b9fdaac9b" have entirely different histories.
6590cef358
...
f33476bc76
@@ -40,8 +40,8 @@ LOG_FILE="$LOG_DIR/experiment.log"
|
|||||||
# ----------------------------------------------------------------------------
|
# ----------------------------------------------------------------------------
|
||||||
# 🤖 硬件配置
|
# 🤖 硬件配置
|
||||||
# ----------------------------------------------------------------------------
|
# ----------------------------------------------------------------------------
|
||||||
CUDA_VISIBLE_DEVICES="0,1"
|
CUDA_VISIBLE_DEVICES="0,1,2,3"
|
||||||
NUM_PROCESSES="2"
|
NUM_PROCESSES="4"
|
||||||
MIXED_PRECISION="bf16"
|
MIXED_PRECISION="bf16"
|
||||||
MAIN_PROCESS_PORT="29500"
|
MAIN_PROCESS_PORT="29500"
|
||||||
|
|
||||||
@@ -66,9 +66,9 @@ DISABLE_DB="false"
|
|||||||
# 🤖 训练超参数
|
# 🤖 训练超参数
|
||||||
# ----------------------------------------------------------------------------
|
# ----------------------------------------------------------------------------
|
||||||
EPOCHS="3"
|
EPOCHS="3"
|
||||||
EMBEDDING_EPOCH="2"
|
EMBEDDING_EPOCH="42"
|
||||||
BATCH_SIZE="42" # 🔥 降低批次大小以适应更复杂的计算
|
BATCH_SIZE="4" # 🔥 降低批次大小以适应更复杂的计算
|
||||||
ACCUMULATION_STEPS="8" # 🔥 增加累积步数保持有效批次大小
|
ACCUMULATION_STEPS="4" # 🔥 增加累积步数保持有效批次大小
|
||||||
LEARNING_RATE="2e-4" # 🔥 适度降低学习率提升稳定性
|
LEARNING_RATE="2e-4" # 🔥 适度降低学习率提升稳定性
|
||||||
DTYPE="bfloat16"
|
DTYPE="bfloat16"
|
||||||
GRAD_CLIP="1.0"
|
GRAD_CLIP="1.0"
|
||||||
@@ -80,8 +80,8 @@ SIMILARITY_LOSS_COEF="0.15" # 🔥 相似度损失系数(核心损失)
|
|||||||
DIVERSITY_LOSS_COEF="0.08" # 🔥 多样性损失系数(避免候选重复)
|
DIVERSITY_LOSS_COEF="0.08" # 🔥 多样性损失系数(避免候选重复)
|
||||||
|
|
||||||
# 数据和缓存路径
|
# 数据和缓存路径
|
||||||
DATA_PATH="dataset/stable/merged_pretrain.jsonl"
|
DATA_PATH="/home/zym/Code/stable/merged_pretrain.jsonl"
|
||||||
DATABASE_INIT_PATH="dataset/stable/sentence_trex_data.json"
|
DATABASE_INIT_PATH="/home/zym/Code/stable/sentence_trex_data.json"
|
||||||
CLUSTER_CACHE_PATH="None" # 禁用聚类缓存
|
CLUSTER_CACHE_PATH="None" # 禁用聚类缓存
|
||||||
VAL_DATA_PATH="dataset/stable/eval_data.json"
|
VAL_DATA_PATH="dataset/stable/eval_data.json"
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user