Compare commits

...

2 Commits

Author SHA1 Message Date
Jax922
feeccf733c DynamicKV-LLM Pretrain v1.1.1 2025-05-22 10:05:31 +08:00
Jax922
42e3d38a3f 使用变量代替固定值 2025-05-21 08:14:36 +00:00
2 changed files with 3 additions and 3 deletions

View File

@@ -627,7 +627,7 @@ class MiniMindLM(PreTrainedModel):
# Specific layers for v path
self.downsample_v_specific = nn.Sequential(
nn.Conv1d(128*8, 128, kernel_size=1, padding='same'),
- nn.Conv1d(128, 8, kernel_size=1, padding='same')
+ nn.Conv1d(128, self.params.knowledge_length, kernel_size=1, padding='same')
)
# Specific layers for q path

View File

@@ -46,5 +46,5 @@ CUDA_VISIBLE_DEVICES=0,1,2,3 accelerate launch \
--use_flash_attn \
--profile \
--profile_interval 10\
- --knowlwdge_num 1024 \
- --knowlwdge_length 8
+ --knowledge_num 16384 \
+ --knowledge_length 64