Compare commits
2 Commits
d7fe504e1e
...
feeccf733c
Author | SHA1 | Date | |
---|---|---|---|
![]() |
feeccf733c | ||
![]() |
42e3d38a3f |
@@ -627,7 +627,7 @@ class MiniMindLM(PreTrainedModel):
|
|||||||
# Specific layers for v path
|
# Specific layers for v path
|
||||||
self.downsample_v_specific = nn.Sequential(
|
self.downsample_v_specific = nn.Sequential(
|
||||||
nn.Conv1d(128*8, 128, kernel_size=1, padding='same'),
|
nn.Conv1d(128*8, 128, kernel_size=1, padding='same'),
|
||||||
nn.Conv1d(128, 8, kernel_size=1, padding='same')
|
nn.Conv1d(128, self.params.knowledge_length, kernel_size=1, padding='same')
|
||||||
)
|
)
|
||||||
|
|
||||||
# Specific layers for q path
|
# Specific layers for q path
|
||||||
|
@@ -46,5 +46,5 @@ CUDA_VISIBLE_DEVICES=0,1,2,3 accelerate launch \
|
|||||||
--use_flash_attn \
|
--use_flash_attn \
|
||||||
--profile \
|
--profile \
|
||||||
--profile_interval 10\
|
--profile_interval 10\
|
||||||
--knowlwdge_num 1024 \
|
--knowledge_num 16384 \
|
||||||
--knowlwdge_length 8
|
--knowledge_length 64
|
||||||
|
Loading…
x
Reference in New Issue
Block a user