Fix data_process bug: append to clean_seq_monkey.bin ('ab') instead of overwriting it ('wb')
This commit is contained in:
parent
5f8279f661
commit
b4359b3335
@ -687,6 +687,8 @@ minimind模型本身没有使用较大的数据集训练,也没有针对回答
|
||||
|
||||
<a href="https://github.com/chuanzhubin"><img src="https://avatars.githubusercontent.com/u/2813798" width="70px" height="70px"/></a>
|
||||
|
||||
<a href="https://github.com/iomgaa-ycz"><img src="https://avatars.githubusercontent.com/u/124225682" width="70px" height="70px"/></a>
|
||||
|
||||
|
||||
## 😊鸣谢
|
||||
|
||||
|
@ -756,6 +756,8 @@ your model with third-party UIs, such as fastgpt, OpenWebUI, etc.
|
||||
|
||||
<a href="https://github.com/chuanzhubin"><img src="https://avatars.githubusercontent.com/u/2813798" width="70px" height="70px"/></a>
|
||||
|
||||
<a href="https://github.com/iomgaa-ycz"><img src="https://avatars.githubusercontent.com/u/124225682" width="70px" height="70px"/></a>
|
||||
|
||||
|
||||
## 😊Acknowledgments
|
||||
|
||||
|
@ -95,7 +95,7 @@ def process_seq_monkey(chunk_size=50000):
|
||||
|
||||
if doc_ids:
|
||||
arr = np.array(doc_ids, dtype=np.uint16)
|
||||
with open(f'./dataset/clean_seq_monkey.bin', 'wb') as f:
|
||||
with open(f'./dataset/clean_seq_monkey.bin', 'ab') as f:
|
||||
f.write(arr.tobytes())
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user