fix data_process bug

2024-09-23 20:11:19 +08:00 · 2024-09-23 20:11:19 +08:00 · b4359b3335
commit b4359b3335
parent 5f8279f661
3 changed files with 5 additions and 1 deletion
--- a/README.md
+++ b/README.md
@@ -687,6 +687,8 @@ minimind模型本身没有使用较大的数据集训练，也没有针对回答
 &nbsp;
 <a href="https://github.com/chuanzhubin"><img src="https://avatars.githubusercontent.com/u/2813798" width="70px" height="70px"/></a>
 &nbsp;
+<a href="https://github.com/iomgaa-ycz"><img src="https://avatars.githubusercontent.com/u/124225682" width="70px" height="70px"/></a>
+&nbsp;

 ## 😊鸣谢

--- a/README_en.md
+++ b/README_en.md
@@ -756,6 +756,8 @@ your model with third-party UIs, such as fastgpt, OpenWebUI, etc.
 &nbsp;
 <a href="https://github.com/chuanzhubin"><img src="https://avatars.githubusercontent.com/u/2813798" width="70px" height="70px"/></a>
 &nbsp;
+<a href="https://github.com/iomgaa-ycz"><img src="https://avatars.githubusercontent.com/u/124225682" width="70px" height="70px"/></a>
+&nbsp;

 ## 😊Thanks for

--- a/data_process.py
+++ b/data_process.py
@@ -95,7 +95,7 @@ def process_seq_monkey(chunk_size=50000):

    if doc_ids:
        arr = np.array(doc_ids, dtype=np.uint16)
-        with open(f'./dataset/clean_seq_monkey.bin', 'wb') as f:
+        with open(f'./dataset/clean_seq_monkey.bin', 'ab') as f:
            f.write(arr.tobytes())