From b4359b3335776a583377ee70771f126b15108e95 Mon Sep 17 00:00:00 2001
From: gongjy <2474590974@qq.com>
Date: Mon, 23 Sep 2024 20:11:19 +0800
Subject: [PATCH] fix data_process bug
---
README.md | 2 ++
README_en.md | 2 ++
data_process.py | 2 +-
3 files changed, 5 insertions(+), 1 deletion(-)
diff --git a/README.md b/README.md
index d4f871e..ffaa032 100644
--- a/README.md
+++ b/README.md
@@ -687,6 +687,8 @@ minimind模型本身没有使用较大的数据集训练,也没有针对回答
+
+
## 😊鸣谢
diff --git a/README_en.md b/README_en.md
index 7c88224..e2e558a 100644
--- a/README_en.md
+++ b/README_en.md
@@ -756,6 +756,8 @@ your model with third-party UIs, such as fastgpt, OpenWebUI, etc.
+
+
## 😊Thanks for
diff --git a/data_process.py b/data_process.py
index 9c03628..047ff0e 100644
--- a/data_process.py
+++ b/data_process.py
@@ -95,7 +95,7 @@ def process_seq_monkey(chunk_size=50000):
if doc_ids:
arr = np.array(doc_ids, dtype=np.uint16)
- with open(f'./dataset/clean_seq_monkey.bin', 'wb') as f:
+ with open(f'./dataset/clean_seq_monkey.bin', 'ab') as f:
f.write(arr.tobytes())