diff --git a/README.md b/README.md
index 5e74d40..88fba40 100644
--- a/README.md
+++ b/README.md
@@ -35,11 +35,16 @@
 
 ---
 
+
+
+
-https://github.com/user-attachments/assets/88b98128-636e-43bc-a419-b1b1403c2055
+![](./images/minimind-demo.gif)
 
-[Bilibili Video Link](https://www.bilibili.com/video/BV12dHPeqE72/?share_source=copy_web&vd_source=670c2504f88726f8cf4a21ef6147c0e8)
+[ModelScope Online Testing](https://www.modelscope.cn/studios/gongjy/minimind) | [Bilibili Video Link](https://www.bilibili.com/video/BV12dHPeqE72/?share_source=copy_web&vd_source=670c2504f88726f8cf4a21ef6147c0e8)
+
+---
@@ -116,7 +121,7 @@ https://github.com/user-attachments/assets/88b98128-636e-43bc-a419-b1b1403c2055
 
 - The project has been deployed to ModelScope makerspace, where you can experience it:
 
-- [ModelScope Online Experience](https://www.modelscope.cn/studios/gongjy/minimind)
+- [🔗ModelScope Online Experience🔗](https://www.modelscope.cn/studios/gongjy/minimind)
@@ -175,16 +180,6 @@ python 2-eval.py
 streamlit run fast_inference.py
 ```
 
-![](./images/streamlit.png)
-
-
-
-The project has been deployed to ModelScope makerspace, where you can experience it:
-
-[ModelScope Online Experience](https://www.modelscope.cn/studios/gongjy/minimind)
-
-
-
 # 📌 Quick Start Train
 
@@ -198,7 +193,7 @@ streamlit run fast_inference.py
 pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple
 ```
 
-```python
+```text
 # Test whether torch can use CUDA
 import torch
 print(torch.cuda.is_available())
diff --git a/README_en.md b/README_en.md
index 72210e8..c466cbe 100644
--- a/README_en.md
+++ b/README_en.md
@@ -43,12 +43,15 @@
-https://github.com/user-attachments/assets/88b98128-636e-43bc-a419-b1b1403c2055
+![](./images/minimind-demo.gif)
 
-[Bilibili Video](https://www.bilibili.com/video/BV12dHPeqE72/?share_source=copy_web&vd_source=670c2504f88726f8cf4a21ef6147c0e8)
+[ModelScope Online Testing](https://www.modelscope.cn/studios/gongjy/minimind) | [Bilibili Video Link](https://www.bilibili.com/video/BV12dHPeqE72/?share_source=copy_web&vd_source=670c2504f88726f8cf4a21ef6147c0e8)
+
+---
+
 # 📌 Introduction
 
 In the field of large language models (LLMs) such as GPT, LLaMA, GLM, etc., while their performance is impressive, the
@@ -187,18 +190,6 @@ or you can run streamlit, launch a web page to chat with minimind-v1
 streamlit run fast_inference.py
 ```
 
-![](./images/streamlit.png)
-
-
-
-The project has been deployed to ModelScope makerspace, where you can experience:
-
-[ModelScope Online](https://www.modelscope.cn/studios/gongjy/minimind)
-
-
-
-
-
 # 📌 Quick Start Train
 
 * 0.Clone the project code
@@ -213,7 +204,7 @@ The project has been deployed to ModelScope makerspace, where you can experience
 pip install -r requirements.txt
 ```
 
-```python
+```text
 # Test if torch can use CUDA
 import torch
 print(torch.cuda.is_available())
diff --git a/images/minimind-demo.gif b/images/minimind-demo.gif
new file mode 100644
index 0000000..2459050
Binary files /dev/null and b/images/minimind-demo.gif differ
diff --git a/model/model.py b/model/model.py
index 20535fb..5976ad2 100644
--- a/model/model.py
+++ b/model/model.py
@@ -27,11 +27,15 @@ class RMSNorm(torch.nn.Module):
         return output * self.weight
 
 
-def precompute_pos_cis(dim: int, end: int, theta: float = 10000.0):
+def precompute_pos_cis(dim: int, end: int, theta: float = 10000.0, train_len: int = 512):
     freqs = 1.0 / (theta ** (torch.arange(0, dim, 2)[: (dim // 2)].float() / dim))
     t = torch.arange(end, device=freqs.device)  # type: ignore
     freqs = torch.outer(t, freqs).float()  # type: ignore
     pos_cis = torch.polar(torch.ones_like(freqs), freqs)  # complex64
+    # # Compute the scaling factor
+    # scale = train_len / end
+    # # Scale the rotary embedding for linear length extrapolation (commented out because this small model depends heavily on fitting pos_cis, so direct linear extrapolation does not work well)
+    # pos_cis = pos_cis * scale
     return pos_cis
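A side note on the commented-out extrapolation above (an editor's sketch, not part of this commit): multiplying the complex `pos_cis` by `scale = train_len / end` rescales the magnitude of each rotation, while the angles that RoPE actually encodes positions with are left unchanged. Linear position interpolation, as usually described for RoPE, instead compresses the position indices before the outer product, so that unseen positions map back into the trained angle range. A minimal sketch of that angle-scaling variant follows; the function name `precompute_pos_cis_interpolated` and the demo values are hypothetical.

```python
import torch


def precompute_pos_cis_interpolated(dim: int, end: int, theta: float = 10000.0,
                                    train_len: int = 512) -> torch.Tensor:
    # Same per-pair inverse frequencies as precompute_pos_cis.
    freqs = 1.0 / (theta ** (torch.arange(0, dim, 2)[: (dim // 2)].float() / dim))
    t = torch.arange(end).float()
    if end > train_len:
        # Compress positions so the largest position still lands inside the
        # trained range: this scales the rotation *angle*, not the magnitude.
        t = t * (train_len / end)
    angles = torch.outer(t, freqs)
    # Unit-magnitude complex rotations, like the original pos_cis.
    return torch.polar(torch.ones_like(angles), angles)


if __name__ == "__main__":
    pos_cis = precompute_pos_cis_interpolated(dim=64, end=1024, train_len=512)
    print(pos_cis.shape)        # torch.Size([1024, 32])
    print(pos_cis.abs().max())  # ~1.0: magnitudes stay untouched
```

Whether this helps here is a separate question: as the commit's own comment notes, a small model fits heavily to its trained `pos_cis`, so even a position remapping of this kind may still degrade quality.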