diff --git a/chat_openai_api.py b/chat_openai_api.py
index 7eddbf2..9b85055 100644
--- a/chat_openai_api.py
+++ b/chat_openai_api.py
@@ -2,7 +2,7 @@ from openai import OpenAI
 
 client = OpenAI(
     api_key="none",
-    base_url="http://202.195.167.142:8000/v1"
+    base_url="http://202.195.167.206:8000/v1"
 )
 
 # Initialize the chat history list
diff --git a/fast_infenence.py b/fast_infenence.py
index f2d2d40..822bde8 100644
--- a/fast_infenence.py
+++ b/fast_infenence.py
@@ -4,8 +4,8 @@ import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
 from transformers.generation.utils import GenerationConfig
 
-st.set_page_config(page_title="MiniMind-V1 Demo(无历史上文)")
-st.title("MiniMind-V1 Demo(无历史上文)")
+st.set_page_config(page_title="MiniMind-V1 108M(无历史上文)")
+st.title("MiniMind-V1 108M(无历史上文)")
 
 model_id = "minimind-v1"
 
@@ -15,7 +15,6 @@ top_k = 8
 max_seq_len = 1 * 1024
 # -----------------------------------------------------------------------------
 
-
 @st.cache_resource
 def load_model_tokenizer():
     model = AutoModelForCausalLM.from_pretrained(
@@ -31,7 +30,6 @@ def load_model_tokenizer():
     generation_config = GenerationConfig.from_pretrained(model_id)
     return model, tokenizer, generation_config
 
-
 def clear_chat_messages():
     del st.session_state.messages
 
@@ -50,7 +48,6 @@ def init_chat_messages():
     return st.session_state.messages
 
-
 # max_new_tokens = st.sidebar.slider("max_new_tokens", 0, 1024, 512, step=1)
 # top_p = st.sidebar.slider("top_p", 0.0, 1.0, 0.8, step=0.01)
 # top_k = st.sidebar.slider("top_k", 0, 100, 0, step=1)
 
@@ -119,7 +116,6 @@ def main():
         # messages.append({"role": "assistant", "content": assistant_answer})
         messages.append({"role": "assistant", "content": response})
 
-
         # print("messages: ", json.dumps(response, ensure_ascii=False), flush=True)
 
     st.button("清空对话", on_click=clear_chat_messages)
diff --git a/model/LMConfig.py b/model/LMConfig.py
index bf0e4b9..5e958ba 100644
--- a/model/LMConfig.py
+++ b/model/LMConfig.py
@@ -17,7 +17,7 @@ class LMConfig(PretrainedConfig):
             norm_eps: float = 1e-5,
             max_seq_len: int = 512,
             dropout: float = 0.0,
-            flash_attn: bool = True,
+            flash_attn: bool = False,
             ####################################################
             # Here are the specific configurations of MOE
             # When use_moe is false, the following is invalid
diff --git a/model/model.py b/model/model.py
index 1608d91..0c563d1 100644
--- a/model/model.py
+++ b/model/model.py
@@ -527,4 +527,4 @@ class Transformer(PreTrainedModel):
 
         # write to binary file
         f.close()
-        print(f"wrote {filepath}")
+        print(f"wrote {filepath}")
\ No newline at end of file
diff --git a/train_tokenizer.py b/train_tokenizer.py
index cee2ce3..80e7a86 100644
--- a/train_tokenizer.py
+++ b/train_tokenizer.py
@@ -56,10 +56,10 @@ def train_tokenizer():
     assert tokenizer.token_to_id("</s>") == 2
 
     # Save the tokenizer
-    tokenizer_dir = "./minimind_tokenizer"
+    tokenizer_dir = "./model/minimind_tokenizer"
     os.makedirs(tokenizer_dir, exist_ok=True)
     tokenizer.save(os.path.join(tokenizer_dir, "tokenizer.json"))
-    tokenizer.model.save("./minimind_tokenizer")
+    tokenizer.model.save("./model/minimind_tokenizer")
 
     # Manually create the config file
     config = {