🔬 实验基础: 基于实验1.4.7的重要改进 🎯 研究目标: 提升Memory Bank的知识保护和检索准确性 🚀 三大核心创新: 1️⃣ 智能冻结策略改进 • 从随机冻结 → 顺序冻结前20%记忆条目 • 保护重要知识: 假设前面的记忆条目更重要,需要优先保护 • freeze_ratio=0.2: 冻结前20%的memory_bank条目 2️⃣ 查询-知识相似度Loss • 新增相似度监督信号: 衡量查询向量与选中知识的匹配度 • 余弦相似度计算: F.cosine_similarity(query, selected_memory) • 相似度统计: 平均值、最大值、最小值、标准差全方位监控 3️⃣ 维度截断问题修复 • 统一维度处理: knowledge_dim → dim,避免信息截断 • concat_dim修正: dim + num_selected * dim (之前是knowledge_dim) • 记忆向量完整保留: 解决查询结果维度被不当压缩的问题 🏗️ 架构优化细节: • GatedMemoryFusion维度一致性: 统一使用dim维度 • 记忆池化策略: 使用平均池化压缩knowledge_length维度 • 残差连接增强: 改进memory_output与主路径的融合 📊 实验配置: • experiment_1_4_9-02: 8层网络完整测试 • experiment_1_4_9-04: 1层网络最小验证 • EMA更新机制: decay=0.9, update_freq=5 • 数据库初始化: sentence_trex_data.json文本数据 💡 技术假设: 顺序冻结策略能更好地保护重要知识,相似度Loss能提升检索精度, 维度统一能减少信息丢失,三者结合将显著改善Memory Bank性能。 🛠️ 基础设施改进: • UUID映射系统: 跟踪记忆条目的原始数据源 • 增强缓存机制: 支持映射文件自动生成 • 监控系统升级: 相似度统计信息实时追踪 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
124 lines
4.5 KiB
JSON
124 lines
4.5 KiB
JSON
{
    // VS Code debug launch configurations for the MiniMind training and test scripts.
    // NOTE(review): recent releases of the VS Code Python Debugger extension prefer
    // "type": "debugpy" over "python" — confirm against the extension version in use.
    "version": "0.2.0",
    "configurations": [
        {
            // Runs train_pretrain_accelerate.py directly. The full argument list is kept
            // below for reference but is commented out, so no command-line arguments
            // are passed by this configuration.
            "name": "MiniMind Training (Direct Python)",
            "type": "python",
            "request": "launch",
            "program": "${workspaceFolder}/train_pretrain_accelerate.py",
            // "args": [
            //     "--out_dir", "out",
            //     "--epochs", "3",
            //     "--embedding_epoch", "2",
            //     "--batch_size", "128",
            //     "--learning_rate", "8e-5",
            //     "--dtype", "bfloat16",
            //     "--use_swanlab",
            //     "--swanlab_project", "MiniMind-Pretrain",
            //     "--num_workers", "1",
            //     "--accumulation_steps", "16",
            //     "--grad_clip", "0.5",
            //     "--warmup_iters", "0",
            //     "--log_interval", "1",
            //     "--save_interval", "10000",
            //     "--dim", "512",
            //     "--n_layers", "8",
            //     "--max_seq_len", "512",
            //     "--data_path", "./dataset/stable/merged_pretrain.jsonl",
            //     "--profile",
            //     "--profile_interval", "10",
            //     "--use_flash_attn",
            //     "--knowledge_num", "1048576",
            //     "--knowledge_length", "32",
            //     "--database_init_path", "./dataset/stable/sentence_trex_data.json",
            //     "--fast_clustering",
            //     "--cluster_cache_path", "./cache/cluster_tokens_single.pt",
            //     "--memory_monitor_interval", "10",
            //     "--model_type", "model",
            //     "--model_size", "538"
            // ],
            "env": {
                "CUDA_VISIBLE_DEVICES": "0",
                "NCCL_DEBUG": "INFO",
                "PYTHONFAULTHANDLER": "1"
            },
            "cwd": "${workspaceFolder}",
            "console": "integratedTerminal",
            // false: the debugger also steps into code outside the workspace.
            "justMyCode": false,
            "stopOnEntry": false,
            "python": "${workspaceFolder}/.venv/bin/python"
        },
        {
            // Runs the same training script for one epoch with a small explicit
            // argument set and --model_type model_original.
            "name": "MiniMind Training (Direct Python - Simple)",
            "type": "python",
            "request": "launch",
            "program": "${workspaceFolder}/train_pretrain_accelerate.py",
            "args": [
                "--epochs", "1",
                "--batch_size", "32",
                "--learning_rate", "1e-4",
                "--log_interval", "10",
                "--profile_interval", "2",
                "--model_type", "model_original"
            ],
            "env": {
                "CUDA_VISIBLE_DEVICES": "0"
            },
            "cwd": "${workspaceFolder}",
            "console": "integratedTerminal",
            "justMyCode": false,
            "stopOnEntry": false,
            "python": "${workspaceFolder}/.venv/bin/python"
        },
        {
            // Runs test.py with the workspace virtualenv interpreter.
            "name": "MiniMind Test (Direct Python)",
            "type": "python",
            "request": "launch",
            "program": "${workspaceFolder}/test.py",
            "env": {
                "CUDA_VISIBLE_DEVICES": "0"
            },
            "cwd": "${workspaceFolder}",
            "console": "integratedTerminal",
            "justMyCode": false,
            "python": "${workspaceFolder}/.venv/bin/python"
        },
        {
            // Starts the training script through the accelerate.commands.launch module
            // (one process, bf16 mixed precision) instead of running the script directly.
            // The script path and its own arguments follow the launcher options in "args".
            "name": "MiniMind Training Debug (Accelerate)",
            "type": "python",
            "request": "launch",
            "module": "accelerate.commands.launch",
            "args": [
                "--num_processes=1",
                "--mixed_precision=bf16",
                "${workspaceFolder}/train_pretrain_accelerate.py",
                "--epochs", "1",
                "--batch_size", "32",
                "--learning_rate", "1e-4",
                "--log_interval", "10",
                "--profile_interval", "2",
                "--model_type", "model_original"
            ],
            "env": {
                "CUDA_VISIBLE_DEVICES": "0"
            },
            "cwd": "${workspaceFolder}",
            "console": "integratedTerminal",
            "justMyCode": false,
            "stopOnEntry": false,
            "python": "${workspaceFolder}/.venv/bin/python"
        },
        {
            // Same settings as "MiniMind Test (Direct Python)" except that no "python"
            // interpreter path is set here.
            "name": "MiniMind Test Only",
            "type": "python",
            "request": "launch",
            "program": "${workspaceFolder}/test.py",
            "env": {
                "CUDA_VISIBLE_DEVICES": "0"
            },
            "cwd": "${workspaceFolder}",
            "console": "integratedTerminal",
            "justMyCode": false
        }
    ]
}