triage/analysis/dataset_statistics.py
iomgaa 7c723fbc4b 删除废弃的disease_analyst智能体模块
删除了不再使用的disease_analyst模块的所有相关文件:
- agent.py: 疾病分析智能体主逻辑
- prompt.py: 疾病分析提示模板
- response_model.py: 响应数据模型
- __init__.py: 模块初始化文件

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-09-03 21:44:01 +08:00

155 lines
4.7 KiB
Python
Executable File

#!/usr/bin/env python3
"""
统计dataset/bbb.json中所有病例的一级科室和二级科室集合
"""
import json
from pathlib import Path
from collections import Counter
def load_dataset(file_path: str) -> list:
    """Load the list of cases from a UTF-8 encoded JSON file.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The decoded top-level JSON array. An empty list is returned, after
        printing a diagnostic, when the file cannot be read, is not valid
        UTF-8 / JSON, or its top-level value is not an array.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except (OSError, ValueError) as e:
        # OSError: missing or unreadable file. ValueError: parent class of
        # both json.JSONDecodeError and UnicodeDecodeError.
        print(f"加载文件 {file_path} 时出错: {e}")
        return []

    # Callers iterate the result and call .get() on each element, so anything
    # other than a list would only fail later with a confusing error.
    if not isinstance(data, list):
        print(f"加载文件 {file_path} 时出错: 顶层JSON应为列表, 实际为 {type(data).__name__}")
        return []
    return data
def _clean_department(case: dict, key: str) -> str:
    """Return the stripped string stored under *key*, or '' if it is not a string."""
    value = case.get(key)
    return value.strip() if isinstance(value, str) else ''


def analyze_departments(data: list) -> dict:
    """Count level-1 and level-2 departments across all cases.

    Each case is read through its '一级科室' (level-1) and '二级科室' (level-2)
    keys. Values are whitespace-stripped; missing, null, non-string or blank
    values are ignored rather than raising.

    Args:
        data: List of case dicts.

    Returns:
        A dict with:
            'level1_counter':   Counter of level-1 department names.
            'level2_counter':   Counter of level-2 department names.
            'level1_to_level2': level-1 name -> sorted list of the level-2
                                names seen together with it.
            'total_cases':      len(data), including cases with no department.
            'unique_level1':    number of distinct level-1 names.
            'unique_level2':    number of distinct level-2 names.
    """
    level1_counter = Counter()
    level2_counter = Counter()
    level1_to_level2 = {}

    for case in data:
        level1 = _clean_department(case, '一级科室')
        level2 = _clean_department(case, '二级科室')

        if level1:
            level1_counter[level1] += 1
        if level2:
            level2_counter[level2] += 1
        # Only record a parent/child link when both levels are present.
        if level1 and level2:
            level1_to_level2.setdefault(level1, set()).add(level2)

    return {
        'level1_counter': level1_counter,
        'level2_counter': level2_counter,
        # Sorted so the result does not depend on set iteration order,
        # which changes between interpreter runs for strings.
        'level1_to_level2': {k: sorted(v) for k, v in level1_to_level2.items()},
        'total_cases': len(data),
        'unique_level1': len(level1_counter),
        'unique_level2': len(level2_counter),
    }
def print_statistics(stats: dict):
    """Print a human-readable department report to stdout.

    Args:
        stats: Dict providing 'total_cases', 'unique_level1', 'unique_level2',
            'level1_counter', 'level2_counter' and 'level1_to_level2'.

    The report has three parts: summary totals, the two department
    distributions ordered by descending count with their share of
    'total_cases', and the level-1 -> level-2 mapping in sorted order.
    """
    heavy_rule = "=" * 60
    light_rule = "-" * 40

    def ranked(counter):
        # Highest count first; ties keep the counter's own order.
        return sorted(counter.items(), key=lambda pair: pair[1], reverse=True)

    print(heavy_rule)
    print("DATASET 科室统计报告")
    print(heavy_rule)
    print(f"总病例数: {stats['total_cases']}")
    print(f"一级科室种类数: {stats['unique_level1']}")
    print(f"二级科室种类数: {stats['unique_level2']}")
    print()

    sections = (
        ("一级科室分布:", 'level1_counter'),
        ("二级科室分布:", 'level2_counter'),
    )
    for title, counter_key in sections:
        print(title)
        print(light_rule)
        for name, n in ranked(stats[counter_key]):
            share = (n / stats['total_cases']) * 100
            print(f" {name}: {n} 例 ({share:.1f}%)")
        print()

    print("一级科室 → 二级科室映射:")
    print(light_rule)
    mapping = stats['level1_to_level2']
    for parent in sorted(mapping):
        print(f" {parent}:")
        for child in sorted(mapping[parent]):
            # The count comes from the level-2 counter, which is keyed by
            # level-2 name only, so it is the total for that name overall.
            n = stats['level2_counter'][child]
            print(f" - {child}: {n}")
        print()
def save_statistics(stats: dict, output_file: str):
    """Write the department statistics to a UTF-8 JSON file.

    Args:
        stats: Dict providing 'level1_counter', 'level2_counter',
            'level1_to_level2', 'total_cases', 'unique_level1' and
            'unique_level2'.
        output_file: Destination path. Missing parent directories are
            created; an existing file is overwritten.

    Raises:
        OSError: If the directory or file cannot be created or written.
    """
    save_data = {
        # Iterating a mapping yields its unique keys, so no set()/list() is needed.
        '一级科室列表': sorted(stats['level1_counter']),
        '二级科室列表': sorted(stats['level2_counter']),
        '一级科室计数': dict(stats['level1_counter']),
        '二级科室计数': dict(stats['level2_counter']),
        # Sort each child list so repeated runs produce an identical file
        # regardless of the order the caller supplied.
        '一级科室到二级科室映射': {
            level1: sorted(children)
            for level1, children in stats['level1_to_level2'].items()
        },
        '统计信息': {
            '总病例数': stats['total_cases'],
            '一级科室种类数': stats['unique_level1'],
            '二级科室种类数': stats['unique_level2'],
        },
    }

    target = Path(output_file)
    # open(..., 'w') does not create directories, so make sure they exist first.
    target.parent.mkdir(parents=True, exist_ok=True)
    with target.open('w', encoding='utf-8') as f:
        # ensure_ascii=False keeps the Chinese names readable in the file.
        json.dump(save_data, f, ensure_ascii=False, indent=2)
def main():
    """Load the dataset, print the department report and save it as JSON.

    Paths are resolved relative to the directory two levels above this file:
    the input is dataset/bbb.json and the output is
    analysis/dataset_department_statistics.json. Returns early, after printing
    a message, when the loaded dataset is empty.
    """
    base_dir = Path(__file__).parent.parent
    dataset_file = base_dir / "dataset" / "bbb.json"
    output_file = base_dir / "analysis" / "dataset_department_statistics.json"

    print(f"正在加载数据集: {dataset_file}")
    data = load_dataset(str(dataset_file))
    if not data:
        print("无法加载数据集")
        return
    print(f"成功加载 {len(data)} 个病例")

    stats = analyze_departments(data)
    print_statistics(stats)

    save_statistics(stats, str(output_file))
    print(f"统计结果已保存到: {output_file}")

    # Also print the bare department names as quoted lines.
    banner = "=" * 60
    print("\n" + banner)
    print("科室列表(纯文本格式)")
    print(banner)
    listings = (
        ("一级科室集合:", 'level1_counter'),
        ("\n二级科室集合:", 'level2_counter'),
    )
    for heading, counter_key in listings:
        print(heading)
        for dept in sorted(stats[counter_key]):
            print(f" '{dept}'")
if __name__ == "__main__":
    # Run the report only when executed as a script, not when imported.
    main()