删除了不再使用的disease_analyst模块的所有相关文件:
- agent.py: 疾病分析智能体主逻辑
- prompt.py: 疾病分析提示模板
- response_model.py: 响应数据模型
- __init__.py: 模块初始化文件

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
155 lines
4.7 KiB
Python
Executable File
155 lines
4.7 KiB
Python
Executable File
#!/usr/bin/env python3
"""
Collect the sets of first-level and second-level departments across all
cases in dataset/bbb.json.
"""
|
|
|
|
import json
|
|
from pathlib import Path
|
|
from collections import Counter
|
|
|
|
|
|
def load_dataset(file_path: str) -> list:
    """Load the case dataset from a JSON file.

    Args:
        file_path: Path of the UTF-8 encoded JSON file to read.

    Returns:
        The parsed JSON content. When the file cannot be opened or does not
        contain valid JSON, an error message is printed and an empty list
        is returned instead.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            return json.load(f)
    except (OSError, ValueError) as e:
        # OSError covers missing or unreadable files; ValueError is the base
        # class of both json.JSONDecodeError and UnicodeDecodeError.
        print(f"加载文件 {file_path} 时出错: {e}")
        return []
|
|
|
|
|
|
def analyze_departments(data: list) -> dict:
    """Summarise how cases are distributed over departments.

    Each case is read through ``case.get`` using the keys ``'一级科室'``
    (first-level department) and ``'二级科室'`` (second-level department).
    Values are stripped of surrounding whitespace; missing, null, or blank
    values are ignored.

    Args:
        data: List of case dicts.

    Returns:
        A dict with the following keys:
            level1_counter: Counter of first-level department names.
            level2_counter: Counter of second-level department names.
            level1_to_level2: Maps each first-level department to the sorted
                list of second-level departments seen together with it.
            total_cases: Number of entries in ``data``.
            unique_level1: Number of distinct first-level departments.
            unique_level2: Number of distinct second-level departments.
    """
    level1_counter = Counter()
    level2_counter = Counter()
    level1_to_level2 = {}

    for case in data:
        # ``or ''`` treats an explicit JSON null like a missing key; calling
        # ``.strip()`` on None would raise AttributeError.
        level1 = (case.get('一级科室') or '').strip()
        level2 = (case.get('二级科室') or '').strip()

        if level1:
            level1_counter[level1] += 1
        if level2:
            level2_counter[level2] += 1
        if level1 and level2:
            level1_to_level2.setdefault(level1, set()).add(level2)

    return {
        'level1_counter': level1_counter,
        'level2_counter': level2_counter,
        # Sorted so the lists are deterministic; set iteration order is not.
        'level1_to_level2': {k: sorted(v) for k, v in level1_to_level2.items()},
        'total_cases': len(data),
        'unique_level1': len(level1_counter),
        'unique_level2': len(level2_counter),
    }
|
|
|
|
|
|
def _print_distribution(title: str, counter: dict, total: int):
    """Print one department distribution, most frequent department first."""
    print(title)
    print("-" * 40)
    for dept, count in sorted(counter.items(), key=lambda x: x[1], reverse=True):
        # Guard against a zero total so a hand-built stats dict cannot
        # trigger ZeroDivisionError.
        percentage = (count / total) * 100 if total else 0.0
        print(f" {dept}: {count} 例 ({percentage:.1f}%)")


def print_statistics(stats: dict):
    """Print the department statistics report to standard output.

    Args:
        stats: Dict providing the keys ``total_cases``, ``unique_level1``,
            ``unique_level2``, ``level1_counter``, ``level2_counter`` and
            ``level1_to_level2``.
    """
    print("=" * 60)
    print("DATASET 科室统计报告")
    print("=" * 60)

    total = stats['total_cases']
    print(f"总病例数: {total}")
    print(f"一级科室种类数: {stats['unique_level1']}")
    print(f"二级科室种类数: {stats['unique_level2']}")
    print()

    _print_distribution("一级科室分布:", stats['level1_counter'], total)
    print()
    _print_distribution("二级科室分布:", stats['level2_counter'], total)
    print()

    print("一级科室 → 二级科室映射:")
    print("-" * 40)
    level2_counter = stats['level2_counter']
    for level1, level2_list in sorted(stats['level1_to_level2'].items()):
        print(f" {level1}:")
        for level2 in sorted(level2_list):
            # The count is the overall total for this second-level name, not
            # the count restricted to the current first-level department.
            count = level2_counter[level2]
            print(f" - {level2}: {count} 例")
        print()
|
|
|
|
|
|
def save_statistics(stats: dict, output_file: str):
    """Write the department statistics to a JSON file.

    Args:
        stats: Dict providing the keys ``level1_counter``,
            ``level2_counter``, ``level1_to_level2``, ``total_cases``,
            ``unique_level1`` and ``unique_level2``.
        output_file: Destination path. Missing parent directories are
            created; an existing file is overwritten.
    """
    save_data = {
        # Iterating a mapping yields its unique keys, so no list()/set()
        # wrapper is needed before sorting.
        '一级科室列表': sorted(stats['level1_counter']),
        '二级科室列表': sorted(stats['level2_counter']),
        '一级科室计数': dict(stats['level1_counter']),
        '二级科室计数': dict(stats['level2_counter']),
        '一级科室到二级科室映射': stats['level1_to_level2'],
        '统计信息': {
            '总病例数': stats['total_cases'],
            '一级科室种类数': stats['unique_level1'],
            '二级科室种类数': stats['unique_level2'],
        },
    }

    # open(..., 'w') does not create directories, so make sure the target
    # directory exists before writing.
    Path(output_file).parent.mkdir(parents=True, exist_ok=True)
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(save_data, f, ensure_ascii=False, indent=2)
|
|
|
|
|
|
def main():
    """Run the department statistics pipeline end to end.

    Loads ``dataset/bbb.json``, prints the statistics report, saves the
    result to ``analysis/dataset_department_statistics.json`` and finally
    prints both department name sets as plain quoted lists. Both paths are
    resolved relative to the parent of this script's directory.
    """
    base_dir = Path(__file__).parent.parent
    dataset_file = base_dir / "dataset" / "bbb.json"
    output_file = base_dir / "analysis" / "dataset_department_statistics.json"

    print(f"正在加载数据集: {dataset_file}")
    data = load_dataset(str(dataset_file))

    # load_dataset returns an empty list on failure, so an empty dataset is
    # reported the same way as a failed load.
    if not data:
        print("无法加载数据集")
        return

    print(f"成功加载 {len(data)} 个病例")

    stats = analyze_departments(data)
    print_statistics(stats)

    save_statistics(stats, str(output_file))
    print(f"统计结果已保存到: {output_file}")

    # Extra output: the department names as plain quoted lists.
    print("\n" + "=" * 60)
    print("科室列表(纯文本格式)")
    print("=" * 60)

    print("一级科室集合:")
    for dept in sorted(stats['level1_counter']):
        print(f" '{dept}'")

    print("\n二级科室集合:")
    for dept in sorted(stats['level2_counter']):
        print(f" '{dept}'")
|
|
|
|
|
|
# Run the pipeline only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()