triage/analysis/workflow_completeness_checker.py
iomgaa 7c723fbc4b 删除废弃的disease_analyst智能体模块
删除了不再使用的disease_analyst模块的所有相关文件:
- agent.py: 疾病分析智能体主逻辑
- prompt.py: 疾病分析提示模板
- response_model.py: 响应数据模型
- __init__.py: 模块初始化文件

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-09-03 21:44:01 +08:00

176 lines
6.2 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
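Workflow completeness checker.

Checks whether each workflow file has completed all of its tasks, and generates
filter lists for other analysis scripts to use.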
"""
import os
import json
import sys
from pathlib import Path
from typing import List, Dict, Tuple
class WorkflowCompletenessChecker:
    """Classify workflow ``*.jsonl`` logs as complete or incomplete.

    A file is complete when its last non-blank line is a JSON object whose
    ``event_type`` equals ``"workflow_complete"``. Results are collected as
    file names (not full paths) and can be written out as filter lists plus
    a JSON report.
    """

    def __init__(self, data_dir: str, output_dir: str):
        """Create a checker.

        Args:
            data_dir: Directory that is scanned for ``*.jsonl`` files.
            output_dir: Directory that receives the generated list/report files.
        """
        self.data_dir = Path(data_dir)
        self.output_dir = Path(output_dir)
        # File names whose last record is not a "workflow_complete" event.
        # Files that failed to be read are listed here as well, so that the
        # filter list excludes them.
        self.incomplete_files: List[str] = []
        # File names whose last record is a "workflow_complete" event.
        self.complete_files: List[str] = []
        # File names for which check_file_completeness() hit an exception.
        self.error_files: List[str] = []

    def check_file_completeness(self, filepath: Path) -> bool:
        """Return whether a single workflow file is complete.

        The file is streamed line by line and only the last non-blank line is
        kept, so trailing blank lines do not hide the final record and large
        logs are not loaded into memory.

        Args:
            filepath: Path of the file to inspect.

        Returns:
            True if the last non-blank line is a JSON object with
            ``event_type == "workflow_complete"``; False for an empty file,
            a last line that is not valid JSON or not a JSON object, or any
            error while reading (the file name is then also appended to
            ``error_files``).
        """
        try:
            last_line = ""
            with open(filepath, 'r', encoding='utf-8') as f:
                for raw_line in f:
                    text = raw_line.strip()
                    if text:
                        last_line = text

            if not last_line:
                return False

            try:
                last_event = json.loads(last_line)
            except json.JSONDecodeError:
                return False

            # Valid JSON that is not an object (list, number, ...) cannot be
            # a completion event.
            if not isinstance(last_event, dict):
                return False
            return last_event.get('event_type') == 'workflow_complete'
        except Exception as e:
            # Per-file boundary: one unreadable file must not abort the scan.
            print(f"检查文件 {filepath.name} 时出错: {e}")
            self.error_files.append(filepath.name)
            return False

    def scan_directory(self) -> None:
        """Check every ``*.jsonl`` file directly inside ``data_dir``.

        Files are processed in sorted order and their names are appended to
        ``complete_files`` or ``incomplete_files``. If ``data_dir`` does not
        exist a message is printed and nothing is scanned.
        """
        if not self.data_dir.exists():
            print(f"数据目录不存在: {self.data_dir}")
            return

        jsonl_files = sorted(self.data_dir.glob("*.jsonl"))
        print(f"找到 {len(jsonl_files)} 个数据文件")

        for filepath in jsonl_files:
            if self.check_file_completeness(filepath):
                self.complete_files.append(filepath.name)
            else:
                self.incomplete_files.append(filepath.name)

        print(f"完成文件: {len(self.complete_files)}")
        print(f"未完成文件: {len(self.incomplete_files)}")
        print(f"错误文件: {len(self.error_files)}")

    def generate_filter_files(self) -> None:
        """Write the filter lists and the JSON report into ``output_dir``.

        Creates ``incomplete_files.txt`` and ``complete_files.txt`` (one file
        name per line) and ``completeness_report.json``. The output directory
        is created if it is missing.
        """
        self.output_dir.mkdir(parents=True, exist_ok=True)

        # List of incomplete files, consumed by other scripts.
        incomplete_list_file = self.output_dir / "incomplete_files.txt"
        with open(incomplete_list_file, 'w', encoding='utf-8') as f:
            f.writelines(f"{filename}\n" for filename in self.incomplete_files)

        complete_list_file = self.output_dir / "complete_files.txt"
        with open(complete_list_file, 'w', encoding='utf-8') as f:
            f.writelines(f"{filename}\n" for filename in self.complete_files)

        # Files that errored are already part of incomplete_files (see
        # scan_directory), so they must not be added a second time here.
        total = len(self.complete_files) + len(self.incomplete_files)
        report_file = self.output_dir / "completeness_report.json"
        report_data = {
            "scan_directory": str(self.data_dir),
            "total_files": total,
            "complete_files_count": len(self.complete_files),
            "incomplete_files_count": len(self.incomplete_files),
            "error_files_count": len(self.error_files),
            "completion_rate": len(self.complete_files) / total if total > 0 else 0.0,
            "incomplete_files": self.incomplete_files,
            "error_files": self.error_files
        }
        with open(report_file, 'w', encoding='utf-8') as f:
            json.dump(report_data, f, ensure_ascii=False, indent=2)

        print("\n过滤文件已生成:")
        print(f" - 未完成文件列表: {incomplete_list_file}")
        print(f" - 完成文件列表: {complete_list_file}")
        print(f" - 完成度报告: {report_file}")

    def print_summary(self) -> None:
        """Print a human-readable summary of the scan results.

        Nothing is printed when no file has been classified. At most the
        first ten incomplete file names are listed.
        """
        total = len(self.complete_files) + len(self.incomplete_files)
        if total <= 0:
            return

        completion_rate = len(self.complete_files) / total * 100
        print("\n=== 工作流完成度检查汇总 ===")
        print(f"总文件数: {total}")
        print(f"完成文件: {len(self.complete_files)} 个 ({completion_rate:.1f}%)")
        print(f"未完成文件: {len(self.incomplete_files)}")

        if self.error_files:
            print(f"错误文件: {len(self.error_files)}")

        if self.incomplete_files:
            print("\n未完成的文件前10个:")
            for filename in self.incomplete_files[:10]:
                print(f" - {filename}")
            if len(self.incomplete_files) > 10:
                print(f" ... 还有 {len(self.incomplete_files) - 10}")

    def run_check(self) -> None:
        """Run the full pipeline: scan, write the filter files, print the summary."""
        print("开始检查工作流完成度...")
        self.scan_directory()
        self.generate_filter_files()
        self.print_summary()
        print("完成度检查完成!")
def main():
    """Command-line entry point.

    Uses ``sys.argv[1]`` as the data directory and ``sys.argv[2]`` as the
    output directory when both are given; otherwise falls back to the
    built-in default paths. Then runs the complete check.
    """
    # Both positional arguments are required to override the defaults;
    # a single argument is ignored.
    if len(sys.argv) >= 3:
        data_dir, output_dir = sys.argv[1], sys.argv[2]
    else:
        data_dir, output_dir = "results/results0902", "analysis/0902"

    WorkflowCompletenessChecker(data_dir=data_dir, output_dir=output_dir).run_check()


# Run the check only when executed as a script, not when imported.
if __name__ == "__main__":
    main()