删除了不再使用的disease_analyst模块的所有相关文件: - agent.py: 疾病分析智能体主逻辑 - prompt.py: 疾病分析提示模板 - response_model.py: 响应数据模型 - __init__.py: 模块初始化文件 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
176 lines
6.2 KiB
Python
176 lines
6.2 KiB
Python
#!/usr/bin/env python3
|
||
# -*- coding: utf-8 -*-
|
||
"""
|
||
工作流完成度检查器
|
||
检查workflow文件是否完成所有任务,生成过滤列表供其他分析脚本使用
|
||
"""
|
||
|
||
import os
|
||
import json
|
||
import sys
|
||
from pathlib import Path
|
||
from typing import List, Dict, Tuple
|
||
|
||
|
||
class WorkflowCompletenessChecker:
    """Classify workflow ``*.jsonl`` logs as complete or incomplete.

    A log counts as complete when its last non-blank line is a JSON object
    whose ``event_type`` equals ``"workflow_complete"``. Results are stored
    as lists of file names and can be written out as filter lists for other
    analysis scripts.
    """

    def __init__(self, data_dir: str, output_dir: str):
        """Set up the checker.

        Args:
            data_dir: Directory that is scanned for ``*.jsonl`` files.
            output_dir: Directory the filter lists and report are written to.
        """
        self.data_dir = Path(data_dir)
        self.output_dir = Path(output_dir)
        # File names (not full paths) collected by scan_directory().
        self.incomplete_files: List[str] = []
        self.complete_files: List[str] = []
        # Files that could not be read. Because an unreadable file also makes
        # check_file_completeness() return False, a scan lists such files in
        # incomplete_files as well.
        self.error_files: List[str] = []

    @staticmethod
    def _read_last_nonblank_line(filepath: Path) -> str:
        """Return the last non-blank line of *filepath*, stripped, or ``""``."""
        last_line = ""
        with open(filepath, 'r', encoding='utf-8') as f:
            # Stream the file instead of loading every line into memory.
            for raw_line in f:
                stripped = raw_line.strip()
                if stripped:
                    last_line = stripped
        return last_line

    def check_file_completeness(self, filepath: Path) -> bool:
        """Check whether a single workflow file is complete.

        Trailing blank lines are ignored, so a log that ends with extra
        newlines is still judged by its last real event.

        Args:
            filepath: Path of the file to inspect.

        Returns:
            True if the last non-blank line is a JSON object whose
            ``event_type`` is ``"workflow_complete"``; False for empty files,
            invalid or non-object JSON, any other event type, or when the
            file cannot be read (the file name is then also appended to
            ``error_files``).
        """
        try:
            last_line = self._read_last_nonblank_line(filepath)
            if not last_line:
                return False

            try:
                last_event = json.loads(last_line)
            except json.JSONDecodeError:
                return False

            # Valid JSON that is not an object (list, number, ...) cannot be
            # a completion event; treat it as incomplete, not as an error.
            if not isinstance(last_event, dict):
                return False
            return last_event.get('event_type') == 'workflow_complete'

        except Exception as e:
            # Deliberate best-effort boundary: one unreadable file must not
            # abort the scan of the remaining files.
            print(f"检查文件 {filepath.name} 时出错: {e}")
            self.error_files.append(filepath.name)
            return False

    def scan_directory(self) -> None:
        """Scan ``data_dir`` for ``*.jsonl`` files and classify each one.

        Results from any previous scan are discarded first, so calling this
        method repeatedly does not duplicate entries.
        """
        self.complete_files = []
        self.incomplete_files = []
        self.error_files = []

        if not self.data_dir.exists():
            print(f"数据目录不存在: {self.data_dir}")
            return

        jsonl_files = sorted(self.data_dir.glob("*.jsonl"))
        print(f"找到 {len(jsonl_files)} 个数据文件")

        for filepath in jsonl_files:
            if self.check_file_completeness(filepath):
                self.complete_files.append(filepath.name)
            else:
                self.incomplete_files.append(filepath.name)

        print(f"完成文件: {len(self.complete_files)} 个")
        print(f"未完成文件: {len(self.incomplete_files)} 个")
        print(f"错误文件: {len(self.error_files)} 个")

    def generate_filter_files(self) -> None:
        """Write the filter lists and the JSON completeness report.

        Creates ``output_dir`` if needed and writes three files into it:
        ``incomplete_files.txt`` and ``complete_files.txt`` (one file name per
        line) and ``completeness_report.json``.
        """
        self.output_dir.mkdir(parents=True, exist_ok=True)

        # List of incomplete files, consumed by other scripts as a filter.
        incomplete_list_file = self.output_dir / "incomplete_files.txt"
        with open(incomplete_list_file, 'w', encoding='utf-8') as f:
            f.writelines(f"{filename}\n" for filename in self.incomplete_files)

        complete_list_file = self.output_dir / "complete_files.txt"
        with open(complete_list_file, 'w', encoding='utf-8') as f:
            f.writelines(f"{filename}\n" for filename in self.complete_files)

        complete_count = len(self.complete_files)
        incomplete_count = len(self.incomplete_files)
        # Unreadable files are already part of incomplete_files, so adding
        # len(error_files) here would count them twice.
        total = complete_count + incomplete_count

        report_file = self.output_dir / "completeness_report.json"
        report_data = {
            "scan_directory": str(self.data_dir),
            "total_files": total,
            "complete_files_count": complete_count,
            "incomplete_files_count": incomplete_count,
            "error_files_count": len(self.error_files),
            "completion_rate": complete_count / total if total > 0 else 0.0,
            "incomplete_files": self.incomplete_files,
            "error_files": self.error_files
        }

        with open(report_file, 'w', encoding='utf-8') as f:
            json.dump(report_data, f, ensure_ascii=False, indent=2)

        print(f"\n过滤文件已生成:")
        print(f" - 未完成文件列表: {incomplete_list_file}")
        print(f" - 完成文件列表: {complete_list_file}")
        print(f" - 完成度报告: {report_file}")

    def print_summary(self) -> None:
        """Print a human-readable summary of the collected results."""
        total = len(self.complete_files) + len(self.incomplete_files)
        if total > 0:
            completion_rate = len(self.complete_files) / total * 100
            print(f"\n=== 工作流完成度检查汇总 ===")
            print(f"总文件数: {total}")
            print(f"完成文件: {len(self.complete_files)} 个 ({completion_rate:.1f}%)")
            print(f"未完成文件: {len(self.incomplete_files)} 个")

        if self.error_files:
            print(f"错误文件: {len(self.error_files)} 个")

        if self.incomplete_files:
            # Only the first ten names are listed to keep the output short.
            print(f"\n未完成的文件(前10个):")
            for filename in self.incomplete_files[:10]:
                print(f" - {filename}")
            if len(self.incomplete_files) > 10:
                print(f" ... 还有 {len(self.incomplete_files) - 10} 个")

    def run_check(self) -> None:
        """Run the full pipeline: scan, write filter files, print summary."""
        print("开始检查工作流完成度...")

        # 1. Scan the data directory.
        self.scan_directory()

        # 2. Write the filter lists and the report.
        self.generate_filter_files()

        # 3. Print the summary.
        self.print_summary()

        print("完成度检查完成!")
|
||
|
||
|
||
def main():
    """Run the completeness check from the command line.

    Usage: ``script.py [DATA_DIR OUTPUT_DIR]``. Both paths must be supplied
    together; with fewer than two arguments the built-in default paths are
    used instead.
    """
    import sys

    default_paths = ("results/results0902", "analysis/0902")
    cli_paths = sys.argv[1:3]

    # A lone argument is not enough to override the defaults.
    data_dir, output_dir = cli_paths if len(cli_paths) == 2 else default_paths

    WorkflowCompletenessChecker(data_dir=data_dir, output_dir=output_dir).run_check()
|
||
|
||
|
||
# Run the check only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()