优化:集成Evaluator智能体并支持日志文件序号功能
- 在Step 7之后添加Evaluator评分步骤,将step流程从8步扩展为9步
- 新增evaluator模块的__init__.py文件确保正确导入
- 优化WorkflowLogger支持外部传入的case_index序号,生成更规范的日志文件名
- MedicalWorkflow类新增case_index参数,支持批量处理时的文件标识
- 完善Evaluator agent在workflow中的集成,提供医生问诊质量的多维度评价

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
parent
399e4d4447
commit
076135fc87
5
agent_system/evaluetor/__init__.py
Normal file
5
agent_system/evaluetor/__init__.py
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
from .agent import Evaluator
|
||||||
|
from .prompt import EvaluatorPrompt
|
||||||
|
from .response_model import EvaluatorResult, EvaluationDimension
|
||||||
|
|
||||||
|
__all__ = ["Evaluator", "EvaluatorPrompt", "EvaluatorResult", "EvaluationDimension"]
|
||||||
@ -11,7 +11,8 @@ class MedicalWorkflow:
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, case_data: Dict[str, Any], model_type: str = "gpt-oss:latest",
|
def __init__(self, case_data: Dict[str, Any], model_type: str = "gpt-oss:latest",
|
||||||
llm_config: Optional[Dict] = None, max_steps: int = 30, log_dir: str = "logs"):
|
llm_config: Optional[Dict] = None, max_steps: int = 30, log_dir: str = "logs",
|
||||||
|
case_index: Optional[int] = None):
|
||||||
"""
|
"""
|
||||||
初始化医疗问诊工作流
|
初始化医疗问诊工作流
|
||||||
|
|
||||||
@ -21,6 +22,7 @@ class MedicalWorkflow:
|
|||||||
llm_config: 语言模型配置,默认为None
|
llm_config: 语言模型配置,默认为None
|
||||||
max_steps: 最大执行步数,默认为30
|
max_steps: 最大执行步数,默认为30
|
||||||
log_dir: 日志目录,默认为"logs"
|
log_dir: 日志目录,默认为"logs"
|
||||||
|
case_index: 病例序号,用于日志文件命名
|
||||||
"""
|
"""
|
||||||
self.case_data = case_data
|
self.case_data = case_data
|
||||||
self.model_type = model_type
|
self.model_type = model_type
|
||||||
@ -30,7 +32,7 @@ class MedicalWorkflow:
|
|||||||
# 初始化核心组件
|
# 初始化核心组件
|
||||||
self.task_manager = TaskManager()
|
self.task_manager = TaskManager()
|
||||||
self.step_executor = StepExecutor(model_type=model_type, llm_config=self.llm_config)
|
self.step_executor = StepExecutor(model_type=model_type, llm_config=self.llm_config)
|
||||||
self.logger = WorkflowLogger(case_data=case_data, log_dir=log_dir)
|
self.logger = WorkflowLogger(case_data=case_data, log_dir=log_dir, case_index=case_index)
|
||||||
|
|
||||||
# 初始化工作流状态
|
# 初始化工作流状态
|
||||||
self.current_step = 0
|
self.current_step = 0
|
||||||
|
|||||||
@ -6,6 +6,7 @@ from agent_system.controller import TaskController
|
|||||||
from agent_system.prompter import Prompter
|
from agent_system.prompter import Prompter
|
||||||
from agent_system.inquirer import Inquirer
|
from agent_system.inquirer import Inquirer
|
||||||
from agent_system.virtual_patient import VirtualPatientAgent
|
from agent_system.virtual_patient import VirtualPatientAgent
|
||||||
|
from agent_system.evaluetor import Evaluator
|
||||||
from .task_manager import TaskManager, TaskPhase
|
from .task_manager import TaskManager, TaskPhase
|
||||||
from .workflow_logger import WorkflowLogger
|
from .workflow_logger import WorkflowLogger
|
||||||
|
|
||||||
@ -32,6 +33,7 @@ class StepExecutor:
|
|||||||
self.controller = TaskController(model_type=model_type, llm_config=self.llm_config)
|
self.controller = TaskController(model_type=model_type, llm_config=self.llm_config)
|
||||||
self.prompter = Prompter(model_type=model_type, llm_config=self.llm_config)
|
self.prompter = Prompter(model_type=model_type, llm_config=self.llm_config)
|
||||||
self.virtual_patient = VirtualPatientAgent(model_type=model_type, llm_config=self.llm_config)
|
self.virtual_patient = VirtualPatientAgent(model_type=model_type, llm_config=self.llm_config)
|
||||||
|
self.evaluator = Evaluator(model_type=model_type, llm_config=self.llm_config)
|
||||||
|
|
||||||
def execute_step(self,
|
def execute_step(self,
|
||||||
step_num: int,
|
step_num: int,
|
||||||
@ -123,7 +125,13 @@ class StepExecutor:
|
|||||||
)
|
)
|
||||||
step_result["doctor_question"] = doctor_question
|
step_result["doctor_question"] = doctor_question
|
||||||
|
|
||||||
# Step 8: 获取任务完成情况摘要
|
# Step 8: 使用Evaluator进行评分
|
||||||
|
evaluator_result = self._execute_evaluator(
|
||||||
|
step_num, logger, case_data, step_result
|
||||||
|
)
|
||||||
|
step_result["evaluator_result"] = evaluator_result
|
||||||
|
|
||||||
|
# Step 9: 获取任务完成情况摘要
|
||||||
step_result["task_completion_summary"] = task_manager.get_completion_summary()
|
step_result["task_completion_summary"] = task_manager.get_completion_summary()
|
||||||
|
|
||||||
step_result["success"] = True
|
step_result["success"] = True
|
||||||
@ -372,3 +380,73 @@ class StepExecutor:
|
|||||||
logger.log_error(step_num, "inquirer_error", error_msg)
|
logger.log_error(step_num, "inquirer_error", error_msg)
|
||||||
# 返回默认问题
|
# 返回默认问题
|
||||||
return "请您详细描述一下您的症状,包括什么时候开始的,有什么特点?"
|
return "请您详细描述一下您的症状,包括什么时候开始的,有什么特点?"
|
||||||
|
|
||||||
|
def _execute_evaluator(self, step_num: int, logger: "WorkflowLogger",
                       case_data: Dict[str, Any], step_result: Dict[str, Any]):
    """Run the Evaluator agent on the current round and log the outcome.

    Args:
        step_num: Number of the workflow step being evaluated; recorded in
            the log entry as ``current_round``.
        logger: Workflow logger used for both the execution record and any
            error record.
        case_data: Patient case passed to the evaluator as ``patient_case``.
        step_result: Results accumulated so far in this step. The doctor
            question, patient response and updated HPI / PH / chief
            complaint are read from it; missing keys default to "".

    Returns:
        The object returned by ``self.evaluator.evaluate_single_round``.
        If the evaluation itself fails (or its result lacks the fields that
        are logged), a placeholder ``EvaluatorResult`` whose dimensions all
        have score 0.0 is returned instead.
    """
    start_time = time.time()

    try:
        # Shape the round into the structure passed to the evaluator.
        round_data = {
            "patient_response": step_result.get("patient_response", ""),
            "doctor_inquiry": step_result.get("doctor_question", ""),
            "HPI": step_result.get("updated_hpi", ""),
            "PH": step_result.get("updated_ph", ""),
            "chief_complaint": step_result.get("updated_chief_complaint", ""),
        }

        # Recorded in the log only; the evaluator receives the case and the
        # round data as keyword arguments.
        input_data = {
            "patient_case": case_data,
            "current_round": step_num,
            "round_data": round_data,
        }

        result = self.evaluator.evaluate_single_round(
            patient_case=case_data,
            round_data=round_data,
        )

        execution_time = time.time() - start_time

        # Only these three dimensions are written to the log entry.
        output_data = {
            "clinical_inquiry": {
                "score": result.clinical_inquiry.score,
                "comment": result.clinical_inquiry.comment,
            },
            "communication_quality": {
                "score": result.communication_quality.score,
                "comment": result.communication_quality.comment,
            },
            "overall_professionalism": {
                "score": result.overall_professionalism.score,
                "comment": result.overall_professionalism.comment,
            },
            "summary": result.summary,
            "key_suggestions": result.key_suggestions,
        }
    except Exception as e:
        error_msg = f"Evaluator执行失败: {str(e)}"
        logger.log_error(step_num, "evaluator_error", error_msg)

        # Imported lazily: the result models are only needed on this path.
        from agent_system.evaluetor.response_model import EvaluatorResult, EvaluationDimension

        dimension_fields = (
            "clinical_inquiry",
            "diagnostic_reasoning",
            "communication_quality",
            "multi_round_consistency",
            "overall_professionalism",
            "present_illness_similarity",
            "past_history_similarity",
            "chief_complaint_similarity",
        )
        # Build a separate placeholder per field so that mutating one
        # dimension later cannot silently change the others.
        placeholders = {
            field: EvaluationDimension(score=0.0, comment="评价失败")
            for field in dimension_fields
        }
        return EvaluatorResult(
            **placeholders,
            summary="评价失败",
            key_suggestions=["系统需要调试"],
        )

    # Logging is kept outside the evaluation try-block: a failure to write
    # the log entry must not replace a valid evaluation with the placeholder.
    try:
        logger.log_agent_execution(step_num, "evaluator", input_data, output_data, execution_time)
    except Exception as e:
        logger.log_error(step_num, "evaluator_error", f"Evaluator日志记录失败: {str(e)}")

    return result
|
||||||
@ -10,16 +10,18 @@ class WorkflowLogger:
|
|||||||
负责将每个step的详细信息记录到jsonl格式文件中
|
负责将每个step的详细信息记录到jsonl格式文件中
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, case_data: Dict[str, Any], log_dir: str = "logs"):
|
def __init__(self, case_data: Dict[str, Any], log_dir: str = "logs", case_index: Optional[int] = None):
|
||||||
"""
|
"""
|
||||||
初始化日志记录器
|
初始化日志记录器
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
case_data: 病例数据
|
case_data: 病例数据
|
||||||
log_dir: 日志目录,默认为"logs"
|
log_dir: 日志目录,默认为"logs"
|
||||||
|
case_index: 病例序号,用于文件名标识
|
||||||
"""
|
"""
|
||||||
self.case_data = case_data
|
self.case_data = case_data
|
||||||
self.log_dir = log_dir
|
self.log_dir = log_dir
|
||||||
|
self.case_index = case_index
|
||||||
self.log_file_path = self._generate_log_file_path()
|
self.log_file_path = self._generate_log_file_path()
|
||||||
self.step_count = 0
|
self.step_count = 0
|
||||||
|
|
||||||
@ -36,15 +38,18 @@ class WorkflowLogger:
|
|||||||
Returns:
|
Returns:
|
||||||
str: 日志文件路径
|
str: 日志文件路径
|
||||||
"""
|
"""
|
||||||
# 生成基于病例内容的唯一标识
|
|
||||||
case_str = json.dumps(self.case_data, ensure_ascii=False, sort_keys=True)
|
|
||||||
case_hash = hashlib.md5(case_str.encode('utf-8')).hexdigest()[:8]
|
|
||||||
|
|
||||||
# 生成时间戳
|
# 生成时间戳
|
||||||
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||||
|
|
||||||
# 构建文件名
|
# 构建文件名,如果有序号则包含序号
|
||||||
|
if self.case_index is not None:
|
||||||
|
filename = f"workflow_{timestamp}_case_{self.case_index:04d}.jsonl"
|
||||||
|
else:
|
||||||
|
# 生成基于病例内容的唯一标识作为后备
|
||||||
|
case_str = json.dumps(self.case_data, ensure_ascii=False, sort_keys=True)
|
||||||
|
case_hash = hashlib.md5(case_str.encode('utf-8')).hexdigest()[:8]
|
||||||
filename = f"workflow_{timestamp}_{case_hash}.jsonl"
|
filename = f"workflow_{timestamp}_{case_hash}.jsonl"
|
||||||
|
|
||||||
return os.path.join(self.log_dir, filename)
|
return os.path.join(self.log_dir, filename)
|
||||||
|
|
||||||
def _log_workflow_start(self):
|
def _log_workflow_start(self):
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user