diff --git a/example/subagent-example/README.md b/example/subagent-example/README.md new file mode 100644 index 0000000..c1fbe15 --- /dev/null +++ b/example/subagent-example/README.md @@ -0,0 +1,428 @@ +# SubAgent系统使用指南 + +## 📚 概述 + +SubAgent是基于Agno框架构建的智能代理系统,为MedResearcher项目提供强大的AI功能。它支持多种LLM提供商,提供动态prompt构建、JSON结构化输出和零容错解析等核心功能。 + +## ✨ 核心特性 + +### 🤖 智能代理核心 +- **多提供商支持**: 阿里云(qwen)、DeepSeek、OpenAI等主流LLM服务 +- **动态Prompt**: 支持模板变量替换的灵活prompt构建系统 +- **结构化输出**: 基于Pydantic模型的JSON格式化响应 +- **零容错解析**: 多策略JSON解析,确保即使不完美输出也能解析 + +### 🔧 配置管理 +- **YAML配置**: 统一的配置文件管理,支持环境变量 +- **模型工厂**: 自动化的模型实例创建和参数管理 +- **灵活配置**: 支持运行时参数覆盖和动态配置 + +### 🛠 开发便利性 +- **类型安全**: 完整的类型提示支持 +- **异常处理**: 详细的错误信息和异常层级 +- **调试支持**: 内置日志和调试模式 + +## 🚀 快速开始 + +### 1. 基础设置 + +首先确保已安装依赖: +```bash +uv add agno pydantic pyyaml +``` + +### 2. 配置LLM服务 + +在`src/config/llm_config.yaml`中配置你的LLM服务: +```yaml +aliyun: + base_url: "https://dashscope.aliyuncs.com/compatible-mode/v1" + api_key: "${DASHSCOPE_API_KEY}" + models: + qwen-max: + class: "OpenAILike" + params: + id: "qwen-max" + temperature: 0.3 +``` + +### 3. 设置环境变量 + +创建`.env`文件或设置环境变量: +```bash +export DASHSCOPE_API_KEY="your_api_key_here" +``` + +### 4. 
创建你的第一个Agent + +```python +from src.agent_system import SubAgent +from pydantic import BaseModel, Field + +# 定义响应模型 +class TaskResult(BaseModel): + summary: str = Field(description="任务总结") + confidence: float = Field(description="置信度", ge=0.0, le=1.0) + +# 创建SubAgent +agent = SubAgent( + provider="aliyun", + model_name="qwen-max", + name="task_agent", + instructions=["你是一个专业的任务处理专家"], + prompt_template="请分析以下任务: {task_description}", + response_model=TaskResult +) + +# 执行任务 +result = agent.run(template_vars={"task_description": "数据分析项目"}) +print(f"总结: {result.summary}") +print(f"置信度: {result.confidence}") +``` + +## 📖 详细使用指南 + +### SubAgent核心类 + +#### 初始化参数 + +```python +SubAgent( + provider: str, # LLM提供商名称 + model_name: str, # 模型名称 + instructions: List[str], # 指令列表(可选) + name: str, # Agent名称(可选) + description: str, # Agent描述(可选) + prompt_template: str, # 动态prompt模板(可选) + response_model: BaseModel, # Pydantic响应模型(可选) + config: Dict[str, Any], # 自定义配置(可选) + **agent_kwargs # 传递给Agno Agent的额外参数 +) +``` + +#### 核心方法 + +##### 1. build_prompt() - 构建动态Prompt +```python +# 设置带变量的prompt模板 +agent.update_prompt_template(""" +请分析以下{data_type}数据: + +数据内容: {data_content} +分析目标: {analysis_goal} + +请提供详细的分析结果。 +""") + +# 构建具体prompt +prompt = agent.build_prompt({ + "data_type": "销售", + "data_content": "Q1销售数据...", + "analysis_goal": "找出增长趋势" +}) +``` + +##### 2. run() - 执行推理 +```python +# 方式1: 使用模板变量 +result = agent.run(template_vars={ + "input_text": "待分析的文本内容" +}) + +# 方式2: 直接提供prompt +result = agent.run(prompt="请分析这段文本的情感倾向") + +# 方式3: 带额外参数 +result = agent.run( + template_vars={"data": "测试数据"}, + temperature=0.7, + max_tokens=1000 +) +``` + +##### 3. 
get_model_info() - 获取模型信息 +```python +info = agent.get_model_info() +print(f"Agent名称: {info['name']}") +print(f"提供商: {info['provider']}") +print(f"模型: {info['model_name']}") +print(f"是否有prompt模板: {info['has_prompt_template']}") +``` + +### Pydantic响应模型 + +#### 基础模型定义 +```python +from pydantic import BaseModel, Field +from typing import List, Optional + +class AnalysisResult(BaseModel): + """分析结果模型""" + + summary: str = Field(description="分析总结") + key_points: List[str] = Field(description="关键要点列表") + confidence: float = Field(description="置信度", ge=0.0, le=1.0) + recommendations: Optional[List[str]] = Field(default=None, description="建议列表") + + class Config: + json_encoders = { + float: lambda v: round(v, 3) if v is not None else None + } +``` + +#### 复杂嵌套模型 +```python +class DetailedItem(BaseModel): + name: str = Field(description="项目名称") + value: float = Field(description="数值") + category: str = Field(description="分类") + +class ComprehensiveResult(BaseModel): + items: List[DetailedItem] = Field(description="详细项目列表") + total_count: int = Field(description="总数量", ge=0) + summary: str = Field(description="整体总结") +``` + +### 配置管理详解 + +#### LLM配置文件结构 (llm_config.yaml) +```yaml +# 阿里云配置 +aliyun: + base_url: "https://dashscope.aliyuncs.com/compatible-mode/v1" + api_key: "${DASHSCOPE_API_KEY}" + models: + qwen-max: + class: "OpenAILike" + params: + id: "qwen-max" + temperature: 0.3 + max_tokens: 4000 + qwen-plus: + class: "OpenAILike" + params: + id: "qwen-plus" + temperature: 0.5 + +# DeepSeek配置 +deepseek: + base_url: "https://api.deepseek.com/v1" + api_key: "${DEEPSEEK_API_KEY}" + models: + deepseek-v3: + class: "OpenAILike" + params: + id: "deepseek-chat" + temperature: 0.3 + +# OpenAI配置 +openai: + api_key: "${OPENAI_API_KEY}" + models: + gpt-4o: + class: "OpenAIChat" + params: + model: "gpt-4o" + temperature: 0.3 +``` + +#### 环境变量配置 (.env) +```bash +# 阿里云API密钥 +DASHSCOPE_API_KEY=sk-your-dashscope-key + +# DeepSeek API密钥 +DEEPSEEK_API_KEY=sk-your-deepseek-key + +# 
OpenAI API密钥 +OPENAI_API_KEY=sk-your-openai-key +``` + +### 便捷函数使用 + +#### create_json_agent() - 快速创建JSON Agent +```python +from src.agent_system import create_json_agent + +# 快速创建支持JSON输出的Agent +agent = create_json_agent( + provider="aliyun", + model_name="qwen-max", + name="json_extractor", + prompt_template="从以下文本提取信息: {text}", + response_model="MyModel", # 可以是字符串或类 + instructions=["你是信息提取专家"] +) +``` + +## 🎯 实际应用示例 + +### 示例1: 情感分析Agent + +```python +from pydantic import BaseModel, Field +from typing import Literal +from src.agent_system import SubAgent + +class SentimentResult(BaseModel): + sentiment: Literal["positive", "negative", "neutral"] = Field(description="情感倾向") + confidence: float = Field(description="置信度", ge=0.0, le=1.0) + explanation: str = Field(description="分析说明") + +sentiment_agent = SubAgent( + provider="aliyun", + model_name="qwen-max", + name="sentiment_analyzer", + instructions=[ + "你是专业的文本情感分析专家", + "请准确识别文本的情感倾向", + "提供详细的分析依据" + ], + prompt_template=""" +请分析以下文本的情感倾向: + +文本内容: {text} + +请识别情感倾向(positive/negative/neutral)、置信度(0-1)和分析说明。 +""", + response_model=SentimentResult +) + +# 使用示例 +result = sentiment_agent.run(template_vars={ + "text": "这个产品质量很好,我非常满意!" +}) + +print(f"情感: {result.sentiment}") +print(f"置信度: {result.confidence}") +print(f"说明: {result.explanation}") +``` + +### 示例2: 数据提取Agent + +```python +class DataExtraction(BaseModel): + extracted_data: Dict[str, Any] = Field(description="提取的数据") + extraction_count: int = Field(description="提取项目数量") + data_quality: Literal["high", "medium", "low"] = Field(description="数据质量评估") + +extractor_agent = SubAgent( + provider="aliyun", + model_name="qwen-plus", + name="data_extractor", + instructions=[ + "你是数据提取专家", + "从非结构化文本中提取结构化数据", + "确保提取的数据准确完整" + ], + prompt_template=""" +从以下{data_type}文档中提取关键数据: + +文档内容: +{document} + +提取要求: +{requirements} + +请提取所有相关数据并评估数据质量。 +""", + response_model=DataExtraction +) +``` + +## ⚠️ 注意事项与最佳实践 + +### 1. 
配置管理 +- **API密钥安全**: 始终使用环境变量存储API密钥,切勿在代码中硬编码 +- **配置验证**: 程序启动时验证配置文件完整性 +- **环境隔离**: 开发、测试、生产环境使用不同的配置文件 + +### 2. Prompt设计 +- **明确指令**: 提供清晰、具体的任务指令 +- **示例驱动**: 在prompt中包含输入输出示例 +- **结构化模板**: 使用结构化的prompt模板提高一致性 + +### 3. 错误处理 +- **异常捕获**: 对Agent调用进行适当的异常处理 +- **重试机制**: 对网络错误实现重试逻辑 +- **降级策略**: 准备备用模型或简化输出格式 + +### 4. 性能优化 +- **缓存机制**: 对相同输入实现结果缓存 +- **批处理**: 将多个小任务合并为大任务处理 +- **模型选择**: 根据任务复杂度选择合适的模型 + +## 🔧 故障排除 + +### 常见问题 + +#### 1. 配置文件不存在 +``` +错误: FileNotFoundError: LLM配置文件不存在 +解决: 确保 src/config/llm_config.yaml 文件存在且格式正确 +``` + +#### 2. API密钥未设置 +``` +错误: 环境变量 DASHSCOPE_API_KEY 未定义 +解决: 设置相应的环境变量或在.env文件中配置 +``` + +#### 3. JSON解析失败 +``` +错误: JSONParseError: 所有解析策略都失败了 +解决: 检查prompt设计,确保要求明确的JSON格式输出 +``` + +#### 4. 模型验证失败 +``` +错误: Pydantic模型验证失败 +解决: 检查响应模型定义与实际输出是否匹配 +``` + +### 调试技巧 + +#### 启用调试模式 +```python +agent = SubAgent( + provider="aliyun", + model_name="qwen-max", + debug_mode=True, # 启用调试输出 + # ... 其他参数 +) +``` + +#### 查看生成的Prompt +```python +# 构建并查看最终的prompt +prompt = agent.build_prompt({"key": "value"}) +print(f"生成的prompt: {prompt}") +``` + +#### 捕获详细错误信息 +```python +try: + result = agent.run(template_vars={"text": "测试"}) +except Exception as e: + print(f"错误类型: {type(e)}") + print(f"错误信息: {e}") + import traceback + traceback.print_exc() +``` + +## 🚦 版本信息 + +- **当前版本**: 0.1.0 +- **依赖要求**: + - Python >= 3.8 + - agno >= 0.1.0 + - pydantic >= 2.0.0 + - pyyaml >= 6.0.0 + +## 📞 支持与反馈 + +如遇到问题或有功能建议,请联系开发团队或提交issue。 + +--- + +*MedResearcher SubAgent系统 - 让AI更智能,让开发更简单* 🎉 \ No newline at end of file diff --git a/example/subagent-example/basic_example.py b/example/subagent-example/basic_example.py new file mode 100644 index 0000000..55252e2 --- /dev/null +++ b/example/subagent-example/basic_example.py @@ -0,0 +1,403 @@ +#!/usr/bin/env python3 +""" +SubAgent基础使用示例 + +展示SubAgent系统的基本功能: +1. 创建简单的对话Agent +2. 使用动态prompt模板 +3. 结构化JSON输出 +4. 
错误处理和调试 +""" + +import sys +import os +from typing import Optional + +# 添加项目根路径到Python路径 +project_root = os.path.dirname(os.path.dirname(os.path.dirname(__file__))) +sys.path.append(project_root) + +from src.agent_system import SubAgent +from example_models import BasicResponse, SentimentAnalysis + + +def create_simple_chat_agent() -> SubAgent: + """创建简单的聊天Agent""" + print("🤖 创建简单聊天Agent...") + + try: + agent = SubAgent( + provider="aliyun", + model_name="qwen-turbo", + name="simple_chat", + description="简单的聊天助手", + instructions=[ + "你是一个友好的AI助手", + "请用简洁明了的语言回答问题", + "保持积极正面的态度" + ] + ) + + print("✅ 简单聊天Agent创建成功") + return agent + + except Exception as e: + print(f"❌ Agent创建失败: {e}") + raise + + +def create_structured_output_agent() -> SubAgent: + """创建支持结构化输出的Agent""" + print("\n🔧 创建结构化输出Agent...") + + try: + agent = SubAgent( + provider="aliyun", + model_name="qwen-max", + name="structured_responder", + description="提供结构化响应的智能助手", + instructions=[ + "你是一个专业的响应助手", + "始终提供结构化的JSON格式输出", + "确保响应准确和有用" + ], + response_model=BasicResponse + ) + + print("✅ 结构化输出Agent创建成功") + return agent + + except Exception as e: + print(f"❌ 结构化输出Agent创建失败: {e}") + raise + + +def create_sentiment_agent() -> SubAgent: + """创建情感分析Agent""" + print("\n💭 创建情感分析Agent...") + + instructions = [ + "你是专业的文本情感分析专家", + "准确识别文本的情感倾向:positive(积极)、negative(消极)、neutral(中性)", + "提供0-1范围的置信度评分", + "给出详细的分析说明和关键词" + ] + + prompt_template = """ +请对以下文本进行情感分析: + +文本内容:"{text}" + +分析要求: +1. 识别情感倾向(positive/negative/neutral) +2. 评估分析置信度(0-1之间的浮点数) +3. 提供分析说明和依据 +4. 
提取影响情感判断的关键词 + +请严格按照指定的JSON格式返回结果。 +""" + + try: + agent = SubAgent( + provider="aliyun", + model_name="qwen-max", + name="sentiment_analyzer", + description="专业的文本情感分析系统", + instructions=instructions, + prompt_template=prompt_template, + response_model=SentimentAnalysis + ) + + print("✅ 情感分析Agent创建成功") + return agent + + except Exception as e: + print(f"❌ 情感分析Agent创建失败: {e}") + raise + + +def demo_simple_chat(): + """演示简单对话功能""" + print("\n" + "="*50) + print("🗣️ 简单对话演示") + print("="*50) + + try: + agent = create_simple_chat_agent() + + # 测试问题列表 + test_questions = [ + "你好!请介绍一下你自己", + "什么是人工智能?", + "请给我一些学习Python的建议" + ] + + for i, question in enumerate(test_questions, 1): + print(f"\n问题 {i}: {question}") + try: + response = agent.run(prompt=question) + print(f"回答: {response}") + + except Exception as e: + print(f"❌ 回答失败: {e}") + + return True + + except Exception as e: + print(f"❌ 简单对话演示失败: {e}") + return False + + +def demo_structured_response(): + """演示结构化响应功能""" + print("\n" + "="*50) + print("📋 结构化响应演示") + print("="*50) + + try: + agent = create_structured_output_agent() + + # 测试请求列表 + test_requests = [ + "请解释什么是机器学习", + "介绍Python编程语言的特点", + "如何提高工作效率?" 
+ ] + + for i, request in enumerate(test_requests, 1): + print(f"\n请求 {i}: {request}") + try: + result = agent.run(prompt=request) + print(f"✅ 响应成功:") + print(f" 消息: {result.message}") + print(f" 成功: {result.success}") + print(f" 时间: {result.timestamp}") + + except Exception as e: + print(f"❌ 响应失败: {e}") + + return True + + except Exception as e: + print(f"❌ 结构化响应演示失败: {e}") + return False + + +def demo_dynamic_prompt(): + """演示动态prompt模板功能""" + print("\n" + "="*50) + print("🎭 动态Prompt模板演示") + print("="*50) + + try: + agent = create_sentiment_agent() + + # 测试文本列表 + test_texts = [ + "这个产品质量非常好,我很满意!强烈推荐给大家。", + "服务态度差,产品质量也不行,非常失望。", + "产品功能还可以,价格也合理,算是中规中矩的选择。", + "今天天气不错,适合出去散步。", + "这部电影真是太精彩了!演员演技出色,剧情引人入胜。" + ] + + for i, text in enumerate(test_texts, 1): + print(f"\n文本 {i}: {text}") + try: + # 使用动态模板构建prompt + result = agent.run(template_vars={"text": text}) + + print(f"✅ 分析结果:") + print(f" 情感: {result.sentiment}") + print(f" 置信度: {result.confidence}") + print(f" 说明: {result.explanation}") + print(f" 关键词: {result.keywords}") + + except Exception as e: + print(f"❌ 分析失败: {e}") + + return True + + except Exception as e: + print(f"❌ 动态prompt演示失败: {e}") + return False + + +def demo_error_handling(): + """演示错误处理功能""" + print("\n" + "="*50) + print("⚠️ 错误处理演示") + print("="*50) + + # 测试各种错误情况 + error_tests = [ + { + "name": "无效的提供商", + "params": {"provider": "invalid_provider", "model_name": "test"}, + "expected_error": "ValueError" + }, + { + "name": "空的prompt模板变量", + "params": {"provider": "aliyun", "model_name": "qwen-turbo"}, + "template_vars": {}, + "prompt_template": "分析这个文本: {missing_var}", + "expected_error": "SubAgentError" + } + ] + + for test in error_tests: + print(f"\n测试: {test['name']}") + try: + if 'prompt_template' in test: + agent = SubAgent(**test['params']) + agent.update_prompt_template(test['prompt_template']) + agent.run(template_vars=test.get('template_vars', {})) + else: + agent = SubAgent(**test['params']) + + print(f"❌ 预期错误未发生") + + except 
Exception as e: + print(f"✅ 捕获到预期错误: {type(e).__name__}: {e}") + + return True + + +def interactive_demo(): + """交互式演示""" + print("\n" + "="*50) + print("💬 交互式演示") + print("="*50) + print("输入文本进行情感分析,输入'quit'退出") + + try: + agent = create_sentiment_agent() + + while True: + user_input = input("\n请输入要分析的文本: ").strip() + + if user_input.lower() == 'quit': + print("再见!") + break + + if not user_input: + continue + + try: + print(f"正在分析: {user_input}") + result = agent.run(template_vars={"text": user_input}) + + print(f"\n📊 分析结果:") + print(f"情感倾向: {result.sentiment}") + print(f"置信度: {result.confidence:.3f}") + print(f"分析说明: {result.explanation}") + if result.keywords: + print(f"关键词: {', '.join(result.keywords)}") + + except Exception as e: + print(f"❌ 分析失败: {e}") + + except KeyboardInterrupt: + print("\n程序已中断") + except Exception as e: + print(f"❌ 交互式演示失败: {e}") + + +def show_agent_info(agent: SubAgent): + """显示Agent信息""" + info = agent.get_model_info() + print(f"\n📋 Agent信息:") + for key, value in info.items(): + print(f" {key}: {value}") + + +def main(): + """主函数 - 运行所有演示""" + print("🚀 SubAgent基础使用示例") + print("=" * 60) + + # 运行所有演示 + demos = [ + ("简单对话", demo_simple_chat), + ("结构化响应", demo_structured_response), + ("动态Prompt", demo_dynamic_prompt), + ("错误处理", demo_error_handling), + ] + + results = {} + + for name, demo_func in demos: + print(f"\n开始演示: {name}") + try: + success = demo_func() + results[name] = success + print(f"{'✅' if success else '❌'} {name}演示{'成功' if success else '失败'}") + except Exception as e: + print(f"❌ {name}演示异常: {e}") + results[name] = False + + # 显示总结 + print("\n" + "="*60) + print("📊 演示总结") + print("="*60) + + total_demos = len(results) + successful_demos = sum(results.values()) + + for name, success in results.items(): + status = "✅ 成功" if success else "❌ 失败" + print(f" {name}: {status}") + + print(f"\n🎯 总计: {successful_demos}/{total_demos} 个演示成功") + + # 询问是否运行交互式演示 + print(f"\n是否运行交互式演示?(y/n): ", end="") + try: + choice = 
input().strip().lower() + if choice in ['y', 'yes', '是']: + interactive_demo() + except (KeyboardInterrupt, EOFError): + print("\n程序结束") + + return successful_demos == total_demos + + +def test_basic_functionality(): + """测试基础功能""" + print("正在测试SubAgent基础功能...") + + try: + # 创建基本Agent + agent = SubAgent( + provider="aliyun", + model_name="qwen-turbo", + name="test_agent" + ) + + print(f"✅ Agent创建成功: {agent}") + + # 显示Agent信息 + show_agent_info(agent) + + # 测试简单对话 + response = agent.run(prompt="请简单介绍一下你自己") + print(f"✅ 对话测试成功,响应长度: {len(str(response))}字符") + + return True + + except Exception as e: + print(f"❌ 基础功能测试失败: {e}") + return False + + +if __name__ == "__main__": + # 可以选择运行测试或完整演示 + import sys + + if len(sys.argv) > 1 and sys.argv[1] == "--test": + # 仅运行基础测试 + success = test_basic_functionality() + exit(0 if success else 1) + else: + # 运行完整演示 + main() \ No newline at end of file diff --git a/example/subagent-example/example_models.py b/example/subagent-example/example_models.py new file mode 100644 index 0000000..5c42663 --- /dev/null +++ b/example/subagent-example/example_models.py @@ -0,0 +1,377 @@ +""" +SubAgent示例Pydantic模型定义 + +提供各种场景下的结构化输出模型,展示SubAgent系统的灵活性 +""" + +from typing import List, Dict, Any, Optional, Literal, Union +from pydantic import BaseModel, Field, validator +from datetime import datetime + + +class BasicResponse(BaseModel): + """基础响应模型""" + + message: str = Field(description="响应消息") + success: bool = Field(description="处理是否成功", default=True) + timestamp: datetime = Field(default_factory=datetime.now, description="响应时间") + + +class SentimentAnalysis(BaseModel): + """情感分析结果模型""" + + sentiment: Literal["positive", "negative", "neutral"] = Field(description="情感倾向") + confidence: float = Field(description="置信度", ge=0.0, le=1.0) + explanation: str = Field(description="分析说明") + keywords: List[str] = Field(description="关键词列表", default_factory=list) + + @validator('confidence') + def validate_confidence(cls, v): + """验证置信度范围""" + if not 0.0 <= 
v <= 1.0: + raise ValueError('置信度必须在0.0到1.0之间') + return round(v, 3) + + +class KeywordExtraction(BaseModel): + """关键词提取项""" + + keyword: str = Field(description="关键词") + frequency: int = Field(description="出现频次", ge=1) + importance: float = Field(description="重要性评分", ge=0.0, le=1.0) + category: str = Field(description="关键词分类", default="general") + + class Config: + json_encoders = { + float: lambda v: round(v, 3) if v is not None else None + } + + +class TextAnalysisResult(BaseModel): + """文本分析完整结果""" + + # 基本信息 + text_length: int = Field(description="文本长度(字符数)", ge=0) + word_count: int = Field(description="词汇数量", ge=0) + language: str = Field(description="检测到的语言", default="zh") + + # 分析结果 + summary: str = Field(description="文本摘要") + sentiment: SentimentAnalysis = Field(description="情感分析结果") + keywords: List[KeywordExtraction] = Field(description="关键词提取结果") + + # 质量评估 + readability: Literal["high", "medium", "low"] = Field(description="可读性评估") + complexity: float = Field(description="复杂度评分", ge=0.0, le=1.0) + + @validator('text_length') + def validate_text_length(cls, v): + """验证文本长度""" + if v < 0: + raise ValueError('文本长度不能为负数') + return v + + @validator('keywords') + def validate_keywords(cls, v): + """验证关键词列表""" + if len(v) > 20: + # 只保留前20个最重要的关键词 + v = sorted(v, key=lambda x: x.importance, reverse=True)[:20] + return v + + +class CategoryClassification(BaseModel): + """分类结果项""" + + category: str = Field(description="分类名称") + confidence: float = Field(description="分类置信度", ge=0.0, le=1.0) + probability: float = Field(description="分类概率", ge=0.0, le=1.0) + + @validator('confidence', 'probability') + def round_float_values(cls, v): + """保留3位小数""" + return round(v, 3) + + +class DocumentClassificationResult(BaseModel): + """文档分类结果""" + + primary_category: str = Field(description="主要分类") + confidence: float = Field(description="主分类置信度", ge=0.0, le=1.0) + + all_categories: List[CategoryClassification] = Field( + description="所有分类结果(按置信度排序)" + ) + + features_used: 
List[str] = Field( + description="使用的特征列表", + default_factory=list + ) + + processing_time: Optional[float] = Field( + description="处理时间(秒)", + default=None + ) + + @validator('all_categories') + def sort_categories(cls, v): + """按置信度降序排列""" + return sorted(v, key=lambda x: x.confidence, reverse=True) + + +class DataExtractionItem(BaseModel): + """数据提取项""" + + field_name: str = Field(description="字段名称") + field_value: Union[str, int, float, bool, None] = Field(description="字段值") + confidence: float = Field(description="提取置信度", ge=0.0, le=1.0) + source_text: str = Field(description="来源文本片段") + extraction_method: str = Field(description="提取方法", default="llm") + + +class StructuredDataExtraction(BaseModel): + """结构化数据提取结果""" + + extracted_data: Dict[str, Any] = Field(description="提取的结构化数据") + extraction_items: List[DataExtractionItem] = Field(description="详细提取项目") + + # 质量评估 + extraction_quality: Literal["excellent", "good", "fair", "poor"] = Field( + description="提取质量评估" + ) + completeness: float = Field( + description="完整性评分", + ge=0.0, + le=1.0 + ) + accuracy: float = Field( + description="准确性评分", + ge=0.0, + le=1.0 + ) + + # 统计信息 + total_fields: int = Field(description="总字段数", ge=0) + extracted_fields: int = Field(description="成功提取字段数", ge=0) + failed_fields: int = Field(description="提取失败字段数", ge=0) + + @validator('extracted_fields', 'failed_fields') + def validate_field_counts(cls, v, values): + """验证字段计数""" + total = values.get('total_fields', 0) + if v > total: + raise ValueError('提取字段数不能超过总字段数') + return v + + +class TaskExecutionResult(BaseModel): + """任务执行结果""" + + # 任务信息 + task_id: str = Field(description="任务ID") + task_type: str = Field(description="任务类型") + status: Literal["completed", "failed", "partial"] = Field(description="执行状态") + + # 执行详情 + result_data: Optional[Dict[str, Any]] = Field(description="结果数据", default=None) + error_message: Optional[str] = Field(description="错误信息", default=None) + warnings: List[str] = Field(description="警告信息", 
default_factory=list) + + # 性能指标 + execution_time: float = Field(description="执行时间(秒)", ge=0.0) + memory_usage: Optional[float] = Field(description="内存使用量(MB)", default=None) + + # 质量评估 + success_rate: float = Field(description="成功率", ge=0.0, le=1.0) + quality_score: float = Field(description="质量评分", ge=0.0, le=1.0) + + @validator('success_rate', 'quality_score') + def round_scores(cls, v): + """保留3位小数""" + return round(v, 3) + + +class ComprehensiveAnalysisResult(BaseModel): + """综合分析结果(组合多个分析)""" + + # 基本信息 + analysis_id: str = Field(description="分析ID") + input_summary: str = Field(description="输入数据摘要") + analysis_timestamp: datetime = Field(default_factory=datetime.now) + + # 各项分析结果 + text_analysis: Optional[TextAnalysisResult] = Field( + description="文本分析结果", + default=None + ) + classification: Optional[DocumentClassificationResult] = Field( + description="分类结果", + default=None + ) + data_extraction: Optional[StructuredDataExtraction] = Field( + description="数据提取结果", + default=None + ) + + # 综合评估 + overall_quality: Literal["excellent", "good", "fair", "poor"] = Field( + description="整体质量评估" + ) + confidence_level: float = Field( + description="整体置信度", + ge=0.0, + le=1.0 + ) + + # 处理统计 + total_processing_time: float = Field(description="总处理时间(秒)", ge=0.0) + components_completed: int = Field(description="完成的组件数量", ge=0) + components_failed: int = Field(description="失败的组件数量", ge=0) + + recommendations: List[str] = Field( + description="改进建议", + default_factory=list + ) + + @validator('confidence_level') + def validate_confidence(cls, v): + """验证并格式化置信度""" + return round(v, 3) + + +# 测试模型定义 +def test_models(): + """测试所有模型定义""" + print("正在测试示例模型定义...") + + try: + # 测试基础响应模型 + basic = BasicResponse(message="测试消息") + print(f"✅ BasicResponse模型测试成功: {basic.message}") + + # 测试情感分析模型 + sentiment = SentimentAnalysis( + sentiment="positive", + confidence=0.95, + explanation="积极的情感表达", + keywords=["好", "满意", "推荐"] + ) + print(f"✅ SentimentAnalysis模型测试成功: 
{sentiment.sentiment}") + + # 测试关键词提取模型 + keyword = KeywordExtraction( + keyword="人工智能", + frequency=5, + importance=0.8, + category="technology" + ) + print(f"✅ KeywordExtraction模型测试成功: {keyword.keyword}") + + # 测试文本分析结果模型 + text_result = TextAnalysisResult( + text_length=150, + word_count=25, + language="zh", + summary="这是一个测试文本摘要", + sentiment=sentiment, + keywords=[keyword], + readability="high", + complexity=0.3 + ) + print(f"✅ TextAnalysisResult模型测试成功: {text_result.summary}") + + # 测试分类结果模型 + classification = DocumentClassificationResult( + primary_category="技术文档", + confidence=0.92, + all_categories=[ + CategoryClassification( + category="技术文档", + confidence=0.92, + probability=0.87 + ) + ] + ) + print(f"✅ DocumentClassificationResult模型测试成功: {classification.primary_category}") + + # 测试数据提取模型 + extraction_item = DataExtractionItem( + field_name="标题", + field_value="SubAgent系统指南", + confidence=0.98, + source_text="# SubAgent系统指南", + extraction_method="pattern_matching" + ) + + data_extraction = StructuredDataExtraction( + extracted_data={"title": "SubAgent系统指南"}, + extraction_items=[extraction_item], + extraction_quality="excellent", + completeness=1.0, + accuracy=0.95, + total_fields=1, + extracted_fields=1, + failed_fields=0 + ) + print(f"✅ StructuredDataExtraction模型测试成功: {data_extraction.extraction_quality}") + + # 测试任务执行结果模型 + task_result = TaskExecutionResult( + task_id="task_001", + task_type="text_analysis", + status="completed", + result_data={"status": "success"}, + execution_time=2.5, + success_rate=1.0, + quality_score=0.95 + ) + print(f"✅ TaskExecutionResult模型测试成功: {task_result.status}") + + # 测试综合分析结果模型 + comprehensive = ComprehensiveAnalysisResult( + analysis_id="analysis_001", + input_summary="测试输入摘要", + text_analysis=text_result, + classification=classification, + data_extraction=data_extraction, + overall_quality="excellent", + confidence_level=0.93, + total_processing_time=5.2, + components_completed=3, + components_failed=0, + 
recommendations=["继续保持高质量输出"] + ) + print(f"✅ ComprehensiveAnalysisResult模型测试成功: {comprehensive.overall_quality}") + + # 测试JSON序列化 + json_str = comprehensive.model_dump_json(indent=2) + print(f"✅ JSON序列化测试成功,长度: {len(json_str)}字符") + + # 列出所有模型字段 + print("\n📋 模型字段信息:") + for model_name, model_class in [ + ("BasicResponse", BasicResponse), + ("SentimentAnalysis", SentimentAnalysis), + ("TextAnalysisResult", TextAnalysisResult), + ("DocumentClassificationResult", DocumentClassificationResult), + ("StructuredDataExtraction", StructuredDataExtraction), + ("ComprehensiveAnalysisResult", ComprehensiveAnalysisResult), + ]: + fields = list(model_class.model_fields.keys()) + print(f" {model_name}: {len(fields)} 个字段 - {fields[:3]}{'...' if len(fields) > 3 else ''}") + + return True + + except Exception as e: + print(f"❌ 模型测试失败: {e}") + import traceback + traceback.print_exc() + return False + + +if __name__ == "__main__": + success = test_models() + if success: + print("\n🎉 所有示例模型测试通过!") + else: + print("\n💥 模型测试失败,请检查定义") \ No newline at end of file diff --git a/example/subagent-example/text_analysis_example.py b/example/subagent-example/text_analysis_example.py new file mode 100644 index 0000000..5e6616e --- /dev/null +++ b/example/subagent-example/text_analysis_example.py @@ -0,0 +1,762 @@ +#!/usr/bin/env python3 +""" +文本分析综合示例 + +基于SubAgent系统的复杂应用示例,展示: +1. 多Agent协作系统 +2. 复杂的数据处理pipeline +3. 结构化输出和错误恢复 +4. 
性能监控和质量评估 +""" + +import sys +import os +import time +from typing import List, Dict, Any, Optional, Tuple +from datetime import datetime +import uuid + +# 添加项目根路径到Python路径 +project_root = os.path.dirname(os.path.dirname(os.path.dirname(__file__))) +sys.path.append(project_root) + +from src.agent_system import SubAgent, create_json_agent +from example_models import ( + TextAnalysisResult, + DocumentClassificationResult, + StructuredDataExtraction, + ComprehensiveAnalysisResult, + SentimentAnalysis, + KeywordExtraction, + DataExtractionItem, + CategoryClassification, + TaskExecutionResult +) + + +class TextAnalysisEngine: + """文本分析引擎 - 多Agent协作系统""" + + def __init__(self): + """初始化文本分析引擎""" + self.agents = {} + self.processing_stats = { + "total_processed": 0, + "successful_analyses": 0, + "failed_analyses": 0, + "average_processing_time": 0.0 + } + + # 初始化所有Agent + self._initialize_agents() + + def _initialize_agents(self): + """初始化所有分析Agent""" + print("🔧 初始化文本分析引擎...") + + try: + # 1. 情感分析Agent + self.agents['sentiment'] = self._create_sentiment_agent() + + # 2. 关键词提取Agent + self.agents['keywords'] = self._create_keyword_agent() + + # 3. 文本分类Agent + self.agents['classification'] = self._create_classification_agent() + + # 4. 数据提取Agent + self.agents['extraction'] = self._create_extraction_agent() + + # 5. 综合分析Agent + self.agents['comprehensive'] = self._create_comprehensive_agent() + + print(f"✅ 成功初始化 {len(self.agents)} 个Agent") + + except Exception as e: + print(f"❌ Agent初始化失败: {e}") + raise + + def _create_sentiment_agent(self) -> SubAgent: + """创建情感分析Agent""" + instructions = [ + "你是专业的文本情感分析专家", + "准确识别文本的情感倾向和情感强度", + "提供详细的分析依据和相关关键词", + "对分析结果给出可信度评估" + ] + + prompt_template = """ +请对以下文本进行深入的情感分析: + +【文本内容】 +{text} + +【分析要求】 +1. 识别主要情感倾向(positive/negative/neutral) +2. 评估情感强度和置信度(0-1) +3. 提供分析说明和判断依据 +4. 提取影响情感判断的关键词和短语 +5. 
考虑语言的细微差别和上下文含义 + +请提供准确、专业的分析结果。 +""" + + return SubAgent( + provider="aliyun", + model_name="qwen-max", + name="sentiment_analyzer", + description="专业的情感分析系统", + instructions=instructions, + prompt_template=prompt_template, + response_model=SentimentAnalysis + ) + + def _create_keyword_agent(self) -> SubAgent: + """创建关键词提取Agent""" + instructions = [ + "你是专业的关键词提取专家", + "从文本中识别最重要和最相关的关键词", + "评估关键词的重要性和频率", + "对关键词进行合理的分类" + ] + + prompt_template = """ +请从以下文本中提取关键词: + +【文本内容】 +{text} + +【提取要求】 +1. 识别最重要的关键词和短语 +2. 统计关键词出现频率 +3. 评估每个关键词的重要性(0-1) +4. 对关键词进行分类(如:人物、地点、概念、技术等) +5. 排除停用词和无意义词汇 + +请提供结构化的关键词提取结果。 +""" + + # 创建专门处理关键词列表的响应模型 + from pydantic import BaseModel, Field + from typing import List + + class KeywordExtractionResult(BaseModel): + keywords: List[KeywordExtraction] = Field(description="提取的关键词列表") + total_count: int = Field(description="关键词总数", ge=0) + text_complexity: float = Field(description="文本复杂度", ge=0.0, le=1.0) + + return SubAgent( + provider="aliyun", + model_name="qwen-max", + name="keyword_extractor", + description="智能关键词提取系统", + instructions=instructions, + prompt_template=prompt_template, + response_model=KeywordExtractionResult + ) + + def _create_classification_agent(self) -> SubAgent: + """创建文档分类Agent""" + instructions = [ + "你是专业的文档分类专家", + "准确识别文档的类型和主题", + "提供多级分类和置信度评估", + "考虑文档的内容、风格和用途" + ] + + prompt_template = """ +请对以下文档进行分类: + +【文档内容】 +{text} + +【分类体系】 +主要分类:技术文档、商业文档、学术论文、新闻报道、个人写作、法律文档、医学文档等 +详细分类:根据具体内容进一步细分 + +【分类要求】 +1. 确定主要分类和置信度 +2. 提供所有可能分类的概率分布 +3. 识别用于分类判断的关键特征 +4. 
评估分类的可信度 + +请提供准确的分类结果。 +""" + + return SubAgent( + provider="aliyun", + model_name="qwen-max", + name="document_classifier", + description="智能文档分类系统", + instructions=instructions, + prompt_template=prompt_template, + response_model=DocumentClassificationResult + ) + + def _create_extraction_agent(self) -> SubAgent: + """创建数据提取Agent""" + instructions = [ + "你是专业的结构化数据提取专家", + "从非结构化文本中提取有价值的信息", + "确保提取的数据准确性和完整性", + "评估提取质量和可靠性" + ] + + prompt_template = """ +请从以下文本中提取结构化数据: + +【文本内容】 +{text} + +【提取目标】 +根据文本内容自动识别可提取的数据类型,可能包括: +- 人名、地名、机构名 +- 日期、时间、数量 +- 联系方式、地址 +- 专业术语、概念 +- 关键指标、统计数据 + +【提取要求】 +1. 自动识别文本中的结构化信息 +2. 为每个提取项提供置信度评估 +3. 记录提取依据和来源文本片段 +4. 评估整体提取质量和完整性 + +请提供详细的数据提取结果。 +""" + + return SubAgent( + provider="aliyun", + model_name="qwen-max", + name="data_extractor", + description="智能数据提取系统", + instructions=instructions, + prompt_template=prompt_template, + response_model=StructuredDataExtraction + ) + + def _create_comprehensive_agent(self) -> SubAgent: + """创建综合分析Agent""" + instructions = [ + "你是文本综合分析专家", + "整合多种分析结果提供整体评估", + "识别分析中的一致性和矛盾之处", + "提供改进建议和深度见解" + ] + + prompt_template = """ +基于以下多维度分析结果,请提供综合评估: + +【原始文本】 +{original_text} + +【分析结果】 +情感分析: {sentiment_result} +关键词提取: {keyword_result} +文档分类: {classification_result} +数据提取: {extraction_result} + +【综合评估要求】 +1. 评估各项分析结果的一致性 +2. 识别潜在的分析矛盾或问题 +3. 提供整体质量评估 +4. 给出置信度评估 +5. 
提出改进建议 + +请提供专业的综合分析报告。 +""" + + return SubAgent( + provider="aliyun", + model_name="qwen-max", + name="comprehensive_analyzer", + description="文本综合分析系统", + instructions=instructions, + prompt_template=prompt_template, + response_model=ComprehensiveAnalysisResult + ) + + def analyze_text(self, text: str, analysis_id: Optional[str] = None) -> ComprehensiveAnalysisResult: + """执行完整的文本分析""" + if analysis_id is None: + analysis_id = f"analysis_{uuid.uuid4().hex[:8]}" + + start_time = time.time() + self.processing_stats["total_processed"] += 1 + + print(f"\n🔍 开始分析 [{analysis_id}]") + print(f"文本长度: {len(text)} 字符") + + try: + # 阶段1: 基础分析 + print("📊 执行基础分析...") + sentiment_result = self._analyze_sentiment(text) + keyword_result = self._extract_keywords(text) + + # 阶段2: 高级分析 + print("🧠 执行高级分析...") + classification_result = self._classify_document(text) + extraction_result = self._extract_data(text) + + # 阶段3: 综合分析 + print("🎯 执行综合分析...") + comprehensive_result = self._comprehensive_analysis( + text, sentiment_result, keyword_result, + classification_result, extraction_result, analysis_id + ) + + # 更新统计信息 + processing_time = time.time() - start_time + self.processing_stats["successful_analyses"] += 1 + self._update_processing_stats(processing_time) + + print(f"✅ 分析完成 [{analysis_id}] - 耗时: {processing_time:.2f}秒") + return comprehensive_result + + except Exception as e: + self.processing_stats["failed_analyses"] += 1 + print(f"❌ 分析失败 [{analysis_id}]: {e}") + raise + + def _analyze_sentiment(self, text: str) -> SentimentAnalysis: + """执行情感分析""" + try: + result = self.agents['sentiment'].run(template_vars={"text": text}) + print(f" 情感: {result.sentiment} (置信度: {result.confidence:.3f})") + return result + except Exception as e: + print(f" ⚠️ 情感分析失败: {e}") + # 返回默认结果 + return SentimentAnalysis( + sentiment="neutral", + confidence=0.0, + explanation=f"分析失败: {e}", + keywords=[] + ) + + def _extract_keywords(self, text: str): + """提取关键词""" + try: + result = 
self.agents['keywords'].run(template_vars={"text": text}) + print(f" 关键词: {result.total_count} 个") + return result + except Exception as e: + print(f" ⚠️ 关键词提取失败: {e}") + # 返回空结果 + from pydantic import BaseModel, Field + from typing import List + + class KeywordExtractionResult(BaseModel): + keywords: List[KeywordExtraction] = Field(default_factory=list) + total_count: int = Field(default=0) + text_complexity: float = Field(default=0.5) + + return KeywordExtractionResult() + + def _classify_document(self, text: str) -> DocumentClassificationResult: + """执行文档分类""" + try: + result = self.agents['classification'].run(template_vars={"text": text}) + print(f" 分类: {result.primary_category} (置信度: {result.confidence:.3f})") + return result + except Exception as e: + print(f" ⚠️ 文档分类失败: {e}") + # 返回默认结果 + return DocumentClassificationResult( + primary_category="未知", + confidence=0.0, + all_categories=[ + CategoryClassification( + category="未知", + confidence=0.0, + probability=0.0 + ) + ] + ) + + def _extract_data(self, text: str) -> StructuredDataExtraction: + """提取结构化数据""" + try: + result = self.agents['extraction'].run(template_vars={"text": text}) + print(f" 数据提取: {result.extracted_fields}/{result.total_fields} 字段") + return result + except Exception as e: + print(f" ⚠️ 数据提取失败: {e}") + # 返回空结果 + return StructuredDataExtraction( + extracted_data={}, + extraction_items=[], + extraction_quality="poor", + completeness=0.0, + accuracy=0.0, + total_fields=0, + extracted_fields=0, + failed_fields=0 + ) + + def _comprehensive_analysis( + self, + original_text: str, + sentiment_result: SentimentAnalysis, + keyword_result, + classification_result: DocumentClassificationResult, + extraction_result: StructuredDataExtraction, + analysis_id: str + ) -> ComprehensiveAnalysisResult: + """执行综合分析""" + try: + # 准备模板变量 + template_vars = { + "original_text": original_text[:500] + ("..." 
if len(original_text) > 500 else ""), + "sentiment_result": f"情感:{sentiment_result.sentiment}, 置信度:{sentiment_result.confidence}", + "keyword_result": f"关键词数量:{getattr(keyword_result, 'total_count', 0)}", + "classification_result": f"分类:{classification_result.primary_category}, 置信度:{classification_result.confidence}", + "extraction_result": f"提取质量:{extraction_result.extraction_quality}" + } + + result = self.agents['comprehensive'].run(template_vars=template_vars) + + # 补充一些字段 + result.analysis_id = analysis_id + result.input_summary = f"长度:{len(original_text)}字符, 类型:{classification_result.primary_category}" + result.text_analysis = self._build_text_analysis_result( + original_text, sentiment_result, keyword_result + ) + result.classification = classification_result + result.data_extraction = extraction_result + + print(f" 综合评估: {result.overall_quality}") + return result + + except Exception as e: + print(f" ⚠️ 综合分析失败: {e}") + # 构建基本的综合结果 + return ComprehensiveAnalysisResult( + analysis_id=analysis_id, + input_summary=f"长度:{len(original_text)}字符", + overall_quality="poor", + confidence_level=0.0, + total_processing_time=0.0, + components_completed=0, + components_failed=4, + recommendations=["分析失败,请检查输入文本和系统配置"] + ) + + def _build_text_analysis_result( + self, + text: str, + sentiment: SentimentAnalysis, + keyword_result + ) -> TextAnalysisResult: + """构建文本分析结果""" + + # 获取关键词列表 + keywords = getattr(keyword_result, 'keywords', []) + + return TextAnalysisResult( + text_length=len(text), + word_count=len(text.split()), + language="zh", + summary=f"文本分析摘要: 情感倾向为{sentiment.sentiment}", + sentiment=sentiment, + keywords=keywords, + readability="medium", + complexity=getattr(keyword_result, 'text_complexity', 0.5) + ) + + def _update_processing_stats(self, processing_time: float): + """更新处理统计信息""" + total = self.processing_stats["total_processed"] + current_avg = self.processing_stats["average_processing_time"] + + # 计算新的平均处理时间 + new_avg = ((current_avg * (total - 1)) + 
processing_time) / total + self.processing_stats["average_processing_time"] = new_avg + + def get_processing_stats(self) -> Dict[str, Any]: + """获取处理统计信息""" + return self.processing_stats.copy() + + def display_stats(self): + """显示处理统计""" + stats = self.get_processing_stats() + print("\n📈 处理统计信息:") + print(f" 总处理数: {stats['total_processed']}") + print(f" 成功数: {stats['successful_analyses']}") + print(f" 失败数: {stats['failed_analyses']}") + if stats['total_processed'] > 0: + success_rate = stats['successful_analyses'] / stats['total_processed'] * 100 + print(f" 成功率: {success_rate:.1f}%") + print(f" 平均处理时间: {stats['average_processing_time']:.2f}秒") + + +def demo_single_text_analysis(): + """演示单个文本分析""" + print("🔍 单文本分析演示") + print("="*50) + + # 创建分析引擎 + engine = TextAnalysisEngine() + + # 测试文本 + test_text = """ +人工智能技术正在快速发展,深度学习和机器学习算法在各个领域都取得了显著的进展。 +从自然语言处理到计算机视觉,从推荐系统到自动驾驶,AI技术正在改变我们的生活方式。 + +然而,我们也需要关注AI发展带来的挑战,包括隐私保护、算法偏见、就业影响等问题。 +只有在技术发展和社会责任之间找到平衡,AI才能真正造福人类社会。 + +总的来说,人工智能的未来充满希望,但也需要我们谨慎对待,确保技术发展的方向符合人类的长远利益。 +""" + + try: + # 执行分析 + result = engine.analyze_text(test_text) + + # 显示详细结果 + display_analysis_result(result) + + # 显示统计信息 + engine.display_stats() + + return True + + except Exception as e: + print(f"❌ 演示失败: {e}") + return False + + +def demo_batch_analysis(): + """演示批量文本分析""" + print("\n🔄 批量文本分析演示") + print("="*50) + + # 创建分析引擎 + engine = TextAnalysisEngine() + + # 测试文本集合 + test_texts = [ + "今天天气真好,阳光明媚,心情特别愉快!", + + "公司最新发布的季度财报显示,营收同比增长15%,净利润达到2.3亿元。董事会决定向股东分红每股0.5元。", + + "机器学习是人工智能的一个重要分支,通过算法让计算机能够从数据中学习模式。常见的机器学习算法包括线性回归、决策树、神经网络等。", + + "服务态度恶劣,产品质量很差,完全不值这个价格。强烈不推荐大家购买!", + + "根据《合同法》第一百二十一条规定,当事人一方因第三人的原因造成违约的,应当向对方承担违约责任。" + ] + + results = [] + start_time = time.time() + + for i, text in enumerate(test_texts, 1): + print(f"\n处理第 {i}/{len(test_texts)} 个文本...") + try: + result = engine.analyze_text(text, f"batch_{i}") + results.append(result) + + # 显示简要结果 + print(f" 结果: {result.overall_quality} | 置信度: {result.confidence_level:.3f}") + + 
except Exception as e:
            # A failed item is recorded as None so list indices stay aligned
            # with test_texts; the engine's own stats already count the failure.
            print(f" ❌ 处理失败: {e}")
            results.append(None)
    
    total_time = time.time() - start_time
    
    # Batch summary plus the engine's accumulated statistics.
    print(f"\n📊 批量处理完成 - 总耗时: {total_time:.2f}秒")
    engine.display_stats()
    
    # Summarize only the analyses that completed successfully.
    successful_results = [r for r in results if r is not None]
    if successful_results:
        print(f"\n🎯 成功处理 {len(successful_results)} 个文本:")
        
        # Histogram of overall_quality labels across the successful results.
        quality_stats = {}
        for result in successful_results:
            quality = result.overall_quality
            quality_stats[quality] = quality_stats.get(quality, 0) + 1
        
        for quality, count in quality_stats.items():
            print(f" {quality}: {count} 个")
    
    return len(successful_results) > 0


def display_analysis_result(result: ComprehensiveAnalysisResult):
    """Print a detailed, human-readable report for one analysis result.

    Writes to stdout only. Each optional section (text analysis,
    classification, data extraction, recommendations) is skipped when the
    corresponding field on ``result`` is empty/None.
    """
    print(f"\n📋 详细分析结果 [{result.analysis_id}]")
    print("="*60)
    
    print(f"输入摘要: {result.input_summary}")
    print(f"分析时间: {result.analysis_timestamp}")
    print(f"整体质量: {result.overall_quality}")
    print(f"置信度: {result.confidence_level:.3f}")
    print(f"处理时间: {result.total_processing_time:.2f}秒")
    
    # Text-analysis section: sentiment, top keywords, readability.
    if result.text_analysis:
        ta = result.text_analysis
        print(f"\n📝 文本分析:")
        print(f" 长度: {ta.text_length} 字符")
        print(f" 词数: {ta.word_count}")
        print(f" 摘要: {ta.summary}")
        print(f" 情感: {ta.sentiment.sentiment} (置信度: {ta.sentiment.confidence:.3f})")
        print(f" 可读性: {ta.readability}")
        if ta.keywords:
            # Only the five highest-ranked keywords are shown.
            top_keywords = ta.keywords[:5]
            print(f" 关键词: {[k.keyword for k in top_keywords]}")
    
    # Classification section: primary category plus up to two runners-up.
    if result.classification:
        cls = result.classification
        print(f"\n🏷️ 文档分类:")
        print(f" 主分类: {cls.primary_category}")
        print(f" 置信度: {cls.confidence:.3f}")
        if len(cls.all_categories) > 1:
            other_cats = cls.all_categories[1:3]
            print(f" 其他可能: {[c.category for c in other_cats]}")
    
    # Structured-data-extraction section.
    if result.data_extraction:
        de = result.data_extraction
        print(f"\n🔍 数据提取:")
        print(f" 质量: {de.extraction_quality}")
        print(f" 完整性: {de.completeness:.3f}")
        print(f" 准确性: {de.accuracy:.3f}")
        print(f" 字段统计: {de.extracted_fields}/{de.total_fields}")
    
if de.extraction_items:
            print(" 提取项目:")
            for item in de.extraction_items[:3]:  # show only the first three items
                print(f" {item.field_name}: {item.field_value} (置信度: {item.confidence:.3f})")
    
    # Improvement-recommendation section (top three only).
    if result.recommendations:
        print(f"\n💡 改进建议:")
        for i, rec in enumerate(result.recommendations[:3], 1):
            print(f" {i}. {rec}")


def interactive_analysis():
    """Interactive REPL: read a text from stdin, analyze it, print the report.

    Loops until the user types 'quit' (or interrupts with Ctrl-C). Inputs
    shorter than 10 characters are rejected before analysis.
    """
    print("\n💬 交互式文本分析")
    print("="*50)
    print("输入文本进行综合分析,输入'quit'退出")
    
    try:
        engine = TextAnalysisEngine()
        
        while True:
            print("\n" + "-"*30)
            user_input = input("请输入要分析的文本: ").strip()
            
            if user_input.lower() == 'quit':
                print("分析结束,再见!")
                break
            
            if not user_input:
                continue
            
            if len(user_input) < 10:
                print("⚠️ 文本太短,请输入至少10个字符")
                continue
            
            try:
                result = engine.analyze_text(user_input)
                display_analysis_result(result)
                
            except Exception as e:
                # Per-input failures are reported but do not end the session.
                print(f"❌ 分析失败: {e}")
        
        # Final accumulated statistics for the whole session.
        engine.display_stats()
        
    except KeyboardInterrupt:
        print("\n程序已中断")
    except Exception as e:
        print(f"❌ 交互式分析失败: {e}")


def test_engine_initialization():
    """Smoke-test that TextAnalysisEngine and all its agents can be built.

    Returns True on success, False on any construction error.
    """
    print("正在测试文本分析引擎初始化...")
    
    try:
        engine = TextAnalysisEngine()
        print(f"✅ 引擎初始化成功,包含 {len(engine.agents)} 个Agent")
        
        # NOTE(review): assumes SubAgent exposes get_model_info() returning
        # 'model_name' and 'provider' keys — defined in src.agent_system,
        # not visible in this file; confirm against that module.
        for name, agent in engine.agents.items():
            info = agent.get_model_info()
            print(f" {name}: {info['model_name']} ({info['provider']})")
        
        return True
        
    except Exception as e:
        print(f"❌ 引擎初始化失败: {e}")
        return False


def main():
    """Run all demos in sequence and report a per-demo success summary."""
    print("🚀 文本分析综合示例")
    print("="*60)
    
    # Ordered list of (label, callable) demo pairs; each returns a bool.
    demos = [
        ("引擎初始化测试", test_engine_initialization),
        ("单文本分析", demo_single_text_analysis),
        ("批量文本分析", demo_batch_analysis),
    ]
    
    results = {}
    
    for name, demo_func in demos:
        print(f"\n开始: {name}")
        try:
            success = demo_func()
            results[name] = success
            print(f"{'✅' if success else '❌'} {name} {'成功' if success else '失败'}")
        except Exception as e:
            # A crashing demo counts as failed but does not stop the others.
            print(f"❌ {name} 异常: {e}")
            results[name] = False
    
    # Overall summary of demo outcomes.
    print(f"\n📊 演示总结")
    print("="*60)
    
    successful_demos = 
sum(results.values()) + total_demos = len(results) + + for name, success in results.items(): + status = "✅ 成功" if success else "❌ 失败" + print(f" {name}: {status}") + + print(f"\n🎯 总计: {successful_demos}/{total_demos} 个演示成功") + + # 询问是否运行交互式演示 + if successful_demos > 0: + try: + choice = input("\n是否运行交互式分析?(y/n): ").strip().lower() + if choice in ['y', 'yes', '是']: + interactive_analysis() + except (KeyboardInterrupt, EOFError): + print("\n程序结束") + + return successful_demos == total_demos + + +if __name__ == "__main__": + # 可以选择运行测试或完整演示 + import sys + + if len(sys.argv) > 1 and sys.argv[1] == "--test": + # 仅运行初始化测试 + success = test_engine_initialization() + exit(0 if success else 1) + else: + # 运行完整演示 + main() \ No newline at end of file diff --git a/experiment_runner.py b/experiment_runner.py new file mode 100644 index 0000000..df2a138 --- /dev/null +++ b/experiment_runner.py @@ -0,0 +1,153 @@ +#!/usr/bin/env python3 +""" +数字提取实验运行器 + +基于Agno框架的SubAgent系统,从文本中提取数字并进行解释 +展示SubAgent系统的核心功能:配置读取、模型创建、动态prompt构建、JSON解析 +""" + +import sys +import os +from typing import List + +# 添加src路径到Python路径 +sys.path.append(os.path.join(os.path.dirname(__file__), 'src')) + +from src.agent_system import SubAgent, create_json_agent +from number_extraction_models import NumberExtractionResult + + +def create_number_extraction_agent() -> SubAgent: + """创建数字提取专用SubAgent""" + + instructions = [ + "你是一个专业的数字提取专家", + "你的任务是从给定文本中识别和提取所有的数字", + "重点关注:整数、小数、百分比、比率、统计数据等", + "对每个数字提供准确的上下文和清晰的解释" + ] + + prompt_template = """ +请仔细分析以下文本,提取其中的所有数字: + +【输入文本】 +{input_text} + +【提取要求】 +1. 识别文本中的所有数字(整数、小数、百分比、比率等) +2. 对每个数字进行分类和解释 +3. 记录数字出现的完整上下文 +4. 识别数字的单位(如果有) +5. 
提供对整个文本中数字的总体摘要 +""" + + try: + agent = SubAgent( + provider="aliyun", + model_name="qwen-max", + name="number_extractor", + description="专业的文本数字提取和解释系统", + instructions=instructions, + prompt_template=prompt_template, + response_model=NumberExtractionResult + ) + + print("✅ 数字提取Agent初始化成功") + return agent + + except Exception as e: + print(f"❌ Agent初始化失败: {e}") + raise + + +def display_results(result: NumberExtractionResult): + """展示提取结果""" + print("\n" + "="*50) + print("🔍 数字提取结果") + print("="*50) + + print(f"摘要: {result.summary}") + print(f"总数: {result.total_count}") + + if result.extractions: + for i, item in enumerate(result.extractions, 1): + print(f"\n数字{i}: {item.number}") + if item.unit: + print(f" 单位: {item.unit}") + print(f" 上下文: {item.context}") + print(f" 解释: {item.explanation}") + + print("="*50) + + +def get_user_input() -> str: + """获取用户输入""" + print("\n请输入要分析的文本:") + print("(输入'quit'退出,输入'END'结束输入)") + + lines = [] + while True: + try: + line = input(">>> ").strip() + if line.lower() == 'quit': + return 'quit' + elif line.upper() == 'END': + break + else: + lines.append(line) + break + except (KeyboardInterrupt, EOFError): + return 'quit' + + return '\n'.join(lines).strip() + + +def main(): + """主函数""" + print("🎉 数字提取工具启动!") + + try: + agent = create_number_extraction_agent() + except Exception as e: + print(f"初始化失败: {e}") + return + + print("✅ 系统就绪") + + while True: + user_input = get_user_input() + + if user_input == 'quit': + print("再见!") + break + + if not user_input: + continue + + try: + print(f"分析中... 
(文本长度: {len(user_input)}字符)") + result = agent.run(template_vars={'input_text': user_input}) + display_results(result) + + except Exception as e: + print(f"提取失败: {e}") + print("请尝试重新输入") + + +def test_basic(): + """基础测试""" + try: + agent = create_number_extraction_agent() + + # 测试prompt构建 + prompt = agent.build_prompt({'input_text': '测试数字95.2%'}) + print(f"✅ 测试通过,prompt长度: {len(prompt)}") + return True + + except Exception as e: + print(f"❌ 测试失败: {e}") + return False + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/number_extraction_models.py b/number_extraction_models.py new file mode 100644 index 0000000..a380a1b --- /dev/null +++ b/number_extraction_models.py @@ -0,0 +1,78 @@ +""" +数字提取Pydantic模型定义 + +用于结构化解析从文本中提取的数字和相关解释信息 +""" + +from typing import List, Union +from pydantic import BaseModel, Field + + +class NumberExtraction(BaseModel): + """单个数字提取项""" + + number: Union[int, float] = Field(description="提取的数字值") + explanation: str = Field(description="对该数字的解释说明") + context: str = Field(description="数字出现的上下文片段") + unit: str = Field(default="", description="数字的单位(如果有)") + + class Config: + json_encoders = { + # 确保浮点数正确序列化 + float: lambda v: round(v, 6) if v is not None else None + } + + +class NumberExtractionResult(BaseModel): + """数字提取完整结果""" + + extractions: List[NumberExtraction] = Field(description="提取的数字项列表") + summary: str = Field(description="对整个文本中数字的总结") + total_count: int = Field(description="提取的数字总数", ge=0) + + def __post_init__(self): + """确保total_count与extractions长度一致""" + if self.total_count != len(self.extractions): + object.__setattr__(self, 'total_count', len(self.extractions)) + + +# 测试模型定义 +def test_models(): + """测试Pydantic模型定义""" + print("正在测试数字提取模型...") + + try: + # 测试单个数字提取项 + extraction = NumberExtraction( + number=95.2, + explanation="模型准确率", + context="模型在测试集上达到了95.2%的准确率", + unit="%" + ) + print(f"✅ NumberExtraction模型测试成功: {extraction}") + + # 测试完整结果 + result = NumberExtractionResult( + 
extractions=[extraction],
            summary="发现1个准确率数值",
            total_count=1
        )
        print(f"✅ NumberExtractionResult模型测试成功")
        
        # Round-trip through Pydantic v2 JSON serialization.
        json_str = result.model_dump_json(indent=2)
        print(f"✅ JSON序列化测试成功,长度: {len(json_str)}字符")
        
        # Introspect the declared fields via the Pydantic v2 model_fields API.
        print(f"✅ NumberExtraction字段: {list(NumberExtraction.model_fields.keys())}")
        print(f"✅ NumberExtractionResult字段: {list(NumberExtractionResult.model_fields.keys())}")
        
        return True
        
    except Exception as e:
        # Any validation/serialization problem is reported, not re-raised,
        # so the self-test returns a plain pass/fail bool.
        print(f"❌ 模型测试失败: {e}")
        return False


# Run the model self-test when this module is executed directly.
if __name__ == "__main__":
    test_models() \ No newline at end of file