diff --git a/src/parse.py b/src/parse.py
index 2ac33f4..a418715 100644
--- a/src/parse.py
+++ b/src/parse.py
@@ -18,7 +18,11 @@ from typing import List, Dict, Optional, Tuple
 
 
 class PDFParser:
-    """PDF解析类 - 用于将PDF文件转换为Markdown格式并按任务类型筛选
+    """PDF解析类 - 用于将PDF文件转换为Markdown格式并筛选医学相关论文
+    
+    筛选机制：
+    1. 医学相关性：使用AI判断论文是否属于医学、临床、生物医学等领域
+    2. 任务类型：在医学相关的基础上进一步筛选指定的研究任务类型
     
     支持的任务类型：
     - prediction: 预测任务 (PRED_)
@@ -43,16 +47,11 @@ class PDFParser:
         # OCR API配置
         self.ocr_api_url = "http://100.106.4.14:7861/parse"
         
-        # AI模型API配置（用于四类任务识别：prediction/classification/time_series/correlation）
+        # AI模型API配置（用于医学相关性和四类任务识别）
         self.ai_api_url = "http://100.82.33.121:11001/v1/chat/completions"
         self.ai_model = "gpt-oss-20b"
         
-        # MIMIC-IV关键词配置（用于内容筛选）
-        self.mimic_keywords = [
-            "MIMIC-IV", "MIMIC 4", "MIMIC IV", "MIMIC-4",
-            "Medical Information Mart Intensive Care IV",
-            "MIMIC-IV dataset", "MIMIC-IV database"
-        ]
+        # 注意：原来的MIMIC关键词配置已移除，现在使用AI判断医学相关性
         
         # 任务类型到前缀的映射配置
         self.task_type_prefixes = {
@@ -105,44 +104,9 @@ class PDFParser:
         logging.info(f"发现 {len(pdf_files)} 个PDF文件待处理")
         return pdf_files
     
-    def _check_mimic_keywords(self, output_subdir: Path) -> bool:
-        """检查Markdown文件是否包含MIMIC-IV关键词
-        
-        Args:
-            output_subdir (Path): 包含Markdown文件的输出子目录
-            
-        Returns:
-            bool: 是否包含MIMIC-IV关键词
-        """
-        try:
-            # 查找所有.md文件
-            md_files = list(output_subdir.glob("*.md"))
-            if not md_files:
-                logging.warning(f"未找到Markdown文件进行MIMIC关键词检查: {output_subdir}")
-                return False
-            
-            # 检查每个Markdown文件的内容
-            for md_file in md_files:
-                try:
-                    with open(md_file, 'r', encoding='utf-8') as f:
-                        content = f.read().lower()  # 转换为小写进行不区分大小写匹配
-                    
-                    # 检查是否包含任何MIMIC-IV关键词
-                    for keyword in self.mimic_keywords:
-                        if keyword.lower() in content:
-                            logging.info(f"发现MIMIC-IV关键词 '{keyword}' 在文件 {md_file.name}")
-                            return True
-                            
-                except Exception as e:
-                    logging.error(f"读取Markdown文件时发生错误: {md_file.name} - {e}")
-                    continue
-            
-            logging.info(f"未发现MIMIC-IV关键词: {output_subdir.name}")
-            return False
-            
-        except Exception as e:
-            logging.error(f"检查MIMIC关键词时发生错误: {output_subdir} - {e}")
-            return False
+    # 注意：_check_mimic_keywords函数已移除（原功能：检查Markdown文件是否包含MIMIC-IV关键词）
+    # 移除原因：改用AI分析医学相关性，不再依赖特定关键词筛选
+    # 替代逻辑：由 _analyze_research_task 返回结果中的 is_medical 字段决定是否通过医学相关性筛选
     
     def _extract_introduction(self, output_subdir: Path) -> Optional[str]:
         """从Markdown文件中提取Introduction部分
@@ -210,33 +174,46 @@ class PDFParser:
             logging.error(f"提取Introduction时发生错误: {output_subdir} - {e}")
             return None
     
-    def _analyze_research_task(self, introduction: str) -> str:
-        """使用AI模型分析论文的研究任务类型
+    def _analyze_research_task(self, introduction: str) -> Dict[str, any]:
+        """使用AI模型分析论文的医学相关性和研究任务类型
         
         Args:
             introduction (str): 论文的Introduction内容
             
         Returns:
-            str: 任务类型 ('prediction', 'classification', 'time_series', 'correlation', 'none')
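+            Dict[str, Any]: 包含医学相关性和任务类型的分析结果（API调用失败、JSON解析失败或发生异常时，返回 is_medical=False、task_type='none'、两个置信度均为 0.0 的默认结果）
+                - is_medical: bool，是否为医学相关论文（medical_confidence 低于 0.7 时强制为 False）
+                - task_type: str，任务类型 ('prediction', 'classification', 'time_series', 'correlation', 'none')；AI返回无效类型或 task_confidence 低于 0.7 时为 'none'
+                - medical_confidence: float，医学相关性置信度
+                - task_confidence: float，任务类型置信度（AI返回无效任务类型时重置为 0.0）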
         """
         try:
             # 构造AI分析的提示词
-            system_prompt = """你是一个医学研究专家。请分析给定的论文Introduction部分，判断该研究属于以下哪种任务类型：
+            system_prompt = """你是一个医学研究专家。请分析给定的论文Introduction部分，判断两个维度：
 
-1. prediction - 预测任务：预测未来事件、结局或数值（如死亡率预测、住院时长预测、疾病进展预测）
-2. classification - 分类任务：将患者或病例分类到不同类别（如疾病诊断分类、风险等级分类、药物反应分类）
-3. time_series - 时间序列分析：分析随时间变化的医疗数据（如生命体征趋势分析、病情演进分析、纵向队列研究）
-4. correlation - 关联性分析：研究变量间的关系或关联（如痾病与人口特征关系、药物与副作用关联、风险因素识别）
-5. none - 不属于以上任何类型
+1. 医学相关性：判断该论文是否属于医学、临床医学、生物医学、公共卫生、护理学等医学相关领域
+   - 医学相关：涉及疾病、患者、临床数据、医疗干预、生物医学指标等
+   - 非医学相关：纯计算机科学、工程学、物理学、经济学等非医学领域
 
-请以JSON格式回答，包含任务类型和置信度：
-{\"task_type\": \"prediction\", \"confidence\": 0.85}
+2. 任务类型：如果是医学相关论文，进一步判断属于以下哪种任务类型：
+   - prediction: 预测任务（预测未来事件、结局或数值，如死亡率预测、住院时长预测、疾病进展预测）
+   - classification: 分类任务（将患者或病例分类到不同类别，如疾病诊断分类、风险等级分类、药物反应分类）
+   - time_series: 时间序列分析（分析随时间变化的医疗数据，如生命体征趋势分析、病情演进分析、纵向队列研究）
+   - correlation: 关联性分析（研究变量间的关系或关联，如疾病与人口特征关系、药物与副作用关联、风险因素识别）
+   - none: 不属于以上任何类型
+
+请以JSON格式回答，包含所有字段：
+{\"is_medical\": true, \"task_type\": \"prediction\", \"medical_confidence\": 0.90, \"task_confidence\": 0.85}
+
+字段说明：
+- is_medical: 布尔值，是否为医学相关论文
+- task_type: 任务类型（prediction/classification/time_series/correlation/none）
+- medical_confidence: 医学相关性置信度（0-1之间）
+- task_confidence: 任务类型置信度（0-1之间）
 
-task_type必须是以下选项之一：prediction、classification、time_series、correlation、none
-confidence为0-1之间的数值，表示判断的置信度。
 只返回JSON，不要添加其他文字。"""
 
-            user_prompt = f"请分析以下论文Introduction，判断属于哪种任务类型：\n\n{introduction[:2000]}"  # 限制长度避免token过多
+            user_prompt = f"请分析以下论文Introduction，判断医学相关性和任务类型：\n\n{introduction[:2000]}"  # 限制长度避免token过多
             
             # 构造API请求数据
             api_data = {
@@ -245,7 +222,7 @@ confidence为0-1之间的数值，表示判断的置信度。
                     {"role": "system", "content": system_prompt},
                     {"role": "user", "content": user_prompt}
                 ],
-                "max_tokens": 50,  # 需要返回JSON格式
+                "max_tokens": 100,  # 需要返回更复杂的JSON格式
                 "temperature": 0.1  # 降低随机性
             }
             
@@ -264,35 +241,50 @@ confidence为0-1之间的数值，表示判断的置信度。
                 try:
                     # 解析JSON响应
                     parsed_response = json.loads(ai_response)
+                    is_medical = parsed_response.get('is_medical', False)
                     task_type = parsed_response.get('task_type', 'none').lower()
-                    confidence = parsed_response.get('confidence', 0.0)
+                    medical_confidence = parsed_response.get('medical_confidence', 0.0)
+                    task_confidence = parsed_response.get('task_confidence', 0.0)
                     
                     # 验证任务类型是否有效
                     valid_types = ['prediction', 'classification', 'time_series', 'correlation', 'none']
                     if task_type not in valid_types:
                         logging.warning(f"AI返回了无效的任务类型: {task_type}，使用默认值 'none'")
                         task_type = "none"
-                        confidence = 0.0
+                        task_confidence = 0.0
                     
-                    # 只接受高置信度的结果
-                    if confidence < 0.7:
-                        logging.info(f"AI分析置信度过低 ({confidence:.2f})，归类为 'none'")
+                    # 检查医学相关性置信度（要求至少 0.7）
+                    if medical_confidence < 0.7:
+                        logging.info(f"医学相关性置信度过低 ({medical_confidence:.2f})，标记为非医学论文")
+                        is_medical = False
+                    
+                    # 检查任务类型置信度（要求至少 0.7）
+                    if task_confidence < 0.7:
+                        logging.info(f"任务类型置信度过低 ({task_confidence:.2f})，标记为 'none'")
                         task_type = "none"
                     
-                    logging.info(f"AI分析结果: 任务类型={task_type}, 置信度={confidence:.2f}")
-                    return task_type
+                    # 构建返回结果
+                    result = {
+                        'is_medical': is_medical,
+                        'task_type': task_type,
+                        'medical_confidence': medical_confidence,
+                        'task_confidence': task_confidence
+                    }
+                    
+                    logging.info(f"AI分析结果: 医学相关={is_medical}({medical_confidence:.2f}), 任务类型={task_type}({task_confidence:.2f})")
+                    return result
                     
                 except json.JSONDecodeError as e:
                     logging.error(f"解析AI JSON响应失败: {ai_response} - 错误: {e}")
-                    return "none"
+                    return {'is_medical': False, 'task_type': 'none', 'medical_confidence': 0.0, 'task_confidence': 0.0}
                 
             else:
                 logging.error(f"AI API调用失败，状态码: {response.status_code}")
-                return "none"
+                return {'is_medical': False, 'task_type': 'none', 'medical_confidence': 0.0, 'task_confidence': 0.0}
                 
         except Exception as e:
             logging.error(f"AI分析研究任务时发生错误: {e}")
-            return "none"
+            return {'is_medical': False, 'task_type': 'none', 'medical_confidence': 0.0, 'task_confidence': 0.0}
     
     def _mark_valid_folder(self, output_subdir: Path, task_type: str) -> bool:
         """为通过筛选的文件夹添加任务类型前缀标记
@@ -541,26 +533,28 @@ confidence为0-1之间的数值，表示判断的置信度。
             # 获取解压后的文件夹路径
             output_subdir = self.markdown_dir / pdf_file.stem
             
-            # 第一层筛选：检查MIMIC-IV关键词
-            logging.info(f"开始MIMIC-IV关键词筛选: {pdf_file.stem}")
-            if not self._check_mimic_keywords(output_subdir):
-                logging.info(f"未通过MIMIC-IV关键词筛选，跳过: {pdf_file.stem}")
-                return True  # 处理成功但未通过筛选
-            
-            # 第二层筛选：AI分析研究任务
+            # AI分析研究任务（医学相关性 + 任务类型）
             logging.info(f"开始AI研究任务分析: {pdf_file.stem}")
             introduction = self._extract_introduction(output_subdir)
             if not introduction:
                 logging.warning(f"无法提取Introduction，跳过AI分析: {pdf_file.stem}")
                 return True  # 处理成功但无法进行任务分析
             
-            task_type = self._analyze_research_task(introduction)
-            if task_type == "none":
-                logging.info(f"未通过研究任务筛选 (task_type=none)，跳过: {pdf_file.stem}")
+            analysis_result = self._analyze_research_task(introduction)
+            is_medical = analysis_result['is_medical']
+            task_type = analysis_result['task_type']
+            
+            # 检查是否通过筛选（必须是医学相关且属于指定任务类型）
+            if not is_medical:
+                logging.info(f"未通过医学相关性筛选，跳过: {pdf_file.stem}")
                 return True  # 处理成功但未通过筛选
             
-            # 两层筛选都通过，根据任务类型标记文件夹
-            logging.info(f"通过所有筛选，标记为{task_type}任务论文: {pdf_file.stem}")
+            if task_type == "none":
+                logging.info(f"未通过任务类型筛选 (task_type=none)，跳过: {pdf_file.stem}")
+                return True  # 处理成功但未通过筛选
+            
+            # 通过所有筛选，根据任务类型标记文件夹
+            logging.info(f"通过所有筛选，标记为{task_type}任务医学论文: {pdf_file.stem}")
             if self._mark_valid_folder(output_subdir, task_type):
                 logging.info(f"论文筛选完成，已标记为{task_type}任务: {pdf_file.stem}")
             else: