feat: implement the core paper-crawling functionality

- Add the src/crawler.py module implementing the PaperCrawler class
- Update the papers_crawler.py entry point to integrate the actual crawling logic
- Add the requests dependency to support HTTP requests
- Update the dependency lock file uv.lock
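
A minimal usage sketch of the new class (it mirrors what main() in papers_crawler.py now does; the source list and worker count below are illustrative, not prescribed by this commit):

from src.crawler import PaperCrawler

# Crawl both supported sources and write the results to a dated CSV
# under dataset/ via save_to_csv(); the argument values are examples.
crawler = PaperCrawler(websites=["arxiv", "medrxiv"], parallel=20)
papers = crawler.crawl_papers()
if papers:
    print(crawler.save_to_csv(papers))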
iomgaa 2025-08-23 16:33:36 +08:00
parent 6110251f05
commit 27398dc890
4 changed files with 525 additions and 4 deletions


@@ -1,5 +1,7 @@
import argparse
from src.crawler import PaperCrawler
def setup_args():
"""设置命令行参数解析
@@ -36,6 +38,7 @@ def setup_args():
    return parser.parse_args()
def main():
    """Main function - run the paper-crawling task"""
    try:
@@ -43,13 +46,29 @@ def main():
        args = setup_args()
        print(f"=== Paper crawling tool started ===")
        print(f"Paper data file: {args.paper_website}")
        print(f"Paper data: {args.paper_website}")
        print(f"Parallel workers: {args.parallel}")
        print(f"========================")
        # TODO: add the actual paper-crawling logic here
        # Initialize the paper crawler
        crawler = PaperCrawler(
            websites=args.paper_website,
            parallel=args.parallel
        )
        print("Feature under development, stay tuned...")
        # Run the paper crawl
        print("Starting to crawl MIMIC-4 related papers...")
        papers = crawler.crawl_papers()
        if papers:
            # Save the results to a CSV file
            csv_file_path = crawler.save_to_csv(papers)
            print(f"\n=== Crawl complete ===")
            print(f"Successfully crawled: {len(papers)} papers")
            print(f"Saved to: {csv_file_path}")
            print(f"================")
        else:
            print("No relevant papers found; check the network connection or keyword settings")
    except FileNotFoundError as e:
        print(f"Error: specified file not found - {e}")

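The hunks above show setup_args() only in fragments; a hypothetical sketch of the parser they imply, assuming the flag names --paper_website and --parallel (the diff only confirms the resulting attribute names), would look like:

import argparse

def setup_args():
    """Hypothetical reconstruction of the elided parser - only the attribute
    names paper_website and parallel are confirmed by the hunks above."""
    parser = argparse.ArgumentParser(description="MIMIC paper crawling tool")
    parser.add_argument("--paper_website", nargs="+", default=["arxiv", "medrxiv"],
                        help="paper sources to crawl (assumed flag name)")
    parser.add_argument("--parallel", type=int, default=20,
                        help="number of parallel workers (assumed flag name)")
    return parser.parse_args()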

@@ -4,4 +4,6 @@ version = "0.1.0"
description = "Add your description here"
readme = "README.md"
requires-python = ">=3.13"
dependencies = []
dependencies = [
"requests>=2.32.5",
]

src/crawler.py Normal file

@@ -0,0 +1,421 @@
"""论文爬取模块
该模块提供PaperCrawler类用于从ArXiv和MedRxiv爬取MIMIC 4相关论文
支持并发处理智能去重数据标准化等功能
"""
import requests
import xml.etree.ElementTree as ET
import logging
import time
from datetime import datetime, timedelta
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import List, Dict, Optional
from pathlib import Path
from src.utils.csv_utils import write_dict_to_csv
class PaperCrawler:
"""论文爬取类 - 用于从ArXiv和MedRxiv爬取MIMIC 4相关论文"""
def __init__(self, websites: List[str], parallel: int = 20,
arxiv_max_results: int = 200, medrxiv_days_range: int = 730):
"""初始化爬虫配置
Args:
websites (List[str]): 论文网站列表 ["arxiv", "medrxiv"]
parallel (int): 并发处理数默认20
"""
self.websites = websites
self.parallel = parallel
# 新增爬取参数配置
self.arxiv_max_results = arxiv_max_results # ArXiv最大爬取数量
self.medrxiv_days_range = medrxiv_days_range # MedRxiv爬取时间范围(天)
# MIMIC关键词配置
self.mimic_keywords = [
"MIMIC-IV", "MIMIC 4", "MIMIC IV",
"Medical Information Mart",
"intensive care", "ICU database",
"critical care database", "electronic health record"
]
# HTTP会话配置
self.session = requests.Session()
self.session.headers.update({
'User-Agent': 'MedResearcher-PaperCrawler/1.0'
})
# API配置
self.arxiv_base_url = "http://export.arxiv.org/api/query"
self.medrxiv_base_url = "https://api.medrxiv.org/details"
# 配置日志
logging.basicConfig(level=logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s')

    def crawl_papers(self) -> List[Dict[str, str]]:
        """Main crawl logic coordinating the individual data sources

        Returns:
            List[Dict[str, str]]: list of normalized paper records
        """
        all_papers = []
        try:
            # Crawl each configured data source concurrently
            with ThreadPoolExecutor(max_workers=2) as executor:
                futures = {}
                if "arxiv" in self.websites:
                    futures[executor.submit(self._crawl_arxiv)] = "arxiv"
                if "medrxiv" in self.websites:
                    futures[executor.submit(self._crawl_medrxiv)] = "medrxiv"
                # Collect results as they complete
                for future in as_completed(futures):
                    source = futures[future]
                    try:
                        papers = future.result()
                        all_papers.extend(papers)
                        logging.info(f"{source} returned {len(papers)} papers")
                    except Exception as e:
                        logging.error(f"Error while crawling {source}: {e}")
            # Deduplicate the combined results
            deduplicated_papers = self._deduplicate_papers(all_papers)
            logging.info(f"{len(deduplicated_papers)} papers after deduplication")
            return deduplicated_papers
        except Exception as e:
            logging.error(f"Error while crawling papers: {e}")
            raise

    def _crawl_arxiv(self) -> List[Dict[str, str]]:
        """Targeted search against the ArXiv API

        Returns:
            List[Dict[str, str]]: list of normalized paper records
        """
        papers = []
        try:
            # Build the keyword search query
            keywords_query = " OR ".join([f'ti:"{kw}"' for kw in self.mimic_keywords[:3]])
            abstract_query = " OR ".join([f'abs:"{kw}"' for kw in self.mimic_keywords])
            search_query = f"({keywords_query}) OR ({abstract_query})"
            # API request parameters
            params = {
                'search_query': search_query,
                'start': 0,
                'max_results': self.arxiv_max_results,
                'sortBy': 'submittedDate',
                'sortOrder': 'descending'
            }
            response = self._make_request_with_retry(
                self.arxiv_base_url,
                params=params
            )
            if response.status_code == 200:
                raw_papers = self._parse_arxiv_xml(response.text)
                # Normalize the ArXiv records
                papers = []
                for raw_paper in raw_papers:
                    normalized_paper = self._normalize_paper_data(raw_paper, "arxiv")
                    if normalized_paper:
                        papers.append(normalized_paper)
                logging.info(f"ArXiv API returned {len(papers)} papers")
            else:
                logging.error(f"ArXiv API request failed, status code: {response.status_code}")
        except Exception as e:
            logging.error(f"Error during the ArXiv crawl: {e}")
        return papers

    def _crawl_medrxiv(self) -> List[Dict[str, str]]:
        """Date-range search against the MedRxiv API plus local keyword filtering

        Returns:
            List[Dict[str, str]]: list of normalized paper records
        """
        papers = []
        try:
            # Time window covering roughly the last two years
            end_date = datetime.now()
            start_date = end_date - timedelta(days=self.medrxiv_days_range)
            date_from = start_date.strftime("%Y-%m-%d")
            date_to = end_date.strftime("%Y-%m-%d")
            # Page through all papers
            cursor = 0
            while True:
                url = f"{self.medrxiv_base_url}/medrxiv/{date_from}/{date_to}/{cursor}/json"
                response = self._make_request_with_retry(url)
                if response.status_code != 200:
                    logging.error(f"MedRxiv API request failed: {response.status_code}")
                    break
                data = response.json()
                # Stop when there is no more data
                if not data.get('collection') or not data['collection']:
                    break
                # Process the current page
                page_papers = []
                for paper_data in data['collection']:
                    normalized_paper = self._normalize_paper_data(paper_data, "medrxiv")
                    if normalized_paper:
                        page_papers.append(normalized_paper)
                # Local keyword filtering
                filtered_papers = self._filter_papers_by_keywords(page_papers, self.mimic_keywords)
                papers.extend(filtered_papers)
                logging.info(f"MedRxiv page {cursor//100 + 1}: {len(page_papers)} papers on the page, {len(filtered_papers)} MIMIC-related")
                # Check whether more pages remain
                cursor += 100
                if len(data['collection']) < 100:
                    break
                # Avoid hitting the API too frequently
                time.sleep(0.5)
        except Exception as e:
            logging.error(f"Error during the MedRxiv crawl: {e}")
        return papers

    def _filter_papers_by_keywords(self, papers: List[Dict], keywords: List[str]) -> List[Dict]:
        """Keyword filtering logic

        Args:
            papers (List[Dict]): list of papers
            keywords (List[str]): list of keywords
        Returns:
            List[Dict]: filtered list of papers
        """
        filtered_papers = []
        for paper in papers:
            title = paper.get('title', '').lower()
            abstract = paper.get('abstract', '').lower()
            # Keep the paper if any keyword appears in the title or abstract
            for keyword in keywords:
                keyword_lower = keyword.lower()
                if keyword_lower in title or keyword_lower in abstract:
                    filtered_papers.append(paper)
                    break
        return filtered_papers

    def _normalize_paper_data(self, paper_data: Dict, source: str) -> Optional[Dict[str, str]]:
        """Map source-specific fields onto the standard schema

        Args:
            paper_data (Dict): raw paper record
            source (str): data source, "arxiv" or "medrxiv"
        Returns:
            Optional[Dict[str, str]]: normalized record, or None if the data is invalid
        """
        try:
            if source == "arxiv":
                return {
                    'title': paper_data.get('title', '').strip(),
                    'authors': ', '.join(paper_data.get('authors', [])),
                    'abstract': paper_data.get('summary', '').strip(),
                    'doi': paper_data.get('doi', ''),
                    'published_date': paper_data.get('published', '').split('T')[0] if 'T' in paper_data.get('published', '') else paper_data.get('published', ''),
                    'url': paper_data.get('link', ''),
                    'source': 'arxiv',
                    'keywords': ', '.join(paper_data.get('categories', []))
                }
            elif source == "medrxiv":
                return {
                    'title': paper_data.get('title', '').strip(),
                    'authors': paper_data.get('authors', ''),
                    'abstract': paper_data.get('abstract', '').strip(),
                    'doi': paper_data.get('doi', ''),
                    'published_date': paper_data.get('date', ''),
                    'url': f"https://doi.org/{paper_data.get('doi', '')}" if paper_data.get('doi') else '',
                    'source': 'medrxiv',
                    'keywords': paper_data.get('category', '')
                }
        except Exception as e:
            logging.error(f"Error while normalizing paper data: {e}")
            return None
        return None

    def _deduplicate_papers(self, papers: List[Dict[str, str]]) -> List[Dict[str, str]]:
        """Deduplication logic

        Args:
            papers (List[Dict[str, str]]): list of papers
        Returns:
            List[Dict[str, str]]: deduplicated list of papers
        """
        seen_dois = set()
        seen_titles = set()
        unique_papers = []
        for paper in papers:
            doi = paper.get('doi', '').strip()
            title = paper.get('title', '').strip().lower()
            # Prefer DOI-based deduplication
            if doi and doi not in seen_dois:
                seen_dois.add(doi)
                unique_papers.append(paper)
            # Fall back to title-based deduplication only when there is no DOI
            elif not doi and title and title not in seen_titles:
                seen_titles.add(title)
                unique_papers.append(paper)
        return unique_papers

    def _make_request_with_retry(self, url: str, params: Optional[Dict] = None, max_retries: int = 3) -> requests.Response:
        """Retry helper with exponential backoff

        Args:
            url (str): request URL
            params (Optional[Dict]): request parameters
            max_retries (int): maximum number of retries
        Returns:
            requests.Response: HTTP response
        Raises:
            requests.RequestException: raised when all retries fail
        """
        for attempt in range(max_retries):
            try:
                response = self.session.get(url, params=params, timeout=30)
                return response
            except requests.RequestException as e:
                if attempt == max_retries - 1:
                    logging.error(f"Request failed after reaching the maximum number of retries: {e}")
                    raise
                wait_time = 2 ** attempt
                logging.warning(f"Request failed, retrying in {wait_time}s (attempt {attempt + 1}): {e}")
                time.sleep(wait_time)

    def _parse_arxiv_xml(self, xml_content: str) -> List[Dict]:
        """Parse the ArXiv Atom XML response

        Args:
            xml_content (str): XML content
        Returns:
            List[Dict]: list of parsed paper records
        """
        papers = []
        try:
            root = ET.fromstring(xml_content)
            # Define the Atom/ArXiv namespaces
            namespaces = {
                'atom': 'http://www.w3.org/2005/Atom',
                'arxiv': 'http://arxiv.org/schemas/atom'
            }
            # Parse each entry
            for entry in root.findall('atom:entry', namespaces):
                paper_data = {}
                # Title
                title_elem = entry.find('atom:title', namespaces)
                paper_data['title'] = title_elem.text.strip() if title_elem is not None else ''
                # Abstract
                summary_elem = entry.find('atom:summary', namespaces)
                paper_data['summary'] = summary_elem.text.strip() if summary_elem is not None else ''
                # Authors
                authors = []
                for author in entry.findall('atom:author', namespaces):
                    name_elem = author.find('atom:name', namespaces)
                    if name_elem is not None:
                        authors.append(name_elem.text.strip())
                paper_data['authors'] = authors
                # Publication date
                published_elem = entry.find('atom:published', namespaces)
                paper_data['published'] = published_elem.text if published_elem is not None else ''
                # Link
                links = entry.findall('atom:link', namespaces)
                for link in links:
                    if link.get('type') == 'text/html':
                        paper_data['link'] = link.get('href', '')
                        break
                # DOI (if present)
                paper_data['doi'] = entry.find('arxiv:doi', namespaces)
                paper_data['doi'] = paper_data['doi'].text if paper_data['doi'] is not None else ''
                # Categories
                categories = []
                for category in entry.findall('atom:category', namespaces):
                    term = category.get('term')
                    if term:
                        categories.append(term)
                paper_data['categories'] = categories
                papers.append(paper_data)
        except ET.ParseError as e:
            logging.error(f"XML parse error: {e}")
        except Exception as e:
            logging.error(f"Error while parsing the ArXiv XML: {e}")
        return papers

    def save_to_csv(self, data: List[Dict[str, str]]) -> str:
        """Save results to a CSV file

        Args:
            data (List[Dict[str, str]]): list of paper records
        Returns:
            str: path of the saved file
        """
        if not data:
            raise ValueError("No data to save")
        # Build the output file name
        timestamp = datetime.now().strftime("%Y%m%d")
        filename = f"mimic_papers_{timestamp}.csv"
        file_path = Path("dataset") / filename
        try:
            # Use the existing CSV helper
            fieldnames = ['title', 'authors', 'abstract', 'doi', 'published_date', 'url', 'source', 'keywords']
            write_dict_to_csv(data, file_path, fieldnames=fieldnames)
            logging.info(f"Saved {len(data)} papers to: {file_path}")
            return str(file_path)
        except Exception as e:
            logging.error(f"Error while saving the CSV file: {e}")
            raise

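save_to_csv() calls write_dict_to_csv from src/utils/csv_utils, which predates this commit and is not part of the diff; a minimal sketch of the behaviour the crawler assumes from that helper (the real implementation may differ) is:

import csv
from pathlib import Path
from typing import Dict, List, Optional

def write_dict_to_csv(data: List[Dict[str, str]], file_path: Path,
                      fieldnames: Optional[List[str]] = None) -> None:
    """Hypothetical stand-in for src.utils.csv_utils.write_dict_to_csv."""
    file_path.parent.mkdir(parents=True, exist_ok=True)  # ensure dataset/ exists
    fieldnames = fieldnames or list(data[0].keys())
    with open(file_path, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(data)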
uv.lock generated

@@ -2,7 +2,86 @@ version = 1
revision = 2
requires-python = ">=3.13"
[[package]]
name = "certifi"
version = "2025.8.3"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/dc/67/960ebe6bf230a96cda2e0abcf73af550ec4f090005363542f0765df162e0/certifi-2025.8.3.tar.gz", hash = "sha256:e564105f78ded564e3ae7c923924435e1daa7463faeab5bb932bc53ffae63407", size = 162386, upload-time = "2025-08-03T03:07:47.08Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/e5/48/1549795ba7742c948d2ad169c1c8cdbae65bc450d6cd753d124b17c8cd32/certifi-2025.8.3-py3-none-any.whl", hash = "sha256:f6c12493cfb1b06ba2ff328595af9350c65d6644968e5d3a2ffd78699af217a5", size = 161216, upload-time = "2025-08-03T03:07:45.777Z" },
]
[[package]]
name = "charset-normalizer"
version = "3.4.3"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/83/2d/5fd176ceb9b2fc619e63405525573493ca23441330fcdaee6bef9460e924/charset_normalizer-3.4.3.tar.gz", hash = "sha256:6fce4b8500244f6fcb71465d4a4930d132ba9ab8e71a7859e6a5d59851068d14", size = 122371, upload-time = "2025-08-09T07:57:28.46Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/65/ca/2135ac97709b400c7654b4b764daf5c5567c2da45a30cdd20f9eefe2d658/charset_normalizer-3.4.3-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:14c2a87c65b351109f6abfc424cab3927b3bdece6f706e4d12faaf3d52ee5efe", size = 205326, upload-time = "2025-08-09T07:56:24.721Z" },
{ url = "https://files.pythonhosted.org/packages/71/11/98a04c3c97dd34e49c7d247083af03645ca3730809a5509443f3c37f7c99/charset_normalizer-3.4.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:41d1fc408ff5fdfb910200ec0e74abc40387bccb3252f3f27c0676731df2b2c8", size = 146008, upload-time = "2025-08-09T07:56:26.004Z" },
{ url = "https://files.pythonhosted.org/packages/60/f5/4659a4cb3c4ec146bec80c32d8bb16033752574c20b1252ee842a95d1a1e/charset_normalizer-3.4.3-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:1bb60174149316da1c35fa5233681f7c0f9f514509b8e399ab70fea5f17e45c9", size = 159196, upload-time = "2025-08-09T07:56:27.25Z" },
{ url = "https://files.pythonhosted.org/packages/86/9e/f552f7a00611f168b9a5865a1414179b2c6de8235a4fa40189f6f79a1753/charset_normalizer-3.4.3-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:30d006f98569de3459c2fc1f2acde170b7b2bd265dc1943e87e1a4efe1b67c31", size = 156819, upload-time = "2025-08-09T07:56:28.515Z" },
{ url = "https://files.pythonhosted.org/packages/7e/95/42aa2156235cbc8fa61208aded06ef46111c4d3f0de233107b3f38631803/charset_normalizer-3.4.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:416175faf02e4b0810f1f38bcb54682878a4af94059a1cd63b8747244420801f", size = 151350, upload-time = "2025-08-09T07:56:29.716Z" },
{ url = "https://files.pythonhosted.org/packages/c2/a9/3865b02c56f300a6f94fc631ef54f0a8a29da74fb45a773dfd3dcd380af7/charset_normalizer-3.4.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:6aab0f181c486f973bc7262a97f5aca3ee7e1437011ef0c2ec04b5a11d16c927", size = 148644, upload-time = "2025-08-09T07:56:30.984Z" },
{ url = "https://files.pythonhosted.org/packages/77/d9/cbcf1a2a5c7d7856f11e7ac2d782aec12bdfea60d104e60e0aa1c97849dc/charset_normalizer-3.4.3-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:fdabf8315679312cfa71302f9bd509ded4f2f263fb5b765cf1433b39106c3cc9", size = 160468, upload-time = "2025-08-09T07:56:32.252Z" },
{ url = "https://files.pythonhosted.org/packages/f6/42/6f45efee8697b89fda4d50580f292b8f7f9306cb2971d4b53f8914e4d890/charset_normalizer-3.4.3-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:bd28b817ea8c70215401f657edef3a8aa83c29d447fb0b622c35403780ba11d5", size = 158187, upload-time = "2025-08-09T07:56:33.481Z" },
{ url = "https://files.pythonhosted.org/packages/70/99/f1c3bdcfaa9c45b3ce96f70b14f070411366fa19549c1d4832c935d8e2c3/charset_normalizer-3.4.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:18343b2d246dc6761a249ba1fb13f9ee9a2bcd95decc767319506056ea4ad4dc", size = 152699, upload-time = "2025-08-09T07:56:34.739Z" },
{ url = "https://files.pythonhosted.org/packages/a3/ad/b0081f2f99a4b194bcbb1934ef3b12aa4d9702ced80a37026b7607c72e58/charset_normalizer-3.4.3-cp313-cp313-win32.whl", hash = "sha256:6fb70de56f1859a3f71261cbe41005f56a7842cc348d3aeb26237560bfa5e0ce", size = 99580, upload-time = "2025-08-09T07:56:35.981Z" },
{ url = "https://files.pythonhosted.org/packages/9a/8f/ae790790c7b64f925e5c953b924aaa42a243fb778fed9e41f147b2a5715a/charset_normalizer-3.4.3-cp313-cp313-win_amd64.whl", hash = "sha256:cf1ebb7d78e1ad8ec2a8c4732c7be2e736f6e5123a4146c5b89c9d1f585f8cef", size = 107366, upload-time = "2025-08-09T07:56:37.339Z" },
{ url = "https://files.pythonhosted.org/packages/8e/91/b5a06ad970ddc7a0e513112d40113e834638f4ca1120eb727a249fb2715e/charset_normalizer-3.4.3-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:3cd35b7e8aedeb9e34c41385fda4f73ba609e561faedfae0a9e75e44ac558a15", size = 204342, upload-time = "2025-08-09T07:56:38.687Z" },
{ url = "https://files.pythonhosted.org/packages/ce/ec/1edc30a377f0a02689342f214455c3f6c2fbedd896a1d2f856c002fc3062/charset_normalizer-3.4.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b89bc04de1d83006373429975f8ef9e7932534b8cc9ca582e4db7d20d91816db", size = 145995, upload-time = "2025-08-09T07:56:40.048Z" },
{ url = "https://files.pythonhosted.org/packages/17/e5/5e67ab85e6d22b04641acb5399c8684f4d37caf7558a53859f0283a650e9/charset_normalizer-3.4.3-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2001a39612b241dae17b4687898843f254f8748b796a2e16f1051a17078d991d", size = 158640, upload-time = "2025-08-09T07:56:41.311Z" },
{ url = "https://files.pythonhosted.org/packages/f1/e5/38421987f6c697ee3722981289d554957c4be652f963d71c5e46a262e135/charset_normalizer-3.4.3-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:8dcfc373f888e4fb39a7bc57e93e3b845e7f462dacc008d9749568b1c4ece096", size = 156636, upload-time = "2025-08-09T07:56:43.195Z" },
{ url = "https://files.pythonhosted.org/packages/a0/e4/5a075de8daa3ec0745a9a3b54467e0c2967daaaf2cec04c845f73493e9a1/charset_normalizer-3.4.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:18b97b8404387b96cdbd30ad660f6407799126d26a39ca65729162fd810a99aa", size = 150939, upload-time = "2025-08-09T07:56:44.819Z" },
{ url = "https://files.pythonhosted.org/packages/02/f7/3611b32318b30974131db62b4043f335861d4d9b49adc6d57c1149cc49d4/charset_normalizer-3.4.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:ccf600859c183d70eb47e05a44cd80a4ce77394d1ac0f79dbd2dd90a69a3a049", size = 148580, upload-time = "2025-08-09T07:56:46.684Z" },
{ url = "https://files.pythonhosted.org/packages/7e/61/19b36f4bd67f2793ab6a99b979b4e4f3d8fc754cbdffb805335df4337126/charset_normalizer-3.4.3-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:53cd68b185d98dde4ad8990e56a58dea83a4162161b1ea9272e5c9182ce415e0", size = 159870, upload-time = "2025-08-09T07:56:47.941Z" },
{ url = "https://files.pythonhosted.org/packages/06/57/84722eefdd338c04cf3030ada66889298eaedf3e7a30a624201e0cbe424a/charset_normalizer-3.4.3-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:30a96e1e1f865f78b030d65241c1ee850cdf422d869e9028e2fc1d5e4db73b92", size = 157797, upload-time = "2025-08-09T07:56:49.756Z" },
{ url = "https://files.pythonhosted.org/packages/72/2a/aff5dd112b2f14bcc3462c312dce5445806bfc8ab3a7328555da95330e4b/charset_normalizer-3.4.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:d716a916938e03231e86e43782ca7878fb602a125a91e7acb8b5112e2e96ac16", size = 152224, upload-time = "2025-08-09T07:56:51.369Z" },
{ url = "https://files.pythonhosted.org/packages/b7/8c/9839225320046ed279c6e839d51f028342eb77c91c89b8ef2549f951f3ec/charset_normalizer-3.4.3-cp314-cp314-win32.whl", hash = "sha256:c6dbd0ccdda3a2ba7c2ecd9d77b37f3b5831687d8dc1b6ca5f56a4880cc7b7ce", size = 100086, upload-time = "2025-08-09T07:56:52.722Z" },
{ url = "https://files.pythonhosted.org/packages/ee/7a/36fbcf646e41f710ce0a563c1c9a343c6edf9be80786edeb15b6f62e17db/charset_normalizer-3.4.3-cp314-cp314-win_amd64.whl", hash = "sha256:73dc19b562516fc9bcf6e5d6e596df0b4eb98d87e4f79f3ae71840e6ed21361c", size = 107400, upload-time = "2025-08-09T07:56:55.172Z" },
{ url = "https://files.pythonhosted.org/packages/8a/1f/f041989e93b001bc4e44bb1669ccdcf54d3f00e628229a85b08d330615c5/charset_normalizer-3.4.3-py3-none-any.whl", hash = "sha256:ce571ab16d890d23b5c278547ba694193a45011ff86a9162a71307ed9f86759a", size = 53175, upload-time = "2025-08-09T07:57:26.864Z" },
]
[[package]]
name = "idna"
version = "3.10"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/f1/70/7703c29685631f5a7590aa73f1f1d3fa9a380e654b86af429e0934a32f7d/idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9", size = 190490, upload-time = "2024-09-15T18:07:39.745Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/76/c6/c88e154df9c4e1a2a66ccf0005a88dfb2650c1dffb6f5ce603dfbd452ce3/idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3", size = 70442, upload-time = "2024-09-15T18:07:37.964Z" },
]
[[package]]
name = "medresearcher"
version = "0.1.0"
source = { virtual = "." }
dependencies = [
{ name = "requests" },
]
[package.metadata]
requires-dist = [{ name = "requests", specifier = ">=2.32.5" }]
[[package]]
name = "requests"
version = "2.32.5"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "certifi" },
{ name = "charset-normalizer" },
{ name = "idna" },
{ name = "urllib3" },
]
sdist = { url = "https://files.pythonhosted.org/packages/c9/74/b3ff8e6c8446842c3f5c837e9c3dfcfe2018ea6ecef224c710c85ef728f4/requests-2.32.5.tar.gz", hash = "sha256:dbba0bac56e100853db0ea71b82b4dfd5fe2bf6d3754a8893c3af500cec7d7cf", size = 134517, upload-time = "2025-08-18T20:46:02.573Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/1e/db/4254e3eabe8020b458f1a747140d32277ec7a271daf1d235b70dc0b4e6e3/requests-2.32.5-py3-none-any.whl", hash = "sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6", size = 64738, upload-time = "2025-08-18T20:46:00.542Z" },
]
[[package]]
name = "urllib3"
version = "2.5.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/15/22/9ee70a2574a4f4599c47dd506532914ce044817c7752a79b6a51286319bc/urllib3-2.5.0.tar.gz", hash = "sha256:3fc47733c7e419d4bc3f6b3dc2b4f890bb743906a30d56ba4a5bfa4bbff92760", size = 393185, upload-time = "2025-06-18T14:07:41.644Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/a7/c2/fe1e52489ae3122415c51f387e221dd0773709bad6c6cdaa599e8a2c5185/urllib3-2.5.0-py3-none-any.whl", hash = "sha256:e6b01673c0fa6a13e374b50871808eb3bf7046c4b125b216f6bf1cc604cff0dc", size = 129795, upload-time = "2025-06-18T14:07:40.39Z" },
]