84 lines
2.9 KiB
Python
84 lines
2.9 KiB
Python
|
|
import logging
|
|||
|
|
import langextract as lx
|
|||
|
|
import os
|
|||
|
|
# SECURITY: a live API key used to be assigned to LANGEXTRACT_API_KEY on this
# line, which committed the secret together with the file. The key must now be
# provided by the caller's environment (e.g. `export LANGEXTRACT_API_KEY=...`),
# and the previously committed key should be rotated.
# Fail fast with a clear message instead of letting a later request fail with
# an authentication error.
if not os.environ.get("LANGEXTRACT_API_KEY", "").strip():
    raise RuntimeError(
        "LANGEXTRACT_API_KEY is not set; export it before running this script."
    )
|
|||
|
|
|
|||
|
|
# Optional: turn on INFO-level logging to make debugging easier.
# The module logger is created first; it only emits through whatever handlers
# the root configuration below installs.
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)
|
|||
|
|
|
|||
|
|
# Input text to run the extraction on. It is assembled from one literal per
# sentence; the value starts and ends with a newline, exactly like the
# equivalent triple-quoted block.
input_text = (
    "\n"
    "这篇文章介绍了张华,他今年32岁,是一位经验丰富的工程师。\n"
    "他的同事李明,今年28岁,是一位充满活力的设计师。\n"
    "他们一起在一家科技公司工作。\n"
)
|
|||
|
|
|
|||
|
|
# Few-shot example used for prompt alignment: one sample sentence together
# with the single "person" extraction expected from it.
_sample_person = lx.data.Extraction(
    attributes=dict(age="25", occupation="软件开发者"),
    extraction_text="王小明",
    extraction_class="person",
)

examples = [
    lx.data.ExampleData(
        extractions=[_sample_person],
        text="王小明,25岁,是一名软件开发者。",
    ),
]
|
|||
|
|
|
|||
|
|
# HTTP API configuration.
# NOTE: the base_url below DOES include the "/v1" suffix. An earlier comment
# here said to omit it, which contradicted the configured value.
# Build a ModelConfig that forces the OpenAI provider so requests go to the
# vLLM endpoint.
model_config = lx.factory.ModelConfig(
    model_id="gpt-oss",  # model name as actually deployed in vLLM
    provider="OpenAILanguageModel",  # explicitly select the OpenAI provider
    provider_kwargs={
        "base_url": "http://192.168.31.127:19090/v1",  # vLLM API endpoint
        # SECURITY: read the key from the environment instead of embedding the
        # secret in source. A missing variable raises KeyError right here.
        "api_key": os.environ["LANGEXTRACT_API_KEY"],
        "model_id": "gpt-oss",  # keep identical to model_id above
    },
)
|
|||
|
|
|
|||
|
|
# Shared keyword arguments that are unpacked into the lx.extract(...) call.
extract_config = dict(
    config=model_config,
    # Keep concurrency low so the vLLM service is not overloaded.
    max_workers=5,
    # Character buffer size; the original note says it was chosen to suit
    # medical papers.
    max_char_buffer=6000,
    # Single pass, to avoid issuing too many API calls.
    extraction_passes=1,
    # Low temperature for more consistent output.
    temperature=0.1,
    # Expect the model output wrapped in a code fence.
    fence_output=True,
    # vLLM may not support strict schema constraints.
    use_schema_constraints=False,
    debug=False,
)
|
|||
|
|
|
|||
|
|
# Run the extraction. Only the lx.extract call is guarded, so the
# "extract failed" log entry is emitted solely for failures of that call;
# the exception is re-raised after logging so the script still exits with an
# error. (A commented-out alternative call that embedded the API key in
# source was removed from here.)
try:
    result = lx.extract(
        text_or_documents=input_text,
        prompt_description="从文本中提取人物姓名、年龄等信息",
        examples=examples,
        **extract_config,
    )
except Exception:
    # logger.exception records the full traceback alongside the message.
    logger.exception("调用 langextract.extract 失败,详情:")
    raise
else:
    # Print every extraction returned for the input text.
    print("提取结果:")
    for extraction in result.extractions:
        print(f"类别: {extraction.extraction_class}")
        print(f"文本: {extraction.extraction_text}")
        print(f"属性: {extraction.attributes}")
        print("---")
|