84 lines
2.9 KiB
Python
84 lines
2.9 KiB
Python
import logging
|
||
import langextract as lx
|
||
import os
|
||
# Demo script: extract people (name, age, occupation) from a short Chinese
# passage with langextract, served by an OpenAI-compatible vLLM endpoint.

# --- Credentials and endpoint ------------------------------------------------
# The API key is read from the environment instead of being committed to the
# source file. Export LANGEXTRACT_API_KEY before running this script.
API_KEY = os.environ.get("LANGEXTRACT_API_KEY")
if not API_KEY:
    raise SystemExit(
        "LANGEXTRACT_API_KEY is not set; export it before running this script."
    )

# Name of the model as it is deployed on the vLLM server. Kept in one place so
# the two spots that need it below cannot drift apart.
MODEL_ID = "gpt-oss"

# vLLM API endpoint. Note that the value used here DOES include the /v1 path
# prefix.
BASE_URL = "http://192.168.31.127:19090/v1"

# Optional: enable INFO-level logging to make debugging easier.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Input text to run the extraction on.
input_text = """
这篇文章介绍了张华,他今年32岁,是一位经验丰富的工程师。
他的同事李明,今年28岁,是一位充满活力的设计师。
他们一起在一家科技公司工作。
"""

# Few-shot example (used for prompt alignment): one person with age and
# occupation attributes.
examples = [
    lx.data.ExampleData(
        text="王小明,25岁,是一名软件开发者。",
        extractions=[
            lx.data.Extraction(
                extraction_class="person",
                extraction_text="王小明",
                attributes={"age": "25", "occupation": "软件开发者"},
            ),
        ],
    ),
]

# Build a ModelConfig that forces the OpenAI provider so requests are sent to
# the vLLM endpoint.
model_config = lx.factory.ModelConfig(
    model_id=MODEL_ID,
    provider="OpenAILanguageModel",  # explicitly select the OpenAI provider
    provider_kwargs={
        "base_url": BASE_URL,
        "api_key": API_KEY,
        "model_id": MODEL_ID,  # make sure the provider uses the same model id
    },
)

# General langextract options passed to lx.extract().
extract_config = {
    "config": model_config,
    "max_workers": 5,  # reduced concurrency to avoid overloading the vLLM service
    # NOTE(review): the original note said this size was chosen for medical
    # papers; the sample text in this script is far shorter -- confirm the value.
    "max_char_buffer": 6000,
    "extraction_passes": 1,  # single pass, to avoid extra API calls
    "temperature": 0.1,  # low temperature for more consistent output
    "fence_output": True,  # expect code-fenced output from the model
    "use_schema_constraints": False,  # vLLM may not support strict schemas
    "debug": False,
}

# Only the remote call is guarded: a failure while printing results below
# should not be reported as an extract() failure.
try:
    result = lx.extract(
        text_or_documents=input_text,
        prompt_description="从文本中提取人物姓名、年龄等信息",
        examples=examples,
        **extract_config,
    )
except Exception:
    # Top-level boundary: log the full traceback, then re-raise so the script
    # still exits with an error.
    logger.exception("调用 langextract.extract 失败,详情:")
    raise

# Print every extraction: its class, the matched text, and its attributes.
print("提取结果:")
for extraction in result.extractions:
    print(f"类别: {extraction.extraction_class}")
    print(f"文本: {extraction.extraction_text}")
    print(f"属性: {extraction.attributes}")
    print("---")
|