跟着AI学AI - 诊断结论信息抽取 - 模型压缩与部署
# 设置镜像源的环境变量
(vippython) PS D:\OpenSource\Python\VipPython> $env:HF_ENDPOINT = "https://hf-mirror.com"
# 添加依赖
(vippython) PS D:\OpenSource\Python\VipPython\information_extraction> uv add fastapi==0.136.1
(vippython) PS D:\OpenSource\Python\VipPython\information_extraction> uv add uvicorn==0.30.6
# 切换下目录,否则会报文件不存在
(vippython) PS D:\OpenSource\Python\VipPython> cd D:\OpenSource\Python\VipPython\information_extraction
(vippython) PS D:\OpenSource\Python\VipPython> D:\OpenSource\Python\VipPython\.venv\Scripts\python.exe -c "import uvicorn; print(uvicorn.__version__)"
0.46.0
# 启动服务
(vippython) PS D:\OpenSource\Python\VipPython> uv run uvicorn ner_service:app --reload --host 0.0.0.0 --port 8001
debug_server.py —— 用于 PyCharm 调试的入口文件。调试未能成功运行,提示存在版本冲突,暂未解决(见文末的图)
# debug_server.py - 新建一个调试入口文件
import uvicorn
from ner_service import app
if __name__ == "__main__":
    # Start the server without the auto-reloader; the original notes say
    # reload must be off for debugger breakpoints to take effect.
    server_options = {
        "host": "127.0.0.1",
        "port": 8000,
        "reload": False,
        "log_level": "debug",
    }
    uvicorn.run(app, **server_options)
ner_service.py
# ner_service.py - 修复后的版本
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from typing import List, Dict, Optional
import torch
from transformers import AutoTokenizer, AutoModelForTokenClassification
import json
import re
from datetime import datetime
# ASGI application object; the route decorators further down register their handlers on it.
app = FastAPI(title="心电图NER服务", description="心电图报告实体识别服务")
class ECGReport(BaseModel):
    # Request payload accepted by the /predict endpoint.
    text: str  # report text to run entity recognition on
    report_id: Optional[str] = None  # optional caller-supplied id; the handler generates one when it is missing
class Entity(BaseModel):
    # One recognised entity as returned to API clients.
    text: str  # entity text, rebuilt by concatenating the tokens of the span
    label: str  # entity type with the "B-"/"I-" prefix removed
    start: int  # index of the first token of the span (a token index, not a character offset)
    end: int  # index one past the last token of the span
    confidence: float  # the predictor currently always fills in 1.0
class NERResponse(BaseModel):
    # Response payload produced by the /predict endpoint.
    report_id: str  # id taken from the request, or generated by the handler
    text: str  # the request text, echoed back unchanged
    entities: List[Entity]  # recognised entities in prediction order
    summary: Dict  # summary sections built by NERService.extract_summary
class NERService:
    """Wrap a token-classification model and turn its BIO tags into entities.

    The constructor loads the tokenizer, the model and the id-to-label mapping
    from ``model_path``. ``predict`` tags a single text and ``extract_summary``
    condenses a list of entities into a small report summary.
    """

    # Indicator names that are reported in the heart-rate section of the summary.
    _HEART_RATE_NAMES = ('平均心率', '最快心率', '最慢心率', '心率')
    # Entity labels whose texts are collected into the "主要事件" section.
    _EVENT_LABELS = ('事件类型', '诊断结论', '事件子类')
    # Number of entities after an indicator name that are searched for its value.
    _VALUE_LOOKAHEAD = 4

    def __init__(self, model_path='./ecg_ner_model'):
        """Remember ``model_path``, pick CUDA when available and load the model.

        Args:
            model_path: Directory handed to ``from_pretrained``; an optional
                ``id2label.json`` is read from the same directory.
        """
        self.model_path = model_path
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.load_model()

    def load_model(self):
        """Load tokenizer, model and label mapping from ``self.model_path``.

        The model is moved to ``self.device`` and switched to eval mode.
        Failures while loading the tokenizer or model propagate to the caller;
        only a missing or malformed ``id2label.json`` falls back to a built-in
        three-label mapping.
        """
        print(f"加载模型: {self.model_path}")
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_path)
        self.model = AutoModelForTokenClassification.from_pretrained(self.model_path)
        self.model.to(self.device)
        self.model.eval()

        # Load the label mapping. JSON object keys are strings, so they are
        # converted back to int to match the predicted class ids.
        try:
            with open(f'{self.model_path}/id2label.json', 'r', encoding='utf-8') as f:
                raw_mapping = json.load(f)
            self.id2label = {int(k): v for k, v in raw_mapping.items()}
        except (OSError, ValueError, AttributeError) as exc:
            # OSError: file missing/unreadable; ValueError: invalid JSON or a
            # non-integer key; AttributeError: top-level JSON is not an object.
            # Best-effort fallback kept from the original, but no longer silent.
            print(f"未能加载 id2label.json ({exc}),使用默认标签映射")
            self.id2label = {0: "O", 1: "B-指标名称", 2: "I-指标名称"}

    def predict(self, text):
        """Tag ``text`` and return the recognised entities.

        Args:
            text: The text to analyse; input longer than 256 tokens is truncated.

        Returns:
            A list of dicts with the keys ``text``, ``label``, ``start``,
            ``end`` and ``confidence``, in order of appearance.
        """
        # Encode
        inputs = self.tokenizer(
            text,
            return_tensors="pt",
            truncation=True,
            max_length=256,
            padding=True,
        )
        inputs = {k: v.to(self.device) for k, v in inputs.items()}

        # Predict
        with torch.no_grad():
            outputs = self.model(**inputs)
        predictions = torch.argmax(outputs.logits, dim=2)

        # Decode; tolist() yields plain Python ints that match the id2label keys.
        tokens = self.tokenizer.convert_ids_to_tokens(inputs['input_ids'][0])
        label_ids = predictions[0].cpu().tolist()
        return self._decode_entities(tokens, label_ids)

    def _decode_entities(self, tokens, label_ids):
        """Merge per-token BIO labels into entity dicts."""
        # NOTE(review): start/end are positions in the token sequence, not
        # character offsets into the input text; any special tokens the
        # tokenizer adds presumably occupy positions too — confirm before
        # using them to slice the original string.
        entities = []
        current_entity = None
        for i, (token, pred_id) in enumerate(zip(tokens, label_ids)):
            label = self.id2label.get(int(pred_id), "O")
            if label.startswith('B-'):
                # A new entity starts; flush the one in progress first.
                if current_entity:
                    entities.append(current_entity)
                current_entity = {
                    'text': token.replace('##', ''),
                    'label': label[2:],
                    'start': i,
                    'end': i + 1,
                    'confidence': 1.0,
                }
            elif label.startswith('I-') and current_entity:
                # Continuation token: extend the open entity.
                current_entity['text'] += token.replace('##', '')
                current_entity['end'] = i + 1
            elif label == 'O' and current_entity:
                entities.append(current_entity)
                current_entity = None
        if current_entity:
            entities.append(current_entity)
        return entities

    @staticmethod
    def _entity_to_dict(entity):
        """Return ``entity`` as a plain dict (dict, pydantic model or any object)."""
        if isinstance(entity, dict):
            return entity
        # Prefer pydantic v2's model_dump(); dict() is the older spelling.
        for dumper_name in ('model_dump', 'dict'):
            dumper = getattr(entity, dumper_name, None)
            if callable(dumper):
                return dumper()
        # Any other object: read the expected attributes with defaults.
        return {
            'text': getattr(entity, 'text', ''),
            'label': getattr(entity, 'label', ''),
            'start': getattr(entity, 'start', 0),
            'end': getattr(entity, 'end', 0),
        }

    def extract_summary(self, entities):
        """Build the report summary from a list of entities.

        Args:
            entities: Entities as dicts, pydantic models or objects exposing
                ``text``/``label``/``start``/``end`` attributes.

        Returns:
            A dict containing only the non-empty sections among ``心率``
            (indicator name -> value text), ``主要事件`` (unique event texts in
            first-seen order), ``异常提示`` (heart-rate warnings) and
            ``实体统计`` (entity count per label).
        """
        entity_dicts = [self._entity_to_dict(entity) for entity in entities]
        summary = {}

        # Heart-rate section: pair each heart-rate indicator with the first
        # value entity found among the next few entities.
        hr_info = {}
        for i, entity in enumerate(entity_dicts):
            if entity['label'] != '指标名称':
                continue
            if entity['text'] not in self._HEART_RATE_NAMES:
                continue
            for candidate in entity_dicts[i + 1:i + 1 + self._VALUE_LOOKAHEAD]:
                if candidate['label'] == '数值':
                    hr_info[entity['text']] = candidate['text']
                    break
        if hr_info:
            summary['心率'] = hr_info

        # Event section: dict.fromkeys de-duplicates while keeping first-seen
        # order, so the output is deterministic.
        events = [
            entity['text']
            for entity in entity_dicts
            if entity['label'] in self._EVENT_LABELS
        ]
        if events:
            summary['主要事件'] = list(dict.fromkeys(events))

        # Abnormality section: a value counts as a heart rate only when the
        # entity directly before it is an indicator name containing "心率".
        # Iterating over adjacent pairs avoids list.index(), which is O(n) per
        # lookup and resolves to the first equal dict for duplicate entities.
        abnormalities = []
        for previous, entity in zip(entity_dicts, entity_dicts[1:]):
            if entity['label'] != '数值':
                continue
            if previous['label'] != '指标名称' or '心率' not in previous['text']:
                continue
            try:
                value = float(entity['text'])
            except (TypeError, ValueError):
                # Non-numeric value text: nothing to compare.
                continue
            if value > 100:
                abnormalities.append(f"心率过高({value}次/分)")
            elif value < 60:
                abnormalities.append(f"心率过低({value}次/分)")
        if abnormalities:
            summary['异常提示'] = abnormalities

        # Entity statistics: number of entities per label.
        entity_counts = {}
        for entity in entity_dicts:
            label = entity['label']
            entity_counts[label] = entity_counts.get(label, 0) + 1
        if entity_counts:
            summary['实体统计'] = entity_counts

        return summary
# Initialise the service once at import time; the route handlers below all use this instance.
ner_service = NERService(model_path='./ecg_ner_model') # path to your trained model directory
@app.post("/predict", response_model=NERResponse)
async def predict_entities(report: ECGReport):
    """预测实体"""
    # The docstring above is kept verbatim: FastAPI publishes a handler's
    # docstring as the operation description in the generated API docs.
    #
    # Runs entity recognition on report.text and returns the entities together
    # with a summary. Any failure is printed with its traceback and reported
    # to the client as HTTP 500.
    #
    # NOTE(review): ner_service.predict() is synchronous, so inside this
    # `async def` handler it runs on the event loop; consider a plain `def`
    # handler if concurrent requests matter.
    try:
        # Predict
        raw_entities = ner_service.predict(report.text)

        # Convert to response models (validates field types)
        entity_responses = [
            Entity(
                text=e['text'],
                label=e['label'],
                start=e['start'],
                end=e['end'],
                confidence=e.get('confidence', 1.0),
            )
            for e in raw_entities
        ]

        # Summarise from the raw dicts: extract_summary works on dicts, so
        # passing the models would only convert them straight back.
        summary = ner_service.extract_summary(raw_entities)

        return NERResponse(
            report_id=report.report_id or f"report_{int(datetime.now().timestamp())}",
            text=report.text,
            entities=entity_responses,
            summary=summary,
        )
    except Exception as e:
        import traceback

        print(f"错误: {traceback.format_exc()}")
        # Chain the original exception so the cause stays visible in logs.
        raise HTTPException(status_code=500, detail=str(e)) from e
@app.get("/health")
async def health_check():
    """健康检查"""
    # Docstring kept verbatim because FastAPI exposes it in the API docs.
    # Reports a fixed healthy status plus the device the service selected.
    device_name = str(ner_service.device)
    return dict(status="healthy", model_loaded=True, device=device_name)
@app.get("/test")
async def test_endpoint():
    """测试端点"""
    # Docstring kept verbatim because FastAPI exposes it in the API docs.
    # Smoke test: run the predictor on a fixed sample sentence and return
    # the sample, the entities found and how many there are.
    sample = "平均心率为76次/分"
    found = ner_service.predict(sample)
    payload = {"test_text": sample}
    payload["entities"] = found
    payload["entity_count"] = len(found)
    return payload
# Run command: uvicorn ner_service:app --reload --host 0.0.0.0 --port 8001
# Entry point used when this file is executed directly.
if __name__ == "__main__":
    import uvicorn

    # With reload=True uvicorn needs the application as an import string
    # ("module:attribute"); given the app object it logs a warning and exits.
    uvicorn.run("ner_service:app", host="0.0.0.0", port=8001, reload=True)
启动服务


Request Body
{
"text": "平均心率为76次/分,最快心率是142次/分,发生于01-17 15:57:16,最慢心率是46次/分,发生01-18 07:57:21,其中心动过速事件(心率>100次/分),持续时间占总时间的1.3%,心动过缓事件(心率<60次/分),持续时间占总时间的4.0%. 室性早搏共发生2695次,占总心搏数的9.0%,包括.2695次单发室早.42次三联律. 诊断: 1、窦性心律(心率波动于46次/分--142次/分之间) 2、频发室性早搏(2695次单发室早.插入性室早.42次三联律) 3、心率变异性分析:SDNN 211.54(正常参考值范围:102-180ms),SDANN 139.41(正常参考值范围:92-162ms)"
}
Response Body
{
"report_id": "report_1778489993",
"text": "平均心率为76次/分,最快心率是142次/分,发生于01-17 15:57:16,最慢心率是46次/分,发生01-18 07:57:21,其中心动过速事件(心率>100次/分),持续时间占总时间的1.3%,心动过缓事件(心率<60次/分),持续时间占总时间的4.0%. 室性早搏共发生2695次,占总心搏数的9.0%,包括.2695次单发室早.42次三联律. 诊断: 1、窦性心律(心率波动于46次/分--142次/分之间) 2、频发室性早搏(2695次单发室早.插入性室早.42次三联律) 3、心率变异性分析:SDNN 211.54(正常参考值范围:102-180ms),SDANN 139.41(正常参考值范围:92-162ms)",
"entities": [
{
"text": "平均心率",
"label": "指标名称",
"start": 1,
"end": 5,
"confidence": 1.0
},
{
"text": "76",
"label": "数值",
"start": 6,
"end": 7,
"confidence": 1.0
},
{
"text": "次/分",
"label": "单位",
"start": 7,
"end": 10,
"confidence": 1.0
},
{
"text": "最快心率",
"label": "指标名称",
"start": 11,
"end": 15,
"confidence": 1.0
},
{
"text": "142",
"label": "数值",
"start": 16,
"end": 17,
"confidence": 1.0
},
{
"text": "次/分",
"label": "单位",
"start": 17,
"end": 20,
"confidence": 1.0
},
{
"text": "01-1715:57:16",
"label": "日期时间",
"start": 24,
"end": 32,
"confidence": 1.0
},
{
"text": "最慢心率",
"label": "指标名称",
"start": 33,
"end": 37,
"confidence": 1.0
},
{
"text": "46",
"label": "数值",
"start": 38,
"end": 39,
"confidence": 1.0
},
{
"text": "次/分",
"label": "单位",
"start": 39,
"end": 42,
"confidence": 1.0
},
{
"text": "01-1807:57:21",
"label": "日期时间",
"start": 45,
"end": 53,
"confidence": 1.0
},
{
"text": "心动过速事件",
"label": "事件类型",
"start": 56,
"end": 62,
"confidence": 1.0
},
{
"text": "心率>100次/分",
"label": "条件定义",
"start": 63,
"end": 70,
"confidence": 1.0
},
{
"text": "持续时间占总时间的1.3%",
"label": "时间占比",
"start": 72,
"end": 85,
"confidence": 1.0
},
{
"text": "心动过缓事件",
"label": "事件类型",
"start": 86,
"end": 92,
"confidence": 1.0
},
{
"text": "心率<60次/分",
"label": "条件定义",
"start": 93,
"end": 100,
"confidence": 1.0
},
{
"text": "持续时间占总时间的4.0%",
"label": "时间占比",
"start": 102,
"end": 115,
"confidence": 1.0
},
{
"text": "室性早搏",
"label": "事件类型",
"start": 116,
"end": 120,
"confidence": 1.0
},
{
"text": "2695",
"label": "数值",
"start": 123,
"end": 125,
"confidence": 1.0
},
{
"text": "次",
"label": "单位",
"start": 125,
"end": 126,
"confidence": 1.0
},
{
"text": "占总心搏数的9.0%,",
"label": "时间占比",
"start": 127,
"end": 138,
"confidence": 1.0
},
{
"text": "2695",
"label": "数值",
"start": 141,
"end": 143,
"confidence": 1.0
},
{
"text": "次",
"label": "单位",
"start": 143,
"end": 144,
"confidence": 1.0
},
{
"text": "单发室早",
"label": "事件子类",
"start": 144,
"end": 148,
"confidence": 1.0
},
{
"text": "42",
"label": "数值",
"start": 149,
"end": 150,
"confidence": 1.0
},
{
"text": "次",
"label": "单位",
"start": 150,
"end": 151,
"confidence": 1.0
},
{
"text": "三联律",
"label": "事件子类",
"start": 151,
"end": 154,
"confidence": 1.0
},
{
"text": "诊断",
"label": "诊断类别",
"start": 155,
"end": 157,
"confidence": 1.0
},
{
"text": "窦性心律",
"label": "诊断结论",
"start": 160,
"end": 164,
"confidence": 1.0
},
{
"text": "心率波动于46次/分--142次/分之间",
"label": "数值范围",
"start": 165,
"end": 182,
"confidence": 1.0
},
{
"text": "频发室性早搏",
"label": "诊断结论",
"start": 185,
"end": 191,
"confidence": 1.0
},
{
"text": "2695",
"label": "数值",
"start": 192,
"end": 194,
"confidence": 1.0
},
{
"text": "次",
"label": "单位",
"start": 194,
"end": 195,
"confidence": 1.0
},
{
"text": "单发室早",
"label": "事件子类",
"start": 195,
"end": 199,
"confidence": 1.0
},
{
"text": "42",
"label": "数值",
"start": 206,
"end": 207,
"confidence": 1.0
},
{
"text": "次",
"label": "单位",
"start": 207,
"end": 208,
"confidence": 1.0
},
{
"text": "三联律",
"label": "事件子类",
"start": 208,
"end": 211,
"confidence": 1.0
},
{
"text": "心率变异性分析",
"label": "诊断结论",
"start": 214,
"end": 221,
"confidence": 1.0
},
{
"text": "[UNK]",
"label": "指标名称",
"start": 222,
"end": 223,
"confidence": 1.0
},
{
"text": "211.54",
"label": "数值",
"start": 223,
"end": 226,
"confidence": 1.0
},
{
"text": "正常参考值范围:102-180ms",
"label": "条件定义",
"start": 227,
"end": 239,
"confidence": 1.0
},
{
"text": "[UNK]",
"label": "指标名称",
"start": 241,
"end": 242,
"confidence": 1.0
},
{
"text": "139.41",
"label": "数值",
"start": 242,
"end": 245,
"confidence": 1.0
},
{
"text": "正常参考值范围:92",
"label": "条件定义",
"start": 246,
"end": 255,
"confidence": 1.0
}
],
"summary": {
"心率": {
"平均心率": "76",
"最快心率": "142",
"最慢心率": "46"
},
"主要事件": [
"心动过缓事件",
"单发室早",
"室性早搏",
"心率变异性分析",
"窦性心律",
"三联律",
"心动过速事件",
"频发室性早搏"
],
"异常提示": [
"心率过高(142.0次/分)",
"心率过低(46.0次/分)"
],
"实体统计": {
"指标名称": 5,
"数值": 10,
"单位": 8,
"日期时间": 2,
"事件类型": 3,
"条件定义": 4,
"时间占比": 3,
"事件子类": 4,
"诊断类别": 1,
"诊断结论": 3,
"数值范围": 1
}
}
}
注意: PyCharm Debug 运行的话,会存在版本冲突问题

本文来自博客园,作者:VipSoft 转载请注明原文链接:https://chuna2.787528.xyz/vipsoft/p/20012737
浙公网安备 33010602011771号