Medical Semantic Segmentation: Retinal Vessel Segmentation with UPerNet (Deep Learning for Medical Image Processing)
Retinal vessel semantic segmentation implemented with the UPerNet architecture, an application of deep learning to medical image processing, using PyTorch as the runtime environment.
PyTorch 1.10.0 (CPU or GPU)
Python 3.8

The project contains two main programs: run.py, which starts the training pipeline directly with its default settings, and gui.py, which loads a trained model and provides a graphical interface for segmenting an input image. Both are built on the UPerNet architecture with PyTorch 1.10.0; the sections below walk through the environment, dataset layout, model definition, training script, dataset class, and GUI.
1. Environment Setup
Make sure the required dependencies are installed:
pip install torch==1.10.0 torchvision matplotlib opencv-python
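To confirm the environment is usable (a minimal check added here; the exact versions on your machine may differ), you can run:

import torch
import torchvision
import cv2

print("PyTorch:", torch.__version__)            # expect 1.10.0
print("torchvision:", torchvision.__version__)
print("OpenCV:", cv2.__version__)
print("CUDA available:", torch.cuda.is_available())  # False is fine; the code also runs on CPU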

2. Dataset Preparation
This assumes your dataset is already prepared and split into training and validation sets. The directory structure is as follows: images/ holds the fundus photographs, masks/ holds the corresponding vessel annotations, and each split (train/ and val/) contains matching image/mask pairs.

retinal_vessel_dataset/
├── images/
│   ├── train/
│   └── val/
└── masks/
    ├── train/
    └── val/
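Before training, a short sketch like the following can verify that every image has a matching mask. This check is an addition, and it assumes images and masks share the same base file names, which may not hold for your dataset:

import os

root = 'retinal_vessel_dataset'
for split in ('train', 'val'):
    images = sorted(os.listdir(os.path.join(root, 'images', split)))
    masks = sorted(os.listdir(os.path.join(root, 'masks', split)))
    print(f"{split}: {len(images)} images, {len(masks)} masks")
    # Assumes each image and its mask share the same base file name
    mask_names = {os.path.splitext(m)[0] for m in masks}
    missing = [f for f in images if os.path.splitext(f)[0] not in mask_names]
    if missing:
        print("  images without masks:", missing)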
3. UPerNet Model Definition (model.py)
The model below uses a ResNet-50 backbone and a pyramid pooling module (PPM) head for semantic segmentation. Save it as model.py so that run.py and gui.py can import it:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision.models import resnet50

class UPerNet(nn.Module):
    def __init__(self, num_classes=2):
        super(UPerNet, self).__init__()
        # Backbone: ResNet50
        self.backbone = resnet50(pretrained=True)
        self.backbone.fc = nn.Identity()  # Remove the fully connected layer
        # PPM (Pyramid Pooling Module): pool c4 to several grid sizes and
        # reduce channels with 1x1 convolutions
        self.ppm = nn.ModuleList([
            nn.Sequential(
                nn.AdaptiveAvgPool2d(bin_size),
                nn.Conv2d(2048, 512, kernel_size=1, bias=False),
                nn.BatchNorm2d(512),
                nn.ReLU(inplace=True)
            )
            for bin_size in (1, 2, 3, 6)
        ])
        # Fusion layer: concatenate c4 with the four PPM branches
        self.fusion = nn.Sequential(
            nn.Conv2d(2048 + 512 * 4, 512, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True)
        )
        # Final convolution
        self.final_conv = nn.Conv2d(512, num_classes, kernel_size=1)

    def forward(self, x):
        input_size = x.shape[2:]  # remember the input resolution for the final upsampling
        # Backbone
        x = self.backbone.conv1(x)
        x = self.backbone.bn1(x)
        x = self.backbone.relu(x)
        x = self.backbone.maxpool(x)
        c1 = self.backbone.layer1(x)
        c2 = self.backbone.layer2(c1)
        c3 = self.backbone.layer3(c2)
        c4 = self.backbone.layer4(c3)
        # (c1-c3 are unused in this simplified head; a full UPerNet would also fuse them via an FPN)
        # PPM: each pooled branch is upsampled back to c4's spatial size before concatenation
        ppm_out = [c4]
        for pool in self.ppm:
            ppm_out.append(F.interpolate(pool(c4), size=c4.shape[2:],
                                         mode='bilinear', align_corners=False))
        # Fusion
        fusion_out = self.fusion(torch.cat(ppm_out, dim=1))
        # Final convolution, then upsample to the original input resolution
        out = self.final_conv(fusion_out)
        out = F.interpolate(out, size=input_size, mode='bilinear', align_corners=False)
        return out
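Before wiring the model into the training script, a quick forward pass with a dummy tensor (a sanity check added here, using an arbitrary 512x512 input) confirms that the output has the expected shape of (batch, num_classes, H, W):

import torch
from model import UPerNet

model = UPerNet(num_classes=2)
model.eval()
with torch.no_grad():
    dummy = torch.randn(1, 3, 512, 512)  # one fake RGB image, 512x512
    out = model(dummy)
print(out.shape)  # expected: torch.Size([1, 2, 512, 512])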
4. Training Script (run.py)
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision.transforms import Compose, ToTensor, Normalize
from dataset import RetinalVesselDataset
from model import UPerNet

# Hyperparameters
batch_size = 4
learning_rate = 1e-4
num_epochs = 100
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Data transforms (ImageNet statistics, matching the pretrained backbone)
transform = Compose([ToTensor(), Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])

# Datasets
train_dataset = RetinalVesselDataset(root_dir='retinal_vessel_dataset', split='train', transform=transform)
val_dataset = RetinalVesselDataset(root_dir='retinal_vessel_dataset', split='val', transform=transform)

# DataLoaders
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=4)

# Model
model = UPerNet(num_classes=2).to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for images, masks in train_loader:
        images, masks = images.to(device), masks.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, masks)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}")

    # Validation
    model.eval()
    with torch.no_grad():
        val_loss = 0.0
        for images, masks in val_loader:
            images, masks = images.to(device), masks.to(device)
            outputs = model(images)
            loss = criterion(outputs, masks)
            val_loss += loss.item()
        print(f"Validation Loss: {val_loss/len(val_loader):.4f}")

# Save the model
torch.save(model.state_dict(), 'upernet_retinal_vessel.pth')
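Cross-entropy loss alone is hard to interpret for thin structures like vessels. If you also want a segmentation metric during validation, the following Dice-score sketch (an addition, not part of the original training script) can be dropped into the validation loop:

import torch

def dice_score(outputs, masks, eps=1e-6):
    """Foreground Dice for a batch: outputs are raw logits (B, 2, H, W), masks are (B, H, W)."""
    preds = torch.argmax(outputs, dim=1)   # predicted class per pixel
    pred_fg = (preds == 1).float()         # predicted vessel pixels
    true_fg = (masks == 1).float()         # ground-truth vessel pixels
    intersection = (pred_fg * true_fg).sum()
    return (2 * intersection + eps) / (pred_fg.sum() + true_fg.sum() + eps)

# Inside the validation loop, after `outputs = model(images)`:
# print(f"Dice: {dice_score(outputs, masks).item():.4f}")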
5. Dataset Class (dataset.py)
import os
import cv2
import numpy as np
import torch
from torch.utils.data import Dataset

class RetinalVesselDataset(Dataset):
    def __init__(self, root_dir, split='train', transform=None):
        self.root_dir = root_dir
        self.split = split
        self.transform = transform
        self.image_paths = sorted(os.listdir(os.path.join(root_dir, 'images', split)))
        self.mask_paths = sorted(os.listdir(os.path.join(root_dir, 'masks', split)))

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        image_path = os.path.join(self.root_dir, 'images', self.split, self.image_paths[idx])
        mask_path = os.path.join(self.root_dir, 'masks', self.split, self.mask_paths[idx])
        image = cv2.imread(image_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # match the RGB normalization statistics
        mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
        # Binarize the mask: vessel pixels (e.g. 255) become class 1, background stays class 0
        mask = (mask > 127).astype(np.int64)
        if self.transform:
            image = self.transform(image)
        mask = torch.from_numpy(mask).long()
        return image, mask
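A short smoke test (assuming the directory layout from step 2) can confirm that samples come back with the expected shapes and label values:

from torchvision.transforms import Compose, ToTensor, Normalize
from dataset import RetinalVesselDataset

transform = Compose([ToTensor(), Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])
ds = RetinalVesselDataset(root_dir='retinal_vessel_dataset', split='train', transform=transform)
image, mask = ds[0]
print(image.shape)    # torch.Size([3, H, W]), float
print(mask.shape)     # torch.Size([H, W]), long
print(mask.unique())  # tensor([0, 1]) -- background and vessel classes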
6. GUI (gui.py)
import tkinter as tk
from tkinter import filedialog
import cv2
import torch
import numpy as np
from torchvision.transforms import Compose, ToTensor, Normalize
from model import UPerNet

# Load the trained model
model = UPerNet(num_classes=2)
model.load_state_dict(torch.load('upernet_retinal_vessel.pth', map_location=torch.device('cpu')))
model.eval()

# Data transforms (must match the ones used during training)
transform = Compose([ToTensor(), Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])

def load_image():
    file_path = filedialog.askopenfilename()
    if file_path:
        image = cv2.imread(file_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        original_image = image.copy()
        # Preprocess the image
        image = transform(image).unsqueeze(0)
        # Perform inference
        with torch.no_grad():
            output = model(image)
            output = torch.argmax(output.squeeze(), dim=0).numpy()
        # Overlay the segmentation mask on the original image
        overlay = original_image.copy()
        mask_color = (0, 255, 0)  # Green color for the vessels
        overlay[output > 0] = mask_color
        alpha = 0.5
        segmented_image = cv2.addWeighted(original_image, 1 - alpha, overlay, alpha, 0)
        # Convert back to BGR so OpenCV displays the colors correctly
        segmented_image = cv2.cvtColor(segmented_image, cv2.COLOR_RGB2BGR)
        # Display the result
        cv2.imshow('Segmented Image', segmented_image)
        cv2.waitKey(0)
        cv2.destroyAllWindows()

# Create the GUI
root = tk.Tk()
root.title("Retinal Vessel Segmentation")
load_button = tk.Button(root, text="Load Image", command=load_image)
load_button.pack(pady=20)
root.mainloop()
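If you need to segment images on a machine without a display, the same inference code can also be run headless and the result saved to disk. This is a variant sketched here, not part of the original; the input and output file names are hypothetical:

import cv2
import torch
from torchvision.transforms import Compose, ToTensor, Normalize
from model import UPerNet

# Load the trained weights on CPU
model = UPerNet(num_classes=2)
model.load_state_dict(torch.load('upernet_retinal_vessel.pth', map_location='cpu'))
model.eval()

transform = Compose([ToTensor(), Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])

image = cv2.cvtColor(cv2.imread('example_fundus.png'), cv2.COLOR_BGR2RGB)  # hypothetical input file
with torch.no_grad():
    pred = torch.argmax(model(transform(image).unsqueeze(0)).squeeze(), dim=0).numpy()
cv2.imwrite('vessel_mask.png', (pred * 255).astype('uint8'))  # white vessels on black background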
7. Running the Scripts
Train the model:
python run.py
Launch the GUI:
python gui.py
Summary
Following the steps above, you can build a retinal vessel semantic segmentation system based on the UPerNet architecture: run.py trains the model in your development environment, and gui.py loads the trained weights and performs semantic segmentation on user-selected images through the graphical interface.
