
Building a CNN-Based Chest X-ray Image Classifier from Scratch


Dataset: Chest X-ray PD Dataset introduction - Zhihu

Related reading: a survey of cardiopulmonary X-ray image datasets
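
Judging from the ChestXRayDataset class below, metadata.csv is expected to hold the image filename in its first column and the class label in its second column; 'covid' is mapped to 0, 'normal' to 1, and any other label to 2. A quick way to check that layout before training (this snippet is an addition, not part of the original code; it relies only on column positions, not on particular column names):

import pandas as pd

# Inspect the metadata file before training (path taken from the listing below).
csv_path = r'E:\NiuCode\LianXi\data\Chest X-ray\metadata.csv'
df = pd.read_csv(csv_path)

print(df.head())                     # first column: image filename, second column: label
print(df.iloc[:, 1].value_counts())  # class distribution, e.g. 'covid' / 'normal' / other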

CPU version

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import pandas as pd
import os
from PIL import Image
from sklearn.model_selection import train_test_split
from time import time

start_time = time()

# Define the dataset class
class ChestXRayDataset(Dataset):
    def __init__(self, csv_path, root_dir, transform=None):
        self.data_info = pd.read_csv(csv_path)
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return len(self.data_info)

    def __getitem__(self, idx):
        img_name = os.path.join(self.root_dir, self.data_info.iloc[idx, 0])
        image = Image.open(img_name).convert('RGB')
        label = self.data_info.iloc[idx, 1]
        # Map the string label to an integer class index
        if label == 'covid':
            label = 0
        elif label == 'normal':
            label = 1
        else:
            label = 2

        if self.transform:
            image = self.transform(image)

        return image, label

# Data preprocessing: resize to 224x224, convert to tensor, normalize with ImageNet statistics
data_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Load the data
root_dir = r'E:\NiuCode\LianXi\data\Chest X-ray\DataSet'
csv_path = r'E:\NiuCode\LianXi\data\Chest X-ray\metadata.csv'
dataset = ChestXRayDataset(csv_path, root_dir, transform=data_transform)
train_data, test_data = train_test_split(dataset, test_size=0.2, random_state=42)

# Create the data loaders
train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
test_loader = DataLoader(test_data, batch_size=32, shuffle=False)

print("==========")

# Define the CNN model
class CNNModel(nn.Module):
    def __init__(self):
        super(CNNModel, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(2, 2)

        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(2, 2)

        self.fc1 = nn.Linear(32 * 56 * 56, 128)
        self.relu3 = nn.ReLU()
        self.fc2 = nn.Linear(128, 3)

    def forward(self, x):
        out = self.pool1(self.relu1(self.conv1(x)))
        out = self.pool2(self.relu2(self.conv2(out)))
        out = out.view(-1, 32 * 56 * 56)
        out = self.relu3(self.fc1(out))
        out = self.fc2(out)
        return out

model = CNNModel()

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train the model
num_epochs = 10
for epoch in range(num_epochs):
    running_loss = 0.0
    for i, (images, labels) in enumerate(train_loader):
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
    print(f'Epoch {epoch + 1}, Loss: {running_loss / len(train_loader)}')

# Test the model
softmax = nn.Softmax(dim=1)  # softmax over the class dimension
correct = 0
total = 0
all_probabilities = []  # stores the class probabilities for every test sample
with torch.no_grad():
    for images, labels in test_loader:
        outputs = model(images)
        probabilities = softmax(outputs)  # convert the model outputs to a probability distribution
        all_probabilities.extend(probabilities.cpu().numpy())  # store the probabilities
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy of the model on the test set: {100 * correct / total}%')

# Print the probabilities for the first few samples
print("Probabilities for the first few samples:")
for i in range(min(5, len(all_probabilities))):
    print(f"Sample {i+1}: {all_probabilities[i]}")

end_time = time()
print("Elapsed time = {}".format(end_time - start_time))

GPU version

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import pandas as pd
import os
from PIL import Image
from sklearn.model_selection import train_test_split
from time import time

start_time = time()

# Define the dataset class
class ChestXRayDataset(Dataset):
    def __init__(self, csv_path, root_dir, transform=None):
        self.data_info = pd.read_csv(csv_path)
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return len(self.data_info)

    def __getitem__(self, idx):
        img_name = os.path.join(self.root_dir, self.data_info.iloc[idx, 0])
        image = Image.open(img_name).convert('RGB')
        label = self.data_info.iloc[idx, 1]
        # Map the string label to an integer class index
        if label == 'covid':
            label = 0
        elif label == 'normal':
            label = 1
        else:
            label = 2

        if self.transform:
            image = self.transform(image)

        return image, label

# Data preprocessing: resize to 224x224, convert to tensor, normalize with ImageNet statistics
data_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Load the data
root_dir = r'E:\NiuCode\LianXi\data\Chest X-ray\DataSet'
csv_path = r'E:\NiuCode\LianXi\data\Chest X-ray\metadata.csv'
dataset = ChestXRayDataset(csv_path, root_dir, transform=data_transform)
train_data, test_data = train_test_split(dataset, test_size=0.2, random_state=42)

# Create the data loaders
train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
test_loader = DataLoader(test_data, batch_size=32, shuffle=False)

print("==========")

# Define the CNN model
class CNNModel(nn.Module):
    def __init__(self):
        super(CNNModel, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(2, 2)

        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(2, 2)

        self.fc1 = nn.Linear(32 * 56 * 56, 128)
        self.relu3 = nn.ReLU()
        self.fc2 = nn.Linear(128, 3)

    def forward(self, x):
        out = self.pool1(self.relu1(self.conv1(x)))
        out = self.pool2(self.relu2(self.conv2(out)))
        out = out.view(-1, 32 * 56 * 56)
        out = self.relu3(self.fc1(out))
        out = self.fc2(out)
        return out

# Check whether CUDA is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

model = CNNModel()
# Move the model to the device
model.to(device)

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train the model
num_epochs = 10
for epoch in range(num_epochs):
    running_loss = 0.0
    for i, (images, labels) in enumerate(train_loader):
        # Move the batch to the device
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
    print(f'Epoch {epoch + 1}, Loss: {running_loss / len(train_loader)}')

# Test the model
softmax = nn.Softmax(dim=1)  # softmax over the class dimension
correct = 0
total = 0
all_probabilities = []  # stores the class probabilities for every test sample
with torch.no_grad():
    for images, labels in test_loader:
        # Move the batch to the device
        images = images.to(device)
        labels = labels.to(device)

        outputs = model(images)
        probabilities = softmax(outputs)  # convert the model outputs to a probability distribution
        all_probabilities.extend(probabilities.cpu().numpy())  # store the probabilities
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy of the model on the test set: {100 * correct / total}%')

# Print the probabilities for the first few samples
print("Probabilities for the first few samples:")
for i in range(min(5, len(all_probabilities))):
    print(f"Sample {i+1}: {all_probabilities[i]}")

end_time = time()
print("Elapsed time = {}".format(end_time - start_time))

CPU version elapsed time: 1310.6518561840057 seconds

GPU version elapsed time: 70.60973024368286 seconds

Accuracy: 100%
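
A 100% accuracy on a random 80/20 split is worth a closer look, since overall accuracy can hide how the three classes are balanced and which ones the model confuses. A hedged sketch of a per-class breakdown (not in the original post; it reuses model and test_loader from the listings above, and all_preds / all_labels are names introduced here):

from sklearn.metrics import classification_report, confusion_matrix
import torch

all_preds, all_labels = [], []  # accumulators introduced for this check
with torch.no_grad():
    for images, labels in test_loader:
        outputs = model(images)  # add images.to(device) here when using the GPU version
        _, predicted = torch.max(outputs, 1)
        all_preds.extend(predicted.cpu().tolist())
        all_labels.extend(labels.cpu().tolist())

print(confusion_matrix(all_labels, all_preds))
print(classification_report(all_labels, all_preds,
                            target_names=['covid', 'normal', 'other']))  # 'other' stands for whatever third label metadata.csv uses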
