Advertisement

医学图像九分类

阅读量:

在上一次分类的基础上将二分类扩展为九分类。
将图片切割为2000*2000的小块,spacing=2000,按类别放在patch文件夹中。
要将整个数据集划分为训练集和测试集。
下面是两种方法划分数据集:
方法1:将源文件夹和目标文件夹相互对应起来,根据图片在文件夹中的位置索引,来判断是放在训练集,还是测试集。

复制代码
    import os
    import shutil
    
    # Source folder: one sub-folder per class, each holding the cut PNG patches.
    path = "/home/cad429/code/yue/Week2/patch"
    # Output roots. val_dir is only created here; this script copies nothing into it.
    train_dir = "/home/cad429/code/yue/Week2/train"
    test_dir = "/home/cad429/code/yue/Week2/test"
    val_dir = "/home/cad429/code/yue/Week2/val"


    def split_by_index(patch_root, train_root, test_root):
        """Copy every class folder's PNG files alternately into train and test.

        Within each class folder of ``patch_root`` the ``.png`` files are
        sorted by name; files at even positions (0, 2, 4, ...) are copied to
        ``train_root/<class>/`` and files at odd positions to
        ``test_root/<class>/``.

        Args:
            patch_root: Directory containing one sub-directory per class.
            train_root: Root directory receiving the training copies.
            test_root: Root directory receiving the test copies.

        Returns:
            A dict mapping each class name to ``(n_train, n_test)``, the
            number of files copied to each side.
        """
        counts = {}
        # os.listdir() order is arbitrary; sorting makes the split reproducible.
        for class_name in sorted(os.listdir(patch_root)):
            class_src = os.path.join(patch_root, class_name)
            if not os.path.isdir(class_src):
                continue

            train_dst = os.path.join(train_root, class_name)
            test_dst = os.path.join(test_root, class_name)
            # The class folders must exist before copying: shutil.copy() treats
            # a non-existent destination as a *file name*, so without this
            # every image would overwrite one file called "<class>".
            os.makedirs(train_dst, exist_ok=True)
            os.makedirs(test_dst, exist_ok=True)

            # Keep the real file names instead of rebuilding them from
            # name.split('.')[0] + ".png", which breaks on names with dots.
            images = sorted(
                name for name in os.listdir(class_src)
                if name.lower().endswith(".png")
            )
            train_files = images[0::2]
            test_files = images[1::2]

            for file_name in train_files:
                shutil.copy(os.path.join(class_src, file_name), train_dst)
            for file_name in test_files:
                shutil.copy(os.path.join(class_src, file_name), test_dst)

            counts[class_name] = (len(train_files), len(test_files))
        return counts


    if __name__ == "__main__":
        for directory in (train_dir, test_dir, val_dir):
            os.makedirs(directory, exist_ok=True)
        for class_name, (n_train, n_test) in split_by_index(
                path, train_dir, test_dir).items():
            print(class_name, "train:", n_train, "test:", n_test)
    
    
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    
    AI写代码

方法2:利用sklearn中带有的划分数据集的方法,将每个类别中的图片的名字按照一定的比例划分到x_train,x_test.

复制代码
    from sklearn.model_selection import train_test_split
    import numpy as np
    import os
    import shutil
    # Output roots for the random split, and the per-class source folder.
    split_train = "/home/cad429/code/yue/Week2/splittrain"
    split_test = "/home/cad429/code/yue/Week2/splittest"
    Image_path = "/home/cad429/code/yue/Week2/patch"


    def split_with_sklearn(image_root, train_root, test_root, test_size=0.3):
        """Randomly split each class folder's PNG files into train and test.

        For every class sub-directory of ``image_root`` the ``.png`` file
        names are passed to ``train_test_split``; the resulting train part is
        copied to ``train_root/<class>/`` and the test part to
        ``test_root/<class>/``.

        Args:
            image_root: Directory containing one sub-directory per class
                (named 0, 1, 2, ... in the original data set).
            train_root: Root directory receiving the training copies.
            test_root: Root directory receiving the test copies.
            test_size: Forwarded to ``train_test_split``; 0.3 by default.
        """
        for class_name in sorted(os.listdir(image_root)):
            class_src = os.path.join(image_root, class_name)
            if not os.path.isdir(class_src):
                continue

            # Use the real file names rather than split('.')[0] + ".png",
            # which breaks for names that contain more than one dot.
            images = sorted(
                name for name in os.listdir(class_src)
                if name.lower().endswith(".png")
            )
            if len(images) < 2:
                # train_test_split cannot produce two non-empty parts here.
                print("skip (fewer than 2 images):", class_name)
                continue

            x_train, x_test = train_test_split(images, test_size=test_size)
            print("i:", class_name)

            for dst_root, names in ((train_root, x_train), (test_root, x_test)):
                class_dst = os.path.join(dst_root, class_name)
                # The per-class destination folder has to exist before copying;
                # otherwise shutil.copy() raises FileNotFoundError.
                os.makedirs(class_dst, exist_ok=True)
                for file_name in names:
                    shutil.copy(os.path.join(class_src, file_name),
                                os.path.join(class_dst, file_name))


    if __name__ == "__main__":
        os.makedirs(split_train, exist_ok=True)
        os.makedirs(split_test, exist_ok=True)
        split_with_sklearn(Image_path, split_train, split_test)
    
    
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    
    AI写代码

图片大小为2000*2000,batch_size=2,learning_rate=0.01,epoch=100,损失函数为CrossEntropyLoss,优化函数为Adam时的训练结果:

测试的结果:

控制变量法做对比试验:将图片切为2000*2000,spacing=1000,learning_rate=0.01,损失函数为CrossEntropyLoss,优化函数为Adam时的训练结果:
在这里插入图片描述
测试结果:
在这里插入图片描述
测试结果显示模型发生了过拟合,需要增加一个验证集来监控模型是否过拟合,并据此决定是否停止训练。

将train的训练集的图片划分一部分到验证集。

复制代码
    import os
    import shutil
    import random
    from sklearn.model_selection import train_test_split
    def move_train_to_val(train_path,val_path,train2_path,val_size=None,random_state=None):
        """Split every class folder of ``train_path`` into train2 and val sets.

        For each class sub-directory the ``.png`` files are split with
        ``train_test_split``. The validation part is moved to
        ``val_path/<class>/`` and the remaining part to
        ``train2_path/<class>/``. Files are *moved*, so every split class
        folder under ``train_path`` ends up without its PNG files.

        Args:
            train_path: Directory containing one sub-directory per class.
            val_path: Root directory receiving the validation images.
            train2_path: Root directory receiving the remaining training images.
            val_size: Forwarded to ``train_test_split`` as ``test_size``.
                ``None`` keeps that function's default split ratio.
            random_state: Forwarded to ``train_test_split``; pass an int to
                make the split reproducible.
        """
        class_list = sorted(os.listdir(train_path))
        print(class_list)
        for class_name in class_list:
            class_path = os.path.join(train_path, class_name)
            if not os.path.isdir(class_path):
                continue
            print(class_name)

            # Keep the real file names instead of split('.')[0] + ".png",
            # which breaks for names that contain more than one dot.
            images = sorted(
                name for name in os.listdir(class_path)
                if name.lower().endswith(".png")
            )
            if len(images) < 2:
                # train_test_split cannot produce two non-empty parts here.
                print("skip (fewer than 2 images):", class_name)
                continue

            train_names, val_names = train_test_split(
                images, test_size=val_size, random_state=random_state)

            for dst_root, names in ((val_path, val_names),
                                    (train2_path, train_names)):
                class_dst = os.path.join(dst_root, class_name)
                # The per-class destination folder must exist, otherwise
                # shutil.move() fails with FileNotFoundError.
                os.makedirs(class_dst, exist_ok=True)
                for file_name in names:
                    shutil.move(os.path.join(class_path, file_name),
                                os.path.join(class_dst, file_name))
    
    if __name__ == "__main__":
        # Source training set (one folder per class) and the two output roots:
        # val receives the validation images, train2 the remaining training images.
        train_path = "/home/cad429/code/yue/Week2/train"
        val_path = "/home/cad429/code/yue/Week2/val"
        train2_path = "/home/cad429/code/yue/Week2/train2"
        move_train_to_val(train_path, val_path, train2_path)
    
    
    
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    
    AI写代码

将添加的新的验证集用来判断是否结束训练,然后用测试集来测试训练的模型。
结果:
![在这里插入图片描述](https://ad.itadn.com/c/weblog/blog-img/images/2025-04-02/XIYjFWve9g4GVrm3MkO2qRnipaNB.png)
总结:对图片预处理的部分较少,test准确率较低,评判标准只有acc和loss,评判标准较少。有些代码较冗余,重复功能的代码需要反复写。
下一次实验,减少冗余代码,评判标准多一点。

全部评论 (0)

还没有任何评论哟~