基于tensorflow2.1.0+python3.7.4的CNN识别X光片判断患者是否有肺炎的demo

xddcore

各位神力AI的前辈，大家好。我是xddcore，目前在北京理工大学就读EE专业。(专业比较偏向于硬件，和ai还是有一丢丢距离的)。最近在研究yolov3的时候发现了神力AI，大概浏览了一下，感觉做得非常不错，理念挺好的！
今天来分享下自己前不久训练的一个模型。

声明数据集来源：
Kermany, Daniel; Zhang, Kang; Goldbaum, Michael (2018), “Labeled Optical Coherence Tomography (OCT) and Chest X-Ray Images for Classification”（用于分类的带标签光学相干断层扫描（OCT）与胸部X光图像）, Mendeley Data, 第2版,
http://dx.doi.org/10.17632/rscbjbr9sj.2

百度网盘下载地址:链接：https://pan.baidu.com/s/1U-xjQHYmhHp-vDpHoScbqw
提取码：i3f0

开源链接:https://gitee.com/xddcore/Jetson_Nano/tree/master/example/Classification/x_ray
TFRecord文件制作:

import os 
import tensorflow as tf 
from PIL import Image  #注意Image,后面会用到
import matplotlib.pyplot as plt 
import numpy as np

# Pack every image found under <cwd>/<class name>/ into one TFRecord file.
# Each record stores the class index and the raw bytes of a 128x128 image.
i = 0  # number of images written so far, reported as progress

cwd='E:/AI/datasets/chest_xray/test/'
classes=['normal','pneumonia']  # index 0 = normal, index 1 = pneumonia

# The with-block closes the writer even when an image fails to load.
with tf.io.TFRecordWriter("oct_test.tfrecords") as writer:  # file to generate
    for index,name in enumerate(classes):
        class_path = cwd + name + '/'
        for img_name in os.listdir(class_path):
            img_path=class_path+img_name  # path of one image

            # Convert to 8-bit single-channel before resizing so that every
            # record carries exactly 128*128 bytes. An RGB source would
            # otherwise serialize to 128*128*3 bytes, while the training
            # script reshapes the decoded bytes to (-1, 128, 128, 1).
            with Image.open(img_path) as source:
                img = source.convert('L').resize((128,128))
            img_raw=img.tobytes()  # raw pixel bytes of the resized image

            # Wrap the label and the image bytes in one tf.train.Example.
            example = tf.train.Example(
                features=tf.train.Features(
                    feature={
                        "label": tf.train.Feature(int64_list=tf.train.Int64List(value=[index])),
                        'img_raw': tf.train.Feature(bytes_list=tf.train.BytesList(value=[img_raw])),
                    }
                )
            )
            writer.write(example.SerializeToString())  # serialize to a byte string
            i = i + 1
            print("已写入"+str(i)+"张图片")

模型训练:

import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras import models, layers, optimizers
import numpy as np
import matplotlib.pyplot as plt

# Feature schema used when parsing a serialized example: a scalar int64
# entry named "label" and a scalar string entry named "img_raw".
image_feature_description = dict(
    label=tf.io.FixedLenFeature([], tf.int64),
    img_raw=tf.io.FixedLenFeature([], tf.string),
)
# Parse one serialized example.
def _parse_image_function(example_proto):
    """Parse a single serialized tf.Example proto.

    The features extracted are the ones declared in
    ``image_feature_description``; the parsed result is returned unchanged.
    """
    parsed = tf.io.parse_single_example(example_proto, image_feature_description)
    return parsed
  
def read_and_decode(filename):  # read a tfrecords file
    """Load every record of a TFRecord file into NumPy arrays.

    Args:
        filename: Path of the TFRecord file to read.

    Returns:
        A tuple ``(img, label)``. ``img`` is a float64 array of shape
        (N, 128, 128, 1) and ``label`` a float64 array of shape (N,),
        where N is the number of records in the file.

    Raises:
        ValueError: If a record's ``img_raw`` payload is not exactly
            128*128 bytes (for example an image stored with 3 channels).
    """
    pixels_per_image = 128 * 128
    images, labels = [], []

    # Parse every serialized example with the shared feature description.
    parsed_image_dataset = tf.data.TFRecordDataset(filename).map(_parse_image_function)

    for count, item in enumerate(parsed_image_dataset, start=1):
        # Raw image bytes -> flat uint8 vector.
        pixels = np.frombuffer(item['img_raw'].numpy(), dtype=np.uint8)
        if pixels.size != pixels_per_image:
            # Without this check a wrong-sized payload would be silently
            # reshaped into extra "images" that have no matching label.
            raise ValueError(
                "record %d in %s holds %d bytes, expected %d (128x128, 1 channel)"
                % (count, filename, pixels.size, pixels_per_image)
            )
        images.append(pixels)
        # The label is stored as a scalar int64: 0 = normal, 1 = pneumonia.
        labels.append(int(item['label'].numpy()))
        print("已从TFRecord加载"+str(count)+"张图片...")

    # Collect per-record arrays in lists and convert once at the end instead
    # of np.append inside the loop, which copies the whole buffer each time.
    img = np.array(images, dtype=np.float64).reshape(-1,128,128,1)
    label = np.array(labels, dtype=np.float64)
    print(img.shape)
    print(label.shape)
    return img, label

# Load the three TFRecord files in order: train, test, validation.
record_files = ("oct_train.tfrecords", "oct_test.tfrecords", "oct_val.tfrecords")
(x_train, y_train), (x_test, y_test), (x_valid, y_valid) = [
    read_and_decode(record_file) for record_file in record_files
]


#打印一张照片
# def show_single_image(img_arr):
#      plt.imshow(img_arr,cmap='binary')
#      plt.show()
# show_single_image(x_train[2])

# Stack the layers into a Sequential model: three 3x3 convolution blocks,
# each followed by 2x2 max pooling with stride 2, then a 128-unit dense
# layer, dropout of 0.4 and a 2-way softmax output.
model = models.Sequential([
    layers.Conv2D(
        32,
        (3, 3),
        padding='same',
        activation=tf.nn.relu,
        input_shape=(128, 128, 1),
    ),
    layers.MaxPooling2D((2, 2), strides=2),
    layers.Conv2D(64, (3, 3), padding='same', activation=tf.nn.relu),
    layers.MaxPooling2D((2, 2), strides=2),
    layers.Conv2D(64, (3, 3), padding='same', activation=tf.nn.relu),
    layers.MaxPooling2D((2, 2), strides=2),
    layers.Flatten(),
    layers.Dense(128, activation=tf.nn.relu),
    layers.Dropout(0.4),
    layers.Dense(2, activation=tf.nn.softmax),
])
#model = models.Sequential([layers.Flatten(input_shape=(28, 28)),
#                           layers.Dense(128, activation='relu'),
#                           layers.Dense(128, activation='relu'),
#                           layers.Dense(128, activation='relu'),
#                           layers.Dropout(0.5),
#                           layers.Dense(10, activation='softmax')
#])
# Compile the model: SGD optimizer, sparse categorical cross-entropy loss
# (integer class labels) and accuracy as the reported metric.
# `learning_rate` is the supported argument name; `lr` is a deprecated alias.
model.compile(optimizer=optimizers.SGD(learning_rate=1e-5), loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
# Alternative configuration kept for reference:
# model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
# Print the network's parameter counts.
model.summary()
#x_train = tf.expand_dims(x_train,axis=-1)
#print(x_train.shape)
#x_train_1 = tf.reshape(x_train,[-1,28,28,1])
#x_test_1 = tf.reshape(x_test,[-1,28,28,1])

#x_train = x_train.reshape(-1,128,128,1)
#x_valid = x_valid.reshape(-1,128,128,1)
#x_test = x_test.reshape(-1,128,128,1)

# Number of layers in the model.
print(len(model.layers))
# print(x_train[0])
# Before training, print the shape of each split: features, then labels.
for array in (x_train, y_train, x_test, y_test, x_valid, y_valid):
    print(array.shape)
# Train for 500 epochs with a batch size of 16. The arrays loaded from
# oct_test.tfrecords are passed as validation_data.
fit_options = dict(
    epochs=500,
    batch_size=16,
    validation_data=(x_test, y_test),
)
history = model.fit(x_train, y_train, **fit_options)

# Evaluate the trained model on the arrays loaded from oct_val.tfrecords.
model.evaluate(x_valid, y_valid, verbose=1)


# The History object returned by fit() exposes a `history` dict; the plotting
# code below reads its 'loss', 'val_loss', 'accuracy' and 'val_accuracy' entries.
history_dict = history.history
print(history_dict)

# Create the output folders up front so that a missing directory cannot make
# the save calls fail after the long training run. makedirs creates parent
# directories as needed and succeeds if the directory already exists.
tf.io.gfile.makedirs('./save/1660ti_cnn/save_weights3/')
tf.io.gfile.makedirs('./save/save_models/')

# Save only the weights and biases.
model.save_weights('./save/1660ti_cnn/save_weights3/')

# Save the complete model (network architecture included).
model.save('./save/save_models/1660ti_cnn.h5')



# Plot the loss curves.
loss_values = history_dict['loss']
val_loss_values = history_dict['val_loss']
epochs = range(1, len(loss_values)+1)  # 1-based epoch numbers for the x axis
plt.plot(epochs, loss_values, 'bo', label='Training loss')         # 'bo' = blue dots
plt.plot(epochs, val_loss_values, 'b', label='Validation loss')    # 'b' = blue solid line
plt.title('Training and validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

# Plot the accuracy curves.
acc_values = history_dict['accuracy']
val_acc_values = history_dict['val_accuracy']
plt.plot(epochs, acc_values, 'ro', label='Training acc')           # 'ro' = red dots
plt.plot(epochs, val_acc_values, 'r', label='Validation acc')      # 'r' = red solid line
# This figure shows accuracy, so it must not reuse the loss figure's title.
plt.title('Training and validation accuracy')
plt.xlabel('Epochs')
plt.ylabel('Acc')
plt.legend()
plt.show()

模型加载与推断

import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras import models, layers, optimizers
import numpy as np
# Load the complete model saved as an HDF5 file.
model = tf.keras.models.load_model('./save/save_models/1660ti_cnn.h5')


def _classify_xray(path):
    """Return the predicted label string for the JPEG image at ``path``.

    The image is decoded as a single-channel tensor, resized to 128x128,
    turned into a batch of one with shape (1, 128, 128, 1) and passed to
    ``model.predict``. Output index 1 maps to '肺炎'; anything else maps
    to '正常'.
    """
    # Read the file and decode it into a 1-channel tensor.
    image = tf.io.decode_jpeg(tf.io.read_file(path), channels=1)
    # Resize to the 128x128 input size of the network.
    image = tf.image.resize(image, (128, 128))
    # Tensor -> NumPy array with a leading batch dimension.
    batch = image.numpy().reshape(-1, 128, 128, 1)
    prediction = model.predict(batch, batch_size=None)
    return '肺炎' if np.argmax(prediction) == 1 else '正常'


# One shared pipeline for all sample images instead of three copy-pasted ones.
sample_paths = ('./pic/n1.jpeg', './pic/n2.jpeg', './pic/p1.jpeg')
predicted_labels = [_classify_xray(sample_path) for sample_path in sample_paths]

print("")

print("神经网络信息(ACC:76.67%):")
model.summary()
print("导入X光胸片标签： 正常 正常 肺炎")
print("X光胸片已预测完成，对三张X光胸片预测值分别为: ")
print("", *predicted_labels)
print("武汉 加油！")
print("-xdd_core 正月初一 25/1/2020")

最后的ACC在80%左右，感谢大家阅读，希望以后能多多向大神们学习。

刘看山

不错不错，加一个精！！