02-手写数字识别实战¶

加载数据集
检查数据格式
数据整形编码
模型建立
训练模型
模型评估
数据预测

加载数据集¶

# step: 1. 加载数据集
f = np.load('/home/zzh/Downloads/mnist.npz')
x_train, y_train = f['x_train'], f['y_train']
x_test, y_test = f['x_test'], f['y_test']
f.close()

# 也可以用我excle处理好的数据集
print(x_train.shape)
print(x_test.shape)

检查数据内容¶

 # setp2: 检查图片的大小和维度
 # 多少个训练集数据，多少个测试集数据
 # x_train[0].shape
 # y_test.shape

opencv展示数据¶

# step3： 展示数据
import cv2 
import numpy as np

for i in range(0,6):
    random_num = np.random.randint(0, len(x_train))
    img = x_train[random_num]
    window_name = 'img #' + str(random_num)
    cv2.imshow(window_name, img)
    cv2.waitKey(0)

cv2.destroyAllWindows()

matplotlib展示数据¶

# step3: matplotlib 展示
# 导包
import matplotlib.pyplot as plt

# 2行3列 
# cmap=plt.get_cmap('gray') 制定数据显示为灰度显示
plt.subplot(231)
random_num = np.random.randint(0,len(x_train))
plt.imshow(x_train[random_num] )

plt.subplot(232)
random_num = np.random.randint(0,len(x_train))
plt.imshow(x_train[random_num] )

plt.subplot(233)
random_num = np.random.randint(0,len(x_train))
plt.imshow(x_train[random_num] )

plt.subplot(234)
random_num = np.random.randint(0,len(x_train))
plt.imshow(x_train[random_num] )

plt.subplot(235)
random_num = np.random.randint(0,len(x_train))
plt.imshow(x_train[random_num] )

plt.subplot(236)
random_num = np.random.randint(0,len(x_train))
plt.imshow(x_train[random_num]) #, cmap=plt.get_cmap('gray')


# 展示数据 
plt.show()

加载数据¶

##加载数据
# (60000,28,28) to (60000,28,28,1)
# tensorflow 的数据格式 要求是4维的， 最后的1代表的是图片是灰度图，如果是3代表的是彩色图
x_train = x_train.reshape(x_train.shape[0], 28, 28, 1)
x_test = x_test.reshape(x_test.shape[0], 28, 28, 1)

#数据改为float类型，方便做除法
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')

# 数据归一化 (0 to 255) -> (0 to 1)
x_train /= 255
x_test /= 255

print('x_train shape:', x_train.shape)
print(x_train.shape[0], '训练集')
print(x_test.shape[0], '测试集')

独热编码¶

#数据编码
from keras.utils import np_utils

#  one hot encoding
y_train = np_utils.to_categorical(y_train)
y_test = np_utils.to_categorical(y_test)

num_classes = y_test.shape[1]
print ("分类数量: {}".format( num_classes))

定义模型¶

#确定模型
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.optimizers import SGD 

# create model
model = Sequential()

model.add(Conv2D(32, kernel_size=(3, 3),
                 activation='relu',
                 input_shape=(28,28,1)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))

model.compile(loss = 'categorical_crossentropy',
              optimizer = SGD(0.01),
              metrics = ['accuracy'])

print(model.summary())

可视化模型¶

from keras.utils.vis_utils import plot_model
# 保存路径
model_diagrams_path = '/home/zzh/'

# 生成图片
plot_model(model, to_file = model_diagrams_path + 'model_plot.png',
           show_shapes = True,
           show_layer_names = True)

训练模型¶

#训练模型
batch_size = 32
epochs = 10

history = model.fit(x_train,
                    y_train,
                    batch_size = batch_size,
                    epochs = epochs,
                    verbose = 1,
                    validation_data = (x_test, y_test))

score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

绘图显示loss¶

#显示损失度
# Plotting our loss charts
import matplotlib.pyplot as plt

history_dict = history.history

loss_values = history_dict['loss']
val_loss_values = history_dict['val_loss']
epochs = range(1, len(loss_values) + 1)

line1 = plt.plot(epochs, val_loss_values, label='Validation/Test Loss')
line2 = plt.plot(epochs, loss_values, label='Training Loss')
plt.setp(line1, linewidth=2.0, marker = '+', markersize=10.0)
plt.setp(line2, linewidth=2.0, marker = '4', markersize=10.0)
plt.xlabel('Epochs') 
plt.ylabel('Loss')
plt.grid(True)
plt.legend()
plt.show()

绘图显示精度¶

# 显示精度
# Plotting our accuracy charts
import matplotlib.pyplot as plt

history_dict = history.history
acc_values = history_dict['accuracy']
val_acc_values = history_dict['val_accuracy']
epochs = range(1, len(loss_values) + 1)

line1 = plt.plot(epochs, val_acc_values, label='Validation/Test Accuracy')
line2 = plt.plot(epochs, acc_values, label='Training Accuracy')
plt.setp(line1, linewidth=2.0, marker = '+', markersize=10.0)
plt.setp(line2, linewidth=2.0, marker = '4', markersize=10.0)
plt.xlabel('Epochs') 
plt.ylabel('Accuracy')
plt.grid(True)
plt.legend()
plt.show()

保存模型¶

1 2	`model.save("test_mnist_simple_cnn_10_Epochs.h5") print("Model Saved")`

加载模型¶

## 加载模型
from keras.models import load_model

classifier = load_model('/home/deeplearningcv/DeepLearningCV/Trained Models/8_mnist_simple_cnn_10_Epochs.h5')

数据预测¶

## 数据预测
import matplotlib.pyplot as plt
rand = np.random.randint(0,len(x_test))
input_im = x_test[rand]
plt.imshow(input_im.reshape(28,28))
predict = classifier.predict_classes(input_im.reshape(1,28,28,1), 1, verbose = 0)[0]
print("预测数字为:{}".format(predict))

实战训练¶

## 数据预处理工具方法
def x_sort(contour):
    # 通过计算质心来实现从左到右排序
    M = cv2.moments(contour)
    return (int(M['m10']/M['m00']))


def make_square(not_square):
    # 画矩形包裹
    BLACK = [0,0,0]
    img_dim = not_square.shape
    height = img_dim[0]
    width = img_dim[1]
    #print("Height = ", height, "Width = ", width)
    if (height == width):
        square = not_square
        return square
    else:
        doublesize = cv2.resize(not_square,(2*width, 2*height), interpolation = cv2.INTER_CUBIC)
        height = height * 2
        width = width * 2
        #print("New Height = ", height, "New Width = ", width)
        if (height > width):
            pad = int((height - width)/2)
            #print("Padding = ", pad)
            doublesize_square = cv2.copyMakeBorder(doublesize,0,0,pad,pad,cv2.BORDER_CONSTANT,value=BLACK)
        else:
            pad = (width - height)/2
            #print("Padding = ", pad)
            doublesize_square = cv2.copyMakeBorder(doublesize,pad,pad,0,0,\
                                                   cv2.BORDER_CONSTANT,value=BLACK)
    doublesize_square_dim = doublesize_square.shape
    #print("Sq Height = ", doublesize_square_dim[0], "Sq Width = ", doublesize_square_dim[1])
    return doublesize_square


def resize_to_pixel(dimensions, image):
    # 缩放
    buffer_pix = 4
    dimensions  = dimensions - buffer_pix
    squared = image
    r = float(dimensions) / squared.shape[1]
    dim = (dimensions, int(squared.shape[0] * r))
    resized = cv2.resize(image, dim, interpolation = cv2.INTER_AREA)
    img_dim2 = resized.shape
    height_r = img_dim2[0]
    width_r = img_dim2[1]
    BLACK = [0,0,0]
    if (height_r > width_r):
        resized = cv2.copyMakeBorder(resized,0,0,0,1,cv2.BORDER_CONSTANT,value=BLACK)
    if (height_r < width_r):
        resized = cv2.copyMakeBorder(resized,1,0,0,0,cv2.BORDER_CONSTANT,value=BLACK)
    p = 2
    ReSizedImg = cv2.copyMakeBorder(resized,p,p,p,p,cv2.BORDER_CONSTANT,value=BLACK)
    img_dim = ReSizedImg.shape
    height = img_dim[0]
    width = img_dim[1]
    #print("Padded Height = ", height, "Width = ", width)
    return ReSizedImg

import cv2
import numpy as np
from keras.datasets import mnist
from keras.models import load_model

image = cv2.imread('image2.png')
gray = cv2.cvtColor(image,cv2.COLOR_BGR2GRAY)
cv2.imshow("image", image)
cv2.waitKey(0)

# 高斯模糊
blurred = cv2.GaussianBlur(gray, (5, 5), 0)
# cv2.imshow("blurred", blurred)
# cv2.waitKey(0)

edged = cv2.Canny(blurred, 30, 150)
# cv2.imshow("edged", edged)
# cv2.waitKey(0)

# 查找边缘
contours, _ = cv2.findContours(edged.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

#从左到右排序坐标
contours = sorted(contours, key = x_sort, reverse = False)


# 存储数据的数组
full_number = []
classifier = load_model('/home/zzh/a.h5')

#循环兴趣点
for c in contours:
    # 矩形边框包过兴趣点
    (x, y, w, h) = cv2.boundingRect(c)    

    if w >= 5 and h >= 25:
        roi = blurred[y:y + h, x:x + w]
        ret, roi = cv2.threshold(roi, 127, 255,cv2.THRESH_BINARY_INV)
        roi = make_square(roi)
        roi = resize_to_pixel(28, roi)
        cv2.imshow("ROI", roi)
        roi = roi / 255.0       
        roi = roi.reshape(1,28,28,1) 

        ## 预测
        res = str(classifier.predict_classes(roi, 1, verbose = 0)[0])
        full_number.append(res)
        cv2.rectangle(image, (x, y), (x + w, y + h), (0, 0, 255), 2)
        cv2.putText(image, res, (x , y + 155), cv2.FONT_HERSHEY_COMPLEX, 2, (255, 0, 0), 2)
        cv2.imshow("image", image)
        cv2.waitKey(0) 

cv2.destroyAllWindows()
print ("图片中的数字为: " + ''.join(full_number))