04-手写数字识别

image-20191120170504507

输入(feature): 图片像素点

输出(label): 0~9的数字

多分类问题

数据格式和灰度图

image-20191120171230657

灰度图片示例

图像数据读取

1
2
3
import numpy as np
# Parse the comma-separated image file into a NumPy array (float by default).
# NOTE(review): presumably one flattened 28x28 image per row - confirm against the CSV.
feature = np.loadtxt('test_image.csv',delimiter=',')
# Notebook-style expression: displays the first row when run in a cell.
feature[0]

数据图像可视化

1
2
3
import matplotlib.pyplot as plt 
# IPython/Jupyter magic (not plain Python): selects the interactive notebook backend.
%matplotlib notebook
# Reshape the row at index 3 into a 28x28 grid and render it as an image.
plt.imshow(feature[3].reshape(28,28))

image-20191120182248124

自己做数据集的步骤

  1. 手写数字
  2. 拍照
  3. opencv缩放到28*28
  4. opencv转为灰度图片,生成28*28的像素点值
  5. 打标签
  6. 重复上述步骤,收集6万个训练数据,1万个测试数据

one-hot encoding 独热编码

1
2
# Example labels: one class index in 0..9 per sample.
label = np.array([1,2,1,2,3,4,5,6,7,8,9,0])
# Indexing the 10x10 identity matrix with `label` selects row k for each
# label k, producing one one-hot row per entry of `label`.
np.eye(10)[label]
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
array([[0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])

完整代码

1
import numpy as np
1
# Load the image CSV and divide every value by 255.0
# (rescales to 0..1, assuming the file holds 8-bit grayscale intensities).
feature = np.loadtxt('test_image.csv',delimiter=',')/255.0
1
2
# Load the labels and cast them to int so they can be used as array indices.
label = np.loadtxt('test_label.csv',delimiter=',').astype(int)
# One-hot encode: row k of the 10x10 identity matrix for each label k.
Label = np.eye(10)[label]
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import numpy as np
# Disabled: would print arrays in full instead of summarising them.
# np.set_printoptions(threshold = np.inf) 
# Print floats in fixed-point notation instead of scientific notation.
np.set_printoptions(suppress=True)

def sigmoid(z):
    """Return the logistic function 1 / (1 + e^(-z)), applied element-wise."""
    decay = np.exp(-z)
    return 1 / (1 + decay)


# Append a constant-1 column so the last row of `weight` acts as the bias.
ones = np.ones((len(feature),1))
Feature = np.hstack((feature ,ones))

# One weight column per class (10 digits). The row count follows the
# augmented feature matrix instead of a hard-coded 785, so the shapes stay
# consistent if the input width changes.
weight = np.ones((Feature.shape[1], 10))

# Summed squared error appended by every call to gradentdecent().
msehistory = []
learningrate = 0.000001

## Key code
# Running sum of squared gradients, one entry per weight.
changeweight = np.zeros_like(weight)

def sigmoid(z):
    """Return the logistic function 1 / (1 + e^(-z)), applied element-wise.

    The value is evaluated through exp(-|z|), which never overflows:
    for z >= 0 this is 1 / (1 + e^(-z)); for z < 0 it is the algebraically
    equal e^z / (1 + e^z). The direct form overflows in exp for
    large-magnitude negative z.

    Note: ``sigmoid`` is also defined earlier in this file; this later
    definition is the one in effect when the file is run top to bottom.
    """
    decay = np.exp(-np.abs(z))  # lies in [0, 1]
    numerator = np.where(np.asarray(z) >= 0, 1.0, decay)
    return numerator / (1.0 + decay)

def gradentdecent():
    """Run one adaptive gradient-descent step on the global ``weight``.

    Reads the globals ``Feature`` and ``Label``. Updates ``weight``,
    ``learningrate`` and ``changeweight``, and appends the current summed
    squared error to ``msehistory``.

    Returns:
        The gradient ``Feature.T . (sigmoid(Feature . weight) - Label)``
        evaluated before the weight update, so the caller can test it for
        convergence.
    """
    global weight,learningrate,changeweight
    # Forward pass once: the residual feeds both the error and the gradient.
    residual = sigmoid(np.dot(Feature,weight)) - Label

    mse = np.sum(np.power(residual,2))
    msehistory.append(mse)
    # Halve the step size when the error went up, otherwise grow it by 10%.
    if len(msehistory)>=2:
        if msehistory[-1]>msehistory[-2]:
            learningrate = learningrate /2
        else:
            learningrate = learningrate * 1.1

    change = np.dot(Feature.T,residual)
    # Accumulate squared gradients and divide each weight's step by the
    # square root of its accumulated total. Entries that are still zero are
    # replaced by 1 to avoid division by zero.
    changeweight = changeweight + change**2
    weight = weight - learningrate* change / np.sqrt(np.where(changeweight != 0 ,changeweight, 1))
    ### Key code
    return change
1
2
3
4
5
6
# Take up to 10000 descent steps, stopping early once the sum of squared
# gradient entries falls below 1.
for i in range(10000):
    change = gradentdecent()
    converged = np.sum(np.square(change)) < 1
    if converged:
        print(change)
        print('---{}---'.format(i))
        break
1
2
3
# Class probabilities for the sample at row 30.
value = Feature[30]
np.set_printoptions(suppress=True)
# Compute the scores once and subtract their maximum before exponentiating.
# The softmax value is unchanged, but exp can no longer overflow to inf
# (which would turn the division into nan).
scores = np.dot(value, weight)
exps = np.exp(scores - np.max(scores))
result = exps / np.sum(exps)
1
2
3
4
5
6
7
8
import random
import matplotlib.pyplot as plt 
%matplotlib notebook
index = random.randint(0,10000)
plt.imshow(feature[index].reshape(28,28))
value =Feature[index]
result = np.exp(np.dot(value,weight))/np.sum(np.exp(np.dot(value,weight)))
np.argmax(result)

image-20191120202714808

准确性分析

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
# Predicted class for every sample in one matrix product. Softmax is
# monotonic, so the argmax of the raw scores is the argmax of the softmax
# probabilities, and skipping exp avoids any overflow to nan.
predicted = np.argmax(np.dot(Feature, weight), axis=1)

# Count matches against the integer labels, and divide by the actual number
# of samples instead of a hard-coded 10000.
correct = int(np.count_nonzero(predicted == label))

print("精确度:{}".format(correct / len(label)))

image-20191121080900492