SVM算法代码实现

Posted by mudux on February 27, 2020

  在之前介绍了SVM理论知识,SVM求解可以形式化为一个凸二次规划问题,在SVM求解方法中介绍了用SMO方法求解这个凸二次规划问题。这里介绍了基于SMO优化方法的SVM。

1. python代码

  SVM.py文件

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
import numpy as np

class SVMClassifier():
    def __init__(self, C=1.0, kernel='rbf', degree=3, gamma=1, coef0=0.0, tol=1e-3, verbose=False, max_iter=100):
        self.C = C
        self.kernel_type = kernel
        self.degree = degree
        self.gamma = gamma
        self.coef0 = coef0
        self.tol = tol
        self.verbose = verbose
        self.max_iter = max_iter

    def fit(self, trainX, trainY):
        if not isinstance(trainX, (np.ndarray, np.generic)):
            trainX = np.array(trainX)
        if not isinstance(trainY, (np.ndarray, np.generic)):
            trainY = np.array(trainY)                    
        self.X = trainX
        self.Y = trainY
        self.m, self.n = np.shape(trainX)
        self.alpha = np.zeros((self.m, 1))
        self.b = 0
        self.eCache = np.zeros((self.m, 2))
        self.weight = np.zeros((self.n, 1))
        self.K = np.zeros((self.m, self.m)) # kernel matrix
        for i in range(self.m):    
            self.K[:, i] = self.kernelTrans(self.X[i, :])


        iters = 0
        entireSet = True
        alphaPairsChanged = 0
        while ((iters < self.max_iter) and (alphaPairsChanged > 0)) or entireSet:
            alphaPairsChanged = 0
            if entireSet:   # 遍历所有值
                for i in range(self.m):
                    alphaPairsChanged += self.innerL(i)
                if self.verbose:
                    print("fullSet, iters: %d i: %d, pairs changed: %d" % (iters, i, alphaPairsChanged) )
                iters += 1
            else:   # 遍历边界值
                nonBoundId = np.where((self.alpha > 0) & (self.alpha < self.C))[0]
                for i in nonBoundId:
                    alphaPairsChanged += self.innerL(i)
                    
                iters += 1
            if entireSet:
                entireSet = False
            elif alphaPairsChanged == 0:
                entireSet = True
            if self.verbose:                
                print("iteration number: %d" % iters)
        return self.b, self.alpha


    def predict(self, testX):
        if not isinstance(testX, (np.ndarray, np.generic)):
            testX = np.array(testX)
        m, _ = np.shape(testX)
        self.labels_ = np.zeros((m, 1))
        t = np.dot(testX, self.weight) + self.b
        self.labels_[t > 0] = 1
        self.labels_[t <= 0] = -1
        return self.labels_

    def score(self, testX, testY):
        pred = self.predict(testX)
        acc = np.mean(pred == testY)
        print("Test accuracy: %.3f" % acc)
        return acc

    def calcWs(self):
        self.weight = np.zeros((self.n, 1))
        for i in range(self.m):
            self.weight += self.alpha[i] * self.Y[i, :] * self.X[i, :].T
        return self.weight
        

    def calcEk(self, k):
        fXk = np.sum(self.alpha * self.Y * self.K[:, k].reshape((-1, 1))) + self.b
        Ek = fXk - float(self.Y[k, :])
        return Ek

    def selectJrand(self, i):
        j = i
        while j == i:
            j = int(np.random.uniform(0, self.m))
        return j

    def clipAlpha(self, aj, L, H):
        if aj > H:
            aj = H
        if aj < L:
            aj = L
        return aj

    def kernelTrans(self, A):
        K = np.zeros((self.m, ))
        if self.kernel_type == 'linear':
            K = np.dot(self.X, A.T)
        elif self.kernel_type == 'rbf':
            for i in range(self.m):
                deltaRow = self.X[i, :] - A
                K[i] = np.dot(deltaRow, deltaRow.T)
            K = np.exp(-K / self.gamma)
        else:
            raise NameError("That kernel type: %s is not recognized" % self.kernel_type)
        return K

    def selectJ(self, i, Ei):
        maxK = -1
        maxDeltaE = 0
        Ej = 0
        self.eCache[i] = [1, Ei]
        validEcacheList = np.nonzero(self.eCache[:, 0])     
        if len(validEcacheList) > 1:
            for k in validEcacheList:
                if k == i:
                    continue
                Ek = self.calcEk(k)
                deltaE = abs(Ei - Ek)
                if deltaE > maxDeltaE:
                    maxK = k
                    maxDeltaE = deltaE
                    Ej = Ek
            return maxK, Ej
        else:
            j = self.selectJrand(i)
            Ej = self.calcEk(j)
        return j, Ej

    def updateEk(self, k):
        Ek = self.calcEk(k)
        self.eCache[k] = [1, Ek]

    def innerL(self, i):
        Ei = self.calcEk(i)
        if (((self.Y[i] * Ei < -self.tol) and (self.alpha[i] < self.C)) or ((self.Y[i] * Ei > self.tol) and (self.alpha[i] > 0))):
            j, Ej = self.selectJ(i, Ei)
            alphaIold = self.alpha[i].copy()
            alphaJold = self.alpha[j].copy()
            if self.Y[i] != self.Y[j]:
                L = max(0, self.alpha[j] - self.alpha[i])
                H = min(self.C, self.C + self.alpha[j] - self.alpha[i])
            else:
                L = max(0, self.alpha[j] + self.alpha[i] - self.C)
                H = min(self.C, self.alpha[j] + self.alpha[i])
            if L == H:
                if self.verbose:
                    print("L == H")
                return 0
            eta = self.K[i, i] + self.K[j, j] - 2.0 * self.K[i, j]
            if eta >= 0:
                if self.verbose:
                    print("eta == 0")                        
                return 0
            self.alpha[j] += self.Y[j] * (Ei - Ej) / eta
            self.alpha[j] = self.clipAlpha(self.alpha[j], L, H)
            self.updateEk(j)
            if abs(self.alpha[j] - alphaJold) < 1e-5:
                if self.verbose:                
                    print("j not moving enough")
                return 0
            self.alpha[i] += self.alpha[j] * self.alpha[i] * (alphaJold - self.alpha[j])
            self.updateEk(i)
            b1 = self.b - Ei - self.Y[i] * self.K[i, i] *\
                (self.alpha[i] - alphaIold) - self.Y[j] * self.K[i, j] *\
                    (self.alpha[j] - alphaJold)
            b2 = self.b - Ej - self.Y[i] * self.K[i, j] *\
                (self.alpha[i] - alphaIold) - self.Y[j] * self.K[j, j] *\
                    (self.alpha[j] - alphaJold) 
            if (self.alpha[i] > 0) and (self.alpha[i] < self.C):
                self.b = b1
            elif (self.alpha[j] > 0) and (self.alpha[j] < self.C):
                self.b = b2
            else: 
                self.b = (b1 + b2) / 2.0
            return 1                
        else:
            return 0    

  测试一下:利用sklearn的digits数据集,转换为二分类问题

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
import numpy as np
from sklearn.datasets import load_breast_cancer, load_iris, load_digits
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import scale
from SVM import SVMClassifier

iris = load_digits()
X = iris['data']
Y = iris['target']
m = np.shape(Y)[0]
tY = np.zeros((m, 1))
X = scale(X)
tY[Y == 2] = 1
tY[Y != 2] = -1

trainX, testX, trainY, testY = train_test_split(X, tY, test_size=0.2)
trainY = np.reshape(trainY, (-1, 1))
testY = np.reshape(testY, (-1, 1))


svc = SVMClassifier(gamma=0.001, C=100, max_iter=500, kernel='rbf')
svc.fit(trainX, trainY)
acc = svc.score(testX, testY)
print(acc)

  输出为1.1

2. sklearn库

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
import numpy as np
from sklearn import datasets
from sklearn.datasets import load_breast_cancer, load_iris, load_digits
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import scale
from sklearn.svm import SVC

iris = load_digits()
X = iris['data']
Y = iris['target']
m = np.shape(Y)[0]
tY = np.zeros((m, 1))
tY[Y == 2] = 1
tY[Y != 2] = -1
X = scale(X)

trainX, testX, trainY, testY = train_test_split(X, Y, test_size=0.2)
trainY = np.reshape(trainY, (-1, 1))
testY = np.reshape(testY, (-1, 1))

svc = SVC(kernel='rbf', gamma=0.001, C=100)
svc.fit(trainX, trainY)
print(svc.score(testX, testY))

输出为2.1python代码实现比sklearn库运行结果稍低点,但准确率还是挺高

3. Reference

机器学习实战-第六章