
The Softmax Algorithm: An Extension of Logistic Regression


I have finally implemented this extension of logistic regression. Training uses gradient descent, which is quite sensitive to the learning rate: different learning rates can lead to drastically different results. See the figures below.

Reference: http://deeplearning.stanford.edu/wiki/index.php/Softmax%E5%9B%9E%E5%BD%92
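For reference, the quantities the code below computes are the standard softmax regression hypothesis, cost, and gradient from that page (my transcription; here $m$ is the number of training samples, $k$ the number of classes, and each $x^{(i)}$ is assumed to carry a leading bias component of 1, matching the column of ones added in train()):

$$
P\left(y^{(i)} = j \mid x^{(i)}; \theta\right) = \frac{\exp\left(\theta_j^{\top} x^{(i)}\right)}{\sum_{l=1}^{k} \exp\left(\theta_l^{\top} x^{(i)}\right)}
$$

$$
J(\theta) = -\frac{1}{m} \sum_{i=1}^{m} \sum_{j=1}^{k} 1\left\{y^{(i)} = j\right\} \log P\left(y^{(i)} = j \mid x^{(i)}; \theta\right) + \frac{\lambda}{2} \sum_{j=1}^{k} \left\lVert \theta_j \right\rVert^{2}
$$

$$
\nabla_{\theta_j} J(\theta) = -\frac{1}{m} \sum_{i=1}^{m} x^{(i)} \left( 1\left\{y^{(i)} = j\right\} - P\left(y^{(i)} = j \mid x^{(i)}; \theta\right) \right) + \lambda\, \theta_j
$$

Each gradient descent step then updates $\theta_j \leftarrow \theta_j - \alpha\, \nabla_{\theta_j} J(\theta)$, where $\alpha$ is the learning rate (alfa in the code).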

The Python code is as follows:

import numpy as np
import matplotlib.pylab as plt
import copy
from scipy.linalg import norm
from math import pow
from scipy.optimize import fminbound,minimize
import random
def _dot(a, b):
    # element-wise exp of the matrix product a . b
    mat_dot = np.dot(a, b)
    return np.exp(mat_dot)
def condProb(theta, thetai, xi):
    # P(y = i | x) for each sample in xi: exp(thetai . x) / sum_l exp(theta_l . x)
    numerator = _dot(thetai, xi.transpose())
    denominator = _dot(theta, xi.transpose())
    denominator = np.sum(denominator, axis=0)
    p = numerator / denominator
    return p
def costFunc(alfa, *args):
    i = args[2]
    original_thetai = args[0]
    delta_thetai = args[1]
    x = args[3]
    y = args[4]
    lamta = args[5]
    
    labels = sorted(set(y))                  # sorted so that row k of theta matches label k
    thetai = copy.deepcopy(original_thetai)  # trial parameters; do not modify the caller's theta
    thetai[i, :] = thetai[i, :] - alfa * delta_thetai
    k = 0
    sum_log_p = 0.0
    for label in labels:
        index = y == label
        xi = x[index]
        p = condProb(thetai, thetai[k, :], xi)
        log_p = np.log(p)                    # natural log, matching the softmax cost function
        sum_log_p = sum_log_p + log_p.sum()
        k = k + 1
    r = -sum_log_p / x.shape[0] + (lamta / 2.0) * pow(norm(thetai), 2)
    #print(r, alfa)

    return r
class Softmax:
    def __init__(self, alfa, lamda, feature_num, label_num, run_times = 500, col = 1e-6):
        self.alfa = alfa
        self.lamda = lamda
        self.feature_num = feature_num
        self.label_num = label_num
        self.run_times = run_times
        self.col = col
        self.theta = np.random.random((label_num, feature_num + 1)) + 1.0
    def oneDimSearch(self, original_thetai,delta_thetai,i,x,y ,lamta):
        res = minimize(costFunc, 0.0, method = 'Powell', args =(original_thetai,delta_thetai,i,x,y ,lamta))
        return res.x
    def train(self, x, y):
        # prepend a bias column of ones to x
        tmp = np.ones((x.shape[0], x.shape[1] + 1))
        tmp[:, 1:tmp.shape[1]] = x
        x = tmp
        del tmp
        labels = sorted(set(y))            # sorted so that row i of theta matches label i
        self.errors = []
        for kk in range(0, self.run_times):
            i = 0
            tmp_theta = copy.deepcopy(self.theta)  # snapshot of theta before this pass
            for label in labels:
                one = np.zeros(x.shape[0])
                index = y == label
                one[index] = 1.0           # indicator 1{y == label}
                thetai = np.array([self.theta[i, :]])
                prob = self.condProb(thetai, x)
                prob = np.array([one - prob])
                prob = prob.transpose()
                # gradient of the cost w.r.t. theta_i, plus the weight-decay term
                delta_thetai = - np.sum(x * prob, axis = 0) / x.shape[0] + self.lamda * self.theta[i, :]
                #alfa = self.oneDimSearch(self.theta, delta_thetai, i, x, y, self.lamda)  # line search for the best learning rate (not used)
                self.theta[i, :] = self.theta[i, :] - self.alfa * np.array([delta_thetai])
                i = i + 1
            self.errors.append(self.performance(tmp_theta))  # how much theta moved in this pass
        
    def performance(self, tmp_theta):
        # L2 norm of the change in theta, used as a convergence measure
        return norm(self.theta - tmp_theta)
    def dot(self, a, b):
        mat_dot = np.dot(a, b)
        return np.exp(mat_dot)
    def condProb(self, thetai, xi):
        # P(y = i | x) for each row of xi under the current parameters
        numerator = self.dot(thetai, xi.transpose())
        denominator = self.dot(self.theta, xi.transpose())
        denominator = np.sum(denominator, axis=0)
        p = numerator[0] / denominator
        return p
    def predict(self, x):
        # returns, for each sample, [predicted label index, its probability]
        tmp = np.ones((x.shape[0], x.shape[1] + 1))
        tmp[:, 1:tmp.shape[1]] = x
        x = tmp
        row = x.shape[0]
        col = self.theta.shape[0]
        pre_res = np.zeros((row, col))
        for i in range(0, row):
            xi = x[i, :]
            for j in range(0, col):
                thetai = self.theta[j, :]
                p = self.condProb(np.array([thetai]), np.array([xi]))
                pre_res[i, j] = p
        r = []
        for i in range(0, row):
            tmp = []
            line = pre_res[i, :]
            ind = line.argmax()
            tmp.append(ind)
            tmp.append(line[ind])
            r.append(tmp)
        return np.array(r)
    def evaluate(self):
        pass
def samples(sample_num, feature_num, label_num):
    n = int(sample_num / label_num)
    x = np.zeros((n*label_num, feature_num))
    y = np.zeros(n*label_num, dtype=int)   # np.int is removed in recent NumPy
    for i in range(0, label_num):
        x[i*n : i*n + n, :] = np.random.random((n, feature_num)) + i
        y[i*n : i*n + n] = i
    return [x, y]
def save(name, x, y):
    writer = open(name, 'w')
    for i in range(0, x.shape[0]):
        for j in range(0, x.shape[1]):
            writer.write(str(x[i,j]) + ' ')
        writer.write(str(y[i])+ '\n')
    writer.close()
def load(name):
    x = []
    y = []
    for line in open(name, 'r'):
        ele = line.split(' ')
        tmp = []
        for i in range(0, len(ele) - 1):
            tmp.append(float(ele[i]))
        x.append(tmp)
        y.append(int(ele[len(ele) - 1]))
    return [x, y]
def plotRes(pre, real, test_x,l):
    s = set(pre)
    col = ['r','b','g','y','m']
    fig = plt.figure()
    
    ax = fig.add_subplot(111)
    for i in range(0, len(s)):
        index1 = pre == i
        index2 = real == i
        x1 = test_x[index1, :]
        x2 = test_x[index2, :]
        ax.scatter(x1[:,0],x1[:,1],color=col[i],marker='v',linewidths=0.5)
        ax.scatter(x2[:,0],x2[:,1],color=col[i],marker='.',linewidths=12)
    plt.title('learning rate=' + str(l))
    plt.legend(('c1:predict','c1:true',\
                'c2:predict','c2:true',
                'c3:predict','c3:true',
                'c4:predict','c4:true',
                'c5:predict','c5:true'), shadow = True, loc = (0.01, 0.4))
    plt.show()  
if __name__ == '__main__':
    #[x, y] = samples(1000, 2, 5)
    #save('data.txt', x, y)
    [x, y] = load('data.txt')
    index = list(range(0, len(x)))   # list() so that random.shuffle works under Python 3
    random.shuffle(index)
    x = np.array(x)
    y = np.array(y)
    x_train = x[index[0:700],:]
    y_train = y[index[0:700]]
    softmax = Softmax(0.4, 0.0, 2, 5)  # lamda is set to 0.0, i.e. no regularization: the model has no higher-order terms, and regularization only makes the results worse here
    softmax.train(x_train, y_train)
    x_test = x[index[700:1000],:]
    y_test = y[index[700:1000]]
    r= softmax.predict(x_test)
    plotRes(r[:,0],y_test,x_test,softmax.alfa)
    t = r[:,0] != y_test
    o = np.zeros(len(t))
    o[t] = 1
    err = sum(o)
    print('misclassified: %d / %d' % (int(err), len(y_test)))
    

The results are shown in the figures. As the learning rate increases, classification gets better and better, but once it becomes too large (for example 1.0) the results degrade again, so choosing the learning rate well is the key. The data used by this code can be generated automatically with the samples() function.
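To reproduce that observation without eyeballing the plots, one can sweep the learning rate and compare misclassification counts on the held-out set. The following is only a minimal sketch: it assumes the script above has been saved as a module named softmax_demo (a hypothetical name) so that Softmax and samples can be imported, and the learning rates and the 700/300 split are just examples.

import random
import numpy as np
from softmax_demo import Softmax, samples   # hypothetical module name for the script above

def error_count(model, x_test, y_test):
    # number of misclassified samples in the held-out set
    pred = model.predict(x_test)
    return int(np.sum(pred[:, 0] != y_test))

if __name__ == '__main__':
    [x, y] = samples(1000, 2, 5)             # regenerate the synthetic data
    index = list(range(len(x)))
    random.shuffle(index)
    x_train, y_train = x[index[:700], :], y[index[:700]]
    x_test, y_test = x[index[700:], :], y[index[700:]]
    for alfa in [0.05, 0.1, 0.2, 0.4, 1.0]:  # example learning rates
        model = Softmax(alfa, 0.0, 2, 5)
        model.train(x_train, y_train)
        print('alfa=%.2f  errors=%d/%d' % (alfa, error_count(model, x_test, y_test), len(y_test)))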
