先按照范例写了一个使用 mini-batch 的 logistic regression。处理方式和范例有一些区别:因为对 NumPy 还不太熟悉,这里主要依靠矩阵与向量的乘法来实现,没有使用广播(broadcasting)之类的操作。
如果仿照范例、不做额外优化,在 train_set 上得到的 acc 和 loss 与范例基本一致。
import numpy as np


def _read_csv_matrix(path):
    """Read a comma-separated text file into a 2-D float array.

    The first line of the file is skipped, and the first field of every
    remaining line is dropped before the rest is converted to float.
    """
    with open(path) as handle:
        next(handle)  # skip the first (header) line
        rows = [row.strip('\n').split(',')[1:] for row in handle]
    return np.array(rows, dtype=float)


X_train = _read_csv_matrix("./X_train")
# Unlike the reference solution, the labels are kept 2-D here
# (one row per sample, i.e. a column vector) rather than flattened.
Y_train = _read_csv_matrix("./Y_train")
X_test = _read_csv_matrix("./X_test")
\(X_{after} = \frac{X - X_{mean}}{\sigma + \epsilon}\),其中 \(X_{mean}\) 和 \(\sigma\) 均由 train_set 按列计算得到,\(\epsilon = 10^{-8}\) 用来避免除以零。
# Standardise every feature column: X_after = (X - mean) / (std + eps).
# The mean and std are computed per column (axis=0) on the training set
# only, and the very same statistics are then applied to the test set.
# The original indexed with X[:, range(X.shape[1])], which selects every
# column anyway but forces a full copy each time; plain whole-array
# operations are equivalent up to floating-point rounding and avoid those copies.
X_mean = np.mean(X_train, axis=0)
X_std = np.std(X_train, axis=0)

# Normalise X_train in place; 1e-8 keeps the division finite when a
# column has zero standard deviation.
X_train -= X_mean
X_train /= X_std + 1e-8

# Same transformation for X_test, using the training-set statistics.
X_test -= X_mean
X_test /= X_std + 1e-8
划分出train_set and development_set
# Hold out the trailing `dev_ratio` fraction of the training data as a
# development set; the leading part remains the training set.
dev_ratio = 0.1  # fraction of the data used as development set
train_size = int(len(X_train) * (1 - dev_ratio))

# The right-hand sides are evaluated before rebinding, so both halves are
# sliced from the full, un-split arrays.
X_train, X_dev = X_train[:train_size], X_train[train_size:]
Y_train, Y_dev = Y_train[:train_size], Y_train[train_size:]
# Fixed seed so that the shuffling below is reproducible.
np.random.seed(0)


def _shuffle(X, Y):
    """Return X and Y reordered along axis 0 by the same random permutation."""
    randomize = np.arange(len(X))  # indices 0 .. len(X) - 1
    np.random.shuffle(randomize)   # shuffle the indices in place
    # Index both arrays with the same permutation so rows stay paired.
    return X[randomize], Y[randomize]


def _sigmoid(z):
    """Element-wise sigmoid of z, clipped to [1e-8, 1 - 1e-8].

    The clipping keeps the output strictly inside (0, 1), so that taking
    log(p) or log(1 - p) of the result never yields -inf.
    """
    return np.clip(1 / (1.0 + np.exp(-z)), 1e-8, 1 - (1e-8))


def _f(X, w, b):
    """Logistic-regression output sigmoid(X . w + b).

    X: input data
    w: weight vector
    b: bias
    """
    return _sigmoid(np.dot(X, w) + b)


def _predict(X, w, b):
    """Binary prediction: the output of `_f` rounded to 0 or 1, as integers."""
    # The builtin `int` replaces `np.int`, an alias that was removed from
    # NumPy in release 1.24 and now raises AttributeError.
    return np.round(_f(X, w, b)).astype(int)


def _accuracy(Y_pred, Y_label):
    """Return 1 minus the mean absolute difference of Y_pred and Y_label.

    For 0/1 arrays of identical shape this is the fraction of matching
    entries.
    """
    return 1 - np.mean(np.abs(Y_pred - Y_label))
def _cross_entropy_loss(Y_pred, Y_label):
    """Summed binary cross-entropy between predictions and labels.

    For every sample the cross-entropy is
        -y * ln(f(x)) - (1 - y) * ln(1 - f(x))
    and the per-sample values are added up by means of dot products.
    Both arguments are used as 2-D column vectors: the (1, 1) result of
    the dot products is unwrapped to a scalar before returning.
    """
    labels_row = Y_label.transpose()
    positive_term = np.dot(labels_row, np.log(Y_pred))
    negative_term = np.dot(1 - labels_row, np.log(1 - Y_pred))
    total = -positive_term - negative_term
    return total[0][0]
def _gradient(X, Y_label, w, b):
    """Gradient of the summed cross-entropy loss with respect to w and b.

        w_grad = -sum_n (y_n - f(x_n)) * x_n
        b_grad = -sum_n (y_n - f(x_n))

    X: input data, one sample per row
    Y_label: labels for the rows of X
    w: weight vector
    b: bias

    Returns (w_grad, b_grad) where w_grad is a column vector with one row
    per column of X and b_grad is a scalar.
    """
    # Flatten both sides so the error is a 1-D vector with one entry per sample.
    pred_error = Y_label.flatten() - _f(X, w, b).flatten()
    # Dot product of the error with each feature column of X.
    w_grad = -np.dot(pred_error, X)
    # The derivative with respect to b is just the summed error.
    b_grad = -np.sum(pred_error)
    # Use the width of the X that was passed in; the original read the
    # global X_train here, tying this helper to a module-level variable.
    return w_grad.reshape(X.shape[1], 1), b_grad
# Initialise the parameters: weights as a zero column vector, bias as a
# one-element zero array.
w = np.zeros((X_train.shape[1], 1))
b = np.zeros((1,))
print(w.shape)
print(b)

max_iter = 10         # number of epochs
batch_size = 10       # samples per mini-batch
learning_rate = 0.25  # base learning rate, decayed with the step count

# Per-epoch history of loss and accuracy.
train_loss = []
dev_loss = []
train_acc = []
dev_acc = []

cnt = 1  # number of update steps so far; drives the learning-rate decay

for epoch in range(max_iter):
    # Reshuffle the training data at the start of every epoch.
    X_train, Y_train = _shuffle(X_train, Y_train)

    # Mini-batch training. Only full batches are used, so a trailing
    # partial batch (train_size % batch_size samples) is skipped.
    for idx in range(train_size // batch_size):
        batch = slice(idx * batch_size, (idx + 1) * batch_size)
        X = X_train[batch]
        Y = Y_train[batch]

        w_grad, b_grad = _gradient(X, Y, w, b)

        # The step size shrinks as learning_rate / sqrt(step count).
        step = learning_rate / np.sqrt(cnt)
        w = w - step * w_grad
        b = b - step * b_grad
        cnt += 1

    # Accuracy and mean loss on the training set after this epoch.
    train_prob = _f(X_train, w, b)
    Y_train_pred = np.round(train_prob)
    train_acc.append(_accuracy(Y_train_pred, Y_train))
    train_loss.append(_cross_entropy_loss(train_prob, Y_train) / len(X_train))

    # Accuracy and mean loss on the development set after this epoch.
    dev_prob = _f(X_dev, w, b)
    Y_dev_pred = np.round(dev_prob)
    dev_acc.append(_accuracy(Y_dev_pred, Y_dev))
    dev_loss.append(_cross_entropy_loss(dev_prob, Y_dev) / len(X_dev))

    print(train_loss[-1])
    print(train_acc[-1])

print('Training loss: {}'.format(train_loss[-1]))
print('Development loss: {}'.format(dev_loss[-1]))
print('Training accuracy: {}'.format(train_acc[-1]))
print('Development accuracy: {}'.format(dev_acc[-1]))