Python教程

python——线性回归实例实战

本文主要介绍 Python 线性回归的实战示例,对大家解决编程问题具有一定的参考价值,有需要的读者可以跟着小编一起学习。

单变量线性回归(二维散点图与拟合直线):

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from linear_regression import LinearRegression

# Load the 2017 World Happiness Report; the path is relative to the working directory.
data = pd.read_csv('../data/world-happiness-report-2017.csv')

# Randomly draw 80% of the rows for training; the rows that were not drawn
# form the test set (the author's run reported 124 training and 31 test rows).
train_data = data.sample(frac=0.8)
test_data = data.drop(train_data.index)

# Name of the single input feature column.
input_param_name = 'Economy..GDP.per.Capita.'
# Name of the target column.
output_param_name = 'Happiness.Score'

# Selecting with a list of column names returns a one-column DataFrame, so
# `.values` yields a 2-D (n_samples, 1) ndarray instead of a 1-D vector.
x_train = train_data[[input_param_name]].values
y_train = train_data[[output_param_name]].values

# The test arrays use the same column layout as the training arrays, so they
# can be passed to the same code paths (the scatter plots work with either).
x_test = test_data[[input_param_name]].values
y_test = test_data[[output_param_name]].values

# Scatter plot of both subsets so the train/test split can be inspected visually.
for xs, ys, legend_label in (
    (x_train, y_train, 'Train data'),
    (x_test, y_test, 'test data'),
):
    plt.scatter(xs, ys, label=legend_label)

plt.title('Happy')
plt.xlabel(input_param_name)
plt.ylabel(output_param_name)
plt.legend()
plt.show()

# Gradient-descent settings: iteration count and learning rate.
num_iterations = 500
learning_rate = 0.01

# Build the model from the training arrays, then run training. train() hands
# back the fitted parameters and the recorded cost history.
linear_regression = LinearRegression(x_train, y_train)
theta, cost_history = linear_regression.train(learning_rate, num_iterations)

# First history entry is the starting cost, last entry is the final cost.
initial_cost = cost_history[0]
final_cost = cost_history[-1]
print('开始时的损失:', initial_cost)
print('训练后的损失:', final_cost)

# Cost curve: x axis is the iteration index, y axis is the recorded cost.
iteration_axis = range(num_iterations)
plt.plot(iteration_axis, cost_history)
plt.title('GD')
plt.xlabel('Iter')
plt.ylabel('cost')
plt.show()

# Evaluate the fitted model on 100 evenly spaced inputs that span the range
# of the training feature.
predictions_num = 100
x_predictions = np.linspace(x_train.min(), x_train.max(), predictions_num)
# Reshape to a (predictions_num, 1) column before calling predict().
x_predictions = x_predictions.reshape(predictions_num, 1)
y_predictions = linear_regression.predict(x_predictions)

# Overlay the fitted line on top of both data subsets.
plt.scatter(x_train, y_train, label='Train data')
plt.scatter(x_test, y_test, label='test data')
# Positional arguments are x values, y values, and the format string ('r' = red).
plt.plot(x_predictions, y_predictions, 'r', label='Prediction')
plt.title('Happy')
plt.xlabel(input_param_name)
plt.ylabel(output_param_name)
plt.legend()
plt.show()

 

 

多参数线性回归:

MultivariateLinearRegression.py

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly
import plotly.graph_objs as go
# https://plotly.com/python/line-and-scatter/
# https://plotly.com/python/
# plotly.offline.init_notebook_mode()
from linear_regression import LinearRegression

# Load the 2017 World Happiness Report (the author's run reported shape (155, 12)).
data = pd.read_csv('../data/world-happiness-report-2017.csv')

# Randomly sample 80% of the rows for training (155 * 0.8 = 124 rows in the
# author's run); the remaining rows (31) become the test set.
train_data = data.sample(frac=0.8)
test_data = data.drop(train_data.index)

# Two input features (x1, x2) and one target (y).
input_param_name_1 = 'Economy..GDP.per.Capita.'
input_param_name_2 = 'Freedom'
output_param_name = 'Happiness.Score'

feature_columns = [input_param_name_1, input_param_name_2]
target_columns = [output_param_name]

# Selecting with a list of columns keeps the result 2-D, so `.values` gives
# (n_samples, 2) feature arrays and (n_samples, 1) target arrays.
x_train = train_data[feature_columns].values
y_train = train_data[target_columns].values
x_test = test_data[feature_columns].values
y_test = test_data[target_columns].values
# 3-D scatter trace for the training set.
# Marker styling: size 10, fully opaque, 1-px outline in rgb(255, 255, 255) (white).
training_marker = dict(
    size=10,
    opacity=1,
    line=dict(color='rgb(255, 255, 255)', width=1),
)

plot_training_trace = go.Scatter3d(
    x=x_train[:, 0].flatten(),  # all rows, column 0 -> first feature (x1)
    y=x_train[:, 1].flatten(),  # all rows, column 1 -> second feature (x2)
    z=y_train.flatten(),        # target values flattened to 1-D
    name='Training Set',
    mode='markers',
    marker=training_marker,
)


# 3-D scatter trace for the test set, styled the same way as the training trace:
# size 10, fully opaque, 1-px outline in rgb(255, 255, 255) (white).
test_marker = dict(
    size=10,
    opacity=1,
    line=dict(color='rgb(255, 255, 255)', width=1),
)

plot_test_trace = go.Scatter3d(
    x=x_test[:, 0].flatten(),  # first feature (x1) of the test rows
    y=x_test[:, 1].flatten(),  # second feature (x2) of the test rows
    z=y_test.flatten(),        # target values flattened to 1-D
    name='Test Set',
    mode='markers',
    marker=test_marker,
)


# Shared 3-D layout: each scene axis is titled with the column it displays
# and all four figure margins are set to zero.
plot_layout = go.Layout(
    title='Data Sets',  # was misspelled 'Date Sets'
    scene={
        'xaxis': {'title': input_param_name_1},
        'yaxis': {'title': input_param_name_2},
        'zaxis': {'title': output_param_name},
    },
    margin={'l': 0, 'r': 0, 'b': 0, 't': 0},
)

# Show the training and test traces together in one figure.
plot_data = [plot_training_trace, plot_test_trace]

plot_figure = go.Figure(data=plot_data, layout=plot_layout)
# Render the figure with plotly's offline plotting helper.
plotly.offline.plot(plot_figure)

# Training configuration: gradient-descent iteration count and learning rate.
# Both feature-expansion degrees are 0 here; their exact meaning is defined by
# LinearRegression, which is not shown in this file.
num_iterations = 500
learning_rate = 0.01
polynomial_degree = 0
sinusoid_degree = 0

linear_regression = LinearRegression(
    x_train,
    y_train,
    polynomial_degree,
    sinusoid_degree,
)

# train() returns the fitted parameters and the recorded cost history
# (the author's run reported theta of shape (3, 1) and 500 history entries).
theta, cost_history = linear_regression.train(learning_rate, num_iterations)

initial_cost = cost_history[0]
final_cost = cost_history[-1]
print('开始损失', initial_cost)
print('结束损失', final_cost)

# Cost curve over the training iterations.
iteration_axis = range(num_iterations)
plt.plot(iteration_axis, cost_history)
plt.title('Gradient Descent Progress')
plt.xlabel('Iterations')
plt.ylabel('Cost')
plt.show()

# Number of sample points along each feature axis; the grid therefore holds
# predictions_num * predictions_num points.
predictions_num = 10

# Value range of each training feature (column 0 -> x axis, column 1 -> y axis).
x_min, x_max = x_train[:, 0].min(), x_train[:, 0].max()
y_min, y_max = x_train[:, 1].min(), x_train[:, 1].max()

x_axis = np.linspace(x_min, x_max, predictions_num)
y_axis = np.linspace(y_min, y_max, predictions_num)

# Cartesian product of the two axes as two (predictions_num**2, 1) columns.
# np.repeat holds each x value for one full sweep of y, and np.tile cycles
# through y, which is the same ordering the explicit nested loop produced
# (x in the outer loop, y in the inner loop).
x_predictions = np.repeat(x_axis, predictions_num).reshape(-1, 1)
y_predictions = np.tile(y_axis, predictions_num).reshape(-1, 1)

# Predict the target for every grid point; the two columns are stacked
# side by side into a single two-column input array.
grid_inputs = np.hstack((x_predictions, y_predictions))
z_predictions = linear_regression.predict(grid_inputs)

# Trace for the predicted points: small markers, with surfaceaxis=2 asking
# plotly to fill a surface through the points about the z axis.
plot_predictions_trace = go.Scatter3d(
    x=x_predictions.flatten(),
    y=y_predictions.flatten(),
    z=z_predictions.flatten(),
    name='Prediction Plane',
    mode='markers',
    marker=dict(size=1),
    opacity=0.8,
    surfaceaxis=2,
)

# Redraw the figure with the prediction trace added to the two data traces.
plot_data = [plot_training_trace, plot_test_trace, plot_predictions_trace]
plot_figure = go.Figure(data=plot_data, layout=plot_layout)
plotly.offline.plot(plot_figure)

 梯度下降:

 散点图:

 平面拟合:

开始损失 14.438348601809059
结束损失 0.22726258270086874

MultivariateLinearRegression1.py

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly
import plotly.graph_objs as go

from linear_regression import LinearRegression

# Load the 2017 World Happiness Report; the path is relative to the working directory.
data = pd.read_csv('../data/world-happiness-report-2017.csv')

# Random 80% sample for training; the rows that were not sampled form the test set.
train_data = data.sample(frac=0.8)
test_data = data.drop(train_data.index)

# Two input features and one target column.
input_param_name_1 = 'Family'
input_param_name_2 = 'Health..Life.Expectancy.'
output_param_name = 'Happiness.Score'

# Selecting with a list of column names keeps the result 2-D:
# (n_samples, 2) for the features and (n_samples, 1) for the target.
x_train = train_data[[input_param_name_1, input_param_name_2]].values
y_train = train_data[[output_param_name]].values

# The test arrays use the same 2-D layout as the training arrays; the later
# `.flatten()` call on y_test works the same either way.
x_test = test_data[[input_param_name_1, input_param_name_2]].values
y_test = test_data[[output_param_name]].values

# 3-D scatter trace for the training data.
# Marker styling: size 10, fully opaque, 1-px outline in rgb(255,255,255) (white).
training_marker = dict(
    size=10,
    opacity=1,
    line=dict(color='rgb(255,255,255)', width=1),
)

plot_training_trace = go.Scatter3d(
    x=x_train[:, 0].flatten(),  # first feature column
    y=x_train[:, 1].flatten(),  # second feature column
    z=y_train.flatten(),        # target values flattened to 1-D
    name='Training set',
    mode='markers',
    marker=training_marker,
)

# 3-D scatter trace for the test data, styled like the training trace:
# size 10, fully opaque, 1-px outline in rgb(255,255,255) (white).
testing_marker = dict(
    size=10,
    opacity=1,
    line=dict(color='rgb(255,255,255)', width=1),
)

plot_testing_trace = go.Scatter3d(
    x=x_test[:, 0].flatten(),  # first feature column
    y=x_test[:, 1].flatten(),  # second feature column
    z=y_test.flatten(),        # target values as a 1-D array
    name='Testing set',
    mode='markers',
    marker=testing_marker,
)

# Layout of the 3-D figure: each scene axis is titled with the column it
# displays, and all four figure margins are set to zero.
scene_axes = dict(
    xaxis=dict(title=input_param_name_1),
    yaxis=dict(title=input_param_name_2),
    zaxis=dict(title=output_param_name),
)
plot_layout = go.Layout(
    title='Data Set',
    scene=scene_axes,
    margin=dict(l=0, r=0, b=0, t=0),
)

# Draw the training and testing traces together.
plot_data = [plot_training_trace, plot_testing_trace]
plot_figure = go.Figure(data=plot_data, layout=plot_layout)
plotly.offline.plot(plot_figure)

# Training configuration: iteration count, learning rate, and the two
# feature-expansion degrees (both 0 here; their meaning is defined by
# LinearRegression, which is not shown in this file).
num_iterations = 500
learning_rate = 0.01
polynomial_degree = 0
sinusoid_degree = 0

linear_regression = LinearRegression(
    x_train,
    y_train,
    polynomial_degree,
    sinusoid_degree,
)
theta, cost_history = linear_regression.train(learning_rate, num_iterations)

# Report the first and the last recorded cost.
initial_cost = cost_history[0]
final_cost = cost_history[-1]
print('开始损失', initial_cost)
print('结束损失', final_cost)

# Cost curve over the training iterations.
iteration_axis = range(num_iterations)
plt.plot(iteration_axis, cost_history)
plt.title('Gradient Descent Progression')
plt.xlabel('Iteration')
plt.ylabel('Cost')
plt.show()

# Number of sample points along each feature axis; the grid therefore holds
# predictions_num * predictions_num points.
predictions_num = 10

# Value range of each training feature (column 0 -> x axis, column 1 -> y axis).
x_min, x_max = x_train[:, 0].min(), x_train[:, 0].max()
y_min, y_max = x_train[:, 1].min(), x_train[:, 1].max()

x_axis = np.linspace(x_min, x_max, predictions_num)
y_axis = np.linspace(y_min, y_max, predictions_num)

# Cartesian product of the two axes as two (predictions_num**2, 1) columns.
# np.repeat holds each x value for one full sweep of y, and np.tile cycles
# through y, which is the same ordering the explicit nested loop produced
# (x in the outer loop, y in the inner loop).
x_predictions = np.repeat(x_axis, predictions_num).reshape(-1, 1)
y_predictions = np.tile(y_axis, predictions_num).reshape(-1, 1)

# Predict the target for every grid point; the two columns are stacked
# side by side into a single two-column input array.
grid_inputs = np.hstack((x_predictions, y_predictions))
z_predictions = linear_regression.predict(grid_inputs)

# Trace for the predicted points: small markers, with surfaceaxis=2 asking
# plotly to fill a surface through the points about the z axis.
plot_predictions_trace = go.Scatter3d(
    x=x_predictions.flatten(),
    y=y_predictions.flatten(),
    z=z_predictions.flatten(),
    name='Prediction Plane',
    mode='markers',
    marker=dict(size=1),
    opacity=0.8,
    surfaceaxis=2,
)

# Redraw the figure with the prediction trace added to the two data traces.
plot_data = [plot_training_trace, plot_testing_trace, plot_predictions_trace]
plot_figure = go.Figure(data=plot_data, layout=plot_layout)
plotly.offline.plot(plot_figure)

 非线性二维回归分析:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from linear_regression import LinearRegression
# Load the non-linear sample data set; the path is relative to the working directory.
data = pd.read_csv('../data/non-linear-regression-x-y.csv')

# Turn each column into an (n_rows, 1) column array
# (the author's run reported shape (250, 1) for both).
x = data['x'].values.reshape((data.shape[0], 1))
y = data['y'].values.reshape((data.shape[0], 1))

# Preview the first 10 rows. A bare `data.head(10)` expression discards its
# result, so it has to be printed to be visible.
print(data.head(10))

# Plot the raw curve.
plt.plot(x, y)
plt.show()

# Training configuration. The two degrees and the normalisation flag are
# passed straight to LinearRegression; their exact effect is defined there
# and is not shown in this file.
num_iterations = 50000    # gradient-descent iterations
learning_rate = 0.02
polynomial_degree = 15    # degree setting for polynomial features
sinusoid_degree = 15      # degree setting for sinusoidal features
normalize_data = True

linear_regression = LinearRegression(
    x,
    y,
    polynomial_degree,
    sinusoid_degree,
    normalize_data,
)

theta, cost_history = linear_regression.train(learning_rate, num_iterations)

# Report the first and last recorded cost, rounded to two decimals.
initial_cost = cost_history[0]
final_cost = cost_history[-1]
print('开始损失: {:.2f}'.format(initial_cost))
print('结束损失: {:.2f}'.format(final_cost))

# One-column table of the fitted parameters (built here but not displayed
# anywhere in this script).
theta_table = pd.DataFrame({'Model Parameters': theta.flatten()})

# Cost curve over the training iterations.
iteration_axis = range(num_iterations)
plt.plot(iteration_axis, cost_history)
plt.title('Gradient Descent Progress')
plt.xlabel('Iterations')
plt.ylabel('Cost')
plt.show()

# Evaluate the fitted model on 1000 evenly spaced inputs covering the data range.
predictions_num = 1000
x_predictions = np.linspace(x.min(), x.max(), predictions_num).reshape(predictions_num, 1)
y_predictions = linear_regression.predict(x_predictions)

# Overlay the fitted curve ('r' = red line) on the data points.
plt.scatter(x, y, label='Training Dataset')
plt.plot(x_predictions, y_predictions, 'r', label='Prediction')
# Without legend() the label= values given above are never rendered.
plt.legend()
plt.show()

 损失函数:

 曲线拟合:

开始损失: 2274.66
结束损失: 35.04

 

 

 

这篇关于python——线性回归实例实战的文章就介绍到这儿,希望我们推荐的文章对大家有所帮助,也希望大家多多支持为之网!