python 数据分析

本文主要是介绍python 数据分析，对大家解决编程问题具有一定的参考价值，需要的程序猿们随着小编来一起学习吧！

基本环境安装

安装Anaconda

Matplotlib绘图架构

Scripting(脚本) -> Artist(美工) -> Backend(后端)

折线图

点击查看代码

import matplotlib.pyplot as plt
import random 

# Select the SimHei font so the Chinese labels below can be rendered.
plt.rcParams['font.sans-serif'] = ['SimHei']
# Drawing area: size in inches and resolution.
plt.figure(figsize=(20, 8), dpi=80)

# One sample per minute, with a Chinese label for every minute.
minutes = range(60)
minute_labels = ['11点{}分'.format(m) for m in minutes]
tick_step = 5

# Show only every fifth tick on both axes.
plt.xticks(minutes[::tick_step], minute_labels[::tick_step])
plt.yticks(range(0, 40, tick_step))

# Axis descriptions and title.
plt.xlabel('时间')
plt.ylabel('温度')
plt.title('某些城市11点到12点之间的温度变化')

# Random temperature series: Shanghai first, then Beijing.
shanghai_temps = [random.uniform(15, 18) for _ in minutes]
beijing_temps = [random.uniform(1, 2) for _ in minutes]

# Draw both lines; Beijing is a red dashed line.
plt.plot(minutes, shanghai_temps, label='上海')
plt.plot(minutes, beijing_temps, color='r', linestyle='--', label='北京')

# Legend built from the labels given to plot().
plt.legend(loc='best')
# To save the figure, call plt.savefig('test.png') before show().
plt.show()

颜色字符	风格字符	位置信息
r 红色	- 实线	'best' 0
g 绿色	-- 虚线	'upper right' 1
b 蓝色	-. 点画线	'upper left' 2
w 白色	: 点虚线	'lower left' 3
c 青色	' ' 留空（空格，不画线）	'lower right' 4
m 洋红		'right' 5
y 黄色		'center left' 6
k 黑色		'center right' 7
		'lower center' 8
		'upper center' 9
		'center' 10

多个坐标系绘制

点击查看代码

import matplotlib.pyplot as plt
import random 
# Select the SimHei font so the Chinese labels below can be rendered.
plt.rcParams['font.sans-serif'] = ['SimHei']

# One figure holding two side-by-side coordinate systems (Axes).
# pyplot functions act on the figure as a whole; the Axes methods used below
# act on one coordinate system each.
fig, (shanghai_ax, beijing_ax) = plt.subplots(nrows=1, ncols=2, figsize=(20, 8))

# One sample per minute, with a Chinese label for every minute.
minutes = range(60)
minute_labels = ['11点{}分'.format(m) for m in minutes]
temperature_ticks = range(40)

# Random temperature series: Shanghai in 15-18, Beijing in 1-3.
shanghai_temps = [random.uniform(15, 18) for _ in minutes]
beijing_temps = [random.uniform(1, 3) for _ in minutes]

# One line per coordinate system; Beijing is a red dashed line.
shanghai_ax.plot(minutes, shanghai_temps, label='上海')
beijing_ax.plot(minutes, beijing_temps, color='r', linestyle='--', label='北京')

# Everything else is identical for both coordinate systems.
for axis in (shanghai_ax, beijing_ax):
    # Show only every fifth tick on both axes.
    axis.set_xticks(minutes[::5], minute_labels[::5])
    axis.set_yticks(temperature_ticks[::5])

    # Axis descriptions and title.
    axis.set_xlabel('时间')
    axis.set_ylabel("温度")
    axis.set_title("中午11点到12点之间的温度变化显示")

    axis.legend(loc='upper left')

plt.show()

柱状图

点击查看代码

import matplotlib.pyplot as plt

# Bar chart of one value per film, drawn with plt.bar(positions, heights, width=...).
# Select the SimHei font so the Chinese film titles can be rendered.
plt.rcParams['font.sans-serif'] = ['SimHei']

# Create the figure.
plt.figure(figsize=(20, 8))

# Film titles and the value plotted for each of them.
titles = ['雷神3：诸神黄昏','正义联盟','东方快车谋杀案','寻梦环游记','全球风暴','降魔传','追捕','七十七天','密战','狂兽','其它']
heights = [73853,57767,22354,15969,14839,8725,8716,8318,7916,6764,52222]
bar_colors = ['b','r','g','y','c','m','y','k','c','g','g']

# Numeric x positions, one per film.
positions = range(len(titles))

# Draw the bars, one colour per bar.
plt.bar(positions, heights, width=0.5, color=bar_colors)

# Replace the numeric tick labels with the film titles.
plt.xticks(positions, titles)

plt.show()

点击查看代码

# Select the SimHei font so the Chinese labels below can be rendered.
plt.rcParams['font.sans-serif'] = ['SimHei']

# Create the figure.
plt.figure(figsize=(20, 8))

movie_name = ['雷神3：诸神黄昏','正义联盟','寻梦环游记']

first_day = [10587.6,10062.5,1275.7]
first_weekend=[36224.9,34479.6,11830]

# Two bars per film: the second series is shifted right by one bar width.
bar_width = 0.2
positions = range(len(movie_name))
shifted_positions = [p + bar_width for p in positions]

plt.bar(positions, first_day, width=bar_width, label='首日票房')
plt.bar(shifted_positions, first_weekend, width=bar_width, label='首周票房')

# Put each film title midway between its two bars.
plt.xticks([p + bar_width / 2 for p in positions], movie_name)
plt.legend(loc='best')

plt.show()

直方图

点击查看代码

import matplotlib.pyplot as plt

# Number of groups: the data is split into consecutive ranges; the number of
# ranges is (max - min) / bin width.
# Bin width: the difference between the two end points of one group.

# Select the SimHei font so the Chinese axis labels can be rendered.
plt.rcParams['font.sans-serif'] = ['SimHei']

# Create the figure the histogram is drawn into.
plt.figure(figsize=(20, 8))

time =[131,  98, 125, 131, 124, 139, 131, 117, 128, 108, 135, 138, 131, 102, 107, 114, 119, 128, 121, 142, 127, 130, 124, 101, 110, 116, 117, 110, 128, 128, 115,  99, 136, 126, 134,  95, 138, 117, 111,78, 132, 124, 113, 150, 110, 117,  86,  95, 144, 105, 126, 130,126, 130, 126, 116, 123, 106, 112, 138, 123,  86, 101,  99, 136,123, 117, 119, 105, 137, 123, 128, 125, 104, 109, 134, 125, 127,105, 120, 107, 129, 116, 108, 132, 103, 136, 118, 102, 120, 114,105, 115, 132, 145, 119, 121, 112, 139, 125, 138, 109, 132, 134,156, 106, 117, 127, 144, 139, 139, 119, 140,  83, 110, 102,123,107, 143, 115, 136, 118, 139, 123, 112, 118, 125, 109, 119, 133,112, 114, 122, 109, 106, 123, 116, 131, 127, 115, 118, 112, 135,115, 146, 137, 116, 103, 144,  83, 123, 111, 110, 111, 100, 154,136, 100, 118, 119, 133, 134, 106, 129, 126, 110, 111, 109, 141,120, 117, 106, 149, 122, 122, 110, 118, 127, 121, 114, 125, 126,114, 140, 103, 130, 141, 117, 106, 114, 121, 114, 133, 137,  92,121, 112, 146,  97, 137, 105,  98, 117, 112,  81,  97, 139, 113,134, 106, 144, 110, 137, 137, 111, 104, 117, 100, 111, 101, 110,105, 129, 137, 112, 120, 113, 133, 112,  83,  94, 146, 133, 101,131, 116, 111,  84, 137, 115, 122, 106, 144, 109, 123, 116, 111,111, 133, 150]

# Bin width: 2 minutes. (This is the width of one group, not the number of
# groups.)
bin_width = 2

# Explicit bin edges from the smallest to the largest value. range() excludes
# its stop value, so the stop is pushed one bin past the maximum; that keeps
# the maximum inside the last bin and gives it a tick.
shortest, longest = min(time), max(time)
edges = list(range(shortest, longest + bin_width, bin_width))

# Number of groups implied by the edges.
groups = len(edges) - 1

# Draw the histogram with exactly these bins.
# Passing density=True would make the y axis show frequencies instead of
# counts (older Matplotlib versions called that parameter `normed`).
plt.hist(time, bins=edges)

# One tick per bin edge, so ticks and bars line up.
plt.xticks(edges)

plt.xlabel('电影时长大小')
plt.ylabel('电影的数据量')
# Show a dashed grid.
plt.grid(True, linestyle='--', alpha=1)

plt.show()

饼图

点击查看代码

import matplotlib.pyplot as plt
import pandas as pd
from mplfinance.original_flavor import candlestick_ochl

# Select the SimHei font so the Chinese labels below can be rendered.
plt.rcParams['font.sans-serif'] = ['SimHei']

plt.figure(figsize=(20, 8))

# Film titles and the count plotted for each of them.
titles = ['雷神3：诸神黄昏','正义联盟','东方快车谋杀案','寻梦环游记','全球风暴','降魔传','追捕','七十七天','密战','狂兽','其它']
counts = [60605,54546,45819,28243,13270,9945,7679,6799,6101,4621,20105]
slice_colors = ['b','r','g','y','c','m','y','r','c','g','g']

# Draw the pie; each slice is annotated with its percentage (two decimals).
plt.pie(counts, labels=titles, autopct='%1.2f%%', colors=slice_colors)

# Equal axis scaling so the pie is drawn as a circle.
plt.axis('equal')

plt.title('排片占比示意图')
plt.legend(loc='best')

plt.show()

点击查看代码

slice_names = ('Frogs', 'Hogs', 'Dogs', 'Logs')
slice_sizes = [15, 30, 45, 10]
# One offset per slice, in the same order as the names. Only the second entry
# is non-zero, so the 'Hogs' slice is pulled out of the pie by 0.1.
slice_offsets = (0, 0.1, 0, 0)

_, pie_ax = plt.subplots()

# Each slice is annotated with its percentage (one decimal); the pie has a
# shadow and starts at 90 degrees.
pie_ax.pie(
    slice_sizes,
    explode=slice_offsets,
    labels=slice_names,
    autopct='%1.1f%%',
    shadow=True,
    startangle=90,
)

# Equal axis scaling so the pie is drawn as a circle.
pie_ax.axis('equal')

plt.show()

K线图

点击查看代码

# Optional material: candlestick (K-line) chart.
# Column name -> HDF5 file that holds the corresponding values.
price_files = [
    ("open", "./stock_plot/day_open.h5"),
    ("close", "./stock_plot/day_close.h5"),
    ("high", "./stock_plot/day_high.h5"),
    ("low", "./stock_plot/day_low(1).h5"),
]

# Take the first 100 rows of every file and keep only stock 000001.SZ.
columns = [pd.read_hdf(path)[:100]["000001.SZ"] for _, path in price_files]

# Put the four series side by side and name them.
day = pd.concat(columns, axis=1)
day.columns = [name for name, _ in price_files]
# Move the index into the first column and convert to a plain array, so each
# row reads: index value, open, close, high, low.
day = day.reset_index().values

# Draw the chart.
fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(20, 8), dpi=80)
# The first argument is the Axes to draw on.
candlestick_ochl(axes, day, width=0.2, colorup='r', colordown='g')
plt.show()

Numpy

了解Numpy

import random
import time
import numpy as np

# Compare the built-in sum() over a Python list with np.sum() over an ndarray.
# NOTE(review): a list of 100 million floats presumably needs several GB of
# RAM; lower the count on a small machine.
a = [random.random() for _ in range(100000000)]

# time.perf_counter() is the clock intended for measuring short durations;
# unlike time.time() it is not affected by system clock adjustments.
start = time.perf_counter()
sum1 = sum(a)
list_seconds = time.perf_counter() - start

# The list -> ndarray conversion is done before the clock starts, so only the
# summation itself is timed.
b = np.array(a)
start = time.perf_counter()
sum3 = np.sum(b)
array_seconds = time.perf_counter() - start

# Elapsed seconds: list sum first, ndarray sum second.
print(list_seconds, array_seconds)

1.6841034889221191 0.5198299884796143

ndarray n维数组

# ndarray: NumPy's n-dimensional array.
# It stores a collection of elements that all share one data type.

# Create an array.
# Here: a two-dimensional array with 2 rows and 3 columns.
a = np.array([[1,2,3], [4,5,6]])

a.shape  # shape of the array
# (2, 3)

a.ndim  # number of dimensions
# 2

a.size  # total number of elements
# 6

a.itemsize  # bytes used by a single element
# 4 (matches the int32 dtype reported further down; an int64 array gives 8)

a.nbytes  # total bytes: size * itemsize = 6 * 4
# 24

a.flags  # memory-layout flags of the array
 # C_CONTIGUOUS : True
 # F_CONTIGUOUS : False
 # OWNDATA : True
 # WRITEABLE : True
 # ALIGNED : True
 # WRITEBACKIFCOPY : False
 # UPDATEIFCOPY : False

# Arrays with two, one and three dimensions.
a = np.array([[1,2,3], [4,5,6]])
b = np.array([7,8,9,10])
c = np.array([[[1,2,3], [4,5,6]], [[11,22,33], [44,55,66]]])

a.shape  # (2, 3)
b.shape  # (4,)
c.shape
# (2, 2, 3)

# N-dimensional arrays
# 0-d: a single scalar such as 1, 2 or 3
# 1-d: [7,8,9,10]
# 2-d: [[1,2,3], [4,5,6]]
# 3-d: [[[1,2,3], [4,5,6]], [[11,22,33], [44,55,66]]]

a
a.dtype  # element type of the array
# dtype('int32')

# The element type can be chosen explicitly when the array is created.
a = np.array([[1,2,3],[4,5,6]],dtype=np.float32)
a.dtype
# dtype('float32')

数组之间的运算

import numpy as np

# Array with scalar: the scalar is applied to every element.
arr = np.array([1,2,3,4])
arr+1
# array([2, 3, 4, 5])

# Array with array: broadcasting.
a = np.array([[4,5,6],[7,8,9]])
b = np.array([[2,10], [2, 15]])
# a * b would be element-wise, but shapes (2, 3) and (2, 2) cannot be
# broadcast against each other, so it raises ValueError and is left out.

# Two scores per row, weighted 30% / 70%.
score = np.array([[80,86],
[82,80],
[85,78],
[90,90],
[86,82],
[82,90],
[78,80],
[92,94]])

# Shape (1, 2) broadcasts over the eight rows of shape (8, 2).
percent = np.array([[0.3, 0.7]])
score * percent
#array([[24. , 60.2],
#       [24.6, 56. ],
#       [25.5, 54.6],
#       [27. , 63. ],
#       [25.8, 57.4],
#       [24.6, 63. ],
#       [23.4, 56. ],
#       [27.6, 65.8]])

# Matrices differ from arrays in how operations are defined on them.
# np.asmatrix is used instead of the np.mat alias, which NumPy 2.0 removed.
np.asmatrix(score)
#matrix([[80, 86],
#        [82, 80],
#        [85, 78],
#        [90, 90],
#        [86, 82],
#        [82, 90],
#        [78, 80],
#        [92, 94]])

# The weights as a (2, 1) column.
c = np.array([[0.3], [0.7]])
np.asmatrix(c)
#matrix([[0.3],
#        [0.7]])

# Matrix product
# (8, 2) * (2, 1) = (8, 1)
np.matmul(score, c)

#array([[84.2],
#       [80.6],
#       [80.1],
#       [90. ],
#       [83.2],
#       [87.6],
#       [79.4],
#       [93.4]])

# 500 x 504 samples from a normal distribution (mean 0, standard deviation 1).
stock_day_rise = np.random.normal(loc=0, scale=1, size=(500, 504))
stock_day_rise.shape
# (500, 504)

# Two blocks of ten rows each, restricted to the first 100 columns.
first_100_cols = stock_day_rise[:, :100]
stock1 = first_100_cols[:10]
stock2 = first_100_cols[10:20]

stock2

# Joining
# axis=0 stacks the arrays along the row direction (same as vstack)
# axis=1 stacks the arrays along the column direction (same as hstack)
all_ = np.concatenate((stock1, stock2), axis=0)

# Splitting: cut the 20 rows into 20 equal pieces along the row direction.
np.split(all_, 20, axis=0)
# Read a comma-separated text file into a float array. Fields that are empty
# or cannot be parsed as numbers appear as nan in the recorded output.
# NOTE(review): the all-nan first row is presumably a text header line in
# test.csv - confirm against the file.
np.genfromtxt('test.csv', delimiter=',')
#array([[  nan,   nan,   nan,   nan],
#       [  1. , 123. ,   1.4,  23. ],
#       [  2. , 110. ,   nan,  18. ],
#       [  3. ,   nan,   2.1,  19. ]])

type(np.nan)  # nan is an ordinary Python float
#float

# Euler's number. np.e is used rather than a hand-typed approximation
# (e is 2.71828..., not 2.73).
e = np.e
1/e
# 0.36787944117144233

# np.exp(x) computes e ** x.
np.exp(2)
# 7.38905609893065

1 / np.exp(2)
#0.1353352832366127

# 1 / (1 + e ** -x) for a single value ...
1 / (1 + 1/np.exp(2))
# 0.8807970779778823

# ... and element-wise for a whole array.
m = np.array([1,2,3])
1 / (1 + 1/np.exp(m))
# array([0.73105858, 0.88079708, 0.95257413])

Pandas

pandas数据结构

import numpy as np
import pandas as pd

# 500 x 504 samples from a normal distribution (mean 0, standard deviation 1).
stock_day_rise = np.random.normal(loc=0, scale=1, size=(500, 504))

stock_day_rise
# Recorded output of one run (the values are random):
#array([[-0.51275272,  0.94026123, -0.28734351, ..., -1.80535228,
#         1.12647759, -0.34482647],
#       [-0.11082195, -0.61753087,  0.51247014, ..., -0.71336186,
#        -0.75038013,  1.23107248],
#       [ 1.30920002, -0.86247187, -0.18046507, ...,  0.41082344,
#         0.36615753, -1.15248877],
#       ...,
#       [-0.64597353,  0.98051196,  0.21157511, ...,  0.3901954 ,
#         0.44220279,  0.7628329 ],
#       [-0.45372471,  0.74978987,  1.14269309, ..., -0.9227356 ,
#        -0.64413556, -0.36949079],
#       [-0.7002719 ,  0.57790589, -1.65279998, ..., -1.57232142,
#        -0.51782955,  0.13426912]])

# Wrap the array in a DataFrame; rows and columns get default integer labels.
stock_df = pd.DataFrame(data=stock_day_rise)
stock_df
| 0    | 1         | 2         | 3         | 4         | 5         | 6         | 7         | 8         | 9         | ...       | 494  | 495       | 496       | 497       | 498       | 499       | 500       | 501       | 502       | 503       |
| ---- | --------- | --------- | --------- | --------- | --------- | --------- | --------- | --------- | --------- | --------- | ---- | --------- | --------- | --------- | --------- | --------- | --------- | --------- | --------- | --------- |
| 0    | -0.512753 | 0.940261  | -0.287344 | 0.531760  | 0.012567  | 0.709473  | 0.239689  | -1.779217 | -0.501474 | -0.507617 | ...  | 1.377147  | 1.783230  | 0.196377  | 1.594897  | 0.619660  | -1.876187 | 1.279120  | -1.805352 | 1.126478  |
| 1    | -0.110822 | -0.617531 | 0.512470  | 0.581689  | 0.711916  | 0.813071  | 1.521003  | -0.290721 | -0.156604 | -1.124984 | ...  | 0.948753  | 1.402447  | 0.294993  | -0.802038 | -1.067637 | -0.223470 | 0.445096  | -0.713362 | -0.750380 |
| 2    | 1.309200  | -0.862472 | -0.180465 | 0.028584  | 0.037257  | 0.051052  | 1.629817  | -1.133528 | -0.987510 | -1.585423 | ...  | 0.245225  | 1.909723  | -1.403577 | -0.406025 | -1.327163 | -0.608240 | 0.755096  | 0.410823  | 0.366158  |
| 3    | 0.920909  | -0.473799 | -1.925638 | -0.989393 | 0.837138  | 0.948183  | 0.011733  | 0.466019  | 0.258141  | 0.270631  | ...  | 1.028244  | 0.550098  | -0.168381 | 0.029352  | 0.652068  | -1.366157 | 2.141130  | -0.391050 | -0.524698 |
| 4    | -0.319762 | 0.599024  | -0.154454 | -1.954056 | -1.672396 | -0.158403 | 1.369486  | 1.294337  | 0.920220  | 0.784408  | ...  | -0.694639 | -0.250066 | 0.229763  | -1.020350 | 0.725860  | -0.062765 | -0.071443 | -0.708495 | -1.298314 |
| ...  | ...       | ...       | ...       | ...       | ...       | ...       | ...       | ...       | ...       | ...       | ...  | ...       | ...       | ...       | ...       | ...       | ...       | ...       | ...       | ...       |
| 495  | -0.189318 | 0.680488  | 0.696482  | -0.480230 | -1.868852 | -0.012383 | -1.519626 | -1.279518 | 0.804979  | 1.390888  | ...  | 1.013731  | -1.506497 | -0.326615 | -1.552188 | 0.427825  | -0.533029 | 0.143934  | 0.192034  | 1.304076  |
| 496  | 0.838544  | -0.455677 | -0.874880 | 0.494403  | -0.196655 | -0.738068 | -2.619937 | -0.151928 | -1.533008 | -2.134869 | ...  | -0.575703 | -0.237983 | -1.551520 | 0.825470  | 0.186887  | -0.449823 | 1.406305  | 1.347674  | 0.058468  |
| 497  | -0.645974 | 0.980512  | 0.211575  | -0.397760 | -0.926155 | -0.628815 | 0.407839  | -0.002652 | 0.106013  | 0.377582  | ...  | -0.984033 | 0.882435  | 0.741889  | 1.084276  | -0.514312 | 1.374642  | 0.186176  | 0.390195  | 0.442203  |
| 498  | -0.453725 | 0.749790  | 1.142693  | -0.058502 | 0.327256  | 1.752110  | 0.535332  | 1.743112  | -0.459879 | -2.108713 | ...  | 0.119614  | -0.412215 | 0.209263  | 0.313788  | 0.216358  | -1.119070 | 1.067892  | -0.922736 | -0.644136 |
| 499  | -0.700272 | 0.577906  | -1.652800 | -0.523849 | -0.342849 | -0.937188 | 0.835102  | 0.269253  | -0.754492 | -0.169862 | ...  | -0.792549 | -0.159701 | 0.900721  | -0.909817 | -1.044447 | -1.155437 | 0.309660  | -1.572321 | -0.517830 |

type(stock_df)
#pandas.core.frame.DataFrame

# Row labels: one name per row of the array, numbered from 0.
row_count = len(stock_day_rise)
stock_code = ['股票{}'.format(n) for n in range(row_count)]

# Rebuild the DataFrame with those names as its row index.
stock_df = pd.DataFrame(data=stock_day_rise, index=stock_code)

stock_df
| 0     | 1         | 2         | 3         | 4         | 5         | 6         | 7         | 8         | 9         | ...       | 494  | 495       | 496       | 497       | 498       | 499       | 500       | 501       | 502       | 503       |
| ----- | --------- | --------- | --------- | --------- | --------- | --------- | --------- | --------- | --------- | --------- | ---- | --------- | --------- | --------- | --------- | --------- | --------- | --------- | --------- | --------- |
| 股票0   | -0.512753 | 0.940261  | -0.287344 | 0.531760  | 0.012567  | 0.709473  | 0.239689  | -1.779217 | -0.501474 | -0.507617 | ...  | 1.377147  | 1.783230  | 0.196377  | 1.594897  | 0.619660  | -1.876187 | 1.279120  | -1.805352 | 1.126478  |
| 股票1   | -0.110822 | -0.617531 | 0.512470  | 0.581689  | 0.711916  | 0.813071  | 1.521003  | -0.290721 | -0.156604 | -1.124984 | ...  | 0.948753  | 1.402447  | 0.294993  | -0.802038 | -1.067637 | -0.223470 | 0.445096  | -0.713362 | -0.750380 |
| 股票2   | 1.309200  | -0.862472 | -0.180465 | 0.028584  | 0.037257  | 0.051052  | 1.629817  | -1.133528 | -0.987510 | -1.585423 | ...  | 0.245225  | 1.909723  | -1.403577 | -0.406025 | -1.327163 | -0.608240 | 0.755096  | 0.410823  | 0.366158  |
| 股票3   | 0.920909  | -0.473799 | -1.925638 | -0.989393 | 0.837138  | 0.948183  | 0.011733  | 0.466019  | 0.258141  | 0.270631  | ...  | 1.028244  | 0.550098  | -0.168381 | 0.029352  | 0.652068  | -1.366157 | 2.141130  | -0.391050 | -0.524698 |
| 股票4   | -0.319762 | 0.599024  | -0.154454 | -1.954056 | -1.672396 | -0.158403 | 1.369486  | 1.294337  | 0.920220  | 0.784408  | ...  | -0.694639 | -0.250066 | 0.229763  | -1.020350 | 0.725860  | -0.062765 | -0.071443 | -0.708495 | -1.298314 |
| ...   | ...       | ...       | ...       | ...       | ...       | ...       | ...       | ...       | ...       | ...       | ...  | ...       | ...       | ...       | ...       | ...       | ...       | ...       | ...       | ...       |
| 股票495 | -0.189318 | 0.680488  | 0.696482  | -0.480230 | -1.868852 | -0.012383 | -1.519626 | -1.279518 | 0.804979  | 1.390888  | ...  | 1.013731  | -1.506497 | -0.326615 | -1.552188 | 0.427825  | -0.533029 | 0.143934  | 0.192034  | 1.304076  |
| 股票496 | 0.838544  | -0.455677 | -0.874880 | 0.494403  | -0.196655 | -0.738068 | -2.619937 | -0.151928 | -1.533008 | -2.134869 | ...  | -0.575703 | -0.237983 | -1.551520 | 0.825470  | 0.186887  | -0.449823 | 1.406305  | 1.347674  | 0.058468  |
| 股票497 | -0.645974 | 0.980512  | 0.211575  | -0.397760 | -0.926155 | -0.628815 | 0.407839  | -0.002652 | 0.106013  | 0.377582  | ...  | -0.984033 | 0.882435  | 0.741889  | 1.084276  | -0.514312 | 1.374642  | 0.186176  | 0.390195  | 0.442203  |
| 股票498 | -0.453725 | 0.749790  | 1.142693  | -0.058502 | 0.327256  | 1.752110  | 0.535332  | 1.743112  | -0.459879 | -2.108713 | ...  | 0.119614  | -0.412215 | 0.209263  | 0.313788  | 0.216358  | -1.119070 | 1.067892  | -0.922736 | -0.644136 |

# freq='B' is the business-day frequency: Saturdays and Sundays are skipped.
date = pd.date_range(start='2017-01-01', periods=504, freq='B')
# Row labels as before, plus the 504 business days as column labels.
stock_df = pd.DataFrame(data=stock_day_rise, index=stock_code, columns=date)

pandas的索引与修改

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# 500 x 504 samples from a normal distribution (mean 0, standard deviation 1).
stock_day_rise = np.random.normal(loc=0, scale=1, size=(500, 504))
# Row labels: one name per row, numbered from 0.
stock_code = ['股票{}'.format(n) for n in range(len(stock_day_rise))]
# Column labels: 504 business days (freq='B' skips Saturdays and Sundays).
data = pd.date_range(start='2017-01-01', periods=504, freq='B')
stock_dataframe = pd.DataFrame(data=stock_day_rise, index=stock_code, columns=data)
stock_dataframe
| 2017-01-02 | 2017-01-03 | 2017-01-04 | 2017-01-05 | 2017-01-06 | 2017-01-09 | 2017-01-10 | 2017-01-11 | 2017-01-12 | 2017-01-13 | ...       | 2018-11-23 | 2018-11-26 | 2018-11-27 | 2018-11-28 | 2018-11-29 | 2018-11-30 | 2018-12-03 | 2018-12-04 | 2018-12-05 | 2018-12-06 |
| ---------- | ---------- | ---------- | ---------- | ---------- | ---------- | ---------- | ---------- | ---------- | ---------- | --------- | ---------- | ---------- | ---------- | ---------- | ---------- | ---------- | ---------- | ---------- | ---------- | ---------- |
| 股票0        | -0.336502  | -0.283818  | -1.833312  | -0.034063  | -0.273923  | -0.013894  | 0.571314   | -0.685192  | -0.844952  | 0.697230  | ...        | -0.897525  | 2.295753   | 0.726545   | -0.332880  | -0.707125  | 0.301560   | -1.315805  | 1.038277   | 0.232298   |
| 股票1        | 0.431983   | -0.128563  | 0.430541   | 0.260152   | 0.885598   | 1.659742   | 0.407230   | 0.011112   | 0.624398   | -1.356692 | ...        | 0.433011   | -0.468825  | 0.536704   | -0.796652  | 0.972271   | 1.537066   | -0.146411  | 1.468827   | 1.733275   |
| 股票2        | 1.068510   | 0.637716   | -1.626844  | -0.985523  | 0.745854   | -0.359343  | 0.889808   | 1.364657   | -1.017752  | -0.772868 | ...        | -0.310762  | 0.420062   | 0.903381   | -0.804816  | -0.444837  | 1.373565   | -1.688836  | -0.853804  | 1.056135   |
| 股票3        | 1.650343   | -0.921815  | -0.068494  | 1.043372   | -1.766311  | -1.018881  | -1.031309  | 1.024690   | -0.533850  | 0.350309  | ...        | -1.010353  | 0.614537   | -0.511354  | -0.752013  | -1.017201  | -0.886048  | 0.680733   | 1.063538   | -0.383206  |
| 股票4        | -1.128249  | -1.282252  | -0.928848  | 0.075446   | -1.358604  | 1.602723   | -0.966502  | 2.256386   | 0.925430   | -1.027316 | ...        |            |            |            |            |            |            |            |            |            |

stock_dataframe.values
array([[-0.33650197, -0.28381791, -1.83331156, ...,  1.03827662,
         0.23229771,  0.50349308],
       [ 0.43198327, -0.12856302,  0.4305411 , ...,  1.46882666,
         1.73327538,  0.44540417],
       [ 1.06851021,  0.63771568, -1.6268439 , ..., -0.8538035 ,
         1.05613455,  1.13792046],
stock_dataframe.T
| 股票0        | 股票1       | 股票2       | 股票3       | 股票4       | 股票5       | 股票6       | 股票7       | 股票8       | 股票9       | ...       | 股票490 | 股票491     | 股票492     | 股票493     | 股票494     | 股票495     | 股票496     | 股票497     | 股票498     | 股票499     |
| ---------- | --------- | --------- | --------- | --------- | --------- | --------- | --------- | --------- | --------- | --------- | ----- | --------- | --------- | --------- | --------- | --------- | --------- | --------- | --------- | --------- |
| 2017-01-02 | -0.336502 | 0.431983  | 1.068510  | 1.650343  | -1.128249 | -0.605703 | -0.551460 | 0.019854  | -2.092409 | -0.495476 | ...   | 0.451665  | -0.328315 | -0.289311 | 0.204668  | 2.794759  | 0.877930  | 1.944935  | -0.657216 | 1.261522  |
| 2017-01-03 | -0.283818 | -0.128563 | 0.637716  | -0.921815 | -1.282252 | 0.427100  | -1.296923 | 0.767681  | -0.621305 | 0.122074  | ...   | 2.337562  | -0.350175 | -0.424671 | -1.011431 | 0.184091  | 0.242851  | 0.164125  | 0.910831  | -2.520630 |
| 2017-01-04 | -1.833312 | 0.430541  | -1.626844 | -0.068494 | -0.928848 | 0.030197  | -0.171296 | -0.267061 | -0.285124 | -0.212287 | ...   | -0.301684 | 0.015821  | 0.582552  | -0.349317 | 2.052757  | 0.056201  | 1.028949  | -0.730406 | -0.275469 |
| 2017-01-05 | -0.034063 | 0.260152  | -0.985523 | 1.043372  | 0.075446  | -0.282063 | 0.939964  | -1.005864 | -0.536240 | -0.521829 | ...   | 0.487618  | 0.211755  | 1.134300  | -1.530601 | -1.129824 | -0.106915 | -0.757018 | -0.306077 | 0.088784  |
| 2017-01-06 | -0.273923 | 0.885598  | 0.745854  | -1.766311 | -1.358604 | -1.407985 | -1.195100 | -0.552709 | -1.014346 | -0.442240 |       |           |           |           |           |           |           |           |           |           |

stock_dataframe.head(10)
2017-01-02	2017-01-03	2017-01-04	2017-01-05	2017-01-06	2017-01-09	2017-01-10	2017-01-11	2017-01-12	2017-01-13	...	2018-11-23	2018-11-26	2018-11-27	2018-11-28	2018-11-29	2018-11-30	2018-12-03	2018-12-04	2018-12-05	2018-12-06
股票0	-0.336502	-0.283818	-1.833312	-0.034063	-0.273923	-0.013894	0.571314	-0.685192	-0.844952	0.697230	...	-0.897525	2.295753	0.726545	-0.332880	-0.707125	0.301560	-1.315805	1.038277	0.232298	0.503493
股票1	0.431983	-0.128563	0.430541	0.260152	0.885598	1.659742	0.407230	0.011112	0.624398	-1.356692	...	0.433011	-0.468825	0.536704	-0.796652	0.972271	1.537066	-0.146411	1.468827	1.733275	0.445404
股票2	1.068510	0.637716	-1.626844	-0.985523	0.745854	-0.359343	0.889808	1.364657	-1.017752	-0.772868	...	-0.310762	0.420062	0.903381	-0.804816	-0.444837	1.373565	-1.688836	-0.853804	1.056135	1.137920
股票3	1.650343	-0.921815	-0.068494	1.043372	-1.766311	-1.018881	-1.031309	1.024690	-0.533850	0.350309	
# stock_dataframe.tail(10)

# DataFrame index operations
# Reset the index to the default integers, discarding the old one:
# stock_dataframe.reset_index(drop=True)

df = pd.DataFrame({'month':[1,4,7,10], 'year':[1, 1, 2, 2], 'sale':[55, 40, 84, 31]})
# Several index columns must be passed as ONE list. Written as
# set_index('year', 'month'), the second argument is taken as `drop`, which
# indexes by year only and is a TypeError on pandas 2.x.
df = df.set_index(['year', 'month'])

df
#             sale
# year month
# 1    1        55
#      4        40
# 2    7        84
#      10       31

# Two index columns give a two-level MultiIndex.
df.index
# MultiIndex([(1,  1),
#             (1,  4),
#             (2,  7),
#             (2, 10)],
#            names=['year', 'month'])

# MultiIndex
stock_dataframe = stock_dataframe.T
stock_dataframe
| 股票0        | 股票1       | 股票2       | 股票3       | 股票4       | 股票5       | 股票6       | 股票7       | 股票8       | 股票9       | ...       | 股票490 | 股票491     | 股票492     | 股票493     | 股票494     | 股票495     | 股票496     | 股票497     | 股票498     | 股票499     |
| ---------- | --------- | --------- | --------- | --------- | --------- | --------- | --------- | --------- | --------- | --------- | ----- | --------- | --------- | --------- | --------- | --------- | --------- | --------- | --------- | --------- |
| 2017-01-02 | -0.336502 | 0.431983  | 1.068510  | 1.650343  | -1.128249 | -0.605703 | -0.551460 | 0.019854  | -2.092409 | -0.495476 | ...   | 0.451665  | -0.328315 | -0.289311 | 0.204668  | 2.794759  | 0.877930  | 1.944935  | -0.657216 | 1.261522  |
| 2017-01-03 | -0.283818 | -0.128563 | 0.637716  | -0.921815 | -1.282252 | 0.427100  | -1.296923 | 0.767681  | -0.621305 | 0.122074  | ...   | 2.337562  | -0.350175 | -0.424671 | -1.011431 | 0.184091  | 0.242851  | 0.164125  | 0.910831  | -2.520630 |
| 2017-01-04 | -1.833312 | 0.430541  | -1.626844 | -0.068494 | -0.928848 | 0.030197  | -0.171296 | -0.267061 | -0.285124 | -0.212287 | ...   | -0.301684 | 0.015821  | 0.582552  | -0.349317 | 2.052757  | 0.056201  | 1.028949  | -0.730406 | -0.275469 |
| 2017-01-05 | -0.034063 | 0.260152  | -0.985523 | 1.043372  | 0.075446  | -0.282063 | 0.939964  | -1.005864 | -0.536240 | -0.521829 | ...   | 0.487618  | 0.211755  | 1.134300  | -1.530601 | -1.129824 | -0.106915 | -0.757018 | -0.306077 | 0.088784  |
| 2017-01-06 | -0.273923 | 0.885598  | 0.745854  | -1.766311 |           |           |           |           |           |           |       |           |           |           |           |           |           |           |           |           |

stock_dataframe['股票0']['2017-01-02']
# -0.33650197255654596

pd.Series(np.arange(10))
0    0
1    1
2    2
3    3
4    4
5    5
6    6
7    7
8    8
9    9
dtype: int32

pd.Series({'red':100, 'blue':200, 'green': 500, 'yellow':1000})
red        100
blue       200
green      500
yellow    1000
dtype: int64

data = pd.read_csv('./stock_day.csv')
data.head()
open	high	close	low	volume	price_change	p_change	ma5	ma10	ma20	v_ma5	v_ma10	v_ma20	turnover
2018-02-27	23.53	25.88	24.16	23.53	95578.03	0.63	2.68	22.942	22.142	22.875	53782.64	46738.65	55576.11	2.39
2018-02-26	22.80	23.78	23.53	22.80	60985.11	0.69	3.02	22.406	21.955	22.942	40827.52	42736.34	56007.50	1.53
2018-02-23	22.88	23.37	22.82	22.71	52914.01	0.54	2.42	21.938	21.929	23.022	35119.58	41871.97	56372.85	1.32
2018-02-22	22.25	22.76	22.28	22.02	36105.01	0.36	1.64	21.446	21.909	23.137	35397.58	39904.78	60149.60	0.90
2018-02-14	21.49	21.99	21.92	21.48	23331.04	0.44	2.05	21.366	21.923	23.253	33590.21	42935.74	61716.11	0.58

data[['open', 'high', 'close']]
open	high	close
2018-02-27	23.53	25.88	24.16
2018-02-26	22.80	23.78	23.53
2018-02-23	22.88	23.37	22.82
2018-02-22	22.25	22.76	22.28
2018-02-14	21.49	21.99	21.92
...	...	...	...
2015-03-06	13.17	14.48	14.28
2015-03-05	12.88	13.45	13.16
2015-03-04	12.80	12.92	12.90
2015-03-03	12.52	13.06	12.70
2015-03-02	12.25	12.67	12.52
643 rows × 3 columns

# 使用行列索引的方式取值，必须按照先列后行的顺序
data['open']['2018-02-27']
23.53

# data[:1, :2]

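# loc: selects by row/column labels only (here the row labels are dates and the column labels are indicator names)
# iloc: selects by integer position (0-based) instead of by label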
data.loc['2018-02-27': '2018-02-23', 'open']

2018-02-27    23.53
2018-02-26    22.80
2018-02-23    22.88
Name: open, dtype: float64

data.iloc[0:3, 0:4]  # 相当于取到一个DataFrame

	open	high	close	low
2018-02-27	23.53	25.88	24.16	23.53
2018-02-26	22.80	23.78	23.53	22.80
2018-02-23	22.88	23.37	22.82	22.71


# The old .ix indexer was removed in pandas 1.0.0.
# Sorting
# Sort by index; the sorted copy is only displayed here, not stored.
data.sort_index()

# ascending chooses the direction: True (the default) sorts from small to
# large, False from large to small.
data.sort_values(by='p_change', ascending=False)

# Sort by 'open' first and by 'close' within equal 'open' values.
data.sort_values(by=['open', 'close'], ascending=False).head(10)

# Statistics
# For every column, the index label of the row holding the minimum value
# (the position of the minimum, not the minimum itself).
data.idxmin(axis=0)

# cumsum
# Sort by index first so the running total follows ascending index order.
data = data.sort_index()
data

# Plot the running total of the p_change column.
data.p_change.cumsum().plot()
plt.show()

# Logical operations
# With comparison operators: yields one boolean per row.
data['p_change'] > 2

# Use the boolean result as a mask to keep only the matching rows.
data[data['p_change'] > 2]

# Combine conditions with &; each condition needs its own parentheses.
data[(data['p_change']>2) & (data['turnover']>5)]

# The same filter written as a query string.
data.query('p_change>2&turnover>5')

# isin: keep the rows whose turnover equals one of the listed values.
data[data['turnover'].isin([4.19, 2.39])]

# Arithmetic
data


open_ = data['open']
close_ = data['close']

# add() is addition, sub() is subtraction: store close - open as a new column.
data['my_price_change'] = close_.sub(open_)

# Custom function: with axis=0 the lambda receives one column at a time and
# returns that column's max - min.
data[['open', 'close']].apply(lambda x: x.max() - x.min(), axis=0)

这篇关于python 数据分析的文章就介绍到这儿，希望我们推荐的文章对大家有所帮助，也希望大家多多支持为之网！

Python教程

python 数据分析

基本环境安装

Matplotlib绘图架构

折线图

多个坐标系绘制

柱状图

直方图

饼图

K线图

Numpy

了解Numpy

ndarray n维数组

数组之间的运算

Pandas

pandas数据结构

pandas的索引与修改

前端开发

后端开发

移动端开发

数据库

服务器运维

人工智能

区块链

游戏开发

网站运营

大数据/云计算

软件工程

软件/开发工具使用

资讯