代码:
# -*- coding: utf-8 -*- #频率 and 众数 freDict={} #统计元素数量 def count(l): for item in l: if item in freDict.keys(): freDict[item] +=1 else: freDict[item] =1 return #求元素频率 def transform(): s = float(sum(freDict.values())) for k in freDict.keys(): freDict[k] /=s return #获取众数 def mode(): max_value = list(freDict.values())[0] max_key = list(freDict.keys())[0] for key,value in freDict.items(): if value>max_value: max_value=value max_key=key return max_key l=['a','b','c','d','e','a','b','c','d','e','c','d','e'] count(l) print("Count for Distinct:",freDict) transform() print("Frequency by precent:",freDict) print("Mode:",mode())
输出结果:
代码:
# -*- coding: utf-8 -*- from math import * import numpy as np iris = np.loadtxt(r"E:\iris_proc.data",delimiter=",") rel = np.linspace(0,0, 11*5).reshape(11,5) rel[...,0]=range(0,101,10) for col in range(1,5): rel[...,col]=[np.percentile(iris[...,col-1], p) for p in rel[...,0]] print(rel)
输出结果:
代码:
# -*- coding: utf-8 -*- import math import numpy as np def mean(x): return sum(x)/ float(len(x)) def median(y): x=np.sort(y) if len(x)%2==0: return (x[len(x)//2]+x[len(x)//2+1])/2.0 else: return x[len(x)//2] def trimmean(x,p): b= np.percentile(x, p//2) t= np.percentile(x, 100-p//2) return mean([i for i in x if b <= i <= t]) iris = np.loadtxt(r"E:\iris_proc.data",delimiter=",") rel = np.linspace(0,0, 3*4).reshape(3,4) for i in range(3): for j in range(4): if(i==0): rel[0,j] = mean(iris[...,j]) elif(i==1): rel[1,j] = median(iris[...,j]) else: rel[2,j] = trimmean(iris[...,j], 20) print(rel)
输出结果:
代码:
import numpy as np from math import * def rang(x): return max(x)-min(x) def var(x): return np.var(x)*len(x)/(len(x)-1) def std(x): return sqrt(var(x)) def aad(x): x_mean = np.mean(x) return sum([abs(x[i]-x_mean) for i in range(len(x))])/len(x) def mad(x): x_median = np.median(x) return np.median([abs(x[i]-x_median) for i in range(len(x))]) def iqr(x): return np.percentile(x,75)-np.percentile(x,25) iris = np.loadtxt("E:\iris_proc.data",delimiter=',') rel=np.linspace(0,0,5*4).reshape(5,4) for col in range(4): rel[0, col] = rang(iris[..., col]) rel[1, col] = std(iris[..., col]) rel[2, col] = aad(iris[..., col]) rel[3, col] = mad(iris[..., col]) rel[4, col] = iqr(iris[..., col]) print(rel)
输出结果:
代码:
from itertools import groupby import numpy as np iris = np.loadtxt("E:\iris_proc.data",delimiter=',') data = iris[...,0]*10 data = sorted([str(int(e)) for e in data]) #k 和 h 分别为每个数值的十位数字和个位数字的字符形式 for k,g in groupby(data,key=lambda x:int(x)//5): lst = map(str,[int(h)%10 for h in list(g)]) print(k//2,'|',''.join(lst))
输出结果: