KNN:
- 一种非参数、惰性学习方法,导致预测时速度慢
- 当训练样本集较大时,会导致其计算开销高
- 样本不平衡时,对稀有类别的预测准确率低
- KNN模型的可解释性不强
k近邻法(k-nearest neighbor, k-NN)可以做分类也可以做回归
k=1:最近邻(注:只看最近的一个样本,对噪声和异常点非常敏感,容易过拟合,一般不建议使用)
k≠1:k最近邻
k过大也不行,容易欠拟合
示例:利用距离度量(欧氏距离)判断电影所属类别
import collections  # Counter is used for the majority vote

import numpy as np
import pandas as pd

# --- Step-by-step kNN on a toy movie data set -------------------------------
# Four training samples with two features each.
dataset = np.array([[1, 101], [5, 89], [108, 5], [115, 8]])
labels = ['爱情片', '爱情片', '动作片', '动作片']
test_data = [101, 20]  # the sample whose class we want to determine
k = 3  # hyper-parameter chosen by hand: how many neighbours vote

# Euclidean distance from the test sample to every training sample.
distances = np.sum((test_data - dataset) ** 2, axis=1) ** 0.5
# argsort() returns the indices that sort the distances in ascending order;
# the first k of them select the labels of the k nearest samples.
k_labels = [labels[index] for index in distances.argsort()[0:k]]
# The label that occurs most often among those k neighbours is the prediction.
label = collections.Counter(k_labels).most_common(1)[0][0]
print(label)


# --- The same logic wrapped in a reusable function --------------------------
def classify(test, dataset, labels, k):
    """Predict a label by majority vote among the k nearest neighbours.

    Args:
        test: Feature vector of the sample to classify (sequence or 1-D array).
        dataset: Training features, one row per sample (2-D array-like).
        labels: Training labels; ``labels[i]`` belongs to row ``i`` of
            ``dataset``.
        k: Number of nearest neighbours that take part in the vote; must be
            at least 1.

    Returns:
        The most common label among the k nearest training samples. When
        several labels are equally common, the one met first in
        nearest-first order wins.

    Raises:
        ValueError: If ``k`` is smaller than 1.
    """
    if k < 1:
        # A zero k leaves nothing to vote on and a negative k would silently
        # slice "all but the farthest" samples, so reject both explicitly.
        raise ValueError(f"k must be at least 1, got {k}")

    # Convert up front so plain Python lists work as well as ndarrays.
    features = np.asarray(dataset, dtype=float)
    sample = np.asarray(test, dtype=float)

    dist = np.sum((sample - features) ** 2, axis=1) ** 0.5  # Euclidean distance
    # A stable sort keeps equally distant samples in their original order,
    # which makes the result deterministic.
    nearest = dist.argsort(kind="stable")[0:k]
    k_labels = [labels[index] for index in nearest]
    return collections.Counter(k_labels).most_common(1)[0][0]


# kNN classification of the toy sample via the function.
test_class = classify(test_data, dataset, labels, k)
print(test_class)  # printed so the result is visible outside a notebook too


# --- The same prediction expressed with pandas ------------------------------
# Column names mean "number of fight scenes" and "number of kissing scenes".
df = pd.DataFrame(dataset, columns=['打斗镜头数', '接吻镜头数'])
df['label'] = labels
test_data = [101, 20]  # the sample whose class we want to determine
k = 3  # hyper-parameter chosen by hand

# Distance of every row (first two columns are the features) to the sample.
df['distance'] = np.sum((df.iloc[:, :2].values - test_data) ** 2, axis=1) ** 0.5
# Sort by distance, keep the labels of the k nearest rows, count them and take
# the most frequent one.
pandas_label = (
    df.sort_values(by='distance')['label']
    .head(k)
    .value_counts()
    .index[0]
)
print(pandas_label)
↓以下是 datingTestSet.txt 的内容(每条记录依次为 3 个数值特征和 1 个类别标签:largeDoses / smallDoses / didntLike),可直接复制到记事本中保存
40920 8.326976 0.953952 largeDoses 14488 7.153469 1.673904 smallDoses 26052 1.441871 0.805124 didntLike 75136 13.147394 0.428964 didntLike 38344 1.669788 0.134296 didntLike 72993 10.141740 1.032955 didntLike 35948 6.830792 1.213192 largeDoses 42666 13.276369 0.543880 largeDoses 67497 8.631577 0.749278 didntLike 35483 12.273169 1.508053 largeDoses 50242 3.723498 0.831917 didntLike 63275 8.385879 1.669485 didntLike 5569 4.875435 0.728658 smallDoses 51052 4.680098 0.625224 didntLike 77372 15.299570 0.331351 didntLike 43673 1.889461 0.191283 didntLike 61364 7.516754 1.269164 didntLike 69673 14.239195 0.261333 didntLike 15669 0.000000 1.250185 smallDoses 28488 10.528555 1.304844 largeDoses 6487 3.540265 0.822483 smallDoses 37708 2.991551 0.833920 didntLike 22620 5.297865 0.638306 smallDoses 28782 6.593803 0.187108 largeDoses 19739 2.816760 1.686209 smallDoses 36788 12.458258 0.649617 largeDoses 5741 0.000000 1.656418 smallDoses 28567 9.968648 0.731232 largeDoses 6808 1.364838 0.640103 smallDoses 41611 0.230453 1.151996 didntLike 36661 11.865402 0.882810 largeDoses 43605 0.120460 1.352013 didntLike 15360 8.545204 1.340429 largeDoses 63796 5.856649 0.160006 didntLike 10743 9.665618 0.778626 smallDoses 70808 9.778763 1.084103 didntLike 72011 4.932976 0.632026 didntLike 5914 2.216246 0.587095 smallDoses 14851 14.305636 0.632317 largeDoses 33553 12.591889 0.686581 largeDoses 44952 3.424649 1.004504 didntLike 17934 0.000000 0.147573 smallDoses 27738 8.533823 0.205324 largeDoses 29290 9.829528 0.238620 largeDoses 42330 11.492186 0.263499 largeDoses 36429 3.570968 0.832254 didntLike 39623 1.771228 0.207612 didntLike 32404 3.513921 0.991854 didntLike 27268 4.398172 0.975024 didntLike 5477 4.276823 1.174874 smallDoses 14254 5.946014 1.614244 smallDoses 68613 13.798970 0.724375 didntLike 41539 10.393591 1.663724 largeDoses 7917 3.007577 0.297302 smallDoses 21331 1.031938 0.486174 smallDoses 8338 4.751212 0.064693 smallDoses 5176 3.692269 1.655113 smallDoses 18983 10.448091 
0.267652 largeDoses 68837 10.585786 0.329557 didntLike 13438 1.604501 0.069064 smallDoses 48849 3.679497 0.961466 didntLike 12285 3.795146 0.696694 smallDoses 7826 2.531885 1.659173 smallDoses 5565 9.733340 0.977746 smallDoses 10346 6.093067 1.413798 smallDoses 1823 7.712960 1.054927 smallDoses 9744 11.470364 0.760461 largeDoses 16857 2.886529 0.934416 smallDoses 39336 10.054373 1.138351 largeDoses 65230 9.972470 0.881876 didntLike 2463 2.335785 1.366145 smallDoses 27353 11.375155 1.528626 largeDoses 16191 0.000000 0.605619 smallDoses 12258 4.126787 0.357501 smallDoses 42377 6.319522 1.058602 didntLike 25607 8.680527 0.086955 largeDoses 77450 14.856391 1.129823 didntLike 58732 2.454285 0.222380 didntLike 46426 7.292202 0.548607 largeDoses 32688 8.745137 0.857348 largeDoses 64890 8.579001 0.683048 didntLike 8554 2.507302 0.869177 smallDoses 28861 11.415476 1.505466 largeDoses 42050 4.838540 1.680892 didntLike 32193 10.339507 0.583646 largeDoses 64895 6.573742 1.151433 didntLike 2355 6.539397 0.462065 smallDoses 0 2.209159 0.723567 smallDoses 70406 11.196378 0.836326 didntLike 57399 4.229595 0.128253 didntLike 41732 9.505944 0.005273 largeDoses 11429 8.652725 1.348934 largeDoses 75270 17.101108 0.490712 didntLike 5459 7.871839 0.717662 smallDoses 73520 8.262131 1.361646 didntLike 40279 9.015635 1.658555 largeDoses 21540 9.215351 0.806762 largeDoses 17694 6.375007 0.033678 smallDoses 22329 2.262014 1.022169 didntLike 46570 5.677110 0.709469 didntLike 42403 11.293017 0.207976 largeDoses 33654 6.590043 1.353117 didntLike 9171 4.711960 0.194167 smallDoses 28122 8.768099 1.108041 largeDoses 34095 11.502519 0.545097 largeDoses 1774 4.682812 0.578112 smallDoses 40131 12.446578 0.300754 largeDoses 13994 12.908384 1.657722 largeDoses 77064 12.601108 0.974527 didntLike 11210 3.929456 0.025466 smallDoses 6122 9.751503 1.182050 largeDoses 15341 3.043767 0.888168 smallDoses 44373 4.391522 0.807100 didntLike 28454 11.695276 0.679015 largeDoses 63771 7.879742 0.154263 didntLike 
9217 5.613163 0.933632 smallDoses 69076 9.140172 0.851300 didntLike 24489 4.258644 0.206892 didntLike 16871 6.799831 1.221171 smallDoses 39776 8.752758 0.484418 largeDoses 5901 1.123033 1.180352 smallDoses 40987 10.833248 1.585426 largeDoses 7479 3.051618 0.026781 smallDoses 38768 5.308409 0.030683 largeDoses 4933 1.841792 0.028099 smallDoses 32311 2.261978 1.605603 didntLike 26501 11.573696 1.061347 largeDoses 37433 8.038764 1.083910 largeDoses 23503 10.734007 0.103715 largeDoses 68607 9.661909 0.350772 didntLike 27742 9.005850 0.548737 largeDoses 11303 0.000000 0.539131 smallDoses 0 5.757140 1.062373 smallDoses 32729 9.164656 1.624565 largeDoses 24619 1.318340 1.436243 didntLike 42414 14.075597 0.695934 largeDoses 20210 10.107550 1.308398 largeDoses 33225 7.960293 1.219760 largeDoses 54483 6.317292 0.018209 didntLike 18475 12.664194 0.595653 largeDoses 33926 2.906644 0.581657 didntLike 43865 2.388241 0.913938 didntLike 26547 6.024471 0.486215 largeDoses 44404 7.226764 1.255329 largeDoses 16674 4.183997 1.275290 smallDoses 8123 11.850211 1.096981 largeDoses 42747 11.661797 1.167935 largeDoses 56054 3.574967 0.494666 didntLike 10933 0.000000 0.107475 smallDoses 18121 7.937657 0.904799 largeDoses 11272 3.365027 1.014085 smallDoses 16297 0.000000 0.367491 smallDoses 28168 13.860672 1.293270 largeDoses 40963 10.306714 1.211594 largeDoses 31685 7.228002 0.670670 largeDoses 55164 4.508740 1.036192 didntLike 17595 0.366328 0.163652 smallDoses 1862 3.299444 0.575152 smallDoses 57087 0.573287 0.607915 didntLike 63082 9.183738 0.012280 didntLike 51213 7.842646 1.060636 largeDoses 6487 4.750964 0.558240 smallDoses 4805 11.438702 1.556334 largeDoses 30302 8.243063 1.122768 largeDoses 68680 7.949017 0.271865 didntLike 17591 7.875477 0.227085 smallDoses 74391 9.569087 0.364856 didntLike 37217 7.750103 0.869094 largeDoses 42814 0.000000 1.515293 didntLike 14738 3.396030 0.633977 smallDoses 19896 11.916091 0.025294 largeDoses 14673 0.460758 0.689586 smallDoses 32011 13.087566 
0.476002 largeDoses 58736 4.589016 1.672600 didntLike 54744 8.397217 1.534103 didntLike 29482 5.562772 1.689388 didntLike 27698 10.905159 0.619091 largeDoses 11443 1.311441 1.169887 smallDoses 56117 10.647170 0.980141 largeDoses 39514 0.000000 0.481918 didntLike 26627 8.503025 0.830861 largeDoses 16525 0.436880 1.395314 smallDoses 24368 6.127867 1.102179 didntLike 22160 12.112492 0.359680 largeDoses 6030 1.264968 1.141582 smallDoses 6468 6.067568 1.327047 smallDoses 22945 8.010964 1.681648 largeDoses 18520 3.791084 0.304072 smallDoses 34914 11.773195 1.262621 largeDoses 6121 8.339588 1.443357 smallDoses 38063 2.563092 1.464013 didntLike 23410 5.954216 0.953782 didntLike 35073 9.288374 0.767318 largeDoses 52914 3.976796 1.043109 didntLike 16801 8.585227 1.455708 largeDoses 9533 1.271946 0.796506 smallDoses 16721 0.000000 0.242778 smallDoses 5832 0.000000 0.089749 smallDoses 44591 11.521298 0.300860 largeDoses 10143 1.139447 0.415373 smallDoses 21609 5.699090 1.391892 smallDoses 23817 2.449378 1.322560 didntLike 15640 0.000000 1.228380 smallDoses 8847 3.168365 0.053993 smallDoses 50939 10.428610 1.126257 largeDoses 28521 2.943070 1.446816 didntLike 32901 10.441348 0.975283 largeDoses 42850 12.478764 1.628726 largeDoses 13499 5.856902 0.363883 smallDoses 40345 2.476420 0.096075 didntLike 43547 1.826637 0.811457 didntLike 70758 4.324451 0.328235 didntLike 19780 1.376085 1.178359 smallDoses 44484 5.342462 0.394527 didntLike 54462 11.835521 0.693301 largeDoses 20085 12.423687 1.424264 largeDoses 42291 12.161273 0.071131 largeDoses 47550 8.148360 1.649194 largeDoses 11938 1.531067 1.549756 smallDoses 40699 3.200912 0.309679 didntLike 70908 8.862691 0.530506 didntLike 73989 6.370551 0.369350 didntLike 11872 2.468841 0.145060 smallDoses 48463 11.054212 0.141508 largeDoses 15987 2.037080 0.715243 smallDoses 70036 13.364030 0.549972 didntLike 32967 10.249135 0.192735 largeDoses 63249 10.464252 1.669767 didntLike 42795 9.424574 0.013725 largeDoses 14459 4.458902 0.268444 
smallDoses 19973 0.000000 0.575976 smallDoses 5494 9.686082 1.029808 largeDoses 67902 13.649402 1.052618 didntLike 25621 13.181148 0.273014 largeDoses 27545 3.877472 0.401600 didntLike 58656 1.413952 0.451380 didntLike 7327 4.248986 1.430249 smallDoses 64555 8.779183 0.845947 didntLike 8998 4.156252 0.097109 smallDoses 11752 5.580018 0.158401 smallDoses 76319 15.040440 1.366898 didntLike 27665 12.793870 1.307323 largeDoses 67417 3.254877 0.669546 didntLike 21808 10.725607 0.588588 largeDoses 15326 8.256473 0.765891 smallDoses 20057 8.033892 1.618562 largeDoses 79341 10.702532 0.204792 didntLike 15636 5.062996 1.132555 smallDoses 35602 10.772286 0.668721 largeDoses 28544 1.892354 0.837028 didntLike 57663 1.019966 0.372320 didntLike 78727 15.546043 0.729742 didntLike 68255 11.638205 0.409125 didntLike 14964 3.427886 0.975616 smallDoses 21835 11.246174 1.475586 largeDoses 7487 0.000000 0.645045 smallDoses 8700 0.000000 1.424017 smallDoses 26226 8.242553 0.279069 largeDoses 65899 8.700060 0.101807 didntLike 6543 0.812344 0.260334 smallDoses 46556 2.448235 1.176829 didntLike 71038 13.230078 0.616147 didntLike 47657 0.236133 0.340840 didntLike 19600 11.155826 0.335131 largeDoses 37422 11.029636 0.505769 largeDoses 1363 2.901181 1.646633 smallDoses 26535 3.924594 1.143120 didntLike 47707 2.524806 1.292848 didntLike 38055 3.527474 1.449158 didntLike 6286 3.384281 0.889268 smallDoses 10747 0.000000 1.107592 smallDoses 44883 11.898890 0.406441 largeDoses 56823 3.529892 1.375844 didntLike 68086 11.442677 0.696919 didntLike 70242 10.308145 0.422722 didntLike 11409 8.540529 0.727373 smallDoses 67671 7.156949 1.691682 didntLike 61238 0.720675 0.847574 didntLike 17774 0.229405 1.038603 smallDoses 53376 3.399331 0.077501 didntLike 30930 6.157239 0.580133 didntLike 28987 1.239698 0.719989 didntLike 13655 6.036854 0.016548 smallDoses 7227 5.258665 0.933722 smallDoses 40409 12.393001 1.571281 largeDoses 13605 9.627613 0.935842 smallDoses 26400 11.130453 0.597610 largeDoses 13491 
8.842595 0.349768 largeDoses 30232 10.690010 1.456595 largeDoses 43253 5.714718 1.674780 largeDoses 55536 3.052505 1.335804 didntLike 8807 0.000000 0.059025 smallDoses 25783 9.945307 1.287952 largeDoses 22812 2.719723 1.142148 didntLike 77826 11.154055 1.608486 didntLike 38172 2.687918 0.660836 didntLike 31676 10.037847 0.962245 largeDoses 74038 12.404762 1.112080 didntLike 44738 10.237305 0.633422 largeDoses 17410 4.745392 0.662520 smallDoses 5688 4.639461 1.569431 smallDoses 36642 3.149310 0.639669 didntLike 29956 13.406875 1.639194 largeDoses 60350 6.068668 0.881241 didntLike 23758 9.477022 0.899002 largeDoses 25780 3.897620 0.560201 smallDoses 11342 5.463615 1.203677 smallDoses 36109 3.369267 1.575043 didntLike 14292 5.234562 0.825954 smallDoses 11160 0.000000 0.722170 smallDoses 23762 12.979069 0.504068 largeDoses 39567 5.376564 0.557476 didntLike 25647 13.527910 1.586732 largeDoses 14814 2.196889 0.784587 smallDoses 73590 10.691748 0.007509 didntLike 35187 1.659242 0.447066 didntLike 49459 8.369667 0.656697 largeDoses 31657 13.157197 0.143248 largeDoses 6259 8.199667 0.908508 smallDoses 33101 4.441669 0.439381 largeDoses 27107 9.846492 0.644523 largeDoses 17824 0.019540 0.977949 smallDoses 43536 8.253774 0.748700 largeDoses 67705 6.038620 1.509646 didntLike 35283 6.091587 1.694641 largeDoses 71308 8.986820 1.225165 didntLike 31054 11.508473 1.624296 largeDoses 52387 8.807734 0.713922 largeDoses 40328 0.000000 0.816676 didntLike 34844 8.889202 1.665414 largeDoses 11607 3.178117 0.542752 smallDoses 64306 7.013795 0.139909 didntLike 32721 9.605014 0.065254 largeDoses 33170 1.230540 1.331674 didntLike 37192 10.412811 0.890803 largeDoses 13089 0.000000 0.567161 smallDoses 66491 9.699991 0.122011 didntLike 15941 0.000000 0.061191 smallDoses 4272 4.455293 0.272135 smallDoses 48812 3.020977 1.502803 didntLike 28818 8.099278 0.216317 largeDoses 35394 1.157764 1.603217 didntLike 71791 10.105396 0.121067 didntLike 40668 11.230148 0.408603 largeDoses 39580 9.070058 
0.011379 largeDoses 11786 0.566460 0.478837 smallDoses 19251 0.000000 0.487300 smallDoses 56594 8.956369 1.193484 largeDoses 54495 1.523057 0.620528 didntLike 11844 2.749006 0.169855 smallDoses 45465 9.235393 0.188350 largeDoses 31033 10.555573 0.403927 largeDoses 16633 6.956372 1.519308 smallDoses 13887 0.636281 1.273984 smallDoses 52603 3.574737 0.075163 didntLike 72000 9.032486 1.461809 didntLike 68497 5.958993 0.023012 didntLike 35135 2.435300 1.211744 didntLike 26397 10.539731 1.638248 largeDoses 7313 7.646702 0.056513 smallDoses 91273 20.919349 0.644571 didntLike 24743 1.424726 0.838447 didntLike 31690 6.748663 0.890223 largeDoses 15432 2.289167 0.114881 smallDoses 58394 5.548377 0.402238 didntLike 33962 6.057227 0.432666 didntLike 31442 10.828595 0.559955 largeDoses 31044 11.318160 0.271094 largeDoses 29938 13.265311 0.633903 largeDoses 9875 0.000000 1.496715 smallDoses 51542 6.517133 0.402519 largeDoses 11878 4.934374 1.520028 smallDoses 69241 10.151738 0.896433 didntLike 37776 2.425781 1.559467 didntLike 68997 9.778962 1.195498 didntLike 67416 12.219950 0.657677 didntLike 59225 7.394151 0.954434 didntLike 29138 8.518535 0.742546 largeDoses 5962 2.798700 0.662632 smallDoses 10847 0.637930 0.617373 smallDoses 70527 10.750490 0.097415 didntLike 9610 0.625382 0.140969 smallDoses 64734 10.027968 0.282787 didntLike 25941 9.817347 0.364197 largeDoses 2763 0.646828 1.266069 smallDoses 55601 3.347111 0.914294 didntLike 31128 11.816892 0.193798 largeDoses 5181 0.000000 1.480198 smallDoses 69982 10.945666 0.993219 didntLike 52440 10.244706 0.280539 largeDoses 57350 2.579801 1.149172 didntLike 57869 2.630410 0.098869 didntLike 56557 11.746200 1.695517 largeDoses 42342 8.104232 1.326277 largeDoses 15560 12.409743 0.790295 largeDoses 34826 12.167844 1.328086 largeDoses 8569 3.198408 0.299287 smallDoses 77623 16.055513 0.541052 didntLike 78184 7.138659 0.158481 didntLike 7036 4.831041 0.761419 smallDoses 69616 10.082890 1.373611 didntLike 21546 10.066867 0.788470 
largeDoses 36715 8.129538 0.329913 largeDoses 20522 3.012463 1.138108 smallDoses 42349 3.720391 0.845974 didntLike 9037 0.773493 1.148256 smallDoses 26728 10.962941 1.037324 largeDoses 587 0.177621 0.162614 smallDoses 48915 3.085853 0.967899 didntLike 9824 8.426781 0.202558 smallDoses 4135 1.825927 1.128347 smallDoses 9666 2.185155 1.010173 smallDoses 59333 7.184595 1.261338 didntLike 36198 0.000000 0.116525 didntLike 34909 8.901752 1.033527 largeDoses 47516 2.451497 1.358795 didntLike 55807 3.213631 0.432044 didntLike 14036 3.974739 0.723929 smallDoses 42856 9.601306 0.619232 largeDoses 64007 8.363897 0.445341 didntLike 59428 6.381484 1.365019 didntLike 13730 0.000000 1.403914 smallDoses 41740 9.609836 1.438105 largeDoses 63546 9.904741 0.985862 didntLike 30417 7.185807 1.489102 largeDoses 69636 5.466703 1.216571 didntLike 64660 0.000000 0.915898 didntLike 14883 4.575443 0.535671 smallDoses 7965 3.277076 1.010868 smallDoses 68620 10.246623 1.239634 didntLike 8738 2.341735 1.060235 smallDoses 7544 3.201046 0.498843 smallDoses 6377 6.066013 0.120927 smallDoses 36842 8.829379 0.895657 largeDoses 81046 15.833048 1.568245 didntLike 67736 13.516711 1.220153 didntLike 32492 0.664284 1.116755 didntLike 39299 6.325139 0.605109 largeDoses 77289 8.677499 0.344373 didntLike 33835 8.188005 0.964896 largeDoses 71890 9.414263 0.384030 didntLike 32054 9.196547 1.138253 largeDoses 38579 10.202968 0.452363 largeDoses 55984 2.119439 1.481661 didntLike 72694 13.635078 0.858314 didntLike 42299 0.083443 0.701669 didntLike 26635 9.149096 1.051446 largeDoses 8579 1.933803 1.374388 smallDoses 37302 14.115544 0.676198 largeDoses 22878 8.933736 0.943352 largeDoses 4364 2.661254 0.946117 smallDoses 4985 0.988432 1.305027 smallDoses 37068 2.063741 1.125946 didntLike 41137 2.220590 0.690754 didntLike 67759 6.424849 0.806641 didntLike 11831 1.156153 1.613674 smallDoses 34502 3.032720 0.601847 didntLike 4088 3.076828 0.952089 smallDoses 15199 0.000000 0.318105 smallDoses 17309 7.750480 0.554015 
largeDoses 42816 10.958135 1.482500 largeDoses 43751 10.222018 0.488678 largeDoses 58335 2.367988 0.435741 didntLike 75039 7.686054 1.381455 didntLike 42878 11.464879 1.481589 largeDoses 42770 11.075735 0.089726 largeDoses 8848 3.543989 0.345853 smallDoses 31340 8.123889 1.282880 largeDoses 41413 4.331769 0.754467 largeDoses 12731 0.120865 1.211961 smallDoses 22447 6.116109 0.701523 largeDoses 33564 7.474534 0.505790 largeDoses 48907 8.819454 0.649292 largeDoses 8762 6.802144 0.615284 smallDoses 46696 12.666325 0.931960 largeDoses 36851 8.636180 0.399333 largeDoses 67639 11.730991 1.289833 didntLike 171 8.132449 0.039062 smallDoses 26674 10.296589 1.496144 largeDoses 8739 7.583906 1.005764 smallDoses 66668 9.777806 0.496377 didntLike 68732 8.833546 0.513876 didntLike 69995 4.907899 1.518036 didntLike 82008 8.362736 1.285939 didntLike 25054 9.084726 1.606312 largeDoses 33085 14.164141 0.560970 largeDoses 41379 9.080683 0.989920 largeDoses 39417 6.522767 0.038548 largeDoses 12556 3.690342 0.462281 smallDoses 39432 3.563706 0.242019 didntLike 38010 1.065870 1.141569 didntLike 69306 6.683796 1.456317 didntLike 38000 1.712874 0.243945 didntLike 46321 13.109929 1.280111 largeDoses 66293 11.327910 0.780977 didntLike 22730 4.545711 1.233254 didntLike 5952 3.367889 0.468104 smallDoses 72308 8.326224 0.567347 didntLike 60338 8.978339 1.442034 didntLike 13301 5.655826 1.582159 smallDoses 27884 8.855312 0.570684 largeDoses 11188 6.649568 0.544233 smallDoses 56796 3.966325 0.850410 didntLike 8571 1.924045 1.664782 smallDoses 4914 6.004812 0.280369 smallDoses 10784 0.000000 0.375849 smallDoses 39296 9.923018 0.092192 largeDoses 13113 2.389084 0.119284 smallDoses 70204 13.663189 0.133251 didntLike 46813 11.434976 0.321216 largeDoses 11697 0.358270 1.292858 smallDoses 44183 9.598873 0.223524 largeDoses 2225 6.375275 0.608040 smallDoses 29066 11.580532 0.458401 largeDoses 4245 5.319324 1.598070 smallDoses 34379 4.324031 1.603481 didntLike 44441 2.358370 1.273204 didntLike 2022 
0.000000 1.182708 smallDoses 26866 12.824376 0.890411 largeDoses 57070 1.587247 1.456982 didntLike 32932 8.510324 1.520683 largeDoses 51967 10.428884 1.187734 largeDoses 44432 8.346618 0.042318 largeDoses 67066 7.541444 0.809226 didntLike 17262 2.540946 1.583286 smallDoses 79728 9.473047 0.692513 didntLike 14259 0.352284 0.474080 smallDoses 6122 0.000000 0.589826 smallDoses 76879 12.405171 0.567201 didntLike 11426 4.126775 0.871452 smallDoses 2493 0.034087 0.335848 smallDoses 19910 1.177634 0.075106 smallDoses 10939 0.000000 0.479996 smallDoses 17716 0.994909 0.611135 smallDoses 31390 11.053664 1.180117 largeDoses 20375 0.000000 1.679729 smallDoses 26309 2.495011 1.459589 didntLike 33484 11.516831 0.001156 largeDoses 45944 9.213215 0.797743 largeDoses 4249 5.332865 0.109288 smallDoses 6089 0.000000 1.689771 smallDoses 7513 0.000000 1.126053 smallDoses 27862 12.640062 1.690903 largeDoses 39038 2.693142 1.317518 didntLike 19218 3.328969 0.268271 smallDoses 62911 7.193166 1.117456 didntLike 77758 6.615512 1.521012 didntLike 27940 8.000567 0.835341 largeDoses 2194 4.017541 0.512104 smallDoses 37072 13.245859 0.927465 largeDoses 15585 5.970616 0.813624 smallDoses 25577 11.668719 0.886902 largeDoses 8777 4.283237 1.272728 smallDoses 29016 10.742963 0.971401 largeDoses 21910 12.326672 1.592608 largeDoses 12916 0.000000 0.344622 smallDoses 10976 0.000000 0.922846 smallDoses 79065 10.602095 0.573686 didntLike 36759 10.861859 1.155054 largeDoses 50011 1.229094 1.638690 didntLike 1155 0.410392 1.313401 smallDoses 71600 14.552711 0.616162 didntLike 30817 14.178043 0.616313 largeDoses 54559 14.136260 0.362388 didntLike 29764 0.093534 1.207194 didntLike 69100 10.929021 0.403110 didntLike 47324 11.432919 0.825959 largeDoses 73199 9.134527 0.586846 didntLike 44461 5.071432 1.421420 didntLike 45617 11.460254 1.541749 largeDoses 28221 11.620039 1.103553 largeDoses 7091 4.022079 0.207307 smallDoses 6110 3.057842 1.631262 smallDoses 79016 7.782169 0.404385 didntLike 18289 7.981741 
0.929789 largeDoses 43679 4.601363 0.268326 didntLike 22075 2.595564 1.115375 didntLike 23535 10.049077 0.391045 largeDoses 25301 3.265444 1.572970 smallDoses 32256 11.780282 1.511014 largeDoses 36951 3.075975 0.286284 didntLike 31290 1.795307 0.194343 didntLike 38953 11.106979 0.202415 largeDoses 35257 5.994413 0.800021 didntLike 25847 9.706062 1.012182 largeDoses 32680 10.582992 0.836025 largeDoses 62018 7.038266 1.458979 didntLike 9074 0.023771 0.015314 smallDoses 33004 12.823982 0.676371 largeDoses 44588 3.617770 0.493483 didntLike 32565 8.346684 0.253317 largeDoses 38563 6.104317 0.099207 didntLike 75668 16.207776 0.584973 didntLike 9069 6.401969 1.691873 smallDoses 53395 2.298696 0.559757 didntLike 28631 7.661515 0.055981 largeDoses 71036 6.353608 1.645301 didntLike 71142 10.442780 0.335870 didntLike 37653 3.834509 1.346121 didntLike 76839 10.998587 0.584555 didntLike 9916 2.695935 1.512111 smallDoses 38889 3.356646 0.324230 didntLike 39075 14.677836 0.793183 largeDoses 48071 1.551934 0.130902 didntLike 7275 2.464739 0.223502 smallDoses 41804 1.533216 1.007481 didntLike 35665 12.473921 0.162910 largeDoses 67956 6.491596 0.032576 didntLike 41892 10.506276 1.510747 largeDoses 38844 4.380388 0.748506 didntLike 74197 13.670988 1.687944 didntLike 14201 8.317599 0.390409 smallDoses 3908 0.000000 0.556245 smallDoses 2459 0.000000 0.290218 smallDoses 32027 10.095799 1.188148 largeDoses 12870 0.860695 1.482632 smallDoses 9880 1.557564 0.711278 smallDoses 72784 10.072779 0.756030 didntLike 17521 0.000000 0.431468 smallDoses 50283 7.140817 0.883813 largeDoses 33536 11.384548 1.438307 largeDoses 9452 3.214568 1.083536 smallDoses 37457 11.720655 0.301636 largeDoses 17724 6.374475 1.475925 largeDoses 43869 5.749684 0.198875 largeDoses 264 3.871808 0.552602 smallDoses 25736 8.336309 0.636238 largeDoses 39584 9.710442 1.503735 largeDoses 31246 1.532611 1.433898 didntLike 49567 9.785785 0.984614 largeDoses 7052 2.633627 1.097866 smallDoses 35493 9.238935 0.494701 largeDoses 
10986 1.205656 1.398803 smallDoses 49508 3.124909 1.670121 didntLike 5734 7.935489 1.585044 smallDoses 65479 12.746636 1.560352 didntLike 77268 10.732563 0.545321 didntLike 28490 3.977403 0.766103 didntLike 13546 4.194426 0.450663 smallDoses 37166 9.610286 0.142912 largeDoses 16381 4.797555 1.260455 smallDoses 10848 1.615279 0.093002 smallDoses 35405 4.614771 1.027105 didntLike 15917 0.000000 1.369726 smallDoses 6131 0.608457 0.512220 smallDoses 67432 6.558239 0.667579 didntLike 30354 12.315116 0.197068 largeDoses 69696 7.014973 1.494616 didntLike 33481 8.822304 1.194177 largeDoses 43075 10.086796 0.570455 largeDoses 38343 7.241614 1.661627 largeDoses 14318 4.602395 1.511768 smallDoses 5367 7.434921 0.079792 smallDoses 37894 10.467570 1.595418 largeDoses 36172 9.948127 0.003663 largeDoses 40123 2.478529 1.568987 didntLike 10976 5.938545 0.878540 smallDoses 12705 0.000000 0.948004 smallDoses 12495 5.559181 1.357926 smallDoses 35681 9.776654 0.535966 largeDoses 46202 3.092056 0.490906 didntLike 11505 0.000000 1.623311 smallDoses 22834 4.459495 0.538867 didntLike 49901 8.334306 1.646600 largeDoses 71932 11.226654 0.384686 didntLike 13279 3.904737 1.597294 smallDoses 49112 7.038205 1.211329 largeDoses 77129 9.836120 1.054340 didntLike 37447 1.990976 0.378081 didntLike 62397 9.005302 0.485385 didntLike 0 1.772510 1.039873 smallDoses 15476 0.458674 0.819560 smallDoses 40625 10.003919 0.231658 largeDoses 36706 0.520807 1.476008 didntLike 28580 10.678214 1.431837 largeDoses 25862 4.425992 1.363842 didntLike 63488 12.035355 0.831222 didntLike 33944 10.606732 1.253858 largeDoses 30099 1.568653 0.684264 didntLike 13725 2.545434 0.024271 smallDoses 36768 10.264062 0.982593 largeDoses 64656 9.866276 0.685218 didntLike 14927 0.142704 0.057455 smallDoses 43231 9.853270 1.521432 largeDoses 66087 6.596604 1.653574 didntLike 19806 2.602287 1.321481 smallDoses 41081 10.411776 0.664168 largeDoses 10277 7.083449 0.622589 smallDoses 7014 2.080068 1.254441 smallDoses 17275 0.522844 
1.622458 smallDoses 31600 10.362000 1.544827 largeDoses 59956 3.412967 1.035410 didntLike 42181 6.796548 1.112153 largeDoses 51743 4.092035 0.075804 didntLike 5194 2.763811 1.564325 smallDoses 30832 12.547439 1.402443 largeDoses 7976 5.708052 1.596152 smallDoses 14602 4.558025 0.375806 smallDoses 41571 11.642307 0.438553 largeDoses 55028 3.222443 0.121399 didntLike 5837 4.736156 0.029871 smallDoses 39808 10.839526 0.836323 largeDoses 20944 4.194791 0.235483 smallDoses 22146 14.936259 0.888582 largeDoses 42169 3.310699 1.521855 didntLike 7010 2.971931 0.034321 smallDoses 3807 9.261667 0.537807 smallDoses 29241 7.791833 1.111416 largeDoses 52696 1.480470 1.028750 didntLike 42545 3.677287 0.244167 didntLike 24437 2.202967 1.370399 didntLike 16037 5.796735 0.935893 smallDoses 8493 3.063333 0.144089 smallDoses 68080 11.233094 0.492487 didntLike 59016 1.965570 0.005697 didntLike 11810 8.616719 0.137419 smallDoses 68630 6.609989 1.083505 didntLike 7629 1.712639 1.086297 smallDoses 71992 10.117445 1.299319 didntLike 13398 0.000000 1.104178 smallDoses 26241 9.824777 1.346821 largeDoses 11160 1.653089 0.980949 smallDoses 76701 18.178822 1.473671 didntLike 32174 6.781126 0.885340 largeDoses 45043 8.206750 1.549223 largeDoses 42173 10.081853 1.376745 largeDoses 69801 6.288742 0.112799 didntLike 41737 3.695937 1.543589 didntLike 46979 6.726151 1.069380 largeDoses 79267 12.969999 1.568223 didntLike 4615 2.661390 1.531933 smallDoses 32907 7.072764 1.117386 largeDoses 37444 9.123366 1.318988 largeDoses 569 3.743946 1.039546 smallDoses 8723 2.341300 0.219361 smallDoses 6024 0.541913 0.592348 smallDoses 52252 2.310828 1.436753 didntLike 8358 6.226597 1.427316 smallDoses 26166 7.277876 0.489252 largeDoses 18471 0.000000 0.389459 smallDoses 3386 7.218221 1.098828 smallDoses 41544 8.777129 1.111464 largeDoses 10480 2.813428 0.819419 smallDoses 5894 2.268766 1.412130 smallDoses 7273 6.283627 0.571292 smallDoses 22272 7.520081 1.626868 largeDoses 31369 11.739225 0.027138 largeDoses 10708 
3.746883 0.877350 smallDoses 69364 12.089835 0.521631 didntLike 37760 12.310404 0.259339 largeDoses 13004 0.000000 0.671355 smallDoses 37885 2.728800 0.331502 didntLike 52555 10.814342 0.607652 largeDoses 38997 12.170268 0.844205 largeDoses 69698 6.698371 0.240084 didntLike 11783 3.632672 1.643479 smallDoses 47636 10.059991 0.892361 largeDoses 15744 1.887674 0.756162 smallDoses 69058 8.229125 0.195886 didntLike 33057 7.817082 0.476102 largeDoses 28681 12.277230 0.076805 largeDoses 34042 10.055337 1.115778 largeDoses 29928 3.596002 1.485952 didntLike 9734 2.755530 1.420655 smallDoses 7344 7.780991 0.513048 smallDoses 7387 0.093705 0.391834 smallDoses 33957 8.481567 0.520078 largeDoses 9936 3.865584 0.110062 smallDoses 36094 9.683709 0.779984 largeDoses 39835 10.617255 1.359970 largeDoses 64486 7.203216 1.624762 didntLike 0 7.601414 1.215605 smallDoses 39539 1.386107 1.417070 didntLike 66972 9.129253 0.594089 didntLike 15029 1.363447 0.620841 smallDoses 44909 3.181399 0.359329 didntLike 38183 13.365414 0.217011 largeDoses 37372 4.207717 1.289767 didntLike 0 4.088395 0.870075 smallDoses 17786 3.327371 1.142505 smallDoses 39055 1.303323 1.235650 didntLike 37045 7.999279 1.581763 largeDoses 6435 2.217488 0.864536 smallDoses 72265 7.751808 0.192451 didntLike 28152 14.149305 1.591532 largeDoses 25931 8.765721 0.152808 largeDoses 7538 3.408996 0.184896 smallDoses 1315 1.251021 0.112340 smallDoses 12292 6.160619 1.537165 smallDoses 49248 1.034538 1.585162 didntLike 9025 0.000000 1.034635 smallDoses 13438 2.355051 0.542603 smallDoses 69683 6.614543 0.153771 didntLike 25374 10.245062 1.450903 largeDoses 55264 3.467074 1.231019 didntLike 38324 7.487678 1.572293 largeDoses 69643 4.624115 1.185192 didntLike 44058 8.995957 1.436479 largeDoses 41316 11.564476 0.007195 largeDoses 29119 3.440948 0.078331 didntLike 51656 1.673603 0.732746 didntLike 3030 4.719341 0.699755 smallDoses 35695 10.304798 1.576488 largeDoses 1537 2.086915 1.199312 smallDoses 9083 6.338220 1.131305 smallDoses 
47744 8.254926 0.710694 largeDoses 71372 16.067108 0.974142 didntLike 37980 1.723201 0.310488 didntLike 42385 3.785045 0.876904 didntLike 22687 2.557561 0.123738 didntLike 39512 9.852220 1.095171 largeDoses 11885 3.679147 1.557205 smallDoses 4944 9.789681 0.852971 smallDoses 73230 14.958998 0.526707 didntLike 17585 11.182148 1.288459 largeDoses 68737 7.528533 1.657487 didntLike 13818 5.253802 1.378603 smallDoses 31662 13.946752 1.426657 largeDoses 86686 15.557263 1.430029 didntLike 43214 12.483550 0.688513 largeDoses 24091 2.317302 1.411137 didntLike 52544 10.069724 0.766119 largeDoses 61861 5.792231 1.615483 didntLike 47903 4.138435 0.475994 didntLike 37190 12.929517 0.304378 largeDoses 6013 9.378238 0.307392 smallDoses 27223 8.361362 1.643204 largeDoses 69027 7.939406 1.325042 didntLike 78642 10.735384 0.705788 didntLike 30254 11.592723 0.286188 largeDoses 21704 10.098356 0.704748 largeDoses 34985 9.299025 0.545337 largeDoses 31316 11.158297 0.218067 largeDoses 76368 16.143900 0.558388 didntLike 27953 10.971700 1.221787 largeDoses 152 0.000000 0.681478 smallDoses 9146 3.178961 1.292692 smallDoses 75346 17.625350 0.339926 didntLike 26376 1.995833 0.267826 didntLike 35255 10.640467 0.416181 largeDoses 19198 9.628339 0.985462 largeDoses 12518 4.662664 0.495403 smallDoses 25453 5.754047 1.382742 smallDoses 12530 0.000000 0.037146 smallDoses 62230 9.334332 0.198118 didntLike 9517 3.846162 0.619968 smallDoses 71161 10.685084 0.678179 didntLike 1593 4.752134 0.359205 smallDoses 33794 0.697630 0.966786 didntLike 39710 10.365836 0.505898 largeDoses 16941 0.461478 0.352865 smallDoses 69209 11.339537 1.068740 didntLike 4446 5.420280 0.127310 smallDoses 9347 3.469955 1.619947 smallDoses 55635 8.517067 0.994858 largeDoses 65889 8.306512 0.413690 didntLike 10753 2.628690 0.444320 smallDoses 7055 0.000000 0.802985 smallDoses 7905 0.000000 1.170397 smallDoses 53447 7.298767 1.582346 largeDoses 9194 7.331319 1.277988 smallDoses 61914 9.392269 0.151617 didntLike 15630 5.541201 
1.180596 smallDoses 79194 15.149460 0.537540 didntLike 12268 5.515189 0.250562 smallDoses 33682 7.728898 0.920494 largeDoses 26080 11.318785 1.510979 largeDoses 19119 3.574709 1.531514 smallDoses 30902 7.350965 0.026332 largeDoses 63039 7.122363 1.630177 didntLike 51136 1.828412 1.013702 didntLike 35262 10.117989 1.156862 largeDoses 42776 11.309897 0.086291 largeDoses 64191 8.342034 1.388569 didntLike 15436 0.241714 0.715577 smallDoses 14402 10.482619 1.694972 smallDoses 6341 9.289510 1.428879 smallDoses 14113 4.269419 0.134181 smallDoses 6390 0.000000 0.189456 smallDoses 8794 0.817119 0.143668 smallDoses 43432 1.508394 0.652651 didntLike 38334 9.359918 0.052262 largeDoses 34068 10.052333 0.550423 largeDoses 30819 11.111660 0.989159 largeDoses 22239 11.265971 0.724054 largeDoses 28725 10.383830 0.254836 largeDoses 57071 3.878569 1.377983 didntLike 72420 13.679237 0.025346 didntLike 28294 10.526846 0.781569 largeDoses 9896 0.000000 0.924198 smallDoses 65821 4.106727 1.085669 didntLike 7645 8.118856 1.470686 smallDoses 71289 7.796874 0.052336 didntLike 5128 2.789669 1.093070 smallDoses 13711 6.226962 0.287251 smallDoses 22240 10.169548 1.660104 largeDoses 15092 0.000000 1.370549 smallDoses 5017 7.513353 0.137348 smallDoses 10141 8.240793 0.099735 smallDoses 35570 14.612797 1.247390 largeDoses 46893 3.562976 0.445386 didntLike 8178 3.230482 1.331698 smallDoses 55783 3.612548 1.551911 didntLike 1148 0.000000 0.332365 smallDoses 10062 3.931299 0.487577 smallDoses 74124 14.752342 1.155160 didntLike 66603 10.261887 1.628085 didntLike 11893 2.787266 1.570402 smallDoses 50908 15.112319 1.324132 largeDoses 39891 5.184553 0.223382 largeDoses 65915 3.868359 0.128078 didntLike 65678 3.507965 0.028904 didntLike 62996 11.019254 0.427554 didntLike 36851 3.812387 0.655245 didntLike 36669 11.056784 0.378725 largeDoses 38876 8.826880 1.002328 largeDoses 26878 11.173861 1.478244 largeDoses 46246 11.506465 0.421993 largeDoses 12761 7.798138 0.147917 largeDoses 35282 10.155081 1.370039 
largeDoses 68306 10.645275 0.693453 didntLike 31262 9.663200 1.521541 largeDoses 34754 10.790404 1.312679 largeDoses 13408 2.810534 0.219962 smallDoses 30365 9.825999 1.388500 largeDoses 10709 1.421316 0.677603 smallDoses 24332 11.123219 0.809107 largeDoses 45517 13.402206 0.661524 largeDoses 6178 1.212255 0.836807 smallDoses 10639 1.568446 1.297469 smallDoses 29613 3.343473 1.312266 didntLike 22392 5.400155 0.193494 didntLike 51126 3.818754 0.590905 didntLike 53644 7.973845 0.307364 largeDoses 51417 9.078824 0.734876 largeDoses 24859 0.153467 0.766619 didntLike 61732 8.325167 0.028479 didntLike 71128 7.092089 1.216733 didntLike 27276 5.192485 1.094409 largeDoses 30453 10.340791 1.087721 largeDoses 18670 2.077169 1.019775 smallDoses 70600 10.151966 0.993105 didntLike 12683 0.046826 0.809614 smallDoses 81597 11.221874 1.395015 didntLike 69959 14.497963 1.019254 didntLike 8124 3.554508 0.533462 smallDoses 18867 3.522673 0.086725 smallDoses 80886 14.531655 0.380172 didntLike 55895 3.027528 0.885457 didntLike 31587 1.845967 0.488985 didntLike 10591 10.226164 0.804403 largeDoses 70096 10.965926 1.212328 didntLike 53151 2.129921 1.477378 didntLike 11992 0.000000 1.606849 smallDoses 33114 9.489005 0.827814 largeDoses 7413 0.000000 1.020797 smallDoses 10583 0.000000 1.270167 smallDoses 58668 6.556676 0.055183 didntLike 35018 9.959588 0.060020 largeDoses 70843 7.436056 1.479856 didntLike 14011 0.404888 0.459517 smallDoses 35015 9.952942 1.650279 largeDoses 70839 15.600252 0.021935 didntLike 3024 2.723846 0.387455 smallDoses 5526 0.513866 1.323448 smallDoses 5113 0.000000 0.861859 smallDoses 20851 7.280602 1.438470 smallDoses 40999 9.161978 1.110180 largeDoses 15823 0.991725 0.730979 smallDoses 35432 7.398380 0.684218 largeDoses 53711 12.149747 1.389088 largeDoses 64371 9.149678 0.874905 didntLike 9289 9.666576 1.370330 smallDoses 60613 3.620110 0.287767 didntLike 18338 5.238800 1.253646 smallDoses 22845 14.715782 1.503758 largeDoses 74676 14.445740 1.211160 didntLike 34143 
13.609528 0.364240 largeDoses 14153 3.141585 0.424280 smallDoses 9327 0.000000 0.120947 smallDoses 18991 0.454750 1.033280 smallDoses 9193 0.510310 0.016395 smallDoses 2285 3.864171 0.616349 smallDoses 9493 6.724021 0.563044 smallDoses 2371 4.289375 0.012563 smallDoses 13963 0.000000 1.437030 smallDoses 2299 3.733617 0.698269 smallDoses 5262 2.002589 1.380184 smallDoses 4659 2.502627 0.184223 smallDoses 17582 6.382129 0.876581 smallDoses 27750 8.546741 0.128706 largeDoses 9868 2.694977 0.432818 smallDoses 18333 3.951256 0.333300 smallDoses 3780 9.856183 0.329181 smallDoses 18190 2.068962 0.429927 smallDoses 11145 3.410627 0.631838 smallDoses 68846 9.974715 0.669787 didntLike 26575 10.650102 0.866627 largeDoses 48111 9.134528 0.728045 largeDoses 43757 7.882601 1.332446 largeDoses
数据归一化
约会样本数据:
样本 | 玩游戏所耗时间百分比 | 每年获得的飞行常客里程数 | 每周消费的冰淇淋公升数 | 样本分类 |
---|---|---|---|---|
1 | 0.8 | 400 | 0.5 | 1 |
2 | 12 | 134000 | 0.9 | 3 |
3 | 0 | 20000 | 1.1 | 2 |
4 | 67 | 32000 | 0.1 | 2 |
这三种特征是同等重要的,因此作为三个等权重的特征之一,飞行常客里程数并不应该如此严重地影响到计算结果。
将任意取值范围的特征值转化为 [0, 1] 区间内的值(min-max 归一化):newValue = (oldValue - min) / (max - min),其中 min 和 max 分别是该特征在数据集中的最小值和最大值。
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# Read the dating data set: space-separated, no header row, so the four
# columns are named explicitly (three features followed by the label).
column_names = ['x1', 'x2', 'x3', 'y']
data = pd.read_csv('datingTestSet.txt', sep=' ', names=column_names)

feature = data[['x1', 'x2', 'x3']]  # feature matrix X
label = data['y'].values            # label vector y

# Rescale every feature column to the [0, 1] range so that no single
# feature dominates the distance computation.
feature_mm = MinMaxScaler().fit_transform(feature)
feature_mm
# Hold-out evaluation of the hand-written kNN classifier: the first
# `n_test` rows are the test set, all remaining rows are the training set.
n_test = 100
errorCount = 0.0  # number of misclassified test samples
for i in range(n_test):
    # Predict the label of test row i from the training rows with k = 4.
    classifierResult = classify(feature_mm[i, :], feature_mm[n_test:, :],
                                label[n_test:], 4)
    # Compare by value: `is not` would test object identity, which is not
    # guaranteed to agree with equality for two equal strings.
    if classifierResult != label[i]:
        errorCount += 1.0
# Error rate = misclassified samples / number of test samples.
print("错误率:", errorCount / n_test)
注意:sklearn 中模型的输入特征 X 必须是二维结构(样本数 × 特征数),比如:二维列表、二维数组、DataFrame、矩阵
多数表决原则:返回 k 个最近样本点中出现次数最多的类别
文档直达超链接↓
https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KNeighborsClassifier.html#sklearn.neighbors.KNeighborsClassifier
from sklearn.neighbors import KNeighborsClassifier

# Toy classification problem: one feature per sample (kept 2-D, one row per
# sample) and a binary label.
X = [[0], [1], [2], [3]]
y = [0, 0, 1, 1]

# Build a 3-nearest-neighbour classifier and fit it on the toy data.
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X, y)

# Predict the class of a single query point.
knn.predict([[1.1]])

# Several query points can be predicted in one call.
knn.predict([[1.1], [2]])

# Per-class probability estimates for the query point.
knn.predict_proba([[1.1]])
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

# The iris data set must be loaded before it can be split; nothing earlier
# in this file defines `iris`.
iris = load_iris()

# Random train/test split.
#   test_size    -- fraction of the samples placed in the test set
#   random_state -- seed of the shuffle; a fixed value makes the four
#                   returned arrays reproducible between runs
# Optional: pass stratify=iris.target to keep the class proportions the
# same in both subsets.
iris_train_X, iris_test_X, iris_train_y, iris_test_y = train_test_split(
    iris.data, iris.target, test_size=0.2, random_state=2)
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier

# Fit a 6-nearest-neighbour classifier on the training split and predict
# the labels of the test split.
knn = KNeighborsClassifier(n_neighbors=6)
knn.fit(iris_train_X, iris_train_y)
predict_result = knn.predict(iris_test_X)
print('预测结果', predict_result)

# Same model again, this time evaluated with score(): it predicts on the
# test features and compares with the true labels (accuracy for a
# classifier).
knn = KNeighborsClassifier(n_neighbors=6)
knn.fit(iris_train_X, iris_train_y)
knn.score(iris_test_X, iris_test_y)

# Other metrics are listed at
# https://scikit-learn.org/stable/modules/model_evaluation.html#model-evaluation
accuracy_score(iris_test_y, predict_result)
返回 k 个最近样本点的目标值的均值
文档直达超链接↓
https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KNeighborsRegressor.html#sklearn.neighbors.KNeighborsRegressor
from sklearn.neighbors import KNeighborsRegressor

# Toy regression problem: one feature per sample, numeric target.
X = [[0], [1], [2], [3]]
y = [0, 0, 1, 1]

# 2-nearest-neighbour regressor.
knn = KNeighborsRegressor(n_neighbors=2)
knn.fit(X, y)

# 1.5 is closest to the samples 1 and 2, whose targets are 0 and 1, so the
# prediction is their mean, 0.5.
print(knn.predict([[1.5]]))
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import StandardScaler

# 1. Load the Boston housing data set.
try:
    from sklearn.datasets import load_boston
    ld = load_boston()
except ImportError:
    # load_boston was removed in scikit-learn 1.2. Rebuild the same
    # data/target arrays from the original source (needs network access);
    # each record spans two physical lines in that file.
    from sklearn.utils import Bunch
    _raw_boston = pd.read_csv("http://lib.stat.cmu.edu/datasets/boston",
                              sep=r"\s+", skiprows=22, header=None)
    ld = Bunch(
        data=np.hstack([_raw_boston.values[::2, :],
                        _raw_boston.values[1::2, :2]]),
        target=_raw_boston.values[1::2, 2],
    )
ld.data.shape  # 506 samples, 13 features

# The features are on very different scales; the summary statistics (for
# example the column means) show this, so the data needs normalisation or
# standardisation before a distance-based model is used.
pd.DataFrame(ld.data).describe()
# Split first, standardise afterwards, so that the scaler only ever sees
# the training data.
x_train, x_test, y_train, y_test = train_test_split(
    ld.data, ld.target, test_size=0.2, random_state=3)

# 2. Standardisation: learn the scaling parameters from the training set
# and apply the same transformation to both subsets.
std_x = StandardScaler()
std_x.fit(x_train)
x_train = std_x.transform(x_train)
x_test = std_x.transform(x_test)
from sklearn.metrics import mean_squared_error

# kNN regression with the default hyper-parameters.
knn = KNeighborsRegressor()
knn.fit(x_train, y_train)

# For a regressor, score() returns the R^2 coefficient.
knn.score(x_test, y_test)

# Other metrics are listed at
# https://scikit-learn.org/stable/modules/model_evaluation.html#model-evaluation
# Here: mean squared error on the test set.
y_test_pred = knn.predict(x_test)
mean_squared_error(y_test, y_test_pred)
超参数调节:
分类问题:准确率
回归问题:R²系数
from sklearn.model_selection import GridSearchCV  # grid search + cross-validation

# Search space: 2 weighting schemes x 20 neighbour counts = 40 combinations.
# GridSearchCV expects the grid as a dict {parameter name: candidate values}.
params = dict(weights=['uniform', 'distance'], n_neighbors=range(2, 22))

knn = KNeighborsRegressor()

# cv=10     -- 10-fold cross-validation: train on 9 parts, validate on 1
#              (10 or 5 are the usual choices)
# verbose=2 -- print progress information
# n_jobs=-1 -- run the fits in parallel
grid_search = GridSearchCV(knn, param_grid=params, cv=10, verbose=2, n_jobs=-1)
grid_search.fit(x_train, y_train)

# Best combination by mean cross-validated R^2 (the default score for a
# regressor). In the author's run this was n_neighbors=5, weights='distance'.
grid_search.best_params_

# R^2 of the tuned model on the held-out test set.
grid_search.score(x_test, y_test)
其他评分标准可以在下面这个链接里查找,不过一般使用默认的评分标准就可以了
https://scikit-learn.org/stable/modules/model_evaluation.html#model-evaluation
from sklearn.metrics import r2_score

# Repeat the grid search, this time ranking the candidates by negative
# mean squared error instead of the default score.
grid_search = GridSearchCV(
    estimator=knn,
    param_grid=params,
    scoring='neg_mean_squared_error',
    cv=10,
    n_jobs=-1,
    verbose=2,
)
grid_search.fit(x_train, y_train)
grid_search.best_params_  # best parameter combination under the new criterion

# R^2 on the test set, computed explicitly: r2_score(true values, predictions).
y_test_pred = grid_search.predict(x_test)
r2_score(y_test, y_test_pred)
对文件目录的一些常用操作
import os

# Current working directory.
os.getcwd()

# Switch the working directory, then confirm the change.
digits_dir = r'F:\Anaconda3\数据挖掘概论、预处理与特征工程\算法-第一部分\1.KNN\数据和代码\trainingDigits'
os.chdir(digits_dir)
os.getcwd()

# Names of all entries in the directory.
os.listdir(digits_dir)

# Create a directory together with any missing parent directories.
os.makedirs(r'F:\Anaconda3\1\2')
代码实现:
import os
from os import listdir  # names of all entries in a directory

import numpy as np
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier

# Every relative path below is resolved against this directory.
os.chdir('F:\\Anaconda3\\数据挖掘概论、预处理与特征工程\\算法-第一部分\\1.KNN\\数据和代码')


def img2vector(filename):
    """Read one 32x32 text image and flatten it into a 1x1024 vector.

    The first 32 characters of each of the first 32 lines of the file are
    converted with int() and stored row by row.

    Args:
        filename: path of the text file to read.

    Returns:
        A numpy array of shape (1, 1024).

    Raises:
        IndexError: if one of the first 32 lines has fewer than 32 characters.
        ValueError: if a character cannot be converted to an integer.
    """
    returnVect = np.zeros((1, 1024))
    # The context manager closes the file even when parsing fails.
    with open(filename) as fr:
        for i in range(32):          # rows
            lineStr = fr.readline()
            for j in range(32):      # columns
                returnVect[0, 32 * i + j] = int(lineStr[j])
    return returnVect


def _load_digit_dir(dirname):
    """Load every image file in `dirname`; return (file names, labels, matrix).

    The label is the integer before the first '_' in each file name, and
    row i of the matrix holds the flattened image of file i.
    """
    file_list = listdir(dirname)
    labels = [int(name.split('_')[0]) for name in file_list]
    mat = np.zeros((len(file_list), 1024))
    for row, name in enumerate(file_list):
        mat[row, :] = img2vector('%s/%s' % (dirname, name))
    return file_list, labels, mat


# Training set: an m x 1024 matrix plus one label per row
# (the original note records m = 1934 for the author's data).
trainingFileList, trainingLabels, trainingMat = _load_digit_dir('trainingDigits')
m = len(trainingFileList)

# Test set, processed exactly like the training set.
testFileList, testLabels, testMat = _load_digit_dir('testDigits')
mTest = len(testFileList)
# Before any hyper-parameter tuning, fit a default model as a sanity check
# that the prepared data is usable.
classifier = KNeighborsClassifier()
classifier.fit(trainingMat, trainingLabels)
classifier.score(testMat, testLabels)  # accuracy on the test set
# Hyper-parameter tuning, step 1: coarse grid.
params = dict(weights=['uniform', 'distance'], n_neighbors=range(2, 20, 2))
knn = KNeighborsClassifier()
grid_search = GridSearchCV(knn, param_grid=params, cv=10, n_jobs=-1, verbose=2)
grid_search.fit(trainingMat, trainingLabels)
# Check whether the optimum lies on the boundary of the grid; if it does,
# the range was too narrow and has to be widened before searching again.
grid_search.best_params_

# Step 2: fine grid around the coarse optimum (4, per the original note).
params = dict(weights=['uniform', 'distance'], n_neighbors=list(range(3, 8)))
knn = KNeighborsClassifier()
grid_search = GridSearchCV(knn, param_grid=params, cv=10, n_jobs=-1, verbose=2)
grid_search.fit(trainingMat, trainingLabels)
# In the author's run the optimum stayed at 4, confirming the coarse result.
grid_search.best_params_

# Accuracy of the tuned model on the test set (the original note reports a
# small improvement over the default model).
grid_search.score(testMat, testLabels)
from sklearn import metrics

# Predictions of the tuned model on the test set.
predicted = grid_search.predict(testMat)

# Confusion matrix: entries on the diagonal are correct predictions, every
# off-diagonal entry counts misclassified samples.
confusion = metrics.confusion_matrix(testLabels, predicted)
print(confusion)

# Per-class classification report.
report = metrics.classification_report(testLabels, predicted)
print(report)
import matplotlib.pyplot as plt
import numpy as np
from sklearn import neighbors

# Synthetic 1-D regression data: 40 sorted random points in [0, 5) with a
# sine target; every 5th target gets uniform noise in (-0.5, 0.5].
np.random.seed(0)
X = np.sort(5 * np.random.rand(40, 1), axis=0)
y = np.sin(X).ravel()
y[::5] += 1 * (0.5 - np.random.rand(8))

# Fit a kNN regressor and predict on the training points themselves.
n_neighbors = 5
knn = neighbors.KNeighborsRegressor(n_neighbors)
y_ = knn.fit(X, y).predict(X)

plt.scatter(X, y, color='darkorange', label='data')
plt.plot(X, y_, color='navy', label='prediction')
plt.legend()  # without this call the label= arguments above are never shown
plt.show()