sklearn中K近鄰的使用(python)
摘要:本文使用 sklearn 中的 K 近鄰模型分別進行回歸與分類,並研究參數 weights、n_neighbors、p 對模型預測性能的影響。
00 構造數據
# -*- coding: utf-8 -*-
import numpy as np
import matplotlib.pyplot as plt
from sklearn import neighbors
# Build a noisy 1-D regression problem: 100 points drawn uniformly from
# [0, 2*pi) with sin(x) as the target.
x = np.pi * 2 * np.random.rand(100)
y = np.sin(x)
# Perturb every fifth target (20 of the 100) with uniform noise in [-1, 1).
y[::5] += (np.random.rand(20) - 0.5) * 2

# Random 75/25 split: dex1 holds the training indices, dex2 the remaining
# indices in ascending order.
dex1 = np.random.choice(100, 75, replace=False)
# One vectorised call yields the sorted complement, replacing a Python loop
# that scanned dex1 once for every candidate index.
dex2 = np.setdiff1d(np.arange(100), dex1).tolist()

# Every array handed to the estimators is shaped as a single column (n, 1).
train_x = x[dex1].reshape(-1, 1)
train_y = y[dex1].reshape(-1, 1)
test_x = x[dex2].reshape(-1, 1)
test_y = y[dex2].reshape(-1, 1)
01 KNN回歸
# Train a K-nearest-neighbours regressor with its default settings (fit()
# returns the estimator itself) and evaluate it on the held-out points.
regre = neighbors.KNeighborsRegressor().fit(train_x, train_y)
regre.score(test_x, test_y)
Out[112]: 0.9719249717643359
# Look up the 5 nearest training neighbours of every test point;
# return_distance=True asks for the distances as well as the indices.
regre.kneighbors(X=test_x, n_neighbors=5, return_distance=True)
研究參數weights,n_neighbors,p對模型預測性能的影響:
# Compare the two neighbour-weighting schemes of the regressor over a sweep
# of k values, drawing one score-vs-k curve per scheme.
fig = plt.figure()
weights = ['uniform', 'distance']
# 50 integer k values spread evenly from 1 up to the training-set size.
ks = np.linspace(1, len(train_y), 50, dtype='int')
for weight in weights:
    # Test-set scores for the current weighting scheme, one per k.
    scor = []
    for k in ks:
        regre = neighbors.KNeighborsRegressor(weights=weight, n_neighbors=k)
        regre.fit(train_x, train_y)
        scor.append(regre.score(test_x, test_y))
    # Plot once per scheme, after the whole k sweep has been collected.
    plt.plot(ks, scor, label=weight)
plt.legend()

# Compare several values of the distance parameter p of the regressor over a
# sweep of k values, drawing one score-vs-k curve per p.
fig = plt.figure()
ps = [1, 2, 10]
# 50 integer k values spread evenly from 1 up to the training-set size.
ks = np.linspace(1, len(train_y), 50, dtype='int')
for p in ps:
    # Test-set scores for the current p, one per k.
    scor = []
    for k in ks:
        regre = neighbors.KNeighborsRegressor(p=p, n_neighbors=k)
        regre.fit(train_x, train_y)
        scor.append(regre.score(test_x, test_y))
    # Plot once per p, after the whole k sweep has been collected.
    plt.plot(ks, scor, label='p=' + str(p))
plt.legend(loc='best')

02 獲取sklearn中數據
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets,neighbors
# Load the digits data set bundled with scikit-learn and split it at random
# into 1500 training samples and the remaining test samples.
digits = datasets.load_digits()
# Take the sample count from the data itself instead of hard-coding 1797.
n_total = len(digits.target)
dex1 = np.random.choice(n_total, 1500, replace=False)
# One vectorised call yields the sorted complement, replacing a Python loop
# that scanned dex1 once for every candidate index.
dex2 = np.setdiff1d(np.arange(n_total), dex1).tolist()

train_x = digits.data[dex1]
train_y = digits.target[dex1]
test_x = digits.data[dex2]
test_y = digits.target[dex2]
03 KNN分類
# Train a K-nearest-neighbours classifier with its default settings (fit()
# returns the estimator itself) and evaluate it on the held-out digits.
classi = neighbors.KNeighborsClassifier().fit(train_x, train_y)
classi.score(test_x, test_y)
Out[120]: 0.9966329966329966
研究參數weights,n_neighbors,p對模型預測性能的影響:
# Compare the two neighbour-weighting schemes of the classifier over a sweep
# of k values, drawing one score-vs-k curve per scheme.
fig = plt.figure()
weights = ['uniform', 'distance']
# 100 integer k values spread evenly from 1 up to the training-set size.
ks = np.linspace(1, len(train_y), 100, dtype='int')
for weight in weights:
    # Test-set scores for the current weighting scheme, one per k.
    scor = []
    for k in ks:
        classi = neighbors.KNeighborsClassifier(weights=weight, n_neighbors=k)
        classi.fit(train_x, train_y)
        scor.append(classi.score(test_x, test_y))
    # Plot once per scheme, after the whole k sweep has been collected.
    plt.plot(ks, scor, label=weight)
plt.legend(loc='best')
# Compare several values of the distance parameter p of the classifier over
# a sweep of k values, drawing one score-vs-k curve per p.
fig = plt.figure()
ps = [1, 2, 10]
# 100 integer k values spread evenly from 1 up to the training-set size.
ks = np.linspace(1, len(train_y), 100, dtype='int')
for p in ps:
    # Test-set scores for the current p, one per k.
    scor = []
    for k in ks:
        classi = neighbors.KNeighborsClassifier(p=p, n_neighbors=k)
        classi.fit(train_x, train_y)
        scor.append(classi.score(test_x, test_y))
    # Plot once per p, after the whole k sweep has been collected.
    plt.plot(ks, scor, label='p=' + str(p))
plt.legend(loc='best')

04 總結
01 K近鄰模型的重要參數有三個:K值(n_neighbors)、距離度量(p)與加權方式(weights),即上文所研究的參數;
02 K近鄰模型既可以用于分類(多數表決),也可以用于回歸(均值);
03 K近鄰模型是典型的 lazy-learning;
工程師必備
- 項目客服
- 培訓客服
- 平臺客服
TOP




















