I am trying to learn a little about nearest neighbour matching. Below you can see 2 scatter plots. The first shows the real data. I am trying to use scikit-learn's NN-classifier to identify the white observations. The second scatter plot shows my achievement - entirely useless, as you can see.
I don't understand why this is the case. It seems that the white observations are closely related to each other and different from the other observations. What is happening here?
Here is what I do:
# Nearest-neighbour classification of `matauf_measure` from the two features
# `t_ums_ma` and `t_matauf`: draws a scatter plot of the raw data, then one
# decision-boundary plot per weighting scheme ('uniform' and 'distance').

# import necessary packages
import pandas as pd
import numpy as np
import sklearn as skl
# train_test_split lives in sklearn.model_selection; the older
# sklearn.cross_validation module has been removed from scikit-learn.
from sklearn.model_selection import train_test_split as tts
import matplotlib.pyplot as plt
from sklearn import neighbors
from matplotlib.colors import ListedColormap

# import data and give a little overview
sample = pd.read_stata('real_data_1.dta')
s = sample
print(s.dtypes)
print(s.shape)

# nearest neighbour
print(__doc__)

n_neighbors = 1

# Build the feature matrix with one row per observation and one column per
# feature. Stacking the two columns side by side keeps every (t_ums_ma,
# t_matauf) pair together and aligned with y. The previous
# `np.array((a, b)).reshape(918, 2)` reinterpreted the (2, 918) buffer
# row by row, so most rows ended up holding two values of the *same* feature
# and no longer corresponded to their label -- which is why the classifier
# output looked useless. column_stack also removes the hard-coded row count.
x = np.column_stack((s.t_ums_ma, s.t_matauf))
y = np.array(s.matauf_measure)

# Colour the points by class via `c`; passing `color` as well conflicts with
# `c` and is rejected by matplotlib, so only `c` is supplied.
plt.scatter(s.t_ums_ma, s.t_matauf, c=s.matauf_measure,
            label='nordan scatter', s=25, marker="o")

plt.xlabel('crisis')
plt.ylabel('current debt')
plt.title('interesting graph\ncheck out')
plt.legend()
plt.gray()
plt.show()

# NOTE(review): an integer test_size is an absolute count, so this holds out
# a single observation; use a fraction such as 0.25 if a real hold-out set
# is intended. The split is only used for the lower mesh bounds below.
x_train, x_test, y_train, y_test = tts(x, y, test_size=1)

h = 0.02  # step size of the mesh, in feature units

# create color maps
cmap_light = ListedColormap(['#ffaaaa', '#aaffaa', '#aaaaff'])
cmap_bold = ListedColormap(['#ff0000', '#00ff00', '#0000ff'])

for weights in ['uniform', 'distance']:
    # create an instance of the neighbours classifier and fit the data.
    clf = neighbors.KNeighborsClassifier(n_neighbors, weights=weights)
    clf.fit(x, y)

    # plot the decision boundary. for that, assign a color to each
    # point in the mesh [x_min, x_max] x [y_min, y_max].
    # NOTE(review): lower bounds come from x_train, upper bounds from x,
    # exactly as before; with one held-out point they are almost always equal.
    x_min, x_max = x_train[:, 0].min() - 0.01, x[:, 0].max() + 0.01
    y_min, y_max = x_train[:, 1].min() - 0.01, x[:, 1].max() + 0.01
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                         np.arange(y_min, y_max, h))
    z = clf.predict(np.c_[xx.ravel(), yy.ravel()])

    # put the result into a color plot
    z = z.reshape(xx.shape)
    plt.figure()
    plt.pcolormesh(xx, yy, z, cmap=cmap_light)

    # plot also the training points
    plt.scatter(x[:, 0], x[:, 1], c=y, cmap=cmap_bold)
    plt.xlim(xx.min(), xx.max())
    plt.ylim(yy.min(), yy.max())
    plt.title("3-class classification (k = %i, weights = '%s')"
              % (n_neighbors, weights))

plt.show()
Any help is appreciated! Best /r
Comments
Post a Comment