"""Handwritten digit recognition (MNIST) with scikit-learn.

Posted on 2021-02-14.
Reference: https://www.cnblogs.com/endlesscoding/p/9901539.html

Notebook-style walkthrough: load MNIST, display a few digits, train a binary
"is it a 5?" SGD classifier, and measure its accuracy with cross-validation,
comparing against a baseline that always answers "not 5".

Values that the notebook displayed as cell output are printed here so the
script shows the same information when run outside a notebook.
"""

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets
from sklearn.base import BaseEstimator
from sklearn.base import clone
from sklearn.datasets import fetch_mldata
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_predict
from sklearn.model_selection import cross_val_score

# %matplotlib inline   (IPython magic; only valid inside a notebook cell)

# Number of leading rows used as the training set; the rest is the test set.
N_TRAIN = 60000

# The download may time out.
# NOTE(review): fetch_mldata was deprecated in scikit-learn 0.20 and removed
# in 0.22. fetch_openml('mnist_784') is the documented replacement, but it
# returns the samples in a different order and the labels as strings, so the
# index-based slices below would need revisiting before switching.
mnist = fetch_mldata('mnist-original', data_home='./datasets/')
# (the notebook displayed `mnist` here)

X, y = mnist['data'], mnist['target']
print(X.shape)
print(y.shape)

# A single sample to inspect (row index 1).
some_digit = X[1]
some_digit_image = some_digit.reshape(28, 28)

plt.imshow(some_digit_image, cmap=matplotlib.cm.binary,
           interpolation="nearest")
plt.axis('off')
plt.show()


# EXTRA
def plot_digits(instances, images_per_row=10, **options):
    """Draw a grid of 28x28 digit images on the current matplotlib axes.

    Args:
        instances: Sequence of flat arrays, each reshapeable to (28, 28).
        images_per_row: Maximum number of digits per grid row; capped at
            ``len(instances)``.
        **options: Extra keyword arguments forwarded to ``plt.imshow``.
    """
    size = 28
    images_per_row = min(len(instances), images_per_row)
    images = [instance.reshape(size, size) for instance in instances]
    n_rows = (len(instances) - 1) // images_per_row + 1
    # Pad the last row with one blank strip so every row has equal width
    # and the rows can be stacked vertically.
    n_empty = n_rows * images_per_row - len(instances)
    images.append(np.zeros((size, size * n_empty)))
    row_images = []
    for row in range(n_rows):
        rimages = images[row * images_per_row:(row + 1) * images_per_row]
        row_images.append(np.concatenate(rimages, axis=1))
    image = np.concatenate(row_images, axis=0)
    plt.imshow(image, cmap=matplotlib.cm.binary, **options)
    plt.axis("off")


plt.figure(figsize=(9, 9))
example_images = np.r_[X[:12000:600], X[13000:30600:600], X[30600:60000:590]]
plot_digits(example_images, images_per_row=10)
# save_fig("more_digits_plot")
plt.show()

X_train, X_test, y_train, y_test = (
    X[:N_TRAIN], X[N_TRAIN:], y[:N_TRAIN], y[N_TRAIN:]
)

# Shuffle the training set; samples and labels use the same permutation.
shuffle_index = np.random.permutation(N_TRAIN)
X_train, y_train = X_train[shuffle_index], y_train[shuffle_index]

# Train a binary classifier.
# These are boolean arrays: True for 5, False for everything else.
y_train_5 = (y_train == 5)
y_test_5 = (y_test == 5)

sgd_clf = SGDClassifier(random_state=32)
sgd_clf.fit(X_train, y_train_5)
# Notebook output of the fit above:
#   SGDClassifier(alpha=0.0001, average=False, class_weight=None,
#       early_stopping=False, epsilon=0.1, eta0=0.0, fit_intercept=True,
#       l1_ratio=0.15, learning_rate='optimal', loss='hinge', max_iter=None,
#       n_iter=None, n_iter_no_change=5, n_jobs=None, penalty='l2',
#       power_t=0.5, random_state=32, shuffle=True, tol=None,
#       validation_fraction=0.1, verbose=0, warm_start=False)
print(sgd_clf.predict([some_digit]))
# Notebook output: array([ True])

sgd_clf = SGDClassifier(random_state=42)
sgd_clf.fit(X_train, y_train_5)
print(sgd_clf.predict([some_digit]))

# Measure accuracy using cross-validation, written out by hand.
# random_state is not passed: without shuffle=True it has no effect, and
# newer scikit-learn releases reject that combination with a ValueError.
skfolds = StratifiedKFold(n_splits=3)
for train_index, test_index in skfolds.split(X_train, y_train_5):
    # Fresh, unfitted copy per fold so no state carries over between folds.
    clone_clf = clone(sgd_clf)
    X_train_folds = X_train[train_index]
    y_train_folds = y_train_5[train_index]
    X_test_fold = X_train[test_index]
    y_test_fold = y_train_5[test_index]

    clone_clf.fit(X_train_folds, y_train_folds)
    y_pred = clone_clf.predict(X_test_fold)
    n_correct = np.sum(y_pred == y_test_fold)
    print(n_correct / len(y_pred))

print(cross_val_score(sgd_clf, X_train, y_train_5, cv=3, scoring="accuracy"))


# This model's prediction strategy is to treat every sample as "not 5".
class Never5Classifier(BaseEstimator):
    """Baseline classifier that predicts False ("not 5") for every sample."""

    def fit(self, X, y=None):
        """Do nothing (there is nothing to learn) and return the estimator."""
        return self

    def predict(self, X):
        """Return an all-False boolean array of shape ``(len(X), 1)``."""
        return np.zeros((len(X), 1), dtype=bool)


never_5_clf = Never5Classifier()
print(cross_val_score(never_5_clf, X_train, y_train_5, cv=3,
                      scoring="accuracy"))

y_train_pred = cross_val_predict(sgd_clf, X_train, y_train_5, cv=3)