""" ============================================= Cross-validation on Digits Dataset Exercise ============================================= A tutorial exercise using Cross-validation with an SVM on the Digits dataset. This exercise is used in the :ref:`cv_generators_tut` part of the :ref:`model_selection_tut` section of the :ref:`stat_learn_tut_index`. """ print(__doc__) import numpy as np from sklearn import cross_validation, datasets, svm digits = datasets.load_digits() X = digits.data y = digits.target svc = svm.SVC(kernel='linear') C_s = np.logspace(-10, 0, 10) scores = list() scores_std = list() for C in C_s: svc.C = C this_scores = cross_validation.cross_val_score(svc, X, y, n_jobs=1) scores.append(np.mean(this_scores)) scores_std.append(np.std(this_scores)) # Do the plotting import matplotlib.pyplot as plt plt.figure(1, figsize=(4, 3)) plt.clf() plt.semilogx(C_s, scores) plt.semilogx(C_s, np.array(scores) + np.array(scores_std), 'b--') plt.semilogx(C_s, np.array(scores) - np.array(scores_std), 'b--') locs, labels = plt.yticks() plt.yticks(locs, list(map(lambda x: "%g" % x, locs))) plt.ylabel('CV score') plt.xlabel('Parameter C') plt.ylim(0, 1.1) plt.show()