Contents

Parameter estimation using grid search with nested cross-validation

The classifier is optimized by “nested” cross-validation using the GridSearchCV object.

The performance of the selected parameters is evaluated using an outer cross-validation loop, which is separate from the nested (inner) cross-validation that is used to select the best classifier.

Python source code: grid_search_digits.py

import numpy as np
from scikits.learn.svm import SVC
from scikits.learn.cross_val import StratifiedKFold
from scikits.learn.grid_search import GridSearchCV
from scikits.learn import datasets
from scikits.learn.metrics import zero_one

################################################################################
# Loading the Digits dataset
digits = datasets.load_digits()

# To apply a classifier to this data, we need to flatten each image, turning
# the data into a (samples, features) matrix:
n_samples = digits.images.shape[0]
X = digits.images.reshape(n_samples, -1)
y = digits.target

################################################################################
# Set the parameters by cross-validation

# Candidate settings: an RBF kernel with two values of gamma, and a linear
# kernel with no extra parameter to tune.
tuned_parameters = [{'kernel': ('rbf', ), 'gamma': [1e-3, 1e-4]},
                    {'kernel': ('linear', )}]

clf = GridSearchCV(SVC(C=1), tuned_parameters, n_jobs=2)

# Outer loop: 2 stratified folds, used only to score the selected parameters.
# The per-fold results are collected in lists and joined once after the loop,
# instead of growing an array with np.append on every iteration (which
# recopies the accumulated data each time and, when seeded with an empty
# list, promotes the stored values to floating point).
fold_predictions = []
fold_targets = []
for train, test in StratifiedKFold(y, 2):
    # Inner (nested) loop: the grid search selects its parameters using a
    # 5-fold stratified split of the outer training portion only, so the
    # outer test portion never influences parameter selection.
    clf.fit(X[train], y[train], cv=StratifiedKFold(y[train], 5))
    fold_predictions.append(clf.predict(X[test]))
    fold_targets.append(y[test])

y_pred = np.concatenate(fold_predictions)
y_true = np.concatenate(fold_targets)

# Percentage of held-out samples whose predicted label equals the true label.
classif_rate = np.mean(y_pred == y_true) * 100
# The parenthesised single-argument form prints the same text under the
# Python 2 print statement and the Python 3 print function.
print("Classification rate : %f" % classif_rate)