""" ================================== Comparing various online solvers ================================== An example showing how different online solvers perform on the hand-written digits dataset. """ # Author: Rob Zinkov # License: BSD 3 clause import numpy as np import matplotlib.pyplot as plt from sklearn import datasets from sklearn.cross_validation import train_test_split from sklearn.linear_model import SGDClassifier, Perceptron from sklearn.linear_model import PassiveAggressiveClassifier heldout = [0.95, 0.90, 0.75, 0.50, 0.01] rounds = 20 digits = datasets.load_digits() X, y = digits.data, digits.target classifiers = [ ("SGD", SGDClassifier()), ("ASGD", SGDClassifier(average=True)), ("Perceptron", Perceptron()), ("Passive-Aggressive I", PassiveAggressiveClassifier(loss='hinge', C=1.0)), ("Passive-Aggressive II", PassiveAggressiveClassifier(loss='squared_hinge', C=1.0)), ] xx = 1. - np.array(heldout) for name, clf in classifiers: rng = np.random.RandomState(42) yy = [] for i in heldout: yy_ = [] for r in range(rounds): X_train, X_test, y_train, y_test = \ train_test_split(X, y, test_size=i, random_state=rng) clf.fit(X_train, y_train) y_pred = clf.predict(X_test) yy_.append(1 - np.mean(y_pred == y_test)) yy.append(np.mean(yy_)) plt.plot(xx, yy, label=name) plt.legend(loc="upper right") plt.xlabel("Proportion train") plt.ylabel("Test Error Rate") plt.show()