import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_curve, auc
from sklearn.model_selection import StratifiedKFold
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
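# Assumption: X_train and y_train come from an earlier train/test split of the
# example's dataset and are already defined; they are not recreated here.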
# Pipeline: standardize, project onto two principal components, then fit an
# L2-regularized logistic regression classifier.
pipe_lr = make_pipeline(StandardScaler(), PCA(n_components=2),
                        LogisticRegression(penalty='l2', random_state=1, C=100.0))

# Restrict the classifier to two features (columns 4 and 14).
X_train2 = X_train[:, [4, 14]]

# Stratified 3-fold splits of the training set, materialized as a list.
cv = list(StratifiedKFold(n_splits=3, shuffle=True, random_state=1).split(X_train, y_train))
fig = plt.figure(figsize=(7, 5))
# Accumulate interpolated true-positive rates on a common false-positive-rate grid.
mean_tpr = 0.0
mean_fpr = np.linspace(0, 1, 100)
for i, (train, test) in enumerate(cv):
    # Fit on this fold's training indices and get class-membership probabilities
    # for the held-out indices.
    probas = pipe_lr.fit(X_train2[train], y_train[train]).predict_proba(X_train2[test])
    fpr, tpr, thresholds = roc_curve(y_train[test], probas[:, 1], pos_label=1)
    # Interpolate this fold's ROC curve onto the common FPR grid before averaging.
    mean_tpr += np.interp(mean_fpr, fpr, tpr)
    mean_tpr[0] = 0.0
    roc_auc = auc(fpr, tpr)
    plt.plot(fpr, tpr, label='ROC fold %d (area = %0.2f)' % (i + 1, roc_auc))
# The diagonal corresponds to random guessing (AUC = 0.5).
plt.plot([0, 1], [0, 1], linestyle='--', color=(0.6, 0.6, 0.6), label='Random guessing')

# Average the interpolated TPRs over the folds and plot the mean ROC curve.
mean_tpr /= len(cv)
mean_tpr[-1] = 1.0
mean_auc = auc(mean_fpr, mean_tpr)
plt.plot(mean_fpr, mean_tpr, 'k-', label='Mean ROC (area = %0.2f)' % mean_auc, lw=2)

# A perfect classifier hugs the top-left corner (AUC = 1.0).
plt.plot([0, 0, 1], [0, 1, 1], linestyle=':', color='black', label='Perfect performance')
plt.xlabel('False positive rate')
plt.ylabel('True positive rate')
plt.legend(loc='lower right')
plt.tight_layout()
plt.show()