%matplotlib inline import matplotlib.pyplot as plt import scipy as sp try: face = sp.face(gray=True) except AttributeError: from scipy import misc face = misc.face(gray=True) X = face.reshape((-1, 1)) # We need an (n_sample, n_feature) array k_means = cluster.KMeans(n_clusters=5, n_init=1) k_means.fit(X)
from sklearn.feature_extraction.image import grid_to_graph from sklearn.cluster import AgglomerativeClustering
# Generate data try: # SciPy >= 0.16 have face in misc from scipy.misc import face face = face(gray=True) except ImportError: face = sp.face(gray=True)
FeatureAgglomeration(affinity='euclidean', compute_full_tree='auto',
connectivity=<64x64 sparse matrix of type '<class 'numpy.int64'>'
with 288 stored elements in COOrdinate format>,
linkage='ward', memory=None, n_clusters=32,
pooling_func=<function mean at 0x7f393104d6a8>)
# Generate sample data import numpy as np from scipy import signal time = np.linspace(0, 10, 2000) s1 = np.sin(2 * time) # Signal 1 : sinusoidal signal s2 = np.sign(np.sin(3 * time)) # Signal 2 : square signal s3 = signal.sawtooth(2 * np.pi * time) # Signal 3: saw tooth signal S = np.c_[s1, s2, s3] S += 0.2 * np.random.normal(size=S.shape) # Add noise S /= S.std(axis=0) # Standardize data
1 2 3 4 5 6 7 8 9
# Mix data A = np.array([[1, 1, 1], [0.5, 2, 1], [1.5, 1, 2]]) # Mixing matrix X = np.dot(S, A.T) # Generate observations
# Parameters of pipelines can be set using ‘__’ separated parameter names: estimator = GridSearchCV(pipe, dict(pca__n_components=n_components, logistic__C=Cs)) estimator.fit(X_digits, y_digits)
""" =================================================== Faces recognition example using eigenfaces and SVMs =================================================== The dataset used in this example is a preprocessed excerpt of the "Labeled Faces in the Wild", aka LFW_: http://vis-www.cs.umass.edu/lfw/lfw-funneled.tgz (233MB) .. _LFW: http://vis-www.cs.umass.edu/lfw/ Expected results for the top 5 most represented people in the dataset: ================== ============ ======= ========== ======= precision recall f1-score support ================== ============ ======= ========== ======= Ariel Sharon 0.67 0.92 0.77 13 Colin Powell 0.75 0.78 0.76 60 Donald Rumsfeld 0.78 0.67 0.72 27 George W Bush 0.86 0.86 0.86 146 Gerhard Schroeder 0.76 0.76 0.76 25 Hugo Chavez 0.67 0.67 0.67 15 Tony Blair 0.81 0.69 0.75 36 avg / total 0.80 0.80 0.80 322 ================== ============ ======= ========== ======= """
from time import time import logging import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split from sklearn.model_selection import GridSearchCV from sklearn.datasets import fetch_lfw_people from sklearn.metrics import classification_report #显示主要的分类指标,返回每个类标签的精确、召回率及F1值 from sklearn.metrics import confusion_matrix # from sklearn.decomposition import PCA from sklearn.svm import SVC
# Display progress logs on stdout logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
# ############################################################################# # Download the data, if not already on disk and load it as numpy arrays
# introspect the images arrays to find the shapes (for plotting) n_samples, h, w = lfw_people.images.shape
# for machine learning we use the 2 data directly (as relative pixel # positions info is ignored by this model) X = lfw_people.data n_features = X.shape[1]
# the label to predict is the id of the person y = lfw_people.target target_names = lfw_people.target_names n_classes = target_names.shape[0]
# ############################################################################# # Split into a training set and a test set using a stratified k fold
# split into a training and testing set X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=0.25, random_state=42)
# ############################################################################# # Compute a PCA (eigenfaces) on the face dataset (treated as unlabeled # dataset): unsupervised feature extraction / dimensionality reduction n_components = 150
print("Extracting the top %d eigenfaces from %d faces" % (n_components, X_train.shape[0])) t0 = time() pca = PCA(n_components=n_components, svd_solver='randomized', whiten=True).fit(X_train) print("done in %0.3fs" % (time() - t0))
print("Projecting the input data on the eigenfaces orthonormal basis") t0 = time() X_train_pca = pca.transform(X_train) X_test_pca = pca.transform(X_test) print("done in %0.3fs" % (time() - t0))
# ############################################################################# # Train a SVM classification model
print("Fitting the classifier to the training set") t0 = time() param_grid = {'C': [1e3, 5e3, 1e4, 5e4, 1e5], 'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1], } clf = GridSearchCV(SVC(kernel='rbf', class_weight='balanced'), param_grid) clf = clf.fit(X_train_pca, y_train) print("done in %0.3fs" % (time() - t0)) print("Best estimator found by grid search:") print(clf.best_estimator_)
# ############################################################################# # Quantitative evaluation of the model quality on the test set
print("Predicting people's names on the test set") t0 = time() y_pred = clf.predict(X_test_pca) print("done in %0.3fs" % (time() - t0))