1 Model Preprocessing

Working notes on scikit-learn's pre-model workflow: getting sample data, scaling and encoding features, imputing missing values, dimensionality reduction, and tying the steps together with pipelines.
```python
from sklearn import datasets

# small built-in datasets ship with scikit-learn; larger ones are
# downloaded on first use (note: load_boston was removed in scikit-learn 1.2)
boston = datasets.load_boston()
housing = datasets.fetch_california_housing()
X, y = boston.data, boston.target
```
```python
from sklearn import datasets
import sklearn.datasets as d

# the datasets module can also fabricate data via its make_* factories,
# e.g. datasets.make_biclusters, make_regression, make_classification
reg_data = d.make_regression()
# 1000 samples, 10 features (5 informative), 2 targets, bias of 1.0
complex_reg_data = d.make_regression(n_samples=1000, n_features=10,
                                     n_informative=5, n_targets=2, bias=1.0)
# an unbalanced classification set: ~10% of samples in the first class
classification_set = d.make_classification(weights=[0.1])
```
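What the `weights` argument actually produces is easiest to see by counting the generated labels. A minimal sketch, assuming the default two-class setup (the variable names are mine):

```python
import numpy as np
from sklearn.datasets import make_classification

# weights=[0.1] puts ~10% of the samples in the first class;
# the remaining weight goes to the other class
X, y = make_classification(n_samples=1000, weights=[0.1])

print(X.shape)         # (1000, 20) -- 20 features by default
print(np.bincount(y))  # roughly array([100, 900])
```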
```python
%matplotlib inline
import numpy as np

n_samples, n_features, n_targets, bias = 1000, 10, 1, 1.0
X = np.random.randn(n_samples, n_features)
# the coefficient matrix must be (n_features, n_targets) for np.dot below
ground_truth = np.zeros((n_features, n_targets))
y = np.dot(X, ground_truth) + bias
```
```python
from sklearn import preprocessing
from sklearn import datasets

boston = datasets.load_boston()
X = boston.data

X[:, :3].mean(axis=0)  # means of the first three features
X[:, :3].std(axis=0)   # standard deviations of the first three features

# scale to zero mean and unit variance
X_2 = preprocessing.scale(X[:, :3])
X_2.mean(axis=0)  # ~0
X_2.std(axis=0)   # 1
```
```python
# the class-based scalers remember training statistics, unlike the scale function
my_scaler = preprocessing.StandardScaler()
my_minmax_scaler = preprocessing.MinMaxScaler()
# MinMaxScaler can target an arbitrary range
my_odd_scaler = preprocessing.MinMaxScaler(feature_range=(-3.14, 3.14))
# normalize rescales each sample to unit norm (row-wise, not column-wise)
normalized_X = preprocessing.normalize(X[:, :3])
# with both options off, StandardScaler is the identity transform
my_useless_scaler = preprocessing.StandardScaler(with_mean=False, with_std=False)
```
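The point of these scaler objects, versus the one-shot `preprocessing.scale` function, is that they remember the training statistics and can replay them on new data. A minimal sketch with made-up train/test arrays:

```python
import numpy as np
from sklearn import preprocessing

X_train = np.random.randn(100, 3) * 5 + 10
X_test = np.random.randn(20, 3) * 5 + 10

scaler = preprocessing.StandardScaler()
scaler.fit(X_train)                       # mean_ and scale_ come from training data only
X_test_scaled = scaler.transform(X_test)  # the same shift/scale, replayed on new data

print(X_test_scaled.mean(axis=0))         # close to 0, but not exactly
```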
```python
import scipy.sparse

# sparse matrices cannot be mean-centered without densifying,
# so scaling must skip the centering step
matrix = scipy.sparse.eye(1000)  # illustrative sparse matrix
preprocessing.scale(matrix, with_mean=False)
```
```python
from sklearn import datasets
from sklearn import preprocessing

boston = datasets.load_boston()

# manual binarization: threshold at the mean
(boston.target[:5] > boston.target.mean()).astype(int)

# the same thing as a reusable transformer ("bin" would shadow the builtin)
binarizer = preprocessing.Binarizer(threshold=boston.target.mean())
```
```python
from scipy.sparse import coo  # the encoder's output is a scipy.sparse matrix
from sklearn import datasets
import numpy as np
from sklearn import preprocessing

# fit a one-hot encoder on a categorical column, e.g. the iris target
iris = datasets.load_iris()
text_encoder = preprocessing.OneHotEncoder()
text_encoder.fit(iris.target.reshape(-1, 1))

# encoding class "1" for three samples; toarray() densifies the sparse result
text_encoder.transform(np.ones((3, 1))).toarray()
```
```python
# two further options for encoding categorical features:
from sklearn.feature_extraction import DictVectorizer  # dicts -> feature matrix
import patsy  # R-style formulas, e.g. C(x) for categorical terms
```
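As a sketch of the DictVectorizer route (the feature dicts below are made up for illustration): string values become one-hot columns while numeric values pass through unchanged.

```python
from sklearn.feature_extraction import DictVectorizer

measurements = [
    {'city': 'Dubai', 'temperature': 33.0},
    {'city': 'London', 'temperature': 12.0},
    {'city': 'San Francisco', 'temperature': 18.0},
]

dv = DictVectorizer(sparse=False)
X = dv.fit_transform(measurements)
print(dv.get_feature_names())  # newer scikit-learn: dv.get_feature_names_out()
# ['city=Dubai', 'city=London', 'city=San Francisco', 'temperature']
```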
```python
from sklearn import datasets as d
from sklearn.preprocessing import LabelBinarizer

iris = d.load_iris()
target = iris.target

label_binarizer = LabelBinarizer()
new_target = label_binarizer.fit_transform(target)

new_target.shape           # (150, 3): one column per class
new_target[:5]             # first class  -> [1, 0, 0]
new_target[-5:]            # last class   -> [0, 0, 1]
label_binarizer.classes_   # array([0, 1, 2])

# an unseen label maps to all zeros
label_binarizer.transform([4])

# the output values themselves are configurable
label_binarizer = LabelBinarizer(neg_label=-1000, pos_label=1000)
```
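LabelBinarizer can also undo the encoding, which is handy after a model has produced predictions in the binarized space. A quick sketch:

```python
from sklearn.preprocessing import LabelBinarizer

label_binarizer = LabelBinarizer()
binarized = label_binarizer.fit_transform([0, 1, 2, 2, 1])

# map one-hot rows back to the original class labels
print(label_binarizer.inverse_transform(binarized))  # array([0, 1, 2, 2, 1])
```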
```python
from sklearn import datasets, preprocessing
import numpy as np

iris = datasets.load_iris()
iris_X = iris.data
# randomly mask ~25% of the entries as missing
masking_array = np.random.binomial(1, .25, iris_X.shape).astype(bool)
iris_X[masking_array] = np.nan
masking_array[:5]
iris_X[:5]

impute = preprocessing.Imputer()  # mean imputation is the default
iris_X_prime = impute.fit_transform(iris_X)
iris_X_prime[3, 0]  # filled in
iris_X[3, 0]        # the source array still holds NaN

impute = preprocessing.Imputer(strategy='median')
# missing values can also be coded by a sentinel such as -1
iris_X[np.isnan(iris_X)] = -1
impute = preprocessing.Imputer(missing_values=-1)

# or handled in pandas directly
import pandas as pd
iris_df = pd.DataFrame(iris.data, columns=iris.feature_names)
iris_df[masking_array] = np.nan
iris_df.fillna(iris_df.max())['sepal length (cm)'].head(5)
```
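For reference, `preprocessing.Imputer` is the pre-0.20 interface; current scikit-learn exposes the same strategies through `sklearn.impute.SimpleImputer`. A sketch of the equivalent calls:

```python
import numpy as np
from sklearn.impute import SimpleImputer  # scikit-learn >= 0.20

X = np.array([[1.0, 2.0], [np.nan, 3.0], [7.0, 6.0]])

imp = SimpleImputer()  # mean imputation, like Imputer()
print(imp.fit_transform(X))

imp_median = SimpleImputer(strategy='median')    # like Imputer(strategy='median')
imp_sentinel = SimpleImputer(missing_values=-1)  # like Imputer(missing_values=-1)
```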
```python
from sklearn import datasets, preprocessing, pipeline
import numpy as np

mat = datasets.make_spd_matrix(10)
masking_array = np.random.binomial(1, .1, mat.shape).astype(bool)
mat[masking_array] = np.nan

# step by step: impute, then scale
impute = preprocessing.Imputer()
scaler = preprocessing.StandardScaler()
mat_imputed = impute.fit_transform(mat)
mat_imp_and_scaled = scaler.fit_transform(mat_imputed)

# the same two steps wrapped in a Pipeline
pipe = pipeline.Pipeline([('impute', impute), ('scaler', scaler)])
pipe
new_mat = pipe.fit_transform(mat)
np.array_equal(new_mat, mat_imp_and_scaled)  # True

# pipe.inverse_transform(new_mat) raises here: Imputer has no inverse;
# invert just the scaling step instead
scaler.inverse_transform(new_mat)[:4, :4]
```
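Rather than keeping a separate reference to the scaler, each fitted step also stays reachable through the pipeline's `named_steps` mapping. A self-contained sketch using the same step names as above:

```python
import numpy as np
from sklearn import pipeline, preprocessing

mat = np.random.randn(10, 10)
pipe = pipeline.Pipeline([('impute', preprocessing.Imputer()),
                          ('scaler', preprocessing.StandardScaler())])
new_mat = pipe.fit_transform(mat)

# pull the fitted scaler back out of the pipeline by name
fitted_scaler = pipe.named_steps['scaler']
fitted_scaler.inverse_transform(new_mat)[:4, :4]
```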
```python
from sklearn import datasets
from sklearn import decomposition

iris = datasets.load_iris()
iris_X = iris.data

# keep all components by default
pca = decomposition.PCA()
iris_pca = pca.fit_transform(iris_X)
iris_pca[:1]
# array([[ -2.68420713e+00, -3.26607315e-01, 2.15118370e-02, ...
pca.explained_variance_ratio_

# project onto the two strongest components
pca = decomposition.PCA(n_components=2)
%matplotlib inline
iris_X_prime = pca.fit_transform(iris_X)
pca.explained_variance_ratio_.sum()

# a float n_components keeps enough components to explain 98% of the variance
pca = decomposition.PCA(n_components=.98)
iris_X_prime = pca.fit(iris_X).transform(iris_X)
iris_X_prime.shape
```
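One caveat the `n_components` experiments above skip over: PCA is driven by variance, so features measured on larger scales dominate the components. A common pattern, sketched here, is to standardize first:

```python
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

iris_X = load_iris().data

# standardize, then project: without this, the widest-ranging
# feature largely decides the principal directions
pca = PCA(n_components=2)
iris_pca = pca.fit_transform(StandardScaler().fit_transform(iris_X))
print(pca.explained_variance_ratio_)
```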
```python
from sklearn import datasets
from sklearn.decomposition import FactorAnalysis

# explain the iris features with two latent factors
fa = FactorAnalysis(n_components=2)
iris_two_dim = fa.fit_transform(datasets.load_iris().data)
```
```python
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import KernelPCA

# two point clouds that a linear projection cannot separate (illustrative)
A = np.random.normal(0, 5, (100, 2))
B = np.random.normal(0, 1, (100, 2))
A_color = np.array(['r'] * len(A))  # one plot color per point in A

# the cosine kernel projects the stacked data onto one nonlinear component
kpca = KernelPCA(kernel='cosine', n_components=1)
AB_transformed = kpca.fit_transform(np.vstack((A, B)))
```
```python
from sklearn.decomposition import PCA  # for comparison
from sklearn.datasets import load_iris
from sklearn.decomposition import TruncatedSVD

iris_data = load_iris().data
svd = TruncatedSVD(2)
iris_transformed = svd.fit_transform(iris_data)
iris_transformed[:5]
```
```python
%matplotlib inline
import numpy as np
from scipy.linalg import svd
from sklearn.decomposition import TruncatedSVD

# a small matrix to factor by hand (illustrative)
D = np.array([[1, 2], [1, 3], [1, 4]], dtype=float)

U, S, V = svd(D, full_matrices=False)
np.diag(S)                    # the singular values as a matrix
np.dot(U.dot(np.diag(S)), V)  # U * S * V reconstructs D

# keep only the largest singular value for a rank-1 approximation
new_S = S[0]
new_U = U[:, 0]
new_U.dot(new_S)              # the one-dimensional projection of D

tsvd = TruncatedSVD(2)
```
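A practical reason to prefer TruncatedSVD over PCA is that it never centers the data, so it can operate directly on scipy sparse matrices. A minimal sketch with a randomly generated sparse matrix:

```python
from scipy.sparse import random as sparse_random
from sklearn.decomposition import TruncatedSVD

# a 100x50 sparse matrix with ~5% of entries nonzero
X_sparse = sparse_random(100, 50, density=0.05, random_state=0)

tsvd = TruncatedSVD(n_components=2)
reduced = tsvd.fit_transform(X_sparse)  # PCA would need a dense copy here
print(reduced.shape)                    # (100, 2)
```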
```python
%matplotlib inline
import numpy as np
from sklearn.datasets import load_iris
from sklearn.decomposition import DictionaryLearning
from mpl_toolkits.mplot3d import Axes3D

iris_data = load_iris().data

# learn a 3-atom dictionary on every other sample...
dl = DictionaryLearning(3)
transformed = dl.fit_transform(iris_data[::2])

# ...then encode the held-out samples against the same dictionary
transformed = dl.transform(iris_data[1::2])

colors = np.array(list('rgb'))  # one color per iris class, for plotting
```
```python
from sklearn.datasets import load_iris
from sklearn import pipeline, preprocessing, decomposition

iris_data = load_iris().data
imputer = preprocessing.Imputer()
pca = decomposition.PCA()

# explicit step names...
pipe = pipeline.Pipeline([('imputer', imputer), ('pca', pca)])

# ...or let make_pipeline name the steps automatically
pipe2 = pipeline.make_pipeline(imputer, pca)
iris_data_transformed2 = pipe2.fit_transform(iris_data)

# parameters of a step are addressed as <step name>__<parameter>
pipe2.set_params(pca__n_components=2)
iris_data_transformed3 = pipe2.fit_transform(iris_data)
```
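The `pca__n_components` key works because make_pipeline derives each step's name from its lowercased class name; the generated names can be checked directly. A short sketch:

```python
from sklearn import pipeline, preprocessing, decomposition

pipe2 = pipeline.make_pipeline(preprocessing.Imputer(), decomposition.PCA())

# step names are the lowercased class names
print([name for name, step in pipe2.steps])  # ['imputer', 'pca']

# so any step parameter is addressable as <step>__<parameter>
pipe2.set_params(pca__n_components=2)
```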
```python
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.gaussian_process import GaussianProcess  # legacy pre-0.18 API

boston = datasets.load_boston()
boston_X, boston_y = boston.data, boston.target
train_set = np.random.choice([True, False], len(boston_y), p=[.75, .25])

gp = GaussianProcess()
gp.fit(boston_X[train_set], boston_y[train_set])
test_preds = gp.predict(boston_X[~train_set])
%matplotlib inline
f, ax = plt.subplots(figsize=(7, 5))

# a linear mean function with a hand-set starting point for theta
gp = GaussianProcess(regr='linear', theta0=5e-1)
gp.fit(boston_X[train_set], boston_y[train_set])
linear_preds = gp.predict(boston_X[~train_set])

# compare the test MSE of the two fits
np.power(test_preds - boston_y[~train_set], 2).mean()
np.power(linear_preds - boston_y[~train_set], 2).mean()

# eval_MSE=True also returns the predictive variance per point
test_preds, MSE = gp.predict(boston_X[~train_set], eval_MSE=True)
f, ax = plt.subplots(figsize=(7, 5))
```
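The GaussianProcess class used here is the legacy (pre-0.18) interface. In current scikit-learn the equivalent recipe uses GaussianProcessRegressor, where `predict(..., return_std=True)` plays the role of `eval_MSE=True`; the kernel choice below is my rough stand-in for `regr='linear'`:

```python
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor  # scikit-learn >= 0.18
from sklearn.gaussian_process.kernels import DotProduct

X = np.random.randn(100, 3)
y = X.dot([1.0, 2.0, -1.0]) + 0.1 * np.random.randn(100)

# the DotProduct kernel gives linear-regression-like behavior
gpr = GaussianProcessRegressor(kernel=DotProduct())
gpr.fit(X[:75], y[:75])

# per-point predictive standard deviation instead of eval_MSE
preds, std = gpr.predict(X[75:], return_std=True)
```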
```python
from sklearn.datasets import make_regression
from sklearn.gaussian_process import regression_models  # legacy module

X, y = make_regression(1000, 1, 1)

# the built-in mean ("regression") functions, evaluated at X
regression_models.constant(X)[:5]
regression_models.linear(X)[:1]
regression_models.quadratic(X)[:1]
```
```python
import numpy as np
from sklearn import datasets
from sklearn.linear_model import SGDRegressor

# a dataset large enough that memory starts to matter
X, y = datasets.make_regression(int(1e6))

print("{:,}".format(X.nbytes))        # total bytes
X.nbytes / 1e6                        # megabytes
X.nbytes / (X.shape[0] * X.shape[1])  # bytes per value

train = np.random.choice([True, False], size=len(y), p=[.75, .25])
sgd = SGDRegressor()
sgd.fit(X[train], y[train])
linear_preds = sgd.predict(X[~train])
%matplotlib inline

# the idea behind SGD, in pseudocode: step against the gradient
# of the cost until the weights stop changing
while not_converged:
    w = w - learning_rate * gradient(cost(w))
```
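The reason SGD suits data this large is that the update rule above only ever needs one batch in memory, so the model can be trained out-of-core. A sketch with `partial_fit` (the chunk size is arbitrary):

```python
import numpy as np
from sklearn.linear_model import SGDRegressor
from sklearn.datasets import make_regression

X, y = make_regression(n_samples=100000, n_features=10, random_state=0)

sgd = SGDRegressor()
# feed the data chunk by chunk; each call runs SGD updates
# on that chunk alone, never holding more in memory
for start in range(0, len(y), 10000):
    sgd.partial_fit(X[start:start + 10000], y[start:start + 10000])

preds = sgd.predict(X[:5])
```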