Table of Contents
1. 1.数据前处理
1.1. 行处理
1.2. 列处理
# scikit-learn preprocessing quick reference.
# NOTE(review): X, y, X_train and X_test are not defined in this snippet;
# presumably they are provided by the surrounding notes -- confirm before
# running it standalone.

# Standardization: the scaler is fitted on the training split only and the
# same fitted transform is then applied to both splits.
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler().fit(X_train)
standardized_X = scaler.transform(X_train)
standardized_X_test = scaler.transform(X_test)

# Normalization (note: this rebinds `scaler` from the previous section).
from sklearn.preprocessing import Normalizer

scaler = Normalizer().fit(X_train)
normalized_X = scaler.transform(X_train)
normalized_X_test = scaler.transform(X_test)

# Binarization with a threshold of 0.0.
from sklearn.preprocessing import Binarizer

binarizer = Binarizer(threshold=0.0).fit(X)
binary_X = binarizer.transform(X)

# Encoding categorical labels; `y` is overwritten with the encoded values.
from sklearn.preprocessing import LabelEncoder

enc = LabelEncoder()
y = enc.fit_transform(y)

# Imputing missing values: entries equal to 0 are treated as missing and
# replaced by the column mean.
# `sklearn.preprocessing.Imputer` was deprecated in scikit-learn 0.20 and
# removed in 0.22; `SimpleImputer` is its replacement. It has no `axis`
# parameter and always imputes column-wise, which matches the former
# `axis=0` setting.
from sklearn.impute import SimpleImputer

imp = SimpleImputer(missing_values=0, strategy='mean')
imp.fit_transform(X_train)

# Generating polynomial features up to degree 5.
from sklearn.preprocessing import PolynomialFeatures

poly = PolynomialFeatures(5)
poly.fit_transform(X)