# Expected share of anomalous samples; passed as `contamination` to the
# detectors configured further down.
outliers_fraction = 0.01

# LOF detectors with n_neighbors = 5, 10, ..., 50; this list is handed to the
# LSCP ensemble.
detector_list = [LOF(n_neighbors=k) for k in range(5, 51, 5)]
# Keyword arguments shared by the detectors: every model gets the same
# contamination, and the randomized ones are additionally seeded with 0.
_base_kw = {'contamination': outliers_fraction}
_seeded_kw = {'contamination': outliers_fraction, 'random_state': 0}

# Detectors to compare, keyed by display name. Insertion order determines the
# numbering printed below.
classifiers = {
    'Angle-based Outlier Detector (ABOD)': ABOD(**_base_kw),
    'Cluster-based Local Outlier Factor (CBLOF)': CBLOF(
        check_estimator=False, **_seeded_kw
    ),
    'Feature Bagging': FeatureBagging(LOF(n_neighbors=35), **_seeded_kw),
    'Histogram-base Outlier Detection (HBOS)': HBOS(**_base_kw),
    'Isolation Forest': IForest(**_seeded_kw),
    'K Nearest Neighbors (KNN)': KNN(**_base_kw),
    'Average KNN': KNN(method='mean', **_base_kw),
    'Local Outlier Factor (LOF)': LOF(n_neighbors=35, **_base_kw),
    'Minimum Covariance Determinant (MCD)': MCD(**_seeded_kw),
    'One-class SVM (OCSVM)': OCSVM(**_base_kw),
    'Principal Component Analysis (PCA)': PCA(**_seeded_kw),
    'Locally Selective Combination (LSCP)': LSCP(detector_list, **_seeded_kw),
}

# List the configured models with 1-based numbering.
for model_number, model_name in enumerate(classifiers, start=1):
    print('Model', model_number, model_name)
# Pull the two feature columns out of the DataFrame as (n_samples, 1) arrays.
X1 = df['num_people'].values.reshape(-1, 1)
X2 = df['num_order'].values.reshape(-1, 1)

# Place the columns side by side to form the (n_samples, 2) feature matrix.
X = np.hstack((X1, X2))

# 100 x 100 grid over [0, 1] x [0, 1] on which decision functions are evaluated.
# NOTE(review): assumes both features were scaled to [0, 1] earlier — confirm.
_grid_ticks = np.linspace(0, 1, 100)
xx, yy = np.meshgrid(_grid_ticks, _grid_ticks)
# Fit every detector on X and draw its decision regions, one subplot per model,
# in a 3 x 4 grid on a single figure.
plt.figure(figsize=(20, 15))

for i, (clf_name, clf) in enumerate(classifiers.items()):
    clf.fit(X)

    # Sign-flipped decision scores of the training data; the threshold below is
    # taken from the low end of this flipped distribution.
    scores_pred = clf.decision_function(X) * -1

    # Binary labels: the code below treats 0 as inlier and 1 as outlier.
    y_pred = clf.predict(X)
    n_inliers = len(y_pred) - np.count_nonzero(y_pred)
    n_outliers = np.count_nonzero(y_pred == 1)

    # NOTE: df1 is an alias of df (not a copy), so the 'outlier' column is
    # written onto df itself and overwritten on every iteration.
    df1 = df
    df1['outlier'] = y_pred.tolist()

    # Split both features by predicted label, each as an (n, 1) array.
    inlier_mask = df1['outlier'] == 0
    outlier_mask = df1['outlier'] == 1
    inliers_people = np.array(df1['num_people'][inlier_mask]).reshape(-1, 1)
    inliers_order = np.array(df1['num_order'][inlier_mask]).reshape(-1, 1)
    outliers_people = df1['num_people'][outlier_mask].values.reshape(-1, 1)
    outliers_order = df1['num_order'][outlier_mask].values.reshape(-1, 1)

    # Score cut-off: the (100 * outliers_fraction)-th percentile of the
    # flipped training scores.
    threshold = np.percentile(scores_pred, 100 * outliers_fraction)

    # Evaluate the flipped decision function on every grid point and restore
    # the grid shape for contour plotting.
    Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()]) * -1
    Z = Z.reshape(xx.shape)

    plt.subplot(3, 4, i + 1)

    # Blue gradient from the minimum score up to the threshold.
    plt.contourf(xx, yy, Z, levels=np.linspace(Z.min(), threshold, 7),
                 cmap=plt.cm.Blues_r)
    # Red line where the score equals the threshold.
    a = plt.contour(xx, yy, Z, levels=[threshold], linewidths=2, colors='red')
    # Orange fill from the threshold up to the maximum score.
    plt.contourf(xx, yy, Z, levels=[threshold, Z.max()], colors='orange')

    b = plt.scatter(x=inliers_people, y=inliers_order, c='white', s=20,
                    edgecolor='k')
    c = plt.scatter(x=outliers_people, y=outliers_order, c='black', s=20,
                    edgecolor='k')
    plt.axis('tight')

    # ContourSet.collections was deprecated in Matplotlib 3.8 and removed in
    # 3.10; legend_elements() returns proxy artists usable as legend handles.
    boundary_handle = a.legend_elements()[0][0]
    plt.legend(
        [boundary_handle, b, c],
        ['决策函数', '正常值', '异常值'],
        prop=matplotlib.font_manager.FontProperties(size=12),
        loc='upper right',
    )

    plt.xlim((0, 1))
    plt.ylim((0, 1))

    # Per-model outlier / inlier counts, shown under the subplot.
    ss = '异常值数量: ' + str(n_outliers) + ' 正常值数量: ' + str(n_inliers)
    plt.title(clf_name)
    plt.xlabel(ss)

plt.show()