[Pyod] df_train, df_test(0.00573)

Author

김보람

Published

May 3, 2024

ref: https://pyod.readthedocs.io/en/latest/pyod.models.html#all-models

1. Imports

import pandas as pd
import numpy as np
import sklearn
import pickle 
import time 
import datetime
/tmp/ipykernel_1207445/761229760.py:1: DeprecationWarning: 
Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd
import warnings
warnings.filterwarnings('ignore')
%run ../functions_pyod2.py
with open('../fraudTrain.pkl', 'rb') as file:
    fraudTrain = pickle.load(file)    
df_train1 = pd.read_csv('~/Dropbox/Data/df_train1.csv')
df_train2 = pd.read_csv('~/Dropbox/Data/df_train2.csv')
df_train3 = pd.read_csv('~/Dropbox/Data/df_train3.csv')
df_train4 = pd.read_csv('~/Dropbox/Data/df_train4.csv')
df_train5 = pd.read_csv('~/Dropbox/Data/df_train5.csv')
df_train6 = pd.read_csv('~/Dropbox/Data/df_train6.csv')
df_train7 = pd.read_csv('~/Dropbox/Data/df_train7.csv')
df_train8 = pd.read_csv('~/Dropbox/Data/df_train8.csv')
df_test = pd.read_csv('~/Dropbox/Data/df_test.csv')
_df1 = pd.concat([df_train1, df_test])
_df2 = pd.concat([df_train2, df_test])
_df3 = pd.concat([df_train3, df_test])
_df4 = pd.concat([df_train4, df_test])
_df5 = pd.concat([df_train5, df_test])
_df6 = pd.concat([df_train6, df_test])
_df7 = pd.concat([df_train7, df_test])
_df8 = pd.concat([df_train8, df_test])
_df1_mean = _df1.is_fraud.mean()
_df2_mean = _df2.is_fraud.mean()
_df3_mean = _df3.is_fraud.mean()
_df4_mean = _df4.is_fraud.mean()
_df5_mean = _df5.is_fraud.mean()
_df6_mean = _df6.is_fraud.mean()
_df7_mean = _df7.is_fraud.mean()
_df8_mean = _df8.is_fraud.mean()

pyod_0503: 기존 함수에서 결과 저장 위치만 results2로 변경

def pyod_0507(X,XX,y,yy,predictors,throw_rate):
    """Fit every detector in ``predictors``, score it on the test split, save a CSV.

    Each detector is fitted on ``(X, y)``; its predictions for ``XX`` are passed
    to ``evaluate(yy, yyhat)`` and one result row is produced per detector.
    The table is written to ``../results2/<YYYYmmdd-HHMMSS>-pyod.csv`` (the
    directory is created when missing) and is also returned.

    ``evaluate`` is not defined in this block; presumably it comes from
    ``%run ../functions_pyod2.py`` -- TODO confirm. Its result is indexed with
    the keys ``'acc'``, ``'pre'``, ``'rec'``, ``'f1'`` and ``'auc'``.

    Parameters
    ----------
    X : object with a ``.columns`` attribute
        Training features; the column names are recorded in ``train_cols``.
    XX : test features handed to ``predictor.predict``.
    y, yy : training / test labels
        Used for ``len(...)`` and ``np.array(...).mean()``.
    predictors : dict
        Maps a model name to an object exposing ``fit(X, y)`` and ``predict(XX)``.
    throw_rate : scalar
        Stored unchanged in every row.

    Returns
    -------
    pandas.DataFrame
        One row per entry of ``predictors``, in iteration order.
    """
    import os  # local import: only needed to make sure the output folder exists

    metric_names = ('acc', 'pre', 'rec', 'f1', 'auc')
    model = []
    time_diff = []
    metrics = {key: [] for key in metric_names}
    for name, predictor in predictors.items():
        t1 = time.time()
        predictor.fit(X,y)
        t2 = time.time()  # only the fit call is timed; predict/evaluate are excluded
        yyhat = predictor.predict(XX)
        scores = evaluate(yy,yyhat)
        model.append(name)
        time_diff.append(t2-t1)
        for key in metric_names:
            metrics[key].append(scores[key])

    # Everything below is identical for all models, so it is computed once
    # instead of being appended on every loop iteration.
    n_models = len(model)
    df_results = pd.DataFrame(dict(
        model = model,
        time = time_diff,
        **metrics,
        graph_based = [False] * n_models,
        method = ['pyod'] * n_models,
        throw_rate = throw_rate,
        train_size = [len(y)] * n_models,
        # a fresh list per row so the cells do not share one list object
        train_cols = [list(X.columns) for _ in range(n_models)],
        train_frate = np.array(y).mean(),
        test_size = [len(yy)] * n_models,
        test_frate = np.array(yy).mean(),
        hyper_params = [None] * n_models,
    ))

    out_dir = '../results2'
    os.makedirs(out_dir, exist_ok=True)  # to_csv does not create missing folders
    # NOTE: the file name has one-second resolution, so two calls that finish
    # within the same second write to the same path.
    ymdhms = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
    df_results.to_csv(f'{out_dir}/{ymdhms}-pyod.csv',index=False)
    return df_results
def pyod_preprocess_0503(df_tr, df_tstn, _df_mean):
    """Build the inputs for ``pyod_0507`` from a training and a test frame.

    Only the ``amt`` column is used as the feature and ``is_fraud`` as the
    label. Every detector is created with ``contamination`` set to the mean of
    ``is_fraud`` in the training frame.

    Parameters
    ----------
    df_tr : pandas.DataFrame
        Training data; must contain the columns ``amt`` and ``is_fraud``.
    df_tstn : pandas.DataFrame
        Test data with the same two columns.
    _df_mean : scalar
        Returned unchanged as ``throw_rate``.

    Returns
    -------
    tuple
        ``(X, XX, y, yy, predictors, throw_rate)`` -- the positional order in
        which the result is unpacked into ``pyod_0507``.
    """
    X = pd.DataFrame(df_tr['amt'])
    y = pd.DataFrame(df_tr['is_fraud'])
    XX = pd.DataFrame(df_tstn['amt'])
    yy = pd.DataFrame(df_tstn['is_fraud'])
    throw_rate = _df_mean
    fraud_ratio = df_tr.is_fraud.mean()
    # Commented-out entries are detectors that are currently disabled. The
    # number of '#' marks is the author's own tagging and is kept as written.
    # The original literal listed 'LODA' twice; the second entry only replaced
    # the first with an identical detector, so a single entry is kept here at
    # the position of the first one (dict order is unchanged).
    predictors = {
        'ABOD': ABOD(contamination=fraud_ratio),  # about 30 s; f1 and the other metrics come out as 0
        # 'ALAD': ALAD(contamination=fraud_ratio),
        # 'AnoGAN': AnoGAN(contamination=fraud_ratio),
        # 'AutoEncoder':AutoEncoder(contamination=fraud_ratio),
        ## 'CBLOF': CBLOF(contamination=fraud_ratio,n_clusters=2),
        ## 'COF': COF(contamination=fraud_ratio),
        ## 'CD': CD(contamination=fraud_ratio),
        'COPOD': COPOD(contamination=fraud_ratio),
        # 'DeepSVDD': DeepSVDD(contamination=fraud_ratio),
        # 'DIF': DIF(contamination=fraud_ratio),
        'ECOD': ECOD(contamination=fraud_ratio),
        # 'FeatureBagging': FeatureBagging(contamination=fraud_ratio),
        'GMM': GMM(contamination=fraud_ratio),
        'HBOS': HBOS(contamination=fraud_ratio),
        'IForest': IForest(contamination=fraud_ratio),
        'INNE': INNE(contamination=fraud_ratio),  # about 20 s
        ##### 'KDE': KDE(contamination=fraud_ratio),  # excluded: takes more than 5 minutes
        'KNN': KNN(contamination=fraud_ratio),
        #### 'KPCA': KPCA(contamination=fraud_ratio),
        # 'PyODKernelPCA': PyODKernelPCA(contamination=fraud_ratio),
        ## 'LMDD': LMDD(contamination=fraud_ratio),
        'LODA': LODA(contamination=fraud_ratio),
        'LOF': LOF(contamination=fraud_ratio),
        #### 'LOCI': LOCI(contamination=fraud_ratio),
        # 'LUNAR': LUNAR(contamination=fraud_ratio),
        # 'LSCP': LSCP(contamination=fraud_ratio),
        'MAD': MAD(contamination=fraud_ratio),
        'MCD': MCD(contamination=fraud_ratio),
        # 'MO_GAAL': MO_GAAL(contamination=fraud_ratio),
        ##### 'OCSVM': OCSVM(contamination=fraud_ratio),  # excluded: takes more than 5 minutes
        'PCA': PCA(contamination=fraud_ratio),
        ### 'QMCD': QMCD(contamination=fraud_ratio),
        #### 'RGraph': RGraph(contamination=fraud_ratio),
        'ROD': ROD(contamination=fraud_ratio),
        ## 'Sampling': Sampling(contamination=fraud_ratio),
        ## 'SOD': SOD(contamination=fraud_ratio),
        # 'SO_GAAL': SO_GAAL(contamination=fraud_ratio),
        #### 'SOS': SOS(contamination=fraud_ratio),
        # 'SUOD': SUOD(contamination=fraud_ratio),
        # 'VAE': VAE(contamination=fraud_ratio),
        # 'XGBOD': XGBOD(contamination=fraud_ratio),
    }
    return X, XX, y, yy, predictors, throw_rate
# Check whether this takes a long time to run.
# Train on df_train1, test on df_test (recorded output below: 734003 training
# rows, train fraud rate 0.005729).
pyod_0507(*pyod_preprocess_0503(df_train1, df_test, _df1_mean))
model time acc pre rec f1 auc graph_based method throw_rate train_size train_cols train_frate test_size test_frate hyper_params
0 ABOD 29.045729 0.994275 0.000000 0.000000 0.000000 0.500000 False pyod 0.005728 734003 [amt] 0.005729 314572 0.005725 None
1 COPOD 0.197903 0.992838 0.375000 0.376458 0.375727 0.686422 False pyod 0.005728 734003 [amt] 0.005729 314572 0.005725 None
2 ECOD 0.141903 0.989723 0.119554 0.124931 0.122183 0.559816 False pyod 0.005728 734003 [amt] 0.005729 314572 0.005725 None
3 GMM 0.203565 0.992835 0.374931 0.377013 0.375969 0.686697 False pyod 0.005728 734003 [amt] 0.005729 314572 0.005725 None
4 HBOS 0.789574 0.993938 0.000000 0.000000 0.000000 0.499831 False pyod 0.005728 734003 [amt] 0.005729 314572 0.005725 None
5 IForest 4.382793 0.992838 0.370115 0.357579 0.363739 0.677037 False pyod 0.005728 734003 [amt] 0.005729 314572 0.005725 None
6 INNE 21.076581 0.992472 0.339921 0.334259 0.337066 0.665261 False pyod 0.005728 734003 [amt] 0.005729 314572 0.005725 None
7 KNN 1.223524 0.992148 0.309184 0.300944 0.305008 0.648536 False pyod 0.005728 734003 [amt] 0.005729 314572 0.005725 None
8 LODA 1.959376 0.993938 0.000000 0.000000 0.000000 0.499831 False pyod 0.005728 734003 [amt] 0.005729 314572 0.005725 None
9 LOF 1.847843 0.988473 0.001638 0.001666 0.001652 0.497911 False pyod 0.005728 734003 [amt] 0.005729 314572 0.005725 None
10 MAD 0.026752 0.971867 0.138846 0.752360 0.234429 0.862745 False pyod 0.005728 734003 [amt] 0.005729 314572 0.005725 None
11 MCD 0.150459 0.992835 0.374931 0.377013 0.375969 0.686697 False pyod 0.005728 734003 [amt] 0.005729 314572 0.005725 None
12 PCA 0.047325 0.992835 0.374931 0.377013 0.375969 0.686697 False pyod 0.005728 734003 [amt] 0.005729 314572 0.005725 None
13 ROD 13.736269 0.982551 0.000542 0.001110 0.000728 0.494656 False pyod 0.005728 734003 [amt] 0.005729 314572 0.005725 None
# Train on df_train2, test on df_test (recorded output below: 420500 training
# rows, train fraud rate 0.01).
pyod_0507(*pyod_preprocess_0503(df_train2, df_test, _df2_mean))
model time acc pre rec f1 auc graph_based method throw_rate train_size train_cols train_frate test_size test_frate hyper_params
0 ABOD 16.604755 0.994275 0.000000 0.000000 0.000000 0.500000 False pyod 0.008171 420500 [amt] 0.01 314572 0.005725 None
1 COPOD 0.084035 0.990832 0.304442 0.468073 0.368928 0.730958 False pyod 0.008171 420500 [amt] 0.01 314572 0.005725 None
2 ECOD 0.084281 0.988054 0.165241 0.268184 0.204488 0.630192 False pyod 0.008171 420500 [amt] 0.01 314572 0.005725 None
3 GMM 0.145955 0.991659 0.334139 0.460300 0.387202 0.727509 False pyod 0.008171 420500 [amt] 0.01 314572 0.005725 None
4 HBOS 0.012371 0.993938 0.000000 0.000000 0.000000 0.499831 False pyod 0.008171 420500 [amt] 0.01 314572 0.005725 None
5 IForest 2.576287 0.991703 0.336036 0.460300 0.388472 0.727531 False pyod 0.008171 420500 [amt] 0.01 314572 0.005725 None
6 INNE 10.766832 0.991814 0.340215 0.457524 0.390244 0.726207 False pyod 0.008171 420500 [amt] 0.01 314572 0.005725 None
7 KNN 0.607349 0.990498 0.268511 0.382565 0.315548 0.688282 False pyod 0.008171 420500 [amt] 0.01 314572 0.005725 None
8 LODA 1.102131 0.993938 0.000000 0.000000 0.000000 0.499831 False pyod 0.008171 420500 [amt] 0.01 314572 0.005725 None
9 LOF 0.986265 0.985056 0.000689 0.001110 0.000850 0.495916 False pyod 0.008171 420500 [amt] 0.01 314572 0.005725 None
10 MAD 0.016052 0.971924 0.139103 0.752360 0.234795 0.862774 False pyod 0.008171 420500 [amt] 0.01 314572 0.005725 None
11 MCD 0.088177 0.991659 0.334139 0.460300 0.387202 0.727509 False pyod 0.008171 420500 [amt] 0.01 314572 0.005725 None
12 PCA 0.025430 0.991659 0.334139 0.460300 0.387202 0.727509 False pyod 0.008171 420500 [amt] 0.01 314572 0.005725 None
13 ROD 10.502578 0.973663 0.000308 0.001110 0.000483 0.490187 False pyod 0.008171 420500 [amt] 0.01 314572 0.005725 None
# Train on df_train3, test on df_test (recorded output below: 84100 training
# rows, train fraud rate 0.05).
pyod_0507(*pyod_preprocess_0503(df_train3, df_test, _df3_mean))
model time acc pre rec f1 auc graph_based method throw_rate train_size train_cols train_frate test_size test_frate hyper_params
0 ABOD 3.522757 0.994275 0.000000 0.000000 0.000000 0.500000 False pyod 0.015065 84100 [amt] 0.05 314572 0.005725 None
1 COPOD 0.017614 0.959669 0.100543 0.760689 0.177611 0.860752 False pyod 0.015065 84100 [amt] 0.05 314572 0.005725 None
2 ECOD 0.017467 0.955155 0.067907 0.536924 0.120566 0.747244 False pyod 0.015065 84100 [amt] 0.05 314572 0.005725 None
3 GMM 0.081848 0.979006 0.161044 0.633537 0.256808 0.807266 False pyod 0.015065 84100 [amt] 0.05 314572 0.005725 None
4 HBOS 0.003496 0.993617 0.000000 0.000000 0.000000 0.499669 False pyod 0.015065 84100 [amt] 0.05 314572 0.005725 None
5 IForest 0.593165 0.979048 0.161149 0.632426 0.256850 0.806735 False pyod 0.015065 84100 [amt] 0.05 314572 0.005725 None
6 INNE 2.095648 0.977481 0.142219 0.583009 0.228659 0.781381 False pyod 0.015065 84100 [amt] 0.05 314572 0.005725 None
7 KNN 0.098779 0.976559 0.137788 0.588562 0.223299 0.783677 False pyod 0.015065 84100 [amt] 0.05 314572 0.005725 None
8 LODA 0.228884 0.993617 0.000000 0.000000 0.000000 0.499669 False pyod 0.015065 84100 [amt] 0.05 314572 0.005725 None
9 LOF 0.159540 0.936278 0.002292 0.023320 0.004173 0.482428 False pyod 0.015065 84100 [amt] 0.05 314572 0.005725 None
10 MAD 0.003757 0.973831 0.147694 0.748473 0.246706 0.861801 False pyod 0.015065 84100 [amt] 0.05 314572 0.005725 None
11 MCD 0.018752 0.979006 0.161044 0.633537 0.256808 0.807266 False pyod 0.015065 84100 [amt] 0.05 314572 0.005725 None
12 PCA 0.006144 0.970299 0.091795 0.470850 0.153637 0.722012 False pyod 0.015065 84100 [amt] 0.05 314572 0.005725 None
13 ROD 5.795454 0.942194 0.000183 0.001666 0.000330 0.474638 False pyod 0.015065 84100 [amt] 0.05 314572 0.005725 None
# Train on df_train4, test on df_test (recorded output below: 42050 training
# rows, train fraud rate 0.1).
pyod_0507(*pyod_preprocess_0503(df_train4, df_test, _df4_mean))
model time acc pre rec f1 auc graph_based method throw_rate train_size train_cols train_frate test_size test_frate hyper_params
0 ABOD 1.887402 0.994275 0.000000 0.000000 0.000000 0.500000 False pyod 0.016841 42050 [amt] 0.1 314572 0.005725 None
1 COPOD 0.009322 0.910437 0.047214 0.763465 0.088928 0.837374 False pyod 0.016841 42050 [amt] 0.1 314572 0.005725 None
2 ECOD 0.009243 0.910437 0.047151 0.762354 0.088810 0.836822 False pyod 0.016841 42050 [amt] 0.1 314572 0.005725 None
3 GMM 0.082726 0.971313 0.136633 0.754026 0.231346 0.863295 False pyod 0.016841 42050 [amt] 0.1 314572 0.005725 None
4 HBOS 0.002337 0.993617 0.000000 0.000000 0.000000 0.499669 False pyod 0.016841 42050 [amt] 0.1 314572 0.005725 None
5 IForest 0.348754 0.971259 0.136400 0.754026 0.231011 0.863268 False pyod 0.016841 42050 [amt] 0.1 314572 0.005725 None
6 INNE 1.093964 0.968017 0.120265 0.726263 0.206358 0.847836 False pyod 0.016841 42050 [amt] 0.1 314572 0.005725 None
7 KNN 0.047462 0.962044 0.094797 0.658523 0.165735 0.811157 False pyod 0.016841 42050 [amt] 0.1 314572 0.005725 None
8 LODA 0.120044 0.993617 0.000000 0.000000 0.000000 0.499669 False pyod 0.016841 42050 [amt] 0.1 314572 0.005725 None
9 LOF 0.077448 0.874442 0.006829 0.144919 0.013044 0.511781 False pyod 0.016841 42050 [amt] 0.1 314572 0.005725 None
10 MAD 0.002430 0.975665 0.156133 0.737923 0.257733 0.857479 False pyod 0.016841 42050 [amt] 0.1 314572 0.005725 None
11 MCD 0.010759 0.971313 0.136633 0.754026 0.231346 0.863295 False pyod 0.016841 42050 [amt] 0.1 314572 0.005725 None
12 PCA 0.003648 0.937483 0.041194 0.445308 0.075411 0.692813 False pyod 0.016841 42050 [amt] 0.1 314572 0.005725 None
13 ROD 3.592023 0.881445 0.000169 0.003331 0.000322 0.444917 False pyod 0.016841 42050 [amt] 0.1 314572 0.005725 None
# Train on df_train5, test on df_test (recorded output below: 21025 training
# rows, train fraud rate 0.2).
pyod_0507(*pyod_preprocess_0503(df_train5, df_test, _df5_mean))
model time acc pre rec f1 auc graph_based method throw_rate train_size train_cols train_frate test_size test_frate hyper_params
0 ABOD 1.031277 0.994275 0.000000 0.000000 0.000000 0.500000 False pyod 0.017896 21025 [amt] 0.2 314572 0.005725 None
1 COPOD 0.005409 0.810520 0.023321 0.785119 0.045297 0.797893 False pyod 0.017896 21025 [amt] 0.2 314572 0.005725 None
2 ECOD 0.005368 0.809732 0.022661 0.765130 0.044019 0.787560 False pyod 0.017896 21025 [amt] 0.2 314572 0.005725 None
3 GMM 0.071681 0.899114 0.032981 0.586896 0.062452 0.743904 False pyod 0.017896 21025 [amt] 0.2 314572 0.005725 None
4 HBOS 0.001884 0.989910 0.276748 0.472515 0.349057 0.732702 False pyod 0.017896 21025 [amt] 0.2 314572 0.005725 None
5 IForest 0.207113 0.937890 0.066989 0.761799 0.123149 0.850352 False pyod 0.017896 21025 [amt] 0.2 314572 0.005725 None
6 INNE 0.587153 0.926208 0.053396 0.710716 0.099329 0.819082 False pyod 0.017896 21025 [amt] 0.2 314572 0.005725 None
7 KNN 0.021524 0.923951 0.050512 0.690172 0.094135 0.807734 False pyod 0.017896 21025 [amt] 0.2 314572 0.005725 None
8 LODA 0.065986 0.989910 0.276748 0.472515 0.349057 0.732702 False pyod 0.017896 21025 [amt] 0.2 314572 0.005725 None
9 LOF 0.036927 0.755309 0.006071 0.256524 0.011862 0.507353 False pyod 0.017896 21025 [amt] 0.2 314572 0.005725 None
10 MAD 0.001767 0.978491 0.161923 0.660189 0.260061 0.820257 False pyod 0.017896 21025 [amt] 0.2 314572 0.005725 None
11 MCD 0.006879 0.937820 0.066917 0.761799 0.123027 0.850316 False pyod 0.017896 21025 [amt] 0.2 314572 0.005725 None
12 PCA 0.002745 0.823932 0.009448 0.286507 0.018292 0.556767 False pyod 0.017896 21025 [amt] 0.2 314572 0.005725 None
13 ROD 2.409948 0.767160 0.000894 0.035536 0.001745 0.403454 False pyod 0.017896 21025 [amt] 0.2 314572 0.005725 None
# Train on df_train6, test on df_test (recorded output below: 14017 training
# rows, train fraud rate 0.299993).
pyod_0507(*pyod_preprocess_0503(df_train6, df_test, _df6_mean))
model time acc pre rec f1 auc graph_based method throw_rate train_size train_cols train_frate test_size test_frate hyper_params
0 ABOD 0.720466 0.994275 0.000000 0.000000 0.000000 0.500000 False pyod 0.018278 14017 [amt] 0.299993 314572 0.005725 None
1 COPOD 0.004078 0.709230 0.015350 0.788451 0.030114 0.748613 False pyod 0.018278 14017 [amt] 0.299993 314572 0.005725 None
2 ECOD 0.003989 0.709116 0.015208 0.781233 0.029835 0.744967 False pyod 0.018278 14017 [amt] 0.299993 314572 0.005725 None
3 GMM 0.074806 0.794473 0.014101 0.506385 0.027438 0.651259 False pyod 0.018278 14017 [amt] 0.299993 314572 0.005725 None
4 HBOS 0.001713 0.976508 0.160078 0.730705 0.262622 0.854314 False pyod 0.018278 14017 [amt] 0.299993 314572 0.005725 None
5 IForest 0.165700 0.882854 0.035121 0.735147 0.067040 0.809426 False pyod 0.018278 14017 [amt] 0.299993 314572 0.005725 None
6 INNE 0.415790 0.845899 0.024278 0.661299 0.046836 0.754130 False pyod 0.018278 14017 [amt] 0.299993 314572 0.005725 None
7 KNN 0.015200 0.888515 0.035725 0.710716 0.068031 0.800128 False pyod 0.018278 14017 [amt] 0.299993 314572 0.005725 None
8 LODA 0.049869 0.976508 0.160078 0.730705 0.262622 0.854314 False pyod 0.018278 14017 [amt] 0.299993 314572 0.005725 None
9 LOF 0.026639 0.647314 0.006547 0.401999 0.012883 0.525363 False pyod 0.018278 14017 [amt] 0.299993 314572 0.005725 None
10 MAD 0.001590 0.980923 0.154264 0.520267 0.237968 0.751921 False pyod 0.018278 14017 [amt] 0.299993 314572 0.005725 None
11 MCD 0.005644 0.895982 0.040865 0.764020 0.077580 0.830381 False pyod 0.018278 14017 [amt] 0.299993 314572 0.005725 None
12 PCA 0.002423 0.674567 0.004318 0.243198 0.008484 0.460125 False pyod 0.018278 14017 [amt] 0.299993 314572 0.005725 None
13 ROD 1.816649 0.987946 0.000000 0.000000 0.000000 0.496817 False pyod 0.018278 14017 [amt] 0.299993 314572 0.005725 None
# Train on df_train7, test on df_test (recorded output below: 10512 training
# rows, train fraud rate 0.400019).
pyod_0507(*pyod_preprocess_0503(df_train7, df_test, _df7_mean))
model time acc pre rec f1 auc graph_based method throw_rate train_size train_cols train_frate test_size test_frate hyper_params
0 ABOD 0.560532 0.994275 0.000000 0.000000 0.000000 0.500000 False pyod 0.018475 10512 [amt] 0.400019 314572 0.005725 None
1 COPOD 0.003454 0.608424 0.011432 0.788451 0.022536 0.697919 False pyod 0.018475 10512 [amt] 0.400019 314572 0.005725 None
2 ECOD 0.003399 0.608592 0.011704 0.807329 0.023073 0.707388 False pyod 0.018475 10512 [amt] 0.400019 314572 0.005725 None
3 GMM 0.073514 0.721558 0.011558 0.563576 0.022651 0.643022 False pyod 0.018475 10512 [amt] 0.400019 314572 0.005725 None
4 HBOS 0.001605 0.976508 0.160078 0.730705 0.262622 0.854314 False pyod 0.018475 10512 [amt] 0.400019 314572 0.005725 None
5 IForest 0.142521 0.844576 0.027910 0.772904 0.053874 0.808946 False pyod 0.018475 10512 [amt] 0.400019 314572 0.005725 None
6 INNE 0.333542 0.686339 0.009807 0.538034 0.019263 0.612614 False pyod 0.018475 10512 [amt] 0.400019 314572 0.005725 None
7 KNN 0.010657 0.823697 0.023412 0.731816 0.045373 0.778021 False pyod 0.018475 10512 [amt] 0.400019 314572 0.005725 None
8 LODA 0.039394 0.976508 0.160078 0.730705 0.262622 0.854314 False pyod 0.018475 10512 [amt] 0.400019 314572 0.005725 None
9 LOF 0.018754 0.533716 0.006190 0.504164 0.012229 0.519025 False pyod 0.018475 10512 [amt] 0.400019 314572 0.005725 None
10 MAD 0.001444 0.983784 0.174556 0.491394 0.257604 0.739007 False pyod 0.018475 10512 [amt] 0.400019 314572 0.005725 None
11 MCD 0.005005 0.849548 0.029145 0.782343 0.056196 0.816139 False pyod 0.018475 10512 [amt] 0.400019 314572 0.005725 None
12 PCA 0.002154 0.483336 0.002501 0.224320 0.004947 0.354574 False pyod 0.018475 10512 [amt] 0.400019 314572 0.005725 None
13 ROD 1.825828 0.666474 0.002940 0.169350 0.005780 0.419343 False pyod 0.018475 10512 [amt] 0.400019 314572 0.005725 None
# Train on df_train8, test on df_test (recorded output below: 8410 training
# rows, train fraud rate 0.5).
pyod_0507(*pyod_preprocess_0503(df_train8, df_test, _df8_mean))
model time acc pre rec f1 auc graph_based method throw_rate train_size train_cols train_frate test_size test_frate hyper_params
0 ABOD 0.454913 0.994275 0.000000 0.000000 0.000000 0.500000 False pyod 0.018595 8410 [amt] 0.5 314572 0.005725 None
1 COPOD 0.003036 0.506822 0.009117 0.790672 0.018027 0.647930 False pyod 0.018595 8410 [amt] 0.5 314572 0.005725 None
2 ECOD 0.003018 0.508399 0.009719 0.841199 0.019217 0.673841 False pyod 0.018595 8410 [amt] 0.5 314572 0.005725 None
3 GMM 0.003618 0.657573 0.010799 0.649084 0.021244 0.653353 False pyod 0.018595 8410 [amt] 0.5 314572 0.005725 None
4 HBOS 0.001506 0.965312 0.115407 0.759023 0.200352 0.862761 False pyod 0.018595 8410 [amt] 0.5 314572 0.005725 None
5 IForest 0.123626 0.758396 0.017756 0.758468 0.034699 0.758431 False pyod 0.018595 8410 [amt] 0.5 314572 0.005725 None
6 INNE 0.275111 0.573602 0.007840 0.585230 0.015473 0.579383 False pyod 0.018595 8410 [amt] 0.5 314572 0.005725 None
7 KNN 0.008123 0.747546 0.016375 0.729595 0.032032 0.738622 False pyod 0.018595 8410 [amt] 0.5 314572 0.005725 None
8 LODA 0.021789 0.965308 0.115398 0.759023 0.200337 0.862760 False pyod 0.018595 8410 [amt] 0.5 314572 0.005725 None
9 LOF 0.014334 0.438125 0.005986 0.588562 0.011852 0.512911 False pyod 0.018595 8410 [amt] 0.5 314572 0.005725 None
10 MAD 0.001355 0.988203 0.236770 0.476957 0.316449 0.734052 False pyod 0.018595 8410 [amt] 0.5 314572 0.005725 None
11 MCD 0.003533 0.774929 0.019739 0.787340 0.038513 0.781099 False pyod 0.018595 8410 [amt] 0.5 314572 0.005725 None
12 PCA 0.002043 0.225071 0.001615 0.217657 0.003206 0.221385 False pyod 0.018595 8410 [amt] 0.5 314572 0.005725 None
13 ROD 1.119102 0.496554 0.002308 0.201555 0.004563 0.349904 False pyod 0.018595 8410 [amt] 0.5 314572 0.005725 None

amt 80 이하 (테스트 데이터 중 amt <= 80인 거래만 사용)

# Restrict the test set to transactions whose amount is at most 80.
df_80 = df_test.loc[df_test['amt'] <= 80]

# _dfK_ stacks training set K on top of the filtered test set.
_df1_, _df2_, _df3_, _df4_, _df5_, _df6_, _df7_, _df8_ = (
    pd.concat([train_part, df_80])
    for train_part in (
        df_train1, df_train2, df_train3, df_train4,
        df_train5, df_train6, df_train7, df_train8,
    )
)

# _dfK_mean_ is the mean of is_fraud over the combined train K + df_80 frame;
# it is passed on as `throw_rate` in the calls below.
(
    _df1_mean_, _df2_mean_, _df3_mean_, _df4_mean_,
    _df5_mean_, _df6_mean_, _df7_mean_, _df8_mean_,
) = (
    combined.is_fraud.mean()
    for combined in (_df1_, _df2_, _df3_, _df4_, _df5_, _df6_, _df7_, _df8_)
)
# Train on df_train1, test on df_80 (recorded output below: 231011 test rows,
# test fraud rate 0.001667).
pyod_0507(*pyod_preprocess_0503(df_train1, df_80, _df1_mean_))
model time acc pre rec f1 auc graph_based method throw_rate train_size train_cols train_frate test_size test_frate hyper_params
0 ABOD 33.048269 0.998333 0.000000 0.00000 0.000000 0.500000 False pyod 0.004756 734003 [amt] 0.005729 231011 0.001667 None
1 COPOD 0.264568 0.998333 0.000000 0.00000 0.000000 0.500000 False pyod 0.004756 734003 [amt] 0.005729 231011 0.001667 None
2 ECOD 0.163266 0.994797 0.000000 0.00000 0.000000 0.498229 False pyod 0.004756 734003 [amt] 0.005729 231011 0.001667 None
3 GMM 0.294567 0.998333 0.000000 0.00000 0.000000 0.500000 False pyod 0.004756 734003 [amt] 0.005729 231011 0.001667 None
4 HBOS 0.987751 0.998333 0.000000 0.00000 0.000000 0.500000 False pyod 0.004756 734003 [amt] 0.005729 231011 0.001667 None
5 IForest 4.848966 0.998333 0.000000 0.00000 0.000000 0.500000 False pyod 0.004756 734003 [amt] 0.005729 231011 0.001667 None
6 INNE 24.081585 0.998333 0.000000 0.00000 0.000000 0.500000 False pyod 0.004756 734003 [amt] 0.005729 231011 0.001667 None
7 KNN 1.383012 0.998333 0.000000 0.00000 0.000000 0.500000 False pyod 0.004756 734003 [amt] 0.005729 231011 0.001667 None
8 LODA 4.331329 0.998333 0.000000 0.00000 0.000000 0.500000 False pyod 0.004756 734003 [amt] 0.005729 231011 0.001667 None
9 LOF 2.130468 0.998333 0.000000 0.00000 0.000000 0.500000 False pyod 0.004756 734003 [amt] 0.005729 231011 0.001667 None
10 MAD 0.027751 0.998333 0.000000 0.00000 0.000000 0.500000 False pyod 0.004756 734003 [amt] 0.005729 231011 0.001667 None
11 MCD 0.183615 0.998333 0.000000 0.00000 0.000000 0.500000 False pyod 0.004756 734003 [amt] 0.005729 231011 0.001667 None
12 PCA 0.047241 0.998333 0.000000 0.00000 0.000000 0.500000 False pyod 0.004756 734003 [amt] 0.005729 231011 0.001667 None
13 ROD 16.622735 0.619810 0.002061 0.47013 0.004105 0.545095 False pyod 0.004756 734003 [amt] 0.005729 231011 0.001667 None
# Train on df_train2 (train fraud rate 0.01 in the recorded output below),
# test on df_80.
pyod_0507(*pyod_preprocess_0503(df_train2, df_80, _df2_mean_))
model time acc pre rec f1 auc graph_based method throw_rate train_size train_cols train_frate test_size test_frate hyper_params
0 ABOD 24.320429 0.998333 0.000000 0.000000 0.000000 0.500000 False pyod 0.007045 420500 [amt] 0.01 231011 0.001667 None
1 COPOD 0.100467 0.998333 0.000000 0.000000 0.000000 0.500000 False pyod 0.007045 420500 [amt] 0.01 231011 0.001667 None
2 ECOD 0.100975 0.992221 0.000000 0.000000 0.000000 0.496939 False pyod 0.007045 420500 [amt] 0.01 231011 0.001667 None
3 GMM 0.182355 0.998333 0.000000 0.000000 0.000000 0.500000 False pyod 0.007045 420500 [amt] 0.01 231011 0.001667 None
4 HBOS 0.018261 0.998333 0.000000 0.000000 0.000000 0.500000 False pyod 0.007045 420500 [amt] 0.01 231011 0.001667 None
5 IForest 2.777030 0.998333 0.000000 0.000000 0.000000 0.500000 False pyod 0.007045 420500 [amt] 0.01 231011 0.001667 None
6 INNE 13.074909 0.998333 0.000000 0.000000 0.000000 0.500000 False pyod 0.007045 420500 [amt] 0.01 231011 0.001667 None
7 KNN 0.612242 0.998333 0.000000 0.000000 0.000000 0.500000 False pyod 0.007045 420500 [amt] 0.01 231011 0.001667 None
8 LODA 1.178642 0.998333 0.000000 0.000000 0.000000 0.500000 False pyod 0.007045 420500 [amt] 0.01 231011 0.001667 None
9 LOF 0.991292 0.994212 0.002092 0.005195 0.002983 0.500529 False pyod 0.007045 420500 [amt] 0.01 231011 0.001667 None
10 MAD 0.016337 0.998333 0.000000 0.000000 0.000000 0.500000 False pyod 0.007045 420500 [amt] 0.01 231011 0.001667 None
11 MCD 0.090446 0.998333 0.000000 0.000000 0.000000 0.500000 False pyod 0.007045 420500 [amt] 0.01 231011 0.001667 None
12 PCA 0.024386 0.998333 0.000000 0.000000 0.000000 0.500000 False pyod 0.007045 420500 [amt] 0.01 231011 0.001667 None
13 ROD 10.894339 0.625633 0.002013 0.451948 0.004008 0.538935 False pyod 0.007045 420500 [amt] 0.01 231011 0.001667 None
# Train on df_train3 (train fraud rate 0.05 in the recorded output below),
# test on df_80.
pyod_0507(*pyod_preprocess_0503(df_train3, df_80, _df3_mean_))
model time acc pre rec f1 auc graph_based method throw_rate train_size train_cols train_frate test_size test_frate hyper_params
0 ABOD 3.534318 0.998333 0.000000 0.000000 0.000000 0.500000 False pyod 0.014566 84100 [amt] 0.05 231011 0.001667 None
1 COPOD 0.017679 0.995831 0.000000 0.000000 0.000000 0.498747 False pyod 0.014566 84100 [amt] 0.05 231011 0.001667 None
2 ECOD 0.017817 0.971356 0.000321 0.005195 0.000604 0.489082 False pyod 0.014566 84100 [amt] 0.05 231011 0.001667 None
3 GMM 0.089608 0.998333 0.000000 0.000000 0.000000 0.500000 False pyod 0.014566 84100 [amt] 0.05 231011 0.001667 None
4 HBOS 0.003508 0.998333 0.000000 0.000000 0.000000 0.500000 False pyod 0.014566 84100 [amt] 0.05 231011 0.001667 None
5 IForest 0.591236 0.998333 0.000000 0.000000 0.000000 0.500000 False pyod 0.014566 84100 [amt] 0.05 231011 0.001667 None
6 INNE 2.175482 0.998333 0.000000 0.000000 0.000000 0.500000 False pyod 0.014566 84100 [amt] 0.05 231011 0.001667 None
7 KNN 0.100606 0.998333 0.000000 0.000000 0.000000 0.500000 False pyod 0.014566 84100 [amt] 0.05 231011 0.001667 None
8 LODA 0.314938 0.998333 0.000000 0.000000 0.000000 0.500000 False pyod 0.014566 84100 [amt] 0.05 231011 0.001667 None
9 LOF 0.164012 0.922844 0.001429 0.064935 0.002797 0.494605 False pyod 0.014566 84100 [amt] 0.05 231011 0.001667 None
10 MAD 0.003888 0.998333 0.000000 0.000000 0.000000 0.500000 False pyod 0.014566 84100 [amt] 0.05 231011 0.001667 None
11 MCD 0.019122 0.998333 0.000000 0.000000 0.000000 0.500000 False pyod 0.014566 84100 [amt] 0.05 231011 0.001667 None
12 PCA 0.005870 0.970802 0.000314 0.005195 0.000593 0.488805 False pyod 0.014566 84100 [amt] 0.05 231011 0.001667 None
13 ROD 6.082946 0.998333 0.000000 0.000000 0.000000 0.500000 False pyod 0.014566 84100 [amt] 0.05 231011 0.001667 None
# Train on df_train4 (train fraud rate 0.1 in the recorded output below),
# test on df_80.
pyod_0507(*pyod_preprocess_0503(df_train4, df_80, _df4_mean_))
model time acc pre rec f1 auc graph_based method throw_rate train_size train_cols train_frate test_size test_frate hyper_params
0 ABOD 1.907617 0.998333 0.000000 0.000000 0.000000 0.500000 False pyod 0.016809 42050 [amt] 0.1 231011 0.001667 None
1 COPOD 0.009418 0.944314 0.000000 0.000000 0.000000 0.472945 False pyod 0.016809 42050 [amt] 0.1 231011 0.001667 None
2 ECOD 0.009362 0.944137 0.000160 0.005195 0.000310 0.475450 False pyod 0.016809 42050 [amt] 0.1 231011 0.001667 None
3 GMM 0.025668 0.998333 0.000000 0.000000 0.000000 0.500000 False pyod 0.016809 42050 [amt] 0.1 231011 0.001667 None
4 HBOS 0.002367 0.998333 0.000000 0.000000 0.000000 0.500000 False pyod 0.016809 42050 [amt] 0.1 231011 0.001667 None
5 IForest 0.355085 0.998333 0.000000 0.000000 0.000000 0.500000 False pyod 0.016809 42050 [amt] 0.1 231011 0.001667 None
6 INNE 1.125801 0.998333 0.000000 0.000000 0.000000 0.500000 False pyod 0.016809 42050 [amt] 0.1 231011 0.001667 None
7 KNN 0.046059 0.998333 0.000000 0.000000 0.000000 0.500000 False pyod 0.016809 42050 [amt] 0.1 231011 0.001667 None
8 LODA 0.234425 0.998333 0.000000 0.000000 0.000000 0.500000 False pyod 0.016809 42050 [amt] 0.1 231011 0.001667 None
9 LOF 0.077505 0.889430 0.001150 0.075325 0.002266 0.483057 False pyod 0.016809 42050 [amt] 0.1 231011 0.001667 None
10 MAD 0.002438 0.998333 0.000000 0.000000 0.000000 0.500000 False pyod 0.016809 42050 [amt] 0.1 231011 0.001667 None
11 MCD 0.018501 0.998333 0.000000 0.000000 0.000000 0.500000 False pyod 0.016809 42050 [amt] 0.1 231011 0.001667 None
12 PCA 0.003618 0.923714 0.000174 0.007792 0.000340 0.466517 False pyod 0.016809 42050 [amt] 0.1 231011 0.001667 None
13 ROD 3.671902 0.998333 0.000000 0.000000 0.000000 0.500000 False pyod 0.016809 42050 [amt] 0.1 231011 0.001667 None
# Train on df_train5 (train fraud rate 0.2 in the recorded output below),
# test on df_80.
pyod_0507(*pyod_preprocess_0503(df_train5, df_80, _df5_mean_))
model time acc pre rec f1 auc graph_based method throw_rate train_size train_cols train_frate test_size test_frate hyper_params
0 ABOD 1.038106 0.998333 0.000000 0.000000 0.000000 0.500000 False pyod 0.018212 21025 [amt] 0.2 231011 0.001667 None
1 COPOD 0.005453 0.822511 0.000049 0.005195 0.000098 0.414535 False pyod 0.018212 21025 [amt] 0.2 231011 0.001667 None
2 ECOD 0.005515 0.822493 0.000098 0.010390 0.000195 0.417119 False pyod 0.018212 21025 [amt] 0.2 231011 0.001667 None
3 GMM 0.084351 0.888689 0.000158 0.010390 0.000311 0.450273 False pyod 0.018212 21025 [amt] 0.2 231011 0.001667 None
4 HBOS 0.001887 0.998333 0.000000 0.000000 0.000000 0.500000 False pyod 0.018212 21025 [amt] 0.2 231011 0.001667 None
5 IForest 0.212140 0.998333 0.000000 0.000000 0.000000 0.500000 False pyod 0.018212 21025 [amt] 0.2 231011 0.001667 None
6 INNE 0.586273 0.998333 0.000000 0.000000 0.000000 0.500000 False pyod 0.018212 21025 [amt] 0.2 231011 0.001667 None
7 KNN 0.022107 0.998333 0.000000 0.000000 0.000000 0.500000 False pyod 0.018212 21025 [amt] 0.2 231011 0.001667 None
8 LODA 0.065750 0.998333 0.000000 0.000000 0.000000 0.500000 False pyod 0.018212 21025 [amt] 0.2 231011 0.001667 None
9 LOF 0.037822 0.755804 0.001548 0.225974 0.003075 0.491331 False pyod 0.018212 21025 [amt] 0.2 231011 0.001667 None
10 MAD 0.001889 0.998333 0.000000 0.000000 0.000000 0.500000 False pyod 0.018212 21025 [amt] 0.2 231011 0.001667 None
11 MCD 0.006897 0.998333 0.000000 0.000000 0.000000 0.500000 False pyod 0.018212 21025 [amt] 0.2 231011 0.001667 None
12 PCA 0.002921 0.768279 0.000263 0.036364 0.000523 0.402932 False pyod 0.018212 21025 [amt] 0.2 231011 0.001667 None
13 ROD 2.782209 0.998333 0.000000 0.000000 0.000000 0.500000 False pyod 0.018212 21025 [amt] 0.2 231011 0.001667 None
# Train on df_train6 (train fraud rate 0.299993 in the recorded output below),
# test on df_80.
pyod_0507(*pyod_preprocess_0503(df_train6, df_80, _df6_mean_))
model time acc pre rec f1 auc graph_based method throw_rate train_size train_cols train_frate test_size test_frate hyper_params
0 ABOD 0.734261 0.998333 0.000000 0.000000 0.000000 0.500000 False pyod 0.018733 14017 [amt] 0.299993 231011 0.001667 None
1 COPOD 0.004099 0.714416 0.000046 0.007792 0.000091 0.361694 False pyod 0.018733 14017 [amt] 0.299993 231011 0.001667 None
2 ECOD 0.004143 0.714299 0.000091 0.015584 0.000182 0.365525 False pyod 0.018733 14017 [amt] 0.299993 231011 0.001667 None
3 GMM 0.083606 0.740657 0.000453 0.070130 0.000901 0.405953 False pyod 0.018733 14017 [amt] 0.299993 231011 0.001667 None
4 HBOS 0.001731 0.998333 0.000000 0.000000 0.000000 0.500000 False pyod 0.018733 14017 [amt] 0.299993 231011 0.001667 None
5 IForest 0.169311 0.998333 0.000000 0.000000 0.000000 0.500000 False pyod 0.018733 14017 [amt] 0.299993 231011 0.001667 None
6 INNE 0.429041 0.998333 0.000000 0.000000 0.000000 0.500000 False pyod 0.018733 14017 [amt] 0.299993 231011 0.001667 None
7 KNN 0.015929 0.998333 0.000000 0.000000 0.000000 0.500000 False pyod 0.018733 14017 [amt] 0.299993 231011 0.001667 None
8 LODA 0.106413 0.998333 0.000000 0.000000 0.000000 0.500000 False pyod 0.018733 14017 [amt] 0.299993 231011 0.001667 None
9 LOF 0.027041 0.648688 0.001752 0.368831 0.003487 0.508993 False pyod 0.018733 14017 [amt] 0.299993 231011 0.001667 None
10 MAD 0.001608 0.998333 0.000000 0.000000 0.000000 0.500000 False pyod 0.018733 14017 [amt] 0.299993 231011 0.001667 None
11 MCD 0.005758 0.998333 0.000000 0.000000 0.000000 0.500000 False pyod 0.018733 14017 [amt] 0.299993 231011 0.001667 None
12 PCA 0.002401 0.565198 0.002565 0.670130 0.005111 0.617576 False pyod 0.018733 14017 [amt] 0.299993 231011 0.001667 None
13 ROD 1.845949 0.998333 0.000000 0.000000 0.000000 0.500000 False pyod 0.018733 14017 [amt] 0.299993 231011 0.001667 None
# Unpack the return value of pyod_preprocess_0503(df_train7, df_80, _df7_mean_) into
# pyod_0507; the cell's value is the per-model result table rendered below
# (train_size 10512, train_frate ~0.4, test_size 231011).
# NOTE(review): df_80 and _df7_mean_ are defined outside this section. _df7_mean_ is
# presumably the fraud rate of df_train7 and df_80 combined (that would match the
# throw_rate 0.019004 reported below) -- confirm against its definition.
pyod_0507(*pyod_preprocess_0503(df_train7, df_80, _df7_mean_))
model time acc pre rec f1 auc graph_based method throw_rate train_size train_cols train_frate test_size test_frate hyper_params
0 ABOD 0.566639 0.998333 0.000000 0.000000 0.000000 0.500000 False pyod 0.019004 10512 [amt] 0.400019 231011 0.001667 None
1 COPOD 0.003471 0.609832 0.000134 0.031169 0.000266 0.320984 False pyod 0.019004 10512 [amt] 0.400019 231011 0.001667 None
2 ECOD 0.003462 0.609456 0.000089 0.020779 0.000177 0.315609 False pyod 0.019004 10512 [amt] 0.400019 231011 0.001667 None
3 GMM 0.080337 0.636152 0.001810 0.394805 0.003604 0.515680 False pyod 0.019004 10512 [amt] 0.400019 231011 0.001667 None
4 HBOS 0.001618 0.998333 0.000000 0.000000 0.000000 0.500000 False pyod 0.019004 10512 [amt] 0.400019 231011 0.001667 None
5 IForest 0.143335 0.964759 0.000258 0.005195 0.000491 0.485778 False pyod 0.019004 10512 [amt] 0.400019 231011 0.001667 None
6 INNE 0.347123 0.932618 0.000986 0.038961 0.001924 0.486535 False pyod 0.019004 10512 [amt] 0.400019 231011 0.001667 None
7 KNN 0.011090 0.986771 0.000000 0.000000 0.000000 0.494209 False pyod 0.019004 10512 [amt] 0.400019 231011 0.001667 None
8 LODA 0.086885 0.998333 0.000000 0.000000 0.000000 0.500000 False pyod 0.019004 10512 [amt] 0.400019 231011 0.001667 None
9 LOF 0.019148 0.527975 0.001770 0.501299 0.003527 0.514659 False pyod 0.019004 10512 [amt] 0.400019 231011 0.001667 None
10 MAD 0.001481 0.998333 0.000000 0.000000 0.000000 0.500000 False pyod 0.019004 10512 [amt] 0.400019 231011 0.001667 None
11 MCD 0.004954 0.998333 0.000000 0.000000 0.000000 0.500000 False pyod 0.019004 10512 [amt] 0.400019 231011 0.001667 None
12 PCA 0.002233 0.304778 0.002268 0.948052 0.004525 0.625878 False pyod 0.019004 10512 [amt] 0.400019 231011 0.001667 None
13 ROD 1.902633 0.998333 0.000000 0.000000 0.000000 0.500000 False pyod 0.019004 10512 [amt] 0.400019 231011 0.001667 None
# Unpack the return value of pyod_preprocess_0503(df_train8, df_80, _df8_mean_) into
# pyod_0507; the cell's value is the per-model result table rendered below
# (train_size 8410, train_frate 0.5, test_size 231011).
# NOTE(review): df_80 and _df8_mean_ are defined outside this section. _df8_mean_ is
# presumably the fraud rate of df_train8 and df_80 combined (that would match the
# throw_rate 0.019171 reported below) -- confirm against its definition.
pyod_0507(*pyod_preprocess_0503(df_train8, df_80, _df8_mean_))
model time acc pre rec f1 auc graph_based method throw_rate train_size train_cols train_frate test_size test_frate hyper_params
0 ABOD 0.465660 0.998333 0.000000 0.000000 0.000000 0.500000 False pyod 0.019171 8410 [amt] 0.5 231011 0.001667 None
1 COPOD 0.003057 0.506508 0.000246 0.072727 0.000491 0.289980 False pyod 0.019171 8410 [amt] 0.5 231011 0.001667 None
2 ECOD 0.003092 0.506262 0.000220 0.064935 0.000438 0.285967 False pyod 0.019171 8410 [amt] 0.5 231011 0.001667 None
3 GMM 0.003736 0.544883 0.003074 0.841558 0.006126 0.692973 False pyod 0.019171 8410 [amt] 0.5 231011 0.001667 None
4 HBOS 0.001516 0.998333 0.000000 0.000000 0.000000 0.500000 False pyod 0.019171 8410 [amt] 0.5 231011 0.001667 None
5 IForest 0.129530 0.931891 0.000130 0.005195 0.000254 0.469316 False pyod 0.019171 8410 [amt] 0.5 231011 0.001667 None
6 INNE 0.291061 0.774019 0.001846 0.249351 0.003664 0.512123 False pyod 0.019171 8410 [amt] 0.5 231011 0.001667 None
7 KNN 0.008794 0.931873 0.000195 0.007792 0.000381 0.470604 False pyod 0.019171 8410 [amt] 0.5 231011 0.001667 None
8 LODA 0.022747 0.998333 0.000000 0.000000 0.000000 0.500000 False pyod 0.019171 8410 [amt] 0.5 231011 0.001667 None
9 LOF 0.015063 0.439728 0.001630 0.548052 0.003250 0.493800 False pyod 0.019171 8410 [amt] 0.5 231011 0.001667 None
10 MAD 0.001454 0.998333 0.000000 0.000000 0.000000 0.500000 False pyod 0.019171 8410 [amt] 0.5 231011 0.001667 None
11 MCD 0.003814 0.987650 0.000809 0.005195 0.001400 0.497242 False pyod 0.019171 8410 [amt] 0.5 231011 0.001667 None
12 PCA 0.002218 0.001667 0.001667 1.000000 0.003328 0.500000 False pyod 0.019171 8410 [amt] 0.5 231011 0.001667 None
13 ROD 1.254212 0.501517 0.003134 0.940260 0.006248 0.720522 False pyod 0.019171 8410 [amt] 0.5 231011 0.001667 None