[Proposed] 불균형 데이터(0.2/0.05)

Author

김보람

Published

February 5, 2024

imports

import pandas as pd
import numpy as np
import sklearn
import pickle 
import time 
import datetime
import warnings
# Suppress every warning category for the rest of this session.
warnings.filterwarnings('ignore')
# IPython magic: execute the helper script and pull its definitions into this
# notebook's namespace (presumably where try_1 is defined -- confirm against
# function_proposed_gcn.py).
%run function_proposed_gcn.py
# NOTE(review): pickle.load can execute arbitrary code while deserializing;
# only load fraudTrain.pkl when it comes from a trusted source.
with open('fraudTrain.pkl', 'rb') as file:
    fraudTrain = pickle.load(file)    
# Theta sweep with the last positional argument (reported as `gamma` in the
# result table) fixed at 0.3. The first run starts a fresh results table;
# every later run hands the table built so far back in through prev_results.
df_results = try_1(fraudTrain, 0.2, 0.05, 80280.0, 0.3)
for later_theta in (
    85280.0,
    75280.0,
    65280.0,
    55280.0,
    45280.0,
    35280.0,
    25280.0,
    15280.0,
    5280.0,
):
    df_results = try_1(fraudTrain, 0.2, 0.05, later_theta, 0.3, prev_results=df_results)

# Persist the sweep under a timestamped name so earlier runs are not
# overwritten, then leave the table as the cell's displayed value.
ymdhms = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
df_results.to_csv(f'./results/{ymdhms}-proposed.csv', index=False)
df_results
model time acc pre rec f1 auc graph_based method throw_rate train_size train_cols train_frate test_size test_frate hyper_params theta gamma
0 GCN None 0.977356 0.708475 0.928889 0.803846 0.988890 True Proposed 0.2 21021 amt 0.264307 9009 0.05 None 80280.0 0.3
1 GCN None 0.975025 0.682334 0.935556 0.789128 0.988872 True Proposed 0.2 21021 amt 0.264307 9009 0.05 None 85280.0 0.3
2 GCN None 0.976690 0.699336 0.935556 0.800380 0.989083 True Proposed 0.2 21021 amt 0.264307 9009 0.05 None 75280.0 0.3
3 GCN None 0.975136 0.685855 0.926667 0.788280 0.983061 True Proposed 0.2 21021 amt 0.264307 9009 0.05 None 65280.0 0.3
4 GCN None 0.973138 0.685053 0.855556 0.760870 0.967401 True Proposed 0.2 21021 amt 0.264307 9009 0.05 None 55280.0 0.3
5 GCN None 0.970363 0.652246 0.871111 0.745956 0.964500 True Proposed 0.2 21021 amt 0.264307 9009 0.05 None 45280.0 0.3
6 GCN None 0.973249 0.693878 0.831111 0.756320 0.953423 True Proposed 0.2 21021 amt 0.264307 9009 0.05 None 35280.0 0.3
7 GCN None 0.972472 0.696498 0.795556 0.742739 0.947835 True Proposed 0.2 21021 amt 0.264307 9009 0.05 None 25280.0 0.3
8 GCN None 0.970363 0.661376 0.833333 0.737463 0.942470 True Proposed 0.2 21021 amt 0.264307 9009 0.05 None 15280.0 0.3
9 GCN None 0.971029 0.681382 0.788889 0.731205 0.920218 True Proposed 0.2 21021 amt 0.264307 9009 0.05 None 5280.0 0.3
# Theta sweep with the last positional argument (reported as `gamma` in the
# result table) fixed at 0.2. The first run starts a fresh results table;
# every later run hands the table built so far back in through prev_results.
df_results = try_1(fraudTrain, 0.2, 0.05, 80280.0, 0.2)
for later_theta in (
    85280.0,
    75280.0,
    65280.0,
    55280.0,
    45280.0,
    35280.0,
    25280.0,
    15280.0,
    5280.0,
):
    df_results = try_1(fraudTrain, 0.2, 0.05, later_theta, 0.2, prev_results=df_results)

# Persist the sweep under a timestamped name so earlier runs are not
# overwritten, then leave the table as the cell's displayed value.
ymdhms = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
df_results.to_csv(f'./results/{ymdhms}-proposed.csv', index=False)
df_results
model time acc pre rec f1 auc graph_based method throw_rate train_size train_cols train_frate test_size test_frate hyper_params theta gamma
0 GCN None 0.974803 0.679549 0.937778 0.788049 0.990093 True Proposed 0.2 21021 amt 0.264307 9009 0.05 None 80280.0 0.2
1 GCN None 0.974692 0.678457 0.937778 0.787313 0.990258 True Proposed 0.2 21021 amt 0.264307 9009 0.05 None 85280.0 0.2
2 GCN None 0.977134 0.706780 0.926667 0.801923 0.990247 True Proposed 0.2 21021 amt 0.264307 9009 0.05 None 75280.0 0.2
3 GCN None 0.977911 0.720562 0.911111 0.804711 0.989463 True Proposed 0.2 21021 amt 0.264307 9009 0.05 None 65280.0 0.2
4 GCN None 0.977356 0.709898 0.924444 0.803089 0.989131 True Proposed 0.2 21021 amt 0.264307 9009 0.05 None 55280.0 0.2
5 GCN None 0.973360 0.683566 0.868889 0.765166 0.974388 True Proposed 0.2 21021 amt 0.264307 9009 0.05 None 45280.0 0.2
6 GCN None 0.973249 0.688969 0.846667 0.759721 0.965068 True Proposed 0.2 21021 amt 0.264307 9009 0.05 None 35280.0 0.2
7 GCN None 0.972139 0.681901 0.828889 0.748245 0.952331 True Proposed 0.2 21021 amt 0.264307 9009 0.05 None 25280.0 0.2
8 GCN None 0.972694 0.697674 0.800000 0.745342 0.945920 True Proposed 0.2 21021 amt 0.264307 9009 0.05 None 15280.0 0.2
9 GCN None 0.971917 0.685499 0.808889 0.742100 0.927742 True Proposed 0.2 21021 amt 0.264307 9009 0.05 None 5280.0 0.2
# Extends the gamma = 0.3 sweep to larger theta values (90280 up to 185280).
# The first run starts a fresh results table; every later run hands the
# table built so far back in through prev_results.
df_results = try_1(fraudTrain, 0.2, 0.05, 90280.0, 0.3)
for later_theta in (
    105280.0,
    115280.0,
    125280.0,
    135280.0,
    145280.0,
    155280.0,
    165280.0,
    175280.0,
    185280.0,
):
    df_results = try_1(fraudTrain, 0.2, 0.05, later_theta, 0.3, prev_results=df_results)

# Persist the sweep under a timestamped name so earlier runs are not
# overwritten, then leave the table as the cell's displayed value.
ymdhms = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
df_results.to_csv(f'./results/{ymdhms}-proposed.csv', index=False)
df_results
model time acc pre rec f1 auc graph_based method throw_rate train_size train_cols train_frate test_size test_frate hyper_params theta gamma
0 GCN None 0.976912 0.703020 0.931111 0.801147 0.989170 True Proposed 0.2 21021 amt 0.264307 9009 0.05 None 90280.0 0.3
1 GCN None 0.975913 0.691928 0.933333 0.794702 0.989941 True Proposed 0.2 21021 amt 0.264307 9009 0.05 None 105280.0 0.3
2 GCN None 0.977245 0.704508 0.937778 0.804576 0.990374 True Proposed 0.2 21021 amt 0.264307 9009 0.05 None 115280.0 0.3
3 GCN None 0.975580 0.686084 0.942222 0.794007 0.989777 True Proposed 0.2 21021 amt 0.264307 9009 0.05 None 125280.0 0.3
4 GCN None 0.976912 0.696429 0.953333 0.804878 0.990892 True Proposed 0.2 21021 amt 0.264307 9009 0.05 None 135280.0 0.3
5 GCN None 0.977689 0.709949 0.935556 0.807287 0.990954 True Proposed 0.2 21021 amt 0.264307 9009 0.05 None 145280.0 0.3
6 GCN None 0.977356 0.699029 0.960000 0.808989 0.990922 True Proposed 0.2 21021 amt 0.264307 9009 0.05 None 155280.0 0.3
7 GCN None 0.976912 0.699670 0.942222 0.803030 0.990838 True Proposed 0.2 21021 amt 0.264307 9009 0.05 None 165280.0 0.3
8 GCN None 0.974803 0.673406 0.962222 0.792315 0.990807 True Proposed 0.2 21021 amt 0.264307 9009 0.05 None 175280.0 0.3
9 GCN None 0.976579 0.693679 0.951111 0.802249 0.990702 True Proposed 0.2 21021 amt 0.264307 9009 0.05 None 185280.0 0.3
# Theta sweep with the last positional argument (reported as `gamma` in the
# result table) fixed at 0.4. The first run starts a fresh results table;
# every later run hands the table built so far back in through prev_results.
df_results = try_1(fraudTrain, 0.2, 0.05, 80280.0, 0.4)
for later_theta in (
    85280.0,
    75280.0,
    65280.0,
    55280.0,
    45280.0,
    35280.0,
    25280.0,
    15280.0,
    5280.0,
):
    df_results = try_1(fraudTrain, 0.2, 0.05, later_theta, 0.4, prev_results=df_results)

# Persist the sweep under a timestamped name so earlier runs are not
# overwritten, then leave the table as the cell's displayed value.
ymdhms = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
df_results.to_csv(f'./results/{ymdhms}-proposed.csv', index=False)
df_results
model time acc pre rec f1 auc graph_based method throw_rate train_size train_cols train_frate test_size test_frate hyper_params theta gamma
0 GCN None 0.974137 0.690685 0.873333 0.771344 0.976023 True Proposed 0.2 21021 amt 0.264307 9009 0.05 None 80280.0 0.4
1 GCN None 0.976024 0.700342 0.908889 0.791103 0.983160 True Proposed 0.2 21021 amt 0.264307 9009 0.05 None 85280.0 0.4
2 GCN None 0.973027 0.681261 0.864444 0.761998 0.970200 True Proposed 0.2 21021 amt 0.264307 9009 0.05 None 75280.0 0.4
3 GCN None 0.970696 0.658163 0.860000 0.745665 0.965231 True Proposed 0.2 21021 amt 0.264307 9009 0.05 None 65280.0 0.4
4 GCN None 0.973360 0.693015 0.837778 0.758551 0.962320 True Proposed 0.2 21021 amt 0.264307 9009 0.05 None 55280.0 0.4
5 GCN None 0.971362 0.669014 0.844444 0.746562 0.952634 True Proposed 0.2 21021 amt 0.264307 9009 0.05 None 45280.0 0.4
6 GCN None 0.972028 0.684015 0.817778 0.744939 0.950328 True Proposed 0.2 21021 amt 0.264307 9009 0.05 None 35280.0 0.4
7 GCN None 0.972694 0.688192 0.828889 0.752016 0.945925 True Proposed 0.2 21021 amt 0.264307 9009 0.05 None 25280.0 0.4
8 GCN None 0.971140 0.674632 0.815556 0.738431 0.939967 True Proposed 0.2 21021 amt 0.264307 9009 0.05 None 15280.0 0.4
9 GCN None 0.970141 0.681363 0.755556 0.716544 0.913493 True Proposed 0.2 21021 amt 0.264307 9009 0.05 None 5280.0 0.4
# Theta sweep with the last positional argument (reported as `gamma` in the
# result table) fixed at 0.5. The theta values are deliberately kept in the
# original, non-monotonic run order so the result rows keep the same index.
df_results = try_1(fraudTrain, 0.2, 0.05, 80280.0, 0.5)
for later_theta in (
    105280.0,
    115280.0,
    125280.0,
    135280.0,
    85280.0,
    75280.0,
    65280.0,
):
    df_results = try_1(fraudTrain, 0.2, 0.05, later_theta, 0.5, prev_results=df_results)

# Persist the sweep under a timestamped name so earlier runs are not
# overwritten, then leave the table as the cell's displayed value.
ymdhms = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
df_results.to_csv(f'./results/{ymdhms}-proposed.csv', index=False)
df_results
model time acc pre rec f1 auc graph_based method throw_rate train_size train_cols train_frate test_size test_frate hyper_params theta gamma
0 GCN None 0.973582 0.692727 0.846667 0.762000 0.964986 True Proposed 0.2 21021 amt 0.264307 9009 0.05 None 80280.0 0.5
1 GCN None 0.972583 0.670017 0.888889 0.764088 0.974615 True Proposed 0.2 21021 amt 0.264307 9009 0.05 None 105280.0 0.5
2 GCN None 0.977023 0.711304 0.908889 0.798049 0.985072 True Proposed 0.2 21021 amt 0.264307 9009 0.05 None 115280.0 0.5
3 GCN None 0.977467 0.710392 0.926667 0.804243 0.988520 True Proposed 0.2 21021 amt 0.264307 9009 0.05 None 125280.0 0.5
4 GCN None 0.971251 0.644917 0.944444 0.766456 0.988576 True Proposed 0.2 21021 amt 0.264307 9009 0.05 None 135280.0 0.5
5 GCN None 0.971362 0.662712 0.868889 0.751923 0.965088 True Proposed 0.2 21021 amt 0.264307 9009 0.05 None 85280.0 0.5
6 GCN None 0.972139 0.676732 0.846667 0.752221 0.963037 True Proposed 0.2 21021 amt 0.264307 9009 0.05 None 75280.0 0.5
7 GCN None 0.972472 0.683636 0.835556 0.752000 0.957532 True Proposed 0.2 21021 amt 0.264307 9009 0.05 None 65280.0 0.5