[Proposed] 불균형 데이터 (0.2/0.005)

Author

김보람

Published

February 5, 2024

imports

import pandas as pd
import numpy as np
import sklearn
import pickle 
import time 
import datetime
import warnings
warnings.filterwarnings('ignore')
%run function_proposed_gcn.py
with open('fraudTrain.pkl', 'rb') as file:
    fraudTrain = pickle.load(file)    
# Run try_1 over a list of theta values (its fourth argument) while holding
# the fifth argument at 0.3; the printed results table labels these two
# columns "theta" and "gamma".
sweep_gamma = 0.3
sweep_thetas = [
    80280.0, 85280.0, 75280.0, 65280.0, 55280.0,
    45280.0, 35280.0, 25280.0, 15280.0, 5280.0,
]

# The first run is called without prev_results; every later run passes the
# table returned so far back in through prev_results.
df_results = try_1(fraudTrain, 0.2, 0.005, sweep_thetas[0], sweep_gamma)
for sweep_theta in sweep_thetas[1:]:
    df_results = try_1(
        fraudTrain, 0.2, 0.005, sweep_theta, sweep_gamma,
        prev_results=df_results,
    )

# Write the collected runs to a CSV whose name carries a local-time stamp
# (YYYYMMDD-HHMMSS), then leave the table as the cell's displayed value.
ymdhms = datetime.datetime.fromtimestamp(time.time()).strftime('%Y%m%d-%H%M%S')
df_results.to_csv(f'./results/{ymdhms}-proposed.csv', index=False)
df_results
model time acc pre rec f1 auc graph_based method throw_rate train_size train_cols train_frate test_size test_frate hyper_params theta gamma
0 GCN None 0.977689 0.166667 0.866667 0.279570 0.983705 True Proposed 0.2 21021 amt 0.283574 9009 0.005 None 80280.0 0.3
1 GCN None 0.976801 0.158333 0.844444 0.266667 0.982562 True Proposed 0.2 21021 amt 0.283574 9009 0.005 None 85280.0 0.3
2 GCN None 0.978355 0.171053 0.866667 0.285714 0.984094 True Proposed 0.2 21021 amt 0.283574 9009 0.005 None 75280.0 0.3
3 GCN None 0.977911 0.168103 0.866667 0.281588 0.972460 True Proposed 0.2 21021 amt 0.283574 9009 0.005 None 65280.0 0.3
4 GCN None 0.976801 0.158333 0.844444 0.266667 0.952367 True Proposed 0.2 21021 amt 0.283574 9009 0.005 None 55280.0 0.3
5 GCN None 0.977245 0.158120 0.822222 0.265233 0.951381 True Proposed 0.2 21021 amt 0.283574 9009 0.005 None 45280.0 0.3
6 GCN None 0.977578 0.157205 0.800000 0.262774 0.951931 True Proposed 0.2 21021 amt 0.283574 9009 0.005 None 35280.0 0.3
7 GCN None 0.978466 0.156682 0.755556 0.259542 0.951116 True Proposed 0.2 21021 amt 0.283574 9009 0.005 None 25280.0 0.3
8 GCN None 0.979243 0.168224 0.800000 0.277992 0.950702 True Proposed 0.2 21021 amt 0.283574 9009 0.005 None 15280.0 0.3
9 GCN None 0.980131 0.158163 0.688889 0.257261 0.922967 True Proposed 0.2 21021 amt 0.283574 9009 0.005 None 5280.0 0.3
# Run try_1 over a list of theta values (its fourth argument) while holding
# the fifth argument at 0.2; the printed results table labels these two
# columns "theta" and "gamma".
sweep_gamma = 0.2
sweep_thetas = [
    80280.0, 85280.0, 75280.0, 65280.0, 55280.0,
    45280.0, 35280.0, 25280.0, 15280.0, 5280.0,
]

# The first run is called without prev_results; every later run passes the
# table returned so far back in through prev_results.
df_results = try_1(fraudTrain, 0.2, 0.005, sweep_thetas[0], sweep_gamma)
for sweep_theta in sweep_thetas[1:]:
    df_results = try_1(
        fraudTrain, 0.2, 0.005, sweep_theta, sweep_gamma,
        prev_results=df_results,
    )

# Write the collected runs to a CSV whose name carries a local-time stamp
# (YYYYMMDD-HHMMSS), then leave the table as the cell's displayed value.
ymdhms = datetime.datetime.fromtimestamp(time.time()).strftime('%Y%m%d-%H%M%S')
df_results.to_csv(f'./results/{ymdhms}-proposed.csv', index=False)
df_results
model time acc pre rec f1 auc graph_based method throw_rate train_size train_cols train_frate test_size test_frate hyper_params theta gamma
0 GCN None 0.977023 0.162500 0.866667 0.273684 0.983556 True Proposed 0.2 21021 amt 0.283574 9009 0.005 None 80280.0 0.2
1 GCN None 0.978688 0.173333 0.866667 0.288889 0.984035 True Proposed 0.2 21021 amt 0.283574 9009 0.005 None 85280.0 0.2
2 GCN None 0.976690 0.160494 0.866667 0.270833 0.983425 True Proposed 0.2 21021 amt 0.283574 9009 0.005 None 75280.0 0.2
3 GCN None 0.977356 0.164557 0.866667 0.276596 0.983224 True Proposed 0.2 21021 amt 0.283574 9009 0.005 None 65280.0 0.2
4 GCN None 0.978133 0.169565 0.866667 0.283636 0.983361 True Proposed 0.2 21021 amt 0.283574 9009 0.005 None 55280.0 0.2
5 GCN None 0.978688 0.170404 0.844444 0.283582 0.956493 True Proposed 0.2 21021 amt 0.283574 9009 0.005 None 45280.0 0.2
6 GCN None 0.979465 0.166667 0.777778 0.274510 0.952407 True Proposed 0.2 21021 amt 0.283574 9009 0.005 None 35280.0 0.2
7 GCN None 0.973138 0.133829 0.800000 0.229299 0.951815 True Proposed 0.2 21021 amt 0.283574 9009 0.005 None 25280.0 0.2
8 GCN None 0.978355 0.162162 0.800000 0.269663 0.951978 True Proposed 0.2 21021 amt 0.283574 9009 0.005 None 15280.0 0.2
9 GCN None 0.979243 0.168224 0.800000 0.277992 0.931861 True Proposed 0.2 21021 amt 0.283574 9009 0.005 None 5280.0 0.2
# Run try_1 over larger theta values (its fourth argument), from 90280 up to
# 185280, while holding the fifth argument at 0.3; the printed results table
# labels these two columns "theta" and "gamma".
sweep_gamma = 0.3
sweep_thetas = [
    90280.0, 105280.0, 115280.0, 125280.0, 135280.0,
    145280.0, 155280.0, 165280.0, 175280.0, 185280.0,
]

# The first run is called without prev_results; every later run passes the
# table returned so far back in through prev_results.
df_results = try_1(fraudTrain, 0.2, 0.005, sweep_thetas[0], sweep_gamma)
for sweep_theta in sweep_thetas[1:]:
    df_results = try_1(
        fraudTrain, 0.2, 0.005, sweep_theta, sweep_gamma,
        prev_results=df_results,
    )

# Write the collected runs to a CSV whose name carries a local-time stamp
# (YYYYMMDD-HHMMSS), then leave the table as the cell's displayed value.
ymdhms = datetime.datetime.fromtimestamp(time.time()).strftime('%Y%m%d-%H%M%S')
df_results.to_csv(f'./results/{ymdhms}-proposed.csv', index=False)
df_results
model time acc pre rec f1 auc graph_based method throw_rate train_size train_cols train_frate test_size test_frate hyper_params theta gamma
0 GCN None 0.978688 0.170404 0.844444 0.283582 0.984218 True Proposed 0.2 21021 amt 0.283574 9009 0.005 None 90280.0 0.3
1 GCN None 0.977467 0.165254 0.866667 0.277580 0.983574 True Proposed 0.2 21021 amt 0.283574 9009 0.005 None 105280.0 0.3
2 GCN None 0.977467 0.165254 0.866667 0.277580 0.984196 True Proposed 0.2 21021 amt 0.283574 9009 0.005 None 115280.0 0.3
3 GCN None 0.978355 0.171053 0.866667 0.285714 0.983333 True Proposed 0.2 21021 amt 0.283574 9009 0.005 None 125280.0 0.3
4 GCN None 0.974026 0.151292 0.911111 0.259494 0.988520 True Proposed 0.2 21021 amt 0.283574 9009 0.005 None 135280.0 0.3
5 GCN None 0.976690 0.165992 0.911111 0.280822 0.988331 True Proposed 0.2 21021 amt 0.283574 9009 0.005 None 145280.0 0.3
6 GCN None 0.977023 0.168033 0.911111 0.283737 0.988294 True Proposed 0.2 21021 amt 0.283574 9009 0.005 None 155280.0 0.3
7 GCN None 0.977467 0.170833 0.911111 0.287719 0.988234 True Proposed 0.2 21021 amt 0.283574 9009 0.005 None 165280.0 0.3
8 GCN None 0.977245 0.166667 0.888889 0.280702 0.988103 True Proposed 0.2 21021 amt 0.283574 9009 0.005 None 175280.0 0.3
9 GCN None 0.977356 0.167364 0.888889 0.281690 0.988068 True Proposed 0.2 21021 amt 0.283574 9009 0.005 None 185280.0 0.3
# Run try_1 over a list of theta values (its fourth argument) while holding
# the fifth argument at 0.4; the printed results table labels these two
# columns "theta" and "gamma".
sweep_gamma = 0.4
sweep_thetas = [
    80280.0, 85280.0, 75280.0, 65280.0, 55280.0,
    45280.0, 35280.0, 25280.0, 15280.0, 5280.0,
]

# The first run is called without prev_results; every later run passes the
# table returned so far back in through prev_results.
df_results = try_1(fraudTrain, 0.2, 0.005, sweep_thetas[0], sweep_gamma)
for sweep_theta in sweep_thetas[1:]:
    df_results = try_1(
        fraudTrain, 0.2, 0.005, sweep_theta, sweep_gamma,
        prev_results=df_results,
    )

# Write the collected runs to a CSV whose name carries a local-time stamp
# (YYYYMMDD-HHMMSS), then leave the table as the cell's displayed value.
ymdhms = datetime.datetime.fromtimestamp(time.time()).strftime('%Y%m%d-%H%M%S')
df_results.to_csv(f'./results/{ymdhms}-proposed.csv', index=False)
df_results
model time acc pre rec f1 auc graph_based method throw_rate train_size train_cols train_frate test_size test_frate hyper_params theta gamma
0 GCN None 0.978022 0.168831 0.866667 0.282609 0.950845 True Proposed 0.2 21021 amt 0.283574 9009 0.005 None 80280.0 0.4
1 GCN None 0.977245 0.169421 0.911111 0.285714 0.972178 True Proposed 0.2 21021 amt 0.283574 9009 0.005 None 85280.0 0.4
2 GCN None 0.974914 0.147860 0.844444 0.251656 0.954162 True Proposed 0.2 21021 amt 0.283574 9009 0.005 None 75280.0 0.4
3 GCN None 0.978244 0.161435 0.800000 0.268657 0.951919 True Proposed 0.2 21021 amt 0.283574 9009 0.005 None 65280.0 0.4
4 GCN None 0.973249 0.137037 0.822222 0.234921 0.952063 True Proposed 0.2 21021 amt 0.283574 9009 0.005 None 55280.0 0.4
5 GCN None 0.979687 0.165049 0.755556 0.270916 0.951795 True Proposed 0.2 21021 amt 0.283574 9009 0.005 None 45280.0 0.4
6 GCN None 0.977800 0.152466 0.755556 0.253731 0.951398 True Proposed 0.2 21021 amt 0.283574 9009 0.005 None 35280.0 0.4
7 GCN None 0.978799 0.165138 0.800000 0.273764 0.951024 True Proposed 0.2 21021 amt 0.283574 9009 0.005 None 25280.0 0.4
8 GCN None 0.976357 0.155738 0.844444 0.262976 0.950255 True Proposed 0.2 21021 amt 0.283574 9009 0.005 None 15280.0 0.4
9 GCN None 0.978910 0.159624 0.755556 0.263566 0.921529 True Proposed 0.2 21021 amt 0.283574 9009 0.005 None 5280.0 0.4
# Run try_1 over eight theta values (its fourth argument) while holding the
# fifth argument at 0.5; the printed results table labels these two columns
# "theta" and "gamma". The theta order is deliberately kept as originally
# run (it is not monotonic) so the result rows keep the same order.
sweep_gamma = 0.5
sweep_thetas = [
    80280.0, 105280.0, 115280.0, 125280.0,
    135280.0, 85280.0, 75280.0, 65280.0,
]

# The first run is called without prev_results; every later run passes the
# table returned so far back in through prev_results.
df_results = try_1(fraudTrain, 0.2, 0.005, sweep_thetas[0], sweep_gamma)
for sweep_theta in sweep_thetas[1:]:
    df_results = try_1(
        fraudTrain, 0.2, 0.005, sweep_theta, sweep_gamma,
        prev_results=df_results,
    )

# Write the collected runs to a CSV whose name carries a local-time stamp
# (YYYYMMDD-HHMMSS), then leave the table as the cell's displayed value.
ymdhms = datetime.datetime.fromtimestamp(time.time()).strftime('%Y%m%d-%H%M%S')
df_results.to_csv(f'./results/{ymdhms}-proposed.csv', index=False)
df_results
model time acc pre rec f1 auc graph_based method throw_rate train_size train_cols train_frate test_size test_frate hyper_params theta gamma
0 GCN None 0.978910 0.165899 0.800000 0.274809 0.951676 True Proposed 0.2 21021 amt 0.283574 9009 0.005 None 80280.0 0.5
1 GCN None 0.977245 0.163866 0.866667 0.275618 0.955950 True Proposed 0.2 21021 amt 0.283574 9009 0.005 None 105280.0 0.5
2 GCN None 0.975247 0.157692 0.911111 0.268852 0.971900 True Proposed 0.2 21021 amt 0.283574 9009 0.005 None 115280.0 0.5
3 GCN None 0.978022 0.171674 0.888889 0.287770 0.984734 True Proposed 0.2 21021 amt 0.283574 9009 0.005 None 125280.0 0.5
4 GCN None 0.978799 0.174107 0.866667 0.289963 0.984117 True Proposed 0.2 21021 amt 0.283574 9009 0.005 None 135280.0 0.5
5 GCN None 0.977578 0.157205 0.800000 0.262774 0.951482 True Proposed 0.2 21021 amt 0.283574 9009 0.005 None 85280.0 0.5
6 GCN None 0.973471 0.138060 0.822222 0.236422 0.951718 True Proposed 0.2 21021 amt 0.283574 9009 0.005 None 75280.0 0.5
7 GCN None 0.979687 0.168269 0.777778 0.276680 0.951842 True Proposed 0.2 21021 amt 0.283574 9009 0.005 None 65280.0 0.5