[Proposed] 불균형 데이터 0.3/0.05

Author

김보람

Published

February 5, 2024

imports

import pandas as pd
import numpy as np
import sklearn
import pickle 
import time 
import datetime
import warnings
warnings.filterwarnings('ignore')
%run function_proposed_gcn.py
# Load the pickled training data.
# NOTE(review): pickle.load can execute arbitrary code; fraudTrain.pkl is
# presumably a locally produced artifact -- confirm before loading a copy
# obtained from elsewhere.
with open('fraudTrain.pkl', 'rb') as pkl_handle:
    fraudTrain = pickle.load(pkl_handle)

# Sweep the 4th positional argument of try_1 while the 5th stays at 0.3.
# The result table reports these as the `theta` and `gamma` columns;
# try_1 itself comes from function_proposed_gcn.py (signature not visible
# here -- presumably (data, throw_rate, test_frate, theta, gamma)).
theta_grid = [
    80280.0, 85280.0, 75280.0, 65280.0, 55280.0,
    45280.0, 35280.0, 25280.0, 15280.0, 5280.0,
]

# The first run starts a fresh result table; every later run passes the
# table accumulated so far back in through `prev_results`.
df_results = try_1(fraudTrain, 0.3, 0.05, theta_grid[0], 0.3)
for theta_value in theta_grid[1:]:
    df_results = try_1(
        fraudTrain, 0.3, 0.05, theta_value, 0.3, prev_results=df_results
    )

# Save the sweep under a local-time timestamp so earlier runs are kept.
ymdhms = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
df_results.to_csv(f'./results/{ymdhms}-proposed.csv', index=False)
df_results
model time acc pre rec f1 auc graph_based method throw_rate train_size train_cols train_frate test_size test_frate hyper_params theta gamma
0 GCN None 0.970862 0.642369 0.940000 0.763194 0.985496 True Proposed 0.3 14014 amt 0.407164 6006 0.05 None 80280.0 0.3
1 GCN None 0.968198 0.618221 0.950000 0.749014 0.985674 True Proposed 0.3 14014 amt 0.407164 6006 0.05 None 85280.0 0.3
2 GCN None 0.972194 0.654292 0.940000 0.771546 0.985709 True Proposed 0.3 14014 amt 0.407164 6006 0.05 None 75280.0 0.3
3 GCN None 0.972194 0.655738 0.933333 0.770289 0.979327 True Proposed 0.3 14014 amt 0.407164 6006 0.05 None 65280.0 0.3
4 GCN None 0.967366 0.622066 0.883333 0.730028 0.968981 True Proposed 0.3 14014 amt 0.407164 6006 0.05 None 55280.0 0.3
5 GCN None 0.966034 0.611628 0.876667 0.720548 0.965117 True Proposed 0.3 14014 amt 0.407164 6006 0.05 None 45280.0 0.3
6 GCN None 0.964036 0.600478 0.836667 0.699164 0.949307 True Proposed 0.3 14014 amt 0.407164 6006 0.05 None 35280.0 0.3
7 GCN None 0.964868 0.606715 0.843333 0.705718 0.944587 True Proposed 0.3 14014 amt 0.407164 6006 0.05 None 25280.0 0.3
8 GCN None 0.958874 0.558758 0.840000 0.671105 0.938106 True Proposed 0.3 14014 amt 0.407164 6006 0.05 None 15280.0 0.3
9 GCN None 0.966533 0.628571 0.806667 0.706569 0.913194 True Proposed 0.3 14014 amt 0.407164 6006 0.05 None 5280.0 0.3
# Sweep the 4th positional argument of try_1 while the 5th stays at 0.2.
# The result table reports these as the `theta` and `gamma` columns;
# try_1 itself comes from function_proposed_gcn.py (signature not visible
# here -- presumably (data, throw_rate, test_frate, theta, gamma)).
theta_grid = [
    80280.0, 85280.0, 75280.0, 65280.0, 55280.0,
    45280.0, 35280.0, 25280.0, 15280.0, 5280.0,
]

# The first run starts a fresh result table; every later run passes the
# table accumulated so far back in through `prev_results`.
df_results = try_1(fraudTrain, 0.3, 0.05, theta_grid[0], 0.2)
for theta_value in theta_grid[1:]:
    df_results = try_1(
        fraudTrain, 0.3, 0.05, theta_value, 0.2, prev_results=df_results
    )

# Save the sweep under a local-time timestamp so earlier runs are kept.
ymdhms = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
results_path = f'./results/{ymdhms}-proposed.csv'
df_results.to_csv(results_path, index=False)

# Read back the file that was just written instead of a hard-coded
# timestamped name from an earlier run, so re-running this cell always
# displays the results it produced (and never fails on a missing file).
pd.read_csv(results_path)
model time acc pre rec f1 auc graph_based method throw_rate train_size train_cols train_frate test_size test_frate hyper_params theta gamma
0 GCN NaN 0.971861 0.648526 0.953333 0.771930 0.987355 True Proposed 0.3 14014 amt 0.407164 6006 0.05 NaN 80280.0 0.2
1 GCN NaN 0.970196 0.632967 0.960000 0.762914 0.987462 True Proposed 0.3 14014 amt 0.407164 6006 0.05 NaN 85280.0 0.2
2 GCN NaN 0.972361 0.652968 0.953333 0.775068 0.988143 True Proposed 0.3 14014 amt 0.407164 6006 0.05 NaN 75280.0 0.2
3 GCN NaN 0.972028 0.652778 0.940000 0.770492 0.985693 True Proposed 0.3 14014 amt 0.407164 6006 0.05 NaN 65280.0 0.2
4 GCN NaN 0.972194 0.654292 0.940000 0.771546 0.984942 True Proposed 0.3 14014 amt 0.407164 6006 0.05 NaN 55280.0 0.2
5 GCN NaN 0.968698 0.629032 0.910000 0.743869 0.971887 True Proposed 0.3 14014 amt 0.407164 6006 0.05 NaN 45280.0 0.2
6 GCN NaN 0.960706 0.567797 0.893333 0.694301 0.965830 True Proposed 0.3 14014 amt 0.407164 6006 0.05 NaN 35280.0 0.2
7 GCN NaN 0.963869 0.598575 0.840000 0.699029 0.948427 True Proposed 0.3 14014 amt 0.407164 6006 0.05 NaN 25280.0 0.2
8 GCN NaN 0.958708 0.556769 0.850000 0.672823 0.943271 True Proposed 0.3 14014 amt 0.407164 6006 0.05 NaN 15280.0 0.2
9 GCN NaN 0.967033 0.629442 0.826667 0.714697 0.922877 True Proposed 0.3 14014 amt 0.407164 6006 0.05 NaN 5280.0 0.2
# Sweep the 4th positional argument of try_1 over larger values
# (90280 .. 185280) while the 5th stays at 0.3. try_1 comes from
# function_proposed_gcn.py (signature not visible here -- presumably
# (data, throw_rate, test_frate, theta, gamma)).
# NOTE(review): the table rendered right after this cell lists theta
# 80280 .. 5280 with gamma 0.4, which does not match these arguments --
# the displayed output looks stale; re-run the cell to confirm.
theta_grid = [
    90280.0, 105280.0, 115280.0, 125280.0, 135280.0,
    145280.0, 155280.0, 165280.0, 175280.0, 185280.0,
]

# The first run starts a fresh result table; every later run passes the
# table accumulated so far back in through `prev_results`.
df_results = try_1(fraudTrain, 0.3, 0.05, theta_grid[0], 0.3)
for theta_value in theta_grid[1:]:
    df_results = try_1(
        fraudTrain, 0.3, 0.05, theta_value, 0.3, prev_results=df_results
    )

# Save the sweep under a local-time timestamp so earlier runs are kept.
ymdhms = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
df_results.to_csv(f'./results/{ymdhms}-proposed.csv', index=False)
df_results
model time acc pre rec f1 auc graph_based method throw_rate train_size train_cols train_frate test_size test_frate hyper_params theta gamma
0 GCN None 0.971029 0.650000 0.910000 0.758333 0.971637 True Proposed 0.3 14014 amt 0.407164 6006 0.05 None 80280.0 0.4
1 GCN None 0.972527 0.658824 0.933333 0.772414 0.979403 True Proposed 0.3 14014 amt 0.407164 6006 0.05 None 85280.0 0.4
2 GCN None 0.968864 0.633570 0.893333 0.741355 0.969695 True Proposed 0.3 14014 amt 0.407164 6006 0.05 None 75280.0 0.4
3 GCN None 0.966700 0.618483 0.870000 0.722992 0.966465 True Proposed 0.3 14014 amt 0.407164 6006 0.05 None 65280.0 0.4
4 GCN None 0.965201 0.606061 0.866667 0.713306 0.962443 True Proposed 0.3 14014 amt 0.407164 6006 0.05 None 55280.0 0.4
5 GCN None 0.958208 0.553145 0.850000 0.670171 0.948366 True Proposed 0.3 14014 amt 0.407164 6006 0.05 None 45280.0 0.4
6 GCN None 0.960539 0.571106 0.843333 0.681023 0.947776 True Proposed 0.3 14014 amt 0.407164 6006 0.05 None 35280.0 0.4
7 GCN None 0.967033 0.626866 0.840000 0.717949 0.943233 True Proposed 0.3 14014 amt 0.407164 6006 0.05 None 25280.0 0.4
8 GCN None 0.965701 0.615764 0.833333 0.708215 0.937167 True Proposed 0.3 14014 amt 0.407164 6006 0.05 None 15280.0 0.4
9 GCN None 0.963037 0.597015 0.800000 0.683761 0.909528 True Proposed 0.3 14014 amt 0.407164 6006 0.05 None 5280.0 0.4
# Sweep the 4th positional argument of try_1 while the 5th stays at 0.4.
# The result table reports these as the `theta` and `gamma` columns;
# try_1 itself comes from function_proposed_gcn.py (signature not visible
# here -- presumably (data, throw_rate, test_frate, theta, gamma)).
theta_grid = [
    80280.0, 85280.0, 75280.0, 65280.0, 55280.0,
    45280.0, 35280.0, 25280.0, 15280.0, 5280.0,
]

# The first run starts a fresh result table; every later run passes the
# table accumulated so far back in through `prev_results`.
df_results = try_1(fraudTrain, 0.3, 0.05, theta_grid[0], 0.4)
for theta_value in theta_grid[1:]:
    df_results = try_1(
        fraudTrain, 0.3, 0.05, theta_value, 0.4, prev_results=df_results
    )

# Save the sweep under a local-time timestamp so earlier runs are kept.
ymdhms = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
df_results.to_csv(f'./results/{ymdhms}-proposed.csv', index=False)
df_results
model time acc pre rec f1 auc graph_based method throw_rate train_size train_cols train_frate test_size test_frate hyper_params theta gamma
0 GCN None 0.969697 0.635945 0.920000 0.752044 0.971674 True Proposed 0.3 14014 amt 0.407164 6006 0.05 None 80280.0 0.4
1 GCN None 0.964535 0.591579 0.936667 0.725161 0.979275 True Proposed 0.3 14014 amt 0.407164 6006 0.05 None 85280.0 0.4
2 GCN None 0.960872 0.567850 0.906667 0.698331 0.968544 True Proposed 0.3 14014 amt 0.407164 6006 0.05 None 75280.0 0.4
3 GCN None 0.965368 0.606977 0.870000 0.715068 0.965953 True Proposed 0.3 14014 amt 0.407164 6006 0.05 None 65280.0 0.4
4 GCN None 0.964702 0.601852 0.866667 0.710383 0.962175 True Proposed 0.3 14014 amt 0.407164 6006 0.05 None 55280.0 0.4
5 GCN None 0.956377 0.539095 0.873333 0.666667 0.948450 True Proposed 0.3 14014 amt 0.407164 6006 0.05 None 45280.0 0.4
6 GCN None 0.959873 0.565410 0.850000 0.679095 0.947832 True Proposed 0.3 14014 amt 0.407164 6006 0.05 None 35280.0 0.4
7 GCN None 0.961871 0.581609 0.843333 0.688435 0.943207 True Proposed 0.3 14014 amt 0.407164 6006 0.05 None 25280.0 0.4
8 GCN None 0.962204 0.585082 0.836667 0.688615 0.937430 True Proposed 0.3 14014 amt 0.407164 6006 0.05 None 15280.0 0.4
9 GCN None 0.965534 0.620779 0.796667 0.697810 0.910292 True Proposed 0.3 14014 amt 0.407164 6006 0.05 None 5280.0 0.4