[Proposed] 불균형 데이터 (0.005)

Author

김보람

Published

February 12, 2024

imports

import pandas as pd
import numpy as np
import sklearn
import pickle 
import time 
import datetime
import warnings
warnings.filterwarnings('ignore')
%run function_proposed_gcn.py
# Load the pickled fraudTrain object from the current working directory.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only run this against a fraudTrain.pkl from a trusted source.
with open('fraudTrain.pkl', 'rb') as pkl_handle:
    fraudTrain = pickle.load(pkl_handle)

# Keep every 10th row and renumber the rows.
# NOTE(review): presumably fraudTrain is a pandas DataFrame, in which case
# reset_index() keeps the old index as an extra column -- confirm that the
# downstream code expects that column.
df = fraudTrain[::10].reset_index()

# Show the mean of the is_fraud column for the subsample.
df['is_fraud'].mean()

# df_train, df_test = sklearn.model_selection.train_test_split(df)
0.005807854431707643
# Single run of try_2 on the subsample df. The labels below are the columns of
# the printed result row in which each positional value shows up.
df_results = try_2(
    df,
    0.005807854431707643,   # reported as throw_rate
    0.0058363532328819375,  # reported as test_frate
    8.028000e+04,           # reported as theta
    0.3,                    # reported as gamma
)
df_results
model time acc pre rec f1 auc graph_based method throw_rate train_size train_cols train_frate test_size test_frate hyper_params theta gamma
0 GCN None 0.995805 0.0 0.0 0.0 0.856951 True Proposed 0.005808 7864 amt 0.004578 2622 0.005836 None 80280.0 0.3
# Sweep the fourth positional argument of try_2 while the other settings stay
# fixed. The result row printed earlier in this notebook shows the positional
# values surfacing as the throw_rate, test_frate, theta and gamma columns, in
# that order.
# NOTE(review): these runs use the full fraudTrain rather than the 1-in-10
# sample df used above, and the first call replaces the df_results that was
# produced from df -- confirm both are intended.
df_results = try_2(fraudTrain, 0.005808, 0.005836, 8.028000e+04, 0.3)

# Every later run hands the accumulated results back in through prev_results.
# The 6.528e4 entry fixes a misplaced digit: the original call passed
# 0.0058366 and 528000e+04 (i.e. 5.28e9) instead of 0.005836 and 6.528000e+04.
for _theta in (
    8.528000e+04,
    7.528000e+04,
    6.528000e+04,
    5.528000e+04,
    4.528000e+04,
    3.528000e+04,
    2.528000e+04,
    1.528000e+04,
    0.528000e+04,
):
    df_results = try_2(fraudTrain, 0.005808, 0.005836, _theta, 0.3, prev_results=df_results)


# Timestamp (local time, YYYYmmdd-HHMMSS) used to give each results file a
# distinct name.
ymdhms = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')

# Write the accumulated results without the index column, then display them.
df_results.to_csv(
    f'./results/{ymdhms}-proposed.csv',
    index=False,
)
df_results