import pandas as pd
import os
import math
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import torch
from autogluon.tabular import TabularDataset, TabularPredictor
from sklearn.model_selection import train_test_split
from node2vec import Node2Vec
from node2vec.edges import HadamardEmbedder, AverageEmbedder, WeightedL1Embedder, WeightedL2Embedder
# sklearn
import sklearn
from sklearn import metrics
from sklearn.metrics import precision_score, recall_score, f1_score
def throw(df, fraud_rate):  # downsample the non-fraud class so fraud makes up fraud_rate of the result
    df1 = df[df['is_fraud'] == 1].copy()
    df0 = df[df['is_fraud'] == 0].copy()
    df0_downsample = (len(df1) * (1 - fraud_rate)) / (len(df0) * fraud_rate)
    df0_down = df0.sample(frac=df0_downsample, random_state=42)
    df_p = pd.concat([df1, df0_down])
    return df_p
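A quick sanity check of throw on a hypothetical toy frame (not the fraudTrain data): the surviving fraud share should land near the requested fraud_rate.

# Hypothetical toy frame: 50 fraud rows out of 1000, downsampled to roughly 30% fraud.
toy = pd.DataFrame({'is_fraud': [1]*50 + [0]*950, 'amt': np.arange(1000)})
toy_p = throw(toy, 0.3)
toy_p['is_fraud'].mean()  # roughly 0.3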
def split_dataframe(data_frame, test_fraud_rate, test_rate=0.3):
    n = len(data_frame)

    # Separate fraud and normal transactions
    fraud_data = data_frame[data_frame['is_fraud'] == 1]
    normal_data = data_frame[data_frame['is_fraud'] == 0]

    # Compute the test set sizes
    test_samples = int(test_fraud_rate * (n * test_rate))
    remaining_test_samples = int(n * test_rate) - test_samples

    # Randomly draw test rows from the fraud and normal transactions
    test_fraud_data = fraud_data.sample(n=test_samples, replace=False)
    test_normal_data = normal_data.sample(n=remaining_test_samples, replace=False)

    # Combine the test data
    test_data = pd.concat([test_normal_data, test_fraud_data])

    # Everything else becomes the training data
    train_data = data_frame[~data_frame.index.isin(test_data.index)]

    return train_data, test_data
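Continuing the hypothetical toy example from above, split_dataframe should hand back a test set holding about test_rate of the rows, with its fraud share forced down to test_fraud_rate.

# Reuses the hypothetical toy_p frame from the sketch above.
toy_tr, toy_tst = split_dataframe(toy_p, test_fraud_rate=0.05)
len(toy_tst) / len(toy_p)   # close to the default test_rate of 0.3
toy_tst['is_fraud'].mean()  # close to 0.05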
def concat(df_tr, df_tst):
    df = pd.concat([df_tr, df_tst])
    train_mask = np.concatenate((np.full(len(df_tr), True), np.full(len(df_tst), False)))  # keeps the train/test indices from getting mixed up
    test_mask = np.concatenate((np.full(len(df_tr), False), np.full(len(df_tst), True)))
    mask = (train_mask, test_mask)
    return df, mask
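concat stacks the two splits back into one frame and returns boolean masks recording which rows came from the train and test parts; on the hypothetical toy split above:

toy_all, (toy_train_mask, toy_test_mask) = concat(toy_tr, toy_tst)
toy_train_mask.sum(), toy_test_mask.sum()  # equal to len(toy_tr), len(toy_tst)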
def evaluation(y, yhat):
    metrics = [sklearn.metrics.accuracy_score,
               sklearn.metrics.precision_score,
               sklearn.metrics.recall_score,
               sklearn.metrics.f1_score,
               sklearn.metrics.roc_auc_score]
    return pd.DataFrame({m.__name__: [m(y, yhat).round(6)] for m in metrics})
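evaluation simply collects several sklearn metrics into a one-row frame; a minimal example with made-up labels:

# Made-up labels purely to illustrate the output of evaluation().
evaluation(np.array([0, 1, 1, 0]), np.array([0, 1, 0, 0]))
# one row: accuracy_score, precision_score, recall_score, f1_score, roc_auc_score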
def compute_time_difference(group):
    n = len(group)
    result = []
    for i in range(n):
        for j in range(n):
            time_difference = abs((group.iloc[i].trans_date_trans_time - group.iloc[j].trans_date_trans_time).total_seconds())
            result.append([group.iloc[i].name, group.iloc[j].name, time_difference])
    return result
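compute_time_difference emits every (i, j) pair inside a group as [index_i, index_j, |Δt| in seconds], so its output grows quadratically with the group size; a tiny hypothetical group:

# Two hypothetical transactions one hour apart.
g = pd.DataFrame({'trans_date_trans_time': pd.to_datetime(['2020-01-01 00:00:00',
                                                           '2020-01-01 01:00:00'])})
compute_time_difference(g)
# expected: [[0, 0, 0.0], [0, 1, 3600.0], [1, 0, 3600.0], [1, 1, 0.0]]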
def edge_index_save(df, unique_col, theta, gamma):
    groups = df.groupby(unique_col)
    edge_index = np.array([item for sublist in (compute_time_difference(group) for _, group in groups) for item in sublist])
    edge_index = edge_index.astype(np.float64)
    save_attempt = 0
    filename = f"edge_index_attempt{save_attempt}_{str(unique_col).replace(' ', '').replace('_', '')}.npy"
    while os.path.exists(filename):
        save_attempt += 1
        filename = f"edge_index_attempt{save_attempt}_{str(unique_col).replace(' ', '').replace('_', '')}.npy"
    np.save(filename, edge_index)
    # theta = edge_index_plus_time[:,].mean()
    edge_index[:, 2] = (np.exp(-edge_index[:, 2] / theta) != 1) * np.exp(-edge_index[:, 2] / theta)
    edge_index = torch.tensor([(int(row[0]), int(row[1])) for row in edge_index if row[2] > gamma], dtype=torch.long).t()
    return edge_index
def edge_index(df, unique_col, theta, gamma):
    groups = df.groupby(unique_col)
    edge_index = np.array([item for sublist in (compute_time_difference(group) for _, group in groups) for item in sublist])
    edge_index = edge_index.astype(np.float64)
    # filename = f"edge_index_attempt{save_attempt}_{str(unique_col).replace(' ', '').replace('_', '')}.npy"
    # while os.path.exists(filename):
    #     save_attempt += 1
    #     filename = f"edge_index_attempt{save_attempt}_{str(unique_col).replace(' ', '').replace('_', '')}.npy"
    # np.save(filename, edge_index)
    # theta = edge_index_plus_time[:,].mean()
    edge_index[:, 2] = (np.exp(-edge_index[:, 2] / theta) != 1) * np.exp(-edge_index[:, 2] / theta)
    edge_index = torch.tensor([(int(row[0]), int(row[1])) for row in edge_index if row[2] > gamma], dtype=torch.long).t()
    return edge_index
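edge_index links pairs of transactions that share the same unique_col value whenever the time-decay weight exp(-Δt/θ) exceeds gamma; the "!= 1" mask zeroes out the Δt = 0 self-pairs. It is not exercised in the run below; a minimal sketch on a hypothetical mini-frame (theta and gamma here are placeholder values, not tuned choices):

# Hypothetical mini-frame: two card-111 rows an hour apart plus one card-222 row.
mini = pd.DataFrame({'cc_num': [111, 111, 222],
                     'trans_date_trans_time': pd.to_datetime(['2020-01-01 00:00:00',
                                                              '2020-01-01 01:00:00',
                                                              '2020-01-01 00:30:00'])})
edge_index(mini, 'cc_num', theta=3600, gamma=0.1)
# expected: tensor([[0, 1],
#                   [1, 0]])  -- only the two card-111 rows are close enough in time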
def build_graph_tripartite(df_input, graph_type=nx.Graph()):
    df = df_input.copy()
    mapping = {x: node_id for node_id, x in enumerate(set(df.index.values.tolist() +
                                                          df["cc_num"].values.tolist() +
                                                          df["merchant"].values.tolist()))}
    df["in_node"] = df["cc_num"].apply(lambda x: mapping[x])
    df["out_node"] = df["merchant"].apply(lambda x: mapping[x])

    G = nx.from_edgelist([(x["in_node"], mapping[idx]) for idx, x in df.iterrows()] +
                         [(x["out_node"], mapping[idx]) for idx, x in df.iterrows()], create_using=graph_type)

    nx.set_edge_attributes(G, {(x["in_node"], mapping[idx]): x["is_fraud"] for idx, x in df.iterrows()}, "label")
    nx.set_edge_attributes(G, {(x["out_node"], mapping[idx]): x["is_fraud"] for idx, x in df.iterrows()}, "label")
    nx.set_edge_attributes(G, {(x["in_node"], mapping[idx]): x["amt"] for idx, x in df.iterrows()}, "weight")
    nx.set_edge_attributes(G, {(x["out_node"], mapping[idx]): x["amt"] for idx, x in df.iterrows()}, "weight")

    return G
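build_graph_tripartite turns every transaction row into a node and links it to its card (cc_num) node and its merchant node, storing is_fraud as the edge label and amt as the edge weight, so an n-row frame typically yields 2n edges. A hypothetical two-row frame:

# Hypothetical 2-row frame: 2 transactions -> 4 edges (card-txn and merchant-txn per row).
toy_g = build_graph_tripartite(pd.DataFrame({'cc_num': [111, 222],
                                             'merchant': ['m_a', 'm_b'],
                                             'amt': [10.0, 20.0],
                                             'is_fraud': [0, 1]}))
toy_g.number_of_edges()  # 4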
fraudTrain = pd.read_csv("~/Desktop/fraudTrain.csv").iloc[:, 1:]
fraudTrain = fraudTrain.assign(trans_date_trans_time=list(map(lambda x: pd.to_datetime(x), fraudTrain.trans_date_trans_time)))
(throw 0.3 / split 0.05)
df = throw(fraudTrain, 0.3)
df_tr, df_tst = split_dataframe(df, 0.05)
df2, mask = concat(df_tr, df_tst)
df2['index'] = df2.index
df3 = df2.reset_index()
G_down = build_graph_tripartite(df3)
range(len(G_down.edges))
range(0, 40040)
train_edges, test_edges, train_labels, test_labels = train_test_split(list(range(len(G_down.edges))),
                                                                      list(nx.get_edge_attributes(G_down, "label").values()),
                                                                      test_size=0.30,
                                                                      random_state=42)
edgs = list(G_down.edges)
train_graph = G_down.edge_subgraph([edgs[x] for x in train_edges]).copy()
train_graph.add_nodes_from(list(set(G_down.nodes) - set(train_graph.nodes)))
node2vec_train = Node2Vec(train_graph, weight_key='weight')
model_train = node2vec_train.fit(window=10)
Generating walks (CPU: 1): 100%|██████████| 10/10 [01:02<00:00, 6.20s/it]
embeddings_train = AverageEmbedder(keyed_vectors=model_train.wv)
train_embeddings = [embeddings_train[str(edgs[x][0]), str(edgs[x][1])] for x in train_edges]
test_embeddings = [embeddings_train[str(edgs[x][0]), str(edgs[x][1])] for x in test_edges]
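AverageEmbedder should build each edge vector as the element-wise mean of its two endpoint node2vec vectors (128 dimensions by default), which is where the X_0 … X_127 features below come from; a spot check on the first training edge:

# Spot check: the edge embedding should equal the mean of its two node vectors.
u, v = str(edgs[train_edges[0]][0]), str(edgs[train_edges[0]][1])
np.allclose(embeddings_train[u, v], (model_train.wv[u] + model_train.wv[v]) / 2)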
np.array(train_embeddings).shape
(28028, 128)
np.array(edgs).shape
(40040, 2)
np.array(train_labels).shape
(28028,)
test_df
X_0 | X_1 | X_2 | X_3 | X_4 | X_5 | X_6 | X_7 | X_8 | X_9 | ... | X_118 | X_119 | X_120 | X_121 | X_122 | X_123 | X_124 | X_125 | X_126 | X_127 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0.294095 | -0.182587 | 0.247678 | 0.590049 | 0.006470 | -0.142112 | 0.089056 | -0.001260 | 0.094326 | 0.135021 | ... | 0.069856 | 0.066224 | 0.115723 | 0.147979 | -0.197021 | -0.095803 | -0.337333 | -0.125295 | -0.009825 | 0.151540 |
1 | -0.272624 | 0.151767 | -0.154251 | 0.323210 | 0.926657 | -0.067704 | 0.017721 | -0.107441 | 0.145443 | 0.302530 | ... | 0.003853 | -0.062410 | 0.533643 | -0.343930 | -0.080773 | 0.252463 | -0.586127 | -0.611270 | 0.173824 | -0.255820 |
2 | -0.285497 | -0.727879 | 0.088415 | 0.351048 | 0.179241 | -0.211548 | 0.396390 | 0.184747 | 0.019989 | 0.056321 | ... | 0.329557 | -0.137155 | -0.492804 | -0.395840 | -0.124599 | 0.071007 | 0.092745 | 0.113933 | 0.531759 | 0.185540 |
3 | 0.101396 | -0.118513 | 0.023900 | 0.479202 | 0.010183 | 0.010264 | 0.090974 | -0.374058 | -0.035802 | 0.005411 | ... | 0.445272 | 0.161917 | 0.132266 | -0.353145 | 0.408945 | 0.278389 | -0.149341 | -0.426027 | 0.157640 | 0.036185 |
4 | 0.335867 | 0.193802 | 0.017849 | -0.094391 | 0.406467 | -0.252247 | 0.253800 | 0.060329 | -0.227593 | 0.060363 | ... | -0.042921 | -0.296897 | -0.108547 | -0.168377 | 0.239888 | 0.338059 | -0.371277 | 0.179102 | -0.186001 | 0.128560 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
12007 | 0.042202 | -0.018433 | 0.708589 | -0.087046 | 0.239773 | -0.049197 | 0.437209 | 0.417002 | -0.341246 | 0.192022 | ... | 0.255594 | 0.440218 | -0.155139 | -0.615026 | 0.221697 | 0.187364 | 0.244558 | -0.492887 | 0.498050 | 0.081703 |
12008 | -0.265624 | -0.221091 | 0.016436 | -0.082620 | 0.160059 | -0.097688 | 0.372506 | 0.080932 | -0.232221 | 0.103701 | ... | 0.106539 | 0.294270 | -0.140846 | 0.088624 | -0.184099 | 0.296281 | -0.292350 | 0.041315 | 0.421299 | -0.014916 |
12009 | -0.040139 | 0.002283 | -0.052671 | 0.268232 | 0.253181 | 0.208248 | 0.081583 | 0.019004 | -0.129444 | 0.090210 | ... | -0.112681 | -0.216101 | -0.273943 | -0.158223 | 0.288049 | 0.343018 | 0.022950 | -0.497030 | 0.158982 | -0.103655 |
12010 | 0.108170 | -0.334442 | -0.155009 | -0.043705 | 0.576253 | -0.157641 | -0.352357 | -0.275126 | -0.084098 | -0.305660 | ... | 0.417730 | -0.285950 | -0.008567 | -0.083101 | 0.029371 | 0.271623 | -0.458983 | 0.137005 | -0.205865 | 0.410773 |
12011 | -0.267466 | 0.094266 | 0.158908 | 0.479838 | 0.401144 | -0.120019 | 0.683475 | 0.051481 | -0.131274 | 0.665234 | ... | 0.423447 | 0.033082 | -0.014823 | -0.390924 | 0.166589 | -0.030113 | -0.244755 | 0.128242 | -0.013727 | 0.101052 |
12012 rows × 128 columns
y.mean()
0.30128205128205127
yhat.mean()
0.02355977355977356
df
X_0 | X_1 | X_2 | X_3 | X_4 | X_5 | X_6 | X_7 | X_8 | X_9 | ... | X_119 | X_120 | X_121 | X_122 | X_123 | X_124 | X_125 | X_126 | X_127 | label | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | -0.018232 | -0.461368 | 0.364313 | -0.003355 | 0.447681 | 0.124173 | -0.003152 | -0.540774 | -0.572652 | 0.302768 | ... | -0.409031 | 0.276566 | -0.532460 | 0.310427 | -0.138094 | -0.232335 | -0.699818 | 0.409554 | 0.513786 | 1 |
1 | -0.034503 | -0.591312 | 0.221111 | 0.221223 | 0.456697 | 0.206631 | 0.071338 | -0.419212 | -0.801116 | 0.093788 | ... | -0.397874 | -0.322119 | -0.167821 | -0.249518 | 0.307504 | -0.631245 | -0.300779 | 0.297532 | -0.434563 | 0 |
2 | -0.310014 | -0.523434 | -0.253751 | 0.291488 | -0.216207 | -0.602615 | 0.851062 | 0.502921 | -0.013592 | -0.049582 | ... | -0.690101 | 0.033092 | -0.025023 | 0.303007 | -0.769665 | 0.134720 | 0.556271 | 0.101577 | 0.475444 | 0 |
3 | -0.491736 | -0.378155 | -0.069874 | -0.418389 | 0.708743 | -0.091546 | 0.106801 | -0.590884 | 0.115419 | 0.375492 | ... | 0.555193 | -0.788328 | -0.293919 | 0.354652 | -0.004965 | -0.360878 | -0.673302 | 0.952318 | -0.829513 | 0 |
4 | -0.509173 | -0.160158 | 0.732356 | 0.799263 | 0.406524 | 0.346506 | 0.278970 | 0.135962 | -0.022850 | 0.728933 | ... | -0.419749 | -0.317320 | -0.089520 | 0.018165 | 0.111126 | 0.156540 | -0.480898 | 0.906297 | 0.375273 | 0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
28023 | -1.031186 | -0.789458 | -0.395662 | 0.672432 | 0.366671 | -0.042877 | 0.584821 | -1.050437 | -0.178776 | 1.148389 | ... | 0.035637 | -0.534154 | -0.465015 | -0.292141 | -0.529951 | 0.083408 | -0.534623 | -0.033971 | -0.392283 | 0 |
28024 | 0.582753 | -0.374445 | 0.245931 | -0.342081 | 0.360757 | -0.232758 | 0.543620 | 0.188705 | 0.140474 | 0.003210 | ... | 0.469309 | -0.733129 | -0.328261 | 0.185925 | 0.491741 | -0.889476 | -0.515644 | 0.718754 | -0.470877 | 1 |
28025 | -0.073720 | -0.696957 | 0.398181 | 0.148028 | 0.527938 | 0.282207 | 0.083161 | 0.223829 | 0.746264 | -0.078440 | ... | 0.403734 | -0.565365 | -0.134230 | 0.076866 | -0.147996 | -0.263811 | -0.342502 | 0.232086 | 0.396120 | 0 |
28026 | 0.198808 | -0.342205 | -0.171712 | -0.427701 | 0.376993 | -0.368098 | 0.268516 | -0.081111 | -0.124265 | 0.539024 | ... | 0.235571 | 0.433356 | -0.304768 | -0.276453 | 0.025376 | -0.091774 | -1.139718 | -0.064087 | -0.382336 | 0 |
28027 | -0.071535 | -0.478938 | 0.958107 | -0.103668 | 0.735266 | 0.264131 | 0.526417 | -0.046119 | 0.337727 | 0.551102 | ... | -0.100332 | -0.451198 | -0.238998 | -0.315951 | -0.040852 | -0.560907 | 0.301089 | -0.071838 | 0.057509 | 1 |
28028 rows × 129 columns
# embeddings_train = AverageEmbedder(keyed_vectors=model_train.wv)
# train_embeddings = [embeddings_train[str(edgs[x][0]), str(edgs[x][1])] for x in train_edges]
# test_embeddings = [embeddings_train[str(edgs[x][0]), str(edgs[x][1])] for x in test_edges]
# Build the training DataFrame
columns = [f'X_{i}' for i in range(np.array(train_embeddings).shape[1])]
df_data = pd.DataFrame(data=train_embeddings, columns=columns)
df_labels = pd.DataFrame(data=train_labels, columns=['label'])

# Combine features and labels
df = pd.concat([df_data, df_labels], axis=1)

label = np.array(train_labels)

predictr = TabularPredictor(label='label')
predictr.fit(df)

# Build the test DataFrame
test = np.array(test_embeddings)
columns = [f'X_{i}' for i in range(test.shape[1])]
test_df = pd.DataFrame(data=test, columns=columns)

y = np.array(test_labels)
yhat = predictr.predict(test_df)

evaluation(y, yhat)
No path specified. Models will be saved in: "AutogluonModels/ag-20240129_072006/"
Beginning AutoGluon training ...
AutoGluon will save models to "AutogluonModels/ag-20240129_072006/"
AutoGluon Version: 0.8.2
Python Version: 3.8.18
Operating System: Linux
Platform Machine: x86_64
Platform Version: #38~22.04.1-Ubuntu SMP PREEMPT_DYNAMIC Thu Nov 2 18:01:13 UTC 2
Disk Space Avail: 597.99 GB / 982.82 GB (60.8%)
Train Data Rows: 28028
Train Data Columns: 128
Label Column: label
Preprocessing data ...
AutoGluon infers your prediction problem is: 'binary' (because only two unique label-values observed).
2 unique label values: [1, 0]
If 'binary' is not the correct problem_type, please manually specify the problem_type parameter during predictor init (You may specify problem_type as one of: ['binary', 'multiclass', 'regression'])
Selected class <--> label mapping: class 1 = 1, class 0 = 0
Using Feature Generators to preprocess the data ...
Fitting AutoMLPipelineFeatureGenerator...
Available Memory: 5155.63 MB
Train Data (Original) Memory Usage: 14.35 MB (0.3% of available memory)
Inferring data type of each feature based on column values. Set feature_metadata_in to manually specify special dtypes of the features.
Stage 1 Generators:
Fitting AsTypeFeatureGenerator...
Stage 2 Generators:
Fitting FillNaFeatureGenerator...
Stage 3 Generators:
Fitting IdentityFeatureGenerator...
Stage 4 Generators:
Fitting DropUniqueFeatureGenerator...
Stage 5 Generators:
Fitting DropDuplicatesFeatureGenerator...
Types of features in original data (raw dtype, special dtypes):
('float', []) : 128 | ['X_0', 'X_1', 'X_2', 'X_3', 'X_4', ...]
Types of features in processed data (raw dtype, special dtypes):
('float', []) : 128 | ['X_0', 'X_1', 'X_2', 'X_3', 'X_4', ...]
0.3s = Fit runtime
128 features in original data used to generate 128 features in processed data.
Train Data (Processed) Memory Usage: 14.35 MB (0.2% of available memory)
Data preprocessing and feature engineering runtime = 0.31s ...
AutoGluon will gauge predictive performance using evaluation metric: 'accuracy'
To change this, specify the eval_metric parameter of Predictor()
Automatically generating train/validation split with holdout_frac=0.08919651776794633, Train Rows: 25528, Val Rows: 2500
User-specified model hyperparameters to be fit:
{
'NN_TORCH': {},
'GBM': [{'extra_trees': True, 'ag_args': {'name_suffix': 'XT'}}, {}, 'GBMLarge'],
'CAT': {},
'XGB': {},
'FASTAI': {},
'RF': [{'criterion': 'gini', 'ag_args': {'name_suffix': 'Gini', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'entropy', 'ag_args': {'name_suffix': 'Entr', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'squared_error', 'ag_args': {'name_suffix': 'MSE', 'problem_types': ['regression', 'quantile']}}],
'XT': [{'criterion': 'gini', 'ag_args': {'name_suffix': 'Gini', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'entropy', 'ag_args': {'name_suffix': 'Entr', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'squared_error', 'ag_args': {'name_suffix': 'MSE', 'problem_types': ['regression', 'quantile']}}],
'KNN': [{'weights': 'uniform', 'ag_args': {'name_suffix': 'Unif'}}, {'weights': 'distance', 'ag_args': {'name_suffix': 'Dist'}}],
}
Fitting 13 L1 models ...
Fitting model: KNeighborsUnif ...
Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7f77c8288f70>
Traceback (most recent call last):
File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 400, in match_module_callback
self._make_module_from_path(filepath)
File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 515, in _make_module_from_path
module = module_class(filepath, prefix, user_api, internal_api)
File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 606, in __init__
self.version = self.get_version()
File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 646, in get_version
config = get_config().split()
AttributeError: 'NoneType' object has no attribute 'split'
0.7884 = Validation score (accuracy)
0.08s = Training runtime
0.72s = Validation runtime
Fitting model: KNeighborsDist ...
Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7f77c8288f70>
Traceback (most recent call last):
File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 400, in match_module_callback
self._make_module_from_path(filepath)
File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 515, in _make_module_from_path
module = module_class(filepath, prefix, user_api, internal_api)
File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 606, in __init__
self.version = self.get_version()
File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 646, in get_version
config = get_config().split()
AttributeError: 'NoneType' object has no attribute 'split'
0.7908 = Validation score (accuracy)
0.08s = Training runtime
0.53s = Validation runtime
Fitting model: LightGBMXT ...
0.8216 = Validation score (accuracy)
5.64s = Training runtime
0.02s = Validation runtime
Fitting model: LightGBM ...
0.8376 = Validation score (accuracy)
35.19s = Training runtime
0.1s = Validation runtime
Fitting model: RandomForestGini ...
0.8076 = Validation score (accuracy)
13.37s = Training runtime
0.08s = Validation runtime
Fitting model: RandomForestEntr ...
0.808 = Validation score (accuracy)
17.13s = Training runtime
0.07s = Validation runtime
Fitting model: CatBoost ...
0.8228 = Validation score (accuracy)
21.58s = Training runtime
0.01s = Validation runtime
Fitting model: ExtraTreesGini ...
0.8024 = Validation score (accuracy)
2.14s = Training runtime
0.09s = Validation runtime
Fitting model: ExtraTreesEntr ...
0.8032 = Validation score (accuracy)
2.4s = Training runtime
0.09s = Validation runtime
Fitting model: NeuralNetFastAI ...
0.8508 = Validation score (accuracy)
33.83s = Training runtime
0.09s = Validation runtime
Fitting model: XGBoost ...
0.83 = Validation score (accuracy)
16.11s = Training runtime
0.04s = Validation runtime
Fitting model: NeuralNetTorch ...
0.8516 = Validation score (accuracy)
53.0s = Training runtime
0.02s = Validation runtime
Fitting model: LightGBMLarge ...
0.8308 = Validation score (accuracy)
29.28s = Training runtime
0.05s = Validation runtime
Fitting model: WeightedEnsemble_L2 ...
0.8704 = Validation score (accuracy)
0.99s = Training runtime
0.0s = Validation runtime
AutoGluon training complete, total runtime = 234.95s ... Best model: "WeightedEnsemble_L2"
TabularPredictor saved. To load, use: predictor = TabularPredictor.load("AutogluonModels/ag-20240129_072006/")
[1000] valid_set's binary_error: 0.1784
[2000] valid_set's binary_error: 0.1736
[3000] valid_set's binary_error: 0.1736
[4000] valid_set's binary_error: 0.168
[5000] valid_set's binary_error: 0.1628
[6000] valid_set's binary_error: 0.1644
[1000] valid_set's binary_error: 0.1732
accuracy_score | precision_score | recall_score | f1_score | roc_auc_score | |
---|---|---|---|---|---|
0 | 0.711788 | 0.777385 | 0.06079 | 0.112763 | 0.526642 |