imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import networkx as nx
import sklearn
import xgboost as xgb
# sklearn
from sklearn import model_selection # split함수이용
from sklearn import ensemble # RF,GBM
from sklearn import metrics
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
# gnn
import torch
import torch.nn.functional as F
import torch_geometric
from torch_geometric.nn import GCNConv
/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/torch_geometric/typing.py:18: UserWarning: An issue occurred while importing 'pyg-lib'. Disabling its usage. Stacktrace: /home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/libpyg.so: undefined symbol: _ZN2at4_ops12split_Tensor4callERKNS_6TensorEN3c106SymIntEl
warnings.warn(f"An issue occurred while importing 'pyg-lib'. "
/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/torch_geometric/typing.py:31: UserWarning: An issue occurred while importing 'torch-scatter'. Disabling its usage. Stacktrace: /home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/torch_scatter/_scatter_cuda.so: undefined symbol: _ZNK3c107SymBool10guard_boolEPKcl
warnings.warn(f"An issue occurred while importing 'torch-scatter'. "
/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/torch_geometric/typing.py:42: UserWarning: An issue occurred while importing 'torch-sparse'. Disabling its usage. Stacktrace: /home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/torch_sparse/_diag_cuda.so: undefined symbol: _ZN3c106detail19maybe_wrap_dim_slowIlEET_S2_S2_b
warnings.warn(f"An issue occurred while importing 'torch-sparse'. "
def down_sample_textbook(df):
    """Balance the classes by undersampling the non-fraud rows.

    Rows are split on ``df.is_fraud``; the ``is_fraud == 0`` rows are
    sampled without replacement (fixed ``random_state=42``) down to the
    number of ``is_fraud == 1`` rows.

    Args:
        df: DataFrame with an ``is_fraud`` column holding 0/1 labels.

    Returns:
        A new DataFrame with all fraud rows first, followed by the sampled
        non-fraud rows. Original index labels are kept.
    """
    fraud_flag = df.is_fraud
    fraud_rows = df[fraud_flag == 1].copy()
    legit_rows = df[fraud_flag == 0].copy()

    legit_subset = sklearn.utils.resample(
        legit_rows,
        replace=False,
        n_samples=len(fraud_rows),
        random_state=42,
    )
    return pd.concat([fraud_rows, legit_subset])
def compute_time_difference(group):
    """Return every ordered pair of rows in *group* with their time gap.

    Args:
        group: DataFrame with a ``trans_date_trans_time`` column of pandas
            Timestamps (the ``.value`` attribute of each entry is used).

    Returns:
        A list with ``len(group) ** 2`` entries of the form
        ``[label_i, label_j, gap]`` where the labels are the index labels
        of the two rows and ``gap`` is the absolute difference of their
        ``Timestamp.value`` integers. Pairs with ``i == j`` are included
        (gap 0). An empty group yields an empty list.
    """
    # Pull labels and integer timestamps out once; the pair loop below then
    # touches only plain Python objects instead of building two row Series
    # through ``.iloc`` for every single pair.
    labels = list(group.index)
    stamps = [ts.value for ts in group["trans_date_trans_time"]]

    result = []
    for label_i, stamp_i in zip(labels, stamps):
        for label_j, stamp_j in zip(labels, stamps):
            result.append([label_i, label_j, abs(stamp_i - stamp_j)])
    return result
def mask(df):
    """Build boolean train/test masks from a seeded random split of *df*.

    The frame is split with ``train_test_split(random_state=42)`` and, for
    each position ``0 .. len(df) - 1``, the masks record whether that
    integer appears among the index labels of the train / test part.
    The masks are therefore only meaningful when the index of *df* is
    ``0 .. len(df) - 1``.

    Args:
        df: DataFrame to split.

    Returns:
        ``(train_mask, test_mask)`` as NumPy arrays of length ``len(df)``.
    """
    train_part, test_part = sklearn.model_selection.train_test_split(
        df, random_state=42
    )
    positions = range(len(df))

    def membership(index):
        # True where the integer position is one of the labels in `index`.
        return np.array([pos in index for pos in positions])

    return membership(train_part.index), membership(test_part.index)
def edge_index_selected(edge_index):
    """Keep the edges whose exponential time-decay weight is above average.

    Each row of *edge_index* is ``[source, target, time_difference]``.
    The time differences are turned into weights ``exp(-d / theta)`` with
    ``theta`` the mean time difference; weights that are exactly 1 (zero
    time difference, which includes a node paired with itself) are set to
    0. Edges whose weight is strictly greater than the mean weight are
    returned.

    The input array is not modified.

    Args:
        edge_index: 2-D array-like with three columns as described above.

    Returns:
        A ``torch.long`` tensor of shape ``(2, k)`` holding the source and
        target node ids of the ``k`` selected edges (``(2, 0)`` when no
        edge qualifies).
    """
    edges = np.asarray(edge_index, dtype=np.float64)
    gaps = edges[:, 2]

    theta = gaps.mean()
    weights = np.exp(-gaps / theta)
    # A weight of exactly 1 means a zero time gap; drop those edges by
    # zeroing their weight so they can never exceed the mean.
    weights = np.where(weights != 1, weights, 0.0)

    keep = weights > weights.mean()
    # Fancy indexing copies, so the caller's array is left untouched.
    selected = edges[keep, :2].astype(np.int64)
    return torch.from_numpy(selected).t().contiguous()
# Load the raw transactions and drop the first CSV column
# (presumably a saved row index; confirm against the file).
fraudTrain = pd.read_csv("~/Desktop/fraudTrain.csv").iloc[:, 1:]
# Parse the timestamp column with one vectorized call instead of calling
# pd.to_datetime once per row through map/lambda.
fraudTrain = fraudTrain.assign(
    trans_date_trans_time=pd.to_datetime(fraudTrain.trans_date_trans_time)
)
fraudTrain
0
2019-01-01 00:00:00
2.703190e+15
fraud_Rippin, Kub and Mann
misc_net
4.97
Jennifer
Banks
F
561 Perry Cove
Moravian Falls
...
36.0788
-81.1781
3495
Psychologist, counselling
1988-03-09
0b242abb623afc578575680df30655b9
1325376018
36.011293
-82.048315
0
1
2019-01-01 00:00:00
6.304230e+11
fraud_Heller, Gutmann and Zieme
grocery_pos
107.23
Stephanie
Gill
F
43039 Riley Greens Suite 393
Orient
...
48.8878
-118.2105
149
Special educational needs teacher
1978-06-21
1f76529f8574734946361c461b024d99
1325376044
49.159047
-118.186462
0
2
2019-01-01 00:00:00
3.885950e+13
fraud_Lind-Buckridge
entertainment
220.11
Edward
Sanchez
M
594 White Dale Suite 530
Malad City
...
42.1808
-112.2620
4154
Nature conservation officer
1962-01-19
a1a22d70485983eac12b5b88dad1cf95
1325376051
43.150704
-112.154481
0
3
2019-01-01 00:01:00
3.534090e+15
fraud_Kutch, Hermiston and Farrell
gas_transport
45.00
Jeremy
White
M
9443 Cynthia Court Apt. 038
Boulder
...
46.2306
-112.1138
1939
Patent attorney
1967-01-12
6b849c168bdad6f867558c3793159a81
1325376076
47.034331
-112.561071
0
4
2019-01-01 00:03:00
3.755340e+14
fraud_Keeling-Crist
misc_pos
41.96
Tyler
Garcia
M
408 Bradley Rest
Doe Hill
...
38.4207
-79.4629
99
Dance movement psychotherapist
1986-03-28
a41d7549acf90789359a9aa5346dcb46
1325376186
38.674999
-78.632459
0
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
1048570
2020-03-10 16:07:00
6.011980e+15
fraud_Fadel Inc
health_fitness
77.00
Haley
Wagner
F
05561 Farrell Crescent
Annapolis
...
39.0305
-76.5515
92106
Accountant, chartered certified
1943-05-28
45ecd198c65e81e597db22e8d2ef7361
1362931649
38.779464
-76.317042
0
1048571
2020-03-10 16:07:00
4.839040e+15
fraud_Cremin, Hamill and Reichel
misc_pos
116.94
Meredith
Campbell
F
043 Hanson Turnpike
Hedrick
...
41.1826
-92.3097
1583
Geochemist
1999-06-28
c00ce51c6ebb7657474a77b9e0b51f34
1362931670
41.400318
-92.726724
0
1048572
2020-03-10 16:08:00
5.718440e+11
fraud_O'Connell, Botsford and Hand
home
21.27
Susan
Mills
F
005 Cody Estates
Louisville
...
38.2507
-85.7476
736284
Engineering geologist
1952-04-02
17c9dc8b2a6449ca2473726346e58e6c
1362931711
37.293339
-84.798122
0
1048573
2020-03-10 16:08:00
4.646850e+18
fraud_Thompson-Gleason
health_fitness
9.52
Julia
Bell
F
576 House Crossroad
West Sayville
...
40.7320
-73.1000
4056
Film/video editor
1990-06-25
5ca650881b48a6a38754f841c23b77ab
1362931718
39.773077
-72.213209
0
1048574
2020-03-10 16:08:00
2.283740e+15
fraud_Buckridge PLC
misc_pos
6.81
Shannon
Williams
F
9345 Spencer Junctions Suite 183
Alpharetta
...
34.0770
-84.3033
165556
Prison officer
1997-12-27
8d0a575fe635bbde12f1a2bffc126731
1362931730
33.601468
-83.891921
0
1048575 rows × 22 columns
데이터정리
# Keep a seeded 20% sample of the non-fraud rows and every fraud row.
_df1 = fraudTrain[fraudTrain["is_fraud" ] == 0 ].sample(frac= 0.20 , random_state= 42 )
_df2 = fraudTrain[fraudTrain["is_fraud" ] == 1 ]
df02 = pd.concat([_df1,_df2])
df02.shape
# Undersample the non-fraud rows so both classes have the same size.
df50 = down_sample_textbook(df02)
# reset_index() gives df50 a fresh 0..N-1 index (the old labels are kept
# as an "index" column); mask() relies on that 0..N-1 index.
df50 = df50.reset_index()
df50.shape
tr/test
(array([False, True, True, ..., True, False, True]),
array([ True, False, False, ..., False, True, False]))
# Boolean NumPy masks over the rows of df50 marking the train / test split.
train_mask, test_mask = mask(df50)
edge_index 설정
# One-off precomputation, kept for reference: for every card number
# (cc_num), list all ordered pairs of its transactions with their time
# difference, flatten the per-card lists, and cache the result on disk.
# groups = df50.groupby('cc_num')
# edge_index_list_plus = [compute_time_difference(group) for _, group in groups]
# edge_index_list_plus_flat = [item for sublist in edge_index_list_plus for item in sublist]
# edge_index_list_plus_nparr = np.array(edge_index_list_plus_flat)
# np.save('edge_index_list_plus50.npy', edge_index_list_plus_nparr)
# Load the cached rows (presumably [source, target, time_difference] as
# written by the commented-out code above) as float64.
edge_index = np.load('edge_index_list_plus50.npy' ).astype(np.float64)
edge_index.shape
# NOTE(review): this rebinds the name `edge_index_selected` from the function
# to its result, so re-running this line fails until the function is defined
# again. Later cells read the tensor under this name.
edge_index_selected = edge_index_selected(edge_index)
data설정(x, edge_index, y)
def haversine(lat1, lon1, lat2, lon2, radius=6371.0):
    """Great-circle distance between two points given in decimal degrees.

    All coordinate arguments may be scalars or array-likes that NumPy can
    broadcast against each other (e.g. pandas Series of equal length).

    Args:
        lat1: Latitude of the first point, in degrees.
        lon1: Longitude of the first point, in degrees.
        lat2: Latitude of the second point, in degrees.
        lon2: Longitude of the second point, in degrees.
        radius: Sphere radius; the result is in the same unit. The default
            6371.0 is the mean Earth radius in kilometres.

    Returns:
        The distance along the sphere, in the unit of *radius*.
    """
    # Convert degrees to radians.
    lat1 = np.radians(lat1)
    lon1 = np.radians(lon1)
    lat2 = np.radians(lat2)
    lon2 = np.radians(lon2)

    # Haversine formula.
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = np.sin(dlat / 2) ** 2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon / 2) ** 2
    # Rounding can push `a` a hair outside [0, 1] (e.g. near-antipodal
    # points), which would make sqrt(1 - a) NaN; clamp it first.
    a = np.minimum(np.maximum(a, 0.0), 1.0)
    c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))
    return radius * c
# Pull the customer and merchant latitude/longitude columns out of df50.
customer_lat = df50['lat' ]
customer_lon = df50['long' ]
store_lat = df50['merch_lat' ]
store_lon = df50['merch_long' ]
# Customer-to-merchant distance for every row.
distances = haversine(customer_lat, customer_lon, store_lat, store_lon)
# Store the distance as a new column of df50.
df50['distance_km' ] = distances
# Replace each category string by an integer code, numbered in order of
# first appearance. NOTE: this overwrites df50['category'] in place, so
# running these lines a second time maps the codes again.
category_map = {category: index for index, category in enumerate (df50['category' ].unique())}
df50['category' ] = df50['category' ].map (category_map)
# Node features: amount, category code, distance. Labels: is_fraud.
x = torch.tensor(df50[['amt' , 'category' , 'distance_km' ]].values, dtype= torch.float )
y = torch.tensor(df50['is_fraud' ], dtype= torch.int64)
# Graph object consumed by the GCN cells; the masks are the NumPy boolean
# arrays returned by mask().
data = torch_geometric.data.Data(x= x, edge_index= edge_index_selected, y= y, train_mask= train_mask, test_mask= test_mask)
data
Data(x=[12012, 3], edge_index=[2, 93730], y=[12012], train_mask=[12012], test_mask=[12012])
정리
분석1
df50_tr
df50_test
GCN1
x:amt,category, distance
기본
분석2
df50_tr
df50_test
GCN2
GCNConv 1개 추가
분석3
df50_tr
df50_test
GCN2
dropout 확률을 0.3으로 조정
분석4
df50_tr
df50_test
GCN2
dropout 확률을 0.2로 조정
분석5
df50_tr
df50_test
GCN1
dropout 확률을 0.2로 조정
분석6
df50_tr
df50_test
GCN1
dropout 확률을 0.2로 조정, 학습 반복 횟수(range): 400->800
분석7
df50_tr
df50_test
GCN1
dropout 확률을 0.2로 조정, 학습 반복 횟수(range): 400->800, optimizer를 SGD로 변경
# Stack the one-row metric tables of analyses 1-6 into a single comparison
# table. The _results* frames are produced by the analysis cells and must
# exist before this runs; _results7 and later are not included here.
lst = [_results1, _results2,_results3,_results4,_results5, _results6]
pd.concat(lst)
분석1
0.914752
0.868498
0.979565
0.920694
분석2
0.912421
0.862428
0.983520
0.919002
분석3
0.879454
0.869482
0.895847
0.882468
분석4
0.912421
0.862428
0.983520
0.919002
분석5
0.915418
0.866938
0.983520
0.921557
분석6
0.917416
0.870403
0.982861
0.923220
그래도 autogluon(0.927858)보다는 성능이 낮다.
옵티마이저도 다른 것(SGD, RMSprop, Adagrad)을 시도해 봤는데 Adam이 가장 나은 듯하다.
분석 1(GCN)
# Seed before the model is built so weight initialisation and dropout are
# reproducible for this analysis.
torch.manual_seed(202250926)


class GCN1(torch.nn.Module):
    """Two-layer GCN classifier: num_node_features -> 32 -> 2.

    The input width is read from the module-level ``data`` object when the
    model is constructed. ``forward`` returns per-node log-probabilities
    over the two classes.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = GCNConv(data.num_node_features, 32)
        self.conv2 = GCNConv(32, 2)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        # p=0.5 is the F.dropout default, written out for comparison with
        # the later analyses that change it.
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.conv2(x, edge_index)
        return F.log_softmax(x, dim=1)


# NumPy views of the train/test features and labels.
X = (data.x[data.train_mask]).numpy()
XX = (data.x[data.test_mask]).numpy()
y = (data.y[data.train_mask]).numpy()
yy = (data.y[data.test_mask]).numpy()

model = GCN1()
optimizer = torch.optim.Adam(model.parameters(), lr=0.05, weight_decay=5e-4)

# Full-batch training: the loss is computed on the training nodes only.
model.train()
for epoch in range(400):
    optimizer.zero_grad()
    out = model(data)
    loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
    loss.backward()
    optimizer.step()

# Inference does not need gradients; no_grad avoids building the graph.
model.eval()
with torch.no_grad():
    pred = model(data).argmax(dim=1)
yyhat = pred[data.test_mask]

# NOTE: this list reuses the name `metrics`, which the file also imports
# from sklearn; the functions are therefore referenced via sklearn.metrics.
metrics = [sklearn.metrics.accuracy_score,
           sklearn.metrics.precision_score,
           sklearn.metrics.recall_score,
           sklearn.metrics.f1_score]
# Built-in round() works whether the metric returns a Python float or a
# NumPy scalar (a plain float has no .round method).
_results1 = pd.DataFrame(
    {m.__name__: [round(m(yy, yyhat), 6)] for m in metrics},
    index=['분석1'],
)
_results1
분석1
0.914752
0.868498
0.979565
0.920694
분석 2(GCN): GCNConv 3개
-
dropout: 0.5
# Rebuild the graph object: earlier cells rebind `y` to a NumPy array of
# training labels, so the label tensor is recreated from df50 here.
x = torch.tensor(df50[['amt', 'category', 'distance_km']].values, dtype=torch.float)
# .values hands torch a NumPy array, matching how `x` is built.
y = torch.tensor(df50['is_fraud'].values, dtype=torch.int64)
data = torch_geometric.data.Data(x=x, edge_index=edge_index_selected, y=y, train_mask=train_mask, test_mask=test_mask)
data

# Seed before the model is built so weight initialisation and dropout are
# reproducible for this analysis.
torch.manual_seed(202250926)


class GCN2(torch.nn.Module):
    """Three-layer GCN classifier: num_node_features -> 32 -> 64 -> 2.

    The input width is read from the module-level ``data`` object when the
    model is constructed. ``forward`` returns per-node log-probabilities
    over the two classes.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = GCNConv(data.num_node_features, 32)
        self.conv2 = GCNConv(32, 64)
        self.conv3 = GCNConv(64, 2)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        # p=0.5 is the F.dropout default, written out for comparison with
        # the later analyses that change it.
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.conv2(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.conv3(x, edge_index)
        return F.log_softmax(x, dim=1)


# NumPy views of the train/test features and labels.
X = (data.x[data.train_mask]).numpy()
XX = (data.x[data.test_mask]).numpy()
y = (data.y[data.train_mask]).numpy()
yy = (data.y[data.test_mask]).numpy()

model = GCN2()
optimizer = torch.optim.Adam(model.parameters(), lr=0.05, weight_decay=5e-4)

# Full-batch training: the loss is computed on the training nodes only.
model.train()
for epoch in range(400):
    optimizer.zero_grad()
    out = model(data)
    loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
    loss.backward()
    optimizer.step()

# Inference does not need gradients; no_grad avoids building the graph.
model.eval()
with torch.no_grad():
    pred = model(data).argmax(dim=1)
yyhat = pred[data.test_mask]

# NOTE: this list reuses the name `metrics`, which the file also imports
# from sklearn; the functions are therefore referenced via sklearn.metrics.
metrics = [sklearn.metrics.accuracy_score,
           sklearn.metrics.precision_score,
           sklearn.metrics.recall_score,
           sklearn.metrics.f1_score]
# Built-in round() works whether the metric returns a Python float or a
# NumPy scalar (a plain float has no .round method).
_results2 = pd.DataFrame(
    {m.__name__: [round(m(yy, yyhat), 6)] for m in metrics},
    index=['분석2'],
)
_results2
분석2
0.900766
0.848884
0.977587
0.908701
분석3
-
분석2 에서 dropout: 0.3
# Seed before the model is built so weight initialisation and dropout are
# reproducible for this analysis.
torch.manual_seed(202250926)


class GCN2(torch.nn.Module):
    """Three-layer GCN classifier (num_node_features -> 32 -> 64 -> 2), dropout 0.3.

    The input width is read from the module-level ``data`` object when the
    model is constructed. ``forward`` returns per-node log-probabilities
    over the two classes.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = GCNConv(data.num_node_features, 32)
        self.conv2 = GCNConv(32, 64)
        self.conv3 = GCNConv(64, 2)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, p=0.3, training=self.training)
        x = self.conv2(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, p=0.3, training=self.training)
        x = self.conv3(x, edge_index)
        return F.log_softmax(x, dim=1)


# NumPy views of the train/test features and labels.
X = (data.x[data.train_mask]).numpy()
XX = (data.x[data.test_mask]).numpy()
y = (data.y[data.train_mask]).numpy()
yy = (data.y[data.test_mask]).numpy()

model = GCN2()
optimizer = torch.optim.Adam(model.parameters(), lr=0.05, weight_decay=5e-4)

# Full-batch training: the loss is computed on the training nodes only.
model.train()
for epoch in range(400):
    optimizer.zero_grad()
    out = model(data)
    loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
    loss.backward()
    optimizer.step()

# Inference does not need gradients; no_grad avoids building the graph.
model.eval()
with torch.no_grad():
    pred = model(data).argmax(dim=1)
yyhat = pred[data.test_mask]

# NOTE: this list reuses the name `metrics`, which the file also imports
# from sklearn; the functions are therefore referenced via sklearn.metrics.
metrics = [sklearn.metrics.accuracy_score,
           sklearn.metrics.precision_score,
           sklearn.metrics.recall_score,
           sklearn.metrics.f1_score]
# Built-in round() works whether the metric returns a Python float or a
# NumPy scalar (a plain float has no .round method).
_results3 = pd.DataFrame(
    {m.__name__: [round(m(yy, yyhat), 6)] for m in metrics},
    index=['분석3'],
)
_results3
분석3
0.879454
0.869482
0.895847
0.882468
분석4
-
dropout: 0.2
# Rebuild the graph object: earlier cells rebind `y` to a NumPy array of
# training labels, so the label tensor is recreated from df50 here.
x = torch.tensor(df50[['amt', 'category', 'distance_km']].values, dtype=torch.float)
# .values hands torch a NumPy array, matching how `x` is built.
y = torch.tensor(df50['is_fraud'].values, dtype=torch.int64)
data = torch_geometric.data.Data(x=x, edge_index=edge_index_selected, y=y, train_mask=train_mask, test_mask=test_mask)
data

# Seed before the model is built so weight initialisation and dropout are
# reproducible for this analysis.
torch.manual_seed(202250926)


class GCN2(torch.nn.Module):
    """Three-layer GCN classifier (num_node_features -> 32 -> 64 -> 2), dropout 0.2.

    The input width is read from the module-level ``data`` object when the
    model is constructed. ``forward`` returns per-node log-probabilities
    over the two classes.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = GCNConv(data.num_node_features, 32)
        self.conv2 = GCNConv(32, 64)
        self.conv3 = GCNConv(64, 2)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, p=0.2, training=self.training)
        x = self.conv2(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, p=0.2, training=self.training)
        x = self.conv3(x, edge_index)
        return F.log_softmax(x, dim=1)


# NumPy views of the train/test features and labels.
X = (data.x[data.train_mask]).numpy()
XX = (data.x[data.test_mask]).numpy()
y = (data.y[data.train_mask]).numpy()
yy = (data.y[data.test_mask]).numpy()

model = GCN2()
optimizer = torch.optim.Adam(model.parameters(), lr=0.05, weight_decay=5e-4)

# Full-batch training: the loss is computed on the training nodes only.
model.train()
for epoch in range(400):
    optimizer.zero_grad()
    out = model(data)
    loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
    loss.backward()
    optimizer.step()

# Inference does not need gradients; no_grad avoids building the graph.
model.eval()
with torch.no_grad():
    pred = model(data).argmax(dim=1)
yyhat = pred[data.test_mask]

# NOTE: this list reuses the name `metrics`, which the file also imports
# from sklearn; the functions are therefore referenced via sklearn.metrics.
metrics = [sklearn.metrics.accuracy_score,
           sklearn.metrics.precision_score,
           sklearn.metrics.recall_score,
           sklearn.metrics.f1_score]
# Built-in round() works whether the metric returns a Python float or a
# NumPy scalar (a plain float has no .round method).
_results4 = pd.DataFrame(
    {m.__name__: [round(m(yy, yyhat), 6)] for m in metrics},
    index=['분석4'],
)
_results4
분석4
0.912421
0.862428
0.98352
0.919002
분석 5
# Seed before the model is built so weight initialisation and dropout are
# reproducible for this analysis.
torch.manual_seed(202250926)


class GCN1(torch.nn.Module):
    """Two-layer GCN classifier (num_node_features -> 32 -> 2), dropout 0.2.

    The input width is read from the module-level ``data`` object when the
    model is constructed. ``forward`` returns per-node log-probabilities
    over the two classes.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = GCNConv(data.num_node_features, 32)
        self.conv2 = GCNConv(32, 2)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, p=0.2, training=self.training)
        x = self.conv2(x, edge_index)
        return F.log_softmax(x, dim=1)


# NumPy views of the train/test features and labels.
X = (data.x[data.train_mask]).numpy()
XX = (data.x[data.test_mask]).numpy()
y = (data.y[data.train_mask]).numpy()
yy = (data.y[data.test_mask]).numpy()

model = GCN1()
optimizer = torch.optim.Adam(model.parameters(), lr=0.05, weight_decay=5e-4)

# Full-batch training: the loss is computed on the training nodes only.
model.train()
for epoch in range(400):
    optimizer.zero_grad()
    out = model(data)
    loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
    loss.backward()
    optimizer.step()

# Inference does not need gradients; no_grad avoids building the graph.
model.eval()
with torch.no_grad():
    pred = model(data).argmax(dim=1)
yyhat = pred[data.test_mask]

# NOTE: this list reuses the name `metrics`, which the file also imports
# from sklearn; the functions are therefore referenced via sklearn.metrics.
metrics = [sklearn.metrics.accuracy_score,
           sklearn.metrics.precision_score,
           sklearn.metrics.recall_score,
           sklearn.metrics.f1_score]
# Built-in round() works whether the metric returns a Python float or a
# NumPy scalar (a plain float has no .round method).
_results5 = pd.DataFrame(
    {m.__name__: [round(m(yy, yyhat), 6)] for m in metrics},
    index=['분석5'],
)
_results5
분석5
0.915418
0.866938
0.98352
0.921557
분석 6
# Seed before the model is built so weight initialisation and dropout are
# reproducible for this analysis.
torch.manual_seed(202250926)


class GCN1(torch.nn.Module):
    """Two-layer GCN classifier (num_node_features -> 32 -> 2), dropout 0.2.

    The input width is read from the module-level ``data`` object when the
    model is constructed. ``forward`` returns per-node log-probabilities
    over the two classes.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = GCNConv(data.num_node_features, 32)
        self.conv2 = GCNConv(32, 2)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, p=0.2, training=self.training)
        x = self.conv2(x, edge_index)
        return F.log_softmax(x, dim=1)


# NumPy views of the train/test features and labels.
X = (data.x[data.train_mask]).numpy()
XX = (data.x[data.test_mask]).numpy()
y = (data.y[data.train_mask]).numpy()
yy = (data.y[data.test_mask]).numpy()

model = GCN1()
optimizer = torch.optim.Adam(model.parameters(), lr=0.05, weight_decay=5e-4)

# Full-batch training for 800 steps; the loss is computed on the training
# nodes only.
model.train()
for epoch in range(800):
    optimizer.zero_grad()
    out = model(data)
    loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
    loss.backward()
    optimizer.step()

# Inference does not need gradients; no_grad avoids building the graph.
model.eval()
with torch.no_grad():
    pred = model(data).argmax(dim=1)
yyhat = pred[data.test_mask]

# NOTE: this list reuses the name `metrics`, which the file also imports
# from sklearn; the functions are therefore referenced via sklearn.metrics.
metrics = [sklearn.metrics.accuracy_score,
           sklearn.metrics.precision_score,
           sklearn.metrics.recall_score,
           sklearn.metrics.f1_score]
# Built-in round() works whether the metric returns a Python float or a
# NumPy scalar (a plain float has no .round method).
_results6 = pd.DataFrame(
    {m.__name__: [round(m(yy, yyhat), 6)] for m in metrics},
    index=['분석6'],
)
_results6
분석6
0.917416
0.870403
0.982861
0.92322
분석 7
# Seed before the model is built so weight initialisation and dropout are
# reproducible for this analysis.
torch.manual_seed(202250926)


class GCN1(torch.nn.Module):
    """Two-layer GCN classifier (num_node_features -> 32 -> 2), dropout 0.2.

    The input width is read from the module-level ``data`` object when the
    model is constructed. ``forward`` returns per-node log-probabilities
    over the two classes.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = GCNConv(data.num_node_features, 32)
        self.conv2 = GCNConv(32, 2)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, p=0.2, training=self.training)
        x = self.conv2(x, edge_index)
        return F.log_softmax(x, dim=1)


# NumPy views of the train/test features and labels.
X = (data.x[data.train_mask]).numpy()
XX = (data.x[data.test_mask]).numpy()
y = (data.y[data.train_mask]).numpy()
yy = (data.y[data.test_mask]).numpy()

model = GCN1()
# Same setup as the Adam runs, with SGD as the optimizer.
optimizer = torch.optim.SGD(model.parameters(), lr=0.05, weight_decay=5e-4)

# Full-batch training for 800 steps; the loss is computed on the training
# nodes only.
model.train()
for epoch in range(800):
    optimizer.zero_grad()
    out = model(data)
    loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
    loss.backward()
    optimizer.step()

# Inference does not need gradients; no_grad avoids building the graph.
model.eval()
with torch.no_grad():
    pred = model(data).argmax(dim=1)
yyhat = pred[data.test_mask]

# NOTE: this list reuses the name `metrics`, which the file also imports
# from sklearn; the functions are therefore referenced via sklearn.metrics.
metrics = [sklearn.metrics.accuracy_score,
           sklearn.metrics.precision_score,
           sklearn.metrics.recall_score,
           sklearn.metrics.f1_score]
# Built-in round() works whether the metric returns a Python float or a
# NumPy scalar (a plain float has no .round method).
_results7 = pd.DataFrame(
    {m.__name__: [round(m(yy, yyhat), 6)] for m in metrics},
    index=['분석7'],
)
_results7
분석7
0.899101
0.846066
0.978247
0.907368
분석 8
# Seed before the model is built so weight initialisation and dropout are
# reproducible for this analysis.
torch.manual_seed(202250926)


class GCN1(torch.nn.Module):
    """Two-layer GCN classifier (num_node_features -> 32 -> 2), dropout 0.2.

    The input width is read from the module-level ``data`` object when the
    model is constructed. ``forward`` returns per-node log-probabilities
    over the two classes.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = GCNConv(data.num_node_features, 32)
        self.conv2 = GCNConv(32, 2)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, p=0.2, training=self.training)
        x = self.conv2(x, edge_index)
        return F.log_softmax(x, dim=1)


# NumPy views of the train/test features and labels.
X = (data.x[data.train_mask]).numpy()
XX = (data.x[data.test_mask]).numpy()
y = (data.y[data.train_mask]).numpy()
yy = (data.y[data.test_mask]).numpy()

model = GCN1()
# Same setup as the Adam runs, with RMSprop as the optimizer.
optimizer = torch.optim.RMSprop(model.parameters(), lr=0.05, weight_decay=5e-4)

# Full-batch training for 800 steps; the loss is computed on the training
# nodes only.
model.train()
for epoch in range(800):
    optimizer.zero_grad()
    out = model(data)
    loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
    loss.backward()
    optimizer.step()

# Inference does not need gradients; no_grad avoids building the graph.
model.eval()
with torch.no_grad():
    pred = model(data).argmax(dim=1)
yyhat = pred[data.test_mask]

# NOTE: this list reuses the name `metrics`, which the file also imports
# from sklearn; the functions are therefore referenced via sklearn.metrics.
metrics = [sklearn.metrics.accuracy_score,
           sklearn.metrics.precision_score,
           sklearn.metrics.recall_score,
           sklearn.metrics.f1_score]
# Built-in round() works whether the metric returns a Python float or a
# NumPy scalar (a plain float has no .round method).
_results8 = pd.DataFrame(
    {m.__name__: [round(m(yy, yyhat), 6)] for m in metrics},
    index=['분석8'],
)
_results8
분석8
0.891442
0.828461
0.990112
0.902102
분석 9
# Seed before the model is built so weight initialisation and dropout are
# reproducible for this analysis.
torch.manual_seed(202250926)


class GCN1(torch.nn.Module):
    """Two-layer GCN classifier (num_node_features -> 32 -> 2), dropout 0.2.

    The input width is read from the module-level ``data`` object when the
    model is constructed. ``forward`` returns per-node log-probabilities
    over the two classes.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = GCNConv(data.num_node_features, 32)
        self.conv2 = GCNConv(32, 2)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, p=0.2, training=self.training)
        x = self.conv2(x, edge_index)
        return F.log_softmax(x, dim=1)


# NumPy views of the train/test features and labels.
X = (data.x[data.train_mask]).numpy()
XX = (data.x[data.test_mask]).numpy()
y = (data.y[data.train_mask]).numpy()
yy = (data.y[data.test_mask]).numpy()

model = GCN1()
# Same setup as the Adam runs, with Adagrad as the optimizer.
optimizer = torch.optim.Adagrad(model.parameters(), lr=0.05, weight_decay=5e-4)

# Full-batch training for 800 steps; the loss is computed on the training
# nodes only.
model.train()
for epoch in range(800):
    optimizer.zero_grad()
    out = model(data)
    loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
    loss.backward()
    optimizer.step()

# Inference does not need gradients; no_grad avoids building the graph.
model.eval()
with torch.no_grad():
    pred = model(data).argmax(dim=1)
yyhat = pred[data.test_mask]

# NOTE: this list reuses the name `metrics`, which the file also imports
# from sklearn; the functions are therefore referenced via sklearn.metrics.
metrics = [sklearn.metrics.accuracy_score,
           sklearn.metrics.precision_score,
           sklearn.metrics.recall_score,
           sklearn.metrics.f1_score]
# Built-in round() works whether the metric returns a Python float or a
# NumPy scalar (a plain float has no .round method).
_results9 = pd.DataFrame(
    {m.__name__: [round(m(yy, yyhat), 6)] for m in metrics},
    index=['분석9'],
)
_results9
분석9
0.913087
0.875598
0.965063
0.918156