import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import networkx as nx
import sklearn
import xgboost as xgb
# sklearn
from sklearn import model_selection # split함수이용
from sklearn import ensemble # RF,GBM
from sklearn import metrics
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
# gnn
import torch
import torch.nn.functional as F
import torch_geometric
from torch_geometric.nn import GCNConv
imports
def down_sample_textbook(df):
    """Balance ``df`` by down-sampling the non-fraud rows to the fraud count.

    Args:
        df: DataFrame with an ``is_fraud`` column. Rows with ``is_fraud == 0``
            are treated as the majority class, rows with ``is_fraud == 1`` as
            the minority class.

    Returns:
        A new DataFrame containing every ``is_fraud == 1`` row followed by an
        equally sized sample of the ``is_fraud == 0`` rows, drawn without
        replacement with a fixed seed (42). Original index labels are kept.
    """
    df_majority = df[df.is_fraud == 0].copy()
    df_minority = df[df.is_fraud == 1].copy()
    # Sampling without replacement: assumes there are at least as many
    # non-fraud rows as fraud rows (true for the data used in this notebook).
    df_maj_downsampled = sklearn.utils.resample(
        df_majority,
        n_samples=len(df_minority),
        replace=False,
        random_state=42,
    )
    return pd.concat([df_minority, df_maj_downsampled])
def compute_time_difference(group):
    """Return every ordered pair of rows in ``group`` with their time gap.

    Args:
        group: DataFrame with a ``trans_date_trans_time`` column holding pandas
            Timestamps.

    Returns:
        A list of ``[label_i, label_j, gap]`` entries, one per ordered pair of
        rows (pairs with ``i == j`` are included, with gap 0). ``label_*`` are
        the rows' index labels and ``gap`` is the absolute difference of the
        two ``Timestamp.value`` integers (nanoseconds). An empty ``group``
        yields an empty list.
    """
    # Read labels and timestamps once; the pair loop is O(n^2) and must not
    # repeat the row lookups on every iteration.
    labels = group.index.tolist()
    stamps = [ts.value for ts in group["trans_date_trans_time"]]
    return [
        [label_i, label_j, abs(stamp_i - stamp_j)]
        for label_i, stamp_i in zip(labels, stamps)
        for label_j, stamp_j in zip(labels, stamps)
    ]
def mask(df):
    """Split ``df`` into train/test and return boolean masks by row position.

    Args:
        df: DataFrame to split. The masks compare positions ``0..len(df)-1``
            against the index labels of each split, so they are only
            meaningful when ``df`` has a default ``0..N-1`` index (the script
            calls ``reset_index()`` before using this).

    Returns:
        ``(train_mask, test_mask)``: two boolean NumPy arrays of length
        ``len(df)``; entry ``i`` is True when label ``i`` is in that split.
    """
    df_tr, df_test = sklearn.model_selection.train_test_split(df, random_state=42)
    positions = np.arange(len(df))
    train_mask = np.isin(positions, df_tr.index)
    test_mask = np.isin(positions, df_test.index)
    return train_mask, test_mask
def edge_index_selected(edge_index):
    """Keep the edges whose exponential weight is above the mean weight.

    Args:
        edge_index: 2-D array with one row per candidate edge, laid out as
            ``[source, target, time_difference]``.

    Returns:
        A ``torch.long`` tensor of shape ``(2, E)`` holding the source and
        target node ids of the selected edges. When no edge is selected
        (including empty input) the shape is ``(2, 0)``.

    The weight of an edge is ``exp(-time_difference / theta)``, where ``theta``
    is the mean time difference. Weights equal to exactly 1 (time difference 0,
    e.g. a node paired with itself) are set to 0 so they are never selected.
    The input array is not modified.
    """
    arr = np.asarray(edge_index, dtype=np.float64)
    if arr.shape[0] == 0:
        return torch.empty((2, 0), dtype=torch.long)

    gap = arr[:, 2]
    theta = gap.mean()
    weight = np.exp(-gap / theta)
    # Same as (weight != 1) * weight: zero out the zero-gap edges.
    weight = np.where(weight != 1, weight, 0.0)

    keep = weight > weight.mean()
    selected = arr[keep, :2].astype(np.int64)
    return torch.from_numpy(selected).t().contiguous()
# Load the raw transactions and drop the first (unnamed index) column.
fraudTrain = pd.read_csv("~/Desktop/fraudTrain.csv").iloc[:, 1:]
# Parse the timestamp column in one vectorised call instead of row by row.
fraudTrain = fraudTrain.assign(
    trans_date_trans_time=pd.to_datetime(fraudTrain.trans_date_trans_time)
)
fraudTrain
trans_date_trans_time | cc_num | merchant | category | amt | first | last | gender | street | city | ... | lat | long | city_pop | job | dob | trans_num | unix_time | merch_lat | merch_long | is_fraud | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2019-01-01 00:00:00 | 2.703190e+15 | fraud_Rippin, Kub and Mann | misc_net | 4.97 | Jennifer | Banks | F | 561 Perry Cove | Moravian Falls | ... | 36.0788 | -81.1781 | 3495 | Psychologist, counselling | 1988-03-09 | 0b242abb623afc578575680df30655b9 | 1325376018 | 36.011293 | -82.048315 | 0 |
1 | 2019-01-01 00:00:00 | 6.304230e+11 | fraud_Heller, Gutmann and Zieme | grocery_pos | 107.23 | Stephanie | Gill | F | 43039 Riley Greens Suite 393 | Orient | ... | 48.8878 | -118.2105 | 149 | Special educational needs teacher | 1978-06-21 | 1f76529f8574734946361c461b024d99 | 1325376044 | 49.159047 | -118.186462 | 0 |
2 | 2019-01-01 00:00:00 | 3.885950e+13 | fraud_Lind-Buckridge | entertainment | 220.11 | Edward | Sanchez | M | 594 White Dale Suite 530 | Malad City | ... | 42.1808 | -112.2620 | 4154 | Nature conservation officer | 1962-01-19 | a1a22d70485983eac12b5b88dad1cf95 | 1325376051 | 43.150704 | -112.154481 | 0 |
3 | 2019-01-01 00:01:00 | 3.534090e+15 | fraud_Kutch, Hermiston and Farrell | gas_transport | 45.00 | Jeremy | White | M | 9443 Cynthia Court Apt. 038 | Boulder | ... | 46.2306 | -112.1138 | 1939 | Patent attorney | 1967-01-12 | 6b849c168bdad6f867558c3793159a81 | 1325376076 | 47.034331 | -112.561071 | 0 |
4 | 2019-01-01 00:03:00 | 3.755340e+14 | fraud_Keeling-Crist | misc_pos | 41.96 | Tyler | Garcia | M | 408 Bradley Rest | Doe Hill | ... | 38.4207 | -79.4629 | 99 | Dance movement psychotherapist | 1986-03-28 | a41d7549acf90789359a9aa5346dcb46 | 1325376186 | 38.674999 | -78.632459 | 0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
1048570 | 2020-03-10 16:07:00 | 6.011980e+15 | fraud_Fadel Inc | health_fitness | 77.00 | Haley | Wagner | F | 05561 Farrell Crescent | Annapolis | ... | 39.0305 | -76.5515 | 92106 | Accountant, chartered certified | 1943-05-28 | 45ecd198c65e81e597db22e8d2ef7361 | 1362931649 | 38.779464 | -76.317042 | 0 |
1048571 | 2020-03-10 16:07:00 | 4.839040e+15 | fraud_Cremin, Hamill and Reichel | misc_pos | 116.94 | Meredith | Campbell | F | 043 Hanson Turnpike | Hedrick | ... | 41.1826 | -92.3097 | 1583 | Geochemist | 1999-06-28 | c00ce51c6ebb7657474a77b9e0b51f34 | 1362931670 | 41.400318 | -92.726724 | 0 |
1048572 | 2020-03-10 16:08:00 | 5.718440e+11 | fraud_O'Connell, Botsford and Hand | home | 21.27 | Susan | Mills | F | 005 Cody Estates | Louisville | ... | 38.2507 | -85.7476 | 736284 | Engineering geologist | 1952-04-02 | 17c9dc8b2a6449ca2473726346e58e6c | 1362931711 | 37.293339 | -84.798122 | 0 |
1048573 | 2020-03-10 16:08:00 | 4.646850e+18 | fraud_Thompson-Gleason | health_fitness | 9.52 | Julia | Bell | F | 576 House Crossroad | West Sayville | ... | 40.7320 | -73.1000 | 4056 | Film/video editor | 1990-06-25 | 5ca650881b48a6a38754f841c23b77ab | 1362931718 | 39.773077 | -72.213209 | 0 |
1048574 | 2020-03-10 16:08:00 | 2.283740e+15 | fraud_Buckridge PLC | misc_pos | 6.81 | Shannon | Williams | F | 9345 Spencer Junctions Suite 183 | Alpharetta | ... | 34.0770 | -84.3033 | 165556 | Prison officer | 1997-12-27 | 8d0a575fe635bbde12f1a2bffc126731 | 1362931730 | 33.601468 | -83.891921 | 0 |
1048575 rows × 22 columns
데이터정리
# Keep a 20% sample of the non-fraud rows (fixed seed) and every fraud row.
_df1 = fraudTrain[fraudTrain["is_fraud"] == 0].sample(frac=0.20, random_state=42)
_df2 = fraudTrain[fraudTrain["is_fraud"] == 1]
df02 = pd.concat([_df1, _df2])
df02.shape
(214520, 22)
# Balance the classes, then renumber rows 0..N-1 so that row positions can be
# used as graph node ids (the old labels are kept in an `index` column).
df50 = down_sample_textbook(df02)
df50 = df50.reset_index()
df50.shape
(12012, 23)
tr/test
mask(df50)
(array([False, True, True, ..., True, False, True]),
array([ True, False, False, ..., False, True, False]))
train_mask, test_mask = mask(df50)
edge_index 설정
# groups = df50.groupby('cc_num')
# edge_index_list_plus = [compute_time_difference(group) for _, group in groups]
# edge_index_list_plus_flat = [item for sublist in edge_index_list_plus for item in sublist]
# edge_index_list_plus_nparr = np.array(edge_index_list_plus_flat)
# np.save('edge_index_list_plus50.npy', edge_index_list_plus_nparr)
# Load the pairwise [source, target, time_difference] rows that the
# commented-out code above computed and saved.
edge_index = np.load('edge_index_list_plus50.npy').astype(np.float64)
edge_index.shape
(200706, 3)
# NOTE: this rebinds `edge_index_selected` from the function to its result, so
# the function can no longer be called afterwards. If the function-defining
# cell is re-run without re-running this one, the name is the function again.
edge_index_selected = edge_index_selected(edge_index)
edge_index_selected
tensor([[ 1023, 1023, 1023, ..., 11944, 11944, 11944],
[ 1024, 1028, 1031, ..., 4257, 9241, 9782]])
np.array(edge_index_selected)
array([[ 1023, 1023, 1023, ..., 11944, 11944, 11944],
[ 1024, 1028, 1031, ..., 4257, 9241, 9782]])
분석 1(GCN): amt
# Node feature: transaction amount as an (N, 1) float tensor; label: is_fraud.
x = torch.tensor(df50['amt'].to_numpy(), dtype=torch.float).reshape(-1, 1)
y = torch.tensor(df50['is_fraud'].to_numpy(), dtype=torch.int64)
data = torch_geometric.data.Data(
    x=x,
    edge_index=edge_index_selected,
    y=y,
    train_mask=train_mask,
    test_mask=test_mask,
)
data
torch.manual_seed(202250926)


class GCN2(torch.nn.Module):
    """Two-layer graph convolutional network for binary node classification.

    The first layer maps the single input feature to 32 hidden channels; the
    second maps those to 2 class scores.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = GCNConv(1, 32)
        self.conv2 = GCNConv(32, 2)

    def forward(self, data):
        """Return per-node log-probabilities over the two classes.

        Args:
            data: graph object exposing ``x`` (node features) and
                ``edge_index``.
        """
        x, edge_index = data.x, data.edge_index

        x = self.conv1(x, edge_index)
        x = F.relu(x)
        # Dropout is only active while the module is in training mode.
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, edge_index)

        return F.log_softmax(x, dim=1)
# NumPy views of the train/test features (X, XX) and labels (y, yy).
X = (data.x[data.train_mask]).numpy()
XX = (data.x[data.test_mask]).numpy()
y = (data.y[data.train_mask]).numpy()
yy = (data.y[data.test_mask]).numpy()

model = GCN2()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

model.train()
for epoch in range(400):
    optimizer.zero_grad()
    out = model(data)
    # The loss uses the training nodes only.
    loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
    loss.backward()
    optimizer.step()

model.eval()
# No gradients are needed for prediction.
with torch.no_grad():
    pred = model(data).argmax(dim=1)
yyhat = pred[data.test_mask]
# NOTE: this list rebinds the name `metrics`, hiding the `metrics` module
# imported from sklearn at the top; `sklearn.metrics.*` still works.
metrics = [sklearn.metrics.accuracy_score,
           sklearn.metrics.precision_score,
           sklearn.metrics.recall_score,
           sklearn.metrics.f1_score]

# Built-in round() works whether a metric returns a Python float or a NumPy
# scalar; a plain float has no .round() method.
_results1 = pd.DataFrame(
    {m.__name__: [round(float(m(yy, yyhat)), 6)] for m in metrics},
    index=['분석1'],
)
_results1
accuracy_score | precision_score | recall_score | f1_score | |
---|---|---|---|---|
분석1 | 0.902098 | 0.862478 | 0.95913 | 0.90824 |
import matplotlib.pyplot as plt

# Draw the whole graph; each node is labelled "<node id>\n<is_fraud>".
G = torch_geometric.utils.to_networkx(data, to_undirected=True)
pos = nx.spring_layout(G)
node_labels_y = data.y.tolist()  # read the labels once instead of per node
labels = {i: f"{i}\n{node_labels_y[i]}" for i in range(data.num_nodes)}
nx.draw(G, pos, with_labels=True, node_color='lightblue', labels=labels)
plt.show()
anomaly_nodes = [i for i in range(data.num_nodes) if data.y[i].item() == 1]

# Label only the fraud nodes.
labels = {i: f"{i}\n{data.y[i].item()}" for i in anomaly_nodes}

# Highlight the fraud nodes in red. A set makes each membership test O(1)
# instead of scanning the list once per node.
_anomaly_set = set(anomaly_nodes)
node_color = ['red' if i in _anomaly_set else 'lightblue' for i in range(data.num_nodes)]

# Draw the graph (reuses G and pos from the previous cell).
nx.draw(G, pos, with_labels=True, node_color=node_color, labels=labels)
plt.show()
G
<networkx.classes.graph.Graph at 0x7f3eaffc0880>
흠…..
import networkx as nx
import matplotlib.pyplot as plt

# Convert the PyTorch Geometric graph to a NetworkX graph.
G = torch_geometric.utils.to_networkx(data, to_undirected=True)

# Indices of nodes A and B.
node_A_index = 1023
node_B_index = 1024

# Shortest path between A and B.
shortest_path = nx.shortest_path(G, source=node_A_index, target=node_B_index)

# Subgraph induced by the nodes on that path.
subgraph = G.subgraph(shortest_path)

# Layout.
pos = nx.spring_layout(subgraph)

# Nodes.
nx.draw_networkx_nodes(subgraph, pos, node_size=200)

# Edges.
nx.draw_networkx_edges(subgraph, pos)

# Show the figure without axes.
plt.axis('off')
plt.show()
# Node 1023 together with its direct neighbours.
subgraph = nx.ego_graph(G, 1023, radius=1)

# Layout.
pos = nx.spring_layout(subgraph)

# Nodes.
nx.draw_networkx_nodes(subgraph, pos, node_size=200)

# Edges.
nx.draw_networkx_edges(subgraph, pos)

# Show the figure without axes.
plt.axis('off')
plt.show()
import networkx as nx
import matplotlib.pyplot as plt

# Convert the PyTorch Geometric graph to a NetworkX graph.
G = torch_geometric.utils.to_networkx(data, to_undirected=True)

# Node 1023 together with its direct neighbours.
subgraph = nx.ego_graph(G, 1023, radius=1)

# Layout.
pos = nx.spring_layout(subgraph)

# Nodes.
nx.draw_networkx_nodes(subgraph, pos, node_size=200)

# Edges.
nx.draw_networkx_edges(subgraph, pos)

# Node-index labels.
labels = {node: str(node) for node in subgraph.nodes()}
nx.draw_networkx_labels(subgraph, pos, labels, font_size=10)

# Show the figure without axes.
plt.axis('off')
plt.show()
import networkx as nx
import matplotlib.pyplot as plt

# Convert the PyTorch Geometric graph to a NetworkX graph.
G = torch_geometric.utils.to_networkx(data, to_undirected=True)

# Node 9782 together with its direct neighbours.
subgraph = nx.ego_graph(G, 9782, radius=1)

# Layout.
pos = nx.spring_layout(subgraph)

# Nodes.
nx.draw_networkx_nodes(subgraph, pos, node_size=200)

# Edges.
nx.draw_networkx_edges(subgraph, pos)

# Node-index labels.
labels = {node: str(node) for node in subgraph.nodes()}
nx.draw_networkx_labels(subgraph, pos, labels, font_size=10)

# Show the figure without axes.
plt.axis('off')
plt.show()
import networkx as nx
import matplotlib.pyplot as plt

# Convert the PyTorch Geometric graph to a NetworkX graph.
G = torch_geometric.utils.to_networkx(data, to_undirected=True)

# Ego graphs (node plus direct neighbours) of nodes 1023 and 9782.
subgraph1 = nx.ego_graph(G, 1023, radius=1)
subgraph2 = nx.ego_graph(G, 9782, radius=1)

# Independent layouts; both are drawn on the same axes.
pos1 = nx.spring_layout(subgraph1)
pos2 = nx.spring_layout(subgraph2)

# Nodes. The second legend label now names node 9782, the actual centre of
# subgraph2 (it previously said 'Node 1024').
nx.draw_networkx_nodes(subgraph1, pos1, node_size=200, node_color='b', label='Node 1023')
nx.draw_networkx_nodes(subgraph2, pos2, node_size=200, node_color='g', label='Node 9782')

# Edges.
nx.draw_networkx_edges(subgraph1, pos1)
nx.draw_networkx_edges(subgraph2, pos2)

# Node-index labels.
labels1 = {node: str(node) for node in subgraph1.nodes()}
labels2 = {node: str(node) for node in subgraph2.nodes()}
nx.draw_networkx_labels(subgraph1, pos1, labels1, font_size=10)
nx.draw_networkx_labels(subgraph2, pos2, labels2, font_size=10)

# Hide the axes.
plt.axis('off')

# Legend.
plt.legend(loc='best')

plt.show()
# Check whether nodes 1023 and 9782 are directly connected.
are_connected = G.has_edge(1023, 9782)

if are_connected:
    print("Node 1023 and Node 9782 are connected.")
else:
    print("Node 1023 and Node 9782 are not connected.")
Node 1023 and Node 9782 are not connected.
import networkx as nx
import matplotlib.pyplot as plt

# Convert the PyTorch Geometric graph to a NetworkX graph.
G = torch_geometric.utils.to_networkx(data, to_undirected=True)

# Ego graphs (node plus direct neighbours) of nodes 1023 and 9782.
subgraph1 = nx.ego_graph(G, 1023, radius=1)
subgraph2 = nx.ego_graph(G, 9782, radius=1)

# Layout of the first graph (seeded so the picture is reproducible).
pos1 = nx.spring_layout(subgraph1, pos=None, seed=42)

# Layout of the second graph.
pos2 = nx.spring_layout(subgraph2, pos=None, seed=43)

# Nodes. The second legend label now names node 9782, the actual centre of
# subgraph2 (it previously said 'Node 1024').
nx.draw_networkx_nodes(subgraph1, pos1, node_size=200, node_color='b', label='Node 1023')
nx.draw_networkx_nodes(subgraph2, pos2, node_size=200, node_color='g', label='Node 9782')

# Edges.
nx.draw_networkx_edges(subgraph1, pos1)
nx.draw_networkx_edges(subgraph2, pos2)

# Node-index labels.
labels1 = {node: str(node) for node in subgraph1.nodes()}
labels2 = {node: str(node) for node in subgraph2.nodes()}
nx.draw_networkx_labels(subgraph1, pos1, labels1, font_size=10)
nx.draw_networkx_labels(subgraph2, pos2, labels2, font_size=10)

# Hide the axes.
plt.axis('off')

# Legend.
plt.legend(loc='best')

plt.show()
len(df50['cc_num'].unique())
932
df50['cc_num'].value_counts()
4.302480e+15 43
1.800650e+14 36
2.131740e+14 35
2.720430e+15 34
2.242540e+15 33
..
3.885950e+13 1
4.026220e+12 1
6.526450e+15 1
4.972230e+15 1
6.535330e+15 1
Name: cc_num, Length: 932, dtype: int64
# Average number of transactions per card number.
cc_num_counts = df50['cc_num'].value_counts()
mean_counts = cc_num_counts.mean()
mean_counts
12.888412017167383
# Card numbers that have exactly 13 transactions (about the average).
counts = df50['cc_num'].value_counts()
cc_num_with_13_counts = counts[counts == 13].index
cc_num_with_13_counts
Float64Index([3585740000000000.0, 4839040000000000.0, 370349000000000.0,
3583090000000000.0, 3534720000000000.0, 342351000000000.0,
213161000000000.0, 6011860000000000.0, 2288810000000000.0,
560881000000.0, 346273000000000.0, 4760120000000.0,
2356280000000000.0, 501803000000.0, 2297450000000000.0,
4933460000000000.0, 3565940000000000.0, 3597980000000000.0,
36913600000000.0, 630423000000.0, 4746000000000.0,
3536820000000000.0, 2720890000000000.0, 4.537e+18,
4755700000000.0, 38057500000000.0, 4.2929e+18,
6011230000000000.0, 4060580000000000.0, 3546900000000000.0,
345060000000000.0, 30596500000000.0, 3597340000000000.0,
4124540000000000.0, 4204240000000000.0, 375237000000000.0,
36153900000000.0, 4334230000000000.0, 6012000000000000.0,
4134460000000000.0, 4653880000000000.0, 4509140000000000.0,
3565420000000000.0],
dtype='float64')
df50[df50['cc_num'] == 4.537e+18].index
Int64Index([ 6298, 6714, 7704, 7823, 7910, 7990, 9145, 9806, 10338,
10910, 11621, 11847, 11926],
dtype='int64')
import networkx as nx
import matplotlib.pyplot as plt

# Rows (= node ids) of one card number and their fraud labels.
selected_indices = df50[df50['cc_num'] == 3585740000000000.0].index
is_fraud_values = df50.loc[selected_indices, 'is_fraud'].tolist()

G = torch_geometric.utils.to_networkx(data, to_undirected=True)
subgraph = G.subgraph(selected_indices)

# Red for fraud nodes, black otherwise.
_fraud_by_node = dict(zip(selected_indices, is_fraud_values))
node_colors = ['r' if _fraud_by_node.get(node) == 1 else 'k' for node in subgraph.nodes()]

pos = nx.spring_layout(subgraph, seed=42)
plt.figure(figsize=(10, 5))
nx.draw_networkx_nodes(subgraph, pos, node_size=200, node_color=node_colors)
nx.draw_networkx_edges(subgraph, pos, edge_color='gray')
nx.draw_networkx_labels(subgraph, pos, font_size=10)
plt.axis('off')
plt.show()
# Fraud labels of every transaction of this card number.
index = df50[df50['cc_num'] == 3585740000000000.0].index
is_fraud_value = df50.loc[index, 'is_fraud']
is_fraud_value
4437 1
4443 1
4446 1
4449 1
4451 1
4452 1
4455 1
4465 1
6914 0
6974 0
8624 0
8830 0
11375 0
Name: is_fraud, dtype: int64
df50[df50['cc_num'] == 4.302480e+15].index
Int64Index([ 2293, 2294, 2295, 2296, 2297, 2298, 2299, 2300, 4709,
4711, 4718, 4720, 4728, 4729, 4730, 4733, 4734, 4736,
4739, 4742, 6081, 6751, 6855, 7025, 7554, 7817, 7946,
8173, 9152, 9166, 9548, 9708, 9773, 9807, 10198, 10828,
10909, 10953, 10962, 11273, 11330, 11562, 11946],
dtype='int64')
# Fraud labels of every transaction of this card number.
index = df50[df50['cc_num'] == 4.302480e+15].index
is_fraud_value = df50.loc[index, 'is_fraud']
is_fraud_value
2293 1
2294 1
2295 1
2296 1
2297 1
2298 1
2299 1
2300 1
4709 1
4711 1
4718 1
4720 1
4728 1
4729 1
4730 1
4733 1
4734 1
4736 1
4739 1
4742 1
6081 0
6751 0
6855 0
7025 0
7554 0
7817 0
7946 0
8173 0
9152 0
9166 0
9548 0
9708 0
9773 0
9807 0
10198 0
10828 0
10909 0
10953 0
10962 0
11273 0
11330 0
11562 0
11946 0
Name: is_fraud, dtype: int64
-
cc_num=4.302480e+15 인 그래프
해당 노드의 is_fraud=1 이면 빨간색으로 표시했다
import networkx as nx
import matplotlib.pyplot as plt

# Row indices (= node ids) of cc_num 4.302480e+15.
selected_indices = df50[df50['cc_num'] == 4.302480e+15].index

# Fraud labels of those rows.
is_fraud_values = df50.loc[selected_indices, 'is_fraud'].tolist()

# Convert the PyTorch Geometric graph to a NetworkX graph.
G = torch_geometric.utils.to_networkx(data, to_undirected=True)

# Subgraph induced by the selected nodes.
subgraph = G.subgraph(selected_indices)

# Node colours: red for fraud, black otherwise.
_fraud_by_node = dict(zip(selected_indices, is_fraud_values))
node_colors = ['r' if _fraud_by_node.get(node) == 1 else 'k' for node in subgraph.nodes()]

# Layout.
pos = nx.spring_layout(subgraph, seed=42)

# Draw.
plt.figure(figsize=(10, 5))
nx.draw_networkx_nodes(subgraph, pos, node_size=200, node_color=node_colors)
nx.draw_networkx_edges(subgraph, pos, edge_color='gray')
nx.draw_networkx_labels(subgraph, pos, font_size=10)

# Hide the axes.
plt.axis('off')

plt.show()
import networkx as nx
import matplotlib.pyplot as plt

# Row indices (= node ids) of the two card numbers.
selected_indices1 = df50[df50['cc_num'] == 4.302480e+15].index
selected_indices2 = df50[df50['cc_num'] == 2.242540e+15].index

# Fraud labels of those rows.
is_fraud_values1 = df50.loc[selected_indices1, 'is_fraud'].tolist()
is_fraud_values2 = df50.loc[selected_indices2, 'is_fraud'].tolist()

# Convert the PyTorch Geometric graph to a NetworkX graph.
G = torch_geometric.utils.to_networkx(data, to_undirected=True)

# First subgraph: red nodes are fraud, black otherwise.
subgraph1 = G.subgraph(selected_indices1)
_fraud_by_node1 = dict(zip(selected_indices1, is_fraud_values1))
node_colors1 = ['r' if _fraud_by_node1.get(node) == 1 else 'k' for node in subgraph1.nodes()]
pos1 = nx.spring_layout(subgraph1, seed=42)

# Second subgraph.
subgraph2 = G.subgraph(selected_indices2)
_fraud_by_node2 = dict(zip(selected_indices2, is_fraud_values2))
node_colors2 = ['r' if _fraud_by_node2.get(node) == 1 else 'k' for node in subgraph2.nodes()]
pos2 = nx.spring_layout(subgraph2, seed=43)

# Draw side by side.
plt.figure(figsize=(15, 5))

# Left panel.
plt.subplot(1, 2, 1)
plt.title('cc_num=4.302480e+15')
nx.draw_networkx_nodes(subgraph1, pos1, node_size=200, node_color=node_colors1)
nx.draw_networkx_edges(subgraph1, pos1, edge_color='gray')
nx.draw_networkx_labels(subgraph1, pos1, font_size=10)
plt.axis('off')

# Right panel.
plt.subplot(1, 2, 2)
plt.title('cc_num=2.242540e+15')
nx.draw_networkx_nodes(subgraph2, pos2, node_size=200, node_color=node_colors2)
nx.draw_networkx_edges(subgraph2, pos2, edge_color='gray')
nx.draw_networkx_labels(subgraph2, pos2, font_size=10)
plt.axis('off')

plt.tight_layout()
plt.show()
import networkx as nx
import matplotlib.pyplot as plt

# Row indices (= node ids) of the two card numbers.
selected_indices1 = df50[df50['cc_num'] == 4.302480e+15].index
selected_indices2 = df50[df50['cc_num'] == 2.242540e+15].index

# Fraud labels of those rows.
is_fraud_values1 = df50.loc[selected_indices1, 'is_fraud'].tolist()
is_fraud_values2 = df50.loc[selected_indices2, 'is_fraud'].tolist()

# Convert the PyTorch Geometric graph to a NetworkX graph.
G = torch_geometric.utils.to_networkx(data, to_undirected=True)

# First subgraph: red nodes are fraud, black otherwise; every edge is blue.
subgraph1 = G.subgraph(selected_indices1)
_fraud_by_node1 = dict(zip(selected_indices1, is_fraud_values1))
node_colors1 = ['r' if _fraud_by_node1.get(node) == 1 else 'k' for node in subgraph1.nodes()]
edge_colors1 = ['b'] * subgraph1.number_of_edges()
pos1 = nx.spring_layout(subgraph1, seed=42)

# Second subgraph: every edge is green.
subgraph2 = G.subgraph(selected_indices2)
_fraud_by_node2 = dict(zip(selected_indices2, is_fraud_values2))
node_colors2 = ['r' if _fraud_by_node2.get(node) == 1 else 'k' for node in subgraph2.nodes()]
edge_colors2 = ['g'] * subgraph2.number_of_edges()
pos2 = nx.spring_layout(subgraph2, seed=43)

# NOTE: both subgraphs are drawn on the same axes (no subplot is created), so
# they overlap and the single title names only the first card number.
plt.figure(figsize=(10, 5))

# First subgraph.
plt.title('cc_num=4.302480e+15')
nx.draw_networkx_nodes(subgraph1, pos1, node_size=200, node_color=node_colors1)
nx.draw_networkx_edges(subgraph1, pos1, edge_color=edge_colors1)
nx.draw_networkx_labels(subgraph1, pos1, font_size=10)

# Second subgraph.
nx.draw_networkx_nodes(subgraph2, pos2, node_size=200, node_color=node_colors2)
nx.draw_networkx_edges(subgraph2, pos2, edge_color=edge_colors2)
nx.draw_networkx_labels(subgraph2, pos2, font_size=10)

# Hide the axes.
plt.axis('off')

plt.show()
df50[df50['cc_num'] == 6.535330e+15].index
Int64Index([11740], dtype='int64')
df50[df50['cc_num'] == 4.503100e+18].index
Int64Index([5381, 5382, 7996, 8217, 9176, 10248, 10517, 10613, 10950, 11321], dtype='int64')
df50[df50['is_fraud'] == 1]['cc_num'].value_counts()
2.131740e+14 24
3.506040e+15 23
1.800680e+14 22
6.011380e+15 22
1.800850e+14 21
..
6.011110e+15 2
4.809700e+12 2
3.576430e+15 2
4.503100e+18 2
3.546670e+15 2
Name: cc_num, Length: 596, dtype: int64
import networkx as nx
import matplotlib.pyplot as plt

# Convert the PyTorch Geometric graph to a NetworkX graph.
G = torch_geometric.utils.to_networkx(data, to_undirected=True)

# Node 2293 together with its direct neighbours.
subgraph = nx.ego_graph(G, 2293, radius=1)

# Layout.
pos = nx.spring_layout(subgraph)

# Nodes.
nx.draw_networkx_nodes(subgraph, pos, node_size=200)

# Edges.
nx.draw_networkx_edges(subgraph, pos)

# Node-index labels.
labels = {node: str(node) for node in subgraph.nodes()}
nx.draw_networkx_labels(subgraph, pos, labels, font_size=10)

# Show the figure without axes.
plt.axis('off')
plt.show()
import networkx as nx
import matplotlib.pyplot as plt

# Convert the PyTorch Geometric graph to a NetworkX graph.
G = torch_geometric.utils.to_networkx(data, to_undirected=True)

# Ego graphs (node plus direct neighbours) of nodes 2293 and 11740.
subgraph1 = nx.ego_graph(G, 2293, radius=1)
subgraph2 = nx.ego_graph(G, 11740, radius=1)

# Layout of the first graph.
pos1 = nx.spring_layout(subgraph1, pos=None, seed=42)

# Layout of the second graph.
pos2 = nx.spring_layout(subgraph2, pos=None, seed=43)

# Nodes; both graphs share one set of axes.
nx.draw_networkx_nodes(subgraph1, pos1, node_size=200, node_color='b', label='cc_num=4.302480e+15')
nx.draw_networkx_nodes(subgraph2, pos2, node_size=200, node_color='g', label='cc_num=6.535330e+15')

# Edges.
nx.draw_networkx_edges(subgraph1, pos1)
nx.draw_networkx_edges(subgraph2, pos2)

# Node-index labels.
labels1 = {node: str(node) for node in subgraph1.nodes()}
labels2 = {node: str(node) for node in subgraph2.nodes()}
nx.draw_networkx_labels(subgraph1, pos1, labels1, font_size=10)
nx.draw_networkx_labels(subgraph2, pos2, labels2, font_size=10)

# Hide the axes.
plt.axis('off')

# Legend.
plt.legend(loc='best')

plt.show()
import networkx as nx
import matplotlib.pyplot as plt

# Convert the PyTorch Geometric graph to a NetworkX graph.
G = torch_geometric.utils.to_networkx(data, to_undirected=True)

# Ego graphs (node plus direct neighbours) of nodes 2293 and 8217.
subgraph1 = nx.ego_graph(G, 2293, radius=1)
subgraph2 = nx.ego_graph(G, 8217, radius=1)

# Layout of the left graph.
pos1 = nx.spring_layout(subgraph1, pos=None, seed=42)

# Layout of the right graph.
pos2 = nx.spring_layout(subgraph2, pos=None, seed=43)

# Two side-by-side axes.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 5))

# Left panel.
ax1.set_title('cc_num=4.302480e+15')
nx.draw_networkx_nodes(subgraph1, pos1, ax=ax1, node_size=200, node_color='b')
nx.draw_networkx_edges(subgraph1, pos1, ax=ax1)
labels1 = {node: str(node) for node in subgraph1.nodes()}
nx.draw_networkx_labels(subgraph1, pos1, labels1, font_size=10, ax=ax1)

# Right panel.
ax2.set_title('cc_num=4.503100e+18')
nx.draw_networkx_nodes(subgraph2, pos2, ax=ax2, node_size=200, node_color='g')
nx.draw_networkx_edges(subgraph2, pos2, ax=ax2)
labels2 = {node: str(node) for node in subgraph2.nodes()}
nx.draw_networkx_labels(subgraph2, pos2, labels2, font_size=10, ax=ax2)

# Hide the axes.
ax1.axis('off')
ax2.axis('off')

plt.tight_layout()
plt.show()
import networkx as nx
import matplotlib.pyplot as plt

# Convert the PyTorch Geometric graph to a NetworkX graph.
G = torch_geometric.utils.to_networkx(data, to_undirected=True)

# Ego graphs (node plus direct neighbours) of nodes 2293 and 8217.
subgraph1 = nx.ego_graph(G, 2293, radius=1)
subgraph2 = nx.ego_graph(G, 8217, radius=1)

# Layout of the left graph.
pos1 = nx.spring_layout(subgraph1, pos=None, seed=42)

# Layout of the right graph.
pos2 = nx.spring_layout(subgraph2, pos=None, seed=43)

# Two side-by-side axes.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 5))

# Left panel.
ax1.set_title('cc_num=4.302480e+15')
nx.draw_networkx_nodes(subgraph1, pos1, ax=ax1, node_size=200, node_color='b')
labels1 = {node: str(node) for node in subgraph1.nodes()}

# Edge colour: red when the edge has is_fraud == 1, black otherwise.
# NOTE(review): G is built without edge attributes, so the lookup always falls
# back to 0 and every edge is black. is_fraud is a node label (data.y); colour
# by the endpoints' labels if fraud should be highlighted.
edge_colors1 = ['r' if subgraph1[u][v].get('is_fraud', 0) == 1 else 'k' for u, v in subgraph1.edges()]
nx.draw_networkx_edges(subgraph1, pos1, edgelist=subgraph1.edges(), edge_color=edge_colors1, ax=ax1)
nx.draw_networkx_labels(subgraph1, pos1, labels1, font_size=10, ax=ax1)

# Right panel.
ax2.set_title('cc_num=4.503100e+18')
nx.draw_networkx_nodes(subgraph2, pos2, ax=ax2, node_size=200, node_color='g')
labels2 = {node: str(node) for node in subgraph2.nodes()}

# Same edge colouring as the left panel (see the note there).
edge_colors2 = ['r' if subgraph2[u][v].get('is_fraud', 0) == 1 else 'k' for u, v in subgraph2.edges()]
nx.draw_networkx_edges(subgraph2, pos2, edgelist=subgraph2.edges(), edge_color=edge_colors2, ax=ax2)
nx.draw_networkx_labels(subgraph2, pos2, labels2, font_size=10, ax=ax2)

# Hide the axes.
ax1.axis('off')
ax2.axis('off')

plt.tight_layout()
plt.show()
- fraud 값이 1인 것이 표시가 안 되네?? (엣지에는 `is_fraud` 속성이 없어서 `.get('is_fraud', 0)`이 항상 0을 반환한다. `is_fraud`는 노드 레이블 `data.y`에만 있다.)
data
Data(x=[12012, 1], edge_index=<function edge_index_selected at 0x7f3f8d358ee0>, y=[12012], train_mask=[12012], test_mask=[12012])
data.edge_index
<function __main__.edge_index_selected(edge_index)>
# Rebuild the graph object now that `edge_index_selected` is the edge tensor
# again (above it was still bound to the function).
x = torch.tensor(df50['amt'].to_numpy(), dtype=torch.float).reshape(-1, 1)
y = torch.tensor(df50['is_fraud'].to_numpy(), dtype=torch.int64)
data = torch_geometric.data.Data(
    x=x,
    edge_index=edge_index_selected,
    y=y,
    train_mask=train_mask,
    test_mask=test_mask,
)
data
Data(x=[12012, 1], edge_index=[2, 93730], y=[12012], train_mask=[12012], test_mask=[12012])
-
ppt 삽입할 그래프.
-
cc_num
거래량이 가장 많은 cc_num:4.302480e+15
거래량이 평균(13건)인 cc_num: 4.2929e+18
import networkx as nx
import matplotlib.pyplot as plt

# Row indices (= node ids) of cc_num 4.302480e+15.
selected_indices = df50[df50['cc_num'] == 4.302480e+15].index

# Fraud labels of those rows.
is_fraud_values = df50.loc[selected_indices, 'is_fraud'].tolist()

# Convert the PyTorch Geometric graph to a NetworkX graph.
G = torch_geometric.utils.to_networkx(data, to_undirected=True)

# Subgraph induced by the selected nodes.
subgraph = G.subgraph(selected_indices)

# Node colours: red for fraud, black otherwise.
_fraud_by_node = dict(zip(selected_indices, is_fraud_values))
node_colors = ['r' if _fraud_by_node.get(node) == 1 else 'k' for node in subgraph.nodes()]

# Layout.
pos = nx.spring_layout(subgraph, seed=42)

# Draw.
plt.figure(figsize=(10, 5))
nx.draw_networkx_nodes(subgraph, pos, node_size=200, node_color=node_colors)
nx.draw_networkx_edges(subgraph, pos, edge_color='gray')
nx.draw_networkx_labels(subgraph, pos, font_size=10)

# Hide the axes.
plt.axis('off')

plt.show()
import networkx as nx
import matplotlib.pyplot as plt

# Row indices (= node ids) of cc_num 4.2929e+18.
selected_indices = df50[df50['cc_num'] == 4.2929e+18].index

# Fraud labels of those rows.
is_fraud_values = df50.loc[selected_indices, 'is_fraud'].tolist()

# Convert the PyTorch Geometric graph to a NetworkX graph.
G = torch_geometric.utils.to_networkx(data, to_undirected=True)

# Subgraph induced by the selected nodes.
subgraph = G.subgraph(selected_indices)

# Node colours: red for fraud, black otherwise.
_fraud_by_node = dict(zip(selected_indices, is_fraud_values))
node_colors = ['r' if _fraud_by_node.get(node) == 1 else 'k' for node in subgraph.nodes()]

# Layout.
pos = nx.spring_layout(subgraph, seed=42)

# Draw.
plt.figure(figsize=(10, 5))
nx.draw_networkx_nodes(subgraph, pos, node_size=200, node_color=node_colors)
nx.draw_networkx_edges(subgraph, pos, edge_color='gray')
nx.draw_networkx_labels(subgraph, pos, font_size=10)

# Hide the axes.
plt.axis('off')

plt.show()
import networkx as nx
import matplotlib.pyplot as plt

# First card number: rows (= node ids) and fraud labels.
selected_indices1 = df50[df50['cc_num'] == 4.302480e+15].index
is_fraud_values1 = df50.loc[selected_indices1, 'is_fraud'].tolist()

# Second card number.
selected_indices2 = df50[df50['cc_num'] == 4.2929e+18].index
is_fraud_values2 = df50.loc[selected_indices2, 'is_fraud'].tolist()

# Convert the PyTorch Geometric graph to a NetworkX graph.
G = torch_geometric.utils.to_networkx(data, to_undirected=True)

# Induced subgraphs.
subgraph1 = G.subgraph(selected_indices1)
subgraph2 = G.subgraph(selected_indices2)

# Node colours: red for fraud, black otherwise.
_fraud_by_node1 = dict(zip(selected_indices1, is_fraud_values1))
_fraud_by_node2 = dict(zip(selected_indices2, is_fraud_values2))
node_colors1 = ['r' if _fraud_by_node1.get(node) == 1 else 'k' for node in subgraph1.nodes()]
node_colors2 = ['r' if _fraud_by_node2.get(node) == 1 else 'k' for node in subgraph2.nodes()]

# Edge colours: green for the first subgraph, blue for the second.
edge_colors1 = ['g'] * subgraph1.number_of_edges()
edge_colors2 = ['b'] * subgraph2.number_of_edges()

# Layouts.
pos1 = nx.spring_layout(subgraph1, seed=42)
pos2 = nx.spring_layout(subgraph2, seed=42)

# Both subgraphs are drawn on the same axes.
plt.figure(figsize=(12, 6))

# Graph 1.
nx.draw_networkx_nodes(subgraph1, pos1, node_size=200, node_color=node_colors1)
nx.draw_networkx_edges(subgraph1, pos1, edge_color=edge_colors1)

# Graph 2.
nx.draw_networkx_nodes(subgraph2, pos2, node_size=200, node_color=node_colors2)
nx.draw_networkx_edges(subgraph2, pos2, edge_color=edge_colors2)

# Hide the axes.
plt.axis('off')

plt.show()
import networkx as nx
import matplotlib.pyplot as plt
import matplotlib.style as style

# Use the ggplot style.
style.use('ggplot')

# First card number: rows (= node ids) and fraud labels.
selected_indices1 = df50[df50['cc_num'] == 4.302480e+15].index
is_fraud_values1 = df50.loc[selected_indices1, 'is_fraud'].tolist()

# Second card number.
selected_indices2 = df50[df50['cc_num'] == 4.2929e+18].index
is_fraud_values2 = df50.loc[selected_indices2, 'is_fraud'].tolist()

# Convert the PyTorch Geometric graph to a NetworkX graph.
G = torch_geometric.utils.to_networkx(data, to_undirected=True)

# Induced subgraphs.
subgraph1 = G.subgraph(selected_indices1)
subgraph2 = G.subgraph(selected_indices2)

# Node colours: red for fraud, black otherwise.
_fraud_by_node1 = dict(zip(selected_indices1, is_fraud_values1))
_fraud_by_node2 = dict(zip(selected_indices2, is_fraud_values2))
node_colors1 = ['r' if _fraud_by_node1.get(node) == 1 else 'k' for node in subgraph1.nodes()]
node_colors2 = ['r' if _fraud_by_node2.get(node) == 1 else 'k' for node in subgraph2.nodes()]

# Edge colours: green for the first subgraph, blue for the second.
edge_colors1 = ['g'] * subgraph1.number_of_edges()
edge_colors2 = ['b'] * subgraph2.number_of_edges()

# Kamada-Kawai layouts.
pos1 = nx.kamada_kawai_layout(subgraph1)
pos2 = nx.kamada_kawai_layout(subgraph2)

# Both subgraphs are drawn on the same axes.
plt.figure(figsize=(12, 6))

# Graph 1: circle markers.
nx.draw_networkx_nodes(subgraph1, pos1, node_size=200, node_color=node_colors1, node_shape='o')
nx.draw_networkx_edges(subgraph1, pos1, edge_color=edge_colors1)

# Graph 2: diamond markers.
nx.draw_networkx_nodes(subgraph2, pos2, node_size=200, node_color=node_colors2, node_shape='D')
nx.draw_networkx_edges(subgraph2, pos2, edge_color=edge_colors2)

# Legend.
legend_elements = [
    plt.Line2D([0], [0], marker='o', color='g', markerfacecolor='black', markersize=10, label='cc_num 1'),
    plt.Line2D([0], [0], marker='D', color='b', markerfacecolor='black', markersize=10, label='cc_num 2'),
    plt.Line2D([0], [0], marker='o', color='black', markerfacecolor='r', markersize=10, label='fraud=1'),
]
plt.legend(handles=legend_elements, loc='lower center', ncol=3, frameon=True)

# Show the figure without axes.
plt.axis('off')
plt.show()
import networkx as nx
import matplotlib.pyplot as plt
import matplotlib.style as style

# Use the ggplot style.
style.use('ggplot')

# First card number: rows (= node ids) and fraud labels.
selected_indices1 = df50[df50['cc_num'] == 4.302480e+15].index
is_fraud_values1 = df50.loc[selected_indices1, 'is_fraud'].tolist()

# Second card number.
selected_indices2 = df50[df50['cc_num'] == 4.2929e+18].index
is_fraud_values2 = df50.loc[selected_indices2, 'is_fraud'].tolist()

# Convert the PyTorch Geometric graph to a NetworkX graph.
G = torch_geometric.utils.to_networkx(data, to_undirected=True)

# Induced subgraphs.
subgraph1 = G.subgraph(selected_indices1)
subgraph2 = G.subgraph(selected_indices2)

# Node colours: red for fraud, black otherwise.
_fraud_by_node1 = dict(zip(selected_indices1, is_fraud_values1))
_fraud_by_node2 = dict(zip(selected_indices2, is_fraud_values2))
node_colors1 = ['r' if _fraud_by_node1.get(node) == 1 else 'k' for node in subgraph1.nodes()]
node_colors2 = ['r' if _fraud_by_node2.get(node) == 1 else 'k' for node in subgraph2.nodes()]

# Edge colours: green for the first subgraph, blue for the second.
edge_colors1 = ['g'] * subgraph1.number_of_edges()
edge_colors2 = ['b'] * subgraph2.number_of_edges()

# Kamada-Kawai layouts.
pos1 = nx.kamada_kawai_layout(subgraph1)
pos2 = nx.kamada_kawai_layout(subgraph2)

# Both subgraphs are drawn on the same axes.
plt.figure(figsize=(12, 6))

# Graph 1: circle markers.
nx.draw_networkx_nodes(subgraph1, pos1, node_size=200, node_color=node_colors1, node_shape='o')
nx.draw_networkx_edges(subgraph1, pos1, edge_color=edge_colors1)

# Graph 2: diamond markers.
nx.draw_networkx_nodes(subgraph2, pos2, node_size=200, node_color=node_colors2, node_shape='D')
nx.draw_networkx_edges(subgraph2, pos2, edge_color=edge_colors2)

# Legend, moved below the plot area.
legend_elements = [
    plt.Line2D([0], [0], marker='o', color='g', markerfacecolor='black', markersize=10, label='cc_num 1'),
    plt.Line2D([0], [0], marker='D', color='b', markerfacecolor='black', markersize=10, label='cc_num 2'),
    plt.Line2D([0], [0], marker='o', color='black', markerfacecolor='r', markersize=10, label='fraud=1'),
]
legend = plt.legend(handles=legend_elements, loc='lower center', ncol=3, frameon=True)
legend.set_bbox_to_anchor((0.5, -0.1))

# Show the figure without axes.
plt.axis('off')
plt.show()