import pandas as pd
import os
import math
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
%matplotlib inline
# Colour palette used when drawing graphs: default colours for ordinary
# nodes/edges and "enhanced" colours for highlighted ones.
default_edge_color = 'gray'
default_node_color = '#407cc9'
enhanced_node_color = '#f5b042'
enhanced_edge_color = '#cc2f04'
import pandas as pd
# Load the raw transaction table.
df = pd.read_csv("fraudTrain.csv")

# Keep a reproducible 20% sample of the non-fraud rows together with every
# fraud row. pd.concat is used because DataFrame.append was removed in
# pandas 2.0; the resulting row order is the same (sample first, fraud after).
df = pd.concat([
    df[df["is_fraud"] == 0].sample(frac=0.20, random_state=42),
    df[df["is_fraud"] == 1],
])
df.head()
Unnamed: 0 | trans_date_trans_time | cc_num | merchant | category | amt | first | last | gender | street | ... | lat | long | city_pop | job | dob | trans_num | unix_time | merch_lat | merch_long | is_fraud | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
669418 | 669418 | 2019-10-12 18:21 | 4.089100e+18 | fraud_Haley, Jewess and Bechtelar | shopping_pos | 7.53 | Debra | Stark | F | 686 Linda Rest | ... | 32.3836 | -94.8653 | 24536 | Multimedia programmer | 1983-10-14 | d313353fa30233e5fab5468e852d22fc | 1350066071 | 32.202008 | -94.371865 | 0 |
32567 | 32567 | 2019-01-20 13:06 | 4.247920e+12 | fraud_Turner LLC | travel | 3.79 | Judith | Moss | F | 46297 Benjamin Plains Suite 703 | ... | 39.5370 | -83.4550 | 22305 | Television floor manager | 1939-03-09 | 88c65b4e1585934d578511e627fe3589 | 1327064760 | 39.156673 | -82.930503 | 0 |
156587 | 156587 | 2019-03-24 18:09 | 4.026220e+12 | fraud_Klein Group | entertainment | 59.07 | Debbie | Payne | F | 204 Ashley Neck Apt. 169 | ... | 41.5224 | -71.9934 | 4720 | Broadcast presenter | 1977-05-18 | 3bd9ede04b5c093143d5e5292940b670 | 1332612553 | 41.657152 | -72.595751 | 0 |
1020243 | 1020243 | 2020-02-25 15:12 | 4.957920e+12 | fraud_Monahan-Morar | personal_care | 25.58 | Alan | Parsons | M | 0547 Russell Ford Suite 574 | ... | 39.6171 | -102.4776 | 207 | Network engineer | 1955-12-04 | 19e16ee7a01d229e750359098365e321 | 1361805120 | 39.080346 | -103.213452 | 0 |
116272 | 116272 | 2019-03-06 23:19 | 4.178100e+15 | fraud_Kozey-Kuhlman | personal_care | 84.96 | Jill | Flores | F | 639 Cruz Islands | ... | 41.9488 | -86.4913 | 3104 | Horticulturist, commercial | 1981-03-29 | a0c8641ca1f5d6e243ed5a2246e66176 | 1331075954 | 42.502065 | -86.732664 | 0 |
5 rows × 23 columns
"is_fraud"].value_counts() df[
0 208514
1 6006
Name: is_fraud, dtype: int64
def build_graph_bipartite(df_input, graph_type=None):
    """Build a bipartite card/merchant transaction graph.

    Every distinct ``cc_num`` and ``merchant`` value becomes one node with an
    integer id. All transactions between the same card and merchant are
    merged into a single edge carrying two attributes:

    * ``label``  -- 1 if any merged transaction has ``is_fraud`` > 0, else 0
    * ``weight`` -- sum of ``amt`` over the merged transactions

    Args:
        df_input: DataFrame with ``cc_num``, ``merchant``, ``amt`` and
            ``is_fraud`` columns. It is copied, not modified.
        graph_type: networkx graph to fill; it is handed to
            ``nx.from_edgelist`` as ``create_using``. When omitted, a new
            ``nx.Graph`` is created for this call.

    Returns:
        The populated networkx graph.
    """
    # A default graph built once in the signature would be shared by every
    # call, and networkx empties and reuses an existing instance, so a later
    # call would overwrite the graph returned by an earlier one.
    graph = nx.Graph() if graph_type is None else graph_type

    df = df_input.copy()

    # One integer node id per distinct card number / merchant name.
    entities = set(df["cc_num"].values.tolist() + df["merchant"].values.tolist())
    mapping = {x: node_id for node_id, x in enumerate(entities)}

    df["from"] = df["cc_num"].apply(lambda x: mapping[x])    # edge source
    df["to"] = df["merchant"].apply(lambda x: mapping[x])    # edge destination

    # Collapse repeated (card, merchant) pairs into one row per edge.
    edges = (
        df[["from", "to", "amt", "is_fraud"]]
        .groupby(["from", "to"])
        .agg({"is_fraud": "sum", "amt": "sum"})
        .reset_index()
    )
    # An edge is fraudulent if at least one of its transactions was.
    edges["is_fraud"] = (edges["is_fraud"] > 0).astype(int)

    pairs = list(zip(edges["from"].tolist(), edges["to"].tolist()))

    G = nx.from_edgelist(pairs, create_using=graph)

    # Edge attribute "label": fraud flag of each edge.
    nx.set_edge_attributes(G, dict(zip(pairs, edges["is_fraud"].tolist())), "label")
    # Edge attribute "weight": total transaction amount of each edge.
    nx.set_edge_attributes(G, dict(zip(pairs, edges["amt"].tolist())), "weight")

    return G
# Undirected bipartite graph built from the sampled transactions.
G_bu = build_graph_bipartite(df, nx.Graph(name="Bipartite Undirect"))
# Existing code (downsampling)
from sklearn.utils import resample

df_majority = df[df.is_fraud == 0]
df_minority = df[df.is_fraud == 1]

# Shrink the non-fraud class to the size of the fraud class.
# NOTE(review): sklearn's resample() samples with replacement unless
# replace=False is passed, so the downsampled frame may contain duplicate
# rows -- confirm this is intended.
df_maj_dowsampled = resample(df_majority,
                             n_samples=len(df_minority),
                             random_state=42)

df_downsampled = pd.concat([df_minority, df_maj_dowsampled])

print(df_downsampled.is_fraud.value_counts())

# Pass a new graph explicitly so the result never shares an instance with a
# graph produced by another call.
G_down = build_graph_bipartite(df_downsampled, nx.Graph())
from sklearn.utils import resample

df_majority = df[df.is_fraud == 0]
df_minority = df[df.is_fraud == 1]

# Grow the fraud class to the size of the non-fraud class by sampling its
# rows with replacement.
df_min_oversampled = resample(df_minority,
                              n_samples=len(df_majority),
                              replace=True,
                              random_state=42)

df_oversampled = pd.concat([df_majority, df_min_oversampled])

print(df_oversampled.is_fraud.value_counts())

# Pass a new graph explicitly so the result never shares an instance with a
# graph produced by another call.
G_over = build_graph_bipartite(df_oversampled, nx.Graph())
pip install gran
Requirement already satisfied: gran in /home/coco/anaconda3/envs/py38/lib/python3.8/site-packages (0.0.1)
Note: you may need to restart the kernel to use updated packages.
import sys
# Make the environment's site-packages directory importable from this kernel.
sys.path.append('/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages')
from gran import GRAN
import networkx as nx
ModuleNotFoundError: No module named 'gran'
from gran import GRAN
import networkx as nx
# Existing graph dataset
# NOTE(review): `[...]` is a placeholder (a list holding only Ellipsis);
# replace it with the real list of graphs.
graphs = [...]

# Initialise the GRAN model
# NOTE(review): the recorded output of this notebook shows
# `from gran import GRAN` failing with ModuleNotFoundError, so the
# GRAN(...), generate() and to_networkx() calls below are unverified --
# confirm them against the package that is actually installed.
gran = GRAN(device='cpu')

# Augment the graph data
new_graphs = gran.generate(graphs)

# Inspect the generated graphs
for new_graph in new_graphs:
    # Convert to a NetworkX graph object
    nx_graph = gran.to_networkx(new_graph)

    # Visualise the graph
    nx.draw(nx_graph, with_labels=True)
SyntaxError: invalid character in identifier (384620633.py, line 17)
pip install torch numpy scipy tqdm
$ pip install git+https://github.com/snap-stanford/ogb.git
$ pip install git+https://github.com/snap-stanford/graph-doc2vec.git
$ pip install git+https://github.com/snap-stanford/GRAN.git
pip install torch numpy scipy tqdm
Requirement already satisfied: torch in /home/coco/anaconda3/envs/py38/lib/python3.8/site-packages (1.13.1)
Requirement already satisfied: numpy in /home/coco/anaconda3/envs/py38/lib/python3.8/site-packages (1.22.4)
Requirement already satisfied: scipy in /home/coco/anaconda3/envs/py38/lib/python3.8/site-packages (1.10.1)
Requirement already satisfied: tqdm in /home/coco/anaconda3/envs/py38/lib/python3.8/site-packages (4.65.0)
Requirement already satisfied: typing_extensions in /home/coco/anaconda3/envs/py38/lib/python3.8/site-packages (from torch) (4.4.0)
Note: you may need to restart the kernel to use updated packages.
pip install git+https://github.com/snap-stanford/ogb.git
Collecting git+https://github.com/snap-stanford/ogb.git
Cloning https://github.com/snap-stanford/ogb.git to /tmp/pip-req-build-rvf1uik4
Running command git clone --filter=blob:none --quiet https://github.com/snap-stanford/ogb.git /tmp/pip-req-build-rvf1uik4
Resolved https://github.com/snap-stanford/ogb.git to commit a47b716f7e972f666eae9909ee0f922cd0f9d966
Preparing metadata (setup.py) ... done
Requirement already satisfied: torch>=1.6.0 in /home/coco/anaconda3/envs/py38/lib/python3.8/site-packages (from ogb==1.3.6) (1.13.1)
Requirement already satisfied: numpy>=1.16.0 in /home/coco/anaconda3/envs/py38/lib/python3.8/site-packages (from ogb==1.3.6) (1.22.4)
Requirement already satisfied: tqdm>=4.29.0 in /home/coco/anaconda3/envs/py38/lib/python3.8/site-packages (from ogb==1.3.6) (4.65.0)
Requirement already satisfied: scikit-learn>=0.20.0 in /home/coco/anaconda3/envs/py38/lib/python3.8/site-packages (from ogb==1.3.6) (1.2.2)
Requirement already satisfied: pandas>=0.24.0 in /home/coco/anaconda3/envs/py38/lib/python3.8/site-packages (from ogb==1.3.6) (1.3.5)
Requirement already satisfied: six>=1.12.0 in /home/coco/anaconda3/envs/py38/lib/python3.8/site-packages (from ogb==1.3.6) (1.16.0)
Requirement already satisfied: urllib3>=1.24.0 in /home/coco/anaconda3/envs/py38/lib/python3.8/site-packages (from ogb==1.3.6) (1.26.15)
Collecting outdated>=0.2.0
Downloading outdated-0.2.2-py2.py3-none-any.whl (7.5 kB)
Collecting littleutils
Downloading littleutils-0.2.2.tar.gz (6.6 kB)
Preparing metadata (setup.py) ... done
Requirement already satisfied: setuptools>=44 in /home/coco/anaconda3/envs/py38/lib/python3.8/site-packages (from outdated>=0.2.0->ogb==1.3.6) (65.6.3)
Requirement already satisfied: requests in /home/coco/anaconda3/envs/py38/lib/python3.8/site-packages (from outdated>=0.2.0->ogb==1.3.6) (2.28.1)
Requirement already satisfied: python-dateutil>=2.7.3 in /home/coco/anaconda3/envs/py38/lib/python3.8/site-packages (from pandas>=0.24.0->ogb==1.3.6) (2.8.2)
Requirement already satisfied: pytz>=2017.3 in /home/coco/anaconda3/envs/py38/lib/python3.8/site-packages (from pandas>=0.24.0->ogb==1.3.6) (2022.7)
Requirement already satisfied: scipy>=1.3.2 in /home/coco/anaconda3/envs/py38/lib/python3.8/site-packages (from scikit-learn>=0.20.0->ogb==1.3.6) (1.10.1)
Requirement already satisfied: joblib>=1.1.1 in /home/coco/anaconda3/envs/py38/lib/python3.8/site-packages (from scikit-learn>=0.20.0->ogb==1.3.6) (1.2.0)
Requirement already satisfied: threadpoolctl>=2.0.0 in /home/coco/anaconda3/envs/py38/lib/python3.8/site-packages (from scikit-learn>=0.20.0->ogb==1.3.6) (3.1.0)
Requirement already satisfied: typing_extensions in /home/coco/anaconda3/envs/py38/lib/python3.8/site-packages (from torch>=1.6.0->ogb==1.3.6) (4.4.0)
Requirement already satisfied: certifi>=2017.4.17 in /home/coco/anaconda3/envs/py38/lib/python3.8/site-packages (from requests->outdated>=0.2.0->ogb==1.3.6) (2022.12.7)
Requirement already satisfied: charset-normalizer<3,>=2 in /home/coco/anaconda3/envs/py38/lib/python3.8/site-packages (from requests->outdated>=0.2.0->ogb==1.3.6) (2.0.4)
Requirement already satisfied: idna<4,>=2.5 in /home/coco/anaconda3/envs/py38/lib/python3.8/site-packages (from requests->outdated>=0.2.0->ogb==1.3.6) (3.4)
Building wheels for collected packages: ogb, littleutils
Building wheel for ogb (setup.py) ... done
Created wheel for ogb: filename=ogb-1.3.6-py3-none-any.whl size=78743 sha256=cc58571b3e5c903fd2395540d77d76740c2f810f3663639c9299e2a64236f590
Stored in directory: /tmp/pip-ephem-wheel-cache-8rpjzmhk/wheels/c1/20/5b/76ab6aa5c9588d05152dfc4a7088179e040f7db7498d771b56
Building wheel for littleutils (setup.py) ... done
Created wheel for littleutils: filename=littleutils-0.2.2-py3-none-any.whl size=7028 sha256=ba71828ff5f9a53430843ca994d1df80eeeee554ed19b36f6ea962853d8aa708
Stored in directory: /home/coco/.cache/pip/wheels/6a/33/c4/0ef84d7f5568c2823e3d63a6e08988852fb9e4bc822034870a
Successfully built ogb littleutils
Installing collected packages: littleutils, outdated, ogb
Successfully installed littleutils-0.2.2 ogb-1.3.6 outdated-0.2.2
Note: you may need to restart the kernel to use updated packages.
pip install git+https://github.com/snap-stanford/graph-doc2vec.git
Collecting git+https://github.com/snap-stanford/graph-doc2vec.git
Cloning https://github.com/snap-stanford/graph-doc2vec.git to /tmp/pip-req-build-ms1szh1h
Running command git clone --filter=blob:none --quiet https://github.com/snap-stanford/graph-doc2vec.git /tmp/pip-req-build-ms1szh1h
Username for 'https://github.com':
pip install git+https://github.com/snap-stanford/GRAN.git