CH8. 신용카드 거래에 대한 그래프 분석(under-sampling)

graph
Author

김보람

Published

April 12, 2023

import pandas as pd

import os
import math
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
%matplotlib inline

# Colour constants: a "default" and an "enhanced" colour for nodes and edges.
# NOTE(review): presumably used for graph drawing; none of them is referenced
# in the visible part of this notebook -- confirm before removing.
default_edge_color = 'gray'
default_node_color = '#407cc9'
enhanced_node_color = '#f5b042'
enhanced_edge_color = '#cc2f04'
import pandas as pd
df = pd.read_csv("fraudTrain.csv")
# Keep a reproducible 20% sample of the non-fraud rows plus every fraud row.
# pd.concat is used instead of DataFrame.append, which is deprecated since
# pandas 1.4 and removed in pandas 2.0; the resulting frame is the same.
df = pd.concat([df[df["is_fraud"]==0].sample(frac=0.20, random_state=42),
                df[df["is_fraud"] == 1]])
df.head()
Unnamed: 0 trans_date_trans_time cc_num merchant category amt first last gender street ... lat long city_pop job dob trans_num unix_time merch_lat merch_long is_fraud
669418 669418 2019-10-12 18:21 4.089100e+18 fraud_Haley, Jewess and Bechtelar shopping_pos 7.53 Debra Stark F 686 Linda Rest ... 32.3836 -94.8653 24536 Multimedia programmer 1983-10-14 d313353fa30233e5fab5468e852d22fc 1350066071 32.202008 -94.371865 0
32567 32567 2019-01-20 13:06 4.247920e+12 fraud_Turner LLC travel 3.79 Judith Moss F 46297 Benjamin Plains Suite 703 ... 39.5370 -83.4550 22305 Television floor manager 1939-03-09 88c65b4e1585934d578511e627fe3589 1327064760 39.156673 -82.930503 0
156587 156587 2019-03-24 18:09 4.026220e+12 fraud_Klein Group entertainment 59.07 Debbie Payne F 204 Ashley Neck Apt. 169 ... 41.5224 -71.9934 4720 Broadcast presenter 1977-05-18 3bd9ede04b5c093143d5e5292940b670 1332612553 41.657152 -72.595751 0
1020243 1020243 2020-02-25 15:12 4.957920e+12 fraud_Monahan-Morar personal_care 25.58 Alan Parsons M 0547 Russell Ford Suite 574 ... 39.6171 -102.4776 207 Network engineer 1955-12-04 19e16ee7a01d229e750359098365e321 1361805120 39.080346 -103.213452 0
116272 116272 2019-03-06 23:19 4.178100e+15 fraud_Kozey-Kuhlman personal_care 84.96 Jill Flores F 639 Cruz Islands ... 41.9488 -86.4913 3104 Horticulturist, commercial 1981-03-29 a0c8641ca1f5d6e243ed5a2246e66176 1331075954 42.502065 -86.732664 0

5 rows × 23 columns

df["is_fraud"].value_counts()
0    208514
1      6006
Name: is_fraud, dtype: int64
def build_graph_bipartite(df_input, graph_type=None):
    """Build a bipartite card/merchant graph from a transaction table.

    Every distinct ``cc_num`` and ``merchant`` value becomes one integer node.
    Transactions are grouped per (card, merchant) pair, so each pair yields a
    single edge with two attributes:

    * ``label``  -- 1 if the summed ``is_fraud`` of the pair is > 0, else 0.
    * ``weight`` -- the summed ``amt`` of the pair.

    Args:
        df_input: DataFrame with the columns ``cc_num``, ``merchant``,
            ``amt`` and ``is_fraud``. It is copied, not modified.
        graph_type: Passed to ``nx.from_edgelist`` as ``create_using``.
            ``None`` (the default) creates a new ``nx.Graph`` for this call.

    Returns:
        The populated graph.
    """
    # A default of ``nx.Graph()`` in the signature would be one shared
    # instance: networkx clears and reuses a ``create_using`` instance, so a
    # later call would silently overwrite the graph returned by an earlier one.
    if graph_type is None:
        graph_type = nx.Graph()

    df = df_input.copy()

    # dict.fromkeys de-duplicates like a set but keeps first-seen order, so
    # node ids are reproducible between runs (set order of strings is not).
    node_keys = dict.fromkeys(df["cc_num"].values.tolist()
                              + df["merchant"].values.tolist())
    mapping = {x: node_id for node_id, x in enumerate(node_keys)}

    df["from"] = df["cc_num"].apply(lambda x: mapping[x])   # edge source (card node)
    df["to"] = df["merchant"].apply(lambda x: mapping[x])   # edge target (merchant node)

    # Collapse repeated (card, merchant) transactions into one row per edge.
    df = df[['from', 'to', "amt", "is_fraud"]].groupby(['from', 'to']).agg({"is_fraud": "sum", "amt": "sum"}).reset_index()
    df["is_fraud"] = df["is_fraud"].apply(lambda x: 1 if x > 0 else 0)

    G = nx.from_edgelist(df[["from", "to"]].values, create_using=graph_type)

    # Edge attribute "label": whether the edge has any fraudulent transaction.
    nx.set_edge_attributes(
        G,
        {(int(row["from"]), int(row["to"])): row["is_fraud"]
         for _, row in df[["from", "to", "is_fraud"]].iterrows()},
        "label")

    # Edge attribute "weight": total transaction amount on the edge.
    # iterrows() yields float rows here (amt is float), hence the int() casts.
    nx.set_edge_attributes(
        G,
        {(int(row["from"]), int(row["to"])): row["amt"]
         for _, row in df[["from", "to", "amt"]].iterrows()},
        "weight")

    return G
# Build the bipartite graph for the sampled frame `df`, handing in an
# undirected nx.Graph named "Bipartite Undirect" as the graph to populate.
G_bu = build_graph_bipartite(df, nx.Graph(name="Bipartite Undirect"))
# Original code (down-sampling)
from sklearn.utils import resample

df_majority = df[df.is_fraud==0]
df_minority = df[df.is_fraud==1]

# resample() defaults to replace=True; sample without replacement so the
# under-sampled majority class contains no duplicated transactions.
df_maj_dowsampled = resample(df_majority,
                             replace=False,
                             n_samples=len(df_minority),
                             random_state=42)

df_downsampled = pd.concat([df_minority, df_maj_dowsampled])

print(df_downsampled.is_fraud.value_counts())
# Give this graph its own nx.Graph instance so it cannot end up sharing one
# object with a graph built by another build_graph_bipartite call.
G_down = build_graph_bipartite(df_downsampled, nx.Graph())
from sklearn.utils import resample


df_majority = df[df.is_fraud==0]
df_minority = df[df.is_fraud==1]

# Over-sample the fraud rows with replacement until both classes have the
# same number of rows.
df_min_oversampled = resample(df_minority,
                              n_samples=len(df_majority),
                              replace=True,
                              random_state=42)

df_oversampled = pd.concat([df_majority, df_min_oversampled])

print(df_oversampled.is_fraud.value_counts())
# Give this graph its own nx.Graph instance so it cannot end up sharing one
# object with a graph built by another build_graph_bipartite call.
G_over = build_graph_bipartite(df_oversampled, nx.Graph())
pip install gran
Requirement already satisfied: gran in /home/coco/anaconda3/envs/py38/lib/python3.8/site-packages (0.0.1)
Note: you may need to restart the kernel to use updated packages.
import sys
# Add the environment's site-packages directory (the location pip reported
# for `gran`) to the module search path before importing it.
# NOTE(review): the recorded run still raised ModuleNotFoundError for `gran`
# after this line -- check what the installed distribution actually provides.
sys.path.append('/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages')
from gran import GRAN
import networkx as nx
ModuleNotFoundError: No module named 'gran'
from gran import GRAN
import networkx as nx

# Existing graph dataset (presumably a placeholder to be filled in with the
# graphs that should be augmented).
graphs = [...]
# Initialise the GRAN model.
# NOTE(review): GRAN(device=...), generate() and to_networkx() are kept as
# written; confirm them against the installed `gran` package.
gran = GRAN(device='cpu')
# Augment the graph data.
new_graphs = gran.generate(graphs)

# Inspect the generated graphs.
for new_graph in new_graphs:
    # Convert to a NetworkX graph object.
    nx_graph = gran.to_networkx(new_graph)
    # Draw every graph on its own figure; repeated nx.draw calls would
    # otherwise paint all graphs on top of each other on the same axes.
    plt.figure()
    nx.draw(nx_graph, with_labels=True)
    plt.show()
SyntaxError: invalid character in identifier (384620633.py, line 17)
pip install torch numpy scipy tqdm
$ pip install git+https://github.com/snap-stanford/ogb.git
$ pip install git+https://github.com/snap-stanford/graph-doc2vec.git
$ pip install git+https://github.com/snap-stanford/GRAN.git
pip install torch numpy scipy tqdm
Requirement already satisfied: torch in /home/coco/anaconda3/envs/py38/lib/python3.8/site-packages (1.13.1)
Requirement already satisfied: numpy in /home/coco/anaconda3/envs/py38/lib/python3.8/site-packages (1.22.4)
Requirement already satisfied: scipy in /home/coco/anaconda3/envs/py38/lib/python3.8/site-packages (1.10.1)
Requirement already satisfied: tqdm in /home/coco/anaconda3/envs/py38/lib/python3.8/site-packages (4.65.0)
Requirement already satisfied: typing_extensions in /home/coco/anaconda3/envs/py38/lib/python3.8/site-packages (from torch) (4.4.0)
Note: you may need to restart the kernel to use updated packages.
pip install git+https://github.com/snap-stanford/ogb.git
Collecting git+https://github.com/snap-stanford/ogb.git
  Cloning https://github.com/snap-stanford/ogb.git to /tmp/pip-req-build-rvf1uik4
  Running command git clone --filter=blob:none --quiet https://github.com/snap-stanford/ogb.git /tmp/pip-req-build-rvf1uik4
  Resolved https://github.com/snap-stanford/ogb.git to commit a47b716f7e972f666eae9909ee0f922cd0f9d966
  Preparing metadata (setup.py) ... done
Requirement already satisfied: torch>=1.6.0 in /home/coco/anaconda3/envs/py38/lib/python3.8/site-packages (from ogb==1.3.6) (1.13.1)
Requirement already satisfied: numpy>=1.16.0 in /home/coco/anaconda3/envs/py38/lib/python3.8/site-packages (from ogb==1.3.6) (1.22.4)
Requirement already satisfied: tqdm>=4.29.0 in /home/coco/anaconda3/envs/py38/lib/python3.8/site-packages (from ogb==1.3.6) (4.65.0)
Requirement already satisfied: scikit-learn>=0.20.0 in /home/coco/anaconda3/envs/py38/lib/python3.8/site-packages (from ogb==1.3.6) (1.2.2)
Requirement already satisfied: pandas>=0.24.0 in /home/coco/anaconda3/envs/py38/lib/python3.8/site-packages (from ogb==1.3.6) (1.3.5)
Requirement already satisfied: six>=1.12.0 in /home/coco/anaconda3/envs/py38/lib/python3.8/site-packages (from ogb==1.3.6) (1.16.0)
Requirement already satisfied: urllib3>=1.24.0 in /home/coco/anaconda3/envs/py38/lib/python3.8/site-packages (from ogb==1.3.6) (1.26.15)
Collecting outdated>=0.2.0
  Downloading outdated-0.2.2-py2.py3-none-any.whl (7.5 kB)
Collecting littleutils
  Downloading littleutils-0.2.2.tar.gz (6.6 kB)
  Preparing metadata (setup.py) ... done
Requirement already satisfied: setuptools>=44 in /home/coco/anaconda3/envs/py38/lib/python3.8/site-packages (from outdated>=0.2.0->ogb==1.3.6) (65.6.3)
Requirement already satisfied: requests in /home/coco/anaconda3/envs/py38/lib/python3.8/site-packages (from outdated>=0.2.0->ogb==1.3.6) (2.28.1)
Requirement already satisfied: python-dateutil>=2.7.3 in /home/coco/anaconda3/envs/py38/lib/python3.8/site-packages (from pandas>=0.24.0->ogb==1.3.6) (2.8.2)
Requirement already satisfied: pytz>=2017.3 in /home/coco/anaconda3/envs/py38/lib/python3.8/site-packages (from pandas>=0.24.0->ogb==1.3.6) (2022.7)
Requirement already satisfied: scipy>=1.3.2 in /home/coco/anaconda3/envs/py38/lib/python3.8/site-packages (from scikit-learn>=0.20.0->ogb==1.3.6) (1.10.1)
Requirement already satisfied: joblib>=1.1.1 in /home/coco/anaconda3/envs/py38/lib/python3.8/site-packages (from scikit-learn>=0.20.0->ogb==1.3.6) (1.2.0)
Requirement already satisfied: threadpoolctl>=2.0.0 in /home/coco/anaconda3/envs/py38/lib/python3.8/site-packages (from scikit-learn>=0.20.0->ogb==1.3.6) (3.1.0)
Requirement already satisfied: typing_extensions in /home/coco/anaconda3/envs/py38/lib/python3.8/site-packages (from torch>=1.6.0->ogb==1.3.6) (4.4.0)
Requirement already satisfied: certifi>=2017.4.17 in /home/coco/anaconda3/envs/py38/lib/python3.8/site-packages (from requests->outdated>=0.2.0->ogb==1.3.6) (2022.12.7)
Requirement already satisfied: charset-normalizer<3,>=2 in /home/coco/anaconda3/envs/py38/lib/python3.8/site-packages (from requests->outdated>=0.2.0->ogb==1.3.6) (2.0.4)
Requirement already satisfied: idna<4,>=2.5 in /home/coco/anaconda3/envs/py38/lib/python3.8/site-packages (from requests->outdated>=0.2.0->ogb==1.3.6) (3.4)
Building wheels for collected packages: ogb, littleutils
  Building wheel for ogb (setup.py) ... done
  Created wheel for ogb: filename=ogb-1.3.6-py3-none-any.whl size=78743 sha256=cc58571b3e5c903fd2395540d77d76740c2f810f3663639c9299e2a64236f590
  Stored in directory: /tmp/pip-ephem-wheel-cache-8rpjzmhk/wheels/c1/20/5b/76ab6aa5c9588d05152dfc4a7088179e040f7db7498d771b56
  Building wheel for littleutils (setup.py) ... done
  Created wheel for littleutils: filename=littleutils-0.2.2-py3-none-any.whl size=7028 sha256=ba71828ff5f9a53430843ca994d1df80eeeee554ed19b36f6ea962853d8aa708
  Stored in directory: /home/coco/.cache/pip/wheels/6a/33/c4/0ef84d7f5568c2823e3d63a6e08988852fb9e4bc822034870a
Successfully built ogb littleutils
Installing collected packages: littleutils, outdated, ogb
Successfully installed littleutils-0.2.2 ogb-1.3.6 outdated-0.2.2
Note: you may need to restart the kernel to use updated packages.
pip install git+https://github.com/snap-stanford/graph-doc2vec.git
Collecting git+https://github.com/snap-stanford/graph-doc2vec.git
  Cloning https://github.com/snap-stanford/graph-doc2vec.git to /tmp/pip-req-build-ms1szh1h
  Running command git clone --filter=blob:none --quiet https://github.com/snap-stanford/graph-doc2vec.git /tmp/pip-req-build-ms1szh1h
Username for 'https://github.com': 
pip install git+https://github.com/snap-stanford/GRAN.git