import numpy as np
import pandas as pd

!pip install pandas_datareader

Collecting pandas_datareader
  Downloading pandas_datareader-0.10.0-py3-none-any.whl (109 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 109.5/109.5 kB 5.9 MB/s eta 0:00:00
Requirement already satisfied: requests>=2.19.0 in /home/koinup4/anaconda3/envs/py37/lib/python3.7/site-packages (from pandas_datareader) (2.28.2)
Collecting lxml
  Downloading lxml-4.9.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl (6.6 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 6.6/6.6 MB 79.4 MB/s eta 0:00:00
Requirement already satisfied: pandas>=0.23 in /home/koinup4/anaconda3/envs/py37/lib/python3.7/site-packages (from pandas_datareader) (1.3.5)
Requirement already satisfied: numpy>=1.17.3 in /home/koinup4/anaconda3/envs/py37/lib/python3.7/site-packages (from pandas>=0.23->pandas_datareader) (1.21.6)
Requirement already satisfied: python-dateutil>=2.7.3 in /home/koinup4/anaconda3/envs/py37/lib/python3.7/site-packages (from pandas>=0.23->pandas_datareader) (2.8.2)
Requirement already satisfied: pytz>=2017.3 in /home/koinup4/anaconda3/envs/py37/lib/python3.7/site-packages (from pandas>=0.23->pandas_datareader) (2022.7.1)
Requirement already satisfied: idna<4,>=2.5 in /home/koinup4/anaconda3/envs/py37/lib/python3.7/site-packages (from requests>=2.19.0->pandas_datareader) (3.4)
Requirement already satisfied: urllib3<1.27,>=1.21.1 in /home/koinup4/anaconda3/envs/py37/lib/python3.7/site-packages (from requests>=2.19.0->pandas_datareader) (1.26.14)
Requirement already satisfied: certifi>=2017.4.17 in /home/koinup4/anaconda3/envs/py37/lib/python3.7/site-packages (from requests>=2.19.0->pandas_datareader) (2022.12.7)
Requirement already satisfied: charset-normalizer<4,>=2 in /home/koinup4/anaconda3/envs/py37/lib/python3.7/site-packages (from requests>=2.19.0->pandas_datareader) (2.1.1)
Requirement already satisfied: six>=1.5 in /home/koinup4/anaconda3/envs/py37/lib/python3.7/site-packages (from python-dateutil>=2.7.3->pandas>=0.23->pandas_datareader) (1.16.0)
Installing collected packages: lxml, pandas_datareader
Successfully installed lxml-4.9.2 pandas_datareader-0.10.0

from pandas_datareader import data as pdr

인터넷상에 있는 주가정보를 크롤링해서 저장하는 함수

!pip install yfinance

Collecting yfinance
  Downloading yfinance-0.2.12-py2.py3-none-any.whl (59 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 59.2/59.2 kB 4.5 MB/s eta 0:00:00
Collecting html5lib>=1.1
  Downloading html5lib-1.1-py2.py3-none-any.whl (112 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 112.2/112.2 kB 12.9 MB/s eta 0:00:00
Requirement already satisfied: requests>=2.26 in /home/koinup4/anaconda3/envs/py37/lib/python3.7/site-packages (from yfinance) (2.28.2)
Requirement already satisfied: cryptography>=3.3.2 in /home/koinup4/anaconda3/envs/py37/lib/python3.7/site-packages (from yfinance) (3.4.8)
Collecting frozendict>=2.3.4
  Downloading frozendict-2.3.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (99 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 99.8/99.8 kB 19.9 MB/s eta 0:00:00
Collecting multitasking>=0.0.7
  Downloading multitasking-0.0.11-py3-none-any.whl (8.5 kB)
Requirement already satisfied: numpy>=1.16.5 in /home/koinup4/anaconda3/envs/py37/lib/python3.7/site-packages (from yfinance) (1.21.6)
Requirement already satisfied: lxml>=4.9.1 in /home/koinup4/anaconda3/envs/py37/lib/python3.7/site-packages (from yfinance) (4.9.2)
Requirement already satisfied: beautifulsoup4>=4.11.1 in /home/koinup4/anaconda3/envs/py37/lib/python3.7/site-packages (from yfinance) (4.11.1)
Requirement already satisfied: pandas>=1.3.0 in /home/koinup4/anaconda3/envs/py37/lib/python3.7/site-packages (from yfinance) (1.3.5)
Requirement already satisfied: pytz>=2022.5 in /home/koinup4/anaconda3/envs/py37/lib/python3.7/site-packages (from yfinance) (2022.7.1)
Collecting appdirs>=1.4.4
  Downloading appdirs-1.4.4-py2.py3-none-any.whl (9.6 kB)
Requirement already satisfied: soupsieve>1.2 in /home/koinup4/anaconda3/envs/py37/lib/python3.7/site-packages (from beautifulsoup4>=4.11.1->yfinance) (2.3.2.post1)
Requirement already satisfied: cffi>=1.12 in /home/koinup4/anaconda3/envs/py37/lib/python3.7/site-packages (from cryptography>=3.3.2->yfinance) (1.15.0)
Requirement already satisfied: webencodings in /home/koinup4/anaconda3/envs/py37/lib/python3.7/site-packages (from html5lib>=1.1->yfinance) (0.5.1)
Requirement already satisfied: six>=1.9 in /home/koinup4/anaconda3/envs/py37/lib/python3.7/site-packages (from html5lib>=1.1->yfinance) (1.16.0)
Requirement already satisfied: python-dateutil>=2.7.3 in /home/koinup4/anaconda3/envs/py37/lib/python3.7/site-packages (from pandas>=1.3.0->yfinance) (2.8.2)
Requirement already satisfied: certifi>=2017.4.17 in /home/koinup4/anaconda3/envs/py37/lib/python3.7/site-packages (from requests>=2.26->yfinance) (2022.12.7)
Requirement already satisfied: charset-normalizer<4,>=2 in /home/koinup4/anaconda3/envs/py37/lib/python3.7/site-packages (from requests>=2.26->yfinance) (2.1.1)
Requirement already satisfied: idna<4,>=2.5 in /home/koinup4/anaconda3/envs/py37/lib/python3.7/site-packages (from requests>=2.26->yfinance) (3.4)
Requirement already satisfied: urllib3<1.27,>=1.21.1 in /home/koinup4/anaconda3/envs/py37/lib/python3.7/site-packages (from requests>=2.26->yfinance) (1.26.14)
Requirement already satisfied: pycparser in /home/koinup4/anaconda3/envs/py37/lib/python3.7/site-packages (from cffi>=1.12->cryptography>=3.3.2->yfinance) (2.21)
Installing collected packages: multitasking, appdirs, html5lib, frozendict, yfinance
Successfully installed appdirs-1.4.4 frozendict-2.3.5 html5lib-1.1 multitasking-0.0.11 yfinance-0.2.12

import yfinance as yf

line

data1: 야후 파이낸스

- yahoo finance: https://finance.yahoo.com/

# Download daily prices directly with yfinance. The previous
# yf.pdr_override() + pdr.get_data_yahoo(...) pair only forwarded to
# yf.download and the override is deprecated upstream, so it is bypassed.
symbols = ['AMZN', 'AAPL', 'GOOG', 'MSFT', 'NFLX', 'NVDA', 'TSLA']
start = '2020-01-01'
end = '2022-10-30'
# auto_adjust=False keeps the separate 'Adj Close' column that is selected
# here; the result has one column per ticker, indexed by Date.
df = yf.download(symbols, start=start, end=end, auto_adjust=False)['Adj Close']

[*********************100%***********************]  7 of 7 completed

# Display the wide price table: Date index, one column per ticker.
df

	AAPL	AMZN	GOOG	MSFT	NFLX	NVDA	TSLA
Date
2020-01-02	73.449379	94.900497	68.368500	155.761826	329.809998	59.770554	28.684000
2020-01-03	72.735313	93.748497	68.032997	153.822311	325.899994	58.813866	29.534000
2020-01-06	73.314880	95.143997	69.710503	154.219925	335.829987	59.060509	30.102667
2020-01-07	72.970078	95.343002	69.667000	152.813766	330.750000	59.775528	31.270666
2020-01-08	74.143898	94.598503	70.216003	155.247818	339.260010	59.887650	32.809334
...	...	...	...	...	...	...	...
2022-10-24	148.975021	119.820000	102.970001	245.939163	282.450012	125.957771	211.250000
2022-10-25	151.855850	120.599998	104.930000	249.331085	291.019989	132.576080	222.419998
2022-10-26	148.875351	115.660004	94.820000	230.093628	298.619995	128.927017	224.639999
2022-10-27	144.339813	110.959999	92.599998	225.547836	296.940002	131.726288	225.089996
2022-10-28	155.245056	103.410004	96.580002	234.619492	295.720001	138.304611	228.520004

713 rows × 7 columns

# Inspect the column labels (the ticker symbols).
df.columns

Index(['AAPL', 'AMZN', 'GOOG', 'MSFT', 'NFLX', 'NVDA', 'TSLA'], dtype='object')

matplotlib: 1개의 y를 그리기

- 예시1: 1개의 y를 그리기

# Turn the Date index into an ordinary column; the result is only
# displayed, df itself is not reassigned.
df.reset_index()

	Date	AAPL	AMZN	GOOG	MSFT	NFLX	NVDA	TSLA
0	2020-01-02	73.449379	94.900497	68.368500	155.761826	329.809998	59.770554	28.684000
1	2020-01-03	72.735313	93.748497	68.032997	153.822311	325.899994	58.813866	29.534000
2	2020-01-06	73.314880	95.143997	69.710503	154.219925	335.829987	59.060509	30.102667
3	2020-01-07	72.970078	95.343002	69.667000	152.813766	330.750000	59.775528	31.270666
4	2020-01-08	74.143898	94.598503	70.216003	155.247818	339.260010	59.887650	32.809334
...	...	...	...	...	...	...	...	...
708	2022-10-24	148.975021	119.820000	102.970001	245.939163	282.450012	125.957771	211.250000
709	2022-10-25	151.855850	120.599998	104.930000	249.331085	291.019989	132.576080	222.419998
710	2022-10-26	148.875351	115.660004	94.820000	230.093628	298.619995	128.927017	224.639999
711	2022-10-27	144.339813	110.959999	92.599998	225.547836	296.940002	131.726288	225.089996
712	2022-10-28	155.245056	103.410004	96.580002	234.619492	295.720001	138.304611	228.520004

713 rows × 8 columns

# Long format: one row per (Date, ticker) with columns Date/variable/value.
df.reset_index().melt(id_vars='Date') # tidy data

	Date	variable	value
0	2020-01-02	AAPL	73.449379
1	2020-01-03	AAPL	72.735313
2	2020-01-06	AAPL	73.314880
3	2020-01-07	AAPL	72.970078
4	2020-01-08	AAPL	74.143898
...	...	...	...
4986	2022-10-24	TSLA	211.250000
4987	2022-10-25	TSLA	222.419998
4988	2022-10-26	TSLA	224.639999
4989	2022-10-27	TSLA	225.089996
4990	2022-10-28	TSLA	228.520004

4991 rows × 3 columns

# Plot a single series: AMZN against Date.
df.reset_index().plot(x='Date', y='AMZN')

<AxesSubplot:xlabel='Date'>

- 예시2

# Plot AMZN against Date, requesting the line kind explicitly.
df.reset_index().plot(x='Date',y='AMZN', kind='line')
# Same result as omitting kind: 'line' is the default plot kind.

<AxesSubplot:xlabel='Date'>

- 예시3

# Plot AMZN against Date through the plot.line accessor.
df.reset_index().plot.line(x='Date',y='AMZN')
# plot.line(...) is used here instead of passing kind='line'.

<AxesSubplot:xlabel='Date'>

matplotlib: 2개의 y를 겹쳐서 그리기

- 2개의 y를 겹쳐 그리기

# Passing a list to y overlays both series on the same axes.
df.reset_index().plot(x='Date', y=['AMZN','AAPL'])

<AxesSubplot:xlabel='Date'>

matplotlib: 모든 y를 겹쳐서 그리기

- 모든 y를 겹쳐서 그리기

# With y omitted, every remaining column is overlaid on one axes.
df.reset_index().plot(x='Date')

<AxesSubplot:xlabel='Date'>

matplotlib: 그림크기조정

# figsize=(width, height) sets the figure size (matplotlib uses inches).
df.reset_index().plot(x='Date',figsize=(8,8))

<AxesSubplot:xlabel='Date'>

matplotlib: 서브플랏

- 예시1: 기본 서브플랏

# subplots=True returns one axes per ticker instead of a single shared one.
df.reset_index().plot.line(x='Date',subplots=True,figsize=(10,10))

# The series are drawn in separate panels rather than overlaid.

array([<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
       <AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
       <AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
       <AxesSubplot:xlabel='Date'>], dtype=object)

- 예시2: 레이아웃 조정

# layout=(4,2) arranges the subplots on a grid of 4 rows by 2 columns.
df.reset_index().plot.line(x='Date',subplots=True,figsize=(15,15),layout=(4,2))

array([[<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>],
       [<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>],
       [<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>],
       [<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>]],
      dtype=object)

matplotlib: 폰트조정

# Same 4x2 grid; fontsize=15 sets the tick-label font size.
df.reset_index().plot.line(x='Date',subplots=True,figsize=(15,15),layout=(4,2),fontsize=15)

array([[<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>],
       [<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>],
       [<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>],
       [<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>]],
      dtype=object)

matplotlib: 레전드삭제

# legend=False suppresses the legend on each subplot.
df.reset_index().plot.line(x='Date',subplots=True, layout=(4,2), legend=False)

array([[<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>],
       [<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>],
       [<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>],
       [<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>]],
      dtype=object)

plotly: 모든 y를 겹쳐서 그리기

- 방법1

# Alternative long-format reshape via stack(): one row per (Date, ticker).
# The new columns get the default names 'level_1' and 0.
df.reset_index().set_index('Date').stack().reset_index()

	Date	level_1	0
0	2020-01-02	AAPL	73.449379
1	2020-01-02	AMZN	94.900497
2	2020-01-02	GOOG	68.368500
3	2020-01-02	MSFT	155.761826
4	2020-01-02	NFLX	329.809998
...	...	...	...
4986	2022-10-28	GOOG	96.580002
4987	2022-10-28	MSFT	234.619492
4988	2022-10-28	NFLX	295.720001
4989	2022-10-28	NVDA	138.304611
4990	2022-10-28	TSLA	228.520004

4991 rows × 3 columns

- 방법2

# Reshape to long format (Date, variable, value), then draw one colored
# line per ticker with the plotly backend.
(
    df.reset_index()
    .melt(id_vars='Date')
    .plot.line(
        backend='plotly',
        x='Date',
        y='value',
        color='variable',
    )
)

bar

data2: 핸드폰점유율

# Load the mobile-phone market-share CSV. This rebinds df, replacing the
# stock-price table; the trailing `df` displays the new frame.
df = pd.read_csv('https://raw.githubusercontent.com/kalilurrahman/datasets/main/mobilephonemktshare2020.csv')
df

	Date	Samsung	Apple	Huawei	Xiaomi	Oppo	Mobicel	Motorola	LG	Others	Realme	Google	Nokia	Lenovo	OnePlus	Sony	Asus
0	2019-10	31.49	22.09	10.02	7.79	4.10	3.15	2.41	2.40	9.51	0.54	2.35	0.95	0.96	0.70	0.84	0.74
1	2019-11	31.36	22.90	10.18	8.16	4.42	3.41	2.40	2.40	9.10	0.78	0.66	0.97	0.97	0.73	0.83	0.75
2	2019-12	31.37	24.79	9.95	7.73	4.23	3.19	2.50	2.54	8.13	0.84	0.75	0.90	0.87	0.74	0.77	0.70
3	2020-01	31.29	24.76	10.61	8.10	4.25	3.02	2.42	2.40	7.55	0.88	0.69	0.88	0.86	0.79	0.80	0.69
4	2020-02	30.91	25.89	10.98	7.80	4.31	2.89	2.36	2.34	7.06	0.89	0.70	0.81	0.77	0.78	0.80	0.69
5	2020-03	30.80	27.03	10.70	7.70	4.30	2.87	2.35	2.28	6.63	0.93	0.73	0.72	0.74	0.78	0.76	0.66
6	2020-04	30.41	28.79	10.28	7.60	4.20	2.75	2.51	2.28	5.84	0.90	0.75	0.69	0.71	0.80	0.76	0.70
7	2020-05	30.18	26.72	10.39	8.36	4.70	3.12	2.46	2.19	6.31	1.04	0.70	0.73	0.77	0.81	0.78	0.76
8	2020-06	31.06	25.26	10.69	8.55	4.65	3.18	2.57	2.11	6.39	1.04	0.68	0.74	0.75	0.77	0.78	0.75
9	2020-07	30.95	24.82	10.75	8.94	4.69	3.46	2.45	2.03	6.41	1.13	0.65	0.76	0.74	0.76	0.75	0.72
10	2020-08	31.04	25.15	10.73	8.90	4.69	3.38	2.39	1.96	6.31	1.18	0.63	0.74	0.72	0.75	0.73	0.70
11	2020-09	30.57	24.98	10.58	9.49	4.94	3.50	2.27	1.88	6.12	1.45	0.63	0.74	0.67	0.81	0.69	0.67
12	2020-10	30.25	26.53	10.44	9.67	4.83	2.54	2.21	1.79	6.04	1.55	0.63	0.69	0.65	0.85	0.67	0.64

matplotlib: 2개의 y를 겹쳐그리기

- 예시1

# Bar chart of the Samsung and Apple shares for each Date.
df.plot.bar(x='Date', y=['Samsung', 'Apple'])

<AxesSubplot:xlabel='Date'>

- 예시2: width옵션으로 폭조정

# Same chart; width controls the relative bar width.
df.plot.bar(x='Date', y=['Samsung', 'Apple'], width=0.8)

<AxesSubplot:xlabel='Date'>

matplotlib: 2개의 y를 겹쳐그리기 + x,y 플립

- 예시: barh를 이용하여 플립

# barh flips the axes: horizontal bars with Date on the y axis.
df.plot.barh(x='Date', y=['Samsung', 'Apple'], width=0.8)

<AxesSubplot:ylabel='Date'>

plotly: 모든y를 stacked bar로 나타내기

# Long format first, then one colored bar segment per brand for each Date.
# No barmode is passed, so plotly's default bar mode is used.
(
    df.melt(id_vars='Date')
    .plot.bar(
        backend='plotly',
        x='Date',
        y='value',
        color='variable',
    )
)

plotly: 3개의 y를 겹쳐그리기

# Keep only Samsung, Apple and Huawei, then draw their shares as bars
# colored by brand.
(
    df.melt(id_vars='Date')
    .query('variable=="Samsung" or variable=="Apple" or variable == "Huawei"')
    .plot.bar(
        backend='plotly',
        x='Date',
        y='value',
        color='variable',
    )
)

- barmode=‘group’

# Same three brands; barmode='group' places the bars side by side
# instead of on top of each other.
(
    df.melt(id_vars='Date')
    .query('variable=="Samsung" or variable=="Apple" or variable == "Huawei"')
    .plot.bar(
        backend='plotly',
        x='Date',
        y='value',
        color='variable',
        barmode='group',
    )
)

plotly: 3개의 y를 겹쳐그리기 + text

# Grouped bars for three brands, with each bar labelled by its value
# (text='value') and a taller figure (height=600).
(
    df.melt(id_vars='Date')
    .query('variable=="Samsung" or variable=="Apple" or variable == "Huawei"')
    .plot.bar(
        backend='plotly',
        x='Date',
        y='value',
        color='variable',
        barmode='group',
        text='value',
        height=600,
    )
)

plotly: 면분할로 subplot그리기 (facet_col)

# Samsung vs. Apple, split into side-by-side panels: facet_col creates one
# column of the figure per brand.
(
    df.melt(id_vars='Date')
    .query(' variable=="Samsung" or variable=="Apple"')
    .plot.bar(
        backend='plotly',
        x='Date',
        y='value',
        color='variable',
        barmode='group',
        facet_col='variable',
    )
)

plotly: 면분할로 subplot그리기 (facet_row)

# Samsung vs. Apple, split into stacked panels: facet_row creates one row
# of the figure per brand.
(
    df.melt(id_vars='Date')
    .query(' variable=="Samsung" or variable=="Apple"')
    .plot.bar(
        backend='plotly',
        x='Date',
        y='value',
        color='variable',
        barmode='group',
        facet_row='variable',
    )
)

boxplot

data3: 팁

# Load the tips sample dataset shipped with plotly express. This rebinds
# df; the trailing `df` displays the frame.
import plotly.express as px 
df = px.data.tips() 
df

	total_bill	tip	sex	smoker	day	time	size
0	16.99	1.01	Female	No	Sun	Dinner	2
1	10.34	1.66	Male	No	Sun	Dinner	3
2	21.01	3.50	Male	No	Sun	Dinner	3
3	23.68	3.31	Male	No	Sun	Dinner	2
4	24.59	3.61	Female	No	Sun	Dinner	4
...	...	...	...	...	...	...	...
239	29.03	5.92	Male	No	Sat	Dinner	3
240	27.18	2.00	Female	Yes	Sat	Dinner	2
241	22.67	2.00	Male	Yes	Sat	Dinner	2
242	17.82	1.75	Male	No	Sat	Dinner	2
243	18.78	3.00	Female	No	Thur	Dinner	2

244 rows × 7 columns

plotly: 팁의 박스플랏

# Box plot of tip alone, drawn with the plotly backend at 500x500.
df.plot.box(backend='plotly',y='tip', width=500, height=500)

plotly: 시간에 따른 팁의 박스플랏

# One box of tip per value of time.
df.plot.box(backend='plotly',x='time', y='tip', width=500, height=500)

plotly: 시간과 성별에 따른 팁의 박스플랏

- 예시1: y=‘tip’, x=‘time’, color=‘sex’

# Tip by time, with a separately colored box for each sex.
df.plot.box(backend='plotly',x='time', y='tip', color='sex', width=500, height=500)

- 예시2: y=‘tip’, x=‘time’, color=‘sex’, points=‘all’

# Same plot; points='all' also draws every individual observation.
df.plot.box(backend='plotly',x='time', y='tip', color='sex', points='all',width=500, height=500)

저녁에 손님이 더 많다

plotly: 시간,성별,요일에 따른 팁의 박스플랏

- 예시1: y=‘tip’, x=‘time’, color=‘sex’, facet_col=‘day’

# Tip by time and sex, with one facet column per day.
df.plot.box(backend='plotly',x='time', y='tip', color='sex', facet_col='day', width=500, height=500)

- 예시2: y=‘tip’, color=‘sex’, facet_col=‘time’, facet_row=‘day’

# Facet grid: columns by time, rows by day; boxes colored by sex, with
# all observations shown.
df.plot.box(backend='plotly',facet_col='time', facet_row='day',y='tip',color='sex',points='all',height=1000)

plotly: 시간,성별,요일,흡연에 따른 팁의 박스플랏

# Same time x day facet grid, additionally splitting the x axis by smoker.
df.plot.box(backend='plotly',facet_col='time', facet_row='day',x='smoker',y='tip',color='sex',points='all',height=1000)

histogram

data4: 인사자료

# Load the HR dataset CSV. This rebinds df; the trailing `df` displays it.
df = pd.read_csv('https://raw.githubusercontent.com/guebin/DV2022/master/posts/HRDataset_v14.csv')
df

	Employee_Name	EmpID	MarriedID	MaritalStatusID	GenderID	EmpStatusID	DeptID	PerfScoreID	FromDiversityJobFairID	Salary	...	ManagerName	ManagerID	RecruitmentSource	PerformanceScore	EngagementSurvey	EmpSatisfaction	SpecialProjectsCount	LastPerformanceReview_Date	DaysLateLast30	Absences
0	Adinolfi, Wilson K	10026	0	0	1	1	5	4	0	62506	...	Michael Albert	22.0	LinkedIn	Exceeds	4.60	5	0	1/17/2019	0	1
1	Ait Sidi, Karthikeyan	10084	1	1	1	5	3	3	0	104437	...	Simon Roup	4.0	Indeed	Fully Meets	4.96	3	6	2/24/2016	0	17
2	Akinkuolie, Sarah	10196	1	1	0	5	5	3	0	64955	...	Kissy Sullivan	20.0	LinkedIn	Fully Meets	3.02	3	0	5/15/2012	0	3
3	Alagbe,Trina	10088	1	1	0	1	5	3	0	64991	...	Elijiah Gray	16.0	Indeed	Fully Meets	4.84	5	0	1/3/2019	0	15
4	Anderson, Carol	10069	0	2	0	5	5	3	0	50825	...	Webster Butler	39.0	Google Search	Fully Meets	5.00	4	0	2/1/2016	0	2
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
306	Woodson, Jason	10135	0	0	1	1	5	3	0	65893	...	Kissy Sullivan	20.0	LinkedIn	Fully Meets	4.07	4	0	2/28/2019	0	13
307	Ybarra, Catherine	10301	0	0	0	5	5	1	0	48513	...	Brannon Miller	12.0	Google Search	PIP	3.20	2	0	9/2/2015	5	4
308	Zamora, Jennifer	10010	0	0	0	1	3	4	0	220450	...	Janet King	2.0	Employee Referral	Exceeds	4.60	5	6	2/21/2019	0	16
309	Zhou, Julia	10043	0	0	0	1	3	3	0	89292	...	Simon Roup	4.0	Employee Referral	Fully Meets	5.00	3	5	2/1/2019	0	11
310	Zima, Colleen	10271	0	4	0	1	5	3	0	45046	...	David Stanley	14.0	LinkedIn	Fully Meets	4.50	5	0	1/30/2019	0	2

311 rows × 36 columns

인종별 급여비교 (단순 groupby)

# Mean salary and head-count per RaceDesc group. The string alias 'mean'
# replaces np.mean: passing NumPy callables to agg is deprecated in recent
# pandas, and the alias produces the same values and the same 'mean' label.
df.groupby('RaceDesc').agg({'Salary': ['mean', 'count']})

	Salary
	mean	count
RaceDesc
American Indian or Alaska Native	65806.000000	3
Asian	68521.206897	29
Black or African American	74431.025000	80
Hispanic	83667.000000	1
Two or more races	59998.181818	11
White	67287.545455	187

평균 비교는 히스토그램을 그려 봤을 때 분포가 어느 정도 정규분포를 띠어야 의미가 있다

급여의 시각화

- 예시1

# Salary histograms for the two largest groups, one facet column and one
# color per RaceDesc value.
(
    df.query('RaceDesc == "Black or African American" or RaceDesc == "White"')
    .plot.hist(
        backend='plotly',
        x='Salary',
        color='RaceDesc',
        facet_col='RaceDesc',
    )
)

- 예시2: 비율로 계산

# Same faceted salary histograms, but histnorm='probability' scales the
# bars to proportions so groups of different size can be compared.
(
    df.query('RaceDesc == "Black or African American" or RaceDesc == "White"')
    .plot.hist(
        backend='plotly',
        x='Salary',
        color='RaceDesc',
        facet_col='RaceDesc',
        histnorm='probability',
    )
)