DV 11주차(1)

PANDAS BACKEND
PLOTLY
Author

김보람

Published

November 14, 2022

import numpy as np
import pandas as pd
!pip install pandas_datareader
Collecting pandas_datareader
  Downloading pandas_datareader-0.10.0-py3-none-any.whl (109 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 109.5/109.5 kB 5.9 MB/s eta 0:00:00
Requirement already satisfied: requests>=2.19.0 in /home/koinup4/anaconda3/envs/py37/lib/python3.7/site-packages (from pandas_datareader) (2.28.2)
Collecting lxml
  Downloading lxml-4.9.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl (6.6 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 6.6/6.6 MB 79.4 MB/s eta 0:00:00:00:010:01
Requirement already satisfied: pandas>=0.23 in /home/koinup4/anaconda3/envs/py37/lib/python3.7/site-packages (from pandas_datareader) (1.3.5)
Requirement already satisfied: numpy>=1.17.3 in /home/koinup4/anaconda3/envs/py37/lib/python3.7/site-packages (from pandas>=0.23->pandas_datareader) (1.21.6)
Requirement already satisfied: python-dateutil>=2.7.3 in /home/koinup4/anaconda3/envs/py37/lib/python3.7/site-packages (from pandas>=0.23->pandas_datareader) (2.8.2)
Requirement already satisfied: pytz>=2017.3 in /home/koinup4/anaconda3/envs/py37/lib/python3.7/site-packages (from pandas>=0.23->pandas_datareader) (2022.7.1)
Requirement already satisfied: idna<4,>=2.5 in /home/koinup4/anaconda3/envs/py37/lib/python3.7/site-packages (from requests>=2.19.0->pandas_datareader) (3.4)
Requirement already satisfied: urllib3<1.27,>=1.21.1 in /home/koinup4/anaconda3/envs/py37/lib/python3.7/site-packages (from requests>=2.19.0->pandas_datareader) (1.26.14)
Requirement already satisfied: certifi>=2017.4.17 in /home/koinup4/anaconda3/envs/py37/lib/python3.7/site-packages (from requests>=2.19.0->pandas_datareader) (2022.12.7)
Requirement already satisfied: charset-normalizer<4,>=2 in /home/koinup4/anaconda3/envs/py37/lib/python3.7/site-packages (from requests>=2.19.0->pandas_datareader) (2.1.1)
Requirement already satisfied: six>=1.5 in /home/koinup4/anaconda3/envs/py37/lib/python3.7/site-packages (from python-dateutil>=2.7.3->pandas>=0.23->pandas_datareader) (1.16.0)
Installing collected packages: lxml, pandas_datareader
Successfully installed lxml-4.9.2 pandas_datareader-0.10.0
from pandas_datareader import data as pdr
!pip install yfinance
Collecting yfinance
  Downloading yfinance-0.2.12-py2.py3-none-any.whl (59 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 59.2/59.2 kB 4.5 MB/s eta 0:00:00
Collecting html5lib>=1.1
  Downloading html5lib-1.1-py2.py3-none-any.whl (112 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 112.2/112.2 kB 12.9 MB/s eta 0:00:00
Requirement already satisfied: requests>=2.26 in /home/koinup4/anaconda3/envs/py37/lib/python3.7/site-packages (from yfinance) (2.28.2)
Requirement already satisfied: cryptography>=3.3.2 in /home/koinup4/anaconda3/envs/py37/lib/python3.7/site-packages (from yfinance) (3.4.8)
Collecting frozendict>=2.3.4
  Downloading frozendict-2.3.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (99 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 99.8/99.8 kB 19.9 MB/s eta 0:00:00
Collecting multitasking>=0.0.7
  Downloading multitasking-0.0.11-py3-none-any.whl (8.5 kB)
Requirement already satisfied: numpy>=1.16.5 in /home/koinup4/anaconda3/envs/py37/lib/python3.7/site-packages (from yfinance) (1.21.6)
Requirement already satisfied: lxml>=4.9.1 in /home/koinup4/anaconda3/envs/py37/lib/python3.7/site-packages (from yfinance) (4.9.2)
Requirement already satisfied: beautifulsoup4>=4.11.1 in /home/koinup4/anaconda3/envs/py37/lib/python3.7/site-packages (from yfinance) (4.11.1)
Requirement already satisfied: pandas>=1.3.0 in /home/koinup4/anaconda3/envs/py37/lib/python3.7/site-packages (from yfinance) (1.3.5)
Requirement already satisfied: pytz>=2022.5 in /home/koinup4/anaconda3/envs/py37/lib/python3.7/site-packages (from yfinance) (2022.7.1)
Collecting appdirs>=1.4.4
  Downloading appdirs-1.4.4-py2.py3-none-any.whl (9.6 kB)
Requirement already satisfied: soupsieve>1.2 in /home/koinup4/anaconda3/envs/py37/lib/python3.7/site-packages (from beautifulsoup4>=4.11.1->yfinance) (2.3.2.post1)
Requirement already satisfied: cffi>=1.12 in /home/koinup4/anaconda3/envs/py37/lib/python3.7/site-packages (from cryptography>=3.3.2->yfinance) (1.15.0)
Requirement already satisfied: webencodings in /home/koinup4/anaconda3/envs/py37/lib/python3.7/site-packages (from html5lib>=1.1->yfinance) (0.5.1)
Requirement already satisfied: six>=1.9 in /home/koinup4/anaconda3/envs/py37/lib/python3.7/site-packages (from html5lib>=1.1->yfinance) (1.16.0)
Requirement already satisfied: python-dateutil>=2.7.3 in /home/koinup4/anaconda3/envs/py37/lib/python3.7/site-packages (from pandas>=1.3.0->yfinance) (2.8.2)
Requirement already satisfied: certifi>=2017.4.17 in /home/koinup4/anaconda3/envs/py37/lib/python3.7/site-packages (from requests>=2.26->yfinance) (2022.12.7)
Requirement already satisfied: charset-normalizer<4,>=2 in /home/koinup4/anaconda3/envs/py37/lib/python3.7/site-packages (from requests>=2.26->yfinance) (2.1.1)
Requirement already satisfied: idna<4,>=2.5 in /home/koinup4/anaconda3/envs/py37/lib/python3.7/site-packages (from requests>=2.26->yfinance) (3.4)
Requirement already satisfied: urllib3<1.27,>=1.21.1 in /home/koinup4/anaconda3/envs/py37/lib/python3.7/site-packages (from requests>=2.26->yfinance) (1.26.14)
Requirement already satisfied: pycparser in /home/koinup4/anaconda3/envs/py37/lib/python3.7/site-packages (from cffi>=1.12->cryptography>=3.3.2->yfinance) (2.21)
Installing collected packages: multitasking, appdirs, html5lib, frozendict, yfinance
Successfully installed appdirs-1.4.4 frozendict-2.3.5 html5lib-1.1 multitasking-0.0.11 yfinance-0.2.12
import yfinance as yf

line

data1: 야후 파이낸스

- yahoo finance: https://finance.yahoo.com/

# NOTE(review): pdr_override() presumably reroutes pandas_datareader's Yahoo
# readers through yfinance -- confirm against the installed yfinance version.
yf.pdr_override()
# Tickers to download.
symbols = ['AMZN','AAPL','GOOG','MSFT','NFLX','NVDA','TSLA']
# Date range handed to the downloader as strings.
start = '2020-01-01'
end = '2022-10-30'
# Keep only the 'Adj Close' part of the result: one column per ticker.
df = pdr.get_data_yahoo(symbols,start,end)['Adj Close']
[*********************100%***********************]  7 of 7 completed
df
AAPL AMZN GOOG MSFT NFLX NVDA TSLA
Date
2020-01-02 73.449379 94.900497 68.368500 155.761826 329.809998 59.770554 28.684000
2020-01-03 72.735313 93.748497 68.032997 153.822311 325.899994 58.813866 29.534000
2020-01-06 73.314880 95.143997 69.710503 154.219925 335.829987 59.060509 30.102667
2020-01-07 72.970078 95.343002 69.667000 152.813766 330.750000 59.775528 31.270666
2020-01-08 74.143898 94.598503 70.216003 155.247818 339.260010 59.887650 32.809334
... ... ... ... ... ... ... ...
2022-10-24 148.975021 119.820000 102.970001 245.939163 282.450012 125.957771 211.250000
2022-10-25 151.855850 120.599998 104.930000 249.331085 291.019989 132.576080 222.419998
2022-10-26 148.875351 115.660004 94.820000 230.093628 298.619995 128.927017 224.639999
2022-10-27 144.339813 110.959999 92.599998 225.547836 296.940002 131.726288 225.089996
2022-10-28 155.245056 103.410004 96.580002 234.619492 295.720001 138.304611 228.520004

713 rows × 7 columns

df.columns
Index(['AAPL', 'AMZN', 'GOOG', 'MSFT', 'NFLX', 'NVDA', 'TSLA'], dtype='object')

matplotlib: 1개의 y를 그리기

- 예시1: 1개의 y를 그리기

df.reset_index()
Date AAPL AMZN GOOG MSFT NFLX NVDA TSLA
0 2020-01-02 73.449379 94.900497 68.368500 155.761826 329.809998 59.770554 28.684000
1 2020-01-03 72.735313 93.748497 68.032997 153.822311 325.899994 58.813866 29.534000
2 2020-01-06 73.314880 95.143997 69.710503 154.219925 335.829987 59.060509 30.102667
3 2020-01-07 72.970078 95.343002 69.667000 152.813766 330.750000 59.775528 31.270666
4 2020-01-08 74.143898 94.598503 70.216003 155.247818 339.260010 59.887650 32.809334
... ... ... ... ... ... ... ... ...
708 2022-10-24 148.975021 119.820000 102.970001 245.939163 282.450012 125.957771 211.250000
709 2022-10-25 151.855850 120.599998 104.930000 249.331085 291.019989 132.576080 222.419998
710 2022-10-26 148.875351 115.660004 94.820000 230.093628 298.619995 128.927017 224.639999
711 2022-10-27 144.339813 110.959999 92.599998 225.547836 296.940002 131.726288 225.089996
712 2022-10-28 155.245056 103.410004 96.580002 234.619492 295.720001 138.304611 228.520004

713 rows × 8 columns

df.reset_index().melt(id_vars='Date') # tidy data
Date variable value
0 2020-01-02 AAPL 73.449379
1 2020-01-03 AAPL 72.735313
2 2020-01-06 AAPL 73.314880
3 2020-01-07 AAPL 72.970078
4 2020-01-08 AAPL 74.143898
... ... ... ...
4986 2022-10-24 TSLA 211.250000
4987 2022-10-25 TSLA 222.419998
4988 2022-10-26 TSLA 224.639999
4989 2022-10-27 TSLA 225.089996
4990 2022-10-28 TSLA 228.520004

4991 rows × 3 columns

df.reset_index().plot(x='Date', y='AMZN')
<AxesSubplot:xlabel='Date'>

- 예시2

df.reset_index().plot(x='Date',y='AMZN', kind='line')
# Same as the previous example, where kind was simply omitted.
<AxesSubplot:xlabel='Date'>

- 예시3

df.reset_index().plot.line(x='Date',y='AMZN')
# plot.line is used here in place of kind='line'.
<AxesSubplot:xlabel='Date'>

matplotlib: 2개의 y를 겹쳐서 그리기

- 2개의 y를 겹쳐 그리기

df.reset_index().plot(x='Date', y=['AMZN','AAPL'])
<AxesSubplot:xlabel='Date'>

matplotlib: 모든 y를 겹쳐서 그리기

- 모든 y를 겹쳐서 그리기

df.reset_index().plot(x='Date')
<AxesSubplot:xlabel='Date'>

matplotlib: 그림크기조정

df.reset_index().plot(x='Date',figsize=(8,8))
<AxesSubplot:xlabel='Date'>

matplotlib: 서브플랏

- 예시1: 기본 서브플랏

df.reset_index().plot.line(x='Date',subplots=True,figsize=(10,10))

# With subplots=True each column is drawn on its own axes instead of overlaid.
array([<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
       <AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
       <AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
       <AxesSubplot:xlabel='Date'>], dtype=object)

- 예시2: 레이아웃 조정

df.reset_index().plot.line(x='Date',subplots=True,figsize=(15,15),layout=(4,2))
array([[<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>],
       [<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>],
       [<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>],
       [<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>]],
      dtype=object)

matplotlib: 폰트조정

df.reset_index().plot.line(x='Date',subplots=True,figsize=(15,15),layout=(4,2),fontsize=15)
array([[<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>],
       [<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>],
       [<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>],
       [<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>]],
      dtype=object)

matplotlib: 레전드삭제

df.reset_index().plot.line(x='Date',subplots=True, layout=(4,2), legend=False)
array([[<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>],
       [<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>],
       [<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>],
       [<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>]],
      dtype=object)

plotly: 모든 y를 겹쳐서 그리기

- 방법1

df.reset_index().set_index('Date').stack().reset_index()
Date level_1 0
0 2020-01-02 AAPL 73.449379
1 2020-01-02 AMZN 94.900497
2 2020-01-02 GOOG 68.368500
3 2020-01-02 MSFT 155.761826
4 2020-01-02 NFLX 329.809998
... ... ... ...
4986 2022-10-28 GOOG 96.580002
4987 2022-10-28 MSFT 234.619492
4988 2022-10-28 NFLX 295.720001
4989 2022-10-28 NVDA 138.304611
4990 2022-10-28 TSLA 228.520004

4991 rows × 3 columns

- 방법2

df.reset_index().melt(id_vars='Date').plot.line(backend='plotly',x='Date',y='value',color='variable')

bar

data2: 핸드폰점유율

df = pd.read_csv('https://raw.githubusercontent.com/kalilurrahman/datasets/main/mobilephonemktshare2020.csv')
df
Date Samsung Apple Huawei Xiaomi Oppo Mobicel Motorola LG Others Realme Google Nokia Lenovo OnePlus Sony Asus
0 2019-10 31.49 22.09 10.02 7.79 4.10 3.15 2.41 2.40 9.51 0.54 2.35 0.95 0.96 0.70 0.84 0.74
1 2019-11 31.36 22.90 10.18 8.16 4.42 3.41 2.40 2.40 9.10 0.78 0.66 0.97 0.97 0.73 0.83 0.75
2 2019-12 31.37 24.79 9.95 7.73 4.23 3.19 2.50 2.54 8.13 0.84 0.75 0.90 0.87 0.74 0.77 0.70
3 2020-01 31.29 24.76 10.61 8.10 4.25 3.02 2.42 2.40 7.55 0.88 0.69 0.88 0.86 0.79 0.80 0.69
4 2020-02 30.91 25.89 10.98 7.80 4.31 2.89 2.36 2.34 7.06 0.89 0.70 0.81 0.77 0.78 0.80 0.69
5 2020-03 30.80 27.03 10.70 7.70 4.30 2.87 2.35 2.28 6.63 0.93 0.73 0.72 0.74 0.78 0.76 0.66
6 2020-04 30.41 28.79 10.28 7.60 4.20 2.75 2.51 2.28 5.84 0.90 0.75 0.69 0.71 0.80 0.76 0.70
7 2020-05 30.18 26.72 10.39 8.36 4.70 3.12 2.46 2.19 6.31 1.04 0.70 0.73 0.77 0.81 0.78 0.76
8 2020-06 31.06 25.26 10.69 8.55 4.65 3.18 2.57 2.11 6.39 1.04 0.68 0.74 0.75 0.77 0.78 0.75
9 2020-07 30.95 24.82 10.75 8.94 4.69 3.46 2.45 2.03 6.41 1.13 0.65 0.76 0.74 0.76 0.75 0.72
10 2020-08 31.04 25.15 10.73 8.90 4.69 3.38 2.39 1.96 6.31 1.18 0.63 0.74 0.72 0.75 0.73 0.70
11 2020-09 30.57 24.98 10.58 9.49 4.94 3.50 2.27 1.88 6.12 1.45 0.63 0.74 0.67 0.81 0.69 0.67
12 2020-10 30.25 26.53 10.44 9.67 4.83 2.54 2.21 1.79 6.04 1.55 0.63 0.69 0.65 0.85 0.67 0.64

matplotlib: 2개의 y를 겹쳐그리기

- 예시1

df.plot.bar(x='Date', y=['Samsung', 'Apple'])
<AxesSubplot:xlabel='Date'>

- 예시2: width옵션으로 폭조정

df.plot.bar(x='Date', y=['Samsung', 'Apple'], width=0.8)
<AxesSubplot:xlabel='Date'>

matplotlib: 2개의 y를 겹쳐그리기 + x,y 플립

- 예시: barh를 이용하여 플립

df.plot.barh(x='Date', y=['Samsung', 'Apple'], width=0.8)
<AxesSubplot:ylabel='Date'>

plotly: 모든y를 stacked bar로 나타내기

df.melt(id_vars='Date').plot.bar(backend='plotly',x='Date',y='value',color='variable')

plotly: 3개의 y를 겹쳐그리기

# Reshape to long form, keep only the three makers of interest, and draw
# them as plotly bars colored by maker. The membership test replaces a
# chain of == / or comparisons; the parentheses replace backslash
# continuations.
(
    df.melt(id_vars='Date')
    .query('variable in ["Samsung", "Apple", "Huawei"]')
    .plot.bar(backend='plotly', x='Date', y='value', color='variable')
)

- barmode='group'

# Same three-maker selection, with barmode='group' passed through to the
# plotly backend. The membership test replaces a chain of == / or
# comparisons; the parentheses replace backslash continuations.
(
    df.melt(id_vars='Date')
    .query('variable in ["Samsung", "Apple", "Huawei"]')
    .plot.bar(
        backend='plotly',
        x='Date',
        y='value',
        color='variable',
        barmode='group',
    )
)

plotly: 3개의 y를 겹쳐그리기 + text

# Grouped bars for the three makers, with the 'value' column also passed
# as text and the figure height set to 600. The membership test replaces
# a chain of == / or comparisons; the parentheses replace backslash
# continuations.
(
    df.melt(id_vars='Date')
    .query('variable in ["Samsung", "Apple", "Huawei"]')
    .plot.bar(
        backend='plotly',
        x='Date',
        y='value',
        color='variable',
        barmode='group',
        text='value',
        height=600,
    )
)

plotly: 면분할로 subplot그리기 (facet_col)

# Samsung vs. Apple, with facet_col='variable' passed to the plotly
# backend. The membership test replaces the == / or chain (and drops the
# stray leading space in the query string); the parentheses replace the
# backslash continuation.
(
    df.melt(id_vars='Date')
    .query('variable in ["Samsung", "Apple"]')
    .plot.bar(
        backend='plotly',
        x='Date',
        y='value',
        color='variable',
        barmode='group',
        facet_col='variable',
    )
)

plotly: 면분할로 subplot그리기 (facet_row)

# Samsung vs. Apple, with facet_row='variable' passed to the plotly
# backend. The membership test replaces the == / or chain (and drops the
# stray leading space in the query string); the parentheses replace the
# backslash continuation.
(
    df.melt(id_vars='Date')
    .query('variable in ["Samsung", "Apple"]')
    .plot.bar(
        backend='plotly',
        x='Date',
        y='value',
        color='variable',
        barmode='group',
        facet_row='variable',
    )
)

boxplot

data3: 팁

import plotly.express as px 
df = px.data.tips() 
df
total_bill tip sex smoker day time size
0 16.99 1.01 Female No Sun Dinner 2
1 10.34 1.66 Male No Sun Dinner 3
2 21.01 3.50 Male No Sun Dinner 3
3 23.68 3.31 Male No Sun Dinner 2
4 24.59 3.61 Female No Sun Dinner 4
... ... ... ... ... ... ... ...
239 29.03 5.92 Male No Sat Dinner 3
240 27.18 2.00 Female Yes Sat Dinner 2
241 22.67 2.00 Male Yes Sat Dinner 2
242 17.82 1.75 Male No Sat Dinner 2
243 18.78 3.00 Female No Thur Dinner 2

244 rows × 7 columns

plotly: 팁의 박스플랏

df.plot.box(backend='plotly',y='tip', width=500, height=500)

plotly: 시간에 따른 팁의 박스플랏

df.plot.box(backend='plotly',x='time', y='tip', width=500, height=500)

plotly: 시간과 성별에 따른 팁의 박스플랏

- 예시1: y='tip', x='time', color='sex'

df.plot.box(backend='plotly',x='time', y='tip', color='sex', width=500, height=500)

- 예시2: y='tip', x='time', color='sex', points='all'

df.plot.box(backend='plotly',x='time', y='tip', color='sex', points='all',width=500, height=500)
  • 저녁에 손님이 더 많다

plotly: 시간,성별,요일에 따른 팁의 박스플랏

- 예시1: y='tip', x='time', color='sex', facet_col='day'

df.plot.box(backend='plotly',x='time', y='tip', color='sex', facet_col='day', width=500, height=500)

- 예시2: y='tip', color='sex', facet_col='time', facet_row='day'

df.plot.box(backend='plotly',facet_col='time', facet_row='day',y='tip',color='sex',points='all',height=1000)

plotly: 시간,성별,요일,흡연에 따른 팁의 박스플랏

df.plot.box(backend='plotly',facet_col='time', facet_row='day',x='smoker',y='tip',color='sex',points='all',height=1000)

histogram

data4: 인사자료

df = pd.read_csv('https://raw.githubusercontent.com/guebin/DV2022/master/posts/HRDataset_v14.csv')
df
Employee_Name EmpID MarriedID MaritalStatusID GenderID EmpStatusID DeptID PerfScoreID FromDiversityJobFairID Salary ... ManagerName ManagerID RecruitmentSource PerformanceScore EngagementSurvey EmpSatisfaction SpecialProjectsCount LastPerformanceReview_Date DaysLateLast30 Absences
0 Adinolfi, Wilson K 10026 0 0 1 1 5 4 0 62506 ... Michael Albert 22.0 LinkedIn Exceeds 4.60 5 0 1/17/2019 0 1
1 Ait Sidi, Karthikeyan 10084 1 1 1 5 3 3 0 104437 ... Simon Roup 4.0 Indeed Fully Meets 4.96 3 6 2/24/2016 0 17
2 Akinkuolie, Sarah 10196 1 1 0 5 5 3 0 64955 ... Kissy Sullivan 20.0 LinkedIn Fully Meets 3.02 3 0 5/15/2012 0 3
3 Alagbe,Trina 10088 1 1 0 1 5 3 0 64991 ... Elijiah Gray 16.0 Indeed Fully Meets 4.84 5 0 1/3/2019 0 15
4 Anderson, Carol 10069 0 2 0 5 5 3 0 50825 ... Webster Butler 39.0 Google Search Fully Meets 5.00 4 0 2/1/2016 0 2
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
306 Woodson, Jason 10135 0 0 1 1 5 3 0 65893 ... Kissy Sullivan 20.0 LinkedIn Fully Meets 4.07 4 0 2/28/2019 0 13
307 Ybarra, Catherine 10301 0 0 0 5 5 1 0 48513 ... Brannon Miller 12.0 Google Search PIP 3.20 2 0 9/2/2015 5 4
308 Zamora, Jennifer 10010 0 0 0 1 3 4 0 220450 ... Janet King 2.0 Employee Referral Exceeds 4.60 5 6 2/21/2019 0 16
309 Zhou, Julia 10043 0 0 0 1 3 3 0 89292 ... Simon Roup 4.0 Employee Referral Fully Meets 5.00 3 5 2/1/2019 0 11
310 Zima, Colleen 10271 0 4 0 1 5 3 0 45046 ... David Stanley 14.0 LinkedIn Fully Meets 4.50 5 0 1/30/2019 0 2

311 rows × 36 columns

인종별 급여비교 (단순 groupby)

# Mean salary and head count for each RaceDesc group. The aggregations are
# given by name: recent pandas releases emit a FutureWarning when a NumPy
# callable such as np.mean is passed to agg and recommend the string alias,
# which produces the same values and the same 'mean' column label.
df.groupby('RaceDesc').agg({'Salary': ['mean', 'count']})
Salary
mean count
RaceDesc
American Indian or Alaska Native 65806.000000 3
Asian 68521.206897 29
Black or African American 74431.025000 80
Hispanic 83667.000000 1
Two or more races 59998.181818 11
White 67287.545455 187

평균은 히스토그램을 그려봤을 때 분포가 어느 정도 정규분포를 띠어야 의미가 있다

급여의 시각화

- 예시1

# Salary histograms for two RaceDesc groups, one facet column per group.
# The membership test replaces the == / or chain; the parentheses replace
# the backslash continuation.
(
    df.query('RaceDesc in ["Black or African American", "White"]')
    .plot.hist(
        backend='plotly',
        x='Salary',
        color='RaceDesc',
        facet_col='RaceDesc',
    )
)

- 예시2: 비율로 계산

# Same two-group salary histogram, with histnorm='probability' passed to
# the plotly backend. The membership test replaces the == / or chain; the
# parentheses replace the backslash continuation.
(
    df.query('RaceDesc in ["Black or African American", "White"]')
    .plot.hist(
        backend='plotly',
        x='Salary',
        color='RaceDesc',
        facet_col='RaceDesc',
        histnorm='probability',
    )
)