import numpy as np
import pandas as pd
!pip install pandas_datareader
Collecting pandas_datareader
Downloading pandas_datareader-0.10.0-py3-none-any.whl (109 kB)
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 109.5/109.5 kB 5.9 MB/s eta 0:00:00
Requirement already satisfied: requests>=2.19.0 in /home/koinup4/anaconda3/envs/py37/lib/python3.7/site-packages (from pandas_datareader) (2.28.2)
Collecting lxml
Downloading lxml-4.9.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl (6.6 MB)
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 6.6/6.6 MB 79.4 MB/s eta 0:00:00:00:010:01
Requirement already satisfied: pandas>=0.23 in /home/koinup4/anaconda3/envs/py37/lib/python3.7/site-packages (from pandas_datareader) (1.3.5)
Requirement already satisfied: numpy>=1.17.3 in /home/koinup4/anaconda3/envs/py37/lib/python3.7/site-packages (from pandas>=0.23->pandas_datareader) (1.21.6)
Requirement already satisfied: python-dateutil>=2.7.3 in /home/koinup4/anaconda3/envs/py37/lib/python3.7/site-packages (from pandas>=0.23->pandas_datareader) (2.8.2)
Requirement already satisfied: pytz>=2017.3 in /home/koinup4/anaconda3/envs/py37/lib/python3.7/site-packages (from pandas>=0.23->pandas_datareader) (2022.7.1)
Requirement already satisfied: idna<4,>=2.5 in /home/koinup4/anaconda3/envs/py37/lib/python3.7/site-packages (from requests>=2.19.0->pandas_datareader) (3.4)
Requirement already satisfied: urllib3<1.27,>=1.21.1 in /home/koinup4/anaconda3/envs/py37/lib/python3.7/site-packages (from requests>=2.19.0->pandas_datareader) (1.26.14)
Requirement already satisfied: certifi>=2017.4.17 in /home/koinup4/anaconda3/envs/py37/lib/python3.7/site-packages (from requests>=2.19.0->pandas_datareader) (2022.12.7)
Requirement already satisfied: charset-normalizer<4,>=2 in /home/koinup4/anaconda3/envs/py37/lib/python3.7/site-packages (from requests>=2.19.0->pandas_datareader) (2.1.1)
Requirement already satisfied: six>=1.5 in /home/koinup4/anaconda3/envs/py37/lib/python3.7/site-packages (from python-dateutil>=2.7.3->pandas>=0.23->pandas_datareader) (1.16.0)
Installing collected packages: lxml, pandas_datareader
Successfully installed lxml-4.9.2 pandas_datareader-0.10.0
from pandas_datareader import data as pdr
- 인터넷상에 있는 주가 정보를 크롤링해서 저장하는 함수
!pip install yfinance
Collecting yfinance
Downloading yfinance-0.2.12-py2.py3-none-any.whl (59 kB)
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 59.2/59.2 kB 4.5 MB/s eta 0:00:00
Collecting html5lib>=1.1
Downloading html5lib-1.1-py2.py3-none-any.whl (112 kB)
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 112.2/112.2 kB 12.9 MB/s eta 0:00:00
Requirement already satisfied: requests>=2.26 in /home/koinup4/anaconda3/envs/py37/lib/python3.7/site-packages (from yfinance) (2.28.2)
Requirement already satisfied: cryptography>=3.3.2 in /home/koinup4/anaconda3/envs/py37/lib/python3.7/site-packages (from yfinance) (3.4.8)
Collecting frozendict>=2.3.4
Downloading frozendict-2.3.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (99 kB)
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 99.8/99.8 kB 19.9 MB/s eta 0:00:00
Collecting multitasking>=0.0.7
Downloading multitasking-0.0.11-py3-none-any.whl (8.5 kB)
Requirement already satisfied: numpy>=1.16.5 in /home/koinup4/anaconda3/envs/py37/lib/python3.7/site-packages (from yfinance) (1.21.6)
Requirement already satisfied: lxml>=4.9.1 in /home/koinup4/anaconda3/envs/py37/lib/python3.7/site-packages (from yfinance) (4.9.2)
Requirement already satisfied: beautifulsoup4>=4.11.1 in /home/koinup4/anaconda3/envs/py37/lib/python3.7/site-packages (from yfinance) (4.11.1)
Requirement already satisfied: pandas>=1.3.0 in /home/koinup4/anaconda3/envs/py37/lib/python3.7/site-packages (from yfinance) (1.3.5)
Requirement already satisfied: pytz>=2022.5 in /home/koinup4/anaconda3/envs/py37/lib/python3.7/site-packages (from yfinance) (2022.7.1)
Collecting appdirs>=1.4.4
Downloading appdirs-1.4.4-py2.py3-none-any.whl (9.6 kB)
Requirement already satisfied: soupsieve>1.2 in /home/koinup4/anaconda3/envs/py37/lib/python3.7/site-packages (from beautifulsoup4>=4.11.1->yfinance) (2.3.2.post1)
Requirement already satisfied: cffi>=1.12 in /home/koinup4/anaconda3/envs/py37/lib/python3.7/site-packages (from cryptography>=3.3.2->yfinance) (1.15.0)
Requirement already satisfied: webencodings in /home/koinup4/anaconda3/envs/py37/lib/python3.7/site-packages (from html5lib>=1.1->yfinance) (0.5.1)
Requirement already satisfied: six>=1.9 in /home/koinup4/anaconda3/envs/py37/lib/python3.7/site-packages (from html5lib>=1.1->yfinance) (1.16.0)
Requirement already satisfied: python-dateutil>=2.7.3 in /home/koinup4/anaconda3/envs/py37/lib/python3.7/site-packages (from pandas>=1.3.0->yfinance) (2.8.2)
Requirement already satisfied: certifi>=2017.4.17 in /home/koinup4/anaconda3/envs/py37/lib/python3.7/site-packages (from requests>=2.26->yfinance) (2022.12.7)
Requirement already satisfied: charset-normalizer<4,>=2 in /home/koinup4/anaconda3/envs/py37/lib/python3.7/site-packages (from requests>=2.26->yfinance) (2.1.1)
Requirement already satisfied: idna<4,>=2.5 in /home/koinup4/anaconda3/envs/py37/lib/python3.7/site-packages (from requests>=2.26->yfinance) (3.4)
Requirement already satisfied: urllib3<1.27,>=1.21.1 in /home/koinup4/anaconda3/envs/py37/lib/python3.7/site-packages (from requests>=2.26->yfinance) (1.26.14)
Requirement already satisfied: pycparser in /home/koinup4/anaconda3/envs/py37/lib/python3.7/site-packages (from cffi>=1.12->cryptography>=3.3.2->yfinance) (2.21)
Installing collected packages: multitasking, appdirs, html5lib, frozendict, yfinance
Successfully installed appdirs-1.4.4 frozendict-2.3.5 html5lib-1.1 multitasking-0.0.11 yfinance-0.2.12
import yfinance as yf
line
data1: 야후 파이낸스
- yahoo finance: https://finance.yahoo.com/
yf.pdr_override()
symbols = ['AMZN','AAPL','GOOG','MSFT','NFLX','NVDA','TSLA']
start = '2020-01-01'
end = '2022-10-30'
df = pdr.get_data_yahoo(symbols,start,end)['Adj Close']
[*********************100%***********************] 7 of 7 completed
df
| AAPL | AMZN | GOOG | MSFT | NFLX | NVDA | TSLA | |
|---|---|---|---|---|---|---|---|
| Date | |||||||
| 2020-01-02 | 73.449379 | 94.900497 | 68.368500 | 155.761826 | 329.809998 | 59.770554 | 28.684000 |
| 2020-01-03 | 72.735313 | 93.748497 | 68.032997 | 153.822311 | 325.899994 | 58.813866 | 29.534000 |
| 2020-01-06 | 73.314880 | 95.143997 | 69.710503 | 154.219925 | 335.829987 | 59.060509 | 30.102667 |
| 2020-01-07 | 72.970078 | 95.343002 | 69.667000 | 152.813766 | 330.750000 | 59.775528 | 31.270666 |
| 2020-01-08 | 74.143898 | 94.598503 | 70.216003 | 155.247818 | 339.260010 | 59.887650 | 32.809334 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 2022-10-24 | 148.975021 | 119.820000 | 102.970001 | 245.939163 | 282.450012 | 125.957771 | 211.250000 |
| 2022-10-25 | 151.855850 | 120.599998 | 104.930000 | 249.331085 | 291.019989 | 132.576080 | 222.419998 |
| 2022-10-26 | 148.875351 | 115.660004 | 94.820000 | 230.093628 | 298.619995 | 128.927017 | 224.639999 |
| 2022-10-27 | 144.339813 | 110.959999 | 92.599998 | 225.547836 | 296.940002 | 131.726288 | 225.089996 |
| 2022-10-28 | 155.245056 | 103.410004 | 96.580002 | 234.619492 | 295.720001 | 138.304611 | 228.520004 |
713 rows × 7 columns
df.columns
Index(['AAPL', 'AMZN', 'GOOG', 'MSFT', 'NFLX', 'NVDA', 'TSLA'], dtype='object')
matplotlib: 1개의 y를 그리기
- 예시1: 1개의 y를 그리기
df.reset_index()
| | Date | AAPL | AMZN | GOOG | MSFT | NFLX | NVDA | TSLA |
|---|---|---|---|---|---|---|---|---|
| 0 | 2020-01-02 | 73.449379 | 94.900497 | 68.368500 | 155.761826 | 329.809998 | 59.770554 | 28.684000 |
| 1 | 2020-01-03 | 72.735313 | 93.748497 | 68.032997 | 153.822311 | 325.899994 | 58.813866 | 29.534000 |
| 2 | 2020-01-06 | 73.314880 | 95.143997 | 69.710503 | 154.219925 | 335.829987 | 59.060509 | 30.102667 |
| 3 | 2020-01-07 | 72.970078 | 95.343002 | 69.667000 | 152.813766 | 330.750000 | 59.775528 | 31.270666 |
| 4 | 2020-01-08 | 74.143898 | 94.598503 | 70.216003 | 155.247818 | 339.260010 | 59.887650 | 32.809334 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 708 | 2022-10-24 | 148.975021 | 119.820000 | 102.970001 | 245.939163 | 282.450012 | 125.957771 | 211.250000 |
| 709 | 2022-10-25 | 151.855850 | 120.599998 | 104.930000 | 249.331085 | 291.019989 | 132.576080 | 222.419998 |
| 710 | 2022-10-26 | 148.875351 | 115.660004 | 94.820000 | 230.093628 | 298.619995 | 128.927017 | 224.639999 |
| 711 | 2022-10-27 | 144.339813 | 110.959999 | 92.599998 | 225.547836 | 296.940002 | 131.726288 | 225.089996 |
| 712 | 2022-10-28 | 155.245056 | 103.410004 | 96.580002 | 234.619492 | 295.720001 | 138.304611 | 228.520004 |
713 rows × 8 columns
df.reset_index().melt(id_vars='Date') # tidy data
| | Date | variable | value |
|---|---|---|---|
| 0 | 2020-01-02 | AAPL | 73.449379 |
| 1 | 2020-01-03 | AAPL | 72.735313 |
| 2 | 2020-01-06 | AAPL | 73.314880 |
| 3 | 2020-01-07 | AAPL | 72.970078 |
| 4 | 2020-01-08 | AAPL | 74.143898 |
| ... | ... | ... | ... |
| 4986 | 2022-10-24 | TSLA | 211.250000 |
| 4987 | 2022-10-25 | TSLA | 222.419998 |
| 4988 | 2022-10-26 | TSLA | 224.639999 |
| 4989 | 2022-10-27 | TSLA | 225.089996 |
| 4990 | 2022-10-28 | TSLA | 228.520004 |
4991 rows × 3 columns
df.reset_index().plot(x='Date', y='AMZN')
<AxesSubplot:xlabel='Date'>
_files/figure-html/cell-13-output-2.png)
- 예시2
df.reset_index().plot(x='Date',y='AMZN', kind='line')
# 위의 코드는 kind가 생략된 것과 같다
<AxesSubplot:xlabel='Date'>
_files/figure-html/cell-14-output-2.png)
- 예시3
df.reset_index().plot.line(x='Date',y='AMZN')
# kind=line 대신에 plot.line
<AxesSubplot:xlabel='Date'>
_files/figure-html/cell-15-output-2.png)
matplotlib: 2개의 y를 겹쳐서 그리기
- 2개의 y를 겹쳐 그리기
df.reset_index().plot(x='Date', y=['AMZN','AAPL'])
<AxesSubplot:xlabel='Date'>
_files/figure-html/cell-16-output-2.png)
matplotlib: 모든 y를 겹쳐서 그리기
- 모든 y를 겹쳐서 그리기
df.reset_index().plot(x='Date')
<AxesSubplot:xlabel='Date'>
_files/figure-html/cell-17-output-2.png)
matplotlib: 그림크기조정
df.reset_index().plot(x='Date',figsize=(8,8))
<AxesSubplot:xlabel='Date'>
_files/figure-html/cell-18-output-2.png)
matplotlib: 서브플랏
- 예시1: 기본 서브플랏
df.reset_index().plot.line(x='Date',subplots=True,figsize=(10,10))
# 겹쳐서 말고 나눠서 그려짐! 신기하군
array([<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>], dtype=object)
_files/figure-html/cell-19-output-2.png)
- 예시2: 레이아웃 조정
df.reset_index().plot.line(x='Date',subplots=True,figsize=(15,15),layout=(4,2))
array([[<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>],
[<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>],
[<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>],
[<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>]],
dtype=object)
_files/figure-html/cell-20-output-2.png)
matplotlib: 폰트조정
df.reset_index().plot.line(x='Date',subplots=True,figsize=(15,15),layout=(4,2),fontsize=15)
array([[<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>],
[<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>],
[<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>],
[<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>]],
dtype=object)
_files/figure-html/cell-21-output-2.png)
matplotlib: 레전드삭제
df.reset_index().plot.line(x='Date',subplots=True, layout=(4,2), legend=False)
array([[<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>],
[<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>],
[<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>],
[<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>]],
dtype=object)
_files/figure-html/cell-22-output-2.png)
plotly: 모든 y를 겹쳐서 그리기
- 방법1
df.reset_index().set_index('Date').stack().reset_index()
| | Date | level_1 | 0 |
|---|---|---|---|
| 0 | 2020-01-02 | AAPL | 73.449379 |
| 1 | 2020-01-02 | AMZN | 94.900497 |
| 2 | 2020-01-02 | GOOG | 68.368500 |
| 3 | 2020-01-02 | MSFT | 155.761826 |
| 4 | 2020-01-02 | NFLX | 329.809998 |
| ... | ... | ... | ... |
| 4986 | 2022-10-28 | GOOG | 96.580002 |
| 4987 | 2022-10-28 | MSFT | 234.619492 |
| 4988 | 2022-10-28 | NFLX | 295.720001 |
| 4989 | 2022-10-28 | NVDA | 138.304611 |
| 4990 | 2022-10-28 | TSLA | 228.520004 |
4991 rows × 3 columns
- 방법2
df.reset_index().melt(id_vars='Date').plot.line(backend='plotly',x='Date',y='value',color='variable')
bar
data2: 핸드폰점유율
df = pd.read_csv('https://raw.githubusercontent.com/kalilurrahman/datasets/main/mobilephonemktshare2020.csv')
df
| Date | Samsung | Apple | Huawei | Xiaomi | Oppo | Mobicel | Motorola | LG | Others | Realme | Nokia | Lenovo | OnePlus | Sony | Asus | ||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2019-10 | 31.49 | 22.09 | 10.02 | 7.79 | 4.10 | 3.15 | 2.41 | 2.40 | 9.51 | 0.54 | 2.35 | 0.95 | 0.96 | 0.70 | 0.84 | 0.74 |
| 1 | 2019-11 | 31.36 | 22.90 | 10.18 | 8.16 | 4.42 | 3.41 | 2.40 | 2.40 | 9.10 | 0.78 | 0.66 | 0.97 | 0.97 | 0.73 | 0.83 | 0.75 |
| 2 | 2019-12 | 31.37 | 24.79 | 9.95 | 7.73 | 4.23 | 3.19 | 2.50 | 2.54 | 8.13 | 0.84 | 0.75 | 0.90 | 0.87 | 0.74 | 0.77 | 0.70 |
| 3 | 2020-01 | 31.29 | 24.76 | 10.61 | 8.10 | 4.25 | 3.02 | 2.42 | 2.40 | 7.55 | 0.88 | 0.69 | 0.88 | 0.86 | 0.79 | 0.80 | 0.69 |
| 4 | 2020-02 | 30.91 | 25.89 | 10.98 | 7.80 | 4.31 | 2.89 | 2.36 | 2.34 | 7.06 | 0.89 | 0.70 | 0.81 | 0.77 | 0.78 | 0.80 | 0.69 |
| 5 | 2020-03 | 30.80 | 27.03 | 10.70 | 7.70 | 4.30 | 2.87 | 2.35 | 2.28 | 6.63 | 0.93 | 0.73 | 0.72 | 0.74 | 0.78 | 0.76 | 0.66 |
| 6 | 2020-04 | 30.41 | 28.79 | 10.28 | 7.60 | 4.20 | 2.75 | 2.51 | 2.28 | 5.84 | 0.90 | 0.75 | 0.69 | 0.71 | 0.80 | 0.76 | 0.70 |
| 7 | 2020-05 | 30.18 | 26.72 | 10.39 | 8.36 | 4.70 | 3.12 | 2.46 | 2.19 | 6.31 | 1.04 | 0.70 | 0.73 | 0.77 | 0.81 | 0.78 | 0.76 |
| 8 | 2020-06 | 31.06 | 25.26 | 10.69 | 8.55 | 4.65 | 3.18 | 2.57 | 2.11 | 6.39 | 1.04 | 0.68 | 0.74 | 0.75 | 0.77 | 0.78 | 0.75 |
| 9 | 2020-07 | 30.95 | 24.82 | 10.75 | 8.94 | 4.69 | 3.46 | 2.45 | 2.03 | 6.41 | 1.13 | 0.65 | 0.76 | 0.74 | 0.76 | 0.75 | 0.72 |
| 10 | 2020-08 | 31.04 | 25.15 | 10.73 | 8.90 | 4.69 | 3.38 | 2.39 | 1.96 | 6.31 | 1.18 | 0.63 | 0.74 | 0.72 | 0.75 | 0.73 | 0.70 |
| 11 | 2020-09 | 30.57 | 24.98 | 10.58 | 9.49 | 4.94 | 3.50 | 2.27 | 1.88 | 6.12 | 1.45 | 0.63 | 0.74 | 0.67 | 0.81 | 0.69 | 0.67 |
| 12 | 2020-10 | 30.25 | 26.53 | 10.44 | 9.67 | 4.83 | 2.54 | 2.21 | 1.79 | 6.04 | 1.55 | 0.63 | 0.69 | 0.65 | 0.85 | 0.67 | 0.64 |
matplotlib: 2개의 y를 겹쳐그리기
- 예시1
df.plot.bar(x='Date', y=['Samsung', 'Apple'])
<AxesSubplot:xlabel='Date'>
_files/figure-html/cell-26-output-2.png)
- 예시2: width옵션으로 폭조정
df.plot.bar(x='Date', y=['Samsung', 'Apple'], width=0.8)
<AxesSubplot:xlabel='Date'>
_files/figure-html/cell-27-output-2.png)
matplotlib: 2개의 y를 겹쳐그리기 + x,y 플립
- 예시: barh를 이용하여 플립
df.plot.barh(x='Date', y=['Samsung', 'Apple'], width=0.8)
<AxesSubplot:ylabel='Date'>
_files/figure-html/cell-28-output-2.png)
plotly: 모든y를 stacked bar로 나타내기
df.melt(id_vars='Date').plot.bar(backend='plotly',x='Date',y='value',color='variable')
plotly: 3개의 y를 겹쳐그리기
df.melt(id_vars='Date')\
.query('variable=="Samsung" or variable=="Apple" or variable == "Huawei"')\
.plot.bar(backend='plotly', x='Date', y='value', color='variable')
- barmode=‘group’
df.melt(id_vars='Date')\
.query('variable=="Samsung" or variable=="Apple" or variable == "Huawei"')\
.plot.bar(backend='plotly', x='Date', y='value', color='variable', barmode='group')
plotly: 3개의 y를 겹쳐그리기 + text
df.melt(id_vars='Date')\
.query('variable=="Samsung" or variable=="Apple" or variable == "Huawei"')\
.plot.bar(backend='plotly', x='Date', y='value', color='variable', barmode='group', text='value', height=600)
plotly: 면분할로 subplot그리기 (facet_col)
df.melt(id_vars='Date').query(' variable=="Samsung" or variable=="Apple"')\
.plot.bar(backend='plotly',x='Date',y='value',color='variable',barmode='group',facet_col='variable')
plotly: 면분할로 subplot그리기 (facet_row)
df.melt(id_vars='Date').query(' variable=="Samsung" or variable=="Apple"')\
.plot.bar(backend='plotly',x='Date',y='value',color='variable',barmode='group',facet_row='variable')
boxplot
data3: 팁
import plotly.express as px
df = px.data.tips()
df
| | total_bill | tip | sex | smoker | day | time | size |
|---|---|---|---|---|---|---|---|
| 0 | 16.99 | 1.01 | Female | No | Sun | Dinner | 2 |
| 1 | 10.34 | 1.66 | Male | No | Sun | Dinner | 3 |
| 2 | 21.01 | 3.50 | Male | No | Sun | Dinner | 3 |
| 3 | 23.68 | 3.31 | Male | No | Sun | Dinner | 2 |
| 4 | 24.59 | 3.61 | Female | No | Sun | Dinner | 4 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 239 | 29.03 | 5.92 | Male | No | Sat | Dinner | 3 |
| 240 | 27.18 | 2.00 | Female | Yes | Sat | Dinner | 2 |
| 241 | 22.67 | 2.00 | Male | Yes | Sat | Dinner | 2 |
| 242 | 17.82 | 1.75 | Male | No | Sat | Dinner | 2 |
| 243 | 18.78 | 3.00 | Female | No | Thur | Dinner | 2 |
244 rows × 7 columns
plotly: 팁의 박스플랏
df.plot.box(backend='plotly',y='tip', width=500, height=500)
plotly: 시간에 따른 팁의 박스플랏
df.plot.box(backend='plotly',x='time', y='tip', width=500, height=500)
plotly: 시간과 성별에 따른 팁의 박스플랏
- 예시1: y=‘tip’, x=‘time’, color=‘sex’
df.plot.box(backend='plotly',x='time', y='tip', color='sex', width=500, height=500)
- 예시2: y=‘tip’, x=‘time’, color=‘sex’, points=‘all’
df.plot.box(backend='plotly',x='time', y='tip', color='sex', points='all',width=500, height=500)
- 저녁에 손님이 더 많다
plotly: 시간,성별,요일에 따른 팁의 박스플랏
- 예시1: y=‘tip’, x=‘time’, color=‘sex’, facet_col=‘day’
df.plot.box(backend='plotly',x='time', y='tip', color='sex', facet_col='day', width=500, height=500)
- 예시2: y=‘tip’, color=‘sex’, facet_col=‘time’, facet_row=‘day’
df.plot.box(backend='plotly',facet_col='time', facet_row='day',y='tip',color='sex',points='all',height=1000)
plotly: 시간,성별,요일,흡연에 따른 팁의 박스플랏
df.plot.box(backend='plotly',facet_col='time', facet_row='day',x='smoker',y='tip',color='sex',points='all',height=1000)
histogram
data4: 인사자료
df = pd.read_csv('https://raw.githubusercontent.com/guebin/DV2022/master/posts/HRDataset_v14.csv')
df
| Employee_Name | EmpID | MarriedID | MaritalStatusID | GenderID | EmpStatusID | DeptID | PerfScoreID | FromDiversityJobFairID | Salary | ... | ManagerName | ManagerID | RecruitmentSource | PerformanceScore | EngagementSurvey | EmpSatisfaction | SpecialProjectsCount | LastPerformanceReview_Date | DaysLateLast30 | Absences | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Adinolfi, Wilson K | 10026 | 0 | 0 | 1 | 1 | 5 | 4 | 0 | 62506 | ... | Michael Albert | 22.0 | Exceeds | 4.60 | 5 | 0 | 1/17/2019 | 0 | 1 | |
| 1 | Ait Sidi, Karthikeyan | 10084 | 1 | 1 | 1 | 5 | 3 | 3 | 0 | 104437 | ... | Simon Roup | 4.0 | Indeed | Fully Meets | 4.96 | 3 | 6 | 2/24/2016 | 0 | 17 |
| 2 | Akinkuolie, Sarah | 10196 | 1 | 1 | 0 | 5 | 5 | 3 | 0 | 64955 | ... | Kissy Sullivan | 20.0 | Fully Meets | 3.02 | 3 | 0 | 5/15/2012 | 0 | 3 | |
| 3 | Alagbe,Trina | 10088 | 1 | 1 | 0 | 1 | 5 | 3 | 0 | 64991 | ... | Elijiah Gray | 16.0 | Indeed | Fully Meets | 4.84 | 5 | 0 | 1/3/2019 | 0 | 15 |
| 4 | Anderson, Carol | 10069 | 0 | 2 | 0 | 5 | 5 | 3 | 0 | 50825 | ... | Webster Butler | 39.0 | Google Search | Fully Meets | 5.00 | 4 | 0 | 2/1/2016 | 0 | 2 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 306 | Woodson, Jason | 10135 | 0 | 0 | 1 | 1 | 5 | 3 | 0 | 65893 | ... | Kissy Sullivan | 20.0 | Fully Meets | 4.07 | 4 | 0 | 2/28/2019 | 0 | 13 | |
| 307 | Ybarra, Catherine | 10301 | 0 | 0 | 0 | 5 | 5 | 1 | 0 | 48513 | ... | Brannon Miller | 12.0 | Google Search | PIP | 3.20 | 2 | 0 | 9/2/2015 | 5 | 4 |
| 308 | Zamora, Jennifer | 10010 | 0 | 0 | 0 | 1 | 3 | 4 | 0 | 220450 | ... | Janet King | 2.0 | Employee Referral | Exceeds | 4.60 | 5 | 6 | 2/21/2019 | 0 | 16 |
| 309 | Zhou, Julia | 10043 | 0 | 0 | 0 | 1 | 3 | 3 | 0 | 89292 | ... | Simon Roup | 4.0 | Employee Referral | Fully Meets | 5.00 | 3 | 5 | 2/1/2019 | 0 | 11 |
| 310 | Zima, Colleen | 10271 | 0 | 4 | 0 | 1 | 5 | 3 | 0 | 45046 | ... | David Stanley | 14.0 | Fully Meets | 4.50 | 5 | 0 | 1/30/2019 | 0 | 2 |
311 rows × 36 columns
인종별 급여비교 (단순 groupby)
df.groupby('RaceDesc').agg({'Salary':[np.mean,"count"]})
| Salary | ||
|---|---|---|
| mean | count | |
| RaceDesc | ||
| American Indian or Alaska Native | 65806.000000 | 3 |
| Asian | 68521.206897 | 29 |
| Black or African American | 74431.025000 | 80 |
| Hispanic | 83667.000000 | 1 |
| Two or more races | 59998.181818 | 11 |
| White | 67287.545455 | 187 |
평균은 히스토그램을 그려봤을 때 약간 정규분포를 띠어야 의미가 있다
급여의 시각화
- 예시1
df.query('RaceDesc == "Black or African American" or RaceDesc == "White"')\
.plot.hist(backend='plotly', x='Salary', color='RaceDesc', facet_col='RaceDesc')
- 예시2: 비율로 계산
df.query('RaceDesc == "Black or African American" or RaceDesc == "White"')\
.plot.hist(backend='plotly',x='Salary',color='RaceDesc',facet_col='RaceDesc',histnorm='probability')