library(MASS) #lm.ridge
library(car) #vif
library(caret) #예측
library(ggplot2)
library(glmnet) #Ridge, Lasso
library(tidyverse)
ref
import
# Return the names of the model terms whose variance inflation factor (VIF)
# is strictly greater than `threshold`.
#
# data:      object handed straight to vif(); the callers in this script pass
#            a fitted lm model (the parameter name is kept for compatibility).
# threshold: numeric cutoff applied to each VIF value.
#
# Returns a character vector of term names (character(0) when none exceed
# the cutoff).
get_high_vif_variables <- function(data, threshold) {
  vif_values <- vif(data)
  high_vif_variables <- names(vif_values[vif_values > threshold])
  high_vif_variables
}
데이터셋
# Load the 2017 pitcher statistics table and preview the first rows.
picher <- read.csv("~/Dropbox/coco/posts/Applied statistics/picher_stats_2017.csv")
head(picher)
선수명 | 팀명 | 승 | 패 | 세 | 홀드 | 블론 | 경기 | 선발 | 이닝 | ⋯ | 홈런.9 | BABIP | LOB. | ERA | RA9.WAR | FIP | kFIP | WAR | 연봉.2018. | 연봉.2017. | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
<chr> | <chr> | <int> | <int> | <int> | <int> | <int> | <int> | <int> | <dbl> | ⋯ | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <int> | <int> | |
1 | 켈리 | SK | 16 | 7 | 0 | 0 | 0 | 30 | 30 | 190.0 | ⋯ | 0.76 | 0.342 | 73.7 | 3.60 | 6.91 | 3.69 | 3.44 | 6.62 | 140000 | 85000 |
2 | 소사 | LG | 11 | 11 | 1 | 0 | 0 | 30 | 29 | 185.1 | ⋯ | 0.53 | 0.319 | 67.1 | 3.88 | 6.80 | 3.52 | 3.41 | 6.08 | 120000 | 50000 |
3 | 양현종 | KIA | 20 | 6 | 0 | 0 | 0 | 31 | 31 | 193.1 | ⋯ | 0.79 | 0.332 | 72.1 | 3.44 | 6.54 | 3.94 | 3.82 | 5.64 | 230000 | 150000 |
4 | 차우찬 | LG | 10 | 7 | 0 | 0 | 0 | 28 | 28 | 175.2 | ⋯ | 1.02 | 0.298 | 75.0 | 3.43 | 6.11 | 4.20 | 4.03 | 4.63 | 100000 | 100000 |
5 | 레일리 | 롯데 | 13 | 7 | 0 | 0 | 0 | 30 | 30 | 187.1 | ⋯ | 0.91 | 0.323 | 74.1 | 3.80 | 6.13 | 4.36 | 4.31 | 4.38 | 111000 | 85000 |
6 | 피어밴드 | KT | 8 | 10 | 0 | 0 | 0 | 26 | 26 | 160.0 | ⋯ | 1.12 | 0.289 | 76.1 | 3.04 | 6.52 | 4.42 | 4.32 | 3.94 | 85000 | 35000 |
# Set the two salary columns aside as one-column data frames, then build `dt`
# from `picher` without them.
dt2018 <- data.frame(new_col = picher$연봉.2018.)
dt2017 <- data.frame(new_col = picher$연봉.2017.)
dt <- subset(picher, select = -c(연봉.2017., 연봉.2018.))
head(dt)
선수명 | 팀명 | 승 | 패 | 세 | 홀드 | 블론 | 경기 | 선발 | 이닝 | 삼진.9 | 볼넷.9 | 홈런.9 | BABIP | LOB. | ERA | RA9.WAR | FIP | kFIP | WAR | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
<chr> | <chr> | <int> | <int> | <int> | <int> | <int> | <int> | <int> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | |
1 | 켈리 | SK | 16 | 7 | 0 | 0 | 0 | 30 | 30 | 190.0 | 8.95 | 2.13 | 0.76 | 0.342 | 73.7 | 3.60 | 6.91 | 3.69 | 3.44 | 6.62 |
2 | 소사 | LG | 11 | 11 | 1 | 0 | 0 | 30 | 29 | 185.1 | 7.43 | 1.85 | 0.53 | 0.319 | 67.1 | 3.88 | 6.80 | 3.52 | 3.41 | 6.08 |
3 | 양현종 | KIA | 20 | 6 | 0 | 0 | 0 | 31 | 31 | 193.1 | 7.36 | 2.09 | 0.79 | 0.332 | 72.1 | 3.44 | 6.54 | 3.94 | 3.82 | 5.64 |
4 | 차우찬 | LG | 10 | 7 | 0 | 0 | 0 | 28 | 28 | 175.2 | 8.04 | 1.95 | 1.02 | 0.298 | 75.0 | 3.43 | 6.11 | 4.20 | 4.03 | 4.63 |
5 | 레일리 | 롯데 | 13 | 7 | 0 | 0 | 0 | 30 | 30 | 187.1 | 7.49 | 2.11 | 0.91 | 0.323 | 74.1 | 3.80 | 6.13 | 4.36 | 4.31 | 4.38 |
6 | 피어밴드 | KT | 8 | 10 | 0 | 0 | 0 | 26 | 26 | 160.0 | 7.42 | 1.74 | 1.12 | 0.289 | 76.1 | 3.04 | 6.52 | 4.42 | 4.32 | 3.94 |
# Re-attach the salary columns at the end of `dt`, 2017 first and 2018 last.
# After each cbind() the newly added (last) column is renamed to its original
# name.
dt <- cbind(dt, new_col = dt2017)
names(dt)[length(names(dt))] <- "연봉.2017."
dt <- cbind(dt, new_col = dt2018)
names(dt)[length(names(dt))] <- "연봉.2018."
head(dt)
선수명 | 팀명 | 승 | 패 | 세 | 홀드 | 블론 | 경기 | 선발 | 이닝 | ⋯ | 홈런.9 | BABIP | LOB. | ERA | RA9.WAR | FIP | kFIP | WAR | 연봉.2017. | 연봉.2018. | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
<chr> | <chr> | <int> | <int> | <int> | <int> | <int> | <int> | <int> | <dbl> | ⋯ | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <int> | <int> | |
1 | 켈리 | SK | 16 | 7 | 0 | 0 | 0 | 30 | 30 | 190.0 | ⋯ | 0.76 | 0.342 | 73.7 | 3.60 | 6.91 | 3.69 | 3.44 | 6.62 | 85000 | 140000 |
2 | 소사 | LG | 11 | 11 | 1 | 0 | 0 | 30 | 29 | 185.1 | ⋯ | 0.53 | 0.319 | 67.1 | 3.88 | 6.80 | 3.52 | 3.41 | 6.08 | 50000 | 120000 |
3 | 양현종 | KIA | 20 | 6 | 0 | 0 | 0 | 31 | 31 | 193.1 | ⋯ | 0.79 | 0.332 | 72.1 | 3.44 | 6.54 | 3.94 | 3.82 | 5.64 | 150000 | 230000 |
4 | 차우찬 | LG | 10 | 7 | 0 | 0 | 0 | 28 | 28 | 175.2 | ⋯ | 1.02 | 0.298 | 75.0 | 3.43 | 6.11 | 4.20 | 4.03 | 4.63 | 100000 | 100000 |
5 | 레일리 | 롯데 | 13 | 7 | 0 | 0 | 0 | 30 | 30 | 187.1 | ⋯ | 0.91 | 0.323 | 74.1 | 3.80 | 6.13 | 4.36 | 4.31 | 4.38 | 85000 | 111000 |
6 | 피어밴드 | KT | 8 | 10 | 0 | 0 | 0 | 26 | 26 | 160.0 | ⋯ | 1.12 | 0.289 | 76.1 | 3.04 | 6.52 | 4.42 | 4.32 | 3.94 | 35000 | 85000 |
# Drop the team-name and player-name columns from `dt`.
dt <- subset(dt, select = -c(팀명, 선수명))
head(dt)
승 | 패 | 세 | 홀드 | 블론 | 경기 | 선발 | 이닝 | 삼진.9 | 볼넷.9 | 홈런.9 | BABIP | LOB. | ERA | RA9.WAR | FIP | kFIP | WAR | 연봉.2017. | 연봉.2018. | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
<int> | <int> | <int> | <int> | <int> | <int> | <int> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <int> | <int> | |
1 | 16 | 7 | 0 | 0 | 0 | 30 | 30 | 190.0 | 8.95 | 2.13 | 0.76 | 0.342 | 73.7 | 3.60 | 6.91 | 3.69 | 3.44 | 6.62 | 85000 | 140000 |
2 | 11 | 11 | 1 | 0 | 0 | 30 | 29 | 185.1 | 7.43 | 1.85 | 0.53 | 0.319 | 67.1 | 3.88 | 6.80 | 3.52 | 3.41 | 6.08 | 50000 | 120000 |
3 | 20 | 6 | 0 | 0 | 0 | 31 | 31 | 193.1 | 7.36 | 2.09 | 0.79 | 0.332 | 72.1 | 3.44 | 6.54 | 3.94 | 3.82 | 5.64 | 150000 | 230000 |
4 | 10 | 7 | 0 | 0 | 0 | 28 | 28 | 175.2 | 8.04 | 1.95 | 1.02 | 0.298 | 75.0 | 3.43 | 6.11 | 4.20 | 4.03 | 4.63 | 100000 | 100000 |
5 | 13 | 7 | 0 | 0 | 0 | 30 | 30 | 187.1 | 7.49 | 2.11 | 0.91 | 0.323 | 74.1 | 3.80 | 6.13 | 4.36 | 4.31 | 4.38 | 85000 | 111000 |
6 | 8 | 10 | 0 | 0 | 0 | 26 | 26 | 160.0 | 7.42 | 1.74 | 1.12 | 0.289 | 76.1 | 3.04 | 6.52 | 4.42 | 4.32 | 3.94 | 35000 | 85000 |
회귀직선적합
model1(원본)
# Full model: regress the 2018 salary on every other column of `dt`.
model1 <- lm(연봉.2018. ~ ., data = dt)
summary(model1)
Call:
lm(formula = 연봉.2018. ~ ., data = dt)
Residuals:
Min 1Q Median 3Q Max
-46529 -2418 424 2649 47773
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.513e+04 1.826e+04 0.829 0.4087
승 1.004e+03 5.375e+02 1.869 0.0639 .
패 -1.836e+02 5.504e+02 -0.334 0.7392
세 -2.112e+01 2.713e+02 -0.078 0.9381
홀드 -1.817e+01 3.161e+02 -0.057 0.9542
블론 4.535e+02 7.610e+02 0.596 0.5522
경기 -1.760e+02 1.456e+02 -1.209 0.2289
선발 -6.719e+02 4.616e+02 -1.456 0.1479
이닝 7.425e+01 1.156e+02 0.642 0.5217
삼진.9 -4.603e+02 2.349e+03 -0.196 0.8449
볼넷.9 1.194e+03 2.256e+03 0.529 0.5976
홈런.9 4.874e+03 1.413e+04 0.345 0.7306
BABIP -9.997e+03 1.486e+04 -0.673 0.5022
LOB. -4.350e+01 1.299e+02 -0.335 0.7382
ERA -7.413e+01 5.693e+02 -0.130 0.8966
RA9.WAR -7.584e+02 1.487e+03 -0.510 0.6109
FIP -6.436e+03 4.477e+04 -0.144 0.8859
kFIP 3.805e+03 3.593e+04 0.106 0.9158
WAR 8.559e+03 1.789e+03 4.783 4.55e-06 ***
연봉.2017. 8.755e-01 4.444e-02 19.698 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 9198 on 132 degrees of freedom
Multiple R-squared: 0.9228, Adjusted R-squared: 0.9116
F-statistic: 82.99 on 19 and 132 DF, p-value: < 2.2e-16
# Reduced model using only the two predictors that were significant in the
# full fit. It is stored under its own name so that `model1` keeps the full
# fit: the later vif(model1) and step(model1, ...) calls report all 19
# predictors, which is only possible if `model1` is not overwritten here.
model1_reduced <- lm(연봉.2018. ~ +WAR + 연봉.2017., data = dt)
summary(model1_reduced)
Call:
lm(formula = 연봉.2018. ~ +WAR + 연봉.2017., data = dt)
Residuals:
Min 1Q Median 3Q Max
-50442 -1849 758 2050 56166
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -576.58811 889.09610 -0.649 0.518
WAR 7007.17364 761.83979 9.198 3.03e-16 ***
연봉.2017. 0.89926 0.04022 22.360 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 9124 on 149 degrees of freedom
Multiple R-squared: 0.9142, Adjusted R-squared: 0.913
F-statistic: 793.8 on 2 and 149 DF, p-value: < 2.2e-16
# Show the variance inflation factor of each predictor in model1.
vif(model1)
- 승
- 7.69934669638176
- 패
- 5.30032799820878
- 세
- 3.03718565968123
- 홀드
- 3.63605161357374
- 블론
- 2.75956034802692
- 경기
- 14.2011271199637
- 선발
- 36.1601878733279
- 이닝
- 60.3244538179009
- 삼진.9
- 78.7161704892493
- 볼넷.9
- 50.7257985016107
- 홈런.9
- 368.399308168573
- BABIP
- 3.11936893312554
- LOB.
- 4.04602533224649
- ERA
- 10.0441738232824
- RA9.WAR
- 13.4198973515016
- FIP
- 12525.2424059086
- kFIP
- 9046.04880487446
- WAR
- 9.99177856816849
- 연봉.2017.
- 2.21186942564398
# List the predictors of model1 whose VIF exceeds 10.
threshold <- 10
high_vif_vars <- get_high_vif_variables(model1, threshold)
print(high_vif_vars)
[1] "경기" "선발" "이닝" "삼진.9" "볼넷.9" "홈런.9" "ERA"
[8] "RA9.WAR" "FIP" "kFIP"
# Repeat with a stricter cutoff: predictors of model1 whose VIF exceeds 15.
threshold <- 15
high_vif_vars <- get_high_vif_variables(model1, threshold)
print(high_vif_vars)
[1] "선발" "이닝" "삼진.9" "볼넷.9" "홈런.9" "FIP" "kFIP"
# Scatterplot matrix of all columns in dt, with a smoothed curve in each panel.
pairs(dt,panel=panel.smooth)
- 수치형 데이터들끼리의 상관계수 확인..
# Keep only the numeric columns and print their pairwise correlations, rounded
# to two decimals. vapply() guarantees a logical mask and drop = FALSE keeps a
# data frame even if only one column qualifies.
dt_numeric <- dt[, vapply(dt, is.numeric, logical(1)), drop = FALSE]
cor_matrix <- cor(dt_numeric)
print(round(cor_matrix, 2))
승 패 세 홀드 블론 경기 선발 이닝 삼진.9 볼넷.9 홈런.9
승 1.00 0.71 0.05 0.09 0.11 0.40 0.77 0.91 0.08 -0.40 -0.12
패 0.71 1.00 0.07 0.10 0.12 0.34 0.77 0.83 0.03 -0.39 -0.06
세 0.05 0.07 1.00 0.11 0.61 0.43 -0.18 0.02 0.17 -0.13 -0.07
홀드 0.09 0.10 0.11 1.00 0.49 0.72 -0.29 0.02 0.19 -0.15 -0.08
블론 0.11 0.12 0.61 0.49 1.00 0.63 -0.26 0.01 0.19 -0.14 -0.06
경기 0.40 0.34 0.43 0.72 0.63 1.00 -0.04 0.38 0.19 -0.36 -0.11
선발 0.77 0.77 -0.18 -0.29 -0.26 -0.04 1.00 0.89 -0.06 -0.31 -0.06
이닝 0.91 0.83 0.02 0.02 0.01 0.38 0.89 1.00 0.04 -0.45 -0.11
삼진.9 0.08 0.03 0.17 0.19 0.19 0.19 -0.06 0.04 1.00 0.11 0.22
볼넷.9 -0.40 -0.39 -0.13 -0.15 -0.14 -0.36 -0.31 -0.45 0.11 1.00 0.30
홈런.9 -0.12 -0.06 -0.07 -0.08 -0.06 -0.11 -0.06 -0.11 0.22 0.30 1.00
BABIP -0.17 -0.13 -0.09 -0.10 -0.11 -0.24 -0.10 -0.19 0.46 0.28 0.36
LOB. 0.13 -0.02 0.17 0.05 0.10 0.11 0.04 0.10 -0.07 -0.15 -0.27
ERA -0.27 -0.19 -0.15 -0.16 -0.16 -0.32 -0.16 -0.29 0.26 0.52 0.63
RA9.WAR 0.85 0.60 0.17 0.00 0.01 0.28 0.74 0.85 0.10 -0.40 -0.19
FIP -0.30 -0.23 -0.20 -0.21 -0.21 -0.35 -0.15 -0.30 -0.15 0.63 0.83
kFIP -0.31 -0.24 -0.23 -0.24 -0.24 -0.37 -0.14 -0.30 -0.32 0.61 0.74
WAR 0.82 0.63 0.08 -0.04 -0.06 0.20 0.76 0.83 0.15 -0.39 -0.21
연봉.2017. 0.63 0.43 0.26 0.00 0.15 0.23 0.49 0.59 0.10 -0.33 -0.10
연봉.2018. 0.71 0.47 0.21 -0.02 0.10 0.21 0.56 0.66 0.10 -0.33 -0.12
BABIP LOB. ERA RA9.WAR FIP kFIP WAR 연봉.2017. 연봉.2018.
승 -0.17 0.13 -0.27 0.85 -0.30 -0.31 0.82 0.63 0.71
패 -0.13 -0.02 -0.19 0.60 -0.23 -0.24 0.63 0.43 0.47
세 -0.09 0.17 -0.15 0.17 -0.20 -0.23 0.08 0.26 0.21
홀드 -0.10 0.05 -0.16 0.00 -0.21 -0.24 -0.04 0.00 -0.02
블론 -0.11 0.10 -0.16 0.01 -0.21 -0.24 -0.06 0.15 0.10
경기 -0.24 0.11 -0.32 0.28 -0.35 -0.37 0.20 0.23 0.21
선발 -0.10 0.04 -0.16 0.74 -0.15 -0.14 0.76 0.49 0.56
이닝 -0.19 0.10 -0.29 0.85 -0.30 -0.30 0.83 0.59 0.66
삼진.9 0.46 -0.07 0.26 0.10 -0.15 -0.32 0.15 0.10 0.10
볼넷.9 0.28 -0.15 0.52 -0.40 0.63 0.61 -0.39 -0.33 -0.33
홈런.9 0.36 -0.27 0.63 -0.19 0.83 0.74 -0.21 -0.10 -0.12
BABIP 1.00 -0.51 0.73 -0.19 0.25 0.17 -0.08 -0.09 -0.10
LOB. -0.51 1.00 -0.72 0.29 -0.29 -0.27 0.14 0.11 0.13
ERA 0.73 -0.72 1.00 -0.34 0.65 0.58 -0.26 -0.20 -0.22
RA9.WAR -0.19 0.29 -0.34 1.00 -0.37 -0.38 0.92 0.64 0.74
FIP 0.25 -0.29 0.65 -0.37 1.00 0.98 -0.39 -0.27 -0.28
kFIP 0.17 -0.27 0.58 -0.38 0.98 1.00 -0.41 -0.28 -0.30
WAR -0.08 0.14 -0.26 0.92 -0.39 -0.41 1.00 0.68 0.79
연봉.2017. -0.09 0.11 -0.20 0.64 -0.27 -0.28 0.68 1.00 0.93
연봉.2018. -0.10 0.13 -0.22 0.74 -0.28 -0.30 0.79 0.93 1.00
다중공선성 해결 방법
VIF계수가 높은 변수 제거
model2(Vif 10 이상인 변수 제거)
# model2: the full model minus every predictor whose VIF exceeded 10 in model1.
model2 <- lm(
  연봉.2018. ~ . - 경기 - 선발 - 이닝 - 삼진.9 - 볼넷.9 - 홈런.9 -
    ERA - RA9.WAR - FIP - kFIP,
  data = dt
)
summary(model2)
Call:
lm(formula = 연봉.2018. ~ . - 경기 - 선발 - 이닝 - 삼진.9 -
볼넷.9 - 홈런.9 - ERA - RA9.WAR - FIP - kFIP, data = dt)
Residuals:
Min 1Q Median 3Q Max
-48657 -1981 511 2303 51073
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 6.432e+03 7.893e+03 0.815 0.4165
승 4.770e+02 4.061e+02 1.175 0.2421
패 -7.851e+02 3.525e+02 -2.227 0.0275 *
세 -1.172e+02 2.150e+02 -0.545 0.5865
홀드 -1.229e+02 1.973e+02 -0.623 0.5344
블론 6.340e+02 7.188e+02 0.882 0.3792
BABIP -7.810e+03 9.994e+03 -0.781 0.4358
LOB. -4.979e+01 7.793e+01 -0.639 0.5239
WAR 7.298e+03 1.169e+03 6.243 4.67e-09 ***
연봉.2017. 8.846e-01 4.322e-02 20.469 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 9149 on 142 degrees of freedom
Multiple R-squared: 0.9178, Adjusted R-squared: 0.9126
F-statistic: 176.1 on 9 and 142 DF, p-value: < 2.2e-16
# Show the VIF of each predictor remaining in model2.
vif(model2)
- 승
- 4.44133840452701
- 패
- 2.19787784118271
- 세
- 1.9291576101908
- 홀드
- 1.43155944990414
- 블론
- 2.48814143828698
- BABIP
- 1.42670331782564
- LOB.
- 1.47225296358887
- WAR
- 4.30937396392511
- 연봉.2017.
- 2.11357639082776
model1에서 다중공선성이 높았던 변수들을 제외하고 lm을 돌렸더니, 회귀모형은 유의하게 나왔고 R^2값도 91%로 높게 나왔지만 model1보다는 R^2값이 조금 작게 나왔다.
다중공선성이 높은 변수를 제외하는 것은 다른 것들도 확인을 해보아야 한다.
VIF제거시 고려사항
- VIF계수가 높은 피처 우선 제거하되, FIP, kFIP와 같이 유사한 변수들은 두개 중에서 하나만 제거해보자.
# model3, step 1: remove only FIP, the predictor with the largest VIF.
model3 <- lm(연봉.2018. ~ . - FIP, data = dt)
summary(model3)
Call:
lm(formula = 연봉.2018. ~ . - FIP, data = dt)
Residuals:
Min 1Q Median 3Q Max
-46688 -2466 423 2597 47710
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.406e+04 1.660e+04 0.847 0.399
승 1.007e+03 5.352e+02 1.882 0.062 .
패 -1.723e+02 5.427e+02 -0.317 0.751
세 -2.263e+01 2.701e+02 -0.084 0.933
홀드 -1.779e+01 3.149e+02 -0.056 0.955
블론 4.563e+02 7.579e+02 0.602 0.548
경기 -1.738e+02 1.443e+02 -1.205 0.230
선발 -6.701e+02 4.598e+02 -1.458 0.147
이닝 7.216e+01 1.142e+02 0.632 0.529
삼진.9 -7.714e+02 9.085e+02 -0.849 0.397
볼넷.9 8.998e+02 9.504e+02 0.947 0.346
홈런.9 2.904e+03 3.404e+03 0.853 0.395
BABIP -9.797e+03 1.474e+04 -0.665 0.507
LOB. -4.465e+01 1.292e+02 -0.346 0.730
ERA -8.076e+01 5.654e+02 -0.143 0.887
RA9.WAR -7.473e+02 1.480e+03 -0.505 0.614
kFIP -1.347e+03 2.371e+03 -0.568 0.571
WAR 8.560e+03 1.783e+03 4.802 4.17e-06 ***
연봉.2017. 8.757e-01 4.426e-02 19.787 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 9164 on 133 degrees of freedom
Multiple R-squared: 0.9227, Adjusted R-squared: 0.9123
F-statistic: 88.25 on 18 and 133 DF, p-value: < 2.2e-16
# Show the VIF values again after removing FIP.
vif(model3)
- 승
- 7.68840921316788
- 패
- 5.19140274673014
- 세
- 3.03263975401923
- 홀드
- 3.63578951116975
- 블론
- 2.75775305712261
- 경기
- 14.0426530671348
- 선발
- 36.1331990754777
- 이닝
- 59.3709458069269
- 삼진.9
- 11.8666574529657
- 볼넷.9
- 9.0682604275144
- 홈런.9
- 21.5595297493918
- BABIP
- 3.09205217740503
- LOB.
- 4.03056643053091
- ERA
- 9.9781711774582
- RA9.WAR
- 13.3837520395074
- kFIP
- 39.6977412189025
- WAR
- 9.99134587181084
- 연봉.2017.
- 2.20945320867407
- VIF계수가 가장 높았던 FIP를 제거하니 전체적으로 VIF값들이 많이 감소했다. 볼넷의 경우 50에서 9로 감소함
# model3, step 2: additionally remove 이닝, then re-check the fit and the VIFs.
model3 <- lm(연봉.2018. ~ . - FIP - 이닝, data = dt)
summary(model3)
vif(model3)
Call:
lm(formula = 연봉.2018. ~ . - FIP - 이닝, data = dt)
Residuals:
Min 1Q Median 3Q Max
-47170 -2539 292 2603 47529
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.425e+04 1.656e+04 0.860 0.3912
승 1.053e+03 5.292e+02 1.989 0.0487 *
패 -1.258e+02 5.365e+02 -0.234 0.8150
세 -7.264e+01 2.576e+02 -0.282 0.7784
홀드 -7.025e+01 3.031e+02 -0.232 0.8171
블론 4.745e+02 7.557e+02 0.628 0.5312
경기 -1.021e+02 8.877e+01 -1.150 0.2523
선발 -4.306e+02 2.595e+02 -1.659 0.0994 .
삼진.9 -7.892e+02 9.060e+02 -0.871 0.3853
볼넷.9 8.829e+02 9.479e+02 0.931 0.3533
홈런.9 2.956e+03 3.396e+03 0.871 0.3855
BABIP -1.004e+04 1.470e+04 -0.683 0.4957
LOB. -4.506e+01 1.289e+02 -0.350 0.7272
ERA -6.838e+01 5.637e+02 -0.121 0.9036
RA9.WAR -4.551e+02 1.402e+03 -0.325 0.7460
kFIP -1.349e+03 2.366e+03 -0.570 0.5696
WAR 8.733e+03 1.758e+03 4.968 2.03e-06 ***
연봉.2017. 8.784e-01 4.395e-02 19.984 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 9143 on 134 degrees of freedom
Multiple R-squared: 0.9225, Adjusted R-squared: 0.9127
F-statistic: 93.84 on 17 and 134 DF, p-value: < 2.2e-16
- 승
- 7.54995904402493
- 패
- 5.09588128549648
- 세
- 2.77188079102657
- 홀드
- 3.38288542173628
- 블론
- 2.75379743632109
- 경기
- 5.34096225137479
- 선발
- 11.5652288817222
- 삼진.9
- 11.8553164987874
- 볼넷.9
- 9.06115155090058
- 홈런.9
- 21.5464901061541
- BABIP
- 3.0899457520436
- LOB.
- 4.03046844648891
- ERA
- 9.96618149145026
- RA9.WAR
- 12.0759322801392
- kFIP
- 39.6977185280435
- WAR
- 9.75717291579296
- 연봉.2017.
- 2.18906326524158
- 그 다음 vif계수값이 높은 ’이닝’을 제거했다.
# model3, step 3: additionally remove kFIP, then re-check the fit and the VIFs.
model3 <- lm(연봉.2018. ~ . - FIP - 이닝 - kFIP, data = dt)
summary(model3)
vif(model3)
Call:
lm(formula = 연봉.2018. ~ . - FIP - 이닝 - kFIP, data = dt)
Residuals:
Min 1Q Median 3Q Max
-47261 -2379 309 2742 47813
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 6.978e+03 1.054e+04 0.662 0.5090
승 1.055e+03 5.278e+02 1.999 0.0476 *
패 -1.135e+02 5.347e+02 -0.212 0.8323
세 -7.382e+01 2.569e+02 -0.287 0.7743
홀드 -7.661e+01 3.021e+02 -0.254 0.8002
블론 5.038e+02 7.521e+02 0.670 0.5040
경기 -9.923e+01 8.841e+01 -1.122 0.2637
선발 -4.402e+02 2.583e+02 -1.704 0.0906 .
삼진.9 -3.109e+02 3.413e+02 -0.911 0.3639
볼넷.9 4.082e+02 4.514e+02 0.904 0.3675
홈런.9 1.129e+03 1.118e+03 1.010 0.3143
BABIP -9.576e+03 1.464e+04 -0.654 0.5141
LOB. -3.779e+01 1.279e+02 -0.295 0.7681
ERA -8.963e+01 5.611e+02 -0.160 0.8733
RA9.WAR -4.669e+02 1.399e+03 -0.334 0.7390
WAR 8.800e+03 1.749e+03 5.030 1.53e-06 ***
연봉.2017. 8.779e-01 4.384e-02 20.027 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 9120 on 135 degrees of freedom
Multiple R-squared: 0.9223, Adjusted R-squared: 0.9131
F-statistic: 100.2 on 16 and 135 DF, p-value: < 2.2e-16
- 승
- 7.54945354936935
- 패
- 5.08758327243023
- 세
- 2.77170233465349
- 홀드
- 3.37829494203411
- 블론
- 2.74099514800148
- 경기
- 5.32417495720081
- 선발
- 11.5165551404735
- 삼진.9
- 1.691074402967
- 볼넷.9
- 2.06526070907551
- 홈런.9
- 2.34713984614222
- BABIP
- 3.08046557773165
- LOB.
- 3.99101474806593
- ERA
- 9.92261602645989
- RA9.WAR
- 12.0732896169765
- WAR
- 9.71340804685137
- 연봉.2017.
- 2.18834997395971
- kFIP 제거
# model3, step 4: additionally remove RA9.WAR, then re-check the fit and VIFs.
model3 <- lm(연봉.2018. ~ . - FIP - 이닝 - kFIP - RA9.WAR, data = dt)
summary(model3)
vif(model3)
Call:
lm(formula = 연봉.2018. ~ . - FIP - 이닝 - kFIP - RA9.WAR,
data = dt)
Residuals:
Min 1Q Median 3Q Max
-47256 -2340 228 2820 48394
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 8.002e+03 1.005e+04 0.796 0.4273
승 1.005e+03 5.044e+02 1.993 0.0483 *
패 -6.188e+01 5.102e+02 -0.121 0.9036
세 -1.005e+02 2.434e+02 -0.413 0.6805
홀드 -8.969e+01 2.986e+02 -0.300 0.7643
블론 5.293e+02 7.457e+02 0.710 0.4790
경기 -1.027e+02 8.749e+01 -1.174 0.2424
선발 -4.671e+02 2.447e+02 -1.909 0.0584 .
삼진.9 -3.052e+02 3.398e+02 -0.898 0.3707
볼넷.9 4.218e+02 4.481e+02 0.941 0.3482
홈런.9 1.154e+03 1.112e+03 1.037 0.3013
BABIP -9.059e+03 1.451e+04 -0.624 0.5334
LOB. -5.310e+01 1.190e+02 -0.446 0.6562
ERA -1.249e+02 5.493e+02 -0.227 0.8205
WAR 8.406e+03 1.289e+03 6.523 1.25e-09 ***
연봉.2017. 8.790e-01 4.358e-02 20.168 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 9090 on 136 degrees of freedom
Multiple R-squared: 0.9223, Adjusted R-squared: 0.9137
F-statistic: 107.6 on 15 and 136 DF, p-value: < 2.2e-16
- 승
- 6.94042688100102
- 패
- 4.66294914095306
- 세
- 2.50425253772256
- 홀드
- 3.32146243586634
- 블론
- 2.71278267026247
- 경기
- 5.24904766467952
- 선발
- 10.3991198289636
- 삼진.9
- 1.68672132907842
- 볼넷.9
- 2.04837389574494
- 홈런.9
- 2.33709850623971
- BABIP
- 3.046073880605
- LOB.
- 3.47761526760098
- ERA
- 9.57142193166993
- WAR
- 5.30623103354661
- 연봉.2017.
- 2.17720905024618
# model3, step 5: additionally remove 선발, then re-check the fit and the VIFs.
model3 <- lm(연봉.2018. ~ . - FIP - 이닝 - kFIP - RA9.WAR - 선발, data = dt)
summary(model3)
vif(model3)
Call:
lm(formula = 연봉.2018. ~ . - FIP - 이닝 - kFIP - RA9.WAR -
선발, data = dt)
Residuals:
Min 1Q Median 3Q Max
-46776 -2395 374 2597 50018
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 7221.2085 10138.7800 0.712 0.4775
승 558.2422 451.0986 1.238 0.2180
패 -758.5773 359.9930 -2.107 0.0369 *
세 -12.1963 241.3026 -0.051 0.9598
홀드 106.5326 283.0186 0.376 0.7072
블론 843.4665 734.3349 1.149 0.2527
경기 -69.6278 86.5803 -0.804 0.4227
삼진.9 -270.1732 342.5480 -0.789 0.4316
볼넷.9 431.0859 452.3679 0.953 0.3423
홈런.9 983.4449 1119.0004 0.879 0.3810
BABIP -8863.3423 14648.1787 -0.605 0.5461
LOB. -57.5239 120.1320 -0.479 0.6328
ERA -88.2397 554.2171 -0.159 0.8737
WAR 7825.6419 1264.4051 6.189 6.57e-09 ***
연봉.2017. 0.8792 0.0440 19.981 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 9178 on 137 degrees of freedom
Multiple R-squared: 0.9202, Adjusted R-squared: 0.912
F-statistic: 112.8 on 14 and 137 DF, p-value: < 2.2e-16
- 승
- 5.44563629770577
- 패
- 2.27744620091112
- 세
- 2.41391273581397
- 홀드
- 2.9278580003899
- 블론
- 2.58069224802437
- 경기
- 5.04287223750904
- 삼진.9
- 1.68181308100372
- 볼넷.9
- 2.04813245831198
- 홈런.9
- 2.32207970272157
- BABIP
- 3.04592151858475
- LOB.
- 3.47629848428834
- ERA
- 9.55974332977644
- WAR
- 5.01053061622992
- 연봉.2017.
- 2.17719632600132
- 유의미한 변수는 ’WAR’과 ’연봉(2017)’이다.
정규화
normalize
# Standardize a numeric vector: subtract its mean and divide by its sample
# standard deviation.
#
# x: numeric vector.
#
# Returns a numeric vector of the same length as `x`. A constant vector has
# sd(x) == 0, so the division produces NaN for every element.
normalize <- function(x) {
  (x - mean(x)) / sd(x)
}
# Apply normalize() to every column of dt and collect the results in a new
# data frame.
df_normalized <- as.data.frame(lapply(dt, normalize))
head(df_normalized)
승 | 패 | 세 | 홀드 | 블론 | 경기 | 선발 | 이닝 | 삼진.9 | 볼넷.9 | 홈런.9 | BABIP | LOB. | ERA | RA9.WAR | FIP | kFIP | WAR | 연봉.2018. | 연봉.2017. | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
<dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | |
1 | 3.313623 | 1.2271453 | -0.3064519 | -0.5857052 | -0.5435919 | 0.05943348 | 2.452068 | 2.645175 | 0.6720988 | -0.8689998 | -0.44238194 | 0.01678276 | 0.4466146 | -0.5870557 | 3.174630 | -0.9710297 | -1.0581252 | 4.503142 | 3.912893 | 2.7347053 |
2 | 2.019505 | 2.5047212 | -0.0985024 | -0.5857052 | -0.5435919 | 0.05943348 | 2.349505 | 2.547755 | 0.1345315 | -0.9875023 | -0.66852133 | -0.24168646 | -0.1227637 | -0.5198553 | 3.114968 | -1.0618879 | -1.0732645 | 4.094734 | 3.266495 | 1.3373033 |
3 | 4.348918 | 0.9077513 | -0.3064519 | -0.5857052 | -0.5435919 | 0.11105570 | 2.554632 | 2.706808 | 0.1097751 | -0.8859287 | -0.41288550 | -0.09559517 | 0.3085835 | -0.6254559 | 2.973948 | -0.8374147 | -0.8663606 | 3.761956 | 6.821679 | 5.3298806 |
4 | 1.760682 | 1.2271453 | -0.3064519 | -0.5857052 | -0.5435919 | -0.04381097 | 2.246942 | 2.350927 | 0.3502657 | -0.9451800 | -0.18674611 | -0.47768010 | 0.5587649 | -0.6278559 | 2.740722 | -0.6984550 | -0.7603854 | 2.998081 | 2.620098 | 3.3335919 |
5 | 2.537153 | 1.2271453 | -0.3064519 | -0.5857052 | -0.5435919 | 0.05943348 | 2.452068 | 2.587518 | 0.1557512 | -0.8774643 | -0.29489973 | -0.19673529 | 0.4811224 | -0.5390554 | 2.751570 | -0.6129414 | -0.6190851 | 2.809003 | 2.975617 | 2.7347053 |
6 | 1.243035 | 2.1853272 | -0.3064519 | -0.5857052 | -0.5435919 | -0.14705541 | 2.041816 | 2.048726 | 0.1309948 | -1.0340569 | -0.08842464 | -0.57882022 | 0.6536613 | -0.7214564 | 2.963100 | -0.5808738 | -0.6140386 | 2.476226 | 2.135301 | 0.7384167 |
# model4: the same full regression as model1, fitted on the standardized data.
model4 <- lm(연봉.2018. ~ ., data = df_normalized)
summary(model4)
Call:
lm(formula = 연봉.2018. ~ ., data = df_normalized)
Residuals:
Min 1Q Median 3Q Max
-1.50382 -0.07816 0.01372 0.08561 1.54402
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -5.061e-16 2.411e-02 0.000 1.0000
승 1.254e-01 6.712e-02 1.869 0.0639 .
패 -1.858e-02 5.569e-02 -0.334 0.7392
세 -3.282e-03 4.216e-02 -0.078 0.9381
홀드 -2.652e-03 4.613e-02 -0.057 0.9542
블론 2.395e-02 4.019e-02 0.596 0.5522
경기 -1.102e-01 9.116e-02 -1.209 0.2289
선발 -2.117e-01 1.455e-01 -1.456 0.1479
이닝 1.207e-01 1.879e-01 0.642 0.5217
삼진.9 -4.207e-02 2.146e-01 -0.196 0.8449
볼넷.9 9.115e-02 1.723e-01 0.529 0.5976
홈런.9 1.602e-01 4.643e-01 0.345 0.7306
BABIP -2.875e-02 4.273e-02 -0.673 0.5022
LOB. -1.630e-02 4.866e-02 -0.335 0.7382
ERA -9.982e-03 7.667e-02 -0.130 0.8966
RA9.WAR -4.519e-02 8.862e-02 -0.510 0.6109
FIP -3.892e-01 2.707e+00 -0.144 0.8859
kFIP 2.437e-01 2.301e+00 0.106 0.9158
WAR 3.657e-01 7.647e-02 4.783 4.55e-06 ***
연봉.2017. 7.087e-01 3.598e-02 19.698 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.2973 on 132 degrees of freedom
Multiple R-squared: 0.9228, Adjusted R-squared: 0.9116
F-statistic: 82.99 on 19 and 132 DF, p-value: < 2.2e-16
# List the predictors of model4 whose VIF exceeds 10.
threshold <- 10
high_vif_vars <- get_high_vif_variables(model4, threshold)
print(high_vif_vars)
[1] "경기" "선발" "이닝" "삼진.9" "볼넷.9" "홈런.9" "ERA"
[8] "RA9.WAR" "FIP" "kFIP"
# Show the VIF of each predictor in the model fitted on standardized data.
vif(model4)
- 승
- 7.6993466963604
- 패
- 5.30032799820294
- 세
- 3.03718565967898
- 홀드
- 3.63605161357941
- 블론
- 2.75956034802382
- 경기
- 14.2011271199764
- 선발
- 36.160187873294
- 이닝
- 60.3244538179135
- 삼진.9
- 78.7161704890434
- 볼넷.9
- 50.7257985014939
- 홈런.9
- 368.399308167005
- BABIP
- 3.11936893312485
- LOB.
- 4.04602533224442
- ERA
- 10.044173823258
- RA9.WAR
- 13.4198973514472
- FIP
- 12525.2424058262
- kFIP
- 9046.04880481494
- WAR
- 9.99177856815799
- 연봉.2017.
- 2.2118694256429
###
변수선택
model3(AIC)
-
AIC(Step)
# Stepwise selection by AIC. Start from the intercept-only model `m0` and let
# step() add or drop terms (direction = "both") within the scope of all
# candidate predictors. The selected model is stored in `model3`.
m0 <- lm(연봉.2018. ~ 1, data = dt)
model3 <- step(
  m0,
  scope = 연봉.2018. ~ 연봉.2017. + 승 + 패 + 세 + 홀드 + 블론 + 경기 +
    선발 + 이닝 + 삼진.9 + 볼넷.9 + 홈런.9 + BABIP + LOB. + ERA +
    RA9.WAR + FIP + kFIP + WAR,
  direction = "both"
)
Start: AIC=3144.3
연봉.2018. ~ 1
Df Sum of Sq RSS AIC
+ 연봉.2017. 1 1.2511e+11 1.9445e+10 2841.4
+ WAR 1 9.0535e+10 5.4022e+10 2996.7
+ RA9.WAR 1 7.9230e+10 6.5326e+10 3025.6
+ 승 1 7.3377e+10 7.1179e+10 3038.6
+ 이닝 1 6.2759e+10 8.1797e+10 3059.8
+ 선발 1 4.5409e+10 9.9147e+10 3089.0
+ 패 1 3.1910e+10 1.1265e+11 3108.4
+ 볼넷.9 1 1.5661e+10 1.2890e+11 3128.9
+ kFIP 1 1.2591e+10 1.3197e+11 3132.4
+ FIP 1 1.1403e+10 1.3315e+11 3133.8
+ ERA 1 6.7332e+09 1.3782e+11 3139.1
+ 세 1 6.4461e+09 1.3811e+11 3139.4
+ 경기 1 6.3714e+09 1.3819e+11 3139.4
+ LOB. 1 2.2831e+09 1.4227e+11 3143.9
+ 홈런.9 1 1.9575e+09 1.4260e+11 3144.2
<none> 1.4456e+11 3144.3
+ 삼진.9 1 1.5567e+09 1.4300e+11 3144.7
+ BABIP 1 1.5139e+09 1.4304e+11 3144.7
+ 블론 1 1.3815e+09 1.4318e+11 3144.8
+ 홀드 1 4.3499e+07 1.4451e+11 3146.3
Step: AIC=2841.38
연봉.2018. ~ 연봉.2017.
Df Sum of Sq RSS AIC
+ WAR 1 7.0421e+09 1.2403e+10 2775.0
+ RA9.WAR 1 4.9589e+09 1.4486e+10 2798.6
+ 승 1 3.8414e+09 1.5604e+10 2809.9
+ 이닝 1 2.8118e+09 1.6633e+10 2819.6
+ 선발 1 2.1318e+09 1.7313e+10 2825.7
+ 패 1 8.8114e+08 1.8564e+10 2836.3
<none> 1.9445e+10 2841.4
+ 블론 1 2.2022e+08 1.9225e+10 2841.7
+ 세 1 1.7105e+08 1.9274e+10 2842.0
+ kFIP 1 1.6254e+08 1.9283e+10 2842.1
+ FIP 1 1.5483e+08 1.9290e+10 2842.2
+ ERA 1 1.0735e+08 1.9338e+10 2842.5
+ LOB. 1 7.7049e+07 1.9368e+10 2842.8
+ 홈런.9 1 7.3957e+07 1.9371e+10 2842.8
+ 볼넷.9 1 6.4565e+07 1.9381e+10 2842.9
+ BABIP 1 5.6938e+07 1.9388e+10 2842.9
+ 홀드 1 3.8024e+07 1.9407e+10 2843.1
+ 삼진.9 1 5.5081e+06 1.9440e+10 2843.3
+ 경기 1 1.2651e+04 1.9445e+10 2843.4
- 연봉.2017. 1 1.2511e+11 1.4456e+11 3144.3
Step: AIC=2775.03
연봉.2018. ~ 연봉.2017. + WAR
Df Sum of Sq RSS AIC
+ 패 1 2.1336e+08 1.2190e+10 2774.4
+ kFIP 1 1.8769e+08 1.2215e+10 2774.7
+ 선발 1 1.7153e+08 1.2232e+10 2774.9
+ FIP 1 1.6877e+08 1.2234e+10 2774.9
+ 볼넷.9 1 1.6419e+08 1.2239e+10 2775.0
<none> 1.2403e+10 2775.0
+ 이닝 1 1.4704e+08 1.2256e+10 2775.2
+ 홈런.9 1 5.1612e+07 1.2351e+10 2776.4
+ 삼진.9 1 4.8349e+07 1.2355e+10 2776.4
+ 승 1 3.0076e+07 1.2373e+10 2776.7
+ 경기 1 2.7246e+07 1.2376e+10 2776.7
+ BABIP 1 2.4182e+07 1.2379e+10 2776.7
+ ERA 1 1.7077e+07 1.2386e+10 2776.8
+ 블론 1 1.1153e+07 1.2392e+10 2776.9
+ RA9.WAR 1 6.6509e+06 1.2396e+10 2776.9
+ 세 1 4.3325e+06 1.2399e+10 2777.0
+ 홀드 1 3.4824e+06 1.2400e+10 2777.0
+ LOB. 1 6.6018e+05 1.2402e+10 2777.0
- WAR 1 7.0421e+09 1.9445e+10 2841.4
- 연봉.2017. 1 4.1619e+10 5.4022e+10 2996.7
Step: AIC=2774.4
연봉.2018. ~ 연봉.2017. + WAR + 패
Df Sum of Sq RSS AIC
+ kFIP 1 1.9738e+08 1.1992e+10 2773.9
+ 승 1 1.8072e+08 1.2009e+10 2774.1
+ FIP 1 1.7496e+08 1.2015e+10 2774.2
<none> 1.2190e+10 2774.4
- 패 1 2.1336e+08 1.2403e+10 2775.0
+ 볼넷.9 1 1.0330e+08 1.2086e+10 2775.1
+ 홈런.9 1 7.1015e+07 1.2119e+10 2775.5
+ 삼진.9 1 6.6895e+07 1.2123e+10 2775.6
+ 블론 1 4.2173e+07 1.2148e+10 2775.9
+ BABIP 1 4.1954e+07 1.2148e+10 2775.9
+ 선발 1 3.1474e+07 1.2158e+10 2776.0
+ ERA 1 1.3441e+07 1.2176e+10 2776.2
+ 이닝 1 5.8966e+06 1.2184e+10 2776.3
+ 세 1 3.4705e+06 1.2186e+10 2776.3
+ RA9.WAR 1 2.4143e+06 1.2187e+10 2776.4
+ LOB. 1 1.7129e+06 1.2188e+10 2776.4
+ 경기 1 1.1252e+06 1.2189e+10 2776.4
+ 홀드 1 1.8992e+05 1.2190e+10 2776.4
- WAR 1 6.3743e+09 1.8564e+10 2836.3
- 연봉.2017. 1 4.1680e+10 5.3870e+10 2998.3
Step: AIC=2773.92
연봉.2018. ~ 연봉.2017. + WAR + 패 + kFIP
Df Sum of Sq RSS AIC
+ 승 1 1.6741e+08 1.1825e+10 2773.8
<none> 1.1992e+10 2773.9
+ 블론 1 1.2836e+08 1.1864e+10 2774.3
- kFIP 1 1.9738e+08 1.2190e+10 2774.4
+ 선발 1 1.1764e+08 1.1875e+10 2774.4
- 패 1 2.2305e+08 1.2215e+10 2774.7
+ BABIP 1 7.5190e+07 1.1917e+10 2775.0
+ ERA 1 2.1818e+07 1.1971e+10 2775.6
+ 홀드 1 2.1404e+07 1.1971e+10 2775.6
+ 삼진.9 1 1.9275e+07 1.1973e+10 2775.7
+ 경기 1 1.7028e+07 1.1975e+10 2775.7
+ 이닝 1 1.3041e+07 1.1979e+10 2775.8
+ FIP 1 9.3610e+06 1.1983e+10 2775.8
+ 볼넷.9 1 8.8432e+06 1.1983e+10 2775.8
+ 홈런.9 1 8.7223e+06 1.1984e+10 2775.8
+ LOB. 1 4.0316e+06 1.1988e+10 2775.9
+ RA9.WAR 1 2.0131e+06 1.1990e+10 2775.9
+ 세 1 1.4454e+06 1.1991e+10 2775.9
- WAR 1 6.4941e+09 1.8486e+10 2837.7
- 연봉.2017. 1 4.1735e+10 5.3727e+10 2999.9
Step: AIC=2773.78
연봉.2018. ~ 연봉.2017. + WAR + 패 + kFIP + 승
Df Sum of Sq RSS AIC
+ 이닝 1 2.1565e+08 1.1609e+10 2773.0
+ 선발 1 1.9668e+08 1.1628e+10 2773.2
<none> 1.1825e+10 2773.8
- 승 1 1.6741e+08 1.1992e+10 2773.9
- kFIP 1 1.8408e+08 1.2009e+10 2774.1
+ 블론 1 8.3012e+07 1.1742e+10 2774.7
+ RA9.WAR 1 6.3182e+07 1.1762e+10 2775.0
+ BABIP 1 4.5875e+07 1.1779e+10 2775.2
+ 볼넷.9 1 1.7921e+07 1.1807e+10 2775.6
+ 삼진.9 1 1.4564e+07 1.1810e+10 2775.6
+ 홈런.9 1 1.2160e+07 1.1813e+10 2775.6
+ ERA 1 8.8026e+06 1.1816e+10 2775.7
+ FIP 1 8.1221e+06 1.1817e+10 2775.7
+ 세 1 5.8214e+06 1.1819e+10 2775.7
+ 홀드 1 5.2671e+06 1.1820e+10 2775.7
+ LOB. 1 3.9758e+05 1.1825e+10 2775.8
+ 경기 1 3.3176e+05 1.1825e+10 2775.8
- 패 1 3.6648e+08 1.2191e+10 2776.4
- WAR 1 3.6353e+09 1.5460e+10 2812.5
- 연봉.2017. 1 3.9188e+10 5.1013e+10 2994.0
Step: AIC=2772.98
연봉.2018. ~ 연봉.2017. + WAR + 패 + kFIP + 승 + 이닝
Df Sum of Sq RSS AIC
- 패 1 3.1923e+07 1.1641e+10 2771.4
<none> 1.1609e+10 2773.0
- kFIP 1 2.1496e+08 1.1824e+10 2773.8
- 이닝 1 2.1565e+08 1.1825e+10 2773.8
+ BABIP 1 8.7592e+07 1.1522e+10 2773.8
+ 선발 1 5.0414e+07 1.1559e+10 2774.3
+ 블론 1 3.9472e+07 1.1570e+10 2774.5
+ 삼진.9 1 3.3863e+07 1.1575e+10 2774.5
+ ERA 1 3.3525e+07 1.1576e+10 2774.5
+ FIP 1 1.8310e+07 1.1591e+10 2774.7
+ 홈런.9 1 1.2031e+07 1.1597e+10 2774.8
+ RA9.WAR 1 1.0398e+07 1.1599e+10 2774.8
+ LOB. 1 3.1362e+06 1.1606e+10 2774.9
+ 경기 1 1.9500e+06 1.1607e+10 2775.0
+ 볼넷.9 1 1.2880e+06 1.1608e+10 2775.0
+ 세 1 2.2726e+05 1.1609e+10 2775.0
+ 홀드 1 9.3003e+04 1.1609e+10 2775.0
- 승 1 3.7002e+08 1.1979e+10 2775.8
- WAR 1 3.7546e+09 1.5364e+10 2813.6
- 연봉.2017. 1 3.8723e+10 5.0333e+10 2993.9
Step: AIC=2771.4
연봉.2018. ~ 연봉.2017. + WAR + kFIP + 승 + 이닝
Df Sum of Sq RSS AIC
<none> 1.1641e+10 2771.4
+ BABIP 1 9.5915e+07 1.1545e+10 2772.1
- kFIP 1 2.2291e+08 1.1864e+10 2772.3
+ 선발 1 6.0820e+07 1.1580e+10 2772.6
+ ERA 1 4.1760e+07 1.1599e+10 2772.8
+ 삼진.9 1 3.6788e+07 1.1604e+10 2772.9
+ 패 1 3.1923e+07 1.1609e+10 2773.0
+ 블론 1 2.3680e+07 1.1618e+10 2773.1
+ FIP 1 2.0239e+07 1.1621e+10 2773.1
+ 홈런.9 1 1.4166e+07 1.1627e+10 2773.2
+ LOB. 1 7.7349e+06 1.1633e+10 2773.3
+ 경기 1 1.5351e+06 1.1640e+10 2773.4
+ RA9.WAR 1 1.4350e+06 1.1640e+10 2773.4
+ 볼넷.9 1 1.4095e+06 1.1640e+10 2773.4
+ 홀드 1 2.5525e+05 1.1641e+10 2773.4
+ 세 1 6.8181e+04 1.1641e+10 2773.4
- 승 1 4.0011e+08 1.2041e+10 2774.5
- 이닝 1 5.5021e+08 1.2191e+10 2776.4
- WAR 1 3.9604e+09 1.5602e+10 2813.9
- 연봉.2017. 1 3.8795e+10 5.0436e+10 2992.2
summary
# Summarize the model chosen by the stepwise AIC search.
summary(model3)
Call:
lm(formula = 연봉.2018. ~ 연봉.2017. + WAR + kFIP + 승 +
이닝, data = dt)
Residuals:
Min 1Q Median 3Q Max
-48717 -2879 204 3083 48961
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -2.691e+03 2.658e+03 -1.012 0.31310
연봉.2017. 8.862e-01 4.018e-02 22.058 < 2e-16 ***
WAR 8.118e+03 1.152e+03 7.048 6.68e-11 ***
kFIP 6.737e+02 4.029e+02 1.672 0.09666 .
승 1.059e+03 4.727e+02 2.240 0.02659 *
이닝 -9.701e+01 3.693e+01 -2.627 0.00954 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 8929 on 146 degrees of freedom
Multiple R-squared: 0.9195, Adjusted R-squared: 0.9167
F-statistic: 333.4 on 5 and 146 DF, p-value: < 2.2e-16
- AIC를 이용하면 최종 모형은 “연봉.2018. ~ 연봉.2017. + WAR + kFIP+승+이닝” 이다.
# Show the VIF of each predictor kept by the stepwise AIC search.
vif(model3)
- 연봉.2017.
- 1.91752518192932
- WAR
- 4.3927072113068
- kFIP
- 1.20722030237251
- 승
- 6.3165168650018
- 이닝
- 6.53436057823665
연봉.2018. ~ 연봉.2017. + WAR + kFIP + 승 + 이닝
후진
# Backward elimination by AIC, starting from model1.
model_back <- step(model1, direction = "backward")
summary(model_back)
Start: AIC=2793.07
연봉.2018. ~ 승 + 패 + 세 + 홀드 + 블론 + 경기 + 선발 +
이닝 + 삼진.9 + 볼넷.9 + 홈런.9 + BABIP + LOB. +
ERA + RA9.WAR + FIP + kFIP + WAR + 연봉.2017.
Df Sum of Sq RSS AIC
- 홀드 1 2.7964e+05 1.1167e+10 2791.1
- 세 1 5.1274e+05 1.1167e+10 2791.1
- kFIP 1 9.4914e+05 1.1168e+10 2791.1
- ERA 1 1.4342e+06 1.1168e+10 2791.1
- FIP 1 1.7480e+06 1.1168e+10 2791.1
- 삼진.9 1 3.2496e+06 1.1170e+10 2791.1
- 패 1 9.4169e+06 1.1176e+10 2791.2
- LOB. 1 9.4883e+06 1.1176e+10 2791.2
- 홈런.9 1 1.0071e+07 1.1177e+10 2791.2
- RA9.WAR 1 2.1998e+07 1.1189e+10 2791.4
- 볼넷.9 1 2.3679e+07 1.1190e+10 2791.4
- 블론 1 3.0047e+07 1.1197e+10 2791.5
- 이닝 1 3.4909e+07 1.1201e+10 2791.6
- BABIP 1 3.8305e+07 1.1205e+10 2791.6
- 경기 1 1.2360e+08 1.1290e+10 2792.7
<none> 1.1167e+10 2793.1
- 선발 1 1.7922e+08 1.1346e+10 2793.5
- 승 1 2.9538e+08 1.1462e+10 2795.0
- WAR 1 1.9353e+09 1.3102e+10 2815.4
- 연봉.2017. 1 3.2824e+10 4.3991e+10 2999.5
Step: AIC=2791.07
연봉.2018. ~ 승 + 패 + 세 + 블론 + 경기 + 선발 + 이닝 +
삼진.9 + 볼넷.9 + 홈런.9 + BABIP + LOB. + ERA + RA9.WAR +
FIP + kFIP + WAR + 연봉.2017.
Df Sum of Sq RSS AIC
- 세 1 2.6213e+05 1.1167e+10 2789.1
- kFIP 1 9.3814e+05 1.1168e+10 2789.1
- ERA 1 1.5378e+06 1.1168e+10 2789.1
- FIP 1 1.7362e+06 1.1169e+10 2789.1
- 삼진.9 1 3.2908e+06 1.1170e+10 2789.1
- LOB. 1 9.8104e+06 1.1177e+10 2789.2
- 홈런.9 1 1.0084e+07 1.1177e+10 2789.2
- 패 1 1.1289e+07 1.1178e+10 2789.2
- 볼넷.9 1 2.3701e+07 1.1191e+10 2789.4
- RA9.WAR 1 2.3959e+07 1.1191e+10 2789.4
- 블론 1 2.9856e+07 1.1197e+10 2789.5
- BABIP 1 3.8334e+07 1.1205e+10 2789.6
- 이닝 1 3.9219e+07 1.1206e+10 2789.6
<none> 1.1167e+10 2791.1
- 선발 1 1.7952e+08 1.1346e+10 2791.5
- 경기 1 1.8031e+08 1.1347e+10 2791.5
- 승 1 2.9677e+08 1.1464e+10 2793.1
- WAR 1 1.9405e+09 1.3107e+10 2813.4
- 연봉.2017. 1 3.2987e+10 4.4154e+10 2998.0
Step: AIC=2789.08
연봉.2018. ~ 승 + 패 + 블론 + 경기 + 선발 + 이닝 +
삼진.9 + 볼넷.9 + 홈런.9 + BABIP + LOB. + ERA + RA9.WAR +
FIP + kFIP + WAR + 연봉.2017.
Df Sum of Sq RSS AIC
- kFIP 1 9.9324e+05 1.1168e+10 2787.1
- ERA 1 1.6052e+06 1.1169e+10 2787.1
- FIP 1 1.8109e+06 1.1169e+10 2787.1
- 삼진.9 1 3.2005e+06 1.1170e+10 2787.1
- LOB. 1 9.9171e+06 1.1177e+10 2787.2
- 홈런.9 1 1.0260e+07 1.1177e+10 2787.2
- 패 1 1.1827e+07 1.1179e+10 2787.2
- 볼넷.9 1 2.3993e+07 1.1191e+10 2787.4
- RA9.WAR 1 2.8604e+07 1.1196e+10 2787.5
- 블론 1 3.3423e+07 1.1201e+10 2787.5
- BABIP 1 3.8245e+07 1.1205e+10 2787.6
- 이닝 1 4.1983e+07 1.1209e+10 2787.7
<none> 1.1167e+10 2789.1
- 선발 1 1.8297e+08 1.1350e+10 2789.6
- 경기 1 1.9004e+08 1.1357e+10 2789.6
- 승 1 3.2363e+08 1.1491e+10 2791.4
- WAR 1 1.9440e+09 1.3111e+10 2811.5
- 연봉.2017. 1 3.4986e+10 4.6153e+10 3002.8
Step: AIC=2787.09
연봉.2018. ~ 승 + 패 + 블론 + 경기 + 선발 + 이닝 +
삼진.9 + 볼넷.9 + 홈런.9 + BABIP + LOB. + ERA + RA9.WAR +
FIP + WAR + 연봉.2017.
Df Sum of Sq RSS AIC
- ERA 1 1.8214e+06 1.1170e+10 2785.1
- LOB. 1 1.0407e+07 1.1179e+10 2785.2
- 패 1 1.1087e+07 1.1179e+10 2785.2
- FIP 1 2.8081e+07 1.1196e+10 2785.5
- RA9.WAR 1 2.8212e+07 1.1196e+10 2785.5
- 블론 1 3.3436e+07 1.1202e+10 2785.6
- BABIP 1 3.7443e+07 1.1206e+10 2785.6
- 이닝 1 4.1025e+07 1.1209e+10 2785.7
- 홈런.9 1 5.5367e+07 1.1223e+10 2785.8
- 삼진.9 1 6.7478e+07 1.1236e+10 2786.0
- 볼넷.9 1 7.1385e+07 1.1239e+10 2786.1
<none> 1.1168e+10 2787.1
- 선발 1 1.8242e+08 1.1351e+10 2787.6
- 경기 1 1.8950e+08 1.1358e+10 2787.7
- 승 1 3.2639e+08 1.1494e+10 2789.5
- WAR 1 1.9444e+09 1.3112e+10 2809.5
- 연봉.2017. 1 3.5014e+10 4.6182e+10 3000.9
Step: AIC=2785.12
연봉.2018. ~ 승 + 패 + 블론 + 경기 + 선발 + 이닝 +
삼진.9 + 볼넷.9 + 홈런.9 + BABIP + LOB. + RA9.WAR +
FIP + WAR + 연봉.2017.
Df Sum of Sq RSS AIC
- LOB. 1 1.0813e+07 1.1181e+10 2783.3
- 패 1 1.1584e+07 1.1181e+10 2783.3
- FIP 1 2.9304e+07 1.1199e+10 2783.5
- 블론 1 3.2749e+07 1.1203e+10 2783.6
- RA9.WAR 1 3.2883e+07 1.1203e+10 2783.6
- 이닝 1 4.0932e+07 1.1211e+10 2783.7
- 홈런.9 1 5.3929e+07 1.1224e+10 2783.8
- BABIP 1 6.8000e+07 1.1238e+10 2784.0
- 삼진.9 1 6.9211e+07 1.1239e+10 2784.1
- 볼넷.9 1 6.9566e+07 1.1239e+10 2784.1
<none> 1.1170e+10 2785.1
- 선발 1 1.8070e+08 1.1351e+10 2785.6
- 경기 1 1.8826e+08 1.1358e+10 2785.7
- 승 1 3.2533e+08 1.1495e+10 2787.5
- WAR 1 1.9871e+09 1.3157e+10 2808.0
- 연봉.2017. 1 3.5061e+10 4.6231e+10 2999.0
Step: AIC=2783.26
연봉.2018. ~ 승 + 패 + 블론 + 경기 + 선발 + 이닝 +
삼진.9 + 볼넷.9 + 홈런.9 + BABIP + RA9.WAR + FIP +
WAR + 연봉.2017.
Df Sum of Sq RSS AIC
- 패 1 1.0204e+07 1.1191e+10 2781.4
- FIP 1 2.3196e+07 1.1204e+10 2781.6
- 블론 1 3.0781e+07 1.1212e+10 2781.7
- 이닝 1 4.3368e+07 1.1224e+10 2781.8
- 홈런.9 1 4.6793e+07 1.1228e+10 2781.9
- RA9.WAR 1 5.5284e+07 1.1236e+10 2782.0
- BABIP 1 5.7524e+07 1.1238e+10 2782.0
- 볼넷.9 1 6.1767e+07 1.1242e+10 2782.1
- 삼진.9 1 6.4118e+07 1.1245e+10 2782.1
<none> 1.1181e+10 2783.3
- 선발 1 1.8271e+08 1.1363e+10 2783.7
- 경기 1 1.8379e+08 1.1365e+10 2783.7
- 승 1 3.3073e+08 1.1511e+10 2785.7
- WAR 1 2.1202e+09 1.3301e+10 2807.7
- 연봉.2017. 1 3.5097e+10 4.6278e+10 2997.2
Step: AIC=2781.4
연봉.2018. ~ 승 + 블론 + 경기 + 선발 + 이닝 + 삼진.9 +
볼넷.9 + 홈런.9 + BABIP + RA9.WAR + FIP + WAR + 연봉.2017.
Df Sum of Sq RSS AIC
- FIP 1 2.2440e+07 1.1213e+10 2779.7
- 블론 1 2.3243e+07 1.1214e+10 2779.7
- 이닝 1 4.1401e+07 1.1232e+10 2780.0
- 홈런.9 1 4.5529e+07 1.1236e+10 2780.0
- RA9.WAR 1 4.6219e+07 1.1237e+10 2780.0
- BABIP 1 5.8410e+07 1.1249e+10 2780.2
- 볼넷.9 1 6.1274e+07 1.1252e+10 2780.2
- 삼진.9 1 6.2728e+07 1.1254e+10 2780.2
<none> 1.1191e+10 2781.4
- 경기 1 2.1328e+08 1.1404e+10 2782.3
- 선발 1 2.3948e+08 1.1430e+10 2782.6
- 승 1 3.4606e+08 1.1537e+10 2784.0
- WAR 1 2.1342e+09 1.3325e+10 2805.9
- 연봉.2017. 1 3.5334e+10 4.6525e+10 2996.0
Step: AIC=2779.71
연봉.2018. ~ 승 + 블론 + 경기 + 선발 + 이닝 + 삼진.9 +
볼넷.9 + 홈런.9 + BABIP + RA9.WAR + WAR + 연봉.2017.
Df Sum of Sq RSS AIC
- 블론 1 2.8280e+07 1.1242e+10 2778.1
- 이닝 1 4.0706e+07 1.1254e+10 2778.3
- RA9.WAR 1 4.3431e+07 1.1257e+10 2778.3
- BABIP 1 6.5327e+07 1.1279e+10 2778.6
- 삼진.9 1 6.9757e+07 1.1283e+10 2778.7
- 볼넷.9 1 8.4569e+07 1.1298e+10 2778.8
- 홈런.9 1 1.0518e+08 1.1319e+10 2779.1
<none> 1.1213e+10 2779.7
- 경기 1 2.0863e+08 1.1422e+10 2780.5
- 선발 1 2.4225e+08 1.1456e+10 2781.0
- 승 1 3.4542e+08 1.1559e+10 2782.3
- WAR 1 2.1544e+09 1.3368e+10 2804.4
- 연봉.2017. 1 3.5313e+10 4.6526e+10 2994.0
Step: AIC=2778.09
연봉.2018. ~ 승 + 경기 + 선발 + 이닝 + 삼진.9 + 볼넷.9 +
홈런.9 + BABIP + RA9.WAR + WAR + 연봉.2017.
Df Sum of Sq RSS AIC
- 이닝 1 3.5553e+07 1.1277e+10 2776.6
- RA9.WAR 1 4.1773e+07 1.1283e+10 2776.7
- 삼진.9 1 6.2788e+07 1.1304e+10 2776.9
- BABIP 1 6.6047e+07 1.1308e+10 2777.0
- 볼넷.9 1 8.6190e+07 1.1328e+10 2777.2
- 홈런.9 1 9.9781e+07 1.1341e+10 2777.4
<none> 1.1242e+10 2778.1
- 경기 1 1.8104e+08 1.1423e+10 2778.5
- 선발 1 2.4212e+08 1.1484e+10 2779.3
- 승 1 3.7616e+08 1.1618e+10 2781.1
- WAR 1 2.1262e+09 1.3368e+10 2802.4
- 연봉.2017. 1 3.7489e+10 4.8731e+10 2999.0
Step: AIC=2776.57
연봉.2018. ~ 승 + 경기 + 선발 + 삼진.9 + 볼넷.9 +
홈런.9 + BABIP + RA9.WAR + WAR + 연봉.2017.
Df Sum of Sq RSS AIC
- RA9.WAR 1 2.7446e+07 1.1305e+10 2774.9
- BABIP 1 6.5912e+07 1.1343e+10 2775.5
- 삼진.9 1 7.3754e+07 1.1351e+10 2775.6
- 볼넷.9 1 7.9195e+07 1.1356e+10 2775.6
- 홈런.9 1 1.1893e+08 1.1396e+10 2776.2
<none> 1.1277e+10 2776.6
- 경기 1 2.5407e+08 1.1531e+10 2778.0
- 승 1 4.4284e+08 1.1720e+10 2780.4
- 선발 1 7.1295e+08 1.1990e+10 2783.9
- WAR 1 2.3107e+09 1.3588e+10 2802.9
- 연봉.2017. 1 3.7540e+10 4.8818e+10 2997.3
Step: AIC=2774.94
연봉.2018. ~ 승 + 경기 + 선발 + 삼진.9 + 볼넷.9 +
홈런.9 + BABIP + WAR + 연봉.2017.
Df Sum of Sq RSS AIC
- BABIP 1 5.1810e+07 1.1356e+10 2773.6
- 삼진.9 1 7.6555e+07 1.1381e+10 2774.0
- 볼넷.9 1 7.6805e+07 1.1381e+10 2774.0
- 홈런.9 1 1.1502e+08 1.1420e+10 2774.5
<none> 1.1305e+10 2774.9
- 경기 1 2.6687e+08 1.1572e+10 2776.5
- 승 1 4.1542e+08 1.1720e+10 2778.4
- 선발 1 7.2929e+08 1.2034e+10 2782.4
- WAR 1 3.6036e+09 1.4908e+10 2815.0
- 연봉.2017. 1 3.7533e+10 4.8837e+10 2995.4
Step: AIC=2773.63
연봉.2018. ~ 승 + 경기 + 선발 + 삼진.9 + 볼넷.9 +
홈런.9 + WAR + 연봉.2017.
Df Sum of Sq RSS AIC
- 볼넷.9 1 7.2317e+07 1.1429e+10 2772.6
- 홈런.9 1 8.5360e+07 1.1442e+10 2772.8
<none> 1.1356e+10 2773.6
- 삼진.9 1 1.8112e+08 1.1538e+10 2774.0
- 경기 1 2.2627e+08 1.1583e+10 2774.6
- 승 1 4.2114e+08 1.1778e+10 2777.2
- 선발 1 7.1482e+08 1.2071e+10 2780.9
- WAR 1 3.5773e+09 1.4934e+10 2813.3
- 연봉.2017. 1 3.7607e+10 4.8964e+10 2993.8
Step: AIC=2772.6
연봉.2018. ~ 승 + 경기 + 선발 + 삼진.9 + 홈런.9 +
WAR + 연봉.2017.
Df Sum of Sq RSS AIC
- 홈런.9 1 1.2421e+08 1.1553e+10 2772.2
- 삼진.9 1 1.4867e+08 1.1577e+10 2772.6
<none> 1.1429e+10 2772.6
- 경기 1 3.5597e+08 1.1785e+10 2775.3
- 승 1 4.5207e+08 1.1881e+10 2776.5
- 선발 1 7.9752e+08 1.2226e+10 2780.8
- WAR 1 3.5112e+09 1.4940e+10 2811.3
- 연봉.2017. 1 3.7614e+10 4.9042e+10 2992.0
Step: AIC=2772.24
연봉.2018. ~ 승 + 경기 + 선발 + 삼진.9 + WAR + 연봉.2017.
Df Sum of Sq RSS AIC
- 삼진.9 1 8.2804e+07 1.1636e+10 2771.3
<none> 1.1553e+10 2772.2
- 경기 1 4.0633e+08 1.1959e+10 2775.5
- 승 1 4.9123e+08 1.2044e+10 2776.6
- 선발 1 7.3028e+08 1.2283e+10 2779.6
- WAR 1 3.4409e+09 1.4994e+10 2809.9
- 연봉.2017. 1 3.8020e+10 4.9573e+10 2991.6
Step: AIC=2771.33
연봉.2018. ~ 승 + 경기 + 선발 + WAR + 연봉.2017.
Df Sum of Sq RSS AIC
<none> 1.1636e+10 2771.3
- 경기 1 4.4893e+08 1.2085e+10 2775.1
- 승 1 5.0693e+08 1.2143e+10 2775.8
- 선발 1 6.7070e+08 1.2306e+10 2777.8
- WAR 1 3.3777e+09 1.5014e+10 2808.1
- 연봉.2017. 1 3.8082e+10 4.9718e+10 2990.1
Call:
lm(formula = 연봉.2018. ~ 승 + 경기 + 선발 + WAR + 연봉.2017.,
data = dt)
Residuals:
Min 1Q Median 3Q Max
-48420 -1976 -56 2829 48859
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1969.51173 1500.85489 1.312 0.1915
승 1174.23883 465.58916 2.522 0.0127 *
경기 -120.60886 50.81729 -2.373 0.0189 *
선발 -440.87646 151.97589 -2.901 0.0043 **
WAR 7220.36610 1109.09601 6.510 1.13e-09 ***
연봉.2017. 0.88247 0.04037 21.860 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 8927 on 146 degrees of freedom
Multiple R-squared: 0.9195, Adjusted R-squared: 0.9168
F-statistic: 333.6 on 5 and 146 DF, p-value: < 2.2e-16
vif(model_back)
- 승
- 6.13100501411465
- 경기
- 1.83604531782849
- 선발
- 4.16007029774887
- WAR
- 4.07445827346373
- 연봉.2017.
- 1.93705640010866
연봉.2018. ~ 승 + 경기 + 선발 + WAR + 연봉.2017.
전진
# Forward selection by AIC: start from the intercept-only model `m0` and add
# one candidate term from `scope` at a time until no addition lowers the AIC.
m0 <- lm(연봉.2018. ~ 1, data = dt)
model_forward <- step(
  m0,
  scope = 연봉.2018. ~ 연봉.2017. + 승 + 패 + 세 + 홀드 + 블론 + 경기 + 선발 +
    이닝 + 삼진.9 + 볼넷.9 + 홈런.9 + BABIP + LOB. + ERA + RA9.WAR + FIP +
    kFIP + WAR,
  direction = "forward"
)
summary(model_forward)
Start: AIC=3144.3
연봉.2018. ~ 1
Df Sum of Sq RSS AIC
+ 연봉.2017. 1 1.2511e+11 1.9445e+10 2841.4
+ WAR 1 9.0535e+10 5.4022e+10 2996.7
+ RA9.WAR 1 7.9230e+10 6.5326e+10 3025.6
+ 승 1 7.3377e+10 7.1179e+10 3038.6
+ 이닝 1 6.2759e+10 8.1797e+10 3059.8
+ 선발 1 4.5409e+10 9.9147e+10 3089.0
+ 패 1 3.1910e+10 1.1265e+11 3108.4
+ 볼넷.9 1 1.5661e+10 1.2890e+11 3128.9
+ kFIP 1 1.2591e+10 1.3197e+11 3132.4
+ FIP 1 1.1403e+10 1.3315e+11 3133.8
+ ERA 1 6.7332e+09 1.3782e+11 3139.1
+ 세 1 6.4461e+09 1.3811e+11 3139.4
+ 경기 1 6.3714e+09 1.3819e+11 3139.4
+ LOB. 1 2.2831e+09 1.4227e+11 3143.9
+ 홈런.9 1 1.9575e+09 1.4260e+11 3144.2
<none> 1.4456e+11 3144.3
+ 삼진.9 1 1.5567e+09 1.4300e+11 3144.7
+ BABIP 1 1.5139e+09 1.4304e+11 3144.7
+ 블론 1 1.3815e+09 1.4318e+11 3144.8
+ 홀드 1 4.3499e+07 1.4451e+11 3146.3
Step: AIC=2841.38
연봉.2018. ~ 연봉.2017.
Df Sum of Sq RSS AIC
+ WAR 1 7042094427 1.2403e+10 2775.0
+ RA9.WAR 1 4958914952 1.4486e+10 2798.6
+ 승 1 3841387936 1.5604e+10 2809.9
+ 이닝 1 2811807174 1.6633e+10 2819.6
+ 선발 1 2131826098 1.7313e+10 2825.7
+ 패 1 881138122 1.8564e+10 2836.3
<none> 1.9445e+10 2841.4
+ 블론 1 220224080 1.9225e+10 2841.7
+ 세 1 171052899 1.9274e+10 2842.0
+ kFIP 1 162536872 1.9283e+10 2842.1
+ FIP 1 154825743 1.9290e+10 2842.2
+ ERA 1 107350094 1.9338e+10 2842.5
+ LOB. 1 77049296 1.9368e+10 2842.8
+ 홈런.9 1 73957140 1.9371e+10 2842.8
+ 볼넷.9 1 64564811 1.9381e+10 2842.9
+ BABIP 1 56938420 1.9388e+10 2842.9
+ 홀드 1 38023685 1.9407e+10 2843.1
+ 삼진.9 1 5508109 1.9440e+10 2843.3
+ 경기 1 12651 1.9445e+10 2843.4
Step: AIC=2775.03
연봉.2018. ~ 연봉.2017. + WAR
Df Sum of Sq RSS AIC
+ 패 1 213356827 1.2190e+10 2774.4
+ kFIP 1 187694356 1.2215e+10 2774.7
+ 선발 1 171531569 1.2232e+10 2774.9
+ FIP 1 168772833 1.2234e+10 2774.9
+ 볼넷.9 1 164189202 1.2239e+10 2775.0
<none> 1.2403e+10 2775.0
+ 이닝 1 147039192 1.2256e+10 2775.2
+ 홈런.9 1 51612430 1.2351e+10 2776.4
+ 삼진.9 1 48348966 1.2355e+10 2776.4
+ 승 1 30075743 1.2373e+10 2776.7
+ 경기 1 27245510 1.2376e+10 2776.7
+ BABIP 1 24181791 1.2379e+10 2776.7
+ ERA 1 17077047 1.2386e+10 2776.8
+ 블론 1 11153112 1.2392e+10 2776.9
+ RA9.WAR 1 6650871 1.2396e+10 2776.9
+ 세 1 4332494 1.2399e+10 2777.0
+ 홀드 1 3482363 1.2400e+10 2777.0
+ LOB. 1 660176 1.2402e+10 2777.0
Step: AIC=2774.4
연봉.2018. ~ 연봉.2017. + WAR + 패
Df Sum of Sq RSS AIC
+ kFIP 1 197383620 1.1992e+10 2773.9
+ 승 1 180715640 1.2009e+10 2774.1
+ FIP 1 174958135 1.2015e+10 2774.2
<none> 1.2190e+10 2774.4
+ 볼넷.9 1 103300993 1.2086e+10 2775.1
+ 홈런.9 1 71014626 1.2119e+10 2775.5
+ 삼진.9 1 66895356 1.2123e+10 2775.6
+ 블론 1 42172679 1.2148e+10 2775.9
+ BABIP 1 41953578 1.2148e+10 2775.9
+ 선발 1 31473684 1.2158e+10 2776.0
+ ERA 1 13441234 1.2176e+10 2776.2
+ 이닝 1 5896647 1.2184e+10 2776.3
+ 세 1 3470456 1.2186e+10 2776.3
+ RA9.WAR 1 2414340 1.2187e+10 2776.4
+ LOB. 1 1712854 1.2188e+10 2776.4
+ 경기 1 1125166 1.2189e+10 2776.4
+ 홀드 1 189917 1.2190e+10 2776.4
Step: AIC=2773.92
연봉.2018. ~ 연봉.2017. + WAR + 패 + kFIP
Df Sum of Sq RSS AIC
+ 승 1 167413120 1.1825e+10 2773.8
<none> 1.1992e+10 2773.9
+ 블론 1 128359041 1.1864e+10 2774.3
+ 선발 1 117641927 1.1875e+10 2774.4
+ BABIP 1 75190355 1.1917e+10 2775.0
+ ERA 1 21818455 1.1971e+10 2775.6
+ 홀드 1 21403854 1.1971e+10 2775.6
+ 삼진.9 1 19275489 1.1973e+10 2775.7
+ 경기 1 17028183 1.1975e+10 2775.7
+ 이닝 1 13040981 1.1979e+10 2775.8
+ FIP 1 9361041 1.1983e+10 2775.8
+ 볼넷.9 1 8843181 1.1983e+10 2775.8
+ 홈런.9 1 8722328 1.1984e+10 2775.8
+ LOB. 1 4031616 1.1988e+10 2775.9
+ RA9.WAR 1 2013063 1.1990e+10 2775.9
+ 세 1 1445393 1.1991e+10 2775.9
Step: AIC=2773.78
연봉.2018. ~ 연봉.2017. + WAR + 패 + kFIP + 승
Df Sum of Sq RSS AIC
+ 이닝 1 215650124 1.1609e+10 2773.0
+ 선발 1 196677432 1.1628e+10 2773.2
<none> 1.1825e+10 2773.8
+ 블론 1 83011867 1.1742e+10 2774.7
+ RA9.WAR 1 63182313 1.1762e+10 2775.0
+ BABIP 1 45874866 1.1779e+10 2775.2
+ 볼넷.9 1 17920561 1.1807e+10 2775.6
+ 삼진.9 1 14563944 1.1810e+10 2775.6
+ 홈런.9 1 12160231 1.1813e+10 2775.6
+ ERA 1 8802604 1.1816e+10 2775.7
+ FIP 1 8122100 1.1817e+10 2775.7
+ 세 1 5821352 1.1819e+10 2775.7
+ 홀드 1 5267064 1.1820e+10 2775.7
+ LOB. 1 397579 1.1825e+10 2775.8
+ 경기 1 331762 1.1825e+10 2775.8
Step: AIC=2772.98
연봉.2018. ~ 연봉.2017. + WAR + 패 + kFIP + 승 + 이닝
Df Sum of Sq RSS AIC
<none> 1.1609e+10 2773.0
+ BABIP 1 87591503 1.1522e+10 2773.8
+ 선발 1 50413708 1.1559e+10 2774.3
+ 블론 1 39472232 1.1570e+10 2774.5
+ 삼진.9 1 33863019 1.1575e+10 2774.5
+ ERA 1 33524887 1.1576e+10 2774.5
+ FIP 1 18310110 1.1591e+10 2774.7
+ 홈런.9 1 12031455 1.1597e+10 2774.8
+ RA9.WAR 1 10397930 1.1599e+10 2774.8
+ LOB. 1 3136209 1.1606e+10 2774.9
+ 경기 1 1950014 1.1607e+10 2775.0
+ 볼넷.9 1 1288038 1.1608e+10 2775.0
+ 세 1 227255 1.1609e+10 2775.0
+ 홀드 1 93003 1.1609e+10 2775.0
Call:
lm(formula = 연봉.2018. ~ 연봉.2017. + WAR + 패 + kFIP +
승 + 이닝, data = dt)
Residuals:
Min 1Q Median 3Q Max
-48378 -2526 133 2563 48361
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -2.653e+03 2.664e+03 -0.996 0.3209
연봉.2017. 8.856e-01 4.027e-02 21.992 < 2e-16 ***
WAR 8.003e+03 1.169e+03 6.848 1.97e-10 ***
패 -2.708e+02 4.288e+02 -0.631 0.5287
kFIP 6.622e+02 4.042e+02 1.639 0.1035
승 1.025e+03 4.767e+02 2.150 0.0332 *
이닝 -7.811e+01 4.760e+01 -1.641 0.1029
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 8948 on 145 degrees of freedom
Multiple R-squared: 0.9197, Adjusted R-squared: 0.9164
F-statistic: 276.8 on 6 and 145 DF, p-value: < 2.2e-16
vif(model_forward)
- 연봉.2017.
- 1.9185269678088
- WAR
- 4.50275401281816
- 패
- 3.39937428417761
- kFIP
- 1.20965408072733
- 승
- 6.3982769107728
- 이닝
- 10.8086888355271
연봉.2018. ~ 연봉.2017. + WAR + 패 + kFIP + 승 + 이닝
PCA(주성분분석)
- 서로 상관성이 높은 변수들의 선형 결합으로 새로운 변수(주성분)를 만들어, 기존의 상관성이 높은 변수들을 요약·축소하는 기법
# Principal component analysis on the standardized explanatory variables.
dt2 <- dt[, 1:19]  # explanatory variables
dt3 <- dt[, 20]    # response variable
# Spell out TRUE (T is reassignable) and the real argument name `scale.`
# instead of relying on partial matching of `scale`.
procomp.result2 <- prcomp(dt2, center = TRUE, scale. = TRUE)
summary(procomp.result2)
Importance of components:
PC1 PC2 PC3 PC4 PC5 PC6 PC7
Standard deviation 2.6109 1.8528 1.5587 1.27735 1.07673 0.94635 0.77437
Proportion of Variance 0.3588 0.1807 0.1279 0.08588 0.06102 0.04714 0.03156
Cumulative Proportion 0.3588 0.5395 0.6673 0.75322 0.81424 0.86137 0.89293
PC8 PC9 PC10 PC11 PC12 PC13 PC14
Standard deviation 0.75305 0.60013 0.57118 0.51280 0.43707 0.31874 0.28648
Proportion of Variance 0.02985 0.01896 0.01717 0.01384 0.01005 0.00535 0.00432
Cumulative Proportion 0.92278 0.94173 0.95890 0.97274 0.98280 0.98815 0.99247
PC15 PC16 PC17 PC18 PC19
Standard deviation 0.27007 0.21161 0.1236 0.10061 0.006742
Proportion of Variance 0.00384 0.00236 0.0008 0.00053 0.000000
Cumulative Proportion 0.99630 0.99866 0.9995 1.00000 1.000000
# Same PCA via princomp(); cor = TRUE decomposes the correlation matrix, so
# the variables are standardized before extracting components.
dt.pca <- princomp(dt2, cor = TRUE)
summary(dt.pca)
Importance of components:
Comp.1 Comp.2 Comp.3 Comp.4 Comp.5
Standard deviation 2.6109027 1.8528422 1.5587351 1.2773530 1.07672785
Proportion of Variance 0.3587796 0.1806855 0.1278766 0.0858753 0.06101805
Cumulative Proportion 0.3587796 0.5394651 0.6673417 0.7532170 0.81423504
Comp.6 Comp.7 Comp.8 Comp.9 Comp.10
Standard deviation 0.94635475 0.77436931 0.75305028 0.60012648 0.57117946
Proportion of Variance 0.04713617 0.03156041 0.02984656 0.01895536 0.01717084
Cumulative Proportion 0.86137122 0.89293163 0.92277819 0.94173355 0.95890439
Comp.11 Comp.12 Comp.13 Comp.14 Comp.15
Standard deviation 0.51280325 0.4370697 0.31874103 0.286476368 0.270069813
Proportion of Variance 0.01384038 0.0100542 0.00534715 0.004319406 0.003838827
Cumulative Proportion 0.97274477 0.9827990 0.98814612 0.992465529 0.996304355
Comp.16 Comp.17 Comp.18 Comp.19
Standard deviation 0.211606675 0.1235839263 0.1006053051 6.741848e-03
Proportion of Variance 0.002356704 0.0008038414 0.0005327067 2.392237e-06
Cumulative Proportion 0.998661060 0.9994649011 0.9999976078 1.000000e+00
- 제1주성분부터 제6주성분까지의 누적 분산비율은 약 86.14%로, 6개의 주성분만으로 전체 데이터 변동의 약 86.14%를 설명할 수 있다.
screeplot(dt.pca, npcs=8, type="lines")
- 주성분들에 의해 설명되는 변동 비율
loadings(dt.pca)
Loadings:
Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7 Comp.8 Comp.9
승 0.322 0.201 0.105
패 0.272 0.198 0.108 0.236 -0.108 -0.477 -0.294
세 -0.202 0.289 0.126 -0.512 -0.426 -0.192 0.360
홀드 -0.253 0.336 0.141 0.417 0.343 0.154 0.296
블론 -0.274 0.396 0.204 -0.208 0.116 -0.183 -0.354
경기 0.191 -0.220 0.372 0.222 0.227 0.236
선발 0.261 0.350 -0.123 -0.250 -0.105
이닝 0.329 0.235 0.109 -0.134
삼진.9 0.393 -0.404 -0.229 0.455 -0.171 -0.204 -0.211
볼넷.9 -0.250 0.116 -0.236 0.289 0.752 -0.178
홈런.9 -0.173 0.286 0.276 0.268 0.154 -0.535
BABIP -0.142 0.207 0.292 -0.444 0.146
LOB. 0.131 -0.209 -0.230 0.228 -0.416 0.495 -0.174
ERA -0.235 0.294 0.288 -0.156 -0.133 0.106 0.107
RA9.WAR 0.327 0.176 -0.181 0.124 0.119 0.391
FIP -0.259 0.284 0.378
kFIP -0.257 0.268 0.423
WAR 0.318 0.213 -0.126 0.113 0.165 0.235
연봉.2017. 0.252 0.130 0.103 -0.296 -0.181 0.629 -0.535
Comp.10 Comp.11 Comp.12 Comp.13 Comp.14 Comp.15 Comp.16 Comp.17
승 0.383 0.237 0.542 0.550 0.102
패 -0.478 -0.137 0.321 -0.270 -0.229
세 -0.134 -0.328 0.242 0.206 0.105
홀드 0.128 -0.339 -0.335 0.264 -0.113 0.240
블론 0.363 0.513 -0.251 -0.150 -0.121
경기 -0.183 0.560 -0.322 -0.240
선발 0.154 -0.463 0.328 0.249
이닝 0.213 -0.370 0.151
삼진.9 -0.380 0.376
볼넷.9 -0.105 0.128 -0.359
홈런.9 -0.614
BABIP 0.665 -0.171 0.296 -0.149 0.187
LOB. 0.414 -0.155 0.112 -0.328 0.216
ERA -0.141 0.336 -0.206 -0.663 0.296
RA9.WAR 0.123 -0.243 -0.247 -0.163 -0.683
FIP 0.323
kFIP 0.492
WAR -0.398 -0.499 0.305 0.473
연봉.2017. -0.229 0.174
Comp.18 Comp.19
승
패
세
홀드
블론
경기 0.318
선발 0.553
이닝 -0.758
삼진.9
볼넷.9
홈런.9 -0.125
BABIP
LOB.
ERA
RA9.WAR
FIP 0.754
kFIP -0.641
WAR
연봉.2017.
Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7 Comp.8 Comp.9
SS loadings 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000
Proportion Var 0.053 0.053 0.053 0.053 0.053 0.053 0.053 0.053 0.053
Cumulative Var 0.053 0.105 0.158 0.211 0.263 0.316 0.368 0.421 0.474
Comp.10 Comp.11 Comp.12 Comp.13 Comp.14 Comp.15 Comp.16 Comp.17
SS loadings 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000
Proportion Var 0.053 0.053 0.053 0.053 0.053 0.053 0.053 0.053
Cumulative Var 0.526 0.579 0.632 0.684 0.737 0.789 0.842 0.895
Comp.18 Comp.19
SS loadings 1.000 1.000
Proportion Var 0.053 0.053
Cumulative Var 0.947 1.000
능형회귀
# Ridge regression over a grid of penalty values (0.01, 0.11, ..., 19.91).
rfit <- lm.ridge(연봉.2018. ~ ., data = dt, lambda = seq(0.01, 20, 0.1))
# Call MASS::select explicitly: dplyr (attached through tidyverse) also
# exports `select`, and whichever package is attached last masks the other.
# Prints the HKB, L-W and GCV-optimal lambda.
MASS::select(rfit)
modified HKB estimator is 1.552458
modified L-W estimator is 1.638756
smallest value of GCV at 4.11
# Coefficients at the lambda with the smallest GCV. Index by position rather
# than comparing the numeric lambda with the string '4.11' through the
# partially matched `$lam`.
# Note: `rfit$coef` is on the scaled-predictor scale; `coef(rfit)` returns
# coefficients on the original scale.
round(rfit$coef[, which.min(rfit$GCV)], 3)
- 승
- 3659.317
- 패
- -737.614
- 세
- -93.401
- 홀드
- -155.777
- 블론
- 859.526
- 경기
- -1932.839
- 선발
- -3234.453
- 이닝
- 441.579
- 삼진.9
- -934.786
- 볼넷.9
- 1047.402
- 홈런.9
- 1328.778
- BABIP
- -755.31
- LOB.
- -578.691
- ERA
- -288.368
- RA9.WAR
- 516.127
- FIP
- -176.744
- kFIP
- -420.71
- WAR
- 9904.312
- 연봉.2017.
- 21113.076
# Ridge trace: one line per coefficient as a function of lambda.
matplot(
  rfit$lambda, t(rfit$coef),
  type = "l",
  xlab = expression(lambda),
  ylab = expression(bold(beta)(lambda)),
  lwd = 2
)
abline(h = 0, col = "grey", lty = 2)
# Mark the GCV-optimal lambda. The previous hard-coded v = 14.91 did not
# match the value reported by select(rfit).
abline(v = rfit$lambda[which.min(rfit$GCV)], col = "black", lty = 2)
glm
# Design matrix for glmnet: expand the formula and drop the intercept column,
# because glmnet fits its own intercept.
X <- model.matrix(연봉.2018. ~ ., dt)[, -1]
y <- dt$연봉.2018.
head(X)
승 | 패 | 세 | 홀드 | 블론 | 경기 | 선발 | 이닝 | 삼진.9 | 볼넷.9 | 홈런.9 | BABIP | LOB. | ERA | RA9.WAR | FIP | kFIP | WAR | 연봉.2017. | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | 16 | 7 | 0 | 0 | 0 | 30 | 30 | 190.0 | 8.95 | 2.13 | 0.76 | 0.342 | 73.7 | 3.60 | 6.91 | 3.69 | 3.44 | 6.62 | 85000 |
2 | 11 | 11 | 1 | 0 | 0 | 30 | 29 | 185.1 | 7.43 | 1.85 | 0.53 | 0.319 | 67.1 | 3.88 | 6.80 | 3.52 | 3.41 | 6.08 | 50000 |
3 | 20 | 6 | 0 | 0 | 0 | 31 | 31 | 193.1 | 7.36 | 2.09 | 0.79 | 0.332 | 72.1 | 3.44 | 6.54 | 3.94 | 3.82 | 5.64 | 150000 |
4 | 10 | 7 | 0 | 0 | 0 | 28 | 28 | 175.2 | 8.04 | 1.95 | 1.02 | 0.298 | 75.0 | 3.43 | 6.11 | 4.20 | 4.03 | 4.63 | 100000 |
5 | 13 | 7 | 0 | 0 | 0 | 30 | 30 | 187.1 | 7.49 | 2.11 | 0.91 | 0.323 | 74.1 | 3.80 | 6.13 | 4.36 | 4.31 | 4.38 | 85000 |
6 | 8 | 10 | 0 | 0 | 0 | 26 | 26 | 160.0 | 7.42 | 1.74 | 1.12 | 0.289 | 76.1 | 3.04 | 6.52 | 4.42 | 4.32 | 3.94 | 35000 |
# Ridge fit with glmnet (alpha = 0 selects the ridge penalty).
# NOTE(review): this grid tops out at lambda = 100, while the cross-validation
# printed later in this file selects lambda in the thousands; consider
# widening the grid.
ridge.fit <- glmnet(X, y, alpha = 0, lambda = seq(0, 100, 10))
plot(ridge.fit, label = TRUE)
abline(h = 0, col = "grey", lty = 2)
summary(ridge.fit)
Length Class Mode
a0 11 -none- numeric
beta 209 dgCMatrix S4
df 11 -none- numeric
dim 2 -none- numeric
lambda 11 -none- numeric
dev.ratio 11 -none- numeric
nulldev 1 -none- numeric
npasses 1 -none- numeric
jerr 1 -none- numeric
offset 1 -none- logical
call 5 -none- call
nobs 1 -none- numeric
# Leave-one-out cross-validation (one fold per observation) for the ridge
# penalty. With fewer than 3 observations per fold glmnet enforces
# grouped = FALSE anyway; setting it explicitly avoids the warning.
cv.fit <- cv.glmnet(X, y, alpha = 0, nfolds = length(y), grouped = FALSE)
Warning message:
“Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold”
cv.fit
Call: cv.glmnet(x = X, y = y, nfolds = length(y), alpha = 0)
Measure: Mean-Squared Error
Lambda Index Measure SE Nonzero
min 2869 100 117171048 49876335 19
1se 12711 84 166795504 78214517 19
plot(cv.fit)
예측
WAR이라는 변수가 다른 설명변수의 곱으로 이루어진 변수니까, 인터넷에서 나오는 WAR계산법에 들어가는 변수들을 제거해보자.(근데 인터넷에 말이 다 다름 ㅎㅎ)
ERA, 이닝수, FIP는 일단 빼야함.
FIP자체가 홈런, 삼진, 볼넷 등의 값인데..
step, 전진: 연봉.2017.이랑 kFIP, 볼넷.9, 삼진.9를 넣어보자.
연봉.2018. ~ 연봉.2017. + WAR + kFIP + 승 + 이닝
후진: 연봉.2018. ~ 승 + 경기 + 선발 + WAR + 연봉.2017.
연봉.2018. ~ 연봉.2017. + WAR + 패 + kFIP + 승 + 이닝
# Refit without WAR and its component statistics, keeping only last year's
# salary, kFIP and the per-9 walk/strikeout rates.
# NOTE(review): this reassigns `model3`, which was summarized earlier in this
# file with a different formula.
model3 <- lm(연봉.2018. ~ 연봉.2017. + kFIP + 볼넷.9 + 삼진.9, data = dt)
summary(model3)
Call:
lm(formula = 연봉.2018. ~ 연봉.2017. + kFIP + 볼넷.9 +
삼진.9, data = dt)
Residuals:
Min 1Q Median 3Q Max
-65987 -2665 -72 1406 61788
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 3604.44247 4650.08368 0.775 0.440
연봉.2017. 1.13680 0.03994 28.461 <2e-16 ***
kFIP -552.00216 675.96374 -0.817 0.415
볼넷.9 -19.23606 556.28780 -0.035 0.972
삼진.9 -42.36062 381.57885 -0.111 0.912
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 11450 on 147 degrees of freedom
Multiple R-squared: 0.8666, Adjusted R-squared: 0.863
F-statistic: 238.8 on 4 and 147 DF, p-value: < 2.2e-16
# Hold-out evaluation setup: five predictors, 80/20 train/test split, then an
# ordinary least-squares fit on the training part only.
# data.frame() rewrites the `/` in the backtick names to `.`, so the columns
# end up named 볼넷.9 and 삼진.9.
X <- data.frame(
  FIP = dt$FIP,
  WAR = dt$WAR,
  `볼넷/9` = dt$볼넷.9,
  `삼진/9` = dt$삼진.9,
  `연봉.2017.` = dt$연봉.2017.
)
y <- dt$연봉.2018.

set.seed(19)
train_indices <- createDataPartition(y, p = 0.8, list = FALSE)
X_train <- X[train_indices, ]
X_test <- X[-train_indices, ]
y_train <- y[train_indices]
y_test <- y[-train_indices]

model <- lm(y_train ~ ., data = cbind(X_train, y_train))
summary(model)
Call:
lm(formula = y_train ~ ., data = cbind(X_train, y_train))
Residuals:
Min 1Q Median 3Q Max
-49900 -1887 574 2975 49836
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -1.261e+03 4.037e+03 -0.312 0.755
FIP 1.395e+02 6.349e+02 0.220 0.826
WAR 8.148e+03 9.161e+02 8.894 8.65e-15 ***
볼넷.9 5.772e+02 5.144e+02 1.122 0.264
삼진.9 -4.929e+02 3.528e+02 -1.397 0.165
연봉.2017. 9.156e-01 4.691e-02 19.518 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 9667 on 117 degrees of freedom
Multiple R-squared: 0.9104, Adjusted R-squared: 0.9065
F-statistic: 237.7 on 5 and 117 DF, p-value: < 2.2e-16
흠
# Predict the 2018 salary for every pitcher with the fitted `model`, then
# attach the predictions to `picher`.
X <- data.frame(
  FIP = picher$FIP,
  WAR = picher$WAR,
  `볼넷/9` = picher$볼넷.9,
  `삼진/9` = picher$삼진.9,
  `연봉.2017.` = picher$연봉.2017.
)
predict_2018_salary <- predict(model, newdata = X)
# NOTE(review): the printed table shows `picher` already has a `new_col`, so
# this adds a second column under the same name; consider a descriptive,
# unique name.
picher <- cbind(picher, new_col = predict_2018_salary)
head(picher)
선수명 | 팀명 | 승 | 패 | 세 | 홀드 | 블론 | 경기 | 선발 | 이닝 | ⋯ | LOB. | ERA | RA9.WAR | FIP | kFIP | WAR | 연봉.2018. | 연봉.2017. | new_col | new_col.1 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
<chr> | <chr> | <int> | <int> | <int> | <int> | <int> | <int> | <int> | <dbl> | ⋯ | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <int> | <int> | <dbl> | <dbl> | |
1 | 켈리 | SK | 16 | 7 | 0 | 0 | 0 | 30 | 30 | 190.0 | ⋯ | 73.7 | 3.60 | 6.91 | 3.69 | 3.44 | 6.62 | 140000 | 85000 | 63003.59 | 127838.00 |
2 | 소사 | LG | 11 | 11 | 1 | 0 | 0 | 30 | 29 | 185.1 | ⋯ | 67.1 | 3.88 | 6.80 | 3.52 | 3.41 | 6.08 | 120000 | 50000 | 37061.64 | 91955.34 |
3 | 양현종 | KIA | 20 | 6 | 0 | 0 | 0 | 31 | 31 | 193.1 | ⋯ | 72.1 | 3.44 | 6.54 | 3.94 | 3.82 | 5.64 | 230000 | 150000 | 111180.99 | 180164.16 |
4 | 차우찬 | LG | 10 | 7 | 0 | 0 | 0 | 28 | 28 | 175.2 | ⋯ | 75.0 | 3.43 | 6.11 | 4.20 | 4.03 | 4.63 | 100000 | 100000 | 74120.86 | 125774.08 |
5 | 레일리 | 롯데 | 13 | 7 | 0 | 0 | 0 | 30 | 30 | 187.1 | ⋯ | 74.1 | 3.80 | 6.13 | 4.36 | 4.31 | 4.38 | 111000 | 85000 | 63002.89 | 110388.58 |
6 | 피어밴드 | KT | 8 | 10 | 0 | 0 | 0 | 26 | 26 | 160.0 | ⋯ | 76.1 | 3.04 | 6.52 | 4.42 | 4.32 | 3.94 | 85000 | 35000 | 25942.97 | 60851.77 |
# Side-by-side comparison of actual 2018 salary, predicted 2018 salary and
# 2017 salary per player.
predictsalart <- round(predict_2018_salary, 0)
# Build a data frame rather than cbind()-ing vectors: cbind() of a character
# vector with numeric vectors yields an all-character matrix, and the columns
# were only labelled result1..result4.
result <- data.frame(
  선수명 = picher$선수명,
  연봉.2018. = picher$연봉.2018.,
  예측.연봉.2018. = predictsalart,
  연봉.2017. = picher$연봉.2017.
)
result
result1 | result2 | result3 | result4 | |
---|---|---|---|---|
1 | 켈리 | 140000 | 127838 | 85000 |
2 | 소사 | 120000 | 91955 | 50000 |
3 | 양현종 | 230000 | 180164 | 150000 |
4 | 차우찬 | 100000 | 125774 | 100000 |
5 | 레일리 | 111000 | 110389 | 85000 |
6 | 피어밴드 | 85000 | 60852 | 35000 |
7 | 고영표 | 11500 | 32249 | 5200 |
8 | 장원준 | 100000 | 120661 | 100000 |
9 | 함덕주 | 16000 | 34421 | 7000 |
10 | 팻딘 | 70000 | 90685 | 70000 |
11 | 윤성환 | 80000 | 95139 | 80000 |
12 | 유희관 | 50000 | 67371 | 50000 |
13 | 임기영 | 13000 | 22933 | 3100 |
14 | 박세웅 | 25000 | 27964 | 10000 |
15 | 백정현 | 15500 | 24575 | 10000 |
16 | 송승준 | 40000 | 51763 | 40000 |
17 | 류제국 | 29000 | 47535 | 35000 |
18 | 우규민 | 70000 | 78523 | 70000 |
19 | 임찬규 | 11500 | 19833 | 6500 |
20 | 손승락 | 70000 | 74664 | 70000 |
21 | 정우람 | 120000 | 119624 | 120000 |
22 | 윤희상 | 13000 | 26255 | 15000 |
23 | 원종현 | 18500 | 23403 | 14000 |
24 | 배영수 | 50000 | 61694 | 55000 |
25 | 박종훈 | 20000 | 20806 | 10000 |
26 | 이상화 | 10000 | 13334 | 4500 |
27 | 김진성 | 23000 | 24511 | 18000 |
28 | 이민호 | 18800 | 22706 | 16000 |
29 | 이재학 | 19000 | 25924 | 20000 |
30 | 김강률 | 15000 | 13575 | 6200 |
⋮ | ⋮ | ⋮ | ⋮ | ⋮ |
123 | 정재원 | 4000 | 2960 | 4000 |
124 | 김민우 | 3600 | 3555 | 3800 |
125 | 이현호 | 5200 | 3447 | 6000 |
126 | 권혁 | 45000 | 38632 | 45000 |
127 | 배제성 | 3000 | 1539 | 2700 |
128 | 홍상삼 | 9000 | 11187 | 12500 |
129 | 이태양 | 7300 | 5930 | 8300 |
130 | 김진우 | 6000 | 9837 | 12000 |
131 | 이영하 | 4200 | -291 | 2700 |
132 | 최성영 | 2900 | 1140 | 2900 |
133 | 김동호 | 6000 | 3508 | 5000 |
134 | 김윤동 | 15000 | 1585 | 4700 |
135 | 정인욱 | 5700 | 3266 | 7200 |
136 | 송창식 | 24000 | 16351 | 22000 |
137 | 배재환 | 3000 | 3807 | 4000 |
138 | 이정민 | 10000 | 11150 | 15000 |
139 | 최동환 | 6500 | 1652 | 6000 |
140 | 이종혁 | 3200 | 564 | 2700 |
141 | 홍성용 | 6800 | 2878 | 6300 |
142 | 정영일 | 3000 | 4241 | 4000 |
143 | 김지용 | 9000 | 3789 | 10000 |
144 | 최금강 | 12500 | 8542 | 14000 |
145 | 김범수 | 3600 | 1207 | 3300 |
146 | 이승현 | 7000 | 2054 | 6200 |
147 | 주권 | 7600 | 2162 | 7500 |
148 | 장민재 | 7100 | 2973 | 8100 |
149 | 정용운 | 7500 | -673 | 3100 |
150 | 노경은 | 10000 | 9953 | 16000 |
151 | 김승현 | 4000 | -3065 | 2900 |
152 | 류희운 | 4000 | -5132 | 3000 |
k-fold
# 10-fold cross-validation of the five-predictor linear model with caret.
X <- dt[c("FIP", "WAR", "볼넷.9", "삼진.9", "연봉.2017.")]
y <- dt$연봉.2018.
ctrl <- trainControl(
  method = "cv",
  number = 10,
  verboseIter = TRUE
)
# The fitting call was missing, so print(model) would have shown whatever
# `model` held before. method = "lm" matches the printed "Linear Regression"
# result with the 'intercept' tuning parameter.
model <- train(x = X, y = y, method = "lm", trControl = ctrl)
print(model)
Linear Regression
152 samples
5 predictor
No pre-processing
Resampling: Cross-Validated (10 fold)
Summary of sample sizes: 136, 139, 137, 136, 137, 136, ...
Resampling results:
RMSE Rsquared MAE
8775.345 0.9234182 5096.303
Tuning parameter 'intercept' was held constant at a value of TRUE
createDataPartition
# Hold-out split (seed 20) and least-squares fit on the training part only.
set.seed(20)
train_indices <- createDataPartition(y, p = 0.8, list = FALSE)  # 80% for training
X_train <- X[train_indices, ]   # training predictors
y_train <- y[train_indices]     # training response
X_test <- X[-train_indices, ]   # test predictors
y_test <- y[-train_indices]     # test response
model <- lm(y_train ~ ., data = cbind(X_train, y_train))
summary(model)
Call:
lm(formula = y_train ~ ., data = cbind(X_train, y_train))
Residuals:
Min 1Q Median 3Q Max
-51628 -1742 835 2679 54478
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -3.132e+03 3.752e+03 -0.835 0.406
FIP 3.656e+02 6.383e+02 0.573 0.568
WAR 6.521e+03 9.510e+02 6.857 3.53e-10 ***
볼넷.9 4.095e+02 4.922e+02 0.832 0.407
삼진.9 -2.664e+02 3.220e+02 -0.827 0.410
연봉.2017. 9.436e-01 4.885e-02 19.314 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 9407 on 117 degrees of freedom
Multiple R-squared: 0.9084, Adjusted R-squared: 0.9045
F-statistic: 232.1 on 5 and 117 DF, p-value: < 2.2e-16
# Predictions for the held-out test rows; names are the original row indices.
y_pred <- predict(model, newdata = X_test)
y_pred
- 2
- 83760.5036635398
- 11
- 92856.5551719829
- 16
- 49834.1350433709
- 20
- 74394.6037441324
- 21
- 121111.857311619
- 23
- 21354.3954407502
- 30
- 11877.9200631754
- 31
- 13693.1214770523
- 36
- 10957.9331638088
- 39
- 7904.67772835263
- 41
- 22618.8632801464
- 47
- 3977.22428640269
- 51
- 4759.88345127168
- 58
- 5598.23942115724
- 62
- 2384.85845607467
- 64
- 3520.11810139773
- 67
- 3352.59845508957
- 68
- 22782.5629218232
- 73
- 2753.45775461199
- 77
- 8019.61080626648
- 80
- 5122.41200499673
- 83
- -3354.1873156885
- 90
- -1479.51353608508
- 98
- 2904.84077333906
- 118
- 5349.37841885841
- 121
- 3656.12420217399
- 132
- 2282.13105827633
- 141
- 3578.68637161238
- 142
- 4774.13856910488
plot(model)
## Shapiro-Wilk Test
## H0 : normal distribution vs. H1 : not H0
shapiro.test(resid(model))
Shapiro-Wilk normality test
data: resid(model)
W = 0.69671, p-value = 1.316e-14
귀무가설 기각
library(lmtest)
### 등분산성
## H0 : 등분산 vs. H1 : 이분산 (Heteroscedasticity)
bptest(model)
studentized Breusch-Pagan test
data: model
BP = 48.48, df = 5, p-value = 2.834e-09
잔차 이분산..
bptest(model)
studentized Breusch-Pagan test
data: model
BP = 48.48, df = 5, p-value = 2.834e-09
기각..
# Root mean squared error on the test set.
rmse <- sqrt(mean((y_pred - y_test)^2))
rmse
8169.95343721918
# Out-of-sample R^2: 1 - SSE / SST, with SST taken around the test-set mean.
r_squared <- 1 - sum((y_test - y_pred)^2) / sum((y_test - mean(y_test))^2)
r_squared
0.937905268099376
# Predictions for every row of X (training and test rows alike).
# This overwrites the test-only `y_pred` used for rmse / r_squared above.
y_pred <- predict(model, newdata = X)
y_pred
- 1
- 120079.644407696
- 2
- 83760.5036635398
- 3
- 175521.573203863
- 4
- 121612.06220557
- 5
- 106098.40123629
- 6
- 55938.3360303223
- 7
- 26731.8079691304
- 8
- 117272.89045312
- 9
- 28840.8622273183
- 10
- 87223.7553189251
- 11
- 92856.5551719829
- 12
- 64095.7326866766
- 13
- 18557.1211875583
- 14
- 24286.457756089
- 15
- 21705.8561903346
- 16
- 49834.1350433709
- 17
- 45256.3857125534
- 18
- 77519.7451159889
- 19
- 17218.5428165066
- 20
- 74394.6037441324
- 21
- 121111.857311619
- 22
- 24012.4794639814
- 23
- 21354.3954407502
- 24
- 60809.3599020221
- 25
- 18627.1334642915
- 26
- 11284.3959821959
- 27
- 23499.0012034168
- 28
- 21500.0921981219
- 29
- 25216.0463766418
- 30
- 11877.9200631754
- 31
- 13693.1214770523
- 32
- 9721.37978877243
- 33
- 10832.9902724258
- 34
- 22447.697009164
- 35
- 7824.80860177616
- 36
- 10957.9331638088
- 37
- 50497.4525617583
- 38
- 23384.8597822882
- 39
- 7904.67772835263
- 40
- 24703.2223966378
- 41
- 22618.8632801464
- 42
- 13201.8856253739
- 43
- 12816.7508124466
- 44
- 26147.3170424822
- 45
- 58131.6087102705
- 46
- 7523.01225446262
- 47
- 3977.22428640269
- 48
- 4591.4093894283
- 49
- 39003.2211855645
- 50
- 71627.7765552946
- 51
- 4759.88345127168
- 52
- 47644.4841594123
- 53
- 10530.7891568554
- 54
- 5199.11439751501
- 55
- 9368.46795181711
- 56
- 31579.4491180105
- 57
- 4231.50693696113
- 58
- 5598.23942115724
- 59
- 5136.52005559622
- 60
- 9102.06897972658
- 61
- 6984.12598013427
- 62
- 2384.85845607467
- 63
- 18303.363177317
- 64
- 3520.11810139773
- 65
- 3322.44698945243
- 66
- 1989.42705473082
- 67
- 3352.59845508957
- 68
- 22782.5629218232
- 69
- 10769.628209866
- 70
- 3902.99550968491
- 71
- 19176.2096780485
- 72
- 5598.20242176528
- 73
- 2753.45775461199
- 74
- 10039.0367084241
- 75
- 7357.93890125183
- 76
- 5058.81091390124
- 77
- 8019.61080626648
- 78
- 7277.46775096933
- 79
- 2999.80648325582
- 80
- 5122.41200499673
- 81
- 2190.6374430789
- 82
- 9310.17406653524
- 83
- -3354.1873156885
- 84
- -324.080566112093
- 85
- 3952.58752085951
- 86
- 3631.83930171537
- 87
- 129.350963084188
- 88
- 2855.73595792648
- 89
- 3556.52749004506
- 90
- -1479.51353608508
- 91
- 9288.2135748458
- 92
- 2179.48869530268
- 93
- 2274.22153090663
- 94
- 2563.46139539266
- 95
- 4225.95510985668
- 96
- 779.354766372629
- 97
- -2275.06753560988
- 98
- 2904.84077333906
- 99
- 1276.18575681504
- 100
- 1278.77949800168
- 101
- 1106.52488833093
- 102
- 1872.11890217375
- 103
- 2491.20505069081
- 104
- 5496.81554757196
- 105
- 7100.93204974382
- 106
- 2029.42471093689
- 107
- 6086.50404299722
- 108
- 6770.85899230805
- 109
- 229.654419792656
- 110
- 3410.59366971035
- 111
- 18753.9298630808
- 112
- 2464.93783388577
- 113
- 9614.92030264136
- 114
- 1084.00882682257
- 115
- 8543.2323809515
- 116
- 2825.44788089157
- 117
- 3470.9785470481
- 118
- 5349.37841885841
- 119
- 1957.02838737825
- 120
- 1942.46056909266
- 121
- 3656.12420217399
- 122
- 42166.0786594782
- 123
- 2646.91201627154
- 124
- 3912.29633727664
- 125
- 4053.21273794936
- 126
- 40416.3088155261
- 127
- 1718.55530132255
- 128
- 11344.8355840123
- 129
- 6449.57632010468
- 130
- 10343.4503716666
- 131
- 566.080172487302
- 132
- 2282.13105827633
- 133
- 3362.37196892649
- 134
- 2213.60459015082
- 135
- 4651.09395657194
- 136
- 17836.449861336
- 137
- 4905.41919551359
- 138
- 11939.7833450429
- 139
- 2934.46752999489
- 140
- 1059.70613969482
- 141
- 3578.68637161238
- 142
- 4774.13856910488
- 143
- 5340.39499497243
- 144
- 9744.24047688514
- 145
- 1838.44630518678
- 146
- 3450.35582952913
- 147
- 3280.22844632236
- 148
- 3919.93354796755
- 149
- -53.2035118929793
- 150
- 11626.3868566865
- 151
- -1610.41555450545
- 152
- -3236.44776099906
# Append the model predictions to `picher` as a column named `new_data`.
# NOTE(review): cbind() always appends, so re-running this line adds another
# `new_data` column each time.
picher <- cbind(picher, new_data = y_pred)
# Rows of `picher` ordered by 2018 salary, highest first. Uses
# `decreasing = TRUE` (same form as the `sorted_df` sort later in the file);
# ties keep their original row order.
sorted_picher <- picher[order(picher$"연봉.2018.", decreasing = TRUE), ]
sorted_picher
선수명 | 팀명 | 승 | 패 | 세 | 홀드 | 블론 | 경기 | 선발 | 이닝 | ⋯ | ERA | RA9.WAR | FIP | kFIP | WAR | 연봉.2018. | 연봉.2017. | new_col | new_col.1 | new_data | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
<chr> | <chr> | <int> | <int> | <int> | <int> | <int> | <int> | <int> | <dbl> | ⋯ | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <int> | <int> | <dbl> | <dbl> | <dbl> | |
3 | 양현종 | KIA | 20 | 6 | 0 | 0 | 0 | 31 | 31 | 193.1 | ⋯ | 3.44 | 6.54 | 3.94 | 3.82 | 5.64 | 230000 | 150000 | 111180.99 | 180164.16 | 175521.57 |
1 | 켈리 | SK | 16 | 7 | 0 | 0 | 0 | 30 | 30 | 190.0 | ⋯ | 3.60 | 6.91 | 3.69 | 3.44 | 6.62 | 140000 | 85000 | 63003.59 | 127838.00 | 120079.64 |
2 | 소사 | LG | 11 | 11 | 1 | 0 | 0 | 30 | 29 | 185.1 | ⋯ | 3.88 | 6.80 | 3.52 | 3.41 | 6.08 | 120000 | 50000 | 37061.64 | 91955.34 | 83760.50 |
21 | 정우람 | 한화 | 6 | 4 | 26 | 0 | 5 | 56 | 0 | 59.0 | ⋯ | 2.75 | 2.85 | 3.26 | 2.69 | 1.81 | 120000 | 120000 | 88943.64 | 119624.39 | 121111.86 |
5 | 레일리 | 롯데 | 13 | 7 | 0 | 0 | 0 | 30 | 30 | 187.1 | ⋯ | 3.80 | 6.13 | 4.36 | 4.31 | 4.38 | 111000 | 85000 | 63002.89 | 110388.58 | 106098.40 |
4 | 차우찬 | LG | 10 | 7 | 0 | 0 | 0 | 28 | 28 | 175.2 | ⋯ | 3.43 | 6.11 | 4.20 | 4.03 | 4.63 | 100000 | 100000 | 74120.86 | 125774.08 | 121612.06 |
8 | 장원준 | 두산 | 14 | 9 | 0 | 0 | 0 | 29 | 29 | 180.1 | ⋯ | 3.14 | 7.28 | 4.26 | 4.35 | 3.85 | 100000 | 100000 | 74120.70 | 120660.77 | 117272.89 |
6 | 피어밴드 | KT | 8 | 10 | 0 | 0 | 0 | 26 | 26 | 160.0 | ⋯ | 3.04 | 6.52 | 4.42 | 4.32 | 3.94 | 85000 | 35000 | 25942.97 | 60851.77 | 55938.34 |
11 | 윤성환 | 삼성 | 12 | 9 | 0 | 0 | 0 | 28 | 28 | 174.1 | ⋯ | 4.28 | 5.36 | 4.78 | 4.80 | 3.03 | 80000 | 80000 | 59296.47 | 95138.61 | 92856.56 |
10 | 팻딘 | KIA | 9 | 7 | 0 | 0 | 0 | 30 | 29 | 176.0 | ⋯ | 4.14 | 5.66 | 4.65 | 4.61 | 3.64 | 70000 | 70000 | 51884.71 | 90684.81 | 87223.76 |
18 | 우규민 | 삼성 | 7 | 10 | 0 | 0 | 0 | 27 | 25 | 133.0 | ⋯ | 5.21 | 1.48 | 4.95 | 4.98 | 2.14 | 70000 | 70000 | 51884.20 | 78523.00 | 77519.75 |
20 | 손승락 | 롯데 | 1 | 3 | 37 | 0 | 5 | 61 | 0 | 62.0 | ⋯ | 2.18 | 3.91 | 3.69 | 3.37 | 1.82 | 70000 | 70000 | 51883.97 | 74663.86 | 74394.60 |
45 | 이동현 | LG | 3 | 6 | 7 | 5 | 3 | 45 | 0 | 50.2 | ⋯ | 4.80 | 1.22 | 3.64 | 3.59 | 0.68 | 60000 | 60000 | 44471.73 | 57336.45 | 58131.61 |
12 | 유희관 | 두산 | 11 | 6 | 0 | 1 | 0 | 30 | 29 | 188.2 | ⋯ | 4.53 | 4.79 | 4.78 | 4.97 | 2.89 | 50000 | 50000 | 37060.65 | 67371.43 | 64095.73 |
24 | 배영수 | 한화 | 7 | 8 | 0 | 0 | 0 | 25 | 25 | 128.0 | ⋯ | 5.06 | 2.47 | 5.10 | 5.21 | 1.68 | 50000 | 55000 | 40766.17 | 61693.57 | 60809.36 |
37 | 임창용 | KIA | 8 | 6 | 7 | 9 | 5 | 51 | 0 | 50.0 | ⋯ | 3.78 | 1.40 | 3.69 | 3.35 | 0.96 | 50000 | 50000 | 37059.82 | 49996.69 | 50497.45 |
52 | 윤길현 | 롯데 | 1 | 4 | 0 | 13 | 2 | 40 | 0 | 39.1 | ⋯ | 6.41 | -0.04 | 3.99 | 3.73 | 0.50 | 50000 | 50000 | 37059.67 | 46413.16 | 47644.48 |
122 | 송은범 | 한화 | 0 | 4 | 1 | 0 | 1 | 13 | 6 | 37.1 | ⋯ | 6.51 | 0.10 | 6.33 | 6.79 | -0.12 | 45000 | 45000 | 33353.86 | 41260.88 | 42166.08 |
126 | 권혁 | 한화 | 1 | 3 | 0 | 11 | 1 | 37 | 0 | 31.1 | ⋯ | 6.32 | 0.04 | 6.48 | 6.78 | -0.16 | 45000 | 45000 | 33353.67 | 38632.11 | 40416.31 |
16 | 송승준 | 롯데 | 11 | 5 | 0 | 1 | 1 | 30 | 22 | 130.1 | ⋯ | 4.21 | 4.22 | 4.91 | 4.77 | 2.20 | 40000 | 40000 | 29648.38 | 51762.94 | 49834.14 |
49 | 이현승 | 두산 | 3 | 2 | 5 | 9 | 7 | 57 | 0 | 52.0 | ⋯ | 3.98 | 1.69 | 4.25 | 4.30 | 0.53 | 40000 | 40000 | 29647.84 | 38630.86 | 39003.22 |
41 | 안영명 | 한화 | 1 | 8 | 0 | 0 | 1 | 25 | 16 | 87.2 | ⋯ | 5.75 | 0.52 | 5.19 | 5.34 | 0.81 | 35000 | 20000 | 14824.10 | 23301.25 | 22618.86 |
68 | 채병용 | SK | 6 | 4 | 0 | 6 | 3 | 43 | 0 | 50.0 | ⋯ | 6.84 | -0.17 | 5.05 | 4.94 | 0.22 | 30000 | 25000 | 18529.76 | 21870.11 | 22782.56 |
17 | 류제국 | LG | 8 | 6 | 0 | 0 | 0 | 25 | 25 | 131.1 | ⋯ | 5.35 | 1.53 | 4.79 | 4.84 | 2.15 | 29000 | 35000 | 25942.45 | 47534.93 | 45256.39 |
14 | 박세웅 | 롯데 | 12 | 6 | 0 | 0 | 0 | 28 | 28 | 171.1 | ⋯ | 3.68 | 5.92 | 5.07 | 5.14 | 2.54 | 25000 | 10000 | 7412.73 | 27963.63 | 24286.46 |
40 | 임창민 | NC | 4 | 3 | 29 | 0 | 6 | 60 | 0 | 66.0 | ⋯ | 3.68 | 2.68 | 4.83 | 4.60 | 0.89 | 25000 | 22500 | 16677.00 | 24913.36 | 24703.22 |
56 | 박정진 | 한화 | 3 | 2 | 1 | 7 | 0 | 55 | 0 | 48.0 | ⋯ | 3.94 | 0.87 | 4.92 | 4.66 | 0.38 | 25000 | 33000 | 24459.37 | 30622.42 | 31579.45 |
136 | 송창식 | 한화 | 5 | 6 | 0 | 15 | 6 | 63 | 0 | 73.1 | ⋯ | 6.63 | -0.74 | 5.68 | 5.59 | -0.22 | 24000 | 22000 | 16306.09 | 16351.07 | 17836.45 |
27 | 김진성 | NC | 10 | 6 | 0 | 15 | 2 | 69 | 0 | 89.2 | ⋯ | 3.61 | 3.20 | 4.30 | 3.92 | 1.47 | 23000 | 18000 | 13341.73 | 24511.49 | 23499.00 |
38 | 심창민 | 삼성 | 4 | 7 | 6 | 16 | 2 | 66 | 0 | 75.1 | ⋯ | 4.18 | 1.82 | 4.82 | 4.26 | 0.93 | 23000 | 21000 | 15565.14 | 23185.40 | 23384.86 |
⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋱ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ |
99 | 박치국 | 두산 | 1 | 1 | 0 | 0 | 0 | 21 | 3 | 32.0 | ⋯ | 6.75 | -0.12 | 5.39 | 5.58 | 0.00 | 3400 | 2700 | 2001.156 | 1048.3622 | 1276.1858 |
102 | 허건엽 | SK | 0 | 0 | 0 | 0 | 0 | 8 | 0 | 8.1 | ⋯ | 8.64 | -0.20 | 5.29 | 5.64 | -0.01 | 3400 | 3300 | 2445.935 | 2067.0516 | 1872.1189 |
105 | 고봉재 | 두산 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1.0 | ⋯ | 0.00 | 0.04 | 6.73 | 7.64 | -0.01 | 3300 | 4400 | 3261.709 | 8820.0864 | 7100.9320 |
112 | 김시현 | 삼성 | 0 | 0 | 0 | 0 | 0 | 17 | 0 | 21.1 | ⋯ | 7.59 | 0.08 | 7.06 | 7.18 | -0.05 | 3300 | 2700 | 2001.245 | 2115.3516 | 2464.9378 |
120 | 강장산 | KT | 0 | 0 | 0 | 0 | 0 | 17 | 0 | 26.1 | ⋯ | 5.47 | 0.54 | 6.16 | 6.46 | -0.10 | 3300 | 3000 | 2223.583 | 1795.4384 | 1942.4606 |
118 | 배민관 | LG | 0 | 0 | 0 | 0 | 0 | 4 | 0 | 3.2 | ⋯ | 9.82 | -0.11 | 9.46 | 9.77 | -0.08 | 3200 | 3000 | 2223.850 | 5401.6928 | 5349.3784 |
140 | 이종혁 | KT | 2 | 0 | 0 | 0 | 0 | 16 | 0 | 19.0 | ⋯ | 6.63 | -0.09 | 7.10 | 7.57 | -0.25 | 3200 | 2700 | 2001.196 | 564.4227 | 1059.7061 |
70 | 최지광 | 삼성 | 0 | 2 | 0 | 0 | 0 | 11 | 6 | 25.0 | ⋯ | 6.48 | -0.16 | 5.69 | 5.65 | 0.21 | 3100 | 2700 | 2001.325 | 4206.2468 | 3902.9955 |
73 | 박세진 | KT | 0 | 2 | 0 | 0 | 0 | 4 | 3 | 11.1 | ⋯ | 9.53 | -0.13 | 4.44 | 4.67 | 0.18 | 3100 | 3000 | 2223.610 | 3189.5874 | 2753.4578 |
101 | 박상원 | 한화 | 0 | 0 | 0 | 1 | 0 | 18 | 0 | 21.2 | ⋯ | 4.15 | 0.32 | 5.30 | 5.42 | 0.00 | 3100 | 2700 | 2001.158 | 968.7329 | 1106.5249 |
81 | 김대유 | SK | 0 | 0 | 0 | 0 | 0 | 6 | 0 | 4.2 | ⋯ | 9.64 | 0.08 | 5.88 | 5.91 | 0.09 | 3000 | 2700 | 2001.218 | 2139.0137 | 2190.6374 |
84 | 손주영 | LG | 0 | 0 | 0 | 0 | 0 | 5 | 0 | 6.0 | ⋯ | 4.50 | 0.06 | 3.07 | 3.08 | 0.08 | 3000 | 2700 | 2001.010 | -539.4368 | -324.0806 |
87 | 김진영 | 한화 | 0 | 0 | 0 | 0 | 0 | 3 | 0 | 2.2 | ⋯ | 10.12 | -0.02 | 2.98 | 2.29 | 0.07 | 3000 | 2700 | 2000.967 | -560.7629 | 129.3510 |
98 | 임진우 | 두산 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1.0 | ⋯ | 27.00 | -0.11 | 4.73 | 4.61 | 0.00 | 3000 | 3200 | 2371.857 | 3087.5908 | 2904.8408 |
109 | 서균 | 한화 | 0 | 0 | 0 | 0 | 0 | 14 | 0 | 14.1 | ⋯ | 4.40 | 0.25 | 5.41 | 5.58 | -0.04 | 3000 | 2700 | 2001.060 | -370.1056 | 229.6544 |
121 | 홍성무 | KT | 0 | 1 | 0 | 0 | 0 | 7 | 0 | 6.1 | ⋯ | 12.79 | -0.29 | 7.37 | 7.58 | -0.11 | 3000 | 3000 | 2223.730 | 3741.6415 | 3656.1242 |
127 | 배제성 | KT | 0 | 0 | 0 | 0 | 0 | 21 | 1 | 32.0 | ⋯ | 8.72 | -0.10 | 6.39 | 6.59 | -0.17 | 3000 | 2700 | 2001.242 | 1539.0311 | 1718.5553 |
137 | 배재환 | NC | 0 | 1 | 0 | 0 | 0 | 3 | 1 | 8.0 | ⋯ | 9.00 | -0.16 | 11.48 | 11.73 | -0.23 | 3000 | 4000 | 2964.893 | 3807.0275 | 4905.4192 |
142 | 정영일 | SK | 0 | 0 | 0 | 0 | 0 | 9 | 0 | 8.0 | ⋯ | 10.12 | -0.61 | 10.73 | 11.37 | -0.30 | 3000 | 4000 | 2964.964 | 4241.1480 | 4774.1386 |
96 | 김도영 | KT | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1.0 | ⋯ | 0.00 | 0.04 | 3.73 | 4.43 | 0.00 | 2900 | 2700 | 2001.259 | 1731.9157 | 779.3548 |
97 | 조근종 | KT | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 2.0 | ⋯ | 22.50 | -0.16 | 6.73 | 4.86 | 0.00 | 2900 | 2700 | 2000.470 | -6343.0220 | -2275.0675 |
108 | 안규현 | 삼성 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 2.0 | ⋯ | 9.00 | -0.04 | 8.73 | 9.67 | -0.04 | 2900 | 2800 | 2075.799 | 7768.8014 | 6770.8590 |
115 | 김종훈 | KIA | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1.0 | ⋯ | 36.00 | -0.16 | 18.73 | 17.60 | -0.06 | 2900 | 2800 | 2075.467 | 4943.5065 | 8543.2324 |
132 | 최성영 | NC | 0 | 0 | 0 | 0 | 0 | 7 | 0 | 11.1 | ⋯ | 9.53 | -0.26 | 9.29 | 9.48 | -0.20 | 2900 | 2900 | 2149.435 | 1140.0750 | 2282.1311 |
83 | 장지훈 | 삼성 | 0 | 0 | 0 | 0 | 0 | 4 | 0 | 2.1 | ⋯ | 0.00 | 0.09 | 0.73 | -0.69 | 0.08 | 2800 | 2700 | 2000.563 | -5315.1733 | -3354.1873 |
90 | 차재용 | 롯데 | 0 | 0 | 0 | 0 | 0 | 3 | 0 | 2.1 | ⋯ | 0.00 | 0.08 | 2.02 | 1.83 | 0.05 | 2800 | 2800 | 2075.035 | -1808.0465 | -1479.5135 |
106 | 이수민 | 삼성 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 2.2 | ⋯ | 10.12 | -0.15 | 5.23 | 5.95 | -0.01 | 2800 | 3000 | 2223.624 | 2419.1537 | 2029.4247 |
94 | 장민익 | 두산 | 0 | 0 | 0 | 0 | 0 | 3 | 0 | 2.0 | ⋯ | 0.00 | 0.08 | 4.73 | 4.61 | 0.02 | 2700 | 2700 | 2001.266 | 2792.7349 | 2563.4614 |
104 | 성영훈 | 두산 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1.0 | ⋯ | 0.00 | 0.04 | 6.73 | 7.64 | -0.01 | 2700 | 2700 | 2001.678 | 7263.5285 | 5496.8155 |
116 | 정동윤 | SK | 0 | 0 | 0 | 0 | 0 | 2 | 0 | 3.0 | ⋯ | 9.00 | -0.03 | 9.40 | 10.05 | -0.07 | 2700 | 2700 | 2001.280 | 2205.1885 | 2825.4479 |
# Keep only the rows whose 2018 salary differs from the 2017 salary.
# NOTE(review): a row with NA in either salary column would come back as an
# all-NA row under this logical indexing; confirm the data has no NAs.
salary_changed <- picher$"연봉.2018." != picher$"연봉.2017."
filtered_df <- picher[salary_changed, ]
filtered_df
선수명 | 팀명 | 승 | 패 | 세 | 홀드 | 블론 | 경기 | 선발 | 이닝 | ⋯ | ERA | RA9.WAR | FIP | kFIP | WAR | 연봉.2018. | 연봉.2017. | new_col | new_col.1 | new_data | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
<chr> | <chr> | <int> | <int> | <int> | <int> | <int> | <int> | <int> | <dbl> | ⋯ | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <int> | <int> | <dbl> | <dbl> | <dbl> | |
1 | 켈리 | SK | 16 | 7 | 0 | 0 | 0 | 30 | 30 | 190.0 | ⋯ | 3.60 | 6.91 | 3.69 | 3.44 | 6.62 | 140000 | 85000 | 63003.595 | 127837.996 | 120079.644 |
2 | 소사 | LG | 11 | 11 | 1 | 0 | 0 | 30 | 29 | 185.1 | ⋯ | 3.88 | 6.80 | 3.52 | 3.41 | 6.08 | 120000 | 50000 | 37061.640 | 91955.341 | 83760.504 |
3 | 양현종 | KIA | 20 | 6 | 0 | 0 | 0 | 31 | 31 | 193.1 | ⋯ | 3.44 | 6.54 | 3.94 | 3.82 | 5.64 | 230000 | 150000 | 111180.994 | 180164.163 | 175521.573 |
5 | 레일리 | 롯데 | 13 | 7 | 0 | 0 | 0 | 30 | 30 | 187.1 | ⋯ | 3.80 | 6.13 | 4.36 | 4.31 | 4.38 | 111000 | 85000 | 63002.886 | 110388.580 | 106098.401 |
6 | 피어밴드 | KT | 8 | 10 | 0 | 0 | 0 | 26 | 26 | 160.0 | ⋯ | 3.04 | 6.52 | 4.42 | 4.32 | 3.94 | 85000 | 35000 | 25942.975 | 60851.766 | 55938.336 |
7 | 고영표 | KT | 8 | 12 | 0 | 1 | 0 | 25 | 24 | 141.2 | ⋯ | 5.08 | 2.97 | 3.88 | 3.78 | 3.87 | 11500 | 5200 | 3855.283 | 32248.671 | 26731.808 |
9 | 함덕주 | 두산 | 9 | 8 | 0 | 2 | 0 | 35 | 24 | 137.1 | ⋯ | 3.67 | 4.99 | 3.91 | 3.67 | 3.78 | 16000 | 7000 | 5189.489 | 34420.638 | 28840.862 |
13 | 임기영 | KIA | 8 | 6 | 0 | 0 | 0 | 23 | 19 | 118.1 | ⋯ | 3.65 | 4.25 | 4.07 | 4.19 | 2.79 | 13000 | 3100 | 2298.522 | 22932.876 | 18557.121 |
14 | 박세웅 | 롯데 | 12 | 6 | 0 | 0 | 0 | 28 | 28 | 171.1 | ⋯ | 3.68 | 5.92 | 5.07 | 5.14 | 2.54 | 25000 | 10000 | 7412.730 | 27963.633 | 24286.458 |
15 | 백정현 | 삼성 | 8 | 4 | 0 | 3 | 0 | 35 | 14 | 100.2 | ⋯ | 4.38 | 3.01 | 4.51 | 4.34 | 2.25 | 15500 | 10000 | 7412.536 | 24575.219 | 21705.856 |
17 | 류제국 | LG | 8 | 6 | 0 | 0 | 0 | 25 | 25 | 131.1 | ⋯ | 5.35 | 1.53 | 4.79 | 4.84 | 2.15 | 29000 | 35000 | 25942.448 | 47534.929 | 45256.386 |
19 | 임찬규 | LG | 6 | 10 | 0 | 0 | 0 | 27 | 26 | 124.1 | ⋯ | 4.63 | 3.15 | 4.81 | 4.79 | 2.04 | 11500 | 6500 | 4818.295 | 19832.884 | 17218.543 |
22 | 윤희상 | SK | 6 | 7 | 0 | 0 | 0 | 23 | 22 | 120.0 | ⋯ | 6.00 | 1.52 | 5.13 | 5.22 | 1.80 | 13000 | 15000 | 11118.427 | 26255.265 | 24012.479 |
23 | 원종현 | NC | 3 | 6 | 0 | 22 | 0 | 68 | 0 | 80.0 | ⋯ | 4.39 | 2.02 | 3.60 | 3.52 | 1.71 | 18500 | 14000 | 10377.101 | 23403.105 | 21354.395 |
24 | 배영수 | 한화 | 7 | 8 | 0 | 0 | 0 | 25 | 25 | 128.0 | ⋯ | 5.06 | 2.47 | 5.10 | 5.21 | 1.68 | 50000 | 55000 | 40766.167 | 61693.571 | 60809.360 |
25 | 박종훈 | SK | 12 | 7 | 0 | 1 | 0 | 29 | 28 | 151.1 | ⋯ | 4.10 | 4.31 | 5.38 | 5.55 | 1.62 | 20000 | 10000 | 7412.434 | 20805.675 | 18627.133 |
26 | 이상화 | KT | 4 | 3 | 6 | 4 | 1 | 70 | 0 | 66.0 | ⋯ | 3.95 | 2.43 | 3.57 | 3.45 | 1.54 | 10000 | 4500 | 3335.691 | 13333.548 | 11284.396 |
27 | 김진성 | NC | 10 | 6 | 0 | 15 | 2 | 69 | 0 | 89.2 | ⋯ | 3.61 | 3.20 | 4.30 | 3.92 | 1.47 | 23000 | 18000 | 13341.734 | 24511.486 | 23499.001 |
28 | 이민호 | NC | 5 | 1 | 3 | 6 | 1 | 60 | 3 | 88.2 | ⋯ | 4.06 | 1.97 | 4.41 | 4.22 | 1.38 | 18800 | 16000 | 11859.379 | 22705.669 | 21500.092 |
29 | 이재학 | NC | 5 | 7 | 0 | 0 | 0 | 28 | 23 | 119.0 | ⋯ | 5.67 | 1.04 | 5.53 | 5.35 | 1.31 | 19000 | 20000 | 14824.140 | 25923.735 | 25216.046 |
30 | 김강률 | 두산 | 7 | 2 | 7 | 12 | 1 | 70 | 0 | 89.0 | ⋯ | 3.44 | 2.68 | 4.15 | 4.09 | 1.27 | 15000 | 6200 | 4595.698 | 13575.161 | 11877.920 |
31 | 김재윤 | KT | 3 | 5 | 15 | 0 | 4 | 41 | 0 | 37.1 | ⋯ | 5.79 | 0.73 | 3.15 | 3.05 | 1.24 | 11000 | 9000 | 6670.994 | 15370.094 | 13693.121 |
32 | 김원중 | 롯데 | 7 | 8 | 0 | 0 | 0 | 24 | 24 | 107.1 | ⋯ | 5.70 | 2.22 | 5.53 | 5.56 | 1.23 | 6300 | 3000 | 2223.940 | 11393.858 | 9721.380 |
33 | 박진형 | 롯데 | 4 | 4 | 2 | 10 | 3 | 45 | 9 | 88.0 | ⋯ | 5.11 | 1.70 | 4.43 | 4.14 | 1.12 | 10500 | 6000 | 4447.373 | 11961.674 | 10832.990 |
34 | 윤규진 | 한화 | 8 | 7 | 0 | 2 | 0 | 36 | 18 | 119.0 | ⋯ | 5.22 | 2.23 | 5.46 | 5.42 | 1.09 | 21000 | 18000 | 13341.766 | 23320.087 | 22447.697 |
35 | 김재영 | 한화 | 5 | 7 | 0 | 0 | 1 | 20 | 15 | 85.1 | ⋯ | 4.54 | 1.60 | 5.00 | 5.23 | 1.06 | 5300 | 3000 | 2223.843 | 9322.823 | 7824.809 |
36 | 신정락 | LG | 3 | 5 | 10 | 12 | 3 | 63 | 0 | 59.0 | ⋯ | 5.34 | 0.51 | 4.23 | 4.12 | 0.97 | 10500 | 7500 | 5559.100 | 11850.125 | 10957.933 |
38 | 심창민 | 삼성 | 4 | 7 | 6 | 16 | 2 | 66 | 0 | 75.1 | ⋯ | 4.18 | 1.82 | 4.82 | 4.26 | 0.93 | 23000 | 21000 | 15565.136 | 23185.400 | 23384.860 |
39 | 정찬헌 | LG | 8 | 7 | 7 | 3 | 3 | 61 | 0 | 61.2 | ⋯ | 5.84 | 0.97 | 3.96 | 3.80 | 0.93 | 9500 | 4500 | 3335.522 | 8995.166 | 7904.678 |
40 | 임창민 | NC | 4 | 3 | 29 | 0 | 6 | 60 | 0 | 66.0 | ⋯ | 3.68 | 2.68 | 4.83 | 4.60 | 0.89 | 25000 | 22500 | 16676.995 | 24913.355 | 24703.222 |
⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋱ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ |
118 | 배민관 | LG | 0 | 0 | 0 | 0 | 0 | 4 | 0 | 3.2 | ⋯ | 9.82 | -0.11 | 9.46 | 9.77 | -0.08 | 3200 | 3000 | 2223.850 | 5401.6928 | 5349.37842 |
119 | 윤근영 | KT | 0 | 0 | 0 | 1 | 0 | 12 | 0 | 18.0 | ⋯ | 3.50 | 0.30 | 6.23 | 6.31 | -0.08 | 4200 | 4000 | 2964.668 | 1350.9942 | 1957.02839 |
120 | 강장산 | KT | 0 | 0 | 0 | 0 | 0 | 17 | 0 | 26.1 | ⋯ | 5.47 | 0.54 | 6.16 | 6.46 | -0.10 | 3300 | 3000 | 2223.583 | 1795.4384 | 1942.46057 |
124 | 김민우 | 한화 | 0 | 0 | 0 | 0 | 0 | 4 | 2 | 7.1 | ⋯ | 17.18 | -0.52 | 7.83 | 8.32 | -0.12 | 3600 | 3800 | 2816.614 | 3554.7700 | 3912.29634 |
125 | 이현호 | 두산 | 1 | 0 | 0 | 0 | 0 | 24 | 2 | 30.0 | ⋯ | 5.70 | -0.51 | 6.47 | 6.57 | -0.16 | 5200 | 6000 | 4447.098 | 3446.5228 | 4053.21274 |
127 | 배제성 | KT | 0 | 0 | 0 | 0 | 0 | 21 | 1 | 32.0 | ⋯ | 8.72 | -0.10 | 6.39 | 6.59 | -0.17 | 3000 | 2700 | 2001.242 | 1539.0311 | 1718.55530 |
128 | 홍상삼 | 두산 | 1 | 1 | 0 | 0 | 0 | 11 | 2 | 17.0 | ⋯ | 7.94 | -0.52 | 6.73 | 7.22 | -0.18 | 9000 | 12500 | 9265.021 | 11187.3262 | 11344.83558 |
129 | 이태양 | 한화 | 3 | 6 | 0 | 0 | 0 | 16 | 12 | 59.0 | ⋯ | 7.17 | 0.05 | 6.77 | 7.04 | -0.18 | 7300 | 8300 | 6151.890 | 5929.5258 | 6449.57632 |
130 | 김진우 | KIA | 2 | 6 | 0 | 0 | 1 | 14 | 8 | 36.1 | ⋯ | 7.93 | -0.35 | 6.68 | 7.04 | -0.19 | 6000 | 12000 | 8894.348 | 9837.3511 | 10343.45037 |
131 | 이영하 | 두산 | 3 | 3 | 0 | 0 | 0 | 20 | 3 | 35.2 | ⋯ | 5.55 | 0.59 | 6.79 | 6.86 | -0.19 | 4200 | 2700 | 2001.102 | -291.0924 | 566.08017 |
133 | 김동호 | 삼성 | 0 | 1 | 0 | 0 | 0 | 20 | 1 | 36.0 | ⋯ | 6.75 | -0.07 | 5.46 | 5.96 | -0.20 | 6000 | 5000 | 3706.004 | 3508.1291 | 3362.37197 |
134 | 김윤동 | KIA | 7 | 4 | 11 | 6 | 6 | 65 | 1 | 80.1 | ⋯ | 4.59 | 1.89 | 5.54 | 5.53 | -0.20 | 15000 | 4700 | 3483.494 | 1585.1461 | 2213.60459 |
135 | 정인욱 | 삼성 | 1 | 4 | 0 | 0 | 0 | 9 | 7 | 32.0 | ⋯ | 9.84 | -0.43 | 7.39 | 7.36 | -0.20 | 5700 | 7200 | 5336.429 | 3265.9890 | 4651.09396 |
136 | 송창식 | 한화 | 5 | 6 | 0 | 15 | 6 | 63 | 0 | 73.1 | ⋯ | 6.63 | -0.74 | 5.68 | 5.59 | -0.22 | 24000 | 22000 | 16306.086 | 16351.0737 | 17836.44986 |
137 | 배재환 | NC | 0 | 1 | 0 | 0 | 0 | 3 | 1 | 8.0 | ⋯ | 9.00 | -0.16 | 11.48 | 11.73 | -0.23 | 3000 | 4000 | 2964.893 | 3807.0275 | 4905.41920 |
138 | 이정민 | 롯데 | 3 | 1 | 0 | 2 | 1 | 24 | 0 | 26.2 | ⋯ | 5.40 | 0.19 | 6.47 | 6.83 | -0.24 | 10000 | 15000 | 11117.848 | 11150.4349 | 11939.78335 |
139 | 최동환 | LG | 1 | 2 | 1 | 5 | 2 | 35 | 0 | 38.0 | ⋯ | 5.68 | -0.05 | 6.76 | 6.77 | -0.24 | 6500 | 6000 | 4446.970 | 1651.5907 | 2934.46753 |
140 | 이종혁 | KT | 2 | 0 | 0 | 0 | 0 | 16 | 0 | 19.0 | ⋯ | 6.63 | -0.09 | 7.10 | 7.57 | -0.25 | 3200 | 2700 | 2001.196 | 564.4227 | 1059.70614 |
141 | 홍성용 | KT | 0 | 2 | 0 | 1 | 0 | 37 | 1 | 39.0 | ⋯ | 6.23 | 0.25 | 6.40 | 6.63 | -0.28 | 6800 | 6300 | 4669.430 | 2877.7134 | 3578.68637 |
142 | 정영일 | SK | 0 | 0 | 0 | 0 | 0 | 9 | 0 | 8.0 | ⋯ | 10.12 | -0.61 | 10.73 | 11.37 | -0.30 | 3000 | 4000 | 2964.964 | 4241.1480 | 4774.13857 |
143 | 김지용 | LG | 4 | 3 | 3 | 8 | 4 | 53 | 0 | 53.0 | ⋯ | 5.09 | 0.28 | 6.30 | 6.27 | -0.38 | 9000 | 10000 | 7411.678 | 3788.8307 | 5340.39499 |
144 | 최금강 | NC | 5 | 3 | 0 | 0 | 0 | 39 | 13 | 89.2 | ⋯ | 7.33 | -0.27 | 6.31 | 6.58 | -0.41 | 12500 | 14000 | 10376.560 | 8542.1622 | 9744.24048 |
145 | 김범수 | 한화 | 0 | 4 | 0 | 0 | 0 | 15 | 5 | 31.0 | ⋯ | 8.71 | -0.42 | 8.15 | 8.52 | -0.42 | 3600 | 3300 | 2445.965 | 1206.6108 | 1838.44631 |
146 | 이승현 | 삼성 | 2 | 0 | 0 | 0 | 0 | 30 | 0 | 31.2 | ⋯ | 5.12 | 0.31 | 8.03 | 8.03 | -0.44 | 7000 | 6200 | 4595.280 | 2054.0673 | 3450.35583 |
147 | 주권 | KT | 5 | 6 | 1 | 3 | 2 | 39 | 12 | 81.2 | ⋯ | 6.61 | -0.02 | 6.33 | 6.54 | -0.46 | 7600 | 7500 | 5558.774 | 2162.2155 | 3280.22845 |
148 | 장민재 | 한화 | 2 | 5 | 0 | 0 | 2 | 33 | 5 | 62.2 | ⋯ | 7.76 | -1.21 | 6.21 | 6.48 | -0.47 | 7100 | 8100 | 6003.520 | 2972.9523 | 3919.93355 |
149 | 정용운 | KIA | 3 | 2 | 0 | 0 | 0 | 25 | 11 | 59.1 | ⋯ | 5.92 | 0.39 | 6.41 | 6.77 | -0.49 | 7500 | 3100 | 2297.618 | -673.0981 | -53.20351 |
150 | 노경은 | 롯데 | 0 | 2 | 0 | 0 | 0 | 9 | 2 | 14.2 | ⋯ | 11.66 | -0.83 | 8.03 | 8.29 | -0.61 | 10000 | 16000 | 11858.971 | 9953.0416 | 11626.38686 |
151 | 김승현 | 삼성 | 0 | 3 | 0 | 1 | 0 | 41 | 0 | 43.2 | ⋯ | 5.77 | -0.40 | 6.87 | 6.95 | -0.70 | 4000 | 2900 | 2149.246 | -3065.3516 | -1610.41555 |
152 | 류희운 | KT | 4 | 4 | 0 | 0 | 0 | 24 | 14 | 81.0 | ⋯ | 7.67 | -0.68 | 7.60 | 7.81 | -1.01 | 4000 | 3000 | 2223.298 | -5132.3247 | -3236.44776 |
# Narrow `filtered_df` to the player name, the actual 2018 salary, the model
# prediction (`new_data`) and the 2017 salary, in that column order.
df <- filtered_df[, c("선수명", "연봉.2018.", "new_data", "연봉.2017.")]
df
선수명 | 연봉.2018. | new_data | 연봉.2017. | |
---|---|---|---|---|
<chr> | <int> | <dbl> | <int> | |
1 | 켈리 | 140000 | 120079.644 | 85000 |
2 | 소사 | 120000 | 83760.504 | 50000 |
3 | 양현종 | 230000 | 175521.573 | 150000 |
5 | 레일리 | 111000 | 106098.401 | 85000 |
6 | 피어밴드 | 85000 | 55938.336 | 35000 |
7 | 고영표 | 11500 | 26731.808 | 5200 |
9 | 함덕주 | 16000 | 28840.862 | 7000 |
13 | 임기영 | 13000 | 18557.121 | 3100 |
14 | 박세웅 | 25000 | 24286.458 | 10000 |
15 | 백정현 | 15500 | 21705.856 | 10000 |
17 | 류제국 | 29000 | 45256.386 | 35000 |
19 | 임찬규 | 11500 | 17218.543 | 6500 |
22 | 윤희상 | 13000 | 24012.479 | 15000 |
23 | 원종현 | 18500 | 21354.395 | 14000 |
24 | 배영수 | 50000 | 60809.360 | 55000 |
25 | 박종훈 | 20000 | 18627.133 | 10000 |
26 | 이상화 | 10000 | 11284.396 | 4500 |
27 | 김진성 | 23000 | 23499.001 | 18000 |
28 | 이민호 | 18800 | 21500.092 | 16000 |
29 | 이재학 | 19000 | 25216.046 | 20000 |
30 | 김강률 | 15000 | 11877.920 | 6200 |
31 | 김재윤 | 11000 | 13693.121 | 9000 |
32 | 김원중 | 6300 | 9721.380 | 3000 |
33 | 박진형 | 10500 | 10832.990 | 6000 |
34 | 윤규진 | 21000 | 22447.697 | 18000 |
35 | 김재영 | 5300 | 7824.809 | 3000 |
36 | 신정락 | 10500 | 10957.933 | 7500 |
38 | 심창민 | 23000 | 23384.860 | 21000 |
39 | 정찬헌 | 9500 | 7904.678 | 4500 |
40 | 임창민 | 25000 | 24703.222 | 22500 |
⋮ | ⋮ | ⋮ | ⋮ | ⋮ |
118 | 배민관 | 3200 | 5349.37842 | 3000 |
119 | 윤근영 | 4200 | 1957.02839 | 4000 |
120 | 강장산 | 3300 | 1942.46057 | 3000 |
124 | 김민우 | 3600 | 3912.29634 | 3800 |
125 | 이현호 | 5200 | 4053.21274 | 6000 |
127 | 배제성 | 3000 | 1718.55530 | 2700 |
128 | 홍상삼 | 9000 | 11344.83558 | 12500 |
129 | 이태양 | 7300 | 6449.57632 | 8300 |
130 | 김진우 | 6000 | 10343.45037 | 12000 |
131 | 이영하 | 4200 | 566.08017 | 2700 |
133 | 김동호 | 6000 | 3362.37197 | 5000 |
134 | 김윤동 | 15000 | 2213.60459 | 4700 |
135 | 정인욱 | 5700 | 4651.09396 | 7200 |
136 | 송창식 | 24000 | 17836.44986 | 22000 |
137 | 배재환 | 3000 | 4905.41920 | 4000 |
138 | 이정민 | 10000 | 11939.78335 | 15000 |
139 | 최동환 | 6500 | 2934.46753 | 6000 |
140 | 이종혁 | 3200 | 1059.70614 | 2700 |
141 | 홍성용 | 6800 | 3578.68637 | 6300 |
142 | 정영일 | 3000 | 4774.13857 | 4000 |
143 | 김지용 | 9000 | 5340.39499 | 10000 |
144 | 최금강 | 12500 | 9744.24048 | 14000 |
145 | 김범수 | 3600 | 1838.44631 | 3300 |
146 | 이승현 | 7000 | 3450.35583 | 6200 |
147 | 주권 | 7600 | 3280.22845 | 7500 |
148 | 장민재 | 7100 | 3919.93355 | 8100 |
149 | 정용운 | 7500 | -53.20351 | 3100 |
150 | 노경은 | 10000 | 11626.38686 | 16000 |
151 | 김승현 | 4000 | -1610.41555 | 2900 |
152 | 류희운 | 4000 | -3236.44776 | 3000 |
# Rows of `df` ordered by 2018 salary, highest first.
sorted_df <- df[order(df$"연봉.2018.", decreasing = TRUE), ]
sorted_df
선수명 | 연봉.2018. | new_data | 연봉.2017. | |
---|---|---|---|---|
<chr> | <int> | <dbl> | <int> | |
3 | 양현종 | 230000 | 175521.573 | 150000 |
1 | 켈리 | 140000 | 120079.644 | 85000 |
2 | 소사 | 120000 | 83760.504 | 50000 |
5 | 레일리 | 111000 | 106098.401 | 85000 |
6 | 피어밴드 | 85000 | 55938.336 | 35000 |
24 | 배영수 | 50000 | 60809.360 | 55000 |
41 | 안영명 | 35000 | 22618.863 | 20000 |
68 | 채병용 | 30000 | 22782.563 | 25000 |
17 | 류제국 | 29000 | 45256.386 | 35000 |
14 | 박세웅 | 25000 | 24286.458 | 10000 |
40 | 임창민 | 25000 | 24703.222 | 22500 |
56 | 박정진 | 25000 | 31579.449 | 33000 |
136 | 송창식 | 24000 | 17836.450 | 22000 |
27 | 김진성 | 23000 | 23499.001 | 18000 |
38 | 심창민 | 23000 | 23384.860 | 21000 |
34 | 윤규진 | 21000 | 22447.697 | 18000 |
25 | 박종훈 | 20000 | 18627.133 | 10000 |
44 | 심수창 | 20000 | 26147.317 | 25000 |
50 | 장원삼 | 20000 | 71627.777 | 75000 |
29 | 이재학 | 19000 | 25216.046 | 20000 |
43 | 진해수 | 19000 | 12816.751 | 11000 |
28 | 이민호 | 18800 | 21500.092 | 16000 |
23 | 원종현 | 18500 | 21354.395 | 14000 |
111 | 박희수 | 18500 | 18753.930 | 21000 |
9 | 함덕주 | 16000 | 28840.862 | 7000 |
15 | 백정현 | 15500 | 21705.856 | 10000 |
30 | 김강률 | 15000 | 11877.920 | 6200 |
60 | 권오준 | 15000 | 9102.069 | 10500 |
63 | 김사율 | 15000 | 18303.363 | 20000 |
134 | 김윤동 | 15000 | 2213.605 | 4700 |
⋮ | ⋮ | ⋮ | ⋮ | ⋮ |
79 | 황수범 | 3800 | 2999.8065 | 2700 |
124 | 김민우 | 3600 | 3912.2963 | 3800 |
145 | 김범수 | 3600 | 1838.4463 | 3300 |
92 | 이형범 | 3500 | 2179.4887 | 2700 |
93 | 임현준 | 3500 | 2274.2215 | 3200 |
110 | 안성무 | 3500 | 3410.5937 | 2800 |
99 | 박치국 | 3400 | 1276.1858 | 2700 |
102 | 허건엽 | 3400 | 1872.1189 | 3300 |
105 | 고봉재 | 3300 | 7100.9320 | 4400 |
112 | 김시현 | 3300 | 2464.9378 | 2700 |
120 | 강장산 | 3300 | 1942.4606 | 3000 |
118 | 배민관 | 3200 | 5349.3784 | 3000 |
140 | 이종혁 | 3200 | 1059.7061 | 2700 |
70 | 최지광 | 3100 | 3902.9955 | 2700 |
73 | 박세진 | 3100 | 2753.4578 | 3000 |
101 | 박상원 | 3100 | 1106.5249 | 2700 |
81 | 김대유 | 3000 | 2190.6374 | 2700 |
84 | 손주영 | 3000 | -324.0806 | 2700 |
87 | 김진영 | 3000 | 129.3510 | 2700 |
98 | 임진우 | 3000 | 2904.8408 | 3200 |
109 | 서균 | 3000 | 229.6544 | 2700 |
127 | 배제성 | 3000 | 1718.5553 | 2700 |
137 | 배재환 | 3000 | 4905.4192 | 4000 |
142 | 정영일 | 3000 | 4774.1386 | 4000 |
96 | 김도영 | 2900 | 779.3548 | 2700 |
97 | 조근종 | 2900 | -2275.0675 | 2700 |
108 | 안규현 | 2900 | 6770.8590 | 2800 |
115 | 김종훈 | 2900 | 8543.2324 | 2800 |
83 | 장지훈 | 2800 | -3354.1873 | 2700 |
106 | 이수민 | 2800 | 2029.4247 | 3000 |
이상치 제거
# Drop the rows at positions 1, 2, 3, 6, 21, 50 and 98 of `picher`
# (negative indices remove rows); all other rows and all columns are kept.
picher2 <- picher[-c(1, 2, 3, 6, 21, 50, 98), ]
picher2
선수명 | 팀명 | 승 | 패 | 세 | 홀드 | 블론 | 경기 | 선발 | 이닝 | ⋯ | 홈런.9 | BABIP | LOB. | ERA | RA9.WAR | FIP | kFIP | WAR | 연봉.2018. | 연봉.2017. | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
<chr> | <chr> | <int> | <int> | <int> | <int> | <int> | <int> | <int> | <dbl> | ⋯ | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <int> | <int> | |
4 | 차우찬 | LG | 10 | 7 | 0 | 0 | 0 | 28 | 28 | 175.2 | ⋯ | 1.02 | 0.298 | 75.0 | 3.43 | 6.11 | 4.20 | 4.03 | 4.63 | 100000 | 100000 |
5 | 레일리 | 롯데 | 13 | 7 | 0 | 0 | 0 | 30 | 30 | 187.1 | ⋯ | 0.91 | 0.323 | 74.1 | 3.80 | 6.13 | 4.36 | 4.31 | 4.38 | 111000 | 85000 |
7 | 고영표 | KT | 8 | 12 | 0 | 1 | 0 | 25 | 24 | 141.2 | ⋯ | 0.83 | 0.362 | 64.6 | 5.08 | 2.97 | 3.88 | 3.78 | 3.87 | 11500 | 5200 |
8 | 장원준 | 두산 | 14 | 9 | 0 | 0 | 0 | 29 | 29 | 180.1 | ⋯ | 0.60 | 0.293 | 75.8 | 3.14 | 7.28 | 4.26 | 4.35 | 3.85 | 100000 | 100000 |
9 | 함덕주 | 두산 | 9 | 8 | 0 | 2 | 0 | 35 | 24 | 137.1 | ⋯ | 0.52 | 0.321 | 73.1 | 3.67 | 4.99 | 3.91 | 3.67 | 3.78 | 16000 | 7000 |
10 | 팻딘 | KIA | 9 | 7 | 0 | 0 | 0 | 30 | 29 | 176.0 | ⋯ | 1.12 | 0.353 | 76.5 | 4.14 | 5.66 | 4.65 | 4.61 | 3.64 | 70000 | 70000 |
11 | 윤성환 | 삼성 | 12 | 9 | 0 | 0 | 0 | 28 | 28 | 174.1 | ⋯ | 1.14 | 0.305 | 72.7 | 4.28 | 5.36 | 4.78 | 4.80 | 3.03 | 80000 | 80000 |
12 | 유희관 | 두산 | 11 | 6 | 0 | 1 | 0 | 30 | 29 | 188.2 | ⋯ | 0.95 | 0.329 | 69.7 | 4.53 | 4.79 | 4.78 | 4.97 | 2.89 | 50000 | 50000 |
13 | 임기영 | KIA | 8 | 6 | 0 | 0 | 0 | 23 | 19 | 118.1 | ⋯ | 0.68 | 0.331 | 72.3 | 3.65 | 4.25 | 4.07 | 4.19 | 2.79 | 13000 | 3100 |
14 | 박세웅 | 롯데 | 12 | 6 | 0 | 0 | 0 | 28 | 28 | 171.1 | ⋯ | 1.10 | 0.290 | 78.3 | 3.68 | 5.92 | 5.07 | 5.14 | 2.54 | 25000 | 10000 |
15 | 백정현 | 삼성 | 8 | 4 | 0 | 3 | 0 | 35 | 14 | 100.2 | ⋯ | 0.98 | 0.336 | 73.2 | 4.38 | 3.01 | 4.51 | 4.34 | 2.25 | 15500 | 10000 |
16 | 송승준 | 롯데 | 11 | 5 | 0 | 1 | 1 | 30 | 22 | 130.1 | ⋯ | 1.31 | 0.298 | 75.8 | 4.21 | 4.22 | 4.91 | 4.77 | 2.20 | 40000 | 40000 |
17 | 류제국 | LG | 8 | 6 | 0 | 0 | 0 | 25 | 25 | 131.1 | ⋯ | 0.69 | 0.327 | 63.5 | 5.35 | 1.53 | 4.79 | 4.84 | 2.15 | 29000 | 35000 |
18 | 우규민 | 삼성 | 7 | 10 | 0 | 0 | 0 | 27 | 25 | 133.0 | ⋯ | 1.29 | 0.334 | 60.6 | 5.21 | 1.48 | 4.95 | 4.98 | 2.14 | 70000 | 70000 |
19 | 임찬규 | LG | 6 | 10 | 0 | 0 | 0 | 27 | 26 | 124.1 | ⋯ | 0.87 | 0.335 | 71.3 | 4.63 | 3.15 | 4.81 | 4.79 | 2.04 | 11500 | 6500 |
20 | 손승락 | 롯데 | 1 | 3 | 37 | 0 | 5 | 61 | 0 | 62.0 | ⋯ | 1.02 | 0.311 | 89.9 | 2.18 | 3.91 | 3.69 | 3.37 | 1.82 | 70000 | 70000 |
22 | 윤희상 | SK | 6 | 7 | 0 | 0 | 0 | 23 | 22 | 120.0 | ⋯ | 1.20 | 0.340 | 63.1 | 6.00 | 1.52 | 5.13 | 5.22 | 1.80 | 13000 | 15000 |
23 | 원종현 | NC | 3 | 6 | 0 | 22 | 0 | 68 | 0 | 80.0 | ⋯ | 0.45 | 0.336 | 63.9 | 4.39 | 2.02 | 3.60 | 3.52 | 1.71 | 18500 | 14000 |
24 | 배영수 | 한화 | 7 | 8 | 0 | 0 | 0 | 25 | 25 | 128.0 | ⋯ | 1.20 | 0.328 | 66.4 | 5.06 | 2.47 | 5.10 | 5.21 | 1.68 | 50000 | 55000 |
25 | 박종훈 | SK | 12 | 7 | 0 | 1 | 0 | 29 | 28 | 151.1 | ⋯ | 0.95 | 0.287 | 75.3 | 4.10 | 4.31 | 5.38 | 5.55 | 1.62 | 20000 | 10000 |
26 | 이상화 | KT | 4 | 3 | 6 | 4 | 1 | 70 | 0 | 66.0 | ⋯ | 0.55 | 0.337 | 68.0 | 3.95 | 2.43 | 3.57 | 3.45 | 1.54 | 10000 | 4500 |
27 | 김진성 | NC | 10 | 6 | 0 | 15 | 2 | 69 | 0 | 89.2 | ⋯ | 1.30 | 0.268 | 76.1 | 3.61 | 3.20 | 4.30 | 3.92 | 1.47 | 23000 | 18000 |
28 | 이민호 | NC | 5 | 1 | 3 | 6 | 1 | 60 | 3 | 88.2 | ⋯ | 1.02 | 0.312 | 71.4 | 4.06 | 1.97 | 4.41 | 4.22 | 1.38 | 18800 | 16000 |
29 | 이재학 | NC | 5 | 7 | 0 | 0 | 0 | 28 | 23 | 119.0 | ⋯ | 1.66 | 0.350 | 65.4 | 5.67 | 1.04 | 5.53 | 5.35 | 1.31 | 19000 | 20000 |
30 | 김강률 | 두산 | 7 | 2 | 7 | 12 | 1 | 70 | 0 | 89.0 | ⋯ | 0.61 | 0.316 | 73.5 | 3.44 | 2.68 | 4.15 | 4.09 | 1.27 | 15000 | 6200 |
31 | 김재윤 | KT | 3 | 5 | 15 | 0 | 4 | 41 | 0 | 37.1 | ⋯ | 0.24 | 0.348 | 54.3 | 5.79 | 0.73 | 3.15 | 3.05 | 1.24 | 11000 | 9000 |
32 | 김원중 | 롯데 | 7 | 8 | 0 | 0 | 0 | 24 | 24 | 107.1 | ⋯ | 1.09 | 0.347 | 70.3 | 5.70 | 2.22 | 5.53 | 5.56 | 1.23 | 6300 | 3000 |
33 | 박진형 | 롯데 | 4 | 4 | 2 | 10 | 3 | 45 | 9 | 88.0 | ⋯ | 0.82 | 0.349 | 68.9 | 5.11 | 1.70 | 4.43 | 4.14 | 1.12 | 10500 | 6000 |
34 | 윤규진 | 한화 | 8 | 7 | 0 | 2 | 0 | 36 | 18 | 119.0 | ⋯ | 1.44 | 0.328 | 72.1 | 5.22 | 2.23 | 5.46 | 5.42 | 1.09 | 21000 | 18000 |
35 | 김재영 | 한화 | 5 | 7 | 0 | 0 | 1 | 20 | 15 | 85.1 | ⋯ | 0.95 | 0.337 | 67.8 | 4.54 | 1.60 | 5.00 | 5.23 | 1.06 | 5300 | 3000 |
⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋱ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ |
123 | 정재원 | 한화 | 0 | 2 | 0 | 1 | 1 | 14 | 0 | 21.0 | ⋯ | 0.00 | 0.315 | 69.2 | 3.43 | 0.12 | 5.16 | 5.73 | -0.12 | 4000 | 4000 |
124 | 김민우 | 한화 | 0 | 0 | 0 | 0 | 0 | 4 | 2 | 7.1 | ⋯ | 1.23 | 0.613 | 50.7 | 17.18 | -0.52 | 7.83 | 8.32 | -0.12 | 3600 | 3800 |
125 | 이현호 | 두산 | 1 | 0 | 0 | 0 | 0 | 24 | 2 | 30.0 | ⋯ | 1.80 | 0.389 | 65.3 | 5.70 | -0.51 | 6.47 | 6.57 | -0.16 | 5200 | 6000 |
126 | 권혁 | 한화 | 1 | 3 | 0 | 11 | 1 | 37 | 0 | 31.1 | ⋯ | 1.72 | 0.311 | 67.3 | 6.32 | 0.04 | 6.48 | 6.78 | -0.16 | 45000 | 45000 |
127 | 배제성 | KT | 0 | 0 | 0 | 0 | 0 | 21 | 1 | 32.0 | ⋯ | 1.12 | 0.378 | 59.6 | 8.72 | -0.10 | 6.39 | 6.59 | -0.17 | 3000 | 2700 |
128 | 홍상삼 | 두산 | 1 | 1 | 0 | 0 | 0 | 11 | 2 | 17.0 | ⋯ | 1.06 | 0.300 | 53.0 | 7.94 | -0.52 | 6.73 | 7.22 | -0.18 | 9000 | 12500 |
129 | 이태양 | 한화 | 3 | 6 | 0 | 0 | 0 | 16 | 12 | 59.0 | ⋯ | 1.83 | 0.330 | 64.3 | 7.17 | 0.05 | 6.77 | 7.04 | -0.18 | 7300 | 8300 |
130 | 김진우 | KIA | 2 | 6 | 0 | 0 | 1 | 14 | 8 | 36.1 | ⋯ | 1.24 | 0.376 | 65.3 | 7.93 | -0.35 | 6.68 | 7.04 | -0.19 | 6000 | 12000 |
131 | 이영하 | 두산 | 3 | 3 | 0 | 0 | 0 | 20 | 3 | 35.2 | ⋯ | 2.02 | 0.333 | 78.7 | 5.55 | 0.59 | 6.79 | 6.86 | -0.19 | 4200 | 2700 |
132 | 최성영 | NC | 0 | 0 | 0 | 0 | 0 | 7 | 0 | 11.1 | ⋯ | 3.18 | 0.361 | 68.6 | 9.53 | -0.26 | 9.29 | 9.48 | -0.20 | 2900 | 2900 |
133 | 김동호 | 삼성 | 0 | 1 | 0 | 0 | 0 | 20 | 1 | 36.0 | ⋯ | 0.25 | 0.381 | 67.0 | 6.75 | -0.07 | 5.46 | 5.96 | -0.20 | 6000 | 5000 |
134 | 김윤동 | KIA | 7 | 4 | 11 | 6 | 6 | 65 | 1 | 80.1 | ⋯ | 1.12 | 0.267 | 75.0 | 4.59 | 1.89 | 5.54 | 5.53 | -0.20 | 15000 | 4700 |
135 | 정인욱 | 삼성 | 1 | 4 | 0 | 0 | 0 | 9 | 7 | 32.0 | ⋯ | 2.53 | 0.359 | 52.5 | 9.84 | -0.43 | 7.39 | 7.36 | -0.20 | 5700 | 7200 |
136 | 송창식 | 한화 | 5 | 6 | 0 | 15 | 6 | 63 | 0 | 73.1 | ⋯ | 1.60 | 0.303 | 61.0 | 6.63 | -0.74 | 5.68 | 5.59 | -0.22 | 24000 | 22000 |
137 | 배재환 | NC | 0 | 1 | 0 | 0 | 0 | 3 | 1 | 8.0 | ⋯ | 4.50 | 0.346 | 82.1 | 9.00 | -0.16 | 11.48 | 11.73 | -0.23 | 3000 | 4000 |
138 | 이정민 | 롯데 | 3 | 1 | 0 | 2 | 1 | 24 | 0 | 26.2 | ⋯ | 1.69 | 0.307 | 71.4 | 5.40 | 0.19 | 6.47 | 6.83 | -0.24 | 10000 | 15000 |
139 | 최동환 | LG | 1 | 2 | 1 | 5 | 2 | 35 | 0 | 38.0 | ⋯ | 2.13 | 0.273 | 71.1 | 5.68 | -0.05 | 6.76 | 6.77 | -0.24 | 6500 | 6000 |
140 | 이종혁 | KT | 2 | 0 | 0 | 0 | 0 | 16 | 0 | 19.0 | ⋯ | 1.42 | 0.324 | 67.0 | 6.63 | -0.09 | 7.10 | 7.57 | -0.25 | 3200 | 2700 |
141 | 홍성용 | KT | 0 | 2 | 0 | 1 | 0 | 37 | 1 | 39.0 | ⋯ | 1.62 | 0.294 | 68.3 | 6.23 | 0.25 | 6.40 | 6.63 | -0.28 | 6800 | 6300 |
142 | 정영일 | SK | 0 | 0 | 0 | 0 | 0 | 9 | 0 | 8.0 | ⋯ | 3.38 | 0.281 | 47.3 | 10.12 | -0.61 | 10.73 | 11.37 | -0.30 | 3000 | 4000 |
143 | 김지용 | LG | 4 | 3 | 3 | 8 | 4 | 53 | 0 | 53.0 | ⋯ | 2.21 | 0.280 | 73.9 | 5.09 | 0.28 | 6.30 | 6.27 | -0.38 | 9000 | 10000 |
144 | 최금강 | NC | 5 | 3 | 0 | 0 | 0 | 39 | 13 | 89.2 | ⋯ | 1.51 | 0.320 | 57.4 | 7.33 | -0.27 | 6.31 | 6.58 | -0.41 | 12500 | 14000 |
145 | 김범수 | 한화 | 0 | 4 | 0 | 0 | 0 | 15 | 5 | 31.0 | ⋯ | 2.03 | 0.340 | 66.4 | 8.71 | -0.42 | 8.15 | 8.52 | -0.42 | 3600 | 3300 |
146 | 이승현 | 삼성 | 2 | 0 | 0 | 0 | 0 | 30 | 0 | 31.2 | ⋯ | 2.56 | 0.276 | 81.9 | 5.12 | 0.31 | 8.03 | 8.03 | -0.44 | 7000 | 6200 |
147 | 주권 | KT | 5 | 6 | 1 | 3 | 2 | 39 | 12 | 81.2 | ⋯ | 1.65 | 0.314 | 63.2 | 6.61 | -0.02 | 6.33 | 6.54 | -0.46 | 7600 | 7500 |
148 | 장민재 | 한화 | 2 | 5 | 0 | 0 | 2 | 33 | 5 | 62.2 | ⋯ | 1.58 | 0.355 | 56.9 | 7.76 | -1.21 | 6.21 | 6.48 | -0.47 | 7100 | 8100 |
149 | 정용운 | KIA | 3 | 2 | 0 | 0 | 0 | 25 | 11 | 59.1 | ⋯ | 1.06 | 0.263 | 65.4 | 5.92 | 0.39 | 6.41 | 6.77 | -0.49 | 7500 | 3100 |
150 | 노경은 | 롯데 | 0 | 2 | 0 | 0 | 0 | 9 | 2 | 14.2 | ⋯ | 2.45 | 0.382 | 52.8 | 11.66 | -0.83 | 8.03 | 8.29 | -0.61 | 10000 | 16000 |
151 | 김승현 | 삼성 | 0 | 3 | 0 | 1 | 0 | 41 | 0 | 43.2 | ⋯ | 1.44 | 0.341 | 73.9 | 5.77 | -0.40 | 6.87 | 6.95 | -0.70 | 4000 | 2900 |
152 | 류희운 | KT | 4 | 4 | 0 | 0 | 0 | 24 | 14 | 81.0 | ⋯ | 2.00 | 0.312 | 65.3 | 7.67 | -0.68 | 7.60 | 7.81 | -1.01 | 4000 | 3000 |
# Full OLS model: regress 2018 salary on every column of picher2 except the
# team (팀명) and player-name (선수명) columns, then print the fit summary.
aa <- lm(연봉.2018. ~ . - 팀명 - 선수명, data = picher2)
summary(aa)
Call:
lm(formula = 연봉.2018. ~ . - 팀명 - 선수명, data = picher2)
Residuals:
Min 1Q Median 3Q Max
-6719.3 -1077.4 65.0 796.1 21824.2
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -1180.5580 6767.2703 -0.174 0.862
승 -11.5761 219.0412 -0.053 0.958
패 81.9489 210.8035 0.389 0.698
세 -37.2721 103.6610 -0.360 0.720
홀드 80.5730 116.2315 0.693 0.489
블론 370.1732 278.5525 1.329 0.186
경기 -32.2197 53.8240 -0.599 0.551
선발 -174.3314 168.9477 -1.032 0.304
이닝 52.2789 42.2955 1.236 0.219
삼진.9 201.3240 879.7268 0.229 0.819
볼넷.9 340.5238 826.0454 0.412 0.681
홈런.9 1665.6462 5202.1586 0.320 0.749
BABIP 114.3823 5557.0488 0.021 0.984
LOB. 27.0594 55.5030 0.488 0.627
ERA 188.2818 297.7530 0.632 0.528
RA9.WAR 1323.6685 595.3892 2.223 0.028 *
FIP -6291.3374 16484.2058 -0.382 0.703
kFIP 4975.4255 13255.6236 0.375 0.708
WAR 477.9530 756.5703 0.632 0.529
연봉.2017. 0.8897 0.0197 45.155 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 3321 on 125 degrees of freedom
Multiple R-squared: 0.9762, Adjusted R-squared: 0.9726
F-statistic: 269.9 on 19 and 125 DF, p-value: < 2.2e-16
# Reduced model: keep five predictors and use 2018 salary as the response.
X <- picher2[c('FIP', 'WAR', '볼넷.9', '삼진.9', '연봉.2017.')]
y <- picher2$"연봉.2018."

# Fix the RNG seed so the train/test partition is reproducible.
set.seed(20)
train_indices <- createDataPartition(y, p = 0.8, list = FALSE) # use 80% as the training set
X_train <- X[train_indices, ] # training predictors
y_train <- y[train_indices] # training response
X_test <- X[-train_indices, ] # test predictors
y_test <- y[-train_indices] # test response

# Fit on the training rows only; cbind() puts the response next to the
# predictors so the `.` in the formula expands to the five X columns.
model2 <- lm(y_train ~ ., data = cbind(X_train, y_train))
summary(model2)
Call:
lm(formula = y_train ~ ., data = cbind(X_train, y_train))
Residuals:
Min 1Q Median 3Q Max
-8171.3 -1022.5 -169.3 1009.7 9835.0
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1947.46270 1101.98299 1.767 0.0799 .
FIP -130.36447 180.71549 -0.721 0.4722
WAR 1933.30721 331.27392 5.836 5.29e-08 ***
볼넷.9 -95.23174 143.48508 -0.664 0.5082
삼진.9 81.85525 94.20305 0.869 0.3867
연봉.2017. 0.89859 0.01633 55.034 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 2764 on 112 degrees of freedom
Multiple R-squared: 0.9816, Adjusted R-squared: 0.9808
F-statistic: 1194 on 5 and 112 DF, p-value: < 2.2e-16
# Predict the held-out test rows with the reduced model, then re-print the fit.
y_pred <- predict(model2, newdata = X_test)
summary(model2)
Call:
lm(formula = y_train ~ ., data = cbind(X_train, y_train))
Residuals:
Min 1Q Median 3Q Max
-8171.3 -1022.5 -169.3 1009.7 9835.0
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1947.46270 1101.98299 1.767 0.0799 .
FIP -130.36447 180.71549 -0.721 0.4722
WAR 1933.30721 331.27392 5.836 5.29e-08 ***
볼넷.9 -95.23174 143.48508 -0.664 0.5082
삼진.9 81.85525 94.20305 0.869 0.3867
연봉.2017. 0.89859 0.01633 55.034 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 2764 on 112 degrees of freedom
Multiple R-squared: 0.9816, Adjusted R-squared: 0.9808
F-statistic: 1194 on 5 and 112 DF, p-value: < 2.2e-16
# Draw the diagnostic plots for the fitted lm object model2.
plot(model2)
## Shapiro-Wilk Test
## H0 : normal distribution vs. H1 : not H0
# Test the residuals of model2 for normality.
shapiro.test(resid(model2))
Shapiro-Wilk normality test
data: resid(model2)
W = 0.92186, p-value = 3.661e-06
귀무가설 기각 (p-value = 3.661e-06 < 0.05): 잔차가 정규분포를 따른다고 보기 어렵다.
# lmtest is loaded here for bptest().
library(lmtest)
### Homoscedasticity
## H0 : homoscedasticity vs. H1 : heteroscedasticity
bptest(model2)
studentized Breusch-Pagan test
data: model2
BP = 2.7624, df = 5, p-value = 0.7366
p-value = 0.7366 > 0.05이므로 귀무가설(등분산)을 기각할 수 없다. 즉, 잔차가 이분산이라는 증거는 없다.
# Repeat of the Breusch-Pagan test on the same model2.
bptest(model2)
studentized Breusch-Pagan test
data: model2
BP = 2.7624, df = 5, p-value = 0.7366
# Predict salary for every row of X (training and test rows alike) and attach
# the predictions to picher2 as the column new_data.
y_pred <- predict(model2, newdata = X)
picher2 <- cbind(picher2, new_data = y_pred)

# Sort picher2 itself by descending 2018 salary. The ordering is computed on
# picher2, so it must index picher2; indexing picher would pick unrelated rows
# whenever the two data frames have different row counts.
sorted_picher2 <- picher2[order(-picher2$"연봉.2018."), ]

# Keep only players whose salary changed between 2017 and 2018.
filtered_df2 <- picher2[picher2$"연봉.2018." != picher2$"연봉.2017.", ]

# Compare actual 2018 salary, predicted salary and 2017 salary per player,
# highest 2018 salary first.
df <- filtered_df2[, c("선수명", "연봉.2018.", "new_data", "연봉.2017.")]
sorted_df <- df[order(df$"연봉.2018.", decreasing = TRUE), ]
summary(model2)
Call:
lm(formula = y_train ~ ., data = cbind(X_train, y_train))
Residuals:
Min 1Q Median 3Q Max
-8171.3 -1022.5 -169.3 1009.7 9835.0
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1947.46270 1101.98299 1.767 0.0799 .
FIP -130.36447 180.71549 -0.721 0.4722
WAR 1933.30721 331.27392 5.836 5.29e-08 ***
볼넷.9 -95.23174 143.48508 -0.664 0.5082
삼진.9 81.85525 94.20305 0.869 0.3867
연봉.2017. 0.89859 0.01633 55.034 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 2764 on 112 degrees of freedom
Multiple R-squared: 0.9816, Adjusted R-squared: 0.9808
F-statistic: 1194 on 5 and 112 DF, p-value: < 2.2e-16
# Print the comparison table: players whose 2018 salary differs from their 2017
# salary, ordered by 2018 salary (descending).
sorted_df
선수명 | 연봉.2018. | new_data | 연봉.2017. | |
---|---|---|---|---|
<chr> | <int> | <dbl> | <int> | |
5 | 레일리 | 111000 | 86639.501 | 85000 |
24 | 배영수 | 50000 | 54254.114 | 55000 |
41 | 안영명 | 35000 | 20988.868 | 20000 |
68 | 채병용 | 30000 | 24555.703 | 25000 |
17 | 류제국 | 29000 | 37171.350 | 35000 |
14 | 박세웅 | 25000 | 15406.471 | 10000 |
40 | 임창민 | 25000 | 23649.432 | 22500 |
56 | 박정진 | 25000 | 32048.281 | 33000 |
136 | 송창식 | 24000 | 20807.581 | 22000 |
27 | 김진성 | 23000 | 20950.905 | 18000 |
38 | 심창민 | 23000 | 22494.249 | 21000 |
34 | 윤규진 | 21000 | 19775.968 | 18000 |
25 | 박종훈 | 20000 | 13538.895 | 10000 |
44 | 심수창 | 20000 | 25556.492 | 25000 |
29 | 이재학 | 19000 | 22160.236 | 20000 |
43 | 진해수 | 19000 | 13232.667 | 11000 |
28 | 이민호 | 18800 | 18842.143 | 16000 |
23 | 원종현 | 18500 | 17795.806 | 14000 |
111 | 박희수 | 18500 | 20005.072 | 21000 |
9 | 함덕주 | 16000 | 15382.469 | 7000 |
15 | 백정현 | 15500 | 15076.326 | 10000 |
30 | 김강률 | 15000 | 9728.566 | 6200 |
60 | 권오준 | 15000 | 11743.714 | 10500 |
63 | 김사율 | 15000 | 20080.039 | 20000 |
134 | 김윤동 | 15000 | 5165.007 | 4700 |
71 | 임정우 | 14000 | 22059.786 | 22000 |
78 | 박정배 | 14000 | 8806.466 | 8000 |
13 | 임기영 | 13000 | 9920.273 | 3100 |
22 | 윤희상 | 13000 | 18504.773 | 15000 |
42 | 심동섭 | 13000 | 13188.418 | 11000 |
⋮ | ⋮ | ⋮ | ⋮ | ⋮ |
152 | 류희운 | 4000 | 1612.039 | 3000 |
79 | 황수범 | 3800 | 3785.906 | 2700 |
124 | 김민우 | 3600 | 3911.050 | 3800 |
145 | 김범수 | 3600 | 2755.221 | 3300 |
92 | 이형범 | 3500 | 3704.690 | 2700 |
93 | 임현준 | 3500 | 4226.944 | 3200 |
110 | 안성무 | 3500 | 3170.363 | 2800 |
99 | 박치국 | 3400 | 3825.389 | 2700 |
102 | 허건엽 | 3400 | 4263.368 | 3300 |
105 | 고봉재 | 3300 | 4147.499 | 4400 |
112 | 김시현 | 3300 | 3306.331 | 2700 |
120 | 강장산 | 3300 | 3606.075 | 3000 |
118 | 배민관 | 3200 | 2722.062 | 3000 |
140 | 이종혁 | 3200 | 2901.344 | 2700 |
70 | 최지광 | 3100 | 3961.410 | 2700 |
73 | 박세진 | 3100 | 4478.896 | 3000 |
101 | 박상원 | 3100 | 3847.625 | 2700 |
81 | 김대유 | 3000 | 3887.465 | 2700 |
84 | 손주영 | 3000 | 4599.176 | 2700 |
87 | 김진영 | 3000 | 4582.740 | 2700 |
109 | 서균 | 3000 | 3926.074 | 2700 |
127 | 배제성 | 3000 | 3079.587 | 2700 |
137 | 배재환 | 3000 | 3326.123 | 4000 |
142 | 정영일 | 3000 | 3104.391 | 4000 |
96 | 김도영 | 2900 | 3887.404 | 2700 |
97 | 조근종 | 2900 | 4909.511 | 2700 |
108 | 안규현 | 2900 | 2330.828 | 2800 |
115 | 김종훈 | 2900 | 1665.021 | 2800 |
83 | 장지훈 | 2800 | 5644.555 | 2700 |
106 | 이수민 | 2800 | 3896.889 | 3000 |
다 안되노
번외 (WAR을 y로)
# Side experiment: use WAR as the response and every other column of dt as a
# predictor, then print the fit summary.
model_ <- lm(WAR ~ ., data = dt)
summary(model_)
Call:
lm(formula = WAR ~ ., data = dt)
Residuals:
Min 1Q Median 3Q Max
-1.25837 -0.18387 -0.00443 0.17470 1.58012
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 8.632e-01 8.184e-01 1.055 0.2935
승 7.000e-04 2.446e-02 0.029 0.9772
패 3.924e-02 2.449e-02 1.602 0.1115
세 1.444e-03 1.218e-02 0.119 0.9058
홀드 1.016e-02 1.417e-02 0.717 0.4747
블론 -5.186e-02 3.392e-02 -1.529 0.1287
경기 -1.216e-02 6.489e-03 -1.874 0.0631 .
선발 -2.104e-02 2.081e-02 -1.011 0.3139
이닝 7.125e-03 5.161e-03 1.381 0.1698
삼진.9 2.483e-02 1.055e-01 0.235 0.8142
볼넷.9 1.254e-02 1.014e-01 0.124 0.9017
홈런.9 -3.549e-03 6.346e-01 -0.006 0.9955
BABIP 9.050e-01 6.636e-01 1.364 0.1749
LOB. -1.276e-02 5.728e-03 -2.227 0.0277 *
ERA -3.164e-02 2.542e-02 -1.245 0.2154
RA9.WAR 4.259e-01 5.563e-02 7.655 3.63e-12 ***
FIP -2.926e-02 2.011e+00 -0.015 0.9884
kFIP -2.821e-02 1.613e+00 -0.017 0.9861
연봉.2017. -9.617e-06 3.872e-06 -2.484 0.0143 *
연봉.2018. 1.726e-05 3.608e-06 4.783 4.55e-06 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.413 on 132 degrees of freedom
Multiple R-squared: 0.9147, Adjusted R-squared: 0.9024
F-statistic: 74.5 on 19 and 132 DF, p-value: < 2.2e-16
# Variance inflation factor of each predictor in model_, used to check for
# multicollinearity.
vif(model_)
- 승
- 7.90295946434333
- 패
- 5.20359428013343
- 세
- 3.03700161686219
- 홀드
- 3.62204119070082
- 블론
- 2.71884077341594
- 경기
- 13.9861481235637
- 선발
- 36.4583393437973
- 이닝
- 59.6518045536396
- 삼진.9
- 78.7060221382563
- 볼넷.9
- 50.827473231828
- 홈런.9
- 368.731473824191
- BABIP
- 3.08657710689232
- LOB.
- 3.90285091455687
- ERA
- 9.92892504245132
- RA9.WAR
- 9.31215347437081
- FIP
- 12527.1829609619
- kFIP
- 9046.79675535442
- 연봉.2017.
- 8.32457422927786
- 연봉.2018.
- 11.033305518369
- WAR을 y로 둔 모델의 \(R^2\)값(0.9147)은 위의 연봉 모델(0.9762, 0.9816)보다 높지 않고 오히려 낮게 나왔다.