library(MASS) #lm.ridge
library(car) #vif
library(caret) #예측
library(ggplot2)
library(glmnet) #Ridge, Lasso
library(tidyverse)ref
import
#' List the model terms whose variance inflation factor exceeds a threshold.
#'
#' @param data A fitted model accepted by `vif()`. The argument name is kept
#'   for backward compatibility even though a model, not a data frame, is
#'   what gets passed to `vif()`.
#' @param threshold Single numeric cut-off; terms whose VIF is strictly
#'   greater than this value are returned. Defaults to 10.
#' @return A character vector of term names, of length zero when no term
#'   exceeds `threshold`.
get_high_vif_variables <- function(data, threshold = 10) {
  stopifnot(is.numeric(threshold), length(threshold) == 1L, !is.na(threshold))

  vif_values <- vif(data)

  # vif() returns a matrix (one row per term, GVIF in the first column) when
  # any term has more than one degree of freedom. Subsetting that matrix with
  # a logical mask loses the term names, so reduce it to a named vector first.
  if (is.matrix(vif_values)) {
    vif_values <- stats::setNames(vif_values[, 1L], rownames(vif_values))
  }

  # which() drops NA comparisons instead of producing NA names.
  names(vif_values)[which(vif_values > threshold)]
}
# Dataset (데이터셋)
picher <- read.csv("~/Dropbox/coco/posts/Applied statistics/picher_stats_2017.csv")
head(picher)| 선수명 | 팀명 | 승 | 패 | 세 | 홀드 | 블론 | 경기 | 선발 | 이닝 | ⋯ | 홈런.9 | BABIP | LOB. | ERA | RA9.WAR | FIP | kFIP | WAR | 연봉.2018. | 연봉.2017. | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| <chr> | <chr> | <int> | <int> | <int> | <int> | <int> | <int> | <int> | <dbl> | ⋯ | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <int> | <int> | |
| 1 | 켈리 | SK | 16 | 7 | 0 | 0 | 0 | 30 | 30 | 190.0 | ⋯ | 0.76 | 0.342 | 73.7 | 3.60 | 6.91 | 3.69 | 3.44 | 6.62 | 140000 | 85000 |
| 2 | 소사 | LG | 11 | 11 | 1 | 0 | 0 | 30 | 29 | 185.1 | ⋯ | 0.53 | 0.319 | 67.1 | 3.88 | 6.80 | 3.52 | 3.41 | 6.08 | 120000 | 50000 |
| 3 | 양현종 | KIA | 20 | 6 | 0 | 0 | 0 | 31 | 31 | 193.1 | ⋯ | 0.79 | 0.332 | 72.1 | 3.44 | 6.54 | 3.94 | 3.82 | 5.64 | 230000 | 150000 |
| 4 | 차우찬 | LG | 10 | 7 | 0 | 0 | 0 | 28 | 28 | 175.2 | ⋯ | 1.02 | 0.298 | 75.0 | 3.43 | 6.11 | 4.20 | 4.03 | 4.63 | 100000 | 100000 |
| 5 | 레일리 | 롯데 | 13 | 7 | 0 | 0 | 0 | 30 | 30 | 187.1 | ⋯ | 0.91 | 0.323 | 74.1 | 3.80 | 6.13 | 4.36 | 4.31 | 4.38 | 111000 | 85000 |
| 6 | 피어밴드 | KT | 8 | 10 | 0 | 0 | 0 | 26 | 26 | 160.0 | ⋯ | 1.12 | 0.289 | 76.1 | 3.04 | 6.52 | 4.42 | 4.32 | 3.94 | 85000 | 35000 |
dt2018 <- data.frame(new_col = picher$연봉.2018.)dt2017 <- data.frame(new_col = picher$연봉.2017.)dt <- subset(picher, select = -c(연봉.2017.,연봉.2018.))head(dt)| 선수명 | 팀명 | 승 | 패 | 세 | 홀드 | 블론 | 경기 | 선발 | 이닝 | 삼진.9 | 볼넷.9 | 홈런.9 | BABIP | LOB. | ERA | RA9.WAR | FIP | kFIP | WAR | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| <chr> | <chr> | <int> | <int> | <int> | <int> | <int> | <int> | <int> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | |
| 1 | 켈리 | SK | 16 | 7 | 0 | 0 | 0 | 30 | 30 | 190.0 | 8.95 | 2.13 | 0.76 | 0.342 | 73.7 | 3.60 | 6.91 | 3.69 | 3.44 | 6.62 |
| 2 | 소사 | LG | 11 | 11 | 1 | 0 | 0 | 30 | 29 | 185.1 | 7.43 | 1.85 | 0.53 | 0.319 | 67.1 | 3.88 | 6.80 | 3.52 | 3.41 | 6.08 |
| 3 | 양현종 | KIA | 20 | 6 | 0 | 0 | 0 | 31 | 31 | 193.1 | 7.36 | 2.09 | 0.79 | 0.332 | 72.1 | 3.44 | 6.54 | 3.94 | 3.82 | 5.64 |
| 4 | 차우찬 | LG | 10 | 7 | 0 | 0 | 0 | 28 | 28 | 175.2 | 8.04 | 1.95 | 1.02 | 0.298 | 75.0 | 3.43 | 6.11 | 4.20 | 4.03 | 4.63 |
| 5 | 레일리 | 롯데 | 13 | 7 | 0 | 0 | 0 | 30 | 30 | 187.1 | 7.49 | 2.11 | 0.91 | 0.323 | 74.1 | 3.80 | 6.13 | 4.36 | 4.31 | 4.38 |
| 6 | 피어밴드 | KT | 8 | 10 | 0 | 0 | 0 | 26 | 26 | 160.0 | 7.42 | 1.74 | 1.12 | 0.289 | 76.1 | 3.04 | 6.52 | 4.42 | 4.32 | 3.94 |
# Append the 2017 salary as the last column and give it its original name,
# then append the 2018 salary column (it is renamed by the next statement).
dt <- cbind(dt, new_col = dt2017)
names(dt)[ncol(dt)] <- "연봉.2017."
dt <- cbind(dt, new_col = dt2018)
names(dt)[length(names(dt))] <- "연봉.2018."head(dt)| 선수명 | 팀명 | 승 | 패 | 세 | 홀드 | 블론 | 경기 | 선발 | 이닝 | ⋯ | 홈런.9 | BABIP | LOB. | ERA | RA9.WAR | FIP | kFIP | WAR | 연봉.2017. | 연봉.2018. | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| <chr> | <chr> | <int> | <int> | <int> | <int> | <int> | <int> | <int> | <dbl> | ⋯ | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <int> | <int> | |
| 1 | 켈리 | SK | 16 | 7 | 0 | 0 | 0 | 30 | 30 | 190.0 | ⋯ | 0.76 | 0.342 | 73.7 | 3.60 | 6.91 | 3.69 | 3.44 | 6.62 | 85000 | 140000 |
| 2 | 소사 | LG | 11 | 11 | 1 | 0 | 0 | 30 | 29 | 185.1 | ⋯ | 0.53 | 0.319 | 67.1 | 3.88 | 6.80 | 3.52 | 3.41 | 6.08 | 50000 | 120000 |
| 3 | 양현종 | KIA | 20 | 6 | 0 | 0 | 0 | 31 | 31 | 193.1 | ⋯ | 0.79 | 0.332 | 72.1 | 3.44 | 6.54 | 3.94 | 3.82 | 5.64 | 150000 | 230000 |
| 4 | 차우찬 | LG | 10 | 7 | 0 | 0 | 0 | 28 | 28 | 175.2 | ⋯ | 1.02 | 0.298 | 75.0 | 3.43 | 6.11 | 4.20 | 4.03 | 4.63 | 100000 | 100000 |
| 5 | 레일리 | 롯데 | 13 | 7 | 0 | 0 | 0 | 30 | 30 | 187.1 | ⋯ | 0.91 | 0.323 | 74.1 | 3.80 | 6.13 | 4.36 | 4.31 | 4.38 | 85000 | 111000 |
| 6 | 피어밴드 | KT | 8 | 10 | 0 | 0 | 0 | 26 | 26 | 160.0 | ⋯ | 1.12 | 0.289 | 76.1 | 3.04 | 6.52 | 4.42 | 4.32 | 3.94 | 35000 | 85000 |
dt <- subset(dt, select = -c(팀명,선수명))head(dt)| 승 | 패 | 세 | 홀드 | 블론 | 경기 | 선발 | 이닝 | 삼진.9 | 볼넷.9 | 홈런.9 | BABIP | LOB. | ERA | RA9.WAR | FIP | kFIP | WAR | 연봉.2017. | 연봉.2018. | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| <int> | <int> | <int> | <int> | <int> | <int> | <int> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <int> | <int> | |
| 1 | 16 | 7 | 0 | 0 | 0 | 30 | 30 | 190.0 | 8.95 | 2.13 | 0.76 | 0.342 | 73.7 | 3.60 | 6.91 | 3.69 | 3.44 | 6.62 | 85000 | 140000 |
| 2 | 11 | 11 | 1 | 0 | 0 | 30 | 29 | 185.1 | 7.43 | 1.85 | 0.53 | 0.319 | 67.1 | 3.88 | 6.80 | 3.52 | 3.41 | 6.08 | 50000 | 120000 |
| 3 | 20 | 6 | 0 | 0 | 0 | 31 | 31 | 193.1 | 7.36 | 2.09 | 0.79 | 0.332 | 72.1 | 3.44 | 6.54 | 3.94 | 3.82 | 5.64 | 150000 | 230000 |
| 4 | 10 | 7 | 0 | 0 | 0 | 28 | 28 | 175.2 | 8.04 | 1.95 | 1.02 | 0.298 | 75.0 | 3.43 | 6.11 | 4.20 | 4.03 | 4.63 | 100000 | 100000 |
| 5 | 13 | 7 | 0 | 0 | 0 | 30 | 30 | 187.1 | 7.49 | 2.11 | 0.91 | 0.323 | 74.1 | 3.80 | 6.13 | 4.36 | 4.31 | 4.38 | 85000 | 111000 |
| 6 | 8 | 10 | 0 | 0 | 0 | 26 | 26 | 160.0 | 7.42 | 1.74 | 1.12 | 0.289 | 76.1 | 3.04 | 6.52 | 4.42 | 4.32 | 3.94 | 35000 | 85000 |
회귀직선적합
model1(원본)
# Baseline fit: regress the 2018 salary on every other column of dt.
model1 <- lm(formula = 연봉.2018. ~ ., data = dt)
summary(model1)
Call:
lm(formula = 연봉.2018. ~ ., data = dt)
Residuals:
Min 1Q Median 3Q Max
-46529 -2418 424 2649 47773
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.513e+04 1.826e+04 0.829 0.4087
승 1.004e+03 5.375e+02 1.869 0.0639 .
패 -1.836e+02 5.504e+02 -0.334 0.7392
세 -2.112e+01 2.713e+02 -0.078 0.9381
홀드 -1.817e+01 3.161e+02 -0.057 0.9542
블론 4.535e+02 7.610e+02 0.596 0.5522
경기 -1.760e+02 1.456e+02 -1.209 0.2289
선발 -6.719e+02 4.616e+02 -1.456 0.1479
이닝 7.425e+01 1.156e+02 0.642 0.5217
삼진.9 -4.603e+02 2.349e+03 -0.196 0.8449
볼넷.9 1.194e+03 2.256e+03 0.529 0.5976
홈런.9 4.874e+03 1.413e+04 0.345 0.7306
BABIP -9.997e+03 1.486e+04 -0.673 0.5022
LOB. -4.350e+01 1.299e+02 -0.335 0.7382
ERA -7.413e+01 5.693e+02 -0.130 0.8966
RA9.WAR -7.584e+02 1.487e+03 -0.510 0.6109
FIP -6.436e+03 4.477e+04 -0.144 0.8859
kFIP 3.805e+03 3.593e+04 0.106 0.9158
WAR 8.559e+03 1.789e+03 4.783 4.55e-06 ***
연봉.2017. 8.755e-01 4.444e-02 19.698 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 9198 on 132 degrees of freedom
Multiple R-squared: 0.9228, Adjusted R-squared: 0.9116
F-statistic: 82.99 on 19 and 132 DF, p-value: < 2.2e-16
# Reduced fit using only WAR and the 2017 salary. It is stored under its own
# name so that `model1` keeps holding the full model: the vif() and step()
# calls on `model1` further down are recorded against the full set of
# predictors and would not be reproduced if `model1` were overwritten here.
# The leading unary `+` in the formula is a no-op, kept so the printed Call
# stays identical.
model1_reduced <- lm(연봉.2018. ~ +WAR + 연봉.2017., data = dt)
summary(model1_reduced)
Call:
lm(formula = 연봉.2018. ~ +WAR + 연봉.2017., data = dt)
Residuals:
Min 1Q Median 3Q Max
-50442 -1849 758 2050 56166
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -576.58811 889.09610 -0.649 0.518
WAR 7007.17364 761.83979 9.198 3.03e-16 ***
연봉.2017. 0.89926 0.04022 22.360 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 9124 on 149 degrees of freedom
Multiple R-squared: 0.9142, Adjusted R-squared: 0.913
F-statistic: 793.8 on 2 and 149 DF, p-value: < 2.2e-16
vif(model1)- 승
- 7.69934669638176
- 패
- 5.30032799820878
- 세
- 3.03718565968123
- 홀드
- 3.63605161357374
- 블론
- 2.75956034802692
- 경기
- 14.2011271199637
- 선발
- 36.1601878733279
- 이닝
- 60.3244538179009
- 삼진.9
- 78.7161704892493
- 볼넷.9
- 50.7257985016107
- 홈런.9
- 368.399308168573
- BABIP
- 3.11936893312554
- LOB.
- 4.04602533224649
- ERA
- 10.0441738232824
- RA9.WAR
- 13.4198973515016
- FIP
- 12525.2424059086
- kFIP
- 9046.04880487446
- WAR
- 9.99177856816849
- 연봉.2017.
- 2.21186942564398
# Terms of model1 whose VIF is above the cut-off of 10.
threshold <- 10
high_vif_vars <- get_high_vif_variables(model1, threshold)
print(high_vif_vars) [1] "경기" "선발" "이닝" "삼진.9" "볼넷.9" "홈런.9" "ERA"
[8] "RA9.WAR" "FIP" "kFIP"
# Same check with a stricter cut-off of 15.
threshold <- 15
high_vif_vars <- get_high_vif_variables(model1, threshold)
print(high_vif_vars)[1] "선발" "이닝" "삼진.9" "볼넷.9" "홈런.9" "FIP" "kFIP"
pairs(dt,panel=panel.smooth)
- 수치형 데이터들끼리의 상관계수 확인..
# Keep only the numeric columns and compute their pairwise correlations.
# vapply() guarantees a logical mask whatever dt contains, and drop = FALSE
# keeps a data frame even if a single numeric column remains.
dt_numeric <- dt[, vapply(dt, is.numeric, logical(1)), drop = FALSE]
cor_matrix <- cor(dt_numeric)
print(round(cor_matrix,2)) 승 패 세 홀드 블론 경기 선발 이닝 삼진.9 볼넷.9 홈런.9
승 1.00 0.71 0.05 0.09 0.11 0.40 0.77 0.91 0.08 -0.40 -0.12
패 0.71 1.00 0.07 0.10 0.12 0.34 0.77 0.83 0.03 -0.39 -0.06
세 0.05 0.07 1.00 0.11 0.61 0.43 -0.18 0.02 0.17 -0.13 -0.07
홀드 0.09 0.10 0.11 1.00 0.49 0.72 -0.29 0.02 0.19 -0.15 -0.08
블론 0.11 0.12 0.61 0.49 1.00 0.63 -0.26 0.01 0.19 -0.14 -0.06
경기 0.40 0.34 0.43 0.72 0.63 1.00 -0.04 0.38 0.19 -0.36 -0.11
선발 0.77 0.77 -0.18 -0.29 -0.26 -0.04 1.00 0.89 -0.06 -0.31 -0.06
이닝 0.91 0.83 0.02 0.02 0.01 0.38 0.89 1.00 0.04 -0.45 -0.11
삼진.9 0.08 0.03 0.17 0.19 0.19 0.19 -0.06 0.04 1.00 0.11 0.22
볼넷.9 -0.40 -0.39 -0.13 -0.15 -0.14 -0.36 -0.31 -0.45 0.11 1.00 0.30
홈런.9 -0.12 -0.06 -0.07 -0.08 -0.06 -0.11 -0.06 -0.11 0.22 0.30 1.00
BABIP -0.17 -0.13 -0.09 -0.10 -0.11 -0.24 -0.10 -0.19 0.46 0.28 0.36
LOB. 0.13 -0.02 0.17 0.05 0.10 0.11 0.04 0.10 -0.07 -0.15 -0.27
ERA -0.27 -0.19 -0.15 -0.16 -0.16 -0.32 -0.16 -0.29 0.26 0.52 0.63
RA9.WAR 0.85 0.60 0.17 0.00 0.01 0.28 0.74 0.85 0.10 -0.40 -0.19
FIP -0.30 -0.23 -0.20 -0.21 -0.21 -0.35 -0.15 -0.30 -0.15 0.63 0.83
kFIP -0.31 -0.24 -0.23 -0.24 -0.24 -0.37 -0.14 -0.30 -0.32 0.61 0.74
WAR 0.82 0.63 0.08 -0.04 -0.06 0.20 0.76 0.83 0.15 -0.39 -0.21
연봉.2017. 0.63 0.43 0.26 0.00 0.15 0.23 0.49 0.59 0.10 -0.33 -0.10
연봉.2018. 0.71 0.47 0.21 -0.02 0.10 0.21 0.56 0.66 0.10 -0.33 -0.12
BABIP LOB. ERA RA9.WAR FIP kFIP WAR 연봉.2017. 연봉.2018.
승 -0.17 0.13 -0.27 0.85 -0.30 -0.31 0.82 0.63 0.71
패 -0.13 -0.02 -0.19 0.60 -0.23 -0.24 0.63 0.43 0.47
세 -0.09 0.17 -0.15 0.17 -0.20 -0.23 0.08 0.26 0.21
홀드 -0.10 0.05 -0.16 0.00 -0.21 -0.24 -0.04 0.00 -0.02
블론 -0.11 0.10 -0.16 0.01 -0.21 -0.24 -0.06 0.15 0.10
경기 -0.24 0.11 -0.32 0.28 -0.35 -0.37 0.20 0.23 0.21
선발 -0.10 0.04 -0.16 0.74 -0.15 -0.14 0.76 0.49 0.56
이닝 -0.19 0.10 -0.29 0.85 -0.30 -0.30 0.83 0.59 0.66
삼진.9 0.46 -0.07 0.26 0.10 -0.15 -0.32 0.15 0.10 0.10
볼넷.9 0.28 -0.15 0.52 -0.40 0.63 0.61 -0.39 -0.33 -0.33
홈런.9 0.36 -0.27 0.63 -0.19 0.83 0.74 -0.21 -0.10 -0.12
BABIP 1.00 -0.51 0.73 -0.19 0.25 0.17 -0.08 -0.09 -0.10
LOB. -0.51 1.00 -0.72 0.29 -0.29 -0.27 0.14 0.11 0.13
ERA 0.73 -0.72 1.00 -0.34 0.65 0.58 -0.26 -0.20 -0.22
RA9.WAR -0.19 0.29 -0.34 1.00 -0.37 -0.38 0.92 0.64 0.74
FIP 0.25 -0.29 0.65 -0.37 1.00 0.98 -0.39 -0.27 -0.28
kFIP 0.17 -0.27 0.58 -0.38 0.98 1.00 -0.41 -0.28 -0.30
WAR -0.08 0.14 -0.26 0.92 -0.39 -0.41 1.00 0.68 0.79
연봉.2017. -0.09 0.11 -0.20 0.64 -0.27 -0.28 0.68 1.00 0.93
연봉.2018. -0.10 0.13 -0.22 0.74 -0.28 -0.30 0.79 0.93 1.00
다중공선성 해결 방법
VIF계수가 높은 변수 제거
model2(Vif 10 이상인 변수 제거)
# model2: same response as before, with the ten predictors named in the
# formula removed from the full set of columns.
model2 <- lm(
  formula = 연봉.2018. ~ . -
    경기 - 선발 - 이닝 - 삼진.9 - 볼넷.9 - 홈런.9 - ERA - RA9.WAR - FIP - kFIP,
  data = dt
)
summary(model2)
Call:
lm(formula = 연봉.2018. ~ . - 경기 - 선발 - 이닝 - 삼진.9 -
볼넷.9 - 홈런.9 - ERA - RA9.WAR - FIP - kFIP, data = dt)
Residuals:
Min 1Q Median 3Q Max
-48657 -1981 511 2303 51073
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 6.432e+03 7.893e+03 0.815 0.4165
승 4.770e+02 4.061e+02 1.175 0.2421
패 -7.851e+02 3.525e+02 -2.227 0.0275 *
세 -1.172e+02 2.150e+02 -0.545 0.5865
홀드 -1.229e+02 1.973e+02 -0.623 0.5344
블론 6.340e+02 7.188e+02 0.882 0.3792
BABIP -7.810e+03 9.994e+03 -0.781 0.4358
LOB. -4.979e+01 7.793e+01 -0.639 0.5239
WAR 7.298e+03 1.169e+03 6.243 4.67e-09 ***
연봉.2017. 8.846e-01 4.322e-02 20.469 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 9149 on 142 degrees of freedom
Multiple R-squared: 0.9178, Adjusted R-squared: 0.9126
F-statistic: 176.1 on 9 and 142 DF, p-value: < 2.2e-16
vif(model2)- 승
- 4.44133840452701
- 패
- 2.19787784118271
- 세
- 1.9291576101908
- 홀드
- 1.43155944990414
- 블론
- 2.48814143828698
- BABIP
- 1.42670331782564
- LOB.
- 1.47225296358887
- WAR
- 4.30937396392511
- 연봉.2017.
- 2.11357639082776
model1에서 다중공선성이 높았던 변수들을 제외하고 lm을 적합했더니, 회귀모형은 유의하게 나왔고 R^2값도 0.9178(수정 R^2 0.9126)로 높게 나왔지만 model1의 R^2(0.9228)보다는 조금 낮게 나왔다.
다중공선성이 높은 변수를 제외할 때는 다른 사항들도 함께 확인해 보아야 한다.
VIF제거시 고려사항
- VIF계수가 높은 피처 우선 제거하되, FIP, kFIP와 같이 유사한 변수들은 두개 중에서 하나만 제거해보자.
# model3, step 1: all predictors except FIP.
model3 <- lm(formula = 연봉.2018. ~ . - FIP, data = dt)
summary(model3)
Call:
lm(formula = 연봉.2018. ~ . - FIP, data = dt)
Residuals:
Min 1Q Median 3Q Max
-46688 -2466 423 2597 47710
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.406e+04 1.660e+04 0.847 0.399
승 1.007e+03 5.352e+02 1.882 0.062 .
패 -1.723e+02 5.427e+02 -0.317 0.751
세 -2.263e+01 2.701e+02 -0.084 0.933
홀드 -1.779e+01 3.149e+02 -0.056 0.955
블론 4.563e+02 7.579e+02 0.602 0.548
경기 -1.738e+02 1.443e+02 -1.205 0.230
선발 -6.701e+02 4.598e+02 -1.458 0.147
이닝 7.216e+01 1.142e+02 0.632 0.529
삼진.9 -7.714e+02 9.085e+02 -0.849 0.397
볼넷.9 8.998e+02 9.504e+02 0.947 0.346
홈런.9 2.904e+03 3.404e+03 0.853 0.395
BABIP -9.797e+03 1.474e+04 -0.665 0.507
LOB. -4.465e+01 1.292e+02 -0.346 0.730
ERA -8.076e+01 5.654e+02 -0.143 0.887
RA9.WAR -7.473e+02 1.480e+03 -0.505 0.614
kFIP -1.347e+03 2.371e+03 -0.568 0.571
WAR 8.560e+03 1.783e+03 4.802 4.17e-06 ***
연봉.2017. 8.757e-01 4.426e-02 19.787 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 9164 on 133 degrees of freedom
Multiple R-squared: 0.9227, Adjusted R-squared: 0.9123
F-statistic: 88.25 on 18 and 133 DF, p-value: < 2.2e-16
vif(model3)- 승
- 7.68840921316788
- 패
- 5.19140274673014
- 세
- 3.03263975401923
- 홀드
- 3.63578951116975
- 블론
- 2.75775305712261
- 경기
- 14.0426530671348
- 선발
- 36.1331990754777
- 이닝
- 59.3709458069269
- 삼진.9
- 11.8666574529657
- 볼넷.9
- 9.0682604275144
- 홈런.9
- 21.5595297493918
- BABIP
- 3.09205217740503
- LOB.
- 4.03056643053091
- ERA
- 9.9781711774582
- RA9.WAR
- 13.3837520395074
- kFIP
- 39.6977412189025
- WAR
- 9.99134587181084
- 연봉.2017.
- 2.20945320867407
- VIF계수가 가장 높았던 FIP를 제거하니 전체적으로 VIF값들이 많이 감소했다. 예를 들어 볼넷.9의 VIF는 약 50.7에서 9.1로 감소했다.
# model3, step 2: additionally leave out 이닝, then inspect the fit and the
# variance inflation factors of the remaining terms.
model3 <- lm(formula = 연봉.2018. ~ . - FIP - 이닝, data = dt)
summary(model3)
vif(model3)
Call:
lm(formula = 연봉.2018. ~ . - FIP - 이닝, data = dt)
Residuals:
Min 1Q Median 3Q Max
-47170 -2539 292 2603 47529
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.425e+04 1.656e+04 0.860 0.3912
승 1.053e+03 5.292e+02 1.989 0.0487 *
패 -1.258e+02 5.365e+02 -0.234 0.8150
세 -7.264e+01 2.576e+02 -0.282 0.7784
홀드 -7.025e+01 3.031e+02 -0.232 0.8171
블론 4.745e+02 7.557e+02 0.628 0.5312
경기 -1.021e+02 8.877e+01 -1.150 0.2523
선발 -4.306e+02 2.595e+02 -1.659 0.0994 .
삼진.9 -7.892e+02 9.060e+02 -0.871 0.3853
볼넷.9 8.829e+02 9.479e+02 0.931 0.3533
홈런.9 2.956e+03 3.396e+03 0.871 0.3855
BABIP -1.004e+04 1.470e+04 -0.683 0.4957
LOB. -4.506e+01 1.289e+02 -0.350 0.7272
ERA -6.838e+01 5.637e+02 -0.121 0.9036
RA9.WAR -4.551e+02 1.402e+03 -0.325 0.7460
kFIP -1.349e+03 2.366e+03 -0.570 0.5696
WAR 8.733e+03 1.758e+03 4.968 2.03e-06 ***
연봉.2017. 8.784e-01 4.395e-02 19.984 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 9143 on 134 degrees of freedom
Multiple R-squared: 0.9225, Adjusted R-squared: 0.9127
F-statistic: 93.84 on 17 and 134 DF, p-value: < 2.2e-16
- 승
- 7.54995904402493
- 패
- 5.09588128549648
- 세
- 2.77188079102657
- 홀드
- 3.38288542173628
- 블론
- 2.75379743632109
- 경기
- 5.34096225137479
- 선발
- 11.5652288817222
- 삼진.9
- 11.8553164987874
- 볼넷.9
- 9.06115155090058
- 홈런.9
- 21.5464901061541
- BABIP
- 3.0899457520436
- LOB.
- 4.03046844648891
- ERA
- 9.96618149145026
- RA9.WAR
- 12.0759322801392
- kFIP
- 39.6977185280435
- WAR
- 9.75717291579296
- 연봉.2017.
- 2.18906326524158
- 그 다음 vif계수값이 높은 ’이닝’을 제거했다.
# model3, step 3: additionally leave out kFIP, then inspect the fit and the
# variance inflation factors of the remaining terms.
model3 <- lm(formula = 연봉.2018. ~ . - FIP - 이닝 - kFIP, data = dt)
summary(model3)
vif(model3)
Call:
lm(formula = 연봉.2018. ~ . - FIP - 이닝 - kFIP, data = dt)
Residuals:
Min 1Q Median 3Q Max
-47261 -2379 309 2742 47813
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 6.978e+03 1.054e+04 0.662 0.5090
승 1.055e+03 5.278e+02 1.999 0.0476 *
패 -1.135e+02 5.347e+02 -0.212 0.8323
세 -7.382e+01 2.569e+02 -0.287 0.7743
홀드 -7.661e+01 3.021e+02 -0.254 0.8002
블론 5.038e+02 7.521e+02 0.670 0.5040
경기 -9.923e+01 8.841e+01 -1.122 0.2637
선발 -4.402e+02 2.583e+02 -1.704 0.0906 .
삼진.9 -3.109e+02 3.413e+02 -0.911 0.3639
볼넷.9 4.082e+02 4.514e+02 0.904 0.3675
홈런.9 1.129e+03 1.118e+03 1.010 0.3143
BABIP -9.576e+03 1.464e+04 -0.654 0.5141
LOB. -3.779e+01 1.279e+02 -0.295 0.7681
ERA -8.963e+01 5.611e+02 -0.160 0.8733
RA9.WAR -4.669e+02 1.399e+03 -0.334 0.7390
WAR 8.800e+03 1.749e+03 5.030 1.53e-06 ***
연봉.2017. 8.779e-01 4.384e-02 20.027 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 9120 on 135 degrees of freedom
Multiple R-squared: 0.9223, Adjusted R-squared: 0.9131
F-statistic: 100.2 on 16 and 135 DF, p-value: < 2.2e-16
- 승
- 7.54945354936935
- 패
- 5.08758327243023
- 세
- 2.77170233465349
- 홀드
- 3.37829494203411
- 블론
- 2.74099514800148
- 경기
- 5.32417495720081
- 선발
- 11.5165551404735
- 삼진.9
- 1.691074402967
- 볼넷.9
- 2.06526070907551
- 홈런.9
- 2.34713984614222
- BABIP
- 3.08046557773165
- LOB.
- 3.99101474806593
- ERA
- 9.92261602645989
- RA9.WAR
- 12.0732896169765
- WAR
- 9.71340804685137
- 연봉.2017.
- 2.18834997395971
- kFIP 제거
# model3, step 4: additionally leave out RA9.WAR, then inspect the fit and
# the variance inflation factors of the remaining terms.
model3 <- lm(
  formula = 연봉.2018. ~ . - FIP - 이닝 - kFIP - RA9.WAR,
  data = dt
)
summary(model3)
vif(model3)
Call:
lm(formula = 연봉.2018. ~ . - FIP - 이닝 - kFIP - RA9.WAR,
data = dt)
Residuals:
Min 1Q Median 3Q Max
-47256 -2340 228 2820 48394
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 8.002e+03 1.005e+04 0.796 0.4273
승 1.005e+03 5.044e+02 1.993 0.0483 *
패 -6.188e+01 5.102e+02 -0.121 0.9036
세 -1.005e+02 2.434e+02 -0.413 0.6805
홀드 -8.969e+01 2.986e+02 -0.300 0.7643
블론 5.293e+02 7.457e+02 0.710 0.4790
경기 -1.027e+02 8.749e+01 -1.174 0.2424
선발 -4.671e+02 2.447e+02 -1.909 0.0584 .
삼진.9 -3.052e+02 3.398e+02 -0.898 0.3707
볼넷.9 4.218e+02 4.481e+02 0.941 0.3482
홈런.9 1.154e+03 1.112e+03 1.037 0.3013
BABIP -9.059e+03 1.451e+04 -0.624 0.5334
LOB. -5.310e+01 1.190e+02 -0.446 0.6562
ERA -1.249e+02 5.493e+02 -0.227 0.8205
WAR 8.406e+03 1.289e+03 6.523 1.25e-09 ***
연봉.2017. 8.790e-01 4.358e-02 20.168 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 9090 on 136 degrees of freedom
Multiple R-squared: 0.9223, Adjusted R-squared: 0.9137
F-statistic: 107.6 on 15 and 136 DF, p-value: < 2.2e-16
- 승
- 6.94042688100102
- 패
- 4.66294914095306
- 세
- 2.50425253772256
- 홀드
- 3.32146243586634
- 블론
- 2.71278267026247
- 경기
- 5.24904766467952
- 선발
- 10.3991198289636
- 삼진.9
- 1.68672132907842
- 볼넷.9
- 2.04837389574494
- 홈런.9
- 2.33709850623971
- BABIP
- 3.046073880605
- LOB.
- 3.47761526760098
- ERA
- 9.57142193166993
- WAR
- 5.30623103354661
- 연봉.2017.
- 2.17720905024618
# model3, step 5: additionally leave out 선발, then inspect the fit and the
# variance inflation factors of the remaining terms.
model3 <- lm(
  formula = 연봉.2018. ~ . - FIP - 이닝 - kFIP - RA9.WAR - 선발,
  data = dt
)
summary(model3)
vif(model3)
Call:
lm(formula = 연봉.2018. ~ . - FIP - 이닝 - kFIP - RA9.WAR -
선발, data = dt)
Residuals:
Min 1Q Median 3Q Max
-46776 -2395 374 2597 50018
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 7221.2085 10138.7800 0.712 0.4775
승 558.2422 451.0986 1.238 0.2180
패 -758.5773 359.9930 -2.107 0.0369 *
세 -12.1963 241.3026 -0.051 0.9598
홀드 106.5326 283.0186 0.376 0.7072
블론 843.4665 734.3349 1.149 0.2527
경기 -69.6278 86.5803 -0.804 0.4227
삼진.9 -270.1732 342.5480 -0.789 0.4316
볼넷.9 431.0859 452.3679 0.953 0.3423
홈런.9 983.4449 1119.0004 0.879 0.3810
BABIP -8863.3423 14648.1787 -0.605 0.5461
LOB. -57.5239 120.1320 -0.479 0.6328
ERA -88.2397 554.2171 -0.159 0.8737
WAR 7825.6419 1264.4051 6.189 6.57e-09 ***
연봉.2017. 0.8792 0.0440 19.981 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 9178 on 137 degrees of freedom
Multiple R-squared: 0.9202, Adjusted R-squared: 0.912
F-statistic: 112.8 on 14 and 137 DF, p-value: < 2.2e-16
- 승
- 5.44563629770577
- 패
- 2.27744620091112
- 세
- 2.41391273581397
- 홀드
- 2.9278580003899
- 블론
- 2.58069224802437
- 경기
- 5.04287223750904
- 삼진.9
- 1.68181308100372
- 볼넷.9
- 2.04813245831198
- 홈런.9
- 2.32207970272157
- BABIP
- 3.04592151858475
- LOB.
- 3.47629848428834
- ERA
- 9.55974332977644
- WAR
- 5.01053061622992
- 연봉.2017.
- 2.17719632600132
- 유의수준 0.1%에서 유의미한 변수는 ’WAR’과 ’연봉(2017)’이며, ’패’도 5% 수준에서 유의하다(p = 0.0369).
정규화
normalize
# Standardize a vector: subtract its mean and divide by its standard
# deviation, returning a vector of the same length as x.
# NOTE(review): mean() and sd() are called without na.rm, so a single NA in x
# makes the whole result NA, and a constant x gives sd(x) == 0 (division by
# zero) — confirm that no column passed in has either property.
# The statement fused onto the closing brace below applies normalize() to
# every column of dt through lapply(), the response 연봉.2018. included.
normalize <- function(x) {
return((x - mean(x)) / sd(x))
}df_normalized <- as.data.frame(lapply(dt, normalize))head(df_normalized)| 승 | 패 | 세 | 홀드 | 블론 | 경기 | 선발 | 이닝 | 삼진.9 | 볼넷.9 | 홈런.9 | BABIP | LOB. | ERA | RA9.WAR | FIP | kFIP | WAR | 연봉.2018. | 연봉.2017. | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | |
| 1 | 3.313623 | 1.2271453 | -0.3064519 | -0.5857052 | -0.5435919 | 0.05943348 | 2.452068 | 2.645175 | 0.6720988 | -0.8689998 | -0.44238194 | 0.01678276 | 0.4466146 | -0.5870557 | 3.174630 | -0.9710297 | -1.0581252 | 4.503142 | 3.912893 | 2.7347053 |
| 2 | 2.019505 | 2.5047212 | -0.0985024 | -0.5857052 | -0.5435919 | 0.05943348 | 2.349505 | 2.547755 | 0.1345315 | -0.9875023 | -0.66852133 | -0.24168646 | -0.1227637 | -0.5198553 | 3.114968 | -1.0618879 | -1.0732645 | 4.094734 | 3.266495 | 1.3373033 |
| 3 | 4.348918 | 0.9077513 | -0.3064519 | -0.5857052 | -0.5435919 | 0.11105570 | 2.554632 | 2.706808 | 0.1097751 | -0.8859287 | -0.41288550 | -0.09559517 | 0.3085835 | -0.6254559 | 2.973948 | -0.8374147 | -0.8663606 | 3.761956 | 6.821679 | 5.3298806 |
| 4 | 1.760682 | 1.2271453 | -0.3064519 | -0.5857052 | -0.5435919 | -0.04381097 | 2.246942 | 2.350927 | 0.3502657 | -0.9451800 | -0.18674611 | -0.47768010 | 0.5587649 | -0.6278559 | 2.740722 | -0.6984550 | -0.7603854 | 2.998081 | 2.620098 | 3.3335919 |
| 5 | 2.537153 | 1.2271453 | -0.3064519 | -0.5857052 | -0.5435919 | 0.05943348 | 2.452068 | 2.587518 | 0.1557512 | -0.8774643 | -0.29489973 | -0.19673529 | 0.4811224 | -0.5390554 | 2.751570 | -0.6129414 | -0.6190851 | 2.809003 | 2.975617 | 2.7347053 |
| 6 | 1.243035 | 2.1853272 | -0.3064519 | -0.5857052 | -0.5435919 | -0.14705541 | 2.041816 | 2.048726 | 0.1309948 | -1.0340569 | -0.08842464 | -0.57882022 | 0.6536613 | -0.7214564 | 2.963100 | -0.5808738 | -0.6140386 | 2.476226 | 2.135301 | 0.7384167 |
# model4: the same all-predictor regression, fitted on the standardized data.
model4 <- lm(formula = 연봉.2018. ~ ., data = df_normalized)
summary(model4)
Call:
lm(formula = 연봉.2018. ~ ., data = df_normalized)
Residuals:
Min 1Q Median 3Q Max
-1.50382 -0.07816 0.01372 0.08561 1.54402
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -5.061e-16 2.411e-02 0.000 1.0000
승 1.254e-01 6.712e-02 1.869 0.0639 .
패 -1.858e-02 5.569e-02 -0.334 0.7392
세 -3.282e-03 4.216e-02 -0.078 0.9381
홀드 -2.652e-03 4.613e-02 -0.057 0.9542
블론 2.395e-02 4.019e-02 0.596 0.5522
경기 -1.102e-01 9.116e-02 -1.209 0.2289
선발 -2.117e-01 1.455e-01 -1.456 0.1479
이닝 1.207e-01 1.879e-01 0.642 0.5217
삼진.9 -4.207e-02 2.146e-01 -0.196 0.8449
볼넷.9 9.115e-02 1.723e-01 0.529 0.5976
홈런.9 1.602e-01 4.643e-01 0.345 0.7306
BABIP -2.875e-02 4.273e-02 -0.673 0.5022
LOB. -1.630e-02 4.866e-02 -0.335 0.7382
ERA -9.982e-03 7.667e-02 -0.130 0.8966
RA9.WAR -4.519e-02 8.862e-02 -0.510 0.6109
FIP -3.892e-01 2.707e+00 -0.144 0.8859
kFIP 2.437e-01 2.301e+00 0.106 0.9158
WAR 3.657e-01 7.647e-02 4.783 4.55e-06 ***
연봉.2017. 7.087e-01 3.598e-02 19.698 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.2973 on 132 degrees of freedom
Multiple R-squared: 0.9228, Adjusted R-squared: 0.9116
F-statistic: 82.99 on 19 and 132 DF, p-value: < 2.2e-16
# Terms of model4 (fitted on the standardized data) whose VIF is above 10.
threshold <- 10
high_vif_vars <- get_high_vif_variables(model4, threshold)
print(high_vif_vars) [1] "경기" "선발" "이닝" "삼진.9" "볼넷.9" "홈런.9" "ERA"
[8] "RA9.WAR" "FIP" "kFIP"
vif(model4)- 승
- 7.6993466963604
- 패
- 5.30032799820294
- 세
- 3.03718565967898
- 홀드
- 3.63605161357941
- 블론
- 2.75956034802382
- 경기
- 14.2011271199764
- 선발
- 36.160187873294
- 이닝
- 60.3244538179135
- 삼진.9
- 78.7161704890434
- 볼넷.9
- 50.7257985014939
- 홈런.9
- 368.399308167005
- BABIP
- 3.11936893312485
- LOB.
- 4.04602533224442
- ERA
- 10.044173823258
- RA9.WAR
- 13.4198973514472
- FIP
- 12525.2424058262
- kFIP
- 9046.04880481494
- WAR
- 9.99177856815799
- 연봉.2017.
- 2.2118694256429
### 변수선택
model3(AIC)
- AIC(Step)
m0 = lm(연봉.2018. ~ 1, data = dt)model3 = step(
m0,
scope = 연봉.2018. ~연봉.2017.+승+패+세+홀드+블론+경기+선발+이닝+삼진.9+볼넷.9+홈런.9+BABIP+LOB.+ERA+RA9.WAR+FIP+kFIP+WAR,
direction = "both")Start: AIC=3144.3
연봉.2018. ~ 1
Df Sum of Sq RSS AIC
+ 연봉.2017. 1 1.2511e+11 1.9445e+10 2841.4
+ WAR 1 9.0535e+10 5.4022e+10 2996.7
+ RA9.WAR 1 7.9230e+10 6.5326e+10 3025.6
+ 승 1 7.3377e+10 7.1179e+10 3038.6
+ 이닝 1 6.2759e+10 8.1797e+10 3059.8
+ 선발 1 4.5409e+10 9.9147e+10 3089.0
+ 패 1 3.1910e+10 1.1265e+11 3108.4
+ 볼넷.9 1 1.5661e+10 1.2890e+11 3128.9
+ kFIP 1 1.2591e+10 1.3197e+11 3132.4
+ FIP 1 1.1403e+10 1.3315e+11 3133.8
+ ERA 1 6.7332e+09 1.3782e+11 3139.1
+ 세 1 6.4461e+09 1.3811e+11 3139.4
+ 경기 1 6.3714e+09 1.3819e+11 3139.4
+ LOB. 1 2.2831e+09 1.4227e+11 3143.9
+ 홈런.9 1 1.9575e+09 1.4260e+11 3144.2
<none> 1.4456e+11 3144.3
+ 삼진.9 1 1.5567e+09 1.4300e+11 3144.7
+ BABIP 1 1.5139e+09 1.4304e+11 3144.7
+ 블론 1 1.3815e+09 1.4318e+11 3144.8
+ 홀드 1 4.3499e+07 1.4451e+11 3146.3
Step: AIC=2841.38
연봉.2018. ~ 연봉.2017.
Df Sum of Sq RSS AIC
+ WAR 1 7.0421e+09 1.2403e+10 2775.0
+ RA9.WAR 1 4.9589e+09 1.4486e+10 2798.6
+ 승 1 3.8414e+09 1.5604e+10 2809.9
+ 이닝 1 2.8118e+09 1.6633e+10 2819.6
+ 선발 1 2.1318e+09 1.7313e+10 2825.7
+ 패 1 8.8114e+08 1.8564e+10 2836.3
<none> 1.9445e+10 2841.4
+ 블론 1 2.2022e+08 1.9225e+10 2841.7
+ 세 1 1.7105e+08 1.9274e+10 2842.0
+ kFIP 1 1.6254e+08 1.9283e+10 2842.1
+ FIP 1 1.5483e+08 1.9290e+10 2842.2
+ ERA 1 1.0735e+08 1.9338e+10 2842.5
+ LOB. 1 7.7049e+07 1.9368e+10 2842.8
+ 홈런.9 1 7.3957e+07 1.9371e+10 2842.8
+ 볼넷.9 1 6.4565e+07 1.9381e+10 2842.9
+ BABIP 1 5.6938e+07 1.9388e+10 2842.9
+ 홀드 1 3.8024e+07 1.9407e+10 2843.1
+ 삼진.9 1 5.5081e+06 1.9440e+10 2843.3
+ 경기 1 1.2651e+04 1.9445e+10 2843.4
- 연봉.2017. 1 1.2511e+11 1.4456e+11 3144.3
Step: AIC=2775.03
연봉.2018. ~ 연봉.2017. + WAR
Df Sum of Sq RSS AIC
+ 패 1 2.1336e+08 1.2190e+10 2774.4
+ kFIP 1 1.8769e+08 1.2215e+10 2774.7
+ 선발 1 1.7153e+08 1.2232e+10 2774.9
+ FIP 1 1.6877e+08 1.2234e+10 2774.9
+ 볼넷.9 1 1.6419e+08 1.2239e+10 2775.0
<none> 1.2403e+10 2775.0
+ 이닝 1 1.4704e+08 1.2256e+10 2775.2
+ 홈런.9 1 5.1612e+07 1.2351e+10 2776.4
+ 삼진.9 1 4.8349e+07 1.2355e+10 2776.4
+ 승 1 3.0076e+07 1.2373e+10 2776.7
+ 경기 1 2.7246e+07 1.2376e+10 2776.7
+ BABIP 1 2.4182e+07 1.2379e+10 2776.7
+ ERA 1 1.7077e+07 1.2386e+10 2776.8
+ 블론 1 1.1153e+07 1.2392e+10 2776.9
+ RA9.WAR 1 6.6509e+06 1.2396e+10 2776.9
+ 세 1 4.3325e+06 1.2399e+10 2777.0
+ 홀드 1 3.4824e+06 1.2400e+10 2777.0
+ LOB. 1 6.6018e+05 1.2402e+10 2777.0
- WAR 1 7.0421e+09 1.9445e+10 2841.4
- 연봉.2017. 1 4.1619e+10 5.4022e+10 2996.7
Step: AIC=2774.4
연봉.2018. ~ 연봉.2017. + WAR + 패
Df Sum of Sq RSS AIC
+ kFIP 1 1.9738e+08 1.1992e+10 2773.9
+ 승 1 1.8072e+08 1.2009e+10 2774.1
+ FIP 1 1.7496e+08 1.2015e+10 2774.2
<none> 1.2190e+10 2774.4
- 패 1 2.1336e+08 1.2403e+10 2775.0
+ 볼넷.9 1 1.0330e+08 1.2086e+10 2775.1
+ 홈런.9 1 7.1015e+07 1.2119e+10 2775.5
+ 삼진.9 1 6.6895e+07 1.2123e+10 2775.6
+ 블론 1 4.2173e+07 1.2148e+10 2775.9
+ BABIP 1 4.1954e+07 1.2148e+10 2775.9
+ 선발 1 3.1474e+07 1.2158e+10 2776.0
+ ERA 1 1.3441e+07 1.2176e+10 2776.2
+ 이닝 1 5.8966e+06 1.2184e+10 2776.3
+ 세 1 3.4705e+06 1.2186e+10 2776.3
+ RA9.WAR 1 2.4143e+06 1.2187e+10 2776.4
+ LOB. 1 1.7129e+06 1.2188e+10 2776.4
+ 경기 1 1.1252e+06 1.2189e+10 2776.4
+ 홀드 1 1.8992e+05 1.2190e+10 2776.4
- WAR 1 6.3743e+09 1.8564e+10 2836.3
- 연봉.2017. 1 4.1680e+10 5.3870e+10 2998.3
Step: AIC=2773.92
연봉.2018. ~ 연봉.2017. + WAR + 패 + kFIP
Df Sum of Sq RSS AIC
+ 승 1 1.6741e+08 1.1825e+10 2773.8
<none> 1.1992e+10 2773.9
+ 블론 1 1.2836e+08 1.1864e+10 2774.3
- kFIP 1 1.9738e+08 1.2190e+10 2774.4
+ 선발 1 1.1764e+08 1.1875e+10 2774.4
- 패 1 2.2305e+08 1.2215e+10 2774.7
+ BABIP 1 7.5190e+07 1.1917e+10 2775.0
+ ERA 1 2.1818e+07 1.1971e+10 2775.6
+ 홀드 1 2.1404e+07 1.1971e+10 2775.6
+ 삼진.9 1 1.9275e+07 1.1973e+10 2775.7
+ 경기 1 1.7028e+07 1.1975e+10 2775.7
+ 이닝 1 1.3041e+07 1.1979e+10 2775.8
+ FIP 1 9.3610e+06 1.1983e+10 2775.8
+ 볼넷.9 1 8.8432e+06 1.1983e+10 2775.8
+ 홈런.9 1 8.7223e+06 1.1984e+10 2775.8
+ LOB. 1 4.0316e+06 1.1988e+10 2775.9
+ RA9.WAR 1 2.0131e+06 1.1990e+10 2775.9
+ 세 1 1.4454e+06 1.1991e+10 2775.9
- WAR 1 6.4941e+09 1.8486e+10 2837.7
- 연봉.2017. 1 4.1735e+10 5.3727e+10 2999.9
Step: AIC=2773.78
연봉.2018. ~ 연봉.2017. + WAR + 패 + kFIP + 승
Df Sum of Sq RSS AIC
+ 이닝 1 2.1565e+08 1.1609e+10 2773.0
+ 선발 1 1.9668e+08 1.1628e+10 2773.2
<none> 1.1825e+10 2773.8
- 승 1 1.6741e+08 1.1992e+10 2773.9
- kFIP 1 1.8408e+08 1.2009e+10 2774.1
+ 블론 1 8.3012e+07 1.1742e+10 2774.7
+ RA9.WAR 1 6.3182e+07 1.1762e+10 2775.0
+ BABIP 1 4.5875e+07 1.1779e+10 2775.2
+ 볼넷.9 1 1.7921e+07 1.1807e+10 2775.6
+ 삼진.9 1 1.4564e+07 1.1810e+10 2775.6
+ 홈런.9 1 1.2160e+07 1.1813e+10 2775.6
+ ERA 1 8.8026e+06 1.1816e+10 2775.7
+ FIP 1 8.1221e+06 1.1817e+10 2775.7
+ 세 1 5.8214e+06 1.1819e+10 2775.7
+ 홀드 1 5.2671e+06 1.1820e+10 2775.7
+ LOB. 1 3.9758e+05 1.1825e+10 2775.8
+ 경기 1 3.3176e+05 1.1825e+10 2775.8
- 패 1 3.6648e+08 1.2191e+10 2776.4
- WAR 1 3.6353e+09 1.5460e+10 2812.5
- 연봉.2017. 1 3.9188e+10 5.1013e+10 2994.0
Step: AIC=2772.98
연봉.2018. ~ 연봉.2017. + WAR + 패 + kFIP + 승 + 이닝
Df Sum of Sq RSS AIC
- 패 1 3.1923e+07 1.1641e+10 2771.4
<none> 1.1609e+10 2773.0
- kFIP 1 2.1496e+08 1.1824e+10 2773.8
- 이닝 1 2.1565e+08 1.1825e+10 2773.8
+ BABIP 1 8.7592e+07 1.1522e+10 2773.8
+ 선발 1 5.0414e+07 1.1559e+10 2774.3
+ 블론 1 3.9472e+07 1.1570e+10 2774.5
+ 삼진.9 1 3.3863e+07 1.1575e+10 2774.5
+ ERA 1 3.3525e+07 1.1576e+10 2774.5
+ FIP 1 1.8310e+07 1.1591e+10 2774.7
+ 홈런.9 1 1.2031e+07 1.1597e+10 2774.8
+ RA9.WAR 1 1.0398e+07 1.1599e+10 2774.8
+ LOB. 1 3.1362e+06 1.1606e+10 2774.9
+ 경기 1 1.9500e+06 1.1607e+10 2775.0
+ 볼넷.9 1 1.2880e+06 1.1608e+10 2775.0
+ 세 1 2.2726e+05 1.1609e+10 2775.0
+ 홀드 1 9.3003e+04 1.1609e+10 2775.0
- 승 1 3.7002e+08 1.1979e+10 2775.8
- WAR 1 3.7546e+09 1.5364e+10 2813.6
- 연봉.2017. 1 3.8723e+10 5.0333e+10 2993.9
Step: AIC=2771.4
연봉.2018. ~ 연봉.2017. + WAR + kFIP + 승 + 이닝
Df Sum of Sq RSS AIC
<none> 1.1641e+10 2771.4
+ BABIP 1 9.5915e+07 1.1545e+10 2772.1
- kFIP 1 2.2291e+08 1.1864e+10 2772.3
+ 선발 1 6.0820e+07 1.1580e+10 2772.6
+ ERA 1 4.1760e+07 1.1599e+10 2772.8
+ 삼진.9 1 3.6788e+07 1.1604e+10 2772.9
+ 패 1 3.1923e+07 1.1609e+10 2773.0
+ 블론 1 2.3680e+07 1.1618e+10 2773.1
+ FIP 1 2.0239e+07 1.1621e+10 2773.1
+ 홈런.9 1 1.4166e+07 1.1627e+10 2773.2
+ LOB. 1 7.7349e+06 1.1633e+10 2773.3
+ 경기 1 1.5351e+06 1.1640e+10 2773.4
+ RA9.WAR 1 1.4350e+06 1.1640e+10 2773.4
+ 볼넷.9 1 1.4095e+06 1.1640e+10 2773.4
+ 홀드 1 2.5525e+05 1.1641e+10 2773.4
+ 세 1 6.8181e+04 1.1641e+10 2773.4
- 승 1 4.0011e+08 1.2041e+10 2774.5
- 이닝 1 5.5021e+08 1.2191e+10 2776.4
- WAR 1 3.9604e+09 1.5602e+10 2813.9
- 연봉.2017. 1 3.8795e+10 5.0436e+10 2992.2
summary
summary(model3)
Call:
lm(formula = 연봉.2018. ~ 연봉.2017. + WAR + kFIP + 승 +
이닝, data = dt)
Residuals:
Min 1Q Median 3Q Max
-48717 -2879 204 3083 48961
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -2.691e+03 2.658e+03 -1.012 0.31310
연봉.2017. 8.862e-01 4.018e-02 22.058 < 2e-16 ***
WAR 8.118e+03 1.152e+03 7.048 6.68e-11 ***
kFIP 6.737e+02 4.029e+02 1.672 0.09666 .
승 1.059e+03 4.727e+02 2.240 0.02659 *
이닝 -9.701e+01 3.693e+01 -2.627 0.00954 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 8929 on 146 degrees of freedom
Multiple R-squared: 0.9195, Adjusted R-squared: 0.9167
F-statistic: 333.4 on 5 and 146 DF, p-value: < 2.2e-16
- AIC를 이용하면 최종 모형은 “연봉.2018. ~ 연봉.2017. + WAR + kFIP+승+이닝” 이다.
vif(model3)- 연봉.2017.
- 1.91752518192932
- WAR
- 4.3927072113068
- kFIP
- 1.20722030237251
- 승
- 6.3165168650018
- 이닝
- 6.53436057823665
연봉.2018. ~ 연봉.2017. + WAR + kFIP + 승 + 이닝
후진
# Backward elimination by AIC. The starting point is the full model, fitted
# explicitly here so the result does not depend on what `model1` currently
# holds; the recorded trace below starts from all predictors.
model_back <- step(lm(연봉.2018. ~ ., data = dt), direction = "backward")
summary(model_back)Start: AIC=2793.07
연봉.2018. ~ 승 + 패 + 세 + 홀드 + 블론 + 경기 + 선발 +
이닝 + 삼진.9 + 볼넷.9 + 홈런.9 + BABIP + LOB. +
ERA + RA9.WAR + FIP + kFIP + WAR + 연봉.2017.
Df Sum of Sq RSS AIC
- 홀드 1 2.7964e+05 1.1167e+10 2791.1
- 세 1 5.1274e+05 1.1167e+10 2791.1
- kFIP 1 9.4914e+05 1.1168e+10 2791.1
- ERA 1 1.4342e+06 1.1168e+10 2791.1
- FIP 1 1.7480e+06 1.1168e+10 2791.1
- 삼진.9 1 3.2496e+06 1.1170e+10 2791.1
- 패 1 9.4169e+06 1.1176e+10 2791.2
- LOB. 1 9.4883e+06 1.1176e+10 2791.2
- 홈런.9 1 1.0071e+07 1.1177e+10 2791.2
- RA9.WAR 1 2.1998e+07 1.1189e+10 2791.4
- 볼넷.9 1 2.3679e+07 1.1190e+10 2791.4
- 블론 1 3.0047e+07 1.1197e+10 2791.5
- 이닝 1 3.4909e+07 1.1201e+10 2791.6
- BABIP 1 3.8305e+07 1.1205e+10 2791.6
- 경기 1 1.2360e+08 1.1290e+10 2792.7
<none> 1.1167e+10 2793.1
- 선발 1 1.7922e+08 1.1346e+10 2793.5
- 승 1 2.9538e+08 1.1462e+10 2795.0
- WAR 1 1.9353e+09 1.3102e+10 2815.4
- 연봉.2017. 1 3.2824e+10 4.3991e+10 2999.5
Step: AIC=2791.07
연봉.2018. ~ 승 + 패 + 세 + 블론 + 경기 + 선발 + 이닝 +
삼진.9 + 볼넷.9 + 홈런.9 + BABIP + LOB. + ERA + RA9.WAR +
FIP + kFIP + WAR + 연봉.2017.
Df Sum of Sq RSS AIC
- 세 1 2.6213e+05 1.1167e+10 2789.1
- kFIP 1 9.3814e+05 1.1168e+10 2789.1
- ERA 1 1.5378e+06 1.1168e+10 2789.1
- FIP 1 1.7362e+06 1.1169e+10 2789.1
- 삼진.9 1 3.2908e+06 1.1170e+10 2789.1
- LOB. 1 9.8104e+06 1.1177e+10 2789.2
- 홈런.9 1 1.0084e+07 1.1177e+10 2789.2
- 패 1 1.1289e+07 1.1178e+10 2789.2
- 볼넷.9 1 2.3701e+07 1.1191e+10 2789.4
- RA9.WAR 1 2.3959e+07 1.1191e+10 2789.4
- 블론 1 2.9856e+07 1.1197e+10 2789.5
- BABIP 1 3.8334e+07 1.1205e+10 2789.6
- 이닝 1 3.9219e+07 1.1206e+10 2789.6
<none> 1.1167e+10 2791.1
- 선발 1 1.7952e+08 1.1346e+10 2791.5
- 경기 1 1.8031e+08 1.1347e+10 2791.5
- 승 1 2.9677e+08 1.1464e+10 2793.1
- WAR 1 1.9405e+09 1.3107e+10 2813.4
- 연봉.2017. 1 3.2987e+10 4.4154e+10 2998.0
Step: AIC=2789.08
연봉.2018. ~ 승 + 패 + 블론 + 경기 + 선발 + 이닝 +
삼진.9 + 볼넷.9 + 홈런.9 + BABIP + LOB. + ERA + RA9.WAR +
FIP + kFIP + WAR + 연봉.2017.
Df Sum of Sq RSS AIC
- kFIP 1 9.9324e+05 1.1168e+10 2787.1
- ERA 1 1.6052e+06 1.1169e+10 2787.1
- FIP 1 1.8109e+06 1.1169e+10 2787.1
- 삼진.9 1 3.2005e+06 1.1170e+10 2787.1
- LOB. 1 9.9171e+06 1.1177e+10 2787.2
- 홈런.9 1 1.0260e+07 1.1177e+10 2787.2
- 패 1 1.1827e+07 1.1179e+10 2787.2
- 볼넷.9 1 2.3993e+07 1.1191e+10 2787.4
- RA9.WAR 1 2.8604e+07 1.1196e+10 2787.5
- 블론 1 3.3423e+07 1.1201e+10 2787.5
- BABIP 1 3.8245e+07 1.1205e+10 2787.6
- 이닝 1 4.1983e+07 1.1209e+10 2787.7
<none> 1.1167e+10 2789.1
- 선발 1 1.8297e+08 1.1350e+10 2789.6
- 경기 1 1.9004e+08 1.1357e+10 2789.6
- 승 1 3.2363e+08 1.1491e+10 2791.4
- WAR 1 1.9440e+09 1.3111e+10 2811.5
- 연봉.2017. 1 3.4986e+10 4.6153e+10 3002.8
Step: AIC=2787.09
연봉.2018. ~ 승 + 패 + 블론 + 경기 + 선발 + 이닝 +
삼진.9 + 볼넷.9 + 홈런.9 + BABIP + LOB. + ERA + RA9.WAR +
FIP + WAR + 연봉.2017.
Df Sum of Sq RSS AIC
- ERA 1 1.8214e+06 1.1170e+10 2785.1
- LOB. 1 1.0407e+07 1.1179e+10 2785.2
- 패 1 1.1087e+07 1.1179e+10 2785.2
- FIP 1 2.8081e+07 1.1196e+10 2785.5
- RA9.WAR 1 2.8212e+07 1.1196e+10 2785.5
- 블론 1 3.3436e+07 1.1202e+10 2785.6
- BABIP 1 3.7443e+07 1.1206e+10 2785.6
- 이닝 1 4.1025e+07 1.1209e+10 2785.7
- 홈런.9 1 5.5367e+07 1.1223e+10 2785.8
- 삼진.9 1 6.7478e+07 1.1236e+10 2786.0
- 볼넷.9 1 7.1385e+07 1.1239e+10 2786.1
<none> 1.1168e+10 2787.1
- 선발 1 1.8242e+08 1.1351e+10 2787.6
- 경기 1 1.8950e+08 1.1358e+10 2787.7
- 승 1 3.2639e+08 1.1494e+10 2789.5
- WAR 1 1.9444e+09 1.3112e+10 2809.5
- 연봉.2017. 1 3.5014e+10 4.6182e+10 3000.9
Step: AIC=2785.12
연봉.2018. ~ 승 + 패 + 블론 + 경기 + 선발 + 이닝 +
삼진.9 + 볼넷.9 + 홈런.9 + BABIP + LOB. + RA9.WAR +
FIP + WAR + 연봉.2017.
Df Sum of Sq RSS AIC
- LOB. 1 1.0813e+07 1.1181e+10 2783.3
- 패 1 1.1584e+07 1.1181e+10 2783.3
- FIP 1 2.9304e+07 1.1199e+10 2783.5
- 블론 1 3.2749e+07 1.1203e+10 2783.6
- RA9.WAR 1 3.2883e+07 1.1203e+10 2783.6
- 이닝 1 4.0932e+07 1.1211e+10 2783.7
- 홈런.9 1 5.3929e+07 1.1224e+10 2783.8
- BABIP 1 6.8000e+07 1.1238e+10 2784.0
- 삼진.9 1 6.9211e+07 1.1239e+10 2784.1
- 볼넷.9 1 6.9566e+07 1.1239e+10 2784.1
<none> 1.1170e+10 2785.1
- 선발 1 1.8070e+08 1.1351e+10 2785.6
- 경기 1 1.8826e+08 1.1358e+10 2785.7
- 승 1 3.2533e+08 1.1495e+10 2787.5
- WAR 1 1.9871e+09 1.3157e+10 2808.0
- 연봉.2017. 1 3.5061e+10 4.6231e+10 2999.0
Step: AIC=2783.26
연봉.2018. ~ 승 + 패 + 블론 + 경기 + 선발 + 이닝 +
삼진.9 + 볼넷.9 + 홈런.9 + BABIP + RA9.WAR + FIP +
WAR + 연봉.2017.
Df Sum of Sq RSS AIC
- 패 1 1.0204e+07 1.1191e+10 2781.4
- FIP 1 2.3196e+07 1.1204e+10 2781.6
- 블론 1 3.0781e+07 1.1212e+10 2781.7
- 이닝 1 4.3368e+07 1.1224e+10 2781.8
- 홈런.9 1 4.6793e+07 1.1228e+10 2781.9
- RA9.WAR 1 5.5284e+07 1.1236e+10 2782.0
- BABIP 1 5.7524e+07 1.1238e+10 2782.0
- 볼넷.9 1 6.1767e+07 1.1242e+10 2782.1
- 삼진.9 1 6.4118e+07 1.1245e+10 2782.1
<none> 1.1181e+10 2783.3
- 선발 1 1.8271e+08 1.1363e+10 2783.7
- 경기 1 1.8379e+08 1.1365e+10 2783.7
- 승 1 3.3073e+08 1.1511e+10 2785.7
- WAR 1 2.1202e+09 1.3301e+10 2807.7
- 연봉.2017. 1 3.5097e+10 4.6278e+10 2997.2
Step: AIC=2781.4
연봉.2018. ~ 승 + 블론 + 경기 + 선발 + 이닝 + 삼진.9 +
볼넷.9 + 홈런.9 + BABIP + RA9.WAR + FIP + WAR + 연봉.2017.
Df Sum of Sq RSS AIC
- FIP 1 2.2440e+07 1.1213e+10 2779.7
- 블론 1 2.3243e+07 1.1214e+10 2779.7
- 이닝 1 4.1401e+07 1.1232e+10 2780.0
- 홈런.9 1 4.5529e+07 1.1236e+10 2780.0
- RA9.WAR 1 4.6219e+07 1.1237e+10 2780.0
- BABIP 1 5.8410e+07 1.1249e+10 2780.2
- 볼넷.9 1 6.1274e+07 1.1252e+10 2780.2
- 삼진.9 1 6.2728e+07 1.1254e+10 2780.2
<none> 1.1191e+10 2781.4
- 경기 1 2.1328e+08 1.1404e+10 2782.3
- 선발 1 2.3948e+08 1.1430e+10 2782.6
- 승 1 3.4606e+08 1.1537e+10 2784.0
- WAR 1 2.1342e+09 1.3325e+10 2805.9
- 연봉.2017. 1 3.5334e+10 4.6525e+10 2996.0
Step: AIC=2779.71
연봉.2018. ~ 승 + 블론 + 경기 + 선발 + 이닝 + 삼진.9 +
볼넷.9 + 홈런.9 + BABIP + RA9.WAR + WAR + 연봉.2017.
Df Sum of Sq RSS AIC
- 블론 1 2.8280e+07 1.1242e+10 2778.1
- 이닝 1 4.0706e+07 1.1254e+10 2778.3
- RA9.WAR 1 4.3431e+07 1.1257e+10 2778.3
- BABIP 1 6.5327e+07 1.1279e+10 2778.6
- 삼진.9 1 6.9757e+07 1.1283e+10 2778.7
- 볼넷.9 1 8.4569e+07 1.1298e+10 2778.8
- 홈런.9 1 1.0518e+08 1.1319e+10 2779.1
<none> 1.1213e+10 2779.7
- 경기 1 2.0863e+08 1.1422e+10 2780.5
- 선발 1 2.4225e+08 1.1456e+10 2781.0
- 승 1 3.4542e+08 1.1559e+10 2782.3
- WAR 1 2.1544e+09 1.3368e+10 2804.4
- 연봉.2017. 1 3.5313e+10 4.6526e+10 2994.0
Step: AIC=2778.09
연봉.2018. ~ 승 + 경기 + 선발 + 이닝 + 삼진.9 + 볼넷.9 +
홈런.9 + BABIP + RA9.WAR + WAR + 연봉.2017.
Df Sum of Sq RSS AIC
- 이닝 1 3.5553e+07 1.1277e+10 2776.6
- RA9.WAR 1 4.1773e+07 1.1283e+10 2776.7
- 삼진.9 1 6.2788e+07 1.1304e+10 2776.9
- BABIP 1 6.6047e+07 1.1308e+10 2777.0
- 볼넷.9 1 8.6190e+07 1.1328e+10 2777.2
- 홈런.9 1 9.9781e+07 1.1341e+10 2777.4
<none> 1.1242e+10 2778.1
- 경기 1 1.8104e+08 1.1423e+10 2778.5
- 선발 1 2.4212e+08 1.1484e+10 2779.3
- 승 1 3.7616e+08 1.1618e+10 2781.1
- WAR 1 2.1262e+09 1.3368e+10 2802.4
- 연봉.2017. 1 3.7489e+10 4.8731e+10 2999.0
Step: AIC=2776.57
연봉.2018. ~ 승 + 경기 + 선발 + 삼진.9 + 볼넷.9 +
홈런.9 + BABIP + RA9.WAR + WAR + 연봉.2017.
Df Sum of Sq RSS AIC
- RA9.WAR 1 2.7446e+07 1.1305e+10 2774.9
- BABIP 1 6.5912e+07 1.1343e+10 2775.5
- 삼진.9 1 7.3754e+07 1.1351e+10 2775.6
- 볼넷.9 1 7.9195e+07 1.1356e+10 2775.6
- 홈런.9 1 1.1893e+08 1.1396e+10 2776.2
<none> 1.1277e+10 2776.6
- 경기 1 2.5407e+08 1.1531e+10 2778.0
- 승 1 4.4284e+08 1.1720e+10 2780.4
- 선발 1 7.1295e+08 1.1990e+10 2783.9
- WAR 1 2.3107e+09 1.3588e+10 2802.9
- 연봉.2017. 1 3.7540e+10 4.8818e+10 2997.3
Step: AIC=2774.94
연봉.2018. ~ 승 + 경기 + 선발 + 삼진.9 + 볼넷.9 +
홈런.9 + BABIP + WAR + 연봉.2017.
Df Sum of Sq RSS AIC
- BABIP 1 5.1810e+07 1.1356e+10 2773.6
- 삼진.9 1 7.6555e+07 1.1381e+10 2774.0
- 볼넷.9 1 7.6805e+07 1.1381e+10 2774.0
- 홈런.9 1 1.1502e+08 1.1420e+10 2774.5
<none> 1.1305e+10 2774.9
- 경기 1 2.6687e+08 1.1572e+10 2776.5
- 승 1 4.1542e+08 1.1720e+10 2778.4
- 선발 1 7.2929e+08 1.2034e+10 2782.4
- WAR 1 3.6036e+09 1.4908e+10 2815.0
- 연봉.2017. 1 3.7533e+10 4.8837e+10 2995.4
Step: AIC=2773.63
연봉.2018. ~ 승 + 경기 + 선발 + 삼진.9 + 볼넷.9 +
홈런.9 + WAR + 연봉.2017.
Df Sum of Sq RSS AIC
- 볼넷.9 1 7.2317e+07 1.1429e+10 2772.6
- 홈런.9 1 8.5360e+07 1.1442e+10 2772.8
<none> 1.1356e+10 2773.6
- 삼진.9 1 1.8112e+08 1.1538e+10 2774.0
- 경기 1 2.2627e+08 1.1583e+10 2774.6
- 승 1 4.2114e+08 1.1778e+10 2777.2
- 선발 1 7.1482e+08 1.2071e+10 2780.9
- WAR 1 3.5773e+09 1.4934e+10 2813.3
- 연봉.2017. 1 3.7607e+10 4.8964e+10 2993.8
Step: AIC=2772.6
연봉.2018. ~ 승 + 경기 + 선발 + 삼진.9 + 홈런.9 +
WAR + 연봉.2017.
Df Sum of Sq RSS AIC
- 홈런.9 1 1.2421e+08 1.1553e+10 2772.2
- 삼진.9 1 1.4867e+08 1.1577e+10 2772.6
<none> 1.1429e+10 2772.6
- 경기 1 3.5597e+08 1.1785e+10 2775.3
- 승 1 4.5207e+08 1.1881e+10 2776.5
- 선발 1 7.9752e+08 1.2226e+10 2780.8
- WAR 1 3.5112e+09 1.4940e+10 2811.3
- 연봉.2017. 1 3.7614e+10 4.9042e+10 2992.0
Step: AIC=2772.24
연봉.2018. ~ 승 + 경기 + 선발 + 삼진.9 + WAR + 연봉.2017.
Df Sum of Sq RSS AIC
- 삼진.9 1 8.2804e+07 1.1636e+10 2771.3
<none> 1.1553e+10 2772.2
- 경기 1 4.0633e+08 1.1959e+10 2775.5
- 승 1 4.9123e+08 1.2044e+10 2776.6
- 선발 1 7.3028e+08 1.2283e+10 2779.6
- WAR 1 3.4409e+09 1.4994e+10 2809.9
- 연봉.2017. 1 3.8020e+10 4.9573e+10 2991.6
Step: AIC=2771.33
연봉.2018. ~ 승 + 경기 + 선발 + WAR + 연봉.2017.
Df Sum of Sq RSS AIC
<none> 1.1636e+10 2771.3
- 경기 1 4.4893e+08 1.2085e+10 2775.1
- 승 1 5.0693e+08 1.2143e+10 2775.8
- 선발 1 6.7070e+08 1.2306e+10 2777.8
- WAR 1 3.3777e+09 1.5014e+10 2808.1
- 연봉.2017. 1 3.8082e+10 4.9718e+10 2990.1
Call:
lm(formula = 연봉.2018. ~ 승 + 경기 + 선발 + WAR + 연봉.2017.,
data = dt)
Residuals:
Min 1Q Median 3Q Max
-48420 -1976 -56 2829 48859
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1969.51173 1500.85489 1.312 0.1915
승 1174.23883 465.58916 2.522 0.0127 *
경기 -120.60886 50.81729 -2.373 0.0189 *
선발 -440.87646 151.97589 -2.901 0.0043 **
WAR 7220.36610 1109.09601 6.510 1.13e-09 ***
연봉.2017. 0.88247 0.04037 21.860 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 8927 on 146 degrees of freedom
Multiple R-squared: 0.9195, Adjusted R-squared: 0.9168
F-statistic: 333.6 on 5 and 146 DF, p-value: < 2.2e-16
vif(model_back)- 승
- 6.13100501411465
- 경기
- 1.83604531782849
- 선발
- 4.16007029774887
- WAR
- 4.07445827346373
- 연봉.2017.
- 1.93705640010866
연봉.2018. ~ 승 + 경기 + 선발 + WAR + 연봉.2017.
전진
# Forward selection by AIC.
# `m0` is the intercept-only model that step() starts from; `scope` lists every
# candidate predictor that may be added (a single formula is used as the upper
# model).
m0 <- lm(연봉.2018. ~ 1, data = dt)

model_forward <- step(
  m0,
  scope = 연봉.2018. ~ 연봉.2017. + 승 + 패 + 세 + 홀드 + 블론 + 경기 + 선발 +
    이닝 + 삼진.9 + 볼넷.9 + 홈런.9 + BABIP + LOB. + ERA + RA9.WAR + FIP +
    kFIP + WAR,
  direction = "forward"
)
summary(model_forward)Start: AIC=3144.3
연봉.2018. ~ 1
Df Sum of Sq RSS AIC
+ 연봉.2017. 1 1.2511e+11 1.9445e+10 2841.4
+ WAR 1 9.0535e+10 5.4022e+10 2996.7
+ RA9.WAR 1 7.9230e+10 6.5326e+10 3025.6
+ 승 1 7.3377e+10 7.1179e+10 3038.6
+ 이닝 1 6.2759e+10 8.1797e+10 3059.8
+ 선발 1 4.5409e+10 9.9147e+10 3089.0
+ 패 1 3.1910e+10 1.1265e+11 3108.4
+ 볼넷.9 1 1.5661e+10 1.2890e+11 3128.9
+ kFIP 1 1.2591e+10 1.3197e+11 3132.4
+ FIP 1 1.1403e+10 1.3315e+11 3133.8
+ ERA 1 6.7332e+09 1.3782e+11 3139.1
+ 세 1 6.4461e+09 1.3811e+11 3139.4
+ 경기 1 6.3714e+09 1.3819e+11 3139.4
+ LOB. 1 2.2831e+09 1.4227e+11 3143.9
+ 홈런.9 1 1.9575e+09 1.4260e+11 3144.2
<none> 1.4456e+11 3144.3
+ 삼진.9 1 1.5567e+09 1.4300e+11 3144.7
+ BABIP 1 1.5139e+09 1.4304e+11 3144.7
+ 블론 1 1.3815e+09 1.4318e+11 3144.8
+ 홀드 1 4.3499e+07 1.4451e+11 3146.3
Step: AIC=2841.38
연봉.2018. ~ 연봉.2017.
Df Sum of Sq RSS AIC
+ WAR 1 7042094427 1.2403e+10 2775.0
+ RA9.WAR 1 4958914952 1.4486e+10 2798.6
+ 승 1 3841387936 1.5604e+10 2809.9
+ 이닝 1 2811807174 1.6633e+10 2819.6
+ 선발 1 2131826098 1.7313e+10 2825.7
+ 패 1 881138122 1.8564e+10 2836.3
<none> 1.9445e+10 2841.4
+ 블론 1 220224080 1.9225e+10 2841.7
+ 세 1 171052899 1.9274e+10 2842.0
+ kFIP 1 162536872 1.9283e+10 2842.1
+ FIP 1 154825743 1.9290e+10 2842.2
+ ERA 1 107350094 1.9338e+10 2842.5
+ LOB. 1 77049296 1.9368e+10 2842.8
+ 홈런.9 1 73957140 1.9371e+10 2842.8
+ 볼넷.9 1 64564811 1.9381e+10 2842.9
+ BABIP 1 56938420 1.9388e+10 2842.9
+ 홀드 1 38023685 1.9407e+10 2843.1
+ 삼진.9 1 5508109 1.9440e+10 2843.3
+ 경기 1 12651 1.9445e+10 2843.4
Step: AIC=2775.03
연봉.2018. ~ 연봉.2017. + WAR
Df Sum of Sq RSS AIC
+ 패 1 213356827 1.2190e+10 2774.4
+ kFIP 1 187694356 1.2215e+10 2774.7
+ 선발 1 171531569 1.2232e+10 2774.9
+ FIP 1 168772833 1.2234e+10 2774.9
+ 볼넷.9 1 164189202 1.2239e+10 2775.0
<none> 1.2403e+10 2775.0
+ 이닝 1 147039192 1.2256e+10 2775.2
+ 홈런.9 1 51612430 1.2351e+10 2776.4
+ 삼진.9 1 48348966 1.2355e+10 2776.4
+ 승 1 30075743 1.2373e+10 2776.7
+ 경기 1 27245510 1.2376e+10 2776.7
+ BABIP 1 24181791 1.2379e+10 2776.7
+ ERA 1 17077047 1.2386e+10 2776.8
+ 블론 1 11153112 1.2392e+10 2776.9
+ RA9.WAR 1 6650871 1.2396e+10 2776.9
+ 세 1 4332494 1.2399e+10 2777.0
+ 홀드 1 3482363 1.2400e+10 2777.0
+ LOB. 1 660176 1.2402e+10 2777.0
Step: AIC=2774.4
연봉.2018. ~ 연봉.2017. + WAR + 패
Df Sum of Sq RSS AIC
+ kFIP 1 197383620 1.1992e+10 2773.9
+ 승 1 180715640 1.2009e+10 2774.1
+ FIP 1 174958135 1.2015e+10 2774.2
<none> 1.2190e+10 2774.4
+ 볼넷.9 1 103300993 1.2086e+10 2775.1
+ 홈런.9 1 71014626 1.2119e+10 2775.5
+ 삼진.9 1 66895356 1.2123e+10 2775.6
+ 블론 1 42172679 1.2148e+10 2775.9
+ BABIP 1 41953578 1.2148e+10 2775.9
+ 선발 1 31473684 1.2158e+10 2776.0
+ ERA 1 13441234 1.2176e+10 2776.2
+ 이닝 1 5896647 1.2184e+10 2776.3
+ 세 1 3470456 1.2186e+10 2776.3
+ RA9.WAR 1 2414340 1.2187e+10 2776.4
+ LOB. 1 1712854 1.2188e+10 2776.4
+ 경기 1 1125166 1.2189e+10 2776.4
+ 홀드 1 189917 1.2190e+10 2776.4
Step: AIC=2773.92
연봉.2018. ~ 연봉.2017. + WAR + 패 + kFIP
Df Sum of Sq RSS AIC
+ 승 1 167413120 1.1825e+10 2773.8
<none> 1.1992e+10 2773.9
+ 블론 1 128359041 1.1864e+10 2774.3
+ 선발 1 117641927 1.1875e+10 2774.4
+ BABIP 1 75190355 1.1917e+10 2775.0
+ ERA 1 21818455 1.1971e+10 2775.6
+ 홀드 1 21403854 1.1971e+10 2775.6
+ 삼진.9 1 19275489 1.1973e+10 2775.7
+ 경기 1 17028183 1.1975e+10 2775.7
+ 이닝 1 13040981 1.1979e+10 2775.8
+ FIP 1 9361041 1.1983e+10 2775.8
+ 볼넷.9 1 8843181 1.1983e+10 2775.8
+ 홈런.9 1 8722328 1.1984e+10 2775.8
+ LOB. 1 4031616 1.1988e+10 2775.9
+ RA9.WAR 1 2013063 1.1990e+10 2775.9
+ 세 1 1445393 1.1991e+10 2775.9
Step: AIC=2773.78
연봉.2018. ~ 연봉.2017. + WAR + 패 + kFIP + 승
Df Sum of Sq RSS AIC
+ 이닝 1 215650124 1.1609e+10 2773.0
+ 선발 1 196677432 1.1628e+10 2773.2
<none> 1.1825e+10 2773.8
+ 블론 1 83011867 1.1742e+10 2774.7
+ RA9.WAR 1 63182313 1.1762e+10 2775.0
+ BABIP 1 45874866 1.1779e+10 2775.2
+ 볼넷.9 1 17920561 1.1807e+10 2775.6
+ 삼진.9 1 14563944 1.1810e+10 2775.6
+ 홈런.9 1 12160231 1.1813e+10 2775.6
+ ERA 1 8802604 1.1816e+10 2775.7
+ FIP 1 8122100 1.1817e+10 2775.7
+ 세 1 5821352 1.1819e+10 2775.7
+ 홀드 1 5267064 1.1820e+10 2775.7
+ LOB. 1 397579 1.1825e+10 2775.8
+ 경기 1 331762 1.1825e+10 2775.8
Step: AIC=2772.98
연봉.2018. ~ 연봉.2017. + WAR + 패 + kFIP + 승 + 이닝
Df Sum of Sq RSS AIC
<none> 1.1609e+10 2773.0
+ BABIP 1 87591503 1.1522e+10 2773.8
+ 선발 1 50413708 1.1559e+10 2774.3
+ 블론 1 39472232 1.1570e+10 2774.5
+ 삼진.9 1 33863019 1.1575e+10 2774.5
+ ERA 1 33524887 1.1576e+10 2774.5
+ FIP 1 18310110 1.1591e+10 2774.7
+ 홈런.9 1 12031455 1.1597e+10 2774.8
+ RA9.WAR 1 10397930 1.1599e+10 2774.8
+ LOB. 1 3136209 1.1606e+10 2774.9
+ 경기 1 1950014 1.1607e+10 2775.0
+ 볼넷.9 1 1288038 1.1608e+10 2775.0
+ 세 1 227255 1.1609e+10 2775.0
+ 홀드 1 93003 1.1609e+10 2775.0
Call:
lm(formula = 연봉.2018. ~ 연봉.2017. + WAR + 패 + kFIP +
승 + 이닝, data = dt)
Residuals:
Min 1Q Median 3Q Max
-48378 -2526 133 2563 48361
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -2.653e+03 2.664e+03 -0.996 0.3209
연봉.2017. 8.856e-01 4.027e-02 21.992 < 2e-16 ***
WAR 8.003e+03 1.169e+03 6.848 1.97e-10 ***
패 -2.708e+02 4.288e+02 -0.631 0.5287
kFIP 6.622e+02 4.042e+02 1.639 0.1035
승 1.025e+03 4.767e+02 2.150 0.0332 *
이닝 -7.811e+01 4.760e+01 -1.641 0.1029
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 8948 on 145 degrees of freedom
Multiple R-squared: 0.9197, Adjusted R-squared: 0.9164
F-statistic: 276.8 on 6 and 145 DF, p-value: < 2.2e-16
vif(model_forward)- 연봉.2017.
- 1.9185269678088
- WAR
- 4.50275401281816
- 패
- 3.39937428417761
- kFIP
- 1.20965408072733
- 승
- 6.3982769107728
- 이닝
- 10.8086888355271
연봉.2018. ~ 연봉.2017. + WAR + 패 + kFIP + 승 + 이닝
PCA(주성분분석)
- 서로 상관성이 높은 변수들을 선형 결합해 소수의 주성분으로 만들어, 기존 변수들의 정보를 요약·축소하는 기법
# PCA input: columns 1-19 are the explanatory variables, column 20 is the
# response. Each statement is on its own line so that the trailing comments
# no longer swallow the code that follows them.
dt2 <- dt[, 1:19]  # explanatory variables
dt3 <- dt[, 20]    # response variable

# Centre and scale every column so the PCA is done on the correlation scale.
# `scale.` is spelled out in full instead of relying on partial matching.
procomp.result2 <- prcomp(dt2, center = TRUE, scale. = TRUE)
summary(procomp.result2)Importance of components:
PC1 PC2 PC3 PC4 PC5 PC6 PC7
Standard deviation 2.6109 1.8528 1.5587 1.27735 1.07673 0.94635 0.77437
Proportion of Variance 0.3588 0.1807 0.1279 0.08588 0.06102 0.04714 0.03156
Cumulative Proportion 0.3588 0.5395 0.6673 0.75322 0.81424 0.86137 0.89293
PC8 PC9 PC10 PC11 PC12 PC13 PC14
Standard deviation 0.75305 0.60013 0.57118 0.51280 0.43707 0.31874 0.28648
Proportion of Variance 0.02985 0.01896 0.01717 0.01384 0.01005 0.00535 0.00432
Cumulative Proportion 0.92278 0.94173 0.95890 0.97274 0.98280 0.98815 0.99247
PC15 PC16 PC17 PC18 PC19
Standard deviation 0.27007 0.21161 0.1236 0.10061 0.006742
Proportion of Variance 0.00384 0.00236 0.0008 0.00053 0.000000
Cumulative Proportion 0.99630 0.99866 0.9995 1.00000 1.000000
dt.pca <- princomp(dt2, cor=TRUE)summary(dt.pca)Importance of components:
Comp.1 Comp.2 Comp.3 Comp.4 Comp.5
Standard deviation 2.6109027 1.8528422 1.5587351 1.2773530 1.07672785
Proportion of Variance 0.3587796 0.1806855 0.1278766 0.0858753 0.06101805
Cumulative Proportion 0.3587796 0.5394651 0.6673417 0.7532170 0.81423504
Comp.6 Comp.7 Comp.8 Comp.9 Comp.10
Standard deviation 0.94635475 0.77436931 0.75305028 0.60012648 0.57117946
Proportion of Variance 0.04713617 0.03156041 0.02984656 0.01895536 0.01717084
Cumulative Proportion 0.86137122 0.89293163 0.92277819 0.94173355 0.95890439
Comp.11 Comp.12 Comp.13 Comp.14 Comp.15
Standard deviation 0.51280325 0.4370697 0.31874103 0.286476368 0.270069813
Proportion of Variance 0.01384038 0.0100542 0.00534715 0.004319406 0.003838827
Cumulative Proportion 0.97274477 0.9827990 0.98814612 0.992465529 0.996304355
Comp.16 Comp.17 Comp.18 Comp.19
Standard deviation 0.211606675 0.1235839263 0.1006053051 6.741848e-03
Proportion of Variance 0.002356704 0.0008038414 0.0005327067 2.392237e-06
Cumulative Proportion 0.998661060 0.9994649011 0.9999976078 1.000000e+00
- 제1주성분부터 제6주성분까지의 누적 분산비율은 약 86.14%로, 6개의 주성분만으로 전체 변동의 약 86.14%를 설명할 수 있다.
screeplot(dt.pca, npcs=8, type="lines")
- 주성분들에 의해 설명되는 변동 비율
loadings(dt.pca)
Loadings:
Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7 Comp.8 Comp.9
승 0.322 0.201 0.105
패 0.272 0.198 0.108 0.236 -0.108 -0.477 -0.294
세 -0.202 0.289 0.126 -0.512 -0.426 -0.192 0.360
홀드 -0.253 0.336 0.141 0.417 0.343 0.154 0.296
블론 -0.274 0.396 0.204 -0.208 0.116 -0.183 -0.354
경기 0.191 -0.220 0.372 0.222 0.227 0.236
선발 0.261 0.350 -0.123 -0.250 -0.105
이닝 0.329 0.235 0.109 -0.134
삼진.9 0.393 -0.404 -0.229 0.455 -0.171 -0.204 -0.211
볼넷.9 -0.250 0.116 -0.236 0.289 0.752 -0.178
홈런.9 -0.173 0.286 0.276 0.268 0.154 -0.535
BABIP -0.142 0.207 0.292 -0.444 0.146
LOB. 0.131 -0.209 -0.230 0.228 -0.416 0.495 -0.174
ERA -0.235 0.294 0.288 -0.156 -0.133 0.106 0.107
RA9.WAR 0.327 0.176 -0.181 0.124 0.119 0.391
FIP -0.259 0.284 0.378
kFIP -0.257 0.268 0.423
WAR 0.318 0.213 -0.126 0.113 0.165 0.235
연봉.2017. 0.252 0.130 0.103 -0.296 -0.181 0.629 -0.535
Comp.10 Comp.11 Comp.12 Comp.13 Comp.14 Comp.15 Comp.16 Comp.17
승 0.383 0.237 0.542 0.550 0.102
패 -0.478 -0.137 0.321 -0.270 -0.229
세 -0.134 -0.328 0.242 0.206 0.105
홀드 0.128 -0.339 -0.335 0.264 -0.113 0.240
블론 0.363 0.513 -0.251 -0.150 -0.121
경기 -0.183 0.560 -0.322 -0.240
선발 0.154 -0.463 0.328 0.249
이닝 0.213 -0.370 0.151
삼진.9 -0.380 0.376
볼넷.9 -0.105 0.128 -0.359
홈런.9 -0.614
BABIP 0.665 -0.171 0.296 -0.149 0.187
LOB. 0.414 -0.155 0.112 -0.328 0.216
ERA -0.141 0.336 -0.206 -0.663 0.296
RA9.WAR 0.123 -0.243 -0.247 -0.163 -0.683
FIP 0.323
kFIP 0.492
WAR -0.398 -0.499 0.305 0.473
연봉.2017. -0.229 0.174
Comp.18 Comp.19
승
패
세
홀드
블론
경기 0.318
선발 0.553
이닝 -0.758
삼진.9
볼넷.9
홈런.9 -0.125
BABIP
LOB.
ERA
RA9.WAR
FIP 0.754
kFIP -0.641
WAR
연봉.2017.
Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7 Comp.8 Comp.9
SS loadings 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000
Proportion Var 0.053 0.053 0.053 0.053 0.053 0.053 0.053 0.053 0.053
Cumulative Var 0.053 0.105 0.158 0.211 0.263 0.316 0.368 0.421 0.474
Comp.10 Comp.11 Comp.12 Comp.13 Comp.14 Comp.15 Comp.16 Comp.17
SS loadings 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000
Proportion Var 0.053 0.053 0.053 0.053 0.053 0.053 0.053 0.053
Cumulative Var 0.526 0.579 0.632 0.684 0.737 0.789 0.842 0.895
Comp.18 Comp.19
SS loadings 1.000 1.000
Proportion Var 0.053 0.053
Cumulative Var 0.947 1.000
능형회귀
rfit <- lm.ridge(연봉.2018.~., dt, lambda=seq(0.01,20,0.1))select(rfit) modified HKB estimator is 1.552458
modified L-W estimator is 1.638756
smallest value of GCV at 4.11
# Ridge coefficients at the GCV-minimizing lambda. The column is picked by
# position with which.min(rfit$GCV) rather than by comparing the numeric
# lambda grid with the string '4.11' through the partially matched `$lam`.
round(rfit$coef[, which.min(rfit$GCV)], 3)
- 승
- 3659.317
- 패
- -737.614
- 세
- -93.401
- 홀드
- -155.777
- 블론
- 859.526
- 경기
- -1932.839
- 선발
- -3234.453
- 이닝
- 441.579
- 삼진.9
- -934.786
- 볼넷.9
- 1047.402
- 홈런.9
- 1328.778
- BABIP
- -755.31
- LOB.
- -578.691
- ERA
- -288.368
- RA9.WAR
- 516.127
- FIP
- -176.744
- kFIP
- -420.71
- WAR
- 9904.312
- 연봉.2017.
- 21113.076
# Ridge trace: one line per coefficient as a function of lambda.
matplot(rfit$lambda, t(rfit$coef),
        type = "l",
        xlab = expression(lambda),
        ylab = expression(bold(beta)(lambda)),
        lwd = 2)
abline(h = 0, col = "grey", lty = 2)
# Mark the lambda that minimizes GCV. It is computed from the fit instead of
# being hard-coded, so the marker agrees with what select(rfit) reports.
abline(v = rfit$lambda[which.min(rfit$GCV)], col = "black", lty = 2)
glmnet
X <- model.matrix(연봉.2018.~., dt)[,-1]
y <- dt$연봉.2018.head(X)| 승 | 패 | 세 | 홀드 | 블론 | 경기 | 선발 | 이닝 | 삼진.9 | 볼넷.9 | 홈런.9 | BABIP | LOB. | ERA | RA9.WAR | FIP | kFIP | WAR | 연봉.2017. | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 16 | 7 | 0 | 0 | 0 | 30 | 30 | 190.0 | 8.95 | 2.13 | 0.76 | 0.342 | 73.7 | 3.60 | 6.91 | 3.69 | 3.44 | 6.62 | 85000 |
| 2 | 11 | 11 | 1 | 0 | 0 | 30 | 29 | 185.1 | 7.43 | 1.85 | 0.53 | 0.319 | 67.1 | 3.88 | 6.80 | 3.52 | 3.41 | 6.08 | 50000 |
| 3 | 20 | 6 | 0 | 0 | 0 | 31 | 31 | 193.1 | 7.36 | 2.09 | 0.79 | 0.332 | 72.1 | 3.44 | 6.54 | 3.94 | 3.82 | 5.64 | 150000 |
| 4 | 10 | 7 | 0 | 0 | 0 | 28 | 28 | 175.2 | 8.04 | 1.95 | 1.02 | 0.298 | 75.0 | 3.43 | 6.11 | 4.20 | 4.03 | 4.63 | 100000 |
| 5 | 13 | 7 | 0 | 0 | 0 | 30 | 30 | 187.1 | 7.49 | 2.11 | 0.91 | 0.323 | 74.1 | 3.80 | 6.13 | 4.36 | 4.31 | 4.38 | 85000 |
| 6 | 8 | 10 | 0 | 0 | 0 | 26 | 26 | 160.0 | 7.42 | 1.74 | 1.12 | 0.289 | 76.1 | 3.04 | 6.52 | 4.42 | 4.32 | 3.94 | 35000 |
# Ridge regression path with glmnet (alpha = 0 selects the ridge penalty),
# evaluated on the lambda grid 0, 10, ..., 100.
ridge.fit <- glmnet(x = X, y = y, alpha = 0, lambda = seq(0, 100, 10))
# Coefficient paths with variable labels, plus a zero reference line.
plot(ridge.fit, label = TRUE)
abline(h = 0, lty = 2, col = "grey")
summary(ridge.fit) Length Class Mode
a0 11 -none- numeric
beta 209 dgCMatrix S4
df 11 -none- numeric
dim 2 -none- numeric
lambda 11 -none- numeric
dev.ratio 11 -none- numeric
nulldev 1 -none- numeric
npasses 1 -none- numeric
jerr 1 -none- numeric
offset 1 -none- logical
call 5 -none- call
nobs 1 -none- numeric
cv.fit<-cv.glmnet(X,y,alpha=0,nfolds=length(y))Warning message:
“Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold”
cv.fit
Call: cv.glmnet(x = X, y = y, nfolds = length(y), alpha = 0)
Measure: Mean-Squared Error
Lambda Index Measure SE Nonzero
min 2869 100 117171048 49876335 19
1se 12711 84 166795504 78214517 19
plot(cv.fit)
예측
WAR이라는 변수가 다른 설명변수의 곱으로 이루어진 변수니까, 인터넷에서 나오는 WAR계산법에 들어가는 변수들을 제거해보자.(근데 인터넷에 말이 다 다름 ㅎㅎ)
ERA, 이닝수, FIP는 일단 빼야함.
FIP자체가 홈런, 삼진, 볼넷 등의 값인데..
step, 전진: 연봉.2017.이랑 kFIP, 볼넷.9, 삼진.9를 넣어보자.
연봉.2018. ~ 연봉.2017. + WAR + kFIP + 승 + 이닝
후진: 연봉.2018. ~ 승 + 경기 + 선발 + WAR + 연봉.2017.
연봉.2018. ~ 연봉.2017. + WAR + 패 + kFIP + 승 + 이닝
# Reduced model without WAR: 2018 salary on 2017 salary, kFIP, walks per nine
# and strikeouts per nine. This reassigns `model3`, replacing the model that
# was stored under the same name earlier in the notebook.
model3 <- lm(
  연봉.2018. ~ 연봉.2017. + kFIP + 볼넷.9 + 삼진.9,
  data = dt
)
summary(model3)
Call:
lm(formula = 연봉.2018. ~ 연봉.2017. + kFIP + 볼넷.9 +
삼진.9, data = dt)
Residuals:
Min 1Q Median 3Q Max
-65987 -2665 -72 1406 61788
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 3604.44247 4650.08368 0.775 0.440
연봉.2017. 1.13680 0.03994 28.461 <2e-16 ***
kFIP -552.00216 675.96374 -0.817 0.415
볼넷.9 -19.23606 556.28780 -0.035 0.972
삼진.9 -42.36062 381.57885 -0.111 0.912
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 11450 on 147 degrees of freedom
Multiple R-squared: 0.8666, Adjusted R-squared: 0.863
F-statistic: 238.8 on 4 and 147 DF, p-value: < 2.2e-16
# Hold-out fit on five predictors (FIP, WAR, walks/9, strikeouts/9 and 2017
# salary). Every statement is on its own line; the original had several
# statements fused together without a separator.
X <- data.frame(
  FIP = dt[["FIP"]],
  WAR = dt[["WAR"]],
  `볼넷.9` = dt[["볼넷.9"]],
  `삼진.9` = dt[["삼진.9"]],
  `연봉.2017.` = dt[["연봉.2017."]]
)
y <- dt[["연봉.2018."]]

# 80/20 train/test split with a fixed seed.
set.seed(19)
train_indices <- createDataPartition(y, p = 0.8, list = FALSE)
X_train <- X[train_indices, ]
X_test <- X[-train_indices, ]
y_train <- y[train_indices]
y_test <- y[-train_indices]

# Linear model fitted on the training rows only.
model <- lm(y_train ~ ., data = cbind(X_train, y_train))
summary(model)
Call:
lm(formula = y_train ~ ., data = cbind(X_train, y_train))
Residuals:
Min 1Q Median 3Q Max
-49900 -1887 574 2975 49836
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -1.261e+03 4.037e+03 -0.312 0.755
FIP 1.395e+02 6.349e+02 0.220 0.826
WAR 8.148e+03 9.161e+02 8.894 8.65e-15 ***
볼넷.9 5.772e+02 5.144e+02 1.122 0.264
삼진.9 -4.929e+02 3.528e+02 -1.397 0.165
연봉.2017. 9.156e-01 4.691e-02 19.518 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 9667 on 117 degrees of freedom
Multiple R-squared: 0.9104, Adjusted R-squared: 0.9065
F-statistic: 237.7 on 5 and 117 DF, p-value: < 2.2e-16
흠
X <- data.frame(FIP = picher$"FIP",
WAR = picher$"WAR",
`볼넷/9` = picher$"볼넷.9",
`삼진/9` = picher$"삼진.9",
`연봉.2017.` = picher$"연봉.2017.")predict_2018_salary <- predict(model, newdata = X)picher <- cbind(picher, new_col = predict_2018_salary)head(picher)| 선수명 | 팀명 | 승 | 패 | 세 | 홀드 | 블론 | 경기 | 선발 | 이닝 | ⋯ | LOB. | ERA | RA9.WAR | FIP | kFIP | WAR | 연봉.2018. | 연봉.2017. | new_col | new_col.1 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| <chr> | <chr> | <int> | <int> | <int> | <int> | <int> | <int> | <int> | <dbl> | ⋯ | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <int> | <int> | <dbl> | <dbl> | |
| 1 | 켈리 | SK | 16 | 7 | 0 | 0 | 0 | 30 | 30 | 190.0 | ⋯ | 73.7 | 3.60 | 6.91 | 3.69 | 3.44 | 6.62 | 140000 | 85000 | 63003.59 | 127838.00 |
| 2 | 소사 | LG | 11 | 11 | 1 | 0 | 0 | 30 | 29 | 185.1 | ⋯ | 67.1 | 3.88 | 6.80 | 3.52 | 3.41 | 6.08 | 120000 | 50000 | 37061.64 | 91955.34 |
| 3 | 양현종 | KIA | 20 | 6 | 0 | 0 | 0 | 31 | 31 | 193.1 | ⋯ | 72.1 | 3.44 | 6.54 | 3.94 | 3.82 | 5.64 | 230000 | 150000 | 111180.99 | 180164.16 |
| 4 | 차우찬 | LG | 10 | 7 | 0 | 0 | 0 | 28 | 28 | 175.2 | ⋯ | 75.0 | 3.43 | 6.11 | 4.20 | 4.03 | 4.63 | 100000 | 100000 | 74120.86 | 125774.08 |
| 5 | 레일리 | 롯데 | 13 | 7 | 0 | 0 | 0 | 30 | 30 | 187.1 | ⋯ | 74.1 | 3.80 | 6.13 | 4.36 | 4.31 | 4.38 | 111000 | 85000 | 63002.89 | 110388.58 |
| 6 | 피어밴드 | KT | 8 | 10 | 0 | 0 | 0 | 26 | 26 | 160.0 | ⋯ | 76.1 | 3.04 | 6.52 | 4.42 | 4.32 | 3.94 | 85000 | 35000 | 25942.97 | 60851.77 |
predictsalart <- round(predict_2018_salary,0)result1 <- picher$"선수명"
result2 <- picher$"연봉.2018."
result3 <- predictsalart
result4 <- picher$"연봉.2017."
result <- cbind(result1,result2,result3,result4)result| result1 | result2 | result3 | result4 | |
|---|---|---|---|---|
| 1 | 켈리 | 140000 | 127838 | 85000 |
| 2 | 소사 | 120000 | 91955 | 50000 |
| 3 | 양현종 | 230000 | 180164 | 150000 |
| 4 | 차우찬 | 100000 | 125774 | 100000 |
| 5 | 레일리 | 111000 | 110389 | 85000 |
| 6 | 피어밴드 | 85000 | 60852 | 35000 |
| 7 | 고영표 | 11500 | 32249 | 5200 |
| 8 | 장원준 | 100000 | 120661 | 100000 |
| 9 | 함덕주 | 16000 | 34421 | 7000 |
| 10 | 팻딘 | 70000 | 90685 | 70000 |
| 11 | 윤성환 | 80000 | 95139 | 80000 |
| 12 | 유희관 | 50000 | 67371 | 50000 |
| 13 | 임기영 | 13000 | 22933 | 3100 |
| 14 | 박세웅 | 25000 | 27964 | 10000 |
| 15 | 백정현 | 15500 | 24575 | 10000 |
| 16 | 송승준 | 40000 | 51763 | 40000 |
| 17 | 류제국 | 29000 | 47535 | 35000 |
| 18 | 우규민 | 70000 | 78523 | 70000 |
| 19 | 임찬규 | 11500 | 19833 | 6500 |
| 20 | 손승락 | 70000 | 74664 | 70000 |
| 21 | 정우람 | 120000 | 119624 | 120000 |
| 22 | 윤희상 | 13000 | 26255 | 15000 |
| 23 | 원종현 | 18500 | 23403 | 14000 |
| 24 | 배영수 | 50000 | 61694 | 55000 |
| 25 | 박종훈 | 20000 | 20806 | 10000 |
| 26 | 이상화 | 10000 | 13334 | 4500 |
| 27 | 김진성 | 23000 | 24511 | 18000 |
| 28 | 이민호 | 18800 | 22706 | 16000 |
| 29 | 이재학 | 19000 | 25924 | 20000 |
| 30 | 김강률 | 15000 | 13575 | 6200 |
| ⋮ | ⋮ | ⋮ | ⋮ | ⋮ |
| 123 | 정재원 | 4000 | 2960 | 4000 |
| 124 | 김민우 | 3600 | 3555 | 3800 |
| 125 | 이현호 | 5200 | 3447 | 6000 |
| 126 | 권혁 | 45000 | 38632 | 45000 |
| 127 | 배제성 | 3000 | 1539 | 2700 |
| 128 | 홍상삼 | 9000 | 11187 | 12500 |
| 129 | 이태양 | 7300 | 5930 | 8300 |
| 130 | 김진우 | 6000 | 9837 | 12000 |
| 131 | 이영하 | 4200 | -291 | 2700 |
| 132 | 최성영 | 2900 | 1140 | 2900 |
| 133 | 김동호 | 6000 | 3508 | 5000 |
| 134 | 김윤동 | 15000 | 1585 | 4700 |
| 135 | 정인욱 | 5700 | 3266 | 7200 |
| 136 | 송창식 | 24000 | 16351 | 22000 |
| 137 | 배재환 | 3000 | 3807 | 4000 |
| 138 | 이정민 | 10000 | 11150 | 15000 |
| 139 | 최동환 | 6500 | 1652 | 6000 |
| 140 | 이종혁 | 3200 | 564 | 2700 |
| 141 | 홍성용 | 6800 | 2878 | 6300 |
| 142 | 정영일 | 3000 | 4241 | 4000 |
| 143 | 김지용 | 9000 | 3789 | 10000 |
| 144 | 최금강 | 12500 | 8542 | 14000 |
| 145 | 김범수 | 3600 | 1207 | 3300 |
| 146 | 이승현 | 7000 | 2054 | 6200 |
| 147 | 주권 | 7600 | 2162 | 7500 |
| 148 | 장민재 | 7100 | 2973 | 8100 |
| 149 | 정용운 | 7500 | -673 | 3100 |
| 150 | 노경은 | 10000 | 9953 | 16000 |
| 151 | 김승현 | 4000 | -3065 | 2900 |
| 152 | 류희운 | 4000 | -5132 | 3000 |
k-fold
# 10-fold cross-validated linear regression with caret.
X <- dt[c("FIP", "WAR", "볼넷.9", "삼진.9", "연봉.2017.")]
y <- dt[["연봉.2018."]]

ctrl <- trainControl(method = "cv",
                     number = 10,
                     verboseIter = TRUE)

# NOTE(review): the train() call was missing from this cell, so `ctrl` was
# never used and print(model) would have shown the earlier lm fit. It is
# reconstructed from the printed summary (linear regression, 10-fold CV);
# confirm against the original notebook.
model <- train(x = X, y = y, method = "lm", trControl = ctrl)
print(model)
Linear Regression
152 samples
5 predictor
No pre-processing
Resampling: Cross-Validated (10 fold)
Summary of sample sizes: 136, 139, 137, 136, 137, 136, ...
Resampling results:
RMSE Rsquared MAE
8775.345 0.9234182 5096.303
Tuning parameter 'intercept' was held constant at a value of TRUE
createDataPartition
# 80/20 hold-out split with a fixed seed, followed by a linear model fitted
# on the training rows only.
set.seed(20)
train_indices <- createDataPartition(y, p = 0.8, list = FALSE)  # 80% used for training
X_train <- X[train_indices, ]   # training predictors
y_train <- y[train_indices]     # training response
X_test <- X[-train_indices, ]   # test predictors
y_test <- y[-train_indices]     # test response

# The fit and its summary are on their own lines: in the original they sat
# behind the `#` comment on the y_test line and were therefore never run.
model <- lm(y_train ~ ., data = cbind(X_train, y_train))
summary(model)
Call:
lm(formula = y_train ~ ., data = cbind(X_train, y_train))
Residuals:
Min 1Q Median 3Q Max
-51628 -1742 835 2679 54478
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -3.132e+03 3.752e+03 -0.835 0.406
FIP 3.656e+02 6.383e+02 0.573 0.568
WAR 6.521e+03 9.510e+02 6.857 3.53e-10 ***
볼넷.9 4.095e+02 4.922e+02 0.832 0.407
삼진.9 -2.664e+02 3.220e+02 -0.827 0.410
연봉.2017. 9.436e-01 4.885e-02 19.314 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 9407 on 117 degrees of freedom
Multiple R-squared: 0.9084, Adjusted R-squared: 0.9045
F-statistic: 232.1 on 5 and 117 DF, p-value: < 2.2e-16
y_pred <- predict(model, newdata = X_test)y_pred- 2
- 83760.5036635398
- 11
- 92856.5551719829
- 16
- 49834.1350433709
- 20
- 74394.6037441324
- 21
- 121111.857311619
- 23
- 21354.3954407502
- 30
- 11877.9200631754
- 31
- 13693.1214770523
- 36
- 10957.9331638088
- 39
- 7904.67772835263
- 41
- 22618.8632801464
- 47
- 3977.22428640269
- 51
- 4759.88345127168
- 58
- 5598.23942115724
- 62
- 2384.85845607467
- 64
- 3520.11810139773
- 67
- 3352.59845508957
- 68
- 22782.5629218232
- 73
- 2753.45775461199
- 77
- 8019.61080626648
- 80
- 5122.41200499673
- 83
- -3354.1873156885
- 90
- -1479.51353608508
- 98
- 2904.84077333906
- 118
- 5349.37841885841
- 121
- 3656.12420217399
- 132
- 2282.13105827633
- 141
- 3578.68637161238
- 142
- 4774.13856910488
plot(model)



## Shapiro-Wilk Test
## H0 : normal distribution vs. H1 : not H0
shapiro.test(resid(model))
Shapiro-Wilk normality test
data: resid(model)
W = 0.69671, p-value = 1.316e-14
p-value가 유의수준 0.05보다 작으므로 잔차가 정규분포를 따른다는 귀무가설을 기각한다.
library(lmtest)
### Homoscedasticity check (Breusch-Pagan test on the fitted model)
## H0 : homoscedastic errors vs. H1 : heteroscedastic errors (Heteroscedasticity)
bptest(model)
studentized Breusch-Pagan test
data: model
BP = 48.48, df = 5, p-value = 2.834e-09
p-value가 0.05보다 작으므로 등분산 귀무가설을 기각한다. 즉, 잔차에 이분산성이 있다.
bptest(model)
studentized Breusch-Pagan test
data: model
BP = 48.48, df = 5, p-value = 2.834e-09
기각..
# Root mean squared error of the predictions against the held-out responses.
rmse <- local({
  squared_error <- (y_pred - y_test)^2
  sqrt(mean(squared_error))
})
rmse
8169.95343721918
# R-squared on the held-out data: 1 - SS_residual / SS_total, with SS_total
# taken around the mean of the test responses.
r_squared <- local({
  ss_residual <- sum((y_test - y_pred)^2)
  ss_total <- sum((y_test - mean(y_test))^2)
  1 - ss_residual / ss_total
})
r_squared
0.937905268099376
y_pred <- predict(model, newdata = X)
y_pred- 1
- 120079.644407696
- 2
- 83760.5036635398
- 3
- 175521.573203863
- 4
- 121612.06220557
- 5
- 106098.40123629
- 6
- 55938.3360303223
- 7
- 26731.8079691304
- 8
- 117272.89045312
- 9
- 28840.8622273183
- 10
- 87223.7553189251
- 11
- 92856.5551719829
- 12
- 64095.7326866766
- 13
- 18557.1211875583
- 14
- 24286.457756089
- 15
- 21705.8561903346
- 16
- 49834.1350433709
- 17
- 45256.3857125534
- 18
- 77519.7451159889
- 19
- 17218.5428165066
- 20
- 74394.6037441324
- 21
- 121111.857311619
- 22
- 24012.4794639814
- 23
- 21354.3954407502
- 24
- 60809.3599020221
- 25
- 18627.1334642915
- 26
- 11284.3959821959
- 27
- 23499.0012034168
- 28
- 21500.0921981219
- 29
- 25216.0463766418
- 30
- 11877.9200631754
- 31
- 13693.1214770523
- 32
- 9721.37978877243
- 33
- 10832.9902724258
- 34
- 22447.697009164
- 35
- 7824.80860177616
- 36
- 10957.9331638088
- 37
- 50497.4525617583
- 38
- 23384.8597822882
- 39
- 7904.67772835263
- 40
- 24703.2223966378
- 41
- 22618.8632801464
- 42
- 13201.8856253739
- 43
- 12816.7508124466
- 44
- 26147.3170424822
- 45
- 58131.6087102705
- 46
- 7523.01225446262
- 47
- 3977.22428640269
- 48
- 4591.4093894283
- 49
- 39003.2211855645
- 50
- 71627.7765552946
- 51
- 4759.88345127168
- 52
- 47644.4841594123
- 53
- 10530.7891568554
- 54
- 5199.11439751501
- 55
- 9368.46795181711
- 56
- 31579.4491180105
- 57
- 4231.50693696113
- 58
- 5598.23942115724
- 59
- 5136.52005559622
- 60
- 9102.06897972658
- 61
- 6984.12598013427
- 62
- 2384.85845607467
- 63
- 18303.363177317
- 64
- 3520.11810139773
- 65
- 3322.44698945243
- 66
- 1989.42705473082
- 67
- 3352.59845508957
- 68
- 22782.5629218232
- 69
- 10769.628209866
- 70
- 3902.99550968491
- 71
- 19176.2096780485
- 72
- 5598.20242176528
- 73
- 2753.45775461199
- 74
- 10039.0367084241
- 75
- 7357.93890125183
- 76
- 5058.81091390124
- 77
- 8019.61080626648
- 78
- 7277.46775096933
- 79
- 2999.80648325582
- 80
- 5122.41200499673
- 81
- 2190.6374430789
- 82
- 9310.17406653524
- 83
- -3354.1873156885
- 84
- -324.080566112093
- 85
- 3952.58752085951
- 86
- 3631.83930171537
- 87
- 129.350963084188
- 88
- 2855.73595792648
- 89
- 3556.52749004506
- 90
- -1479.51353608508
- 91
- 9288.2135748458
- 92
- 2179.48869530268
- 93
- 2274.22153090663
- 94
- 2563.46139539266
- 95
- 4225.95510985668
- 96
- 779.354766372629
- 97
- -2275.06753560988
- 98
- 2904.84077333906
- 99
- 1276.18575681504
- 100
- 1278.77949800168
- 101
- 1106.52488833093
- 102
- 1872.11890217375
- 103
- 2491.20505069081
- 104
- 5496.81554757196
- 105
- 7100.93204974382
- 106
- 2029.42471093689
- 107
- 6086.50404299722
- 108
- 6770.85899230805
- 109
- 229.654419792656
- 110
- 3410.59366971035
- 111
- 18753.9298630808
- 112
- 2464.93783388577
- 113
- 9614.92030264136
- 114
- 1084.00882682257
- 115
- 8543.2323809515
- 116
- 2825.44788089157
- 117
- 3470.9785470481
- 118
- 5349.37841885841
- 119
- 1957.02838737825
- 120
- 1942.46056909266
- 121
- 3656.12420217399
- 122
- 42166.0786594782
- 123
- 2646.91201627154
- 124
- 3912.29633727664
- 125
- 4053.21273794936
- 126
- 40416.3088155261
- 127
- 1718.55530132255
- 128
- 11344.8355840123
- 129
- 6449.57632010468
- 130
- 10343.4503716666
- 131
- 566.080172487302
- 132
- 2282.13105827633
- 133
- 3362.37196892649
- 134
- 2213.60459015082
- 135
- 4651.09395657194
- 136
- 17836.449861336
- 137
- 4905.41919551359
- 138
- 11939.7833450429
- 139
- 2934.46752999489
- 140
- 1059.70613969482
- 141
- 3578.68637161238
- 142
- 4774.13856910488
- 143
- 5340.39499497243
- 144
- 9744.24047688514
- 145
- 1838.44630518678
- 146
- 3450.35582952913
- 147
- 3280.22844632236
- 148
- 3919.93354796755
- 149
- -53.2035118929793
- 150
- 11626.3868566865
- 151
- -1610.41555450545
- 152
- -3236.44776099906
# Append the model predictions to `picher` as a new column named `new_data`.
picher <- cbind(picher, new_data = y_pred)
# Sort the rows by the "연봉.2018." column, highest first: negating the
# numeric column makes order() rank it in descending order.
sorted_picher <- picher[order(-picher$"연봉.2018."), ]
sorted_picher
| 선수명 | 팀명 | 승 | 패 | 세 | 홀드 | 블론 | 경기 | 선발 | 이닝 | ⋯ | ERA | RA9.WAR | FIP | kFIP | WAR | 연봉.2018. | 연봉.2017. | new_col | new_col.1 | new_data | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| <chr> | <chr> | <int> | <int> | <int> | <int> | <int> | <int> | <int> | <dbl> | ⋯ | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <int> | <int> | <dbl> | <dbl> | <dbl> | |
| 3 | 양현종 | KIA | 20 | 6 | 0 | 0 | 0 | 31 | 31 | 193.1 | ⋯ | 3.44 | 6.54 | 3.94 | 3.82 | 5.64 | 230000 | 150000 | 111180.99 | 180164.16 | 175521.57 |
| 1 | 켈리 | SK | 16 | 7 | 0 | 0 | 0 | 30 | 30 | 190.0 | ⋯ | 3.60 | 6.91 | 3.69 | 3.44 | 6.62 | 140000 | 85000 | 63003.59 | 127838.00 | 120079.64 |
| 2 | 소사 | LG | 11 | 11 | 1 | 0 | 0 | 30 | 29 | 185.1 | ⋯ | 3.88 | 6.80 | 3.52 | 3.41 | 6.08 | 120000 | 50000 | 37061.64 | 91955.34 | 83760.50 |
| 21 | 정우람 | 한화 | 6 | 4 | 26 | 0 | 5 | 56 | 0 | 59.0 | ⋯ | 2.75 | 2.85 | 3.26 | 2.69 | 1.81 | 120000 | 120000 | 88943.64 | 119624.39 | 121111.86 |
| 5 | 레일리 | 롯데 | 13 | 7 | 0 | 0 | 0 | 30 | 30 | 187.1 | ⋯ | 3.80 | 6.13 | 4.36 | 4.31 | 4.38 | 111000 | 85000 | 63002.89 | 110388.58 | 106098.40 |
| 4 | 차우찬 | LG | 10 | 7 | 0 | 0 | 0 | 28 | 28 | 175.2 | ⋯ | 3.43 | 6.11 | 4.20 | 4.03 | 4.63 | 100000 | 100000 | 74120.86 | 125774.08 | 121612.06 |
| 8 | 장원준 | 두산 | 14 | 9 | 0 | 0 | 0 | 29 | 29 | 180.1 | ⋯ | 3.14 | 7.28 | 4.26 | 4.35 | 3.85 | 100000 | 100000 | 74120.70 | 120660.77 | 117272.89 |
| 6 | 피어밴드 | KT | 8 | 10 | 0 | 0 | 0 | 26 | 26 | 160.0 | ⋯ | 3.04 | 6.52 | 4.42 | 4.32 | 3.94 | 85000 | 35000 | 25942.97 | 60851.77 | 55938.34 |
| 11 | 윤성환 | 삼성 | 12 | 9 | 0 | 0 | 0 | 28 | 28 | 174.1 | ⋯ | 4.28 | 5.36 | 4.78 | 4.80 | 3.03 | 80000 | 80000 | 59296.47 | 95138.61 | 92856.56 |
| 10 | 팻딘 | KIA | 9 | 7 | 0 | 0 | 0 | 30 | 29 | 176.0 | ⋯ | 4.14 | 5.66 | 4.65 | 4.61 | 3.64 | 70000 | 70000 | 51884.71 | 90684.81 | 87223.76 |
| 18 | 우규민 | 삼성 | 7 | 10 | 0 | 0 | 0 | 27 | 25 | 133.0 | ⋯ | 5.21 | 1.48 | 4.95 | 4.98 | 2.14 | 70000 | 70000 | 51884.20 | 78523.00 | 77519.75 |
| 20 | 손승락 | 롯데 | 1 | 3 | 37 | 0 | 5 | 61 | 0 | 62.0 | ⋯ | 2.18 | 3.91 | 3.69 | 3.37 | 1.82 | 70000 | 70000 | 51883.97 | 74663.86 | 74394.60 |
| 45 | 이동현 | LG | 3 | 6 | 7 | 5 | 3 | 45 | 0 | 50.2 | ⋯ | 4.80 | 1.22 | 3.64 | 3.59 | 0.68 | 60000 | 60000 | 44471.73 | 57336.45 | 58131.61 |
| 12 | 유희관 | 두산 | 11 | 6 | 0 | 1 | 0 | 30 | 29 | 188.2 | ⋯ | 4.53 | 4.79 | 4.78 | 4.97 | 2.89 | 50000 | 50000 | 37060.65 | 67371.43 | 64095.73 |
| 24 | 배영수 | 한화 | 7 | 8 | 0 | 0 | 0 | 25 | 25 | 128.0 | ⋯ | 5.06 | 2.47 | 5.10 | 5.21 | 1.68 | 50000 | 55000 | 40766.17 | 61693.57 | 60809.36 |
| 37 | 임창용 | KIA | 8 | 6 | 7 | 9 | 5 | 51 | 0 | 50.0 | ⋯ | 3.78 | 1.40 | 3.69 | 3.35 | 0.96 | 50000 | 50000 | 37059.82 | 49996.69 | 50497.45 |
| 52 | 윤길현 | 롯데 | 1 | 4 | 0 | 13 | 2 | 40 | 0 | 39.1 | ⋯ | 6.41 | -0.04 | 3.99 | 3.73 | 0.50 | 50000 | 50000 | 37059.67 | 46413.16 | 47644.48 |
| 122 | 송은범 | 한화 | 0 | 4 | 1 | 0 | 1 | 13 | 6 | 37.1 | ⋯ | 6.51 | 0.10 | 6.33 | 6.79 | -0.12 | 45000 | 45000 | 33353.86 | 41260.88 | 42166.08 |
| 126 | 권혁 | 한화 | 1 | 3 | 0 | 11 | 1 | 37 | 0 | 31.1 | ⋯ | 6.32 | 0.04 | 6.48 | 6.78 | -0.16 | 45000 | 45000 | 33353.67 | 38632.11 | 40416.31 |
| 16 | 송승준 | 롯데 | 11 | 5 | 0 | 1 | 1 | 30 | 22 | 130.1 | ⋯ | 4.21 | 4.22 | 4.91 | 4.77 | 2.20 | 40000 | 40000 | 29648.38 | 51762.94 | 49834.14 |
| 49 | 이현승 | 두산 | 3 | 2 | 5 | 9 | 7 | 57 | 0 | 52.0 | ⋯ | 3.98 | 1.69 | 4.25 | 4.30 | 0.53 | 40000 | 40000 | 29647.84 | 38630.86 | 39003.22 |
| 41 | 안영명 | 한화 | 1 | 8 | 0 | 0 | 1 | 25 | 16 | 87.2 | ⋯ | 5.75 | 0.52 | 5.19 | 5.34 | 0.81 | 35000 | 20000 | 14824.10 | 23301.25 | 22618.86 |
| 68 | 채병용 | SK | 6 | 4 | 0 | 6 | 3 | 43 | 0 | 50.0 | ⋯ | 6.84 | -0.17 | 5.05 | 4.94 | 0.22 | 30000 | 25000 | 18529.76 | 21870.11 | 22782.56 |
| 17 | 류제국 | LG | 8 | 6 | 0 | 0 | 0 | 25 | 25 | 131.1 | ⋯ | 5.35 | 1.53 | 4.79 | 4.84 | 2.15 | 29000 | 35000 | 25942.45 | 47534.93 | 45256.39 |
| 14 | 박세웅 | 롯데 | 12 | 6 | 0 | 0 | 0 | 28 | 28 | 171.1 | ⋯ | 3.68 | 5.92 | 5.07 | 5.14 | 2.54 | 25000 | 10000 | 7412.73 | 27963.63 | 24286.46 |
| 40 | 임창민 | NC | 4 | 3 | 29 | 0 | 6 | 60 | 0 | 66.0 | ⋯ | 3.68 | 2.68 | 4.83 | 4.60 | 0.89 | 25000 | 22500 | 16677.00 | 24913.36 | 24703.22 |
| 56 | 박정진 | 한화 | 3 | 2 | 1 | 7 | 0 | 55 | 0 | 48.0 | ⋯ | 3.94 | 0.87 | 4.92 | 4.66 | 0.38 | 25000 | 33000 | 24459.37 | 30622.42 | 31579.45 |
| 136 | 송창식 | 한화 | 5 | 6 | 0 | 15 | 6 | 63 | 0 | 73.1 | ⋯ | 6.63 | -0.74 | 5.68 | 5.59 | -0.22 | 24000 | 22000 | 16306.09 | 16351.07 | 17836.45 |
| 27 | 김진성 | NC | 10 | 6 | 0 | 15 | 2 | 69 | 0 | 89.2 | ⋯ | 3.61 | 3.20 | 4.30 | 3.92 | 1.47 | 23000 | 18000 | 13341.73 | 24511.49 | 23499.00 |
| 38 | 심창민 | 삼성 | 4 | 7 | 6 | 16 | 2 | 66 | 0 | 75.1 | ⋯ | 4.18 | 1.82 | 4.82 | 4.26 | 0.93 | 23000 | 21000 | 15565.14 | 23185.40 | 23384.86 |
| ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋱ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ |
| 99 | 박치국 | 두산 | 1 | 1 | 0 | 0 | 0 | 21 | 3 | 32.0 | ⋯ | 6.75 | -0.12 | 5.39 | 5.58 | 0.00 | 3400 | 2700 | 2001.156 | 1048.3622 | 1276.1858 |
| 102 | 허건엽 | SK | 0 | 0 | 0 | 0 | 0 | 8 | 0 | 8.1 | ⋯ | 8.64 | -0.20 | 5.29 | 5.64 | -0.01 | 3400 | 3300 | 2445.935 | 2067.0516 | 1872.1189 |
| 105 | 고봉재 | 두산 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1.0 | ⋯ | 0.00 | 0.04 | 6.73 | 7.64 | -0.01 | 3300 | 4400 | 3261.709 | 8820.0864 | 7100.9320 |
| 112 | 김시현 | 삼성 | 0 | 0 | 0 | 0 | 0 | 17 | 0 | 21.1 | ⋯ | 7.59 | 0.08 | 7.06 | 7.18 | -0.05 | 3300 | 2700 | 2001.245 | 2115.3516 | 2464.9378 |
| 120 | 강장산 | KT | 0 | 0 | 0 | 0 | 0 | 17 | 0 | 26.1 | ⋯ | 5.47 | 0.54 | 6.16 | 6.46 | -0.10 | 3300 | 3000 | 2223.583 | 1795.4384 | 1942.4606 |
| 118 | 배민관 | LG | 0 | 0 | 0 | 0 | 0 | 4 | 0 | 3.2 | ⋯ | 9.82 | -0.11 | 9.46 | 9.77 | -0.08 | 3200 | 3000 | 2223.850 | 5401.6928 | 5349.3784 |
| 140 | 이종혁 | KT | 2 | 0 | 0 | 0 | 0 | 16 | 0 | 19.0 | ⋯ | 6.63 | -0.09 | 7.10 | 7.57 | -0.25 | 3200 | 2700 | 2001.196 | 564.4227 | 1059.7061 |
| 70 | 최지광 | 삼성 | 0 | 2 | 0 | 0 | 0 | 11 | 6 | 25.0 | ⋯ | 6.48 | -0.16 | 5.69 | 5.65 | 0.21 | 3100 | 2700 | 2001.325 | 4206.2468 | 3902.9955 |
| 73 | 박세진 | KT | 0 | 2 | 0 | 0 | 0 | 4 | 3 | 11.1 | ⋯ | 9.53 | -0.13 | 4.44 | 4.67 | 0.18 | 3100 | 3000 | 2223.610 | 3189.5874 | 2753.4578 |
| 101 | 박상원 | 한화 | 0 | 0 | 0 | 1 | 0 | 18 | 0 | 21.2 | ⋯ | 4.15 | 0.32 | 5.30 | 5.42 | 0.00 | 3100 | 2700 | 2001.158 | 968.7329 | 1106.5249 |
| 81 | 김대유 | SK | 0 | 0 | 0 | 0 | 0 | 6 | 0 | 4.2 | ⋯ | 9.64 | 0.08 | 5.88 | 5.91 | 0.09 | 3000 | 2700 | 2001.218 | 2139.0137 | 2190.6374 |
| 84 | 손주영 | LG | 0 | 0 | 0 | 0 | 0 | 5 | 0 | 6.0 | ⋯ | 4.50 | 0.06 | 3.07 | 3.08 | 0.08 | 3000 | 2700 | 2001.010 | -539.4368 | -324.0806 |
| 87 | 김진영 | 한화 | 0 | 0 | 0 | 0 | 0 | 3 | 0 | 2.2 | ⋯ | 10.12 | -0.02 | 2.98 | 2.29 | 0.07 | 3000 | 2700 | 2000.967 | -560.7629 | 129.3510 |
| 98 | 임진우 | 두산 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1.0 | ⋯ | 27.00 | -0.11 | 4.73 | 4.61 | 0.00 | 3000 | 3200 | 2371.857 | 3087.5908 | 2904.8408 |
| 109 | 서균 | 한화 | 0 | 0 | 0 | 0 | 0 | 14 | 0 | 14.1 | ⋯ | 4.40 | 0.25 | 5.41 | 5.58 | -0.04 | 3000 | 2700 | 2001.060 | -370.1056 | 229.6544 |
| 121 | 홍성무 | KT | 0 | 1 | 0 | 0 | 0 | 7 | 0 | 6.1 | ⋯ | 12.79 | -0.29 | 7.37 | 7.58 | -0.11 | 3000 | 3000 | 2223.730 | 3741.6415 | 3656.1242 |
| 127 | 배제성 | KT | 0 | 0 | 0 | 0 | 0 | 21 | 1 | 32.0 | ⋯ | 8.72 | -0.10 | 6.39 | 6.59 | -0.17 | 3000 | 2700 | 2001.242 | 1539.0311 | 1718.5553 |
| 137 | 배재환 | NC | 0 | 1 | 0 | 0 | 0 | 3 | 1 | 8.0 | ⋯ | 9.00 | -0.16 | 11.48 | 11.73 | -0.23 | 3000 | 4000 | 2964.893 | 3807.0275 | 4905.4192 |
| 142 | 정영일 | SK | 0 | 0 | 0 | 0 | 0 | 9 | 0 | 8.0 | ⋯ | 10.12 | -0.61 | 10.73 | 11.37 | -0.30 | 3000 | 4000 | 2964.964 | 4241.1480 | 4774.1386 |
| 96 | 김도영 | KT | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1.0 | ⋯ | 0.00 | 0.04 | 3.73 | 4.43 | 0.00 | 2900 | 2700 | 2001.259 | 1731.9157 | 779.3548 |
| 97 | 조근종 | KT | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 2.0 | ⋯ | 22.50 | -0.16 | 6.73 | 4.86 | 0.00 | 2900 | 2700 | 2000.470 | -6343.0220 | -2275.0675 |
| 108 | 안규현 | 삼성 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 2.0 | ⋯ | 9.00 | -0.04 | 8.73 | 9.67 | -0.04 | 2900 | 2800 | 2075.799 | 7768.8014 | 6770.8590 |
| 115 | 김종훈 | KIA | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1.0 | ⋯ | 36.00 | -0.16 | 18.73 | 17.60 | -0.06 | 2900 | 2800 | 2075.467 | 4943.5065 | 8543.2324 |
| 132 | 최성영 | NC | 0 | 0 | 0 | 0 | 0 | 7 | 0 | 11.1 | ⋯ | 9.53 | -0.26 | 9.29 | 9.48 | -0.20 | 2900 | 2900 | 2149.435 | 1140.0750 | 2282.1311 |
| 83 | 장지훈 | 삼성 | 0 | 0 | 0 | 0 | 0 | 4 | 0 | 2.1 | ⋯ | 0.00 | 0.09 | 0.73 | -0.69 | 0.08 | 2800 | 2700 | 2000.563 | -5315.1733 | -3354.1873 |
| 90 | 차재용 | 롯데 | 0 | 0 | 0 | 0 | 0 | 3 | 0 | 2.1 | ⋯ | 0.00 | 0.08 | 2.02 | 1.83 | 0.05 | 2800 | 2800 | 2075.035 | -1808.0465 | -1479.5135 |
| 106 | 이수민 | 삼성 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 2.2 | ⋯ | 10.12 | -0.15 | 5.23 | 5.95 | -0.01 | 2800 | 3000 | 2223.624 | 2419.1537 | 2029.4247 |
| 94 | 장민익 | 두산 | 0 | 0 | 0 | 0 | 0 | 3 | 0 | 2.0 | ⋯ | 0.00 | 0.08 | 4.73 | 4.61 | 0.02 | 2700 | 2700 | 2001.266 | 2792.7349 | 2563.4614 |
| 104 | 성영훈 | 두산 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1.0 | ⋯ | 0.00 | 0.04 | 6.73 | 7.64 | -0.01 | 2700 | 2700 | 2001.678 | 7263.5285 | 5496.8155 |
| 116 | 정동윤 | SK | 0 | 0 | 0 | 0 | 0 | 2 | 0 | 3.0 | ⋯ | 9.00 | -0.03 | 9.40 | 10.05 | -0.07 | 2700 | 2700 | 2001.280 | 2205.1885 | 2825.4479 |
# Keep only the rows of `picher` whose "연봉.2018." value differs from the
# "연봉.2017." value. The two columns are named inside local() so that only
# `filtered_df` is created in the workspace.
filtered_df <- local({
  salary_2018 <- picher[["연봉.2018."]]
  salary_2017 <- picher[["연봉.2017."]]
  picher[salary_2018 != salary_2017, ]
})
filtered_df
| 선수명 | 팀명 | 승 | 패 | 세 | 홀드 | 블론 | 경기 | 선발 | 이닝 | ⋯ | ERA | RA9.WAR | FIP | kFIP | WAR | 연봉.2018. | 연봉.2017. | new_col | new_col.1 | new_data | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| <chr> | <chr> | <int> | <int> | <int> | <int> | <int> | <int> | <int> | <dbl> | ⋯ | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <int> | <int> | <dbl> | <dbl> | <dbl> | |
| 1 | 켈리 | SK | 16 | 7 | 0 | 0 | 0 | 30 | 30 | 190.0 | ⋯ | 3.60 | 6.91 | 3.69 | 3.44 | 6.62 | 140000 | 85000 | 63003.595 | 127837.996 | 120079.644 |
| 2 | 소사 | LG | 11 | 11 | 1 | 0 | 0 | 30 | 29 | 185.1 | ⋯ | 3.88 | 6.80 | 3.52 | 3.41 | 6.08 | 120000 | 50000 | 37061.640 | 91955.341 | 83760.504 |
| 3 | 양현종 | KIA | 20 | 6 | 0 | 0 | 0 | 31 | 31 | 193.1 | ⋯ | 3.44 | 6.54 | 3.94 | 3.82 | 5.64 | 230000 | 150000 | 111180.994 | 180164.163 | 175521.573 |
| 5 | 레일리 | 롯데 | 13 | 7 | 0 | 0 | 0 | 30 | 30 | 187.1 | ⋯ | 3.80 | 6.13 | 4.36 | 4.31 | 4.38 | 111000 | 85000 | 63002.886 | 110388.580 | 106098.401 |
| 6 | 피어밴드 | KT | 8 | 10 | 0 | 0 | 0 | 26 | 26 | 160.0 | ⋯ | 3.04 | 6.52 | 4.42 | 4.32 | 3.94 | 85000 | 35000 | 25942.975 | 60851.766 | 55938.336 |
| 7 | 고영표 | KT | 8 | 12 | 0 | 1 | 0 | 25 | 24 | 141.2 | ⋯ | 5.08 | 2.97 | 3.88 | 3.78 | 3.87 | 11500 | 5200 | 3855.283 | 32248.671 | 26731.808 |
| 9 | 함덕주 | 두산 | 9 | 8 | 0 | 2 | 0 | 35 | 24 | 137.1 | ⋯ | 3.67 | 4.99 | 3.91 | 3.67 | 3.78 | 16000 | 7000 | 5189.489 | 34420.638 | 28840.862 |
| 13 | 임기영 | KIA | 8 | 6 | 0 | 0 | 0 | 23 | 19 | 118.1 | ⋯ | 3.65 | 4.25 | 4.07 | 4.19 | 2.79 | 13000 | 3100 | 2298.522 | 22932.876 | 18557.121 |
| 14 | 박세웅 | 롯데 | 12 | 6 | 0 | 0 | 0 | 28 | 28 | 171.1 | ⋯ | 3.68 | 5.92 | 5.07 | 5.14 | 2.54 | 25000 | 10000 | 7412.730 | 27963.633 | 24286.458 |
| 15 | 백정현 | 삼성 | 8 | 4 | 0 | 3 | 0 | 35 | 14 | 100.2 | ⋯ | 4.38 | 3.01 | 4.51 | 4.34 | 2.25 | 15500 | 10000 | 7412.536 | 24575.219 | 21705.856 |
| 17 | 류제국 | LG | 8 | 6 | 0 | 0 | 0 | 25 | 25 | 131.1 | ⋯ | 5.35 | 1.53 | 4.79 | 4.84 | 2.15 | 29000 | 35000 | 25942.448 | 47534.929 | 45256.386 |
| 19 | 임찬규 | LG | 6 | 10 | 0 | 0 | 0 | 27 | 26 | 124.1 | ⋯ | 4.63 | 3.15 | 4.81 | 4.79 | 2.04 | 11500 | 6500 | 4818.295 | 19832.884 | 17218.543 |
| 22 | 윤희상 | SK | 6 | 7 | 0 | 0 | 0 | 23 | 22 | 120.0 | ⋯ | 6.00 | 1.52 | 5.13 | 5.22 | 1.80 | 13000 | 15000 | 11118.427 | 26255.265 | 24012.479 |
| 23 | 원종현 | NC | 3 | 6 | 0 | 22 | 0 | 68 | 0 | 80.0 | ⋯ | 4.39 | 2.02 | 3.60 | 3.52 | 1.71 | 18500 | 14000 | 10377.101 | 23403.105 | 21354.395 |
| 24 | 배영수 | 한화 | 7 | 8 | 0 | 0 | 0 | 25 | 25 | 128.0 | ⋯ | 5.06 | 2.47 | 5.10 | 5.21 | 1.68 | 50000 | 55000 | 40766.167 | 61693.571 | 60809.360 |
| 25 | 박종훈 | SK | 12 | 7 | 0 | 1 | 0 | 29 | 28 | 151.1 | ⋯ | 4.10 | 4.31 | 5.38 | 5.55 | 1.62 | 20000 | 10000 | 7412.434 | 20805.675 | 18627.133 |
| 26 | 이상화 | KT | 4 | 3 | 6 | 4 | 1 | 70 | 0 | 66.0 | ⋯ | 3.95 | 2.43 | 3.57 | 3.45 | 1.54 | 10000 | 4500 | 3335.691 | 13333.548 | 11284.396 |
| 27 | 김진성 | NC | 10 | 6 | 0 | 15 | 2 | 69 | 0 | 89.2 | ⋯ | 3.61 | 3.20 | 4.30 | 3.92 | 1.47 | 23000 | 18000 | 13341.734 | 24511.486 | 23499.001 |
| 28 | 이민호 | NC | 5 | 1 | 3 | 6 | 1 | 60 | 3 | 88.2 | ⋯ | 4.06 | 1.97 | 4.41 | 4.22 | 1.38 | 18800 | 16000 | 11859.379 | 22705.669 | 21500.092 |
| 29 | 이재학 | NC | 5 | 7 | 0 | 0 | 0 | 28 | 23 | 119.0 | ⋯ | 5.67 | 1.04 | 5.53 | 5.35 | 1.31 | 19000 | 20000 | 14824.140 | 25923.735 | 25216.046 |
| 30 | 김강률 | 두산 | 7 | 2 | 7 | 12 | 1 | 70 | 0 | 89.0 | ⋯ | 3.44 | 2.68 | 4.15 | 4.09 | 1.27 | 15000 | 6200 | 4595.698 | 13575.161 | 11877.920 |
| 31 | 김재윤 | KT | 3 | 5 | 15 | 0 | 4 | 41 | 0 | 37.1 | ⋯ | 5.79 | 0.73 | 3.15 | 3.05 | 1.24 | 11000 | 9000 | 6670.994 | 15370.094 | 13693.121 |
| 32 | 김원중 | 롯데 | 7 | 8 | 0 | 0 | 0 | 24 | 24 | 107.1 | ⋯ | 5.70 | 2.22 | 5.53 | 5.56 | 1.23 | 6300 | 3000 | 2223.940 | 11393.858 | 9721.380 |
| 33 | 박진형 | 롯데 | 4 | 4 | 2 | 10 | 3 | 45 | 9 | 88.0 | ⋯ | 5.11 | 1.70 | 4.43 | 4.14 | 1.12 | 10500 | 6000 | 4447.373 | 11961.674 | 10832.990 |
| 34 | 윤규진 | 한화 | 8 | 7 | 0 | 2 | 0 | 36 | 18 | 119.0 | ⋯ | 5.22 | 2.23 | 5.46 | 5.42 | 1.09 | 21000 | 18000 | 13341.766 | 23320.087 | 22447.697 |
| 35 | 김재영 | 한화 | 5 | 7 | 0 | 0 | 1 | 20 | 15 | 85.1 | ⋯ | 4.54 | 1.60 | 5.00 | 5.23 | 1.06 | 5300 | 3000 | 2223.843 | 9322.823 | 7824.809 |
| 36 | 신정락 | LG | 3 | 5 | 10 | 12 | 3 | 63 | 0 | 59.0 | ⋯ | 5.34 | 0.51 | 4.23 | 4.12 | 0.97 | 10500 | 7500 | 5559.100 | 11850.125 | 10957.933 |
| 38 | 심창민 | 삼성 | 4 | 7 | 6 | 16 | 2 | 66 | 0 | 75.1 | ⋯ | 4.18 | 1.82 | 4.82 | 4.26 | 0.93 | 23000 | 21000 | 15565.136 | 23185.400 | 23384.860 |
| 39 | 정찬헌 | LG | 8 | 7 | 7 | 3 | 3 | 61 | 0 | 61.2 | ⋯ | 5.84 | 0.97 | 3.96 | 3.80 | 0.93 | 9500 | 4500 | 3335.522 | 8995.166 | 7904.678 |
| 40 | 임창민 | NC | 4 | 3 | 29 | 0 | 6 | 60 | 0 | 66.0 | ⋯ | 3.68 | 2.68 | 4.83 | 4.60 | 0.89 | 25000 | 22500 | 16676.995 | 24913.355 | 24703.222 |
| ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋱ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ |
| 118 | 배민관 | LG | 0 | 0 | 0 | 0 | 0 | 4 | 0 | 3.2 | ⋯ | 9.82 | -0.11 | 9.46 | 9.77 | -0.08 | 3200 | 3000 | 2223.850 | 5401.6928 | 5349.37842 |
| 119 | 윤근영 | KT | 0 | 0 | 0 | 1 | 0 | 12 | 0 | 18.0 | ⋯ | 3.50 | 0.30 | 6.23 | 6.31 | -0.08 | 4200 | 4000 | 2964.668 | 1350.9942 | 1957.02839 |
| 120 | 강장산 | KT | 0 | 0 | 0 | 0 | 0 | 17 | 0 | 26.1 | ⋯ | 5.47 | 0.54 | 6.16 | 6.46 | -0.10 | 3300 | 3000 | 2223.583 | 1795.4384 | 1942.46057 |
| 124 | 김민우 | 한화 | 0 | 0 | 0 | 0 | 0 | 4 | 2 | 7.1 | ⋯ | 17.18 | -0.52 | 7.83 | 8.32 | -0.12 | 3600 | 3800 | 2816.614 | 3554.7700 | 3912.29634 |
| 125 | 이현호 | 두산 | 1 | 0 | 0 | 0 | 0 | 24 | 2 | 30.0 | ⋯ | 5.70 | -0.51 | 6.47 | 6.57 | -0.16 | 5200 | 6000 | 4447.098 | 3446.5228 | 4053.21274 |
| 127 | 배제성 | KT | 0 | 0 | 0 | 0 | 0 | 21 | 1 | 32.0 | ⋯ | 8.72 | -0.10 | 6.39 | 6.59 | -0.17 | 3000 | 2700 | 2001.242 | 1539.0311 | 1718.55530 |
| 128 | 홍상삼 | 두산 | 1 | 1 | 0 | 0 | 0 | 11 | 2 | 17.0 | ⋯ | 7.94 | -0.52 | 6.73 | 7.22 | -0.18 | 9000 | 12500 | 9265.021 | 11187.3262 | 11344.83558 |
| 129 | 이태양 | 한화 | 3 | 6 | 0 | 0 | 0 | 16 | 12 | 59.0 | ⋯ | 7.17 | 0.05 | 6.77 | 7.04 | -0.18 | 7300 | 8300 | 6151.890 | 5929.5258 | 6449.57632 |
| 130 | 김진우 | KIA | 2 | 6 | 0 | 0 | 1 | 14 | 8 | 36.1 | ⋯ | 7.93 | -0.35 | 6.68 | 7.04 | -0.19 | 6000 | 12000 | 8894.348 | 9837.3511 | 10343.45037 |
| 131 | 이영하 | 두산 | 3 | 3 | 0 | 0 | 0 | 20 | 3 | 35.2 | ⋯ | 5.55 | 0.59 | 6.79 | 6.86 | -0.19 | 4200 | 2700 | 2001.102 | -291.0924 | 566.08017 |
| 133 | 김동호 | 삼성 | 0 | 1 | 0 | 0 | 0 | 20 | 1 | 36.0 | ⋯ | 6.75 | -0.07 | 5.46 | 5.96 | -0.20 | 6000 | 5000 | 3706.004 | 3508.1291 | 3362.37197 |
| 134 | 김윤동 | KIA | 7 | 4 | 11 | 6 | 6 | 65 | 1 | 80.1 | ⋯ | 4.59 | 1.89 | 5.54 | 5.53 | -0.20 | 15000 | 4700 | 3483.494 | 1585.1461 | 2213.60459 |
| 135 | 정인욱 | 삼성 | 1 | 4 | 0 | 0 | 0 | 9 | 7 | 32.0 | ⋯ | 9.84 | -0.43 | 7.39 | 7.36 | -0.20 | 5700 | 7200 | 5336.429 | 3265.9890 | 4651.09396 |
| 136 | 송창식 | 한화 | 5 | 6 | 0 | 15 | 6 | 63 | 0 | 73.1 | ⋯ | 6.63 | -0.74 | 5.68 | 5.59 | -0.22 | 24000 | 22000 | 16306.086 | 16351.0737 | 17836.44986 |
| 137 | 배재환 | NC | 0 | 1 | 0 | 0 | 0 | 3 | 1 | 8.0 | ⋯ | 9.00 | -0.16 | 11.48 | 11.73 | -0.23 | 3000 | 4000 | 2964.893 | 3807.0275 | 4905.41920 |
| 138 | 이정민 | 롯데 | 3 | 1 | 0 | 2 | 1 | 24 | 0 | 26.2 | ⋯ | 5.40 | 0.19 | 6.47 | 6.83 | -0.24 | 10000 | 15000 | 11117.848 | 11150.4349 | 11939.78335 |
| 139 | 최동환 | LG | 1 | 2 | 1 | 5 | 2 | 35 | 0 | 38.0 | ⋯ | 5.68 | -0.05 | 6.76 | 6.77 | -0.24 | 6500 | 6000 | 4446.970 | 1651.5907 | 2934.46753 |
| 140 | 이종혁 | KT | 2 | 0 | 0 | 0 | 0 | 16 | 0 | 19.0 | ⋯ | 6.63 | -0.09 | 7.10 | 7.57 | -0.25 | 3200 | 2700 | 2001.196 | 564.4227 | 1059.70614 |
| 141 | 홍성용 | KT | 0 | 2 | 0 | 1 | 0 | 37 | 1 | 39.0 | ⋯ | 6.23 | 0.25 | 6.40 | 6.63 | -0.28 | 6800 | 6300 | 4669.430 | 2877.7134 | 3578.68637 |
| 142 | 정영일 | SK | 0 | 0 | 0 | 0 | 0 | 9 | 0 | 8.0 | ⋯ | 10.12 | -0.61 | 10.73 | 11.37 | -0.30 | 3000 | 4000 | 2964.964 | 4241.1480 | 4774.13857 |
| 143 | 김지용 | LG | 4 | 3 | 3 | 8 | 4 | 53 | 0 | 53.0 | ⋯ | 5.09 | 0.28 | 6.30 | 6.27 | -0.38 | 9000 | 10000 | 7411.678 | 3788.8307 | 5340.39499 |
| 144 | 최금강 | NC | 5 | 3 | 0 | 0 | 0 | 39 | 13 | 89.2 | ⋯ | 7.33 | -0.27 | 6.31 | 6.58 | -0.41 | 12500 | 14000 | 10376.560 | 8542.1622 | 9744.24048 |
| 145 | 김범수 | 한화 | 0 | 4 | 0 | 0 | 0 | 15 | 5 | 31.0 | ⋯ | 8.71 | -0.42 | 8.15 | 8.52 | -0.42 | 3600 | 3300 | 2445.965 | 1206.6108 | 1838.44631 |
| 146 | 이승현 | 삼성 | 2 | 0 | 0 | 0 | 0 | 30 | 0 | 31.2 | ⋯ | 5.12 | 0.31 | 8.03 | 8.03 | -0.44 | 7000 | 6200 | 4595.280 | 2054.0673 | 3450.35583 |
| 147 | 주권 | KT | 5 | 6 | 1 | 3 | 2 | 39 | 12 | 81.2 | ⋯ | 6.61 | -0.02 | 6.33 | 6.54 | -0.46 | 7600 | 7500 | 5558.774 | 2162.2155 | 3280.22845 |
| 148 | 장민재 | 한화 | 2 | 5 | 0 | 0 | 2 | 33 | 5 | 62.2 | ⋯ | 7.76 | -1.21 | 6.21 | 6.48 | -0.47 | 7100 | 8100 | 6003.520 | 2972.9523 | 3919.93355 |
| 149 | 정용운 | KIA | 3 | 2 | 0 | 0 | 0 | 25 | 11 | 59.1 | ⋯ | 5.92 | 0.39 | 6.41 | 6.77 | -0.49 | 7500 | 3100 | 2297.618 | -673.0981 | -53.20351 |
| 150 | 노경은 | 롯데 | 0 | 2 | 0 | 0 | 0 | 9 | 2 | 14.2 | ⋯ | 11.66 | -0.83 | 8.03 | 8.29 | -0.61 | 10000 | 16000 | 11858.971 | 9953.0416 | 11626.38686 |
| 151 | 김승현 | 삼성 | 0 | 3 | 0 | 1 | 0 | 41 | 0 | 43.2 | ⋯ | 5.77 | -0.40 | 6.87 | 6.95 | -0.70 | 4000 | 2900 | 2149.246 | -3065.3516 | -1610.41555 |
| 152 | 류희운 | KT | 4 | 4 | 0 | 0 | 0 | 24 | 14 | 81.0 | ⋯ | 7.67 | -0.68 | 7.60 | 7.81 | -1.01 | 4000 | 3000 | 2223.298 | -5132.3247 | -3236.44776 |
df <- filtered_df[,c("선수명","연봉.2018.","new_data","연봉.2017.")]
df
| 선수명 | 연봉.2018. | new_data | 연봉.2017. | |
|---|---|---|---|---|
| <chr> | <int> | <dbl> | <int> | |
| 1 | 켈리 | 140000 | 120079.644 | 85000 |
| 2 | 소사 | 120000 | 83760.504 | 50000 |
| 3 | 양현종 | 230000 | 175521.573 | 150000 |
| 5 | 레일리 | 111000 | 106098.401 | 85000 |
| 6 | 피어밴드 | 85000 | 55938.336 | 35000 |
| 7 | 고영표 | 11500 | 26731.808 | 5200 |
| 9 | 함덕주 | 16000 | 28840.862 | 7000 |
| 13 | 임기영 | 13000 | 18557.121 | 3100 |
| 14 | 박세웅 | 25000 | 24286.458 | 10000 |
| 15 | 백정현 | 15500 | 21705.856 | 10000 |
| 17 | 류제국 | 29000 | 45256.386 | 35000 |
| 19 | 임찬규 | 11500 | 17218.543 | 6500 |
| 22 | 윤희상 | 13000 | 24012.479 | 15000 |
| 23 | 원종현 | 18500 | 21354.395 | 14000 |
| 24 | 배영수 | 50000 | 60809.360 | 55000 |
| 25 | 박종훈 | 20000 | 18627.133 | 10000 |
| 26 | 이상화 | 10000 | 11284.396 | 4500 |
| 27 | 김진성 | 23000 | 23499.001 | 18000 |
| 28 | 이민호 | 18800 | 21500.092 | 16000 |
| 29 | 이재학 | 19000 | 25216.046 | 20000 |
| 30 | 김강률 | 15000 | 11877.920 | 6200 |
| 31 | 김재윤 | 11000 | 13693.121 | 9000 |
| 32 | 김원중 | 6300 | 9721.380 | 3000 |
| 33 | 박진형 | 10500 | 10832.990 | 6000 |
| 34 | 윤규진 | 21000 | 22447.697 | 18000 |
| 35 | 김재영 | 5300 | 7824.809 | 3000 |
| 36 | 신정락 | 10500 | 10957.933 | 7500 |
| 38 | 심창민 | 23000 | 23384.860 | 21000 |
| 39 | 정찬헌 | 9500 | 7904.678 | 4500 |
| 40 | 임창민 | 25000 | 24703.222 | 22500 |
| ⋮ | ⋮ | ⋮ | ⋮ | ⋮ |
| 118 | 배민관 | 3200 | 5349.37842 | 3000 |
| 119 | 윤근영 | 4200 | 1957.02839 | 4000 |
| 120 | 강장산 | 3300 | 1942.46057 | 3000 |
| 124 | 김민우 | 3600 | 3912.29634 | 3800 |
| 125 | 이현호 | 5200 | 4053.21274 | 6000 |
| 127 | 배제성 | 3000 | 1718.55530 | 2700 |
| 128 | 홍상삼 | 9000 | 11344.83558 | 12500 |
| 129 | 이태양 | 7300 | 6449.57632 | 8300 |
| 130 | 김진우 | 6000 | 10343.45037 | 12000 |
| 131 | 이영하 | 4200 | 566.08017 | 2700 |
| 133 | 김동호 | 6000 | 3362.37197 | 5000 |
| 134 | 김윤동 | 15000 | 2213.60459 | 4700 |
| 135 | 정인욱 | 5700 | 4651.09396 | 7200 |
| 136 | 송창식 | 24000 | 17836.44986 | 22000 |
| 137 | 배재환 | 3000 | 4905.41920 | 4000 |
| 138 | 이정민 | 10000 | 11939.78335 | 15000 |
| 139 | 최동환 | 6500 | 2934.46753 | 6000 |
| 140 | 이종혁 | 3200 | 1059.70614 | 2700 |
| 141 | 홍성용 | 6800 | 3578.68637 | 6300 |
| 142 | 정영일 | 3000 | 4774.13857 | 4000 |
| 143 | 김지용 | 9000 | 5340.39499 | 10000 |
| 144 | 최금강 | 12500 | 9744.24048 | 14000 |
| 145 | 김범수 | 3600 | 1838.44631 | 3300 |
| 146 | 이승현 | 7000 | 3450.35583 | 6200 |
| 147 | 주권 | 7600 | 3280.22845 | 7500 |
| 148 | 장민재 | 7100 | 3919.93355 | 8100 |
| 149 | 정용운 | 7500 | -53.20351 | 3100 |
| 150 | 노경은 | 10000 | 11626.38686 | 16000 |
| 151 | 김승현 | 4000 | -1610.41555 | 2900 |
| 152 | 류희운 | 4000 | -3236.44776 | 3000 |
sorted_df <- df[order(df$"연봉.2018.", decreasing = TRUE), ]
sorted_df
| 선수명 | 연봉.2018. | new_data | 연봉.2017. | |
|---|---|---|---|---|
| <chr> | <int> | <dbl> | <int> | |
| 3 | 양현종 | 230000 | 175521.573 | 150000 |
| 1 | 켈리 | 140000 | 120079.644 | 85000 |
| 2 | 소사 | 120000 | 83760.504 | 50000 |
| 5 | 레일리 | 111000 | 106098.401 | 85000 |
| 6 | 피어밴드 | 85000 | 55938.336 | 35000 |
| 24 | 배영수 | 50000 | 60809.360 | 55000 |
| 41 | 안영명 | 35000 | 22618.863 | 20000 |
| 68 | 채병용 | 30000 | 22782.563 | 25000 |
| 17 | 류제국 | 29000 | 45256.386 | 35000 |
| 14 | 박세웅 | 25000 | 24286.458 | 10000 |
| 40 | 임창민 | 25000 | 24703.222 | 22500 |
| 56 | 박정진 | 25000 | 31579.449 | 33000 |
| 136 | 송창식 | 24000 | 17836.450 | 22000 |
| 27 | 김진성 | 23000 | 23499.001 | 18000 |
| 38 | 심창민 | 23000 | 23384.860 | 21000 |
| 34 | 윤규진 | 21000 | 22447.697 | 18000 |
| 25 | 박종훈 | 20000 | 18627.133 | 10000 |
| 44 | 심수창 | 20000 | 26147.317 | 25000 |
| 50 | 장원삼 | 20000 | 71627.777 | 75000 |
| 29 | 이재학 | 19000 | 25216.046 | 20000 |
| 43 | 진해수 | 19000 | 12816.751 | 11000 |
| 28 | 이민호 | 18800 | 21500.092 | 16000 |
| 23 | 원종현 | 18500 | 21354.395 | 14000 |
| 111 | 박희수 | 18500 | 18753.930 | 21000 |
| 9 | 함덕주 | 16000 | 28840.862 | 7000 |
| 15 | 백정현 | 15500 | 21705.856 | 10000 |
| 30 | 김강률 | 15000 | 11877.920 | 6200 |
| 60 | 권오준 | 15000 | 9102.069 | 10500 |
| 63 | 김사율 | 15000 | 18303.363 | 20000 |
| 134 | 김윤동 | 15000 | 2213.605 | 4700 |
| ⋮ | ⋮ | ⋮ | ⋮ | ⋮ |
| 79 | 황수범 | 3800 | 2999.8065 | 2700 |
| 124 | 김민우 | 3600 | 3912.2963 | 3800 |
| 145 | 김범수 | 3600 | 1838.4463 | 3300 |
| 92 | 이형범 | 3500 | 2179.4887 | 2700 |
| 93 | 임현준 | 3500 | 2274.2215 | 3200 |
| 110 | 안성무 | 3500 | 3410.5937 | 2800 |
| 99 | 박치국 | 3400 | 1276.1858 | 2700 |
| 102 | 허건엽 | 3400 | 1872.1189 | 3300 |
| 105 | 고봉재 | 3300 | 7100.9320 | 4400 |
| 112 | 김시현 | 3300 | 2464.9378 | 2700 |
| 120 | 강장산 | 3300 | 1942.4606 | 3000 |
| 118 | 배민관 | 3200 | 5349.3784 | 3000 |
| 140 | 이종혁 | 3200 | 1059.7061 | 2700 |
| 70 | 최지광 | 3100 | 3902.9955 | 2700 |
| 73 | 박세진 | 3100 | 2753.4578 | 3000 |
| 101 | 박상원 | 3100 | 1106.5249 | 2700 |
| 81 | 김대유 | 3000 | 2190.6374 | 2700 |
| 84 | 손주영 | 3000 | -324.0806 | 2700 |
| 87 | 김진영 | 3000 | 129.3510 | 2700 |
| 98 | 임진우 | 3000 | 2904.8408 | 3200 |
| 109 | 서균 | 3000 | 229.6544 | 2700 |
| 127 | 배제성 | 3000 | 1718.5553 | 2700 |
| 137 | 배재환 | 3000 | 4905.4192 | 4000 |
| 142 | 정영일 | 3000 | 4774.1386 | 4000 |
| 96 | 김도영 | 2900 | 779.3548 | 2700 |
| 97 | 조근종 | 2900 | -2275.0675 | 2700 |
| 108 | 안규현 | 2900 | 6770.8590 | 2800 |
| 115 | 김종훈 | 2900 | 8543.2324 | 2800 |
| 83 | 장지훈 | 2800 | -3354.1873 | 2700 |
| 106 | 이수민 | 2800 | 2029.4247 | 3000 |
이상치 제거
picher2 <- picher[-c(1,2,3,6,21,50,98),]
picher2
| 선수명 | 팀명 | 승 | 패 | 세 | 홀드 | 블론 | 경기 | 선발 | 이닝 | ⋯ | 홈런.9 | BABIP | LOB. | ERA | RA9.WAR | FIP | kFIP | WAR | 연봉.2018. | 연봉.2017. | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| <chr> | <chr> | <int> | <int> | <int> | <int> | <int> | <int> | <int> | <dbl> | ⋯ | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <int> | <int> | |
| 4 | 차우찬 | LG | 10 | 7 | 0 | 0 | 0 | 28 | 28 | 175.2 | ⋯ | 1.02 | 0.298 | 75.0 | 3.43 | 6.11 | 4.20 | 4.03 | 4.63 | 100000 | 100000 |
| 5 | 레일리 | 롯데 | 13 | 7 | 0 | 0 | 0 | 30 | 30 | 187.1 | ⋯ | 0.91 | 0.323 | 74.1 | 3.80 | 6.13 | 4.36 | 4.31 | 4.38 | 111000 | 85000 |
| 7 | 고영표 | KT | 8 | 12 | 0 | 1 | 0 | 25 | 24 | 141.2 | ⋯ | 0.83 | 0.362 | 64.6 | 5.08 | 2.97 | 3.88 | 3.78 | 3.87 | 11500 | 5200 |
| 8 | 장원준 | 두산 | 14 | 9 | 0 | 0 | 0 | 29 | 29 | 180.1 | ⋯ | 0.60 | 0.293 | 75.8 | 3.14 | 7.28 | 4.26 | 4.35 | 3.85 | 100000 | 100000 |
| 9 | 함덕주 | 두산 | 9 | 8 | 0 | 2 | 0 | 35 | 24 | 137.1 | ⋯ | 0.52 | 0.321 | 73.1 | 3.67 | 4.99 | 3.91 | 3.67 | 3.78 | 16000 | 7000 |
| 10 | 팻딘 | KIA | 9 | 7 | 0 | 0 | 0 | 30 | 29 | 176.0 | ⋯ | 1.12 | 0.353 | 76.5 | 4.14 | 5.66 | 4.65 | 4.61 | 3.64 | 70000 | 70000 |
| 11 | 윤성환 | 삼성 | 12 | 9 | 0 | 0 | 0 | 28 | 28 | 174.1 | ⋯ | 1.14 | 0.305 | 72.7 | 4.28 | 5.36 | 4.78 | 4.80 | 3.03 | 80000 | 80000 |
| 12 | 유희관 | 두산 | 11 | 6 | 0 | 1 | 0 | 30 | 29 | 188.2 | ⋯ | 0.95 | 0.329 | 69.7 | 4.53 | 4.79 | 4.78 | 4.97 | 2.89 | 50000 | 50000 |
| 13 | 임기영 | KIA | 8 | 6 | 0 | 0 | 0 | 23 | 19 | 118.1 | ⋯ | 0.68 | 0.331 | 72.3 | 3.65 | 4.25 | 4.07 | 4.19 | 2.79 | 13000 | 3100 |
| 14 | 박세웅 | 롯데 | 12 | 6 | 0 | 0 | 0 | 28 | 28 | 171.1 | ⋯ | 1.10 | 0.290 | 78.3 | 3.68 | 5.92 | 5.07 | 5.14 | 2.54 | 25000 | 10000 |
| 15 | 백정현 | 삼성 | 8 | 4 | 0 | 3 | 0 | 35 | 14 | 100.2 | ⋯ | 0.98 | 0.336 | 73.2 | 4.38 | 3.01 | 4.51 | 4.34 | 2.25 | 15500 | 10000 |
| 16 | 송승준 | 롯데 | 11 | 5 | 0 | 1 | 1 | 30 | 22 | 130.1 | ⋯ | 1.31 | 0.298 | 75.8 | 4.21 | 4.22 | 4.91 | 4.77 | 2.20 | 40000 | 40000 |
| 17 | 류제국 | LG | 8 | 6 | 0 | 0 | 0 | 25 | 25 | 131.1 | ⋯ | 0.69 | 0.327 | 63.5 | 5.35 | 1.53 | 4.79 | 4.84 | 2.15 | 29000 | 35000 |
| 18 | 우규민 | 삼성 | 7 | 10 | 0 | 0 | 0 | 27 | 25 | 133.0 | ⋯ | 1.29 | 0.334 | 60.6 | 5.21 | 1.48 | 4.95 | 4.98 | 2.14 | 70000 | 70000 |
| 19 | 임찬규 | LG | 6 | 10 | 0 | 0 | 0 | 27 | 26 | 124.1 | ⋯ | 0.87 | 0.335 | 71.3 | 4.63 | 3.15 | 4.81 | 4.79 | 2.04 | 11500 | 6500 |
| 20 | 손승락 | 롯데 | 1 | 3 | 37 | 0 | 5 | 61 | 0 | 62.0 | ⋯ | 1.02 | 0.311 | 89.9 | 2.18 | 3.91 | 3.69 | 3.37 | 1.82 | 70000 | 70000 |
| 22 | 윤희상 | SK | 6 | 7 | 0 | 0 | 0 | 23 | 22 | 120.0 | ⋯ | 1.20 | 0.340 | 63.1 | 6.00 | 1.52 | 5.13 | 5.22 | 1.80 | 13000 | 15000 |
| 23 | 원종현 | NC | 3 | 6 | 0 | 22 | 0 | 68 | 0 | 80.0 | ⋯ | 0.45 | 0.336 | 63.9 | 4.39 | 2.02 | 3.60 | 3.52 | 1.71 | 18500 | 14000 |
| 24 | 배영수 | 한화 | 7 | 8 | 0 | 0 | 0 | 25 | 25 | 128.0 | ⋯ | 1.20 | 0.328 | 66.4 | 5.06 | 2.47 | 5.10 | 5.21 | 1.68 | 50000 | 55000 |
| 25 | 박종훈 | SK | 12 | 7 | 0 | 1 | 0 | 29 | 28 | 151.1 | ⋯ | 0.95 | 0.287 | 75.3 | 4.10 | 4.31 | 5.38 | 5.55 | 1.62 | 20000 | 10000 |
| 26 | 이상화 | KT | 4 | 3 | 6 | 4 | 1 | 70 | 0 | 66.0 | ⋯ | 0.55 | 0.337 | 68.0 | 3.95 | 2.43 | 3.57 | 3.45 | 1.54 | 10000 | 4500 |
| 27 | 김진성 | NC | 10 | 6 | 0 | 15 | 2 | 69 | 0 | 89.2 | ⋯ | 1.30 | 0.268 | 76.1 | 3.61 | 3.20 | 4.30 | 3.92 | 1.47 | 23000 | 18000 |
| 28 | 이민호 | NC | 5 | 1 | 3 | 6 | 1 | 60 | 3 | 88.2 | ⋯ | 1.02 | 0.312 | 71.4 | 4.06 | 1.97 | 4.41 | 4.22 | 1.38 | 18800 | 16000 |
| 29 | 이재학 | NC | 5 | 7 | 0 | 0 | 0 | 28 | 23 | 119.0 | ⋯ | 1.66 | 0.350 | 65.4 | 5.67 | 1.04 | 5.53 | 5.35 | 1.31 | 19000 | 20000 |
| 30 | 김강률 | 두산 | 7 | 2 | 7 | 12 | 1 | 70 | 0 | 89.0 | ⋯ | 0.61 | 0.316 | 73.5 | 3.44 | 2.68 | 4.15 | 4.09 | 1.27 | 15000 | 6200 |
| 31 | 김재윤 | KT | 3 | 5 | 15 | 0 | 4 | 41 | 0 | 37.1 | ⋯ | 0.24 | 0.348 | 54.3 | 5.79 | 0.73 | 3.15 | 3.05 | 1.24 | 11000 | 9000 |
| 32 | 김원중 | 롯데 | 7 | 8 | 0 | 0 | 0 | 24 | 24 | 107.1 | ⋯ | 1.09 | 0.347 | 70.3 | 5.70 | 2.22 | 5.53 | 5.56 | 1.23 | 6300 | 3000 |
| 33 | 박진형 | 롯데 | 4 | 4 | 2 | 10 | 3 | 45 | 9 | 88.0 | ⋯ | 0.82 | 0.349 | 68.9 | 5.11 | 1.70 | 4.43 | 4.14 | 1.12 | 10500 | 6000 |
| 34 | 윤규진 | 한화 | 8 | 7 | 0 | 2 | 0 | 36 | 18 | 119.0 | ⋯ | 1.44 | 0.328 | 72.1 | 5.22 | 2.23 | 5.46 | 5.42 | 1.09 | 21000 | 18000 |
| 35 | 김재영 | 한화 | 5 | 7 | 0 | 0 | 1 | 20 | 15 | 85.1 | ⋯ | 0.95 | 0.337 | 67.8 | 4.54 | 1.60 | 5.00 | 5.23 | 1.06 | 5300 | 3000 |
| ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋱ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ |
| 123 | 정재원 | 한화 | 0 | 2 | 0 | 1 | 1 | 14 | 0 | 21.0 | ⋯ | 0.00 | 0.315 | 69.2 | 3.43 | 0.12 | 5.16 | 5.73 | -0.12 | 4000 | 4000 |
| 124 | 김민우 | 한화 | 0 | 0 | 0 | 0 | 0 | 4 | 2 | 7.1 | ⋯ | 1.23 | 0.613 | 50.7 | 17.18 | -0.52 | 7.83 | 8.32 | -0.12 | 3600 | 3800 |
| 125 | 이현호 | 두산 | 1 | 0 | 0 | 0 | 0 | 24 | 2 | 30.0 | ⋯ | 1.80 | 0.389 | 65.3 | 5.70 | -0.51 | 6.47 | 6.57 | -0.16 | 5200 | 6000 |
| 126 | 권혁 | 한화 | 1 | 3 | 0 | 11 | 1 | 37 | 0 | 31.1 | ⋯ | 1.72 | 0.311 | 67.3 | 6.32 | 0.04 | 6.48 | 6.78 | -0.16 | 45000 | 45000 |
| 127 | 배제성 | KT | 0 | 0 | 0 | 0 | 0 | 21 | 1 | 32.0 | ⋯ | 1.12 | 0.378 | 59.6 | 8.72 | -0.10 | 6.39 | 6.59 | -0.17 | 3000 | 2700 |
| 128 | 홍상삼 | 두산 | 1 | 1 | 0 | 0 | 0 | 11 | 2 | 17.0 | ⋯ | 1.06 | 0.300 | 53.0 | 7.94 | -0.52 | 6.73 | 7.22 | -0.18 | 9000 | 12500 |
| 129 | 이태양 | 한화 | 3 | 6 | 0 | 0 | 0 | 16 | 12 | 59.0 | ⋯ | 1.83 | 0.330 | 64.3 | 7.17 | 0.05 | 6.77 | 7.04 | -0.18 | 7300 | 8300 |
| 130 | 김진우 | KIA | 2 | 6 | 0 | 0 | 1 | 14 | 8 | 36.1 | ⋯ | 1.24 | 0.376 | 65.3 | 7.93 | -0.35 | 6.68 | 7.04 | -0.19 | 6000 | 12000 |
| 131 | 이영하 | 두산 | 3 | 3 | 0 | 0 | 0 | 20 | 3 | 35.2 | ⋯ | 2.02 | 0.333 | 78.7 | 5.55 | 0.59 | 6.79 | 6.86 | -0.19 | 4200 | 2700 |
| 132 | 최성영 | NC | 0 | 0 | 0 | 0 | 0 | 7 | 0 | 11.1 | ⋯ | 3.18 | 0.361 | 68.6 | 9.53 | -0.26 | 9.29 | 9.48 | -0.20 | 2900 | 2900 |
| 133 | 김동호 | 삼성 | 0 | 1 | 0 | 0 | 0 | 20 | 1 | 36.0 | ⋯ | 0.25 | 0.381 | 67.0 | 6.75 | -0.07 | 5.46 | 5.96 | -0.20 | 6000 | 5000 |
| 134 | 김윤동 | KIA | 7 | 4 | 11 | 6 | 6 | 65 | 1 | 80.1 | ⋯ | 1.12 | 0.267 | 75.0 | 4.59 | 1.89 | 5.54 | 5.53 | -0.20 | 15000 | 4700 |
| 135 | 정인욱 | 삼성 | 1 | 4 | 0 | 0 | 0 | 9 | 7 | 32.0 | ⋯ | 2.53 | 0.359 | 52.5 | 9.84 | -0.43 | 7.39 | 7.36 | -0.20 | 5700 | 7200 |
| 136 | 송창식 | 한화 | 5 | 6 | 0 | 15 | 6 | 63 | 0 | 73.1 | ⋯ | 1.60 | 0.303 | 61.0 | 6.63 | -0.74 | 5.68 | 5.59 | -0.22 | 24000 | 22000 |
| 137 | 배재환 | NC | 0 | 1 | 0 | 0 | 0 | 3 | 1 | 8.0 | ⋯ | 4.50 | 0.346 | 82.1 | 9.00 | -0.16 | 11.48 | 11.73 | -0.23 | 3000 | 4000 |
| 138 | 이정민 | 롯데 | 3 | 1 | 0 | 2 | 1 | 24 | 0 | 26.2 | ⋯ | 1.69 | 0.307 | 71.4 | 5.40 | 0.19 | 6.47 | 6.83 | -0.24 | 10000 | 15000 |
| 139 | 최동환 | LG | 1 | 2 | 1 | 5 | 2 | 35 | 0 | 38.0 | ⋯ | 2.13 | 0.273 | 71.1 | 5.68 | -0.05 | 6.76 | 6.77 | -0.24 | 6500 | 6000 |
| 140 | 이종혁 | KT | 2 | 0 | 0 | 0 | 0 | 16 | 0 | 19.0 | ⋯ | 1.42 | 0.324 | 67.0 | 6.63 | -0.09 | 7.10 | 7.57 | -0.25 | 3200 | 2700 |
| 141 | 홍성용 | KT | 0 | 2 | 0 | 1 | 0 | 37 | 1 | 39.0 | ⋯ | 1.62 | 0.294 | 68.3 | 6.23 | 0.25 | 6.40 | 6.63 | -0.28 | 6800 | 6300 |
| 142 | 정영일 | SK | 0 | 0 | 0 | 0 | 0 | 9 | 0 | 8.0 | ⋯ | 3.38 | 0.281 | 47.3 | 10.12 | -0.61 | 10.73 | 11.37 | -0.30 | 3000 | 4000 |
| 143 | 김지용 | LG | 4 | 3 | 3 | 8 | 4 | 53 | 0 | 53.0 | ⋯ | 2.21 | 0.280 | 73.9 | 5.09 | 0.28 | 6.30 | 6.27 | -0.38 | 9000 | 10000 |
| 144 | 최금강 | NC | 5 | 3 | 0 | 0 | 0 | 39 | 13 | 89.2 | ⋯ | 1.51 | 0.320 | 57.4 | 7.33 | -0.27 | 6.31 | 6.58 | -0.41 | 12500 | 14000 |
| 145 | 김범수 | 한화 | 0 | 4 | 0 | 0 | 0 | 15 | 5 | 31.0 | ⋯ | 2.03 | 0.340 | 66.4 | 8.71 | -0.42 | 8.15 | 8.52 | -0.42 | 3600 | 3300 |
| 146 | 이승현 | 삼성 | 2 | 0 | 0 | 0 | 0 | 30 | 0 | 31.2 | ⋯ | 2.56 | 0.276 | 81.9 | 5.12 | 0.31 | 8.03 | 8.03 | -0.44 | 7000 | 6200 |
| 147 | 주권 | KT | 5 | 6 | 1 | 3 | 2 | 39 | 12 | 81.2 | ⋯ | 1.65 | 0.314 | 63.2 | 6.61 | -0.02 | 6.33 | 6.54 | -0.46 | 7600 | 7500 |
| 148 | 장민재 | 한화 | 2 | 5 | 0 | 0 | 2 | 33 | 5 | 62.2 | ⋯ | 1.58 | 0.355 | 56.9 | 7.76 | -1.21 | 6.21 | 6.48 | -0.47 | 7100 | 8100 |
| 149 | 정용운 | KIA | 3 | 2 | 0 | 0 | 0 | 25 | 11 | 59.1 | ⋯ | 1.06 | 0.263 | 65.4 | 5.92 | 0.39 | 6.41 | 6.77 | -0.49 | 7500 | 3100 |
| 150 | 노경은 | 롯데 | 0 | 2 | 0 | 0 | 0 | 9 | 2 | 14.2 | ⋯ | 2.45 | 0.382 | 52.8 | 11.66 | -0.83 | 8.03 | 8.29 | -0.61 | 10000 | 16000 |
| 151 | 김승현 | 삼성 | 0 | 3 | 0 | 1 | 0 | 41 | 0 | 43.2 | ⋯ | 1.44 | 0.341 | 73.9 | 5.77 | -0.40 | 6.87 | 6.95 | -0.70 | 4000 | 2900 |
| 152 | 류희운 | KT | 4 | 4 | 0 | 0 | 0 | 24 | 14 | 81.0 | ⋯ | 2.00 | 0.312 | 65.3 | 7.67 | -0.68 | 7.60 | 7.81 | -1.01 | 4000 | 3000 |
# Baseline model: regress the 2018 salary on every column of picher2 except
# the team-name (팀명) and player-name (선수명) identifier columns.
aa <- lm(연봉.2018. ~ . - 팀명 - 선수명, data = picher2)
summary(aa)
Call:
lm(formula = 연봉.2018. ~ . - 팀명 - 선수명, data = picher2)
Residuals:
Min 1Q Median 3Q Max
-6719.3 -1077.4 65.0 796.1 21824.2
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -1180.5580 6767.2703 -0.174 0.862
승 -11.5761 219.0412 -0.053 0.958
패 81.9489 210.8035 0.389 0.698
세 -37.2721 103.6610 -0.360 0.720
홀드 80.5730 116.2315 0.693 0.489
블론 370.1732 278.5525 1.329 0.186
경기 -32.2197 53.8240 -0.599 0.551
선발 -174.3314 168.9477 -1.032 0.304
이닝 52.2789 42.2955 1.236 0.219
삼진.9 201.3240 879.7268 0.229 0.819
볼넷.9 340.5238 826.0454 0.412 0.681
홈런.9 1665.6462 5202.1586 0.320 0.749
BABIP 114.3823 5557.0488 0.021 0.984
LOB. 27.0594 55.5030 0.488 0.627
ERA 188.2818 297.7530 0.632 0.528
RA9.WAR 1323.6685 595.3892 2.223 0.028 *
FIP -6291.3374 16484.2058 -0.382 0.703
kFIP 4975.4255 13255.6236 0.375 0.708
WAR 477.9530 756.5703 0.632 0.529
연봉.2017. 0.8897 0.0197 45.155 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 3321 on 125 degrees of freedom
Multiple R-squared: 0.9762, Adjusted R-squared: 0.9726
F-statistic: 269.9 on 19 and 125 DF, p-value: < 2.2e-16
# Reduced model: keep five predictors; the response is the 2018 salary.
X <- picher2[c("FIP", "WAR", "볼넷.9", "삼진.9", "연봉.2017.")]
y <- picher2[["연봉.2018."]]

# Fix the seed so the random partition below is reproducible.
set.seed(20)
train_indices <- createDataPartition(y, p = 0.8, list = FALSE) # 80% of rows go to the training set

X_train <- X[train_indices, ] # training predictors
y_train <- y[train_indices] # training response
X_test <- X[-train_indices, ] # held-out predictors
y_test <- y[-train_indices] # held-out response

# Fit on the training rows only. This statement must sit on its own line:
# when it trails a `#` comment it is never executed and model2 is undefined.
model2 <- lm(y_train ~ ., data = cbind(X_train, y_train))
summary(model2)
Call:
lm(formula = y_train ~ ., data = cbind(X_train, y_train))
Residuals:
Min 1Q Median 3Q Max
-8171.3 -1022.5 -169.3 1009.7 9835.0
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1947.46270 1101.98299 1.767 0.0799 .
FIP -130.36447 180.71549 -0.721 0.4722
WAR 1933.30721 331.27392 5.836 5.29e-08 ***
볼넷.9 -95.23174 143.48508 -0.664 0.5082
삼진.9 81.85525 94.20305 0.869 0.3867
연봉.2017. 0.89859 0.01633 55.034 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 2764 on 112 degrees of freedom
Multiple R-squared: 0.9816, Adjusted R-squared: 0.9808
F-statistic: 1194 on 5 and 112 DF, p-value: < 2.2e-16
# Predict the 2018 salary for the held-out rows using the training-set fit.
y_pred <- predict(model2, newdata = X_test)
summary(model2)
Call:
lm(formula = y_train ~ ., data = cbind(X_train, y_train))
Residuals:
Min 1Q Median 3Q Max
-8171.3 -1022.5 -169.3 1009.7 9835.0
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1947.46270 1101.98299 1.767 0.0799 .
FIP -130.36447 180.71549 -0.721 0.4722
WAR 1933.30721 331.27392 5.836 5.29e-08 ***
볼넷.9 -95.23174 143.48508 -0.664 0.5082
삼진.9 81.85525 94.20305 0.869 0.3867
연봉.2017. 0.89859 0.01633 55.034 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 2764 on 112 degrees of freedom
Multiple R-squared: 0.9816, Adjusted R-squared: 0.9808
F-statistic: 1194 on 5 and 112 DF, p-value: < 2.2e-16
# Draw the diagnostic plots that plot() produces for the fitted lm object.
plot(model2)



## Shapiro-Wilk test on the residuals of model2
## H0: residuals are normally distributed vs. H1: not H0
shapiro.test(resid(model2))
Shapiro-Wilk normality test
data: resid(model2)
W = 0.92186, p-value = 3.661e-06
p-value = 3.661e-06 < 0.05이므로 귀무가설 기각: 잔차가 정규분포를 따른다고 보기 어렵다.
library(lmtest) ### Homoscedasticity check
## H0: homoscedastic (equal variance) vs. H1: heteroscedastic
bptest(model2)
studentized Breusch-Pagan test
data: model2
BP = 2.7624, df = 5, p-value = 0.7366
p-value = 0.7366 > 0.05이므로 귀무가설(등분산)을 기각할 수 없다. 즉, 잔차의 이분산성은 확인되지 않는다.
# Repeat of the Breusch-Pagan test on model2.
bptest(model2)
studentized Breusch-Pagan test
data: model2
BP = 2.7624, df = 5, p-value = 0.7366
# Predict the 2018 salary for every row of picher2 (training rows included)
# and store the prediction next to the observed salaries.
y_pred <- predict(model2, newdata = X)
# Assign the column rather than cbind() so that re-running this cell does not
# append a second `new_data` column.
picher2[["new_data"]] <- unname(y_pred)

# Sort by 2018 salary, highest first. Index picher2 (not picher) so that the
# ordering and the rows come from the same data frame.
sorted_picher2 <- picher2[order(-picher2[["연봉.2018."]]), ]

# Keep only the players whose salary differs between 2017 and 2018, then show
# player name, actual 2018 salary, predicted salary and 2017 salary.
filtered_df2 <- picher2[picher2[["연봉.2018."]] != picher2[["연봉.2017."]], ]
df <- filtered_df2[, c("선수명", "연봉.2018.", "new_data", "연봉.2017.")]
sorted_df <- df[order(df[["연봉.2018."]], decreasing = TRUE), ]

summary(model2)
Call:
lm(formula = y_train ~ ., data = cbind(X_train, y_train))
Residuals:
Min 1Q Median 3Q Max
-8171.3 -1022.5 -169.3 1009.7 9835.0
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1947.46270 1101.98299 1.767 0.0799 .
FIP -130.36447 180.71549 -0.721 0.4722
WAR 1933.30721 331.27392 5.836 5.29e-08 ***
볼넷.9 -95.23174 143.48508 -0.664 0.5082
삼진.9 81.85525 94.20305 0.869 0.3867
연봉.2017. 0.89859 0.01633 55.034 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 2764 on 112 degrees of freedom
Multiple R-squared: 0.9816, Adjusted R-squared: 0.9808
F-statistic: 1194 on 5 and 112 DF, p-value: < 2.2e-16
sorted_df
| 선수명 | 연봉.2018. | new_data | 연봉.2017. | |
|---|---|---|---|---|
| <chr> | <int> | <dbl> | <int> | |
| 5 | 레일리 | 111000 | 86639.501 | 85000 |
| 24 | 배영수 | 50000 | 54254.114 | 55000 |
| 41 | 안영명 | 35000 | 20988.868 | 20000 |
| 68 | 채병용 | 30000 | 24555.703 | 25000 |
| 17 | 류제국 | 29000 | 37171.350 | 35000 |
| 14 | 박세웅 | 25000 | 15406.471 | 10000 |
| 40 | 임창민 | 25000 | 23649.432 | 22500 |
| 56 | 박정진 | 25000 | 32048.281 | 33000 |
| 136 | 송창식 | 24000 | 20807.581 | 22000 |
| 27 | 김진성 | 23000 | 20950.905 | 18000 |
| 38 | 심창민 | 23000 | 22494.249 | 21000 |
| 34 | 윤규진 | 21000 | 19775.968 | 18000 |
| 25 | 박종훈 | 20000 | 13538.895 | 10000 |
| 44 | 심수창 | 20000 | 25556.492 | 25000 |
| 29 | 이재학 | 19000 | 22160.236 | 20000 |
| 43 | 진해수 | 19000 | 13232.667 | 11000 |
| 28 | 이민호 | 18800 | 18842.143 | 16000 |
| 23 | 원종현 | 18500 | 17795.806 | 14000 |
| 111 | 박희수 | 18500 | 20005.072 | 21000 |
| 9 | 함덕주 | 16000 | 15382.469 | 7000 |
| 15 | 백정현 | 15500 | 15076.326 | 10000 |
| 30 | 김강률 | 15000 | 9728.566 | 6200 |
| 60 | 권오준 | 15000 | 11743.714 | 10500 |
| 63 | 김사율 | 15000 | 20080.039 | 20000 |
| 134 | 김윤동 | 15000 | 5165.007 | 4700 |
| 71 | 임정우 | 14000 | 22059.786 | 22000 |
| 78 | 박정배 | 14000 | 8806.466 | 8000 |
| 13 | 임기영 | 13000 | 9920.273 | 3100 |
| 22 | 윤희상 | 13000 | 18504.773 | 15000 |
| 42 | 심동섭 | 13000 | 13188.418 | 11000 |
| ⋮ | ⋮ | ⋮ | ⋮ | ⋮ |
| 152 | 류희운 | 4000 | 1612.039 | 3000 |
| 79 | 황수범 | 3800 | 3785.906 | 2700 |
| 124 | 김민우 | 3600 | 3911.050 | 3800 |
| 145 | 김범수 | 3600 | 2755.221 | 3300 |
| 92 | 이형범 | 3500 | 3704.690 | 2700 |
| 93 | 임현준 | 3500 | 4226.944 | 3200 |
| 110 | 안성무 | 3500 | 3170.363 | 2800 |
| 99 | 박치국 | 3400 | 3825.389 | 2700 |
| 102 | 허건엽 | 3400 | 4263.368 | 3300 |
| 105 | 고봉재 | 3300 | 4147.499 | 4400 |
| 112 | 김시현 | 3300 | 3306.331 | 2700 |
| 120 | 강장산 | 3300 | 3606.075 | 3000 |
| 118 | 배민관 | 3200 | 2722.062 | 3000 |
| 140 | 이종혁 | 3200 | 2901.344 | 2700 |
| 70 | 최지광 | 3100 | 3961.410 | 2700 |
| 73 | 박세진 | 3100 | 4478.896 | 3000 |
| 101 | 박상원 | 3100 | 3847.625 | 2700 |
| 81 | 김대유 | 3000 | 3887.465 | 2700 |
| 84 | 손주영 | 3000 | 4599.176 | 2700 |
| 87 | 김진영 | 3000 | 4582.740 | 2700 |
| 109 | 서균 | 3000 | 3926.074 | 2700 |
| 127 | 배제성 | 3000 | 3079.587 | 2700 |
| 137 | 배재환 | 3000 | 3326.123 | 4000 |
| 142 | 정영일 | 3000 | 3104.391 | 4000 |
| 96 | 김도영 | 2900 | 3887.404 | 2700 |
| 97 | 조근종 | 2900 | 4909.511 | 2700 |
| 108 | 안규현 | 2900 | 2330.828 | 2800 |
| 115 | 김종훈 | 2900 | 1665.021 | 2800 |
| 83 | 장지훈 | 2800 | 5644.555 | 2700 |
| 106 | 이수민 | 2800 | 3896.889 | 3000 |
다 안되노
번외 (WAR을 y로)
# Side analysis: use WAR as the response and every other column of dt as a
# predictor, then print the fit summary.
model_ <- lm(formula = WAR ~ ., data = dt)
summary(object = model_)
Call:
lm(formula = WAR ~ ., data = dt)
Residuals:
Min 1Q Median 3Q Max
-1.25837 -0.18387 -0.00443 0.17470 1.58012
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 8.632e-01 8.184e-01 1.055 0.2935
승 7.000e-04 2.446e-02 0.029 0.9772
패 3.924e-02 2.449e-02 1.602 0.1115
세 1.444e-03 1.218e-02 0.119 0.9058
홀드 1.016e-02 1.417e-02 0.717 0.4747
블론 -5.186e-02 3.392e-02 -1.529 0.1287
경기 -1.216e-02 6.489e-03 -1.874 0.0631 .
선발 -2.104e-02 2.081e-02 -1.011 0.3139
이닝 7.125e-03 5.161e-03 1.381 0.1698
삼진.9 2.483e-02 1.055e-01 0.235 0.8142
볼넷.9 1.254e-02 1.014e-01 0.124 0.9017
홈런.9 -3.549e-03 6.346e-01 -0.006 0.9955
BABIP 9.050e-01 6.636e-01 1.364 0.1749
LOB. -1.276e-02 5.728e-03 -2.227 0.0277 *
ERA -3.164e-02 2.542e-02 -1.245 0.2154
RA9.WAR 4.259e-01 5.563e-02 7.655 3.63e-12 ***
FIP -2.926e-02 2.011e+00 -0.015 0.9884
kFIP -2.821e-02 1.613e+00 -0.017 0.9861
연봉.2017. -9.617e-06 3.872e-06 -2.484 0.0143 *
연봉.2018. 1.726e-05 3.608e-06 4.783 4.55e-06 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.413 on 132 degrees of freedom
Multiple R-squared: 0.9147, Adjusted R-squared: 0.9024
F-statistic: 74.5 on 19 and 132 DF, p-value: < 2.2e-16
vif(model_)
- 승
- 7.90295946434333
- 패
- 5.20359428013343
- 세
- 3.03700161686219
- 홀드
- 3.62204119070082
- 블론
- 2.71884077341594
- 경기
- 13.9861481235637
- 선발
- 36.4583393437973
- 이닝
- 59.6518045536396
- 삼진.9
- 78.7060221382563
- 볼넷.9
- 50.827473231828
- 홈런.9
- 368.731473824191
- BABIP
- 3.08657710689232
- LOB.
- 3.90285091455687
- ERA
- 9.92892504245132
- RA9.WAR
- 9.31215347437081
- FIP
- 12527.1829609619
- kFIP
- 9046.79675535442
- 연봉.2017.
- 8.32457422927786
- 연봉.2018.
- 11.033305518369
- WAR로 돌린 모델의 \(R^2\)값이 연봉으로 돌린 것보다 높게 나왔다.