import numpy as np
import pandas as pd
import sklearn.linear_model
import sklearn.tree
07wk-031: 체중감량(교호작용) / 의사결정나무
최규빈
2023-10-17
1. 강의영상
https://youtu.be/playlist?list=PLQqh36zP38-xhSSJ1GlUjFhUgzvVa3aIH&si=JA3pd69Mv9QGXx0z
2. Imports
3. Data
# Load the weight-loss training data (columns: Supplement, Exercise, Weight_Loss).
df_train = pd.read_csv('https://raw.githubusercontent.com/guebin/MP2023/main/posts/weightloss.csv')
df_train  # display the loaded DataFrame
| | Supplement | Exercise | Weight_Loss |
---|---|---|---|
0 | False | False | -0.877103 |
1 | True | False | 1.604542 |
2 | True | True | 13.824148 |
3 | True | True | 13.004505 |
4 | True | True | 13.701128 |
... | ... | ... | ... |
9995 | True | False | 1.558841 |
9996 | False | False | -0.217816 |
9997 | False | True | 4.072701 |
9998 | True | False | -0.253796 |
9999 | False | False | -1.399092 |
10000 rows × 3 columns
# Average observed Weight_Loss per (Supplement, Exercise) cell; pivot_table aggregates with the mean by default.
df_train.pivot_table(index='Supplement',columns='Exercise',values='Weight_Loss')
Exercise | False | True |
---|---|---|
Supplement | ||
False | 0.021673 | 4.991314 |
True | 0.497573 | 14.966363 |
- 운동과 체중감량보조제를 병행하면 시너지가 나는 것 같음
4. 분석
- 분석1: 선형회귀 (교호작용 고려 X)
# step 1: split the data into the two boolean features and the target
X,y = df_train[['Supplement','Exercise']], df_train['Weight_Loss']
# step 2: create the predictor (plain linear regression, no interaction term)
predictr = sklearn.linear_model.LinearRegression()
# step 3: fit the predictor on the training data
predictr.fit(X,y)
# step 4: store the fitted values next to the observed ones
df_train['Weight_Loss_hat'] = predictr.predict(X)
# Average observed Weight_Loss per (Supplement, Exercise) cell, for comparison with the fitted values.
df_train.pivot_table(index='Supplement',columns='Exercise',values='Weight_Loss')
Exercise | False | True |
---|---|---|
Supplement | ||
False | 0.021673 | 4.991314 |
True | 0.497573 | 14.966363 |
# Average fitted value (Weight_Loss_hat) per (Supplement, Exercise) cell.
df_train.pivot_table(index='Supplement',columns='Exercise',values='Weight_Loss_hat')
Exercise | False | True |
---|---|---|
Supplement | ||
False | -2.373106 | 7.374557 |
True | 2.845934 | 12.593598 |
- 분석2: 의사결정나무
# step 1: split the data into the two boolean features and the target
X,y = df_train[['Supplement','Exercise']], df_train['Weight_Loss']
# step 2: create the predictor (decision tree regressor with default settings)
predictr = sklearn.tree.DecisionTreeRegressor()
# step 3: fit the predictor on the training data
predictr.fit(X,y)
# step 4: store the fitted values, overwriting any existing Weight_Loss_hat column
df_train['Weight_Loss_hat'] = predictr.predict(X)
df_train  # display observed and fitted values side by side
| | Supplement | Exercise | Weight_Loss | Weight_Loss_hat |
---|---|---|---|---|
0 | False | False | -0.877103 | 0.021673 |
1 | True | False | 1.604542 | 0.497573 |
2 | True | True | 13.824148 | 14.966363 |
3 | True | True | 13.004505 | 14.966363 |
4 | True | True | 13.701128 | 14.966363 |
... | ... | ... | ... | ... |
9995 | True | False | 1.558841 | 0.497573 |
9996 | False | False | -0.217816 | 0.021673 |
9997 | False | True | 4.072701 | 4.991314 |
9998 | True | False | -0.253796 | 0.497573 |
9999 | False | False | -1.399092 | 0.021673 |
10000 rows × 4 columns
# Average observed Weight_Loss per (Supplement, Exercise) cell, for comparison with the fitted values.
df_train.pivot_table(index='Supplement',columns='Exercise',values='Weight_Loss')
Exercise | False | True |
---|---|---|
Supplement | ||
False | 0.021673 | 4.991314 |
True | 0.497573 | 14.966363 |
# Average fitted value (Weight_Loss_hat) per (Supplement, Exercise) cell.
df_train.pivot_table(index='Supplement',columns='Exercise',values='Weight_Loss_hat')
Exercise | False | True |
---|---|---|
Supplement | ||
False | 0.021673 | 4.991314 |
True | 0.497573 | 14.966363 |