These lecture notes are from Prof. Kim Kwang-su's Advanced Deep Learning course (Fall 2023), Jeonbuk National University.
## https://www.tensorflow.org/tutorials/keras/regression?hl=ko ##
import pathlib
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
print(tf.__version__)
dataset_path = keras.utils.get_file("auto-mpg.data", "http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data")
dataset_path
'/root/.keras/datasets/auto-mpg.data'
column_names = ['MPG', 'Cylinders', 'Displacement', 'Horsepower', 'Weight',
                'Acceleration', 'Model Year', 'Origin']
raw_dataset = pd.read_csv(dataset_path, names=column_names,
                          na_values="?", comment='\t',
                          sep=" ", skipinitialspace=True)
dataset = raw_dataset.copy()
dataset.tail()
      MPG  Cylinders  Displacement  Horsepower  Weight  Acceleration  Model Year  Origin
393  27.0          4         140.0        86.0  2790.0          15.6          82       1
394  44.0          4          97.0        52.0  2130.0          24.6          82       2
395  32.0          4         135.0        84.0  2295.0          11.6          82       1
396  28.0          4         120.0        79.0  2625.0          18.6          82       1
397  31.0          4         119.0        82.0  2720.0          19.4          82       1
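Before dropping rows it is worth checking how many values are actually missing; a minimal check using only the already-loaded frame:
dataset.isna().sum()   # counts NaN per column; the '?' entries in Horsepower are the missing values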
dataset = dataset.dropna()
origin = dataset.pop('Origin')
dataset['USA'] = (origin == 1)*1.0
dataset['Europe'] = (origin == 2)*1.0
dataset['Japan'] = (origin == 3)*1.0
dataset.tail()
Each of the three column assignments above triggers the same pandas warning:
SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
      MPG  Cylinders  Displacement  Horsepower  Weight  Acceleration  Model Year  USA  Europe  Japan
393  27.0          4         140.0        86.0  2790.0          15.6          82  1.0     0.0    0.0
394  44.0          4          97.0        52.0  2130.0          24.6          82  0.0     1.0    0.0
395  32.0          4         135.0        84.0  2295.0          11.6          82  1.0     0.0    0.0
396  28.0          4         120.0        79.0  2625.0          18.6          82  1.0     0.0    0.0
397  31.0          4         119.0        82.0  2720.0          19.4          82  1.0     0.0    0.0
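The warning is only about how the columns are assigned, not about the result. One way to avoid it is to take an explicit copy after dropna() and build the dummies with pd.get_dummies; a sketch (standard pandas, not the notebook's original code):
dataset = raw_dataset.dropna().copy()   # explicit copy silences SettingWithCopyWarning
origin = dataset.pop('Origin')
# one-hot encode the numeric origin codes in a single call
dummies = pd.get_dummies(origin.map({1: 'USA', 2: 'Europe', 3: 'Japan'})).astype(float)
dataset = pd.concat([dataset, dummies], axis=1)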
train_dataset = dataset.sample(frac=0.8, random_state=0)
test_dataset = dataset.drop(train_dataset.index)
train_stats = train_dataset.describe()
train_stats.pop("MPG")
train_stats = train_stats.transpose()
train_stats
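seaborn is imported at the top but not used below; at this point the original tutorial uses it to inspect the joint distributions of a few columns of the training split:
# pairwise scatter plots with a KDE on the diagonal
sns.pairplot(train_dataset[["MPG", "Cylinders", "Displacement", "Weight"]], diag_kind="kde")
plt.show()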
train_labels = train_dataset.pop('MPG')
test_labels = test_dataset.pop('MPG')
def norm(x):
    return (x - train_stats['mean']) / train_stats['std']
normed_train_data = norm(train_dataset)
normed_test_data = norm(test_dataset)
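A quick sanity check on the normalization: every column of the normalized training data should now have mean close to 0 and standard deviation close to 1 (the test set will deviate slightly, since it is scaled with the training statistics).
print(normed_train_data.mean().round(2))   # approximately 0 for each feature
print(normed_train_data.std().round(2))    # approximately 1 for each feature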
def build_model():
    model = keras.Sequential([
        layers.Dense(64, activation='relu', input_shape=[len(train_dataset.keys())]),
        layers.Dense(64, activation='relu'),   # try 'linear' instead of 'relu' to see the effect
        layers.Dense(64, activation='relu'),   # two additional hidden layers, as in the summary output
        layers.Dense(64, activation='relu'),
        layers.Dense(1)])
    optimizer = tf.keras.optimizers.RMSprop(0.001)
    model.compile(loss='mse',
                  optimizer=optimizer,
                  metrics=['mae', 'mse'])
    return model
model = build_model()
model.summary()
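Before training, the tutorial also runs the untrained model on a small batch as a shape check; the predictions are meaningless at this point but confirm the model accepts the 9 normalized features:
example_batch = normed_train_data[:10]
example_result = model.predict(example_batch)   # shape (10, 1), arbitrary values before training
print(example_result)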
class PrintDot(keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs):
        if epoch % 100 == 0: print('')
        print('.', end='')

EPOCHS = 500

history = model.fit(
    normed_train_data, train_labels,
    epochs=EPOCHS, validation_split=0.2, verbose=0,
    callbacks=[PrintDot()])
Model: "sequential_15"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
dense_53 (Dense) (None, 64) 640
dense_54 (Dense) (None, 64) 4160
dense_55 (Dense) (None, 64) 4160
dense_56 (Dense) (None, 64) 4160
dense_57 (Dense) (None, 1) 65
=================================================================
Total params: 13185 (51.50 KB)
Trainable params: 13185 (51.50 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
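The parameter counts in the summary follow directly from the layer sizes (9 input features after one-hot encoding):
# dense_53: 9 inputs * 64 units + 64 biases = 640
# dense_54..56: 64 * 64 + 64 = 4160 each
# dense_57: 64 * 1 + 1 = 65
# total: 640 + 3 * 4160 + 65 = 13185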
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
def plot_history(history):
    hist = pd.DataFrame(history.history)
    hist['epoch'] = history.epoch

    plt.figure(figsize=(8, 12))

    plt.subplot(2, 1, 1)
    plt.xlabel('Epoch')
    plt.ylabel('Mean Abs Error [MPG]')
    plt.plot(hist['epoch'], hist['mae'], label='Train Error')
    plt.plot(hist['epoch'], hist['val_mae'], label='Val Error')
    plt.ylim([0, 5])
    plt.legend()

    plt.subplot(2, 1, 2)
    plt.xlabel('Epoch')
    plt.ylabel('Mean Square Error [$MPG^2$]')
    plt.plot(hist['epoch'], hist['mse'], label='Train Error')
    plt.plot(hist['epoch'], hist['val_mse'], label='Val Error')
    plt.ylim([0, 20])
    plt.legend()
    plt.show()
plot_history(history)
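The validation curves typically flatten out, and may start rising, well before 500 epochs; the original tutorial's remedy is to retrain a fresh model with an EarlyStopping callback, roughly as follows (model2 and history2 are placeholder names):
# stop when val_loss has not improved for 10 consecutive epochs
early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)

model2 = build_model()
history2 = model2.fit(normed_train_data, train_labels,
                      epochs=EPOCHS, validation_split=0.2, verbose=0,
                      callbacks=[early_stop, PrintDot()])
plot_history(history2)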
test_predictions = model.predict(normed_test_data).flatten()
yy = np.array(test_predictions)
xx = np.array(normed_test_data["Weight"])
print('MSE', np.mean((yy - test_labels)**2))   # mean of the squared errors
idx = np.array(np.argsort(xx), dtype='int')
print(idx)
xx = xx[idx]
yy = yy[idx]
3/3 [==============================] - 0s 4ms/step
MSE 3.2270409256119708
[ 9 65 44 56 38 24 76 57 74 66 43 45 40 37 67 60 10 52 41 21 13 30 68 3
50 75 34 61 11 64 73 25 27 49 72 77 4 62 23 63 59 22 33 31 18 47 36 8
51 46 48 32 29 69 58 70 28 39 71 16 26 0 55 53 15 54 42 12 14 6 35 20
17 5 19 1 2 7]
plt.scatter(xx,yy)
plt.show()
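Beyond the Weight-vs-prediction scatter above, the tutorial also evaluates on the held-out set and plots predictions against the true labels; a sketch using only objects already defined:
# evaluate returns [loss, mae, mse] in the order given to model.compile
loss, mae, mse = model.evaluate(normed_test_data, test_labels, verbose=2)
print("Test set Mean Abs Error: {:5.2f} MPG".format(mae))

# true vs. predicted MPG; a perfect model would lie on the diagonal
plt.scatter(test_labels, test_predictions)
plt.xlabel('True Values [MPG]')
plt.ylabel('Predictions [MPG]')
plt.plot([0, 50], [0, 50])   # reference diagonal
plt.show()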