[STBDA] 03wk: tensorflow_Variable, Differentiation

Author

김보람

Published

June 19, 2023

These lecture notes are based on Professor 최규빈's STBDA2022 course materials at Jeonbuk National University.

imports

import tensorflow as tf
import numpy as np
2023-06-16 17:03:31.644114: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
tf.config.experimental.list_physical_devices('GPU')
[]

Supplement to the previous lecture

- max, min, sum, mean

a= tf.constant([1.0,2.0,3.0,4.0])
a
<tf.Tensor: shape=(4,), dtype=float32, numpy=array([1., 2., 3., 4.], dtype=float32)>
tf.reduce_mean(a)
<tf.Tensor: shape=(), dtype=float32, numpy=2.5>
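
The bullet above also names max, min, and sum; for completeness, the matching reductions (a quick sketch, standard TF reductions):

tf.reduce_max(a)   # -> 4.0
tf.reduce_min(a)   # -> 1.0
tf.reduce_sum(a)   # -> 10.0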

concat, stack

- Example: (2,3,4,5) stack (2,3,4,5) -> (?,?,?,?,?)

a = tf.reshape(tf.constant(range(2*3*4*5)),(2,3,4,5))
b = -a

case1 (1,2,3,4,5) stack (1,2,3,4,5) -> (2,2,3,4,5) # axis=0

tf.stack([a,b],axis=0)
<tf.Tensor: shape=(2, 2, 3, 4, 5), dtype=int32, numpy=
array([[[[[   0,    1,    2,    3,    4],
          [   5,    6,    7,    8,    9],
          [  10,   11,   12,   13,   14],
          [  15,   16,   17,   18,   19]],

         [[  20,   21,   22,   23,   24],
          [  25,   26,   27,   28,   29],
          [  30,   31,   32,   33,   34],
          [  35,   36,   37,   38,   39]],

         [[  40,   41,   42,   43,   44],
          [  45,   46,   47,   48,   49],
          [  50,   51,   52,   53,   54],
          [  55,   56,   57,   58,   59]]],


        [[[  60,   61,   62,   63,   64],
          [  65,   66,   67,   68,   69],
          [  70,   71,   72,   73,   74],
          [  75,   76,   77,   78,   79]],

         [[  80,   81,   82,   83,   84],
          [  85,   86,   87,   88,   89],
          [  90,   91,   92,   93,   94],
          [  95,   96,   97,   98,   99]],

         [[ 100,  101,  102,  103,  104],
          [ 105,  106,  107,  108,  109],
          [ 110,  111,  112,  113,  114],
          [ 115,  116,  117,  118,  119]]]],



       [[[[   0,   -1,   -2,   -3,   -4],
          [  -5,   -6,   -7,   -8,   -9],
          [ -10,  -11,  -12,  -13,  -14],
          [ -15,  -16,  -17,  -18,  -19]],

         [[ -20,  -21,  -22,  -23,  -24],
          [ -25,  -26,  -27,  -28,  -29],
          [ -30,  -31,  -32,  -33,  -34],
          [ -35,  -36,  -37,  -38,  -39]],

         [[ -40,  -41,  -42,  -43,  -44],
          [ -45,  -46,  -47,  -48,  -49],
          [ -50,  -51,  -52,  -53,  -54],
          [ -55,  -56,  -57,  -58,  -59]]],


        [[[ -60,  -61,  -62,  -63,  -64],
          [ -65,  -66,  -67,  -68,  -69],
          [ -70,  -71,  -72,  -73,  -74],
          [ -75,  -76,  -77,  -78,  -79]],

         [[ -80,  -81,  -82,  -83,  -84],
          [ -85,  -86,  -87,  -88,  -89],
          [ -90,  -91,  -92,  -93,  -94],
          [ -95,  -96,  -97,  -98,  -99]],

         [[-100, -101, -102, -103, -104],
          [-105, -106, -107, -108, -109],
          [-110, -111, -112, -113, -114],
          [-115, -116, -117, -118, -119]]]]], dtype=int32)>

case2 (2,1,3,4,5) stack (2,1,3,4,5) -> (2,2,3,4,5) # axis=1

tf.stack([a,b],axis=1)
<tf.Tensor: shape=(2, 2, 3, 4, 5), dtype=int32, numpy=
array([[[[[   0,    1,    2,    3,    4],
          [   5,    6,    7,    8,    9],
          [  10,   11,   12,   13,   14],
          [  15,   16,   17,   18,   19]],

         [[  20,   21,   22,   23,   24],
          [  25,   26,   27,   28,   29],
          [  30,   31,   32,   33,   34],
          [  35,   36,   37,   38,   39]],

         [[  40,   41,   42,   43,   44],
          [  45,   46,   47,   48,   49],
          [  50,   51,   52,   53,   54],
          [  55,   56,   57,   58,   59]]],


        [[[   0,   -1,   -2,   -3,   -4],
          [  -5,   -6,   -7,   -8,   -9],
          [ -10,  -11,  -12,  -13,  -14],
          [ -15,  -16,  -17,  -18,  -19]],

         [[ -20,  -21,  -22,  -23,  -24],
          [ -25,  -26,  -27,  -28,  -29],
          [ -30,  -31,  -32,  -33,  -34],
          [ -35,  -36,  -37,  -38,  -39]],

         [[ -40,  -41,  -42,  -43,  -44],
          [ -45,  -46,  -47,  -48,  -49],
          [ -50,  -51,  -52,  -53,  -54],
          [ -55,  -56,  -57,  -58,  -59]]]],



       [[[[  60,   61,   62,   63,   64],
          [  65,   66,   67,   68,   69],
          [  70,   71,   72,   73,   74],
          [  75,   76,   77,   78,   79]],

         [[  80,   81,   82,   83,   84],
          [  85,   86,   87,   88,   89],
          [  90,   91,   92,   93,   94],
          [  95,   96,   97,   98,   99]],

         [[ 100,  101,  102,  103,  104],
          [ 105,  106,  107,  108,  109],
          [ 110,  111,  112,  113,  114],
          [ 115,  116,  117,  118,  119]]],


        [[[ -60,  -61,  -62,  -63,  -64],
          [ -65,  -66,  -67,  -68,  -69],
          [ -70,  -71,  -72,  -73,  -74],
          [ -75,  -76,  -77,  -78,  -79]],

         [[ -80,  -81,  -82,  -83,  -84],
          [ -85,  -86,  -87,  -88,  -89],
          [ -90,  -91,  -92,  -93,  -94],
          [ -95,  -96,  -97,  -98,  -99]],

         [[-100, -101, -102, -103, -104],
          [-105, -106, -107, -108, -109],
          [-110, -111, -112, -113, -114],
          [-115, -116, -117, -118, -119]]]]], dtype=int32)>

case3 (2,3,1,4,5) stack (2,3,1,4,5) -> (2,3,2,4,5) # axis=2

tf.stack([a,b],axis=2)
<tf.Tensor: shape=(2, 3, 2, 4, 5), dtype=int32, numpy=
array([[[[[   0,    1,    2,    3,    4],
          [   5,    6,    7,    8,    9],
          [  10,   11,   12,   13,   14],
          [  15,   16,   17,   18,   19]],

         [[   0,   -1,   -2,   -3,   -4],
          [  -5,   -6,   -7,   -8,   -9],
          [ -10,  -11,  -12,  -13,  -14],
          [ -15,  -16,  -17,  -18,  -19]]],


        [[[  20,   21,   22,   23,   24],
          [  25,   26,   27,   28,   29],
          [  30,   31,   32,   33,   34],
          [  35,   36,   37,   38,   39]],

         [[ -20,  -21,  -22,  -23,  -24],
          [ -25,  -26,  -27,  -28,  -29],
          [ -30,  -31,  -32,  -33,  -34],
          [ -35,  -36,  -37,  -38,  -39]]],


        [[[  40,   41,   42,   43,   44],
          [  45,   46,   47,   48,   49],
          [  50,   51,   52,   53,   54],
          [  55,   56,   57,   58,   59]],

         [[ -40,  -41,  -42,  -43,  -44],
          [ -45,  -46,  -47,  -48,  -49],
          [ -50,  -51,  -52,  -53,  -54],
          [ -55,  -56,  -57,  -58,  -59]]]],



       [[[[  60,   61,   62,   63,   64],
          [  65,   66,   67,   68,   69],
          [  70,   71,   72,   73,   74],
          [  75,   76,   77,   78,   79]],

         [[ -60,  -61,  -62,  -63,  -64],
          [ -65,  -66,  -67,  -68,  -69],
          [ -70,  -71,  -72,  -73,  -74],
          [ -75,  -76,  -77,  -78,  -79]]],


        [[[  80,   81,   82,   83,   84],
          [  85,   86,   87,   88,   89],
          [  90,   91,   92,   93,   94],
          [  95,   96,   97,   98,   99]],

         [[ -80,  -81,  -82,  -83,  -84],
          [ -85,  -86,  -87,  -88,  -89],
          [ -90,  -91,  -92,  -93,  -94],
          [ -95,  -96,  -97,  -98,  -99]]],


        [[[ 100,  101,  102,  103,  104],
          [ 105,  106,  107,  108,  109],
          [ 110,  111,  112,  113,  114],
          [ 115,  116,  117,  118,  119]],

         [[-100, -101, -102, -103, -104],
          [-105, -106, -107, -108, -109],
          [-110, -111, -112, -113, -114],
          [-115, -116, -117, -118, -119]]]]], dtype=int32)>

case4 (2,3,4,1,5) stack (2,3,4,1,5) -> (2,3,4,2,5) # axis=3 (the code below uses the equivalent axis=-2)

tf.stack([a,b],axis=-2)
<tf.Tensor: shape=(2, 3, 4, 2, 5), dtype=int32, numpy=
array([[[[[   0,    1,    2,    3,    4],
          [   0,   -1,   -2,   -3,   -4]],

         [[   5,    6,    7,    8,    9],
          [  -5,   -6,   -7,   -8,   -9]],

         [[  10,   11,   12,   13,   14],
          [ -10,  -11,  -12,  -13,  -14]],

         [[  15,   16,   17,   18,   19],
          [ -15,  -16,  -17,  -18,  -19]]],


        [[[  20,   21,   22,   23,   24],
          [ -20,  -21,  -22,  -23,  -24]],

         [[  25,   26,   27,   28,   29],
          [ -25,  -26,  -27,  -28,  -29]],

         [[  30,   31,   32,   33,   34],
          [ -30,  -31,  -32,  -33,  -34]],

         [[  35,   36,   37,   38,   39],
          [ -35,  -36,  -37,  -38,  -39]]],


        [[[  40,   41,   42,   43,   44],
          [ -40,  -41,  -42,  -43,  -44]],

         [[  45,   46,   47,   48,   49],
          [ -45,  -46,  -47,  -48,  -49]],

         [[  50,   51,   52,   53,   54],
          [ -50,  -51,  -52,  -53,  -54]],

         [[  55,   56,   57,   58,   59],
          [ -55,  -56,  -57,  -58,  -59]]]],



       [[[[  60,   61,   62,   63,   64],
          [ -60,  -61,  -62,  -63,  -64]],

         [[  65,   66,   67,   68,   69],
          [ -65,  -66,  -67,  -68,  -69]],

         [[  70,   71,   72,   73,   74],
          [ -70,  -71,  -72,  -73,  -74]],

         [[  75,   76,   77,   78,   79],
          [ -75,  -76,  -77,  -78,  -79]]],


        [[[  80,   81,   82,   83,   84],
          [ -80,  -81,  -82,  -83,  -84]],

         [[  85,   86,   87,   88,   89],
          [ -85,  -86,  -87,  -88,  -89]],

         [[  90,   91,   92,   93,   94],
          [ -90,  -91,  -92,  -93,  -94]],

         [[  95,   96,   97,   98,   99],
          [ -95,  -96,  -97,  -98,  -99]]],


        [[[ 100,  101,  102,  103,  104],
          [-100, -101, -102, -103, -104]],

         [[ 105,  106,  107,  108,  109],
          [-105, -106, -107, -108, -109]],

         [[ 110,  111,  112,  113,  114],
          [-110, -111, -112, -113, -114]],

         [[ 115,  116,  117,  118,  119],
          [-115, -116, -117, -118, -119]]]]], dtype=int32)>

case5 (2,3,4,5,1) stack (2,3,4,5,1) -> (2,3,4,5,2) # axis=4 (the code below uses the equivalent axis=-1)

tf.stack([a,b],axis=-1)
<tf.Tensor: shape=(2, 3, 4, 5, 2), dtype=int32, numpy=
array([[[[[   0,    0],
          [   1,   -1],
          [   2,   -2],
          [   3,   -3],
          [   4,   -4]],

         [[   5,   -5],
          [   6,   -6],
          [   7,   -7],
          [   8,   -8],
          [   9,   -9]],

         [[  10,  -10],
          [  11,  -11],
          [  12,  -12],
          [  13,  -13],
          [  14,  -14]],

         [[  15,  -15],
          [  16,  -16],
          [  17,  -17],
          [  18,  -18],
          [  19,  -19]]],


        [[[  20,  -20],
          [  21,  -21],
          [  22,  -22],
          [  23,  -23],
          [  24,  -24]],

         [[  25,  -25],
          [  26,  -26],
          [  27,  -27],
          [  28,  -28],
          [  29,  -29]],

         [[  30,  -30],
          [  31,  -31],
          [  32,  -32],
          [  33,  -33],
          [  34,  -34]],

         [[  35,  -35],
          [  36,  -36],
          [  37,  -37],
          [  38,  -38],
          [  39,  -39]]],


        [[[  40,  -40],
          [  41,  -41],
          [  42,  -42],
          [  43,  -43],
          [  44,  -44]],

         [[  45,  -45],
          [  46,  -46],
          [  47,  -47],
          [  48,  -48],
          [  49,  -49]],

         [[  50,  -50],
          [  51,  -51],
          [  52,  -52],
          [  53,  -53],
          [  54,  -54]],

         [[  55,  -55],
          [  56,  -56],
          [  57,  -57],
          [  58,  -58],
          [  59,  -59]]]],



       [[[[  60,  -60],
          [  61,  -61],
          [  62,  -62],
          [  63,  -63],
          [  64,  -64]],

         [[  65,  -65],
          [  66,  -66],
          [  67,  -67],
          [  68,  -68],
          [  69,  -69]],

         [[  70,  -70],
          [  71,  -71],
          [  72,  -72],
          [  73,  -73],
          [  74,  -74]],

         [[  75,  -75],
          [  76,  -76],
          [  77,  -77],
          [  78,  -78],
          [  79,  -79]]],


        [[[  80,  -80],
          [  81,  -81],
          [  82,  -82],
          [  83,  -83],
          [  84,  -84]],

         [[  85,  -85],
          [  86,  -86],
          [  87,  -87],
          [  88,  -88],
          [  89,  -89]],

         [[  90,  -90],
          [  91,  -91],
          [  92,  -92],
          [  93,  -93],
          [  94,  -94]],

         [[  95,  -95],
          [  96,  -96],
          [  97,  -97],
          [  98,  -98],
          [  99,  -99]]],


        [[[ 100, -100],
          [ 101, -101],
          [ 102, -102],
          [ 103, -103],
          [ 104, -104]],

         [[ 105, -105],
          [ 106, -106],
          [ 107, -107],
          [ 108, -108],
          [ 109, -109]],

         [[ 110, -110],
          [ 111, -111],
          [ 112, -112],
          [ 113, -113],
          [ 114, -114]],

         [[ 115, -115],
          [ 116, -116],
          [ 117, -117],
          [ 118, -118],
          [ 119, -119]]]]], dtype=int32)>

- Example: (2,3,4), (2,3,4), (2,3,4)

a= tf.reshape(tf.constant(range(2*3*4)),(2,3,4))
b= -a
c= 2*a

(Example 1) (2,3,4), (2,3,4), (2,3,4) \(\to\) (6,3,4)

tf.concat([a,b,c],axis=0)
<tf.Tensor: shape=(6, 3, 4), dtype=int32, numpy=
array([[[  0,   1,   2,   3],
        [  4,   5,   6,   7],
        [  8,   9,  10,  11]],

       [[ 12,  13,  14,  15],
        [ 16,  17,  18,  19],
        [ 20,  21,  22,  23]],

       [[  0,  -1,  -2,  -3],
        [ -4,  -5,  -6,  -7],
        [ -8,  -9, -10, -11]],

       [[-12, -13, -14, -15],
        [-16, -17, -18, -19],
        [-20, -21, -22, -23]],

       [[  0,   2,   4,   6],
        [  8,  10,  12,  14],
        [ 16,  18,  20,  22]],

       [[ 24,  26,  28,  30],
        [ 32,  34,  36,  38],
        [ 40,  42,  44,  46]]], dtype=int32)>

(Example 2) (2,3,4), (2,3,4), (2,3,4) \(\to\) (2,9,4)

tf.concat([a,b,c],axis=1)
<tf.Tensor: shape=(2, 9, 4), dtype=int32, numpy=
array([[[  0,   1,   2,   3],
        [  4,   5,   6,   7],
        [  8,   9,  10,  11],
        [  0,  -1,  -2,  -3],
        [ -4,  -5,  -6,  -7],
        [ -8,  -9, -10, -11],
        [  0,   2,   4,   6],
        [  8,  10,  12,  14],
        [ 16,  18,  20,  22]],

       [[ 12,  13,  14,  15],
        [ 16,  17,  18,  19],
        [ 20,  21,  22,  23],
        [-12, -13, -14, -15],
        [-16, -17, -18, -19],
        [-20, -21, -22, -23],
        [ 24,  26,  28,  30],
        [ 32,  34,  36,  38],
        [ 40,  42,  44,  46]]], dtype=int32)>

(Example 3) (2,3,4), (2,3,4), (2,3,4) \(\to\) (2,3,12)

tf.concat([a,b,c],axis=-1)
<tf.Tensor: shape=(2, 3, 12), dtype=int32, numpy=
array([[[  0,   1,   2,   3,   0,  -1,  -2,  -3,   0,   2,   4,   6],
        [  4,   5,   6,   7,  -4,  -5,  -6,  -7,   8,  10,  12,  14],
        [  8,   9,  10,  11,  -8,  -9, -10, -11,  16,  18,  20,  22]],

       [[ 12,  13,  14,  15, -12, -13, -14, -15,  24,  26,  28,  30],
        [ 16,  17,  18,  19, -16, -17, -18, -19,  32,  34,  36,  38],
        [ 20,  21,  22,  23, -20, -21, -22, -23,  40,  42,  44,  46]]],
      dtype=int32)>

(Example 4) (2,3,4), (2,3,4), (2,3,4) \(\to\) (3,2,3,4)

tf.stack([a,b,c],axis=0)
<tf.Tensor: shape=(3, 2, 3, 4), dtype=int32, numpy=
array([[[[  0,   1,   2,   3],
         [  4,   5,   6,   7],
         [  8,   9,  10,  11]],

        [[ 12,  13,  14,  15],
         [ 16,  17,  18,  19],
         [ 20,  21,  22,  23]]],


       [[[  0,  -1,  -2,  -3],
         [ -4,  -5,  -6,  -7],
         [ -8,  -9, -10, -11]],

        [[-12, -13, -14, -15],
         [-16, -17, -18, -19],
         [-20, -21, -22, -23]]],


       [[[  0,   2,   4,   6],
         [  8,  10,  12,  14],
         [ 16,  18,  20,  22]],

        [[ 24,  26,  28,  30],
         [ 32,  34,  36,  38],
         [ 40,  42,  44,  46]]]], dtype=int32)>

(Example 5) (2,3,4), (2,3,4), (2,3,4) \(\to\) (2,3,3,4)

tf.stack([a,b,c],axis=1)
<tf.Tensor: shape=(2, 3, 3, 4), dtype=int32, numpy=
array([[[[  0,   1,   2,   3],
         [  4,   5,   6,   7],
         [  8,   9,  10,  11]],

        [[  0,  -1,  -2,  -3],
         [ -4,  -5,  -6,  -7],
         [ -8,  -9, -10, -11]],

        [[  0,   2,   4,   6],
         [  8,  10,  12,  14],
         [ 16,  18,  20,  22]]],


       [[[ 12,  13,  14,  15],
         [ 16,  17,  18,  19],
         [ 20,  21,  22,  23]],

        [[-12, -13, -14, -15],
         [-16, -17, -18, -19],
         [-20, -21, -22, -23]],

        [[ 24,  26,  28,  30],
         [ 32,  34,  36,  38],
         [ 40,  42,  44,  46]]]], dtype=int32)>

(Example 6) (2,3,4), (2,3,4), (2,3,4) \(\to\) (2,3,3,4)

tf.stack([a,b,c],axis=2)
<tf.Tensor: shape=(2, 3, 3, 4), dtype=int32, numpy=
array([[[[  0,   1,   2,   3],
         [  0,  -1,  -2,  -3],
         [  0,   2,   4,   6]],

        [[  4,   5,   6,   7],
         [ -4,  -5,  -6,  -7],
         [  8,  10,  12,  14]],

        [[  8,   9,  10,  11],
         [ -8,  -9, -10, -11],
         [ 16,  18,  20,  22]]],


       [[[ 12,  13,  14,  15],
         [-12, -13, -14, -15],
         [ 24,  26,  28,  30]],

        [[ 16,  17,  18,  19],
         [-16, -17, -18, -19],
         [ 32,  34,  36,  38]],

        [[ 20,  21,  22,  23],
         [-20, -21, -22, -23],
         [ 40,  42,  44,  46]]]], dtype=int32)>

(Example 7) (2,3,4), (2,3,4), (2,3,4) \(\to\) (2,3,4,3)

tf.stack([a,b,c],axis=-1)
<tf.Tensor: shape=(2, 3, 4, 3), dtype=int32, numpy=
array([[[[  0,   0,   0],
         [  1,  -1,   2],
         [  2,  -2,   4],
         [  3,  -3,   6]],

        [[  4,  -4,   8],
         [  5,  -5,  10],
         [  6,  -6,  12],
         [  7,  -7,  14]],

        [[  8,  -8,  16],
         [  9,  -9,  18],
         [ 10, -10,  20],
         [ 11, -11,  22]]],


       [[[ 12, -12,  24],
         [ 13, -13,  26],
         [ 14, -14,  28],
         [ 15, -15,  30]],

        [[ 16, -16,  32],
         [ 17, -17,  34],
         [ 18, -18,  36],
         [ 19, -19,  38]],

        [[ 20, -20,  40],
         [ 21, -21,  42],
         [ 22, -22,  44],
         [ 23, -23,  46]]]], dtype=int32)>

- Example: (2,3,4) (4,3,4) \(\to\) (6,3,4)

a=tf.reshape(tf.constant(range(2*3*4)),(2,3,4))
b=tf.reshape(-tf.constant(range(4*3*4)),(4,3,4))
tf.concat([a,b],axis=0)
<tf.Tensor: shape=(6, 3, 4), dtype=int32, numpy=
array([[[  0,   1,   2,   3],
        [  4,   5,   6,   7],
        [  8,   9,  10,  11]],

       [[ 12,  13,  14,  15],
        [ 16,  17,  18,  19],
        [ 20,  21,  22,  23]],

       [[  0,  -1,  -2,  -3],
        [ -4,  -5,  -6,  -7],
        [ -8,  -9, -10, -11]],

       [[-12, -13, -14, -15],
        [-16, -17, -18, -19],
        [-20, -21, -22, -23]],

       [[-24, -25, -26, -27],
        [-28, -29, -30, -31],
        [-32, -33, -34, -35]],

       [[-36, -37, -38, -39],
        [-40, -41, -42, -43],
        [-44, -45, -46, -47]]], dtype=int32)>
tf.concat([a,b],axis=1)
InvalidArgumentError: {{function_node __wrapped__ConcatV2_N_2_device_/job:localhost/replica:0/task:0/device:CPU:0}} ConcatOp : Dimension 0 in both shapes must be equal: shape[0] = [2,3,4] vs. shape[1] = [4,3,4] [Op:ConcatV2] name: concat
  • Concatenation along axis=1 requires all other dimensions to match, but dimension 0 differs (2 vs 4), so this fails.
tf.concat([a,b],axis=2)
InvalidArgumentError: {{function_node __wrapped__ConcatV2_N_2_device_/job:localhost/replica:0/task:0/device:CPU:0}} ConcatOp : Dimension 0 in both shapes must be equal: shape[0] = [2,3,4] vs. shape[1] = [4,3,4] [Op:ConcatV2] name: concat
  • Same story along axis=2: dimension 0 still has to match.

- What does (2,2) @ (2,) compute?

numpy

np.array([77,-88])
array([ 77, -88])
  • a vector of length 2
np.array([[1,0],[0,1]]) @ np.array([77,-88])
array([ 77, -88])
  • Oh, but it works!? NumPy's @ temporarily promotes the 1-d operand to a matrix (a column on the right, a row on the left) and drops the extra axis afterwards.
np.array([77,-88]) @ np.array([[1,0],[0,1]])
array([ 77, -88])
np.array([[1,0],[0,1]]) @ np.array([77,-88]).reshape(2,1)
array([[ 77],
       [-88]])
np.array([77,-88]).reshape(2,1) @ np.array([[1,0],[0,1]])
ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 2 is different from 1)
  • This becomes (2,1) @ (2,2); the inner dimensions (1 vs 2) don't match, so it fails.
np.array([77,-88]).reshape(1,2) @ np.array([[1,0],[0,1]])
array([[ 77, -88]])

tensorflow

I = tf.constant([[1.0,0.0],[0.0,1.0]])
x = tf.constant([77.0,-88.0])
I @ x
InvalidArgumentError: {{function_node __wrapped__MatMul_device_/job:localhost/replica:0/task:0/device:CPU:0}} In[0] and In[1] has different ndims: [2,2] vs. [2] [Op:MatMul]
  • Matrix multiplication between a (2,2) matrix and a length-2 vector fails: tf.matmul does not promote 1-d operands the way NumPy does.
x @ I
InvalidArgumentError: {{function_node __wrapped__MatMul_device_/job:localhost/replica:0/task:0/device:CPU:0}} In[0] and In[1] has different ndims: [2] vs. [2,2] [Op:MatMul]
I @ tf.reshape(x,(2,1))
<tf.Tensor: shape=(2, 1), dtype=float32, numpy=
array([[ 77.],
       [-88.]], dtype=float32)>
tf.reshape(x,(1,2)) @ I
<tf.Tensor: shape=(1, 2), dtype=float32, numpy=array([[ 77., -88.]], dtype=float32)>
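
If you want a genuine matrix-vector product without reshaping, tf.linalg.matvec handles the rank mismatch; a quick sketch (this helper is not used in the original notes):

tf.linalg.matvec(I, x)  # (2,2) times a length-2 vector -> length-2 vector [77., -88.]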

tf.Variable

Declaration

- Declare with tf.Variable()

tf.Variable([1,2,3,4])
<tf.Variable 'Variable:0' shape=(4,) dtype=int32, numpy=array([1, 2, 3, 4], dtype=int32)>
tf.Variable([1.0,2.0,3.0,4.0])
<tf.Variable 'Variable:0' shape=(4,) dtype=float32, numpy=array([1., 2., 3., 4.], dtype=float32)>

- Create with tf.constant(), then convert

tf.Variable(tf.constant([1,2,3,4]))
<tf.Variable 'Variable:0' shape=(4,) dtype=int32, numpy=array([1, 2, 3, 4], dtype=int32)>

- Create with np (etc.), then convert

tf.Variable(np.array([1,2,3,4]))
<tf.Variable 'Variable:0' shape=(4,) dtype=int64, numpy=array([1, 2, 3, 4])>

Type

type(tf.Variable([1,2,3,4]))
tensorflow.python.ops.resource_variable_ops.ResourceVariable

Indexing

a=tf.Variable([1,2,3,4])
a
<tf.Variable 'Variable:0' shape=(4,) dtype=int32, numpy=array([1, 2, 3, 4], dtype=int32)>
a[:2]
<tf.Tensor: shape=(2,), dtype=int32, numpy=array([1, 2], dtype=int32)>

Operations work

a=tf.Variable([1,2,3,4])
b=tf.Variable([-1,-2,-3,-4])
type(a)
tensorflow.python.ops.resource_variable_ops.ResourceVariable
type(b)
tensorflow.python.ops.resource_variable_ops.ResourceVariable
a+b
<tf.Tensor: shape=(4,), dtype=int32, numpy=array([0, 0, 0, 0], dtype=int32)>
type(a+b)
tensorflow.python.framework.ops.EagerTensor
b = -a
type(b)
tensorflow.python.framework.ops.EagerTensor

tf.Variable is also inconvenient to use

tf.Variable([1,2])+tf.Variable([3.14,3.14])
InvalidArgumentError: cannot compute AddV2 as input #1(zero-based) was expected to be a int32 tensor but is a float tensor [Op:AddV2]
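
Without the tnp shim introduced next, an explicit cast is the usual fix; a minimal sketch:

tf.cast(tf.Variable([1,2]), tf.float32) + tf.Variable([3.14,3.14])  # -> [4.14, 5.14], dtype=float32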

The grace of tnp also only partially applies

import tensorflow.experimental.numpy as tnp
tnp.experimental_enable_numpy_behavior()

- Types are converted automatically

tf.Variable([1,2])+tf.Variable([3.14,3.14])
<tf.Tensor: shape=(2,), dtype=float64, numpy=array([4.1400001, 5.1400001])>

- the .reshape method

tf.Variable([1,2,3,4]).reshape(2,2)
AttributeError: 'ResourceVariable' object has no attribute 'reshape'
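
The function form still works on a Variable, though (it reads the value and returns an EagerTensor); a quick sketch:

tf.reshape(tf.Variable([1,2,3,4]), (2,2))  # <tf.Tensor: shape=(2, 2), dtype=int32, ...>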

For most operations I can't see much difference from tf.constant

- tf.concat

a= tf.Variable([[1,2],[3,4]])
b= tf.Variable([[-1,-2],[-3,-4]])
tf.concat([a,b],axis=0)
<tf.Tensor: shape=(4, 2), dtype=int32, numpy=
array([[ 1,  2],
       [ 3,  4],
       [-1, -2],
       [-3, -4]], dtype=int32)>

- tf.stack

a= tf.Variable([[1,2],[3,4]])
b= tf.Variable([[-1,-2],[-3,-4]])
tf.stack([a,b],axis=0)
<tf.Tensor: shape=(2, 2, 2), dtype=int32, numpy=
array([[[ 1,  2],
        [ 3,  4]],

       [[-1, -2],
        [-3, -4]]], dtype=int32)>

Variable values can be changed in place (?)

a= tf.Variable([1,2,3,4])
id(a)
139819184398048
a
<tf.Variable 'Variable:0' shape=(4,) dtype=int32, numpy=array([1, 2, 3, 4], dtype=int32)>
a.assign_add([-1,-2,-3,-4])
id(a)
139819184398048
a
<tf.Variable 'Variable:0' shape=(4,) dtype=int32, numpy=array([0, 0, 0, 0], dtype=int32)>
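
Besides assign_add, .assign overwrites the value outright, again in place (the id stays the same); a quick sketch:

a.assign([5,6,7,8])   # replace the whole value
a[0].assign(99)       # sliced assignment also works on a Variable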

Summary

- I don't see a clear reason why anything must be created with tf.Variable().

- Even if you go to the trouble of using tf.Variable(), any simple operation turns the result back into the same kind of object as one made with tf.constant(). (The payoff appears below: tf.GradientTape automatically watches a tf.Variable but not a tf.constant.)

Differentiation

Motivation

- Example: use a computer to find the slope of the tangent line to \(y=3x^2\) at \(x=2\).

(By hand)

\[\frac{dy}{dx}=6x\]

so substituting \(x=2\) gives 12.

(Using a computer)

Step 1

x1=2
y1= 3*x1**2
x2=2+0.000000001
y2= 3*x2**2
(y2-y1)/(x2-x1)
12.0

Step 2

def f(x):
    return(3*x**2)
f(3)
27
def d(f,x):
    return (f(x+0.000000001)-f(x))/0.000000001
d(f,2)
12.000000992884452

Step 3

d(lambda x: 3*x**2 ,2)
12.000000992884452
d(lambda x: x**2 ,0)
1e-09
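
  • The true answer is 0; the 1e-09 is the bias of the one-sided (forward) difference. A central difference, sketched below (not in the original notes), removes it at this point:

def d2(f,x,h=1e-6):
    return (f(x+h)-f(x-h))/(2*h)  # central difference: error O(h^2)
d2(lambda x: x**2, 0)   # -> 0.0 exactly (the ±h terms cancel)
d2(lambda x: 3*x**2, 2) # -> ≈ 12.0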

Step 4

\[f(x,y)= x^2 +3y\]

def f(x,y):
    return(x**2 +3*y)
d(f,(2,3))
TypeError: can only concatenate tuple (not "float") to tuple

The helper d adds 0.000000001 to the whole tuple (2,3), so it raises a TypeError; we never defined how to differentiate with respect to x or y separately.
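
For a multivariate f we need partial derivatives, perturbing one argument at a time. A sketch with a hypothetical helper (d_partial is not part of the original notes):

def d_partial(f, args, i, h=1e-6):
    args = list(args)
    base = f(*args)
    args[i] += h              # perturb only the i-th argument
    return (f(*args) - base) / h
d_partial(f, (2,3), 0)  # ∂f/∂x = 2x -> ≈ 4
d_partial(f, (2,3), 1)  # ∂f/∂y = 3  -> ≈ 3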

How to use tf.GradientTape()

- Example 1: find the value of the derivative of \(y=3x^2\) at \(x=2\).

x=tf.Variable(2.0)
a=tf.constant(3.0)
  • The quantity we differentiate with respect to goes in a Variable
mytape=tf.GradientTape()
mytape
<tensorflow.python.eager.backprop.GradientTape at 0x7f2a30cc9820>
  • The result of the call lives in the object at address 0x7f2a30cc9820
dir(mytape)
['__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__enter__',
 '__eq__',
 '__exit__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_ensure_recording',
 '_persistent',
 '_pop_tape',
 '_push_tape',
 '_recording',
 '_tape',
 '_tf_api_names',
 '_tf_api_names_v1',
 '_watch_accessed_variables',
 '_watched_variables',
 'batch_jacobian',
 'gradient',
 'jacobian',
 'reset',
 'stop_recording',
 'watch',
 'watched_variables']
mytape.__enter__() # start recording
y=a*x**2 # y=ax^2 = 3x^2
mytape.__exit__(None,None,None) # stop recording
mytape.gradient(y,x) # differentiate y with respect to x
<tf.Tensor: shape=(), dtype=float32, numpy=12.0>

- Example 2: a slightly different example

x=tf.Variable(2.0)
#a=tf.constant(3.0)

mytape=tf.GradientTape()
mytape.__enter__() # start recording
a=(x/2)*3 ## a=(3/2)x
y=a*x**2  ## y=ax^2 = (3/2)x^3
mytape.__exit__(None,None,None) # stop recording

mytape.gradient(y,x) # differentiate y with respect to x
<tf.Tensor: shape=(), dtype=float32, numpy=18.0>
  • Why 18 and not 12? Look at the equations below.

\[a=\frac{3}{2}x\] \[y=ax^2=\frac{3}{2}x^3\]

\[\frac{dy}{dx}=\frac{3}{2}\cdot 3x^2\]

3/2*3*4
18.0

- The concept of the tape (\(\star\))

(The situation)

We are asking the computer to do a difficult differentiation for us (say, \(y=3x^2\)). To make the request we have to write the expression \(y=3x^2\) on a scratch pad (= the tape) and show it to the computer, telling it explicitly what the target is and what to differentiate with respect to.

  1. mytape = tf.GradientTape(): tf.GradientTape() creates the scratch pad; we name the pad mytape.

  2. mytape.__enter__(): open the pad (= make it ready to record).

  3. a=x/2*3; y=a*x**2: write down the expressions to hand to the computer.

  4. mytape.__exit__(None,None,None): close the pad.

  5. mytape.gradient(y,x): leave a note saying "differentiate \(y\) with respect to \(x\)" and hand it to the computer.

- Example 3: deciding when to open and close the scratch pad matters.

x=tf.Variable(2.0)
a=(x/2)*3 ## a=(3/2)x

mytape=tf.GradientTape()
mytape.__enter__() # start recording
y=a*x**2  ## y=ax^2 = (3/2)x^3
mytape.__exit__(None,None,None) # stop recording

mytape.gradient(y,x) # differentiate y with respect to x
<tf.Tensor: shape=(), dtype=float32, numpy=12.0>
  • Here a was computed before the tape opened, so it is recorded as a constant (a=3): \(\frac{dy}{dx}=2ax=2\cdot 3\cdot 2=12\).

- Example 4: tf.GradientTape() with a with statement

x=tf.Variable(2.0)
a=(x/2)*3
with tf.GradientTape() as mytape:
    ## with block starts
    y=a*x**2
    ## with block ends
mytape.gradient(y,x) # differentiate y with respect to x
<tf.Tensor: shape=(), dtype=float32, numpy=12.0>

(Syntax notes)

Write it like this:

with expression as myname:
    ## with block starts: myname.__enter__()
    blabla ~
    yadiyadi !!
    ## with block ends: myname.__exit__()
  1. Evaluating expression creates an object, which is bound to the name myname. The object must provide .__enter__() and .__exit__() as hidden methods.

  2. When the with block starts, myname.__enter__() is executed.

  3. blabla and yadiyadi are executed.

  4. When the with block ends, myname.__exit__() is executed.
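
To see the protocol in isolation, a toy context manager (MyContext is a made-up class for illustration, not part of the notes):

class MyContext:
    def __enter__(self):
        print("opened")   # runs as the with block starts
        return self
    def __exit__(self, exc_type, exc_value, traceback):
        print("closed")   # runs as the with block ends, even on error

with MyContext() as c:
    print("blabla ~")
# prints: opened, blabla ~, closed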

- Example 5: Example 2 implemented with a with statement

x=tf.Variable(2.0)

with tf.GradientTape() as mytape:
    a=(x/2)*3 ## a=(3/2)x
    y=a*x**2  ## y=ax^2 = (3/2)x^3

mytape.gradient(y,x) # differentiate y with respect to x
<tf.Tensor: shape=(), dtype=float32, numpy=18.0>

- Example 6: persistent = True

(Observation 1)

x=tf.Variable(2.0)

with tf.GradientTape() as mytape:
    a=(x/2)*3 ## a=(3/2)x
    y=a*x**2  ## y=ax^2 = (3/2)x^3
mytape.gradient(y,x) # run this two or more times to observe the error
RuntimeError: A non-persistent GradientTape can only be used to compute one set of gradients (or jacobians)

(Observation 2)

x=tf.Variable(2.0)

with tf.GradientTape(persistent=True) as mytape:
    a=(x/2)*3 ## a=(3/2)x
    y=a*x**2  ## y=ax^2 = (3/2)x^3
mytape.gradient(y,x) # no error even when run two or more times
<tf.Tensor: shape=(), dtype=float32, numpy=18.0>

- Example 7: watch

(Observation 1)

Make x a constant rather than a Variable

x=tf.constant(2.0)

with tf.GradientTape(persistent=True) as mytape:
    a=(x/2)*3 ## a=(3/2)x
    y=a*x**2  ## y=ax^2 = (3/2)x^3
print(mytape.gradient(y,x))
None

(Observation 2)

x=tf.constant(2.0)
with tf.GradientTape(persistent=True) as mytape:
    mytape.watch(x) # watch manually
    a=(x/2)*3 ## a=(3/2)x
    y=a*x**2  ## y=ax^2 = (3/2)x^3
print(mytape.gradient(y,x))
tf.Tensor(18.0, shape=(), dtype=float32)

(Observation 3)

x=tf.Variable(2.0)
with tf.GradientTape(persistent=True,watch_accessed_variables=False) as mytape: # disable automatic watching
    a=(x/2)*3 ## a=(3/2)x
    y=a*x**2  ## y=ax^2 = (3/2)x^3
print(mytape.gradient(y,x))
None

(Observation 4)

x=tf.Variable(2.0)
with tf.GradientTape(persistent=True,watch_accessed_variables=False) as mytape: # disable automatic watching
    mytape.watch(x)
    a=(x/2)*3 ## a=(3/2)x
    y=a*x**2  ## y=ax^2 = (3/2)x^3
print(mytape.gradient(y,x))
tf.Tensor(18.0, shape=(), dtype=float32)

(Observation 5)

x=tf.Variable(2.0)
with tf.GradientTape(persistent=True) as mytape:
    mytape.watch(x)
    a=(x/2)*3 ## a=(3/2)x
    y=a*x**2  ## y=ax^2 = (3/2)x^3
print(mytape.gradient(y,x))
tf.Tensor(18.0, shape=(), dtype=float32)
  • Automatic watching was never disabled here, so x is watched anyway; the extra watch() call is simply redundant.

- Example 9: back to the café example.

- Example 10: the matrix version of the café example.

- Example 11: For the example above, find the theoretically optimal \(\boldsymbol{\beta}\) (i.e. find \(\hat{\boldsymbol{\beta}}\)) and compute the gradient of the loss there. Confirm that the result is \(\begin{bmatrix}0 \\ 0 \end{bmatrix}\).
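
The café data itself is not reproduced in this note; below is a minimal sketch of Example 11 with synthetic (x, y) standing in for it, assuming the usual squared-error loss (the data-generating line 2+3x and the seed are arbitrary choices):

tf.random.set_seed(1)
x = tf.constant(np.linspace(0, 1, 100), dtype=tf.float32)
y = 2.0 + 3.0*x + tf.random.normal([100], stddev=0.1)
X = tf.stack([tf.ones(100), x], axis=1)                # design matrix, shape (100,2)
yy = tf.reshape(y, (100,1))
beta_hat = tf.linalg.inv(tf.transpose(X)@X) @ tf.transpose(X) @ yy  # normal equations
beta = tf.Variable(beta_hat)                           # start exactly at the optimum
with tf.GradientTape() as mytape:
    loss = tf.reduce_sum((yy - X@beta)**2)
mytape.gradient(loss, beta)                            # ≈ [[0.],[0.]] up to float error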