86. TensorFlow Classification Practice Problems
๋ฌธ1) bmi.csv ๋ฐ์ดํฐ์ ์ ์ด์ฉํ์ฌ ๋ค์๊ณผ ๊ฐ์ด sigmoid classifier์ ๋ชจ๋ธ์ ์์ฑํ์์ค.
์กฐ๊ฑด1> bmi.csv ๋ฐ์ดํฐ์
-> x๋ณ์ : 1,2๋ฒ์งธ ์นผ๋ผ(height, weight)
-> y๋ณ์ : 3๋ฒ์งธ ์นผ๋ผ(label)
์กฐ๊ฑด2> ๋ฅ๋ฌ๋ ์ต์ ํ ์๊ณ ๋ฆฌ์ฆ : Adam
์กฐ๊ฑด3> learning rage = 0.01
์กฐ๊ฑด4> ๋ฐ๋ณตํ์ต : 2,000๋ฒ, 200 step ๋จ์๋ก loss ์ถ๋ ฅ
์กฐ๊ฑด5> ์ต์ ํ ๋ชจ๋ธ ํ ์คํธ : ๋ถ๋ฅ์ ํ๋(Accuracy report) ์ถ๋ ฅ
<์ถ๋ ฅ๊ฒฐ๊ณผ>
step = 200 , loss = 0.532565
step = 400 , loss = 0.41763392
step = 600 , loss = 0.34404162
step = 800 , loss = 0.29450226
step = 1000 , loss = 0.25899038
step = 1200 , loss = 0.23218009
step = 1400 , loss = 0.2111086
step = 1600 , loss = 0.19401966
step = 1800 , loss = 0.17981105
step = 2000 , loss = 0.16775638
========================================
accuracy= 0.9894053767712886
import tensorflow as tf
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import minmax_scale # normalize x variables
import numpy as np
import pandas as pd
# csv file load
bmi = pd.read_csv('C:/ITWILL/5_Tensorflow/data/bmi.csv')
print(bmi.info())
bmi['label'].value_counts()
# normal    7677
# fat       7425
# thin      4898
subset ์์ฑ : label์์ normal, fat ์ถ์ถ
bmi = bmi[bmi.label.isin(['normal','fat'])]
print(bmi.head())
bmi['label'].value_counts()
# normal    7677 -> 0
# fat       7425 -> 1
# thin      4898 -> removed
์นผ๋ผ ์ถ์ถ
col = list(bmi.columns)
print(col)
x,y ๋ณ์ ์ถ์ถ
x_data = bmi[col[:2]] #x๋ณ์(1,2์นผ๋ผ)
y_data = bmi[col[2]] #y๋ณ์(3์นผ๋ผ)
y_data #dtype: object
๋ฐ์ดํฐ ์ ์ฒ๋ฆฌ : label ๋๋ฏธ๋ณ์ ๋ณํ(normal -> 0, fat -> 1)
map_data = {'normal': 0,'fat' : 1}
y_data = y_data.map(map_data) # dict mapping
print(y_data) #0/1
x_data ์ ๊ทํ ํจ์
x_data = minmax_scale(x_data)
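For reference, minmax_scale rescales each column to [0, 1], so height and weight contribute on a comparable scale:

x' = \frac{x - x_{\min}}{x_{\max} - x_{\min}}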
numpy ๊ฐ์ฒด ๋ณํ
x_data = np.array(x_data)
y_data = np.transpose(np.array([y_data])) #(1, 15102) -> (15102, 1)
print(x_data.shape) #(15102, 2)
print(y_data.shape) #(15102, 1)
* X,Y ๋ฐ์ดํฐ ์ ์ฒ๋ฆฌ ์๋ฃ
1. X,Y ๋ณ์ ์ ์
X = tf.constant(x_data, tf.float32)
Y = tf.constant(y_data, tf.float32)
2. w,b ๋ณ์ ์ ์ : ์ด๊ธฐ๊ฐ(์ ๊ท๋ถํฌ ๋์ )
w = tf.Variable(tf.random.normal([2, 1])) #[์
๋ ฅ์,์ถ๋ ฅ์]
b = tf.Variable(tf.random.normal([1])) #[์ถ๋ ฅ์]
3. ํ๊ท๋ฐฉ์ ์
def linear_model(X) : #train, test
y_pred = tf.linalg.matmul(X, w) + b
return y_pred #2์ฐจ์
# 4. apply the sigmoid activation function
def sig_fn(X):
    y_pred = linear_model(X)
    sig = tf.nn.sigmoid(y_pred)
    return sig
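The sigmoid squashes each linear output z into a probability in (0, 1):

\sigma(z) = \frac{1}{1 + e^{-z}}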
5. ์์ค ํจ์ ์ ์ : ์์ค๊ณ์ฐ์ ์์
def loss_fn() : #์ธ์ ์์
sig = sig_fn(X)
loss = -tf.reduce_mean(Y*tf.math.log(sig)+(1-Y)*tf.math.log(1-sig))
return loss
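loss_fn above computes the binary cross-entropy averaged over the N samples, with \hat{y}_i = \sigma(x_i w + b):

L(w, b) = -\frac{1}{N}\sum_{i=1}^{N}\left[ y_i \log \hat{y}_i + (1 - y_i)\log(1 - \hat{y}_i) \right]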
6. ์ต์ ํ ๊ฐ์ฒด : learning_rate= 0.01
optimizer = tf.optimizers.Adam(learning_rate= 0.01)
7. ๋ฐ๋ณตํ์ต : ๋ฐ๋ณตํ์ต : 2,000๋ฒ, 200 step ๋จ์๋ก loss ์ถ๋ ฅ
for step in range(2000) :
#model ์ต์ ํ -> w, b ์
๋ฐ์ดํธ
optimizer.minimize(loss=loss_fn, var_list=[w, b])
if (step+1) % 200 == 0 :
print('step : ', (step+1), ", loss val = ", loss_fn().numpy())
print('='*30)
8. model ์ต์ ํ ํ
์คํธ
y_pred = tf.cast(sig_fn(X) > 0.5, tf.float32)
acc = accuracy_score(Y, y_pred)
print('accuracy =', acc)
report = classification_report(Y, y_pred)
print(report)
๋ฌธ2) bmi.csv ๋ฐ์ดํฐ์ ์ ์ด์ฉํ์ฌ ๋ค์๊ณผ ๊ฐ์ด softmax classifier ๋ชจ๋ธ์ ์์ฑํ์์ค.
์กฐ๊ฑด1> bmi.csv ๋ฐ์ดํฐ์
-> x๋ณ์ : height, weight ์นผ๋ผ
-> y๋ณ์ : label(3๊ฐ ๋ฒ์ฃผ) ์นผ๋ผ
์กฐ๊ฑด2> ๋ฅ๋ฌ๋ ์ต์ ํ ์๊ณ ๋ฆฌ์ฆ : Adam
์กฐ๊ฑด3> learning rage : 0.001 or 0.005 ์ ํ(๋ถ๋ฅ์ ํ๋ ๋์๊ฒ)
์กฐ๊ฑด4> ๋ฐ๋ณตํ์ต, step ๋จ์๋ก loss : <์ถ๋ ฅ๊ฒฐ๊ณผ> ์ฐธ๊ณ
์กฐ๊ฑด5> ๋ถ๋ฅ์ ํ๋ ์ถ๋ ฅ
์กฐ๊ฑด6> ์์ธก์น์ ์ ๋ต 15๊ฐ ์ถ๋ ฅ
<์ถ๋ ฅ ๊ฒฐ๊ณผ>
step = 500 , loss = 0.44498476
step = 1000 , loss = 0.34861678
step = 1500 , loss = 0.28995454
step = 2000 , loss = 0.24887484
step = 2500 , loss = 0.2177721
step = 3000 , loss = 0.19313334
step = 3500 , loss = 0.17303815
step = 4000 , loss = 0.15629826
step = 4500 , loss = 0.1421249
step = 5000 , loss = 0.12996733
========================================
accuracy = 0.9769
========================================
y_pred : [0 0 1 1 1 1 0 2 0 2 1 2 1 0 2]
y_true : [0 0 1 1 1 1 0 2 0 2 1 2 1 0 2]
========================================
import tensorflow as tf # ver 2.x
from sklearn.preprocessing import minmax_scale # normalize x data (0-1)
from sklearn.metrics import accuracy_score
import numpy as np
import pandas as pd
bmi = pd.read_csv('C:/ITWILL/5_Tensorflow/data/bmi.csv')
print(bmi.info())
์นผ๋ผ ์ถ์ถ
col = list(bmi.columns)
print(col)
x,y ๋ณ์ ์ถ์ถ
x_data = bmi[col[:2]] #x๋ณ์
x_data ์ ๊ทํ
x_data = minmax_scale(x_data)
# one-hot encode the label
label_map = {"thin": [1,0,0], "normal": [0,1,0], "fat": [0,0,1]}
bmi["label"] = bmi["label"].apply(lambda x : np.array(label_map[x]))
y_data = list(bmi["label"]) # nested list: [[1,0,0], [1,0,0], ...]
# convert to numpy arrays
x_data = np.array(x_data)
y_data = np.array(y_data)
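As an aside, the same one-hot matrix can be built without the manual label_map. A minimal alternative sketch, assuming it runs on the original string labels (i.e., before the apply above) and that we fix the column order ourselves; y_data_alt is a hypothetical name:

onehot = pd.get_dummies(bmi['label'])            # columns sorted alphabetically: fat, normal, thin
onehot = onehot[['thin', 'normal', 'fat']]       # reorder to match label_map above
y_data_alt = onehot.to_numpy().astype('float64') # same (n_samples, 3) one-hot matrix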
* X,Y ๋ฐ์ดํฐ ์ ์ฒ๋ฆฌ ์๋ฃ
1. X,Y๋ณ์ ์ ์ : ๊ณต๊ธํ ๋ณ์
X = tf.constant(x_data, tf.float32) #[?, 2]
Y = tf.constant(y_data, tf.float32) #[?, 3]
2. w,b ๋ณ์ ์ ์
w = tf.Variable(tf.random.normal([2, 3])) #[์
๋ ฅ์, ์ถ๋ ฅ์]
b = tf.Variable(tf.zeros([3])) #[์ถ๋ ฅ์]
3. ํ๊ท๋ฐฉ์ ์
def linear_model(X) : #train, test
y_pred = tf.matmul(X, w) + b #ํ๋ ฌ๊ณฑ : [None,3]*[3,1]=[None,1]
return y_pred
# 4. apply the softmax activation function
def soft_fn(X):
    y_pred = linear_model(X)
    soft = tf.nn.softmax(y_pred)
    return soft
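Softmax turns the three linear outputs z_1, z_2, z_3 into a probability distribution over the classes:

\text{softmax}(z)_j = \frac{e^{z_j}}{\sum_{k=1}^{3} e^{z_k}}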
5. ์์ค ํจ์ ์ ์ : ์์ค๊ณ์ฐ์ ์์
def loss_fn() : #์ธ์ ์์
soft = soft_fn(X) #ํ๋ จ์
-> ์์ธก์น : ํ๊ท๋ฐฉ์ ์
loss = -tf.reduce_mean(Y*tf.math.log(soft)+(1-Y)*tf.math.log(1-soft))
return loss
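Note that loss_fn reuses the binary cross-entropy form element-wise over the three one-hot columns. The textbook categorical cross-entropy for softmax outputs drops the (1 - Y) term; with one-hot targets both push the predicted distribution toward the correct class:

L(w, b) = -\frac{1}{N}\sum_{i=1}^{N}\sum_{j=1}^{3} y_{ij} \log \hat{y}_{ij}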
6. ์ต์ ํ ๊ฐ์ฒด
optimizer = tf.optimizers.Adam(lr=0.005)
7. ๋ฐ๋ณตํ์ต
for step in range(5000) :
#์ค์ฐจ์ ๊ณฑํ๊ท ์ต์ ํ : ์์ค๊ฐ ์ต์ํ -> [a, b] ๊ฐฑ์ (update)
optimizer.minimize(loss_fn, var_list=[w, b]) #(์์ค๊ฐ, ์์ ๋์)
#500๋ฐฐ์ ๋จ์ ์ถ๋ ฅ
if (step+1) % 500 == 0 :
print("step =", (step+1), ", loss =", loss_fn().numpy())
8. ์ต์ ํ๋ model ๊ฒ์
soft_re = soft_fn(X).numpy()
y_pred = tf.argmax(soft_re, 1) # demension : 2d
y_true = tf.argmax(Y, 1) # demension : 2d
acc = accuracy_score(y_true, y_pred)
print("="*40)
print('accuracy =', acc) # accuracy = 0.9769
# y_true vs y_pred
print("="*40)
print('y_pred : ', y_pred.numpy()[:15])
print('y_true : ', y_true.numpy()[:15])
๋ฌธ3) ๋ค์ digits ๋ฐ์ดํฐ์ ์ ์ด์ฉํ์ฌ ๋คํญ๋ถ๋ฅ๊ธฐ๋ฅผ ์์ฑํ์์ค.
<์กฐ๊ฑด1> digits ๋ฐ์ดํฐ์ ์ ํน์ฑ์ ๋ณด๊ณ ์ ์ฒ๋ฆฌ/๊ณต๊ธdata ์์ฑ
<์กฐ๊ฑด2> ์๋ <์ถ๋ ฅ๊ฒฐ๊ณผ>๋ฅผ ์ฐธ๊ณ ํ์ฌ ํ์ต์จ๊ณผ ๋ฐ๋ณตํ์ต ์ ์ฉ
<์กฐ๊ฑด3> epoch์ ๋ฐ๋ฅธ loss value ์๊ฐํ : ์ด๋ฏธ์งํ์ผ ์ฐธ๊ณ (exam03_lossValue.png)
<์ถ๋ ฅ๊ฒฐ๊ณผ>
step = 200 , loss = 0.06003735238669643
step = 400 , loss = 0.02922042555340125
step = 600 , loss = 0.01916724251850193
step = 800 , loss = 0.01418028865527556
step = 1000 , loss = 0.011102086315873883
step = 1200 , loss = 0.008942419709185086
step = 1400 , loss = 0.007311927138572721
step = 1600 , loss = 0.006023632246639046
step = 1800 , loss = 0.004981346240771604
step = 2000 , loss = 0.004163072611802871
========================================
accuracy = 0.9648148148148148
import tensorflow as tf #ver 2.0
from sklearn.preprocessing import OneHotEncoder #y data -> one hot
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_digits
digits ๋ฐ์ดํฐ์
: ์ซ์ ํ๊ธฐ์ฒด ์ด๋ฏธ์ง -> ์ซ์ ์์ธก(0~9)
• ํ๊ฒ ๋ณ์ : y
- 0 ~ 9 : 10์ง์ ์ ์
• ํน์ง ๋ณ์(64ํฝ์
) : X
- 0๋ถํฐ 9๊น์ง์ ์ซ์๋ฅผ ์์ผ๋ก ์ด ์ด๋ฏธ์ง ๋ฐ์ดํฐ
- ๊ฐ ์ด๋ฏธ์ง๋ 0๋ถํฐ 15๊น์ง์ 16๊ฐ ๋ช
์์ ๊ฐ์ง๋ 8x8=64ํฝ์
ํด์๋์ ํ๋ฐฑ ์ด๋ฏธ์ง
digits = load_digits() # load the dataset
X = digits.data        # X variables
y = digits.target      # y variable
print(X.shape) #(1797, 64) : 64=8x8
print(y.shape) #(1797,)
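To get a feel for the data, a minimal sketch (assuming matplotlib is available) that displays the first sample as an image:

import matplotlib.pyplot as plt
plt.imshow(digits.images[0], cmap='gray_r') # first sample, already reshaped to 8x8 by sklearn
plt.title('label = %d' % digits.target[0])  # its true digit
plt.show()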
# 1. split the digits dataset
x_train, x_test, y_train, y_test = train_test_split(
X, y, test_size=0.3, random_state=123)
2. ์ ์ฒ๋ฆฌ : X๋ณ์ ์ ๊ทํ, Y๋ณ์ one-hot encoding
print(X.max()) #16.0
x_train_nor, x_test_nor = x_train / 16.0, x_test / 16.0
# reshape y to 2-D for OneHotEncoder
y_train = y_train.reshape([-1, 1])
y_test = y_test.reshape([-1, 1])
# one-hot encoding
obj = OneHotEncoder()
y_train = obj.fit_transform(y_train).toarray()
y_train.shape #(1257, 10)
y_test = obj.transform(y_test).toarray() # reuse the encoder fitted on y_train
y_test.shape #(540, 10)
print(y_train.dtype) #float64
3. ๊ณต๊ธ data :
x_train = x_train_nor
x_test = x_test_nor
x_train.shape #(1257, 64)
x_test.shape #(540, 64)
4. w, b ๋ณ์ ์ ์
w = tf.Variable(tf.random.normal([64, 10], dtype=tf.float64)) #[์
๋ ฅ์, ์ถ๋ ฅ์]
b = tf.Variable(tf.random.normal([10], dtype=tf.float64)) #[์ถ๋ ฅ์]
#type ์ผ์น
5. ํ๊ท๋ฐฉ์ ์
def linear_model(X) : #train, test
y_pred = tf.matmul(X, w) + b #ํ๋ ฌ๊ณฑ : [None,10]*[10,1]=[None,1]
return y_pred
# 6. apply the softmax activation function
def soft_fn(X):
    y_pred = linear_model(X)
    soft = tf.nn.softmax(y_pred)
    return soft
7. ์์ค ํจ์ ์ ์ : ์์ค๊ณ์ฐ์ ์์
def loss_fn() : #์ธ์ ์์
soft = soft_fn(x_train) #ํ๋ จ์
-> ์์ธก์น : ํ๊ท๋ฐฉ์ ์
loss = -tf.reduce_mean(y_train*tf.math.log(soft)+(1-y_train)*tf.math.log(1-soft))
return loss
8. ์ต์ ํ ๊ฐ์ฒด
optimizer = tf.optimizers.Adam(lr=0.01)
9. ๋ฐ๋ณตํ์ต
loss_val = []
for step in range(2000) :
#์ค์ฐจ์ ๊ณฑํ๊ท ์ต์ ํ : ์์ค๊ฐ ์ต์ํ -> [a, b] ๊ฐฑ์ (update)
optimizer.minimize(loss_fn, var_list=[w, b]) #(์์ค๊ฐ, ์์ ๋์)
#100๋ฐฐ์ ๋จ์ ์ถ๋ ฅ
if (step+1) % 200 == 0 :
print("step =", (step+1), ", loss =", loss_fn().numpy())
loss_val.append(loss_fn().numpy())
10. ์ ์ ํ๋ model ๊ฒ์ฆ
soft_re = soft_fn(x_test).numpy()
y_pred = tf.argmax(soft_re, 1) #demension : 2d
y_true = tf.argmax(y_test, 1) #demension : 2d
acc = accuracy_score(y_true, y_pred)
print('accuracy =', acc) #accuracy = 0.98
# 11. visualize loss value vs. epochs
import matplotlib.pyplot as plt
plt.plot(loss_val, 'r--')
plt.show()
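For a more readable figure, a slightly fuller sketch (same matplotlib import, assuming loss_val was recorded per step as above) that adds axis labels:

plt.plot(loss_val, 'r--')
plt.xlabel('step')  # training step index
plt.ylabel('loss')  # cross-entropy loss value
plt.title('loss value vs. training steps')
plt.show()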