(Tensorflow 2.x) Classification (다중 분류)

2021. 10. 23. 22:20

import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
 
from tensorflow.keras.datasets import reuters
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Flatten, Dense
from tensorflow.keras.optimizers import SGD,  Adam
from keras.utils.np_utils import to_categorical
 
print(tf.__version__)


# Multi-class classification


# Build the dataset
# Reuters dataset => 46 classes (single-label multi-class: each sample belongs to exactly one category)

(x_train, y_train), (x_test, y_test) = reuters.load_data(num_words=10000)  # keep only the 10,000 most frequent words

print(x_train.shape, y_train.shape)

print(x_train[0])  # each sample is a Python list, not a numpy array; the lists must be converted to tensors
print(y_train[0])
# The vocabulary is capped at 10,000 words -> the largest word index is 9,999.
# The loop variable is named `sequence` (not `str`) so the builtin is never shadowed,
# and a generator is used so no intermediate list is built.
print(max(max(sequence) for sequence in x_train))
 
def vectorize(seq, dim=10000):
    """Multi-hot encode a collection of integer index sequences.

    Args:
        seq: Sized iterable whose items are sequences of integer indices,
            each index expected to be in ``[0, dim)``.
        dim: Width of every encoded row.

    Returns:
        A ``(len(seq), dim)`` numpy array of zeros in which row ``i`` holds
        ``1.0`` at every index that occurs in ``seq[i]``. Repeated indices
        still yield a single ``1.0``.
    """
    shape = (len(seq), dim)
    encoded = np.zeros(shape)
    for row, indices in enumerate(seq):
        # Fancy indexing switches on every listed column of this row at once.
        encoded[row, indices] = 1.0
    return encoded
 
# Multi-hot encode the word-index sequences into fixed-width float vectors.
x_train = vectorize(x_train)
x_test = vectorize(x_test)
print(x_train[0])

# Convert the integer labels to one-hot vectors.
# num_classes is passed explicitly: without it, to_categorical infers the width
# from the largest label present in each array, so the train and test encodings
# could end up with different widths. 46 is the number of Reuters classes.
y_train = to_categorical(y_train, num_classes=46)
y_test = to_categorical(y_test, num_classes=46)

# How to inspect the raw text of a sample.
# NOTE: this must run on the raw integer sequences, i.e. BEFORE the vectorize()
# calls above, because afterwards x_train[0] is a multi-hot vector rather than
# a list of word indices.
# w_index = reuters.get_word_index()  # dict mapping each word to its integer index
# rev_index = {value: key for (key, value) in w_index.items()}  # invert it: integer index -> word
# word_test = ' '.join([rev_index.get(i-3, '?') for i in x_train[0]])  # indices are offset by 3 (per the original note, the first three are reserved); '?' is the fallback
# print(word_test)
 
 
# Build the model: two ReLU hidden layers feeding a 46-way softmax output
model = Sequential([
    Dense(128, activation='relu', input_shape=(10000,)),
    Dense(64, activation='relu'),
    Dense(46, activation='softmax'),
])


# Compile the model
# NOTE(review): the original note says compilation "is performed in the part
# that loads the model" -- confirm what was intended.
# If the target labels were scalar class ids instead of one-hot vectors,
# loss='sparse_categorical_crossentropy' would be used instead.
model.compile(
    loss='categorical_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)


# Train the model, holding out 20% of the training data for validation
history = model.fit(
    x_train,
    y_train,
    batch_size=512,
    epochs=20,
    validation_split=0.2,
)

print(history.history.keys())  # dict_keys(['loss', 'accuracy', 'val_loss', 'val_accuracy'])


# Evaluate the model and make predictions
result = model.evaluate(x_test, y_test)
print(result)

pred = model.predict(x_test)
print(pred[7].shape)  # shape of a single prediction
print(np.sum(pred[7]))  # sum of all elements of the prediction vector
print(np.argmax(pred[7]))  # class the model predicts for this input

# Saving and loading the model
# model.save("model_name.h5")
# model = tf.keras.models.load_model("model_name.h5")
 
 
# Inspect how the training and validation loss evolve over the epochs
loss, val_loss = (history.history[key] for key in ('loss', 'val_loss'))

# Epochs are numbered from 1 on the x-axis.
epochs = range(1, 1 + len(loss))

# Training loss as blue dots, validation loss as a solid blue line.
plt.plot(
    epochs, loss, 'bo',
    label="train_loss",
)
plt.plot(
    epochs, val_loss, 'b',
    label="val_loss",
)
plt.xlabel("epoch")
plt.ylabel("loss")
plt.legend(loc='best')
plt.show()
 

Colored by Color Scripter

위 코드의 출력값(손실 그래프)을 보면, 대략 epoch 9 부근부터 과적합이 시작되는 것으로 보인다. (정확도는 약 77%)

저작자표시 비영리 변경금지

'머신러닝_딥러닝 > Tensorflow + Keras' 카테고리의 다른 글

(Tensorflow 2.x) Fashion MNIST (With CNN) (0)	2021.10.23
(Tensorflow 2.x) MNIST (With CNN) (0)	2021.10.23
(Tensorflow 2.x) Logistic Regression 2탄 (0)	2021.10.23
(Tensorflow 2.x) Logistic Regression 1탄 (0)	2021.10.23
(Tensorflow 2.x) Regression 2탄 (0)	2021.10.23

내 블로그 - 관리자 홈 전환	`Q` `Q`
새 글 쓰기	`W` `W`

글 수정 (권한 있는 경우)	`E` `E`
댓글 영역으로 이동	`C` `C`

이 페이지의 URL 복사	`S` `S`
맨 위로 이동	`T` `T`
티스토리 홈 이동	`H` `H`
단축키 안내	`Shift` + `/` `⇧` + `/`

누구나 쉽게, 인공지능

(Tensorflow 2.x) Classification (다중 분류)

'머신러닝_딥러닝 > Tensorflow + Keras' 카테고리의 다른 글

+ Recent posts

티스토리툴바

단축키

내 블로그

블로그 게시글

모든 영역