MLP Model Design and Training, Part 1 (MNIST dataset) => Upgrade

  1.  Added Dropout
  2.  Applied Batch Normalization
  3.  Applied a weight initialization method
  4.  Changed the optimization method

A total of four things were upgraded, and comments were added only to the parts that differ from the Part 1 source code. The Linear -> BatchNorm -> ReLU -> Dropout ordering used in the hidden layers is sketched in isolation right below.
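For a quick feel of changes 1 and 2 before the full listing, here is a minimal, self-contained sketch (separate from the model below) of one hidden block in the order the upgraded code uses: Linear -> BatchNorm -> ReLU -> Dropout. The layer sizes and the dummy batch are just placeholders.

import torch
import torch.nn as nn
import torch.nn.functional as F

fc = nn.Linear(28*28, 512)        # fully connected layer, same size as in the model below
bn = nn.BatchNorm1d(512)          # normalizes each mini-batch so the layer's input distribution stays stable

x = torch.randn(64, 28*28)        # dummy batch of 64 flattened 28x28 "images"
h = F.relu(bn(fc(x)))             # BatchNorm applied before the ReLU activation
h_train = F.dropout(h, p=0.3, training=True)    # training: ~30% of activations are zeroed (the rest are rescaled)
h_eval  = F.dropout(h, p=0.3, training=False)   # evaluation: dropout is a no-op, every node is kept
print(h_train.shape, h_eval.shape)              # torch.Size([64, 512]) torch.Size([64, 512])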

import torch
import numpy as np
import os
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms, datasets
import torch.nn.init as init
 
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
 
BATCH_SIZE = 64
EPOCHS = 10
 
if torch.cuda.is_available():
    DEVICE = torch.device('cuda')
else:
    DEVICE = torch.device('cpu')
 
print(DEVICE)
 
 
train_dataset = datasets.MNIST(root="./data/MNIST",
                               train=True,
                               download=True,
                               transform=transforms.ToTensor())
 
test_dataset = datasets.MNIST(root="./data/MNIST",
                              train=False,
                              download=True,
                              transform=transforms.ToTensor())
 
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=BATCH_SIZE,
                                           shuffle=True)
 
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                           batch_size=BATCH_SIZE,
                                           shuffle=False)
 
 
 
class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(28*28, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 10)
        self.dropout = 0.3
        self.batch_norm_1 = nn.BatchNorm1d(512)
        self.batch_norm_2 = nn.BatchNorm1d(256)
 
    def forward(self, x):
        x = x.view(-1, 28*28)
        x = self.fc1(x)
 
        # Batch Normalization: if the input distribution changes from layer to layer, training slows down,
        # so each mini-batch is normalized here. It can be applied either before or after the activation function.
        x = self.batch_norm_1(x)
        x = F.relu(x)  # activation function changed to ReLU
 
        # During training, Dropout randomly selects nodes and leaves their weights out of the update,
        # but at evaluation time every node is used, so training=self.training becomes False there.
        x = F.dropout(x, training=self.training, p=self.dropout)
 
        x = self.fc2(x)
        x = self.batch_norm_2(x)
        x = F.relu(x)
        x = F.dropout(x, training=self.training, p=self.dropout)
 
        x = self.fc3(x)
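        # note: nn.CrossEntropyLoss (used below) already applies log-softmax to its input internally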
        x = F.softmax(x, dim=1)
        return x
 
 
def weight_initializer(m):
    if isinstance(m, nn.Linear):
        init.kaiming_uniform_(m.weight.data)
 
 
model = MLP().to(DEVICE)
model.apply(weight_initializer)  # instead of leaving the weights at the default random values, an initialization scheme (He/Kaiming uniform) is applied
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)  # the optimization method was also changed (to Adam)
criterion = nn.CrossEntropyLoss()
 
print(model)
 
 
def train(model, train_loader, optimizer, interval):
    model.train()
 
    for idx, (image, label) in enumerate(train_loader):
        image = image.to(DEVICE)
        label = label.to(DEVICE)
        optimizer.zero_grad()
        output = model(image)
        loss = criterion(output, label)
        loss.backward()
        optimizer.step()
 
        if idx % interval == 0:
            print('train epoch: {}, {}/{} train_loss: {}'.
                    format(epoch, idx*len(image), len(train_loader.dataset), loss.item()))
 
 
def evaluate(model, test_loader):
    model.eval()
    model.training = False  # Dropout is not used during evaluation (model.eval() above already sets this flag)
    test_loss = 0
    right = 0
 
    with torch.no_grad():
        for image, label in test_loader:
            image = image.to(DEVICE)
            label = label.to(DEVICE)
            output = model(image)
            test_loss += criterion(output, label).item()
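            # take the index of the largest output value as the predicted class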
            pred = output.max(1, keepdim=True)[1]
            right += pred.eq(label.view_as(pred)).sum().item()
 
    test_loss /= len(test_loader.dataset)
    test_acc = right/len(test_loader.dataset) * 100
 
    return test_loss, test_acc
 
 
for epoch in range(1, EPOCHS+1):
    train(model, train_loader, optimizer, 200)
    test_loss, test_acc = evaluate(model, test_loader)
    print("test_loss: {}, test_acc: {}".format(test_loss, test_acc))
 
 
 
 
 
 
 
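As a side note on the Dropout handling: F.dropout is only active while its training argument is True, and model.eval() already sets model.training to False for the whole module, so the extra assignment in evaluate() is a safety net rather than a requirement. A minimal check, assuming the model object defined above:

model.train()
print(model.training)   # True  -> F.dropout(x, training=self.training, ...) drops nodes
model.eval()
print(model.training)   # False -> dropout is skipped and every node is used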

 

Running the source code above produces the results below.

 
