-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathbaseline_gm.py
More file actions
150 lines (119 loc) · 5.52 KB
/
baseline_gm.py
File metadata and controls
150 lines (119 loc) · 5.52 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
import torch
import torch.nn.functional as F
from model_gm import get_model_from_name
import routines_gm as routines
def get_avg_parameters(networks, weights=None):
    """Combine the parameters of several same-shaped networks position-wise.

    The ``parameters()`` iterators of all networks are walked in lockstep, so
    the k-th entry of the result is built from the k-th parameter of every
    network. If the networks expose different numbers of parameters, ``zip``
    stops at the shortest one.

    Args:
        networks: Iterable of models exposing ``parameters()``. Parameters at
            the same position must have the same shape so they can be stacked.
        weights: Optional sequence with one scalar per network. When given,
            each result is the weighted *sum* of the parameters (the weights
            are not normalised here). When ``None``, the plain mean is used.

    Returns:
        A list with one combined tensor per parameter position.

    Raises:
        ValueError: If ``weights`` is given but its length differs from the
            number of networks.
    """
    networks = list(networks)
    if weights is not None and len(weights) != len(networks):
        # A longer list used to be silently truncated and a shorter one
        # surfaced as a bare IndexError; fail early with a clear message.
        raise ValueError(
            "expected one weight per network, got {} weights for {} networks".format(
                len(weights), len(networks)))

    avg_pars = []
    # Each par_group holds the parameter found at one position in every network.
    for par_group in zip(*[net.parameters() for net in networks]):
        if weights is not None:
            weighted_par_group = [par * weights[i] for i, par in enumerate(par_group)]
            avg_par = torch.sum(torch.stack(weighted_par_group), dim=0)
        else:
            avg_par = torch.mean(torch.stack(par_group), dim=0)
        avg_pars.append(avg_par)
    return avg_pars
def naive_ensembling(args, networks, test_loader):
    """Evaluate naive ensembling: merge models by averaging their parameters.

    A fresh model is created with ``get_model_from_name(args)`` and its
    parameters are overwritten with the position-wise weighted combination of
    the input networks' parameters, using the weights
    ``[1 - args.ensemble_step, args.ensemble_step]``. The merged model is then
    evaluated with ``routines.test``.

    Args:
        args: Run configuration. This function reads ``width_ratio``,
            ``ensemble_step`` and ``gpu_id``; ``args`` is also forwarded to
            ``get_model_from_name`` and ``routines.test``.
        networks: The models to merge. Two weights are built, so two networks
            are expected.
        test_loader: Forwarded unchanged to ``routines.test``.

    Returns:
        A tuple ``(result of routines.test, merged network)``, or
        ``(-1, None)`` when ``args.width_ratio != 1``.

    Raises:
        ValueError: If the freshly built model does not have the same number
            of parameters as the averaged list.
    """
    # Position-wise averaging only makes sense for identically shaped models.
    if args.width_ratio != 1:
        print("Unfortunately naive ensembling can't work if models are not of same shape!")
        return -1, None

    weights = [(1 - args.ensemble_step), args.ensemble_step]
    avg_pars = get_avg_parameters(networks, weights)

    ensemble_network = get_model_from_name(args)
    # put on GPU
    if args.gpu_id != -1:
        ensemble_network = ensemble_network.cuda(args.gpu_id)

    # avg_pars is ordered like parameters(), so copy parameter-to-parameter.
    # Indexing it by position in state_dict() (as before) misaligns as soon as
    # the model has buffers, because state_dict() lists those as well.
    # Buffers are not averaged here; they keep the fresh model's values.
    target_pars = list(ensemble_network.parameters())
    if len(target_pars) != len(avg_pars):
        raise ValueError(
            "ensemble model has {} parameters but {} averaged parameters were computed".format(
                len(target_pars), len(avg_pars)))
    with torch.no_grad():
        for target_par, avg_par in zip(target_pars, avg_pars):
            target_par.copy_(avg_par)

    # check the test performance of the method after ensembling
    log_dict = {'test_losses': []}
    return routines.test(args, ensemble_network, test_loader, log_dict), ensemble_network
def prediction_ensembling(args, networks, test_loader):
    """Evaluate prediction ensembling: keep both models and mix their outputs.

    Every batch is passed through both networks, the outputs are combined as
    a weighted sum, and the accuracy of the combined prediction is measured.
    The average loss and the accuracy are printed.

    Args:
        args: Run configuration. Reads ``dataset`` (case-insensitive; a loss
            is only computed for ``'cifar10'`` and ``'mnist'``), ``gpu_id``
            (``-1`` keeps the batches where they are), ``prediction_wts``, and
            ``ensemble_step`` (only read when ``prediction_wts`` is truthy).
        networks: Exactly two models. They are switched to eval mode.
        test_loader: Iterable of ``(data, target)`` batches that also exposes
            ``dataset`` with a length.

    Returns:
        The accuracy of the combined prediction in percent, as a float.

    Raises:
        AssertionError: If ``networks`` does not contain exactly two models.
    """
    # Kept as an assert so callers see the same exception type as before;
    # checked once here instead of once per batch.
    assert len(networks) == 2

    dataset_name = args.dataset.lower()

    # Mixing weights do not depend on the batch, so build them once.
    if args.prediction_wts:
        wts = [(1 - args.ensemble_step), args.ensemble_step]
    else:
        wts = [0.5, 0.5]

    # set all the networks in eval mode
    for net in networks:
        net.eval()

    test_loss = 0
    correct = 0
    # Pure evaluation: no autograd graph is needed.
    with torch.no_grad():
        for data, target in test_loader:
            if args.gpu_id != -1:
                data = data.cuda(args.gpu_id)
                target = target.cuda(args.gpu_id)

            outputs = [wts[idx] * net(data) for idx, net in enumerate(networks)]
            # sum because the outputs were already multiplied by wts above
            output = torch.sum(torch.stack(outputs), dim=0)

            # Per the original note: mnist models return log_softmax outputs,
            # while cifar ones return raw values. Both losses are summed over
            # the batch so that dividing by the dataset size below yields a
            # true per-sample average (the cifar branch used a per-batch mean).
            if dataset_name == 'cifar10':
                test_loss += F.cross_entropy(output, target, reduction='sum').item()
            elif dataset_name == 'mnist':
                test_loss += F.nll_loss(output, target, reduction='sum').item()

            pred = output.max(1, keepdim=True)[1]
            # .item() keeps the running count a plain Python int.
            correct += pred.eq(target.view_as(pred)).sum().item()

    n_samples = len(test_loader.dataset)
    test_loss /= n_samples
    print('\nTest set: Avg. loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, n_samples,
        100. * correct / n_samples))
    return (float(correct) * 100.0) / n_samples