I am using the gpytorch library to apply deep Gaussian processes to a regression problem. Here is my code:
import torch
import tqdm.notebook
import gpytorch
from gpytorch.models.deep_gps import DeepGPLayer, DeepGP
from gpytorch.means import ConstantMean, ZeroMean, LinearMean
from gpytorch.kernels import RBFKernel, ScaleKernel, SpectralMixtureKernel
from gpytorch.variational import VariationalStrategy, CholeskyVariationalDistribution, MultitaskVariationalStrategy
from gpytorch.distributions import MultivariateNormal
from gpytorch.likelihoods import GaussianLikelihood, MultitaskGaussianLikelihood
from torch.utils.data import TensorDataset, DataLoader
from gpytorch.mlls import DeepApproximateMLL
from gpytorch.mlls import VariationalELBO
def hidden_size_extract(kwargs, name, delete_from_dict=False):
    if name not in kwargs:
        hidden_size = []
        for i in range(0, 6):
            key = name + '_%d' % i
            if key in kwargs and kwargs[key] != 0:
                hidden_size.append(kwargs[key])
                if delete_from_dict:
                    kwargs.pop(key)
    else:
        hidden_size = kwargs[name].copy()
        if delete_from_dict:
            kwargs.pop(name)
    return hidden_size
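# Hypothetical usage of the helper above (these kwargs are illustrative only):
#   hidden_size_extract({'hidden_size': [8, 4]}, 'hidden_size')                  -> [8, 4]
#   hidden_size_extract({'hidden_size_0': 8, 'hidden_size_1': 4}, 'hidden_size') -> [8, 4]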
class DGPHiddenLayer(DeepGPLayer):
    def __init__(self, input_dims, output_dims, device, num_inducing, mean_type='zero', num_mixtures=None):
        if num_mixtures is None:
            self._num_mixtures = 2 * output_dims
        else:
            self._num_mixtures = num_mixtures
        print(mean_type)
        if output_dims is None:
            inducing_points = torch.randn(num_inducing, input_dims).to(device=device)
            batch_shape = torch.Size([])
        else:
            inducing_points = torch.randn(output_dims, num_inducing, input_dims).to(device=device)
            batch_shape = torch.Size([output_dims])
        # Sparse variational formulation (inducing points initialised with randn)
        variational_distribution = CholeskyVariationalDistribution(num_inducing_points=num_inducing, batch_shape=batch_shape)
        if output_dims is None:
            variational_strategy = VariationalStrategy(
                self,
                inducing_points,
                variational_distribution,
                learn_inducing_locations=True)
        else:
            variational_strategy = MultitaskVariationalStrategy(
                VariationalStrategy(
                    self,
                    inducing_points,
                    variational_distribution,
                    learn_inducing_locations=True
                ),
                num_tasks=output_dims
            )
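        # MultitaskVariationalStrategy wraps the batch of independent output GPs
        # so the layer returns a MultitaskMultivariateNormal with num_tasks = output_dims.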
        super().__init__(variational_strategy, input_dims, output_dims)
        self._output_dims = output_dims
        if mean_type == 'constant':
            self.mean_module = gpytorch.means.MultitaskMean(
                ConstantMean(batch_shape=batch_shape),
                num_tasks=output_dims
            )
        elif mean_type == 'zero':
            self.mean_module = gpytorch.means.MultitaskMean(
                ZeroMean(batch_shape=batch_shape),
                num_tasks=output_dims
            )
        else:
            self.mean_module = gpytorch.means.MultitaskMean(
                LinearMean(input_dims),
                num_tasks=output_dims
            )
        base_kernel = gpytorch.kernels.SpectralMixtureKernel(
            ard_num_dims=input_dims, num_mixtures=self._num_mixtures,
            batch_shape=batch_shape)
        self.covar_module = gpytorch.kernels.MultitaskKernel(base_kernel, num_tasks=output_dims, rank=1)
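        # MultitaskKernel forms the Kronecker product of the data kernel and a
        # learned num_tasks x num_tasks task covariance (this is the
        # KroneckerProductLazyTensor that appears in the traceback below).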
    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultitaskMultivariateNormal(mean_x, covar_x)
    def __call__(self, x, *other_inputs, **kwargs):
        if len(other_inputs):
            if isinstance(x, gpytorch.distributions.MultitaskMultivariateNormal):
                x = x.rsample()
            processed_inputs = [
                inp.unsqueeze(0).expand(gpytorch.settings.num_likelihood_samples.value(), *inp.shape)
                for inp in other_inputs
            ]
            print(x.shape, [inp.shape for inp in processed_inputs])
            x = torch.cat([x] + processed_inputs, dim=-1)
        return super().__call__(x, are_samples=bool(len(other_inputs)))
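    # __call__ follows the multiple-input pattern from the GPyTorch deep GP
    # examples: when extra deterministic inputs are passed alongside a layer
    # output, the output distribution is sampled and the extras are expanded
    # over the likelihood-sample dimension before concatenation; otherwise the
    # parent DeepGPLayer.__call__ handles the distribution input itself.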
class DeepGaussianProcesses(DeepGP):
    def __init__(self,
                 input_size,
                 output_size,
                 device,
                 num_inducing=128,
                 noise_constraint=None,
                 max_cholesky_size=10000,
                 optimizer=None,
                 **kwargs):
        self.mean_type = kwargs.get('mean_type', None)
        super(DeepGaussianProcesses, self).__init__()
        # Hidden layer sizes may be passed as an array or as separate arguments
        hidden_size = hidden_size_extract(kwargs, 'hidden_size')
        self.output_size = output_size
        self.hidden_size = hidden_size
        self.hidden_size.append(output_size)
        self.max_cholesky_size = max_cholesky_size
        self.device = device
        if self.mean_type == 'zero':
            means = (len(self.hidden_size) - 1) * ['linear'] + ['zero']
        else:
            means = (len(self.hidden_size) - 1) * ['linear'] + ['constant']  # the last layer uses a constant mean
        # Variable number of hidden layers and units per layer
        hidden_layers = torch.nn.ModuleList([DGPHiddenLayer(
            input_dims=input_size,
            output_dims=self.hidden_size[0],
            device=self.device,
            mean_type=means[0],
            num_inducing=num_inducing,
        )])
        for i in range(len(self.hidden_size) - 1):
            hidden_layers.append(DGPHiddenLayer(
                input_dims=self.hidden_size[i],
                output_dims=self.hidden_size[i + 1],
                device=self.device,
                mean_type=means[i + 1],
                num_inducing=num_inducing,
            ))
        self.hidden_layers = hidden_layers
        if output_size is None:
            self.likelihood = GaussianLikelihood(noise_constraint=noise_constraint)
        else:
            self.likelihood = MultitaskGaussianLikelihood(num_tasks=output_size, noise_constraint=noise_constraint)
        if optimizer is not None:
            self._optimizer = optimizer
        else:
            # self.parameters() covers the hidden layers and the likelihood noise
            self._optimizer = torch.optim.Adam(self.parameters(), lr=1e-3, betas=(0.5, 0.99))
        self.to(device=self.device)
    def forward(self, inputs):
        output = self.hidden_layers[0](inputs)
        for hid_layer in self.hidden_layers[1:]:
            output = hid_layer(output)
        return output
    def train_gp(self, train_x, train_y, num_epochs=20, num_samples=10, batch_size=200):
        scheduler = torch.optim.lr_scheduler.MultiStepLR(
            self._optimizer, milestones=[int(0.5 * num_epochs), int(0.75 * num_epochs)])
        train_dataset = TensorDataset(train_x, train_y)
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
        mll = DeepApproximateMLL(VariationalELBO(self.likelihood, self, num_data=len(train_x)))
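        # DeepApproximateMLL wraps the ELBO so that the likelihood term is
        # averaged over the samples propagated through the deep GP layers.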
        self.losses = []
        epochs_iter = tqdm.notebook.tqdm(range(num_epochs), desc="GP training Epoch")
        self.train()
        with gpytorch.settings.max_cholesky_size(self.max_cholesky_size):
            for i in epochs_iter:
                # Within each epoch, iterate over minibatches of the data
                minibatch_iter = tqdm.notebook.tqdm(train_loader, desc="Minibatch", leave=False)
                for x_batch, y_batch in minibatch_iter:
                    with gpytorch.settings.num_likelihood_samples(num_samples):
                        self._optimizer.zero_grad()
                        output = self(x_batch)
                        #print(x_batch.shape, output, output.mean.shape, y_batch.shape)
                        loss = -mll(output, y_batch)
                        loss.backward()
                        self._optimizer.step()
                        self.losses.append(loss.item())
                        minibatch_iter.set_postfix(loss=loss.item())
                scheduler.step()  # step once per epoch: the milestones are epoch counts
model = DeepGaussianProcesses(train_x.shape[-1], train_y.shape[-1], num_inducing=120, mean_type='zero',
                              device=torch.device("cuda:0" if torch.cuda.is_available() else "cpu"))
model.train_gp(train_x, train_y, num_epochs=350)
nmll, nll, rmse = model.evaluation(train_x, train_y, test_x, test_y)
I get the following error:
input:torch.Size([10000, 5])
output:torch.Size([10000, 4])
--> 245 output = self(x_batch)
246 #print(x_batch.shape, output, output.mean.shape, y_batch.shape)
247 loss = -mll(output, y_batch)
/usr/local/lib/python3.7/dist-packages/gpytorch/module.py in __call__(self, *inputs, **kwargs)
28
29 def __call__(self, *inputs, **kwargs):
---> 30 outputs = self.forward(*inputs, **kwargs)
31 if isinstance(outputs, list):
32 return [_validate_module_outputs(output) for output in outputs]
<ipython-input-12-44b9caffc31f> in forward(self, inputs)
192
193 def forward(self, inputs):
--> 194 output = self.hidden_layers[0](inputs)
195 for hid_layer in self.hidden_layers[1:]:
196 output = hid_layer(output)
<ipython-input-12-44b9caffc31f> in __call__(self, x, *other_inputs, **kwargs)
125 x = torch.cat([x] + processed_inputs, dim=-1)
126
--> 127 return super().__call__(x, are_samples=bool(len(other_inputs)))
128
129
/usr/local/lib/python3.7/dist-packages/gpytorch/models/deep_gps/deep_gp.py in __call__(self, inputs, are_samples, **kwargs)
97
98 # Now run samples through the GP
---> 99 output = ApproximateGP.__call__(self, inputs)
100 if self.output_dims is not None:
101 mean = output.loc.transpose(-1, -2)
/usr/local/lib/python3.7/dist-packages/gpytorch/models/approximate_gp.py in __call__(self, inputs, prior, **kwargs)
106 if inputs.dim() == 1:
107 inputs = inputs.unsqueeze(-1)
--> 108 return self.variational_strategy(inputs, prior=prior, **kwargs)
/usr/local/lib/python3.7/dist-packages/gpytorch/variational/independent_multitask_variational_strategy.py in __call__(self, x, task_indices, prior, **kwargs)
54 See :class:`LMCVariationalStrategy`.
55 """
---> 56 function_dist = self.base_variational_strategy(x, prior=prior, **kwargs)
57
58 if task_indices is None:
/usr/local/lib/python3.7/dist-packages/gpytorch/variational/variational_strategy.py in __call__(self, x, prior, **kwargs)
240 self.updated_strategy.fill_(True)
241
--> 242 return super().__call__(x, prior=prior, **kwargs)
/usr/local/lib/python3.7/dist-packages/gpytorch/variational/_variational_strategy.py in __call__(self, x, prior, **kwargs)
305 inducing_values=variational_dist_u.mean,
306 variational_inducing_covar=variational_dist_u.lazy_covariance_matrix,
--> 307 **kwargs,
308 )
309 elif isinstance(variational_dist_u, Delta):
/usr/local/lib/python3.7/dist-packages/gpytorch/module.py in __call__(self, *inputs, **kwargs)
28
29 def __call__(self, *inputs, **kwargs):
---> 30 outputs = self.forward(*inputs, **kwargs)
31 if isinstance(outputs, list):
32 return [_validate_module_outputs(output) for output in outputs]
/usr/local/lib/python3.7/dist-packages/gpytorch/variational/variational_strategy.py in forward(self, x, inducing_points, inducing_values, variational_inducing_covar, **kwargs)
167 num_induc = inducing_points.size(-2)
168 test_mean = full_output.mean[..., num_induc:]
--> 169 induc_induc_covar = full_covar[..., :num_induc, :num_induc].add_jitter()
170 induc_data_covar = full_covar[..., :num_induc, num_induc:].evaluate()
171 data_data_covar = full_covar[..., num_induc:, num_induc:]
/usr/local/lib/python3.7/dist-packages/gpytorch/lazy/lazy_evaluated_kernel_tensor.py in __getitem__(self, index)
401 _, row_index, col_index = index
402 batch_indices = [slice(None, None, None)] * (self.dim() - 2)
--> 403 return self._getitem(row_index, col_index, *batch_indices)
404 else:
405 return super().__getitem__(index)
/usr/local/lib/python3.7/dist-packages/gpytorch/lazy/lazy_evaluated_kernel_tensor.py in _getitem(self, row_index, col_index, *batch_indices)
69 if not isinstance(x1, slice) or not isinstance(x2, slice):
70 # It's too complicated to deal with tensor indices in this case - we'll use the super method
---> 71 return self.evaluate_kernel()._getitem(row_index, col_index, *batch_indices)
72
73 # Now we know that x1 and x2 are slices
/usr/local/lib/python3.7/dist-packages/gpytorch/utils/memoize.py in g(self, *args, **kwargs)
57 kwargs_pkl = pickle.dumps(kwargs)
58 if not _is_in_cache(self, cache_name, *args, kwargs_pkl=kwargs_pkl):
---> 59 return _add_to_cache(self, cache_name, method(self, *args, **kwargs), *args, kwargs_pkl=kwargs_pkl)
60 return _get_from_cache(self, cache_name, *args, kwargs_pkl=kwargs_pkl)
61
/usr/local/lib/python3.7/dist-packages/gpytorch/lazy/lazy_evaluated_kernel_tensor.py in evaluate_kernel(self)
339 diag=False,
340 last_dim_is_batch=self.last_dim_is_batch,
--> 341 **self.params,
342 )
343 self.kernel.active_dims = temp_active_dims
/usr/local/lib/python3.7/dist-packages/gpytorch/kernels/kernel.py in __call__(self, x1, x2, diag, last_dim_is_batch, **params)
406 res = LazyEvaluatedKernelTensor(x1_, x2_, kernel=self, last_dim_is_batch=last_dim_is_batch, **params)
407 else:
--> 408 res = lazify(super(Kernel, self).__call__(x1_, x2_, last_dim_is_batch=last_dim_is_batch, **params))
409 return res
410
/usr/local/lib/python3.7/dist-packages/gpytorch/module.py in __call__(self, *inputs, **kwargs)
28
29 def __call__(self, *inputs, **kwargs):
---> 30 outputs = self.forward(*inputs, **kwargs)
31 if isinstance(outputs, list):
32 return [_validate_module_outputs(output) for output in outputs]
/usr/local/lib/python3.7/dist-packages/gpytorch/kernels/multitask_kernel.py in forward(self, x1, x2, diag, last_dim_is_batch, **params)
49 covar_i = covar_i.repeat(*x1.shape[:-2], 1, 1)
50 covar_x = lazify(self.data_covar_module.forward(x1, x2, **params))
---> 51 res = KroneckerProductLazyTensor(covar_x, covar_i)
52 return res.diag() if diag else res
53
/usr/local/lib/python3.7/dist-packages/gpytorch/lazy/kronecker_product_lazy_tensor.py in __init__(self, *lazy_tensors)
77 raise RuntimeError(
78 "KroneckerProductLazyTensor expects lazy tensors with the "
---> 79 "same batch shapes. Got {}.".format([lv.batch_shape for lv in lazy_tensors])
80 )
81 super().__init__(*lazy_tensors)
RuntimeError: KroneckerProductLazyTensor expects lazy tensors with the same batch shapes. Got [torch.Size([4]), torch.Size([16])].
There are not many examples for this library, and I can't figure out how to get rid of this error. Any suggestions?
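For reference, the hidden layer in the official GPyTorch deep GP tutorial builds a batched kernel over the output dimensions instead of a MultitaskKernel, and lets the MultitaskVariationalStrategy wrapper combine the independent outputs. A minimal sketch of that construction (adapted from the docs, reusing the batch_shape and input_dims defined in my layer above):

# Tutorial-style layer internals: one independent batched GP per output;
# no MultitaskKernel or MultitaskMean inside the layer itself.
mean_module = ConstantMean(batch_shape=batch_shape)
covar_module = ScaleKernel(
    RBFKernel(batch_shape=batch_shape, ard_num_dims=input_dims),
    batch_shape=batch_shape,
)
# forward() then returns a plain MultivariateNormal(mean_x, covar_x), and the
# MultitaskVariationalStrategy turns the batch into a multitask output.

Is the MultitaskKernel approach fundamentally incompatible with DeepGPLayer here, or is there a way to make the batch shapes consistent?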