
RuntimeError: The size of tensor a (1160) must match the size of tensor b (0) at non-singleton dimension 2

Dalek · asked 3 years ago

    I am using the gpytorch library to apply deep Gaussian processes for regression. Here is my code:

    import torch
    import tqdm.notebook
    import gpytorch
    from gpytorch.models.deep_gps import DeepGPLayer, DeepGP
    from gpytorch.means import ConstantMean, ZeroMean, LinearMean
    from gpytorch.kernels import RBFKernel, ScaleKernel, SpectralMixtureKernel
    from gpytorch.variational import VariationalStrategy, CholeskyVariationalDistribution, MultitaskVariationalStrategy
    from gpytorch.distributions import MultivariateNormal
    from gpytorch.likelihoods import GaussianLikelihood, MultitaskGaussianLikelihood
    from torch.utils.data import TensorDataset, DataLoader
    from gpytorch.mlls import DeepApproximateMLL
    from gpytorch.mlls import VariationalELBO
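    # Helper: read hidden layer sizes either from a 'hidden_size' list in kwargs,
    # or from individual 'hidden_size_0' ... 'hidden_size_5' entries.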
    def hidden_size_extract(kwargs, name, delete_from_dict=False):
        if name not in kwargs:
            hidden_size = []
            for i in range(0, 6):
                key = name + '_%d' % i
                if key in kwargs and kwargs[key] != 0:
                    hidden_size.append(kwargs[key])
    
                    if delete_from_dict:
                        kwargs.pop(key)
        else:
            hidden_size = kwargs[name].copy()
    
            if delete_from_dict:
                kwargs.pop(name)
    
        return hidden_size
    
    class DGPHiddenLayer(DeepGPLayer):
    
        def __init__(self, input_dims, output_dims, device, num_inducing, mean_type='zero', num_mixtures=None):
            if num_mixtures is None:
                self._num_mixtures = 2 * output_dims
            else:
                self._num_mixtures = num_mixtures
            print(mean_type)
            if output_dims is None:
                inducing_points = torch.randn(num_inducing, input_dims).to(device=device)
                batch_shape = torch.Size([])
            else:
                inducing_points = torch.randn(output_dims, num_inducing, input_dims).to(device=device)
                batch_shape = torch.Size([output_dims])
            # Sparse Variational Formulation (inducing variables initialised as randn)
            variational_distribution = CholeskyVariationalDistribution(num_inducing_points=num_inducing, batch_shape=batch_shape)
            if output_dims is None:
                variational_strategy = VariationalStrategy(
                    self,
                    inducing_points,
                    variational_distribution,
                    learn_inducing_locations=True,
                )
            else:
                variational_strategy = MultitaskVariationalStrategy(
                    VariationalStrategy(
                        self,
                        inducing_points,
                        variational_distribution,
                        learn_inducing_locations=True,
                    ),
                    num_tasks=output_dims,
                )
            super().__init__(variational_strategy, input_dims, output_dims)
            self._output_dims = output_dims
            if mean_type == 'constant':
                self.mean_module = gpytorch.means.MultitaskMean(
                    ConstantMean(batch_shape=batch_shape), num_tasks=output_dims
                )
            elif mean_type == 'zero':
                self.mean_module = gpytorch.means.MultitaskMean(
                    ZeroMean(batch_shape=batch_shape), num_tasks=output_dims
                )
            else:
                self.mean_module = gpytorch.means.MultitaskMean(
                    LinearMean(input_dims), num_tasks=output_dims
                )
            base_kernel = gpytorch.kernels.SpectralMixtureKernel(
                ard_num_dims=input_dims,
                num_mixtures=output_dims,
                batch_shape=(output_dims,),
            )
            self.covar_module = gpytorch.kernels.MultitaskKernel(
                base_kernel, ard_num_dims=input_dims, num_tasks=output_dims, rank=1
            )
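            # Note: self._num_mixtures computed in __init__ is not used here; the
            # SpectralMixtureKernel above is given num_mixtures=output_dims instead.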
    
        def forward(self, x):
            mean_x = self.mean_module(x)
            covar_x = self.covar_module(x)
            return gpytorch.distributions.MultitaskMultivariateNormal(mean_x, covar_x)
    
        def __call__(self, x, *other_inputs, **kwargs):
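            # If extra inputs are passed, sample any distributional input and
            # broadcast the extras across the likelihood-sample dimension before
            # concatenating them to x along the feature axis.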
    
            if len(other_inputs):
                if isinstance(x, gpytorch.distributions.MultitaskMultivariateNormal):
                    x = x.rsample()
    
                processed_inputs = [
                    inp.unsqueeze(0).expand(gpytorch.settings.num_likelihood_samples.value(), *inp.shape)
                    for inp in other_inputs
                ]
                print(x.shape, [inp.shape for inp in processed_inputs])
                x = torch.cat([x] + processed_inputs, dim=-1)
    
            return super().__call__(x, are_samples=bool(len(other_inputs)))
    
    
    
    class DeepGaussianProcesses(DeepGP):
        def __init__(self, 
                     input_size, 
                     output_size, 
                     device, 
                     num_inducing=128,
                     noise_constraint=None, 
                     max_cholesky_size=10000,
                     optimizer = None,
                     **kwargs):
            self.mean_type = kwargs.get('mean_type', None)
                        
            super(DeepGaussianProcesses, self).__init__()
             
            # pass hidden layer sizes as separate arguments as well as array
            hidden_size = hidden_size_extract(kwargs, 'hidden_size')

            self.output_size = output_size
            self.hidden_size = hidden_size
            self.hidden_size.append(output_size)
            self.max_cholesky_size = max_cholesky_size
            self.device = device
            if self.mean_type == 'zero':
                means = (len(self.hidden_size) - 1) * ['linear'] + ['zero']
            else:
                # default: the last layer gets a constant mean
                means = (len(self.hidden_size) - 1) * ['linear'] + ['constant']
            hidden_layers = torch.nn.ModuleList([DGPHiddenLayer(
                input_dims=input_size,
                output_dims=self.hidden_size[0],
                device=self.device,
                mean_type=means[0],
                num_inducing=num_inducing,
            )])
            for i in range(len(self.hidden_size) - 1):
                hidden_layers.append(DGPHiddenLayer(
                    input_dims=self.hidden_size[i],
                    output_dims=self.hidden_size[i + 1],
                    device=self.device,
                    mean_type=means[i + 1],
                    num_inducing=num_inducing,
                ))

            self.hidden_layers = hidden_layers
            # variable count of hidden layers and neurons
            #print(self._layers)
            if optimizer is not None:
                self._optimizer = optimizer
            else:
                self._optimizer = torch.optim.Adam(
                    [{'params': self.hidden_layers.parameters()}],
                    lr=1e-3, betas=(0.5, 0.99),
                )

            if output_size is None:
                self.likelihood = GaussianLikelihood(noise_constraint=noise_constraint)
            else:
                self.likelihood = MultitaskGaussianLikelihood(num_tasks=output_size, noise_constraint=noise_constraint)
            self.to(device=self.device)
    
        def forward(self, inputs):
            output = self.hidden_layers[0](inputs)
            for hid_layer in self.hidden_layers[1:]:
                output = hid_layer(output)
            return output
    
        def train_gp(self, train_x, train_y, num_epochs=20, num_samples=10, batch_size=200):
            scheduler = torch.optim.lr_scheduler.MultiStepLR(
                self._optimizer, milestones=[int(0.5 * num_epochs), int(0.75 * num_epochs)]
            )
            train_dataset = TensorDataset(train_x, train_y)
            train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
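            # DeepApproximateMLL wraps the variational ELBO so it can serve as the
            # training objective for a multi-layer (deep) GP.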
            mll = DeepApproximateMLL(VariationalELBO(self.likelihood, self, num_data=len(train_x)))
            self.losses = []
            epochs_iter = tqdm.notebook.tqdm(range(num_epochs), desc="GP training Epoch")
            self.train()
            with gpytorch.settings.max_cholesky_size(self.max_cholesky_size):
                for i in epochs_iter:
                    # Within each iteration, we will go over each minibatch of data
                    minibatch_iter = tqdm.notebook.tqdm(train_loader, desc="Minibatch", leave=False)
                    for x_batch, y_batch in minibatch_iter:
                        with gpytorch.settings.num_likelihood_samples(num_samples):
                            self._optimizer.zero_grad()
                            output = self(x_batch)
                            #print(x_batch.shape, output, output.mean.shape, y_batch.shape)
                            loss = -mll(output, y_batch)
                            loss.backward()
                            self._optimizer.step()
                            self.losses.append(loss.item())
                            minibatch_iter.set_postfix(loss=loss.item())
                    # step the LR schedule once per epoch so the milestones line up with num_epochs
                    scheduler.step()
    
    model = DeepGaussianProcesses(
        train_x.shape[-1], train_y.shape[-1], num_inducing=120, mean_type='zero',
        device=torch.device("cuda:0" if torch.cuda.is_available() else "cpu"),
    )
    model.train_gp(train_x, train_y, num_epochs=350)
    nmll, nll, rmse = model.evaluation(train_x, train_y, test_x, test_y)
    

    I get the following error:

    input:torch.Size([10000, 5])
    output:torch.Size([10000, 4])
    
    --> 245                           output = self(x_batch)
        246                           #print(x_batch.shape, output, output.mean.shape, y_batch.shape)
        247                           loss = -mll(output, y_batch)
    
    /usr/local/lib/python3.7/dist-packages/gpytorch/module.py in __call__(self, *inputs, **kwargs)
         28 
         29     def __call__(self, *inputs, **kwargs):
    ---> 30         outputs = self.forward(*inputs, **kwargs)
         31         if isinstance(outputs, list):
         32             return [_validate_module_outputs(output) for output in outputs]
    
    <ipython-input-12-44b9caffc31f> in forward(self, inputs)
        192 
        193     def forward(self, inputs):
    --> 194         output = self.hidden_layers[0](inputs)
        195         for hid_layer in self.hidden_layers[1:]:
        196             output = hid_layer(output)
    
    <ipython-input-12-44b9caffc31f> in __call__(self, x, *other_inputs, **kwargs)
        125             x = torch.cat([x] + processed_inputs, dim=-1)
        126 
    --> 127         return super().__call__(x, are_samples=bool(len(other_inputs)))
        128 
        129 
    
    /usr/local/lib/python3.7/dist-packages/gpytorch/models/deep_gps/deep_gp.py in __call__(self, inputs, are_samples, **kwargs)
         97 
         98         # Now run samples through the GP
    ---> 99         output = ApproximateGP.__call__(self, inputs)
        100         if self.output_dims is not None:
        101             mean = output.loc.transpose(-1, -2)
    
    /usr/local/lib/python3.7/dist-packages/gpytorch/models/approximate_gp.py in __call__(self, inputs, prior, **kwargs)
        106         if inputs.dim() == 1:
        107             inputs = inputs.unsqueeze(-1)
    --> 108         return self.variational_strategy(inputs, prior=prior, **kwargs)
    
    /usr/local/lib/python3.7/dist-packages/gpytorch/variational/independent_multitask_variational_strategy.py in __call__(self, x, task_indices, prior, **kwargs)
         54         See :class:`LMCVariationalStrategy`.
         55         """
    ---> 56         function_dist = self.base_variational_strategy(x, prior=prior, **kwargs)
         57 
         58         if task_indices is None:
    
    /usr/local/lib/python3.7/dist-packages/gpytorch/variational/variational_strategy.py in __call__(self, x, prior, **kwargs)
        240                 self.updated_strategy.fill_(True)
        241 
    --> 242         return super().__call__(x, prior=prior, **kwargs)
    
    /usr/local/lib/python3.7/dist-packages/gpytorch/variational/_variational_strategy.py in __call__(self, x, prior, **kwargs)
        305                 inducing_values=variational_dist_u.mean,
        306                 variational_inducing_covar=variational_dist_u.lazy_covariance_matrix,
    --> 307                 **kwargs,
        308             )
        309         elif isinstance(variational_dist_u, Delta):
    
    /usr/local/lib/python3.7/dist-packages/gpytorch/module.py in __call__(self, *inputs, **kwargs)
         28 
         29     def __call__(self, *inputs, **kwargs):
    ---> 30         outputs = self.forward(*inputs, **kwargs)
         31         if isinstance(outputs, list):
         32             return [_validate_module_outputs(output) for output in outputs]
    
    /usr/local/lib/python3.7/dist-packages/gpytorch/variational/variational_strategy.py in forward(self, x, inducing_points, inducing_values, variational_inducing_covar, **kwargs)
        167         num_induc = inducing_points.size(-2)
        168         test_mean = full_output.mean[..., num_induc:]
    --> 169         induc_induc_covar = full_covar[..., :num_induc, :num_induc].add_jitter()
        170         induc_data_covar = full_covar[..., :num_induc, num_induc:].evaluate()
        171         data_data_covar = full_covar[..., num_induc:, num_induc:]
    
    /usr/local/lib/python3.7/dist-packages/gpytorch/lazy/lazy_evaluated_kernel_tensor.py in __getitem__(self, index)
        401             _, row_index, col_index = index
        402             batch_indices = [slice(None, None, None)] * (self.dim() - 2)
    --> 403             return self._getitem(row_index, col_index, *batch_indices)
        404         else:
        405             return super().__getitem__(index)
    
    /usr/local/lib/python3.7/dist-packages/gpytorch/lazy/lazy_evaluated_kernel_tensor.py in _getitem(self, row_index, col_index, *batch_indices)
         69             if not isinstance(x1, slice) or not isinstance(x2, slice):
         70                 # It's too complicated to deal with tensor indices in this case - we'll use the super method
    ---> 71                 return self.evaluate_kernel()._getitem(row_index, col_index, *batch_indices)
         72 
         73             # Now we know that x1 and x2 are slices
    
    /usr/local/lib/python3.7/dist-packages/gpytorch/utils/memoize.py in g(self, *args, **kwargs)
         57         kwargs_pkl = pickle.dumps(kwargs)
         58         if not _is_in_cache(self, cache_name, *args, kwargs_pkl=kwargs_pkl):
    ---> 59             return _add_to_cache(self, cache_name, method(self, *args, **kwargs), *args, kwargs_pkl=kwargs_pkl)
         60         return _get_from_cache(self, cache_name, *args, kwargs_pkl=kwargs_pkl)
         61 
    
    /usr/local/lib/python3.7/dist-packages/gpytorch/lazy/lazy_evaluated_kernel_tensor.py in evaluate_kernel(self)
        339                 diag=False,
        340                 last_dim_is_batch=self.last_dim_is_batch,
    --> 341                 **self.params,
        342             )
        343             self.kernel.active_dims = temp_active_dims
    
    /usr/local/lib/python3.7/dist-packages/gpytorch/kernels/kernel.py in __call__(self, x1, x2, diag, last_dim_is_batch, **params)
        406                 res = LazyEvaluatedKernelTensor(x1_, x2_, kernel=self, last_dim_is_batch=last_dim_is_batch, **params)
        407             else:
    --> 408                 res = lazify(super(Kernel, self).__call__(x1_, x2_, last_dim_is_batch=last_dim_is_batch, **params))
        409             return res
        410 
    
    /usr/local/lib/python3.7/dist-packages/gpytorch/module.py in __call__(self, *inputs, **kwargs)
         28 
         29     def __call__(self, *inputs, **kwargs):
    ---> 30         outputs = self.forward(*inputs, **kwargs)
         31         if isinstance(outputs, list):
         32             return [_validate_module_outputs(output) for output in outputs]
    
    /usr/local/lib/python3.7/dist-packages/gpytorch/kernels/multitask_kernel.py in forward(self, x1, x2, diag, last_dim_is_batch, **params)
         49             covar_i = covar_i.repeat(*x1.shape[:-2], 1, 1)
         50         covar_x = lazify(self.data_covar_module.forward(x1, x2, **params))
    ---> 51         res = KroneckerProductLazyTensor(covar_x, covar_i)
         52         return res.diag() if diag else res
         53 
    
    /usr/local/lib/python3.7/dist-packages/gpytorch/lazy/kronecker_product_lazy_tensor.py in __init__(self, *lazy_tensors)
         77                 raise RuntimeError(
         78                     "KroneckerProductLazyTensor expects lazy tensors with the "
    ---> 79                     "same batch shapes. Got {}.".format([lv.batch_shape for lv in lazy_tensors])
         80                 )
         81         super().__init__(*lazy_tensors)
    
    RuntimeError: KroneckerProductLazyTensor expects lazy tensors with the same batch shapes. Got [torch.Size([4]), torch.Size([16])].
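
    For reference: the traceback shows MultitaskKernel combining its data covariance (covar_x, built from the SpectralMixtureKernel) and its task covariance (covar_i) with KroneckerProductLazyTensor, whose constructor requires the factors to have matching batch shapes; here they come out as [4] and [16]. Below is a minimal standalone sketch that trips the same check, independent of the model above (the tensor shapes are chosen only for illustration, using the same gpytorch.lazy API that appears in the traceback):

    import torch
    from gpytorch.lazy import lazify, KroneckerProductLazyTensor

    a = lazify(torch.randn(4, 10, 10))   # batch shape [4]
    b = lazify(torch.randn(4, 3, 3))     # batch shape [4]  -> accepted
    ok = KroneckerProductLazyTensor(a, b)

    c = lazify(torch.randn(16, 3, 3))    # batch shape [16], incompatible with [4]
    # Raises: RuntimeError: KroneckerProductLazyTensor expects lazy tensors
    # with the same batch shapes. Got [torch.Size([4]), torch.Size([16])].
    bad = KroneckerProductLazyTensor(a, c)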
    

    There aren't many examples for this library, and I can't figure out how to get rid of this error. Any suggestions?

0 replies  |  last activity 3 years ago