
List is not populated correctly unless PyTorch clone() is used

blue-sky · asked 6 years ago

I'm trying to append the final weights of each trained model to a list, using the following code:

    %reset -f
    
    import torch
    import torch.nn as nn
    import torchvision.transforms as transforms
    import torch.utils.data as data_utils
    import torchvision.datasets as dset
    import os
    import torch.nn.functional as F
    import random
    
    
    trans = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (1.0,))])
    
    root = './data'
    if not os.path.exists(root):
        os.mkdir(root)
    train_set = dset.MNIST(root=root, train=True, transform=trans, download=True)
    test_set = dset.MNIST(root=root, train=False, transform=trans, download=True)
    
    batch_size = 64
    
    train_loader = torch.utils.data.DataLoader(
                     dataset=train_set,
                     batch_size=batch_size,
                     shuffle=True)
    test_loader = torch.utils.data.DataLoader(
                    dataset=test_set,
                    batch_size=batch_size,
                    shuffle=True)
    
    class NeuralNet(nn.Module):
        def __init__(self):
            super(NeuralNet, self).__init__()
            self.fc1 = nn.Linear(28*28, 500)
            self.fc2 = nn.Linear(500, 256)
            self.fc3 = nn.Linear(256, 2)
        def forward(self, x):
            x = x.view(-1, 28*28)
            x = F.relu(self.fc1(x))
            x = F.relu(self.fc2(x))
            x = self.fc3(x)
            return x
    
    num_epochs = 2
    random_sample_size = 200
    
    values_0_or_1 = [t for t in train_set if (int(t[1]) == 0 or int(t[1]) == 1)]
    values_0_or_1_testset = [t for t in test_set if (int(t[1]) == 0 or int(t[1]) == 1)]
    
    print(len(values_0_or_1))
    print(len(values_0_or_1_testset))
    
    train_loader_subset = torch.utils.data.DataLoader(
                     dataset=values_0_or_1,
                     batch_size=batch_size,
                     shuffle=True)
    
    test_loader_subset = torch.utils.data.DataLoader(
                     dataset=values_0_or_1_testset,
                     batch_size=batch_size,
                     shuffle=False)
    
    train_loader = train_loader_subset
    
    # Hyper-parameters 
    input_size = 100
    hidden_size = 100
    num_classes = 2
    # learning_rate = 0.00001
    learning_rate = .0001
    # Device configuration
    device = 'cpu'
    print_progress_every_n_epochs = 1
    
    model = NeuralNet().to(device)
    
    # Loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)  
    
    N = len(train_loader)
    # Train the model
    total_step = len(train_loader)
    
    most_recent_prediction = []
    test_actual_predicted_dict = {}
    
    rm = random.sample(list(values_0_or_1), random_sample_size)
    train_loader_subset = data_utils.DataLoader(rm, batch_size=4)
    
    weights_without_clone = []
    weights_with_clone = []
    
    for run in range(2):  # two training runs in a row (the model is not re-initialized between them)
        for epoch in range(num_epochs):
            for i, (images, labels) in enumerate(train_loader_subset):  
                # Move tensors to the configured device
                images = images.to(device)  # forward() already flattens to 28*28
                labels = labels.to(device)
    
                # Forward pass
                outputs = model(images)
                loss = criterion(outputs, labels)
    
                # Backward and optimize
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
    
            if epoch % print_progress_every_n_epochs == 0:
                print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(epoch + 1, num_epochs, i + 1, total_step, loss.item()))
    
    
        print('model fc2 weights ', model.fc2.weight.data)
        weights_without_clone.append(model.fc2.weight.data)
        weights_with_clone.append(model.fc2.weight.data.clone())
    

Model output:

    12665
    2115
    Epoch [1/2], Step [50/198], Loss: 0.0968
    Epoch [2/2], Step [50/198], Loss: 0.0082
    model fc2 weights  tensor([[-3.9507e-02, -4.0454e-02,  3.5576e-03,  ...,  6.2181e-03,
              4.1372e-02, -6.2960e-03],
            [ 1.8778e-02,  2.7049e-02, -3.5624e-02,  ...,  2.6797e-02,
              2.2041e-03, -4.2284e-02],
            [ 1.9571e-02, -3.2545e-02,  2.6618e-02,  ..., -1.6139e-02,
              4.1192e-02, -2.3458e-02],
            ...,
            [-4.6123e-03,  2.6943e-02,  3.9979e-02,  ..., -3.3848e-02,
              3.6096e-02,  2.4211e-02],
            [-1.4698e-02,  9.7528e-04, -2.5244e-03,  ..., -3.3145e-02,
              1.0888e-02,  3.1091e-02],
            [-1.7451e-02, -2.1646e-02,  2.5885e-02,  ...,  4.0453e-02,
             -6.5324e-03, -3.5410e-02]])
    Epoch [1/2], Step [50/198], Loss: 0.0025
    Epoch [2/2], Step [50/198], Loss: 0.0013
    model fc2 weights  tensor(1.00000e-02 *
           [[-3.9891, -4.0454,  0.3558,  ...,  0.7168,  4.1902, -0.6253],
            [ 1.8766,  2.7049, -3.5632,  ...,  2.6785,  0.2192, -4.2297],
            [ 2.1426, -3.2545,  2.6621,  ..., -1.6285,  4.1196, -2.2653],
            ...,
            [-0.4930,  2.6943,  3.9971,  ..., -3.2940,  3.6641,  2.4248],
            [-1.5160,  0.0975, -0.2524,  ..., -3.1938,  1.1753,  3.1065],
            [-1.8116, -2.1646,  2.5883,  ...,  4.1355, -0.5921, -3.5416]])
    

Printed values of weights_without_clone:

    print(weights_without_clone[0])
    print(weights_without_clone[1])
    

Output:

    tensor(1.00000e-02 *
           [[-3.9891, -4.0454,  0.3558,  ...,  0.7168,  4.1902, -0.6253],
            [ 1.8766,  2.7049, -3.5632,  ...,  2.6785,  0.2192, -4.2297],
            [ 2.1426, -3.2545,  2.6621,  ..., -1.6285,  4.1196, -2.2653],
            ...,
            [-0.4930,  2.6943,  3.9971,  ..., -3.2940,  3.6641,  2.4248],
            [-1.5160,  0.0975, -0.2524,  ..., -3.1938,  1.1753,  3.1065],
            [-1.8116, -2.1646,  2.5883,  ...,  4.1355, -0.5921, -3.5416]])
    tensor(1.00000e-02 *
           [[-3.9891, -4.0454,  0.3558,  ...,  0.7168,  4.1902, -0.6253],
            [ 1.8766,  2.7049, -3.5632,  ...,  2.6785,  0.2192, -4.2297],
            [ 2.1426, -3.2545,  2.6621,  ..., -1.6285,  4.1196, -2.2653],
            ...,
            [-0.4930,  2.6943,  3.9971,  ..., -3.2940,  3.6641,  2.4248],
            [-1.5160,  0.0975, -0.2524,  ..., -3.1938,  1.1753,  3.1065],
            [-1.8116, -2.1646,  2.5883,  ...,  4.1355, -0.5921, -3.5416]])
    

Printed values of weights_with_clone:

    print(weights_with_clone[0])
    print(weights_with_clone[1])
    

Output:

    tensor([[-3.9507e-02, -4.0454e-02,  3.5576e-03,  ...,  6.2181e-03,
              4.1372e-02, -6.2960e-03],
            [ 1.8778e-02,  2.7049e-02, -3.5624e-02,  ...,  2.6797e-02,
              2.2041e-03, -4.2284e-02],
            [ 1.9571e-02, -3.2545e-02,  2.6618e-02,  ..., -1.6139e-02,
              4.1192e-02, -2.3458e-02],
            ...,
            [-4.6123e-03,  2.6943e-02,  3.9979e-02,  ..., -3.3848e-02,
              3.6096e-02,  2.4211e-02],
            [-1.4698e-02,  9.7528e-04, -2.5244e-03,  ..., -3.3145e-02,
              1.0888e-02,  3.1091e-02],
            [-1.7451e-02, -2.1646e-02,  2.5885e-02,  ...,  4.0453e-02,
             -6.5324e-03, -3.5410e-02]])
    tensor(1.00000e-02 *
           [[-3.9891, -4.0454,  0.3558,  ...,  0.7168,  4.1902, -0.6253],
            [ 1.8766,  2.7049, -3.5632,  ...,  2.6785,  0.2192, -4.2297],
            [ 2.1426, -3.2545,  2.6621,  ..., -1.6285,  4.1196, -2.2653],
            ...,
            [-0.4930,  2.6943,  3.9971,  ..., -3.2940,  3.6641,  2.4248],
            [-1.5160,  0.0975, -0.2524,  ..., -3.1938,  1.1753,  3.1065],
            [-1.8116, -2.1646,  2.5883,  ...,  4.1355, -0.5921, -3.5416]])
    

Why is 1.00000e-02 * prepended to the final weight values of the second model?

And why are identical weights appended on each iteration unless clone() is used?

    weights_without_clone.append(model.fc2.weight.data)
    weights_with_clone.append(model.fc2.weight.data.clone())
    
trsvchn · answered 5 years ago

First, let me reproduce your case. I'll use a very simple model:

Code:

    import torch
    import torch.nn as nn
    import torch.optim as optim
    
    torch.manual_seed(42)
    
    # Some dummy data:
    X = torch.randn(100, 5, requires_grad=True, dtype=torch.float)
    Y = torch.randn(100, 5, requires_grad=True, dtype=torch.float)
    
    
    class Model(nn.Module):
    
        def __init__(self):
            super().__init__()
            self.fc1 = nn.Linear(5, 5, bias=False)
            self.relu = nn.ReLU()
            self.fc2 = nn.Linear(5, 5, bias=False)
    
        def forward(self, x):
            x = self.fc1(x)
            x = self.relu(x)
            x = self.fc2(x)
            return x
    
    
    def train(model, x, y, loss_fn, optimizer, n_epochs=1000, print_loss=True):
    
        weights = []
    
        for i in range(n_epochs):
            y_hat = model(x)
            loss = loss_fn(y_hat, y)
    
            optimizer.zero_grad()
    
            loss.backward()
    
            if print_loss:
                print(f'| {i+1} | Loss: {loss.item():.4f}')
    
            optimizer.step()
    
            print('W:\n', model.fc2.weight.data)
    
            weights.append(model.fc2.weight.data)
    
        return weights
    
    
    torch.manual_seed(42)
    
    model = Model()
    
    loss_fn = nn.MSELoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
    n_epochs = 2
    
    weights = train(model=model,
                    x=X,
                    y=Y,
                    loss_fn=loss_fn,
                    optimizer=optimizer,
                    n_epochs=n_epochs,
                    print_loss=True)
    

Output:

    | 1 | Loss: 1.0285
    W:
     tensor([[-0.2052, -0.1257, -0.2684,  0.0425, -0.4413],
            [ 0.4034, -0.3797,  0.3448,  0.0741, -0.1450],
            [ 0.2759,  0.0695,  0.3608,  0.0487, -0.1411],
            [ 0.1201, -0.1213,  0.1881,  0.3990,  0.2583],
            [-0.1956,  0.2581,  0.0798,  0.2270, -0.2725]])
    | 2 | Loss: 1.0279
    W:
     tensor([[-0.2041, -0.1251, -0.2679,  0.0428, -0.4410],
            [ 0.4030, -0.3795,  0.3444,  0.0738, -0.1447],
            [ 0.2755,  0.0693,  0.3603,  0.0484, -0.1411],
            [ 0.1200, -0.1213,  0.1879,  0.3987,  0.2580],
            [-0.1958,  0.2580,  0.0796,  0.2269, -0.2725]])
    

OK, that works well. Now let's take a look at weights:

    print(*weights, sep='\n')
    

Output:

    tensor([[-0.2041, -0.1251, -0.2679,  0.0428, -0.4410],
            [ 0.4030, -0.3795,  0.3444,  0.0738, -0.1447],
            [ 0.2755,  0.0693,  0.3603,  0.0484, -0.1411],
            [ 0.1200, -0.1213,  0.1879,  0.3987,  0.2580],
            [-0.1958,  0.2580,  0.0796,  0.2269, -0.2725]])
    tensor([[-0.2041, -0.1251, -0.2679,  0.0428, -0.4410],
            [ 0.4030, -0.3795,  0.3444,  0.0738, -0.1447],
            [ 0.2755,  0.0693,  0.3603,  0.0484, -0.1411],
            [ 0.1200, -0.1213,  0.1879,  0.3987,  0.2580],
            [-0.1958,  0.2580,  0.0796,  0.2269, -0.2725]])
    

Well, that is not what we wanted, but it is actually the expected behavior. If you look again, you'll see that both values in the list correspond to the weight values from the second epoch. That means we appended not new tensors, but references to the actual weight storage: optimizer.step() updates the weights in place, so every un-cloned entry in the list points at the same storage and therefore shows the same final values.
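
The aliasing is easy to demonstrate in isolation. In this minimal sketch (my own illustration, not from the original code), w stands in for model.fc2.weight.data, and an in-place add_ simulates the update that optimizer.step() performs:

    import torch

    w = torch.zeros(2, 2)      # stand-in for model.fc2.weight.data
    history = [w, w.clone()]   # first entry aliases w, second is an independent copy

    w.add_(1.0)                # in-place update, as optimizer.step() does

    print(history[0])          # all ones  -- the alias reflects the update
    print(history[1])          # all zeros -- the clone kept the old values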

So you have to use clone. However, it is recommended to use tensor.clone().detach() instead of a plain clone, because clone is recorded in the computation graph: if you backpropagate through the cloned tensor, the gradients flowing into it will also propagate to the original tensor (see the clone docs).

    weights.append(model.fc2.weight.data.clone().detach())
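
To see why detach() matters, here is another small sketch (again my own illustration, not part of the original answer): gradients that flow through a plain clone() still reach the original tensor, while clone().detach() has no path back to it:

    import torch

    x = torch.ones(3, requires_grad=True)

    y = x.clone()            # clone() stays in the computation graph
    y.sum().backward()
    print(x.grad)            # tensor([1., 1., 1.]) -- the gradient reached x

    z = x.clone().detach()   # detached copy: no graph connection back to x
    print(z.requires_grad)   # False -- cannot backprop through z into x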