TypeError: forward() missing 1 required positional argument: ‘c’

I created this simplified version of VGG16:

class VGG16COMBO(nn.Module):
    
    def __init__(self, num_classes):
        super(VGG16COMBO, self).__init__()

        # calculate same padding:
        # (w - k + 2*p)/s + 1 = o
        # => p = (s(o-1) - w + k)/2

        self.block_1 = nn.Sequential(
            nn.Conv2d(in_channels=1,
                      out_channels=64,
                      kernel_size=(3, 3),
                      stride=(1, 1),
                      # (1(32-1)- 32 + 3)/2 = 1
                      padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.Conv2d(in_channels=64,
                      out_channels=64,
                      kernel_size=(3, 3),
                      stride=(1, 1),
                      padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(2, 2),
                         stride=(2, 2))
        )

        self.block_2 = nn.Sequential(
            nn.Conv2d(in_channels=64,
                      out_channels=128,
                      kernel_size=(3, 3),
                      stride=(1, 1),
                      padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.Conv2d(in_channels=128,
                      out_channels=128,
                      kernel_size=(3, 3),
                      stride=(1, 1),
                      padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(2, 2),
                         stride=(2, 2))
        )
        
        self.block_3 = nn.Sequential(
            nn.Conv2d(in_channels=128,
                      out_channels=256,
                      kernel_size=(3, 3),
                      stride=(1, 1),
                      padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.Conv2d(in_channels=256,
                      out_channels=256,
                      kernel_size=(3, 3),
                      stride=(1, 1),
                      padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.Conv2d(in_channels=256,
                      out_channels=256,
                      kernel_size=(3, 3),
                      stride=(1, 1),
                      padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(2, 2),
                         stride=(2, 2))
        )

        self.block_4 = nn.Sequential(
            nn.Conv2d(in_channels=256,
                      out_channels=512,
                      kernel_size=(3, 3),
                      stride=(1, 1),
                      padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.Conv2d(in_channels=512,
                      out_channels=512,
                      kernel_size=(3, 3),
                      stride=(1, 1),
                      padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.Conv2d(in_channels=512,
                      out_channels=512,
                      kernel_size=(3, 3),
                      stride=(1, 1),
                      padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(2, 2),
                         stride=(2, 2))
        ) 


        self.classifier = nn.Sequential(
            nn.Linear(2048, 4096),
            nn.ReLU(True),
            nn.Dropout(p=0.25),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(p=0.25),
            nn.Linear(4096, num_classes),
        )

    def forward(self, m, c):

        m = self.block_1(m)
        m = self.block_2(m)
        m = self.block_3(m)
        m = self.block_4(m)
        m = m.view(m.size(0), -1)
        m = self.classifier(m)

        c = self.block_1(c)
        c = self.block_2(c)
        c = self.block_3(c)
        c = self.block_4(c)
        c = c.view(c.size(0), -1)
        c = self.classifier(c)

        x = torch.cat((m, c), dim=1)
        return x

As you can see, in the forward I pass 2 arguments, m and c, that are referred to data of Mnist and Cifar10.
Then I create the model:

modelcombo = VGG16COMBO(1).cuda()
print(modelcombo)

# Define an optimizier
import torch.optim as optim
optimizer = optim.SGD(modelcombo.parameters(), lr = 0.01)
# Define a loss 
criterion = nn.BCEWithLogitsLoss()

The problem is in the training function:

def train(net, loaders, optimizer, criterion, epochs=20, dev=dev, save_param = False, model_name="valerio"):
    try:
        net = net.to(dev)
        #print(net)
        # Initialize history
        history_loss = {"train": [], "val": [], "test": []}
        history_accuracy = {"train": [], "val": [], "test": []}
        # Store the best val accuracy
        best_val_accuracy = 0

        # Process each epoch
        for epoch in range(epochs):
            # Initialize epoch variables
            sum_loss = {"train": 0, "val": 0, "test": 0}
            sum_accuracy = {"train": 0, "val": 0, "test": 0}
            # Process each split
            for split in ["train", "val", "test"]:
                if split == "train":
                  net.train()
                else:
                  net.eval()
                # Process each batch
                for (input, labels) in loaders[split]:
                    # Move to CUDA
                    input = input.to(dev)
                    labels = labels.to(dev)
                    # Reset gradients
                    optimizer.zero_grad()
                    # Compute output
                    pred = net(input)
                    #pred = pred.squeeze(dim=1) # Output shape is [Batch size, 1], but we want [Batch size]
                    labels = labels.unsqueeze(1)
                    labels = labels.float()
                    loss = criterion(pred, labels)
                    # Update loss
                    sum_loss[split] += loss.item()
                    # Check parameter update
                    if split == "train":
                        # Compute gradients
                        loss.backward()
                        # Optimize
                        optimizer.step()
                    # Compute accuracy
                    #pred_labels = pred.argmax(1) + 1
                    pred_labels = (pred >= 0.5).long() # Binarize predictions to 0 and 1
                    batch_accuracy = (pred_labels == labels).sum().item()/input.size(0)
                    # Update accuracy
                    sum_accuracy[split] += batch_accuracy
            # Compute epoch loss/accuracy
            epoch_loss = {split: sum_loss[split]/len(loaders[split]) for split in ["train", "val", "test"]}
            epoch_accuracy = {split: sum_accuracy[split]/len(loaders[split]) for split in ["train", "val", "test"]}

            # Store params at the best validation accuracy
            if save_param and epoch_accuracy["val"] > best_val_accuracy:
              #torch.save(net.state_dict(), f"{net.__class__.__name__}_best_val.pth")
              torch.save(net.state_dict(), f"{model_name}_best_val.pth")
              best_val_accuracy = epoch_accuracy["val"]

            # Update history
            for split in ["train", "val", "test"]:
                history_loss[split].append(epoch_loss[split])
                history_accuracy[split].append(epoch_accuracy[split])
            # Print info
            print(f"Epoch {epoch+1}:",
                  f"TrL={epoch_loss['train']:.4f},",
                  f"TrA={epoch_accuracy['train']:.4f},",
                  f"VL={epoch_loss['val']:.4f},",
                  f"VA={epoch_accuracy['val']:.4f},",
                  f"TeL={epoch_loss['test']:.4f},",
                  f"TeA={epoch_accuracy['test']:.4f},")
    except KeyboardInterrupt:
        print("Interrupted")
    finally:
        # Plot loss
        plt.title("Loss")
        for split in ["train", "val", "test"]:
            plt.plot(history_loss[split], label=split)
        plt.legend()
        plt.show()
        # Plot accuracy
        plt.title("Accuracy")
        for split in ["train", "val", "test"]:
            plt.plot(history_accuracy[split], label=split)
        plt.legend()
        plt.show()

Because when I call the function the problem is that I am passing only one dataloader, the one of mnist, and not for cifar.

# Train model
train(modelcombo, loaders, optimizer, criterion, epochs=10, dev=dev) #loaders is only for mnist
#I want to pass also loaders_cifar

TypeError: forward() missing 1 required positional argument: 'c'

Now, I have to modify the training function, the forward function, or I have to combine the loaders in such a way?

My first guess: incorrectly defined loaders variable.

Second: if input (you should change the name tho, it’s reserved keyword) is a tuple, then you would have to unpack it. The easiest way: net(*input)

1 Like

So, first of all, I have to modify loaders, right?
I have to concat them?

Just print it first to make sure it has a form where the inputs are really a tuple. I have no idea how you define them.

Always that notebook :smiley:

# Define loaders
from torch.utils.data import DataLoader
train_loader = DataLoader(train_set, batch_size=64, num_workers=2, shuffle=True, drop_last=True)
val_loader   = DataLoader(val_set,   batch_size=64, num_workers=2, shuffle=False, drop_last=False)
test_loader  = DataLoader(test_set,  batch_size=64, num_workers=2, shuffle=False, drop_last=False)
# Define dictionary of loaders
loaders = {"train": train_loader,
           "val": val_loader,
           "test": test_loader}

Same for the loaders of cifar, but only with the suffix _cifar :smiley:

So, you define the same variable, called under the same name loaders two times?
Then the first values are lost :stuck_out_tongue:

a = 1 # this gets lost
a = 2

No no, I have loaders, and loaders_cifar
I want to pass both of them to forward

Then I would do it like this:

train(..., (loaders_mnist, loaders_cifar), ...) # how to call the train func

# split the loaders, because you pass them as single argument:
loaders_mnist, loaders_cifar = loaders

# inside train
for ((input_mnist, output_mnist), (input_cifar, output_cifar)) in zip(loaders_mnist, loaders_cifar)

# and calling the model:
x = net(input_mnist, input_cifar)

Albeit I feel that your model should also return two outputs: one for MNIST and the other for Cifar10
This way you have really no idea how to calculate the loss, because you have two output variables from the loaders, and your model only returns one.

You could concatenate the outputs in the same way you do in the model. No idea if it will work :stuck_out_tongue:

I am going to try, thanks. No I want only one concatenate output. In these days I will define a custom loss. The experiment is the same of last time, about summing weights.
@Sebgolos I have written this:

def train2(net, loaders, optimizer, criterion, epochs=10, dev=dev, save_param = False, model_name="valerio"):
    loaders_mnist, loaders_cifar = loaders
    try:
        net = net.to(dev)
        #print(net)
        # Initialize history
        history_loss = {"train": [], "val": [], "test": []}
        history_accuracy = {"train": [], "val": [], "test": []}
        # Store the best val accuracy
        best_val_accuracy = 0

        # Process each epoch
        for epoch in range(epochs):
            # Initialize epoch variables
            sum_loss = {"train": 0, "val": 0, "test": 0}
            sum_accuracy = {"train": 0, "val": 0, "test": 0}
            # Process each split
            for split in ["train", "val", "test"]:
                if split == "train":
                  net.train()
                else:
                  net.eval()
                # Process each batch
                for ((input_mnist, labels_mnist), (input_cifar, labels_cifar)) in zip(loaders_mnist, loaders_cifar):
                #for (input, labels) in loaders[split]:
                    # Move to CUDA
                    input_mnist = input_mnist.to(dev)
                    labels_mnist = labels_mnist.to(dev)
                    input_cifar = input_cifar.to(dev)
                    labels_cifar = labels_cifar.to(dev)
                    # Reset gradients
                    optimizer.zero_grad()
                    # Compute output
                    pred = net(input_mnist, input_cifar)
                    #pred = pred.squeeze(dim=1) # Output shape is [Batch size, 1], but we want [Batch size]
                    labels = labels.unsqueeze(1)
                    labels = labels.float()
                    loss = criterion(pred, labels)
                    # Update loss
                    sum_loss[split] += loss.item()
                    # Check parameter update
                    if split == "train":
                        # Compute gradients
                        loss.backward()
                        # Optimize
                        optimizer.step()
                    # Compute accuracy
                    #pred_labels = pred.argmax(1) + 1
                    pred_labels = (pred >= 0.5).long() # Binarize predictions to 0 and 1
                    batch_accuracy = (pred_labels == labels).sum().item()/input.size(0)
                    # Update accuracy
                    sum_accuracy[split] += batch_accuracy
            # Compute epoch loss/accuracy
            epoch_loss = {split: sum_loss[split]/len(loaders[split]) for split in ["train", "val", "test"]}
            epoch_accuracy = {split: sum_accuracy[split]/len(loaders[split]) for split in ["train", "val", "test"]}

            # Store params at the best validation accuracy
            if save_param and epoch_accuracy["val"] > best_val_accuracy:
              #torch.save(net.state_dict(), f"{net.__class__.__name__}_best_val.pth")
              torch.save(net.state_dict(), f"{model_name}_best_val.pth")
              best_val_accuracy = epoch_accuracy["val"]

            # Update history
            for split in ["train", "val", "test"]:
                history_loss[split].append(epoch_loss[split])
                history_accuracy[split].append(epoch_accuracy[split])
            # Print info
            print(f"Epoch {epoch+1}:",
                  f"TrL={epoch_loss['train']:.4f},",
                  f"TrA={epoch_accuracy['train']:.4f},",
                  f"VL={epoch_loss['val']:.4f},",
                  f"VA={epoch_accuracy['val']:.4f},",
                  f"TeL={epoch_loss['test']:.4f},",
                  f"TeA={epoch_accuracy['test']:.4f},")
    except KeyboardInterrupt:
        print("Interrupted")
    finally:
        # Plot loss
        plt.title("Loss")
        for split in ["train", "val", "test"]:
            plt.plot(history_loss[split], label=split)
        plt.legend()
        plt.show()
        # Plot accuracy
        plt.title("Accuracy")
        for split in ["train", "val", "test"]:
            plt.plot(history_accuracy[split], label=split)
        plt.legend()
        plt.show()

Is it right? (Note that for me loaders is the name of loaders_mnist)
And # Train model train2(modelcombo, (loaders, loaders_cifar), optimizer, criterion, epochs=3, dev=dev)
I have this error

---> 24                 for ((input_mnist, labels_mnist), (input_cifar, labels_cifar)) in zip(loaders_mnist, loaders_cifar):
     25                 #for (input, labels) in loaders[split]:
     26                     # Move to CUDA

ValueError: too many values to unpack (expected 2)

Obviously I will have some errors in the part of the loss because I have not created the custom loss yet

I see you’re applying some splitting there.
So try to use

zip(loaders_mnist[split], loaders_cifar[split])

Ok, I will try again in the next few hours, thanks
@Sebgolos I did in this way, and I obtained an error ahah

def train2(net, loaders, optimizer, criterion, epochs=10, dev=dev, save_param = False, model_name="valerio"):
    loaders_mnist, loaders_cifar = loaders
    try:
        net = net.to(dev)
        #print(net)
        # Initialize history
        history_loss = {"train": [], "val": [], "test": []}
        history_accuracy = {"train": [], "val": [], "test": []}
        # Store the best val accuracy
        best_val_accuracy = 0

        # Process each epoch
        for epoch in range(epochs):
            # Initialize epoch variables
            sum_loss = {"train": 0, "val": 0, "test": 0}
            sum_accuracy = {"train": 0, "val": 0, "test": 0}
            # Process each split
            for split in ["train", "val", "test"]:
                if split == "train":
                  net.train()
                else:
                  net.eval()
                # Process each batch
                for ((input_mnist, labels_mnist), (input_cifar, labels_cifar)) in zip(loaders_mnist[split], loaders_cifar[split]):
                #for (input, labels) in loaders[split]:
                    # Move to CUDA
                    input_mnist = input_mnist.to(dev)
                    labels_mnist = labels_mnist.to(dev)
                    input_cifar = input_cifar.to(dev)
                    labels_cifar = labels_cifar.to(dev)
                    # Reset gradients
                    optimizer.zero_grad()
                    # Compute output
                    pred = net(input_mnist, input_cifar)
                    #pred = pred.squeeze(dim=1) # Output shape is [Batch size, 1], but we want [Batch size]
                    labels_mnist = labels_mnist.unsqueeze(1)
                    labels_mnist = labels_mnist.float()
                    labels_cifar = labels_cifar.unsqueeze(1)
                    labels_cifar = labels_cifar.float()
                    labels = labels_mnist, labels_cifar
                    loss = criterion(pred, labels)
                    # Update loss
                    sum_loss[split] += loss.item()
                    # Check parameter update
                    if split == "train":
                        # Compute gradients
                        loss.backward()
                        # Optimize
                        optimizer.step()
                    # Compute accuracy
                    #pred_labels = pred.argmax(1) + 1
                    pred_labels = (pred >= 0.5).long() # Binarize predictions to 0 and 1
                    batch_accuracy = (pred_labels == labels).sum().item()/input.size(0)
                    # Update accuracy
                    sum_accuracy[split] += batch_accuracy
            # Compute epoch loss/accuracy
            epoch_loss = {split: sum_loss[split]/len(loaders[split]) for split in ["train", "val", "test"]}
            epoch_accuracy = {split: sum_accuracy[split]/len(loaders[split]) for split in ["train", "val", "test"]}

            # Store params at the best validation accuracy
            if save_param and epoch_accuracy["val"] > best_val_accuracy:
              #torch.save(net.state_dict(), f"{net.__class__.__name__}_best_val.pth")
              torch.save(net.state_dict(), f"{model_name}_best_val.pth")
              best_val_accuracy = epoch_accuracy["val"]

            # Update history
            for split in ["train", "val", "test"]:
                history_loss[split].append(epoch_loss[split])
                history_accuracy[split].append(epoch_accuracy[split])
            # Print info
            print(f"Epoch {epoch+1}:",
                  f"TrL={epoch_loss['train']:.4f},",
                  f"TrA={epoch_accuracy['train']:.4f},",
                  f"VL={epoch_loss['val']:.4f},",
                  f"VA={epoch_accuracy['val']:.4f},",
                  f"TeL={epoch_loss['test']:.4f},",
                  f"TeA={epoch_accuracy['test']:.4f},")
    except KeyboardInterrupt:
        print("Interrupted")
    finally:
        # Plot loss
        plt.title("Loss")
        for split in ["train", "val", "test"]:
            plt.plot(history_loss[split], label=split)
        plt.legend()
        plt.show()
        # Plot accuracy
        plt.title("Accuracy")
        for split in ["train", "val", "test"]:
            plt.plot(history_accuracy[split], label=split)
        plt.legend()
        plt.show()

# Train model train2(modelcombo, (loaders, loaders_cifar), optimizer, criterion, epochs=3, dev=dev)
Error:

AttributeError                            Traceback (most recent call last)

<ipython-input-117-96aba51d9c54> in <module>()
      1 # Train model
----> 2 train2(modelcombo, (loaders, loaders_cifar), optimizer, criterion, epochs=3, dev=dev)

3 frames

<ipython-input-116-ec5051c9c145> in train2(net, loaders, optimizer, criterion, epochs, dev, save_param, model_name)
     39                     labels_cifar = labels_cifar.float()
     40                     labels = labels_mnist, labels_cifar
---> 41                     loss = criterion(pred, labels)
     42                     # Update loss
     43                     sum_loss[split] += loss.item()

/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
    887             result = self._slow_forward(*input, **kwargs)
    888         else:
--> 889             result = self.forward(*input, **kwargs)
    890         for hook in itertools.chain(
    891                 _global_forward_hooks.values(),

/usr/local/lib/python3.7/dist-packages/torch/nn/modules/loss.py in forward(self, input, target)
    715                                                   self.weight,
    716                                                   pos_weight=self.pos_weight,
--> 717                                                   reduction=self.reduction)
    718 
    719 

/usr/local/lib/python3.7/dist-packages/torch/nn/functional.py in binary_cross_entropy_with_logits(input, target, weight, size_average, reduce, reduction, pos_weight)
   2824         reduction_enum = _Reduction.get_enum(reduction)
   2825 
-> 2826     if not (target.size() == input.size()):
   2827         raise ValueError("Target size ({}) must be the same as input size ({})".format(target.size(), input.size()))
   2828 

AttributeError: 'tuple' object has no attribute 'size'

You’ve tried a weird thing to be honest.
The labels variable is a tuple, but it can’t be meaningfully interpreted by this loss function.

If you’re really eager to do it like this, you would have to concatenate the labels the same way you did in your model (with torch.cat).

So, I will define my loss function for this training function and then I will try again, thanks
@Sebgolos
My CustomLoss will be this: CrossEntropy(MNIST) + CrossEntropy(CIFAR10) + CrossEntropy([MNIST+CIFAR10]-MNIST) (this last term of loss should guarantee that the network on MNIST have similar performances before and after the operation of sum.
Do you think a solution like this code could be good?

class CustomLossFunction(nn.Module):
    def __init__(self):
        super(CustomLossFunction, self).__init__()
    def forward(self, pred_mnist, pred_cifar, target_mnist, target_cifar):
        loss_mnist = nn.BCEWithLogitsLoss(pred_mnist, target_mnist)
        loss_cifar = nn.BCEWithLogitsLoss(pred_cifar, target_cifar)
        loss_mnist_cifar = nn.BCEWithLogitsLoss(pred_mnist_cifar - pred_mnist, target_mnist_cifar - target_mnist)
        loss_sum = loss_mnist + loss_cifar + loss_mnist_cifar
        return loss_sum