Inception modules let a neural network learn several kinds of features at once by applying filters of multiple sizes in parallel. This improves an image model's ability to recognize patterns without a large increase in parameter count or computation time.
Inception modules in Computer Vision
class InceptionModule(nn.Module): def __init__(self, in_channels, out_1x1, red_3x3, out_3x3, red_5x5, out_5x5, out_pool): super().__init__() self.branch1 = nn.Sequential( nn.Conv2d(in_channels, out_1x1, kernel_size=1), nn.ReLU() ) self.branch2 = nn.Sequential( nn.Conv2d(in_channels, red_3x3, kernel_size=1), nn.ReLU(), nn.Conv2d(red_3x3, out_3x3, kernel_size=3, padding=1), nn.ReLU() ) self.branch3 = nn.Sequential( nn.Conv2d(in_channels, red_5x5, kernel_size=1), nn.ReLU(), nn.Conv2d(red_5x5, out_5x5, kernel_size=5, padding=2), nn.ReLU() ) self.branch4 = nn.Sequential( nn.MaxPool2d(kernel_size=3, stride=1, padding=1), nn.Conv2d(in_channels, out_pool, kernel_size=1), nn.ReLU() ) def forward(self, x): b1 = self.branch1(x) b2 = self.branch2(x) b3 = self.branch3(x) b4 = self.branch4(x) return torch.cat([b1, b2, b3, b4], dim=1)
The module uses 1x1 convolutions to reduce the number of channels before applying bigger filters.
Outputs from all branches are joined together along the channel dimension.
inception = InceptionModule(192, 64, 96, 128, 16, 32, 32) output = inception(torch.randn(1, 192, 28, 28)) print(output.shape)
inception = InceptionModule(256, 128, 128, 192, 32, 96, 64) output = inception(torch.randn(1, 256, 14, 14)) print(output.shape)
This program defines an inception module and applies it to a random image-like tensor. It prints the shape of the output tensor, showing how channels from different branches combine.
import torch import torch.nn as nn class InceptionModule(nn.Module): def __init__(self, in_channels, out_1x1, red_3x3, out_3x3, red_5x5, out_5x5, out_pool): super().__init__() self.branch1 = nn.Sequential( nn.Conv2d(in_channels, out_1x1, kernel_size=1), nn.ReLU() ) self.branch2 = nn.Sequential( nn.Conv2d(in_channels, red_3x3, kernel_size=1), nn.ReLU(), nn.Conv2d(red_3x3, out_3x3, kernel_size=3, padding=1), nn.ReLU() ) self.branch3 = nn.Sequential( nn.Conv2d(in_channels, red_5x5, kernel_size=1), nn.ReLU(), nn.Conv2d(red_5x5, out_5x5, kernel_size=5, padding=2), nn.ReLU() ) self.branch4 = nn.Sequential( nn.MaxPool2d(kernel_size=3, stride=1, padding=1), nn.Conv2d(in_channels, out_pool, kernel_size=1), nn.ReLU() ) def forward(self, x): b1 = self.branch1(x) b2 = self.branch2(x) b3 = self.branch3(x) b4 = self.branch4(x) return torch.cat([b1, b2, b3, b4], dim=1) # Create a random input tensor with batch=1, channels=192, height=28, width=28 input_tensor = torch.randn(1, 192, 28, 28) # Instantiate the inception module inception = InceptionModule(192, 64, 96, 128, 16, 32, 32) # Forward pass output = inception(input_tensor) # Print output shape print(f"Output shape: {output.shape}")
Inception modules help balance model size and performance by mixing small and large filters.
1x1 convolutions reduce computation by shrinking channel numbers before bigger filters.
The pooling branch adds robustness by summarizing spatial information in a way that complements the convolutional branches.
Inception modules combine multiple filter sizes in parallel to learn diverse features.
They use 1x1 convolutions to reduce channels and keep models efficient.
Outputs from all branches are joined to form a richer feature map.