Positional encoding helps a model know the order of words in a sentence. It adds position information to the token embeddings so the model can understand sequence order.
Positional encoding in PyTorch
import torch
import torch.nn as nn
import math


class PositionalEncoding(nn.Module):
    """Inject fixed sinusoidal position information into token embeddings.

    Builds the (non-trainable) sin/cos table from "Attention Is All You
    Need" once at construction time; `forward` adds the first ``seq_len``
    rows of the table to the input.

    Args:
        d_model: Embedding dimension. Even or odd values are supported.
        max_len: Longest sequence length the precomputed table covers.
    """

    def __init__(self, d_model, max_len=5000):
        super().__init__()
        pe = torch.zeros(max_len, d_model)
        # Column vector of positions 0..max_len-1, shape (max_len, 1).
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        # Geometric frequency progression 10000^(-2i/d_model), i = 0, 1, ...
        div_term = torch.exp(
            torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model)
        )
        pe[:, 0::2] = torch.sin(position * div_term)
        # For odd d_model there is one fewer cosine column than sine column;
        # slicing div_term keeps the shapes aligned (the unsliced version
        # raised a shape-mismatch error for odd d_model).
        pe[:, 1::2] = torch.cos(position * div_term[: d_model // 2])
        pe = pe.unsqueeze(0)  # shape (1, max_len, d_model) for batch broadcasting
        # Buffer: saved in state_dict and moved by .to(device), but never trained.
        self.register_buffer('pe', pe)

    def forward(self, x):
        """Return ``x`` plus positional encodings.

        Args:
            x: Tensor of shape (batch, seq_len, d_model); seq_len <= max_len.
        """
        return x + self.pe[:, :x.size(1), :]
The positional encoding matrix uses sine and cosine functions of different frequencies.
It is added to the input embeddings to give each position a unique representation.
# The default table length (max_len=5000) easily covers a 10-token sequence.
pos_encoder = PositionalEncoding(d_model=512)

# All-zero embeddings, so the result below is exactly the raw encodings:
# batch_size=1, seq_len=10, embedding_dim=512.
input_tensor = torch.zeros((1, 10, 512))

output = pos_encoder(input_tensor)
# A smaller configuration: 256-dimensional embeddings, table capped at 100 positions.
pos_encoder = PositionalEncoding(d_model=256, max_len=100)

# Random embeddings for a batch of 2 sequences of 50 tokens each.
input_tensor = torch.randn((2, 50, 256))

output = pos_encoder(input_tensor)
This program defines the positional encoding class, creates a small example input, applies positional encoding, and prints the result. The output shows the added sine and cosine values for each position.
import torch
import torch.nn as nn
import math


class PositionalEncoding(nn.Module):
    """Inject fixed sinusoidal position information into token embeddings.

    Builds the (non-trainable) sin/cos table from "Attention Is All You
    Need" once at construction time; `forward` adds the first ``seq_len``
    rows of the table to the input.

    Args:
        d_model: Embedding dimension. Even or odd values are supported.
        max_len: Longest sequence length the precomputed table covers.
    """

    def __init__(self, d_model, max_len=5000):
        super().__init__()
        pe = torch.zeros(max_len, d_model)
        # Column vector of positions 0..max_len-1, shape (max_len, 1).
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        # Geometric frequency progression 10000^(-2i/d_model), i = 0, 1, ...
        div_term = torch.exp(
            torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model)
        )
        pe[:, 0::2] = torch.sin(position * div_term)
        # For odd d_model there is one fewer cosine column than sine column;
        # slicing div_term keeps the shapes aligned (the unsliced version
        # raised a shape-mismatch error for odd d_model).
        pe[:, 1::2] = torch.cos(position * div_term[: d_model // 2])
        pe = pe.unsqueeze(0)  # shape (1, max_len, d_model) for batch broadcasting
        # Buffer: saved in state_dict and moved by .to(device), but never trained.
        self.register_buffer('pe', pe)

    def forward(self, x):
        """Return ``x`` plus positional encodings.

        Args:
            x: Tensor of shape (batch, seq_len, d_model); seq_len <= max_len.
        """
        return x + self.pe[:, :x.size(1), :]


# Create positional encoding for embedding size 6 and sequence length up to 10.
pos_encoder = PositionalEncoding(d_model=6, max_len=10)

# Dummy input of zeros (batch_size=1, seq_len=4, embedding_dim=6), so the
# printed tensor is exactly the raw sine/cosine table for positions 0..3.
input_tensor = torch.zeros(1, 4, 6)

output = pos_encoder(input_tensor)
print(output)
Sinusoidal positional encoding is fixed and does not change during training (unlike learned positional embeddings, which are trained parameters).
It allows the model to distinguish positions without using recurrent or convolutional layers.
Positional encoding adds position info to input embeddings using sine and cosine waves.
It helps models understand the order of words or tokens in sequences.
It is commonly used in transformer models for language and sequence tasks.