RNN and LSTM
```python
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader

class BasicRNN(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim):
        super(BasicRNN, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.rnn = nn.RNN(embedding_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        # x: [batch_size, seq_length]
        embedded = self.embedding(x)
        # embedded: [batch_size, seq_length, embedding_dim]
        output, hidden = self.rnn(embedded)
        # output: [batch_size, seq_length, hidden_dim]
        # hidden: [1, batch_size, hidden_dim] - the final hidden state of the single RNN layer
        last_output = output[:, -1, :]
        # last_output: [batch_size, hidden_dim]
        prediction = self.fc(last_output)
        # prediction: [batch_size, output_dim]
        return prediction
```
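As a quick sanity check, the sketch below runs a dummy batch through `BasicRNN` to confirm the shapes annotated in the comments. The sizes here are made up purely for illustration and are not the hyperparameters used later.

```python
# Hypothetical sizes, just for a shape check
toy_rnn = BasicRNN(vocab_size=100, embedding_dim=16, hidden_dim=32, output_dim=100)
dummy_batch = torch.randint(0, 100, (4, 10))   # [batch_size=4, seq_length=10]
print(toy_rnn(dummy_batch).shape)              # expected: torch.Size([4, 100])
```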
```python
class LSTMModel(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim):
        super(LSTMModel, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        # Replace the basic RNN with an LSTM
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        # x: [batch_size, seq_length]
        embedded = self.embedding(x)
        # embedded: [batch_size, seq_length, embedding_dim]
        output, (hidden, cell) = self.lstm(embedded)
        # output: [batch_size, seq_length, hidden_dim]
        # hidden: [1, batch_size, hidden_dim] - the last layer's final hidden state
        # cell: [1, batch_size, hidden_dim] - the last layer's final cell state
        last_output = output[:, -1, :]
        # last_output: [batch_size, hidden_dim]
        prediction = self.fc(last_output)
        # prediction: [batch_size, output_dim]
        return prediction
```
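Note that for a single-layer, unidirectional LSTM, taking `output[:, -1, :]` is equivalent to taking the final hidden state `hidden[-1]`. The small check below (again with hypothetical sizes) illustrates this equivalence.

```python
# Illustrative check with made-up sizes: last time step of `output`
# matches the final hidden state for a single-layer, unidirectional LSTM.
toy_lstm = LSTMModel(vocab_size=100, embedding_dim=16, hidden_dim=32, output_dim=100)
x = torch.randint(0, 100, (4, 10))
emb = toy_lstm.embedding(x)
out, (h, c) = toy_lstm.lstm(emb)
print(torch.allclose(out[:, -1, :], h[-1]))   # True
```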
```python
# Define hyperparameters (word_to_index, data, and targets come from the earlier preprocessing step)
vocab_size = len(word_to_index)
hidden_size = 128
embedding_dim = 128
output_size = len(word_to_index)
learning_rate = 0.001
num_epochs = 10
batch_size = 64

# Train on GPU if one is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Convert data and targets to PyTorch tensors
data = torch.tensor(data, dtype=torch.long)
targets = torch.tensor(targets, dtype=torch.long)

# Create DataLoader
dataset = TensorDataset(data, targets)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
```
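Before training, it can help to pull a single batch from the DataLoader and check its shapes. This assumes each example pairs an input sequence with a single next-word target, which is what the `CrossEntropyLoss` usage below implies.

```python
# Peek at one batch to verify shapes before training
inputs, labels = next(iter(dataloader))
print(inputs.shape)   # [batch_size, seq_length]
print(labels.shape)   # [batch_size]
```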
```python
# Initialize the model with the specified hyperparameters
model = BasicRNN(vocab_size=vocab_size, embedding_dim=embedding_dim,
                 hidden_dim=hidden_size, output_dim=output_size).to(device)

# Define the loss function
loss_function = nn.CrossEntropyLoss()

# Choose an optimizer
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Function to train the model
def train_model(model, dataloader, optimizer, loss_function, num_epochs, device):
    model.train()  # Set the model to training mode
    for epoch in range(num_epochs):
        total_loss = 0
        for inputs, targets in dataloader:
            inputs, targets = inputs.to(device), targets.to(device)  # Move the batch to the device
            optimizer.zero_grad()                        # Clear the gradients
            predictions = model(inputs)                  # Forward pass: compute outputs for the batch
            loss = loss_function(predictions, targets)   # Compute the loss
            loss.backward()                              # Backward pass: compute gradients w.r.t. the parameters
            optimizer.step()                             # Perform a single optimization step (parameter update)
            total_loss += loss.item()
        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {total_loss/len(dataloader)}')
```
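Training is then started by calling `train_model` with the objects defined above. Swapping the basic RNN for the LSTM only requires changing the line that constructs the model; the rest of the loop is unchanged.

```python
# Kick off training with the hyperparameters defined above
train_model(model, dataloader, optimizer, loss_function, num_epochs, device)

# To train the LSTM variant instead, construct it the same way:
# model = LSTMModel(vocab_size=vocab_size, embedding_dim=embedding_dim,
#                   hidden_dim=hidden_size, output_dim=output_size).to(device)
```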