RNN and LSTM

This section builds the same sequence model twice: first with a vanilla RNN, then with an LSTM that simply swaps the recurrent layer. The basic model stacks an embedding layer, a single RNN layer, and a linear layer that maps the hidden state at the last time step to the output logits.

```python
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset


class BasicRNN(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim):
        super(BasicRNN, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.rnn = nn.RNN(embedding_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        # x: [batch_size, seq_length]
        embedded = self.embedding(x)
        # embedded: [batch_size, seq_length, embedding_dim]
        output, hidden = self.rnn(embedded)
        # output: [batch_size, seq_length, hidden_dim]
        # hidden: [num_layers=1, batch_size, hidden_dim] - the final hidden state of the (only) layer
        last_output = output[:, -1, :]
        # last_output: [batch_size, hidden_dim]
        prediction = self.fc(last_output)
        # prediction: [batch_size, output_dim]
        return prediction
```
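
As a quick shape check (not part of the original walkthrough), a dummy forward pass with placeholder dimensions should reproduce the shapes annotated in the comments above:

```python
# Sanity check with made-up dimensions; the real model below uses the dataset's vocabulary size
toy_rnn = BasicRNN(vocab_size=1000, embedding_dim=128, hidden_dim=128, output_dim=1000)
toy_batch = torch.randint(0, 1000, (64, 20))  # [batch_size=64, seq_length=20]
with torch.no_grad():
    toy_out = toy_rnn(toy_batch)
print(toy_out.shape)  # torch.Size([64, 1000]) -> [batch_size, output_dim]
```
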
The LSTM variant is identical except that the recurrent layer is an `nn.LSTM`, which returns a cell state alongside the hidden state.

```python
class LSTMModel(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim):
        super(LSTMModel, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        # Replace the basic RNN with an LSTM
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        # x: [batch_size, seq_length]
        embedded = self.embedding(x)
        # embedded: [batch_size, seq_length, embedding_dim]
        output, (hidden, cell) = self.lstm(embedded)
        # output: [batch_size, seq_length, hidden_dim]
        # hidden: [num_layers=1, batch_size, hidden_dim] - the last layer's final hidden state
        # cell: [num_layers=1, batch_size, hidden_dim] - the last layer's final cell state
        last_output = output[:, -1, :]
        # last_output: [batch_size, hidden_dim]
        prediction = self.fc(last_output)
        # prediction: [batch_size, output_dim]
        return prediction
```
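
One detail worth noting: because this LSTM is single-layer and unidirectional, `output[:, -1, :]` and `hidden[-1]` hold the same values, so either could feed the classifier. A small check (with placeholder sizes, not from the original post) illustrates this:

```python
# Placeholder sizes purely for illustration
toy_lstm = LSTMModel(vocab_size=1000, embedding_dim=128, hidden_dim=128, output_dim=1000)
x = torch.randint(0, 1000, (4, 12))  # [batch_size=4, seq_length=12]
with torch.no_grad():
    emb = toy_lstm.embedding(x)
    output, (hidden, cell) = toy_lstm.lstm(emb)
print(torch.allclose(output[:, -1, :], hidden[-1]))  # True: last time step == final hidden state
```
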
With the models defined, set the hyperparameters, pick a device, and wrap the prepared `data`/`targets` tensors in a DataLoader.

```python
# Define hyperparameters
vocab_size = len(word_to_index)
hidden_size = 128
embedding_dim = 128
output_size = len(word_to_index)
learning_rate = 0.001
num_epochs = 10
batch_size = 64

# Train on the GPU if one is available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Convert data and targets to PyTorch tensors
data = torch.tensor(data, dtype=torch.long)
targets = torch.tensor(targets, dtype=torch.long)

# Create DataLoader
dataset = TensorDataset(data, targets)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
```
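
Pulling one batch out of the DataLoader is a cheap way to confirm what the training loop will receive. This assumes `data` holds index sequences of shape `[num_examples, seq_length]` and `targets` holds one class index per example, which is what `nn.CrossEntropyLoss` below expects:

```python
# Inspect a single batch (shapes depend on how data/targets were built earlier)
inputs_batch, targets_batch = next(iter(dataloader))
print(inputs_batch.shape)   # expected: [batch_size, seq_length]
print(targets_batch.shape)  # expected: [batch_size]
```
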

```python
# Initialize the model with the specified hyperparameters
model = BasicRNN(vocab_size=vocab_size, embedding_dim=embedding_dim,
                 hidden_dim=hidden_size, output_dim=output_size).to(device)

# Define the loss function
loss_function = nn.CrossEntropyLoss()

# Choose an optimizer
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Function to train the model
def train_model(model, dataloader, optimizer, loss_function, num_epochs, device):
    model.train()  # Set the model to training mode
    for epoch in range(num_epochs):
        total_loss = 0
        for inputs, targets in dataloader:
            inputs, targets = inputs.to(device), targets.to(device)  # Transfer data to the GPU
            optimizer.zero_grad()        # Clear the gradients
            predictions = model(inputs)  # Forward pass: compute the output class given a batch of inputs
            loss = loss_function(predictions, targets)  # Compute the loss
            loss.backward()   # Backward pass: compute the gradient of the loss w.r.t. the model's parameters
            optimizer.step()  # Perform a single optimization step (parameter update)
            total_loss += loss.item()
        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {total_loss/len(dataloader)}')
```
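
The snippet above stops at the function definition; one plausible way to kick off training with the objects defined earlier would be:

```python
# Run the training loop with the model, dataloader, optimizer, and loss defined above
train_model(model, dataloader, optimizer, loss_function, num_epochs, device)
```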



