Source code for speechbrain.lobes.models.RNNLM

"""Implementation of a Recurrent Language Model.

Authors
 * Mirco Ravanelli 2020
 * Peter Plantinga 2020
 * Ju-Chieh Chou 2020
 * Titouan Parcollet 2020
 * Abdel 2020
"""
import torch
from torch import nn
import speechbrain as sb


class RNNLM(nn.Module):
    """This model is a combination of an embedding layer, an RNN, and a DNN.
    It can be used as a recurrent neural network language model (RNNLM).

    Arguments
    ---------
    output_neurons : int
        Number of entries in the embedding table; also the number of neurons
        in the output layer.
    embedding_dim : int
        Size of the embedding vectors (default 128).
    activation : torch class
        A class used for constructing the activation layers of the DNN.
    dropout : float
        Neuron dropout rate applied to the embedding, RNN, and DNN.
    rnn_class : torch class
        The type of RNN to use in the RNNLM network (LiGRU, LSTM, GRU, RNN).
    rnn_layers : int
        The number of recurrent layers to include.
    rnn_neurons : int
        Number of neurons in each layer of the RNN.
    rnn_re_init : bool
        Whether to initialize the RNN with orthogonal initialization.
    return_hidden : bool
        Whether to also return the hidden states (default False).
    dnn_blocks : int
        The number of linear neural blocks to include.
    dnn_neurons : int
        The number of neurons in the linear layers.

    Example
    -------
    >>> model = RNNLM(output_neurons=5)
    >>> inputs = torch.Tensor([[1, 2, 3]])
    >>> outputs = model(inputs)
    >>> outputs.shape
    torch.Size([1, 3, 5])
    """

    def __init__(
        self,
        output_neurons,
        embedding_dim=128,
        activation=torch.nn.LeakyReLU,
        dropout=0.15,
        rnn_class=sb.nnet.RNN.LSTM,
        rnn_layers=2,
        rnn_neurons=1024,
        rnn_re_init=False,
        return_hidden=False,
        dnn_blocks=1,
        dnn_neurons=512,
    ):
        super().__init__()
        self.embedding = sb.nnet.embedding.Embedding(
            num_embeddings=output_neurons, embedding_dim=embedding_dim
        )
        self.dropout = nn.Dropout(p=dropout)
        self.rnn = rnn_class(
            input_size=embedding_dim,
            hidden_size=rnn_neurons,
            num_layers=rnn_layers,
            dropout=dropout,
            re_init=rnn_re_init,
        )
        self.return_hidden = return_hidden
        self.reshape = False

        # DNN: a stack of linear -> layer-norm -> activation -> dropout blocks
        self.dnn = sb.nnet.containers.Sequential(
            input_shape=[None, None, rnn_neurons]
        )
        for block_index in range(dnn_blocks):
            self.dnn.append(
                sb.nnet.linear.Linear,
                n_neurons=dnn_neurons,
                bias=True,
                layer_name="linear",
            )
            self.dnn.append(sb.nnet.normalization.LayerNorm, layer_name="norm")
            self.dnn.append(activation(), layer_name="act")
            self.dnn.append(torch.nn.Dropout(p=dropout), layer_name="dropout")

        self.out = sb.nnet.linear.Linear(
            input_size=dnn_neurons, n_neurons=output_neurons
        )
    def forward(self, x, hx=None):
        """Processes the input tensor x and returns an output tensor."""
        x = self.embedding(x)
        x = self.dropout(x)

        # If the embedded input is a 2d tensor, add a time axis.
        # This is used at inference time (single-step decoding).
        if len(x.shape) == 2:
            x = x.unsqueeze(dim=1)
            self.reshape = True

        x, hidden = self.rnn(x, hx)
        x = self.dnn(x)
        out = self.out(x)

        if self.reshape:
            out = out.squeeze(dim=1)

        if self.return_hidden:
            return out, hidden
        else:
            return out
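
A minimal usage sketch (not part of the module): assuming the class is importable
from speechbrain.lobes.models.RNNLM, passing return_hidden=True lets a caller carry
the RNN state across calls, e.g. for step-by-step autoregressive scoring. The token
values and shapes below are illustrative only.

    import torch
    from speechbrain.lobes.models.RNNLM import RNNLM

    model = RNNLM(output_neurons=5, return_hidden=True)

    # Full-sequence scoring: input token indices of shape (batch, time).
    tokens = torch.tensor([[1, 2, 3]])
    logits, hidden = model(tokens)          # logits: (1, 3, 5)

    # Single-step decoding: a (batch,) input is treated as one time step,
    # and the returned hidden state is fed back on the next call.
    step = torch.tensor([3])
    logits, hidden = model(step, hx=hidden)  # logits: (1, 5)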