Source code for opennmt.encoders.conv_encoder

"""Define convolution-based encoders."""

import tensorflow as tf

from opennmt.encoders.encoder import Encoder
from opennmt.layers import common, position


class ConvEncoder(Encoder):
    """Convolutional sequence encoder built from two parallel Conv1D stacks.

    Follows the architecture described in https://arxiv.org/abs/1611.02344:
    the same (optionally position-encoded) inputs are fed to two independent
    stacks of 1D convolutions, referred to as CNN-a and CNN-c.
    """

    def __init__(
        self,
        num_layers_a,
        num_layers_c,
        num_units,
        kernel_size=3,
        dropout=0.3,
        position_encoder_class=position.PositionEmbedder,
    ):
        """Initializes the parameters of the encoder.

        Args:
          num_layers_a: The number of layers in CNN-a.
          num_layers_c: The number of layers in CNN-c.
          num_units: The number of output filters.
          kernel_size: The kernel size.
          dropout: The probability to drop units from the inputs.
          position_encoder_class: The :class:`opennmt.layers.PositionEncoder`
            class to use for position encoding (or a callable that returns an
            instance). Pass ``None`` to disable position encoding.
        """
        super().__init__()
        self.dropout = dropout

        # Position encoding is optional: no encoder is instantiated when the
        # class (or factory) is None.
        self.position_encoder = (
            position_encoder_class() if position_encoder_class is not None else None
        )

        def _build_stack(depth):
            # Every layer uses "same" padding, so the number of steps is
            # preserved from one layer to the next.
            return [
                tf.keras.layers.Conv1D(num_units, kernel_size, padding="same")
                for _ in range(depth)
            ]

        # CNN-a layers are created before CNN-c layers.
        self.cnn_a = _build_stack(num_layers_a)
        self.cnn_c = _build_stack(num_layers_c)

    def call(self, inputs, sequence_length=None, training=None):
        """Encodes the inputs with both convolution stacks.

        Args:
          inputs: The input sequence to encode.
          sequence_length: The length of each sequence. It is not used in the
            computation and is returned unchanged.
          training: Forwarded to the dropout helper.

        Returns:
          A tuple ``(outputs, state, sequence_length)`` where ``outputs`` is
          the output of the CNN-a stack and ``state`` is the mean of the CNN-c
          stack output along axis 1. The mean is taken over every step; no
          masking based on ``sequence_length`` is applied.
        """
        features = inputs
        if self.position_encoder is not None:
            features = self.position_encoder(features)
        features = common.dropout(features, self.dropout, training=training)

        # Both stacks read the same dropped-out features.
        encoded_a = _cnn_stack(self.cnn_a, features)
        encoded_c = _cnn_stack(self.cnn_c, features)

        pooled_state = tf.reduce_mean(encoded_c, axis=1)
        return encoded_a, pooled_state, sequence_length


def _cnn_stack(layers, inputs):
    """Runs the inputs through a stack of layers with tanh activations.

    The first layer is applied directly. Every following layer adds its own
    input back to its output (a residual connection) before the tanh.

    Args:
      layers: An iterable of callables applied in order.
      inputs: The value fed to the first layer.

    Returns:
      The activation of the last layer, or ``inputs`` unchanged when
      ``layers`` is empty.
    """
    remaining = iter(layers)
    try:
        first_layer = next(remaining)
    except StopIteration:
        # No layer to apply: the inputs pass through untouched.
        return inputs

    # No residual connection on the first layer.
    hidden = tf.tanh(first_layer(inputs))

    for layer in remaining:
        projected = layer(hidden)
        # Add residual connections past the first layer.
        projected += hidden
        hidden = tf.tanh(projected)
    return hidden