"""Base class for models."""
import abc
import contextlib
import tensorflow as tf
from opennmt import optimizers, schedules
from opennmt.utils import exporters, losses, misc
class Model(tf.keras.layers.Layer):
    """Base class for models."""

    def __init__(self, examples_inputter):
        super().__init__()
        # Inputter producing the training examples (features and optionally labels).
        self.examples_inputter = examples_inputter
        # User-defined hyperparameters, filled by initialize().
        self.params = {}
        # Whether the forward pass may be compiled with XLA (see set_jit_compile).
        self._jit_compile = False

    @property
    def unsupervised(self):
        """Unsupervised model (i.e. no labels inputter is defined)."""
        return self.labels_inputter is None

    @property
    def features_inputter(self):
        """The inputter producing features."""
        # For single-inputter (unsupervised) models, the examples inputter
        # itself produces the features.
        return getattr(
            self.examples_inputter, "features_inputter", self.examples_inputter
        )

    @property
    def labels_inputter(self):
        """The inputter producing labels, or ``None`` for unsupervised models."""
        return getattr(self.examples_inputter, "labels_inputter", None)

    def __repr__(self):
        """Returns a description of the model and its submodules."""
        return misc.describe_layer(self, name="model")

    def auto_config(self, num_replicas=1):
        """Returns automatic configuration values specific to this model.

        Args:
          num_replicas: The number of synchronous model replicas used for the
            training.

        Returns:
          A partial training configuration.
        """
        _ = num_replicas
        return {}

    def initialize(self, data_config, params=None):
        """Initializes the model from the data configuration.

        Args:
          data_config: A dictionary containing the data configuration set
            by the user (e.g. vocabularies, tokenization, pretrained embeddings,
            etc.).
          params: A dictionary of hyperparameters.
        """
        if params is None:
            params = {}
        self.params.update(params)
        dropout = self.params.get("dropout")
        if dropout is not None:
            # Propagate the global dropout value to all submodules.
            misc.set_dropout(self, dropout)
        self.examples_inputter.initialize(data_config)

    def set_jit_compile(self, enable):
        """Allow (or not) this model to use XLA compilation."""
        self._jit_compile = enable

    def build(self, input_shape):
        """Builds the model, freezing the layers configured in ``freeze_layers``."""
        freeze_layers = self.params.get("freeze_layers")
        if freeze_layers:
            # Accept both a single layer path and a list of paths.
            if not isinstance(freeze_layers, list):
                freeze_layers = [freeze_layers]
            for layer_path in freeze_layers:
                layer = misc.index_structure(self, layer_path)
                layer.trainable = False
                misc.set_dropout(layer, 0)  # Disable dropout in frozen layers.
        self.examples_inputter.build(input_shape)
        self.built = True

    def split_features_labels(self, batch):
        """Splits a batch from the dataset into features and labels."""
        # Unsupervised models use the same structure for both.
        return batch if not self.unsupervised else (batch, batch)

    def __call__(self, features, labels=None, training=None, step=None):
        """Runs the model.

        Args:
          features: A nested structure of features ``tf.Tensor``.
          labels: A nested structure of labels ``tf.Tensor``.
          training: If ``True``, run in training mode.
          step: The current training step.

        Returns:
          A tuple containing,

          - The model outputs (usually unscaled probabilities).
          - The model predictions.
        """
        if training and self._jit_compile:
            # Remove string tensors which are not supported by XLA.
            features, labels = misc.filter_features(
                (features, labels),
                lambda tensor: tensor.dtype != tf.string,
            )
            call_method = self._forward_xla
        else:
            call_method = self._forward
        outputs, predictions = call_method(
            features,
            labels,
            training,
            step,
        )
        # Include the example index vector in the outputs.
        index = features.get("index") if isinstance(features, dict) else None
        if index is not None:
            if isinstance(outputs, dict):
                outputs["index"] = index
            if isinstance(predictions, dict):
                predictions["index"] = index
        return outputs, predictions

    # XLA-compiled variant of the forward pass, used when JIT is enabled.
    @tf.function(jit_compile=True)
    def _forward_xla(self, features, labels, training, step):
        return self._forward(features, labels, training, step)

    def _forward(self, features, labels, training, step):
        # Delegate to the Keras layer call machinery, which dispatches to call().
        return super().__call__(features, labels=labels, training=training, step=step)

    @abc.abstractmethod
    def call(self, features, labels=None, training=None, step=None):
        """Runs the model.

        Args:
          features: A nested structure of features ``tf.Tensor``.
          labels: A nested structure of labels ``tf.Tensor``.
          training: If ``True``, run in training mode.
          step: The current training step.

        Returns:
          A tuple containing,

          - The model outputs (usually unscaled probabilities).
          - The model predictions.
        """
        raise NotImplementedError()

    def infer(self, features):
        """Runs inference on :obj:`features`.

        This is a small convenience wrapper around
        :meth:`opennmt.models.Model.call`.

        Args:
          features: A nested structure of features ``tf.Tensor``.

        Returns:
          The model predictions.
        """
        _, predictions = self(features)
        return predictions

    def evaluate(self, features, labels):
        """Evaluates :obj:`features` predictions against :obj:`labels`.

        Args:
          features: A nested structure of features ``tf.Tensor``.
          labels: A nested structure of labels ``tf.Tensor``.

        Returns:
          A tuple with the loss and the model predictions.
        """
        outputs, predictions = self(features, labels=labels)
        loss = self.compute_loss(outputs, labels, training=False)
        return loss, predictions

    def score(self, features, labels):
        """Scores labels.

        Args:
          features: A nested structure of features ``tf.Tensor``.
          labels: A nested structure of labels ``tf.Tensor``.

        Returns:
          The score results.

        Raises:
          NotImplementedError: If the model does not support scoring.
        """
        raise NotImplementedError("This model does not define a score function")

    def train(self, features, labels, optimizer, loss_scale=None):
        """Computes and applies the gradients for a batch of examples.

        Args:
          features: A nested structure of features ``tf.Tensor``.
          labels: A nested structure of labels ``tf.Tensor``.
          optimizer: The optimizer instance
            (``tf.keras.mixed_precision.LossScaleOptimizer`` is supported).
          loss_scale: An optional loss scaling factor.

        Returns:
          The loss.
        """
        loss, gradients = self.compute_gradients(
            features,
            labels,
            optimizer,
            loss_scale=loss_scale,
        )
        optimizer.apply_gradients(list(zip(gradients, self.trainable_weights)))
        return loss

    def compute_gradients(
        self,
        features,
        labels,
        optimizer,
        loss_scale=None,
        normalize_loss=True,
    ):
        """Computes the gradients for a batch of examples.

        Args:
          features: A nested structure of features ``tf.Tensor``.
          labels: A nested structure of labels ``tf.Tensor``.
          optimizer: The optimizer instance
            (``tf.keras.mixed_precision.LossScaleOptimizer`` is supported).
          loss_scale: An optional loss scaling factor.
          normalize_loss: Normalize the loss by the sample size.

        Returns:
          A tuple containing,

          - The loss.
          - The gradients.
          - The sample size, if :obj:`normalize_loss` is disabled.
        """
        with tf.GradientTape() as tape:
            loss, sample_size = self.compute_training_loss(
                features,
                labels,
                step=optimizer.iterations,
            )
            if normalize_loss and sample_size is not None:
                loss /= sample_size
            if loss_scale is not None:
                # Additional user-provided scaling (e.g. gradient accumulation).
                loss /= loss_scale
            if isinstance(optimizer, tf.keras.mixed_precision.LossScaleOptimizer):
                # Mixed precision: scale the loss up before differentiation to
                # avoid float16 gradient underflow.
                scaled_loss = optimizer.get_scaled_loss(loss)
            else:
                scaled_loss = loss
        gradients = tape.gradient(scaled_loss, self.trainable_weights)
        if isinstance(optimizer, tf.keras.mixed_precision.LossScaleOptimizer):
            gradients = optimizer.get_unscaled_gradients(gradients)
        if normalize_loss:
            return loss, gradients
        return loss, gradients, sample_size

    def compute_training_loss(self, features, labels, step=None):
        """Computes the training loss for a batch of examples.

        Args:
          features: A nested structure of features ``tf.Tensor``.
          labels: A nested structure of labels ``tf.Tensor``.
          step: The current training step.

        Returns:
          A tuple containing,

          - The cumulated loss.
          - The sample size (or ``None`` if not returned by the model).
        """
        outputs, _ = self(features, labels, training=True, step=step)
        loss = self.compute_loss(outputs, labels, training=True)
        if isinstance(loss, tuple):
            # compute_loss may return (numerator, denominator, ...); see its docstring.
            sample_size = loss[1]
            loss = loss[0]
        else:
            sample_size = None
        loss = self.regularize_loss(loss, variables=self.trainable_weights)
        return loss, sample_size

    @abc.abstractmethod
    def compute_loss(self, outputs, labels, training=True):
        """Computes the loss.

        Args:
          outputs: The model outputs (usually unscaled probabilities).
          labels: The dict of labels ``tf.Tensor``.
          training: If ``True``, compute the loss for training.

        Returns:
          The loss or a tuple ``(numerator, train_denominator, stats_denominator)``
          to use a different normalization for training compared to reporting (e.g.
          batch-normalized for training vs. token-normalized for reporting).
        """
        raise NotImplementedError()

    def regularize_loss(self, loss, variables=None):
        """Regularizes the loss.

        Args:
          loss: The loss.
          variables: List of variables. Defaults to all trainable variables.

        Returns:
          The regularized loss.
        """
        if variables is None:
            variables = self.trainable_variables
        regularization = self.params.get("regularization")
        if regularization is not None:
            loss += losses.regularization_penalty(
                regularization["type"], regularization["scale"], variables
            )
        return loss

    def get_metrics(self):
        """Returns the metrics for this model.

        Returns:
          A dictionary of ``tf.keras.metrics.Metric`` metrics.
        """
        return None

    def update_metrics(self, metrics, predictions, labels):
        """Computes additional metrics on the predictions.

        Args:
          metrics: A dictionary of metrics to update.
          predictions: The model predictions.
          labels: The dict of labels ``tf.Tensor``.
        """
        return

    def get_optimizer(self):
        """Returns the optimizer for this model.

        Returns:
          A ``tf.keras.optimizers.legacy.Optimizer`` instance or ``None`` if no optimizer
          is configured.
        """
        params = self.params
        optimizer_name = params.get("optimizer")
        if optimizer_name is None:
            return None
        schedule_type = params.get("decay_type")
        if schedule_type is None:
            # Constant learning rate.
            learning_rate = tf.constant(params["learning_rate"], dtype=tf.float32)
        else:
            schedule_params = params.get("decay_params", {})
            learning_rate = schedules.make_learning_rate_schedule(
                params.get("learning_rate"),
                schedule_type,
                schedule_params=schedule_params,
                schedule_step_duration=params.get("decay_step_duration", 1),
                start_step=params.get("start_decay_steps", 0),
                minimum_learning_rate=params.get("minimum_learning_rate", 0),
            )
        optimizer_params = params.get("optimizer_params")
        if optimizer_params is None:
            optimizer_params = {}
        optimizer = optimizers.make_optimizer(
            optimizer_name, learning_rate, **optimizer_params
        )
        return optimizer

    def serve_function(self):
        """Returns a function for serving this model.

        Returns:
          A ``tf.function``.
        """
        # Set name attribute of the input TensorSpec.
        input_signature = {
            name: tf.TensorSpec.from_spec(spec, name=name)
            for name, spec in self.features_inputter.input_signature().items()
        }

        @tf.function(input_signature=(input_signature,))
        def _run(features):
            features = self.features_inputter.make_features(features=features.copy())
            _, predictions = self(features)
            return predictions

        return _run

    @property
    def tflite_mode(self):
        """Returns ``True`` if the model is being traced for TensorFlow Lite."""
        return getattr(self, "_tflite_mode", False)

    @contextlib.contextmanager
    def enable_tflite_mode(self):
        """Enable TensorFlow Lite mode for this model."""
        # Flag this layer and all sublayers so they can adapt their behavior
        # while being traced, then clean up on exit.
        layers = [self] + list(self.submodules)
        for layer in layers:
            setattr(layer, "_tflite_mode", True)
        yield
        for layer in layers:
            delattr(layer, "_tflite_mode")

    def tflite_function(self):
        """Returns the inference function that should be used for TensorFlow Lite.

        Returns:
          A ``tf.function``.
        """
        with self.enable_tflite_mode():
            return self.serve_function()

    def export(self, export_dir, exporter=None):
        """Exports the model for serving.

        Args:
          export_dir: The output directory.
          exporter: A :class:`opennmt.utils.Exporter` instance. Defaults to
            :class:`opennmt.utils.SavedModelExporter`.
        """
        if exporter is None:
            exporter = exporters.SavedModelExporter()
        exporter.export(self, export_dir)

    def create_variables(self, optimizer=None):
        """Creates the model variables by running it once.

        Args:
          optimizer: If set, also create the optimizer variables.
        """
        # Create input features from the input signatures. We remove the leading
        # batch dimension as sometimes assumed by make_features methods and set
        # unspecified dimensions to 1.
        features = tf.nest.map_structure(
            lambda spec: tf.fill(
                [dim or 1 for dim in spec.shape.as_list()[1:]],
                tf.constant("a" if spec.dtype is tf.string else 1, dtype=spec.dtype),
            ),
            self.examples_inputter.input_signature(),
        )
        features = self.examples_inputter.make_features(features=features)
        # Add the batch dimension back before calling the model.
        features = tf.nest.map_structure(lambda x: tf.expand_dims(x, 0), features)
        features, labels = self.split_features_labels(features)
        _ = self(features, labels=labels, training=True, step=0)
        if optimizer is not None:
            optimizer._create_all_weights(self.trainable_variables)

    def transfer_weights(
        self, new_model, new_optimizer=None, optimizer=None, ignore_weights=None
    ):
        """Transfers weights (and optionally optimizer slots) from this model to
        another.

        This default implementation assumes that :obj:`self` and :obj:`new_model`
        have exactly the same variables. Subclasses can override this method to
        transfer weights to another model type or architecture. For example,
        :class:`opennmt.models.SequenceToSequence` can transfer weights to a model
        with a different vocabulary.

        All model and optimizer variables are expected to be initialized.

        Args:
          new_model: The new model to transfer weights to.
          new_optimizer: The new optimizer.
          optimizer: The optimizer used for the current model.
          ignore_weights: Optional list of weights to not transfer.
        """
        if type(self) is not type(new_model):
            raise ValueError(
                "Transferring weights to another model type is not supported"
            )
        if ignore_weights is None:
            ignore_weights = set()
        # Compare by variable reference: tf.Variable objects are not hashable.
        ignore_weights_ref = set(weight.ref() for weight in ignore_weights)
        weights = self.weights
        new_weights = new_model.weights
        for weight, new_weight in zip(weights, new_weights):
            if new_weight.ref() not in ignore_weights_ref:
                new_weight.assign(weight)
                if new_optimizer is not None and optimizer is not None:
                    # Also copy the optimizer slot variables (e.g. Adam moments)
                    # for slots that exist in both optimizers.
                    for slot_name in new_optimizer.get_slot_names():
                        if slot_name not in optimizer.get_slot_names():
                            continue
                        new_slot = new_optimizer.get_slot(new_weight, slot_name)
                        slot = optimizer.get_slot(weight, slot_name)
                        new_slot.assign(slot)

    def map_v1_weights(self, weights):
        """Maps current weights to V1 weights.

        Args:
          weights: A nested dictionary following the scope names used in V1. The
            leaves are tuples with the variable value and optionally the optimizer
            slots.

        Returns:
          A list of tuples associating variables and their V1 equivalent.
        """
        raise NotImplementedError("This model can not restore V1 checkpoints")

    def export_assets(self, asset_dir):
        """Exports additional assets used by this model.

        Args:
          asset_dir: The directory where assets can be written.

        Returns:
          A dictionary of additional assets.
        """
        return self.examples_inputter.export_assets(asset_dir)

    def visualize(self, log_dir):
        """Setups model visualization (e.g. word embedding projections).

        Args:
          log_dir: The log directory.
        """
        self.features_inputter.visualize(self, log_dir)
        if not self.unsupervised:
            self.labels_inputter.visualize(self, log_dir)

    def print_prediction(self, prediction, params=None, stream=None):
        """Prints the model prediction.

        Args:
          prediction: The model prediction (same structure as the second output of
            :meth:`opennmt.models.Model.call`).
          params: (optional) Dictionary of formatting parameters.
          stream: (optional) The stream to print to.
        """
        _write_lines(self.format_prediction(prediction, params=params), stream)

    def print_score(self, score, params=None, stream=None):
        """Prints the score result.

        Args:
          score: The score result (same structure as the output of
            :meth:`opennmt.models.Model.score`).
          params: (optional) Dictionary of formatting parameters.
          stream: (optional) The stream to print to.
        """
        _write_lines(self.format_score(score, params=params), stream)
def _write_lines(lines, stream):
    """Writes one line or a list of lines to :obj:`stream` as bytes."""
    output_lines = lines if isinstance(lines, list) else [lines]
    for output_line in output_lines:
        misc.print_as_bytes(output_line, stream=stream)
class SequenceGenerator(Model):
    """Base class for models generating sequences."""

    @property
    def decoder_inputter(self):
        """The inputter used on the decoder side."""
        if self.unsupervised:
            return self.examples_inputter
        return self.labels_inputter

    def score(self, features, labels):
        """Scores the target sequences in :obj:`labels` against the model."""
        outputs, _ = self(features, labels=labels)
        # Per-token negative log likelihood of the gold target tokens.
        token_cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels["ids_out"], outputs["logits"]
        )
        # Mask out padding positions before summing over the time dimension.
        mask = tf.sequence_mask(labels["length"], dtype=token_cross_entropy.dtype)
        sequence_scores = tf.reduce_sum(token_cross_entropy * mask, axis=1)
        results = {
            "cross_entropy": token_cross_entropy,
            "score": sequence_scores,
            "tokens": labels["tokens"],
            "length": self.decoder_inputter.get_length(
                labels, ignore_special_tokens=True
            ),
        }
        # Forward optional model outputs when they are available.
        for forwarded_key in ("attention", "index"):
            forwarded_value = outputs.get(forwarded_key)
            if forwarded_value is not None:
                results[forwarded_key] = forwarded_value
        return results