# Source code for onmt.translate.greedy_search

import torch
import torch.nn.functional as F

from onmt.translate.decode_strategy import DecodeStrategy

def sample_topp(logits, keep_topp):
sorted_logits, sorted_indices = torch.sort(logits,
descending=True,
dim=1)

cumulative_probs = torch.cumsum(F.softmax(sorted_logits,
dim=-1), dim=-1)
sorted_indices_to_keep = cumulative_probs.lt(keep_topp)

# keep indices until overflowing p
last_included = cumsum_mask[:, -1:]
last_included.clamp_(0, sorted_indices_to_keep.size() - 1)
sorted_indices_to_keep = sorted_indices_to_keep.scatter_(
1, last_included, 1)

# Set all logits that are not in the top-p to -10000.
# This puts the probabilities close to 0.
keep_indices = sorted_indices_to_keep.scatter(
1,
sorted_indices,
sorted_indices_to_keep,
)

def sample_topk(logits, keep_topk):
    """Mask every logit that is strictly below the k-th best one in its row.

    Args:
        logits (FloatTensor): Shaped ``(batch_size, vocab_size)``.
        keep_topk (int): Number of highest-scoring entries to keep per row.

    Returns:
        FloatTensor: A new tensor shaped like ``logits`` in which every
        logit lower than the row's k-th best value is replaced by -10000.
        Entries tied with the k-th best value are kept. ``logits`` itself
        is not modified.
    """
    top_values, _ = torch.topk(logits, keep_topk, dim=1)
    # Per-row threshold, expanded to one copy per vocabulary column.
    kth_best = top_values[:, -1].view([-1, 1])
    kth_best = kth_best.repeat([1, logits.shape[1]]).float()

    # Set all logits that are not in the top-k to -10000.
    # This puts the probabilities close to 0.
    ignore = torch.lt(logits, kth_best)
    return logits.masked_fill(ignore, -10000)

def sample_with_temperature(logits, sampling_temp, keep_topk, keep_topp):
    """Select next tokens randomly from the top k possible next tokens.

    Samples from a categorical distribution over the ``keep_topk`` words using
    the category probabilities ``logits / sampling_temp``.

    Args:
        logits (FloatTensor): Shaped ``(batch_size, vocab_size)``.
            These can be logits (``(-inf, inf)``) or log-probs
            (``(-inf, 0]``). (The distribution actually uses the
            log-probabilities ``logits - logits.logsumexp(-1)``, which
            equals the logits if they are log-probabilities summing to 1.)
        sampling_temp (float): Used to scale down logits. The higher the
            value, the more likely it is that a non-max word will be
            sampled.
        keep_topk (int): This many words could potentially be chosen. The
            other logits are set to have probability 0. Values <= 0
            disable top-k filtering.
        keep_topp (float): Keep most likely words until the cumulated
            probability is greater than p. If used with keep_topk: both
            conditions will be applied. Values <= 0 disable top-p
            filtering.

    Returns:
        (LongTensor, FloatTensor):

        * topk_ids: Shaped ``(batch_size, 1)``. These are
          the sampled word indices in the output vocab.
        * topk_scores: Shaped ``(batch_size, 1)``. These
          are essentially ``(logits / sampling_temp)[topk_ids]``.
    """

    if sampling_temp == 0.0 or keep_topk == 1:
        # For temp=0.0, take the argmax to avoid divide-by-zero errors.
        # keep_topk=1 is also equivalent to argmax.
        topk_scores, topk_ids = logits.topk(1, dim=-1)
        if sampling_temp > 0:
            topk_scores /= sampling_temp
    else:
        logits = torch.div(logits, sampling_temp)
        if keep_topp > 0:
            logits = sample_topp(logits, keep_topp)
        if keep_topk > 0:
            logits = sample_topk(logits, keep_topk)
        dist = torch.distributions.Categorical(logits=logits)
        topk_ids = dist.sample().view(-1, 1)
        # Scores are read from the temperature-scaled (and masked) logits.
        topk_scores = logits.gather(dim=1, index=topk_ids)
    return topk_ids, topk_scores

class GreedySearch(DecodeStrategy):
    """Select next tokens randomly from the top k possible next tokens.

    The ``scores`` attribute's lists are the score, after applying temperature,
    of the final prediction (either EOS or the final token in the event
    that ``max_length`` is reached)

    Args:
        pad (int): See base.
        bos (int): See base.
        eos (int): See base.
        unk (int): See base.
        batch_size (int): See base.
        global_scorer (onmt.translate.GNMTGlobalScorer): Scorer instance.
        min_length (int): See base.
        block_ngram_repeat (int): See base.
        exclusion_tokens (set[int]): See base.
        return_attention (bool): See base.
        max_length (int): See base.
        sampling_temp (float): See
            :func:`~onmt.translate.greedy_search.sample_with_temperature()`.
        keep_topk (int): See
            :func:`~onmt.translate.greedy_search.sample_with_temperature()`.
        keep_topp (float): See
            :func:`~onmt.translate.greedy_search.sample_with_temperature()`.
        beam_size (int): Number of beams to use.
        ban_unk_token (Boolean): See base.
    """

    def __init__(self, pad, bos, eos, unk, batch_size, global_scorer,
                 min_length, block_ngram_repeat, exclusion_tokens,
                 return_attention, max_length, sampling_temp, keep_topk,
                 keep_topp, beam_size, ban_unk_token):
        super(GreedySearch, self).__init__(
            pad, bos, eos, unk, batch_size, beam_size, global_scorer,
            min_length, block_ngram_repeat, exclusion_tokens,
            return_attention, max_length, ban_unk_token)
        self.sampling_temp = sampling_temp
        self.keep_topk = keep_topk
        self.keep_topp = keep_topp
        # Scores of the tokens picked at the latest step (set by advance()).
        self.topk_scores = None
        self.beam_size = beam_size

    def initialize(self, memory_bank, src_lengths, src_map=None, device=None,
                   target_prefix=None):
        """Initialize for decoding.

        Returns:
            tuple: ``(fn_map_state, memory_bank, memory_lengths, src_map)``
            where the first, second and fourth items come from
            ``initialize_tile`` and ``memory_lengths`` is read from ``self``
            after the base class initialization.
        """
        (fn_map_state, memory_bank,
         src_map, target_prefix) = self.initialize_tile(
            memory_bank, src_lengths, src_map, target_prefix)
        if device is None:
            device = self.get_device_from_memory_bank(memory_bank)

        super(GreedySearch, self).initialize(
            memory_bank, src_lengths, src_map, device, target_prefix)
        self.select_indices = torch.arange(
            self.batch_size*self.beam_size, dtype=torch.long, device=device)
        self.original_batch_idx = fn_map_state(torch.arange(
            self.batch_size, dtype=torch.long, device=device), dim=0)
        # Running sum of the picked-token scores, one row per hypothesis.
        self.beams_scores = torch.zeros((self.batch_size*self.beam_size, 1),
                                        dtype=torch.float, device=device)
        return fn_map_state, memory_bank, self.memory_lengths, src_map

    @property
    def current_predictions(self):
        """Last token of every alive sequence."""
        return self.alive_seq[:, -1]

    @property
    def batch_offset(self):
        """Alias of ``select_indices``."""
        return self.select_indices

    def _pick(self, log_probs):
        """Function used to pick next tokens.

        Args:
            log_probs (FloatTensor): ``(batch_size, vocab_size)``.

        Returns:
            (LongTensor, FloatTensor): ``(topk_ids, topk_scores)`` as returned
            by ``sample_with_temperature``.
        """
        # maybe fix some prediction at this step by modifying log_probs
        log_probs = self.target_prefixing(log_probs)
        topk_ids, topk_scores = sample_with_temperature(
            log_probs, self.sampling_temp, self.keep_topk, self.keep_topp)

        return topk_ids, topk_scores

    def align_select_indices(self):
        """Reset ``select_indices`` when its length differs from the number
        of entries in ``is_finished``.

        In that case ``select_indices`` becomes ``0..n-1`` where ``n`` is its
        current length.
        """
        nb_finished_beams = (self.is_finished.view(-1).size(0) -
                             self.select_indices.size(0))
        if nb_finished_beams:
            self.select_indices = torch.arange(
                self.select_indices.size(0), dtype=torch.long,
                device=self.select_indices.device)

    def advance(self, log_probs, attn):
        """Select next tokens randomly from the top k possible next tokens.

        Args:
            log_probs (FloatTensor): Shaped ``(batch_size, vocab_size)``.
                These can be logits (``(-inf, inf)``) or log-probs
                (``(-inf, 0]``). (The distribution actually uses the
                log-probabilities ``logits - logits.logsumexp(-1)``,
                which equals the logits if they are log-probabilities summing
                to 1.)
            attn (FloatTensor): Shaped ``(1, B, inp_seq_len)``.
        """

        self.align_select_indices()

        self.ensure_min_length(log_probs)
        self.ensure_unk_removed(log_probs)
        self.block_ngram_repeats(log_probs)

        topk_ids, self.topk_scores = self._pick(log_probs)
        self.beams_scores += self.topk_scores

        self.is_finished = topk_ids.eq(self.eos)

        self.alive_seq = torch.cat([self.alive_seq, topk_ids], -1)
        if self.return_attention:
            if self.alive_attn is None:
                self.alive_attn = attn
            else:
                # Attention is accumulated along the step dimension (dim 0).
                self.alive_attn = torch.cat([self.alive_attn, attn], 0)
        self.ensure_max_length()

    def update_finished(self):
        """Finalize scores and predictions.

        Every finished row is stored as a ``(score, pred, attention)``
        hypothesis under its original batch index, where ``score`` is the
        accumulated score divided by the length penalty. When all rows are
        finished, the hypotheses are sorted by score (best first) and copied
        into ``scores``, ``predictions`` and ``attention``. Otherwise the
        per-row state is narrowed down to the rows that are still alive.
        """
        # shape: (sum(self.is_finished), 1)
        finished_batches = self.is_finished.view(-1).nonzero(as_tuple=False)
        step = len(self)
        length_penalty = self.global_scorer.length_penalty(
            step, alpha=self.global_scorer.alpha)

        for b in finished_batches.view(-1):
            b_orig = self.original_batch_idx[b]
            score = self.beams_scores[b, 0]/length_penalty
            # Drop the first token of the sequence.
            pred = self.alive_seq[b, 1:]
            attention = (
                self.alive_attn[:, b, :self.memory_lengths[b]]
                if self.alive_attn is not None else [])
            self.hypotheses[b_orig].append((score, pred, attention))
        self.done = self.is_finished.all()
        if self.done:
            for b in range(self.batch_size):
                # Sort on the score only: comparing whole tuples would fall
                # through to comparing the prediction tensors on ties.
                best_hyp = sorted(
                    self.hypotheses[b], key=lambda x: x[0], reverse=True)
                for score, pred, attn in best_hyp:
                    self.scores[b].append(score)
                    self.predictions[b].append(pred)
                    self.attention[b].append(attn)
            return
        is_alive = ~self.is_finished.view(-1)
        self.alive_seq = self.alive_seq[is_alive]
        self.beams_scores = self.beams_scores[is_alive]
        if self.alive_attn is not None:
            self.alive_attn = self.alive_attn[:, is_alive]
        self.select_indices = is_alive.nonzero(as_tuple=False).view(-1)
        self.original_batch_idx = self.original_batch_idx[is_alive]
        self.maybe_update_target_prefix(self.select_indices)

class GreedySearchLM(GreedySearch):
    """Greedy search variant for the language-model task.

    It behaves like ``GreedySearch`` but also keeps ``memory_lengths`` in
    step with generation: the lengths grow by one at every ``advance`` and
    are filtered down to the alive rows in ``update_finished``.
    """

    def update_finished(self):
        """Finalize finished rows, then drop their ``memory_lengths``."""
        super(GreedySearchLM, self).update_finished()
        self.update_memory_lengths()

    def update_memory_lengths(self):
        """Keep only the ``memory_lengths`` entries of unfinished rows."""
        is_alive = ~self.is_finished.view(-1)
        self.memory_lengths = self.memory_lengths[is_alive]

    def advance(self, log_probs, attn):
        """Pick the next tokens and grow ``memory_lengths`` by one.

        Args:
            log_probs (FloatTensor): Forwarded to ``GreedySearch.advance``.
            attn (FloatTensor): Forwarded to ``GreedySearch.advance``.
        """
        # The parent step does the actual token selection and bookkeeping;
        # without it no token would ever be appended to the sequences.
        super(GreedySearchLM, self).advance(log_probs, attn)

        # in LM task memory_lengths is associated with currently generated src
        # and therefore needs to follow the generation
        self.memory_lengths += 1

    def initialize(self, src, src_lengths, src_map=None, device=None,
                   target_prefix=None):
        """Initialize for decoding.

        No memory bank is passed to the parent (``None`` is used instead), so
        the device defaults to ``src.device`` when not given.

        Returns:
            tuple: ``(fn_map_state, src, memory_lengths, src_map)``.
        """

        if device is None:
            device = src.device

        (fn_map_state, _, self.memory_lengths,
         src_map) = super(GreedySearchLM, self).initialize(
            None, src_lengths, src_map, device, target_prefix)

        return fn_map_state, src, self.memory_lengths, src_map