Source code for bella.models.tdlstm

'''
Module contains all of the classes that represent Machine Learning models
that are within `Tang et al. 2016 paper \
<https://aclanthology.info/papers/C16-1311/c16-1311>`_:

1. :py:class:`bella.models.tdlstm.LSTM` -- LSTM model.
2. :py:class:`bella.models.tdlstm.TDLSTM` -- TDLSTM model.
3. :py:class:`bella.models.tdlstm.TCLSTM` -- TCLSTM model.
'''

from typing import Dict, Callable, Any, List, Union, Tuple

import numpy as np
import keras
from keras import models, optimizers, layers
from keras.utils import to_categorical

import bella
from bella.models.base import KerasModel
from bella.contexts import context
from bella.neural_pooling import matrix_median


class LSTM(KerasModel):
    '''
    Single forward LSTM text classifier built with keras.

    Attributes:

    1. pad_size -- The max number of tokens to use per sequence. If -1
       use the text sequence in the training data that has the most tokens as
       the pad size.
    2. embedding_layer_kwargs -- Keyword arguments to pass to the embedding
       layer which is a :py:class:`keras.layers.Embedding` object. Can be
       None if no parameters to pass.
    3. lstm_layer_kwargs -- Keyword arguments to pass to the lstm layer(s)
       which is a :py:class:`keras.layers.LSTM` object. Can be
       None if no parameters to pass.
    4. dense_layer_kwargs -- Keyword arguments to pass to the dense (final
       layer) which is a :py:class:`keras.layers.Dense` object. Can be
       None if no parameters to pass.

    Methods:

    1. model_parameters -- Returns a dictionary containing the attributes of
       the class instance, the parameters to give to the class constructor to
       re-create this instance, and the class itself.
    2. create_training_text -- Converts the training and validation data into a
       format that the keras model can take as input.
    3. create_training_y -- Converts the training and validation target values
       from a vector of class labels into a matrix of binary values of shape
       [n_samples, n_classes].
    4. keras_model -- The model that represents this class. This is a single
       forward LSTM.
    '''

    @classmethod
    def name(cls) -> str:
        '''
        :return: The name of the machine learning model.
        '''
        return 'LSTM'

    def __repr__(self) -> str:
        '''
        Name of the machine learning model.
        '''
        return self.name()

    def __init__(self, tokeniser: Callable[[str], List[str]],
                 embeddings: 'bella.word_vectors.WordVectors',
                 reproducible: Union[int, None] = None, pad_size: int = -1,
                 lower: bool = True, patience: int = 10,
                 batch_size: int = 32, epochs: int = 300,
                 embedding_layer_kwargs: Union[Dict[str, Any], None] = None,
                 lstm_layer_kwargs: Union[Dict[str, Any], None] = None,
                 dense_layer_kwargs: Union[Dict[str, Any], None] = None,
                 optimiser: 'keras.optimizers.Optimizer' = optimizers.SGD,
                 optimiser_params: Union[Dict[str, Any], None] = None
                 ) -> None:
        '''
        :param tokeniser: Tokeniser to be used e.g. :py:meth:`str.split`.
        :param embeddings: Embedding (Word vectors) to be used e.g.
                           :py:class:`bella.word_vectors.SSWE`
        :param reproducible: Whether to be reproducible. If None then it is
                             quicker to run. Else provide a `int` that
                             will represent the random seed value.
        :param pad_size: The max number of tokens to use per sequence. If -1
                         use the text sequence in the training data that has
                         the most tokens as the pad size.
        :param lower: Whether to lower case the words being processed.
        :param patience: Number of epochs with no improvement before training
                         is stopped.
        :param batch_size: Number of samples per gradient update.
        :param epochs: Number of times to train over the entire training set
                       before stopping. If patience is set, then it may
                       stop before reaching the number of epochs specified
                       here.
        :param embedding_layer_kwargs: Keyword arguments to pass to the
                                       embedding layer which is a
                                       :py:class:`keras.layers.Embedding`
                                       object. If no parameters to pass leave
                                       as None.
        :param lstm_layer_kwargs: Keyword arguments to pass to the lstm
                                  layer(s) which is a
                                  :py:class:`keras.layers.LSTM` object. If no
                                  parameters to pass leave as None.
        :param dense_layer_kwargs: Keyword arguments to pass to the dense
                                   (final layer) which is a
                                   :py:class:`keras.layers.Dense` object. If no
                                   parameters to pass leave as None.
        :param optimiser: Optimiser to be used accepts any
                          `keras optimiser <https://keras.io/optimizers/>`_.
                          Default is :py:class:`keras.optimizers.SGD`
        :param optimiser_params: Parameters for the optimiser. If None uses
                                 default optimiser parameters.
        '''

        self.tokeniser = tokeniser
        self.embeddings = embeddings
        self.reproducible = reproducible
        self.pad_size = pad_size
        # Pad size actually applied to the data; set by `_pre_process` when
        # it is called with training=True.
        self.test_pad_size = 0
        self.lower = lower
        self.patience = patience
        self.batch_size = batch_size
        self.epochs = epochs

        # None means "no extra parameters": normalise to an empty dictionary
        # so that the values can always be unpacked with `**`.
        self.optimiser_params = ({} if optimiser_params is None
                                 else optimiser_params)
        self.embedding_layer_kwargs = ({} if embedding_layer_kwargs is None
                                       else embedding_layer_kwargs)
        self.lstm_layer_kwargs = ({} if lstm_layer_kwargs is None
                                  else lstm_layer_kwargs)
        self.dense_layer_kwargs = ({} if dense_layer_kwargs is None
                                   else dense_layer_kwargs)
        self.optimiser = optimiser
        self.model = None
        # Assigned last because the property setters above also reset it.
        self.fitted = False

    def model_parameters(self) -> Dict[str, Any]:
        '''
        Returns a dictionary containing the attributes of the class instance,
        the parameters to give to the class constructor to re-create this
        instance, and the class itself.

        This is used by the :py:meth:`save` method so that the instance can
        be re-created when loaded by the :py:meth:`load` method.

        :return: A dictionary with the keys `class`, `class_attrs` and
                 `class_params`.
        '''

        class_params = {'tokeniser': self.tokeniser,
                        'embeddings': self.embeddings,
                        'reproducible': self.reproducible,
                        'pad_size': self.pad_size,
                        'lower': self.lower,
                        'patience': self.patience,
                        'batch_size': self.batch_size, 'epochs': self.epochs,
                        'embedding_layer_kwargs': self.embedding_layer_kwargs,
                        'lstm_layer_kwargs': self.lstm_layer_kwargs,
                        'dense_layer_kwargs': self.dense_layer_kwargs,
                        'optimiser': self.optimiser,
                        'optimiser_params': self.optimiser_params}
        class_attrs = {'test_pad_size': self.test_pad_size}
        return {'class': self.__class__, 'class_attrs': class_attrs,
                'class_params': class_params}

    def _pre_process(self, data_dicts: List[Dict[str, str]],
                     training: bool = False) -> np.ndarray:
        '''
        Converts the text in the data_dicts into a matrix of shape
        [n_samples, pad_size] where each integer in the matrix represents
        the word embedding lookup. This is then used as input into the
        keras model.

        The text from the data_dicts are converted by the
        :py:meth:`process_text` method. When training, the pad size returned
        by that method is stored in `test_pad_size` so that the same size is
        used for later predictions.

        :param data_dicts: A list of dictionaries that contains a `text` field.
        :param training: Whether the text should be processed for training or
                         for prediction. prediction = False, training = True
        :return: The sequence data returned by the :py:meth:`process_text`
                 method.
        '''
        text_data = [data['text'] for data in data_dicts]
        if training:
            pad_data = self.process_text(text_data, self.pad_size)
            self.test_pad_size, sequence_data = pad_data
            return sequence_data
        _, sequence_data = self.process_text(text_data, self.test_pad_size)
        return sequence_data

    def create_training_y(self, train_y: np.ndarray, validation_y: np.ndarray,
                          ) -> Tuple[np.ndarray, np.ndarray]:
        '''
        Converts the training and validation target values from a vector of
        class labels into a matrix of binary values of shape [n_samples,
        n_classes].

        To convert the vector of classes to a matrix we use the
        :py:func:`keras.utils.to_categorical` function.

        :param train_y: Vector of class labels, shape = [n_samples]
        :param validation_y: Vector of class labels, shape = [n_samples]
        :return: A tuple of length two containing the train and validation
                 matrices respectively. The shape of each matrix is:
                 [n_samples, n_classes]
        '''
        # Use one shared class count: left to itself `to_categorical` infers
        # the width from the largest label in each vector, so a split that
        # lacks the highest class would produce a narrower matrix.
        num_classes = int(max(np.max(train_y), np.max(validation_y))) + 1
        train_y = to_categorical(train_y, num_classes).astype(np.float32)
        validation_y = to_categorical(validation_y, num_classes)\
            .astype(np.float32)
        return train_y, validation_y

    def create_training_text(self, train_data: List[Dict[str, str]],
                             validation_data: List[Dict[str, str]]
                             ) -> Tuple[np.ndarray, np.ndarray]:
        '''
        Converts the training and validation data into a format that the keras
        model can take as input.

        :param train_data: Data to be trained on. Which is a list of
                           dictionaries where each dictionary has a `text`
                           field containing text.
        :param validation_data: Data to evaluate the model at training time.
                                Which is a list of dictionaries where each
                                dictionary has a `text` field containing text.
        :return: A tuple of length two containing the train and validation
                 input that are both the output of :py:meth:`_pre_process`
        '''
        # The training data has to be processed first as it sets the pad size
        # that the validation data is then processed with.
        train_sequence = self._pre_process(train_data, training=True)
        val_sequence = self._pre_process(validation_data, training=False)
        return train_sequence, val_sequence

    def keras_model(self, num_classes: int) -> 'keras.models.Model':
        '''
        The model that represents this class. This is a single forward LSTM.

        Two keys in the layer keyword arguments are handled here instead of
        being forwarded to keras:

        1. `trainable` in `embedding_layer_kwargs` -- Whether the embedding
           layer is trainable, default True.
        2. `cell` in `lstm_layer_kwargs` -- The dimension of the LSTM,
           default is the dimension of the word embeddings.

        :param num_classes: Number of classes to predict.
        :return: Forward LSTM keras model.
        '''
        # Embeddings
        embedding_matrix = self.embeddings.embedding_matrix
        vocab_size, vector_size = embedding_matrix.shape

        # Work on copies: popping from the instance dictionaries would lose
        # `trainable` and `cell` for any later call to this method and for
        # :py:meth:`model_parameters`.
        embedding_layer_kwargs = dict(self.embedding_layer_kwargs)
        embedding_layer_trainable = embedding_layer_kwargs.pop('trainable',
                                                               True)

        lstm_layer_kwargs = dict(self.lstm_layer_kwargs)
        lstm_dimension = lstm_layer_kwargs.pop('cell', vector_size)

        dense_layer_kwargs = self.dense_layer_kwargs
        # Model layers
        input_layer = layers.Input(shape=(self.test_pad_size,),
                                   name='text_input')
        embedding_layer = layers\
                          .Embedding(input_dim=vocab_size,
                                     output_dim=vector_size,
                                     input_length=self.test_pad_size,
                                     trainable=embedding_layer_trainable,
                                     weights=[embedding_matrix],
                                     name='embedding_layer',
                                     **embedding_layer_kwargs
                                     )(input_layer)
        lstm_layer = layers.LSTM(lstm_dimension,
                                 name='lstm_layer',
                                 **lstm_layer_kwargs)(embedding_layer)
        prediction_layer = layers.Dense(num_classes, activation='softmax',
                                        name='output',
                                        **dense_layer_kwargs)(lstm_layer)

        return models.Model(inputs=input_layer, outputs=prediction_layer)

    # Each setter below resets `fitted` to False when its attribute is
    # assigned.

    @property
    def pad_size(self) -> int:
        '''
        pad_size attribute

        :return: The pad_size used in the model
        '''

        return self._pad_size

    @pad_size.setter
    def pad_size(self, value: int) -> None:
        '''
        Sets the pad_size attribute

        :param value: The value to assign to the pad_size attribute
        '''

        self.fitted = False
        self._pad_size = value

    @property
    def embedding_layer_kwargs(self) -> Dict[str, Any]:
        '''
        embedding_layer_kwargs attribute

        :return: The embedding_layer_kwargs used in the model
        '''

        return self._embedding_layer_kwargs

    @embedding_layer_kwargs.setter
    def embedding_layer_kwargs(self, value: Dict[str, Any]) -> None:
        '''
        Sets the embedding_layer_kwargs attribute

        :param value: The value to assign to the embedding_layer_kwargs
                      attribute
        '''

        self.fitted = False
        self._embedding_layer_kwargs = value

    @property
    def lstm_layer_kwargs(self) -> Dict[str, Any]:
        '''
        lstm_layer_kwargs attribute

        :return: The lstm_layer_kwargs used in the model
        '''

        return self._lstm_layer_kwargs

    @lstm_layer_kwargs.setter
    def lstm_layer_kwargs(self, value: Dict[str, Any]) -> None:
        '''
        Sets the lstm_layer_kwargs attribute

        :param value: The value to assign to the lstm_layer_kwargs
                      attribute
        '''

        self.fitted = False
        self._lstm_layer_kwargs = value

    @property
    def dense_layer_kwargs(self) -> Dict[str, Any]:
        '''
        dense_layer_kwargs attribute

        :return: The dense_layer_kwargs used in the model
        '''

        return self._dense_layer_kwargs

    @dense_layer_kwargs.setter
    def dense_layer_kwargs(self, value: Dict[str, Any]) -> None:
        '''
        Sets the dense_layer_kwargs attribute

        :param value: The value to assign to the dense_layer_kwargs
                      attribute
        '''

        self.fitted = False
        self._dense_layer_kwargs = value


class TDLSTM(LSTM):
    '''
    Target dependent model made of two LSTMs: one reads the left context of
    the target forwards and the other reads the right context backwards.
    The outputs of the two are concatenated before the output layer.

    Attributes:

    1. include_target -- Whether to include the target in the LSTM
       representations.
    '''

    @classmethod
    def name(cls) -> str:
        '''
        :return: The name of the machine learning model.
        '''
        return 'TDLSTM'

    def __repr__(self) -> str:
        '''
        Name of the machine learning model.
        '''
        return self.name()

    def __init__(self, tokeniser: Callable[[str], List[str]],
                 embeddings: 'bella.word_vectors.WordVectors',
                 reproducible: Union[int, None] = None, pad_size: int = -1,
                 lower: bool = True, patience: int = 10,
                 batch_size: int = 32, epochs: int = 300,
                 embedding_layer_kwargs: Union[Dict[str, Any], None] = None,
                 lstm_layer_kwargs: Union[Dict[str, Any], None] = None,
                 dense_layer_kwargs: Union[Dict[str, Any], None] = None,
                 optimiser: 'keras.optimizers.Optimizer' = optimizers.SGD,
                 optimiser_params: Union[Dict[str, Any], None] = None,
                 include_target: bool = True) -> None:
        '''
        :param tokeniser: Tokeniser to be used e.g. :py:meth:`str.split`.
        :param embeddings: Embedding (Word vectors) to be used e.g.
                           :py:class:`bella.word_vectors.SSWE`
        :param reproducible: Whether to be reproducible. If None then it is
                             quicker to run. Else provide a `int` that
                             will represent the random seed value.
        :param pad_size: The max number of tokens to use per sequence. If -1
                         use the text sequence in the training data that has
                         the most tokens as the pad size.
        :param lower: Whether to lower case the words being processed.
        :param patience: Number of epochs with no improvement before training
                         is stopped.
        :param batch_size: Number of samples per gradient update.
        :param epochs: Number of times to train over the entire training set
                       before stopping. If patience is set, then it may
                       stop before reaching the number of epochs specified
                       here.
        :param embedding_layer_kwargs: Keyword arguments to pass to the
                                       embedding layer which is a
                                       :py:class:`keras.layers.Embedding`
                                       object. If no parameters to pass leave
                                       as None.
        :param lstm_layer_kwargs: Keyword arguments to pass to the lstm
                                  layer(s) which is a
                                  :py:class:`keras.layers.LSTM` object. If no
                                  parameters to pass leave as None.
        :param dense_layer_kwargs: Keyword arguments to pass to the dense
                                   (final layer) which is a
                                   :py:class:`keras.layers.Dense` object. If no
                                   parameters to pass leave as None.
        :param optimiser: Optimiser to be used accepts any
                          `keras optimiser <https://keras.io/optimizers/>`_.
                          Default is :py:class:`keras.optimizers.SGD`
        :param optimiser_params: Parameters for the optimiser. If None uses
                                 default optimiser parameters.
        :param include_target: Whether to include the target in the LSTM
                               representations.
        '''

        super().__init__(tokeniser, embeddings, reproducible, pad_size, lower,
                         patience, batch_size, epochs, embedding_layer_kwargs,
                         lstm_layer_kwargs, dense_layer_kwargs, optimiser,
                         optimiser_params)

        # The left and right contexts are padded independently. The
        # `*_test_pad_size` values are set by `_pre_process` when it is called
        # with training=True.
        self.left_pad_size = pad_size
        self.left_test_pad_size = 0
        self.right_pad_size = pad_size
        self.right_test_pad_size = 0
        self.include_target = include_target

    def model_parameters(self) -> Dict[str, Any]:
        '''
        Returns a dictionary containing the attributes of the class instance,
        the parameters to give to the class constructor to re-create this
        instance, and the class itself.

        This is used by the :py:meth:`save` method so that the instance can
        be re-created when loaded by the :py:meth:`load` method.

        :return: A dictionary with the keys `class`, `class_attrs` and
                 `class_params`.
        '''

        attributes = super().model_parameters()
        # Replaces (rather than extends) the attributes of the parent class.
        attributes['class_attrs'] = {
            'left_test_pad_size': self.left_test_pad_size,
            'right_test_pad_size': self.right_test_pad_size}
        attributes['class_params']['include_target'] = self.include_target
        return attributes

    def _pre_process(self, data_dicts: List[Dict[str, Any]],
                     training: bool = False) -> List[np.ndarray]:
        '''
        Converts the text in the data_dicts into a List of size two
        representing the left and right context of the target word
        respectively. Each item is a matrix of integers representing the
        text as their embedding lookups. These two matrices are the inputs
        into the keras model.

        To find the left and right contexts it uses the `spans` field of
        the dictionaries in the `data_dicts`. The `spans` field is a list of
        Tuples where each Tuple represents an occurrence of the Target, each
        Tuple contains the index of the starting and ending character index
        (Expects the List to be of size 1 as there should be only one target
        per target sample. This case is not True for the
        `Dong et al. <https://aclanthology.info/papers/P14-2009/p14-2009>`_
        dataset therefore it only takes the first target instance in the
        sentence as the target).

        The texts are converted into integers using the
        :py:meth:`process_text` method. The left context is processed with
        the default padding of that method and the right context is padded
        and truncated with `post`.

        :param data_dicts: A list of dictionaries that contains a `text` and
                           `spans` field.
        :param training: Whether the text should be processed for training or
                         for prediction. prediction = False, training = True
        :return: A list of two containing the left and right context of
                 the target both represented by the output of
                 :py:meth:`process_text` method.
        '''

        def context_texts(context_data_dicts: List[Dict[str, Any]]
                          ) -> Tuple[List[str], List[str]]:
            '''
            :param context_data_dicts: A list of dictionaries that contains a
                                       `text` and `spans` field.
            :return: A list of the left and right text contexts for all the
                     dictionaries.
            '''
            # Context returns all of the left and right context occurrences
            # therefore if a target is mentioned twice and are associated then
            # for a single text two left and right occurrences are returned.
            # Thus these are a list of lists we therefore chose only the
            # first mentioned target as the paper linked to this method does
            # not specify which they used.
            left_texts = [context(data, 'left', inc_target=self.include_target)
                          for data in context_data_dicts]
            right_texts = [context(data, 'right',
                                   inc_target=self.include_target)
                           for data in context_data_dicts]
            left_texts = [texts[0] for texts in left_texts]
            right_texts = [texts[0] for texts in right_texts]
            return left_texts, right_texts

        # Convert from a sequence of dictionaries into texts and then integers
        # that represent the tokens in the text within the embedding space.

        # Get left and right contexts
        left_text, right_text = context_texts(data_dicts)

        # Training uses the requested pad sizes and records the sizes that
        # were applied; prediction re-uses those recorded sizes.
        if training:
            left_pad_size = self.left_pad_size
            right_pad_size = self.right_pad_size
        else:
            left_pad_size = self.left_test_pad_size
            right_pad_size = self.right_test_pad_size

        left_used_size, left_sequence = self.process_text(left_text,
                                                          left_pad_size)
        if training:
            self.left_test_pad_size = left_used_size

        right_used_size, right_sequence = self.process_text(right_text,
                                                            right_pad_size,
                                                            padding='post',
                                                            truncate='post')
        if training:
            self.right_test_pad_size = right_used_size

        # Always a list (previously a tuple when training) so that both modes
        # agree with each other and with the return annotation.
        return [left_sequence, right_sequence]

    def create_training_text(self, train_data: List[Dict[str, Any]],
                             validation_data: List[Dict[str, Any]]
                             ) -> Tuple[List[np.ndarray],
                                        List[np.ndarray]]:
        '''
        Converts the training and validation data into a format that the keras
        model can take as input.

        Each dictionary in the data is expected to have a `text` field
        containing text and a `spans` field containing a list of Tuples where
        each Tuple represents an occurrence of the Target, each Tuple contains
        the index of the starting and ending character index (Expects the
        List to be of size 1 as there should be only one target per target
        sample. This case is not True for the
        `Dong et al. <https://aclanthology.info/papers/P14-2009/p14-2009>`_
        dataset therefore it only takes the first target instance in the
        sentence as the target).

        :param train_data: Data to be trained on. A list of dictionaries as
                           described above.
        :param validation_data: Data to evaluate the model at training time.
                                Expects the same data as the `train_data`
                                parameter.
        :return: A tuple of length two containing the train and validation
                 input that are both the output of :py:meth:`_pre_process`
        '''

        return super().create_training_text(train_data, validation_data)

    def keras_model(self, num_classes: int) -> 'keras.models.Model':
        '''
        The model that represents this class. This is a custom combination
        of two LSTMs.

        Two keys in the layer keyword arguments are handled here instead of
        being forwarded to keras:

        1. `trainable` in `embedding_layer_kwargs` -- Whether the embedding
           layers are trainable, default True.
        2. `cell` in `lstm_layer_kwargs` -- The dimension of each LSTM,
           default is the dimension of the word embeddings.

        :param num_classes: Number of classes to predict.
        :return: Two LSTMs, one forward from the left context and the other
                 backward from the right context. The output of the two are
                 concatenated and are input to the output layer.
        '''
        # Embeddings
        embedding_matrix = self.embeddings.embedding_matrix
        vocab_size, vector_size = embedding_matrix.shape

        # Work on copies: popping from the instance dictionaries would lose
        # `trainable` and `cell` for any later call to this method and for
        # :py:meth:`model_parameters`.
        embedding_layer_kwargs = dict(self.embedding_layer_kwargs)
        embedding_layer_trainable = embedding_layer_kwargs.pop('trainable',
                                                               True)

        lstm_layer_kwargs = dict(self.lstm_layer_kwargs)
        lstm_dimension = lstm_layer_kwargs.pop('cell', vector_size)

        dense_layer_kwargs = self.dense_layer_kwargs
        # Model layers
        # Left LSTM
        left_input = layers.Input(shape=(self.left_test_pad_size,),
                                  name='left_text_input')
        left_embedding_layer = layers\
                               .Embedding(input_dim=vocab_size,
                                          output_dim=vector_size,
                                          input_length=self.left_test_pad_size,
                                          trainable=embedding_layer_trainable,
                                          weights=[embedding_matrix],
                                          name='left_embedding_layer',
                                          **embedding_layer_kwargs
                                          )(left_input)
        left_lstm_layer = layers.LSTM(lstm_dimension,
                                      name='left_lstm_layer',
                                      **lstm_layer_kwargs
                                      )(left_embedding_layer)
        # Right LSTM
        right_input = layers.Input(shape=(self.right_test_pad_size,),
                                   name='right_text_input')
        right_embedding_layer = layers\
                                .Embedding(input_dim=vocab_size,
                                           output_dim=vector_size,
                                           input_length=self.right_test_pad_size,
                                           trainable=embedding_layer_trainable,
                                           weights=[embedding_matrix],
                                           name='right_embedding_layer',
                                           **embedding_layer_kwargs
                                           )(right_input)
        # The right context is read backwards (go_backwards=True).
        right_lstm_layer = layers.LSTM(lstm_dimension,
                                       name='right_lstm_layer',
                                       go_backwards=True,
                                       **lstm_layer_kwargs
                                       )(right_embedding_layer)
        # Merge the outputs of the left and right LSTMs
        merge_layer = layers.concatenate([left_lstm_layer, right_lstm_layer],
                                         name='left_right_lstm_merge')
        predictions = layers.Dense(num_classes, activation='softmax',
                                   name='output',
                                   **dense_layer_kwargs)(merge_layer)

        return models.Model(inputs=[left_input, right_input],
                            outputs=predictions)

    @property
    def include_target(self) -> bool:
        '''
        include_target attribute

        :return: The include_target used in the model
        '''

        return self._include_target

    @include_target.setter
    def include_target(self, value: bool) -> None:
        '''
        Sets the include_target attribute

        Assigning this attribute also resets `fitted` to False.

        :param value: The value to assign to the include_target attribute
        '''

        self.fitted = False
        self._include_target = value


[docs]class TCLSTM(TDLSTM):

[docs]    @classmethod
    def name(cls) -> str:
        '''
        :return: The name of the machine learning model.
        '''
        model_name = 'TCLSTM'
        return model_name

    def __repr__(self) -> str:
        '''
        Represents the instance by the name of its machine learning model.

        :return: The value returned by the `name` method.
        '''
        model_name = self.name()
        return model_name

    def _pre_process(self, data_dicts: List[Dict[str, Any]],
                     training: bool = False) -> List[np.ndarray]:
        '''
        Converts the text in the data_dicts into a list of size four
        representing the left context, left targets, right context and
        right targets. The two contexts are exactly the output of the
        parent class `_pre_process` method.

        Each targets item is a matrix of shape
        [n_samples, pad_size, word_embedding_dimension]. For every sample the
        `target` field is split on whitespace (and lower cased when `lower`
        is set), each word is looked up in the embeddings, the word vectors
        are combined by `matrix_median` and the result is repeated for each
        of the `pad_size` time steps. The pad sizes used are
        `left_test_pad_size` and `right_test_pad_size` respectively.

        :param data_dicts: A list of dictionaries that contains a `text`,
                           `spans` and `target` field.
        :param training: Whether the text should be processed for training or
                         for prediction. prediction = False, training = True
        :return: A list of four containing the left context, left vectors,
                 right context, and right vectors.
        '''
        def context_median_targets(pad_size: int) -> np.ndarray:
            '''
            :param pad_size: The number of timesteps within the LSTM
            :return: Matrix of shape
                     [n_samples, pad_size, word_embedding_dimension] holding
                     for each sample its combined target vector repeated
                     `pad_size` times.
            '''
            embedding_size = self.embeddings.vector_size
            all_targets = np.zeros((len(data_dicts), pad_size,
                                    embedding_size))
            for row, sample in enumerate(data_dicts):
                words = sample['target'].split()
                if self.lower:
                    words = [word.lower() for word in words]
                word_vectors = np.vstack([self.embeddings.lookup_vector(word)
                                          for word in words])
                # NOTE(review): repeating along axis 0 presumes that
                # matrix_median returns a single row -- confirm.
                all_targets[row] = np.repeat(matrix_median(word_vectors),
                                             pad_size, axis=0)
            return all_targets

        # The parent call comes first: when training it sets the left and
        # right test pad sizes that are read below.
        left_sequence, right_sequence = super()._pre_process(
            data_dicts, training=training)
        return [left_sequence,
                context_median_targets(self.left_test_pad_size),
                right_sequence,
                context_median_targets(self.right_test_pad_size)]

[docs]    def create_training_text(self, train_data: List[Dict[str, Any]],
                             validation_data: List[Dict[str, Any]]
                             ) -> Tuple[List[np.ndarray],
                                        List[np.ndarray]]:
        '''
        Converts the training and validation data into a format that the keras
        model can take as input.

        :param train_data: See :py:meth:`bella.models.tdlstm.\
                           TDLSTM.create_training_text` `train_data`
                           parameter.
        :param validation_data: See :py:meth:`bella.models.tdlstm.\
                                TDLSTM.create_training_text` `validation_data`
                                parameter.
        :return: A tuple of length two containing the train and validation
                 input that are both the output of :py:meth:`_pre_process`
        '''

        return super().create_training_text(train_data, validation_data)

[docs]    def keras_model(self, num_classes: int) -> 'keras.models.Model':
        '''
        The model that represents this class. This is the same as the
        :py:meth:`bella.models.tdlstm.TDLSTM.keras_model` model, however
        the words in before inputting into the LSTM are concatenated with
        the word embedding of the target. If the target is more than one word
        then the word embedding of the target is the average (median in our
        case) embeddings of the target words.

        :param num_classes: Number of classes to predict.
        :return: Two LSTMs one forward from the left context and the other
                 backward from the right context taking into account the
                 target vector embedding.
        '''
        # Embeddings
        embedding_matrix = self.embeddings.embedding_matrix
        vocab_size, vector_size = embedding_matrix.shape

        embedding_layer_kwargs = self.embedding_layer_kwargs
        embedding_layer_trainable = True
        if 'trainable' in embedding_layer_kwargs:
            embedding_layer_trainable = embedding_layer_kwargs.pop('trainable')

        lstm_layer_kwargs = self.lstm_layer_kwargs
        # Double the vector size as we have to take into consideration the
        # concatenated target vector
        lstm_dimension = vector_size * 2
        if 'cell' in self.lstm_layer_kwargs:
            lstm_dimension = lstm_layer_kwargs.pop('cell')

        dense_layer_kwargs = self.dense_layer_kwargs
        # Model layers
        # Left LSTM
        left_input = layers.Input(shape=(self.left_test_pad_size,),
                                  name='left_text_input')
        left_embedding_layer = layers\
                               .Embedding(input_dim=vocab_size,
                                          output_dim=vector_size,
                                          input_length=self.left_test_pad_size,
                                          trainable=embedding_layer_trainable,
                                          weights=[embedding_matrix],
                                          name='left_embedding_layer',
                                          **embedding_layer_kwargs
                                          )(left_input)
        left_target_input = layers.Input(shape=(self.left_test_pad_size,
                                                vector_size),
                                         name='left_target')
        left_text_target = layers.concatenate([left_embedding_layer,
                                               left_target_input],
                                              name='left_text_target')
        left_lstm_layer = layers.LSTM(lstm_dimension,
                                      name='left_lstm_layer',
                                      **lstm_layer_kwargs
                                      )(left_text_target)
        # Right LSTM
        right_input = layers.Input(shape=(self.right_test_pad_size,),
                                   name='right_text_input')
        right_embedding_layer = layers\
                                .Embedding(input_dim=vocab_size,
                                           output_dim=vector_size,
                                           input_length=self.right_test_pad_size,
                                           trainable=embedding_layer_trainable,
                                           weights=[embedding_matrix],
                                           name='right_embedding_layer',
                                           **embedding_layer_kwargs
                                           )(right_input)
        right_target_input = layers.Input(shape=(self.right_test_pad_size,
                                                 vector_size),
                                          name='right_target')
        right_text_target = layers.concatenate([right_embedding_layer,
                                                right_target_input],
                                               name='right_text_target')
        right_lstm_layer = layers.LSTM(lstm_dimension,
                                       name='right_lstm_layer',
                                       go_backwards=True,
                                       **lstm_layer_kwargs
                                       )(right_text_target)
        # Merge the outputs of the left and right LSTMs
        merge_layer = layers.concatenate([left_lstm_layer, right_lstm_layer],
                                         name='left_right_lstm_merge')
        predictions = layers.Dense(num_classes, activation='softmax',
                                   name='output',
                                   **dense_layer_kwargs)(merge_layer)

        input_layers = [left_input, left_target_input,
                        right_input, right_target_input]
        return models.Model(inputs=input_layers, outputs=predictions)