# Source code for bella.neural_pooling

'''
Contains the following neural pooling functions:

1. min
2. max
3. avg

Which are from
`Tang et al <https://aclanthology.coli.uni-saarland.de/papers/P14-1146/p14-1146>`_.

and the following pooling functions:

4. prod
5. std

Which are from
`Vo and Zhang <https://www.ijcai.org/Proceedings/15/Papers/194.pdf>`_.

and finally the following pooling function:

6. median

From `Bo Wang et al.
<https://aclanthology.coli.uni-saarland.de/papers/E17-1046/e17-1046>`_

All the functions are applied over the columns and not the rows, e.g. given a
matrix of size (m, n), applying mean returns a vector of size (1, n).
Therefore by default all of the returned vectors are row vectors, but if
transpose is True then column vectors are returned.
'''
from functools import wraps

import numpy as np

def inf_nan_check(neural_func):
    '''
    Decorator that replaces NAN, inf and other extreme values in the array
    returned by ``neural_func`` with finite numbers, to avoid any potential
    problems with inf and NAN's later on in the processing chain.

    Inf/extreme value conversion - Any value greater (smaller) than half of
    the largest (smallest) finite value of the array's dtype is set to that
    half value. Half of the dtype range is used, rather than the full range,
    so that the range of the values can be computed without overflowing.

    NAN conversion - based on the following
    `post <https://stackoverflow.com/questions/25506281/what-are-all-the-possib\
    le-calculations-that-could-cause-a-nan-in-python>`_ about how NAN's occur.
    It converts NAN's to zeros as the majority of the operation should equal
    zero or are close to zero. This is a rough approximation but it should not
    affect that many numbers.

    :param neural_func: Function that takes a numpy array (plus optional \
    keyword arguments) and returns a floating point numpy array.
    :type neural_func: callable
    :returns: ``neural_func`` wrapped so that its output is sanitised.
    :rtype: callable
    '''

    @wraps(neural_func)
    def func_wrapper(matrix, **kwargs):
        '''
        Calls the wrapped function and sanitises the array it returns. The
        returned array is modified in place.

        :param matrix: Numpy array/matrix given to the wrapped function.
        :param kwargs: Keyword arguments forwarded unchanged to the wrapped \
        function e.g. ``transpose``.
        :type matrix: np.ndarray
        :type kwargs: dict
        :returns: The output of the wrapped function with NAN values set to \
        zero and all values limited to half of the dtype's finite range.
        :rtype: np.ndarray
        :raises TypeError: If the wrapped function's output does not have a \
        floating point dtype.
        '''

        pooled = neural_func(matrix, **kwargs)
        if not np.issubdtype(pooled.dtype, np.floating):
            raise TypeError('Only accept floating value word embeddings not '
                            '{}'.format(pooled.dtype.type))
        # Convert all NAN values to zero
        nan_mask = np.isnan(pooled)
        if nan_mask.any():
            pooled[nan_mask] = 0
        # Limit every value (including +/- inf) to half of the smallest and
        # largest finite values of the dtype. Half is used so that the range
        # (max - min) of the values can be computed without overflowing.
        dtype_info = np.finfo(pooled.dtype)
        min_value = dtype_info.min / 2
        max_value = dtype_info.max / 2
        too_small = pooled < min_value
        too_large = pooled > max_value
        if too_small.any() or too_large.any():
            pooled[too_small] = min_value
            pooled[too_large] = max_value

        return pooled

    return func_wrapper

def matrix_checking(neural_func):
    '''
    Decorator that checks the argument given to a neural pooling function and
    the value the pooling function returns. The functions decorated are the
    neural pooling functions which are:

    1. :py:func:`bella.neural_pooling.matrix_min`
    2. :py:func:`bella.neural_pooling.matrix_max`
    3. :py:func:`bella.neural_pooling.matrix_avg`
    4. :py:func:`bella.neural_pooling.matrix_median`
    5. :py:func:`bella.neural_pooling.matrix_std`
    6. :py:func:`bella.neural_pooling.matrix_prod`

    :param neural_func: Pooling function that takes only the matrix and \
    returns one value per column of the matrix.
    :type neural_func: callable
    :returns: ``neural_func`` wrapped with the argument and return checks.
    :rtype: callable
    '''
    @wraps(neural_func)
    def func_wrapper(matrix, transpose=False):
        '''
        Checks the matrix is of the correct type and that the return matrix
        is of the correct size after the neural_func function has been applied
        to the matrix.

        The pooled values are returned as a row vector of shape (1, n) unless
        transpose is True, in which case they are returned as a column vector
        of shape (n, 1).

        :param matrix: Matrix of shape (m, n) to pool over.
        :param transpose: If True return a column vector instead of a row \
        vector.
        :type matrix: np.ndarray
        :type transpose: bool
        :returns: The output of the neural_func function as a 2 dimensional \
        row (or column) vector.
        :rtype: np.ndarray
        :raises TypeError: If the matrix is not a numpy.ndarray.
        :raises ValueError: If the output of neural_func is not 1 dimensional \
        or does not contain one value per column of the matrix.
        '''
        # Pre check
        if not isinstance(matrix, np.ndarray):
            raise TypeError('The matrix has to be of type numpy.ndarray and not '
                            '{}'.format(type(matrix)))
        # Applying the relevant neural pooling function
        reduced_matrix = neural_func(matrix)
        # Post check. The number of dimensions has to be checked before the
        # shape is indexed, otherwise a 0 dimensional result (e.g. from
        # pooling a 1 dimensional input) raises an IndexError rather than the
        # ValueError below.
        rm_dim = len(reduced_matrix.shape)
        if rm_dim != 1:
            raise ValueError('The returned matrix should be a vector and have '
                             'a dimension of 1 it is: {}'.format(rm_dim))
        rm_cols = reduced_matrix.shape[0]
        m_columns = matrix.shape[1]
        if rm_cols != m_columns:
            raise ValueError('The number of columns has changed during the '
                             'pooling func from {} to {}'
                             .format(m_columns, rm_cols))
        if transpose:
            return reduced_matrix.reshape(rm_cols, 1)
        return reduced_matrix.reshape(1, rm_cols)
    return func_wrapper

@inf_nan_check
@matrix_checking
def matrix_min(matrix, **kwargs):
    '''
    Min pooling: the minimum value of each column of the matrix.

    The keyword arguments are consumed by the decorators and never reach this
    function body; `matrix_checking` accepts ``transpose``.

    :param matrix: Matrix of shape (m, n) to pool over.
    :param kwargs: Keywords that are accepted by the `matrix_checking` \
    function e.g. ``transpose=True`` to get a column vector back.
    :type matrix: np.ndarray
    :type kwargs: dict
    :returns: The minimum column values in the matrix, as a (1, n) row \
    vector or a (n, 1) column vector if transpose is True. NAN and extreme \
    values are replaced by `inf_nan_check`.
    :rtype: np.ndarray
    '''

    return matrix.min(axis=0)

@inf_nan_check
@matrix_checking
def matrix_max(matrix, **kwargs):
    '''
    Max pooling: the maximum value of each column of the matrix.

    The keyword arguments are consumed by the decorators and never reach this
    function body; `matrix_checking` accepts ``transpose``.

    :param matrix: Matrix of shape (m, n) to pool over.
    :param kwargs: Keywords that are accepted by the `matrix_checking` \
    function e.g. ``transpose=True`` to get a column vector back.
    :type matrix: np.ndarray
    :type kwargs: dict
    :returns: The maximum column values in the matrix, as a (1, n) row \
    vector or a (n, 1) column vector if transpose is True. NAN and extreme \
    values are replaced by `inf_nan_check`.
    :rtype: np.ndarray
    '''

    return matrix.max(axis=0)

@inf_nan_check
@matrix_checking
def matrix_avg(matrix, **kwargs):
    '''
    Average pooling: the mean value of each column of the matrix.

    The keyword arguments are consumed by the decorators and never reach this
    function body; `matrix_checking` accepts ``transpose``.

    :param matrix: Matrix of shape (m, n) to pool over.
    :param kwargs: Keywords that are accepted by the `matrix_checking` \
    function e.g. ``transpose=True`` to get a column vector back.
    :type matrix: np.ndarray
    :type kwargs: dict
    :returns: The mean column values in the matrix, as a (1, n) row vector \
    or a (n, 1) column vector if transpose is True. NAN and extreme values \
    are replaced by `inf_nan_check`.
    :rtype: np.ndarray
    '''

    return matrix.mean(axis=0)

@inf_nan_check
@matrix_checking
def matrix_median(matrix, **kwargs):
    '''
    Median pooling: the median value of each column of the matrix.

    The keyword arguments are consumed by the decorators and never reach this
    function body; `matrix_checking` accepts ``transpose``.

    :param matrix: Matrix of shape (m, n) to pool over.
    :param kwargs: Keywords that are accepted by the `matrix_checking` \
    function e.g. ``transpose=True`` to get a column vector back.
    :type matrix: np.ndarray
    :type kwargs: dict
    :returns: The median column values in the matrix, as a (1, n) row \
    vector or a (n, 1) column vector if transpose is True. NAN and extreme \
    values are replaced by `inf_nan_check`.
    :rtype: np.ndarray
    '''

    return np.median(matrix, axis=0)

@inf_nan_check
@matrix_checking
def matrix_std(matrix, **kwargs):
    '''
    Standard deviation pooling: the standard deviation of each column of the
    matrix, computed with the default arguments of ``np.std``.

    The keyword arguments are consumed by the decorators and never reach this
    function body; `matrix_checking` accepts ``transpose``.

    :param matrix: Matrix of shape (m, n) to pool over.
    :param kwargs: Keywords that are accepted by the `matrix_checking` \
    function e.g. ``transpose=True`` to get a column vector back.
    :type matrix: np.ndarray
    :type kwargs: dict
    :returns: The standard deviation of the column values in the matrix, as \
    a (1, n) row vector or a (n, 1) column vector if transpose is True. NAN \
    and extreme values are replaced by `inf_nan_check`.
    :rtype: np.ndarray
    '''
    return np.std(matrix, axis=0)

@inf_nan_check
@matrix_checking
def matrix_prod(matrix, **kwargs):
    '''
    Product pooling: the product of the values in each column of the matrix.

    The keyword arguments are consumed by the decorators and never reach this
    function body; `matrix_checking` accepts ``transpose``.

    :param matrix: Matrix of shape (m, n) to pool over.
    :param kwargs: Keywords that are accepted by the `matrix_checking` \
    function e.g. ``transpose=True`` to get a column vector back.
    :type matrix: np.ndarray
    :type kwargs: dict
    :returns: The product of the column values in the matrix, as a (1, n) \
    row vector or a (n, 1) column vector if transpose is True. Products \
    that overflow to inf, and any NAN values, are replaced by \
    `inf_nan_check`.
    :rtype: np.ndarray
    '''
    return np.prod(matrix, axis=0)