Source code for bella.models.tdparse

'''
Module contains all of the classes that represent Machine Learning models
that are within the `Wang et al. paper \
<https://aclanthology.coli.uni-saarland.de/papers/E17-1046/e17-1046>`_.

1. :py:class:`bella.models.tdparse.TDParseMinus` -- TDParse Minus model
2. :py:class:`bella.models.tdparse.TDParse` -- TDParse model
3. :py:class:`bella.models.tdparse.TDParsePlus` -- TDParse Plus model
'''
from typing import Any, List, Callable, Dict

import sklearn
from sklearn.pipeline import FeatureUnion, Pipeline
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import LinearSVC

import bella
from bella.tokenisers import ark_twokenize
from bella.neural_pooling import matrix_max, matrix_min, matrix_avg
from bella.neural_pooling import matrix_median, matrix_prod, matrix_std
from bella.scikit_features.context import Context
from bella.scikit_features import syntactic_context
from bella.scikit_features.lexicon_filter import LexiconFilter
from bella.scikit_features.join_context_vectors import JoinContextVectors
from bella.scikit_features.neural_pooling import NeuralPooling
from bella.scikit_features.tokeniser import ContextTokeniser
from bella.scikit_features.word_vector import ContextWordVectors
from bella.models.target import TargetInd


def _pooling_union() -> FeatureUnion:
    '''
    The neural pooling feature union that every context sub-pipeline in this
    module shares: each pooling function (max, min, avg, prod, std) is
    applied to the matrix of context word vectors and the pooled vectors of
    the contexts are then joined by their median.

    :returns: A new :py:class:`sklearn.pipeline.FeatureUnion` of the five
              neural pooling pipelines.
    '''
    return FeatureUnion([
        ('max_pipe', Pipeline([
            ('max', NeuralPooling(matrix_max)),
            ('join', JoinContextVectors(matrix_median))
        ])),
        ('min_pipe', Pipeline([
            ('min', NeuralPooling(matrix_min)),
            ('join', JoinContextVectors(matrix_median))
        ])),
        ('avg_pipe', Pipeline([
            ('avg', NeuralPooling(matrix_avg)),
            ('join', JoinContextVectors(matrix_median))
        ])),
        ('prod_pipe', Pipeline([
            ('prod', NeuralPooling(matrix_prod)),
            ('join', JoinContextVectors(matrix_median))
        ])),
        ('std_pipe', Pipeline([
            ('std', NeuralPooling(matrix_std)),
            ('join', JoinContextVectors(matrix_median))
        ]))
    ])


class TDParseMinus(TargetInd):

    @classmethod
    def name(cls) -> str:
        return 'TDParse Minus'

    def __repr__(self) -> str:
        '''
        Name of the machine learning model.
        '''
        return self.name()

    def __init__(self, word_vectors: List['bella.word_vectors.WordVectors'],
                 parser: Any,
                 tokeniser: Callable[[str], List[str]] = ark_twokenize,
                 lower: bool = True, C: float = 0.01, random_state: int = 42,
                 scale: Any = MinMaxScaler()) -> None:
        '''
        :param word_vectors: A list of one or more word vectors to be used as
                             feature vector lookups. If more than one is used
                             the word vectors are concatenated together to
                             create the feature vector for each word.
        :param parser: The dependency parser to be used.
        :param tokeniser: Tokeniser to be used e.g. :py:meth:`str.split`
        :param lower: Whether to lower case the words
        :param C: The C value for the :py:class:`sklearn.svm.SVC` estimator
                  that is used in the pipeline.
        :param random_state: The random_state value for the
                             :py:class:`sklearn.svm.SVC` estimator that is
                             used in the pipeline.
        :param scale: How to scale the data before input into the estimator.
                      If no scaling is to be used set this to None.
        '''
        # Skip TargetInd and inherit the SKLearnModel __init__ method.
        # The parameters here go into the self.get_parameters method
        super(TargetInd, self).__init__(word_vectors, parser, tokeniser,
                                        lower, C, random_state, scale)

    @staticmethod
    def pipeline() -> 'sklearn.pipeline.Pipeline':
        '''
        Machine Learning model that is used as the base template for the
        model attribute.

        :returns: The template machine learning model
        '''
        return Pipeline([
            ('dependency_context', syntactic_context.SyntacticContext()),
            ('contexts', syntactic_context.Context('full')),
            ('tokens', ContextTokeniser()),
            ('word_vectors', ContextWordVectors()),
            ('pool_funcs', _pooling_union()),
            ('scale', MinMaxScaler()),
            ('svm', LinearSVC())
        ])

    @classmethod
    def normalise_parameter_names(cls, parameter_dict: Dict[str, Any]
                                  ) -> Dict[str, Any]:
        '''
        Converts the output of :py:meth:`get_parameters` into a dictionary
        that can be used as keyword arguments into :py:meth:`get_parameters`.

        :returns: A dictionary that can be used as keyword arguments into the
                  :py:meth:`get_parameters` method
        '''
        parameter_names = ['word_vectors__', '__tokeniser', '__lower',
                           'scale', '__C', '__random_state', '__parser']
        name_parameter = {}
        for parameter_name in parameter_names:
            for name, parameter in parameter_dict.items():
                if parameter_name in name:
                    real_parameter_name = parameter_name.strip('_')
                    name_parameter[real_parameter_name] = parameter
        return name_parameter

    @classmethod
    def get_parameters(cls,
                       word_vectors: List['bella.word_vectors.WordVectors'],
                       parser: Any,
                       tokeniser: Callable[[str], List[str]] = ark_twokenize,
                       lower: bool = True, C: float = 0.01,
                       random_state: int = 42,
                       scale: Any = MinMaxScaler()) -> Dict[str, Any]:
        '''
        Transform the given parameters into a dictionary that is accepted as
        model parameters

        :param word_vectors: A list of one or more word vectors to be used as
                             feature vector lookups. If more than one is used
                             the word vectors are concatenated together to
                             create the feature vector for each word.
        :param parser: The dependency parser to be used.
        :param tokeniser: Tokeniser to be used e.g. :py:meth:`str.split`
        :param lower: Whether to lower case the words
        :param C: The C value for the :py:class:`sklearn.svm.SVC` estimator
                  that is used in the pipeline.
        :param random_state: The random_state value for the
                             :py:class:`sklearn.svm.SVC` estimator that is
                             used in the pipeline.
        :param scale: How to scale the data before input into the estimator.
                      If no scaling is to be used set this to None.
        :return: Model parameters
        '''
        params_dict = super().get_parameters(word_vectors, tokeniser, lower,
                                             C, random_state, scale)
        params_dict = cls._add_to_params_dict(params_dict,
                                              cls._get_dependency_context(),
                                              parser)
        return params_dict

    @classmethod
    def get_cv_parameters(cls,
                          word_vectors: List[List['bella.word_vectors.WordVectors']],
                          parser: List[Any], tokeniser=[ark_twokenize],
                          lower=[True], C=[0.01], random_state=[42],
                          scale=[MinMaxScaler()]):
        '''
        Transform the given parameters into a list of dictionaries that is
        accepted as the `param_grid` parameter in
        :py:class:`sklearn.model_selection.GridSearchCV`

        :param word_vectors: A list of lists of word vectors e.g.
                             [[SSWE()], [SSWE(), GloveCommonCrawl()]].
        :param parser: A list of dependency parsers to be used.
        :param tokeniser: A list of tokenisers to be used e.g.
                          :py:meth:`str.split`. Default [ark_twokenize]
        :param lower: A list of bool values which indicate whether to lower
                      case the input words. Default [True]
        :param C: A list of C values for the :py:class:`sklearn.svm.SVC`
                  estimator that is used in the pipeline. Default [0.01]
        :param random_state: A list of random_state values for the
                             :py:class:`sklearn.svm.SVC` estimator that is
                             used in the pipeline. Default [42]
        :param scale: List of scale values. The list can include
                      :py:class:`sklearn.preprocessing.MinMaxScaler` type of
                      classes or None if no scaling is to be used. Default
                      [:py:class:`sklearn.preprocessing.MinMaxScaler`]
        :return: Parameters to explore through cross validation
        '''
        params_list = super().get_cv_parameters(word_vectors, tokeniser,
                                                lower, C, random_state, scale)
        # dependency parser
        dep_context = cls._get_dependency_context()[0]
        params_list = cls._add_to_all_params(params_list, dep_context, parser)
        return params_list

    @staticmethod
    def _get_dependency_context() -> List[str]:
        '''
        Method to be overridden by subclasses, as each pipeline is different
        and has a different parameter name for where the dependency parser
        is set.

        :returns: A list of parameter names where the dependency parser is
                  set in the pipeline
        '''
        return ['dependency_context__parser']
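
# ---------------------------------------------------------------------------
# A minimal usage sketch for TDParseMinus, assuming the sklearn-style
# fit/predict interface inherited from the SKLearnModel base class. The
# `SSWE` import path and the `tweebo_parser` object are assumptions, not
# taken from this module: substitute whichever
# `bella.word_vectors.WordVectors` subclass and dependency parser your
# installation provides.
#
#     from bella.word_vectors import SSWE  # assumed import path
#
#     model = TDParseMinus(word_vectors=[SSWE()], parser=tweebo_parser)
#     model.fit(train_data, train_sentiments)   # assumed SKLearnModel API
#     predictions = model.predict(test_data)
# ---------------------------------------------------------------------------
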
class TDParse(TDParseMinus):

    @classmethod
    def name(cls) -> str:
        return 'TDParse'

    def __repr__(self) -> str:
        '''
        Name of the machine learning model.
        '''
        return self.name()

    def __init__(self, word_vectors: List['bella.word_vectors.WordVectors'],
                 parser: Any,
                 tokeniser: Callable[[str], List[str]] = ark_twokenize,
                 lower: bool = True, C: float = 0.01, random_state: int = 42,
                 scale: Any = MinMaxScaler()) -> None:
        '''
        :param word_vectors: A list of one or more word vectors to be used as
                             feature vector lookups. If more than one is used
                             the word vectors are concatenated together to
                             create the feature vector for each word.
        :param parser: The dependency parser to be used.
        :param tokeniser: Tokeniser to be used e.g. :py:meth:`str.split`
        :param lower: Whether to lower case the words
        :param C: The C value for the :py:class:`sklearn.svm.SVC` estimator
                  that is used in the pipeline.
        :param random_state: The random_state value for the
                             :py:class:`sklearn.svm.SVC` estimator that is
                             used in the pipeline.
        :param scale: How to scale the data before input into the estimator.
                      If no scaling is to be used set this to None.
        '''
        # Skip TargetInd and inherit the SKLearnModel __init__ method.
        # The parameters here go into the self.get_parameters method
        super(TargetInd, self).__init__(word_vectors, parser, tokeniser,
                                        lower, C, random_state, scale)

    @staticmethod
    def pipeline() -> 'sklearn.pipeline.Pipeline':
        '''
        Machine Learning model that is used as the base template for the
        model attribute.

        :returns: The template machine learning model
        '''
        return Pipeline([
            ('union', FeatureUnion([
                ('dependency', Pipeline([
                    ('context', syntactic_context.SyntacticContext()),
                    ('contexts', syntactic_context.Context('full')),
                    ('tokens', ContextTokeniser()),
                    ('word_vectors', ContextWordVectors()),
                    ('pool_funcs', _pooling_union())
                ])),
                ('left', Pipeline([
                    ('contexts', Context('left')),
                    ('tokens', ContextTokeniser()),
                    ('word_vectors', ContextWordVectors()),
                    ('pool_funcs', _pooling_union())
                ])),
                ('right', Pipeline([
                    ('contexts', Context('right')),
                    ('tokens', ContextTokeniser()),
                    ('word_vectors', ContextWordVectors()),
                    ('pool_funcs', _pooling_union())
                ])),
                ('target', Pipeline([
                    ('contexts', Context('target')),
                    ('tokens', ContextTokeniser()),
                    ('word_vectors', ContextWordVectors()),
                    ('pool_funcs', _pooling_union())
                ]))
            ])),
            ('scale', MinMaxScaler()),
            ('svm', LinearSVC())
        ])

    @staticmethod
    def _get_word_vector_names() -> List[str]:
        '''
        :returns: A list of parameter names where the word vectors are set
                  in the pipeline.
        '''
        return ['union__dependency__word_vectors__vectors',
                'union__left__word_vectors__vectors',
                'union__right__word_vectors__vectors',
                'union__target__word_vectors__vectors']

    @staticmethod
    def _get_tokeniser_names() -> List[str]:
        '''
        :returns: A list of parameter names where the tokenisers are set in
                  the pipeline.
        '''
        return ['union__dependency__tokens',
                'union__left__tokens',
                'union__right__tokens',
                'union__target__tokens']

    @staticmethod
    def _get_dependency_context() -> List[str]:
        '''
        :returns: A list of parameter names where the dependency parser is
                  set in the pipeline
        '''
        return ['union__dependency__context__parser']
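
# ---------------------------------------------------------------------------
# A minimal grid-search sketch: the docstring of `get_cv_parameters` states
# that its output is accepted as the `param_grid` argument of
# :py:class:`sklearn.model_selection.GridSearchCV`. The import path of
# `SSWE`/`GloveCommonCrawl`, the `tweebo_parser` object and the data
# variables are assumptions.
#
#     from sklearn.model_selection import GridSearchCV
#     from bella.word_vectors import SSWE, GloveCommonCrawl  # assumed path
#
#     param_grid = TDParse.get_cv_parameters(
#         word_vectors=[[SSWE()], [SSWE(), GloveCommonCrawl()]],
#         parser=[tweebo_parser], C=[0.01, 0.1])
#     search = GridSearchCV(TDParse.pipeline(), param_grid=param_grid,
#                           cv=5, scoring='accuracy')
#     search.fit(train_data, train_sentiments)  # hypothetical data variables
# ---------------------------------------------------------------------------
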
class TDParsePlus(TDParseMinus):

    @classmethod
    def name(cls) -> str:
        return 'TDParsePlus'

    def __repr__(self) -> str:
        '''
        Name of the machine learning model.
        '''
        return self.name()

    def __init__(self, word_vectors: List['bella.word_vectors.WordVectors'],
                 parser: Any, senti_lexicon: 'bella.lexicons.Lexicon',
                 tokeniser: Callable[[str], List[str]] = ark_twokenize,
                 lower: bool = True, C: float = 0.01, random_state: int = 42,
                 scale: Any = MinMaxScaler()) -> None:
        '''
        :param word_vectors: A list of one or more word vectors to be used as
                             feature vector lookups. If more than one is used
                             the word vectors are concatenated together to
                             create the feature vector for each word.
        :param parser: The dependency parser to be used.
        :param senti_lexicon: Sentiment Lexicon to be used for the Left and
                              Right sentiment context (LS and RS).
        :param tokeniser: Tokeniser to be used e.g. :py:meth:`str.split`
        :param lower: Whether to lower case the words
        :param C: The C value for the :py:class:`sklearn.svm.SVC` estimator
                  that is used in the pipeline.
        :param random_state: The random_state value for the
                             :py:class:`sklearn.svm.SVC` estimator that is
                             used in the pipeline.
        :param scale: How to scale the data before input into the estimator.
                      If no scaling is to be used set this to None.
        '''
        # Skip TargetInd and inherit the SKLearnModel __init__ method.
        # The parameters here go into the self.get_parameters method
        super(TargetInd, self).__init__(word_vectors, parser, senti_lexicon,
                                        tokeniser, lower, C, random_state,
                                        scale)

    @staticmethod
    def pipeline() -> 'sklearn.pipeline.Pipeline':
        '''
        Machine Learning model that is used as the base template for the
        model attribute.

        :returns: The template machine learning model
        '''
        return Pipeline([
            ('union', FeatureUnion([
                ('dependency', Pipeline([
                    ('context', syntactic_context.SyntacticContext()),
                    ('contexts', syntactic_context.Context('full')),
                    ('tokens', ContextTokeniser()),
                    ('word_vectors', ContextWordVectors()),
                    ('pool_funcs', _pooling_union())
                ])),
                ('left', Pipeline([
                    ('contexts', Context('left')),
                    ('tokens', ContextTokeniser()),
                    ('word_vectors', ContextWordVectors()),
                    ('pool_funcs', _pooling_union())
                ])),
                ('right', Pipeline([
                    ('contexts', Context('right')),
                    ('tokens', ContextTokeniser()),
                    ('word_vectors', ContextWordVectors()),
                    ('pool_funcs', _pooling_union())
                ])),
                ('left_s', Pipeline([
                    ('contexts', Context('left')),
                    ('tokens', ContextTokeniser()),
                    ('filter', LexiconFilter()),
                    ('word_vectors', ContextWordVectors()),
                    ('pool_funcs', _pooling_union())
                ])),
                ('right_s', Pipeline([
                    ('contexts', Context('right')),
                    ('tokens', ContextTokeniser()),
                    ('filter', LexiconFilter()),
                    ('word_vectors', ContextWordVectors()),
                    ('pool_funcs', _pooling_union())
                ])),
                ('target', Pipeline([
                    ('contexts', Context('target')),
                    ('tokens', ContextTokeniser()),
                    ('word_vectors', ContextWordVectors()),
                    ('pool_funcs', _pooling_union())
                ]))
            ])),
            ('scale', MinMaxScaler()),
            ('svm', LinearSVC())
        ])

    @classmethod
    def normalise_parameter_names(cls, parameter_dict: Dict[str, Any]
                                  ) -> Dict[str, Any]:
        '''
        Converts the output of :py:meth:`get_parameters` into a dictionary
        that can be used as keyword arguments into :py:meth:`get_parameters`.

        :returns: A dictionary that can be used as keyword arguments into the
                  :py:meth:`get_parameters` method
        '''
        parameter_names = ['word_vectors__', '__tokeniser', '__lower',
                           'scale', '__C', '__random_state', '__parser',
                           '__lexicon']
        name_parameter = {}
        for parameter_name in parameter_names:
            for name, parameter in parameter_dict.items():
                if parameter_name in name:
                    real_parameter_name = parameter_name.strip('_')
                    if parameter_name == '__lexicon':
                        real_parameter_name = 'senti_lexicon'
                    name_parameter[real_parameter_name] = parameter
        return name_parameter

    @classmethod
    def get_parameters(cls,
                       word_vectors: List['bella.word_vectors.WordVectors'],
                       parser: Any, senti_lexicon: 'bella.lexicons.Lexicon',
                       tokeniser: Callable[[str], List[str]] = ark_twokenize,
                       lower: bool = True, C: float = 0.01,
                       random_state: int = 42,
                       scale: Any = MinMaxScaler()) -> Dict[str, Any]:
        '''
        Transform the given parameters into a dictionary that is accepted as
        model parameters

        :param word_vectors: A list of one or more word vectors to be used as
                             feature vector lookups. If more than one is used
                             the word vectors are concatenated together to
                             create the feature vector for each word.
        :param parser: The dependency parser to be used.
        :param senti_lexicon: Sentiment Lexicon to be used for the Left and
                              Right sentiment context (LS and RS).
        :param tokeniser: Tokeniser to be used e.g. :py:meth:`str.split`
        :param lower: Whether to lower case the words
        :param C: The C value for the :py:class:`sklearn.svm.SVC` estimator
                  that is used in the pipeline.
        :param random_state: The random_state value for the
                             :py:class:`sklearn.svm.SVC` estimator that is
                             used in the pipeline.
        :param scale: How to scale the data before input into the estimator.
                      If no scaling is to be used set this to None.
        :return: Model parameters
        '''
        params_dict = super().get_parameters(word_vectors, parser, tokeniser,
                                             lower, C, random_state, scale)
        params_dict = cls._add_to_params_dict(params_dict,
                                              cls._get_word_senti_names(),
                                              senti_lexicon)
        return params_dict

    @classmethod
    def get_cv_parameters(cls,
                          word_vectors: List[List['bella.word_vectors.WordVectors']],
                          parser: List[Any],
                          senti_lexicon: List['bella.lexicons.Lexicon'],
                          tokeniser=[ark_twokenize], lower=[True], C=[0.01],
                          random_state=[42], scale=[MinMaxScaler()]):
        '''
        Transform the given parameters into a list of dictionaries that is
        accepted as the `param_grid` parameter in
        :py:class:`sklearn.model_selection.GridSearchCV`

        :param word_vectors: A list of lists of word vectors e.g.
                             [[SSWE()], [SSWE(), GloveCommonCrawl()]].
        :param parser: A list of dependency parsers to be used.
        :param senti_lexicon: A list of Sentiment Lexicons to be explored for
                              the Left and Right sentiment context (LS and
                              RS).
        :param tokeniser: A list of tokenisers to be used e.g.
                          :py:meth:`str.split`. Default [ark_twokenize]
        :param lower: A list of bool values which indicate whether to lower
                      case the input words. Default [True]
        :param C: A list of C values for the :py:class:`sklearn.svm.SVC`
                  estimator that is used in the pipeline. Default [0.01]
        :param random_state: A list of random_state values for the
                             :py:class:`sklearn.svm.SVC` estimator that is
                             used in the pipeline. Default [42]
        :param scale: List of scale values. The list can include
                      :py:class:`sklearn.preprocessing.MinMaxScaler` type of
                      classes or None if no scaling is to be used. Default
                      [:py:class:`sklearn.preprocessing.MinMaxScaler`]
        :return: Parameters to explore through cross validation
        '''
        params_list = super().get_cv_parameters(word_vectors, parser,
                                                tokeniser, lower, C,
                                                random_state, scale)
        # sentiment lexicon
        params_list = cls._add_to_params(params_list, senti_lexicon,
                                         cls._get_word_senti_names())
        return params_list

    @staticmethod
    def _get_word_vector_names() -> List[str]:
        '''
        :returns: A list of parameter names where the word vectors are set
                  in the pipeline.
        '''
        return ['union__dependency__word_vectors__vectors',
                'union__left__word_vectors__vectors',
                'union__right__word_vectors__vectors',
                'union__target__word_vectors__vectors',
                'union__right_s__word_vectors__vectors',
                'union__left_s__word_vectors__vectors']

    @staticmethod
    def _get_tokeniser_names() -> List[str]:
        '''
        :returns: A list of parameter names where the tokenisers are set in
                  the pipeline.
        '''
        return ['union__dependency__tokens',
                'union__left__tokens',
                'union__right__tokens',
                'union__target__tokens',
                'union__right_s__tokens',
                'union__left_s__tokens']

    @staticmethod
    def _get_dependency_context() -> List[str]:
        '''
        :returns: A list of parameter names where the dependency parser is
                  set in the pipeline
        '''
        return ['union__dependency__context__parser']

    @staticmethod
    def _get_word_senti_names() -> List[str]:
        '''
        :returns: A list of parameter names where the sentiment lexicons are
                  set in the pipeline.
        '''
        return ['union__left_s__filter__lexicon',
                'union__right_s__filter__lexicon']
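
# ---------------------------------------------------------------------------
# A minimal TDParsePlus sketch. Compared to TDParse it additionally needs a
# sentiment lexicon for the Left and Right sentiment contexts (LS and RS).
# The `bella.lexicons` and `bella.word_vectors` import paths and the
# lexicon/parser objects below are assumptions about the wider bella API;
# only the constructor signature is taken from this module.
#
#     from bella.word_vectors import SSWE   # assumed import path
#     from bella.lexicons import Lexicon    # assumed import path
#
#     senti_lexicon = Lexicon(...)          # construct as your install allows
#     model = TDParsePlus(word_vectors=[SSWE()], parser=tweebo_parser,
#                         senti_lexicon=senti_lexicon)
#     model.fit(train_data, train_sentiments)  # assumed SKLearnModel API
# ---------------------------------------------------------------------------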