Source code for bella.scikit_features.lexicon_filter

from sklearn.base import TransformerMixin
from sklearn.base import BaseEstimator

from bella import lexicons

class LexiconFilter(BaseEstimator, TransformerMixin):
    '''
    Transformer that masks every token that is not in a lexicon.

    Tokens found in ``lexicon.words`` are kept as they are; every other
    token is replaced with ``zero_token``. The nesting of the input is
    preserved in the output.
    '''

    def __init__(self, lexicon=None, zero_token='$$$ZERO_TOKEN$$$'):
        '''
        :param lexicon: Object with a ``words`` attribute that supports
                        ``in`` membership tests. It is only stored here; it
                        is first read when ``transform`` is called.
        :param zero_token: Replacement for tokens that are not in
                           ``lexicon.words``.
        '''
        self.lexicon = lexicon
        self.zero_token = zero_token

    def fit(self, context_tokens, y=None):
        '''
        Kept for consistency with the TransformerMixin; nothing is learnt.

        :param context_tokens: Ignored.
        :param y: Ignored.
        :returns: This instance, unchanged.
        '''
        return self

    def fit_transform(self, context_tokens, y=None):
        '''
        Same as ``transform``; see ``self.transform``.

        :param context_tokens: Passed straight to ``transform``.
        :param y: Ignored.
        :returns: The result of ``self.transform(context_tokens)``.
        '''
        return self.transform(context_tokens)

    def transform(self, contexts_tokens):
        '''
        Replace each token that is not in the lexicon with ``zero_token``.

        :param contexts_tokens: Three-level nested iterable: each item is an
                                iterable of contexts, and each context is an
                                iterable of tokens.
        :returns: A list of lists of lists with the same nesting as the
                  input, where each token is either the original token (when
                  it is in ``self.lexicon.words``) or ``self.zero_token``.
        '''
        # Look these up once rather than once per token.
        lexicon_words = self.lexicon.words
        zero_token = self.zero_token

        def mask_context(context):
            # Keep lexicon tokens, mask the rest.
            return [token if token in lexicon_words else zero_token
                    for token in context]

        return [[mask_context(context) for context in contexts]
                for contexts in contexts_tokens]