from abc import ABC, abstractmethod
import dill
[docs]class BaseFeaturizer(ABC):
"""
Base class that all featurizer classes should inherit from to ensure
uniformity.
"""
COARSE_POS_TAGS = ['ADJ', 'ADP', 'ADV', 'AUX', 'CONJ', 'CCONJ', 'DET',
'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN',
'PUNCT', 'SCONJ', 'SYM', 'VERB', 'X', 'SPACE']
[docs] @abstractmethod
def add_feature(self, feature_extraction_function):
"""
Should add a custom feature extraction function to the ones
defined in the inheriting class.
:param feature_extraction_function: Custom function that
extracts features from text.
:type feature_extraction_function: function
"""
pass
[docs] def save(self, filename):
"""
Saves current featurizer instance in binary format.
:param filename: Name of the file where the featurizer should be
saved.
:type filename: str
"""
with open(filename, "wb") as file:
dill.dump(self, file)
[docs] @classmethod
def load(cls, filename):
"""
Loads a previously saved featurizer from a binary file.
:param filename: Name of the binary file that the featurizer
should be loaded from.
:type filename: str
:return: Classifier instance.
"""
with open(filename, "rb") as file:
featurizer = dill.load(file)
return featurizer