# Source code for sciquence.postprocessing.binarizer

# -*- coding: utf-8 -*-
# Krzysztof Joachimiak 2018
# sciquence: Time series & sequences in Python
#
# Binarizers
# Author: Krzysztof Joachimiak
#
# License: MIT

import sys
sys.path.append("..")

import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
import copy
#from sciquence.utils.docstring import inherit_docstring

#@inherit_docstring
class ClasswiseBinarizer(BaseEstimator, TransformerMixin):
    '''
    Perform binarization classwise.

    Every class (column) is compared against its own threshold, so the
    transformer may be used to binarize multiple classes independently,
    e.g. in tagging tasks.

    Parameters
    ----------
    thresholds: list of float or numpy.ndarray
        Binarization thresholds for all the classes

    '''

    def __init__(self, thresholds):
        # TODO: axis?
        # Stored untouched; conversion to an array happens in transform().
        self.thresholds = thresholds

    def fit(self, X, y=None):
        '''Does nothing; returns the binarizer itself.'''
        return self

    def transform(self, X, y=None, copy=False):
        '''
        Perform classwise binarization, i.e. every column has
        its own specific binarization threshold.

        An element becomes 1.0 when it is greater than or equal to the
        corresponding threshold and 0.0 otherwise.

        Parameters
        ----------
        X: numpy.ndarray
            Probabilities vector
        y: None
            Nothing, argument for API compatibility
        copy: bool
            Accepted for API compatibility only. It is currently ignored:
            the comparison always produces a new array and X is left
            unmodified.

        Returns
        -------
        binarized_X: numpy.ndarray
            Binarized output (float dtype)

        Examples
        --------
        >>> import numpy as np
        >>> X = np.array(
        ...     [[0.04344385, 0.24317802, 0.81423947],
        ...      [0.30503777, 0.08385118, 0.48402043],
        ...      [0.38695257, 0.64501778, 0.19023201],
        ...      [0.99852846, 0.75026559, 0.43106003]]
        ... )
        >>> X_binarized = ClasswiseBinarizer(thresholds=[.5, .4, .3]).transform(X)
        >>> X_binarized.tolist()
        [[0.0, 0.0, 1.0], [0.0, 0.0, 1.0], [0.0, 1.0, 0.0], [1.0, 1.0, 1.0]]

        '''
        # Coerce the thresholds to an array so the comparison is always an
        # elementwise NumPy broadcast, even when X is a plain Python list
        # (list >= list would otherwise compare lexicographically or raise).
        thresholds = np.asarray(self.thresholds)
        return (X >= thresholds).astype(float)
def binarize_classwise(X, thresholds):
    '''
    Binarization performed classwise.

    Every element of X is compared against the threshold of its class
    (column): it becomes 1.0 when it is greater than or equal to the
    threshold and 0.0 otherwise.

    Parameters
    ----------
    X: numpy.ndarray
        Probabilities vector
    thresholds: list of float or numpy.ndarray
        Binarization thresholds for all the classes

    Returns
    -------
    binarized_X: numpy.ndarray
        Binarized output (float dtype)

    Examples
    --------
    >>> import numpy as np
    >>> X = np.array(
    ...     [[0.04344385, 0.24317802, 0.81423947],
    ...      [0.30503777, 0.08385118, 0.48402043],
    ...      [0.38695257, 0.64501778, 0.19023201],
    ...      [0.99852846, 0.75026559, 0.43106003]]
    ... )
    >>> X_binarized = binarize_classwise(X, thresholds=[.5, .4, .3])
    >>> X_binarized.tolist()
    [[0.0, 0.0, 1.0], [0.0, 0.0, 1.0], [0.0, 1.0, 0.0], [1.0, 1.0, 1.0]]

    '''
    # Coerce the thresholds to an array so the comparison is always an
    # elementwise NumPy broadcast, even when X is a plain Python list
    # (list >= list would otherwise compare lexicographically or raise).
    thresholds = np.asarray(thresholds)
    return (X >= thresholds).astype(float)
## TODO: ClasswiseMeanBinarizer


# Small demo, executed only when the module is run as a script.
if __name__ == '__main__':

    # Dummy data: 10 samples x 3 classes, uniform in [0, 1)
    X = np.random.rand(10, 3)
    # print() with a single argument behaves the same on Python 2 and 3
    print(X)

    # Binarizing, one threshold per column
    bX = ClasswiseBinarizer(thresholds=[.5, .4, .3]).transform(X)
    print(bX)