Source code for sciquence.postprocessing.binarizer

# -*- coding: utf-8 -*-
# Krzysztof Joachimiak 2018
# sciquence: Time series & sequences in Python
#
# Binarizers
# Author: Krzysztof Joachimiak
#
# License: MIT

import sys
sys.path.append("..")

import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
import copy
#from sciquence.utils.docstring import inherit_docstring

#@inherit_docstring
class ClasswiseBinarizer(BaseEstimator, TransformerMixin):
    '''
    Binarize scores classwise, i.e. with a separate threshold per column.

    It may be used to binarize multiple classes independently in
    tagging tasks.

    Parameters
    ----------
    thresholds: list of float or numpy.ndarray
        Binarization thresholds for all the classes. They are compared
        against ``X`` with numpy broadcasting, so there is typically one
        threshold per column of ``X``.

    '''

    def __init__(self, thresholds):
        # TODO: axis?
        # Stored untouched; it is only read in transform().
        self.thresholds = thresholds

    def fit(self, X, y=None):
        '''
        Do nothing and return the binarizer itself.

        Nothing is computed from ``X`` or ``y``; the method exists only
        for API compatibility.

        Returns
        -------
        self: ClasswiseBinarizer
            This object, unchanged
        '''
        return self

    def transform(self, X, y=None, copy=False):
        '''
        Perform classwise binarization, i.e. every column has
        its own specific binarization threshold.

        An entry becomes 1.0 when it is greater than or equal to its
        threshold and 0.0 otherwise.

        Parameters
        ----------
        X: numpy.ndarray
            Probabilities vector. Plain lists and tuples are converted
            to arrays first.
        y: None
            Nothing, argument for API compatibility
        copy: bool
            Currently unused and kept for API compatibility: a new array
            is always returned and ``X`` is never modified in place.

        Returns
        -------
        binarized_X: numpy.ndarray
            Binarized output, of float dtype

        Examples
        ---------
        >>> import numpy as np
        >>> X = np.array(
        ...     [[0.04344385, 0.24317802, 0.81423947],
        ...      [0.30503777, 0.08385118, 0.48402043],
        ...      [0.38695257, 0.64501778, 0.19023201],
        ...      [0.49452506, 0.35440145, 0.74149338],
        ...      [0.25147325, 0.14294654, 0.6648142 ],
        ...      [0.99852846, 0.75026559, 0.43106003],
        ...      [0.33369685, 0.41158767, 0.86865335],
        ...      [0.07741532, 0.90428353, 0.87152301],
        ...      [0.79609158, 0.47617837, 0.1890651 ],
        ...      [0.14287567, 0.52800364, 0.10957203]]
        ... )
        >>> X_binarized = ClasswiseBinarizer(thresholds=[.5, .4, .3]).transform(X)
        >>> print(X_binarized)
        [[0. 0. 1.]
         [0. 0. 1.]
         [0. 1. 0.]
         [0. 0. 1.]
         [0. 0. 1.]
         [1. 1. 1.]
         [0. 1. 1.]
         [0. 1. 1.]
         [1. 1. 0.]
         [0. 1. 0.]]

        '''
        # Comparing two plain sequences with ">=" does not broadcast (it
        # yields a single bool or raises), so turn them into arrays first.
        # Other inputs are passed through untouched.
        if isinstance(X, (list, tuple)):
            X = np.asarray(X)
        return (X >= self.thresholds).astype(float)
       
       
def binarize_classwise(X, thresholds):
    '''
    Binarization performed classwise.

    Every entry of ``X`` is compared with the threshold of its column
    (numpy broadcasting) and becomes 1.0 when it is greater than or
    equal to that threshold, 0.0 otherwise.

    Parameters
    ----------
    X: numpy.ndarray
        Probabilities vector. Plain lists and tuples are converted to
        arrays first.
    thresholds: list of float or numpy.ndarray
        Binarization thresholds for all the classes

    Returns
    -------
    binarized_X: numpy.ndarray
        Binarized output, of float dtype. ``X`` itself is not modified.

    Examples
    --------
    >>> import numpy as np
    >>> X = np.array(
    ...     [[0.04344385, 0.24317802, 0.81423947],
    ...      [0.30503777, 0.08385118, 0.48402043],
    ...      [0.38695257, 0.64501778, 0.19023201],
    ...      [0.49452506, 0.35440145, 0.74149338],
    ...      [0.25147325, 0.14294654, 0.6648142 ],
    ...      [0.99852846, 0.75026559, 0.43106003],
    ...      [0.33369685, 0.41158767, 0.86865335],
    ...      [0.07741532, 0.90428353, 0.87152301],
    ...      [0.79609158, 0.47617837, 0.1890651 ],
    ...      [0.14287567, 0.52800364, 0.10957203]]
    ... )
    >>> X_binarized = binarize_classwise(X, thresholds=[.5, .4, .3])
    >>> print(X_binarized)
    [[0. 0. 1.]
     [0. 0. 1.]
     [0. 1. 0.]
     [0. 0. 1.]
     [0. 0. 1.]
     [1. 1. 1.]
     [0. 1. 1.]
     [0. 1. 1.]
     [1. 1. 0.]
     [0. 1. 0.]]

    '''
    # Comparing two plain sequences with ">=" does not broadcast (it yields
    # a single bool or raises), so turn them into arrays first. Other
    # inputs are passed through untouched.
    if isinstance(X, (list, tuple)):
        X = np.asarray(X)
    return (X >= thresholds).astype(float)


## TODO: ClasswiseMeanBinarizer


if __name__ == '__main__':
    # Small manual demo: binarize random scores and show input and output.

    # Dummy data: 10 rows, one column per class
    X = np.random.rand(10, 3)

    # print() with a single argument works on both Python 2 and Python 3,
    # whereas the print statement is a syntax error on Python 3.
    print(X)

    # Binarizing, one threshold per column
    bX = ClasswiseBinarizer(thresholds=[.5, .4, .3]).transform(X)

    print(bX)