# Source code for sciquence.sequences.sampling

# Krzysztof Joachimiak 2017
# sciquence: Time series & sequences in Python
#
# Sequence sampling
# Author: Krzysztof Joachimiak
#
# License: MIT

import numpy as np


def random_slice(array_len, slice_length):
    '''

    Choose a random slice of given length

    Every start position from 0 up to (and including)
    ``array_len - slice_length`` can be drawn.

    Parameters
    ----------
    array_len: int
        Array length
    slice_length: int
        Length of subsequence

    Returns
    -------
    slice: slice
        A subsequence slice

    Raises
    ------
    ValueError
        If slice_length is greater than array_len

    Examples
    --------
    >>> import numpy as np
    >>> import sciquence.sequences as sq
    >>> print(sq.random_slice(54, 6))  # doctest: +SKIP
    slice(15, 21, None)

    '''

    if array_len < slice_length:
        raise ValueError("Slice length cannot be greater than input array length")

    # Last admissible start position (inclusive)
    max_possible = array_len - slice_length

    # The upper bound of randint is exclusive, hence "+ 1". Without it the
    # last start position is never drawn and array_len == slice_length
    # fails on an empty range.
    first = np.random.randint(0, max_possible + 1)
    last = first + slice_length
    return slice(first, last)


def random_fragments(array_len, frag_len, n):
    '''

    Get n disjunctive (non-overlapping) fragments.

    The fragments are placed directly instead of being drawn one by one and
    rejected on overlap, so the function always terminates, also when the
    fragments fill the whole array.

    Parameters
    ----------
    array_len: int
        Len of array to be sampled from
    frag_len: int
        Length of a single fragment
    n: int
        Number of fragments

    Returns
    -------
    fragments: list of list
        Fragment indices; each fragment is a list of frag_len consecutive
        indices. Fragments are returned in random order.

    Raises
    ------
    ValueError
        If n fragments of length frag_len do not fit into array_len

    '''

    # Check if possible
    if array_len < n * frag_len:
        raise ValueError("Cannot sample {} disjunctive "
                         "fragments (len: {}) "
                         "from array of length {}".format(n, frag_len, array_len))

    if n <= 0:
        return []

    # Number of array positions left uncovered once all fragments are placed
    slack = array_len - n * frag_len

    # Shrink every fragment to a single slot: a placement of n disjoint
    # fragments then corresponds to choosing n distinct slots out of
    # (slack + n). Drawing without replacement makes each valid placement
    # equally likely.
    slots = np.sort(np.random.choice(slack + n, size=n, replace=False))

    fragments = []
    for i, slot in enumerate(slots):
        # Expand the i-th slot back: each of the i preceding fragments
        # occupies (frag_len - 1) positions more than its slot did.
        first = int(slot) + i * (frag_len - 1)
        fragments.append(list(range(first, first + frag_len)))

    # Return the fragments in random order rather than sorted by position
    order = np.random.permutation(n)
    return [fragments[i] for i in order]


def is_overlapped(idx1, idx2):
    '''

    Tell whether two collections of indices have any index in common.

    Parameters
    ----------
    idx1: list of int
        First list of indices
    idx2: list of int
        Second list of indices

    Returns
    -------
    is_overlapped: bool
        True if at least one index occurs in both lists, otherwise: False

    Examples
    --------
    >>> is_overlapped([1, 2, 3], [3, 4])
    True
    >>> is_overlapped([1, 2], [3, 4])
    False

    '''
    first_indices = set(idx1)
    second_indices = set(idx2)

    # The lists overlap exactly when the two sets are not disjoint
    return not first_indices.isdisjoint(second_indices)


def cut_patches(data, center_indices, pad, ignore_short=False):
    '''

    Cut patches around selected centers

    A full patch spans ``pad`` elements on each side of its center, i.e.
    ``2 * pad + 1`` elements. Patches that would reach beyond either end of
    the data are truncated, or skipped when ignore_short is set.

    Parameters
    ----------
    data: numpy.ndarray
        1-d numpy array
    center_indices: list of int
        List of patch centers
    pad: int
        Padding for both side
    ignore_short: bool
        Ignore patches if are too short

    Returns
    -------
    patches: list of numpy.ndarray
        List of patches

    Examples
    --------
    >>> import numpy as np
    >>> import sciquence.sequences as sq
    >>> sq.cut_patches(np.arange(10), [0, 5], 2)
    [array([0, 1, 2]), array([3, 4, 5, 6, 7])]

    '''

    # TODO: Add fix length

    patches = []
    data_len = len(data)

    for ci in center_indices:
        start = ci - pad
        # Exclusive end of the patch, so it has to be compared against
        # len(data) and not against the last valid index
        stop = ci + pad + 1

        if start < 0 or stop > data_len:
            if ignore_short:
                continue
            # Clamp both ends into [0, len(data)]; this also prevents a
            # negative bound from being read as "count from the end"
            start = min(max(start, 0), data_len)
            stop = min(max(stop, 0), data_len)

        patches.append(data[start:stop])

    return patches


def random_chunk(seq, chunk_length):
    '''

    Cut random chunk from a sequence

    If the sequence is shorter than chunk_length, it is copied to the
    beginning of a zero-filled array of length chunk_length instead.

    Parameters
    ----------
    seq: numpy.ndarray
        A sequence
    chunk_length: int
        Desired length of sequence

    Returns
    -------
    random_chunk: numpy.ndarray
        A random chunk of given length. In the zero-padded case a new array
        of numpy's default float dtype is returned, with the same trailing
        dimensions as seq.

    '''

    seq_len = len(seq)

    if seq_len < chunk_length:
        values = np.asarray(seq)
        # Follow the trailing dimensions of the input rather than assuming
        # a single column, so 1-d sequences can be padded as well
        padded = np.zeros((chunk_length,) + values.shape[1:])
        padded[:seq_len] = values
        return padded

    # The upper bound of randint is exclusive, hence "+ 1": the last
    # possible start stays reachable and len(seq) == chunk_length works
    start = np.random.randint(0, seq_len - chunk_length + 1)
    stop = start + chunk_length

    return seq[start:stop]

# TODO: replace librosa.fix_length with own function
# def put_center(data, desired_size):
 #   '''
 #   
 #   Pad or trim data.
 #   
 #   Parameters
 #   ----------
  #  data: numpy.ndarray
 #       Data
 #   desired_size: int
 #       Desired length of array
  #  
  #  Returns
  #  -------
  #  random_chunk: numpy.ndarray
  #      A random chunk of given length
    
    
#    '''
#    if len(wave) > wave_size: 
#        pd = (len(wave) - wave_size) / 2
#        wave = wave[pd:-pd]
#        return librosa.util.fix_length(wave, wave_size)
#    else:
#       return librosa.util.pad_center(wave, wave_size)
    

if __name__ == '__main__':
    # Quick manual check. The parenthesized single-argument form of print
    # works both as the Python 2 statement and as the Python 3 function.
    print(random_slice(44, 6))