# Krzysztof Joachimiak 2017
# sciquence: Time series & sequences in Python
#
# Functions for sequence processing
# Author: Krzysztof Joachimiak
#
# License: MIT
import numpy as np
from itertools import groupby
############## Getting sequences ##############
def seq(array):
    '''
    Split the input into runs of consecutive equal elements.

    Parameters
    ----------
    array: ndarray
        Numpy array (iterated element by element)

    Returns
    -------
    seq_list: list of ndarray
        One array per run, in order of appearance

    Examples
    --------
    >>> import sciquence.sequences as sq
    >>> import numpy as np
    >>> x = np.array([1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0])
    >>> sq.seq(x)
    [array([1, 1, 1, 1, 1, 1]), array([0, 0, 0, 0, 0, 0]), array([1, 1, 1, 1, 1]), array([0, 0, 0, 0])]
    '''
    runs = []
    # groupby only merges *adjacent* equal items, which is exactly a run.
    for _, run in groupby(array):
        runs.append(np.array(list(run)))
    return runs
def specseq(array, element):
    '''
    Return the runs of consecutive elements equal to a given value.

    Parameters
    ----------
    array: ndarray
        Numpy array
    element: object
        Value the runs must consist of

    Returns
    -------
    seq_list: list of ndarray
        Runs whose elements compare equal to `element`

    Examples
    --------
    >>> import sciquence.sequences as sq
    >>> import numpy as np
    >>> x = np.array([1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 44, 44, 44, 44, 44, 1, 1, 0, 0, 0, 0])
    >>> sq.specseq(x, 44)
    [array([44, 44, 44, 44, 44])]
    '''
    matching = []
    for value, run in groupby(array):
        if value == element:
            matching.append(np.array(list(run)))
    return matching
def nseq(array):
    '''
    Return the negative runs: consecutive equal elements whose value is
    falsy (e.g. zeros).

    Parameters
    ----------
    array: array-like
        Numpy array

    Returns
    -------
    seq_list: list of ndarray
        List of negative sequences

    Examples
    --------
    >>> from sciquence import sequences as sq
    >>> import numpy as np
    >>> x = np.array([1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0])
    >>> sq.nseq(x)
    [array([0, 0, 0, 0, 0, 0]), array([0, 0, 0, 0])]
    '''
    negative_runs = []
    for value, run in groupby(array):
        if value:
            # Truthy run -> not a negative sequence.
            continue
        negative_runs.append(np.array(list(run)))
    return negative_runs
def pseq(array):
    '''
    Return the positive runs: consecutive equal elements whose value is
    truthy (e.g. ones).

    Parameters
    ----------
    array: array-like
        Numpy array

    Returns
    -------
    seq_list: list of ndarray
        List of positive sequences

    Examples
    --------
    >>> from sciquence import sequences as sq
    >>> import numpy as np
    >>> x = np.array([1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0])
    >>> sq.pseq(x)
    [array([1, 1, 1, 1, 1, 1]), array([1, 1, 1, 1, 1])]
    '''
    positive_runs = []
    for value, run in groupby(array):
        if not value:
            # Falsy run -> not a positive sequence.
            continue
        positive_runs.append(np.array(list(run)))
    return positive_runs
def seqi(array):
    '''
    Get list of sequences and corresponding list of indices

    The input is cut into runs of consecutive equal elements; for every
    run the positions it occupies in the input are returned as well.

    Parameters
    ----------
    array: ndarray
        Numpy array

    Returns
    -------
    seq_list: list of ndarray
        List of sequences
    idx_list: list of ndarray
        List of sequences indices (one index array per sequence)

    Examples
    --------
    >>> import sciquence.sequences as sq
    >>> import numpy as np
    >>> x = np.array([1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 44, 44, 44, 44, 44, 1, 1, 0, 0, 0, 0])
    >>> seqs, idx = sq.seqi(x)
    >>> seqs
    [array([1, 1, 1, 1, 1, 1]), array([0, 0, 0, 0, 0, 0]), array([1]), array([44, 44, 44, 44, 44]), array([1, 1]), array([0, 0, 0, 0])]
    >>> idx
    [array([0, 1, 2, 3, 4, 5]), array([ 6,  7,  8,  9, 10, 11]), array([12]), array([13, 14, 15, 16, 17]), array([18, 19]), array([20, 21, 22, 23])]
    '''
    seq_list = []
    idx_list = []
    start = 0
    # Single pass: build each run and its index array together.
    for _, run in groupby(array):
        values = np.array(list(run))
        stop = start + len(values)
        seq_list.append(values)
        # ndarray (not a bare range) to match the documented return type
        # and the sibling *seqi functions.
        idx_list.append(np.arange(start, stop))
        start = stop
    return seq_list, idx_list
def nseqi(array):
    '''
    Get list of negative sequences indices (consisting of zeroes)

    Parameters
    ----------
    array: ndarray
        Numpy array

    Returns
    -------
    idx_list: list of ndarray
        For every run of consecutive elements equal to 0, the array of
        positions that run occupies in the input

    Examples
    --------
    >>> import sciquence.sequences as sq
    >>> import numpy as np
    >>> x = np.array([1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0])
    >>> sq.nseqi(x)
    [array([ 6,  7,  8,  9, 10, 11]), array([17, 18, 19, 20])]
    '''
    indices = []
    start = 0
    for value, run in groupby(array):
        # Only the run length is needed, so count without building an array.
        length = sum(1 for _ in run)
        if value == 0:
            indices.append(np.arange(start, start + length))
        start += length
    return indices
def pseqi(array):
    '''
    Get list of positive sequences indices (consisting of ones)

    Parameters
    ----------
    array: ndarray
        Numpy array

    Returns
    -------
    idx_list: list of ndarray
        For every run of consecutive elements equal to 1, the array of
        positions that run occupies in the input

    Examples
    --------
    >>> import sciquence.sequences as sq
    >>> import numpy as np
    >>> x = np.array([1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0])
    >>> sq.pseqi(x)
    [array([0, 1, 2, 3, 4, 5]), array([12, 13, 14, 15, 16])]
    '''
    indices = []
    start = 0
    for value, run in groupby(array):
        # Only the run length is needed, so count without building an array.
        length = sum(1 for _ in run)
        if value == 1:
            indices.append(np.arange(start, start + length))
        start += length
    return indices
def specseqi(array, elem):
    '''
    Get list of sequences indices, consisting of specific element

    Parameters
    ----------
    array: ndarray
        Numpy array
    elem: object
        A sequence element

    Returns
    -------
    idx_list: list of ndarray
        For every run of consecutive elements equal to `elem`, the array
        of positions that run occupies in the input

    Examples
    --------
    >>> import sciquence.sequences as sq
    >>> import numpy as np
    >>> x = np.array([1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 44, 44, 44, 44, 44, 1, 1, 0, 0, 0, 0])
    >>> sq.specseqi(x, 44)
    [array([13, 14, 15, 16, 17])]
    '''
    indices = []
    start = 0
    for value, run in groupby(array):
        # Only the run length is needed, so count without building an array.
        length = sum(1 for _ in run)
        if value == elem:
            indices.append(np.arange(start, start + length))
        start += length
    return indices
############### Splitting into chunks ###########
def chunk(array, chunk_size):
    '''
    Split numpy array into consecutive chunks of a given length.

    Every chunk has `chunk_size` elements except possibly the last one,
    which holds the remainder when the input length is not a multiple of
    `chunk_size`.

    Parameters
    ----------
    array: ndarray
        A numpy array
    chunk_size: int
        Desired length of a single chunk

    Returns
    -------
    chunks: list of ndarray
        Slices of the input, in order

    Examples
    --------
    >>> import numpy as np
    >>> import sciquence.sequences as sq
    >>> x = np.array([1,2,3,4,5,6,7,8,9,10])
    >>> sq.chunk(x, 3)
    [array([1, 2, 3]), array([4, 5, 6]), array([7, 8, 9]), array([10])]
    '''
    # `range` instead of `xrange` so the function also runs on Python 3.
    return [array[start:start + chunk_size]
            for start in range(0, len(array), chunk_size)]
if __name__ == '__main__':
    # Manual smoke check: print the index runs of the value 44.
    x = np.array([1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 44, 44, 44, 44, 44, 1, 1, 0, 0, 0, 0])
    # Parenthesised single-argument form works on both Python 2 and 3.
    print(specseqi(x, 44))