sequtils

List manipulation utilities.

source


00001 def chunk( seq, size, pad=None ):
00002     '''
00003     Slice a list into consecutive disjoint 'chunks' of
00004     length equal to size. The last chunk is padded if necessary.
00005 
00006     >>> list(chunk(range(1,10),3))
00007     [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
00008     >>> list(chunk(range(1,9),3))
00009     [[1, 2, 3], [4, 5, 6], [7, 8, None]]
00010     >>> list(chunk(range(1,8),3))
00011     [[1, 2, 3], [4, 5, 6], [7, None, None]]
00012     >>> list(chunk(range(1,10),1))
00013     [[1], [2], [3], [4], [5], [6], [7], [8], [9]]
00014     >>> list(chunk(range(1,10),9))
00015     [[1, 2, 3, 4, 5, 6, 7, 8, 9]]
00016     >>> for X in chunk([],3): print X
00017     >>>
00018     '''
00019     n = len(seq)
00020     mod = n % size
00021     for i in xrange(0, n-mod, size):
00022         yield seq[i:i+size]
00023     if mod:
00024         padding = [pad] * (size-mod)
00025         yield seq[-mod:] + padding
00026 
00027 def columnise( seq, numcols, pad=None ):
00028     '''
00029     >>> columnise(range(1,10),3)
00030     [(1, 4, 7), (2, 5, 8), (3, 6, 9)]
00031     >>> columnise(range(1,9),3)
00032     [(1, 4, 7), (2, 5, 8), (3, 6, None)]
00033     >>> columnise(range(1,8),3)
00034     [(1, 4, 7), (2, 5, None), (3, 6, None)]
00035     '''
00036     return zip( *chunk(seq, numcols, pad) )
00037 
00038 def kslice( seq, k ):
00039     '''
00040     Generate all slices of seq that have length k.
00041 
00042     >>> list(kslice(range(1,4), 2))
00043     [[1, 2], [1, 3], [2, 3]]
00044     >>> [''.join(s) for s in kslice( 'abcd', 3 )]
00045     ['abc', 'abd', 'acd', 'bcd']
00046     '''
00047     for s in nksubsets( len(seq), k ):
00048         yield [ seq[i-1] for i in s ]
00049 
00050 def nksubsets( n, k ):
00051     '''
00052     Generate all subsets of S(n) that have k items.
00053     S(n) = {1, 2, 3, ..., n}
00054 
00055     >>> list(nksubsets(3, 2))
00056     [[1, 2], [1, 3], [2, 3]]
00057     '''
00058     m = n - k + 1
00059     indexer = range(0, k)
00060     seq = range(1, k+1)
00061     last = range(m, n+1)
00062     yield seq[:]
00063     while seq != last:
00064         high_value = -1
00065         high_index = -1
00066         for i in indexer:
00067             val = seq[i]
00068             if val > high_value and val < m + i:
00069                 high_value = val
00070                 high_index = i
00071         for j in range(k - high_index):
00072             seq[j+high_index] = high_value + j + 1
00073         yield seq[:]
00074 
00075 def subsequence_ascending( seq ):
00076     '''based on original C code by Lucian Bentea
00077 
00078     >>> subsequence_ascending( [1, 2, 3, 4] )
00079     [1, 2, 3, 4]
00080     >>> subsequence_ascending( [4, 3, 2, 1] )
00081     [4]
00082     >>> subsequence_ascending( [1, 0, 2, 0, 3, 0, 4] )
00083     [1, 2, 3, 4]
00084     >>> subsequence_ascending( [1, 9, 2, 9, 3, 9, 4] )
00085     [1, 9, 9, 9]
00086     >>> subsequence_ascending( [1, 9, 2, 8, 3, 2, 4] )
00087     [1, 2, 3, 4]
00088     >>> subsequence_ascending( [1, 9, 2, 8, 3, 7, 4] )
00089     [1, 2, 3, 7]
00090     >>> subsequence_ascending( [1, 1, 1, 1] )
00091     [1, 1, 1, 1]
00092     >>> subsequence_ascending( [] )
00093     []
00094     '''
00095     if not seq: return []
00096     n = len( seq )
00097     U = [1] * n
00098     V = [0] * n
00099     for i in xrange( n-2, -1, -1 ):
00100         max_ = 0
00101         imax = 0
00102         seq_i = seq[i]
00103         for j in xrange( i+1, n ):
00104             if U[j] > max_ and seq_i <= seq[j]:
00105                 max_ = U[j]
00106                 imax = j
00107         U[i] = 1 + max_
00108         V[i] = imax
00109     max_ = U[0]
00110     imax = 0
00111     for i in xrange( 1, n ):
00112         if U[i] > max_:
00113             max_ = U[i]
00114             imax = i
00115     ret = []
00116     while True:
00117         ret.append( seq[imax] )
00118         imax = V[imax]
00119         if not imax:
00120             break
00121     return ret
00122 
00123 def itemwise_intersect( *args ):
00124     '''
00125     >>> itemwise_intersect('##a##b#a', '%%a%%b%a')
00126     set([(2, 'a'), (7, 'a'), (5, 'b')])
00127     '''
00128     m = min( map( len, args ) )
00129     s = set(enumerate(args[0][:m]))
00130     for t in ( set(enumerate(X[:m])) for X in args[1:] ):
00131         s.intersection_update(t)
00132     return s
00133 
00134 def _test():
00135     import doctest
00136     doctest.testmod()
00137 
00138 if __name__ == '__main__':
00139     _test()