#!/usr/bin/env python ''' Show relatively simple implementations of various sorting algorithms. The dataset used for each algorithm has a value and the original position so that we can check for stability. This implementation uses the python decorator idiom to avoid cluttering each algorithm with informational print statements. In addition, the heapsort shows how to implement a simple minheap. ''' import copy import math import random import sys class Item(object): ''' An item in a sortable list. It stores the value which is what is used to sort and the position which is the position in the original list. In a stable sort, the position values will always be increasing for identical values. ''' def __init__(self, val=0, pos=0, obj=None): self.m_val = val self.m_pos = pos if obj is not None: self.m_val = obj.m_val self.m_pos = obj.m_pos def __lt__(self, obj): ''' Do not consider the pos because we use that to determine whether the sort is stable. ''' return self.m_val < obj.m_val def __str__(self): return '%d:%d' % (self.m_val, self.m_pos) def __repr__(self): return '%d:%d' % (self.m_val, self.m_pos) def wrapper(func): ''' @wrapper decorator. It reports the function name, the input data, the sorted data, tests whether the sort algorithm worked and reports whether it was stable. ''' def sorter(items): def items_str(items): lstr = '' for item in items: datum = ', %s' % (str(item)) lstr += datum return '[' + lstr[2:] + ']' # Print the function name and list information. print('%s of %d items' % (func.__name__, len(items))) print(' input : %s' % (items_str(items))) func(items) print(' sorted : %s' % (items_str(items))) # Test whether the list was correct sorted. is_sorted = -1 for i in range(len(items)-1): if items[i].m_val > items[i+1].m_val: is_sorted = i break if is_sorted: print(' _sorted: true') else: print(' _sorted: false (%d)' % (is_sorted)) # Test the stability of the sort by looking at the # pos values. For a stable sort they will always # increase for items that have the same value. is_stable = -1 for i in range(len(items)-1): if items[i].m_val == items[i+1].m_val: if items[i].m_pos > items[i+1].m_pos: is_stable = i break; if is_stable: print(' _stable: true') else: print(' _stable: false (%d)' % (is_stable)) # All of the sorting algorithms must work! assert is_sorted == -1 return sorter @wrapper def bubble_sort(items): ''' Bubble sort. WC = O(n^2) BC = O(n) AC = O(n^2) SPC = O(1) stable = yes CITATION: http://en.wikipedia.org/wiki/Bubble_sort ''' swaps = 1 # to get started while swaps > 0: swaps = 0 for i in range(len(items) - 1): j = i + 1 if items[j].m_val < items[i].m_val: items[i], items[j] = items[j], items[i] swaps += 1 @wrapper def selection_sort(items): ''' Selection sort. WC = O(n^2) BC = O(n) AC = O(n^2) SPC = O(1) stable = no CITATION: http://en.wikipedia.org/wiki/Selection_sort ''' for i in range(len(items)-1): k = i for j in range(i+1, len(items)): if items[j].m_val < items[k].m_val: k = j if k != i: items[i], items[k] = items[k], items[i] @wrapper def insertion_sort(items): ''' Insertion sort. WC = O(n^2) BC = O(n) AC = O(n^2) SPC = O(1) stable = yes CITATION: http://en.wikipedia.org/wiki/Insertion_sort ''' for i in range(len(items)): v = items[i] j = i while j > 0 and v.m_val < items[j - 1].m_val: items[j], items[j-1] = items[j-1], items[j] j -= 1 items[j] = v @wrapper def merge_sort_simple_top_down(items): ''' Simple top down merge sort. WC = O(n log(n)) BC = O(n log(n)) AC = O(n log(n)) SPC = O(n) stable = no This is an extremely simple implementation. CITATION: http://en.wikipedia.org/wiki/Merge_sort#Top-down_implementation ''' def _merge(data, left, right): # Merge the left and right results into data. i = 0 # left index j = 0 # right index k = 0 # data index while i < len(left) and j < len(right): if left[i].m_val < right[j].m_val: data[k] = left[i] i += 1 else: data[k] = right[j] j += 1 k += 1 # Copy over the remainders. while i < len(left): data[k] = left[i] i += 1 k += 1 while j < len(right): data[k] = right[j] j += 1 k += 1 assert k == len(data) return data def _mergesort(data): if len(data) < 2: return data # Split into two sub arrays of approximately equal size and # copy over the results. mid = len(data) / 2 left = [Item(obj=data[i]) for i in range(mid)] right = [Item(obj=data[i]) for i in range(mid, len(data))] # Recursively sort each side. left = _mergesort(left) right = _mergesort(right) # Merge them return _merge(data, left, right) return _mergesort(items) @wrapper def heapsort(items): ''' Heap sort. WC = O(n log(n)) BC = O(n log(n)) AC = O(n log(n)) SPC = O(n) stable = no CITATION: http://en.wikipedia.org/wiki/Heapsort CITATION: http://www.shaunlippy.org/blog/?p=105 CITATION: Skiena, "The Algorithm Design Manual", pages 112-117 ''' # The basic idea is to add all of the elements to a min-heap and # then pop them off one by one. def _heapify(heap, node, size): left = 2 * node + 1 right = 2 * node + 2 # Find the smallest item for a node by first looking at the # left child and then looking at the right. # In a min heap, the smallest node must be the node. if left < size and heap[left].m_val < heap[node].m_val: smallest = left else: smallest = node if right < size and heap[right].m_val < heap[smallest].m_val: smallest = right # The node (node) is not the smallest, make it so. if node != smallest: heap[node], heap[smallest] = heap[smallest], heap[node] _heapify(heap, smallest, size) # Construct the heap. # We could, of course, use the built in heap or create a heap # object but the purpose of this exercise is to keep the data # structures as simple as possible so i am going to use a # simple array structure. heap = [None for _ in xrange(len(items))] size = len(items) for i in xrange(size): # first load everything heap[i] = items[i] for i in xrange(size-1, -1, -1): # now bubble down the values _heapify(heap, i, size) # Now the heap is constructed walk through it and pop off the # minimum. for i in xrange(size): new_size = size - i - 1 items[i] = heap[0] # get the min val heap[0] = heap[new_size] # replace top with maxval _heapify(heap, 0, new_size) @wrapper def quicksort_inplace(items): ''' Quicksort. WC: O(n^2) # rare AC: O(n log(n)) BC: O(n log(n)) SPC: O(log(n)) stable = no CITATION: http://en.wikipedia.org/wiki/Quicksort ''' def _quicksort(data, left, right): pivot = (left + right) / 2 val = data[pivot].m_val i = left j = right while i <= j: # Find the leftmost value >= pivot # In the worst case it will terminate at the pivot while data[i].m_val < val: i += 1 # Find the rightmost value <= pivot # In the worst case it will terminate at the pivot while data[j].m_val > val: j -= 1 # Wrong order, swap. # Note that we have an extra swap when i==j but # we need to increment the values in the case # so we will suffer the overhead. if i <= j: data[i], data[j] = data[j], data[i] i += 1 j -= 1 if left < j: _quicksort(data, left, j) if i < right: _quicksort(data, i, right) _quicksort(items, 0, len(items) - 1) @wrapper def radix_sort(items): ''' Radix sort. WC = O(kN) # k is the number of digits in the largest key SPC = O(k + N) stable = no # this implementation CITATION: http://en.wikipedia.org/wiki/Radix_sort ''' def _radixsort(items, base=10): maxval = max(abs(num.m_val) for num in items) passes = int(round(math.log(maxval, base)) + 1) for digit_num in xrange(passes): # create a list of empty lists to hold the split by digit buckets = [ [] for _ in range(base) ] for item in items: num = item.m_val # append the object to the list selected by the digit # note: // == floor division k = (num // base ** digit_num) % base buckets[k].append(item) items = [] for sublist in buckets: items.extend(sublist) return items data = _radixsort(items) for i in range(len(data)): items[i] = data[i] def test(): ''' Test the sort algorithms with different data sets. For each test report the sorted results, whether the sort algorithm worked and whether the sort was stable. ''' algs = [bubble_sort, selection_sort, insertion_sort, merge_sort_simple_top_down, heapsort, quicksort_inplace, radix_sort, ] data = [ [ Item(i, i) for i in xrange(20)], [ Item(19 - i, i) for i in xrange(20)], [ Item(i/4, i) for i in xrange(20)], # lots of dups [ Item((19 - i) / 4, i) for i in xrange(20)], [ Item(random.randint(0, 19), i) for i in xrange(20)], [ Item(random.randint(0, 10), i) for i in xrange(24)], [ Item(random.randint(513, 997), i) for i in xrange(20)], ] for alg in algs: print('') print('-'*64) for i in range(len(data)): datum = data[i] # deepcopy to make sure we have the original data items = copy.deepcopy(datum) alg(items) if __name__ == '__main__': test()