import random import time import sys # This makes the code nicer later on - we can travel up # or down an array and see how many duplicates are there def dups_by_step(a, index, step): high = len(a)-1 low = 0 count = 0 while True: next = index + step if next > high or next < low or a[next] != a[index]: return count count += 1 index = next def median(one, two): half = (len(one) + len(two))/2 potentials = [] # Anything that's not ruled out goes here min = -sys.maxint-1 max = sys.maxint if (not one) and (not two): return None # The job of this function is to look at a particular number and # either rule it out or add it to potentials (it's ruled out if # bigger or smaller numbers comprise more than half of the other numbers) # We use return codes to talk to the calling function. # 2: Smaller than one we've already checked # -2: Bigger than one we've already checked # 1: Too small, and our new minimum # -1: Too large, and our new max # 0: One of the medians def refine_range(min, max, me, him, i): cur = me[i] if cur in potentials: # duplicate numbers would screw with an average return 0 if cur <= min: return 2 if cur >= max: return -2 # We know by array position how many are bigger and smaller smaller = i - dups_by_step(me, i, -1) bigger = len(me) - (i + dups_by_step(me, i, 1) + 1) # We binary search for a bigger number low = 0 high = len(him)-1 while low <= high: mid = (low+high)/2 if him[mid] < cur: low = mid + 1 elif him[mid] > cur: high = mid - 1 else: low = mid smaller -= dups_by_step(him, low, -1) bigger -= 1 + dups_by_step(him, low, 1) break if low > len(him): # Everything in the whole array was too small smaller += len(him) else: smaller += low bigger += len(him) - low if bigger > half: return 1 if smaller > half: return -1 potentials.append(cur) return 0 # This function does a binary search of the array "me" for medians. # It gets called once for each array. def finder(min, max, me, him): low = 0 high = len(me)-1 while(low <= high): i = (low + high)/2 direction = refine_range(min, max, me, him,i) if direction < 0: if direction == -1: max = me[i] high = i - 1 elif direction > 0: if direction == 1: min = me[i] low = i + 1 else: # pick up any medians to our immediate left and right # (which is where they'll be if they're in this list) left = i - (dups_by_step(me, i, -1) + 1) right = i + (dups_by_step(me, i, 1) + 1) if(left >= 0 and left < len(me)): refine_range(min, max, me, him, left) if(right >= 0 and right < len(me)): refine_range(min, max, me, him, right) break return (min, max) min, max = finder(min, max, one, two) min, max = finder(min, max, two, one) total = 0 for p in potentials: total += p return float(total)/len(potentials) def old_median(one, two): newList = one[:] newList.extend(two) newList.sort() if len(newList) == 0: return None midpoint = len(newList) / 2 if not len(newList) % 2: # if length is even return float(newList[midpoint-1] + newList[midpoint]) / 2 else: return float(newList[midpoint]) def timeRun(fn, args): start = time.time() val = apply(fn, args) end = time.time() seconds = end-start return (val, seconds) def runTest(one, two, expected=None): print("Testing lists with lengths %s and %s\n" % (len(one), len(two))) oldVal, oldTime = timeRun(old_median, [one, two]) print("Old method (%f seconds): %s" % (oldTime, oldVal)) if expected and expected != oldVal: print("Old method returns incorrect value.") newVal, newTime = timeRun(median, [one, two]) print("New method (%f seconds): %s" % (newTime, newVal)) if expected and expected != newVal: print("New method returns incorrect value.") if oldVal != newVal: print one print two full = one[:] full.extend(two) full.sort() showByHand(full) raise Exception("Mismatch!") print("-----") def showByHand(full): print full if full: showByHand(full[1:len(full)-1]) def normalTest(): runTest([1,6,7,8], [2,3,4,5], 4.5) runTest([1,2,2,2], [2,2,2,5], 2) runTest([1,2,3,3,3], [2,3,3,5], 3) def edgeTest(): runTest([], [1], 1) runTest([1], [], 1) runTest([], [], None) runTest([], [], None) def randomTest(minels=0, maxels=100, times=1): for i in range(times): one = [random.randint(-1000,1000) for e in range(random.randint(minels,maxels))] two = [random.randint(-1000,1000) for e in range(random.randint(minels,maxels))] one.sort() two.sort() runTest(one, two) if __name__=="__main__": normalTest() edgeTest() randomTest(times=10000) randomTest(10000, 10005, 3) print("All tests passed.")