Comparing the Partition Swaps
Introduction
This is part of a series of posts about the Partition algorithm that starts with this post.
Imports and Setup
# python
from typing import Callable, List, TypeVar, MutableSequence
from dataclasses import dataclass
from functools import partial
import random
# pypi
from expects import be_true, equal, expect
from joblib import Parallel, delayed
import altair
import pandas
# monkey
from graeae.visualization.altair_helpers import output_path, save_chart
SLUG = "comparing-the-partition-swaps"
OUTPUT_PATH = output_path(SLUG)
save_it = partial(save_chart, output_path=OUTPUT_PATH)
Orderable = TypeVar("Orderable")
random.seed(2022)
Comparing the Swaps
Both of the versions of partition shown here traverse the collection once, so looking at the number of comparisons doesn't seem so interesting. According to the Wikipedia page on Quicksort, the swaps for Hoare's method (which is slightly different from Levitin's) is better for the worst cases, so let's see if this matters for Levitin's and CLRS's swaps.
Some Swap Counters
Since we're only calling the partitioners once rather than repeatedly using them to partition sub-lists in the collection we can get rid of the left
and right
arguments to the functions.
A Levitin Swap Counter
def levitin_swaps(collection: MutableSequence[Orderable]) -> int:
"""Partitions the collection using a variation of Hoare's method
Args:
collection: the list to partition
Returns:
count of swaps
"""
left, right = 0, len(collection) - 1
pivot_element = collection[left]
partition_left = left
partition_right = right + 1
stop_right = len(collection) - 1
swaps = 0
while True:
while partition_left < stop_right:
partition_left += 1
if collection[partition_left] >= pivot_element:
break
while True:
partition_right -= 1
if collection[partition_right] <= pivot_element:
break
if partition_left >= partition_right:
break
collection[partition_left], collection[partition_right] = (
collection[partition_right], collection[partition_left]
)
swaps += 1
# move the pivot element from the far left to its final place
collection[left], collection[partition_right] = (
collection[partition_right], collection[left]
)
swaps += 1
return swaps
middle = 100
prefix = random.choices(range(middle), k=middle)
suffix = random.choices(range(middle + 1, 201), k=middle)
test = [middle] + prefix + suffix
swaps = levitin_swaps(test)
expect(all(item < middle for item in test[:middle])).to(be_true)
expect(all(item > middle for item in test[middle + 1:])).to(be_true)
A CLRS Swap Counter
def clrs_swaps(collection: MutableSequence[Orderable]) -> int:
"""Partitions the collection around the last element
Args:
collection: the list to partition
Returns:
count of swaps
"""
left, right = 0, len(collection) - 1
pivot_element = collection[right]
lower_bound = left - 1
swaps = 0
for upper_bound in range(left, right):
if collection[upper_bound] <= pivot_element:
lower_bound += 1
(collection[lower_bound],
collection[upper_bound]) = (collection[upper_bound],
collection[lower_bound])
swaps += 1
pivot = lower_bound + 1
(collection[pivot],
collection[right]) = (collection[right],
collection[pivot])
swaps += 1
return swaps
middle = 100
prefix = random.choices(range(middle), k=middle)
suffix = random.choices(range(middle + 1, 201), k=middle)
test = prefix + suffix + [middle]
swaps = clrs_swaps(test)
expect(all(item < middle for item in test[:middle])).to(be_true)
expect(all(item > middle for item in test[middle + 1:])).to(be_true)
A Parallelizer Function
This is what I'll pass to joblib to run the two swap-counter functions.
@dataclass
class SwapCounts:
size: int
swaps: int
def swap_counter(collection: MutableSequence, swap_counter: object) -> SwapCounts:
"""Runs the swap_counter over the collection of inputs
Args:
collection: elements to partition
Returns:
SwapCounts: size, swap-count
"""
size = len(collection)
swaps = swap_counter(collection)
return SwapCounts(size=size, swaps=swaps)
def swap_plots(first_output: List[SwapCounts], second_output: List[SwapCounts],
first_label: str, second_label: str,
title: str, filename: str):
"""Plot the swap counts
Args:
first_output, second_output: lists of SwapCounts
first_label, second_label: Algorithm labels for the counts
title: title for the plot
filename: name to save the plot to
"""
expect(len(first_output)).to(equal(len(second_output)))
frame = pandas.DataFrame({
"Input Size": [output.size for output in first_output],
first_label: [output.swaps for output in first_output],
second_label: [output.swaps for output in second_output]})
melted = frame.melt(id_vars="Input Size",
value_vars=[first_label, second_label],
var_name="Algorithm", value_name="Swaps")
base = (altair.Chart(melted).mark_circle(opacity=0.5)
.encode(x=altair.X("Input Size:Q",
scale=altair.Scale(
domain=(0, melted["Input Size"].max()))),
y=altair.Y("Swaps:Q",scale=altair.Scale(domainMin=0)),
color="Algorithm:N",
tooltip=[altair.Tooltip("Input Size:Q", format=","),
altair.Tooltip("Swaps:Q", format=","),
altair.Tooltip("Algorithm:N")]))
line = (base.transform_regression("Input Size", "Swaps",
groupby=["Algorithm"])
.mark_line())
chart = (line + base).properties(
title=title,
width=800,
height=525
).interactive()
save_it(chart, filename)
return
def swap_ratio_plots(first_output: List[SwapCounts], second_output: List[SwapCounts],
first_label: str, second_label: str,
title: str, filename: str):
"""Plot the swap count ratios
Args:
first_output, second_output: lists of SwapCounts
first_label, second_label: Algorithm labels for the counts
title: title for the plot
filename: name to save the plot to
"""
expect(len(first_output)).to(equal(len(second_output)))
frame = pandas.DataFrame({
"Input Size": [output.size for output in first_output],
first_label: [output.swaps for output in first_output],
second_label: [output.swaps for output in second_output]})
frame["Ratio"] = frame[first_label]/(frame[second_label] + 1)
base = (altair.Chart(frame[["Input Size", "Ratio"]]).mark_circle(opacity=0.5)
.encode(x="Input Size:Q", y=altair.Y("Ratio:Q", scale=altair.Scale(domainMin=0)),
tooltip=[altair.Tooltip("Input Size", format=","),
altair.Tooltip("Ratio", format=",.2f")]))
line = (base.transform_regression("Input Size", "Ratio")
.mark_line())
chart = (base + line).properties(
title=title,
width=800,
height=525
).interactive()
save_it(chart, filename)
return
def swap_runner(swapper_one: Callable, swapper_two: Callable,
label_one: str, label_two: str,
things_to_partition: List[List],
title: str, filename: str, plotter: Callable=swap_plots) -> None:
"""Run the partitioners and plot
Args:
swapper_one, swapper_two: swap-counter functions
label_one, label_two: algorithm labels for the plot
things_to_partition: collections for the partitioners to partition
title, filename: plot strings
"""
first_output = Parallel(n_jobs=-1)(
delayed(swap_counter)(thing_to_partition, swapper_one)
for thing_to_partition in things_to_partition)
second_output = Parallel(n_jobs=-1)(
delayed(swap_counter)(thing_to_partition, swapper_two)
for thing_to_partition in things_to_partition)
plotter(first_output, second_output, label_one, label_two,
title=title,
filename=filename)
return
Random Inputs
counts = range(10, 100011, 100)
random_things_to_partition = [random.choices(range(count), k=count)
for count in counts]
swap_runner(levitin_swaps, clrs_swaps, "Levitin", "CLRS", random_things_to_partition,
title="Levitin vs CLRS Partition Swap Count (Randomized Input)",
filename="swaps_random_2")
swap_runner(clrs_swaps, levitin_swaps, "CLRS", "Levitin", random_things_to_partition,
title="CLRS/Levitin Partition Swap Ratio (Randomized Input)",
filename="swaps_random_ratio", plotter=swap_ratio_plots)
Biggest Item Last
What happens if the input has the biggest item at the end of the list?
counts = range(10, 100011, 100)
big_ended_things_to_partition = [random.choices(range(count), k=count) + [count]
for count in counts]
swap_runner(levitin_swaps, clrs_swaps, "Levitin", "CLRS",
big_ended_things_to_partition,
title="Levitin vs CLRS Partition Swap Count (Biggest Item Last)",
filename="swaps_sorted")
Sorted
What happens if the input is already sorted?
counts = range(10, 100011, 100)
sorted_things_to_partition = [list(range(count)) for count in counts]
swap_runner(levitin_swaps, clrs_swaps, "Levitin", "CLRS",
sorted_things_to_partition,
title="Levitin vs CLRS Partition Swap Count (Sorted input)",
filename="swaps_already_sorted")
All The Same
counts = range(10, 100011, 100)
same_things_to_partition = [[count] * count for count in counts]
swap_runner(clrs_swaps, levitin_swaps, "CLRS", "Levitin", same_things_to_partition,
title="Levitin vs CLRS Partition Swap Count (All Same Input)",
filename="swaps_all_same")
swap_runner(clrs_swaps, levitin_swaps,"CLRS", "Levitin", same_things_to_partition,
title="CLRS/Levitin Partition Swap Ratio (All Same Input)",
filename="swaps_all_same_ratio", plotter=swap_ratio_plots)
Randomized Swaps
A Randomized Swap Counter
def randomized_swaps(collection: MutableSequence[Orderable],
partition: Callable, pivot: int) -> int:
"""Partitions the collection around the last element
Args:
collection: the list to partition
partition: function to partition the list
pivot: index of the element used for the pivot
Returns:
count of swaps
"""
random_index = random.randrange(len(collection))
collection[pivot], collection[random_index] = (collection[random_index],
collection[pivot])
return 1 + partition(collection)
randomized_levitin = partial(randomized_swaps, partition=levitin_swaps, pivot=0)
randomized_clrs = partial(randomized_swaps, partition=clrs_swaps, pivot=-1)
Randomized Levitin Swaps
Shuffled Input
swap_runner(levitin_swaps, randomized_levitin, "Levitin", "Randomized Levitin",
random_things_to_partition,
title="Levitin vs Randomized Levitin Partition Swap Count (Randomized Input)",
filename="swaps_randomized_random")
Smallest Item At the End
counts = range(10, 100011, 100)
backwards_things_to_partition = [random.choices(range(1, count), k=count) + [0]
for count in counts]
swap_runner(levitin_swaps, randomized_levitin, "Levitin", "Randomized Levitin",
backwards_things_to_partition,
title=("Levitin vs Randomized Levitin Partition Swap Count (Backwards Input)"),
filename="swaps_levitin_randomized_sorted_backwards")
All The Same Input
Since only difference between the randomized and the non-randomized input is that the pivot was (probably) swapped out, the input will look the same when all the values are the same to start with so I'm not going to re-plot those.
Randomized CLRS Swaps
Shuffled Input
swap_runner(clrs_swaps, randomized_clrs, "CLRS", "Randomized CLRS",
random_things_to_partition,
title="CLRS vs Randomized CLRS Partition Swap Count (Randomized Input)",
filename="swaps_randomized_clrs_random")
Biggest Item at the End
swap_runner(clrs_swaps, randomized_clrs, "CLRS", "Randomized CLRS",
big_ended_things_to_partition,
title="CLRS vs Randomized CLRS Partition Swap Count (Big-Ended Input)",
filename="swaps_clrs_randomized_big_ended")
Sorted
swap_runner(clrs_swaps, randomized_clrs, "CLRS", "Randomized CLRS",
sorted_things_to_partition,
title="CLRS vs Randomized CLRS Partition Swap Count (Sorted Input)",
filename="swaps_clrs_randomized_sorted")