Graphs

Introduction

This is a starter post for an introduction to Graphs.

Imports and Setup

Imports

# python
from __future__ import annotations
from collections import defaultdict
from enum import Enum

# pypi
from attrs import define

Constants

class Color(Enum):
    WHITE = 1
    GRAY = 2
    BLACK = 3

INFINITY = float("inf")

A Vertex Implementation

Note to future self: the default setting for attrs.define makes the object to unhashable, to make it hashable by object identity pass in eq=False otherwise trying to add the object to a set will raise an error.

@define(eq=False)
class Vertex:
    """A single node in a graph

    Args:
     identifier: something to identify the node
     color: the 'discovery' state of the node
     distance: The number of edges to the root
     predecessor: The 'parent' of the node in a tree
    """
    identifier: int
    color: Enum=Color.WHITE
    distance: float=INFINITY
    predecessor: Vertex=None

    def __str__(self) -> str:
        return (f"{self.identifier}: {self.color}, "
                f"distance: {self.distance}, predecessor: {self.predecessor}")

A Graph Implementation

class Graph:
    """A graph implementation

    """
    def __init__(self) -> None:
        self._adjacent = None
        self._vertices = None
        return

Adjacencies

The book uses an adjacency linked list, but I'll assume that there will be only one edge between each pair of vertices and use a dictionary with sets of adjacent nodes instead.

@property
def adjacent(self) -> defaultdict:
    """The dictionary of adjacent vertices"""
    if self._adjacent is None:
        self._adjacent = defaultdict(set)
    return self._adjacent

@adjacent.setter
def adjacent(self, new_adjacent: dict) -> None:
    """Sets the dictionary of adjacent vertices (converting to default dict)

    Note:
     This expects the new_adjacent to be a dict of node: set of nodes
    """
    if type(new_adjacent) is not defaultdict:
        new_new_adjacent = defaultdict(set)
        for key, nodes in new_adjacent.items():
            new_new_adjacent[key] = nodes
        new_adjacent = new_new_adjacent
    self._adjacent = new_adjacent
    return

Vertices

The representation we're using is an adjacency list, but sometimes you need to traverse the vertices. So I'm going to make an alias to the adjacency list keys, in the assumption that every node in the graph is a key in the dict. This might not be true in a directed graph, so I'll have to revisit it later.

@property
def vertices(self) -> set:
    """The vertices in this graph"""
    if self._vertices is None:
        self._vertices = set()
    return self._vertices

Add Item

def add(self, node_1: Vertex, node_2: Vertex) -> None:
    """Add edge 

    Warning:
     This assumes an undirected graph, change it for a directed graph

    Args:
     node_1: node on one end of the edge
     node_2: Node on the other end of the edge
    """
    self.vertices.add(node_1)
    self.vertices.add(node_2)
    self.adjacent[node_1].add(node_2)
    self.adjacent[node_2].add(node_1)
    return

Getitem

def __getitem__(self, key):
    """Get the list from the adjacencies dict

    Args:
     key: vertex whose list we want
    """
    return self.adjacent[key]

Testing

# pypi
from expects import be, contain, equal, expect

# software under test
from bowling.data_structures.graphs import graph
from bowling.data_structures.graphs.graph import Color, Graph, Vertex

v = Vertex(1)
expect(v.color).to(be(Color.WHITE))

g = Graph()
v2 = Vertex(2)
v3 = Vertex(3)
g.add(v, v2)
g.add(v, v3)

expect(g.adjacent.keys()).to(contain(v, v2))
expect(g.adjacent[v]).to(contain(v2))
expect(g[v2]).to(contain(v))
expect(g[v3]).to(contain(v))
expect(g[v3]).not_to(contain(v2))

Red-Black Trees: Delete

This is a post on deletings nodes from a tree. It is part of a series of posts starting with this post.

The Deleter For the Arborist

This adds a property to the Arborist that builds a Deleter object for it.

@property
def delete(self) -> Deleter:
    """A node deleter"""
    if self._delete is None:
        self._delete = Deleter(tree=self.tree)
    return self._delete

The Deleter

The Deleter deletes nodes from a Tree, maintaining the Red-Black Tree Properties.

Extra Imports

from bowling.data_structures.red_black_tree.forester import Forester

The Class

class Deleter:
    """Deletes nodes

    Args:
     tree: the RedBlackTree to update
    """
    def __init__(self, tree: rb_tree.RedBlackTree) -> None:
        self._tree = None
        self.tree = tree
        self._forester = None
        self._rotate = None
        return

The Tree Setter

@tree.setter
def tree(self, sapling: rb_tree.RedBlackTree) -> None:
    """stores the tree, resets other objects that used the prior tree"""
    self._tree = sapling
    self._rotate = None
    self._forester = None
    return

Forester

@property
def forester(self) -> Forester:
    """A forester to measure the tree"""
    if self._forester is None:
        self._forester = Forester(tree=self.tree)
    return self._forester

Transplant

\begin{algorithm}
\caption{RBTransplant}
\begin{algorithmic}
\INPUT The Tree, the original node and node to replace it
\PROCEDURE{RBTransplant}{\textit{T}, \textit{u}, \textit{v}}

\IF {\textit{u}.p = \textit{T}.\textsc{NIL}}
    \STATE \textit{T}.root $\gets$ \textit{v}
\ELSEIF {\textit{u} = \textit{u}.p.left}
    \STATE \textit{u}.p.left $\gets$ \textit{v}
\ELSE
  \STATE \textit{u}.p.right $\gets$ \textit{v}
\ENDIF

\STATE \textit{v}.p $\gets$ \textit{u}.p

\ENDPROCEDURE
\end{algorithmic}
\end{algorithm}
def transplant(self, unwanted: rb_tree.Node,
               replacement: rb_tree.Node) -> None:
    """Replace one node with another in the tree

    Gives replacement the parent of replaced, doesn't remove
    parent from replaced

    Args:
     unwanted: node to remove
     replacement: node to replace replaced
    """
    if unwanted.is_root:
        self.tree.root = replacement
    elif unwanted.is_left:
        unwanted.parent.left = replacement
    else:
        unwanted.parent.right = replacement
    replacement.parent = unwanted.parent
    return

Delete

\begin{algorithm}
\caption{RBDelete}
\begin{algorithmic}
\INPUT The Tree and the node to remove
\PROCEDURE{RBTransplant}{\textit{T}, \textit{z}}

\STATE \textit{y} $\gets$ \textit{z}
\STATE \textit{y-original-color} $\gets$ \textit{y}.color
\IF {\textit{z}.left = \textit{T}.\textsc{NIL}}
    \STATE \textit{x} $\gets$ \textit{z}.right
    \STATE \textsc{RBTransplant}(\textit{T}, \textit{z}, \textit{z}.right)
\ELSEIF {\textit{z}.right = \textit{T}.\textsc{NIL}}
    \STATE \textit{x} $\gets$ \textit{z}.left
    \STATE \textsc{RBTransplant}(\textit{T}, \textit{z}, \textit{z}.left)

\ELSE
  \STATE \textit{y} $\gets$ \textsc{TreeMinimum}(\textit{z}.right)
  \STATE \textit{y-original-color} $\gets$ \textit{y}.color
  \STATE \textit{x} $\gets$ \textit{y}.right
  \IF {\textit{y}.p = \textit{z}}
    \STATE \textit{x}.p $\gets$ \textit{y}
  \ELSE
    \STATE \textsc{RBTransplant}(\textit{T}, \textit{y}, \textit{y}.right)
    \STATE \textit{y}.right $\gets$ \textit{z}.right
    \STATE \textit{y}.right.p $\gets$ \textit{y}
  \ENDIF
  \STATE \textsc{RBTransplant}(\textit{T}, \textit{z}, \textit{y})
  \STATE \textit{y}.left $\gets$ \textit{z}.left
  \STATE \textit{y}.left.p $\gets$ \textit{y}
  \STATE \textit{y}.color $\gets$ \textit{z}.color
\ENDIF

\IF {\textit{y-original-color} = \textbf{BLACK}}
  \STATE \textsc{RBDeleteFixup}(\textit{T}, \textit{x})
\ENDIF
\ENDPROCEDURE
\end{algorithmic}
\end{algorithm}
def delete(self, unwanted: rb_tree.Node) -> None:
    """Delete a node

    Args:
     unwanted: the node to delete
    """
    needs_fixing = unwanted.is_black

    if unwanted.left is rb_tree.LEAF:
        unwanted_child = unwanted.right
        self.transplant(unwanted, unwanted_child)
    elif unwanted.right is rb_tree.LEAF:
        unwanted_child = unwanted.left
        self.transplant(unwanted, unwanted_child)
    else:
        adopter = self.forester.min(unwanted.right)        
        needs_fixing = unwanted.is_black
        unwanted_child = adopter.right
        if adopter.parent is unwanted:
            unwanted_child.parent = adopter
        else:
            self.transplant(adopter, unwanted_child)
            adopter.right = unwanted.right
            adopter.right.parent = adopter
        self.transplant(unwanted, adopter)
        adopter.left = unwanted.left
        adopter.left.parent = adopter
        adopter.color = unwanted.color
    if needs_fixing:
        self.fixup(unwanted_child)
    return

The Fixup

\begin{algorithm}
\caption{RBFixup}
\begin{algorithmic}
\INPUT The Tree and the node to check
\PROCEDURE{RBFixup}{\textit{T}, \textit{x}}
\WHILE {\textit{x} $\neq$ \textit{T}.root and \textit{x} = \textbf{BLACK}}
\IF {\textit{x} = \textit{x}.p.left}
\textit{w} $\gets$ \textit{x}.p.right
\IF  \textit{w}.color = \textbf{RED}
\STATE  \textit{w}.color $\gets$ \textbf{BLACK}
\STATE \textit{x}.p.color $\gets$ \textbf{RED}
\STATE \textsc{LeftRodate}(\textit{T}, \textit{x}.p)
\STATE  \textit{w} $\gets$ \textit{x}.p.right
\ENDIF

\IF { \textit{w}.left.color = \textbf{BLACK} and  \textit{w}.right.color = \textbf{BLACK}}
\STATE  \textit{w}.color $\gets$ \textbf{RED}
\STATE \textit{x} $\gets$ \textit{x}.p
\ELSE
\IF { \textit{w}.color=\textbf{BLACK}}
\STATE \textit{w}.left.color $\gets$ \textbf{BLACK}
\STATE  \textit{w}.color $\gets$ \textbf{RED}
\STATE \textsc{RightRotate}(\textit{T},  \textit{w})
\STATE  \textit{w} $\gets$ \textit{x}.p.right
\ENDIF

\STATE  \textit{w}.color $\gets$ \textit{x}.p.color
\STATE \textit{x}.p.color $\gets$ \textbf{BLACK}
\STATE  \textit{w}.right.color $\gets$ \textbf{BLACK}
\STATE \textsc{LeftRotate}(\textit{T}, \textit{x}.p)
\STATE \textit{x} $\gets$ \textit{T}.root
\ENDIF
\ELSE
    // Same as x is left but with left and right swapped
  \ENDIF
\ENDWHILE
\STATE \textit{x}.color $\gets$ \textbf{BLACK}
\ENDPROCEDURE
\end{algorithmic}
\end{algorithm}
def fixup(self, node: rb_tree.Node)-> None:
    """Fixup the tree after a node deletion

    Args:
     node: the child of the deleted node
    """
    while not node.is_root and node.is_black:
        self.fixup_one_side(node, left=node.is_left)        
    node.color = rb_tree.Color.BLACK
    return

Fixup One Side

def fixup_one_side(self, node: rb_tree.Node, left: bool=True) -> None:
    """Does either the case where the node is left or node is right"""
    child = node.parent.right if left else node.parent.left
    if child.is_red:
        child.color = rb_tree.Color.BLACK
        node.parent.color = rb_tree.Color.BLACK
        rotate = self.rotate.left if left else self.rotate.right
        rotate(node.parent)
        child = node.parent.right if left else node.parent.left
    if child.left.is_black and child.right.is_black:
        child.color = rb_tree.Color.RED
        node = node.parent
    else:
        if child.is_black:
            grandchild = child.left if left else child.right
            grandchild.color = rb_tree.Color.BLACK
            child.color = rb_tree.Color.RED
            rotate = self.rotate.right if left else self.rotate.left
            rotate(child)
            child = node.parent.right if left else node.parent.left
        child.color = node.parent.color
        node.parent.color = rb_tree.Color.BLACK
        grandchild = child.right if left else child.left
        grandchild.color = rb_tree.Color.BLACK
        rotate = self.rotate.left if left else self.rotate.right
        rotate(node.parent)
        node = self.tree.root
    return

Testing

# pypi
from expects import be, be_true, expect

# software under test
from bowling.data_structures.red_black_tree import tree
from bowling.data_structures.red_black_tree.arborist import Arborist
from bowling.data_structures.red_black_tree.forester import Forester

Transplant

The Node is Root

original = tree.Node(10)
replacement = tree.Node(11)
arborist = Arborist(tree.RedBlackTree(original))
arborist.delete.transplant(original, replacement)
expect(arborist.tree.root).to(be(replacement))
expect(replacement.is_root).to(be_true)

The Original Node Is a Left Child

root = tree.Node(12)
root.left = original
arborist.tree.root = root
arborist.delete.transplant(original, replacement)
expect(root.left).to(be(replacement))
expect(replacement.parent).to(be(root))

The Original Node is a Right Child

original.key = 15
root.right = original
replacement.key = 20
arborist.tree.root = root
arborist.delete.transplant(original, replacement)
expect(root.right).to(be(replacement))
expect(replacement.parent).to(be(root))

Delete

A Build-Up and a Tear Down

# python
from pathlib import Path
# pypi
import networkx

SLUG = "red-black-trees-delete"
OUTPUT = Path(f"files/posts/{SLUG}")
if not OUTPUT.is_dir():
    OUTPUT.mkdir()
def preorder(node: tree.Node, adjacencies: dict) -> dict:
    """Traverse the nodes and build an adjancency dictionary
    """
    if node is not None:
        left = node.left.key if node.left else None
        right = node.right.key if node.right else None
        if any((left, right)):
            if left and right:
                adjacencies[node.key] = (left, right)
            elif left and not right:
                adjacencies[node.key] = (left, )
            else:
                adjacencies[node.key] = (right,)
        preorder(node.left, adjacencies)
        preorder(node.right, adjacencies)
    return
def plot_graph(root, name):
    adjacencies = {}
    preorder(root, adjacencies)

    graph = networkx.DiGraph(adjacencies)
    pygraph = networkx.nx_pydot.to_pydot(graph)
    filename = f"{name}.png"
    filepath = OUTPUT/filename
    pygraph.write_png(filepath)
    print(f"[[img-url:{filename}]]")
    return
node = tree.Node(10)
plot_graph(node, "node_1")

nil

Sources

The Main Source:

The Clearer RB-Insert-Fixup Pseudocode:

Red-Black Trees: Querying

nil

The Imports

from bowling.data_structures.red_black_tree import tree as rb_tree

The Forester

This is the class to hold the tree and answer questions about it. Most of the methods (except for the red-black-height) are the same as the Binary Search Tree with the main exception being that we need to use the NIL object instead of None. This means that nothing in the tree module can use the Forester directly or we'll end up with a circular reference.

class Forester:
    """The forester manages the tree

    Args:
     tree: tree to manage
     enforce_properties: check that the black-heights are the same
    """
    def __init__(self, tree: rb_tree.RedBlackTree=None,
                 enforce_properties: bool=False) -> None:
        self.tree = tree
        self.enforce_properties = enforce_properties
        return

Search

The first thing we'll look at is searching for a Node in a Binary Search Tree. Because of the way the tree is structured the search looks a lot like a Binary Search where we start at the root of the tree and then work our way downwards, picking the next child to follow based on whether the key we're searching for is greater than the right child's key or less than the left child's key. If it's equal then we've found the node we want.

The Implementation

I'll add the iterative version of the Tree Search as a method for our Query class.

def search(self, key) -> rb_tree.Node:
    """Searches the tree for the node with the matching key

    Args:
     key: node's key to search for

    Returns:
     the node with the key or None if it isn't in the tree
    """
    node = self.tree.root
    while node is not rb_tree.LEAF and key != node.key:
        if key < node.key:
            node = node.left
        else:
           node = node.right
    return node

Testing

# pypi
from expects import be, be_none, equal, expect
from bowling.data_structures.red_black_tree import tree as rb_tree
from bowling.data_structures.red_black_tree.forester import Forester

root = rb_tree.Node(10)
tree = rb_tree.RedBlackTree(root)
query = Forester(tree)
output = query.search(5)
expect(output).to(be(rb_tree.NIL))
expect(query.search(10)).to(equal(root))

five = rb_tree.Node(5)
root.left = five
expect(query.search(5)).to(be(five))

root.right = fifteen = rb_tree.Node(15)
expect(query.search(15)).to(be(fifteen))

Miminum and Maximum

Mimimum

def min(self, node: rb_tree.Node=None) -> rb_tree.Node:
    """Returns the node with the smallest key

    Args:
     node: a node to use as the starting root
    """
    if node is None:
        node = self.tree.root

    while node.left is not rb_tree.LEAF:
        node = node.left
    return node

Testing

root = rb_tree.Node(10)
tree = rb_tree.RedBlackTree(root)
query = Forester(tree)
root.left = rb_tree.Node(5)
root.left.left = rb_tree.Node(2)
root.right = rb_tree.Node(15)
root.right.right = rb_tree.Node(17)
root.right.left = rb_tree.Node(11)
expect(query.min()).to(equal(rb_tree.Node(2)))

root.left.left.left = rb_tree.Node(1)
expect(query.min()).to(equal(rb_tree.Node(1)))

expect(query.min(tree.root.right)).to(equal(rb_tree.Node(11)))

Maximum

def max(self, root: rb_tree.Node=None) -> rb_tree.Node:
    """Returns the node with the largest key

    Args:
     root: subtree root to start at

    Returns:
     node with the largest key in tree/subtree
    """
    if root is None:
        root = self.tree.root
    while root.right is not rb_tree.LEAF:
        root = root.right
    return root

Testing

root = rb_tree.Node(10)
tree = rb_tree.RedBlackTree(root)
query = Forester(tree)
root.left = rb_tree.Node(5)
root.left.left = rb_tree.Node(2)
root.right = rb_tree.Node(15)

expect(query.max()).to(equal(rb_tree.Node(15)))

root.right.right = rb_tree.Node(17)
expect(query.max()).to(equal(rb_tree.Node(17)))
expect(query.min()).to(equal(rb_tree.Node(2)))

expect(query.max(tree.root.left)).to(equal(rb_tree.Node(5)))

Height

The height of the Binary Search Tree is the number of edges from the root of the tree to the furthest node. The algorithms we're looking at here don't use them but I'm going to use height to look at how the order you insert nodes in the tree affects the height.

@property
def height(self) -> int:
    """The length of the longest path starting at the root

    Returns:
     number of edges from root to furthest leaf
    """
    return self.tree_height(self.tree.root)
def tree_height(self, node: rb_tree.Node=None) -> int:
    """The length of the longest path starting at the node

    Args:
     the node to start the measurement from

    Returns:
     number of edges from root to furthest leaf
    """
    if node is rb_tree.LEAF:
        return -1

    left = self.tree_height(node.left) + 1
    right = self.tree_height(node.right) + 1
    return max(left, right)
tree = rb_tree.RedBlackTree()
query = Forester(tree)

expect(query.height).to(equal(-1))
root = rb_tree.Node(10)
tree.root = root
expect(query.height).to(equal(0))

root.left = rb_tree.Node(8)
expect(query.height).to(equal(1))

root.right = rb_tree.Node(12)
expect(query.height).to(equal(1))

root.left.left = rb_tree.Node(4)
expect(query.height).to(equal(2))

root.left.left.left = rb_tree.Node(2)
expect(query.height).to(equal(3))

Black Height

@property
def black_height(self) -> int:
    """The number of black nodes below the root
    """
    return self.find_black_height(self.tree.root)
def find_black_height(self, node: rb_tree.Node=None) -> int:
    """Find the number of black nodes below a node

    Note:
     This assumes that the starting node is black. In the cases where it's red
     it will be one more than the true height

    Args:
     node: base node to use
    """
    if node is rb_tree.LEAF:
        return 0

    add_for_color = 1 if node.is_black else 0
    left = self.find_black_height(node.left) + add_for_color
    right = self.find_black_height(node.right) + add_for_color
    if self.enforce_properties:
        assert left == right, f"Black Height: Left={left} Right={right}"
    return max((left, right))

Testing

tree = rb_tree.RedBlackTree()
forester = Forester(tree)

root = rb_tree.Node(26, color=rb_tree.Color.BLACK)
tree.root = root
expect(forester.black_height).to(equal(1))

root.left = rb_tree.Node(17, color=rb_tree.Color.RED)
expect(forester.black_height).to(equal(1))

root.left.left = rb_tree.Node(14, color=rb_tree.Color.BLACK)
expect(forester.black_height).to(equal(2))
root.left.left.left = rb_tree.Node(10, color=rb_tree.Color.RED)
expect(forester.black_height).to(equal(2))

root.left.left.right = rb_tree.Node(16, color=rb_tree.Color.BLACK)
expect(forester.black_height).to(equal(3))

Testing

from bowling.data_structures.red_black_tree.tree import Node, RedBlackTree
from bowling.data_structures.red_black_tree.arborist import Arborist
from bowling.data_structures.red_black_tree.forester import Forester

Red-Black Properties

These are the properties that need to be maintained for a Red-Black Tree to work correctly (from CLRS).

  1. Every Node is either Red or Black.
  2. The Root is Black.
  3. Every Leaf (Nil) is Black.
  4. If a Node is Red then both of its children are Black.
  5. For each Node, all simple paths from the node to descendant leaves contain the same number of Black Nodes.

When the properties are true the height of a tree with n nodes is \(O(\log(n))\).

Red-Black Trees: Insertion

This is a post on inserting nodes into a tree. It is part of a series of posts starting with this post.

Insertion

Because we need to maintain the Red-Black Properties inserting a node into a Red-Black Tree is similar to inserting a node into a Binary Search Tree, but it involves a second function that restores the Red-Black properties if needed.

The Inserter

class Inserter:
    """Insert a node into the tree

    Args:
      tree: Red-Black-Tree to insert nodes into
    """
    def __init__(self, tree: rb_tree.RedBlackTree) -> None:
        self._tree = None
        self.tree = tree
        self._rotate: Rotator = None
        return

The Tree Properties

@property
def tree(self) -> rb_tree.RedBlackTree:
    """The tree we're inserting nodes into"""
    return self._tree
@tree.setter
def tree(self, sapling: rb_tree.RedBlackTree) -> None:
    """stores the tree, resets other objects that used the prior tree"""
    self._tree = sapling
    self._rotate = None
    return

The Rotator

@property
def rotate(self) -> Rotator:
    """A rotator for inserts"""
    if self._rotate is None:
        self._rotate = Rotator(tree=self.tree)
    return self._rotate

Insert

The Pseudocode

\begin{algorithm}
\caption{RBInsert}
\begin{algorithmic}
\INPUT The Tree and the Node to insert
\PROCEDURE{RBInsert}{\textit{T}, \textit{z}}

\STATE \textit{y} $\gets$ \textit{T}.\textsc{NIL}
\STATE \textit{x} $\gets$ \textit{T}.root

\WHILE {\textit{x} $\neq$ \textit{T}.\textsc{NIL}}
  \STATE \textit{y} $\gets$ \textit{x}

  \IF {\textit{z}.key < \textit{x}.key}
    \STATE \textit{x} $\gets$ \textit{x}.left
  \ELSE
    \STATE \textit{x} $\gets$ \textit{x}.right
  \ENDIF
\ENDWHILE

\STATE \textit{z}.parent $\gets$ \textit{y}

\IF {\textit{y} = \textit{T}.\textsc{NIL}}
  \STATE \textit{T}.root $\gets$ \textit{z}
\ELIF {\textit{z}.key < \textit{y}.key}
  \STATE \textit{y}.left $\gets$ \textit{z}
\ELSE
  \STATE \textit{y}.right $\gets$ \textit{z}
\ENDIF

\STATE \textit{z}.left $\gets$ \textit{T}.\textsc{NIL}
\STATE \textit{z}.right $\gets$ \textit{T}.\textsc{NIL}
\STATE \textit{z}.color $\gets$ \textbf{RED}
\STATE \textsc{RBInsertFixup}(\textit{T}, \textit{z})

\ENDPROCEDURE
\end{algorithmic}
\end{algorithm}

Some Code

def insert(self, node: rb_tree.Node) -> None:
    """Insert the node into to the tree

    Args:
     node: node to insert into the tree
    """
    hunter = rb_tree.ROOT_PARENT
    hound = self.tree.root

    while hound is not rb_tree.LEAF:
        hunter = hound
        if node > hound:
            hound = hound.right
        else:
            hound = hound.left

    node.parent = hunter

    if hunter is rb_tree.ROOT_PARENT:
        self.tree.root = node
    node.left = rb_tree.LEAF
    node.right = rb_tree.LEAF
    node.color = rb_tree.Color.RED
    self.fixup(node)
    return
from expects import be, be_true, equal, expect
from bowling.data_structures.red_black_tree import tree
from bowling.data_structures.red_black_tree.arborist import Arborist
from bowling.data_structures.red_black_tree.forester import Forester

node = tree.Node(10)
node_5 = tree.Node(5)
node_15 = tree.Node(15)
node.left = node_5
node.right = node_15
rb_tree = tree.RedBlackTree()
arborist = Arborist(rb_tree)

# case: Root is NIL and new-node's parent is NIL
arborist.insert(node)
expect(node.parent).to(be(tree.NIL))
expect(rb_tree.root).to(be(node))

# case: Root isn't NIL
root = tree.Node(20)
node = tree.Node(10)
rb_tree = tree.RedBlackTree(root=root)
arborist.tree = rb_tree
arborist.insert(node)
expect(rb_tree.root).to(be(root))
expect(node.parent).to(be(root))
expect(root.left).to(be(node))

# root is less than inserted node
root = tree.Node(5)
node = tree.Node(11)
rb_tree = tree.RedBlackTree(root=root)
arborist.tree = rb_tree
arborist.insert(node)
expect(rb_tree.root).to(be(root))
expect(node.parent).to(be(root))
expect(root.right).to(be(node))

# in all cases
expect(node.left).to(be(tree.NIL))
expect(node.right).to(be(tree.NIL))
expect(node.is_red).to(be_true)

Insert Fixup

The Pseudocode

This is a separate function to restore the red-black properties (if we messed them up with the insert). It's mostly from CLRS but they used some kind of weird formatting that made it hard for me to tell what their first else conditional was supposed to contain so I'm using a slightly clearer version that I found at https://gcallah.github.io/algorithms/RedBlackTrees.html.

\begin{algorithm}
\caption{RBInsertFixup}
\begin{algorithmic}
\INPUT The Tree and the Node to insert
\PROCEDURE{RBInsertFixup}{\textit{T}, \textit{z}}

\WHILE {\textit{z.parent.color} = \textbf{RED} }
  \IF {\textit{z}.parent = \textit{z}.parent.parent.left}
    \STATE \textit{y} $\gets$ \textit{z}.parent.parent.right

    \IF {\textit{y}.color = \textbf{RED}}
      \STATE \textit{z}.parent.color $\gets$ \textbf{BLACK}
      \STATE \textit{y}.color $\gets$ \textbf{BLACK}
      \STATE \textit{z} $\gets$ \textit{z}.parent.parent
    \ELSE
      \IF {\textit{z} = \textit{z}.parent.right}
        \STATE \textit{z} $\gets$ \textit{z}.parent
        \STATE \textsc{LeftRotate}(\textit{T}, \textit{z})
      \ENDIF
      \STATE \textit{z}.parent.color $\gets$ \textbf{BLACK}
      \STATE \textit{z}.parent.parent.color $\gets$ \textbf{RED}
      \STATE \textsc{RightRotate}(\textit{T}, \textit{z})
    \ENDIF
    
  \ELSE
    \STATE Same as when the parent is left case but with left/right switched
  \ENDIF
  \STATE \textit{T}.root.color $\gets$ \textbf{BLACK}
\ENDWHILE

\ENDPROCEDURE
\end{algorithmic}
\end{algorithm}

The Implementation

  • One Side
    def fixup_side(self, node: rb_tree.Node, uncle: rb_tree.Node,
                   swap_and_rotate: bool=True, left: bool=True) -> None:
        """Fixup either the left or the right sides
    
        Args:
         node: the node that we're fixing
         uncle: the node's parent's sibling
         swap_and_rotate: whether we need to do a swap and rotation
         left: if the node's parent is a left child
        """
        first_rotate = self.rotate.left if left else self.rotate.right
        final_rotate = self.rotate.right if left else self.rotate.left
    
        if uncle.is_red:
            node.parent.color = rb_tree.Color.BLACK
            node.parent.parent.color = rb_tree.Color.RED
            uncle.color = rb_tree.Color.BLACK
            node = node.parent.parent
        else:
            if swap_and_rotate:
                node = node.parent
                first_rotate(node)
            node.parent.color = rb_tree.Color.BLACK
            node.parent.parent.color = rb_tree.Color.RED
    
            final_rotate(node.parent.parent)
        return
    
  • Both Sides
    def fixup(self, node: rb_tree.Node) -> None:
        """Fix-up the red-black properties after an insert
    
        Args:
         node: the node that was just inserted
        """
        while node.parent.is_red:
            if node.parent.is_left:
                uncle = node.parent.parent.right
                self.fixup_side(node, uncle,
                                swap_and_rotate=node.is_right,
                                left=True)
            else:
                uncle = node.parent.parent.left
                self.fixup_side(node, uncle,
                                swap_and_rotate=node.is_left,
                                left=False)
        self.tree.root.color = rb_tree.Color.BLACK
        return
    

Some Testing

# the parent is black
root = tree.Node(10, color=tree.Color.RED)
parent = tree.Node(5, color=tree.Color.BLACK)
parent.parent = root
child = tree.Node(8, color=tree.Color.RED)
child.parent = parent
uncle = tree.Node(11, color=tree.Color.BLACK)
root.right = uncle
rb_tree = tree.RedBlackTree(root=root)
arborist.tree = rb_tree
forester = Forester(rb_tree, enforce_properties=True)

arborist.insert.fixup(child)

expect(root.color).to(be(tree.Color.BLACK))
expect(parent.color).to(be(tree.Color.BLACK))
expect(uncle.color).to(be(tree.Color.BLACK))
expect(child.color).to(be(tree.Color.RED))
expect(forester.height).to(equal(2))
expect(forester.black_height).to(equal(2))
# the parent is a RED left child and the uncle is red
root = tree.Node(10, color=tree.Color.BLACK)
parent = tree.Node(5, color=tree.Color.RED)
parent.parent = root
child = tree.Node(8, color=tree.Color.RED)
child.parent = parent
uncle = tree.Node(11, color=tree.Color.RED)
root.right = uncle
arborist.tree = tree.RedBlackTree(root=root)
forester = Forester(arborist.tree, enforce_properties=True)

expect(forester.black_height).to(equal(1))
arborist.insert.fixup(child)
expect(parent.is_black).to(be_true)
expect(uncle.is_black).to(be_true)
expect(forester.black_height).to(equal(2))

# the parent is a RED left child, the uncle is black, and the node is the left child
parent.color = tree.Color.RED
uncle.color = tree.Color.BLACK
child.color = tree.Color.RED
child.key = 2
parent.left = child
node_6 = tree.Node(6, tree.Color.BLACK)
parent.right = node_6
node_1 = tree.Node(1, tree.Color.BLACK)
child.left = node_1
node_3 = tree.Node(3, tree.Color.BLACK)
child.right = node_3

arborist.insert.fixup(child)

expect(arborist.tree.root).to(be(parent))
expect(parent.color).to(be(tree.Color.BLACK))
expect(parent.right).to(be(root))
expect(root.color).to(be(tree.Color.RED))
expect(child.left).to(be(node_1))
expect(child.right).to(be(node_3))
expect(root.left).to(be(node_6))
expect(root.right).to(be(uncle))
expect(forester.height).to(equal(2))
expect(forester.black_height).to(equal(2))

# the parent is a RED left child, the uncle is black, and the node is the right child
root = tree.Node(10, color=tree.Color.BLACK)
parent = tree.Node(5, color=tree.Color.RED)
uncle = tree.Node(11, color=tree.Color.BLACK)
child = tree.Node(8, color=tree.Color.RED)
parent.parent = root
uncle.parent = root

node_4 = tree.Node(4, color=tree.Color.BLACK)
parent.left = node_4
parent.right = child
node_7 = tree.Node(7, color=tree.Color.BLACK)
node_9 = tree.Node(9, color=tree.Color.BLACK)
child.left = node_7
child.right = node_9

arborist.tree = tree.RedBlackTree(root)
forester.tree = arborist.tree

arborist.insert.fixup(child)

expect(child).to(be(arborist.tree.root))
expect(child.color).to(be(tree.Color.BLACK))
expect(child.left).to(be(parent))
expect(child.right).to(be(root))
expect(parent.color).to(be(tree.Color.RED))
expect(root.color).to(be(tree.Color.RED))
expect(parent.left).to(be(node_4))
expect(parent.right).to(be(node_7))
expect(root.left).to(be(node_9))
expect(root.right).to(be(uncle))
expect(forester.height).to(equal(2))
expect(forester.black_height).to(equal(2))

The Call

This is just a thing to make it look more like a function than a class.

def __call__(self, node: rb_tree.Node) -> None:
    """Inserts the node into the tree

    this is an alias for the ``insert`` method

    Args:
     node: the node to insert 
    """
    return self.insert(node)

All Together Now

rb_tree = tree.RedBlackTree()
arborist = Arborist(rb_tree)
forester = Forester(rb_tree)
root = tree.Node(10)

arborist.insert(root)
expect(forester.height).to(equal(0))
expect(forester.black_height).to(equal(1))

node_5 = tree.Node(5)
arborist.insert(node_5)
expect(forester.height).to(equal(1))
expect(forester.black_height).to(equal(1))
expect(node_5.color).to(be(tree.Color.RED))

node_11 = tree.Node(11)
arborist.insert(node_11)
expect(forester.height).to(equal(1))
expect(forester.black_height).to(equal(1))
expect(node_11.color).to(be(tree.Color.RED))

node_6 = tree.Node(6)
arborist.insert(node_6)
expect(forester.height).to(equal(2))
expect(forester.black_height).to(equal(2))
expect(node_11.color).to(be(tree.Color.BLACK))
expect(node_5.color).to(be(tree.Color.BLACK))
expect(node_6.color).to(be(tree.Color.RED))

node_3 = tree.Node(3)
arborist.insert(node_3)
expect(node_3.color).to(be(tree.Color.RED))
expect(node_5.color).to(be(tree.Color.BLACK))
expect(forester.height).to(be(2))
expect(forester.black_height).to(be(2))

node_1 = tree.Node(1)
arborist.insert(node_1)
expect(forester.height).to(equal(3))
expect(forester.black_height).to(equal(2))
expect(node_1.color).to(be(tree.Color.RED))
expect(node_1.parent.color).to(be(tree.Color.BLACK))
expect(node_6.color).to(be(tree.Color.BLACK))
expect(node_5.color).to(be(tree.Color.RED))

node_4 = tree.Node(4)
arborist.insert(node_4)
expect(forester.height).to(equal(3))
expect(forester.black_height).to(equal(2))
expect(node_4.color).to(be(tree.Color.RED))

node_0 = tree.Node(0)
arborist.insert(node_0)
expect(forester.height).to(equal(4))
expect(forester.black_height).to(equal(2))
expect(node_3.color).to(be(tree.Color.RED))
expect(node_1.color).to(be(tree.Color.BLACK))
expect(node_4.color).to(be(tree.Color.BLACK))
expect(node_0.color).to(be(tree.Color.RED))
def inorder(node: tree.Node):
    if node is not tree.LEAF:
        inorder(node.left)
        print(f"{node.key} ({node.color})", end=", ")
        inorder(node.right)
    return

The Arborist

The arborist takes care of trees.

# this project
from bowling.data_structures.red_black_tree import tree as rb_tree
from bowling.data_structures.red_black_tree.rotator import Rotator
class Arborist:
    """An arborist to take care of trees

    Args:
     tree: tree to take care of
    """
    def __init__(self, tree: rb_tree.RedBlackTree) -> None:
        self._tree = None
        self.tree = tree
        self._insert = None
        self._delete = None
        return
@tree.setter
def tree(self, sapling: rb_tree.RedBlackTree) -> None:
    """Sets the tree and wipes out other attributes that use it

    Args:
     sapling: new tree
    """
    self._tree = sapling
    self._insert = None
    self._delete = None
    return

The Inserter Attribute

@property
def insert(self) -> Inserter:
    """Something to insert nodes"""
    if self._insert is None:
        self._insert = Inserter(tree=self.tree)
    return self._insert

Testing

arborist = Arborist(tree.RedBlackTree())

Sources

The Main Source:

The Clearer RB-Insert-Fixup Pseudocode:

Red-Black Trees: Rotation

This is a post on rotating nodes in a tree. It is a continuation of this post.

What are Rotations?

In the context of a Red-Black Tree, a Rotation involves swapping the position of two nodes (a parent and one of its children) while maintaining the Binary Search Tree Properties. There are two kinds of rotation:

  • Right Rotation: The left child is moved into the position of its parent and the former parent becomes the new parent's right child.
  • Left Rotation: The right child is moved into the position of its parent and the former parent becomes the new parent's left child.

Left Rotate

CLRS gives the Left Rotate function so we'll start with that.

Pseudocode

\begin{algorithm}
\caption{LeftRotate}
\begin{algorithmic}
\INPUT The Tree and the parent Node to rotate
\PROCEDURE{LeftRotate}{\textit{T}, \textit{x}}

\STATE \textit{y} $\gets$ \textit{x}.right
\STATE \textit{x}.right $\gets$ \textit{y}.left

\IF {\textit{y}.left $\neq$ \textit{T}.\textsc{NIL}}
  \STATE \textit{y}.left.parent $\gets$ \textit{x}
\ENDIF

\STATE \textit{y}.parent $\gets$ \textit{x}.parent

\IF {\textit{x}.parent = \textit{T}.\textsc{NIL}}
 \STATE \textit{T}.root $\gets$ \textit{y}
\ELIF {\textit{x} = \textit{x}.parent.left}
 \STATE \textit{x}.parent.left $\gets$ \textit{y}
\ELSE
 \STATE \textit{x}.parent.right $\gets$ \textit{y}
\ENDIF

\STATE \textit{y}.left $\gets$ \textit{x}
\STATE \textit{x}.parent $\gets$ \textit{y}
\ENDPROCEDURE
\end{algorithmic}
\end{algorithm}

Some Code

# pypi
from expects import be, equal, expect

import networkx

# this project
from bowling.data_structures.red_black_tree import tree
from bowling.data_structures.red_black_tree.arborist import Arborist, Rotator
SLUG = "red-black-trees-rotation"
OUTPUT = f"files/posts/{SLUG}/"

The Imports

# pypi
from attrs import define

# this project
from bowling.data_structures.red_black_tree import tree as rb_tree

The Rotator

@define
class Rotator:
    """A rotator of nodes and their children

    Args:
     tree: the tree that the parent belongs to
    """
    tree: rb_tree.RedBlackTree

The Left-Rotate

This is more-or-less a straight implementation of the Left-Rotate algorithm given above. The main difference is that our Node object automatically sets the parent for a child when the child is set on the node so you won't see the parent

def left(self, node: rb_tree.Node) -> None:
    """Rotates the node with its right child

    Args:
     node: the parent node to rotate
    """
    new_parent = node.right
    node.right= new_parent.left

    new_parent.parent = node.parent

    if node.is_root:
        self.tree.root = new_parent
    new_parent.left = node
    return

Note: Because I defined the Node class so that setting any of the three relation attributes (parent, left, right) updates both sides of the link (e.g. setting left also updates left.parent) we don't need the lines of the algorithm that figure out the other side of a newly established link, so the code is a little shorter than the pseudocode.

Testing It Out

def preorder(node: tree.Node, adjacencies: dict) -> dict:
    """Traverse the nodes and build an adjancency dictionary
    """
    if node is not None:
        left = node.left.key if node.left else None
        right = node.right.key if node.right else None
        if any((left, right)):
            if left and right:
                adjacencies[node.key] = (left, right)
            elif left and not right:
                adjacencies[node.key] = (left, )
            else:
                adjacencies[node.key] = (right,)
        preorder(node.left, adjacencies)
        preorder(node.right, adjacencies)
    return
def build_tree(root_parent: tree.Node) -> tuple:
    """Build the test-tree

    Args:
     - root_parent: The parent of the root-node

    Returns:
     Tree, Nodes dict
    """
    nodes = dict()
    nodes[5] = tree.Node(5, parent=root_parent)

    root = nodes[5] if root_parent is tree.NIL else root_parent
    test_tree = tree.RedBlackTree(root=root)

    nodes[4] = tree.Node(4)
    nodes[7] = tree.Node(7)

    nodes[6] = tree.Node(6)
    nodes[8] = tree.Node(8)

    nodes[5].left = nodes[4]
    nodes[5].right = nodes[7]
    nodes[5].right.left = nodes[6]
    nodes[5].right.right = nodes[8]
    return test_tree, nodes
  • Nil Parent

    Our first case will be when the node to swap with its child is root.

    def test_nodes(arborist: Arborist, nodes: list) -> dict:
        root_parent = nodes[5].parent
    
        arborist.rotate.left(nodes[5])
    
        if root_parent is tree.NIL:
            expect(test_tree.root).to(be(nodes[7]))
        else:
            expect(test_tree.root).to(be(root_parent))
    
        expect(nodes[7].parent).to(be(root_parent))
        expect(nodes[7].left).to(be(nodes[5]))
        expect(nodes[5].parent).to(be(nodes[7]))
        expect(nodes[5].right).to(be(nodes[6]))
        expect(nodes[6].parent).to(be(nodes[5]))
        expect(nodes[5].left).to(be(nodes[4]))
        expect(nodes[4].parent).to(be(nodes[5]))
        expect(nodes[7].right).to(be(nodes[8]))
        expect(nodes[8].parent).to(be(nodes[7]))
        return nodes
    
    test_tree, nodes = build_tree(tree.NIL)
    
    def plot_graph(root, name):
        adjacencies = {}
        preorder(root, adjacencies)
    
        graph = networkx.DiGraph(adjacencies)
        pygraph = networkx.nx_pydot.to_pydot(graph)
        pygraph.write_png(OUTPUT + f"{name}.png")
        return
    
    plot_graph(test_tree.root, "root_left_rotate")
    

    nil

    arborist = Arborist(test_tree)
    nodes = test_nodes(arborist, nodes)
    
    plot_graph(test_tree.root, "root-left-rotated")
    

    nil

  • Left Child

    This is the case where the parent node being demoted is the left-child of its parent.

    parent = tree.Node(10)
    test_tree, nodes = build_tree(parent)
    plot_graph(test_tree.root, "root_left_left_rotate")
    

    nil

    parent.left = nodes[5]
    nodes = test_nodes(Arborist(test_tree), nodes)
    expect(parent.left).to(be(nodes[7]))
    plot_graph(test_tree.root, "root_left_left_rotated")
    

    nil

  • Right Child

    This is the case where the parent node being demoted is the right-child of its parent.

    parent = tree.Node(2)
    parent.right = nodes[5]
    test_tree, nodes = build_tree(parent)
    plot_graph(test_tree.root, "root_parent_right_rotate")
    

    nil

    nodes = test_nodes(Arborist(test_tree), nodes)
    expect(parent.right).to(be(nodes[7]))
    plot_graph(test_tree.root, "root_parent_right_rotated")
    

    nil

Right Rotate

This will be the Right-Rotate version. Since we saw with the Left Rotate that the Node definition reduces some of the code needed for the rotation I'll leave those lines out of this version

Pseudocode

\begin{algorithm}
\caption{RightRotate}
\begin{algorithmic}
\INPUT The Tree and the parent Node to rotate
\PROCEDURE{RightRotate}{\textit{T}, \textit{x}}

\STATE \textit{y} $\gets$ \textit{x}.left
\STATE \textit{x}.left $\gets$ \textit{y}.right

\STATE \textit{y}.parent $\gets$ \textit{x}.parent

\IF {\textit{x}.parent = \textit{T}.\textsc{NIL}}
 \STATE \textit{T}.root $\gets$ \textit{y}
\ENDIF

\STATE \textit{y}.right $\gets$ \textit{x}
\ENDPROCEDURE
\end{algorithmic}
\end{algorithm}

This makes it a little clearer, I think. What the algorithm is doing is pretty simple. If x is the parent node to rotate with its right child:

  1. Move x's left grandchild up to be x's left-child.
  2. Set the parent of the prior x.left to be x's parent.
  3. If x was root, make y root.
  4. Make x the right child of its prior left-child.

Some Code

The Right-Rotate

This is more-or-less a straight implementation of the Right-Rotate algorithm given above.

def right(self, node: rb_tree.Node) -> None:
    """Rotates the node with its left child

    Args:
     node: the parent node to rotate
    """
    previous_child = node.left
    node.left = previous_child.right
    previous_child.parent = node.parent

    if node.is_root:
        self.tree.root = previous_child
    previous_child.right = node
    return

Testing It Out

def build_right_rotate_tree(root_parent: tree.Node) -> tuple:
    """Build the test-tree

    Args:
     - root_parent: The parent of the root-node

    Returns:
     Tree, Nodes dict
    """
    nodes = dict()
    nodes[7] = tree.Node(7, parent=root_parent)

    root = nodes[7] if root_parent is tree.NIL else root_parent
    test_tree = tree.RedBlackTree(root=root)

    nodes[4] = tree.Node(4)
    nodes[5] = tree.Node(5)

    nodes[6] = tree.Node(6)
    nodes[8] = tree.Node(8)

    nodes[7].left = nodes[5]
    nodes[7].right = nodes[8]
    nodes[7].left.left = nodes[4]
    nodes[7].left.right = nodes[6]
    return test_tree, nodes
  • Nil Parent

    Our first case will be when the node to swap with its child is root.

    def test_nodes_right(arborist, nodes) -> dict:
        root_parent = nodes[7].parent
        arborist.rotate.right(nodes[7])
    
        # check the new root
        expect(nodes[5].parent).to(be(root_parent))
        expect(nodes[5].left).to(be(nodes[4]))
        expect(nodes[4].parent).to(be(nodes[5]))
        expect(nodes[5].right).to(be(nodes[7]))
        expect(nodes[7].parent).to(be(nodes[5]))
    
        expect(nodes[7].left).to(be(nodes[6]))
        expect(nodes[6].parent).to(be(nodes[7]))
        expect(nodes[7].right).to(be(nodes[8]))
        expect(nodes[8].parent).to(be(nodes[7]))
        return nodes
    
    test_tree, nodes = build_right_rotate_tree(tree.NIL)
    plot_graph(test_tree.root, "root_right_rotate")
    

    nil

    nodes = test_nodes_right(Arborist(test_tree), nodes)
    expect(test_tree.root).to(be(nodes[5]))
    plot_graph(test_tree.root, "root_right_rotated")
    

    nil

  • Left Child

    This is the case where the parent node being demoted is the left-child of its parent.

    parent = tree.Node(10)
    test_tree, nodes = build_right_rotate_tree(parent)
    plot_graph(test_tree.root, "root_right_left_rotate")
    

    nil

    parent.left = nodes[7]
    nodes = test_nodes_right(Arborist(test_tree), nodes)
    expect(parent.left).to(be(nodes[5]))
    plot_graph(test_tree.root, "root-right-left-rotated")
    

    nil

  • Right Child

    This is the case where the parent node being demoted is the right-child of its parent.

    parent = tree.Node(2)
    test_tree, nodes = build_right_rotate_tree(parent)
    plot_graph(test_tree.root, "root_right_right_rotate")
    

    nil

    parent.right = nodes[7]
    nodes = test_nodes_right(Arborist(test_tree), nodes)
    expect(parent.right).to(be(nodes[5]))
    plot_graph(test_tree.root, "root_right_right_rotated")
    

    nil

Sources

The Red-Black Tree

This is a post that implements the Node for a Red-Black Tree. It is a continuation of this post.

Beginning

I'm going to implement a Node and Red-Black Tree so that I can explore it further in other posts.

Imports

# python
from pathlib import Path

Set Up

SLUG = "the-red-black-tree"
OUTPUT_PATH = Path(f"files/posts/")/SLUG
if not OUTPUT_PATH.is_dir():
    OUTPUT_PATH.mkdir()

Middle

This is the basic structure of our tree.

nil

Imports

# python
from __future__ import annotations
from collections import namedtuple
from enum import Enum

# this project
from bowling.types import Orderable

The Nil Node and Colors

I'm going to set up an Enum for the Colors and a Node instance for the NIL object. I originally made the NIL from a namedtuple, but as I've been adding features to the Node class it's become harder to work with.

Note: The python Enum uses object ID as the way to see if two things are equal, not their values. You can get around it, but since both the colors and the Nil are meant only to be used internally by the tree, not by the end user, I'm going to leave it as is. This means that to do things like check the color outside of the tree module that I'm creating, you have to import the module and refer to the Color or NIL using the module (e.g. tree.Color), otherwise you might end up with some funky problems where things that look equal aren't because they are actually different objects.

Color

class Color(Enum):
    """red or black"""
    RED = 1
    BLACK = 2
from bowling.data_structures.red_black_tree import tree
print(tree.Color.RED)
print(tree.Color.BLACK)

red = tree.Color.RED
print(red is tree.Color.RED)
print(red == tree.Color.RED)
print(red == 1)
Color.RED
Color.BLACK
True
True
False

Note that last case (print(red==1)). It's False because I'm using an Enum. We could switch it to an IntEnum and allow integers but for now I'm sticking to the Enum to see how well it works.

The Nil Node

Note: We have a little bit of a chicken or the egg problem here - I want to check for NILs in the Node Class methods which requires me to define it before defining the Node, so even though the NIL is a special case of the Node, we can't actually use the Node class to create it. It just has to be duck typed enough to be used in the methods.

NIL_KEY = "NIL"
NIL = namedtuple(
    "NIL", "key color parent left right is_red is_black",
    defaults=[NIL_KEY, Color.BLACK, None, None, None, False, True])()
LEAF = NIL
ROOT_PARENT = NIL
print(tree.NIL)
expect(tree.NIL.color).to(equal(tree.Color.BLACK))
NIL(key='NIL', color=<Color.BLACK: 2>, parent=None, left=None, right=None, is_red=False, is_black=True)

The Node

The Node for a red-black tree is the same as the Binary Search Tree Node except that it has the color attribute. But, since I'm doing integrity checks in the object that also means that the checks have to be updated to take into consideration the Red-Black Tree Properties.

As part of the Insert Node method the new node gets set to RED, so I'm not going to require passing the color in.

class Node:
    """A Node in a Binary Search Tree

    Args:
     key: item to compare nodes
     color: RED or BLACK
     parent: parent of this node
     left: left child
     right: right child
    """
    def __init__(self, key: Orderable, color: Color=None,
                 parent: Node=NIL,
                 left: Node=NIL, right: Node=NIL) -> None:
        self.key = key
        self.color = color
        self._parent = None
        self.parent = parent
        self._left = None
        self.left = left
        self._right = None
        self.right = right
        return
node = tree.Node(key=1, color=tree.Color.RED)
expect(node.color).to(be(tree.Color.RED))

Properties

Parent

@property
def parent(self) -> Node:
    """The parent of this node"""
    if self._parent is None:
        self._parent = NIL
    return self._parent

@parent.setter
def parent(self, parent_: Node) -> None:
    """Sets the parent and updates the parent

    Warning:
     this will clobber the parent's child if there's a node where this should
    be

    Args:
     parent: to add to self

    Raises:
     AssertionError if parent and self have same key
    """
    if parent_ is NIL:
        self._parent = parent_
        return

    if self == parent_:
        raise AssertionError(f"Self ({self}) cannot equal parent ({parent_})")

    # since the left and right assignments update the parent
    # we need a hack to get around the setters or you end up
    # with an infinite loop - we set left, they set parent, we set left,...
    if self < parent_:
        parent_._left = self
    else:
        parent_._right = self

    self._parent = parent_        
    return

Left

@property
def left(self) -> Node:
    """The left child"""
    if self._left is None:
        self._left = NIL
    return self._left

@left.setter
def left(self, new_left: Node) -> None:
    """Sets the left and its parent

    Raises:
     AssertionError if left isn't less than self

    Args:
     new_left: a node to be the left child or None
    """
    if new_left is NIL:
        self._left = new_left
        return

    assert new_left < self, f"Left ({new_left} not < self {self})"
    new_left.parent = self
    self._left = new_left
    return

Right

@property
def right(self) -> Node:
    """The right child"""
    if self._right is None:
        self._right = NIL
    return self._right

@right.setter
def right(self, new_right: Node) -> None:
    """Sets the right and its parent

    Raises:
     AssertionError if right isn't greater than self

    Args:
     new_right: a node to be the right child or None
    """
    if new_right is NIL:
        self._right = new_right
        return

    assert new_right > self, f"right ({new_right} not > self ({self})"
    new_right.parent = self
    self._right = new_right
    return

Comparisons

These are convenience methods to make it so that you can compare the node-objects without referring to the key (see the python Data Model documentation). In reading the documentation I thought that you had to implement everything, but after implementing less than and less than or equal to the greater than and greater than or equal to comparisons started to work. I guess if you don't implement them they just take the negative of the less than cases.

Equal

def __eq__(self, other: Node) -> bool:
    """Check if the other node has an equal key

    """
    return hasattr(other, "key") and self.key == other.key

Less Than

def __lt__(self, other: Node) -> bool:
    """See if this key is less than the other's

    Raises:
     AttributeError: the other thing doesn't have a key

    Returns:
     self < other
    """
    if not hasattr(other, "key"):
        raise AttributeError(f"'<' not supported between '{type(self)}' "
                             f"and '({other}): {type(other)}'")
    return self.key < other.key

Less Than or Equal

def __le__(self, other: Node) -> bool:
    """See if this key is less than or equal to other's

    Raises:
     AttributeError: other doesn't have key

    Returns:
     self <= other
    """
    if not hasattr(other, "key"):
        raise AttributeError(f"'<' not supported between '{type(self)}' "
                        "and '{type(other)}'")
    return self.key <= other.key

State Properties

Is Left

Is this a left-child node?

@property
def is_left(self) -> bool:
    """True if this node is a left child"""
    return self is self.parent.left
from expects import expect, be_true

child = tree.Node(key=2)
parent = tree.Node(key=5)
parent.left = child
expect(child.is_left).to(be_true)

expect(parent.is_left).to_not(be_true)

Is Right

Is the node a right child?

@property
def is_right(self) -> bool:
    """True if this node is a right child"""
    return self.parent.right is self
parent = tree.Node(10)
child = tree.Node(15)
parent.right = child

expect(child.is_right).to(be_true)
expect(child.is_left).not_to(be_true)
expect(parent.is_right).not_to(be_true)

Is Red

@property
def is_red(self) -> bool:
    """True if the node is colored red"""
    return self.color is Color.RED
node = tree.Node(15, color=tree.Color.RED)
expect(node.is_red).to(be_true)
node.color=tree.Color.BLACK
expect(node.is_red).not_to(be_true)

Is Black

@property
def is_black(self) -> bool:
    """True if the node is colored black"""
    return self.color is Color.BLACK
node = tree.Node(15, color=tree.Color.BLACK)
expect(node.is_black).to(be_true)
node.color=tree.Color.RED
expect(node.is_black).not_to(be_true)

Is Root

@property
def is_root(self) -> bool:
    """True if the node is the root"""
    return self.parent is NIL
node = tree.Node(15, color=tree.Color.BLACK)
expect(node.is_root).to(be_true)
parent = tree.Node(16)
parent.left = node
expect(node.is_root).not_to(be_true)
expect(parent.is_root).to(be_true)

Check Nodes

This is a convenience method to check if a node and its sub-trees maintain the Binary Search Tree Property. It calls the children too so that the whole tree can be checked by calling this on the root. Now that there's checks when the attributes are set this isn't quite as necessary. The only time you might need it is if the attributes are set directly instead of using the setter.

Note: Although the Binary Search Tree Property allows duplicate keys, once you start doing things with the tree like inserting and deleting nodes it causes problems. Also, it's not likely that the keys are what you would be most interested in when using a tree, it would be the data associated with the node, so what would it mean to have two different items associated with the same key? There are probably uses for this, but to make it simpler I'm going to treat the keys more like dictionary keys and say that it's a mistake to have duplicates.

def check_state(self) -> None:
    """Checks that the Binary Search Tree Property holds

    Raises:
     AssertionError: Binary Search Tree Property violated or duplicates exist
    """
    # red-black property 1: every node is either red or black
    assert self.color in (Color.RED, Color.BLACK), f"Invalid Color: {self.color}"

    # red-black property 4: if a node is red, both children are black
    if self.color is Color.RED:
        assert (self.left.color is Color.BLACK and
                self.right.color is Color.BLACK),\
            (f"Parent: {self.color} Left: {self.left.color} "
             f"Right: {self.right.color}. "
             "Both Children of a Red parent must be Black")

    if self.left is not NIL:
        assert self.left < self, f"Left: {self.left} not < Self: {self}"
        self.left.check_state()

    if self.right is not NIL:
        assert self.right > self, f"Right: {self.right} not > Self: {self}"
        self.right.check_state()
    return

String Output

This is to make it a little easier to print.

def __str__(self) -> str:
    """The key as a string"""
    return str(self.key)

Testing

I'll have to break this up later.

Imports

# pypi
from expects import (
    be,
    be_above,
    be_above_or_equal,
    be_below,
    be_below_or_equal,
    be_none,
    equal,
    expect,
    raise_error
)

# software under test
from bowling.data_structures.red_black_tree import tree

One Node

parent = tree.Node(key=10, color=tree.Color.RED)
parent.check_state()

expect(parent.key).to(equal(10))
expect(parent.color).to(be(tree.Color.RED))
expect(parent.left).to(be(tree.NIL))
expect(parent.right).to(be(tree.NIL))
expect(parent.parent).to(be(tree.NIL))

Check the Comparisons

uncle = tree.Node(key=9, color=tree.Color.BLACK)

expect(uncle).to(equal(tree.Node(key=9, color=tree.Color.RED)))
expect(uncle).to(be_below(parent))
expect(uncle).to(be_below_or_equal(parent))

brother = tree.Node(key=20, color=tree.Color.BLACK)

expect(brother).to(be_above(parent))
expect(brother).to(be_above_or_equal(parent))

# I'm still deciding who's responsible for checking if a node exists
# for now I'll copy what happens when None is compared to ints
expect(brother).not_to(equal(uncle.parent))

expect(lambda: brother < uncle.parent).to(raise_error(TypeError))
expect(lambda: brother.parent > uncle).to(raise_error(TypeError))

Check the Two-Way Updates.

  • Set the Parent

    In the constructor.

    parent = tree.Node(key=10, color=tree.Color.BLACK)
    
    left = tree.Node(5, parent=parent, color=tree.Color.RED)
    expect(left.parent).to(equal(parent))
    expect(parent.left).to(equal(left))
    
    right = tree.Node(15, parent=parent, color=tree.Color.BLACK)
    expect(right.parent).to(equal(parent))
    expect(parent.right).to(equal(right))
    
    def bad_parent():
        left = tree.Node(key=10,
                         parent=tree.Node(10, color=tree.Color.BLACK),
                         color=tree.Color.BLACK)
        return
    
    expect(bad_parent).to(raise_error(AssertionError))
    
    parent = tree.Node(key=10, color=tree.Color.BLACK)
    left = tree.Node(5, color=tree.Color.RED)
    left.parent = parent
    
    expect(left.parent).to(equal(parent))
    expect(parent.left).to(equal(left))
    
    right = tree.Node(15, color=tree.Color.BLACK)
    right.parent = parent
    expect(right.parent).to(equal(parent))
    expect(parent.right).to(equal(right))
    
    def bad_parent():
        parent = tree.Node(key=10, color=tree.Color.RED)
        left = tree.Node(key=10, color=tree.Color.BLACK)
        left.parent = parent
        return
    
    expect(bad_parent).to(raise_error(AssertionError))
    
  • Set The Left Child
    left = tree.Node(5, tree.Color.RED)
    parent = tree.Node(key=10, left=left, color=tree.Color.BLACK)
    
    expect(parent.left).to(equal(left))
    expect(left.parent).to(equal(parent))
    
    parent = tree.Node(key=10, color=tree.Color.RED)
    parent.left = left
    expect(parent.left).to(equal(left))
    expect(left.parent).to(equal(parent))
    
  • Set The Right Child
    right = tree.Node(15, tree.Color.RED)
    parent = tree.Node(key=10, right=right, color=tree.Color.BLACK)
    
    expect(parent.right).to(equal(right))
    expect(right.parent).to(equal(parent))
    
    parent = tree.Node(key=10, color=tree.Color.RED)
    parent.right = right
    expect(parent.right).to(equal(right))
    expect(right.parent).to(equal(parent))
    

The Check Node Method

uncle = tree.Node(key=9, color=tree.Color.RED)
parent = tree.Node(key=10, color=tree.Color.BLACK)
parent.check_state()

# parent is root
expect(parent.check_state).not_to(raise_error)

# parent is right child
parent.parent = uncle
expect(parent.check_state).not_to(raise_error)

# parent is left child
parent.parent = brother
expect(parent.check_state).not_to(raise_error)

def bad_check():
    parent.check_state()
    return

# left node is greater than the parent
lefty = tree.Node(15, color=tree.Color.RED)
def bad(): 
    parent.left = lefty
expect(bad).to(raise_error(AssertionError))
parent._left = lefty
expect(bad_check).to(raise_error(AssertionError))

# left node is less than the parent
parent.left = tree.NIL
parent.right = lefty
expect(parent.check_state).not_to(raise_error(AssertionError))

# right node is less than the parent
righty = tree.Node(key=2, color=tree.Color.BLACK)
def bad():
    parent.right = righty
    return
expect(bad).to(raise_error(AssertionError))
parent._right = righty
expect(bad_check).to(raise_error(AssertionError))

# right and left are okay
parent.left = righty
parent.right = lefty
expect(parent.check_state).not_to(raise_error)
parent = tree.Node(key=10, color=tree.Color.BLACK)
parent.left = tree.Node(key=2, color=tree.Color.RED)
# left children of parent's left child have to be less than parent
def bad():
    parent.left.left = tree.Node(key=100, color=tree.Color.BLACK)
expect(bad).to(raise_error(AssertionError))

parent.left.left = tree.Node(key=0, color=tree.Color.BLACK)
expect(parent.check_state).not_to(raise_error)
# right is greater than parent
lefty = tree.Node(15, color=tree.Color.RED)
def bad():
    lefty.right = tree.Node(key=0, color=tree.Color.BLACK)
expect(bad).to(raise_error(AssertionError))

# disallow duplicates
parent = tree.Node(10, color=tree.Color.RED)
def bad():
    parent.left = tree.Node(10, color=tree.Color.BLACK)
expect(bad).to(raise_error(AssertionError))

parent.key = 11
expect(parent.check_state).not_to(raise_error(AssertionError))

def bad():
    parent.right = tree.Node(11, color=tree.Color.BLACK)
expect(bad).to(raise_error(AssertionError))

parent.right = tree.Node(12, color=tree.Color.BLACK)
expect(parent.check_state).not_to(raise_error(AssertionError))

expect(str(parent)).to(equal(str(parent.key)))

Check the Red-Black Properties

# colors have to use the Color Enum
node = tree.Node(5, color=3)

expect(node.check_state).to(raise_error(AssertionError))

node = tree.Node(5, color=tree.Color.BLACK)
expect(node.check_state).to_not(raise_error(AssertionError))

# if a node is red both children must be black
left = tree.Node(5, color=tree.Color.RED)
right = tree.Node(20, color=tree.Color.BLACK)
node = tree.Node(10, color=tree.Color.RED, left=left, right=right)

expect(node.check_state).to(raise_error(AssertionError))

left.color = tree.Color.BLACK
expect(node.check_state).to_not(raise_error(AssertionError))

The Red-Black Tree

class RedBlackTree:
    """The Holder of the Red-Black Tree

    Args:
     root: the root node of the tree
    """
    def __init__(self, root: Node=NIL):
        self.root = root
        return

Red-Black Trees

What are Red-Black Trees?

A Red-Black Tree is a variation of a Binary Search Tree that adds an extra attribute to each node - the node's color, which can be either RED or BLACK.

nil

Why Do We Need Red-Black Trees?

The basic operations done on a Binary Search Tree (e.g. search, insert, etc.) are \(O(h)\) - they perform based on the height of the tree, so when the tree is balanced they perform optimally, but the height depends on the order that the nodes are inserted (and deleted) so the tree can end up being taller than we want it to be. Red-Black Trees use the colors and the Red-Black Properties to ensure that the tree is more balanced.

What are the Red-Black Properties?

These are the properties that need to be maintained for a Red-Black Tree to work correctly (from CLRS).

  1. Every Node is either Red or Black.
  2. The Root is Black.
  3. Every Leaf (Nil) is Black.
  4. If a Node is Red then both of its children are Black.
  5. For each Node, all simple paths from the node to descendant leaves contain the same number of Black Nodes.

When the properties are true the height of a tree with n nodes is \(O(\log(n))\).

What's the deal with the Nil Leaves?

CLRS define the leaves of a binary tree as Nil values (not the end nodes with keys like I tend to do) and for Red-Black Trees, unlike Binary Search Trees they use a special Node object that is colored Black and has arbitrary values for the other attributes. To save space the Nil Node is a singleton and besides being used as the leaves it also is the parent of the root.

Anything Else?

One last bit of terminology: the Black-Height of a node is the count of the Black Nodes on a simple path from the node to a leaf, not counting the node itself but counting the leaf. This is basically a name for property five of the Red-Black Properties.

Binary Search Tree: Randomized

This is the next post in a series on Binary Search Trees that start with this post. In this post we'll be looking at tree-heights when you randomize the order that the nodes are inserted into the tree.

Introduction

We now have a basic Binary Search Tree and some methods to build and query it. I didn't really go over it but if you look at the basic querying methods (search, min, max, etc.) you can see that they boil down to starting at a node (often the root) and then traversing up or down the levels of the tree until we find what we want or we reach a leaf. This means that at most the number of comparisons you make (everytime you reach a node you compare the node's key to your key to pick a child to follow) will be the height of tree (h).

The height of the tree depends, however, on the order in which you insert and delete the nodes. If you insert them in ascending order, for instance, you end up with all the nodes being right children and a height of n - 1 (the number of edges). But, CLRS shows that if you randomize the insertion you will, on average have a height of \(O(\lg(n))\) so I'm going to take an experimental look at that here.

Imports and Setup

# python
from functools import partial
from pathlib import Path

import random

# pypi
from expects import equal, expect
from joblib import Parallel, delayed

import altair
import networkx
import numpy
import pandas

# this project
from bowling.data_structures.binary_search_tree import (Node, Query, Tree)

# monkey projects
from graeae import Timer
from graeae.visualization.altair_helpers import output_path, save_chart
TIMER = Timer()
SLUG = "binary-search-tree-randomized"
GRAPH_OUTPUT = Path(f"files/posts/{SLUG}/")
ALTAIR_OUTPUT = output_path(SLUG)
if not GRAPH_OUTPUT.is_dir():
    GRAPH_OUTPUT.mkdir()
save_it = partial(save_chart, output_path=ALTAIR_OUTPUT)
def preorder(node: Node, adjacencies: dict) -> None:
    """Build the adjancency dictionary

    Args:
     node: the next node to add
     adjacencies: dictionary of key: adjacent node keys
    """
    if node is not None:
        left = node.left.key if node.left is not None else None
        right = node.right.key if node.right is not None else None
        if any((left, right)):
            if left is not None and right is not None:
                adjacencies[node.key] = (left, right)
            elif left is not None and right is None:
                adjacencies[node.key] = (left, )
            else:
                adjacencies[node.key] = (right,)
        preorder(node.left, adjacencies)
        preorder(node.right, adjacencies)
    return

Some Illustrative Cases

Added In Sorted Order

This is our worst-case where the nodes after the root all end up as right-children and the height is n - 1.

tree = Tree()
query = Query(tree)
n = 5
for node in range(n):
    tree.insert(Node(node))

expect(query.height).to(equal(n - 1))

adjacencies = {}
preorder(tree.root, adjacencies)

graph = networkx.nx_pydot.to_pydot(networkx.DiGraph(adjacencies))
graph.write_png(GRAPH_OUTPUT/"first_tree.png")

nil

If it were in reversed order we'd get a similar graph but the children would all be on the left and in the reverse order, but since graphviz/dot doesn't show left vs right it'd be hard to see what's going on.

A More Balanced Tree

This would be the best case, where the nodes are evenly balanced across the tree. Even though we have more nodes than the prior tree we only have a height of two.

KEYS = [50, 25, 75, 10, 30, 65, 80]
tree = Tree(Node(KEYS[0]))
for key in KEYS[1:]:
    tree.insert(Node(key))

adjacencies = {}
preorder(tree.root, adjacencies)
graph = networkx.nx_pydot.to_pydot(networkx.DiGraph(adjacencies))
graph.write_png(GRAPH_OUTPUT/"second_tree.png")

query = Query(tree)
expect(query.height).to(equal(2))

nil

Something In Between

If we insert the nodes in random order then we would expect to see something better than the worst case but not a perfectly balanced tree either.

random.shuffle(KEYS)
tree = Tree(Node(KEYS[0]))

for key in KEYS[1:]:
    tree.insert(Node(key))

adjacencies = {}
preorder(tree.root, adjacencies)
graph = networkx.nx_pydot.to_pydot(networkx.DiGraph(adjacencies))
graph.write_png(GRAPH_OUTPUT/"third_tree.png")

nil

An Empirical Check

Height vs Nodes

def heights(keys: list) -> tuple:
    """Builds the binary tree from the nodes and gets height

    Args:
     nodes: list of keys for nodes

    Returns:
     (number of nodes, height)
    """
    tree = Tree()
    query = Query(tree)
    for key in keys:
        tree.insert(Node(key))

    return (len(keys), query.height)
things_to_sort = (random.sample(range(count), k=count)
                  for count in range(10, 10**5, 100))

with TIMER:
    height_vs_count = Parallel(n_jobs=-1)(
        delayed(heights)(thing_to_sort)
        for thing_to_sort in things_to_sort
    )
Started: 2022-03-17 20:17:02.325084
Ended: 2022-03-17 20:17:30.486929
Elapsed: 0:00:28.161845
NODES, HEIGHT = 0, 1
unzipped = list(zip(*height_vs_count))
frame = pandas.DataFrame({"Nodes": unzipped[NODES],
                          "Height": unzipped[HEIGHT]})
frame["3Log2"] = 3 * numpy.log2(frame["Nodes"])
points = altair.Chart(frame).mark_point().encode(
    x="Nodes",
    y="Height",
    tooltip = [altair.Tooltip("Nodes", format=","),
               altair.Tooltip("Height", format=",")]
)

line = points.mark_line(color="#feb236").encode(
    x="Nodes",
    y="3Log2",
    tooltip = [altair.Tooltip("Nodes", format=","),
               altair.Tooltip("3Log2", format=",")]
)
chart = (line + points).properties(
    title="Randomized Binary Search Tree Node Count vs Height",
    width=800, height=500,
)
save_it(chart, "nodes-vs-height")

Figure Missing

So it looks like when you insert the nodes randomly you tend to get a tree height that's \(O(\log(n))\).

Node Distribution

nodes = 10**4
urn = list(range(nodes))
samples = 10**4
print(f"log2({nodes}) = {numpy.log2(nodes)}")
things_to_sort = (random.sample(urn, k=nodes) for sample in range(samples))

with TIMER:
    height_distribution = Parallel(n_jobs=-1)(
        delayed(heights)(thing_to_sort)
        for thing_to_sort in things_to_sort
    )
log2(10000) = 13.287712379549449
Started: 2022-03-17 21:15:42.270315
Ended: 2022-03-17 21:16:32.620183
Elapsed: 0:00:50.349868
NODES, HEIGHT = 0, 1
unzipped = list(zip(*height_distribution))
frame = pandas.DataFrame({"Nodes": unzipped[NODES],
                          "Height": unzipped[HEIGHT]})

base = altair.Chart(frame)

histogram = base.mark_bar(color="#50586C").encode(
    x=altair.X("Height", bin=True),
    y="count()", tooltip=altair.Tooltip("Height"))

median = base.mark_rule(color="#DCE2F0").encode(
    x="mean(Height):Q", size=altair.value(5),
    tooltip=altair.Tooltip("mean(Height)"))

chart = (histogram + median).properties(
    title=f"Randomized Binary Search Tree Height Distribution ({nodes:,} Nodes)",
    width=800, height=500,
)
save_it(chart, "height-distribution")

Figure Missing

print(f"Median: {frame.Height.median()}")
print(f"Min: {frame.Height.min()}")
print(f"Max: {frame.Height.max()}")
print(f"3 x log2(n): {3 * numpy.log2(frame.Nodes.iloc[0]):.2f}")
Median: 30.0
Min: 24
Max: 40
3 x log2(n): 39.86

So, with our evenly ranged inputs, even though in the worst case we could have ended up with a tree with a height of 10,000, by inserting the nodes in random order it turns out that most of the time it has a height of 30 and in this sample it has a maximum of 40.

Binary Search Tree Queries

This is the next post in a series on Binary Search Trees that start with this post. In this post we'll look at some basic queries we can make using the tree.

A Query Class

The textbook(s) treat these as functions, but they seem to suggest to me methods of a class, so I'll make one. The TYPE_CHECKING and annotations imports are to prevent circular imports as noted on Adam Johnson's blog.

# python
from __future__ import annotations
from typing import TYPE_CHECKING

# this project
if TYPE_CHECKING:
    from .node import Node
    from .tree import Tree
class Query:
    """Holds a tree and performs queries on it

    Args:
     tree: A Binary Search Tree
    """
    def __init__(self, tree: Tree):
        self.tree = tree
        return

Search

The first thing we'll look at is searching for a Node in a Binary Search Tree. Because of the way the tree is structured the search looks a lot like a Binary Search where we start at the root of the tree and then work our way downwards, picking the next child to follow based on whether the key we're searching for is greater than the right child's key or less than the left child's key. If it's equal then we've found the node we want.

The Algorithm

The Iterative Version

The recursive version of TreeSearch is quite nice, but it also translates easily to an iterative version if for some reason performance or size becomes a problem.

The Implementation

I'll add the iterative version of the Tree Search as a method for our Query class.

def search(self, key) -> Node:
    """Searches the tree for the node with the matching key

    Args:
     key: node's key to search for

    Returns:
     the node with the key or None if it isn't in the tree
    """
    node = self.tree.root
    while node is not None and key != node.key:
        if key < node.key:
            node = node.left
        else:
           node = node.right
    return node

Testing

# pypi
from expects import be,be_none, equal, expect
from bowling.data_structures.binary_search_tree import Node, Tree
from bowling.data_structures.binary_search_tree import Query

root = Node(10)
tree = Tree(root)
query = Query(tree)
output = query.search(5)
expect(output).to(be_none)
expect(query.search(10)).to(equal(root))

five = Node(5)
tree.insert(five)
expect(query.search(5)).to(be(five))

fifteen = Node(15)
tree.insert(fifteen)
expect(query.search(15)).to(be(fifteen))

I'll have to think of something more interesting to show for this…

Miminum and Maximum

Mimimum

\begin{algorithm}
\caption{TreeMinimum}
\begin{algorithmic}
\INPUT The Node to start the search from.
\OUTPUT The Node with the smallest key.
\PROCEDURE{TreeMinimum}{x}
\WHILE {\textit{x}.left $\neq$ \textsc{NIL}}
   \STATE \textit{x} $\gets$ \textit{x}.left
\ENDWHILE
\RETURN \textit{x}
\ENDPROCEDURE
\end{algorithmic}
\end{algorithm}

The Implementation

I originally didn't take the root node as an argument, since I thought the minimum of the tree is always the smallest item. But this method gets used later on in the successor method where we want to find the smallest item in the right-subtree of a particular node, so the outcome won't necessarily be the smallest item in the tree.

def min(self, node: Node=None) -> Node:
    """Returns the node with the smallest key

    Args:
     node: a node to use as the starting root
    """
    if node is None:
        node = self.tree.root

    while node.left is not None:
        node = node.left
    return node

Testing

tree = Tree(Node(10))
query = Query(tree)
tree.insert(Node(5))
tree.insert(Node(2))
tree.insert(Node(15))
tree.insert(Node(17))
tree.insert(Node(11))
expect(query.min()).to(equal(Node(2)))

tree.insert(Node(1))
expect(query.min()).to(equal(Node(1)))

expect(query.min(tree.root.right)).to(equal(Node(11)))

Maximum

\begin{algorithm}
\caption{TreeMaximum}
\begin{algorithmic}
\INPUT The Node to start the search from.
\OUTPUT The Node with the largest key.
\PROCEDURE{TreeMaximum}{x}
\WHILE {\textit{x}.right $\neq$ \textsc{NIL}}
   \STATE \textit{x} $\gets$ \textit{x}.right
\ENDWHILE
\RETURN \textit{x}
\ENDPROCEDURE
\end{algorithmic}
\end{algorithm}

The Implementation

def max(self, root: Node=None) -> Node:
    """Returns the node with the largest key

    Args:
     root: subtree root to start at

    Returns:
     node with the largest key in tree/subtree
    """
    if root is None:
        root = self.tree.root
    while root.right is not None:
        root = root.right
    return root

Testing

tree = Tree(Node(10))
query = Query(tree)
tree.insert(Node(5))
tree.insert(Node(2))
tree.insert(Node(15))

expect(query.max()).to(equal(Node(15)))

tree.insert(Node(17))
expect(query.max()).to(equal(Node(17)))
expect(query.min()).to(equal(Node(2)))

expect(query.max(tree.root.left)).to(equal(Node(5)))

Tree Successor

A "Successor" node is the next largest node after a given node. Since all the nodes in a right subtree are greater than the node, it's the smallest node in the right (if it exists). If the right subtree is empty then we traverse up the ancestors of the node until we find the first one that is greater than our node.

\begin{algorithm}
\caption{TreeSuccessor}
\begin{algorithmic}
\INPUT The Node to start the search from.
\OUTPUT The Node with the next largest key.
\PROCEDURE{TreeSuccessor}{x}
\IF {\textit{x}.right $\neq$ \textsc{NIL}}
  \RETURN \textsc{TreeMinimum}(\textit{x}.right)
\ENDIF

\STATE \textit{y} $\gets$ \textit{x}.parent

\WHILE {\textit{y} $\neq$ \textsc{NIL} and \textit{x} = \textit{y}.right}
   \STATE \textit{x} $\gets$ \textit{y}
   \STATE \textit{y} $\gets$ \textit{y}.parent
\ENDWHILE
\RETURN \textit{y}
\ENDPROCEDURE
\end{algorithmic}
\end{algorithm}
def successor(self, node: Node) -> Node:
    """Returns the next largest node

    Args:
     node: the node who's successor to find

    Returns:
     successor node to the input node
    """
    if node.right is not None:
        return self.min(node.right)

    successor = node.parent
    while successor is not None and node == successor.right:
        node = successor
        successor = successor.parent
    return successor
tree = Tree(Node(10))
query = Query(tree)
tree.insert(Node(5))
tree.insert(Node(2))
tree.insert(Node(15))
tree.insert(Node(17))
expect(query.successor(query.search(15))).to(equal(Node(17)))
expect(query.successor(query.search(2))).to(equal(Node(5)))
expect(query.successor(query.search(5))).to(equal(tree.root))
expect(query.successor(tree.root)).to(equal(Node(15)))

Tree Predecessor

Similar in concept to a node successor, a node predecessor is the largest node less than the given node.

\begin{algorithm}
\caption{TreePredecessor}
\begin{algorithmic}
\INPUT The Node to start the search from.
\OUTPUT The Node with the next smallest key.
\PROCEDURE{TreePredecessor}{x}
\IF {\textit{x}.left $\neq$ \textsc{NIL}}
  \RETURN \textsc{TreeMaximum}(\textit{x}.right)
\ENDIF

\STATE \textit{y} $\gets$ \textit{x}.parent

\WHILE {\textit{y} $\neq$ \textsc{NIL} and \textit{x} = \textit{y}.left}
   \STATE \textit{x} $\gets$ \textit{y}
   \STATE \textit{y} $\gets$ \textit{y}.parent
\ENDWHILE
\RETURN \textit{y}
\ENDPROCEDURE
\end{algorithmic}
\end{algorithm}
def predecessor(self, node: Node) -> Node:
    """Returns the predecessor node

    Args:
     node: the node whose predecessor we want

    Returns:
     largest node smaller than given node
    """
    if node.left is not None:
        return self.max(node.left)
    predecessor = node.parent
    while predecessor is not None and node == predecessor.left:
        node, predecessor = predecessor, predecessor.parent
    return predecessor
tree = Tree(Node(10))
query = Query(tree)
expect(query.predecessor(tree.root)).to(be_none)
expect(query.predecessor(Node(5))).to(be_none)
tree.insert(Node(12))
expect(query.predecessor(query.search(12))).to(be(query.search(10)))

tree.insert(Node(8))
expect(query.predecessor(query.tree.root)).to(be(query.search(8)))
tree.insert(Node(4))
expect(query.predecessor(query.search(8))).to(equal(Node(4)))

Height

The height of the Binary Search Tree is the number of edges from the root of the tree to the furthest node. The algorithms we're looking at here don't use them but I'm going to use height to look at how the order you insert nodes in the tree affects the height.

@property
def height(self) -> int:
    """The length of the longest path starting at the root

    Returns:
     number of edges from root to furthest leaf
    """
    return self.tree_height(self.tree.root)
def tree_height(self, node: Node=None) -> int:
    """The length of the longest path starting at the node

    Args:
     the node to start the measurement from

    Returns:
     number of edges from root to furthest leaf
    """
    if node is None:
        return -1

    left = self.tree_height(node.left) + 1
    right = self.tree_height(node.right) + 1
    return max(left, right)
tree = Tree()
query = Query(tree)

expect(query.height).to(equal(-1))
tree.insert(Node(10))
expect(query.height).to(equal(0))
tree.insert(Node(8))
expect(query.height).to(equal(1))
tree.insert(Node(12))
expect(query.height).to(equal(1))
tree.insert(Node(4))
expect(query.height).to(equal(2))
tree.insert(Node(2))
expect(query.height).to(equal(3))

tree = Tree()
query = Query(tree)

n = 20
for key in range(n):
    tree.insert(Node(key))

expect(query.height).to(equal(n - 1))

Sources