Categorical Plotting

Imports

From Python

from functools import partial

From pypi

from tabulate import tabulate
import matplotlib.pyplot as pyplot
import numpy
import pandas
import seaborn

Set Up

The Plotting

# IPython/Jupyter magic (not plain Python): only valid inside a notebook cell.
%matplotlib inline
# Apply seaborn's "whitegrid" style globally, with color_codes turned on.
seaborn.set(style="whitegrid", color_codes=True)
# Shared figsize passed to pyplot.subplots for every figure below.
FIGURE_SIZE = (14, 12)

The Tables

# Pre-configured tabulate: use the data's keys (column names) as headers
# and emit the "orgtbl" table format.
table = partial(
    tabulate,
    tablefmt="orgtbl",
    headers="keys",
)

The Data

This is just some fake data to test the plotting. We're going to simulate how survey participants in three countries scored two products on an integer scale from -3 to 3.

Product One

# One entry per simulated respondent for product one.
countries = 50 * ["china"] + 20 * ["india"] + 2 * ["japan"]
# The allowed scores: integers from -3 to 3 inclusive.
options = list(range(-3, 4))
# Draw one random score per respondent (unseeded, so it differs per run).
scores = pandas.Series(
    numpy.random.choice(options, size=len(countries)),
    dtype="category",
)
# One indicator column per score, named "Alpha <score>".
one_hot = pandas.get_dummies(scores, prefix="Alpha", prefix_sep=" ")
# Put the country column next to the indicator columns.
data = pandas.concat(
    [
        pandas.DataFrame.from_dict({"country": countries}, dtype="category"),
        one_hot,
    ],
    axis="columns",
)
print(table(data.head(), showindex=False))
country Alpha -3 Alpha -2 Alpha -1 Alpha 0 Alpha 1 Alpha 2 Alpha 3
china 0 0 0 0 1 0 0
china 1 0 0 0 0 0 0
china 0 0 0 1 0 0 0
china 0 0 0 0 0 0 1
china 0 1 0 0 0 0 0

Product Two

# One entry per simulated respondent for product two.
countries = ["china"] * 20 + ["india"] * 30 + ["japan"] * 25
# The allowed scores: integers from -3 to 3 inclusive.
options = [-3, -2, -1, 0, 1, 2, 3]
# Draw one random score per respondent (unseeded, so it differs per run).
scores = pandas.Series(
    numpy.random.choice(options, size=len(countries)),
    dtype="category",
)
# One indicator column per score, named "Beta <score>"; keywords make the
# prefix and separator explicit instead of relying on argument position.
one_hot = pandas.get_dummies(scores, prefix="Beta", prefix_sep=" ")
data_2 = pandas.DataFrame.from_dict(dict(country=countries),
                                    dtype="category")
data_2 = pandas.concat([data_2, one_hot], axis="columns")
# Hide the row index so this preview matches the product-one table.
print(table(data_2.head(), showindex=False))
country Beta -3 Beta -2 Beta -1 Beta 0 Beta 1 Beta 2 Beta 3
china 0 0 0 1 0 0 0
china 0 0 0 0 0 0 1
china 0 1 0 0 0 0 0
china 0 0 0 1 0 0 0
china 1 0 0 0 0 0 0

Grouping

# Count, per country, how many respondents picked each product-one score
# by summing the one-hot indicator columns.
grouped = (
    data
    .groupby(by="country")
    .sum()
)
print(table(grouped))
country Alpha -3 Alpha -2 Alpha -1 Alpha 0 Alpha 1 Alpha 2 Alpha 3
china 2 8 6 7 9 6 12
india 2 8 0 4 2 0 4
japan 1 0 0 0 1 0 0
# Count, per country, how many respondents picked each product-two score
# by summing the one-hot indicator columns.
grouped_2 = (
    data_2
    .groupby(by="country")
    .sum()
)
print(table(grouped_2))
country Beta -3 Beta -2 Beta -1 Beta 0 Beta 1 Beta 2 Beta 3
china 3 5 2 3 2 2 3
india 4 2 3 7 5 6 3
japan 2 4 5 2 7 2 3

Concatenate our data

# Side-by-side bars of the per-country score counts for both products.
figure, axe = pyplot.subplots(figsize=FIGURE_SIZE)
axe.set_title("Country vs Score")
axe.set_ylabel("Score")
# Both products are drawn on the same axes, so the two sets of bars overlap;
# the palettes (reds for product two, blues for product one) tell them apart.
with seaborn.color_palette("Reds"):
    axe = grouped_2.plot.bar(ax=axe)
with seaborn.color_palette("Blues"):
    axe = grouped.plot.bar(ax=axe)
# After groupby("country").sum() the countries are the index, not a column,
# so `grouped.country` raises AttributeError; take the labels from the index.
labels = axe.set_xticklabels(grouped.index.tolist())

barplot.png

# Stacked bars of the per-country score counts for both products.
figure, axe = pyplot.subplots(figsize=FIGURE_SIZE)
axe.set_title("Country vs Score")
axe.set_ylabel("Score")
# Both products are drawn on the same axes, so the two stacks overlap;
# the palettes (reds for product two, blues for product one) tell them apart.
with seaborn.color_palette("Reds"):
    axe = grouped_2.plot.bar(ax=axe, stacked=True)
with seaborn.color_palette("Blues"):
    axe = grouped.plot.bar(ax=axe, stacked=True)
# After groupby("country").sum() the countries are the index, not a column,
# so `grouped.country` raises AttributeError; take the labels from the index.
labels = axe.set_xticklabels(grouped.index.tolist())

barplot_2.png