Categorical Plotting

Imports

From Python

from functools import partial

From pypi

from tabulate import tabulate
import matplotlib.pyplot as pyplot
import numpy
import pandas
import seaborn

Set Up

The Plotting

%matplotlib inline
seaborn.set(style="whitegrid", color_codes=True)
FIGURE_SIZE = (14, 12)

The Tables

table = partial(tabulate, headers="keys",
                tablefmt="orgtbl")

The Data

This is just some fake stuff to test it out. We're going to simulate how participants in a survey scored two products on a scale from -3 to 3.

Product One

countries = ["china"] * 50 + ["india"] * 20 + ["japan"] * 2
options = [-3, -2, -1, 0, 1, 2, 3]
scores = pandas.Series(numpy.random.choice(options, size=len(countries)), dtype="category")
one_hot = pandas.get_dummies(scores, "Alpha", " ")
data = pandas.DataFrame.from_dict(dict(country=countries),
                                  dtype="category")
data = pandas.concat([data, one_hot], axis="columns")
print(table(data.head(), showindex=False))
country Alpha -3 Alpha -2 Alpha -1 Alpha 0 Alpha 1 Alpha 2 Alpha 3
china 0 0 0 0 0 0 1
china 0 0 0 1 0 0 0
china 1 0 0 0 0 0 0
china 0 1 0 0 0 0 0
china 0 0 0 1 0 0 0

Product Two

countries = ["china"] * 20 + ["india"] * 30 + ["japan"] * 25
options = [-3, -2, -1, 0, 1, 2, 3]
scores = pandas.Series(numpy.random.choice(options, size=len(countries)), dtype="category")
one_hot = pandas.get_dummies(scores, "Beta", " ")
data_2 = pandas.DataFrame.from_dict(dict(country=countries),
                                    dtype="category")
data_2 = pandas.concat([data_2, one_hot], axis="columns")
print(table(data_2.head()))
country Beta -3 Beta -2 Beta -1 Beta 0 Beta 1 Beta 2 Beta 3
china 0 0 0 1 0 0 0
china 0 0 0 0 0 0 1
china 0 1 0 0 0 0 0
china 0 0 0 1 0 0 0
china 1 0 0 0 0 0 0

Grouping

grouped = data.groupby("country").sum()
print(table(grouped))
country Alpha -3 Alpha -2 Alpha -1 Alpha 0 Alpha 1 Alpha 2 Alpha 3
china 6 8 3 9 4 11 9
india 3 3 3 3 4 1 3
japan 0 0 1 0 0 0 1
grouped_2 = data_2.groupby("country").sum()
print(table(grouped_2))
country Beta -3 Beta -2 Beta -1 Beta 0 Beta 1 Beta 2 Beta 3
china 1 5 4 2 3 1 4
india 5 2 10 2 3 4 4
japan 5 4 2 3 3 5 3

Concatenate our data

figure, axe = pyplot.subplots(figsize=FIGURE_SIZE)
axe.set_title("Country vs Score")
axe.set_ylabel("Score")
with seaborn.color_palette("Reds", 7):
    axe = grouped_2.plot.bar(ax=axe)
with seaborn.color_palette("Blues", 7):
    axe = grouped.plot.bar(ax=axe)
labels = axe.set_xticklabels(grouped.index.unique())

barplot.png

figure, axe = pyplot.subplots(figsize=FIGURE_SIZE)
axe.set_title("Country vs Score")
axe.set_ylabel("Score")
with seaborn.color_palette("Reds", 7):
    axe = grouped_2.plot.bar(ax=axe, stacked=True)
with seaborn.color_palette("Blues", 7):
    axe = grouped.plot.bar(ax=axe, stacked=True)
labels = axe.set_xticklabels(grouped.index.unique())

barplot_2.png

Scaled

alpha_scaled = (grouped.T/grouped.sum(axis="columns").values).T
print(table(alpha_scaled))
country Alpha -3 Alpha -2 Alpha -1 Alpha 0 Alpha 1 Alpha 2 Alpha 3
china 0.12 0.16 0.06 0.18 0.08 0.22 0.18
india 0.15 0.15 0.15 0.15 0.2 0.05 0.15
japan 0 0 0.5 0 0 0 0.5
beta_scaled = (grouped_2.T/grouped_2.sum(axis="columns").values).T
print(table(beta_scaled))
country Beta -3 Beta -2 Beta -1 Beta 0 Beta 1 Beta 2 Beta 3
china 0.05 0.25 0.2 0.1 0.15 0.05 0.2
india 0.166667 0.0666667 0.333333 0.0666667 0.1 0.133333 0.133333
japan 0.2 0.16 0.08 0.12 0.12 0.2 0.12
figure, axe = pyplot.subplots(figsize=FIGURE_SIZE)
axe.set_title("Country vs Score")
axe.set_ylabel("Score")
with seaborn.color_palette("Reds", 7):
    axe = alpha_scaled.plot.bar(ax=axe)
with seaborn.color_palette("Blues", 7):
    axe = beta_scaled.plot.bar(ax=axe)
labels = axe.set_xticklabels(grouped.index.unique())

alpha_scaled.png

figure, axe = pyplot.subplots(figsize=FIGURE_SIZE)
axe.set_title("Country vs Score")
axe.set_ylabel("Score")
with seaborn.color_palette("Reds", 7):
    axe = alpha_scaled.plot.bar(ax=axe, stacked=True)
with seaborn.color_palette("Blues", 7):
    axe = beta_scaled.plot.bar(ax=axe, stacked=True)
labels = axe.set_xticklabels(grouped.index.unique())

stacked_scaled.png

Well, I guess I need to work on making the reds visible, but I'm out of time.