Categorical Plotting
Imports
From Python
from functools import partial
From PyPI
from tabulate import tabulate
import matplotlib.pyplot as pyplot
import numpy
import pandas
import seaborn
Set Up
The Plotting
%matplotlib inline
seaborn.set(style="whitegrid", color_codes=True)
FIGURE_SIZE = (14, 12)
The Tables
# Pre-configured ``tabulate``: org-mode table output with the column names
# ("keys") used as the header row.
table = partial(tabulate, tablefmt="orgtbl", headers="keys")
The Data
This is just some fake stuff to test it out. We're going to simulate how participants in a survey scored two products on a scale from -3 to 3.
Product One
# Simulated survey for the first product ("Alpha"): one row per respondent,
# unevenly spread across three countries.
countries = ["china"] * 50 + ["india"] * 20 + ["japan"] * 2
options = [-3, -2, -1, 0, 1, 2, 3]
# One randomly chosen score per respondent (unseeded, so the data differs on
# every run).
scores = pandas.Series(
    numpy.random.choice(options, size=len(countries)),
    dtype="category",
)
# One indicator column per observed score, named "Alpha <score>".
# Keywords make the prefix/separator roles explicit; dtype=int keeps the cells
# as 0/1 on pandas >= 2.0, where the default dummy dtype became bool.
one_hot = pandas.get_dummies(scores, prefix="Alpha", prefix_sep=" ", dtype=int)
data = pandas.DataFrame.from_dict(dict(country=countries),
                                  dtype="category")
# Put the country column and the indicator columns side by side.
data = pandas.concat([data, one_hot], axis="columns")
print(table(data.head(), showindex=False))
country | Alpha -3 | Alpha -2 | Alpha -1 | Alpha 0 | Alpha 1 | Alpha 2 | Alpha 3 |
---|---|---|---|---|---|---|---|
china | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
china | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
china | 1 | 0 | 0 | 0 | 0 | 0 | 0 |
china | 0 | 1 | 0 | 0 | 0 | 0 | 0 |
china | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
Product Two
# Simulated survey for the second product ("Beta"): one row per respondent,
# with a different country mix than the first product.
countries = ["china"] * 20 + ["india"] * 30 + ["japan"] * 25
options = [-3, -2, -1, 0, 1, 2, 3]
# One randomly chosen score per respondent (unseeded, so the data differs on
# every run).
scores = pandas.Series(
    numpy.random.choice(options, size=len(countries)),
    dtype="category",
)
# One indicator column per observed score, named "Beta <score>".
# Keywords make the prefix/separator roles explicit; dtype=int keeps the cells
# as 0/1 on pandas >= 2.0, where the default dummy dtype became bool.
one_hot = pandas.get_dummies(scores, prefix="Beta", prefix_sep=" ", dtype=int)
data_2 = pandas.DataFrame.from_dict(dict(country=countries),
                                    dtype="category")
# Put the country column and the indicator columns side by side.
data_2 = pandas.concat([data_2, one_hot], axis="columns")
# showindex=False matches the Product One table: the row index carries no
# information here.
print(table(data_2.head(), showindex=False))
country | Beta -3 | Beta -2 | Beta -1 | Beta 0 | Beta 1 | Beta 2 | Beta 3 |
---|---|---|---|---|---|---|---|
china | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
china | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
china | 0 | 1 | 0 | 0 | 0 | 0 | 0 |
china | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
china | 1 | 0 | 0 | 0 | 0 | 0 | 0 |
Grouping
# Sum the indicator columns per country: each cell becomes the number of
# respondents in that country who gave that Alpha score.
grouped = data.groupby(by="country").sum()
alpha_counts_table = table(grouped)
print(alpha_counts_table)
country | Alpha -3 | Alpha -2 | Alpha -1 | Alpha 0 | Alpha 1 | Alpha 2 | Alpha 3 |
---|---|---|---|---|---|---|---|
china | 6 | 8 | 3 | 9 | 4 | 11 | 9 |
india | 3 | 3 | 3 | 3 | 4 | 1 | 3 |
japan | 0 | 0 | 1 | 0 | 0 | 0 | 1 |
# Sum the indicator columns per country: each cell becomes the number of
# respondents in that country who gave that Beta score.
grouped_2 = data_2.groupby(by="country").sum()
beta_counts_table = table(grouped_2)
print(beta_counts_table)
country | Beta -3 | Beta -2 | Beta -1 | Beta 0 | Beta 1 | Beta 2 | Beta 3 |
---|---|---|---|---|---|---|---|
china | 1 | 5 | 4 | 2 | 3 | 1 | 4 |
india | 5 | 2 | 10 | 2 | 3 | 4 | 4 |
japan | 5 | 4 | 2 | 3 | 3 | 5 | 3 |
Plot both products together
# Grouped bar chart of per-country score counts for both products on one axes:
# Beta in reds, Alpha in blues.
figure, axe = pyplot.subplots(figsize=FIGURE_SIZE)
axe.set_title("Country vs Score")
axe.set_ylabel("Score")
# Plotting both frames at the default position stacks the second product's
# bars on top of the first and hides them. position=1 puts a bar group to the
# left of its country tick and position=0 to the right, so the products sit
# side by side.
with seaborn.color_palette("Reds", 7):
    axe = grouped_2.plot.bar(ax=axe, position=1, width=0.4)
with seaborn.color_palette("Blues", 7):
    axe = grouped.plot.bar(ax=axe, position=0, width=0.4)
# Reset the x-limits so the outermost bars of both products stay inside the
# axes.
axe.set_xlim(-0.5, len(grouped.index) - 0.5)
labels = axe.set_xticklabels(grouped.index.unique())
# Stacked bar chart of per-country score counts: one stack per product and
# country, Beta in reds and Alpha in blues.
figure, axe = pyplot.subplots(figsize=FIGURE_SIZE)
axe.set_title("Country vs Score")
axe.set_ylabel("Score")
# Plotting both frames at the default position draws the second product's
# stack over the first and hides it. position=1 puts a stack to the left of
# its country tick and position=0 to the right, so the products sit side by
# side.
with seaborn.color_palette("Reds", 7):
    axe = grouped_2.plot.bar(ax=axe, stacked=True, position=1, width=0.4)
with seaborn.color_palette("Blues", 7):
    axe = grouped.plot.bar(ax=axe, stacked=True, position=0, width=0.4)
# Reset the x-limits so the outermost stacks of both products stay inside the
# axes.
axe.set_xlim(-0.5, len(grouped.index) - 0.5)
labels = axe.set_xticklabels(grouped.index.unique())
Scaled
# Convert Alpha counts to per-country fractions: divide every row by that
# country's total number of responses so each row sums to 1.
alpha_row_totals = grouped.sum(axis="columns")
alpha_scaled = grouped.div(alpha_row_totals, axis="index")
print(table(alpha_scaled))
country | Alpha -3 | Alpha -2 | Alpha -1 | Alpha 0 | Alpha 1 | Alpha 2 | Alpha 3 |
---|---|---|---|---|---|---|---|
china | 0.12 | 0.16 | 0.06 | 0.18 | 0.08 | 0.22 | 0.18 |
india | 0.15 | 0.15 | 0.15 | 0.15 | 0.2 | 0.05 | 0.15 |
japan | 0 | 0 | 0.5 | 0 | 0 | 0 | 0.5 |
# Convert Beta counts to per-country fractions: divide every row by that
# country's total number of responses so each row sums to 1.
beta_row_totals = grouped_2.sum(axis="columns")
beta_scaled = grouped_2.div(beta_row_totals, axis="index")
print(table(beta_scaled))
country | Beta -3 | Beta -2 | Beta -1 | Beta 0 | Beta 1 | Beta 2 | Beta 3 |
---|---|---|---|---|---|---|---|
china | 0.05 | 0.25 | 0.2 | 0.1 | 0.15 | 0.05 | 0.2 |
india | 0.166667 | 0.0666667 | 0.333333 | 0.0666667 | 0.1 | 0.133333 | 0.133333 |
japan | 0.2 | 0.16 | 0.08 | 0.12 | 0.12 | 0.2 | 0.12 |
# Grouped bar chart of the per-country score fractions for both products.
figure, axe = pyplot.subplots(figsize=FIGURE_SIZE)
axe.set_title("Country vs Score")
axe.set_ylabel("Score")
# Colors follow the count plots (Beta in reds, Alpha in blues); previously the
# palettes were swapped here. position=1 / position=0 place the two products
# left and right of each country tick instead of drawing one over the other.
with seaborn.color_palette("Reds", 7):
    axe = beta_scaled.plot.bar(ax=axe, position=1, width=0.4)
with seaborn.color_palette("Blues", 7):
    axe = alpha_scaled.plot.bar(ax=axe, position=0, width=0.4)
# Reset the x-limits so the outermost bars of both products stay inside the
# axes.
axe.set_xlim(-0.5, len(grouped.index) - 0.5)
labels = axe.set_xticklabels(grouped.index.unique())
# Stacked bar chart of the per-country score fractions: one stack per product
# and country.
figure, axe = pyplot.subplots(figsize=FIGURE_SIZE)
axe.set_title("Country vs Score")
axe.set_ylabel("Score")
# Colors follow the count plots (Beta in reds, Alpha in blues); previously the
# palettes were swapped here. position=1 / position=0 place the two stacks
# left and right of each country tick instead of drawing one over the other.
with seaborn.color_palette("Reds", 7):
    axe = beta_scaled.plot.bar(ax=axe, stacked=True, position=1, width=0.4)
with seaborn.color_palette("Blues", 7):
    axe = alpha_scaled.plot.bar(ax=axe, stacked=True, position=0, width=0.4)
# Reset the x-limits so the outermost stacks of both products stay inside the
# axes.
axe.set_xlim(-0.5, len(grouped.index) - 0.5)
labels = axe.set_xticklabels(grouped.index.unique())
Well, I guess I need to work on making the reds visible, but I'm out of time.