Aggregating Tabular Data
Table of Contents
Set Up
Imports
Python
from functools import partial
from pathlib import Path
import os
PyPi
from dotenv import load_dotenv
from holoviews import opts
from tabulate import tabulate
import holoviews
import numpy
import pandas
My Projects
from bartleby_the_penguin.tangles.embed_bokeh import EmbedBokeh
Holoviews Bokeh
I don't know why, but you have to specify that you're using bokeh, even though it looks like it's working when you don't.
# Explicitly load the bokeh plotting extension for holoviews.
holoviews.extension("bokeh")
The Embedder
# Folder handed to EmbedBokeh as ``folder_path`` for this post.
files_path = Path("../../files/posts/libraries/aggregating-tabular-data/")

# Pre-bind the folder so later calls only pass the plot and a file name.
Embed = partial(EmbedBokeh, folder_path=files_path)
Dotenv
I have the path to the data-set in a .env file, so I'll load it into the environment dictionary.
# Load the variables defined in the local ".env" file into the environment.
load_dotenv(".env")
Load the Data
This is the same measles/pertussis data that I used before.
# Resolve the data-set location from the DISEASES environment variable.
# Indexing the mapping directly means a missing variable raises a KeyError
# that names it, rather than Path(None) failing with an opaque TypeError.
path = Path(os.environ["DISEASES"]).expanduser()
assert path.is_file(), f"DISEASES does not point to a file: {path}"

# read_csv opens and closes the file itself when given a path, so the
# separate (and previously unused) file handle is not needed.
diseases = pandas.read_csv(path)
Convert the DataFrame to a Dataset
# Columns used as the key dimensions of the Dataset.
key_dimensions = ["Year", "State"]

# Value dimensions given as (column name, display label) pairs.
# The pertussis label was misspelled ("Pertusis"); it now matches the column.
value_dimensions = [
    ("measles", "Measles Incidence"),
    ("pertussis", "Pertussis Incidence"),
]

dataset = holoviews.Dataset(diseases, kdims=key_dimensions, vdims=value_dimensions)
Aggregate the Data
While I had aggregated the data before, this time I'm going to pass in the "Year" column as an argument so it won't keep the states separate.
# Aggregate over "Year" only, so the states are no longer kept separate:
# numpy.mean is the aggregate and numpy.std is the spread function.
aggregator = dataset.aggregate(
    "Year",
    function=numpy.mean,
    spreadfn=numpy.std,
)

measles_curve = holoviews.Curve(aggregator)

# ``.iloc[::2]`` keeps every second row, so only alternate points get a bar.
error_bars = holoviews.ErrorBars(
    aggregator,
    vdims=["measles", "measles_std"],
).iloc[::2]

# Pin the lower bound of the measles axis at zero; leave the upper bound open.
overlay = (measles_curve * error_bars).redim.range(measles=(0, None))
plot = overlay.opts(height=500, width=1000, tools=["hover"])

Embed(plot, "northwest_measles_aggregated")()