GISS Zone Anomalies

Introduction

This is a look at the Godard Institute for Space Studies' surface temperature data. I'll look at the mean difference between the yearly mean and the historical "normal" mean.

Set Up

Imports

Python

from functools import partial
from pathlib import Path
import os

PyPi

from dotenv import load_dotenv
from holoviews import opts
import holoviews
from holoviews.plotting.links import RangeToolLink
from bokeh.models import CrosshairTool, HoverTool
from bokeh.palettes import Category20

import hvplot.pandas
import pandas
import seaborn

This Project

from bartleby_the_penguin.tangles.embed_bokeh import EmbedBokeh
from graeae.tables import CountPercentage

Load Dotenv

load_dotenv(".env")

HoloViews Backend

holoviews.extension("bokeh", width=800)

The Embedder

OUTPUT_PATH = Path("../../files/posts/giss/giss-zone-anomalies/")
Embed = partial(EmbedBokeh, 
                folder_path=OUTPUT_PATH)

Some Settings

class Plots:
    width = 1200
    height = 600
    font = "Open Sans"
    font_size = "24pt"
    line_width = 3
    tools =  ["hover"]
    blue = seaborn.color_palette()[0]
    light_blue = Category20[3][1]
    red = seaborn.color_palette()[3]
    yellow = seaborn.color_palette()[1]
    green = seaborn.color_palette()[2]
    gray = seaborn.color_palette()[7]

The Data

The data is the Combined Land-Surface Air and Sea-Surface Water Temperature Anomolies' Zonal Annual Means which shows the different annual mean for each zone in a given year.

zone_path = Path(os.environ.get("ZONES")).expanduser()
assert zone_path.is_file()
with zone_path.open() as reader:
    giss = pandas.read_csv(reader)
print(giss.describe())
              Year        Glob        NHem        SHem     24N-90N  \
count   139.000000  139.000000  139.000000  139.000000  139.000000   
mean   1949.000000    0.032302    0.056043    0.008561    0.077698   
std      40.269923    0.336896    0.393435    0.301848    0.464606   
min    1880.000000   -0.490000   -0.540000   -0.490000   -0.580000   
25%    1914.500000   -0.200000   -0.220000   -0.235000   -0.280000   
50%    1949.000000   -0.070000   -0.010000   -0.080000    0.020000   
75%    1983.500000    0.215000    0.210000    0.265000    0.255000   
max    2018.000000    0.980000    1.260000    0.710000    1.500000   

          24S-24N     90S-24S     64N-90N     44N-64N     24N-44N     EQU-24N  \
count  139.000000  139.000000  139.000000  139.000000  139.000000  139.000000   
mean     0.036115   -0.018561    0.111079    0.117770    0.027698    0.027626   
std      0.331384    0.295695    0.917715    0.516729    0.356416    0.326111   
min     -0.650000   -0.470000   -1.640000   -0.710000   -0.590000   -0.720000   
25%     -0.215000   -0.250000   -0.545000   -0.270000   -0.200000   -0.230000   
50%     -0.030000   -0.100000    0.020000    0.000000   -0.070000    0.000000   
75%      0.255000    0.230000    0.660000    0.360000    0.135000    0.240000   
max      0.970000    0.700000    3.050000    1.440000    1.060000    0.930000   

          24S-EQU     44S-24S     64S-44S     90S-64S  
count  139.000000  139.000000  139.000000  139.000000  
mean     0.045683    0.020432   -0.069353   -0.078129  
std      0.343385    0.312688    0.269380    0.732359  
min     -0.580000   -0.430000   -0.540000   -2.570000  
25%     -0.210000   -0.220000   -0.265000   -0.490000  
50%     -0.030000   -0.080000   -0.090000    0.050000  
75%      0.290000    0.260000    0.180000    0.410000  
max      1.020000    0.780000    0.450000    1.270000  

So we have years from 1880 through 2018.

print(giss.iloc[0])
Year       1880.00
Glob         -0.18
NHem         -0.31
SHem         -0.06
24N-90N      -0.38
24S-24N      -0.17
90S-24S      -0.01
64N-90N      -0.97
44N-64N      -0.47
24N-44N      -0.25
EQU-24N      -0.21
24S-EQU      -0.13
44S-24S      -0.04
64S-44S       0.05
90S-64S       0.67
Name: 0, dtype: float64

Tidying the Data

I'm going to use the locations for names so I'll make them something more meaningful for me. I don't know if these are, strictly speaking, the right names, but it's close enough.

name_remap = {
    "Glob": "Global",
    "NHem": "Northern Hemisphere",
    "SHem": "Southern Hemisphere",
    "24N-90N": "Northern Extratropics",
    "24S-24N": "Tropics",
    "90S-24S": "Southern Extratropics",
    "64N-90N": "North Frigid",
    "44N-64N": "North Temperate",
    "24N-44N": "North Sub-Tropic",
    "EQU-24N": "Tropic of Cancer",
    "24S-EQU": "Tropic of Capricorn",
    "44S-24S": "South Sub-Tropic",
    "64S-44S": "South Temperate",
    "90S-64S": "South Frigid",
}
giss = giss.rename(columns=name_remap)
print(giss.columns)
Index(['Year', 'Global', 'Northern Hemisphere', 'Southern Hemisphere',
       'Northern Extratropics', 'Tropics', 'Southern Extratropics',
       'North Frigid', 'North Temperate', 'North Sub-Tropic',
       'Tropic of Cancer', 'Tropic of Capricorn', 'South Sub-Tropic',
       'South Temperate', 'South Frigid'],
      dtype='object')
giss_tidy = giss.melt(id_vars=["Year"], var_name="location", 
                      value_name="Difference From Normal")
print(giss_tidy.iloc[0])
Year                        1880
location                  Global
Difference From Normal     -0.18
Name: 0, dtype: object
location = CountPercentage(giss_tidy.location)
# plot = location.table.hvplot.table()
# Embed(plot, "location_table")()
location()
Value Count Percentage
Southern Extratropics 139 7.14
Tropics 139 7.14
North Temperate 139 7.14
North Frigid 139 7.14
Northern Extratropics 139 7.14
North Sub-Tropic 139 7.14
South Sub-Tropic 139 7.14
Southern Hemisphere 139 7.14
Global 139 7.14
South Frigid 139 7.14
South Temperate 139 7.14
Tropic of Cancer 139 7.14
Northern Hemisphere 139 7.14
Tropic of Capricorn 139 7.14

The Frigid North

This is a plot of the temperature anomalies for the Frigid North using a Range Finder. You can select a range of years using the mini-plot below the main one.

# The Tools
hover = HoverTool(
    tooltips = [
        ("Year", "@Year"),
        ("Temperature Anomaly (C)", "@{North Frigid}{0.00}"),
],
    formatters={"North Frigid": "numeral"},
    mode="vline",
)

# The Parts
curve = holoviews.Curve(
    giss, "Year",
    ("North Frigid", "Temperature Anomaly (C)"),
)

main = curve.relabel("North Frigid Zone").opts(width=Plots.width, 
                                               height=Plots.height, 
                                               labelled=["y"],
                                               fontsize=Plots.font_size,
                                               color=Plots.blue,
                                               tools=["pan", "zoom_in", 
                                                      hover, "reset"])
range_finder = curve.opts(width=Plots.width, height=100, yaxis=None, default_tools=[])
line = holoviews.HLine(0)
RangeToolLink(range_finder, main)

layout = (main * line + range_finder).cols(1)
layout = layout.opts(
    opts.Layout(shared_axes=False, merge_tools=False),
    opts.HLine(color="black", alpha=0.2, line_width=2))
Embed(layout, "north_frigid_zone")()

This plot is kind of unusual in that the column name has a space in it ("North Frigid") so in the hover tool you have to put braces around it.

If we were only looking at one zone I would think this is the most useful type, but when I tried to add multiple zones I couldn't get it to work. HoloViews… I also tried to change the font and couldn't figure out how to do it. I really don't get what bokeh and HoloViews are doing with their documentation.

All Zones Dropdown

This creates a plot with all the zones but only showing one at a time (you select which one using a dropdown). To get it you specify the dropdown column using groupby.

# the Tools
hover = HoverTool(
tooltips = [
    ("Year", "@Year"),
    ("Difference from Normal", "@Difference From Normal"),
],
    formatters={"Difference From Normal": "numeral"},
    mode="vline",
)

plot = giss_tidy.hvplot("Year", groupby="location", width=Plots.width, dynamic=False, tools=[hover])
line = holoviews.HLine(0)
layout = plot * line
file_name = "zones.html"

plot_file = OUTPUT_PATH.joinpath(file_name)
holoviews.save(layout, plot_file)
print("[[file:{}][Link To Plot]]".format(file_name))

Link To Plot

All Zones Overlay

The dropdown is a convenient way to look at different zones individually but make it hard to compare different zones. I'm going to put all of them on one plot first to get an overall picture.

plot = giss_tidy.hvplot("Year", "Difference From Normal", 
                        by="location", 
                        title="Difference by Zone", 
                        ylabel="Difference From Normal",
                        width=Plots.width, 
                        height=Plots.height).opts(
                            fontsize=Plots.font_size,
                        )
line = holoviews.HLine(0)
layout = plot * line
Embed(layout, "all_zones")()

This is kind of a messy plot, but it does show that the North Frigid and South Frigid zones seem to have the largest anomalies.

Hemisphere Spikes

hemisphere_map = {
    'Global': "Global", 
    'Northern Hemisphere': "Northern", 
    'Southern Hemisphere': "Southern",
    'Northern Extratropics': "Northern", 
    'Tropics': "Tropics", 
    'Southern Extratropics': "Southern",
    'North Frigid': "Northern", 
    'North Temperate': "Northern", 
    'North Sub-Tropic': "Northern",
    'Tropic of Cancer': "Northern", 
    'Tropic of Capricorn': "Southern", 
    'South Sub-Tropic': "Southern",
    'South Temperate': "Southern", 
    'South Frigid': "Southern",
}

giss_tidy["hemisphere"] = giss_tidy.location.map(hemisphere_map)

Separate out the hemispheres for plotting.

northern = giss_tidy[giss_tidy.hemisphere=="Northern"]
southern = giss_tidy[giss_tidy.hemisphere=="Southern"]
global_anomalies = giss_tidy[giss_tidy.location=="Global"]

Plot.

# The Tools
hover = HoverTool(
tooltips = [
    ("Year", "@Year"),
    ("Difference from Normal", "@{Difference From Normal}"),
    ("Hemisphere", "@Hemisphere"),
],
    formatters={"Difference From Normal": "numeral"},
)

northern_spikes = holoviews.Spikes(northern, "Year", ("Difference From Normal", 
                                                      "Difference From Normal"),
                                   group="Hemisphere",
                                   label="Northern")
southern_spikes = holoviews.Spikes(southern, "Year", ("Difference From Normal",
                                                      "Difference From Normal"), 
                                   group="Hemisphere",
                                   label="Southern")
overlay = (northern_spikes
           * holoviews.Scatter(northern_spikes)
           * southern_spikes
           * holoviews.Scatter(southern_spikes)
           )
overlay.opts(
    opts.Spikes("Hemisphere.Northern", color=Plots.blue),
    opts.Spikes("Hemisphere.Southern", 
                fontsize=Plots.font_size,
                color=Plots.red, 
                width=Plots.width,
                height=Plots.height),
    opts.Scatter(size=10, tools=[hover]),
)
line = holoviews.HLine(0)
layout = overlay * line
bokeh_layout = holoviews.render(layout)
bokeh_layout.title.text_font = "open sans"
bokeh_layout.legend.location = "top_left"
embed = Embed(bokeh_layout, "all_hemispheres")
embed._figure = bokeh_layout
embed()

This one is a little hard to understand at first. All of the zones are still being plotted, but they are grouped by their hemisphere, so they look different from just the Hemisphere temperatures that come later. Mostly what you're seeing is a highlightt of the most anomalous zone for each year.

I also couldn't figure out how to get the zone information into the hovertool. With hvPlot you can pass in extra columns, but the Spikes didn't like it when I tried… such a frustrating library.

Anyway, it looks like the Northern Hemisphere started out below average while the Southern started out above average, then during the baseline period it sort of flipped, and now they're both above average, but the Northern is even more so.

Hemispheres

This plot shows what happens when you average the zones by hemisphere.

hemispheres = ["Global", "Northern Hemisphere", "Southern Hemisphere"]
hemispheres = giss_tidy[giss_tidy.location.isin(hemispheres)]
plot = hemispheres.hvplot("Year", "Difference From Normal", 
                          ylabel="Difference From Normal (C)",
                          by="location", 
                          title="Difference by Hemisphere",
                          fontsize=Plots.font_size,
                          width=Plots.width, height=Plots.height)
line = holoviews.HLine(0)
layout = plot * line
Embed(layout, "hemispheres")()

This shows a different view than the Zones grouped by Hemisphere, as all the values start out below normal and move in sort of a linear fashion upwards. This probably is the result of the fact that the Frigid Zones are more extreme in their differences than the other zones, so averaging all of the zones supresses them. At least that's my initial thought. Looking at the previous plot, though, it looks quite different - the lowest value in the Hemispheres plot comes in 1917, while the zones grouped by hemisphere puts it at 1930 and 1917 actually doesn't look so special. Maybe they're calculated differently.

Extratropics

zones = ["Global", "Northern Extratropics", "Southern Extratropics", "Tropics"]
zones = giss_tidy[giss_tidy.location.isin(zones)]
plot = zones.hvplot("Year", "Difference From Normal", by="location",
                    ylabel="Difference From Normal (C)",
                    fontsize=Plots.font_size,
                    title="Tropics and Extratropics", 
                    width=Plots.width, height=Plots.height)
line = holoviews.HLine(0)
layout = plot * line
Embed(layout, "tropics")()

The Zones

I did a previous plot "by zone" but in that case I was including all the zones given in the data. In this case we're only looking at the non-overlapping zones - so this chops up the planet by the smallest latitudinal zones that don't overlapy.

zones = ["North Frigid", "North Temperate", "North Sub-Tropic", 
         "Tropic of Cancer", "Tropic of Capricorn",
         "South Frigid", "South Temperate", "South Sub-Tropic"]
zones =  giss_tidy[giss_tidy.location.isin(zones)]
plot = zones.hvplot("Year", "Difference From Normal", by="location",
                    ylabel="Difference From Normal (C)",
                    title="Difference By (Non-Overlapping) Zones", 
                    fontsize=Plots.font_size,
                    width=Plots.width, height=Plots.height)
line = holoviews.HLine(0)
layout = plot * line
Embed(layout, "zones")()

Tabbed Plots

giss_tidy = giss_tidy.rename(columns={"location": "Location"})
class PlotGroups:
    hemisphere = "Hemishpere"
    tropics = "Tropics"
    climate_zones = "Zones"

group_map = {
    "Global": PlotGroups.hemisphere,
    "Northern Hemisphere": PlotGroups.hemisphere,
    "Southern Hemisphere": PlotGroups.hemisphere,
    "Northern Extratropics": PlotGroups.tropics,
    "Southern Extratropics": PlotGroups.tropics,
    "Tropics": PlotGroups.tropics,
    "North Frigid": PlotGroups.climate_zones,
    "North Sub-Tropic": PlotGroups.climate_zones,
    "North Temperate": PlotGroups.climate_zones,
    "Tropic of Cancer": PlotGroups.climate_zones,
    "Tropic of Capricorn": PlotGroups.climate_zones,
    "South Temperate": PlotGroups.climate_zones,
    "South Sub-Tropic": PlotGroups.climate_zones,
    "South Frigid": PlotGroups.climate_zones,
}
giss_tidy["plot_group"] = giss_tidy.Location.map(group_map)
hemispheric = giss_tidy[giss_tidy.plot_group==PlotGroups.hemisphere]
tropical = giss_tidy[giss_tidy.plot_group==PlotGroups.tropics]
zones = giss_tidy[giss_tidy.plot_group==PlotGroups.climate_zones]

assert (len(hemispheric.Location.unique()) 
        + len(tropical.Location.unique()) 
        + len(zones.Location.unique())) == len(giss_tidy.Location.unique())
assert len(hemispheric.Location.unique()) == 3
assert len(tropical.Location.unique()) == 3
assert len(zones.Location.unique()) == 8, zones.Location.unique()

cross_hair = CrosshairTool()
plot = (hemispheric.hvplot(x="Year", y="Difference From Normal", 
                           ylabel="Difference From Historical Mean",
                           by="Location",
                           width=Plots.width, 
                           height=Plots.height,
                           tools=[cross_hair]
).relabel("Hemispheres")
        * tropical.hvplot(x="Year", y="Difference From Normal",
                          ylabel="Difference From Historical Mean",                          
                          by="Location",
                          width=Plots.width, 
                          height=Plots.height,
                          tools=[cross_hair]).relabel(
                              "Tropics vs Extra-Tropics")
        * zones.hvplot(x="Year", y="Difference From Normal",
                       ylabel="Difference From Historical Mean",                       
                       by="Location",
                       width=Plots.width, 
                       height=Plots.height,
                       tools=[cross_hair],
                       ).relabel("Climate Zones")
).opts(tabs=True, fontsize=Plots.font_size)

Embed(plot, "tabbed_plots")()