High School Facebook Networks

The Departure

This looks at data provided by SocioPatterns on the interactions between students at a high school in Marseilles, France, in December of 2013.

Imports

From Python
from collections import Counter
from functools import partial
from pathlib import Path
import os
From PyPi
from bokeh.models import HoverTool
from dotenv import load_dotenv
from holoviews import dim, opts
from holoviews.operation.datashader import datashade, bundle_graph
import holoviews
import hvplot.pandas
import networkx
import pandas as pandas
My Stuff
from graeae.timers import Timer
from graeae.visualization import EmbedBokeh, EmbedHoloview

Load the Dotenv

# Load the settings stored in the local ".env" file.
# NOTE(review): presumably this is where HIGH_SCHOOL (read from os.environ
# further down) gets defined -- confirm against the .env file.
load_dotenv(".env")

Build the Timer

# One shared timer; it is used below as a context manager (``with TIMER:``)
# around the graph construction, which prints Started/Ended/Elapsed lines.
TIMER = Timer()

Setup The Plotting

# Use bokeh as the holoviews plotting backend.
holoviews.extension("bokeh")

# Post slug; it names the sub-folder handed to the embedders below.
SLUG = "high-school-contact-and-friendship-networks/"
output = Path("../../files/posts/networks") / SLUG

# Embedders with the output folder already filled in.
Embed = partial(EmbedHoloview, folder_path=output)
EmbedB = partial(EmbedBokeh, folder_path=output)
class Plot:
    """Shared figure dimensions and font size for the plots."""
    # Figure size passed to the plots as ``width`` / ``height``.
    width, height = 1000, 800
    # Passed to the plots as ``fontsize``.
    fontsize = 18

Load The Data

Let's take a look at the data before loading it into pandas.

# Directory that holds the data files, taken from the environment.
# Indexing os.environ (instead of .get) makes a missing variable fail with a
# KeyError naming HIGH_SCHOOL rather than the TypeError that Path(None) raises.
HIGH_SCHOOL = Path(os.environ["HIGH_SCHOOL"]).expanduser()
assert HIGH_SCHOOL.is_dir(), f"HIGH_SCHOOL is not a directory: {HIGH_SCHOOL}"

#+begin_src ipython :session highschool :results none
class Files:
    """File names of the data files (joined onto HIGH_SCHOOL when loaded)."""
    # Student metadata.
    metadata = "metadata_2013.txt"
    # Network data files.
    high_school = "High-School_data_2013.csv"
    contact_diaries = "Contact-diaries-network_data_2013.csv"
    friendship = "Friendship-network_data_2013.csv"
    facebook = "Facebook-known-pairs_data_2013.csv"

MetaData

# Peek at the first five lines of the metadata file before parsing it.
metadata_path = HIGH_SCHOOL / Files.metadata
assert metadata_path.is_file()
with metadata_path.open() as lines:
    for _ in range(5):
        # The lines keep their own newline, so print must not add another.
        print(lines.readline(), end="")
650     2BIO1   F
498     2BIO1   F
627     2BIO1   F
857     2BIO1   F
487     2BIO1   F

This first file has the meta-data for the students. The three columns are the student's ID, class, and gender.

# Student metadata: tab-separated, no header row.
meta_data = pandas.read_csv(metadata_path, sep="\t",
                            names=["id", "class", "gender"])
# Convert with astype rather than ``meta_data.loc[:, column] = ...``: on
# recent pandas the .loc form writes into the existing object column in place
# and the categorical dtype is silently lost.
meta_data = meta_data.astype({"class": "category", "gender": "category"})
Classes

First a bar-plot to look at how the classes are distributed.

# Count the rows for every (class, gender) pair; both count columns are
# kept because the later plots read "class_count" and "gender_count".
counts = meta_data.groupby(["class", "gender"]).agg(
    {"class": "count", "gender": "count"})
grouped = counts.rename(
    columns={"class": "class_count", "gender": "gender_count"}).reset_index()

# One bar per class, stacked by gender.
bar_options = dict(
    title="Class Counts by Gender",
    x="class",
    y="class_count",
    by="gender",
    stacked=True,
    xlabel="Class",
    ylabel="Count",
    width=Plot.width,
    height=Plot.height,
    fontsize=Plot.fontsize,
    tools=["hover"],
)
plot = grouped.hvplot.bar(**bar_options).opts(xrotation=90)
Embed(plot=plot, file_name="gender_counts_stacked", height_in_pixels=Plot.height)()

Figure Missing

This is a look at the same thing except not stacked.

# Same counts as the stacked version, with the genders side by side.
bar_options = dict(
    title="Class Counts by Gender",
    x="class",
    y="class_count",
    by="gender",
    xlabel="Class",
    ylabel="Count",
    width=Plot.width,
    height=Plot.height,
    fontsize=Plot.fontsize,
    tools=["hover"],
)
plot = grouped.hvplot.bar(**bar_options).opts(xrotation=90)
Embed(plot=plot, file_name="gender_counts", height_in_pixels=Plot.height)()

Figure Missing

Strangely, the classes that start with 2BIO are more female while the others are more male.

Gender

A stacked bar plot to get a sense of not just the distribution among genders but among classes.

# One bar per gender, stacked by class.
# The x-axis carries the gender and the y-axis the counts; the labels were
# previously swapped ("Count" on x, "Gender" on y) and then partly overridden
# in .opts(). They now match the other bar plots.
plot = grouped.hvplot.bar(title="Gender Counts", x="gender", y="gender_count",
                          stacked=True,
                          by="class",
                          xlabel="Gender",
                          ylabel="Count",
                          fontsize=Plot.fontsize,
                          width=Plot.width,
                          height=Plot.height).opts(xrotation=90)
Embed(plot=plot, file_name="class_counts_stacked", height_in_pixels=Plot.height)()

Figure Missing

A non-stacked bar plot to get a better sense of how the genders fill the different classes.

# Gender counts with one bar per class, side by side rather than stacked.
bar_options = dict(
    title="Gender Counts",
    x="gender",
    y="gender_count",
    by="class",
    xlabel="Gender",
    ylabel="Count",
    width=Plot.width,
    height=Plot.height,
    fontsize=Plot.fontsize,
)
bars = grouped.hvplot.bar(**bar_options)
# The x-label set here replaces the one passed to hvplot above.
plot = bars.opts(xrotation=90, xlabel="Gender and Class")
Embed(plot=plot, file_name="class_counts", height_in_pixels=Plot.height)()

Figure Missing

It looks like there were slightly more males than females, but not many more.

The Descent

The Facebook Network

This dataset records whether one student was Facebook friends with another student.

# Peek at the first five lines of the Facebook file before parsing it.
facebook_path = HIGH_SCHOOL / Files.facebook
assert facebook_path.is_file()
with facebook_path.open() as lines:
    for _ in range(5):
        # The lines keep their own newline, so print must not add another.
        print(lines.readline(), end="")
1 984 0
1 883 1
1 941 0
1 650 0
1 132 1

The columns are one student, next student, facebook friends.

The third column is 0 if they aren't facebook friends and 1 if they are.

# Space-separated file with no header row; rows with missing values are dropped.
facebook_columns = ["reporter", "other", "friend"]
facebook_data = pandas.read_csv(
    facebook_path, sep=" ", names=facebook_columns).dropna()

The Descent

Looking at the Facebook Network

with TIMER:
    # Only rows with friend == 1 are Facebook friendships. Rows with
    # friend == 0 record that a pair is known NOT to be friends, so they
    # must not become edges.
    friendships = facebook_data[facebook_data.friend == 1]
    facebook_graph = networkx.convert_matrix.from_pandas_edgelist(
        friendships, "reporter", "other",
        create_using=networkx.DiGraph)
    # Keep every student that appears in the data as a node, including
    # those with no friendships, so the node set is the same as before.
    facebook_graph.add_nodes_from(
        set(facebook_data.reporter) | set(facebook_data.other))
Started: 2019-03-27 23:05:04.495114
Ended: 2019-03-27 23:05:04.499622
Elapsed: 0:00:00.004508
# Look-up tables from student id to gender and to class.
genders = dict(zip(meta_data.id, meta_data.gender))
classes = dict(zip(meta_data.id, meta_data["class"]))

# Attach both attributes to every node so the plots can colour by them.
for node, attributes in facebook_graph.nodes(data=True):
    attributes["gender"] = genders[node]
    attributes["class"] = classes[node]

# Tooltip showing the two node attributes.
hover = HoverTool(tooltips=[("Gender", "@gender"), ("Class", "@class")])

# Circular layout, nodes coloured by gender.
graph = holoviews.Graph.from_networkx(facebook_graph,
                                      networkx.circular_layout)
plot = graph.opts(
    title="Facebook Network by Gender",
    node_color=dim("gender"),
    cmap="Set1",
    tools=[hover],
    fontsize=Plot.fontsize,
    width=800,
    height=800,
    xaxis=None,
    yaxis=None,
    directed=True,
)
Embed(plot=plot, file_name="facebook_network_circular")()

Figure Missing

It's a little hard to see what's going on here, other than to note that you can see some people are more popular than others.

# Tooltip showing the two node attributes.
hover = HoverTool(tooltips=[("Gender", "@gender"), ("Class", "@class")])

# Circular layout again, this time coloured by class.
graph = holoviews.Graph.from_networkx(facebook_graph,
                                      networkx.circular_layout)
plot = graph.opts(
    title="Facebook Network by Class",
    node_color=dim("class"),
    cmap="Set1",
    tools=[hover],
    fontsize=Plot.fontsize,
    width=800,
    height=800,
    xaxis=None,
    yaxis=None,
    directed=True,
)
Embed(plot=plot, file_name="facebook_network_circular_class")()

Figure Missing

# Spring layout, nodes coloured by class.
graph_options = dict(
    title="Facebook Network By Class",
    node_color=dim("class"),
    cmap="Set1",
    tools=["hover"],
    width=800,
    height=800,
    xaxis=None,
    yaxis=None,
    directed=True,
)
graph = holoviews.Graph.from_networkx(facebook_graph, networkx.spring_layout)
plot = graph.opts(**graph_options)
Embed(plot=plot, file_name="facebook_network_class_spring", height_in_pixels=810)()

Figure Missing

# Spring layout, nodes coloured by gender.
graph_options = dict(
    title="Facebook Network By Gender",
    node_color=dim("gender"),
    cmap="Set1",
    tools=["hover"],
    width=800,
    height=800,
    xaxis=None,
    yaxis=None,
    directed=True,
)
graph = holoviews.Graph.from_networkx(facebook_graph, networkx.spring_layout)
plot = graph.opts(**graph_options)
Embed(plot=plot, file_name="facebook_network_gender_spring", height_in_pixels=810)()

Figure Missing

End

Citations

  • R. Mastrandrea, J. Fournet, A. Barrat,

Contact patterns in a high school: a comparison between data collected using wearable sensors, contact diaries and friendship surveys. PLoS ONE 10(9): e0136497 (2015)