High School Facebook Networks
Table of Contents
The Departure
This post looks at data provided by SocioPatterns on the interactions between students at a high school in Marseilles, France, in December of 2013.
Imports
From Python
from collections import Counter
from functools import partial
from pathlib import Path
import os
From PyPi
from bokeh.models import HoverTool
from dotenv import load_dotenv
from holoviews import dim, opts
from holoviews.operation.datashader import datashade, bundle_graph
import holoviews
import hvplot.pandas
import networkx
import pandas as pandas
My Stuff
from graeae.timers import Timer
from graeae.visualization import EmbedBokeh, EmbedHoloview
Load the Dotenv
# Load the local .env file (python-dotenv); HIGH_SCHOOL is read from
# os.environ further down.
load_dotenv(".env")
Build the Timer
# Module-level timer, used as a context manager around the graph construction.
TIMER = Timer()
Setup The Plotting
# Render HoloViews objects with the bokeh backend.
holoviews.extension("bokeh")

# Both embed helpers get this post's output folder pre-bound as folder_path.
SLUG = "high-school-contact-and-friendship-networks/"
output = Path("../../files/posts/networks/").joinpath(SLUG)
Embed = partial(EmbedHoloview, folder_path=output)
EmbedB = partial(EmbedBokeh, folder_path=output)
class Plot:
    """Shared figure dimensions and font size for the plots."""

    fontsize = 18
    height = 800
    width = 1000
Load The Data
Let's take a look at the data before loading it into pandas.
# HIGH_SCHOOL names the directory that holds the data files. Index os.environ
# directly so that a missing variable is reported by name (KeyError) instead
# of the opaque TypeError that Path(None) would raise.
HIGH_SCHOOL = Path(os.environ["HIGH_SCHOOL"]).expanduser()
# Raise explicitly: an assert would be skipped when python runs with -O.
if not HIGH_SCHOOL.is_dir():
    raise NotADirectoryError(
        "HIGH_SCHOOL is not a directory: {}".format(HIGH_SCHOOL))
#+begin_src ipython :session highschool :results none
class Files:
    """Names of the data files (joined onto HIGH_SCHOOL where they are used)."""

    contact_diaries = "Contact-diaries-network_data_2013.csv"
    facebook = "Facebook-known-pairs_data_2013.csv"
    friendship = "Friendship-network_data_2013.csv"
    high_school = "High-School_data_2013.csv"
    metadata = "metadata_2013.txt"
MetaData
# Peek at the first five lines of the raw meta-data file.
metadata_path = HIGH_SCHOOL / Files.metadata
assert metadata_path.is_file()
with metadata_path.open() as reader:
    print("".join(reader.readline() for _ in range(5)), end="")
650 2BIO1 F 498 2BIO1 F 627 2BIO1 F 857 2BIO1 F 487 2BIO1 F
This first file has the meta-data for the students. The three columns are the student's ID, class, and gender.
# Read the tab-separated meta-data; the file has no header row, so the
# column names are supplied here.
meta_data = pandas.read_csv(metadata_path, sep="\t",
                            names=["id", "class", "gender"])
# Convert with astype and re-bind the frame rather than assigning through
# ``.loc[:, column]``: newer pandas sets ``.loc`` values in place and keeps
# the existing object dtype, so the category conversion would silently be
# lost.
meta_data = meta_data.astype({"class": "category", "gender": "category"})
Classes
First a bar-plot to look at how the classes are distributed.
# Count the rows for each (class, gender) pair, then turn the pair back into
# ordinary columns so the counts can be plotted.
grouped = (
    meta_data.groupby(["class", "gender"])
    .agg({"class": "count", "gender": "count"})
    .rename(columns={"class": "class_count", "gender": "gender_count"})
    .reset_index()
)
# Stacked bars: x is the class, the bars are split by gender.
plot = grouped.hvplot.bar(
    x="class",
    y="class_count",
    by="gender",
    stacked=True,
    title="Class Counts by Gender",
    xlabel="Class",
    ylabel="Count",
    width=Plot.width,
    height=Plot.height,
    fontsize=Plot.fontsize,
    tools=["hover"],
)
plot = plot.opts(xrotation=90)
Embed(plot=plot,
      file_name="gender_counts_stacked",
      height_in_pixels=Plot.height)()
This is a look at the same thing except not stacked.
# Same data as the stacked plot, without stacking.
plot = grouped.hvplot.bar(
    x="class",
    y="class_count",
    by="gender",
    title="Class Counts by Gender",
    xlabel="Class",
    ylabel="Count",
    width=Plot.width,
    height=Plot.height,
    fontsize=Plot.fontsize,
    tools=["hover"],
)
plot = plot.opts(xrotation=90)
Embed(plot=plot,
      file_name="gender_counts",
      height_in_pixels=Plot.height)()
Strangely, the classes that start with 2BIO
are more female while the others are more male.
Gender
A stacked bar plot to get a sense of not just the distribution among genders but among classes.
# Stacked bars: x is the gender, the bars are split by class.
# The y axis carries gender_count, so it is labelled "Count" (it was
# previously labelled "Gender"). The x label is set once, in .opts, which is
# where the original's final value came from.
plot = grouped.hvplot.bar(
    x="gender",
    y="gender_count",
    by="class",
    stacked=True,
    title="Gender Counts",
    ylabel="Count",
    width=Plot.width,
    height=Plot.height,
    fontsize=Plot.fontsize,
).opts(
    xrotation=90,
    xlabel="Gender and Class")
Embed(plot=plot,
      file_name="class_counts_stacked",
      height_in_pixels=Plot.height)()
A non-stacked bar plot to get a better sense of how the genders fill the different classes.
# Same data as the stacked gender plot, without stacking.
plot = grouped.hvplot.bar(
    x="gender",
    y="gender_count",
    by="class",
    title="Gender Counts",
    xlabel="Gender",
    ylabel="Count",
    width=Plot.width,
    height=Plot.height,
    fontsize=Plot.fontsize,
)
# The x label set here replaces the one passed to hvplot above.
plot = plot.opts(xrotation=90, xlabel="Gender and Class")
Embed(plot=plot,
      file_name="class_counts",
      height_in_pixels=Plot.height)()
It looks like there were slightly more males than females, but not many more.
The Descent
The Facebook Network
This is a dataset that shows whether a student was facebook friends with another student.
# Peek at the first five lines of the raw Facebook file.
facebook_path = HIGH_SCHOOL / Files.facebook
assert facebook_path.is_file()
with facebook_path.open() as reader:
    print("".join(reader.readline() for _ in range(5)), end="")
1 984 0 1 883 1 1 941 0 1 650 0 1 132 1
Each row has three columns: the first student, the second student, and whether the two are Facebook friends.
The third column is 0 if they aren't facebook friends and 1 if they are.
# Space-separated file without a header row; rows with missing values are
# dropped straight after loading.
facebook_data = pandas.read_csv(
    facebook_path,
    sep=" ",
    names=["reporter", "other", "friend"],
).dropna()
The Descent
Looking at the Facebook Network
# Build a directed graph with an edge reporter -> other for each Facebook
# friendship. Only rows with friend == 1 are friendships; rows with 0 record
# pairs that are known NOT to be friends, so they must not become edges.
with TIMER:
    facebook_graph = networkx.convert_matrix.from_pandas_edgelist(
        facebook_data[facebook_data.friend == 1],
        "reporter",
        "other",
        create_using=networkx.DiGraph)
Started: 2019-03-27 23:05:04.495114 Ended: 2019-03-27 23:05:04.499622 Elapsed: 0:00:00.004508
# Look-up tables from student id to gender and class, taken from the
# meta-data.
by_id = meta_data.set_index("id")
genders = by_id["gender"].to_dict()
classes = by_id["class"].to_dict()

# Attach both values to every node so the plots can colour by them and show
# them in the hover tooltips.
for student, attributes in facebook_graph.nodes(data=True):
    attributes["gender"] = genders[student]
    attributes["class"] = classes[student]
# Tooltip showing the gender and class attributes stored on each node.
hover = HoverTool(tooltips=[("Gender", "@gender"), ("Class", "@class")])

# Circular layout, nodes coloured by gender.
plot = holoviews.Graph.from_networkx(facebook_graph, networkx.circular_layout)
plot = plot.opts(
    title="Facebook Network by Gender",
    node_color=dim("gender"),
    cmap="Set1",
    directed=True,
    tools=[hover],
    width=800,
    height=800,
    fontsize=Plot.fontsize,
    xaxis=None,
    yaxis=None,
)
Embed(plot=plot, file_name="facebook_network_circular")()
It's a little hard to see what's going on here, other than to note that you can see some people are more popular than others.
# Tooltip showing the gender and class attributes stored on each node.
hover = HoverTool(tooltips=[("Gender", "@gender"), ("Class", "@class")])

# Circular layout, nodes coloured by class.
plot = holoviews.Graph.from_networkx(facebook_graph, networkx.circular_layout)
plot = plot.opts(
    title="Facebook Network by Class",
    node_color=dim("class"),
    cmap="Set1",
    directed=True,
    tools=[hover],
    width=800,
    height=800,
    fontsize=Plot.fontsize,
    xaxis=None,
    yaxis=None,
)
Embed(plot=plot, file_name="facebook_network_circular_class")()
# Spring layout, nodes coloured by class.
plot = holoviews.Graph.from_networkx(facebook_graph, networkx.spring_layout)
plot = plot.opts(
    title="Facebook Network By Class",
    node_color=dim("class"),
    cmap="Set1",
    directed=True,
    tools=["hover"],
    width=800,
    height=800,
    xaxis=None,
    yaxis=None,
)
Embed(plot=plot,
      file_name="facebook_network_class_spring",
      height_in_pixels=810)()
# Spring layout, nodes coloured by gender.
plot = holoviews.Graph.from_networkx(facebook_graph, networkx.spring_layout)
plot = plot.opts(
    title="Facebook Network By Gender",
    node_color=dim("gender"),
    cmap="Set1",
    directed=True,
    tools=["hover"],
    width=800,
    height=800,
    xaxis=None,
    yaxis=None,
)
Embed(plot=plot,
      file_name="facebook_network_gender_spring",
      height_in_pixels=810)()
End
Citations
- R. Mastrandrea, J. Fournet, A. Barrat,
Contact patterns in a high school: a comparison between data collected using wearable sensors, contact diaries and friendship surveys. PLoS ONE 10(9): e0136497 (2015)