Come take a look at some cool ggforce functions that can help strengthen your ggplot visualization!
The extension package ggforce
provides a myriad of functions for enhancing ggplot visualization. In particular, it has functions that allow users to customize basic geometric elements such as lines and shapes, which may otherwise take more time and effort to achieve with the original ggplot functions. There are also some other interesting/fun functions we’ll explore. Hopefully, after reading this quick post, you’ll be able to put these functions into use in the future!
ggforce functionality
We’re going to check out three “line” functions:
(1) geom_link()
, which is similar to geom_segment()
but allows users to draw lines with color/alpha transparency/size gradients. This is done by mapping the computed variable “index” (values from 0 to 1 representing the progression of interpolated points along the line segment) to those aesthetics.
library(tidyverse)
library(ggforce)
### (1) geom_link()
# Endpoints of a single segment: it runs from (x, y) to (xend, yend).
geom_link_dat <- data.frame(x = 1, y = 1, xend = 2, yend = 2)

# Draw the segment with color and width gradients. Both gradients come from
# mapping the computed variable "index" (0 at the start of the segment, 1 at
# the end) to the aesthetic.
ggplot(geom_link_dat) +
  geom_link(
    aes(
      x = x, y = y, xend = xend, yend = yend,
      color = after_stat(index),     # color gradient along the segment
      linewidth = after_stat(index)  # width increases from start to end
    ),
    n = 100,  # number of interpolated points between the two endpoints
    show.legend = FALSE
  ) +
  scale_color_viridis_c() +
  scale_linewidth_continuous(range = c(1, 4)) +
  theme_bw(base_size = 14) +
  coord_fixed()
(2) geom_arc()
, which draws arcs based on a center point (origin), a radius, a start angle, and an end angle (both angles in radians).
### (2) geom_arc()
# One arc described by its origin (x0, y0), radius r, and start/end angles.
geom_arc_dat <- data.frame(
  x0 = 0,
  y0 = 0,
  r = 1,
  start = 0.2 * pi,
  end = 1.8 * pi
)

# Draw the arc; mapping the computed variable "index" to color and linewidth
# produces gradients along the arc.
ggplot(geom_arc_dat) +
  geom_arc(
    aes(
      x0 = x0, y0 = y0, r = r, start = start, end = end,
      color = after_stat(index),
      linewidth = after_stat(index)
    ),
    lineend = "round",
    n = 100,
    show.legend = FALSE
  ) +
  scale_color_distiller(palette = "Spectral") +
  scale_linewidth_continuous(range = c(1, 4)) +
  theme_bw(base_size = 14) +
  coord_fixed()
(3) geom_diagonal()
, which draws sigmoidal diagonal curves between two endpoints (as opposed to drawing straight lines like geom_link()
does).
### (3) geom_diagonal()
# Endpoints of the diagonal curves: from (x, y) to (xend, yend).
geom_diagonal_dat <- data.frame(x = 1, y = 1, xend = 2, yend = 2)

# Build one diagonal layer with a color gradient along the curve.
# `strength` controls the curvature/steepness of the diagonal;
# `linewidth` is the constant width used for that curve.
diagonal_layer <- function(strength, linewidth) {
  geom_diagonal(
    aes(
      x = x, y = y, xend = xend, yend = yend,
      color = after_stat(index)
    ),
    strength = strength,
    linewidth = linewidth,
    n = 100,
    show.legend = FALSE
  )
}

# Overlay three curves between the same endpoints, from the strongest and
# widest to the weakest and thinnest, to compare the effect of `strength`.
ggplot(geom_diagonal_dat) +
  diagonal_layer(strength = 2, linewidth = 3) +
  diagonal_layer(strength = 1, linewidth = 2) +
  diagonal_layer(strength = 0.5, linewidth = 1) +
  scale_color_distiller(palette = "Set1") +
  theme_bw(base_size = 14) +
  coord_fixed()
We have again three “shape” functions to check out:
(1) geom_regon()
, which draws regular polygons with any number of sides and any rotation angle.
### (1) geom_regon()
# Nine regular polygons on a 3 x 3 grid: origins (x0, y0), numbers of sides,
# rotation angles, and polygon sizes (r).
geom_regon_dat <- data.frame(
  x0 = rep(1:3, 3),
  y0 = rep(3:1, each = 3),
  sides = 3:11,
  # rotations of 0, 20, ..., 160 degrees, converted to radians because
  # geom_regon() reads the angle aesthetic in radians
  angle = seq(0, 160, by = 20) * pi / 180,
  r = rep(0.2, 9)  # r is the ratio of polygon size to plot area
)

# Draw the regular polygons, filled by their number of sides.
ggplot(geom_regon_dat) +
  geom_regon(
    aes(
      x0 = x0, y0 = y0, sides = sides, angle = angle, r = r,
      fill = sides
    ),
    show.legend = FALSE
  ) +
  scale_fill_viridis_c() +
  theme_bw(base_size = 14) +
  coord_fixed()
(2) geom_arc_bar()
, which is similar to geom_arc()
but draws sectors (pies) and arc bars instead of arc lines.
### (2) geom_arc_bar()
# Six arc bars sharing one origin and one outer radius. Each bar spans a sixth
# of the circle, and the inner radius grows from 0 (a full sector) to 1.8.
geom_arc_bar_dat <- data.frame(
  x0 = 0,
  y0 = 0,
  r0 = seq(0, 1.8, length.out = 6),  # the start (inner) radii
  r = 2,                             # the end (outer) radius
  start = (0:5) / 3 * pi,            # the start angles
  end = (1:6) / 3 * pi,              # the end angles
  fill = letters[1:6]
)

# Draw the sector and the arc bars.
ggplot(geom_arc_bar_dat) +
  geom_arc_bar(
    aes(
      x0 = x0, y0 = y0, r0 = r0, r = r,
      start = start, end = end, fill = fill
    ),
    show.legend = FALSE
  ) +
  scale_fill_brewer(palette = "Set2") +
  theme_bw(base_size = 14) +
  coord_fixed()
(3) geom_diagonal_wide()
, which is similar to geom_diagonal()
but draws polygons defined by an upper and a lower diagonal curve.
### (3) geom_diagonal_wide()
# Four corner points: (1, 1) -> (2.5, 2) is the lower curve and
# (1, 1.5) -> (2.5, 2.5) is the upper curve. All four share one group value,
# which marks them as the corners of a single polygon.
geom_diagonal_wide_dat <- data.frame(
  x = c(1, 2.5, 1, 2.5),
  y = c(1, 2, 1.5, 2.5),
  group = 1
)

# Draw the diagonal polygon.
ggplot(geom_diagonal_wide_dat, aes(x = x, y = y, group = group)) +
  geom_diagonal_wide(
    strength = 0.7,            # steepness of the upper and lower curves
    radius = unit(0.2, "cm")   # rounded corners
  ) +
  theme_bw(base_size = 14) +
  coord_fixed()
We’re going to take a look at a few other interesting (and potentially useful) ggforce
functions:
(1) geom_arc_bar(stat = "pie")
, which creates pie charts and donut charts with exploded slices. It works almost the same as geom_arc_bar()
, but instead of start and end angles, we map the relative sizes of the slices to the “amount” aesthetic. Additionally, we can explode the slices using the “explode” aesthetic.
### (1) geom_arc_bar(stat = "pie")
# Five slices sharing one origin and outer radius. "amount" holds the relative
# slice sizes and "explode" how far each slice is pushed out. Two inner radii
# are stored: 0 for the pie chart and 1.3 for the donut chart.
geom_arc_bar_pie_dat <- data.frame(
  x0 = 0,
  y0 = 0,
  r0_pie = 0,
  r0_donut = 1.3,
  r = 2,
  amount = c(1, 2, 3, 4, 5),
  explode = c(0, 0, 0, 0, 0.3)
)

# Pie chart with the largest slice exploded.
ggplot(geom_arc_bar_pie_dat) +
  geom_arc_bar(
    aes(
      x0 = x0, y0 = y0, r0 = r0_pie, r = r,
      amount = amount, explode = explode,
      fill = as.factor(amount)
    ),
    stat = "pie",
    show.legend = FALSE
  ) +
  scale_fill_brewer(palette = "Dark2") +
  theme_bw(base_size = 14) +
  coord_fixed()

# Donut chart: same slices, non-zero inner radius, no exploded slice.
ggplot(geom_arc_bar_pie_dat) +
  geom_arc_bar(
    aes(
      x0 = x0, y0 = y0, r0 = r0_donut, r = r,
      amount = amount,
      fill = as.factor(amount)
    ),
    stat = "pie",
    show.legend = FALSE
  ) +
  scale_fill_brewer(palette = "Dark2") +
  theme_bw(base_size = 14) +
  coord_fixed()
(2) geom_parallel_sets()
, which creates parallel sets/alluvial plots for a set of categorical variables. Basically, we need a dataframe with the counts of all unique combinations of the levels in the categorical variables. We then pass this dataframe to the function gather_set_data()
to convert it into a format used by geom_parallel_sets()
. Two companion functions geom_parallel_sets_axes()
and geom_parallel_sets_labels()
allow us to customize the line sets (i.e., the segments along the y-axis representing the levels of each categorical variable) and their labels.
### (2) geom_parallel_sets()
# Count the car models in each observed combination of cyl, am, and gear.
# count() returns an ungrouped result, so no grouping is carried forward.
mtcars_count <- mtcars %>%
  count(cyl, am, gear)

# Add the combinations that never occur (with a count of 0) and convert the
# three variables to factors.
mtcars_count <- expand_grid(
  cyl = unique(mtcars$cyl),
  am = unique(mtcars$am),
  gear = unique(mtcars$gear)
) %>%
  left_join(mtcars_count, by = join_by(cyl, am, gear)) %>%
  mutate(
    n = coalesce(n, 0L),
    across(.cols = c(cyl, am, gear), .fns = as.factor)
  )

# Convert the data into the format used by geom_parallel_sets().
geom_parallel_sets_dat <- gather_set_data(mtcars_count, 1:3)

# Create the parallel sets plot; the shared aesthetics are declared once and
# inherited by the three layers.
ggplot(
  geom_parallel_sets_dat,
  aes(x = x, split = y, id = id, value = n)
) +
  geom_parallel_sets(aes(fill = am), alpha = 0.75, show.legend = FALSE) +
  geom_parallel_sets_axes(axis.width = 0.1) +
  geom_parallel_sets_labels(colour = "white", angle = 0) +
  scale_x_continuous(breaks = 1:3, labels = c("cyl", "am", "gear")) +
  scale_fill_brewer(palette = "Set1") +
  theme_void() +
  theme(
    axis.text.x = element_text(
      color = "black",
      size = 14,
      margin = margin(t = -5, b = 5)
    )
  )
(3) geom_voronoi_tile() and geom_voronoi_segment()
, which create Voronoi diagrams for a given set of points (seeds) on the Cartesian plane. A Voronoi diagram is a partitioning of an area into cells (Voronoi cells) based on a set of seed points, with all points in a cell closer to the focal seed than to any other seed. The associated stat function stat_delvor_summary()
computes Voronoi tessellation, and we can extract the values of the computed variables (e.g., the area of each cell and the number of sides of each cell) from the plot for further analysis.
### (3) geom_voronoi_tile(), geom_voronoi_segment(), and stat_delvor_summary()
# Get the "brambles" dataset from the package "boot". It contains the x- and
# y-coordinates of 823 bramble canes at three different ages.
library(boot)
data("brambles")

# Sample 15 canes per age class to avoid overplotting.
set.seed(123)
geom_voronoi_dat <- brambles %>%
  slice_sample(n = 15, by = "age") %>%
  mutate(age = as.factor(age))

# Create the Voronoi plot. group = 1 puts all points in the same group so
# that a single tessellation is computed across the three age classes.
ggplot(geom_voronoi_dat, aes(x = x, y = y, group = 1)) +
  geom_voronoi_tile(aes(fill = age)) +
  geom_voronoi_segment() +
  stat_delvor_summary(
    aes(size = after_stat(vorarea)),  # point size proportional to cell area
    geom = "point",
    switch.centroid = TRUE  # x and y then represent the cell centroids
  ) +
  scale_fill_manual(
    name = "bramble cane age",
    values = c("#66c2a5", "#fc8d62", "#8da0cb")
  ) +
  scale_size_continuous(name = "Voronoi cell area", range = c(0, 6)) +
  theme_bw(base_size = 13) +
  coord_fixed()
# Extract the variables computed by stat_delvor_summary(): build a plot that
# contains only that layer, then read the layer's data from the built plot.
p_voronoi <- ggplot(geom_voronoi_dat) +
  stat_delvor_summary(
    aes(x = x, y = y, group = 1, size = after_stat(vorarea)),
    geom = "point",
    switch.centroid = TRUE
  )

# Show the first rows of the computed data for the first (and only) layer.
ggplot_build(p_voronoi)$data[[1]] %>%
  head()
size xorig yorig group PANEL ntri triarea triprop nsides
1 2.793030 0.859 0.528 1 1 4 0.008462 0.011722 4
2 3.496745 0.993 0.252 1 1 5 0.010363 0.014356 4
3 4.944237 0.917 0.770 1 1 7 0.031396 0.043492 4
4 4.813299 0.042 0.748 1 1 3 0.012672 0.017555 3
5 2.622590 0.827 0.244 1 1 5 0.011491 0.015918 5
6 2.518608 0.156 0.640 1 1 5 0.009080 0.012578 5
nedges vorarea vorprop x y shape colour fill
1 0 0.013638 0.011758 0.8269482 0.5317797 19 black NA
2 2 0.022640 0.019519 0.9980160 0.2002248 19 black NA
3 2 0.050442 0.043487 0.8871570 0.7478630 19 black NA
4 2 0.047413 0.040876 0.0179494 0.7925496 19 black NA
5 0 0.011902 0.010261 0.7947276 0.2968608 19 black NA
6 0 0.010928 0.009422 0.1315732 0.6855280 19 black NA
alpha stroke
1 NA 0.5
2 NA 0.5
3 NA 0.5
4 NA 0.5
5 NA 0.5
6 NA 0.5
(4) geom_delaunay_tile()
and geom_delaunay_segment()
, which create plots of Delaunay triangulation for a given set of points on the Cartesian plane. In a Delaunay triangulation, the circumcircle of each triangle contains none of the other input points; only the triangle’s own three vertices lie on its circumference.
### (4) geom_delaunay_tile() and geom_delaunay_segment()
# Sample 15 canes per age class from the brambles dataset to avoid
# overplotting.
set.seed(123)
geom_delaunay_dat <- brambles %>%
  slice_sample(n = 15, by = "age") %>%
  mutate(age = as.factor(age))

# Plot the Delaunay triangulation: unfilled triangles, grey edges, and the
# sampled points colored by age on top.
ggplot(geom_delaunay_dat, aes(x = x, y = y)) +
  geom_delaunay_tile(fill = NA) +
  geom_delaunay_segment(color = "grey60") +
  geom_point(aes(color = age), size = 2.5) +
  scale_color_manual(
    name = "bramble cane age",
    values = c("#66c2a5", "#fc8d62", "#8da0cb")
  ) +
  theme_bw(base_size = 13) +
  coord_fixed()
(5) position_jitternormal()
, which jitters the points by adding random noise generated from a normal distribution (as opposed to a uniform distribution in ggplot’s position_jitter()
). The jittered points from position_jitternormal()
look less rectangular/boxy than those from position_jitter()
.
### (5) position_jitternormal()
# Work on a 5% random sample of the diamonds data.
set.seed(123)
diamonds_subset <- diamonds %>%
  slice_sample(prop = 0.05)

# Reference plot: jitter with position_jitter().
p_position_jitter <- ggplot(
  diamonds_subset,
  aes(x = cut, y = price, color = cut)
) +
  geom_point(
    position = position_jitter(width = 0.15),
    show.legend = FALSE
  ) +
  scale_color_brewer(palette = "Set1") +
  labs(title = "position_jitter()") +
  theme_bw(base_size = 13) +
  theme(plot.title = element_text(hjust = 0.5, size = 15))

# Same plot with position_jitternormal(); sd_x and sd_y control the spread of
# the points.
p_position_jitternormal <- ggplot(
  diamonds_subset,
  aes(x = cut, y = price, color = cut)
) +
  geom_point(
    position = position_jitternormal(sd_x = 0.05),
    show.legend = FALSE
  ) +
  scale_color_brewer(palette = "Set1") +
  labs(title = "position_jitternormal()") +
  theme_bw(base_size = 13) +
  theme(plot.title = element_text(hjust = 0.5, size = 15))

# Show the two plots side by side.
library(patchwork)
p_position_jitter + p_position_jitternormal
I hope you learned something useful from this post. Don’t forget to leave your comments and suggestions below if you have any!
If you see mistakes or want to suggest changes, please create an issue on the source repository.