.Rmd file is available at https://github.com/meekj/TRU-July-2020

Problem

Solution

Discussion

Initialization

## To run from command line:
## Rscript  -e "rmarkdown::render('~/lab/R/var-var-plot.Rmd', output_dir = '~/r-reports/')"

library(dplyr)
library(ggplot2)
library(ggpubr)          # Stack plots with ggpubr::ggarrange
library(palmerpenguins)  # Sample data

pointSize <- 2

theme_jm1 <- theme_bw() + # A decent theme for HTML output
    theme(
        plot.title  = element_text(size = rel(1.5), family = 'Helvetica', face = 'bold'),
        plot.subtitle  = element_text(size = rel(1.3), family = 'Helvetica', face = 'bold'),
        axis.title  = element_text(size = rel(1.5), colour = 'black', face = 'bold'),
        axis.text.x = element_text(angle=0, size = rel(1.5), lineheight = 0.9, colour = 'black', vjust = 1, face = 'bold'),
        axis.text.y = element_text(size = rel(1.5), lineheight = 0.9, colour = 'black', hjust = 1, face = 'bold'),
        legend.title = element_text(size = rel(1.75)),
        legend.key  = element_rect(colour = 'white', fill = 'white'),
        legend.text = element_text(size = rel(1.3))
    )

FigureWidth  <- 14
FigureHeight <- 9

str(penguins)
## tibble [344 × 7] (S3: tbl_df/tbl/data.frame)
##  $ species          : Factor w/ 3 levels "Adelie","Chinstrap",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ island           : Factor w/ 3 levels "Biscoe","Dream",..: 3 3 3 3 3 3 3 3 3 3 ...
##  $ bill_length_mm   : num [1:344] 39.1 39.5 40.3 NA 36.7 39.3 38.9 39.2 34.1 42 ...
##  $ bill_depth_mm    : num [1:344] 18.7 17.4 18 NA 19.3 20.6 17.8 19.6 18.1 20.2 ...
##  $ flipper_length_mm: int [1:344] 181 186 195 NA 193 190 181 195 193 190 ...
##  $ body_mass_g      : int [1:344] 3750 3800 3250 NA 3450 3650 3625 4675 3475 4250 ...
##  $ sex              : Factor w/ 2 levels "female","male": 2 1 1 NA 1 2 1 2 NA NA ...

Code Example 1

## For interactive use select a set of variables, batch job uses the last set defined

vars_to_plot_x <- c('body_mass_g', 'body_mass_g', 'body_mass_g')
vars_to_plot_y <- c('bill_length_mm', 'bill_depth_mm', 'flipper_length_mm')

vars_to_plot_x <- c('flipper_length_mm', 'bill_length_mm', 'bill_depth_mm')
vars_to_plot_y <- c('bill_length_mm', 'bill_depth_mm', 'flipper_length_mm')

penguinPlots <- list()                     # Holds individual plots until they are stacked with ggarrange
for (varnum in 1:length(vars_to_plot_x)) { # Variable variable names
    Title <- paste0('Palmer Penguins', ' - ', vars_to_plot_x[[varnum]], ' - ', vars_to_plot_y[[varnum]])
    penguinPlots[[varnum]] <- ggplot(penguins) +
        geom_point(aes_string(x = vars_to_plot_x[[varnum]], y = vars_to_plot_y[[varnum]], colour = 'species'), size = pointSize) +
        xlab(vars_to_plot_x[[varnum]]) + ylab(vars_to_plot_y[[varnum]]) +
        ggtitle(Title)  + theme_jm1
}

Penguins, penguins, penguins

ggarrange(plotlist = penguinPlots, ncol = 1, nrow = length(penguinPlots), align = "v")


Make it nicer, and more report generator friendly

vars_to_plot_x <- c('body_mass_g', 'body_mass_g', 'body_mass_g')
vars_to_plot_y <- c('bill_length_mm', 'bill_depth_mm', 'flipper_length_mm')

axis_labels_x <- c('', '', 'Body Mass, g')
axis_labels_y <- c('Bill Length, mm', 'Bill Depth, mm', 'Flipper Length, mm')

genPlot <- function(data, title, vars_x, vars_y, ax_lbls_x, ax_lbls_y, var_color) {
    plots <- list()
    for (varnum in 1:length(vars_x)) {
        plots[[varnum]] <- ggplot(data) +
            geom_point(aes_string(x = vars_x[[varnum]], y = vars_y[[varnum]], colour = var_color), size = pointSize) +
            xlab(ax_lbls_x[[varnum]]) + ylab(ax_lbls_y[[varnum]]) +
            theme_jm1
        if (varnum == 1) plots[[varnum]] <- plots[[varnum]] + ggtitle(title) # Title only on top plot
    }
    return(plots)
}

Generate and display a plot set - colorized by species

## Note that all plots are vertically aligned regardless of y-axis tick digits

penguinPlots2 <- genPlot(penguins,'Palmer Penguins', vars_to_plot_x, vars_to_plot_y, axis_labels_x, axis_labels_y, 'species')
ggarrange(plotlist = penguinPlots2, ncol = 1, nrow = length(penguinPlots2), align = "v")


How about labeling the X axis only on the bottom plot?

genPlot2 <- function(data, title, vars_x, vars_y, ax_lbls_x, ax_lbls_y, var_color) {
    plots <- list()
    for (varnum in 1:length(vars_x)) {
        plots[[varnum]] <- ggplot(data) +
            geom_point(aes_string(x = vars_x[[varnum]], y = vars_y[[varnum]], colour = var_color), size = pointSize) +
            xlab(ax_lbls_x[[varnum]]) + ylab(ax_lbls_y[[varnum]]) +
            theme_jm1
        if (varnum == 1)              plots[[varnum]] <- plots[[varnum]] + ggtitle(title)                    # Title only on top plot
        if (varnum != length(vars_x)) plots[[varnum]] <- plots[[varnum]] + scale_x_continuous(labels = NULL) # X axis labels only on bottom
    }
    return(plots)
}

Generate and display a plot set - colorized by sex

penguinPlots2 <- genPlot2(penguins,'Palmer Penguins', vars_to_plot_x, vars_to_plot_y, axis_labels_x, axis_labels_y, 'sex')
ggarrange(plotlist = penguinPlots2, ncol = 1, nrow = length(penguinPlots2), align = "v")


Conclusion and Notes