Introduction

Unlike previous notebooks, there isn’t anything new here. What you’ll see instead is analysis of two data sets generated with less association among the covariates than the data sets you’ve seen in earlier notebooks. The analysis will use rstanarm, horseshoe priors, and projection prediction variable selection.

NOTE: Data the data not scaled so that regression coefficients are more comparable across the two data sets. You may want to return to some of the earlier notebooks where the data were scaled and re-run the analyses to see how the results change.

library(tidyverse)
library(reshape2)
library(ggplot2)
library(cowplot)
library(mvtnorm)
library(corrplot)

rm(list = ls())
## intetcept
##
beta0 <- 1.0
## regression coefficients
##
beta <- c(1.0, -1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0)
## pattern of correlation matrix, all non-zero entries are set to saem
## correlation, covariance matrix caldulated from individual variances and a 
## single association parameter governing the non-zero correlation coefficients
##
## Note: Not just any pattern will work here. The correlation matrix and
## covariance matrix generated from this pattern must be positive definite.
## If you change this pattern, you may get an error when you try to generate
## data with a non-zero association parameter.
##
Rho <- matrix(nrow = 9, ncol = , byrow = TRUE, 
              data = c(1,0,1,0,1,0,1,0,1,
                       0,1,0,1,0,1,0,1,0,
                       1,0,1,0,1,0,1,0,1,
                       0,1,0,1,0,1,0,1,0,
                       1,0,1,0,1,0,1,0,1,
                       0,1,0,1,0,1,0,1,0,
                       1,0,1,0,1,0,1,0,1,
                       0,1,0,1,0,1,0,1,0,
                       1,0,1,0,1,0,1,0,1
                       ))
## vector of standard deviations for covariates
##
sigma <- rep(1, 9)

## construct a covariance matrix from the pattern, standard deviations, and
## one parameter in [-1,1] that governs the magnitude of non-zero correlation
## coefficients
##
## Rho - the pattern of associations
## sigma - the vector of standard deviations
## rho - the association parameter
##
construct_Sigma <- function(Rho, sigma, rho) {
  ## get the correlation matris
  ##
  Rho <- Rho*rho
  for (i in 1:ncol(Rho)) {
    Rho[i,i] <- 1.0
  }
  ## notice the use of matrix multiplication
  ##
  Sigma <- diag(sigma) %*% Rho %*% diag(sigma)
  return(Sigma)
}

## set the random number seed manually so that every run of the code will 
## produce the same numbers
##
set.seed(1234)

n_samp <- 100
cov_str <- rmvnorm(n_samp,
                   mean = rep(0, nrow(Rho)),
                   sigma = construct_Sigma(Rho, sigma, 0.8))

resid <- rep(2.0, n_samp)

y_str <- rnorm(nrow(cov_str), mean = beta0 + cov_str %*% beta, sd = resid)
dat_1 <- data.frame(y_str, cov_str, rep("Strong", length(y_str)))

cov_str <- rmvnorm(n_samp,
                   mean = rep(0, nrow(Rho)),
                   sigma = construct_Sigma(Rho, sigma, 0.8))
y_str <- rnorm(nrow(cov_str), mean = beta0 + cov_str %*% beta, sd = resid)
dat_2 <- data.frame(y_str, cov_str, rep("Strong", length(y_str)))

column_names <- c("y", paste("x", seq(1, length(beta)), sep = ""), "Scenario")
colnames(dat_1) <- column_names
colnames(dat_2) <- column_names

Correlation plots with rho = 0.8

corrplot(cor(dat_1[, -c(1,11)]))