# Visualizing a multivariate normal distribution

In R, it is quite straight forward to plot a normal distribution, eg., using the package ggplot2 or plotly.

# Setup

library(tidyverse)
library(mvtnorm)
library(plotly)
library(MASS)

# Simulate multivariate normal data

First, let’s define a covariance matrix $$\Sigma$$:

sigma <- matrix(c(4,2,2,3), ncol = 2)
sigma
##      [,1] [,2]
## [1,]    4    2
## [2,]    2    3

Then, simulate observations n = n from these covariance matrix; the means need be defined, too. As the rank of our covariance matrix is 2, we need two means:

means <- c(0, 0)
n <- 1000
set.seed(42)
x <- rmvnorm(n = n, mean = means, sigma = sigma)

What’s the structure of x?

str(x)
##  num [1:1000, 1:2] 2.314 1.053 0.716 2.848 3.839 ...

Let’s make a data frame out of it:

d <- data.frame(x)
names(d)
## [1] "X1" "X2"

Now, let’s plot.

# Plotting

## Plotting univariate (sampled) normal data

Well, that’s obvious.

d %>%
ggplot(aes(x = X1)) +
geom_density()

### Plot theoretic normal curve

p1 <- data_frame(x = -3:3) %>%
ggplot(aes(x = x)) +
stat_function(fun = dnorm, n = n)

p1

### Interactive version of normal curve

ggplotly(p1)

## Plotting multivariate data

### 2D density

p2 <- ggplot(d, aes(x = X1, y = X2)) +
geom_point(alpha = .5) +
geom_density_2d()

p2

### Same with plotly

ggplotly(p2)

### Geom binhex

p3 <- ggplot(d, aes(x = X1, y = X2)) +
geom_point(alpha = .5) +
geom_bin2d() +
scale_fill_viridis_c()

p3

### 2D histogram (heatmap) with plotly

(p <- plot_ly(d, x = ~X1, y = ~X2))
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No scatter mode specifed:
##   Setting the mode to markers
##   Read more about this attribute -> https://plot.ly/r/reference/#scatter-mode
add_histogram2d(p)

### 2D cntour with plotly

add_histogram2dcontour(p)

## 3D plot

#### Surface

dens <- kde2d(d$X1, d$X2)

plot_ly(x = dens$x, y = dens$y,
z = dens$z) %>% add_surface() ### 3D Scatter First, compute the density of each (X1, X2) pair. d$dens <- dmvnorm(x = d)

Now plot a point for each (X1, X2, dens) tuple.

p4 <- plot_ly(d, x = ~ X1, y = ~ X2, z = ~ dens,
marker = list(color = ~ dens,
showscale = TRUE)) %>%
p4