Factsheet: Normal distribution

Statistics
Author

Michelle Arnetta and Tom Coleman

Summary
A factsheet for the normal distribution \(N(\mu,\sigma^2)\).
#| '!! shinylive warning !!': |
#|   shinylive does not work in self-contained HTML documents.
#|   Please set `embed-resources: false` in your metadata.
#| standalone: true
#| viewerHeight: 700

library(shiny)
library(bslib)
library(ggplot2)

# User interface for the calculator: an input form next to the density plot,
# with the computed probability reported in a card underneath. The pieces are
# assembled inside local() so that only `ui` is defined at top level.
ui <- local({
  # Labels shown to the user, mapped to the values the server compares
  # input$prob_type against.
  probability_choices <- list(
    "P(X ≤ x)" = "less",
    "P(X ≥ x)" = "greater",
    "P(x ≤ X ≤ y)" = "between"
  )

  # One cut-off value, shown for the two one-sided probabilities.
  single_value_panel <- conditionalPanel(
    condition = "input.prob_type == 'less' || input.prob_type == 'greater'",
    numericInput("x_value", "x value:", value = 0)
  )

  # Two bounds, shown only for the interval probability.
  interval_panel <- conditionalPanel(
    condition = "input.prob_type == 'between'",
    numericInput("x_lower", "Lower bound (x):", value = -1),
    numericInput("x_upper", "Upper bound (y):", value = 1)
  )

  # Left column: distribution parameters and the probability selector.
  inputs_card <- card(
    card_header("Parameters"),
    card_body(
      numericInput("mean", "Mean (μ):", value = 0),
      numericInput("sd", "Standard deviation (σ):", value = 1, min = 0.01),
      hr(),
      radioButtons(
        "prob_type",
        "Probability to calculate:",
        choices = probability_choices,
        selected = "less"
      ),
      single_value_panel,
      interval_panel
    )
  )

  # Right column: title carrying the current parameters, then the plot.
  plot_card <- card(
    card_header("Normal distribution plot"),
    card_body(
      uiOutput("plot_title"),
      plotOutput("distPlot", height = "300px")
    )
  )

  # Bottom row: plain-text statement of the computed probability.
  results_card <- card(
    card_header("Results"),
    card_body(
      textOutput("explanation")
    )
  )

  page_fluid(
    title = "Normal distribution calculator",
    layout_columns(
      col_widths = c(4, 8),
      inputs_card,
      plot_card
    ),
    results_card
  )
})

# Server logic for the normal distribution calculator.
#
# Arguments:
#   input   - reads mean, sd, prob_type, x_value, x_lower and x_upper.
#   output  - receives plot_title, explanation and distPlot.
#   session - not used by this function.
server <- function(input, output, session) {

  # TRUE only for a single finite number; rejects NULL, NA and +/-Inf.
  is_number <- function(value) {
    is.numeric(value) && length(value) == 1L && is.finite(value)
  }

  # Validated distribution parameters. Outputs that depend on this show the
  # message below instead of failing when a parameter is unusable.
  params <- reactive({
    validate(
      need(is_number(input$mean), "Please enter a number for the mean (μ)."),
      need(
        is_number(input$sd) && input$sd > 0,
        "The standard deviation (σ) must be a number greater than 0."
      )
    )
    list(mean = input$mean, sd = input$sd)
  })

  # Display the plot title with distribution parameters. Invalid parameters
  # blank the title silently; the plot and results carry the message.
  output$plot_title <- renderUI({
    req(is_number(input$mean), is_number(input$sd), input$sd > 0)
    title <- sprintf("N(μ = %.2f, σ = %.2f)", input$mean, input$sd)
    tags$h4(title, style = "text-align: center; margin-bottom: 15px;")
  })

  # Calculate the probability based on user selection.
  # Returns a list with prob, explanation and type, plus either x (one-sided
  # cases) or lower and upper (interval case).
  probability <- reactive({
    dist <- params()

    if (input$prob_type == "less") {
      validate(need(is_number(input$x_value), "Please enter a number for x."))
      prob <- pnorm(input$x_value, mean = dist$mean, sd = dist$sd)
      explanation <- sprintf("P(X ≤ %.2f) = %.4f or %.2f%%",
                             input$x_value, prob, prob * 100)
      list(prob = prob, explanation = explanation, type = "less",
           x = input$x_value)

    } else if (input$prob_type == "greater") {
      validate(need(is_number(input$x_value), "Please enter a number for x."))
      # lower.tail = FALSE computes the upper tail directly, avoiding the
      # cancellation in 1 - pnorm(...) far out in the tail.
      prob <- pnorm(input$x_value, mean = dist$mean, sd = dist$sd,
                    lower.tail = FALSE)
      explanation <- sprintf("P(X ≥ %.2f) = %.4f or %.2f%%",
                             input$x_value, prob, prob * 100)
      list(prob = prob, explanation = explanation, type = "greater",
           x = input$x_value)

    } else if (input$prob_type == "between") {
      validate(need(
        is_number(input$x_lower) && is_number(input$x_upper),
        "Please enter numbers for both bounds."
      ))
      # Reversed bounds would otherwise yield a negative "probability".
      validate(need(
        input$x_lower <= input$x_upper,
        "The lower bound (x) must not be greater than the upper bound (y)."
      ))
      lower_prob <- pnorm(input$x_lower, mean = dist$mean, sd = dist$sd)
      upper_prob <- pnorm(input$x_upper, mean = dist$mean, sd = dist$sd)
      prob <- upper_prob - lower_prob
      explanation <- sprintf("P(%.2f ≤ X ≤ %.2f) = %.4f or %.2f%%",
                             input$x_lower, input$x_upper, prob, prob * 100)
      list(prob = prob, explanation = explanation, type = "between",
           lower = input$x_lower, upper = input$x_upper)
    }
  })

  # Display an explanation of the calculation
  output$explanation <- renderText({
    probability()$explanation
  })

  # Generate the normal distribution plot
  output$distPlot <- renderPlot({
    dist <- params()
    res <- probability()
    req(res)

    # Plotted range: mean +/- 3.5 standard deviations (about 99.95% of the
    # distribution).
    x_min <- dist$mean - 3.5 * dist$sd
    x_max <- dist$mean + 3.5 * dist$sd

    # Keep only the positions that fall inside the plotted range. Vertical
    # lines outside it would stretch the x axis and squash the curve.
    in_view <- function(positions) {
      positions[positions >= x_min & positions <= x_max]
    }

    # Create data frame for plotting
    x <- seq(x_min, x_max, length.out = 500)
    df <- data.frame(x = x, y = dnorm(x, mean = dist$mean, sd = dist$sd))

    # Create base plot
    p <- ggplot(df, aes(x = x, y = y)) +
      geom_line() +
      labs(x = "X", y = "Density") +
      theme_minimal() +
      theme(panel.grid.minor = element_blank())

    # Add bold line at X = 0, but only when 0 lies inside the plotted range
    if (length(in_view(0)) > 0) {
      p <- p + geom_vline(xintercept = 0, linetype = "solid",
                          color = "black", linewidth = 0.8)
    }

    # Region to shade and cut-off positions for the selected probability
    if (res$type == "between") {
      shade_limits <- c(res$lower, res$upper)
      cutoffs <- c(res$lower, res$upper)
    } else if (res$type == "less") {
      shade_limits <- c(x_min, res$x)
      cutoffs <- res$x
    } else {
      shade_limits <- c(res$x, x_max)
      cutoffs <- res$x
    }

    # Clamp the shading to the plotted range so it is always sampled at the
    # same resolution as the visible curve; skip it when nothing is visible.
    shade_from <- max(shade_limits[1], x_min)
    shade_to <- min(shade_limits[2], x_max)
    if (shade_from < shade_to) {
      shade_x <- seq(shade_from, shade_to, length.out = 200)
      shade_df <- data.frame(
        x = shade_x,
        y = dnorm(shade_x, mean = dist$mean, sd = dist$sd)
      )
      p <- p + geom_area(data = shade_df, aes(x = x, y = y),
                         fill = "#3F6BB6", alpha = 0.6)
    }

    # Dashed markers at the cut-off value(s) that are inside the plot
    visible_cutoffs <- in_view(cutoffs)
    if (length(visible_cutoffs) > 0) {
      p <- p + geom_vline(xintercept = visible_cutoffs, linetype = "dashed",
                          color = "#db4315")
    }

    p
  })
}

# Build the app object from the UI definition and the server function.
shinyApp(ui, server)

Where to use: The normal distribution can be used to model continuous random variables that can take any real value, whether positive, negative or zero. Its use is often justified by the Central Limit Theorem: as the sample size increases, the distribution of sample means resembles a normal distribution more and more closely.

Notation: \(X \sim \textrm{Normal}(\mu,\sigma^2)\) or \(X \sim N(\mu,\sigma^2)\)

Parameters: Two real numbers: the mean \(\mu\) and the variance \(\sigma^2\), where \(\sigma^2 > 0\).

Quantity Value Notes
Mean \(\mathbb{E}(X) = \mu\)
Variance \(\mathbb{V}(X) = \sigma^2\)
PDF \(f(x)=\dfrac{1}{\sqrt{2\pi\sigma^2}}\exp\left({-\dfrac{(x-\mu)^2}{2\sigma^2}}\right)\) \(\exp(y) = e^y\)
CDF \(\displaystyle\mathbb{P}(X\leq x)=\dfrac{1}{2}\left[1+\textrm{erf}\left(\dfrac{x-\mu}{\sigma\sqrt{2}}\right)\right]\) \(\textrm{erf}(x)\) is the error function of \(x\)

Example: The lengths of chocolate bars produced by Cantor’s Confectionery follow a normal distribution with a mean of \(5.6\) inches and a variance of \(1.44\). This can be expressed as \(X \sim N(5.6, 1.44)\), meaning the data is normally distributed, centered at \(5.6\) with standard deviation \(\sqrt{1.44} = 1.2\).

Further reading

This interactive element appears in Guide: PMFs, PDFs, CDFs and Overview: Probability distributions. Please click the relevant links to go to the guides.

Version history

v1.0: initial version created 04/25 by tdhc and Michelle Arnetta as part of a University of St Andrews VIP project.

This work is licensed under CC BY-NC-SA 4.0.

Mailing List



Feedback

Your feedback is appreciated and useful. Feel free to leave a comment here,
but please be specific with any issues you encounter so we can help to resolve them
(for example, what page it occurred on, what you tried, and so on).