Calculator: \(F\)-distribution

Author
Michelle Arnetta and Tom Coleman
Summary
A calculator to work out cdfs for the \(F\)-distribution.
#| '!! shinylive warning !!': |
#|   shinylive does not work in self-contained HTML documents.
#|   Please set `embed-resources: false` in your metadata.
#| standalone: true
#| viewerHeight: 760

library(shiny)
library(bslib)
library(ggplot2)

ui <- page_fluid(
  title = "F-distribution calculator",
  
  layout_columns(
    col_widths = c(4, 8),
    
    # Left column - Inputs
    card(
      card_header("Parameters"),
      card_body(
        numericInput("df1", "Numerator degrees of freedom (d₁):", value = 5, min = 1, step = 1),
        numericInput("df2", "Denominator degrees of freedom (d₂):", value = 10, min = 1, step = 1),
        hr(),
        radioButtons("prob_type", "Probability to calculate:",
                    choices = list("P(X ≤ x)" = "less", 
                                  "P(X ≥ x)" = "greater", 
                                  "P(x ≤ X ≤ y)" = "between"),
                    selected = "less"),
        conditionalPanel(
          condition = "input.prob_type == 'less'",
          sliderInput("x_less", "x value:", min = 0, max = 10, value = 1, step = 0.1)
        ),
        conditionalPanel(
          condition = "input.prob_type == 'greater'",
          sliderInput("x_greater", "x value:", min = 0, max = 10, value = 1, step = 0.1)
        ),
        conditionalPanel(
          condition = "input.prob_type == 'between'",
          sliderInput("x_lower", "Lower bound (x):", min = 0, max = 10, value = 0.5, step = 0.1),
          sliderInput("x_upper", "Upper bound (y):", min = 0, max = 10, value = 2, step = 0.1)
        )
      )
    ),
    
    # Right column - Plot
    card(
      card_header("F-distribution plot"),
      card_body(
        uiOutput("plot_title"),
        plotOutput("distPlot", height = "300px")
      )
    )
  ),
  
  # Bottom row - Results
  card(
    card_header("Results"),
    card_body(
      textOutput("explanation")
    )
  )
)

server <- function(input, output, session) {
  
  # When degrees of freedom change, adjust the range of sliders
  observe({
    # For F distribution, a reasonable upper limit for the x-axis depends on degrees of freedom
    # Higher df means smaller values make more sense
    df1 <- input$df1
    df2 <- input$df2
    
    # Use a heuristic to determine a reasonable upper bound
    # This captures critical values at the 0.999 quantile
    max_x <- min(qf(0.999, df1, df2), 10)
    
    updateSliderInput(session, "x_less", max = max_x)
    updateSliderInput(session, "x_greater", max = max_x)
    updateSliderInput(session, "x_lower", max = max_x)
    updateSliderInput(session, "x_upper", max = max_x)
  })
  
  # Ensure that x_upper is always greater than or equal to x_lower
  observe({
    if (input$x_upper < input$x_lower) {
      updateSliderInput(session, "x_upper", value = input$x_lower)
    }
  })
  
  # Display the plot title with distribution parameters
  output$plot_title <- renderUI({
    title <- sprintf("F-distribution(d₁ = %d, d₂ = %d)", input$df1, input$df2)
    tags$h4(title, style = "text-align: center; margin-bottom: 15px;")
  })
  
  # Calculate the probability based on user selection
  probability <- reactive({
    if (input$prob_type == "less") {
      prob <- pf(input$x_less, df1 = input$df1, df2 = input$df2)
      explanation <- sprintf("P(X ≤ %.1f) = %.6f or %.4f%%", 
                           input$x_less, prob, prob * 100)
      return(list(prob = prob, explanation = explanation, type = "less", x = input$x_less))
      
    } else if (input$prob_type == "greater") {
      prob <- 1 - pf(input$x_greater, df1 = input$df1, df2 = input$df2)
      explanation <- sprintf("P(X ≥ %.1f) = %.6f or %.4f%%", 
                           input$x_greater, prob, prob * 100)
      return(list(prob = prob, explanation = explanation, type = "greater", x = input$x_greater))
      
    } else if (input$prob_type == "between") {
      if (input$x_lower == input$x_upper) {
        # For continuous distributions, P(X = a) = 0
        prob <- 0
      } else {
        upper_prob <- pf(input$x_upper, df1 = input$df1, df2 = input$df2)
        lower_prob <- pf(input$x_lower, df1 = input$df1, df2 = input$df2)
        prob <- upper_prob - lower_prob
      }
      explanation <- sprintf("P(%.1f ≤ X ≤ %.1f) = %.6f or %.4f%%", 
                           input$x_lower, input$x_upper, prob, prob * 100)
      return(list(prob = prob, explanation = explanation, type = "between", 
                 lower = input$x_lower, upper = input$x_upper))
    }
  })
  
  # Display an explanation of the calculation
  output$explanation <- renderText({
    res <- probability()
    return(res$explanation)
  })
  
  # Generate the F-distribution plot
  output$distPlot <- renderPlot({
    # Get parameters
    df1 <- input$df1
    df2 <- input$df2
    
    # Determine a reasonable max for x-axis based on df values
    max_x <- min(qf(0.999, df1, df2), 10)
    
    # Create data frame for plotting
    x_values <- seq(0.01, max_x, length.out = 500)  # Avoid x=0 since df(0) is undefined
    density_values <- df(x_values, df1 = df1, df2 = df2)
    plot_df <- data.frame(x = x_values, density = density_values)
    
    # Create base plot
    p <- ggplot(plot_df, aes(x = x, y = density)) +
      geom_line(size = 1, color = "darkgray") +
      labs(x = "X", y = "probability density function") +
      theme_minimal() +
      theme(panel.grid.minor = element_blank()) +
      xlim(0, max_x)
    
    # Add shaded area based on selected probability type
    res <- probability()
    
    if (res$type == "less") {
      # Create data for the filled area
      fill_x <- seq(0.01, res$x, length.out = 200)
      fill_y <- df(fill_x, df1 = df1, df2 = df2)
      fill_df <- data.frame(x = fill_x, density = fill_y)
      
      p <- p + geom_area(data = fill_df, aes(x = x, y = density), 
                        fill = "#3F6BB6", alpha = 0.6)
      
    } else if (res$type == "greater") {
      # Create data for the filled area
      fill_x <- seq(res$x, max_x, length.out = 200)
      fill_y <- df(fill_x, df1 = df1, df2 = df2)
      fill_df <- data.frame(x = fill_x, density = fill_y)
      
      p <- p + geom_area(data = fill_df, aes(x = x, y = density), 
                        fill = "#3F6BB6", alpha = 0.6)
      
    } else if (res$type == "between") {
      # Create data for the filled area
      fill_x <- seq(res$lower, res$upper, length.out = 200)
      fill_y <- df(fill_x, df1 = df1, df2 = df2)
      fill_df <- data.frame(x = fill_x, density = fill_y)
      
      p <- p + geom_area(data = fill_df, aes(x = x, y = density), 
                        fill = "#3F6BB6", alpha = 0.6)
    }
    
    return(p)
  })
}

shinyApp(ui = ui, server = server)
Calculator: \(F\)-distribution

Further reading

Version history

Mailing List

Feedback