Factsheet: Lognormal distribution
#| '!! shinylive warning !!': |
#| shinylive does not work in self-contained HTML documents.
#| Please set `embed-resources: false` in your metadata.
#| standalone: true
#| viewerHeight: 740
library(shiny)
library(bslib)
library(ggplot2)
# User interface for the lognormal calculator: a parameter/input card next to
# the density plot, with a results card underneath. Built inside local() so
# the small builder helpers do not leak into the global environment.
ui <- local({
  # Every x-value slider starts with the same range (0 to 10) and step (0.1);
  # only the input id, label and starting value differ.
  x_slider <- function(id, label, value) {
    sliderInput(id, label, min = 0, max = 10, value = value, step = 0.1)
  }

  # Wrap content so it is only shown while the given probability type is the
  # selected radio button.
  shown_for <- function(type, ...) {
    conditionalPanel(
      condition = sprintf("input.prob_type == '%s'", type),
      ...
    )
  }

  # Left column - Inputs
  inputs_card <- card(
    card_header("Parameters"),
    card_body(
      numericInput("meanlog", "Log mean (μ):", value = 0, step = 0.1),
      numericInput(
        "sdlog", "Log standard deviation (σ):",
        value = 1, min = 0.01, step = 0.1
      ),
      hr(),
      radioButtons(
        "prob_type", "Probability to calculate:",
        choices = list(
          "P(X ≤ x)" = "less",
          "P(X ≥ x)" = "greater",
          "P(x ≤ X ≤ y)" = "between"
        ),
        selected = "less"
      ),
      shown_for("less", x_slider("x_less", "x value:", 1)),
      shown_for("greater", x_slider("x_greater", "x value:", 1)),
      shown_for(
        "between",
        x_slider("x_lower", "Lower bound (x):", 0.5),
        x_slider("x_upper", "Upper bound (y):", 2)
      )
    )
  )

  # Right column - Plot
  plot_card <- card(
    card_header("Lognormal distribution plot"),
    card_body(
      uiOutput("plot_title"),
      plotOutput("distPlot", height = "300px")
    )
  )

  # Bottom row - Results
  results_card <- card(
    card_header("Results"),
    card_body(textOutput("explanation"))
  )

  page_fluid(
    title = "Lognormal distribution calculator",
    layout_columns(col_widths = c(4, 8), inputs_card, plot_card),
    results_card
  )
})
# Server logic for the lognormal probability calculator.
#
# Standard Shiny server arguments:
#   input   - reads meanlog, sdlog, prob_type, x_less, x_greater, x_lower
#             and x_upper.
#   output  - fills plot_title, explanation and distPlot.
#   session - used to push new ranges/values to the sliders.
server <- function(input, output, session) {
  # Validated distribution parameters. An emptied numeric box arrives as NA
  # and a non-positive sdlog makes qlnorm()/dlnorm() return NaN, so everything
  # downstream reads the parameters through this reactive rather than `input`.
  params <- reactive({
    meanlog <- input$meanlog
    sdlog <- input$sdlog
    validate(
      need(
        isTRUE(is.finite(meanlog)),
        "Please enter a number for the log mean (μ)."
      ),
      need(
        isTRUE(is.finite(sdlog)) && sdlog > 0,
        "Please enter a positive number for the log standard deviation (σ)."
      )
    )
    list(meanlog = meanlog, sdlog = sdlog)
  })

  # Upper end of the x range shared by the sliders and the plot: the 99.5%
  # quantile (captures most of the meaningful density), capped at 100.
  x_max <- reactive({
    p <- params()
    min(qlnorm(0.995, p$meanlog, p$sdlog), 100)
  })

  # When parameters change, adjust the range of all x sliders
  observe({
    max_x <- x_max()
    for (slider_id in c("x_less", "x_greater", "x_lower", "x_upper")) {
      updateSliderInput(session, slider_id, max = max_x)
    }
  })

  # Ensure that x_upper is always greater than or equal to x_lower
  observe({
    if (input$x_upper < input$x_lower) {
      updateSliderInput(session, "x_upper", value = input$x_lower)
    }
  })

  # Display the plot title with distribution parameters
  output$plot_title <- renderUI({
    p <- params()
    tags$h4(
      sprintf("Lognormal(μ = %.2f, σ = %.2f)", p$meanlog, p$sdlog),
      style = "text-align: center; margin-bottom: 15px;"
    )
  })

  # Calculate the probability based on user selection.
  # Returns a list with `prob`, `explanation` and `type`, plus `x` for the
  # one-sided cases or `lower`/`upper` for the interval case.
  probability <- reactive({
    p <- params()
    cdf <- function(q, lower.tail = TRUE) {
      plnorm(q, meanlog = p$meanlog, sdlog = p$sdlog, lower.tail = lower.tail)
    }

    if (input$prob_type == "less") {
      x <- input$x_less
      prob <- cdf(x)
      list(
        prob = prob,
        explanation = sprintf(
          "P(X ≤ %.1f) = %.6f or %.4f%%", x, prob, prob * 100
        ),
        type = "less",
        x = x
      )
    } else if (input$prob_type == "greater") {
      x <- input$x_greater
      # lower.tail = FALSE keeps precision in the far upper tail, where
      # 1 - plnorm(x) would round to 0.
      prob <- cdf(x, lower.tail = FALSE)
      list(
        prob = prob,
        explanation = sprintf(
          "P(X ≥ %.1f) = %.6f or %.4f%%", x, prob, prob * 100
        ),
        type = "greater",
        x = x
      )
    } else if (input$prob_type == "between") {
      lower <- input$x_lower
      upper <- input$x_upper
      # Equal bounds give 0 (for continuous distributions P(X = a) = 0). The
      # max() also covers the brief moment before the observer above has
      # pulled x_upper back up to x_lower.
      prob <- max(0, cdf(upper) - cdf(lower))
      list(
        prob = prob,
        explanation = sprintf(
          "P(%.1f ≤ X ≤ %.1f) = %.6f or %.4f%%",
          lower, upper, prob, prob * 100
        ),
        type = "between",
        lower = lower,
        upper = upper
      )
    }
  })

  # Display an explanation of the calculation
  output$explanation <- renderText({
    probability()$explanation
  })

  # Generate the lognormal distribution plot
  output$distPlot <- renderPlot({
    p <- params()
    max_x <- x_max()

    # Start just above 0; the start scales with max_x so the grid still
    # increases when the whole distribution sits below 0.01.
    x_start <- min(0.01, max_x / 500)
    x_values <- seq(x_start, max_x, length.out = 500)
    plot_df <- data.frame(
      x = x_values,
      density = dlnorm(x_values, meanlog = p$meanlog, sdlog = p$sdlog)
    )

    # Create base plot
    dist_plot <- ggplot(plot_df, aes(x = x, y = density)) +
      geom_line(linewidth = 1, color = "darkgray") +
      labs(x = "X", y = "probability density function") +
      theme_minimal() +
      theme(panel.grid.minor = element_blank()) +
      xlim(0, max_x)

    # Work out the interval to shade for the selected probability type
    res <- probability()
    bounds <- switch(res$type,
      less = c(0, res$x),
      greater = c(res$x, max_x),
      between = c(res$lower, res$upper)
    )
    # Clamp to the plotted range: a slider value can briefly exceed a freshly
    # shrunk maximum, and points outside xlim() would be dropped.
    shade_from <- max(bounds[1], 0)
    shade_to <- min(bounds[2], max_x)

    # Skip the layer when there is nothing to shade
    if (shade_to > shade_from) {
      fill_x <- seq(shade_from, shade_to, length.out = 200)
      fill_df <- data.frame(
        x = fill_x,
        density = dlnorm(fill_x, meanlog = p$meanlog, sdlog = p$sdlog)
      )
      dist_plot <- dist_plot +
        geom_area(
          data = fill_df, aes(x = x, y = density),
          fill = "#3F6BB6", alpha = 0.6
        )
    }

    dist_plot
  })
}
# Combine the UI definition and the server function into the app object.
shinyApp(ui, server)
Where to use: The lognormal distribution is used to model continuous random variables that take strictly positive real values and whose logarithms follow a normal distribution. That is to say, if the random variable \(X\) is lognormally distributed, then the random variable \(Y = \ln(X)\) is normally distributed (where \(\ln\) is the natural logarithm).
Notation: \(X \sim \textrm{Lognormal}(\mu,\sigma^2)\)
Parameters: As with the normal distribution, two numbers \(\mu\) and \(\sigma^2\) where:
- \(\mu\) is the expected value of the normally distributed random variable \(Y = \ln(X)\),
- \(\sigma^2\) is the variance of the normally distributed random variable \(Y = \ln(X)\).
Quantity | Value | Notes |
---|---|---|
Mean | \(\mathbb{E}(X) = \exp(\mu+\frac{\sigma^2}{2})\) | \(\exp(y) = e^y\) |
Variance | \(\mathbb{V}(X) = [\exp(\sigma^2)-1]\exp(2\mu+\sigma^2)\) | \(\exp(y) = e^y\) |
PDF | \(f(x)=\dfrac{1}{x\sigma\sqrt{2\pi}}\exp\left(-\dfrac{(\ln(x)-\mu)^2}{2\sigma^2}\right)\) for \(x > 0\) | \(\exp(y) = e^y\) |
CDF | \(\displaystyle\mathbb{P}(X\leq x)=\dfrac{1}{2}\left[1+\textrm{erf}\left(\dfrac{\ln(x)-\mu}{\sigma\sqrt{2}}\right)\right]\) | \(\textrm{erf}(x)\) is the error function of \(x\) |
Example: The logarithms of Cantor’s Confectionery’s stock prices follow a normal distribution. The mean of the stock prices’ natural logarithms is \(8.01\), whereas the variance of the stock prices’ natural logarithms is \(3\). This can be expressed as \(X \sim \textrm{Lognormal}(8.01, 3)\), meaning that \(\mu = 8.01\) is the mean and \(\sigma^2 = 3\) is the variance of \(\ln(X)\), the natural logarithm of the stock price.
Further reading
Version history
- v1.0: initial version created 04/25 by tdhc and Michelle Arnetta as part of a University of St Andrews VIP project.
- v1.1: moved to factsheet form and populated with material from Overview: Probability distributions by tdhc.