# --- Confidence Intervals for Coefficients ---
# Fit a simple linear regression of `response` on `explanatory`.
fit <- lm(formula = response ~ explanatory, data = dataset)

# Print the model summary.
summary(fit)

# Confidence intervals for every coefficient (intercept and slope),
# using confint()'s default confidence level.
confint(object = fit)

# Same interval, restricted to the slope term only.
confint(object = fit, parm = "explanatory")

# 99% intervals for all coefficients; `level` is 1 - alpha, so alpha = 0.01.
confint(object = fit, level = 0.99)
# --- Confidence & Prediction for New Observations ---
# Build a one-row data frame holding the x value to predict at.
# Option 1: carry a placeholder (NA) response column next to the predictor.
new_data <- data.frame(response = NA, explanatory = 2.5)
# Option 2: supply only the predictor. This assignment replaces option 1.
new_data <- data.frame(explanatory = 2.5)

# Confidence interval: 95% interval for the MEAN response at the given x.
predictionCI <- predict(
  object = fit,
  newdata = new_data,
  interval = "confidence",
  level = 0.95
)
predictionCI

# Prediction interval: 95% interval for an INDIVIDUAL response at the given x.
# It is wider than the confidence interval because it also accounts for the
# scatter of single observations around the mean.
predictionPI <- predict(
  object = fit,
  newdata = new_data,
  interval = "prediction",
  level = 0.95
)
predictionPI
# --- Plot CI and PI Around Fitted Line ---
# Fitted values with 95% confidence limits (mean response) and 95% prediction
# limits (individual response) for every row of `dataset`, the data `fit` was
# estimated from. Passing `newdata = dataset` returns exactly one row per data
# row (NA where the predictor is missing), so the columns below always line up
# with `dataset` even if lm() dropped incomplete rows.
# Because se.fit = TRUE, `predictions` is a list; its `$fit` element is the
# matrix with columns "fit", "lwr" and "upr".
predictions <- predict(
  fit,
  newdata = dataset,
  interval = "confidence",
  level = 0.95,
  se.fit = TRUE
)
prediction_intervals <- predict(
  fit,
  newdata = dataset,
  interval = "prediction",
  level = 0.95
)

# Attach the fitted values and both sets of limits to the modelled data.
# Each new column is a plain numeric vector taken from one matrix column.
dataset <- dataset %>%
  mutate(
    fit = predictions$fit[, "fit"],
    lwr_conf = predictions$fit[, "lwr"],
    upr_conf = predictions$fit[, "upr"],
    lwr_pred = prediction_intervals[, "lwr"],
    upr_pred = prediction_intervals[, "upr"]
  )

# Plot the fitted line with the confidence band (ribbon) and the prediction
# limits (dashed lines). The horizontal line marks response = 210, the same
# y0 value used in the calibration section of this script.
ggplot(dataset, aes(x = explanatory, y = response)) +
  geom_smooth(method = "lm", formula = y ~ x, se = FALSE, color = "blue") +
  geom_ribbon(
    aes(ymin = lwr_conf, ymax = upr_conf),
    alpha = 0.5,
    fill = "lightblue"
  ) +
  geom_line(aes(y = lwr_pred), linetype = "dashed", color = "black") +
  geom_line(aes(y = upr_pred), linetype = "dashed", color = "black") +
  geom_hline(yintercept = 210, color = "darkred") +
  geom_point() +
  labs(
    title = "Regression Line with 95% Confidence and Prediction Intervals",
    x = "Explanatory",
    y = "Response"
  ) +
  theme_bw()
# --- Calibration Intervals (Reverse Prediction) ---
# Inverse estimation: find the x value that corresponds to a given response y0.
library(investr)

# NOTE(review): `limit` does not appear to be a documented calibrate()
# argument; it is presumably absorbed by `...` - confirm against investr docs.

# Wald calibration interval for the x at which the MEAN response equals y0.
calibrate(
  fit,
  y0 = 210,
  interval = "Wald",
  mean.response = TRUE,
  limit = FALSE
)

# Wald calibration interval for the x behind a single INDIVIDUAL response y0.
calibrate(
  fit,
  y0 = 210,
  interval = "Wald",
  mean.response = FALSE,
  limit = FALSE
)
# --- R-squared Interpretation ---
# R-squared is the proportion of the variation in the response that is
# accounted for by the explanatory variable.

# Keep the model summary and pull the R-squared value out of it.
summary_fit <- summary(fit)
R_squared <- summary_fit[["r.squared"]]

# Report the raw value, then the same value expressed as a percentage.
cat("The R-squared value is", R_squared, "\n")
cat(
  "This means that",
  round(R_squared * 100, 2),
  "% of the variability in Response is explained by Explanatory."
)