This is a third regression demonstration, using the King County house-sales dataset (~21,600 rows), which is shipped in full with the package.
# Attach the package, load its bundled `house` data set into the workspace,
# and print a compact overview of the columns.
library(blockwise)
data("house")
str(house)
#> 'data.frame': 21597 obs. of 17 variables:
#> $ bedrooms : int 3 3 2 4 3 4 3 3 3 3 ...
#> $ bathrooms : num 1 2.25 1 3 2 4.5 2.25 1.5 1 2.5 ...
#> $ sqft_living : int 1180 2570 770 1960 1680 5420 1715 1060 1780 1890 ...
#> $ sqft_lot : int 5650 7242 10000 5000 8080 101930 6819 9711 7470 6560 ...
#> $ floors : num 1 2 1 1 1 1 2 1 1 2 ...
#> $ waterfront : int 0 0 0 0 0 0 0 0 0 0 ...
#> $ view : int 0 0 0 0 0 0 0 0 0 0 ...
#> $ condition : int 3 3 3 5 3 3 3 3 3 3 ...
#> $ grade : int 7 7 6 7 8 11 7 7 7 7 ...
#> $ sqft_above : int 1180 2170 770 1050 1680 3890 1715 1060 1050 1890 ...
#> $ sqft_basement: int 0 400 0 910 0 1530 0 0 730 0 ...
#> $ yr_built : int 1955 1951 1933 1965 1987 2001 1995 1963 1960 2003 ...
#> $ yr_renovated : int 0 1991 0 0 0 0 0 0 0 0 ...
#> $ zip : int 981 981 980 981 980 980 980 981 981 980 ...
#> $ sqft_living15: int 1340 1690 2720 1360 1800 4760 2238 1650 1780 2390 ...
#> $ sqft_lot15 : int 5650 7639 8062 5000 7503 101930 6819 9711 8113 7570 ...
#> $ price : num 221900 538000 180000 604000 510000 ...

# Build a copy of `house` with missing values injected into two groups of
# columns: the three size-related columns and the four layout/quality columns.
# NOTE(review): `prop_missing` and `noise` are passed through unchanged; their
# exact semantics are defined by simulate_blockwise_missing() -- confirm there.
# NOTE(review): no seed is set before this call. If the simulation draws random
# numbers, the result is not reproducible as written -- confirm and seed first
# if needed.
house_miss <- simulate_blockwise_missing(
  house,
  blocks = list(
    c("sqft_living", "sqft_lot", "sqft_above"),
    c("bedrooms", "bathrooms", "floors", "grade")
  ),
  prop_missing = 0.30,
  noise = 0.05
)
# Seeded 75/25 split of the rows of `house_miss` into training and test sets.
set.seed(1234)
n_rows <- nrow(house_miss)
idx <- sample(n_rows, floor(0.75 * n_rows))
train <- house_miss[idx, ]
test <- house_miss[-idx, ]

# Every column except the response ("price") is used as a predictor.
feature_cols <- setdiff(names(house_miss), "price")
X_train <- train[, feature_cols]
y_train <- train[["price"]]
X_test <- test[, feature_cols]
y_test <- test[["price"]]
# Re-seed the RNG immediately before fitting so the fit starts from a known
# RNG state (whether brm() itself draws random numbers is not visible here).
set.seed(1234)
# Fit the model on the training predictors and response; the printed summary
# identifies the result as a "Blockwise Reduced Model (BRM)".
# NOTE(review): learner_lm() presumably supplies a linear-model base learner --
# confirm against the blockwise documentation.
fit <- brm(X_train, y_train, learner = learner_lm())
# Auto-print the fitted object at top level.
fit
#> Blockwise Reduced Model (BRM)
#> blocks : 4
#> overlap : TRUE
#> learner type : regression
#> features : 16
#> cols / block : 16, 13, 12, 9
# Predict on the held-out rows, then report the root-mean-squared error
# rounded to a whole number.
pred <- predict(fit, X_test)
test_residuals <- y_test - pred
rmse <- sqrt(mean(test_residuals^2))
cat("RMSE:", round(rmse), "\n")
#> RMSE: 217514

Srinivasan, K., Currim, F., and Ram, S. (2025). A Reduced Modeling Approach for Making Predictions With Incomplete Data Having Blockwise Missing Patterns. INFORMS Journal on Data Science.