To demonstrate the SSLR regression
models, we will use the
airquality dataset with 10% of the training data labeled:
# Chunk rendering defaults for the whole document: prefix printed output
# with "#>" and collapse each chunk's source and output into one block.
# `digits` is stored alongside them as a chunk option.
knitr::opts_chunk$set(
  comment = "#>",
  collapse = TRUE,
  digits = 3
)
# Print numbers with 3 significant digits.
options(digits = 3)
library(SSLR)
library(tidymodels)
# Fix the RNG seed so the train/test split and the labeled subset below are
# reproducible.
set.seed(1)
data <- airquality
# Drop the Solar.R column (it contains NA values).
data$Solar.R <- NULL
# Train/test split: 70% of the rows go to training, the rest to testing.
train.index <- sample(nrow(data), round(0.7 * nrow(data)))
train <- data[train.index, ]
test <- data[-train.index, ]
# Position of the response column. It is looked up in `train` rather than in
# the original `airquality`, because a column was dropped above and positions
# in the two data frames are therefore not guaranteed to agree.
cls <- which(colnames(train) == "Ozone")
# Keep the response for 10% of the training rows only; the response of every
# other training row is set to NA, which marks it as unlabeled.
labeled.index <- sample(nrow(train), round(0.1 * nrow(train)))
train[-labeled.index, cls] <- NA
For example, we can train a Decision Tree:
# Semi-supervised decision tree on the partially labeled training set.
# The minimum split size is a quarter of the number of labeled rows.
m <- SSLRDecisionTree(
  w = 0.3,
  min_samples_split = round(0.25 * length(labeled.index))
) %>%
  fit(Ozone ~ ., data = train)
Now we can evaluate the model with metrics from the yardstick
package:
# Predict on the held-out rows, attach the predictions to the test data and
# compare the `.pred` column against the observed Ozone values.
predict(m, test) %>%
  bind_cols(test) %>%
  metrics(truth = "Ozone", estimate = .pred)
#> # A tibble: 3 × 3
#> .metric .estimator .estimate
#> <chr> <chr> <dbl>
#> 1 rmse standard 29.8
#> 2 rsq standard 0.525
#> 3 mae standard 18.6
We can also use a Random Forest as the base learner of another model.
For example, we can train with coBC:
# Base learner: a random forest regression specification using the ranger
# engine.
m_r <- rand_forest(mode = "regression") %>%
  set_engine("ranger")
# Fit coBC with that learner on the partially labeled training set, limited
# to a single iteration.
m <- coBC(learner = m_r, max.iter = 1) %>%
  fit(Ozone ~ ., data = train)
We can train with COREG: