---
title: "Constructing Oblique Trees with svmodt"
output:
  bookdown::html_document2:
    base_format: rmarkdown::html_vignette
pkgdown:
  as_is: true
vignette: >
  %\VignetteIndexEntry{Constructing Oblique Trees with svmodt}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r, include = FALSE}
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
```

This vignette demonstrates how to:

1. Train SVMODT models on binary and multiclass classification data
2. Make predictions and evaluate performance
3. Visualize decision boundaries

We'll use three datasets:

- **Palmer Penguins**: Ecological data for species classification
- **Wisconsin Breast Cancer**: Medical diagnosis data
- **Wine**: Chemical measurements for multiclass cultivar classification

```{r setup, message=FALSE, warning=FALSE}
# Packages required for this vignette
pkgs <- c(
  "rpart", "e1071", "dplyr", "tidyr", "ggplot2",
  "rsample", "gridExtra", "kableExtra", "palmerpenguins"
)

# Check that every required package is installed. The packages are NOT
# attached here (they are called via `pkg::fun()` below); all missing
# packages are reported together in a single error.
is_installed <- vapply(pkgs, requireNamespace, logical(1), quietly = TRUE)
if (!all(is_installed)) {
  stop(
    sprintf(
      "Package(s) required to run this vignette are not installed: %s",
      paste(pkgs[!is_installed], collapse = ", ")
    ),
    call. = FALSE
  )
}

library(svmodt)
```

## Example 1: Palmer Penguins Classification

The Palmer Penguins dataset contains measurements of three penguin species from Antarctica. We'll build a classifier to distinguish between **Adelie** and **Chinstrap** penguins.
```{r penguins-prep}
# Adelie vs Chinstrap: keep the two species and four numeric measurements,
# drop incomplete rows, then drop the now-unused third species level
penguins_data <- palmerpenguins::penguins |>
  dplyr::filter(species %in% c("Adelie", "Chinstrap")) |>
  dplyr::select(
    species, bill_length_mm, bill_depth_mm,
    flipper_length_mm, body_mass_g
  ) |>
  na.omit() |>
  dplyr::mutate(species = droplevels(species))

# Stratified 80/20 train/test split
set.seed(234)
split_penguins <- rsample::initial_split(
  penguins_data,
  prop = 0.8,
  strata = species
)
train_penguins <- rsample::training(split_penguins)
test_penguins <- rsample::testing(split_penguins)
```

### Train SVMODT Model

```{r penguins-train}
# Train basic SVMODT
tree_penguins <- svm_split(
  data = train_penguins,
  response = "species",
  max_depth = 3,
  max_features = 2,
  feature_method = "mutual",
  verbose = FALSE
)
```

### Examine Tree Structure

```{r penguins-structure}
# Print tree structure
print(tree_penguins,
  show_probabilities = TRUE,
  show_feature_info = TRUE
)
```

### Make Predictions

```{r penguins-predict}
# Predict classes only
predictions <- predict(tree_penguins, test_penguins)

# Predict with probabilities
predictions_prob <- predict(tree_penguins, test_penguins,
  return_probs = TRUE
)

# View first few predictions
head(data.frame(
  Actual = test_penguins$species,
  Predicted = predictions_prob$predictions,
  Prob_Adelie = round(predictions_prob$probabilities[, "Adelie"], 3),
  Prob_Chinstrap = round(predictions_prob$probabilities[, "Chinstrap"], 3)
), 10) |>
  kableExtra::kable(
    align = "lccc",
    format = "html",
    caption = "SVMODT Class Predictions with Associated Probabilities on Palmerpenguins dataset"
  ) |>
  kableExtra::kable_styling(position = "center", full_width = FALSE)
```

Table \@ref(tab:penguins-predict) shows the predicted class along with the associated probabilities for the first 10 observations of the Palmer Penguins test set. The prediction function returns the majority class at each node along with the class proportions. In this table we can observe that all the observations have been correctly predicted.
### Visualize Decision Boundaries

```{r penguins-viz, fig.height=6, fig.width=8}
#| fig-cap: SVMODT Split at Root Node (Depth = 1)

# Draw the decision-boundary plots for the fitted tree
viz_penguins <- plot(
  tree_penguins,
  data = train_penguins,
  response = "species",
  plot.type = "boundary",
  max_depth = 3
)

# Display the plot for the root node
viz_penguins$plots[["depth_1_Root"]]
```

Figure \@ref(fig:penguins-viz) depicts the root node of the SVM-based oblique decision tree, where the first split is performed using the features `bill_length_mm` and `flipper_length_mm`. The figure illustrates how the linear SVM at the root node divides the dataset into two branches, guiding samples toward subsequent child nodes.

```{r penguins-viz-2, fig.height=6, fig.width=8}
#| fig-cap: SVMODT Split at Child Node (Depth = 2)

# Left and right children of the root, side by side
gridExtra::grid.arrange(
  viz_penguins$plots[["depth_2_Root_L"]],
  viz_penguins$plots[["depth_2_Root_R"]],
  ncol = 2
)
```

Figure \@ref(fig:penguins-viz-2) shows the two child nodes at depth 2 of the tree, where the SVM uses `bill_length_mm` and `bill_depth_mm` to further partition the data. The decision hyperplane and sample positions are plotted, highlighting how oblique splits can capture multivariate relationships that univariate thresholds cannot.

### Trace Prediction Path

```{r penguins-trace}
trace_path(tree_penguins, test_penguins, sample_idx = 1)
```

## Example 2: Wisconsin Breast Cancer Diagnosis

### About the Data

The Wisconsin Breast Cancer dataset contains features computed from digitized images of fine needle aspirate (FNA) of breast masses. The task is to classify tumors as **Benign (B)** or **Malignant (M)**.
### Load and Prepare Data

```{r wdbc-prep}
# Stratified 80/20 train/test split on the diagnosis label
set.seed(234)
split_wdbc <- rsample::initial_split(wdbc, prop = 0.8, strata = diagnosis)
train_wdbc <- rsample::training(split_wdbc)
test_wdbc <- rsample::testing(split_wdbc)
```

### Train with Class Weights

```{r wdbc-train}
tree_wdbc <- svm_split(
  data = train_wdbc,
  response = "diagnosis",
  max_depth = 4,
  min_samples = 10,
  max_features = 2,
  feature_method = "mutual",
  class_weights = "balanced", # For Class Imbalance
  verbose = FALSE
)
```

### Evaluate Performance

```{r wdbc-eval}
preds_wdbc <- predict(tree_wdbc, test_wdbc)

# Test-set accuracy and confusion matrix
cat("Accuracy:", round(mean(preds_wdbc == test_wdbc$diagnosis), 4), "\n")
print(table(Predicted = preds_wdbc, Actual = test_wdbc$diagnosis))
```

### Advanced Features

#### Feature Selection with Penalties

svmodt promotes feature diversity by penalizing previously used features in ancestor nodes.

```{r feature-penalty}
# Train with feature penalty
tree_penalty <- svm_split(
  data = train_penguins,
  response = "species",
  max_depth = 4,
  max_features = 2,
  feature_method = "cor",
  penalize_used_features = TRUE,
  feature_penalty_weight = 0.6,
  verbose = FALSE
)
```

#### Dynamic Feature Selection

The package also allows the user to either randomize or decrease the number of features in child nodes.

```{r dynamic-features}
# Decrease features with depth
tree_decrease <- svm_split(
  data = train_wdbc,
  response = "diagnosis",
  max_depth = 5,
  max_features = 10,
  max_features_strategy = "decrease",
  max_features_decrease_rate = 0.7,
  verbose = FALSE
)

# Random feature selection
tree_random <- svm_split(
  data = train_wdbc,
  response = "diagnosis",
  max_depth = 4,
  max_features_strategy = "random",
  max_features_random_range = c(0.3, 0.8),
  verbose = FALSE
)
```

#### Custom Class Weights

We can also supply custom class weights for the class labels, chosen using domain expertise. Giving the more critical class a higher weight helps reduce the number of false negatives.
```{r custom-weights}
# Give malignant cases higher weight
custom_weights <- c(B = 1, M = 3)

tree_custom <- svm_split(
  data = train_wdbc,
  response = "diagnosis",
  max_depth = 4,
  max_features = 8,
  class_weights = "custom",
  custom_class_weights = custom_weights,
  verbose = FALSE
)
```

## Example 3: Multiclass Classification — Wine Dataset

The Wine dataset contains chemical measurements for 178 wine samples from three Italian cultivars. This demonstrates SVMODT's multiclass capability via one-vs-rest splitting at each node.

```{r wine-prep}
# The response must be a factor before it is used for stratification
wine[["class"]] <- as.factor(wine[["class"]])

# Stratified 80/20 train/test split
set.seed(234)
split_wine <- rsample::initial_split(wine, prop = 0.8, strata = class)
train_wine <- rsample::training(split_wine)
test_wine <- rsample::testing(split_wine)
```

### Train Multiclass SVMODT

At each internal node, SVMODT iterates over all present classes, fits a binary one-vs-rest SVM, and selects the split that maximally reduces entropy. This continues recursively until `max_depth` is reached or `min_impurity_decrease` prevents uninformative splits.

```{r wine-train}
tree_wine <- svm_split(
  data = train_wine,
  response = "class",
  max_depth = 5,
  max_features = 5,
  feature_method = "mutual",
  impurity_measure = "entropy",
  min_impurity_decrease = 0.01,
  class_weights = "balanced",
  penalize_used_features = TRUE,
  feature_penalty_weight = 0.5,
  verbose = FALSE
)
```

### Visualize Decision Surface

```{r wine-multiclass-viz, fig.height=6, fig.width=8}
#| fig-cap: SVMODT Decision Surface on Wine Dataset
plot(
  tree_wine,
  data = train_wine,
  response = "class",
  plot.type = "surface"
)
```

Figure \@ref(fig:wine-multiclass-viz) displays the multiclass decision surface on the Wine dataset across two features in the tree.
### Tree Structure

```{r wine-structure}
print(
  tree_wine,
  show_probabilities = FALSE,
  show_feature_info = TRUE,
  show_penalties = TRUE
)
```

Each internal node shows the class used for the one-vs-rest binary split (`split: X vs rest`), the features selected at that node, and whether feature penalization was active (`penalty = !`).

### Performance

```{r wine-eval}
preds_wine <- predict(tree_wine, newdata = test_wine)

# Proportion of test observations whose predicted class matches the label
acc_wine <- mean(preds_wine == test_wine$class)
cat("Test accuracy:", round(acc_wine, 4), "\n")

# Cross-tabulate predicted against actual classes
conf_mat <- table(
  Predicted = preds_wine,
  Actual = test_wine$class
)
print(conf_mat)
```

### Trace a Prediction Path

```{r wine-trace}
# Show how the first test observation is routed through the tree
trace_path(tree_wine, test_wine, sample_idx = 1)
```

## Model Comparison

Now, we will compare the performance of our SVMODT model with other classification models such as a Linear SVM and a Decision Tree. For our SVMODT tree we are using a tree-depth of 2 with mutual feature selection and penalization. For the decision tree we are setting the cost-complexity parameter to 0.01. Lastly, for the linear SVM model we have not done any hyperparameter tuning and are using the default cost parameter of C = 1.
```{r comparison}
# RPART decision tree (cost-complexity parameter cp = 0.01)
tree_rpart <- rpart::rpart(diagnosis ~ .,
  data = train_wdbc,
  control = rpart::rpart.control(cp = 0.01)
)
pred_rpart <- predict(tree_rpart, test_wdbc, type = "class")

# SVMODT tree: depth 2, mutual feature selection, feature penalization.
# `verbose = FALSE` matches every other svm_split() call in this vignette.
tree_wdbc <- svm_split(
  data = train_wdbc,
  response = "diagnosis",
  max_depth = 2,
  feature_method = "mutual",
  penalize_used_features = TRUE,
  verbose = FALSE
)

# Linear SVM, untuned (cost C = 1). The kernel is set explicitly because
# e1071::svm() fits a radial-basis kernel when `kernel` is not supplied.
model_svm <- e1071::svm(diagnosis ~ .,
  data = train_wdbc,
  kernel = "linear",
  cost = 1,
  probability = TRUE
)
pred_svm <- predict(model_svm, test_wdbc)

# Get SVMODT predictions
pred_svmodt <- predict(tree_wdbc, test_wdbc)

# Compare accuracies
results <- data.frame(
  Model = c("SVMODT", "RPART", "Linear SVM"),
  Accuracy = c(
    mean(pred_svmodt == test_wdbc$diagnosis),
    mean(pred_rpart == test_wdbc$diagnosis),
    mean(pred_svm == test_wdbc$diagnosis)
  )
)

results |>
  kableExtra::kable(
    align = "lc",
    format = "html",
    digits = 4,
    caption = "Comparing Test set Accuracy of SVMODT model with a Linear SVM and a Decision Tree"
  ) |>
  kableExtra::kable_styling(position = "center", full_width = FALSE)
```

<!-- NOTE(review): re-knit and confirm the conclusion below still matches the table now that the SVM is fitted with a linear kernel. -->

From Table \@ref(tab:comparison) we can observe that our SVMODT model has outperformed both the Linear SVM model and the Rpart Decision tree on the test set.