clustering

R Markdown

We load the data and keep the class labels for only 20% of the observations:


library(tidyverse)
library(caret)
library(SSLR)
library(tidymodels)
# Load the `wine` dataset into the session.
# NOTE(review): `wine` is not used by the code visible in this section --
# confirm whether this call is still needed.
data(wine)

# Work on a copy of iris so the built-in dataset itself stays untouched.
data <- iris

# Fix the RNG seed so the labeled/unlabeled split is reproducible.
set.seed(1)

# Position of the class column, looked up on the working copy `data` so the
# index stays correct if `data` is ever pointed at a different dataset.
cls <- which(colnames(data) == "Species")

# Select the rows that keep their label: a partition of 20% of the rows
# (p = .2) drawn on the Species column.
labeled.index <- createDataPartition(data$Species, p = .2, list = FALSE)

# Every row outside that partition has its class set to NA (unlabeled).
data[-labeled.index, cls] <- NA

For example, we can train a Constrained K-means model:

# Fit a constrained k-means specification on the partially labeled data,
# with Species as the class column and all other columns as features.
m <- fit(constrained_kmeans(), Species ~ ., data)

Labels:

# Print the cluster label assigned to each observation by the fitted model.
cluster_labels(m)
#> # A tibble: 150 × 1
#>    .pred_class
#>    <fct>      
#>  1 1          
#>  2 1          
#>  3 1          
#>  4 1          
#>  5 1          
#>  6 1          
#>  7 1          
#>  8 1          
#>  9 1          
#> 10 1          
#> # ℹ 140 more rows

Centers:

# Print the cluster centers of the fitted model (one row per cluster).
get_centers(m)
#>      [,1] [,2] [,3]  [,4]
#> [1,] 5.01 3.43 1.46 0.246
#> [2,] 5.85 2.75 4.32 1.400
#> [3,] 6.76 3.02 5.62 2.013

We can plot clusters with factoextra:

library(factoextra)

# Plot the clusters of the underlying model object; the class column is
# dropped so only the feature columns are passed, as a matrix.
fviz_cluster(
  m$model,
  data = as.matrix(data[, -cls])
)