| Title: | Calculates the Density-Based Clustering Validation (DBCV) Index |
|---|---|
| Description: | A metric called the 'Density-Based Clustering Validation' ('DBCV') index to evaluate clustering results, following the <https://github.com/pajaskowiak/clusterConfusion/blob/main/R/dbcv.R> 'R' implementation by Pablo Andretta Jaskowiak. Original 'DBCV' index article: Moulavi, D., Jaskowiak, P. A., Campello, R. J., Zimek, A., and Sander, J. (April 2014), "Density-based clustering validation", Proceedings of SDM 2014 -- the 2014 SIAM International Conference on Data Mining (pp. 839-847), <doi:10.1137/1.9781611973440.96>. A more recent article on the 'DBCV' index: Chicco, D., Sabino, G., Oneto, L., and Jurman, G. (August 2025), "The DBCV index is more informative than DCSI, CDbw, and VIASCKDE indices for unsupervised clustering internal assessment of concave-shaped and density-based clusters", PeerJ Computer Science 11:e3095 (pp. 1-), <doi:10.7717/peerj-cs.3095>. |
| Authors: | Davide Chicco [cre] (ORCID: <https://orcid.org/0000-0001-9655-7142>), Pablo Andretta Jaskowiak [aut] (ORCID: <https://orcid.org/0000-0002-6377-3372>) |
| Maintainer: | Davide Chicco <[email protected]> |
| License: | GPL-3 |
| Version: | 1.6 |
| Built: | 2026-05-12 08:14:05 UTC |
| Source: | https://github.com/cran/DBCVindex |
Function that calculates the original Density-Based Clustering Validation (DBCV) index of clustering results; the returned value lies between -1 and +1
dbcv_index(data, partition, noiseLabel = -1)
data |
input clustering results |
partition |
labels of the clustering |
noiseLabel |
the code of the noise cluster points, -1 by default |
a real value containing the DBCV coefficient in the [-1;+1] interval
n = 300; noise = 0.05; seed = 1782;
theta <- seq(0, pi, length.out = n / 2)
x1 <- cos(theta) + rnorm(n / 2, sd = noise)
y1 <- sin(theta) + rnorm(n / 2, sd = noise)
x2 <- cos(theta + pi) + rnorm(n / 2, sd = noise)
y2 <- sin(theta + pi) + rnorm(n / 2, sd = noise)
X <- rbind(cbind(x1, y1), cbind(x2, y2))
y <- c(rep(0, n / 2), rep(1, n / 2))
cat("DBCV index of (X, y) = ", dbcv_index(X, y), " in the [-1;+1] interval\n", sep="")
Function that calculates the mutual reachability distance within a matrix
matrix_mutual_reachability_distance(MinPts, G_edges_weights, d)
MinPts |
number of minimal points |
G_edges_weights |
matrix of edges weights |
d |
number of features |
a list of two elements: d_ucore and G_edges_weights
n = 300; noise = 0.05; seed = 1782;
theta <- seq(0, pi, length.out = n / 2)
x1 <- cos(theta) + rnorm(n / 2, sd = noise)
y1 <- sin(theta) + rnorm(n / 2, sd = noise)
x2 <- cos(theta + pi) + rnorm(n / 2, sd = noise)
y2 <- sin(theta + pi) + rnorm(n / 2, sd = noise)
X <- rbind(cbind(x1, y1), cbind(x2, y2))
y <- c(rep(0, n / 2), rep(1, n / 2))
nfeatures <- ncol(X)
i <- 1
clusters <- unique(y)
objcl <- which(y == clusters[i])
nuobjcl <- length(objcl)
noiseLabel <- -1
distX <- as.matrix(dist(X))^2
distXy <- distX[y != noiseLabel, y != noiseLabel]
mr <- matrix_mutual_reachability_distance(nuobjcl, distXy[objcl, objcl], nfeatures)
Function that finds the list of MST edges
MST_Edges(G, start, G_edges_weights)
G |
list of four elements: number of vertices, MST_edges (matrix of edges), MST_degrees (array of numbers), MST_parent (array of numbers) |
start |
index of the first edge |
G_edges_weights |
matrix of edges weights |
list of two elements: matrix of edges and array of degrees
n = 300; noise = 0.05; seed = 1782;
theta <- seq(0, pi, length.out = n / 2)
x1 <- cos(theta) + rnorm(n / 2, sd = noise)
y1 <- sin(theta) + rnorm(n / 2, sd = noise)
x2 <- cos(theta + pi) + rnorm(n / 2, sd = noise)
y2 <- sin(theta + pi) + rnorm(n / 2, sd = noise)
X <- rbind(cbind(x1, y1), cbind(x2, y2))
y <- c(rep(0, n / 2), rep(1, n / 2))
nfeatures <- ncol(X)
i <- 1
clusters <- unique(y)
objcl <- which(y == clusters[i])
nuobjcl <- length(objcl)
noiseLabel <- -1
distX <- as.matrix(dist(X))^2
distXy <- distX[y != noiseLabel, y != noiseLabel]
mr <- matrix_mutual_reachability_distance(nuobjcl, distXy[objcl, objcl], nfeatures)
d_ucore_cl <- rep(0, nrow(X))
d_ucore_cl[objcl] <- mr$d_ucore
G <- list(no_vertices = nuobjcl, MST_edges = matrix(0, nrow = nuobjcl - 1, ncol = 3), MST_degrees = rep(0, nuobjcl), MST_parent = rep(0, nuobjcl))
g_start <- 1
mst_results <- MST_Edges(G, g_start, mr$G_edges_weights)
Function that calculates the normalized Density-Based Clustering Validation (DBCV) index of clustering results; the returned value lies between 0 and 1
norm_dbcv_index(data, partition, noiseLabel = -1)
data |
input clustering results |
partition |
labels of the clustering |
noiseLabel |
the code of the noise cluster points, -1 by default |
a real value containing the normalized DBCV coefficient in the [0;1] interval
n = 200; noise = 0.04; seed = 1783;
theta <- seq(0, pi, length.out = n / 2)
x1 <- cos(theta) + rnorm(n / 2, sd = noise)
y1 <- sin(theta) + rnorm(n / 2, sd = noise)
x2 <- cos(theta + pi) + rnorm(n / 2, sd = noise)
y2 <- sin(theta + pi) + rnorm(n / 2, sd = noise)
X <- rbind(cbind(x1, y1), cbind(x2, y2))
y <- c(rep(0, n / 2), rep(1, n / 2))
cat("normalized DBCV index of (X, y) = ", norm_dbcv_index(X, y), " in the [0;1] interval\n", sep="")