Title: | Dot Plots Mimicking Violin Plots |
---|---|
Description: | Modifies dot plots to have different sizes of dots mimicking violin plots and identifies modes or peaks for them based on frequency and kernel density estimates (Rosenblatt, 1956) <doi:10.1214/aoms/1177728190> (Parzen, 1962) <doi:10.1214/aoms/1177704472>. |
Authors: | Fernando Roa [aut, cre], Mariana Pires de Campos Telles [ctb] |
Maintainer: | Fernando Roa <[email protected]> |
License: | GPL (>= 2) |
Version: | 0.0.1 |
Built: | 2024-11-23 06:31:53 UTC |
Source: | CRAN |
This function makes a dot-plot and violin-plot
dots_and_violin( dataframe, colgroup, collabel, maxcountcol, widthdots, maxx, labelx, desiredorder, binwidth, adjust, binexp, fill_group = "fill_group", dots = TRUE, violin = TRUE )
dots_and_violin( dataframe, colgroup, collabel, maxcountcol, widthdots, maxx, labelx, desiredorder, binwidth, adjust, binexp, fill_group = "fill_group", dots = TRUE, violin = TRUE )
dataframe |
dataframe |
colgroup |
chr column to group by |
collabel |
label to be used in the plot |
maxcountcol |
numeric variable |
widthdots |
dotsize parameter for geom_dotplot |
maxx |
x axis maximum value |
labelx |
label for x axis |
desiredorder |
order for the colgroup categories |
binwidth |
see plot_dotviolin |
adjust |
adjust param, see geom_violin |
binexp |
digit to modify size of bins with base 10 |
fill_group |
2nd categorical data (use only 2 categories) |
dots |
boolean include dot plot |
violin |
boolean include violin plot |
A grid of ggplots that mimics a single plot
fabaceae_mode_counts <- get_modes_counts(fabaceae_clade_n_df, "clade", "parsed_n") fabaceae_clade_n_df_count <- make_legend_with_stats(fabaceae_mode_counts, "label_count", 1, TRUE) fabaceae_clade_n_df$label_count <- fabaceae_clade_n_df_count$label_count[match( fabaceae_clade_n_df$clade, fabaceae_clade_n_df_count$clade )] desiredorder1 <- unique(fabaceae_clade_n_df$clade) dots_and_violin( fabaceae_clade_n_df, "clade", "label_count", "parsed_n", 2, 30, "Chromosome haploid number", desiredorder1, 1, .85, 4, "ownwork", violin = FALSE ) dots_and_violin( fabaceae_clade_n_df, "clade", "label_count", "parsed_n", 2, 30, "Chromosome haploid number", desiredorder1, 1, .85, 4, dots = FALSE ) dots_and_violin( fabaceae_clade_n_df, "clade", "label_count", "parsed_n", 2, 30, "Chromosome haploid number", desiredorder1, 1, .85, 4 ) fabaceae_Cx_mode_counts_per_clade_df <- get_peaks_counts_continuous( fabaceae_clade_1Cx_df, "clade", "Cx", 2, 0.25, 1, 2 ) namecol <- "labelcountcustom" fabaceae_clade_Cx_peaks_count_df <- make_legend_with_stats( fabaceae_Cx_mode_counts_per_clade_df, namecol, 1, TRUE ) fabaceae_clade_1Cx_df$labelcountcustom <- fabaceae_clade_Cx_peaks_count_df$labelcountcustom[match( fabaceae_clade_1Cx_df$clade, fabaceae_clade_Cx_peaks_count_df$clade )] desiredorder <- unique(fabaceae_clade_1Cx_df$clade) dots_and_violin( fabaceae_clade_1Cx_df, "clade", "labelcountcustom", "Cx", 3, 3, "Genome Size", desiredorder, 0.03, 0.25, 2, "ownwork" ) dots_and_violin( fabaceae_clade_1Cx_df, "clade", "labelcountcustom", "Cx", 3, 3, "Genome Size", desiredorder, 0.03, 0.25, 2, dots = FALSE ) dots_and_violin( fabaceae_clade_1Cx_df, "clade", "labelcountcustom", "Cx", 3, 3, "Genome Size", desiredorder, 0.03, 0.25, 2, "ownwork", violin = FALSE )
fabaceae_mode_counts <- get_modes_counts(fabaceae_clade_n_df, "clade", "parsed_n") fabaceae_clade_n_df_count <- make_legend_with_stats(fabaceae_mode_counts, "label_count", 1, TRUE) fabaceae_clade_n_df$label_count <- fabaceae_clade_n_df_count$label_count[match( fabaceae_clade_n_df$clade, fabaceae_clade_n_df_count$clade )] desiredorder1 <- unique(fabaceae_clade_n_df$clade) dots_and_violin( fabaceae_clade_n_df, "clade", "label_count", "parsed_n", 2, 30, "Chromosome haploid number", desiredorder1, 1, .85, 4, "ownwork", violin = FALSE ) dots_and_violin( fabaceae_clade_n_df, "clade", "label_count", "parsed_n", 2, 30, "Chromosome haploid number", desiredorder1, 1, .85, 4, dots = FALSE ) dots_and_violin( fabaceae_clade_n_df, "clade", "label_count", "parsed_n", 2, 30, "Chromosome haploid number", desiredorder1, 1, .85, 4 ) fabaceae_Cx_mode_counts_per_clade_df <- get_peaks_counts_continuous( fabaceae_clade_1Cx_df, "clade", "Cx", 2, 0.25, 1, 2 ) namecol <- "labelcountcustom" fabaceae_clade_Cx_peaks_count_df <- make_legend_with_stats( fabaceae_Cx_mode_counts_per_clade_df, namecol, 1, TRUE ) fabaceae_clade_1Cx_df$labelcountcustom <- fabaceae_clade_Cx_peaks_count_df$labelcountcustom[match( fabaceae_clade_1Cx_df$clade, fabaceae_clade_Cx_peaks_count_df$clade )] desiredorder <- unique(fabaceae_clade_1Cx_df$clade) dots_and_violin( fabaceae_clade_1Cx_df, "clade", "labelcountcustom", "Cx", 3, 3, "Genome Size", desiredorder, 0.03, 0.25, 2, "ownwork" ) dots_and_violin( fabaceae_clade_1Cx_df, "clade", "labelcountcustom", "Cx", 3, 3, "Genome Size", desiredorder, 0.03, 0.25, 2, dots = FALSE ) dots_and_violin( fabaceae_clade_1Cx_df, "clade", "labelcountcustom", "Cx", 3, 3, "Genome Size", desiredorder, 0.03, 0.25, 2, "ownwork", violin = FALSE )
A series of functions to get modes/peaks from discrete and continuous variables and integrate them as tables inside plots. Cite as in: citation("dotsViolin")
fabaceae_clade_1Cx_df: parsed Cx sizes for fabaceae
fabaceae_clade_1Cx_df
fabaceae_clade_1Cx_df
data.frame with columns:
OTU, species
main fabaceae clade
genome size, Cx
fabaceae_clade_n_df: parsed n counts for fabaceae
fabaceae_clade_n_df
fabaceae_clade_n_df
data.frame with columns:
OTU, species
main fabaceae clade
chromosome number, n
This function comes from an answer to a question on Stack Overflow: https://stackoverflow.com/questions/42698465/obtaining-3-most-common-elements-of-groups-concatenating-ties-and-ignoring-les
get_modes_counts(data, grouping_col, col2, mode_number = 3)
get_modes_counts(data, grouping_col, col2, mode_number = 3)
data |
data.frame |
grouping_col |
string split by this column |
col2 |
string numerical data column |
mode_number |
numeric number of modes to retrieve |
data.frame with modes and counts per group
get_modes_counts(fabaceae_clade_n_df, "clade", "parsed_n")
get_modes_counts(fabaceae_clade_n_df, "clade", "parsed_n")
This function allows you to get peaks and summary counts per group for a continuous variable in a dataframe format.
Handles ties; the least frequent is ignored, except if it is the only one. Depends on the get.peaks function.
get_peaks_counts_continuous( origtable, grouping_col, columnname, peak_number, adjust1, signifi, nsmall )
get_peaks_counts_continuous( origtable, grouping_col, columnname, peak_number, adjust1, signifi, nsmall )
origtable |
dataframe |
grouping_col |
column with categories - character |
columnname |
column with numerical data |
peak_number |
number of peaks to get, see get.peaks |
adjust1 |
bandwidth adjust parameter |
signifi |
see get.peaks function |
nsmall |
see get.peaks function |
data.frame
get_peaks_counts_continuous(fabaceae_clade_1Cx_df, "clade", "Cx", 2, 0.25, 1, 2)
get_peaks_counts_continuous(fabaceae_clade_1Cx_df, "clade", "Cx", 2, 0.25, 1, 2)
This function allows you to get peaks for a continuous variable, based on the kernel density function.
get.peaks(x, bw, signifi, nsmall, ranks = 3)
get.peaks(x, bw, signifi, nsmall, ranks = 3)
x |
dataframe |
bw |
bandwidth |
signifi |
criteria to bin the data in number of digits |
nsmall |
criteria to approximate (round) data |
ranks |
numeric how many ranks to consider |
data.frame
This function merges all columns in a dataframe to be used as legends
make_legend_with_stats( data, namecol, start_column_idx = 2, first_justified_left = FALSE )
make_legend_with_stats( data, namecol, start_column_idx = 2, first_justified_left = FALSE )
data |
dataframe with columns to be merged into 1 |
namecol |
name to be given to new column |
start_column_idx |
numeric index of first column to process |
first_justified_left |
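boolean; when TRUE, the first column is left-justified (description truncated in the source; inferred from the argument name) |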
data.frame with combined source columns
fabaceae_mode_counts <- get_modes_counts(fabaceae_clade_n_df, "clade", "parsed_n") fabaceae_clade_n_df_count <- make_legend_with_stats(fabaceae_mode_counts, "label_count", 1, TRUE) fabaceae_Cx_mode_counts_per_clade_df <- get_peaks_counts_continuous( fabaceae_clade_1Cx_df, "clade", "Cx", 2, 0.25, 1, 2 ) namecol <- "labelcountcustom" fabaceae_clade_1Cx_modes_count_df <- make_legend_with_stats( fabaceae_Cx_mode_counts_per_clade_df, namecol, 1, TRUE )
fabaceae_mode_counts <- get_modes_counts(fabaceae_clade_n_df, "clade", "parsed_n") fabaceae_clade_n_df_count <- make_legend_with_stats(fabaceae_mode_counts, "label_count", 1, TRUE) fabaceae_Cx_mode_counts_per_clade_df <- get_peaks_counts_continuous( fabaceae_clade_1Cx_df, "clade", "Cx", 2, 0.25, 1, 2 ) namecol <- "labelcountcustom" fabaceae_clade_1Cx_modes_count_df <- make_legend_with_stats( fabaceae_Cx_mode_counts_per_clade_df, namecol, 1, TRUE )
This function makes a dot-plot and violin-plot, internal function
plot_dotviolin( dataset, par, groupcol, vary, labelx, maxx, adjust, binwidth, fill_group = "fill_group", font = "mono", dots = TRUE, violin = TRUE )
plot_dotviolin( dataset, par, groupcol, vary, labelx, maxx, adjust, binwidth, fill_group = "fill_group", font = "mono", dots = TRUE, violin = TRUE )
dataset |
dataframe with columns to be merged into 1 |
par |
dot size |
groupcol |
categories to group |
vary |
numeric variable |
labelx |
x axis label |
maxx |
x axis maximum value |
adjust |
geom_violin adjust parameter |
binwidth |
geom_dotplot binwidth parameter |
fill_group |
2nd category with 2 options as a fill aes argument for geom_dotplot |
font |
font family |
dots |
boolean include dot plot |
violin |
boolean include violin plot |
ggplot