dataprep: data preprocessing and plots

library(dataprep)
library(ggplot2)
library(scales)

Figure 1. Line plots for variables with names that are essentially numeric and logarithmic

# Plot descriptive statistics for `data`; no statistic subset is passed here.
# NOTE(review): 5 and 65 look like the first/last column to use -- confirm
# against the descplot() documentation.
descplot(data, 5, 65)
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning
#> -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning
#> -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning
#> -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning
#> -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning
#> -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning
#> -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning
#> -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning
#> -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning
#> -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning
#> -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning
#> -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning
#> -Inf
#> Time used by descdata: 0.0889 secs 
#> Time used by descplot: 0.171 secs
#> Warning: Removed 3 rows containing missing values or values outside the scale range
#> (`geom_line()`).

Figure 2. Line plots for variables whose names are essentially numeric and logarithmic

# Plot only selected descriptive statistics; the index vector c(2, 7:9) is
# equivalent to descplot(data, 5, 65, c('na', 'min', 'max', 'IQR')).
descplot(data, 5, 65, c(2, 7:9))
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning
#> -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning
#> -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning
#> -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning
#> -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning
#> -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning
#> -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning
#> -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning
#> -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning
#> -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning
#> -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning
#> -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning
#> -Inf
#> Time used by descdata: 0.0639 secs 
#> Time used by descplot: 0.0681 secs
#> Warning: Removed 3 rows containing missing values or values outside the scale range
#> (`geom_line()`).

Figure 3. Bar charts for variables whose names are character strings

# Plot descriptive statistics for `data1`; the added theme rotates the
# x-axis tick labels by 30 degrees.
descplot(data1, 3, 7) +
  ggplot2::theme(
    axis.text.x = ggplot2::element_text(angle = 30, hjust = 1, vjust = 1.1)
  )
#> Time used by descdata: 0.00387 secs 
#> Time used by descplot: 0.00993 secs

Figure 4. Bar charts for variables whose names are character strings

# Plot only selected descriptive statistics; the index vector 7:9 is
# equivalent to descplot(data1, 3, 7, c('min', 'max', 'IQR')).
# The added theme rotates the x-axis tick labels by 30 degrees.
descplot(data1, 3, 7, 7:9) +
  ggplot2::theme(
    axis.text.x = ggplot2::element_text(angle = 30, hjust = 1, vjust = 1.1)
  )
#> Time used by descdata: 0.00384 secs 
#> Time used by descplot: 0.00733 secs

Figure 5. Particle number size distributions in logarithmic scales

# Plot both the top and the bottom percentiles of `data` (no `part` given).
# NOTE(review): the fourth argument (4) is presumably the grouping column --
# confirm against the percplot() documentation.
percplot(data, 5, 65, 4)
#> Time used by percdata: 0.0607 secs 
#> Time used by percplot: 0.0881 secs
#> Warning: Removed 144 rows containing missing values or values outside the scale range
#> (`geom_line()`).

Figure 6. Particle number size distributions in logarithmic scales with only one part

# Plot the top percentiles only, selected with part = 1.
percplot(data, 5, 65, 4, part = 1)
#> Time used by percdata: 0.0583 secs 
#> Time used by percplot: 0.0774 secs
#> Warning: Removed 72 rows containing missing values or values outside the scale range
#> (`geom_line()`).

Figure 7. Particle number size distributions in logarithmic scales with only one part

# Plot the bottom percentiles only, selected with part = 0.
percplot(data, 5, 65, 4, part = 0)
#> Time used by percdata: 0.0584 secs 
#> Time used by percplot: 0.102 secs
#> Warning: Removed 72 rows containing missing values or values outside the scale range
#> (`geom_line()`).

Figure 8. Percentiles of modes in linear scales

# Plot both the top and the bottom percentiles of `data1` (no `part` given);
# the added theme rotates the x-axis tick labels by 30 degrees.
percplot(data1, 3, 7, 2) +
  ggplot2::theme(
    axis.text.x = ggplot2::element_text(angle = 30, hjust = 1, vjust = 1.1)
  )
#> Time used by percdata: 0.00808 secs 
#> Time used by percplot: 0.0229 secs

Figure 9. Percentiles of modes in linear scales with only one part

# Plot the top percentiles of `data1` only, selected with part = 1;
# the added theme rotates the x-axis tick labels by 30 degrees.
percplot(data1, 3, 7, 2, part = 1) +
  ggplot2::theme(
    axis.text.x = ggplot2::element_text(angle = 30, hjust = 1, vjust = 1.1)
  )
#> Time used by percdata: 0.0083 secs 
#> Time used by percplot: 0.0218 secs

Figure 10. Percentiles of modes in linear scales with only one part

# Plot the bottom percentiles of `data1` only, selected with part = 0;
# the added theme rotates the x-axis tick labels by 30 degrees.
percplot(data1, 3, 7, 2, part = 0) +
  ggplot2::theme(
    axis.text.x = ggplot2::element_text(angle = 30, hjust = 1, vjust = 1.1)
  )
#> Time used by percdata: 0.00754 secs 
#> Time used by percplot: 0.02 secs