dataprep: data preprocessing and plots

library(dataprep)
library(ggplot2)
library(scales)

Figure 1. Line plots for variables whose names are essentially numeric and logarithmic

# Descriptive statistics plot for `data`
descplot(data, 5, 65)
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning
#> -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning
#> -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning
#> -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning
#> -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning
#> -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning
#> -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning
#> -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning
#> -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning
#> -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning
#> -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning
#> -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning
#> -Inf
#> Time used by descdata: 0.0617 secs 
#> Time used by descplot: 0.146 secs
#> Warning: Removed 3 rows containing missing values or values outside the scale range
#> (`geom_line()`).

Figure 2. Line plots for variables whose names are essentially numeric and logarithmic

# Selected descriptive statistics, equal to descplot(data,5,65,c('na','min','max','IQR'))
descplot(data, 5, 65, c(2, 7:9))
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning
#> -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning
#> -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning
#> -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning
#> -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning
#> -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning
#> -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning
#> -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning
#> -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning
#> -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning
#> -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning
#> -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning
#> -Inf
#> Time used by descdata: 0.062 secs 
#> Time used by descplot: 0.0661 secs
#> Warning: Removed 3 rows containing missing values or values outside the scale range
#> (`geom_line()`).

Figure 3. Bar charts for variables whose names are of character type

# Descriptive statistics plot for `data1`, with slanted x-axis labels
descplot(data1, 3, 7) +
  ggplot2::theme(
    axis.text.x = ggplot2::element_text(angle = 30, hjust = 1, vjust = 1.1)
  )
#> Time used by descdata: 0.00376 secs 
#> Time used by descplot: 0.0101 secs

Figure 4. Bar charts for variables whose names are of character type

# Only selected descriptive statistics; equal to
# descplot(data1, 3, 7, c('min', 'max', 'IQR')). X-axis labels are slanted.
descplot(data1, 3, 7, 7:9) +
  ggplot2::theme(
    axis.text.x = ggplot2::element_text(angle = 30, hjust = 1, vjust = 1.1)
  )
#> Time used by descdata: 0.00371 secs 
#> Time used by descplot: 0.00699 secs

Figure 5. Particle number size distributions in logarithmic scales

# Percentile plot for `data`: both top and bottom percentiles
percplot(data, 5, 65, 4)
#> Time used by percdata: 0.0574 secs 
#> Time used by percplot: 0.0821 secs
#> Warning: Removed 144 rows containing missing values or values outside the scale range
#> (`geom_line()`).

Figure 6. Particle number size distributions in logarithmic scales with only one part

# Percentile plot for `data`: top percentiles only (part = 1)
percplot(data, 5, 65, 4, part = 1)
#> Time used by percdata: 0.0572 secs 
#> Time used by percplot: 0.0767 secs
#> Warning: Removed 72 rows containing missing values or values outside the scale range
#> (`geom_line()`).

Figure 7. Particle number size distributions in logarithmic scales with only one part

# Percentile plot for `data`: bottom percentiles only (part = 0)
percplot(data, 5, 65, 4, part = 0)
#> Time used by percdata: 0.0811 secs 
#> Time used by percplot: 0.0998 secs
#> Warning: Removed 72 rows containing missing values or values outside the scale range
#> (`geom_line()`).

Figure 8. Percentiles of modes in linear scales

# Percentile plot for `data1`: both top and bottom percentiles,
# with slanted x-axis labels
percplot(data1, 3, 7, 2) +
  ggplot2::theme(
    axis.text.x = ggplot2::element_text(angle = 30, hjust = 1, vjust = 1.1)
  )
#> Time used by percdata: 0.00785 secs 
#> Time used by percplot: 0.0224 secs

Figure 9. Percentiles of modes in linear scales with only one part

# Percentile plot for `data1`: top percentiles only (part = 1),
# with slanted x-axis labels
percplot(data1, 3, 7, 2, part = 1) +
  ggplot2::theme(
    axis.text.x = ggplot2::element_text(angle = 30, hjust = 1, vjust = 1.1)
  )
#> Time used by percdata: 0.00746 secs 
#> Time used by percplot: 0.0235 secs

Figure 10. Percentiles of modes in linear scales with only one part

# Percentile plot for `data1`: bottom percentiles only (part = 0),
# with slanted x-axis labels
percplot(data1, 3, 7, 2, part = 0) +
  ggplot2::theme(
    axis.text.x = ggplot2::element_text(angle = 30, hjust = 1, vjust = 1.1)
  )
#> Time used by percdata: 0.00751 secs 
#> Time used by percplot: 0.0198 secs