This vignette demonstrates how to use the {samplezoo} package to generate datasets of varying sizes (small, medium, and large) with variables from multiple probability distributions.
Each dataset provides:

- Variables (columns) drawn from common probability distributions such as Normal, Binomial, Poisson, and others.
- An adjustable sample size (small, medium, or large) to suit your needs.
data_small <- samplezoo("small")
head(data_small)
#> norm norm2 norm3 binom neg pois exp unif beta
#> 1 25.15694 65.80074 42.402275 1 0 2 1.828935 0.4606579 0.2356178
#> 2 25.73720 45.59507 5.305106 0 0 5 29.517268 0.1910983 0.3974960
#> 3 52.14277 63.15152 23.951638 1 2 3 12.723542 0.5519936 0.4568973
#> 4 59.59455 62.11344 61.097426 0 0 0 18.018434 0.3936939 0.1238944
#> 5 56.22166 50.14880 17.276617 0 0 4 4.299494 0.3424037 0.5205620
#> 6 64.40413 69.64359 39.579413 0 5 6 2.044494 0.3128539 0.0907647
#> gamma chisq t_dist
#> 1 2.993321 0.5156396 -1.8582884
#> 2 2.644299 0.2756526 0.5036235
#> 3 1.969294 5.8376690 -1.9433715
#> 4 7.115710 3.7607821 0.7088352
#> 5 8.076411 2.6783601 1.6695084
#> 6 6.180865 0.1764780 0.3286485
data_medium <- samplezoo("medium")
head(data_medium)
#> norm norm2 norm3 binom neg pois exp unif beta
#> 1 66.32780 65.03845 40.73076 0 4 1 0.8459338 0.47356267 0.05140996
#> 2 15.60933 62.24251 35.66566 0 0 3 4.4488537 0.97937912 0.41373055
#> 3 67.99604 65.68543 69.63832 0 4 4 13.5834844 0.07347576 0.49006833
#> 4 77.54035 46.96659 47.95241 1 0 3 6.0745628 0.66047941 0.35677391
#> 5 39.07681 47.36226 49.83504 0 1 4 15.2113350 0.86218629 0.35538683
#> 6 63.32963 56.16136 76.07890 1 1 5 0.9421500 0.79081379 0.22499517
#> gamma chisq t_dist
#> 1 2.096289 3.814471 0.9809230
#> 2 3.537228 3.762445 0.4532561
#> 3 2.665679 9.730170 -1.9000670
#> 4 3.794769 2.938662 0.4266126
#> 5 7.348275 3.732159 -0.3562102
#> 6 6.715543 11.616382 0.8545484
data_large <- samplezoo("large")
head(data_large)
#> norm norm2 norm3 binom neg pois exp unif beta
#> 1 67.38657 70.42156 -2.879439 0 2 2 14.114451 0.5490409 0.3784222
#> 2 63.55910 66.85273 40.235570 1 2 3 20.045941 0.7833406 0.4055449
#> 3 76.08150 53.92179 37.981351 0 1 3 4.900619 0.2500442 0.3095650
#> 4 31.57206 62.14065 27.116840 0 1 6 8.828513 0.6659334 0.1559601
#> 5 63.39814 50.32682 58.956412 0 0 4 2.681587 0.9681289 0.2035393
#> 6 25.19843 56.45019 69.429303 0 0 3 1.357291 0.8655445 0.4260651
#> gamma chisq t_dist
#> 1 3.494189 7.849484 3.3730323
#> 2 2.403993 9.112495 0.4460709
#> 3 4.533870 9.779585 -0.8845991
#> 4 1.228562 9.786626 0.3415335
#> 5 10.894135 8.356028 -1.8756460
#> 6 2.971456 4.546987 -1.3489405
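To make your results reproducible, call set.seed() before generating random data. Using the same seed always reproduces the same dataset, while changing the seed gives a different, but equally repeatable, dataset, as the two examples below show.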
set.seed(123)
data_large <- samplezoo("large")
head(data_large)
#> norm norm2 norm3 binom neg pois exp unif beta
#> 1 41.59287 83.70725 23.274065 0 1 6 6.628373 0.5468223 0.08294255
#> 2 46.54734 58.33188 35.588540 0 0 5 21.305366 0.3900809 0.63544684
#> 3 73.38062 69.26961 -2.070295 0 2 4 0.189645 0.7262119 0.11520674
#> 4 51.05763 54.31848 6.643849 0 2 2 8.479098 0.5101462 0.38184206
#> 5 51.93932 62.25090 18.040743 0 0 2 11.885521 0.2964126 0.17196046
#> 6 75.72597 71.31986 6.687576 0 1 4 6.363993 0.1442317 0.35908460
#> gamma chisq t_dist
#> 1 6.9893762 10.286282 -0.3814568
#> 2 5.4087626 6.519658 -2.3409216
#> 3 1.2587867 8.011417 -0.4744159
#> 4 0.9871787 14.780626 0.4292511
#> 5 2.4021943 6.799788 -0.6692669
#> 6 4.2109032 17.858701 -0.3370763
set.seed(456)
data_large <- samplezoo("large")
head(data_large)
#> norm norm2 norm3 binom neg pois exp unif beta
#> 1 29.84718 68.13494 7.9885694 0 0 5 3.4417303 0.8866347 0.05413307
#> 2 59.32663 52.32066 21.2526086 0 3 3 0.8114356 0.7976466 0.07195440
#> 3 62.01312 62.47569 38.4789563 0 2 6 46.8038907 0.6469920 0.22555129
#> 4 29.16661 53.51086 -0.8656269 0 1 5 11.6955326 0.2036753 0.71455809
#> 5 39.28465 47.19406 47.7819258 1 1 1 0.3535625 0.3653401 0.34619912
#> 6 45.13908 63.33566 53.3620528 1 1 2 4.5592136 0.7628573 0.25880522
#> gamma chisq t_dist
#> 1 6.7914120 4.464348 -1.0150596
#> 2 3.0132520 8.062120 0.3262369
#> 3 4.7360954 10.969593 1.5141157
#> 4 5.1235878 6.249247 0.6432708
#> 5 6.6851637 4.358815 0.2025742
#> 6 0.3903841 20.019575 1.6257109