EDOIF demo

EXAMPLE#1 Simple Simulation & ordering inference

In the first step, we generate a simple dataset in which C1 and C2 are dominated by C3, C3 is dominated by C4, and C4 is dominated by C5. By construction, there is no dominant-distribution relation between C1 and C2: both are drawn from the same normal distribution.

# Simulation section
# Five groups (C1..C5) of nInv normal draws each, all with standard deviation
# `std`. C1 and C2 share the mean `initMean`; C3, C4 and C5 are shifted upwards
# by 2, 3 and 4 times `stepMean` respectively.
nInv <- 100
initMean <- 10
stepMean <- 20
std <- 8
# One vectorised rnorm() call draws the groups in order C1, C2, ..., C5, so the
# values line up with the group labels built by rep(..., each = nInv).
simData1 <- list(
  Values = rnorm(
    5 * nInv,
    mean = rep(initMean + c(0, 0, 2, 3, 4) * stepMean, each = nInv),
    sd = std
  ),
  Group = rep(paste0("C", 1:5), each = nInv)
)

Next, we use the framework to analyze the simulated data, as shown below.

# Simple ordering inference section
library(EDOIF)
## Loading required package: boot
# Parameter setting
bootT <- 1000 # number of bootstrap resamples (sampling with replacement)
alpha <- 0.05 # significance level

# ======= input
Values <- simData1$Values
Group <- simData1$Group
# =============
# Run the framework on the simulated values and their group labels.
A1 <- EDOIF(Values, Group, bootT = bootT, alpha = alpha)

We print the result of our framework below.

# Text summary of the result: a mean and confidence interval per group, then the
# mean difference, confidence interval and p-value for every pair of groups.
print(A1) # print results in text
## EDOIF (Empirical Distribution Ordering Inference Framework)
## =======================================================
## Alpha = 0.050000, Number of bootstrap resamples = 1000, CI type = perc
## Using Mann-Whitney test to report whether A ≺ B
## A dominant-distribution network density:1.000000
## Distribution: C2
## Mean:8.494134 95CI:[ 6.971574,10.044695]
## Distribution: C1
## Mean:11.211078 95CI:[ 9.650797,12.838301]
## Distribution: C3
## Mean:52.094574 95CI:[ 50.576449,53.477437]
## Distribution: C4
## Mean:70.138097 95CI:[ 68.333705,71.648110]
## Distribution: C5
## Mean:90.541168 95CI:[ 88.882372,92.269571]
## =======================================================
## Mean difference of C1 (n=100) minus C2 (n=100): C2 ≺ C1
##  :p-val 0.0076
## Mean Diff:2.716944 95CI:[ 0.477633,4.939673]
## 
## Mean difference of C3 (n=100) minus C2 (n=100): C2 ≺ C3
##  :p-val 0.0000
## Mean Diff:43.600440 95CI:[ 41.471397,45.855992]
## 
## Mean difference of C4 (n=100) minus C2 (n=100): C2 ≺ C4
##  :p-val 0.0000
## Mean Diff:61.643963 95CI:[ 59.382536,63.857435]
## 
## Mean difference of C5 (n=100) minus C2 (n=100): C2 ≺ C5
##  :p-val 0.0000
## Mean Diff:82.047034 95CI:[ 79.780771,84.274440]
## 
## Mean difference of C3 (n=100) minus C1 (n=100): C1 ≺ C3
##  :p-val 0.0000
## Mean Diff:40.883495 95CI:[ 38.678505,43.282410]
## 
## Mean difference of C4 (n=100) minus C1 (n=100): C1 ≺ C4
##  :p-val 0.0000
## Mean Diff:58.927019 95CI:[ 56.838184,61.257474]
## 
## Mean difference of C5 (n=100) minus C1 (n=100): C1 ≺ C5
##  :p-val 0.0000
## Mean Diff:79.330090 95CI:[ 76.936912,81.568426]
## 
## Mean difference of C4 (n=100) minus C3 (n=100): C3 ≺ C4
##  :p-val 0.0000
## Mean Diff:18.043524 95CI:[ 15.491051,20.164661]
## 
## Mean difference of C5 (n=100) minus C3 (n=100): C3 ≺ C5
##  :p-val 0.0000
## Mean Diff:38.446594 95CI:[ 36.233081,40.546056]
## 
## Mean difference of C5 (n=100) minus C4 (n=100): C4 ≺ C5
##  :p-val 0.0000
## Mean Diff:20.403071 95CI:[ 18.121089,22.901473]

The first plot is the plot of mean-difference confidence intervals

# options = 1: mean-difference confidence intervals
plot(A1, options = 1)

The second plot is the plot of mean confidence intervals

# options = 2: mean confidence intervals
plot(A1, options = 2)

The third plot is a dominant-distribution network.

# options = 3: dominant-distribution network; the value returned by plot() is
# kept in `out`
out <- plot(A1, options = 3)

EXAMPLE#2 Non-normal-Distribution Simulation & ordering inference

We generate a more complicated dataset of mixture distributions. C1, C2, C3, and C4 are dominated by C5. There is no dominant-distribution relation among C1, C2, C3, and C4.

library(EDOIF)
# Parameter setting: bootstrap resamples, significance level, and the sample
# size passed to the simulator as nInv
bootT <- 1000
alpha <- 0.05
nInv <- 1200

# Start of the timed region; the elapsed time is reported after the plot.
start_time <- Sys.time()
# ======= input
simData3 <- SimNonNormalDist(nInv = nInv, noisePer = 0.01)
Values <- simData3$Values
Group <- simData3$Group
# =============
# methodType = "perc" is echoed as "CI type = perc" in the printed result.
A3 <- EDOIF(Values, Group, bootT = bootT, alpha = alpha, methodType = "perc")
A3
## EDOIF (Empirical Distribution Ordering Inference Framework)
## =======================================================
## Alpha = 0.050000, Number of bootstrap resamples = 1000, CI type = perc
## Using Mann-Whitney test to report whether A ≺ B
## A dominant-distribution network density:0.500000
## Distribution: C2
## Mean:79.580294 95CI:[ 77.678280,81.384152]
## Distribution: C3
## Mean:79.983918 95CI:[ 78.094245,81.820302]
## Distribution: C1
## Mean:80.932561 95CI:[ 77.533404,84.330483]
## Distribution: C4
## Mean:81.997332 95CI:[ 76.171057,88.999330]
## Distribution: C5
## Mean:142.101588 95CI:[ 140.495895,143.939008]
## =======================================================
## Mean difference of C3 (n=1200) minus C2 (n=1200): C2 ⊀ C3
##  :p-val 0.7669
## Mean Diff:0.403625 95CI:[ -2.115683,3.092577]
## 
## Mean difference of C1 (n=1200) minus C2 (n=1200): C2 ⊀ C1
##  :p-val 0.4022
## Mean Diff:1.352268 95CI:[ -2.628400,5.645595]
## 
## Mean difference of C4 (n=1200) minus C2 (n=1200): C2 ⊀ C4
##  :p-val 0.1472
## Mean Diff:2.417038 95CI:[ -4.117979,9.547170]
## 
## Mean difference of C5 (n=1200) minus C2 (n=1200): C2 ≺ C5
##  :p-val 0.0000
## Mean Diff:62.521295 95CI:[ 60.173434,65.350968]
## 
## Mean difference of C1 (n=1200) minus C3 (n=1200): C3 ⊀ C1
##  :p-val 0.1733
## Mean Diff:0.948643 95CI:[ -2.802631,4.874003]
## 
## Mean difference of C4 (n=1200) minus C3 (n=1200): C3 ≺ C4
##  :p-val 0.0374
## Mean Diff:2.013413 95CI:[ -3.993600,9.869792]
## 
## Mean difference of C5 (n=1200) minus C3 (n=1200): C3 ≺ C5
##  :p-val 0.0000
## Mean Diff:62.117670 95CI:[ 59.702508,64.544550]
## 
## Mean difference of C4 (n=1200) minus C1 (n=1200): C1 ⊀ C4
##  :p-val 0.2070
## Mean Diff:1.064771 95CI:[ -5.745837,9.182128]
## 
## Mean difference of C5 (n=1200) minus C1 (n=1200): C1 ≺ C5
##  :p-val 0.0000
## Mean Diff:61.169027 95CI:[ 56.970562,64.724885]
## 
## Mean difference of C5 (n=1200) minus C4 (n=1200): C4 ≺ C5
##  :p-val 0.0000
## Mean Diff:60.104256 95CI:[ 52.736561,66.308246]
# plot() is called without an `options` argument here; which figure(s) that
# draws depends on the package default — TODO confirm against the EDOIF docs.
plot(A3)

# Wall-clock time elapsed since start_time was recorded, i.e. covering the
# simulation, the EDOIF call, printing the result and the plot above.
end_time <- Sys.time()
end_time - start_time
## Time difference of 3.061286 secs

Uniform noise

We generate two distributions in which A dominates B, under different degrees of uniform noise.

library(ggplot2)

nInv <- 1000
simData3 <- SimNonNormalDist(nInv = nInv, noisePer = 0.01)
# plot(density(simData3$V3))

# Long-format table for the density plot: the values of V3 are labelled "B"
# and the values of V5 are labelled "A".
# Assumes simData3$V3 and simData3$V5 each hold nInv values (data.frame()
# needs both columns to have the same length) — TODO confirm.
dat <- data.frame(
  dens = c(simData3$V3, simData3$V5),
  lines = rep(c("B", "A"), each = nInv)
)
# Plot.
# Overlaid, semi-transparent density curves, one per category. The legend
# title is set through labs() rather than by assigning into p1$labels, so the
# code relies only on the public ggplot2 interface.
# NOTE(review): xlim() appears to discard values outside [-400, 400], which
# would explain the "Removed ... rows" warning when the plot is drawn.
p1 <- ggplot(dat, aes(x = dens, fill = lines)) +
  geom_density(alpha = 0.5) +
  xlim(-400, 400) +
  ylim(0, 0.07) +
  ylab("Density [0,1]") +
  xlab("Values") +
  labs(fill = "Categories") +
  theme(axis.text.x = element_text(face = "bold", size = 12))
# theme_update() modifies the session-wide default theme, so it applies to this
# plot when it is drawn and to any plot drawn afterwards.
theme_update(text = element_text(face = "bold", size = 12))
plot(p1)
## Warning: Removed 4 rows containing non-finite outside the scale range
## (`stat_density()`).