Quick start guide for the BALLI package

Quick Start

This is a quick start manual for BALLI.

# Attach BALLI. library() stops with an error if the package is not installed,
# whereas require() would only warn and return FALSE, letting the later steps
# fail with less helpful "could not find function" errors.
library(BALLI)

1. Load Count Data

# Read the count table from "counts.txt" in the working directory;
# read.table() returns a data frame.
data <- read.table("counts.txt")

Alternatively, generate example count data:

# Simulate negative-binomial read counts for a single gene.
#
# A gene-specific mean (uniform on [10, 100]) and size parameter (uniform on
# [1, 10]) are drawn first; `nSample` counts are then sampled from the
# negative binomial distribution with those parameters.
#
# Arguments:
#   nRow     Ignored. It exists only so the function can be passed directly
#            as FUN to sapply()/lapply() over a vector of row indices.
#   nSample  Number of counts (samples) to draw for the gene. Defaults to 20.
#
# Returns: a vector of `nSample` non-negative counts.
GenerateData <- function(nRow, nSample = 20) {
    # Keep the draw order (mean, then size, then counts) so that results under
    # a fixed seed are unchanged for the default nSample.
    expr_mean <- runif(1, min = 10, max = 100)
    expr_size <- runif(1, min = 1, max = 10)
    rnbinom(nSample, mu = expr_mean, size = expr_size)
}

# Simulate 10,000 genes. sapply() returns one column per gene, so the matrix
# is transposed to put genes in rows and the 20 samples in columns.
data <- data.frame(t(sapply(seq_len(10000), GenerateData)))
# Label the genes, then the samples: A1..A10 followed by B1..B10.
rownames(data) <- paste0("gene", seq_len(10000))
colnames(data) <- paste0(rep(c("A", "B"), each = 10), 1:10)
head(data)
##        A1  A2 A3  A4  A5  A6 A7  A8  A9 A10  B1  B2 B3  B4 B5 B6 B7  B8  B9 B10
## gene1  17  23 43  35  22  50 20  24  21  39  13  34 33  27 13 14 29  28  42  36
## gene2 180 182 84  32  60  67 98  73  85  51  49  64 82 132 55 57 68  79  34  43
## gene3  24  31 24   9  45   5 40  36   5  28  52   0 26   2 14  3  9  19  13  20
## gene4  51  14 75  93  67  83 85  85  81  91 119 102 72 134 68 52 46  59  27  87
## gene5  50  76 22 101 156 165 46 145 146  63  80 237 37  27 30 44 95  65  14  39
## gene6  56  68 50 106  43  68 62  63  66  54  96  41 89  67 22 28 99 111 116  35

2. Designate Group Information and Make Design Matrix

# Group label for each of the 20 samples: ten "A" followed by ten "B",
# in the same order as the columns of the count table.
Group <- rep(c("A", "B"), each = 10)
Group
##  [1] "A" "A" "A" "A" "A" "A" "A" "A" "A" "A" "B" "B" "B" "B" "B" "B" "B" "B" "B"
## [20] "B"
# Build the design matrix (intercept + GroupB indicator) from the per-sample
# Group vector. Group is resolved from the formula's environment; it is not a
# column of the count table (whose rows are genes, not samples), so no `data`
# argument is passed.
design <- model.matrix(~ Group)
head(design)
##   (Intercept) GroupB
## 1           1      0
## 2           1      0
## 3           1      0
## 4           1      0
## 5           1      0
## 6           1      0

3. Normalize Count Data

# Wrap the counts and group labels in a DGEList, then compute per-sample
# normalization factors (stored in dge$samples$norm.factors).
dge <- calcNormFactors(DGEList(counts = data, group = Group))
dge
## An object of class "DGEList"
## $counts
##        A1  A2 A3  A4  A5  A6 A7  A8  A9 A10  B1  B2 B3  B4 B5 B6 B7 B8 B9 B10
## gene1  17  23 43  35  22  50 20  24  21  39  13  34 33  27 13 14 29 28 42  36
## gene2 180 182 84  32  60  67 98  73  85  51  49  64 82 132 55 57 68 79 34  43
## gene3  24  31 24   9  45   5 40  36   5  28  52   0 26   2 14  3  9 19 13  20
## gene4  51  14 75  93  67  83 85  85  81  91 119 102 72 134 68 52 46 59 27  87
## gene5  50  76 22 101 156 165 46 145 146  63  80 237 37  27 30 44 95 65 14  39
## 9995 more rows ...
## 
## $samples
##    group lib.size norm.factors
## A1     A   553271    0.9982835
## A2     A   545459    1.0000239
## A3     A   549812    1.0071814
## A4     A   558730    0.9955053
## A5     A   548118    1.0035048
## 15 more rows ...

4. Estimate Technical Variance

# Estimate the technical variance from the normalized counts and the design
# matrix; the result is a "TecVarList" object.
tV <- tecVarEstim(dge, design)
tV
## An object of class "TecVarList"
## $targets
##    group lib.size norm.factors
## A1     A 552321.3    0.9982835
## A2     A 545472.0    1.0000239
## A3     A 553760.4    1.0071814
## A4     A 556218.7    0.9955053
## A5     A 550039.0    1.0035048
## 15 more rows ...
## 
## $design
##   (Intercept) GroupB
## 1           1      0
## 2           1      0
## 3           1      0
## 4           1      0
## 5           1      0
## 15 more rows ...
## 
## $logcpm
##             A1       A2       A3       A4       A5       A6       A7       A8
## gene1 5.104717 5.517122 6.344837 6.056465 5.447149 6.559439 5.300322 5.547756
## gene2 8.364244 8.397819 7.279092 5.934541 6.816503 6.967513 7.483244 7.075441
## gene3 5.557127 5.917938 5.553663 4.308206 6.416872 3.666615 6.232291 6.094908
## gene4 6.584466 4.872620 7.119634 7.416412 6.970839 7.268374 7.282395 7.289515
## gene5 6.556988 7.159447 5.438233 7.533034 8.166134 8.242675 6.424810 8.046114
##             A9      A10       B1       B2       B3       B4       B5       B6
## gene1 5.387427 6.229271 4.779934 6.035532 5.996556 5.734514 4.765932 4.863216
## gene2 7.307078 6.599775 6.546877 6.910150 7.259846 7.943575 6.691821 6.746192
## gene3 3.670382 5.778390 6.629371 1.860013 5.674518 2.868908 4.859032 3.184138
## gene4 7.239169 7.411212 7.793654 7.566261 7.076958 7.964953 6.988206 6.618425
## gene5 8.073628 6.894307 7.232225 8.766753 6.152720 5.734514 5.858962 6.387076
##             B7       B8       B9      B10
## gene1 5.816117 5.761478 6.312035 6.099061
## gene2 6.991277 7.194193 6.022607 6.342910
## gene3 4.321113 5.247072 4.760178 5.310925
## gene4 6.446930 6.785128 5.710768 7.326584
## gene5 7.461923 6.920463 4.853222 6.208650
## 9995 more rows ...
## 
## $tecVar
##               A1         A2         A3         A4         A5         A6
## gene1 0.03137790 0.03176540 0.03129765 0.03116134 0.03150596 0.03143247
## gene2 0.01132799 0.01146054 0.01130053 0.01125394 0.01137181 0.01134666
## gene3 0.04362898 0.04415640 0.04351979 0.04333455 0.04380330 0.04370327
## gene4 0.01363335 0.01379532 0.01360001 0.01354349 0.01368689 0.01365617
## gene5 0.01111876 0.01124893 0.01109181 0.01104608 0.01116177 0.01113709
##               A7         A8         A9        A10         B1         B2
## gene1 0.03100630 0.03116108 0.03154568 0.03172004 0.03503363 0.03481867
## gene2 0.01120094 0.01125385 0.01138540 0.01144504 0.01452320 0.01443646
## gene3 0.04312384 0.04333420 0.04385736 0.04409468 0.09091585 0.09038860
## gene4 0.01347919 0.01354338 0.01370349 0.01377637 0.01303535 0.01295873
## gene5 0.01099404 0.01104599 0.01117512 0.01123370 0.01802903 0.01791872
##               B3         B4         B5         B6         B7         B8
## gene1 0.03486052 0.03509793 0.03465286 0.03476539 0.03472721 0.03454280
## gene2 0.01445335 0.01454914 0.01436968 0.01441498 0.01439961 0.01432537
## gene3 0.09049127 0.09107344 0.08998262 0.09025803 0.09016458 0.08971319
## gene4 0.01297365 0.01305827 0.01289973 0.01293975 0.01292617 0.01286057
## gene5 0.01794020 0.01806203 0.01783416 0.01789151 0.01787205 0.01777808
##               B9        B10
## gene1 0.03449717 0.03445951
## gene2 0.01430700 0.01429184
## gene3 0.08960146 0.08950925
## gene4 0.01284434 0.01283093
## gene5 0.01775482 0.01773563
## 9995 more rows ...

5. Fit BALLI and See Top Significant Genes

# Fit BALLI on the technical-variance object.
# NOTE(review): intV = 2 presumably selects the second design column (GroupB)
# as the variable of interest -- confirm against the balli() documentation.
fit <- balli(tV, intV = 2)
fit
## An object of class "Balli"
## $Result
##       log2FC_GroupB      lLLI     lBALLI      pLLI    pBALLI       BCF
## gene1    -0.1328193 0.3263385 0.29008132 0.5678231 0.5901684 0.1249898
## gene2    -0.3576113 1.6048412 1.42653031 0.2052185 0.2323315 0.1249962
## gene3    -0.8480371 2.3977512 2.13144570 0.1215098 0.1443046 0.1249412
## gene4     0.0821774 0.0712518 0.06333494 0.7895228 0.8013005 0.1249999
## gene5    -0.6959010 2.4667294 2.19265400 0.1162797 0.1386702 0.1249971
## 9995 more rows ...
## 
## $topGenes
##          log2FC_GroupB         pLLI       pBALLI   adjpLLI adjpBALLI
## gene3994     0.9241414 1.892826e-05 5.516397e-05 0.1892826 0.5167442
## gene9264    -1.2869402 6.675543e-05 1.702285e-04 0.2132637 0.5167442
## gene2832     1.1190097 8.196976e-05 2.045314e-04 0.2132637 0.5167442
## gene9289     0.7366361 9.678409e-05 2.372973e-04 0.2132637 0.5167442
## gene8915     0.7753783 1.066318e-04 2.583721e-04 0.2132637 0.5167442
## 9995 more rows ...