Quick start of BALLI package

Quick Start

This is a quick start manual for BALLI.

# Attach BALLI. library() stops with an error when the package is missing,
# whereas require() only warns and returns FALSE, deferring the failure.
library(BALLI)

1. Load Count Data

# Read the count table from counts.txt; read.table() already returns a
# data.frame, so no additional wrapping is required.
data <- read.table("counts.txt")

Alternatively, generate example count data:

# Simulate negative-binomial counts for a single gene.
#
# One mean and one size parameter are drawn per call, so all samples of the
# gene share the same distribution.
#
# Args:
#   nRow:    Not used in the body; it only absorbs the index supplied when the
#            function is applied over a sequence, e.g. sapply(1:N, GenerateData).
#   nSample: Number of counts to draw. Defaults to 20, the value that was
#            previously hard-coded.
#
# Returns:
#   A vector of nSample simulated counts.
GenerateData <- function(nRow, nSample = 20) {
    expr_mean <- runif(1, min = 10, max = 100)  # mean, uniform on [10, 100]
    expr_size <- runif(1, min = 1, max = 10)    # size, uniform on [1, 10]
    rnbinom(nSample, mu = expr_mean, size = expr_size)
}

# Simulate 10000 genes, one GenerateData() call each. sapply() puts each call's
# result in a column, so the matrix is transposed to get one gene per row.
data <- data.frame(
    t(sapply(1:10000, GenerateData)),
    row.names = paste0("gene", 1:10000)
)
# Columns are labelled A1..A10 followed by B1..B10.
names(data) <- paste0(rep(c("A", "B"), each = 10), rep(1:10, times = 2))
head(data)
##        A1  A2  A3  A4  A5 A6 A7  A8  A9 A10  B1  B2  B3  B4 B5  B6  B7  B8  B9
## gene1  22  91  16 139 245 20 52  84  55 119 212  67  58  68 47  47  58  25  71
## gene2  49  84  69  44 122 88 84  89  60  88 104  85 115  52 50  63  93  94  66
## gene3  60 113 102  71  90 59 91  56  49  42  83 121 111 108 40  53  88 111 124
## gene4  48 124 105  44  77 75 46 138 116 102  21  82  75 134 61 114 107  55  91
## gene5  46  46  38  44  38 23 55  43  75  66  50  62  53  51 17  48  70  40  32
## gene6 138  64  45  45  43 56 82 155  94  64  56  57 114  93 52  63  65  43  77
##       B10
## gene1 145
## gene2  93
## gene3  82
## gene4 128
## gene5  82
## gene6  86

2. Designate Group Information and Make Design Matrix

# Group label for each sample: ten "A" samples followed by ten "B" samples.
Group <- rep(c("A", "B"), each = 10)
Group
##  [1] "A" "A" "A" "A" "A" "A" "A" "A" "A" "A" "B" "B" "B" "B" "B" "B" "B" "B" "B"
## [20] "B"
# Build the design matrix from the Group labels. The result has an intercept
# column and a GroupB indicator column (0 for the group "A" samples).
design <- model.matrix(~Group, data = data)
head(design)
##   (Intercept) GroupB
## 1           1      0
## 2           1      0
## 3           1      0
## 4           1      0
## 5           1      0
## 6           1      0

3. Normalize Count Data

# Wrap the counts and group labels in a DGEList, then compute the per-sample
# normalization factors (the norm.factors column of $samples).
# NOTE(review): DGEList() and calcNormFactors() are presumably edgeR functions
# made available by loading BALLI -- confirm, or attach edgeR explicitly.
dge <- DGEList(counts=data, group=Group)
dge <- calcNormFactors(dge)
dge
## An object of class "DGEList"
## $counts
##       A1  A2  A3  A4  A5 A6 A7  A8  A9 A10  B1  B2  B3  B4 B5  B6  B7  B8  B9
## gene1 22  91  16 139 245 20 52  84  55 119 212  67  58  68 47  47  58  25  71
## gene2 49  84  69  44 122 88 84  89  60  88 104  85 115  52 50  63  93  94  66
## gene3 60 113 102  71  90 59 91  56  49  42  83 121 111 108 40  53  88 111 124
## gene4 48 124 105  44  77 75 46 138 116 102  21  82  75 134 61 114 107  55  91
## gene5 46  46  38  44  38 23 55  43  75  66  50  62  53  51 17  48  70  40  32
##       B10
## gene1 145
## gene2  93
## gene3  82
## gene4 128
## gene5  82
## 9995 more rows ...
## 
## $samples
##    group lib.size norm.factors
## A1     A   546785    1.0024014
## A2     A   550751    0.9874646
## A3     A   547459    1.0008737
## A4     A   545860    1.0032779
## A5     A   554126    0.9982206
## 15 more rows ...

4. Estimate Technical Variance

# Estimate technical variance from the normalized DGEList and the design
# matrix. The returned TecVarList holds $targets, $design, $logcpm and $tecVar.
tV <- tecVarEstim(dge,design)
tV
## An object of class "TecVarList"
## $targets
##    group lib.size norm.factors
## A1     A 548098.1    1.0024014
## A2     A 543847.1    0.9874646
## A3     A 547937.3    1.0008737
## A4     A 547649.3    1.0032779
## A5     A 553140.0    0.9982206
## 15 more rows ...
## 
## $design
##   (Intercept) GroupB
## 1           1      0
## 2           1      0
## 3           1      0
## 4           1      0
## 5           1      0
## 15 more rows ...
## 
## $logcpm
##             A1       A2       A3       A4       A5       A6       A7       A8
## gene1 5.452121 7.417551 5.037351 8.008145 8.802716 5.314816 6.611733 7.275525
## gene2 6.539756 7.304631 7.017532 6.392007 7.808620 7.346617 7.283014 7.357036
## gene3 6.821554 7.723945 7.568263 7.058351 7.378038 6.785597 7.395896 6.707407
## gene4 6.511184 7.855757 7.609293 6.392007 7.158290 7.121584 6.441841 7.978411
## gene5 6.452284 6.463051 6.189619 6.392007 6.176668 5.499142 6.689722 6.341426
##             A9      A10       B1       B2       B3       B4       B5       B6
## gene1 6.687377 7.785819 8.595440 6.979768 6.763045 6.991225 6.486310 6.485498
## gene2 6.808656 7.358789 7.581986 7.314234 7.726394 6.616844 6.572059 6.893232
## gene3 6.526951 6.326288 7.263502 7.813858 7.676213 7.643285 6.263861 6.652182
## gene4 7.736958 7.567386 5.378290 7.263602 7.122885 7.949382 6.848955 7.728946
## gene5 7.121187 6.954374 6.554696 6.871226 6.637538 6.589878 5.118990 6.514651
##             B7       B8       B9      B10
## gene1 6.779468 5.619642 7.049532 8.069809
## gene2 7.442547 7.449785 6.947179 7.439943
## gene3 7.364533 7.685005 7.836942 7.262387
## gene4 7.640900 6.697694 7.398843 7.892493
## gene5 7.042554 6.257106 5.947296 7.262387
## 9995 more rows ...
## 
## $tecVar
##               A1         A2         A3         A4         A5         A6
## gene1 0.01480245 0.01491260 0.01480659 0.01481400 0.01467393 0.01467674
## gene2 0.01216980 0.01225806 0.01217312 0.01217906 0.01206677 0.01206902
## gene3 0.01293318 0.01302712 0.01293671 0.01294303 0.01282363 0.01282602
## gene4 0.01128664 0.01136895 0.01128973 0.01129528 0.01119058 0.01119268
## gene5 0.01951478 0.01966435 0.01952039 0.01953046 0.01934038 0.01934418
##               A7         A8         A9        A10         B1         B2
## gene1 0.01469636 0.01462215 0.01467279 0.01479785 0.01325261 0.01340460
## gene2 0.01208476 0.01202521 0.01206585 0.01216611 0.01146830 0.01159937
## gene3 0.01284269 0.01277961 0.01282266 0.01292925 0.01040813 0.01052755
## gene4 0.01120734 0.01115186 0.01118972 0.01128320 0.01154362 0.01167551
## gene5 0.01937076 0.01927034 0.01933884 0.01950852 0.01867682 0.01890062
##               B3         B4         B5         B6         B7         B8
## gene1 0.01326994 0.01332163 0.01340867 0.01340132 0.01341689 0.01334717
## gene2 0.01148325 0.01152783 0.01160288 0.01159654 0.01160997 0.01154985
## gene3 0.01042175 0.01046237 0.01053075 0.01052498 0.01053722 0.01048244
## gene4 0.01155866 0.01160352 0.01167904 0.01167266 0.01168618 0.01162568
## gene5 0.01870231 0.01877836 0.01890662 0.01889579 0.01891875 0.01881600
##               B9        B10
## gene1 0.01330182 0.01339379
## gene2 0.01151074 0.01159005
## gene3 0.01044680 0.01051906
## gene4 0.01158632 0.01166613
## gene5 0.01874918 0.01888469
## 9995 more rows ...

5. Fit BALLI and See Top Significant Genes

# Fit BALLI on the TecVarList. The returned Balli object holds per-gene
# statistics in $Result and the most significant genes in $topGenes.
# NOTE(review): intV = 2 presumably selects design-matrix column 2 (GroupB) as
# the variable of interest -- confirm against ?balli.
fit <- balli(tV,intV=2)
fit
## An object of class "Balli"
## $Result
##       log2FC_GroupB       lLLI     lBALLI      pLLI    pBALLI       BCF
## gene1    0.14267112 0.09938653 0.08834360 0.7525671 0.7662936 0.1249999
## gene2    0.07673000 0.18427710 0.16380199 0.6677230 0.6856800 0.1249992
## gene3    0.31705172 2.15139895 1.91236569 0.1424401 0.1667007 0.1249935
## gene4   -0.04512276 0.02371451 0.02107957 0.8776135 0.8845624 0.1249999
## gene5    0.05169834 0.05199677 0.04621937 0.8196245 0.8297775 0.1249996
## 9995 more rows ...
## 
## $topGenes
##          log2FC_GroupB         pLLI       pBALLI   adjpLLI adjpBALLI
## gene5989    -0.7698628 8.241738e-05 0.0002054273 0.6232148  0.956798
## gene4226     0.9943280 2.008894e-04 0.0004561157 0.6232148  0.956798
## gene6441    -0.8808738 3.591075e-04 0.0007670542 0.6232148  0.956798
## gene9758    -0.5170499 3.663538e-04 0.0007809084 0.6232148  0.956798
## gene4997     0.7031050 4.416315e-04 0.0009231533 0.6232148  0.956798
## 9995 more rows ...