Quick start of BALLI package

Quick Start

This is a quick start manual for the BALLI package.

# library() stops with an error if BALLI is not installed, whereas require()
# only returns FALSE and lets the following steps fail with a less clear error.
library(BALLI)

1. Load Count Data

# read.table() already returns a data frame, so no extra conversion is needed.
data <- read.table("counts.txt")

Alternatively, generate example count data:

#' Simulate negative-binomial read counts for a single gene.
#'
#' A mean is drawn from Uniform(10, 100) and a size parameter from
#' Uniform(1, 10); the counts of all samples are then drawn from a negative
#' binomial distribution with those two parameters.
#'
#' @param nRow Ignored. It exists only so the function can be passed directly
#'   to `sapply()` over a vector of gene indices.
#' @param nSample Number of counts (samples) to simulate for the gene.
#'   Defaults to 20, the value that used to be hard-coded.
#' @return A vector of `nSample` simulated counts.
GenerateData <- function(nRow, nSample = 20) {
    expr_mean <- runif(1, min = 10, max = 100)
    expr_size <- runif(1, min = 1, max = 10)
    rnbinom(nSample, mu = expr_mean, size = expr_size)
}

# Simulate 10,000 genes. sapply() binds each gene's counts as one column, so
# the result is transposed to put genes in rows and the 20 samples in columns.
data <- data.frame(
    t(sapply(seq_len(10000), GenerateData)),
    row.names = paste0("gene", seq_len(10000))
)
# Sample names: A1..A10 followed by B1..B10.
names(data) <- paste0(rep(c("A", "B"), each = 10), 1:10)
head(data)
##       A1  A2 A3  A4  A5 A6 A7 A8  A9 A10  B1 B2 B3 B4  B5  B6 B7  B8 B9 B10
## gene1 27  28 76  35  65 16 44 25  26  29  22 46 30 20  23  29 49  32 35  54
## gene2  9  49 42  46  15 21 27 26  82  23  38 42 70 17  30  30 24  99 44  17
## gene3  6   5  7  11  12 15  6 16  28  24  28 15  6  8  26   5  6   6 16  16
## gene4 27  28 26  23  55 51 77 34  31  17  11 36 19 19  24  18 20  25 46  34
## gene5 74 139 99 129 109 83 62 76 131 119 101 58 75 70 122 135 92 236 72  67
## gene6 81  80 62  77  53 67 62 67  88  53  95 56 54 45  62  59 92  85 56  71

2. Designate Group Information and Make Design Matrix

# Group label of each sample, in the same order as the columns of the count
# table: ten "A" samples followed by ten "B" samples.
Group <- rep(c("A", "B"), each = 10)
Group
##  [1] "A" "A" "A" "A" "A" "A" "A" "A" "A" "A" "B" "B" "B" "B" "B" "B" "B" "B" "B"
## [20] "B"
# The design matrix has one row per sample. `Group` is not a column of the
# count table (whose rows are genes), so it is looked up in the calling
# environment and no `data` argument is passed.
design <- model.matrix(~ Group)
head(design)
##   (Intercept) GroupB
## 1           1      0
## 2           1      0
## 3           1      0
## 4           1      0
## 5           1      0
## 6           1      0

3. Normalize Count Data

# Wrap the counts and group labels in a DGEList, then compute the per-sample
# normalization factors (the norm.factors column of dge$samples).
# NOTE(review): DGEList() and calcNormFactors() are edgeR functions; confirm
# that loading BALLI makes them available, otherwise attach edgeR first.
dge <- calcNormFactors(DGEList(counts = data, group = Group))
dge
## An object of class "DGEList"
## $counts
##       A1  A2 A3  A4  A5 A6 A7 A8  A9 A10  B1 B2 B3 B4  B5  B6 B7  B8 B9 B10
## gene1 27  28 76  35  65 16 44 25  26  29  22 46 30 20  23  29 49  32 35  54
## gene2  9  49 42  46  15 21 27 26  82  23  38 42 70 17  30  30 24  99 44  17
## gene3  6   5  7  11  12 15  6 16  28  24  28 15  6  8  26   5  6   6 16  16
## gene4 27  28 26  23  55 51 77 34  31  17  11 36 19 19  24  18 20  25 46  34
## gene5 74 139 99 129 109 83 62 76 131 119 101 58 75 70 122 135 92 236 72  67
## 9995 more rows ...
## 
## $samples
##    group lib.size norm.factors
## A1     A   548524    1.0007690
## A2     A   550012    0.9888917
## A3     A   542979    0.9995316
## A4     A   552839    0.9997865
## A5     A   550961    0.9946588
## 15 more rows ...

4. Estimate Technical Variance

# Estimate the technical variance from the normalized counts and the design
# matrix. The printed result holds $targets, $design, $logcpm and $tecVar.
tV <- tecVarEstim(dge, design)
tV
## An object of class "TecVarList"
## $targets
##    group lib.size norm.factors
## A1     A 548945.8    1.0007690
## A2     A 543902.3    0.9888917
## A3     A 542724.7    0.9995316
## A4     A 552721.0    0.9997865
## A5     A 548018.2    0.9946588
## 15 more rows ...
## 
## $design
##   (Intercept) GroupB
## 1           1      0
## 2           1      0
## 3           1      0
## 4           1      0
## 5           1      0
## 15 more rows ...
## 
## $logcpm
##             A1       A2       A3       A4       A5       A6       A7       A8
## gene1 5.723203 5.784547 7.166668 6.065331 6.933697 5.038205 6.388529 5.608825
## gene2 4.324603 6.550458 6.340365 6.440723 4.954816 5.391937 5.722943 5.661261
## gene3 3.865140 3.682003 4.047884 4.557261 4.674635 4.955718 3.864930 5.024299
## gene4 5.723203 5.784547 5.687851 5.499979 6.700482 6.596488 7.168751 6.023645
## gene5 7.113169 8.017927 7.539576 7.888934 7.662058 7.278014 6.864969 7.138770
##             A9      A10       B1       B2       B3       B4       B5       B6
## gene1 5.670375 5.802322 5.440357 6.435165 5.871322 5.325389 5.511329 5.835695
## gene2 7.255246 5.492259 6.176980 6.309692 7.041488 5.113867 5.867524 5.881535
## gene3 5.769902 5.548788 5.762114 4.938828 3.870019 4.187747 5.674853 3.684922
## gene4 5.907393 5.096783 4.556560 6.098298 5.263412 5.258270 5.567921 5.202797
## gene5 7.918194 7.766128 7.541237 6.756965 7.138362 7.035963 7.821855 7.980417
##             B7       B8       B9      B10
## gene1 6.539904 5.949126 6.079063 6.667703
## gene2 5.567814 7.519744 6.393225 5.108597
## gene3 3.866891 3.862300 5.039245 5.030622
## gene4 5.326766 5.616601 6.454635 6.030365
## gene5 7.422118 8.756313 7.079200 6.968842
## 9995 more rows ...
## 
## $tecVar
##                A1          A2          A3          A4          A5          A6
## gene1 0.026108547 0.026346400 0.026402438 0.025933312 0.026151972 0.026170167
## gene2 0.030345774 0.030619127 0.030683672 0.030144376 0.030395679 0.030416589
## gene3 0.072067605 0.072676052 0.072819649 0.071619006 0.072178726 0.072225278
## gene4 0.026089059 0.026326750 0.026382778 0.025913955 0.026132452 0.026150633
## gene5 0.009334899 0.009415628 0.009434681 0.009275396 0.009349642 0.009355818
##                A7          A8          A9         A10          B1          B2
## gene1 0.026103576 0.025892702 0.026066426 0.025785404 0.026861906 0.026771306
## gene2 0.030340061 0.030097700 0.030297366 0.029974246 0.024523873 0.024440707
## gene3 0.072054886 0.071515001 0.071959806 0.071240122 0.072137799 0.071911649
## gene4 0.026084092 0.025873375 0.026046970 0.025766157 0.036467254 0.036346156
## gene5 0.009333212 0.009261604 0.009320597 0.009225155 0.009703084 0.009671963
##                B3          B4          B5          B6          B7          B8
## gene1 0.027178012 0.027073293 0.027103321 0.027379455 0.027101168 0.026988683
## gene2 0.024813822 0.024717816 0.024745346 0.024998835 0.024743372 0.024640247
## gene3 0.072926196 0.072665149 0.072740029 0.073428800 0.072734661 0.072454122
## gene4 0.036889157 0.036749482 0.036789535 0.037158283 0.036786664 0.036636620
## gene5 0.009811592 0.009775658 0.009785964 0.009880801 0.009785225 0.009746616
##                B9         B10
## gene1 0.027143474 0.026965157
## gene2 0.024782158 0.024618653
## gene3 0.072840137 0.072395432
## gene4 0.036843092 0.036605237
## gene5 0.009799742 0.009738539
## 9995 more rows ...

5. Fit BALLI and See Top Significant Genes

# Fit BALLI on the technical-variance object.
# NOTE(review): intV = 2 presumably selects the second design column (GroupB)
# as the variable of interest; confirm against ?balli.
fit <- balli(tV, intV = 2)
fit
## An object of class "Balli"
## $Result
##       log2FC_GroupB         lLLI       lBALLI       pLLI     pBALLI       BCF
## gene1  -0.052465577 0.0481389525 0.0427902063 0.82633387 0.83612081 0.1249993
## gene2   0.284639814 0.6680254769 0.5938034377 0.41374086 0.44095211 0.1249943
## gene3  -0.007381279 0.0005223832 0.0004643406 0.98176537 0.98280806 0.1250001
## gene4  -0.481212196 3.4177582467 3.0381623040 0.06449854 0.08132794 0.1249426
## gene5  -0.068676980 0.1006934258 0.0895052854 0.75099908 0.76480703 0.1249998
## 9995 more rows ...
## 
## $topGenes
##          log2FC_GroupB         pLLI       pBALLI   adjpLLI adjpBALLI
## gene3410     2.0382397 1.093056e-05 3.376546e-05 0.1093056 0.3376546
## gene5607     1.0290866 1.003620e-04 2.449344e-04 0.4435383 0.8134918
## gene318     -0.8030151 1.712952e-04 3.952470e-04 0.4435383 0.8134918
## gene637     -1.2381053 2.703061e-04 5.944122e-04 0.4435383 0.8134918
## gene9750    -0.9521504 3.043120e-04 6.614722e-04 0.4435383 0.8134918
## 9995 more rows ...