Quick start of BALLI package

Quick Start

This is a quick start manual for BALLI.

# Attach BALLI. library() stops with an error if the package is not installed,
# whereas require() would only return FALSE and let later calls fail.
library(BALLI)

1. Load Count Data

# read.table() already returns a data frame, so no extra conversion is needed.
data <- read.table("counts.txt")

Alternatively, generate example count data:

# Simulate negative-binomial read counts for a single gene.
#
# The gene's mean expression is drawn from Uniform(10, 100) and its
# negative-binomial `size` from Uniform(1, 10); the counts are then drawn
# with rnbinom() using those two values.
#
# Args:
#   nRow:    Gene index supplied by sapply(). It is not used in the
#            simulation; it only lets the function act as an sapply() callback.
#   nSample: Number of counts (samples) to draw for the gene. Defaults to 20,
#            matching the 10 "A" + 10 "B" samples of the example.
#
# Returns:
#   A vector of `nSample` simulated counts.
GenerateData <- function(nRow, nSample = 20) {
    expr_mean <- runif(1, 10, 100)
    expr_size <- runif(1, 1, 10)
    rnbinom(nSample, mu = expr_mean, size = expr_size)
}

# Simulate 10000 genes. sapply() returns one gene per column, so transpose to
# get a genes x samples table, then label rows (genes) and columns (samples)
# in one step.
data <- data.frame(t(sapply(1:10000, GenerateData)))
dimnames(data) <- list(
    paste0("gene", 1:10000),
    c(paste0("A", 1:10), paste0("B", 1:10))
)
head(data)
##       A1 A2 A3 A4  A5  A6 A7  A8 A9 A10 B1 B2  B3  B4  B5  B6  B7 B8  B9 B10
## gene1 58 14 83 70  27  46 78  70 68  28 93 79  56  59 100  94  93 67  64  34
## gene2 28 21 22 18  30  24 13   6 29  22 40 22  26  25  20  18  21 24  27  15
## gene3 24 79 46 63  20 163 18   2 75  71 42 77 191   9  42  66 167 14  94  18
## gene4 77 61 65 74 115  83 73  71 80  71 92 50  83 107  59 153  60 86 124  90
## gene5  9 61 53 80  51  44 47 112 40  35 54 34  55 142  64  46 103 60  67  33
## gene6 15 33 30 19  25   9 13  43 19  12 13 35  31  22  28  13  31 35  40  48

2. Designate Group Information and Make Design Matrix

# Group label per sample: ten "A" samples followed by ten "B" samples.
Group <- rep(c("A", "B"), each = 10)
Group
##  [1] "A" "A" "A" "A" "A" "A" "A" "A" "A" "A" "B" "B" "B" "B" "B" "B" "B" "B" "B"
## [20] "B"
# Build the design matrix from the group labels: an intercept column plus an
# indicator column (GroupB) for the samples in group "B".
# NOTE(review): `Group` is not a column of `data`, so it is presumably picked
# up from the workspace rather than from the `data` argument; confirm.
design <- model.matrix(~Group, data = data)
head(design)
##   (Intercept) GroupB
## 1           1      0
## 2           1      0
## 3           1      0
## 4           1      0
## 5           1      0
## 6           1      0

3. Normalize Count Data

# Wrap the counts and group labels in a "DGEList" object, then compute the
# per-sample normalization factors (the norm.factors column of $samples).
# NOTE(review): DGEList() and calcNormFactors() presumably come from edgeR,
# made available by loading BALLI; confirm. The message printed below shows
# that newer versions name this function normLibSizes.
dge <- DGEList(counts=data, group=Group)
dge <- calcNormFactors(dge)
## calcNormFactors has been renamed to normLibSizes
dge
## An object of class "DGEList"
## $counts
##       A1 A2 A3 A4  A5  A6 A7  A8 A9 A10 B1 B2  B3  B4  B5  B6  B7 B8  B9 B10
## gene1 58 14 83 70  27  46 78  70 68  28 93 79  56  59 100  94  93 67  64  34
## gene2 28 21 22 18  30  24 13   6 29  22 40 22  26  25  20  18  21 24  27  15
## gene3 24 79 46 63  20 163 18   2 75  71 42 77 191   9  42  66 167 14  94  18
## gene4 77 61 65 74 115  83 73  71 80  71 92 50  83 107  59 153  60 86 124  90
## gene5  9 61 53 80  51  44 47 112 40  35 54 34  55 142  64  46 103 60  67  33
## 9995 more rows ...
## 
## $samples
##    group lib.size norm.factors
## A1     A   548248    1.0010724
## A2     A   550389    0.9947607
## A3     A   552285    0.9858546
## A4     A   556162    0.9994703
## A5     A   555388    0.9997659
## 15 more rows ...

4. Estimate Technical Variance

# Estimate the technical variance from the normalized DGEList and the design
# matrix. The returned "TecVarList" object holds $targets, $design, $logcpm
# and $tecVar (one value per gene and sample).
tV <- tecVarEstim(dge,design)
tV
## An object of class "TecVarList"
## $targets
##    group lib.size norm.factors
## A1     A 548835.9    1.0010724
## A2     A 547505.4    0.9947607
## A3     A 544472.7    0.9858546
## A4     A 555867.4    0.9994703
## A5     A 555258.0    0.9997659
## 15 more rows ...
## 
## $design
##   (Intercept) GroupB
## 1           1      0
## 2           1      0
## 3           1      0
## 4           1      0
## 5           1      0
## 15 more rows ...
## 
## $logcpm
##             A1       A2       A3       A4       A5       A6       A7       A8
## gene1 6.772248 4.867913 7.286049 7.017459 5.707516 6.453152 7.185150 7.036412
## gene2 5.772062 5.391819 5.460604 5.170389 5.849462 5.568328 4.769751 3.865000
## gene3 5.565554 7.208671 6.461312 6.869940 5.309213 8.234765 5.184900 2.863324
## gene4 7.169183 6.846037 6.942640 7.095442 7.719316 7.277736 7.092035 7.056314
## gene5 4.323962 6.846037 6.657800 7.205042 6.577104 6.391736 6.477879 7.699446
##             A9      A10       B1       B2       B3       B4       B5       B6
## gene1 6.979869 5.757804 7.438111 7.197857 6.695832 6.792289 7.540640 7.433448
## gene2 5.805131 5.436038 6.260349 5.443087 5.646058 5.616371 5.327048 5.171345
## gene3 7.117347 7.040356 6.327481 7.161789 8.429751 4.320739 6.327429 6.936049
## gene4 7.208098 7.040356 7.422843 6.558474 7.246989 7.629759 6.798841 8.124516
## gene5 6.243088 6.060244 6.675487 6.027994 6.670755 8.031507 6.912519 6.433693
##             B7       B8       B9      B10
## gene1 7.434660 6.962817 6.913274 6.033218
## gene2 5.388041 5.555013 5.726507 4.950538
## gene3 8.265729 4.854854 7.453929 5.185065
## gene4 6.818945 7.313687 7.846292 7.386974
## gene5 7.579060 6.808508 6.977417 5.992571
## 9995 more rows ...
## 
## $tecVar
##               A1         A2         A3         A4         A5         A6
## gene1 0.01854310 0.01858751 0.01868955 0.01831192 0.01833171 0.01858118
## gene2 0.04352061 0.04362221 0.04385495 0.04299159 0.04303692 0.04360773
## gene3 0.02409259 0.02414998 0.02428180 0.02379386 0.02381946 0.02414180
## gene4 0.01193813 0.01196507 0.01202699 0.01179779 0.01180982 0.01196124
## gene5 0.01937043 0.01941683 0.01952342 0.01912891 0.01914960 0.01941022
##               A7         A8         A9        A10         B1         B2
## gene1 0.01851478 0.01855742 0.01835111 0.01835055 0.01278865 0.01270212
## gene2 0.04345583 0.04355339 0.04308137 0.04308009 0.03756484 0.03730079
## gene3 0.02405601 0.02411110 0.02384455 0.02384383 0.01852087 0.01838878
## gene4 0.01192095 0.01194683 0.01182162 0.01182128 0.01070383 0.01063164
## gene5 0.01934085 0.01938540 0.01916989 0.01916930 0.01497876 0.01487390
##               B3         B4         B5         B6         B7         B8
## gene1 0.01253218 0.01273218 0.01278820 0.01262285 0.01275947 0.01267078
## gene2 0.03677946 0.03739253 0.03756345 0.03705753 0.03747578 0.03720461
## gene3 0.01812833 0.01843467 0.01852018 0.01826721 0.01847632 0.01834068
## gene4 0.01048905 0.01065672 0.01070345 0.01056512 0.01067948 0.01060534
## gene5 0.01466759 0.01491033 0.01497821 0.01477729 0.01494340 0.01483570
##               B9        B10
## gene1 0.01279465 0.01274774
## gene2 0.03758313 0.03743998
## gene3 0.01853004 0.01845841
## gene4 0.01070884 0.01066970
## gene5 0.01498604 0.01492918
## 9995 more rows ...

5. Fit BALLI and See Top Significant Genes

# Fit BALLI on the technical-variance object. The returned "Balli" object
# holds per-gene statistics in $Result and a table of top genes in $topGenes.
# NOTE(review): intV=2 presumably selects the 2nd design column (GroupB) as
# the variable of interest; confirm against the balli() documentation.
fit <- balli(tV,intV=2)
fit
## An object of class "Balli"
## $Result
##       log2FC_GroupB      lLLI    lBALLI       pLLI     pBALLI       BCF
## gene1     0.5378744 3.4438555 3.0612346 0.06348738 0.08018072 0.1249891
## gene2     0.1993370 0.8987859 0.7989554 0.34310747 0.37140586 0.1249512
## gene3     0.3407542 0.3017664 0.2682369 0.58277717 0.60451692 0.1249994
## gene4     0.1699873 1.0559371 0.9386153 0.30414367 0.33263420 0.1249945
## gene5     0.3626869 1.2020167 1.0684633 0.27291897 0.30129281 0.1249958
## 9995 more rows ...
## 
## $topGenes
##          log2FC_GroupB         pLLI       pBALLI    adjpLLI  adjpBALLI
## gene3444     0.8595940 1.782716e-06 6.683598e-06 0.01782716 0.06683598
## gene6069     0.7652805 3.331310e-05 9.142174e-05 0.11908089 0.32412302
## gene5565     0.8740074 3.572427e-05 9.723691e-05 0.11908089 0.32412302
## gene7401     0.9920952 7.322260e-05 1.848236e-04 0.18305650 0.46205891
## gene4554    -0.6330306 9.835354e-05 2.407207e-04 0.19670709 0.48144148
## 9995 more rows ...