# Load MARVEL package
library(MARVEL)
# Load adjunct packages for this tutorial
library(ggplot2)
library(gridExtra)
# Load adjunct packages to support additional functionalities
library(AnnotationDbi) # GO analysis
library(clusterProfiler)
library(org.Hs.eg.db)
library(org.Mm.eg.db)
# Load adjunct packages to support additional functionalities
library(plyr) # General data processing
library(ggrepel) # General plotting
library(parallel) # To enable multi-thread during RI PSI computation
library(textclean) # AFE, ALE detection
library(fitdistrplus) # Modality analysis: Fit beta distribution
library(FactoMineR) # PCA: Reduce dimension
library(factoextra) # PCA: Retrieve eigenvalues
library(kSamples) # Anderson-Darling (AD) statistical test
library(twosamples) # D Test Statistic (DTS) statistical test
library(stringr) # Plot GO results
# Load saved MARVEL object
marvel.demo <- readRDS(system.file("extdata/data", "marvel.demo.rds", package="MARVEL"))
class(marvel.demo)
## [1] "Marvel"
sample.id
while all other columns are
optional.## sample.id cell.type sample.type qc.seq
## 2 ERR1562084 iPSC Single Cell pass
## 10 ERR1562092 iPSC Single Cell pass
## 18 ERR1562100 iPSC Single Cell pass
## 25 ERR1562107 iPSC Single Cell pass
## 38 ERR1562120 iPSC Single Cell pass
## 41 ERR1562123 iPSC Single Cell pass
coord.intron
.SJ
is created here to contain the splice
junction count files (SJ.out.tab) generated from 1st pass mode to be
used for 2nd pass mode.# STAR in 1st pass mode
STAR --runThreadN 16 \
--genomeDir GRCh38_GENCODE_genome_STAR_indexed \
--readFilesCommand zcat \
--readFilesIn ERR1562083_1_val_1.fq.gz ERR1562083_2_val_2.fq.gz \
--outFileNamePrefix SJ/ERR1562083. \
--outSAMtype None
# STAR in 2nd pass mode
STAR --runThreadN 16 \
--genomeDir GRCh38_GENCODE_genome_STAR_indexed \
--readFilesCommand zcat \
--readFilesIn ERR1562083_1_val_1.fq.gz ERR1562083_2_val_2.fq.gz \
--outFileNamePrefix ERR1562083. \
--sjdbFileChrStartEnd SJ/*SJ.out.tab \
--outSAMtype BAM SortedByCoordinate \
--outSAMattributes NH HI AS nM XS \
--quantMode TranscriptomeSAM
## coord.intron ERR1562084 ERR1562092 ERR1562100 ERR1562107
## 44383 chr1:19357564:19378539 219 37 457 276
## 44387 chr1:19374456:19378539 NA NA NA NA
## 71516 chr1:28236453:28237760 5 22 58 16
## 71517 chr1:28236453:28237836 193 104 265 190
## 93429 chr1:62196529:62203446 4 NA 13 NA
tran_id
and
gene_id
.tran_id
), please refer to https://wenweixiong.github.io/Splicing_Nomenclature.
Example code for running rMATS as follows.--b1
and --b2
. This is because rMATS requires
these specification for statistical testing of splicing events between
the two samples. But here, we will only be using the splicing events
detected (fromGTF.SE.txt, fromGTF.MXE.txt, fromGTF.RI.txt,
fromGTF.A5SS.txt, fromGTF.A3SS.txt), but not the statistical test
results, from this step for our downstream analysis.rmats \
--b1 path_to_BAM_sample_1.txt \
--b2 path_to_BAM_sample_2.txt \
--gtf gencode.v31.annotation.gtf \
--od rMATS/ \
--tmp rMATS/ \
-t paired \
--readLength 125 \
--variable-read-length \
--nthread 8 \
--statoff
## $SE
## tran_id
## 9578 chr1:62196435:62196528:+@chr1:62203447:62203557:+@chr1:62206519:62207636
## 10483 chr15:24962114:24962209:+@chr15:24967029:24967152:+@chr15:24967932:24968082
## 10484 chr15:24962114:24962209:+@chr15:24967029:24967240:+@chr15:24967932:24968082
## 11921 chr17:51153576:51153662:+@chr17:51154225:51154444:+@chr17:51155651:51155780
## 15780 chr10:78037194:78037304:+@chr10:78037439:78037441:+@chr10:78040204:78040225
## 15782 chr10:78037194:78037304:+@chr10:78037965:78037982:+@chr10:78040204:78040225
## gene_id gene_short_name gene_type
## 9578 ENSG00000240563.2 L1TD1 protein_coding
## 10483 ENSG00000128739.22 SNRPN protein_coding
## 10484 ENSG00000128739.22 SNRPN protein_coding
## 11921 ENSG00000239672.7 NME1 protein_coding
## 15780 ENSG00000138326.20 RPS24 protein_coding
## 15782 ENSG00000138326.20 RPS24 protein_coding
##
## $MXE
## tran_id
## 541 chr14:22830187:22830254:+@chr14:22830964:22831037:+@chr14:22833418:22833482:+@chr14:22834172:22835037
## 1402 chr11:62761795:62761904:+@chr11:62762542:62762866:+@chr11:62762867:62763026:+@chr11:62765182:62765232
## 1589 chr7:128748573:128748804:+@chr7:128754262:128754455:+@chr7:128754529:128754722:+@chr7:128758871:128759037
## 1792 chr2:271866:271939:+@chr2:272037:272150:+@chr2:272192:272305:+@chr2:275140:275201
## 2221 chr3:186786164:186786273:+@chr3:186786418:186786465:+@chr3:186786502:186786645:+@chr3:186787127:186787264
## 2262 chr12:98593979:98594135:+@chr12:98595433:98595661:+@chr12:98595727:98595848:+@chr12:98597856:98598035
## gene_id gene_short_name gene_type
## 541 ENSG00000172590.18 MRPL52 protein_coding
## 1402 ENSG00000168002.12 POLR2G protein_coding
## 1589 ENSG00000128595.17 CALU protein_coding
## 1792 ENSG00000143727.16 ACP1 protein_coding
## 2221 ENSG00000156976.17 EIF4A2 protein_coding
## 2262 ENSG00000075415.12 SLC25A3 protein_coding
##
## $RI
## tran_id gene_id
## 776 chr6:34236873:34236963:+@chr6:34237204:34237317 ENSG00000137309.19
## 4207 chr13:43055391:43058296:+@chr13:43058654:43058862 ENSG00000120675.6
## 4209 chr13:43059394:43059714:+@chr13:43062190:43062295 ENSG00000120675.6
## 5511 chr2:55232808:55232872:+@chr2:55233363:55233417 ENSG00000143947.13
## 6845 chr7:5528719:5528564:-@chr7:5528344:5528281 ENSG00000075624.16
## 6927 chr6:73519228:73519128:-@chr6:73519064:73518932 ENSG00000156508.18
## gene_short_name gene_type
## 776 HMGA1 protein_coding
## 4207 DNAJC15 protein_coding
## 4209 DNAJC15 protein_coding
## 5511 RPS27A protein_coding
## 6845 ACTB protein_coding
## 6927 EEF1A1 protein_coding
##
## $A5SS
## tran_id
## 493 chr7:141738358:141738410|141738503:+@chr7:141739124:141739190
## 2635 chr19:41860255:41860289|41860445:+@chr19:41860775:41860845
## 4148 chr3:197950190:197950221|197950299:+@chr3:197950936:197950978
## 4721 chr8:144792587:144792245|144792366:-@chr8:144791992:144792140
## 5174 chr6:34426071:34425796|34426032:-@chr6:34425072:34425221
## 5878 chr12:112409641:112409411|112409587:-@chr12:112408420:112408656
## gene_id gene_short_name gene_type
## 493 ENSG00000106028.11 SSBP1 protein_coding
## 2635 ENSG00000105372.7 RPS19 protein_coding
## 4148 ENSG00000182899.17 RPL35A protein_coding
## 4721 ENSG00000161016.17 RPL8 protein_coding
## 5174 ENSG00000270800.3 RPS10-NUDT3 protein_coding
## 5878 ENSG00000089009.15 AC004086.1 protein_coding
##
## $A3SS
## tran_id
## 1411 chr1:28236361:28236452:+@chr1:28237761|28237837:28238105
## 1541 chr16:399627:399739:+@chr16:400109|400219:400754
## 2351 chr1:62196435:62196528:+@chr1:62203447|62206519:62207636
## 3961 chr10:78037194:78037304:+@chr10:78037439|78040204:78040225
## 4321 chr12:56158684:56158711:+@chr12:56159587|56159975:56160148
## 7009 chr9:136862906:136862999:-@chr9:136862345|136862496:136862119
## gene_id gene_short_name gene_type
## 1411 ENSG00000130770.18 ATP5IF1 protein_coding
## 1541 ENSG00000103202.13 NME4 protein_coding
## 2351 ENSG00000240563.2 L1TD1 protein_coding
## 3961 ENSG00000138326.20 RPS24 protein_coding
## 4321 ENSG00000092841.18 MYL6 protein_coding
## 7009 ENSG00000107223.13 EDF1 protein_coding
##
## $ALE
## tran_id
## 227 chr3:184366717:184366800:+@chr3:184368113:184368596|184368177:184368287
## 311 chr6:41789232:41789336:+@chr6:41789531:41790141|41789568:41789891
## 449 chr10:78037194:78037304:+@chr10:78037965:78038104|78040615:78040696
## 461 chr10:123162612:123162828:+@chr10:123163820:123170467|123165047:123165365
## 694 chr16:399627:399739:+@chr16:400109:400648|400219:400416
## 1037 chr1:19378540:19378653:-@chr1:19374175:19374455|19357522:19357563
## event_type gene_id gene_short_name gene_type
## 227 ALE ENSG00000163882.9 POLR2H protein_coding
## 311 ALE ENSG00000124593.16 AL365205.1 protein_coding
## 449 ALE ENSG00000138326.20 RPS24 protein_coding
## 461 ALE ENSG00000154473.18 BUB3 protein_coding
## 694 ALE ENSG00000103202.13 NME4 protein_coding
## 1037 ALE ENSG00000077549.18 CAPZB protein_coding
##
## $AFE
## tran_id
## 739 chr4:108620569:108620600|108620656:108620712:+@chr4:108621951:108622024
## 1806 chr13:27251561:27251585|27251597:27251699:+@chr13:27253765:27253843
## 3775 chr5:150449703:150449739|150449492:150449696:-@chr5:150447585:150447735
## 3776 chr5:150449703:150449739|150449486:150449691:-@chr5:150447585:150447735
## 3862 chr6:34426032:34426052|34425796:34426026:-@chr6:34425072:34425221
## 4208 chr8:144792366:144792423|144792245:144792389:-@chr8:144791992:144792140
## event_type gene_id gene_short_name gene_type
## 739 AFE ENSG00000109475.16 RPL34 protein_coding
## 1806 AFE ENSG00000122026.10 RPL21 protein_coding
## 3775 AFE ENSG00000164587.13 RPS14 protein_coding
## 3776 AFE ENSG00000164587.13 RPS14 protein_coding
## 3862 AFE ENSG00000270800.3 RPS10-NUDT3 protein_coding
## 4208 AFE ENSG00000161016.17 RPL8 protein_coding
RI_featureData.txt
generated from rMATS in the previous
step.bedtools coverage \
-g GRCh38.primary_assembly.genome_bedtools.txt \
-split \
-sorted \
-a RI_Coordinates.bed \
-b ERR1562083.Aligned.sortedByCoord.out.bam > \
ERR1562083.txt \
-d
## coord.intron ERR1562084 ERR1562092 ERR1562100 ERR1562107
## 1233 chr2:55232873:55233362 259 141 481 175
## 2562 chr4:39458442:39458890 5466 6105 46238 20963
## 3552 chr6:34236964:34237203 2395 30 2940 547
## 3657 chr6:73519065:73519127 117305 69093 204019 123522
## 3867 chr7:5528345:5528563 231508 146758 458847 297025
rsem-calculate-expression --bam \
--paired-end \
-p 8 \
ERR1562083.Aligned.toTranscriptome.out.bam \
GRCh38_GENCODE_genome_RSEM_indexed/gencode.v31 \
ERR1562083
## gene_id ERR1562084 ERR1562092 ERR1562100 ERR1562107
## 1007 ENSG00000067225.18 8.007924 5.993674 7.876640 7.956231
## 1248 ENSG00000073921.18 3.460743 6.365798 4.032101 7.397974
## 1309 ENSG00000075415.12 9.497652 9.838385 8.908573 9.913742
## 1317 ENSG00000075624.16 11.493971 11.368496 11.589609 11.712037
## 1392 ENSG00000077549.18 8.425887 6.874797 8.566244 8.752314
gene_id
, gene_short_name
, and
gene_type
. All other columns are optional.## gene_id gene_short_name gene_type
## 1007 ENSG00000067225.18 PKM protein_coding
## 1248 ENSG00000073921.18 PICALM protein_coding
## 1309 ENSG00000075415.12 SLC25A3 protein_coding
## 1317 ENSG00000075624.16 ACTB protein_coding
## 1392 ENSG00000077549.18 CAPZB protein_coding
## 1806 ENSG00000089009.15 AC004086.1 protein_coding
CoverageThreshold
option.# Validate, filter, compute SE splicing events
marvel.demo <- ComputePSI(MarvelObject=marvel.demo,
CoverageThreshold=10,
UnevenCoverageMultiplier=10,
EventType="SE"
)
# Validate, filter, compute MXE splicing events
marvel.demo <- ComputePSI(MarvelObject=marvel.demo,
CoverageThreshold=10,
UnevenCoverageMultiplier=10,
EventType="MXE"
)
# Validate, filter, compute RI splicing events
marvel.demo <- ComputePSI(MarvelObject=marvel.demo,
CoverageThreshold=10,
EventType="RI",
thread=4
)
# Validate, filter, compute A5SS splicing events
marvel.demo <- ComputePSI(MarvelObject=marvel.demo,
CoverageThreshold=10,
EventType="A5SS"
)
# Validate, filter, compute A3SS splicing events
marvel.demo <- ComputePSI(MarvelObject=marvel.demo,
CoverageThreshold=10,
EventType="A3SS"
)
# Validate, filter, compute AFE splicing events
marvel.demo <- ComputePSI(MarvelObject=marvel.demo,
CoverageThreshold=10,
EventType="AFE"
)
# Validate, filter, compute ALE splicing events
marvel.demo <- ComputePSI(MarvelObject=marvel.demo,
CoverageThreshold=10,
EventType="ALE"
)
CoverageThreshold
.
This option indicates the minimum number of splice junction reads
supporting the splicing events, above which, the PSI will be computed.
PSI of splicing events below this threshold will be coded as
NA
.UnevenCoverageMultiplier
Specific to computing SE and
MXE. Two splice junctions are used to compute to inclusion of SE and
MXE. This option represent the ratio of read coverage of one splice
junction over the other. The threshold specified here, above which, the
PSI will be coded as NA
.thread
Specific to computing RI. Number of cores to
use. This is depended on the user’s device.read.length
Specific to computing RI. If the values in
df.intron.counts
represent number of reads, then this
option should reflect the sequencing read length, e.g., 150 etc.. If the
values in df.intron.counts
represent total intronic
coverage (here), then this option should be set to 1
(default).# Check splicing data
marvel.demo <- CheckAlignment(MarvelObject=marvel.demo, level="splicing")
# Check gene data
marvel.demo <- CheckAlignment(MarvelObject=marvel.demo, level="gene")
# Cross-check splicing and gene data
marvel.demo <- CheckAlignment(MarvelObject=marvel.demo, level="splicing and gene")
MATCHED
flags are reported. If any
NOT MATCHED
flags are reported, please double-check the
input file requirements.# Retrieve sample metadata
df.pheno <- marvel.demo$SplicePheno
# Define sample ids
sample.ids <- df.pheno[which(df.pheno$cell.type=="iPSC"), "sample.id"]
# Tabulate expressed events
marvel.demo <- CountEvents(MarvelObject=marvel.demo,
sample.ids=sample.ids,
min.cells=5
)
# Output (1): Plot
marvel.demo$N.Events$Plot
## event_type freq pct
## 1 SE 10 14.28571
## 2 MXE 10 14.28571
## 3 RI 10 14.28571
## 4 A5SS 10 14.28571
## 5 A3SS 10 14.28571
## 6 ALE 10 14.28571
## 7 AFE 10 14.28571
min.cells
option. Here, we required the splicing event
to be expressed in at least 5 cells for the splicing event to be
included for analysis.# Retrieve sample metadata
df.pheno <- marvel.demo$SplicePheno
# Define sample ids
sample.ids <- df.pheno[which(df.pheno$cell.type=="Endoderm"), "sample.id"]
# Tabulate expressed events
marvel.demo <- CountEvents(MarvelObject=marvel.demo,
sample.ids=sample.ids,
min.cells=5
)
# Output (1): Plot
marvel.demo$N.Events$Plot
## event_type freq pct
## 1 SE 10 14.28571
## 2 MXE 10 14.28571
## 3 RI 10 14.28571
## 4 A5SS 10 14.28571
## 5 A3SS 10 14.28571
## 6 ALE 10 14.28571
## 7 AFE 10 14.28571
# Retrieve sample metadata
df.pheno <- marvel.demo$SplicePheno
# Define sample IDs
sample.ids <- df.pheno[which(df.pheno$cell.type=="iPSC"), "sample.id"]
# Assign modality
marvel.demo <- AssignModality(MarvelObject=marvel.demo,
sample.ids=sample.ids,
min.cells=5,
seed=1
)
marvel.demo$Modality$Results[1:5, c("tran_id", "event_type", "gene_id", "gene_short_name", "modality.bimodal.adj")]
## tran_id
## 1 chr1:62196435:62196528:+@chr1:62203447:62203557:+@chr1:62206519:62207636
## 2 chr15:24962114:24962209:+@chr15:24967029:24967152:+@chr15:24967932:24968082
## 3 chr15:24962114:24962209:+@chr15:24967029:24967240:+@chr15:24967932:24968082
## 4 chr17:51153576:51153662:+@chr17:51154225:51154444:+@chr17:51155651:51155780
## 5 chr10:78037194:78037304:+@chr10:78037439:78037441:+@chr10:78040204:78040225
## event_type gene_id gene_short_name modality.bimodal.adj
## 1 SE ENSG00000240563.2 L1TD1 Excluded.Primary
## 2 SE ENSG00000128739.22 SNRPN Excluded.Dispersed
## 3 SE ENSG00000128739.22 SNRPN Excluded.Dispersed
## 4 SE ENSG00000239672.7 NME1 Excluded.Dispersed
## 5 SE ENSG00000138326.20 RPS24 Excluded.Dispersed
# Tabulate modality proportion (overall)
marvel.demo <- PropModality(MarvelObject=marvel.demo,
modality.column="modality.bimodal.adj",
modality.type="extended",
event.type=c("SE", "MXE", "RI", "A5SS", "A3SS", "AFE", "ALE"),
across.event.type=FALSE
)
marvel.demo$Modality$Prop$DoughnutChart$Plot
## modality freq pct
## 4 Included.Primary 2 2.857143
## 3 Included.Dispersed 6 8.571429
## 2 Excluded.Primary 18 25.714286
## 1 Excluded.Dispersed 39 55.714286
## 5 Middle 2 2.857143
## 6 Multimodal 3 4.285714
min.cells
option. Here, we required the splicing event
to be expressed in at least 25 cells for the splicing event to be
included for modality assignment. This value should match that
previously defined in CountEvents
function.# Tabulate modality proportion (by event type)
marvel.demo <- PropModality(MarvelObject=marvel.demo,
modality.column="modality.bimodal.adj",
modality.type="extended",
event.type=c("SE", "MXE", "RI", "A5SS", "A3SS", "AFE", "ALE"),
across.event.type=TRUE,
prop.test="fisher",
prop.adj="fdr",
xlabels.size=8
)
marvel.demo$Modality$Prop$BarChart$Plot
## modality freq pct event_type
## 3 Included (Dispersed) 1 10 SE
## 2 Excluded (Primary) 2 20 SE
## 1 Excluded (Dispersed) 6 60 SE
## 4 Middle 1 10 SE
## 11 Multimodal 0 0 SE
## 21 Included (Primary) 0 0 SE
# Retrieve sample metadata
df.pheno <- marvel.demo$SplicePheno
# Define sample IDs
sample.ids <- df.pheno[which(df.pheno$cell.type=="Endoderm"), "sample.id"]
# Assign modality
marvel.demo <- AssignModality(MarvelObject=marvel.demo,
sample.ids=sample.ids,
min.cells=5,
seed=1
)
marvel.demo$Modality$Results[1:5, c("tran_id", "event_type", "gene_id", "gene_short_name", "modality.bimodal.adj")]
## tran_id
## 1 chr1:62196435:62196528:+@chr1:62203447:62203557:+@chr1:62206519:62207636
## 2 chr15:24962114:24962209:+@chr15:24967029:24967152:+@chr15:24967932:24968082
## 3 chr15:24962114:24962209:+@chr15:24967029:24967240:+@chr15:24967932:24968082
## 4 chr17:51153576:51153662:+@chr17:51154225:51154444:+@chr17:51155651:51155780
## 5 chr10:78037194:78037304:+@chr10:78037439:78037441:+@chr10:78040204:78040225
## event_type gene_id gene_short_name modality.bimodal.adj
## 1 SE ENSG00000240563.2 L1TD1 Excluded.Dispersed
## 2 SE ENSG00000128739.22 SNRPN Excluded.Primary
## 3 SE ENSG00000128739.22 SNRPN Excluded.Dispersed
## 4 SE ENSG00000239672.7 NME1 Excluded.Primary
## 5 SE ENSG00000138326.20 RPS24 Excluded.Dispersed
# Tabulate modality proportion (overall)
marvel.demo <- PropModality(MarvelObject=marvel.demo,
modality.column="modality.bimodal.adj",
modality.type="extended",
event.type=c("SE", "MXE", "RI", "A5SS", "A3SS", "AFE", "ALE"),
across.event.type=FALSE
)
marvel.demo$Modality$Prop$DoughnutChart$Plot
## modality freq pct
## 5 Included.Primary 2 2.857143
## 4 Included.Dispersed 7 10.000000
## 3 Excluded.Primary 28 40.000000
## 2 Excluded.Dispersed 29 41.428571
## 1 Bimodal 1 1.428571
## 6 Middle 1 1.428571
## 7 Multimodal 2 2.857143
# Tabulate modality proportion (by event type)
marvel.demo <- PropModality(MarvelObject=marvel.demo,
modality.column="modality.bimodal.adj",
modality.type="extended",
event.type=c("SE", "MXE", "RI", "A5SS", "A3SS", "AFE", "ALE"),
across.event.type=TRUE,
prop.test="fisher",
prop.adj="fdr",
xlabels.size=8
)
marvel.demo$Modality$Prop$BarChart$Plot
## modality freq pct event_type
## 2 Excluded (Primary) 3 30 SE
## 1 Excluded (Dispersed) 5 50 SE
## 3 Middle 1 10 SE
## 4 Multimodal 1 10 SE
## 11 Included (Dispersed) 0 0 SE
## 21 Bimodal 0 0 SE
# Define cell groups
# Retrieve sample metadata
df.pheno <- marvel.demo$SplicePheno
# Cell group 1 (reference)
cell.group.g1 <- df.pheno[which(df.pheno$cell.type=="iPSC"), "sample.id"]
# Cell group 2
cell.group.g2 <- df.pheno[which(df.pheno$cell.type=="Endoderm"), "sample.id"]
# DE analysis
marvel.demo <- CompareValues(MarvelObject=marvel.demo,
cell.group.g1=cell.group.g1,
cell.group.g2=cell.group.g2,
min.cells=3,
method="t.test",
method.adjust="fdr",
level="gene",
show.progress=FALSE
)
marvel.demo$DE$Exp$Table[1:5, ]
## gene_id gene_short_name gene_type n.cells.g1 n.cells.g2
## 1 ENSG00000128739.22 SNRPN protein_coding 15 15
## 2 ENSG00000067225.18 PKM protein_coding 15 15
## 3 ENSG00000137309.19 HMGA1 protein_coding 15 15
## 4 ENSG00000239672.7 NME1 protein_coding 15 13
## 5 ENSG00000120675.6 DNAJC15 protein_coding 14 15
## mean.g1 mean.g2 log2fc statistic p.val p.val.adj
## 1 9.203101 8.331827 -0.8712731 6.773031 9.876425e-07 5.333270e-05
## 2 7.896393 10.207866 2.3114738 -6.477287 2.845515e-06 7.682892e-05
## 3 7.276139 6.060553 -1.2155857 6.148961 5.694655e-06 1.025038e-04
## 4 8.985144 5.082584 -3.9025599 5.627087 5.476249e-05 6.832612e-04
## 5 3.871605 6.065549 2.1939435 -4.706610 6.484850e-05 6.832612e-04
min.cells
option. Here, we required the gene to be
expressed in at least 3 cells in either iPSCs or endoderm cells for the
gene to be included for analysis.show.progress
option. For the brevity of the tutorial,
we did not track the progress of differential expression analysis. But
users are advised to set this option to TRUE
when running
this step on their own devices.# Plot DE results
marvel.demo <- PlotDEValues(MarvelObject=marvel.demo,
pval=0.10,
log2fc=0.5,
point.size=0.1,
level="gene.global",
anno=FALSE
)
marvel.demo$DE$Exp.Global$Plot
## sig freq
## 1 up 4
## 2 down 20
## 3 n.s. 30
## gene_id gene_short_name sig
## 1 ENSG00000128739.22 SNRPN down
## 2 ENSG00000067225.18 PKM up
## 3 ENSG00000137309.19 HMGA1 down
## 4 ENSG00000239672.7 NME1 down
## 5 ENSG00000120675.6 DNAJC15 up
## 6 ENSG00000154473.18 BUB3 down
pval
option. The adjusted p-value, below which, the
gene is considered to be differentially expressed.log2fc
option. The absolute log2 fold change, above
which, the gene is considered to be differentially expressed.# Plot DE results with annotation of selected genes
# Retrieve DE output table
results <- marvel.demo$DE$Exp$Table
# Retrieve top genes
index <- which(results$log2fc > 2 | results$log2fc < -2)
gene_short_names <- results[index, "gene_short_name"]
# Plot
marvel.demo <- PlotDEValues(MarvelObject=marvel.demo,
pval=0.10,
log2fc=0.5,
point.size=0.1,
xlabel.size=10,
level="gene.global",
anno=TRUE,
anno.gene_short_name=gene_short_names
)
marvel.demo$DE$Exp.Global$Plot
marvel.demo <- CompareValues(MarvelObject=marvel.demo,
cell.group.g1=cell.group.g1,
cell.group.g2=cell.group.g2,
min.cells=5,
method=c("ad", "dts"),
method.adjust="fdr",
level="splicing",
event.type=c("SE", "MXE", "RI", "A5SS", "A3SS", "ALE", "AFE"),
show.progress=FALSE
)
head(marvel.demo$DE$PSI$Table[["ad"]])
## tran_id
## 110 chr17:8383254:8382781|8383157:-@chr17:8382143:8382315
## 210 chr17:8383157:8383193|8382781:8383164:-@chr17:8382143:8382315
## 310 chr15:24962114:24962209:+@chr15:24967029:24967152:+@chr15:24967932:24968082
## 410 chr8:144792587:144792245|144792366:-@chr8:144791992:144792140
## 55 chr8:144792366:144792423|144792245:144792389:-@chr8:144791992:144792140
## 61 chr5:150449703:150449739|150449492:150449696:-@chr5:150447585:150447735
## event_type gene_id gene_short_name gene_type n.cells.g1
## 110 A5SS ENSG00000161970.15 RPL26 protein_coding 15
## 210 AFE ENSG00000161970.15 RPL26 protein_coding 15
## 310 SE ENSG00000128739.22 SNRPN protein_coding 15
## 410 A5SS ENSG00000161016.17 RPL8 protein_coding 15
## 55 AFE ENSG00000161016.17 RPL8 protein_coding 15
## 61 AFE ENSG00000164587.13 RPS14 protein_coding 15
## n.cells.g2 mean.g1 mean.g2 mean.diff statistic p.val p.val.adj
## 110 15 6.663009 0.02754821 -6.635461 18.553 5.1855e-11 1.814925e-09
## 210 15 6.663009 0.02754821 -6.635461 18.553 5.1855e-11 1.814925e-09
## 310 13 8.944493 0.00000000 -8.944493 18.128 8.3362e-11 1.945113e-09
## 410 15 9.446244 0.53253936 -8.913705 16.638 6.7920e-10 9.508800e-09
## 55 15 9.446244 0.53253936 -8.913705 16.638 6.7920e-10 9.508800e-09
## 61 15 3.830668 0.04283629 -3.787831 15.942 1.7288e-09 2.016933e-08
## modality.bimodal.adj.g1 modality.bimodal.adj.g2 n.cells.outliers.g1
## 110 Excluded.Dispersed Excluded.Primary 15
## 210 Excluded.Dispersed Excluded.Primary 15
## 310 Excluded.Dispersed Excluded.Primary 15
## 410 Excluded.Dispersed Excluded.Primary 15
## 55 Excluded.Dispersed Excluded.Primary 15
## 61 Excluded.Primary Excluded.Primary 15
## n.cells.outliers.g2 outliers
## 110 1 FALSE
## 210 1 FALSE
## 310 0 FALSE
## 410 2 FALSE
## 55 2 FALSE
## 61 3 FALSE
## tran_id
## 4 chr10:78037194:78037304:+@chr10:78040204:78040225:+@chr10:78040615:78040747
## 5 chr11:85981129:85981228:-@chr11:85978070:85978093:-@chr11:85976623:85976682
## 23 chr4:108620569:108620600|108620656:108620712:+@chr4:108621951:108622024
## 1 chr7:5528719:5528564:-@chr7:5528344:5528281
## 2 chr6:73519228:73519128:-@chr6:73519064:73518932
## 110 chr15:24962114:24962209:+@chr15:24967029:24967152:+@chr15:24967932:24968082
## event_type gene_id gene_short_name gene_type n.cells.g1
## 4 SE ENSG00000138326.20 RPS24 protein_coding 15
## 5 SE ENSG00000073921.18 PICALM protein_coding 8
## 23 AFE ENSG00000109475.16 RPL34 protein_coding 15
## 1 RI ENSG00000075624.16 ACTB protein_coding 15
## 2 RI ENSG00000156508.18 EEF1A1 protein_coding 15
## 110 SE ENSG00000128739.22 SNRPN protein_coding 15
## n.cells.g2 mean.g1 mean.g2 mean.diff statistic p.val p.val.adj
## 4 15 43.540716 63.151243 19.610527 1.9736640 5e-04 5e-04
## 5 8 90.322395 8.970588 -81.351806 4.6688434 5e-04 5e-04
## 23 15 33.464032 7.113192 -26.350840 2.2608281 5e-04 5e-04
## 1 15 89.823731 84.890452 -4.933279 0.4987283 5e-04 5e-04
## 2 15 86.866886 82.948010 -3.918876 0.3849563 5e-04 5e-04
## 110 13 8.944493 0.000000 -8.944493 0.7556724 5e-04 5e-04
## modality.bimodal.adj.g1 modality.bimodal.adj.g2 n.cells.outliers.g1
## 4 Middle Middle 0
## 5 Included.Dispersed Excluded.Dispersed 0
## 23 Middle Excluded.Dispersed 0
## 1 Included.Primary Included.Dispersed 15
## 2 Included.Primary Included.Primary 15
## 110 Excluded.Dispersed Excluded.Primary 15
## n.cells.outliers.g2 outliers
## 4 0 FALSE
## 5 0 FALSE
## 23 0 FALSE
## 1 15 FALSE
## 2 15 FALSE
## 110 0 FALSE
min.cells
option. Here, we required the splicing event
to be expressed in at least 25 cells in both iPSCs and endoderm cells
for the splicing event to be included for analysis.method
option. We recommend Anderson-Darling
(ad
) and D Test Statistics (dts
) for comparing
the overall PSI distribution between two cell populations.show.progress
option. For the brevity of the tutorial,
we did not track the progress of differential expression analysis. But
users are advised to set this option to TRUE
when running
this step on their own devices.marvel.demo <- PlotDEValues(MarvelObject=marvel.demo,
method="ad",
pval=0.10,
level="splicing.distance",
anno=TRUE,
anno.tran_id=marvel.demo$DE$PSI$Table[["ad"]]$tran_id[c(1:10)]
)
marvel.demo$DE$PSI$Plot[["ad"]]
method
option. Plot results for ad
statistical test.pval
option. The adjusted p-value, below which, the
splicing event is considered to be differentially spliced.level
option. When set to
"splicing.distance"
, the distance statistic will be used to
plot the DE results. Only applicable when method
set to
"ad"
or "dts"
. When set to
splicing.mean
. The typical volcano plot is returned, and
the delta
option may be used.delta
option. when level
set to
"splicing.mean"
, the absolute differences in mean PSI
values between the two cell populations, above which, the splicing event
is considered to be differentially spliced.marvel.demo <- CompareValues(MarvelObject=marvel.demo,
cell.group.g1=cell.group.g1,
cell.group.g2=cell.group.g2,
psi.method=c("ad", "dts"),
psi.pval=c(0.10, 0.10),
psi.delta=0,
method.de.gene="t.test",
method.adjust.de.gene="fdr",
downsample=FALSE,
show.progress=FALSE,
level="gene.spliced"
)
head(marvel.demo$DE$Exp.Spliced$Table)
## gene_id gene_short_name gene_type n.cells.g1 n.cells.g2
## 1 ENSG00000128739.22 SNRPN protein_coding 15 15
## 2 ENSG00000067225.18 PKM protein_coding 15 15
## 3 ENSG00000137309.19 HMGA1 protein_coding 15 15
## 4 ENSG00000239672.7 NME1 protein_coding 15 13
## 5 ENSG00000120675.6 DNAJC15 protein_coding 14 15
## 6 ENSG00000154473.18 BUB3 protein_coding 15 14
## mean.g1 mean.g2 log2fc statistic p.val p.val.adj
## 1 9.203101 8.331827 -0.8712731 6.773031 9.876425e-07 3.851806e-05
## 2 7.896393 10.207866 2.3114738 -6.477287 2.845515e-06 5.548755e-05
## 3 7.276139 6.060553 -1.2155857 6.148961 5.694655e-06 7.403052e-05
## 4 8.985144 5.082584 -3.9025599 5.627087 5.476249e-05 4.934664e-04
## 5 3.871605 6.065549 2.1939435 -4.706610 6.484850e-05 4.934664e-04
## 6 7.893652 4.895821 -2.9978306 5.221021 7.591791e-05 4.934664e-04
psi.method
, psi.pval
, and
psi.delta
options. For defining differentially spliced
events whose corresponding genes will be included for differential gene
expression analysis.method.de.gene
and method.adjust.de.gene
options. The statistical test and multiple testing method for
differential gene expression analysis.# Plot: No annotation
marvel.demo <- PlotDEValues(MarvelObject=marvel.demo,
method=c("ad", "dts"),
psi.pval=c(0.10, 0.10),
psi.delta=0,
gene.pval=0.10,
gene.log2fc=0.5,
point.size=0.1,
xlabel.size=8,
level="gene.spliced",
anno=FALSE
)
marvel.demo$DE$Exp.Spliced$Plot
## sig freq
## 1 up 4
## 2 down 10
## 3 n.s. 25
method
option. Merge results from ad
and
dts
statistical tests.pval.psi
option. The adjusted p-value, below which, the
splicing event is considered to be differentially spliced.delta.psi
option. The absolute difference in mean PSI
values between the two cell populations, above which, the splicing event
is considered to be differentially spliced.gene.pval
option. The adjusted p-value, below which,
the gene is considered to be differentially expressed.gene.log2fc
option. The absolute log2 fold change,
above which, the gene is considered to be differentially expressed.# Plot: Annotate top genes
results <- marvel.demo$DE$Exp.Spliced$Table
index <- which((results$log2fc > 2 | results$log2fc < -2) & -log10(results$p.val.adj) > 1)
gene_short_names <- results[index, "gene_short_name"]
marvel.demo <- PlotDEValues(MarvelObject=marvel.demo,
method=c("ad", "dts"),
psi.pval=c(0.10, 0.10),
psi.delta=0,
gene.pval=0.10,
gene.log2fc=0.5,
point.size=0.1,
xlabel.size=8,
level="gene.spliced",
anno=TRUE,
anno.gene_short_name=gene_short_names
)
marvel.demo$DE$Exp.Spliced$Plot
# Define sample groups
# Retrieve sample metadata
df.pheno <- marvel.demo$SplicePheno
# Group 1
sample.ids.1 <- df.pheno[which(df.pheno$cell.type=="iPSC"), "sample.id"]
# Group 2
sample.ids.2 <- df.pheno[which(df.pheno$cell.type=="Endoderm"), "sample.id"]
# Merge
cell.group.list <- list("iPSC"=sample.ids.1,
"Endoderm"=sample.ids.2
)
# Retrieve DE genes
# Retrieve DE result table
results.de.exp <- marvel.demo$DE$Exp$Table
# Retrieve relevant gene_ids
index <- which(results.de.exp$p.val.adj < 0.10 & abs(results.de.exp$log2fc) > 0.5)
gene_ids <- results.de.exp[index, "gene_id"]
# Reduce dimension
marvel.demo <- RunPCA(MarvelObject=marvel.demo,
cell.group.column="cell.type",
cell.group.order=c("iPSC", "Endoderm"),
cell.group.colors=NULL,
min.cells=5,
features=gene_ids,
point.size=2.5,
level="gene"
)
marvel.demo$PCA$Exp$Plot
min.cells
option. Here, we required the gene to be
expressed in at least 25 cells across the overall cell populations
defined in cell.group.list
for the gene to be included for
analysis.feature
option. Gene IDs to be used for dimension
reduction.# Retrieve DE tran_ids
method <- c("ad", "dts")
tran_ids.list <- list()
for(i in 1:length(method)) {
results.de.psi <- marvel.demo$DE$PSI$Table[[method[i]]]
index <- which(results.de.psi$p.val.adj < 0.10 & results.de.psi$outlier==FALSE)
tran_ids <- results.de.psi[index, "tran_id"]
tran_ids.list[[i]] <- tran_ids
}
tran_ids <- unique(unlist(tran_ids.list))
# Reduce dimension
marvel.demo <- RunPCA(MarvelObject=marvel.demo,
cell.group.column="cell.type",
cell.group.order=c("iPSC", "Endoderm"),
cell.group.colors=NULL,
min.cells=5,
features=tran_ids,
point.size=2.5,
level="splicing",
method.impute="random",
seed=1
)
marvel.demo$PCA$PSI$Plot
min.cells
option. Here, we required the splicing event
to be expressed in at least 25 cells across the overall cell populations
defined in cell.group.list
for the splicing event to be
included for analysis.feature
option. Splicing events to be used for
dimension reduction.method.impute
option. Method to impute missing PSI
values. Indicate "random"
to to randomly assign any values
between 0-100 to missing values (Song et el., 2017). Indicate
"population.mean"
to use the mean PSI value of each cell
group to impute the missing values found in the corresponding cell group
(Huang et al., 2021).seed
option. Only applicable when
method.impute
option set to "random"
. This
option ensures that the randomly imputed values will always be
reproducible.# Retrieve relevant gene_ids
results.de.exp <- marvel.demo$DE$Exp$Table
index <- which(results.de.exp$p.val.adj < 0.10 & abs(results.de.exp$log2fc) > 0.5)
gene_ids <- results.de.exp[-index, "gene_id"]
# Reduce dimension
marvel.demo <- RunPCA(MarvelObject=marvel.demo,
cell.group.column="cell.type",
cell.group.order=c("iPSC", "Endoderm"),
cell.group.colors=NULL,
min.cells=5,
features=gene_ids,
point.size=2.5,
level="gene"
)
marvel.demo$PCA$Exp$Plot
# Retrieve non-DE gene_ids
results.de.exp <- marvel.demo$DE$Exp$Table
index <- which(results.de.exp$p.val.adj > 0.10 )
gene_ids <- results.de.exp[, "gene_id"]
# Retrieve tran_ids
df.feature <- do.call(rbind.data.frame, marvel.demo$SpliceFeatureValidated)
df.feature <- df.feature[which(df.feature$gene_id %in% gene_ids), ]
# Reduce dimension: All DE splicing events
tran_ids <- df.feature$tran_id
marvel.demo <- RunPCA(MarvelObject=marvel.demo,
cell.group.column="cell.type",
cell.group.order=c("iPSC", "Endoderm"),
cell.group.colors=NULL,
min.cells=5,
features=tran_ids,
point.size=2.5,
level="splicing",
method.impute="random",
seed=1
)
plot.all <- marvel.demo$PCA$PSI$Plot
# Reduce dimension: SE
tran_ids <- df.feature[which(df.feature$event_type=="SE"), "tran_id"]
marvel.demo <- RunPCA(MarvelObject=marvel.demo,
cell.group.column="cell.type",
cell.group.order=c("iPSC", "Endoderm"),
cell.group.colors=NULL,
min.cells=5,
features=tran_ids,
point.size=2.5,
level="splicing",
method.impute="random",
seed=1
)
plot.se <- marvel.demo$PCA$PSI$Plot
# Reduce dimension: MXE
tran_ids <- df.feature[which(df.feature$event_type=="MXE"), "tran_id"]
marvel.demo <- RunPCA(MarvelObject=marvel.demo,
cell.group.column="cell.type",
cell.group.order=c("iPSC", "Endoderm"),
cell.group.colors=NULL,
min.cells=5,
features=tran_ids,
point.size=2.5,
level="splicing",
method.impute="random",
seed=1
)
plot.mxe <- marvel.demo$PCA$PSI$Plot
# Reduce dimension: RI
tran_ids <- df.feature[which(df.feature$event_type=="RI"), "tran_id"]
marvel.demo <- RunPCA(MarvelObject=marvel.demo,
cell.group.column="cell.type",
cell.group.order=c("iPSC", "Endoderm"),
cell.group.colors=NULL,
min.cells=5,
features=tran_ids,
point.size=2.5,
level="splicing",
method.impute="random",
seed=1
)
plot.ri <- marvel.demo$PCA$PSI$Plot
# Reduce dimension: A5SS
tran_ids <- df.feature[which(df.feature$event_type=="A5SS"), "tran_id"]
marvel.demo <- RunPCA(MarvelObject=marvel.demo,
cell.group.column="cell.type",
cell.group.order=c("iPSC", "Endoderm"),
cell.group.colors=NULL,
min.cells=5,
features=tran_ids,
point.size=2.5,
level="splicing",
method.impute="random",
seed=1
)
plot.a5ss <- marvel.demo$PCA$PSI$Plot
# Reduce dimension: A3SS
tran_ids <- df.feature[which(df.feature$event_type=="A3SS"), "tran_id"]
marvel.demo <- RunPCA(MarvelObject=marvel.demo,
cell.group.column="cell.type",
cell.group.order=c("iPSC", "Endoderm"),
cell.group.colors=NULL,
min.cells=5,
features=tran_ids,
point.size=2.5,
level="splicing",
method.impute="random",
seed=1
)
plot.a3ss <- marvel.demo$PCA$PSI$Plot
# Reduce dimension: AFE
tran_ids <- df.feature[which(df.feature$event_type=="AFE"), "tran_id"]
marvel.demo <- RunPCA(MarvelObject=marvel.demo,
cell.group.column="cell.type",
cell.group.order=c("iPSC", "Endoderm"),
cell.group.colors=NULL,
min.cells=5,
features=tran_ids,
point.size=2.5,
level="splicing",
method.impute="random",
seed=1
)
plot.afe <- marvel.demo$PCA$PSI$Plot
# Reduce dimension:
tran_ids <- df.feature[which(df.feature$event_type=="ALE"), "tran_id"]
marvel.demo <- RunPCA(MarvelObject=marvel.demo,
cell.group.column="cell.type",
cell.group.order=c("iPSC", "Endoderm"),
cell.group.colors=NULL,
min.cells=5,
features=tran_ids,
point.size=2.5,
level="splicing",
method.impute="random",
seed=1
)
plot.ale <- marvel.demo$PCA$PSI$Plot
# Arrange and view plots
# Read plots from right to left for each row
grid.arrange(plot.all, plot.se,
plot.mxe, plot.ri,
plot.a5ss, plot.a3ss,
plot.afe, plot.ale,
nrow=4)
PlotValues
for plotting selected splicing events.# Define sample groups
# Retrieve sample metadata
df.pheno <- marvel.demo$SplicePheno
# Group 1
sample.ids.1 <- df.pheno[which(df.pheno$cell.type=="iPSC"), "sample.id"]
# Group 2
sample.ids.2 <- df.pheno[which(df.pheno$cell.type=="Endoderm"), "sample.id"]
# Merge
cell.group.list <- list("iPSC"=sample.ids.1,
"Endoderm"=sample.ids.2
)
# Assign modality dynamics
marvel.demo <- ModalityChange(MarvelObject=marvel.demo,
method=c("ad", "dts"),
psi.pval=c(0.10, 0.10)
)
marvel.demo$DE$Modality$Plot
## tran_id
## 1 chr17:8383254:8382781|8383157:-@chr17:8382143:8382315
## 2 chr17:8383157:8383193|8382781:8383164:-@chr17:8382143:8382315
## 3 chr15:24962114:24962209:+@chr15:24967029:24967152:+@chr15:24967932:24968082
## 4 chr8:144792587:144792245|144792366:-@chr8:144791992:144792140
## 5 chr8:144792366:144792423|144792245:144792389:-@chr8:144791992:144792140
## 6 chr5:150449703:150449739|150449492:150449696:-@chr5:150447585:150447735
## event_type gene_id gene_short_name gene_type
## 1 A5SS ENSG00000161970.15 RPL26 protein_coding
## 2 AFE ENSG00000161970.15 RPL26 protein_coding
## 3 SE ENSG00000128739.22 SNRPN protein_coding
## 4 A5SS ENSG00000161016.17 RPL8 protein_coding
## 5 AFE ENSG00000161016.17 RPL8 protein_coding
## 6 AFE ENSG00000164587.13 RPS14 protein_coding
## modality.bimodal.adj.g1 modality.bimodal.adj.g2 modality.change
## 1 Excluded.Dispersed Excluded.Primary Implicit
## 2 Excluded.Dispersed Excluded.Primary Implicit
## 3 Excluded.Dispersed Excluded.Primary Implicit
## 4 Excluded.Dispersed Excluded.Primary Implicit
## 5 Excluded.Dispersed Excluded.Primary Implicit
## 6 Excluded.Primary Excluded.Primary Restricted
## modality.change freq pct
## 1 Explicit 8 15.38462
## 2 Implicit 22 42.30769
## 3 Restricted 22 42.30769
method
option. The statistical tests used earlier for
differential splicing analysis. Here, we combined the differentially
spliced events from both ad
and dts
tests.pval
option. The adjusted p-value, below which, the
splicing event is considered to be differentially spliced. The numeric
vector should be the same length as the method
option.# Example 1
tran_id <- "chr4:108620569:108620600|108620656:108620712:+@chr4:108621951:108622024"
marvel.demo <- PlotValues(MarvelObject=marvel.demo,
cell.group.list=cell.group.list,
feature=tran_id,
xlabels.size=5,
level="splicing",
min.cells=5
)
plot.1 <- marvel.demo$adhocPlot$PSI
# Example 2
tran_id <- "chr12:110502049:110502117:-@chr12:110499535:110499546:-@chr12:110496012:110496203"
marvel.demo <- PlotValues(MarvelObject=marvel.demo,
cell.group.list=cell.group.list,
feature=tran_id,
xlabels.size=5,
level="splicing",
min.cells=5
)
plot.2 <- marvel.demo$adhocPlot$PSI
# Example 3
tran_id <- "chr9:35685269:35685339:-@chr9:35685064:35685139:-@chr9:35684732:35684807:-@chr9:35684488:35684550"
marvel.demo <- PlotValues(MarvelObject=marvel.demo,
cell.group.list=cell.group.list,
feature=tran_id,
xlabels.size=5,
level="splicing",
min.cells=5
)
plot.3 <- marvel.demo$adhocPlot$PSI
# Example 4
tran_id <- "chr11:85981129:85981228:-@chr11:85978070:85978093:-@chr11:85976623:85976682"
marvel.demo <- PlotValues(MarvelObject=marvel.demo,
cell.group.list=cell.group.list,
feature=tran_id,
xlabels.size=5,
level="splicing",
min.cells=5
)
plot.4 <- marvel.demo$adhocPlot$PSI
# Arrange and view plots
# Read plots from right to left for each row
grid.arrange(plot.1, plot.2,
plot.3, plot.4,
nrow=1)
# Example 1
tran_id <- "chr17:8383254:8382781|8383157:-@chr17:8382143:8382315"
marvel.demo <- PlotValues(MarvelObject=marvel.demo,
cell.group.list=cell.group.list,
feature=tran_id,
xlabels.size=5,
level="splicing",
min.cells=5
)
plot.1 <- marvel.demo$adhocPlot$PSI
# Example 2
tran_id <- "chr17:8383157:8383193|8382781:8383164:-@chr17:8382143:8382315"
marvel.demo <- PlotValues(MarvelObject=marvel.demo,
cell.group.list=cell.group.list,
feature=tran_id,
xlabels.size=5,
level="splicing",
min.cells=5
)
plot.2 <- marvel.demo$adhocPlot$PSI
# Example 3
tran_id <- "chr15:24962114:24962209:+@chr15:24967029:24967152:+@chr15:24967932:24968082"
marvel.demo <- PlotValues(MarvelObject=marvel.demo,
cell.group.list=cell.group.list,
feature=tran_id,
xlabels.size=5,
level="splicing",
min.cells=5
)
plot.3 <- marvel.demo$adhocPlot$PSI
# Example 4
tran_id <- "chr8:144792587:144792245|144792366:-@chr8:144791992:144792140"
marvel.demo <- PlotValues(MarvelObject=marvel.demo,
cell.group.list=cell.group.list,
feature=tran_id,
xlabels.size=5,
level="splicing",
min.cells=5
)
plot.4 <- marvel.demo$adhocPlot$PSI
# Arrange and view plots
# Read plots from right to left for each row
grid.arrange(plot.1, plot.2,
plot.3, plot.4,
nrow=1)
# Example 1
tran_id <- "chr5:150449703:150449739|150449492:150449696:-@chr5:150447585:150447735"
marvel.demo <- PlotValues(MarvelObject=marvel.demo,
cell.group.list=cell.group.list,
feature=tran_id,
xlabels.size=5,
level="splicing",
min.cells=5
)
plot.1 <- marvel.demo$adhocPlot$PSI
# Example 2
tran_id <- "chr12:56725340:56724962|56725263:-@chr12:56724452:56724523"
marvel.demo <- PlotValues(MarvelObject=marvel.demo,
cell.group.list=cell.group.list,
feature=tran_id,
xlabels.size=5,
level="splicing",
min.cells=5
)
plot.2 <- marvel.demo$adhocPlot$PSI
# Example 3
tran_id <- "chr10:78037194:78037304:+@chr10:78037439:78037441:+@chr10:78040204:78040225"
marvel.demo <- PlotValues(MarvelObject=marvel.demo,
cell.group.list=cell.group.list,
feature=tran_id,
xlabels.size=5,
level="splicing",
min.cells=5
)
plot.3 <- marvel.demo$adhocPlot$PSI
# Example 4
tran_id <- "chr10:78037194:78037304:+@chr10:78037439|78040204:78040225"
marvel.demo <- PlotValues(MarvelObject=marvel.demo,
cell.group.list=cell.group.list,
feature=tran_id,
xlabels.size=5,
level="splicing",
min.cells=5
)
plot.4 <- marvel.demo$adhocPlot$PSI
# Arrange and view plots
# Read plots from right to left for each row
grid.arrange(plot.1, plot.2,
plot.3, plot.4,
nrow=1)
PlotValues
for plotting selected splicing events and
genes.CompareValues
with the level
option set to gene.spliced
needs to be executed prior to proceeding with gene-splicing dynamics
analysis below. Kindly refer to
Differential (spliced) gene analysis
section of this
tutorial.marvel.demo <- IsoSwitch(MarvelObject=marvel.demo,
method=c("ad", "dts"),
psi.pval=c(0.10, 0.10),
psi.delta=0,
gene.pval=0.10,
gene.log2fc=0.5
)
marvel.demo$DE$Cor$Plot
## gene_id gene_short_name gene_type cor
## 3 ENSG00000128739.22 SNRPN protein_coding Coordinated
## 7 ENSG00000196531.10 NACA protein_coding Iso-Switch
## 17 ENSG00000239672.7 NME1 protein_coding Coordinated
## 19 ENSG00000109475.16 RPL34 protein_coding Iso-Switch
## 20 ENSG00000156482.11 RPL30 protein_coding Iso-Switch
## 21 ENSG00000111237.18 VPS29 protein_coding Iso-Switch
## cor freq pct
## 1 Coordinated 8 20.51282
## 2 Opposing 6 15.38462
## 3 Iso-Switch 25 64.10256
method
option. Merge results from ad
and
dts
statistical test.pval.psi
option. The adjusted p-value, below which, the
splicing event is considered to be differentially spliced.delta.psi
option. The absolute difference in mean PSI
values between the two cell populations, above which, the splicing event
is considered to be differentially spliced.gene.pval
option. The adjusted p-value, below which,
the gene is considered to be differentially expressed.gene.log2fc
option. The absolute log2 fold change,
above which, the gene is considered to be differentially expressed.# Define cell groups
# Retrieve sample metadata
df.pheno <- marvel.demo$SplicePheno
# Group 1
sample.ids.1 <- df.pheno[which(df.pheno$cell.type=="iPSC"), "sample.id"]
# Group 2
sample.ids.2 <- df.pheno[which(df.pheno$cell.type=="Endoderm"), "sample.id"]
# Merge
cell.group.list <- list("iPSC"=sample.ids.1,
"Endoderm"=sample.ids.2
)
# Example 1
# Gene
df.feature <- marvel.demo$GeneFeature
gene_id <- df.feature[which(df.feature$gene_short_name=="CMC2"), "gene_id"]
marvel.demo <- PlotValues(MarvelObject=marvel.demo,
cell.group.list=cell.group.list,
feature=gene_id,
maintitle="gene_short_name",
xlabels.size=7,
level="gene"
)
plot.1_gene <- marvel.demo$adhocPlot$Exp
# Splicing
tran_id <- "chr16:80981806:80981877:-@chr16:80980808:80980879|80976003:80976179"
marvel.demo <- PlotValues(MarvelObject=marvel.demo,
cell.group.list=cell.group.list,
feature=tran_id,
xlabels.size=7,
level="splicing",
min.cells=5
)
plot.1_splicing <- marvel.demo$adhocPlot$PSI
# Example 2
# Gene
df.feature <- marvel.demo$GeneFeature
gene_id <- df.feature[which(df.feature$gene_short_name=="HNRNPC"), "gene_id"]
marvel.demo <- PlotValues(MarvelObject=marvel.demo,
cell.group.list=cell.group.list,
feature=gene_id,
maintitle="gene_short_name",
xlabels.size=7,
level="gene"
)
plot.2_gene <- marvel.demo$adhocPlot$Exp
# Splicing
tran_id <- "chr14:21231072:21230958|21230997:-@chr14:21230319:21230366"
marvel.demo <- PlotValues(MarvelObject=marvel.demo,
cell.group.list=cell.group.list,
feature=tran_id,
xlabels.size=7,
level="splicing",
min.cells=5
)
plot.2_splicing <- marvel.demo$adhocPlot$PSI
# Arrange and view plots
# Read plots from right to left for each row
grid.arrange(plot.1_gene, plot.1_splicing,
plot.2_gene, plot.2_splicing,
nrow=2)
# Example 1
# Gene
df.feature <- marvel.demo$GeneFeature
gene_id <- df.feature[which(df.feature$gene_short_name=="APOO"), "gene_id"]
marvel.demo <- PlotValues(MarvelObject=marvel.demo,
cell.group.list=cell.group.list,
feature=gene_id,
maintitle="gene_short_name",
xlabels.size=7,
level="gene"
)
plot.1_gene <- marvel.demo$adhocPlot$Exp
# Splicing
tran_id <- "chrX:23840313:23840377:-@chrX:23833353:23833612|23833367:23833510"
marvel.demo <- PlotValues(MarvelObject=marvel.demo,
cell.group.list=cell.group.list,
feature=tran_id,
xlabels.size=7,
level="splicing",
min.cells=5
)
plot.1_splicing <- marvel.demo$adhocPlot$PSI
# Example 2
# Gene
df.feature <- marvel.demo$GeneFeature
gene_id <- df.feature[which(df.feature$gene_short_name=="BUB3"), "gene_id"]
marvel.demo <- PlotValues(MarvelObject=marvel.demo,
cell.group.list=cell.group.list,
feature=gene_id,
maintitle="gene_short_name",
xlabels.size=7,
level="gene"
)
plot.2_gene <- marvel.demo$adhocPlot$Exp
# Splicing
tran_id <- "chr10:123162612:123162828:+@chr10:123163820:123170467|123165047:123165365"
marvel.demo <- PlotValues(MarvelObject=marvel.demo,
cell.group.list=cell.group.list,
feature=tran_id,
xlabels.size=7,
level="splicing",
min.cells=5
)
plot.2_splicing <- marvel.demo$adhocPlot$PSI
# Arrange and view plots
# Read plots from right to left for each row
grid.arrange(plot.1_gene, plot.1_splicing,
plot.2_gene, plot.2_splicing,
nrow=2)
# Example 1
# Gene
df.feature <- marvel.demo$GeneFeature
gene_id <- df.feature[which(df.feature$gene_short_name=="AC004086.1"), "gene_id"]
marvel.demo <- PlotValues(MarvelObject=marvel.demo,
cell.group.list=cell.group.list,
feature=gene_id,
maintitle="gene_short_name",
xlabels.size=7,
level="gene"
)
plot.1_gene <- marvel.demo$adhocPlot$Exp
# Splicing
tran_id <- "chr12:112409641:112409411|112409587:-@chr12:112408420:112408656"
marvel.demo <- PlotValues(MarvelObject=marvel.demo,
cell.group.list=cell.group.list,
feature=tran_id,
xlabels.size=7,
level="splicing",
min.cells=5
)
plot.1_splicing <- marvel.demo$adhocPlot$PSI
# Example 2
# Gene
df.feature <- marvel.demo$GeneFeature
gene_id <- df.feature[which(df.feature$gene_short_name=="ACP1"), "gene_id"]
marvel.demo <- PlotValues(MarvelObject=marvel.demo,
cell.group.list=cell.group.list,
feature=gene_id,
maintitle="gene_short_name",
xlabels.size=7,
level="gene"
)
plot.2_gene <- marvel.demo$adhocPlot$Exp
# Splicing
tran_id <- "chr2:271866:271939:+@chr2:272037:272150:+@chr2:272192:272305:+@chr2:275140:275201"
marvel.demo <- PlotValues(MarvelObject=marvel.demo,
cell.group.list=cell.group.list,
feature=tran_id,
xlabels.size=7,
level="splicing",
min.cells=5
)
plot.2_splicing <- marvel.demo$adhocPlot$PSI
# Arrange and view plots
# Read plots from right to left for each row
grid.arrange(plot.1_gene, plot.1_splicing,
plot.2_gene, plot.2_splicing,
nrow=2)
marvel.demo <- BioPathways(MarvelObject=marvel.demo,
method=c("ad", "dts"),
pval=0.10,
species="human"
)
head(marvel.demo$DE$BioPathways$Table)
method
option. Merge results from ad
and
dts
statistical test.pval
option. The adjusted p-value, below which, the
splicing event is considered to be differentially spliced.species
option. MARVEL also supports GO analysis of
"mouse"
.custom.genes
option. In lieu of specifying genes with
the method
and pval
options, users may specify
any custom set of genes using this option.# Plot top pathways
df <- marvel.demo$DE$BioPathways$Table
go.terms <- df$Description[c(1:10)]
marvel.demo <- BioPathways.Plot(MarvelObject=marvel.demo,
go.terms=go.terms,
y.label.size=10
)
marvel.demo$DE$BioPathways$Plot
BioPathways
function. Simply specify
the custom set of pathways using the go.terms
option of the
BioPathways.Plot
function.