---
title: "cnv"
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{cnv}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r, include = FALSE}
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

# Locate an example data file used by this vignette.
#
# The path components in `...` are tried, in order, against: the path as
# given, "vignettes/", "inst/extdata/", "$PWD/inst/extdata/", and the
# "extdata" and "doc" directories of the installed oncoPredict package.
#
# Returns the first candidate path that exists on disk.
# Stops with an error if none of the candidates exists.
vignette_file <- function(...) {
  candidates <- c(
    file.path(...),
    file.path("vignettes", ...),
    file.path("inst", "extdata", ...),
    file.path(Sys.getenv("PWD"), "inst", "extdata", ...),
    system.file("extdata", ..., package = "oncoPredict"),
    system.file("doc", ..., package = "oncoPredict")
  )
  # system.file() returns "" when nothing is found, so drop empty strings
  # as well as paths that do not exist.
  candidates <- candidates[nzchar(candidates) & file.exists(candidates)]
  if (length(candidates) == 0) {
    stop("Could not find vignette file: ", file.path(...), call. = FALSE)
  }
  candidates[[1]]
}
```

```{r setup, eval=requireNamespace("org.Hs.eg.db", quietly = TRUE) && requireNamespace("TxDb.Hsapiens.UCSC.hg19.knownGene", quietly = TRUE)}
library(oncoPredict)

# This vignette demonstrates how to map segmented copy-number data to genes
# with map_cnv() and test predicted drug response against CNA amplifications
# with idwas().

# First, download CNV data for your cancer of interest from the GDC database.
# The downloaded CNV data can be exported as cnv.txt.
# This code will export the CNV data into a text file called 'cnv.txt',
# containing a table with colnames() 'Sample', 'Chromosome', 'Start', 'End',
# 'Num_Probes', 'Segment_Mean'.
# The genome of reference is hg19.
# query.gbm.nocnv <- GDCquery(project = "TCGA-GBM",
#                             data.category = "Copy number variation",
#                             legacy = TRUE,
#                             file.type = "nocnv_hg19.seg",
#                             sample.type = c("Primary Tumor"))
# The total number of patients GDC has CNV data for:
# patient_total <- nrow((query.gbm.nocnv$results)[[1]])
# query.gbm.nocnv$results[[1]] <- query.gbm.nocnv$results[[1]][1:patient_total, ]
# GDCdownload(query.gbm.nocnv, files.per.chunk = 100)
# gbm.nocnv <- GDCprepare(query.gbm.nocnv, save = TRUE, save.filename = "GBMnocnvhg19.rda")
# write.table(gbm.nocnv, file = 'cnv.txt')

# Second, apply map_cnv() to map cnv data to genes.
# The mapping is accomplished by intersecting each gene with the overlapping
# CNV segment. If a gene is not fully captured by a CNV segment, NA is assigned.

# Determine the parameters of the map_cnv() function.
Cnvs <- read.table(vignette_file("cnv.txt"), header = TRUE, row.names = 1)

# Third, apply idwas() to test each CNA amplification and each drug.
# The p-values and beta-values for each test will be returned as an object.

# Determine the parameters of the idwas() function...

# Set the drug_prediction parameter.
# Make sure rownames() are samples, and colnames() are drugs.
# Also make sure this data is a data frame.
drug_prediction <- t(
  as.data.frame(
    read.table(vignette_file("DrugPredictions.txt"), header = TRUE, row.names = 1)
  )
)
# dim(drug_prediction) #165 198

# In this example, replace '.' with '-' so the TCGA sample identifiers match
# the format used in the CNV data.
rownames(drug_prediction) <- gsub(".", "-", rownames(drug_prediction), fixed = TRUE)

# Make sure the sample identifiers in the 'drug prediction' data are of similar
# form as the sample identifiers in the 'data' parameter.
# Here that means dropping the first two characters of every identifier
# (presumably a prefix in the example file -- confirm against your own data).
rows <- rownames(drug_prediction)
rownames(drug_prediction) <- substring(rows, 3, nchar(rows))

# t() returned a matrix, so convert back to a data frame.
drug_prediction <- as.data.frame(drug_prediction)

# Determine the number of samples you want the CNAs to be amplified in.
# The default is 10.
# The included CNV example is intentionally small, so use a lower threshold
# for this vignette.
n <- 5

# Indicate whether or not you would like to test CNA amplification data.
# If TRUE, you will test CNA amplifications. If FALSE, you will test mutation data.
cnv <- TRUE

# Apply map_cnv()
mapped_cnv <- map_cnv(Cnvs = Cnvs)

# Set the data parameter.
# Make sure this data is a data frame and that colnames() are samples.
data <- as.data.frame(mapped_cnv$theCnvQuantVecList_mat)

# Trim the last 12 characters from each sample identifier.
samps <- colnames(data)
colnames(data) <- substr(samps, 1, nchar(samps) - 12)

# Apply idwas()
idwas_results <- idwas(drug_prediction = drug_prediction, data = data, n = n, cnv = cnv)
```