---
title: "cnv"
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{cnv}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r, include = FALSE}
# Default knitr chunk options for the code chunks in this vignette.
knitr::opts_chunk$set(
  # Render each chunk's source and its printed output together in one block.
  collapse = TRUE,
  # Prefix printed output lines with "#>" so they read as R comments.
  comment = "#>"
)

# Locate a data file used by this vignette.
#
# The path components in `...` are joined with file.path() and looked up in
# this order:
#   1. the path exactly as given,
#   2. under "vignettes",
#   3. under "inst/extdata",
#   4. under "inst/extdata" below the directory named by the PWD environment
#      variable,
#   5. the "extdata" directory of oncoPredict, as reported by system.file(),
#   6. the "doc" directory of oncoPredict, as reported by system.file().
#
# Returns the first candidate that exists (a length-one character vector).
# Stops with an error naming the requested path when no candidate exists.
vignette_file <- function(...) {
  requested <- file.path(...)
  extdata_dir <- file.path("inst", "extdata")

  search_paths <- c(
    requested,
    file.path("vignettes", ...),
    file.path(extdata_dir, ...),
    file.path(Sys.getenv("PWD"), extdata_dir, ...),
    system.file("extdata", ..., package = "oncoPredict"),
    system.file("doc", ..., package = "oncoPredict")
  )

  # system.file() yields "" when nothing is found, so drop empty strings
  # before keeping only the paths that exist on disk.
  is_usable <- nzchar(search_paths) & file.exists(search_paths)
  hits <- search_paths[is_usable]

  if (length(hits) > 0L) {
    return(hits[[1]])
  }
  stop("Could not find vignette file: ", requested, call. = FALSE)
}
```

```{r setup, eval=requireNamespace("org.Hs.eg.db", quietly = TRUE) && requireNamespace("TxDb.Hsapiens.UCSC.hg19.knownGene", quietly = TRUE)}

library(oncoPredict)

# This vignette demonstrates how to map segmented copy-number data to genes
# with map_cnv() and how to test predicted drug response against CNA
# amplifications with idwas().

# Step 1: obtain CNV data ----
# Download CNV data for your cancer of interest from the GDC database and
# export it as 'cnv.txt'.
#
# The commented-out code below exports the CNV data to a text file called
# 'cnv.txt', containing a table with colnames() 'Sample', 'Chromosome',
# 'Start', 'End', 'Num_Probes', 'Segment_Mean'.
# The reference genome is hg19.
#query.gbm.nocnv<-GDCquery(project = "TCGA-GBM",
#                          data.category = "Copy number variation",
#                          legacy = TRUE,
#                          file.type = "nocnv_hg19.seg",
#                          sample.type = c("Primary Tumor"))
#patient_total<-nrow((query.gbm.nocnv$results)[[1]]) #The total number of patients GDC has CNV data for
#query.gbm.nocnv$results[[1]]<-query.gbm.nocnv$results[[1]][1:patient_total,]
#GDCdownload(query.gbm.nocnv, files.per.chunk = 100)
#gbm.nocnv<-GDCprepare(query.gbm.nocnv, save = TRUE, save.filename = "GBMnocnvhg19.rda")
#write.table(gbm.nocnv, file='cnv.txt')

# Step 2: map CNV segments to genes with map_cnv() ----
# The mapping is accomplished by intersecting each gene with the overlapping
# CNV segment. If a gene is not fully captured by a CNV segment, NA is
# assigned.

# Read the segmented CNV table that will be passed to map_cnv().
Cnvs <- read.table(vignette_file("cnv.txt"), header = TRUE, row.names = 1)

# Step 3: test each CNA amplification against each drug with idwas() ----
# The p-values and beta-values for each test are returned as an object.

# Set the drug_prediction parameter.
# idwas() expects a data frame whose rownames() are samples and whose
# colnames() are drugs. The file is read and then transposed with t(), which
# yields a matrix; it is converted back to a data frame further below.
# read.table() already returns a data frame, so no extra conversion is needed
# before transposing.
drug_prediction <- t(
  read.table(vignette_file("DrugPredictions.txt"), header = TRUE, row.names = 1)
)
# dim(drug_prediction) # 165 198

# In this example, replace '.' with '-' so the TCGA sample identifiers match
# the format used in the CNV data. `fixed = TRUE` treats "." as a literal dot
# rather than a regular expression; TRUE is spelled out because `T` is an
# ordinary variable that can be reassigned.
rownames(drug_prediction) <- gsub(
  ".", "-", rownames(drug_prediction),
  fixed = TRUE
)

# Make sure the sample identifiers in the drug prediction data have the same
# form as the sample identifiers in the 'data' parameter: drop the first two
# characters of every identifier (presumably a prefix that the CNV identifiers
# do not carry; verify against your own input files).
rows <- rownames(drug_prediction)
rownames(drug_prediction) <- substring(rows, 3, nchar(rows))
drug_prediction <- as.data.frame(drug_prediction)

# Determine the number of samples you want the CNAs to be amplified in.
# The default is 10. The included CNV example is intentionally small, so a
# lower threshold is used for this vignette.
n <- 5

# Indicate whether or not you would like to test CNA amplification data.
# If TRUE, you will test CNA amplifications. If FALSE, you will test mutation
# data.
cnv <- TRUE

# Apply map_cnv().
mapped_cnv <- map_cnv(Cnvs = Cnvs)

# Set the data parameter.
# Make sure this data is a data frame and that colnames() are samples.
data <- as.data.frame(mapped_cnv$theCnvQuantVecList_mat)

# Drop the last 12 characters of every sample identifier (presumably so the
# identifiers match the rownames() of drug_prediction; verify against your
# own input files).
samps <- colnames(data)
colnames(data) <- substr(samps, 1, nchar(samps) - 12)

# Apply idwas().
idwas_results <- idwas(
  drug_prediction = drug_prediction,
  data = data,
  n = n,
  cnv = cnv
)

```