--- title: "PepMapViz: A Versatile Toolkit for Peptide Mapping, Visualization, and Comparative Exploration" author: "Zhenru Zhou , Qui Phung , Corey Bakalarski " output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{PepMapViz: A Versatile Toolkit for Peptide Mapping, Visualization, and Comparative Exploration} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- # Introduction This vignette demonstrates how to use `PepMapViz` with input files. # Accessing Input Files The input files are stored in the `inst/extdata/` directory of the package. You can access them using the `system.file()` function. The example files are searching results from PEAKS software. "Donor" column is added to plot peptides from different donors. ```{r setup} library(PepMapViz) ``` ```{r} # Access the input files input_file_folder <- system.file("extdata", package = "PepMapViz") # Read the input files resulting_df <- combine_files_from_folder(input_file_folder) head(resulting_df) ``` # Strip the sequence This function takes outputs from multiple platform, a data frame with a column containing peptide sequences with modifications and converts it into a new dataframe with plain peptide sequences without modifications. ```{r} # Strip the sequence striped_data_peaks <- strip_sequence(resulting_df, "Peptide", "Sequence", "PEAKS") head(striped_data_peaks) ``` # Extract modifications information This function takes outputs from multiple platform, a data frame with a column containing modified peptide sequence with the detailed post translational modification(PTM) information and converts it into a new dataframe with plain peptide sequences and associated PTM information. ```{r} # Extract modifications information PTM_table <- data.frame(PTM_mass = c("15.99", ".98", "57.02", "42.01"), PTM_type = c("Ox", "Deamid", "Cam", "Acetyl")) converted_data_peaks <- obtain_mod( striped_data_peaks, "Peptide", "PEAKS", strip_seq_col = NULL, PTM_table, PTM_annotation = TRUE, PTM_mass_column = "PTM_mass" ) head(converted_data_peaks) ``` # Match peptide sequence with provided sequence and calculate positions This function matches peptide sequences from the 'peptide_data' data frame to corresponding provided whole sequences in the 'whole_seq' data frame. It calculates the start and end positions of the matched sequences and returns data frame with information about the matching positions. ```{r} # Match peptide sequence with provided sequence and calculate positions whole_seq <- data.frame( Epitope = c("Boco", "Boco"), Chain = c("HC", "LC"), Region_Sequence = c("QVQLVQSGAEVKKPGASVKVSCKASGYTFTSYYMHWVRQAPGQGLEWMGEISPFGGRTNYNEKFKSRVTMTRDTSTSTVYMELSSLRSEDTAVYYCARERPLYASDLWGQGTTVTVSSASTKGPSVFPLAPCSRSTSESTAALGCLVKDYFPEPVTVSWNSGALTSGVHTFPAVLQSSGLYSLSSVVTVPSSNFGTQTYTCNVDHKPSNTKVDKTVERKCCVECPPCPAPPVAGPSVFLFPPKPKDTLMISRTPEVTCVVVDVSHEDPEVQFNWYVDGVEVHNAKTKPREEQFNSTFRVVSVLTVVHQDWLNGKEYKCKVSNKGLPSSIEKTISKTKGQPREPQVYTLPPSREEMTKNQVSLTCLVKGFYPSDIAVEWESNGQPENNYKTTPPMLDSDGSFFLYSKLTVDKSRWQQGNVFSCSVMHEALHNHYTQKSLSLSPGK", "DIQMTQSPSSLSASVGDRVTITCRASQGISSALAWYQQKPGKAPKLLIYSASYRYTGVPSRFSGSGSGTDFTFTISSLQPEDIATYYCQQRYSLWRTFGQGTKLEIKRTVAAPSVFIFPPSDEQLKSGTASVVCLLNNFYPREAKVQWKVDNALQSGNSQESVTEQDSKDSTYSLSSTLTLSKADYEKHKVYACEVTHQGLSSPVTKSFNRGEC" ) ) matching_result <- match_and_calculate_positions( converted_data_peaks, 'Sequence', whole_seq, match_columns = NULL, sequence_length = c(10, 30), column_keep = c( "PTM_mass", "PTM_position", "reps", "Area", "Donor", "PTM_type" ) ) head(matching_result) ``` # Quantify matched peptide sequences This function takes peptide matching result and quantifies the matched peptide sequences based on the provided quantification method. If the quantification method is 'PSM', the function calculates the number of matched peptide sequences in each positions of the provided whole sequence. If the quantification method is 'Area', the function select the max value in area column of identical peptide sequences and calculates the sum of the areas of the matched peptide sequences in each positions of the provided whole sequence. ```{r} # Quantify matched peptide sequences by PSM matching_columns = c("Chain", "Epitope") distinct_columns = c("Donor") data_with_psm <- peptide_quantification( whole_seq, matching_result, matching_columns, distinct_columns, quantify_method = "PSM", with_PTM = TRUE, reps = TRUE ) region <- data.frame( Epitope = c("Boco", "Boco", "Boco", "Boco", "Boco", "Boco"), Chain = c("HC", "HC", "HC", "HC", "LC", "LC"), Region = c("VH", "CH1", "CH2", "CH3", "VL", "CL"), Region_start = c(1,119,229,338,1,108), Region_end = c(118,228,337,444,107,214) ) result_with_psm <- data.frame() for (i in 1:nrow(region)) { chain <- region$Chain[i] region_start <- region$Region_start[i] region_end <- region$Region_end[i] region_name <- region$Region[i] temp <- data_with_psm[data_with_psm$Chain == chain & data_with_psm$Position >= region_start & data_with_psm$Position <= region_end, ] temp$Region <- region_name result_with_psm <- rbind(result_with_psm, temp) } head(result_with_psm) ``` # Plotting peptide in whole provided sequence This function takes the quantified peptide data frame and plots the matched peptide sequences in the provided whole sequence. The function returns a ggplot object with the matched peptide sequences plotted in the provided whole sequence. ```{r, fig.width=30, fig.height=6} # Plotting peptide in whole provided sequence domain <- data.frame( domain_type = c("CDR H1", "CDR H2", "CDR H3", "CDR L1", "CDR L2", "CDR L3"), Region = c("VH", "VH", "VH", "VL", "VL", "VL"), Epitope = c("Boco", "Boco", "Boco", "Boco", "Boco", "Boco"), domain_start = c(26, 50, 97, 24, 50, 89), domain_end = c(35, 66, 107, 34, 56, 97) ) x_axis_vars <- c("Region") y_axis_vars <- c("Donor") column_order <- list( Donor = "D1,D2,D3,D4,D5,D6,D7,D8", Region = "VH,CH1,CH2,CH3,VL,CL" ) domain_color <- c( "CDR H1" = "#F8766D", "CDR H2" = "#B79F00", "CDR H3" = "#00BA38", "CDR L1" = "#00BFC4", "CDR L2" = "#619CFF", "CDR L3" = "#F564E3" ) PTM_color <- c( "Ox" = "red", "Deamid" = "cyan", "Cam" = "blue", "Acetyl" = "magenta" ) label_value = list(Donor = "D1") p_psm <- create_peptide_plot( result_with_psm, y_axis_vars, x_axis_vars, y_expand = c(0.2, 0.2), x_expand = c(0.5, 0.5), theme_options = list(legend.box = "horizontal"), labs_options = list(title = "PSM Plot", x = "Position", fill = "PSM"), color_fill_column = 'PSM', fill_gradient_options = list(limits = c(0, 160)), # Set the limits for the color scale label_size = 1.9, add_domain = TRUE, domain = domain, domain_start_column = "domain_start", domain_end_column = "domain_end", domain_type_column = "domain_type", domain_color = domain_color, PTM = TRUE, PTM_type_column = "PTM_type", PTM_color = PTM_color, add_label = TRUE, label_column = "Character", label_value = label_value, column_order = column_order ) print(p_psm) ```