Title: | A Versatile Toolkit for Peptide Mapping, Visualization, and Comparative Exploration |
---|---|
Description: | A versatile R visualization package that empowers researchers with comprehensive visualization tools for seamlessly mapping peptides to protein sequences, identifying distinct domains and regions of interest, accentuating mutations, and highlighting post-translational modifications, all while enabling comparisons across diverse experimental conditions. Potential applications of 'PepMapViz' include the visualization of cross-software mass spectrometry results at the peptide level for specific protein and domain details in a linearized format and post-translational modification coverage across different experimental conditions; unraveling insights into disease mechanisms. It also enables visualization of major histocompatibility complex-presented peptides in different antibody regions predicting immunogenicity in antibody drug development. |
Authors: | Zhenru Zhou [aut, cre], Qui Phung [ctb], Corey Bakalarski [aut], Genentech, Inc. [cph] |
Maintainer: | Zhenru Zhou <[email protected]> |
License: | MIT + file LICENSE |
Version: | 1.0.0 |
Built: | 2024-12-14 06:54:25 UTC |
Source: | CRAN |
Calculate Area/Intensity for the whole input sequence dataframe
calculate_all_Area( whole_seq, matching_result, matching_columns, distinct_columns, area_column, with_PTM = FALSE, reps = FALSE )
calculate_all_Area( whole_seq, matching_result, matching_columns, distinct_columns, area_column, with_PTM = FALSE, reps = FALSE )
whole_seq |
A dataframe holding whole sequence information. 'Region_Sequence' column is required for the sequence information. Change the column name if it is different than 'Region_Sequence'. |
matching_result |
The dataframe that contains the matched results and PTM information. |
matching_columns |
Vector of column names that should match between each row of 'whole_seq' and the 'matching_result' dataframe. |
distinct_columns |
Vector of column names that should be used to calculate Area separately for each unique combination of these columns. |
area_column |
The name of the column in 'matching_result' that contains the area/intensity information. |
with_PTM |
A boolean parameter indicating whether PTM should be considered during calculation of Area. Default is FALSE. |
reps |
A boolean parameter indicating whether the area/intensity should be divided by the number of replicates. Default is FALSE. |
Returns data_with_area, a dataframe containing the calculated Area for each record in 'whole_seq'.
whole_seq <- data.frame( Region_Sequence = c( "XYZAAA", "XYZCCC", "XYZBBB", "XYZDDD", "XYZAAB", "XYZCCD", "XYZBBB", "XYZDDD", "XYZAAA", "XYZCCC", "XYZBBB", "XYZDDD", "XYZAAB", "XYZCCD", "XYZBBB", "XYZDDD" ), Condition_1 = c( "Drug1", "Drug1", "Drug2", "Drug2", "Drug1", "Drug1", "Drug2", "Drug2", "Drug1", "Drug1", "Drug2", "Drug2", "Drug1", "Drug1", "Drug2", "Drug2" ), Condition_2 = c( "Donor1", "Donor1", "Donor1", "Donor1", "Donor1", "Donor1", "Donor1", "Donor1", "Donor2", "Donor2", "Donor2", "Donor2", "Donor2", "Donor2", "Donor2", "Donor2" ), Region_1 = c( "VH", "VL", "VH", "VL", "VH", "VL", "VH", "VL", "VH", "VL", "VH", "VL", "VH", "VL", "VH", "VL" ), Region_2 = c( "Arm_1", "Arm_1", "Arm_1", "Arm_1", "Arm_2", "Arm_2", "Arm_2", "Arm_2", "Arm_1", "Arm_1", "Arm_1", "Arm_1", "Arm_2", "Arm_2", "Arm_2", "Arm_2" ) ) matching_result <- data.frame( Sequence = c("AAA", "DDD", "DDD"), Condition_1 = c("Drug1", "Drug2", "Drug2"), Condition_2 = c("Donor1", "Donor2", "Donor2"), Region_1 = c("VH", "VL", "VL"), Region_2 = c("Arm_1", "Arm_2", "Arm_2"), Start_Position = c(4, 4, 4), End_Position = c(6, 6, 6), PTM_position = c(NA, 2, 0), PTM_type = c(NA, "O", "C"), Area = c(100, 200, 200), reps = c(1, 2, 2) ) matching_columns <- c("Condition_1", "Region_2") area_column <- "Area" data_with_area <- calculate_all_Area( whole_seq, matching_result, matching_columns, distinct_columns = c("Condition_2", "Region_1"), area_column, with_PTM = TRUE, reps = TRUE )
whole_seq <- data.frame( Region_Sequence = c( "XYZAAA", "XYZCCC", "XYZBBB", "XYZDDD", "XYZAAB", "XYZCCD", "XYZBBB", "XYZDDD", "XYZAAA", "XYZCCC", "XYZBBB", "XYZDDD", "XYZAAB", "XYZCCD", "XYZBBB", "XYZDDD" ), Condition_1 = c( "Drug1", "Drug1", "Drug2", "Drug2", "Drug1", "Drug1", "Drug2", "Drug2", "Drug1", "Drug1", "Drug2", "Drug2", "Drug1", "Drug1", "Drug2", "Drug2" ), Condition_2 = c( "Donor1", "Donor1", "Donor1", "Donor1", "Donor1", "Donor1", "Donor1", "Donor1", "Donor2", "Donor2", "Donor2", "Donor2", "Donor2", "Donor2", "Donor2", "Donor2" ), Region_1 = c( "VH", "VL", "VH", "VL", "VH", "VL", "VH", "VL", "VH", "VL", "VH", "VL", "VH", "VL", "VH", "VL" ), Region_2 = c( "Arm_1", "Arm_1", "Arm_1", "Arm_1", "Arm_2", "Arm_2", "Arm_2", "Arm_2", "Arm_1", "Arm_1", "Arm_1", "Arm_1", "Arm_2", "Arm_2", "Arm_2", "Arm_2" ) ) matching_result <- data.frame( Sequence = c("AAA", "DDD", "DDD"), Condition_1 = c("Drug1", "Drug2", "Drug2"), Condition_2 = c("Donor1", "Donor2", "Donor2"), Region_1 = c("VH", "VL", "VL"), Region_2 = c("Arm_1", "Arm_2", "Arm_2"), Start_Position = c(4, 4, 4), End_Position = c(6, 6, 6), PTM_position = c(NA, 2, 0), PTM_type = c(NA, "O", "C"), Area = c(100, 200, 200), reps = c(1, 2, 2) ) matching_columns <- c("Condition_1", "Region_2") area_column <- "Area" data_with_area <- calculate_all_Area( whole_seq, matching_result, matching_columns, distinct_columns = c("Condition_2", "Region_1"), area_column, with_PTM = TRUE, reps = TRUE )
Calculate Spectra Count (PSM) for the whole input sequence dataframe
calculate_all_PSM( whole_seq, matching_result, matching_columns, distinct_columns, with_PTM = FALSE, reps = FALSE )
calculate_all_PSM( whole_seq, matching_result, matching_columns, distinct_columns, with_PTM = FALSE, reps = FALSE )
whole_seq |
A dataframe holding whole sequence information. 'Region_Sequence' column is required for the sequence information. Change the column name if it is different than 'Region_Sequence'. |
matching_result |
The dataframe that contains the matched results and PTM information. |
matching_columns |
Vector of column names that should match between each row of 'whole_seq' and the 'matching_result' dataframe. |
distinct_columns |
Vector of column names that should be used to calculate PSM separately for each unique combination of these columns. |
with_PTM |
A boolean parameter indicating whether PTM should be considered during calculation of PSM. Default is FALSE. |
reps |
A boolean parameter indicating whether the PSM should be divided by the number of replicates. Default is FALSE. |
Returns data_with_psm, a dataframe containing the calculated PSM for each record in 'whole_seq'.
whole_seq <- data.frame( Region_Sequence = c( "XYZAAA", "XYZCCC", "XYZBBB", "XYZDDD", "XYZAAB", "XYZCCD", "XYZBBB", "XYZDDD", "XYZAAA", "XYZCCC", "XYZBBB", "XYZDDD", "XYZAAB", "XYZCCD", "XYZBBB", "XYZDDD" ), Condition_1 = c( "Drug1", "Drug1", "Drug2", "Drug2", "Drug1", "Drug1", "Drug2", "Drug2", "Drug1", "Drug1", "Drug2", "Drug2", "Drug1", "Drug1", "Drug2", "Drug2" ), Condition_2 = c( "Donor1", "Donor1", "Donor1", "Donor1", "Donor1", "Donor1", "Donor1", "Donor1", "Donor2", "Donor2", "Donor2", "Donor2", "Donor2", "Donor2", "Donor2", "Donor2" ), Region_1 = c( "VH", "VL", "VH", "VL", "VH", "VL", "VH", "VL", "VH", "VL", "VH", "VL", "VH", "VL", "VH", "VL" ), Region_2 = c( "Arm_1", "Arm_1", "Arm_1", "Arm_1", "Arm_2", "Arm_2", "Arm_2", "Arm_2", "Arm_1", "Arm_1", "Arm_1", "Arm_1", "Arm_2", "Arm_2", "Arm_2", "Arm_2" ) ) matching_result <- data.frame( Sequence = c("AAA", "DDD", "DDD"), Condition_1 = c("Drug1", "Drug2", "Drug2"), Condition_2 = c("Donor1", "Donor2", "Donor2"), Region_1 = c("VH", "VL", "VL"), Region_2 = c("Arm_1", "Arm_2", "Arm_2"), Start_Position = c(4, 4, 4), End_Position = c(6, 6, 6), PTM_position = c(NA, 2, 0), PTM_type = c(NA, "O", "C"), Area = c(100, 200, 200), reps = c(1, 2, 2) ) matching_columns <- c("Condition_1", "Region_2") data_with_psm <- calculate_all_PSM( whole_seq, matching_result, matching_columns, distinct_columns = c("Condition_2", "Region_1"), with_PTM = TRUE, reps = TRUE )
whole_seq <- data.frame( Region_Sequence = c( "XYZAAA", "XYZCCC", "XYZBBB", "XYZDDD", "XYZAAB", "XYZCCD", "XYZBBB", "XYZDDD", "XYZAAA", "XYZCCC", "XYZBBB", "XYZDDD", "XYZAAB", "XYZCCD", "XYZBBB", "XYZDDD" ), Condition_1 = c( "Drug1", "Drug1", "Drug2", "Drug2", "Drug1", "Drug1", "Drug2", "Drug2", "Drug1", "Drug1", "Drug2", "Drug2", "Drug1", "Drug1", "Drug2", "Drug2" ), Condition_2 = c( "Donor1", "Donor1", "Donor1", "Donor1", "Donor1", "Donor1", "Donor1", "Donor1", "Donor2", "Donor2", "Donor2", "Donor2", "Donor2", "Donor2", "Donor2", "Donor2" ), Region_1 = c( "VH", "VL", "VH", "VL", "VH", "VL", "VH", "VL", "VH", "VL", "VH", "VL", "VH", "VL", "VH", "VL" ), Region_2 = c( "Arm_1", "Arm_1", "Arm_1", "Arm_1", "Arm_2", "Arm_2", "Arm_2", "Arm_2", "Arm_1", "Arm_1", "Arm_1", "Arm_1", "Arm_2", "Arm_2", "Arm_2", "Arm_2" ) ) matching_result <- data.frame( Sequence = c("AAA", "DDD", "DDD"), Condition_1 = c("Drug1", "Drug2", "Drug2"), Condition_2 = c("Donor1", "Donor2", "Donor2"), Region_1 = c("VH", "VL", "VL"), Region_2 = c("Arm_1", "Arm_2", "Arm_2"), Start_Position = c(4, 4, 4), End_Position = c(6, 6, 6), PTM_position = c(NA, 2, 0), PTM_type = c(NA, "O", "C"), Area = c(100, 200, 200), reps = c(1, 2, 2) ) matching_columns <- c("Condition_1", "Region_2") data_with_psm <- calculate_all_PSM( whole_seq, matching_result, matching_columns, distinct_columns = c("Condition_2", "Region_1"), with_PTM = TRUE, reps = TRUE )
Calculate Area/Intensity for one row of the input sequence dataframe
calculate_Area( row, matching_result, matching_columns, distinct_columns = NULL, area_column, with_PTM = FALSE, reps = FALSE )
calculate_Area( row, matching_result, matching_columns, distinct_columns = NULL, area_column, with_PTM = FALSE, reps = FALSE )
row |
A row of dataframe containing the sequence for the 'Character' column in region_data. |
matching_result |
The dataframe that contains the matched results and PTM information. |
matching_columns |
Vector of column names that should match between the 'row' and 'matching_result' dataframes. |
distinct_columns |
Vector of column names that should be used to calculate Area separately for each unique combination of these columns. |
area_column |
The name of the column in 'matching_result' that contains the area/intensity information. |
with_PTM |
A boolean parameter indicating whether PTM should be considered. Default is FALSE. |
reps |
A boolean parameter indicating whether the area/intensity should be divided by the number of replicates. Default is FALSE. |
This function returns the modified region_data dataframe that includes the "Area" column, and optionally the "PTM" and "PTM_type" columns.
If the 'filter_conditions' do not match, an empty dataframe will be returned early.
An error is raised if the 'PTM_position' and 'PTM_type' columns do not exist in the 'matching_result' dataframe when 'with_PTM' is TRUE.
row <- data.frame( Region_Sequence = c("XYZAAA"), Condition_1 = c("Drug1"), Condition_2 = c("Donor1"), Region_1 = c("VH"), Region_2 = c("Arm_1") ) matching_result <- data.frame( Sequence = c("AAA", "DDD", "DDD"), Condition_1 = c("Drug1", "Drug2", "Drug2"), Condition_2 = c("Donor1", "Donor2", "Donor2"), Region_1 = c("VH", "VL", "VL"), Region_2 = c("Arm_1", "Arm_2", "Arm_2"), Start_Position = c(4, 4, 4), End_Position = c(6, 6, 6), PTM_position = c(NA, 2, 0), PTM_type = c(NA,"O","C"), Area = c(100, 200, 200), reps = c(1, 2, 2) ) matching_columns <- c("Condition_1", "Region_2") area_column <- "Area" data_with_area <- calculate_Area( row, matching_result, matching_columns, distinct_columns = c("Condition_2", "Region_1"), area_column, with_PTM = TRUE, reps = TRUE )
row <- data.frame( Region_Sequence = c("XYZAAA"), Condition_1 = c("Drug1"), Condition_2 = c("Donor1"), Region_1 = c("VH"), Region_2 = c("Arm_1") ) matching_result <- data.frame( Sequence = c("AAA", "DDD", "DDD"), Condition_1 = c("Drug1", "Drug2", "Drug2"), Condition_2 = c("Donor1", "Donor2", "Donor2"), Region_1 = c("VH", "VL", "VL"), Region_2 = c("Arm_1", "Arm_2", "Arm_2"), Start_Position = c(4, 4, 4), End_Position = c(6, 6, 6), PTM_position = c(NA, 2, 0), PTM_type = c(NA,"O","C"), Area = c(100, 200, 200), reps = c(1, 2, 2) ) matching_columns <- c("Condition_1", "Region_2") area_column <- "Area" data_with_area <- calculate_Area( row, matching_result, matching_columns, distinct_columns = c("Condition_2", "Region_1"), area_column, with_PTM = TRUE, reps = TRUE )
Calculate Spectra Count (PSM) for one row of the input sequence dataframe
calculate_PSM( row, matching_result, matching_columns, distinct_columns, with_PTM = FALSE, reps = FALSE )
calculate_PSM( row, matching_result, matching_columns, distinct_columns, with_PTM = FALSE, reps = FALSE )
row |
A row of dataframe containing the sequence for the 'Character' column in region_data. |
matching_result |
The dataframe that contains the matched results and PTM information. |
matching_columns |
Vector of column names that should match between the 'row' and 'matching_result' dataframes. |
distinct_columns |
Vector of column names that should be used to calculate PSM separately for each unique combination of these columns. |
with_PTM |
A boolean parameter indicating whether PTM should be considered. Default is FALSE. |
reps |
A boolean parameter indicating whether the PSM should be divided by the number of replicates. Default is FALSE. |
This function returns the modified region_data dataframe that includes the "PSM" column, and optionally the "PTM" and "PTM_type" columns.
If the 'filter_conditions' do not match, an empty dataframe will be returned early.
An error is raised if the 'PTM_position' and 'PTM_type' columns do not exist in the 'matching_result' dataframe when 'with_PTM' is TRUE.
row <- data.frame( Region_Sequence = c("XYZDDD"), Condition_1 = c("Drug2"), Region_1 = c("VL"), Region_2 = c("Arm_2") ) matching_result <- data.frame( Sequence = c("AAA", "DDD", "DDD"), Condition_1 = c("Drug1", "Drug2", "Drug2"), Condition_2 = c("Donor1", "Donor2", "Donor2"), Region_1 = c("VH", "VL", "VL"), Region_2 = c("Arm_1", "Arm_2", "Arm_2"), Start_Position = c(4, 4, 4), End_Position = c(6, 6, 6), PTM_position = c(NA, 2, 0), PTM_type = c(NA,"O","C"), Area = c(100, 200, 200), reps = c(1, 2, 2) ) matching_columns <- c("Condition_1", "Region_2") result <- calculate_PSM( row, matching_result, matching_columns, distinct_columns = c("Condition_2", "Region_1"), with_PTM = TRUE, reps = TRUE )
row <- data.frame( Region_Sequence = c("XYZDDD"), Condition_1 = c("Drug2"), Region_1 = c("VL"), Region_2 = c("Arm_2") ) matching_result <- data.frame( Sequence = c("AAA", "DDD", "DDD"), Condition_1 = c("Drug1", "Drug2", "Drug2"), Condition_2 = c("Donor1", "Donor2", "Donor2"), Region_1 = c("VH", "VL", "VL"), Region_2 = c("Arm_1", "Arm_2", "Arm_2"), Start_Position = c(4, 4, 4), End_Position = c(6, 6, 6), PTM_position = c(NA, 2, 0), PTM_type = c(NA,"O","C"), Area = c(100, 200, 200), reps = c(1, 2, 2) ) matching_columns <- c("Condition_1", "Region_2") result <- calculate_PSM( row, matching_result, matching_columns, distinct_columns = c("Condition_2", "Region_1"), with_PTM = TRUE, reps = TRUE )
This function reads all CSV and TXT files from a specified folder and combines them into a single data.table.
combine_files_from_folder(folder_path)
combine_files_from_folder(folder_path)
folder_path |
The path to the folder containing the CSV or TSV files. |
A data.table containing the combined data from all files.
folder_path <- "" combined_df <- combine_files_from_folder(folder_path) print(combined_df)
folder_path <- "" combined_df <- combine_files_from_folder(folder_path) print(combined_df)
This function converts a peptide sequence into a regular expression pattern that accounts for ambiguous amino acids. Each amino acid is replaced by a character class that includes itself, 'X', and any specific ambiguities.
convert_to_regex_pattern(peptide)
convert_to_regex_pattern(peptide)
peptide |
A character string representing the peptide sequence. |
A character string containing the regex pattern for matching.
# Convert a peptide sequence to a regex pattern peptide <- "NDEQIL" regex_pattern <- convert_to_regex_pattern(peptide) print(regex_pattern) # Output: "[NBX][DBX][EZX][QZX][ILX][ILX]"
# Convert a peptide sequence to a regex pattern peptide <- "NDEQIL" regex_pattern <- convert_to_regex_pattern(peptide) print(regex_pattern) # Output: "[NBX][DBX][EZX][QZX][ILX][ILX]"
This function generates a peptide plot using the provided data and allows for customization of the plot layout.
create_peptide_plot( data, y_axis_vars, x_axis_vars, y_expand = c(0.1, 0.15), x_expand = c(0.6, 0.6), theme_options = NULL, labs_options = NULL, color_fill_column, fill_gradient_options = list(), label_size = 3, add_domain = TRUE, domain = NULL, domain_start_column = "domain_start", domain_end_column = "domain_end", domain_type_column = "domain_type", domain_color = NULL, PTM = FALSE, PTM_type_column = "PTM_type", PTM_color = NULL, add_label = TRUE, label_column = "Character", label_value = NULL, column_order = NULL )
create_peptide_plot( data, y_axis_vars, x_axis_vars, y_expand = c(0.1, 0.15), x_expand = c(0.6, 0.6), theme_options = NULL, labs_options = NULL, color_fill_column, fill_gradient_options = list(), label_size = 3, add_domain = TRUE, domain = NULL, domain_start_column = "domain_start", domain_end_column = "domain_end", domain_type_column = "domain_type", domain_color = NULL, PTM = FALSE, PTM_type_column = "PTM_type", PTM_color = NULL, add_label = TRUE, label_column = "Character", label_value = NULL, column_order = NULL )
data |
A dataframe containing the PSM data or Area data obtained from peptide_cluster_quantification. |
y_axis_vars |
A list of variables for the donor and type facets. |
x_axis_vars |
A list of variables for the region facets. |
y_expand |
A numeric vector of length 2 specifying the expansion for the y-axis. Default is c(0.1, 0.15). |
x_expand |
A numeric vector of length 2 specifying the expansion for the x-axis. Default is c(0.6, 0.6). |
theme_options |
A list of additional theme options to customize the plot. Default is NULL. |
labs_options |
A list of additional labs options to customize the plot labels. Default is NULL. |
color_fill_column |
The name of the column in 'data' that is used for the fill color. |
fill_gradient_options |
A list of options for the fill gradient scale. Default is an empty list. |
label_size |
The size of the labels in the plot. Default is 3. |
add_domain |
A logical value indicating whether to add domain like CDR (Complementarity-Determining Region) to the plot. Default is TRUE. |
domain |
A dataframe containing the domain data with columns including 'domain_start', 'domain_end', and 'domain_type'. |
domain_start_column |
The name of the column in 'domain' containing the domain start position. Default is "domain_start". |
domain_end_column |
The name of the column in 'domain' containing the domain end position. Default is "domain_end". |
domain_type_column |
The name of the column in 'domain' containing the domain type. Default is "domain_type". |
domain_color |
A list of colors for the domain types. Default is NULL. |
PTM |
A logical value indicating whether to include PTM (Post-Translational Modification) data in the plot. Default is FALSE. |
PTM_type_column |
The name of the column in 'data' containing the PTM type. Default is "PTM_type". |
PTM_color |
A list of colors for the PTM types. Default is NULL. |
add_label |
A logical value indicating whether to add labels to the plot. Default is TRUE. |
label_column |
The name of the column in 'data' containing the labels. Default is "Character". |
label_value |
A list of column names and their values to filter the data for the labels. Default is NULL. |
column_order |
A list of column names and their order for the plot. Default is NULL. |
This function returns a ggplot object representing the peptide plot.
data <- data.frame( Character = c("X", "Y", "Z", "A", "A", "A"), Position = 1:6, Condition_1 = rep("Drug1", 6), Region_2 = rep("Arm_1", 6), Area = c(0.000000, 0.000000, 0.000000, 6.643856, 6.643856, 6.643856), Condition_2 = rep("Donor1", 6), Region_1 = rep("VH", 6), PTM = c(FALSE, TRUE, FALSE, FALSE, FALSE, FALSE), PTM_type = c(NA, "O", NA, NA, NA, NA) ) domain <- data.frame( domain_type = c("CDR H1", "CDR H2", "CDR H3"), Region_1 = c("VH", "VH", "VH"), Region_2 = c("Arm_1", "Arm_1", "Arm_1"), Condition_1 = c("Drug1", "Drug1", "Drug1"), domain_start = c(1, 3, 5), domain_end = c(2, 4, 6) ) x_axis_vars <- c("Region_2", "Region_1") y_axis_vars <- c("Condition_2") domain_color <- c( "CDR H1" = "#F8766D", "CDR H2" = "#B79F00", "CDR H3" = "#00BA38", "CDR L1" = "#00BFC4", "CDR L2" = "#619CFF", "CDR L3" = "#F564E3" ) PTM_color <- c( "Ox" = "red", "Deamid" = "cyan", "Cam" = "blue", "Acetyl" = "magenta" ) p <- create_peptide_plot( data, y_axis_vars, x_axis_vars, y_expand = c(0.2, 0.2), x_expand = c(0.5, 0.5), theme_options = list(), labs_options = list(title = "PSM Plot", x = "Position", fill = "PSM"), color_fill_column = 'Area', fill_gradient_options = list(), label_size = 5, add_domain = TRUE, domain = domain, domain_start_column = "domain_start", domain_end_column = "domain_end", domain_type_column = "domain_type", domain_color = domain_color, PTM = FALSE, PTM_type_column = "PTM_type", PTM_color = PTM_color, add_label = TRUE, label_column = "Character", label_value = NULL, column_order = list(Region_1 = 'VH,VL') ) print(p)
data <- data.frame( Character = c("X", "Y", "Z", "A", "A", "A"), Position = 1:6, Condition_1 = rep("Drug1", 6), Region_2 = rep("Arm_1", 6), Area = c(0.000000, 0.000000, 0.000000, 6.643856, 6.643856, 6.643856), Condition_2 = rep("Donor1", 6), Region_1 = rep("VH", 6), PTM = c(FALSE, TRUE, FALSE, FALSE, FALSE, FALSE), PTM_type = c(NA, "O", NA, NA, NA, NA) ) domain <- data.frame( domain_type = c("CDR H1", "CDR H2", "CDR H3"), Region_1 = c("VH", "VH", "VH"), Region_2 = c("Arm_1", "Arm_1", "Arm_1"), Condition_1 = c("Drug1", "Drug1", "Drug1"), domain_start = c(1, 3, 5), domain_end = c(2, 4, 6) ) x_axis_vars <- c("Region_2", "Region_1") y_axis_vars <- c("Condition_2") domain_color <- c( "CDR H1" = "#F8766D", "CDR H2" = "#B79F00", "CDR H3" = "#00BA38", "CDR L1" = "#00BFC4", "CDR L2" = "#619CFF", "CDR L3" = "#F564E3" ) PTM_color <- c( "Ox" = "red", "Deamid" = "cyan", "Cam" = "blue", "Acetyl" = "magenta" ) p <- create_peptide_plot( data, y_axis_vars, x_axis_vars, y_expand = c(0.2, 0.2), x_expand = c(0.5, 0.5), theme_options = list(), labs_options = list(title = "PSM Plot", x = "Position", fill = "PSM"), color_fill_column = 'Area', fill_gradient_options = list(), label_size = 5, add_domain = TRUE, domain = domain, domain_start_column = "domain_start", domain_end_column = "domain_end", domain_type_column = "domain_type", domain_color = domain_color, PTM = FALSE, PTM_type_column = "PTM_type", PTM_color = PTM_color, add_label = TRUE, label_column = "Character", label_value = NULL, column_order = list(Region_1 = 'VH,VL') ) print(p)
This function matches peptide sequences from the 'peptide_data' data frame to corresponding provided sequences in the 'whole_seq' data frame. It calculates the start and end positions of the matched sequences and returns a data frame with information about the matching positions.
match_and_calculate_positions( peptide_data, column, whole_seq, match_columns, sequence_length = NULL, column_keep = NULL )
match_and_calculate_positions( peptide_data, column, whole_seq, match_columns, sequence_length = NULL, column_keep = NULL )
peptide_data |
A data frame containing peptide sequence information to match. |
column |
The name of the column in peptide_data containing the peptide sequences to be matched. |
whole_seq |
A data frame containing details about antibody sequence information including the domain and region information. 'Region_Sequence' column is required for the sequence information. Change the column name if it is different than 'Region_Sequence'. |
match_columns |
A character vector of column names to match on while matching peptide sequence. |
sequence_length |
(Optional) The sequence length range of peptide that we want to keep in the result. (e.g. c(1, 5) will include peptide sequence length from 1 to 5.) |
column_keep |
(Optional) The name of the columns in peptide_data to keep in result data frame. |
A data frame with columns from 'peptide_data' and 'whole_seq' indicating the matched positions and related information.
peptide_data <- data.frame( Sequence = c("AILNK", "BXLMR", "JJNXX", "DDEEF"), Condition_1 = c("Drug1", "Drug1", "Drug2", "Drug2"), Condition_2 = c("Donor1", "Donor2", "Donor1", "Donor2"), Region_1 = c("VH", "VL", "VH", "VL"), Region_2 = c("Arm_1", "Arm_2", "Arm_1", "Arm_2"), Area = c(100, 2, 4, NA) ) whole_seq <- data.frame( Region_Sequence = c( "XYZAILNKPQR", "ABCBXLMRDEF", "GHIJJNXXKLM", "NOPDDEEFQRS", "AILXKPQR", "BNJLMRDEF", "ILNXXKLM", "DDEEXQRS", "XYZAAA", "XYZCCC", "XYZBBB", "XYZDDD", "XYZAAB", "XYZCCD", "XYZBBB", "XYZDDD" ), Condition_1 = c( "Drug1", "Drug1", "Drug2", "Drug2", "Drug1", "Drug1", "Drug2", "Drug2", "Drug1", "Drug1", "Drug2", "Drug2", "Drug1", "Drug1", "Drug2", "Drug2" ), Condition_2 = c( "Donor1", "Donor1", "Donor1", "Donor1", "Donor1", "Donor1", "Donor1", "Donor1", "Donor2", "Donor2", "Donor2", "Donor2", "Donor2", "Donor2", "Donor2", "Donor2" ), Region_1 = c( "VH", "VL", "VH", "VL", "VH", "VL", "VH", "VL", "VH", "VL", "VH", "VL", "VH", "VL", "VH", "VL" ), Region_2 = c( "Arm_1", "Arm_1", "Arm_1", "Arm_1", "Arm_2", "Arm_2", "Arm_2", "Arm_2", "Arm_1", "Arm_1", "Arm_1", "Arm_1", "Arm_2", "Arm_2", "Arm_2", "Arm_2" ) ) match_columns <- c("Condition_1", "Condition_2", "Region_1") column_keep <- c("Region_2") sequence_length <- c(1, 5) column <- "Sequence" matching_result <- match_and_calculate_positions(peptide_data, column, whole_seq, match_columns, sequence_length, column_keep)
peptide_data <- data.frame( Sequence = c("AILNK", "BXLMR", "JJNXX", "DDEEF"), Condition_1 = c("Drug1", "Drug1", "Drug2", "Drug2"), Condition_2 = c("Donor1", "Donor2", "Donor1", "Donor2"), Region_1 = c("VH", "VL", "VH", "VL"), Region_2 = c("Arm_1", "Arm_2", "Arm_1", "Arm_2"), Area = c(100, 2, 4, NA) ) whole_seq <- data.frame( Region_Sequence = c( "XYZAILNKPQR", "ABCBXLMRDEF", "GHIJJNXXKLM", "NOPDDEEFQRS", "AILXKPQR", "BNJLMRDEF", "ILNXXKLM", "DDEEXQRS", "XYZAAA", "XYZCCC", "XYZBBB", "XYZDDD", "XYZAAB", "XYZCCD", "XYZBBB", "XYZDDD" ), Condition_1 = c( "Drug1", "Drug1", "Drug2", "Drug2", "Drug1", "Drug1", "Drug2", "Drug2", "Drug1", "Drug1", "Drug2", "Drug2", "Drug1", "Drug1", "Drug2", "Drug2" ), Condition_2 = c( "Donor1", "Donor1", "Donor1", "Donor1", "Donor1", "Donor1", "Donor1", "Donor1", "Donor2", "Donor2", "Donor2", "Donor2", "Donor2", "Donor2", "Donor2", "Donor2" ), Region_1 = c( "VH", "VL", "VH", "VL", "VH", "VL", "VH", "VL", "VH", "VL", "VH", "VL", "VH", "VL", "VH", "VL" ), Region_2 = c( "Arm_1", "Arm_1", "Arm_1", "Arm_1", "Arm_2", "Arm_2", "Arm_2", "Arm_2", "Arm_1", "Arm_1", "Arm_1", "Arm_1", "Arm_2", "Arm_2", "Arm_2", "Arm_2" ) ) match_columns <- c("Condition_1", "Condition_2", "Region_1") column_keep <- c("Region_2") sequence_length <- c(1, 5) column <- "Sequence" matching_result <- match_and_calculate_positions(peptide_data, column, whole_seq, match_columns, sequence_length, column_keep)
This function takes the output of one of several platforms, a data frame with a column containing modified peptide sequences with detailed post-translational modification (PTM) information, and converts it into a new data frame with the desired format of peptide sequences and associated PTM information. Because output formats vary across platforms, a table mapping PTM mass to PTM type needs to be provided if conversion to PTM_type is needed. The result includes 'Peptide', 'PTM_position', 'PTM_type' and 'PTM_mass' columns. The function chooses the appropriate conversion method based on the specified data type ('PEAKS', 'Spectronaut', 'MSFragger', 'Comet', 'DIANN', 'Skyline' or 'Maxquant'), allowing you to convert the data into a consistent format for further analysis.
obtain_mod( data, column, type, strip_seq_col = NULL, PTM_table = NULL, PTM_annotation = FALSE, PTM_mass_column )
obtain_mod( data, column, type, strip_seq_col = NULL, PTM_table = NULL, PTM_annotation = FALSE, PTM_mass_column )
data |
A data frame with the peptide sequences. |
column |
The name of the column containing the modified peptide sequences. |
type |
A character string specifying the data type (e.g. 'Skyline' or 'Maxquant'). |
strip_seq_col |
(Optional) The name of the column containing the stripped peptide sequences. |
PTM_table |
A data frame with columns 'PTM_mass' and 'PTM_type' containing PTM annotation information. |
PTM_annotation |
A logical value indicating whether to include PTM annotation information in the result. |
PTM_mass_column |
The name of the column containing the PTM mass information. |
A data.table with 'PTM_position', 'PTM_type', 'PTM_mass', 'reps', and other columns.
library(data.table) data_skyline <- data.table( 'Peptide Modified Sequence' = c( "AGLC[+57]QTFVYGGC[+57]R", "AAAASAAEAGIATTGTEDSDDALLK", "IVGGWEC[+57]EK" ), Condition = c("A", "B", "B") ) PTM_table <- data.table( PTM_mass = c(57.02, -0.98, 15.9949), PTM_type = c("Cam", "Amid", "Ox") ) converted_data_skyline <- obtain_mod( data_skyline, 'Peptide Modified Sequence', 'Skyline', strip_seq_col = NULL, PTM_table, PTM_annotation = TRUE, PTM_mass_column = "PTM_mass" ) data_maxquant <- data.table( 'Modified sequence' = c( "_(ac)AAAAELRLLEK_", "_EAAENSLVAYK_", "_AADTIGYPVM(ox)IRSAYALGGLGSGICPNK_" ), Condition = c("A", "B", "B") ) PTM_table <- data.table( PTM_mass = c('Phospho (STY)', 'Oxidation (M)'), PTM_type = c("Phos", "Ox") ) converted_data_maxquant <- obtain_mod( data_maxquant, 'Modified sequence', 'Maxquant', strip_seq_col = NULL, PTM_table, PTM_annotation = TRUE, PTM_mass_column = "PTM_mass" )
library(data.table) data_skyline <- data.table( 'Peptide Modified Sequence' = c( "AGLC[+57]QTFVYGGC[+57]R", "AAAASAAEAGIATTGTEDSDDALLK", "IVGGWEC[+57]EK" ), Condition = c("A", "B", "B") ) PTM_table <- data.table( PTM_mass = c(57.02, -0.98, 15.9949), PTM_type = c("Cam", "Amid", "Ox") ) converted_data_skyline <- obtain_mod( data_skyline, 'Peptide Modified Sequence', 'Skyline', strip_seq_col = NULL, PTM_table, PTM_annotation = TRUE, PTM_mass_column = "PTM_mass" ) data_maxquant <- data.table( 'Modified sequence' = c( "_(ac)AAAAELRLLEK_", "_EAAENSLVAYK_", "_AADTIGYPVM(ox)IRSAYALGGLGSGICPNK_" ), Condition = c("A", "B", "B") ) PTM_table <- data.table( PTM_mass = c('Phospho (STY)', 'Oxidation (M)'), PTM_type = c("Phos", "Ox") ) converted_data_maxquant <- obtain_mod( data_maxquant, 'Modified sequence', 'Maxquant', strip_seq_col = NULL, PTM_table, PTM_annotation = TRUE, PTM_mass_column = "PTM_mass" )
This function takes Comet output containing a column with modified peptide sequences including PTM information and converts it into a new dataframe with the desired format of peptide sequences and associated PTM information.
obtain_mod_Comet( data, column, PTM_table = NULL, PTM_annotation = FALSE, PTM_mass_column )
obtain_mod_Comet( data, column, PTM_table = NULL, PTM_annotation = FALSE, PTM_mass_column )
data |
A data.table with a column containing PTM information. |
column |
The name of the column containing the modified peptide sequences. |
PTM_table |
A data.table with columns 'PTM_mass' and 'PTM_type' containing PTM annotation information. |
PTM_annotation |
A logical value indicating whether to include PTM annotation information in the result. |
PTM_mass_column |
The name of the column containing the PTM mass information |
A data.table with 'PTM_position', 'PTM_type', 'reps', and other columns.
library(data.table) data <- data.table( modified_peptide = c( "AAM[15.9949]Q[-0.98]RGSLYQCDYSTGSC[57.02]EPIR", "K.AAQQTGKLVHANFGT.K", "K.[-0.98]AATVTGKLVHANFGT.K" ), plain_peptide = c( "AAMQRGSLYQCDYSTGSCEPIR", "AAQQTGKLVHANFGT", "AATVTGKLVHANFGT" ), Condition = c("A", "B", "B") ) PTM_table <- data.table( PTM_mass = c(57.02, -0.98, 15.9949), PTM_type = c("Cam", "Amid", "Ox") ) column <- 'modified_peptide' PTM_mass_column <- "PTM_mass" converted_data <- obtain_mod_Comet(data, column, PTM_table, PTM_annotation = TRUE, PTM_mass_column)
library(data.table) data <- data.table( modified_peptide = c( "AAM[15.9949]Q[-0.98]RGSLYQCDYSTGSC[57.02]EPIR", "K.AAQQTGKLVHANFGT.K", "K.[-0.98]AATVTGKLVHANFGT.K" ), plain_peptide = c( "AAMQRGSLYQCDYSTGSCEPIR", "AAQQTGKLVHANFGT", "AATVTGKLVHANFGT" ), Condition = c("A", "B", "B") ) PTM_table <- data.table( PTM_mass = c(57.02, -0.98, 15.9949), PTM_type = c("Cam", "Amid", "Ox") ) column <- 'modified_peptide' PTM_mass_column <- "PTM_mass" converted_data <- obtain_mod_Comet(data, column, PTM_table, PTM_annotation = TRUE, PTM_mass_column)
This function takes DIA-NN output containing a column with modified peptide sequences including PTM information and converts it into a new dataframe with the desired format of peptide sequences and associated PTM information.
obtain_mod_DIANN( data, column, PTM_table = NULL, PTM_annotation = FALSE, PTM_mass_column )
obtain_mod_DIANN( data, column, PTM_table = NULL, PTM_annotation = FALSE, PTM_mass_column )
data |
A dataframe with 'Stripped.Sequence' column and 'Modified.Sequence' column containing modified peptide sequences. |
column |
The name of the column containing the modified peptide sequences. |
PTM_table |
A dataframe with columns 'PTM_mass' and 'PTM_type' containing PTM annotation information. |
PTM_annotation |
A logical value indicating whether to include PTM annotation information in the result. |
PTM_mass_column |
The name of the column containing the PTM mass information |
A dataframe with 'Peptide', 'PTM_position', and 'PTM_type' columns.
library(data.table) data <- data.table( Modified.Sequence = c( "AAAAGPGAALS(UniMod:21)PRPC(UniMod:4)DSDPATPGAQSPK", "AAAASAAEAGIATTGTEDSDDALLK", "AAAAALSGSPPQTEKPT(UniMod:21)HYR" ), Stripped.Sequence = c( "AAAAGPGAALSPRPCDSDPATPGAQSPK", "AAAASAAEAGIATTGTEDSDDALLK", "AAAAALSGSPPQTEKPTHYR" ), Condition = c("A", "B", "B") ) PTM_table <- data.table(PTM_mass = c('UniMod:21', 'UniMod:4'), PTM_type = c("Phos", "Cam")) converted_data <- obtain_mod_DIANN( data, 'Modified.Sequence', PTM_table, PTM_annotation = TRUE, PTM_mass_column = "PTM_mass" )
library(data.table) data <- data.table( Modified.Sequence = c( "AAAAGPGAALS(UniMod:21)PRPC(UniMod:4)DSDPATPGAQSPK", "AAAASAAEAGIATTGTEDSDDALLK", "AAAAALSGSPPQTEKPT(UniMod:21)HYR" ), Stripped.Sequence = c( "AAAAGPGAALSPRPCDSDPATPGAQSPK", "AAAASAAEAGIATTGTEDSDDALLK", "AAAAALSGSPPQTEKPTHYR" ), Condition = c("A", "B", "B") ) PTM_table <- data.table(PTM_mass = c('UniMod:21', 'UniMod:4'), PTM_type = c("Phos", "Cam")) converted_data <- obtain_mod_DIANN( data, 'Modified.Sequence', PTM_table, PTM_annotation = TRUE, PTM_mass_column = "PTM_mass" )
This function takes Maxquant output containing a column with modified peptide sequences including PTM information and converts it into a new dataframe with the desired format of peptide sequences and associated PTM information.
obtain_mod_Maxquant( data, column, PTM_table = NULL, PTM_annotation = FALSE, PTM_mass_column )
obtain_mod_Maxquant( data, column, PTM_table = NULL, PTM_annotation = FALSE, PTM_mass_column )
data |
A data.table with a column containing modified peptide sequences. |
column |
The name of the column containing the modified peptide sequences. |
PTM_table |
A data.table with columns 'PTM_mass' and 'PTM_type' containing PTM annotation information. |
PTM_annotation |
A logical value indicating whether to include PTM annotation information in the result. |
PTM_mass_column |
The name of the column containing the PTM mass information |
A data.table with 'PTM_position', 'PTM_type', 'reps', and other columns.
library(data.table) data <- data.table( 'Modified sequence' = c( "_GLGPSPAGDGPS(Phospho (STY))GSGK_", "_HSSYPAGTEDDEGM(Oxidation (M))GEEPSPFR_", "_HSSYPAGTEDDEGM(Oxidation (M))GEEPS(Phospho (STY))PFR_" ), Condition = c("A", "B", "B") ) PTM_table <- data.table( PTM_mass = c('Phospho (STY)', 'Oxidation (M)'), PTM_type = c("Phos", "Ox") ) converted_data <- obtain_mod_Maxquant( data, 'Modified sequence', PTM_table, PTM_annotation = TRUE, PTM_mass_column = "PTM_mass" )
library(data.table) data <- data.table( 'Modified sequence' = c( "_GLGPSPAGDGPS(Phospho (STY))GSGK_", "_HSSYPAGTEDDEGM(Oxidation (M))GEEPSPFR_", "_HSSYPAGTEDDEGM(Oxidation (M))GEEPS(Phospho (STY))PFR_" ), Condition = c("A", "B", "B") ) PTM_table <- data.table( PTM_mass = c('Phospho (STY)', 'Oxidation (M)'), PTM_type = c("Phos", "Ox") ) converted_data <- obtain_mod_Maxquant( data, 'Modified sequence', PTM_table, PTM_annotation = TRUE, PTM_mass_column = "PTM_mass" )
This function takes MSFragger output containing an 'Assigned Modifications' column with PTM information and converts it into a new dataframe with the desired format of peptide sequences and associated PTM information.
obtain_mod_MSFragger( data, column, strip_seq_col, PTM_table = NULL, PTM_annotation = FALSE, PTM_mass_column )
obtain_mod_MSFragger( data, column, strip_seq_col, PTM_table = NULL, PTM_annotation = FALSE, PTM_mass_column )
data |
A data.table with a column containing stripped sequence and a column containing PTM information. |
column |
The name of the column containing the modified peptide sequences. |
strip_seq_col |
The name of the column containing the stripped peptide sequences. |
PTM_table |
A data.table with columns 'PTM_mass' and 'PTM_type' containing PTM annotation information. |
PTM_annotation |
A logical value indicating whether to include PTM annotation information in the result. |
PTM_mass_column |
The name of the column containing the PTM mass information |
A data.table with 'PTM_position', 'PTM_type', 'reps', and other columns.
library(data.table) data <- data.table( Peptide = c("DDREDMLVYQAK", "EAAENSLVAYK", "IEAELQDICNDVLELLDK"), `Assigned Modifications` = c("C-term(15.9949), 6M(-0.98)", "", "N-term(42.0106)"), Condition1 = c("A", "B", "B"), Condition2 = c("C", "C", "D") ) PTM_table <- data.table( PTM_mass = c(42.0106, -0.98, 15.9949), PTM_type = c("Acet", "Amid", "Ox") ) column <- "Assigned Modifications" strip_seq_col <- "Peptide" converted_data <- obtain_mod_MSFragger( data, column, strip_seq_col, PTM_table, PTM_annotation = TRUE, PTM_mass_column = "PTM_mass" )
library(data.table) data <- data.table( Peptide = c("DDREDMLVYQAK", "EAAENSLVAYK", "IEAELQDICNDVLELLDK"), `Assigned Modifications` = c("C-term(15.9949), 6M(-0.98)", "", "N-term(42.0106)"), Condition1 = c("A", "B", "B"), Condition2 = c("C", "C", "D") ) PTM_table <- data.table( PTM_mass = c(42.0106, -0.98, 15.9949), PTM_type = c("Acet", "Amid", "Ox") ) column <- "Assigned Modifications" strip_seq_col <- "Peptide" converted_data <- obtain_mod_MSFragger( data, column, strip_seq_col, PTM_table, PTM_annotation = TRUE, PTM_mass_column = "PTM_mass" )
This function takes PEAKS output containing a column with modified peptide sequences including PTM information and converts it into a new dataframe with the desired format of peptide sequences and associated PTM information.
obtain_mod_PEAKS( data, column, PTM_table = NULL, PTM_annotation = FALSE, PTM_mass_column )
obtain_mod_PEAKS( data, column, PTM_table = NULL, PTM_annotation = FALSE, PTM_mass_column )
data |
A dataframe with a column containing modified peptide sequences. |
column |
The name of the column containing the modified peptide sequences. |
PTM_table |
A dataframe with columns 'PTM_mass' and 'PTM_type' containing PTM annotation information. |
PTM_annotation |
A logical value indicating whether to include PTM annotation information in the result. |
PTM_mass_column |
The name of the column containing the PTM mass information |
A data.table with 'PTM_position', 'PTM_type', 'PTM_mass', 'reps', and other columns.
library(data.table) data <- data.table( Peptide = c( "AAN(+42)Q(-0.98)RGSLYQCDYSTGSC(+57.02)EPIR", "K.AAQQTGKLVHANFGT.K", "K.(-0.98)AATVTGKLVHANFGT.K" ), Sequence = c( "AANQRGSLYQCDYSTGSCEPIR", "AAQQTGKLVHANFGT", "AATVTGKLVHANFGT" ), Condition = c("A", "B", "B") ) PTM_table <- data.table(PTM_mass = c(42, -0.98, 57.02), PTM_type = c("Acet", "Amid", "Cam")) column <- "Peptide" PTM_mass_column <- "PTM_mass" converted_data <- obtain_mod_PEAKS(data, column, PTM_table, PTM_annotation = TRUE, PTM_mass_column)
library(data.table) data <- data.table( Peptide = c( "AAN(+42)Q(-0.98)RGSLYQCDYSTGSC(+57.02)EPIR", "K.AAQQTGKLVHANFGT.K", "K.(-0.98)AATVTGKLVHANFGT.K" ), Sequence = c( "AANQRGSLYQCDYSTGSCEPIR", "AAQQTGKLVHANFGT", "AATVTGKLVHANFGT" ), Condition = c("A", "B", "B") ) PTM_table <- data.table(PTM_mass = c(42, -0.98, 57.02), PTM_type = c("Acet", "Amid", "Cam")) column <- "Peptide" PTM_mass_column <- "PTM_mass" converted_data <- obtain_mod_PEAKS(data, column, PTM_table, PTM_annotation = TRUE, PTM_mass_column)
This function takes Skyline output containing a column with modified peptide sequences including PTM information and converts it into a new dataframe with the desired format of peptide sequences and associated PTM information.
obtain_mod_Skyline( data, column, PTM_table, PTM_annotation = FALSE, PTM_mass_column )
obtain_mod_Skyline( data, column, PTM_table, PTM_annotation = FALSE, PTM_mass_column )
data |
A data.table with a column containing PTM information. |
column |
The name of the column containing the modified peptide sequences. |
PTM_table |
A data.table with columns 'PTM_mass' and 'PTM_type' containing PTM annotation information. |
PTM_annotation |
A logical value indicating whether to include PTM annotation information in the result. |
PTM_mass_column |
The name of the column containing the PTM mass information |
A data.table with 'PTM_position', 'PTM_type', 'reps', and other columns.
library(data.table) data <- data.table( 'Peptide Modified Sequence' = c( "AAM[15.9949]Q[-0.98]RGSLYQCDYSTGSC[57.02]EPIR", "AAQQTGKLVHANFGT", "[-0.98]AATVTGKLVHANFGT" ), Condition = c("A", "B", "B") ) PTM_table <- data.table( PTM_mass = c(57.02, -0.98, 15.9949), PTM_type = c("Cam", "Amid", "Ox") ) converted_data <- obtain_mod_Skyline( data, 'Peptide Modified Sequence', PTM_table, PTM_annotation = TRUE, PTM_mass_column = "PTM_mass" )
library(data.table) data <- data.table( 'Peptide Modified Sequence' = c( "AAM[15.9949]Q[-0.98]RGSLYQCDYSTGSC[57.02]EPIR", "AAQQTGKLVHANFGT", "[-0.98]AATVTGKLVHANFGT" ), Condition = c("A", "B", "B") ) PTM_table <- data.table( PTM_mass = c(57.02, -0.98, 15.9949), PTM_type = c("Cam", "Amid", "Ox") ) converted_data <- obtain_mod_Skyline( data, 'Peptide Modified Sequence', PTM_table, PTM_annotation = TRUE, PTM_mass_column = "PTM_mass" )
This function takes Spectronaut output containing a column with modified peptide sequences including PTM information and converts it into a new dataframe with the desired format of peptide sequences and associated PTM information.
obtain_mod_Spectronaut( data, column, PTM_table = NULL, PTM_annotation = FALSE, PTM_mass_column )
obtain_mod_Spectronaut( data, column, PTM_table = NULL, PTM_annotation = FALSE, PTM_mass_column )
data |
A data.table with a column containing modified peptide sequences. |
column |
The name of the column containing the modified peptide sequences. |
PTM_table |
A data.table with columns 'PTM_mass' and 'PTM_type' containing PTM annotation information. |
PTM_annotation |
A logical value indicating whether to include PTM annotation information in the result. |
PTM_mass_column |
The name of the column containing the PTM mass information |
A data.table with 'PTM_position', 'PTM_type', 'reps', and other columns.
library(data.table) data <- data.table( EG.ModifiedPeptide = c( "_[Acetyl (Protein N-term)]M[Oxidation (M)]DDREDLVYQAK_", "_EAAENSLVAYK_", "_IEAELQDIC[Carbamidomethyl (C)]NDVLELLDK_" ), Condition = c("A", "B", "B") ) PTM_table <- data.table( PTM_mass = c( 'Acetyl (Protein N-term)', 'Oxidation (M)', 'Carbamidomethyl (C)' ), PTM_type = c("Acet", "Ox", "Cam") ) converted_data <- obtain_mod_Spectronaut(data, 'EG.ModifiedPeptide', PTM_table, PTM_annotation = TRUE, PTM_mass_column = "PTM_mass") data <- data.table( EG.IntPIMID = c( "_[+42]M[-0.98]DDREDLVYQAK_", "_EAAENSLVAYK_", "_IEAELQDIC[+57]NDVLELLDK_" ), Condition = c("A", "B", "B") ) PTM_table <- data.table(PTM_mass = c(42, -0.98, 57), PTM_type = c("Acet", "Amid", "Cam")) PTM_mass_column <- "PTM_mass" converted_data <- obtain_mod_Spectronaut(data, 'EG.IntPIMID', PTM_table, PTM_annotation = TRUE, PTM_mass_column)
library(data.table) data <- data.table( EG.ModifiedPeptide = c( "_[Acetyl (Protein N-term)]M[Oxidation (M)]DDREDLVYQAK_", "_EAAENSLVAYK_", "_IEAELQDIC[Carbamidomethyl (C)]NDVLELLDK_" ), Condition = c("A", "B", "B") ) PTM_table <- data.table( PTM_mass = c( 'Acetyl (Protein N-term)', 'Oxidation (M)', 'Carbamidomethyl (C)' ), PTM_type = c("Acet", "Ox", "Cam") ) converted_data <- obtain_mod_Spectronaut(data, 'EG.ModifiedPeptide', PTM_table, PTM_annotation = TRUE, PTM_mass_column = "PTM_mass") data <- data.table( EG.IntPIMID = c( "_[+42]M[-0.98]DDREDLVYQAK_", "_EAAENSLVAYK_", "_IEAELQDIC[+57]NDVLELLDK_" ), Condition = c("A", "B", "B") ) PTM_table <- data.table(PTM_mass = c(42, -0.98, 57), PTM_type = c("Acet", "Amid", "Cam")) PTM_mass_column <- "PTM_mass" converted_data <- obtain_mod_Spectronaut(data, 'EG.IntPIMID', PTM_table, PTM_annotation = TRUE, PTM_mass_column)
Peptide Quantification
peptide_quantification( whole_seq, matching_result, matching_columns, distinct_columns, quantify_method, area_column = NULL, with_PTM = FALSE, reps = FALSE )
peptide_quantification( whole_seq, matching_result, matching_columns, distinct_columns, quantify_method, area_column = NULL, with_PTM = FALSE, reps = FALSE )
whole_seq |
A dataframe holding whole sequence information. 'Region_Sequence' column is required for the sequence information. Change the column name if it is different than 'Region_Sequence'. |
matching_result |
The dataframe that contains the matched results and PTM information. |
matching_columns |
Vector of column names that should match between each row of 'whole_seq' and the 'matching_result' dataframe. |
distinct_columns |
Vector of column names that should be used to calculate PSM or Area separately for each unique combination of these columns. |
quantify_method |
A string indicating the quantification method. It can be either "PSM" or "Area". |
area_column |
The name of the column in 'matching_result' that contains the area/intensity information. Required if quantify_method is "Area". |
with_PTM |
A boolean parameter indicating whether PTM should be considered during calculation. Default is FALSE. |
reps |
A boolean parameter indicating whether the area/intensity should be divided by the number of replicates. Default is FALSE. |
Returns a dataframe containing the calculated PSM or Area for each record in 'whole_seq'.
whole_seq <- data.frame( Region_Sequence = c( "XYZAAA", "XYZCCC", "XYZBBB", "XYZDDD", "XYZAAB", "XYZCCD", "XYZBBB", "XYZDDD", "XYZAAA", "XYZCCC", "XYZBBB", "XYZDDD", "XYZAAB", "XYZCCD", "XYZBBB", "XYZDDD" ), Condition_1 = c( "Drug1", "Drug1", "Drug2", "Drug2", "Drug1", "Drug1", "Drug2", "Drug2", "Drug1", "Drug1", "Drug2", "Drug2", "Drug1", "Drug1", "Drug2", "Drug2" ), Condition_2 = c( "Donor1", "Donor1", "Donor1", "Donor1", "Donor1", "Donor1", "Donor1", "Donor1", "Donor2", "Donor2", "Donor2", "Donor2", "Donor2", "Donor2", "Donor2", "Donor2" ), Region_1 = c( "VH", "VL", "VH", "VL", "VH", "VL", "VH", "VL", "VH", "VL", "VH", "VL", "VH", "VL", "VH", "VL" ), Region_2 = c( "Arm_1", "Arm_1", "Arm_1", "Arm_1", "Arm_2", "Arm_2", "Arm_2", "Arm_2", "Arm_1", "Arm_1", "Arm_1", "Arm_1", "Arm_2", "Arm_2", "Arm_2", "Arm_2" ) ) matching_result <- data.frame( Sequence = c("AAA", "DDD", "DDD"), Condition_1 = c("Drug1", "Drug2", "Drug2"), Condition_2 = c("Donor1", "Donor2", "Donor2"), Region_1 = c("VH", "VL", "VL"), Region_2 = c("Arm_1", "Arm_2", "Arm_2"), Start_Position = c(4, 4, 4), End_Position = c(6, 6, 6), PTM_position = c(NA, 2, 0), PTM_type = c(NA, "O", "C"), Area = c(100, 200, 200), reps = c(1, 2, 2) ) matching_columns <- c("Condition_1", "Region_2") area_column <- "Area" data_with_quantification <- peptide_quantification( whole_seq, matching_result, matching_columns, distinct_columns = c("Condition_2", "Region_1"), quantify_method = "Area", area_column = area_column, with_PTM = TRUE, reps = TRUE )
whole_seq <- data.frame( Region_Sequence = c( "XYZAAA", "XYZCCC", "XYZBBB", "XYZDDD", "XYZAAB", "XYZCCD", "XYZBBB", "XYZDDD", "XYZAAA", "XYZCCC", "XYZBBB", "XYZDDD", "XYZAAB", "XYZCCD", "XYZBBB", "XYZDDD" ), Condition_1 = c( "Drug1", "Drug1", "Drug2", "Drug2", "Drug1", "Drug1", "Drug2", "Drug2", "Drug1", "Drug1", "Drug2", "Drug2", "Drug1", "Drug1", "Drug2", "Drug2" ), Condition_2 = c( "Donor1", "Donor1", "Donor1", "Donor1", "Donor1", "Donor1", "Donor1", "Donor1", "Donor2", "Donor2", "Donor2", "Donor2", "Donor2", "Donor2", "Donor2", "Donor2" ), Region_1 = c( "VH", "VL", "VH", "VL", "VH", "VL", "VH", "VL", "VH", "VL", "VH", "VL", "VH", "VL", "VH", "VL" ), Region_2 = c( "Arm_1", "Arm_1", "Arm_1", "Arm_1", "Arm_2", "Arm_2", "Arm_2", "Arm_2", "Arm_1", "Arm_1", "Arm_1", "Arm_1", "Arm_2", "Arm_2", "Arm_2", "Arm_2" ) ) matching_result <- data.frame( Sequence = c("AAA", "DDD", "DDD"), Condition_1 = c("Drug1", "Drug2", "Drug2"), Condition_2 = c("Donor1", "Donor2", "Donor2"), Region_1 = c("VH", "VL", "VL"), Region_2 = c("Arm_1", "Arm_2", "Arm_2"), Start_Position = c(4, 4, 4), End_Position = c(6, 6, 6), PTM_position = c(NA, 2, 0), PTM_type = c(NA, "O", "C"), Area = c(100, 200, 200), reps = c(1, 2, 2) ) matching_columns <- c("Condition_1", "Region_2") area_column <- "Area" data_with_quantification <- peptide_quantification( whole_seq, matching_result, matching_columns, distinct_columns = c("Condition_2", "Region_1"), quantify_method = "Area", area_column = area_column, with_PTM = TRUE, reps = TRUE )
This function takes outputs from multiple platforms: a data frame with a column containing peptide sequences to be stripped, and the name of a column where the stripped sequences will be stored. The function chooses the appropriate stripping method based on the specified data type ('PEAKS', 'Spectronaut', 'MSFragger', 'Comet', 'DIANN', 'Skyline' or 'Maxquant').
strip_sequence(data, column, convert_column, type)
strip_sequence(data, column, convert_column, type)
data |
A data frame with the peptide sequences. |
column |
The name of the column containing the peptide sequences to be stripped. |
convert_column |
The name of the column where the stripped sequences will be stored. |
type |
A character string specifying the data type (e.g. 'Skyline' or 'Maxquant'). |
A data frame with the specified column containing stripped sequences.
library(data.table) data_skyline <- data.table( 'Peptide Modified Sequence' = c( "AGLC[+57]QTFVYGGC[+57]R", "AAAASAAEAGIATTGTEDSDDALLK", "IVGGWEC[+57]EK" ), Condition = c("A", "B", "B") ) data_maxquant <- data.table( 'Modified sequence' = c( "_(ac)AAAAELRLLEK_", "_EAAENSLVAYK_", "_AADTIGYPVM(ox)IRSAYALGGLGSGICPNK_" ), Condition = c("A", "B", "B") ) converted_data_skyline <- strip_sequence(data_skyline, 'Peptide Modified Sequence', 'Sequence', "Skyline") converted_data_maxquant <- strip_sequence(data_maxquant, 'Modified sequence', 'Sequence', "Maxquant")
library(data.table) data_skyline <- data.table( 'Peptide Modified Sequence' = c( "AGLC[+57]QTFVYGGC[+57]R", "AAAASAAEAGIATTGTEDSDDALLK", "IVGGWEC[+57]EK" ), Condition = c("A", "B", "B") ) data_maxquant <- data.table( 'Modified sequence' = c( "_(ac)AAAAELRLLEK_", "_EAAENSLVAYK_", "_AADTIGYPVM(ox)IRSAYALGGLGSGICPNK_" ), Condition = c("A", "B", "B") ) converted_data_skyline <- strip_sequence(data_skyline, 'Peptide Modified Sequence', 'Sequence', "Skyline") converted_data_maxquant <- strip_sequence(data_maxquant, 'Modified sequence', 'Sequence', "Maxquant")
This function takes Comet output containing a column with peptide sequences to be stripped and converts it into a new dataframe with the stripped sequence
strip_sequence_Comet(data, column, convert_column)
strip_sequence_Comet(data, column, convert_column)
data |
A dataframe with a column containing peptide sequences to be stripped |
column |
The name of the column containing the peptide sequences to be stripped. |
convert_column |
The name of the column where the stripped sequences will be stored. |
A dataframe with a column containing stripped sequence
library(data.table) data <- data.table( modified_peptide = c( "AAM[15.9949]Q[-0.98]RGSLYQCDYSTGSC[57.02]EPIR", "K.AAQQTGKLVHANFGT.K", "K.[0.98]AATVTGKLVHANFGT.K" ), Condition = c("A", "B", "B") ) column <- 'modified_peptide' convert_column <- 'Sequence' converted_data <- strip_sequence_Comet(data, column, convert_column)
library(data.table) data <- data.table( modified_peptide = c( "AAM[15.9949]Q[-0.98]RGSLYQCDYSTGSC[57.02]EPIR", "K.AAQQTGKLVHANFGT.K", "K.[0.98]AATVTGKLVHANFGT.K" ), Condition = c("A", "B", "B") ) column <- 'modified_peptide' convert_column <- 'Sequence' converted_data <- strip_sequence_Comet(data, column, convert_column)
This function takes DIANN output containing a column with peptide sequences to be stripped and converts it into a new dataframe with the stripped sequence
strip_sequence_DIANN(data, column, convert_column)
strip_sequence_DIANN(data, column, convert_column)
data |
A dataframe with a column containing peptide sequences to be stripped |
column |
The name of the column containing the peptide sequences to be stripped. |
convert_column |
The name of the column where the stripped sequences will be stored. |
A dataframe with a column containing stripped sequence
library(data.table) data <- data.table( Modified.Sequence = c( "AAAAGPGAALS(UniMod:21)PRPC(UniMod:4)DSDPATPGAQSPK", "AAAASAAEAGIATTGTEDSDDALLK", "AAAAALSGSPPQTEKPT(UniMod:21)HYR" ), Condition = c("A", "B", "B") ) column <- 'Modified.Sequence' convert_column <- 'Sequence' converted_data <- strip_sequence_DIANN(data, column, convert_column)
library(data.table) data <- data.table( Modified.Sequence = c( "AAAAGPGAALS(UniMod:21)PRPC(UniMod:4)DSDPATPGAQSPK", "AAAASAAEAGIATTGTEDSDDALLK", "AAAAALSGSPPQTEKPT(UniMod:21)HYR" ), Condition = c("A", "B", "B") ) column <- 'Modified.Sequence' convert_column <- 'Sequence' converted_data <- strip_sequence_DIANN(data, column, convert_column)
This function takes Maxquant output containing a column with peptide sequences to be stripped and converts it into a new dataframe with the stripped sequence
strip_sequence_Maxquant(data, column, convert_column)
strip_sequence_Maxquant(data, column, convert_column)
data |
A dataframe with a column containing peptide sequences to be stripped |
column |
The name of the column containing the peptide sequences to be stripped. |
convert_column |
The name of the column where the stripped sequences will be stored. |
A dataframe with a column containing stripped sequence
library(data.table) data <- data.table( 'Modified sequence' = c( "_(ac)AA(ox)AAELRLLEK_", "_EAAENSLVAYK_", "_AADTIGYPVM(ox)IRSAYALGGLGSGICPNK_" ), Condition = c("A", "B", "B") ) column <- 'Modified sequence' convert_column <- 'Sequence' converted_data <- strip_sequence_Maxquant(data, column, convert_column)
library(data.table) data <- data.table( 'Modified sequence' = c( "_(ac)AA(ox)AAELRLLEK_", "_EAAENSLVAYK_", "_AADTIGYPVM(ox)IRSAYALGGLGSGICPNK_" ), Condition = c("A", "B", "B") ) column <- 'Modified sequence' convert_column <- 'Sequence' converted_data <- strip_sequence_Maxquant(data, column, convert_column)
This function takes MSFragger output containing a column with peptide sequences to be stripped and converts it into a new dataframe with the stripped sequence
strip_sequence_MSFragger(data, column, convert_column)
strip_sequence_MSFragger(data, column, convert_column)
data |
A dataframe with a column containing peptide sequences to be stripped |
column |
The name of the column containing the peptide sequences to be stripped. |
convert_column |
The name of the column where the stripped sequences will be stored. |
A dataframe with a column containing stripped sequence
library(data.table) data <- data.table( 'Modified Peptide' = c( "AAM[15.9949]Q[-0.98]RGSLYQCDYSTGSC[57.02]EPIR", "K.AAQQTGKLVHANFGT.K", "K.[0.98]AATVTGKLVHANFGT.K" ), Condition = c("A", "B", "B") ) column <- 'Modified Peptide' convert_column <- 'Sequence' converted_data <- strip_sequence_MSFragger(data, 'Modified Peptide', 'Sequence')
library(data.table) data <- data.table( 'Modified Peptide' = c( "AAM[15.9949]Q[-0.98]RGSLYQCDYSTGSC[57.02]EPIR", "K.AAQQTGKLVHANFGT.K", "K.[0.98]AATVTGKLVHANFGT.K" ), Condition = c("A", "B", "B") ) column <- 'Modified Peptide' convert_column <- 'Sequence' converted_data <- strip_sequence_MSFragger(data, 'Modified Peptide', 'Sequence')
This function takes PEAKS output containing a column with peptide sequences to be stripped and converts it into a new dataframe with the stripped sequence
strip_sequence_PEAKS(data, column, convert_column)
strip_sequence_PEAKS(data, column, convert_column)
data |
A dataframe with a column containing peptide sequences to be stripped |
column |
The name of the column containing the peptide sequences to be stripped. |
convert_column |
The name of the column where the stripped sequences will be stored. |
A dataframe with a column containing stripped sequence
library(data.table) data <- data.table( Peptide = c( "AAN(+0.98)Q(-0.98)RGSLYQCDYSTGSC(+57.02)EPIR", "K.AAQQTGKLVHANFGT.K", "K.(+0.98)AATVTGKLVHANFGT.K" ), Condition = c("A", "B", "B") ) column <- "Peptide" convert_column <- "Sequence" converted_data <- strip_sequence_PEAKS(data, column, convert_column)
library(data.table) data <- data.table( Peptide = c( "AAN(+0.98)Q(-0.98)RGSLYQCDYSTGSC(+57.02)EPIR", "K.AAQQTGKLVHANFGT.K", "K.(+0.98)AATVTGKLVHANFGT.K" ), Condition = c("A", "B", "B") ) column <- "Peptide" convert_column <- "Sequence" converted_data <- strip_sequence_PEAKS(data, column, convert_column)
This function takes Skyline output containing a column with peptide sequences to be stripped and converts it into a new dataframe with the stripped sequence
strip_sequence_Skyline(data, column, convert_column)
strip_sequence_Skyline(data, column, convert_column)
data |
A dataframe with a column containing peptide sequences to be stripped |
column |
The name of the column containing the peptide sequences to be stripped. |
convert_column |
The name of the column where the stripped sequences will be stored. |
A dataframe with a column containing stripped sequence
library(data.table) data <- data.table( 'Peptide Modified Sequence' = c( "AGLC[+57]QTFVYGGC[+57]R", "AAAASAAEAGIATTGTEDSDDALLK", "IVGGWEC[+57]EK" ), Condition = c("A", "B", "B") ) column <- 'Peptide Modified Sequence' convert_column <- 'Sequence' converted_data <- strip_sequence_Skyline(data, column, convert_column)
library(data.table) data <- data.table( 'Peptide Modified Sequence' = c( "AGLC[+57]QTFVYGGC[+57]R", "AAAASAAEAGIATTGTEDSDDALLK", "IVGGWEC[+57]EK" ), Condition = c("A", "B", "B") ) column <- 'Peptide Modified Sequence' convert_column <- 'Sequence' converted_data <- strip_sequence_Skyline(data, column, convert_column)
This function takes Spectronaut output containing a column with peptide sequences to be stripped and converts it into a new dataframe with the stripped sequence
strip_sequence_Spectronaut(data, column, convert_column)
strip_sequence_Spectronaut(data, column, convert_column)
data |
A dataframe with a column containing peptide sequences to be stripped |
column |
The name of the column containing the peptide sequences to be stripped. |
convert_column |
The name of the column where the stripped sequences will be stored. |
A dataframe with a column containing stripped sequence
library(data.table) data <- data.table( EG.IntPIMID = c( "_[+42]M[-16]DDREDLVYQAK_", "_EAAENSLVAYK_", "_IEAELQDIC[+57]NDVLELLDK_" ), Condition = c("A", "B", "B") ) converted_data <- strip_sequence_Spectronaut(data, 'EG.IntPIMID', 'Sequence')
library(data.table) data <- data.table( EG.IntPIMID = c( "_[+42]M[-16]DDREDLVYQAK_", "_EAAENSLVAYK_", "_IEAELQDIC[+57]NDVLELLDK_" ), Condition = c("A", "B", "B") ) converted_data <- strip_sequence_Spectronaut(data, 'EG.IntPIMID', 'Sequence')