| Title: | Root Exudate Feature Toolkit |
|---|---|
| Description: | Provides tools for molecule-oriented and reaction-centred analysis of root exudate datasets. It supports structural matching based on 'PubChem', calculation of molecular descriptors, and inference of candidate microbe-associated metabolic reactions using Kyoto Encyclopedia of Genes and Genomes ('KEGG') identifiers and Enzyme Commission ('EC') numbers. For background on these databases, see Kanehisa et al. (2023) <doi:10.1093/nar/gkac963> and Kim et al. (2023) <doi:10.1093/nar/gkac956>. |
| Authors: | Guozhen Gao [aut, cre] |
| Maintainer: | Guozhen Gao <[email protected]> |
| License: | MIT + file LICENSE |
| Version: | 0.1.4 |
| Built: | 2026-05-19 14:03:43 UTC |
| Source: | https://github.com/cran/REFT |
REFT is an R package for batch PubChem matching and molecular descriptor
calculation from root exudate or metabolomics annotation tables.
Calculate six descriptors from a character vector of SMILES using rcdk.
reft_calc_descriptors(smiles)
smiles |
A character vector of SMILES. |
A tibble with six molecular descriptors.
if (requireNamespace("rcdk", quietly = TRUE)) {
  reft_calc_descriptors("OC(=O)CCC(=O)O")
}
Import a microbial EC annotation table, normalize EC identifiers, extract
species names from taxonomy strings, query KEGG for EC-linked reactions, and
append reactants, products, and compound formulae. By default, no files are
written; set output_dir to explicitly request Excel outputs.
reft_kegg_microbe_run(
  input_file,
  ec_col = "EC_Number",
  taxonomy_col = "Taxonomy",
  output_dir = NULL,
  output_file = "microbe_ec_kegg_reactions.xlsx",
  sleep_sec = 0.35,
  verbose = TRUE
)
input_file |
Path to input annotation table. |
ec_col |
Column containing EC numbers. Default is `"EC_Number"`. |
taxonomy_col |
Column containing taxonomy strings. Default is `"Taxonomy"`. |
output_dir |
Output directory. If `NULL` (the default), no files are written. |
output_file |
Output Excel filename. Default is
`"microbe_ec_kegg_reactions.xlsx"`. |
sleep_sec |
Delay between KEGG requests in seconds. Default is `0.35`. |
verbose |
Whether to print progress. Default is `TRUE`. |
A named list containing:
Full result table with EC, microbe, reaction, compounds, and formulae.
EC-to-reaction mapping table.
Reaction detail table.
Compound formula table.
toy <- data.frame(
  EC_Number = "1.1.1.1",
  Taxonomy = "k__Bacteria;p__Proteobacteria;g__Escherichia;s__Escherichia_coli"
)
input_file <- tempfile(fileext = ".csv")
utils::write.csv(toy, input_file, row.names = FALSE)
res <- try(
  reft_kegg_microbe_run(input_file, output_dir = tempdir(), sleep_sec = 0, verbose = FALSE),
  silent = TRUE
)
if (!inherits(res, "try-error")) head(res$results)
Batch match SMILES using Name, Other Name, KEGG ID, and HMDB ID in order.
reft_match_smiles(
  data,
  name_col = "Name",
  other_col = "Other_name(Kegg_name)",
  hmdb_col = "HMDB_ID",
  kegg_col = "Kegg_ID"
)
data |
A data frame containing query columns. |
name_col |
Compound name column. |
other_col |
Alternative name column. |
hmdb_col |
HMDB ID column. |
kegg_col |
KEGG ID column. |
A data frame with matching log and SMILES.
dat <- data.frame(
  Name = "Glutarate",
  `Other_name(Kegg_name)` = NA,
  HMDB_ID = NA,
  Kegg_ID = NA,
  check.names = FALSE
)
res <- try(reft_match_smiles(dat), silent = TRUE)
if (!inherits(res, "try-error")) head(res)
Import an Excel table, clean query fields, match SMILES from PubChem,
calculate six molecular descriptors, and optionally write Excel outputs.
By default, no files are written; set output_dir to explicitly request
Excel outputs.
reft_run(
  input_file,
  name_col = "Name",
  other_col = "Other_name(Kegg_name)",
  hmdb_col = "HMDB_ID",
  kegg_col = "Kegg_ID",
  output_dir = NULL,
  output_desc_file = "metabolites_6_descriptors.xlsx",
  output_unmatched_file = "unmatched_smiles.xlsx",
  output_log_file = "pubchem_match_log.xlsx",
  verbose = TRUE
)
input_file |
Path to the input Excel file. |
name_col |
Column name for compound name. Default is `"Name"`. |
other_col |
Column name for alternative name. Default is
`"Other_name(Kegg_name)"`. |
hmdb_col |
Column name for HMDB identifier. Default is `"HMDB_ID"`. |
kegg_col |
Column name for KEGG identifier. Default is `"Kegg_ID"`. |
output_dir |
Output directory. If `NULL` (the default), no files are written. |
output_desc_file |
Final descriptor Excel filename. |
output_unmatched_file |
Unmatched records Excel filename. |
output_log_file |
PubChem match log Excel filename. |
verbose |
Whether to print progress. Default is `TRUE`. |
A named list with three data frames:
Final annotation table with SMILES and six descriptors.
Rows that could not be matched to SMILES.
Unique-query matching log from PubChem.
toy <- data.frame(
  Name = "Glutarate",
  `Other_name(Kegg_name)` = NA,
  HMDB_ID = NA,
  Kegg_ID = NA,
  check.names = FALSE
)
if (requireNamespace("rcdk", quietly = TRUE)) {
  input_file <- tempfile(fileext = ".xlsx")
  writexl::write_xlsx(toy, input_file)
  res <- try(reft_run(input_file, output_dir = tempdir(), verbose = FALSE), silent = TRUE)
  if (!inherits(res, "try-error")) head(res$descriptors)
}
A simplified wrapper around reft_run() for the common case where the input
file already uses the default column names. By default, no files are written;
set output_dir to explicitly request Excel outputs.
reft_run_simple(input_file, output_dir = NULL, verbose = TRUE)
input_file |
Path to the input Excel file. |
output_dir |
Output directory. If `NULL` (the default), no files are written. |
verbose |
Whether to print progress. Default is `TRUE`. |
Same as reft_run().
toy <- data.frame(
  Name = "Glutarate",
  `Other_name(Kegg_name)` = NA,
  HMDB_ID = NA,
  Kegg_ID = NA,
  check.names = FALSE
)
if (requireNamespace("rcdk", quietly = TRUE)) {
  input_file <- tempfile(fileext = ".xlsx")
  writexl::write_xlsx(toy, input_file)
  res <- try(reft_run_simple(input_file, output_dir = tempdir(), verbose = FALSE), silent = TRUE)
  if (!inherits(res, "try-error")) head(res$descriptors)
}