## Assess sample composition
### Visualize sample composition
The get_compound_belongs_to function takes a list of KEGG compound IDs and returns the pathways that these compounds belong to.
The function read.table is used to read the composite spectra stored in the folder Metaboanalyst; here it reads the file called composite spectra.
fix invalid codeWed, 07 Jun 2023 ### fix invalid code:
def add(a, b):
    """Return the sum of ``a`` and ``b``."""
    return a + b
# R package names are case-sensitive; the KEGG REST client is "KEGGREST".
library(KEGGREST)
#' Collect the distinct organisms reported for a set of compounds.
#'
#' @param compounds Vector of compound identifiers; each one is passed to
#'   `kegg_search()` on its own.
#' @return The unique values gathered from the `org` element of every lookup,
#'   or `NULL` when no lookup produced a value.
find_organism <- function(compounds) {
  organism <- NULL
  for (i in compounds) {
    # Use the value of tryCatch() itself. An assignment made inside a handler
    # is local to that handler, so `res <- NULL` there never reset `res`: a
    # lookup that warned re-appended the previous compound's result, or failed
    # with "object 'res' not found" when it was the first one.
    res <- tryCatch(
      kegg_search(i)$org,
      warning = function(warn) NULL
    )
    organism <- append(organism, res)
  }
  # End on a plain expression so the result is returned visibly.
  unique(organism)
}
The file composite spectra.csv in the directory D:/USUARIO/OneDrive/Documents/Analisis de datos/Metaboanalist/importan Features POS/composite spectra.csv is read and stored in the variable data as a data frame (because read.table returns a data frame).
# fix invalid code - Thu, 08 Jun 2023
#' Look up the KEGG pathways of the compound with a given formula.
#'
#' The compound name is read from the global `met100` table (columns
#' `compound_name` and `compound_formula`), resolved to a KEGG compound ID
#' with `keggFind()`, and each linked pathway is fetched with `keggGet()`.
#'
#' @param compound_formula Molecular formula to look up in `met100`.
#' @return A data.frame with one row per pathway and the columns
#'   `compound_name`, `pathway_id`, `pathway_name`, `pathway_description` and
#'   `organism`; zero rows when the formula, compound or pathways are missing.
get_kegg_info <- function(compound_formula) {
  no_rows <- data.frame(
    compound_name = character(0), pathway_id = character(0),
    pathway_name = character(0), pathway_description = character(0),
    organism = character(0), stringsAsFactors = FALSE
  )

  # %in% never yields NA, so rows with a missing formula are simply skipped.
  matches <- met100$compound_name[met100$compound_formula %in% compound_formula]
  if (length(matches) == 0L) {
    return(no_rows)
  }
  compound_name <- as.character(matches[[1L]])

  # keggFind() returns a character vector named by KEGG ID ("cpd:C00031").
  # NOTE(review): this is a keyword search, so the first hit is assumed to be
  # the intended compound - confirm for ambiguous names.
  hits <- keggFind("compound", compound_name)
  if (length(hits) == 0L) {
    return(no_rows)
  }
  kegg_id <- names(hits)[[1L]]

  # keggLink() returns a named character vector whose values are the pathway
  # IDs; it is not a list, so there is no `$Pathway` element to extract.
  pathway_ids <- unique(as.character(unname(keggLink("pathway", kegg_id))))
  if (length(pathway_ids) == 0L) {
    return(no_rows)
  }

  # One request per pathway keeps the entries aligned with `pathway_ids`.
  pathway_info <- lapply(pathway_ids, function(id) keggGet(id)[[1L]])

  # Parsed entries use upper-case field names; absent fields become NA.
  entry_field <- function(entry, key) {
    value <- entry[[key]]
    if (is.null(value)) NA_character_ else paste(value, collapse = "; ")
  }

  data.frame(
    compound_name = compound_name,
    pathway_id = pathway_ids,
    pathway_name = vapply(
      pathway_info, entry_field, character(1), key = "NAME"
    ),
    pathway_description = vapply(
      pathway_info, entry_field, character(1), key = "DESCRIPTION"
    ),
    organism = vapply(
      pathway_info, entry_field, character(1), key = "ORGANISM"
    ),
    stringsAsFactors = FALSE
  )
}
Get the amino acid sequences of hsa:10458 and ece:Z5100.
# fix invalid code - Tue, 13 Jun 2023
# KEGG compound IDs to query. The original listing broke off in the middle of
# a string literal; the vector is closed after the last complete ID.
compound_list <- c(
  "C00022", "C00024", "C00031", "C00037", "C00038", "C00042", "C00049",
  "C00051", "C00068", "C00075", "C00087", "C00111", "C00140", "C00144",
  "C00162", "C00239", "C00242", "C00254", "C00267", "C00269", "C00272",
  "C00273", "C00274", "C00276", "C00280", "C00282", "C00283", "C00284",
  "C00285", "C00287", "C00288", "C00289", "C00290", "C00291", "C00292",
  "C00293", "C00294", "C00295", "C00296", "C00297", "C00298", "C00299",
  "C00300", "C00301", "C00302", "C00303", "C00304", "C00305"
)
returns the column names of metabolite_info.
The function is used to test whether a given compound is associated with a specific organism.
# fix invalid code - Thu, 08 Jun 2023
#' Look up the KEGG pathways of the compound with a given formula.
#'
#' The compound name is read from the global `met100` table (columns
#' `compound_name` and `compound_formula`), resolved to a KEGG compound ID
#' with `keggFind()`, and each linked pathway is fetched with `keggGet()`.
#'
#' @param compound_formula Molecular formula to look up in `met100`.
#' @return A data.frame with one row per pathway and the columns
#'   `compound_name`, `pathway_id`, `pathway_name`, `pathway_description` and
#'   `organism`; zero rows when the formula, compound or pathways are missing.
get_kegg_info <- function(compound_formula) {
  no_rows <- data.frame(
    compound_name = character(0), pathway_id = character(0),
    pathway_name = character(0), pathway_description = character(0),
    organism = character(0), stringsAsFactors = FALSE
  )

  # %in% never yields NA, so rows with a missing formula are simply skipped.
  matches <- met100$compound_name[met100$compound_formula %in% compound_formula]
  if (length(matches) == 0L) {
    return(no_rows)
  }
  compound_name <- as.character(matches[[1L]])

  # keggFind() returns a character vector named by KEGG ID ("cpd:C00031").
  # NOTE(review): this is a keyword search, so the first hit is assumed to be
  # the intended compound - confirm for ambiguous names.
  hits <- keggFind("compound", compound_name)
  if (length(hits) == 0L) {
    return(no_rows)
  }
  kegg_id <- names(hits)[[1L]]

  # keggLink() returns a named character vector whose values are the pathway
  # IDs; it is not a list, so there is no `$Pathway` element to extract.
  pathway_ids <- unique(as.character(unname(keggLink("pathway", kegg_id))))
  if (length(pathway_ids) == 0L) {
    return(no_rows)
  }

  # One request per pathway keeps the entries aligned with `pathway_ids`.
  pathway_info <- lapply(pathway_ids, function(id) keggGet(id)[[1L]])

  # Parsed entries use upper-case field names; absent fields become NA.
  entry_field <- function(entry, key) {
    value <- entry[[key]]
    if (is.null(value)) NA_character_ else paste(value, collapse = "; ")
  }

  data.frame(
    compound_name = compound_name,
    pathway_id = pathway_ids,
    pathway_name = vapply(
      pathway_info, entry_field, character(1), key = "NAME"
    ),
    pathway_description = vapply(
      pathway_info, entry_field, character(1), key = "DESCRIPTION"
    ),
    organism = vapply(
      pathway_info, entry_field, character(1), key = "ORGANISM"
    ),
    stringsAsFactors = FALSE
  )
}
The regular expression "^NETWORK" is used to match the variable names that starts with "NETWORK" in the names of metabolite info data frame. The function grepl returns a vector of TRUEs and FALSEs.
The function add takes two arguments a and b and returns the sum of a and b.
loops over each element of compounds.
The function loop takes a table as an argument (compounds), and returns a table with the same number of rows as the original table (organisms). The function loops through the compounds table, for each row (compound), it runs the KEGG function (compound_info <- KEGG_find(compound,"compound")), this KEGG function returns a table with several columns. The if statement checks if the compound has more than one organism associated with it, if it does then the organism is retrieved and added to the table, if not it is added as NA.
fix invalid codeTue, 13 Jun 2023 compound_list <- c("C00022", "C00024", "C00031")
library(KEGGREST)
library(data.table)
kg_id <- 'C00008'
# using the kegg_get function to link the compound with the pathway
# using the compound_pathway function to link again the compound with the pathway
# flatten the list
# convert to data.frame
# function to get the pathway associated with the compound
#' Get the KEGG pathways linked to a compound.
#'
#' @param kg_id KEGG compound ID, with or without the "cpd:" database prefix
#'   (e.g. "C00008" or "cpd:C00008").
#' @return A data.frame with a single character column `pw` holding one
#'   pathway ID per row (zero rows when nothing is linked).
get_pathway <- function(kg_id) {
  # Link queries are written with database-prefixed IDs, so add "cpd:" to a
  # bare compound number.
  has_prefix <- grepl(":", kg_id, fixed = TRUE)
  query <- ifelse(has_prefix, kg_id, paste0("cpd:", kg_id))

  # A single keggLink() call replaces the previous pair of lookups, whose
  # first result was overwritten before it was ever used.
  links <- keggLink("pathway", query)

  # Drop the names (the source compound IDs) so rows are numbered plainly.
  data.frame(pw = as.character(unname(links)), stringsAsFactors = FALSE)
}
# function to parse the pathway
get_pathway(kg_id)
reads a table and assigns it to an object called data.
header = TRUE specifies that the first line in the file is a header and
sep = "," specifies that the file is separated by commas.
shows the results of the keggGet function using the compound and metabolite identifiers C01290 and G00092.
This means that you are getting all the pathways that the metabolite of interest is involved in.
The function readMSData allows reading data from several formats. Here, we read the data from a csv file, therefore we use the read.table function. The csv file contains the m/z, retention time and intensity values of the metabolomic profiles for each sample (n = 9 samples per group).
Then, we use the xcmsRaw function to read the raw data from the csv file.
The normalize function normalizes the data to the median. Therefore, we can compare the relative intensities between samples.
# R package names are case-sensitive; the KEGG REST client is "KEGGREST".
library(KEGGREST)
# KEGG compound IDs to check.
compounds <- c("C00159", "C00772", "C00678", "C00355", "C00022")
#' Test whether a compound is associated with an organism.
#'
#' @param compound Compound identifier; coerced to a single string.
#' @param organism Organism code; coerced to a single string.
#' @return `TRUE` when `organism` is among the values returned by
#'   `kegg_compound_2_organism()` for `compound`; `FALSE` otherwise, including
#'   when that lookup raises an error.
is_in_organism <- function(compound, organism) {
  compound <- toString(compound)
  organism <- toString(organism)
  # The membership test is the value of the whole expression; a failed lookup
  # is reported as "not found".
  tryCatch(
    organism %in% kegg_compound_2_organism(compound),
    error = function(err) FALSE
  )
}
is_in_organism("C00159", "eco")
keggrest('C00001')
# Visit every compound ID in turn. R has no C-style for (init; test; step)
# loop and its vectors are 1-indexed, so iterate over seq_along() instead.
for (i in seq_along(compounds)) {
  compound <- compounds[[i]]
}
# fix invalid code - Wed, 14 Jun 2023
# For every compound, fetch its KEGG entry and keep the PATHWAY field.
# keggGet() takes plain entry IDs; "<id>/pathway" is not a valid identifier.
pathway_list <- lapply(compound_list, function(cpd) {
  entry <- keggGet(cpd)
  # An unknown ID yields no entry; record NULL instead of failing on [[1]].
  if (length(entry) == 0L) NULL else entry[[1L]]$PATHWAY
})
The function add takes two arguments a and b and returns the sum of a and b.
fix invalid codeWed, 07 Jun 2023
function that: library(msigdb)
#' Collect the genes of several gene sets.
#'
#' @param msig Vector of gene-set identifiers used to index `db`.
#' @param db Gene-set collection indexed with `db[i]`; defaults to
#'   `GREAT_HG19`.
#' @return The `genes` values of all requested sets, concatenated in order
#'   (`NULL` when none were found).
msig_genes <- function(msig, db = GREAT_HG19) {
  genes <- NULL
  for (i in msig) {
    # Use the value of tryCatch() itself. An assignment made inside a handler
    # is local to that handler, so `res <- NULL` there never reset `res`: a
    # lookup that warned re-appended the previous set's genes, or failed with
    # "object 'res' not found" when it was the first one.
    res <- tryCatch(
      db[i]$genes,
      warning = function(warn) NULL
    )
    genes <- append(genes, res)
  }
  genes
}
# R package names are case-sensitive; the KEGG REST client is "KEGGREST".
library(KEGGREST)
compounds <- c("C00022", "C00153", "C00216", "C01167")
# NOTE(review): "M00008" has the form of a KEGG module ID rather than an
# organism code such as "eco" - confirm the intended value.
organism <- c("M00008")
# NOTE(review): kegg.compound.organism() is not defined in the visible part of
# this file - confirm where it comes from before relying on `result`.
result <- kegg.compound.organism(compounds, organism)
returns the names of the different metabolites.
fix invalid codeWed, 07 Jun 2023 - create a function that takes a kegg compound id and a vector of organism ids.
- The function should return a logical vector indicating whether the compound belongs to the organism(s) passed to the function.
- Use the function to check whether compounds in `compounds` belong to organism(s) in `organism`
list_of_compounds = list(c('C00001', 'C00002', 'C00003', 'C00004'))
#' Scrape the first pathway link from each compound's KEGG DBGET page.
#'
#' @param compound One or more KEGG compound IDs, given as a string, a
#'   character vector, or a list of those.
#' @return A character vector with one element per compound: the text of the
#'   first node matching '#pathway_tab_1 > ul > li > a' on that page.
get_compound_belongs_to <- function(compound) {
  # Flatten first: pasting a list deparses each element, so a list holding a
  # vector of IDs used to be turned into a single meaningless URL.
  ids <- as.character(unlist(compound, use.names = FALSE))

  # read_html() loads one page at a time, so look the IDs up one by one.
  vapply(ids, function(id) {
    url <- paste0('https://www.genome.jp/dbget-bin/www_bget?-f+m+cpd:', id)
    page <- read_html(url)
    node <- html_node(page, '#pathway_tab_1 > ul > li > a')
    html_text(node)
  }, character(1), USE.NAMES = FALSE)
}
get_compound_belongs_to(list_of_compounds)
the header = TRUE means that the first line of the file is a header line. This will be used as the names of the data.frame columns. The sep = "," means that the fields in the file are separated by commas.
# fix invalid code - Tue, 13 Jun 2023
#' Look up the KEGG pathways of the compound with a given formula.
#'
#' The compound name is read from the global `met100` table (columns
#' `compound_name` and `compound_formula`), resolved to a KEGG compound ID
#' with `keggFind()`, and each linked pathway is fetched with `keggGet()`.
#'
#' @param compound_formula Molecular formula to look up in `met100`.
#' @return A data.frame with one row per pathway and the columns
#'   `compound_name`, `pathway_id`, `pathway_name`, `pathway_description` and
#'   `organism`; zero rows when the formula, compound or pathways are missing.
get_kegg_info <- function(compound_formula) {
  no_rows <- data.frame(
    compound_name = character(0), pathway_id = character(0),
    pathway_name = character(0), pathway_description = character(0),
    organism = character(0), stringsAsFactors = FALSE
  )

  # %in% never yields NA, so rows with a missing formula are simply skipped.
  matches <- met100$compound_name[met100$compound_formula %in% compound_formula]
  if (length(matches) == 0L) {
    return(no_rows)
  }
  compound_name <- as.character(matches[[1L]])

  # keggFind() returns a character vector named by KEGG ID ("cpd:C00031").
  # NOTE(review): this is a keyword search, so the first hit is assumed to be
  # the intended compound - confirm for ambiguous names.
  hits <- keggFind("compound", compound_name)
  if (length(hits) == 0L) {
    return(no_rows)
  }
  kegg_id <- names(hits)[[1L]]

  # keggLink() returns a named character vector whose values are the pathway
  # IDs; it is not a list, so there is no `$Pathway` element to extract.
  pathway_ids <- unique(as.character(unname(keggLink("pathway", kegg_id))))
  if (length(pathway_ids) == 0L) {
    return(no_rows)
  }

  # One request per pathway keeps the entries aligned with `pathway_ids`.
  pathway_info <- lapply(pathway_ids, function(id) keggGet(id)[[1L]])

  # Parsed entries use upper-case field names; absent fields become NA.
  entry_field <- function(entry, key) {
    value <- entry[[key]]
    if (is.null(value)) NA_character_ else paste(value, collapse = "; ")
  }

  data.frame(
    compound_name = compound_name,
    pathway_id = pathway_ids,
    pathway_name = vapply(
      pathway_info, entry_field, character(1), key = "NAME"
    ),
    pathway_description = vapply(
      pathway_info, entry_field, character(1), key = "DESCRIPTION"
    ),
    organism = vapply(
      pathway_info, entry_field, character(1), key = "ORGANISM"
    ),
    stringsAsFactors = FALSE
  )
}
The organism name is stored inside the compound_info$organism variable
# Script: request something per compound via keggrest() and print a message
# depending on which hard-coded URL the result equals.
# NOTE(review): this assignment is repeated verbatim two lines below.
compounds <- c("C06483", "C06484", "C06485")
# List of KEGG compounds
compounds <- c("C06483", "C06484", "C06485")
# Call keggrest() once per ID with "compound:<id>" and "image"; sapply() names
# the result by compound ID, which the loop below relies on for indexing.
# NOTE(review): keggrest() is not defined in the visible part of this file -
# confirm which function is meant.
compounds2 <- sapply(compounds,
FUN=function(x) keggrest(paste("compound", x, sep=":"),
"image"))
compounds2
# Find out if the compounds belong to an organism or not
# NOTE(review): the branches only compare each result with two fixed URLs, and
# the first and last branch print almost the same message; presumably this is
# placeholder logic - confirm what "belongs to an organism" should mean here.
for (i in compounds) {
if (compounds2[[i]]=="http://rest.kegg.jp/biocyc/image/compound/C06483") {
print("this is not an organism")
} else if (compounds2[[i]]=="http://rest.kegg.jp/biocyc/image/compound/C06484") {
print("This is an organism")
} else {
print("This is not an organism")
}
}