Andres Suarez

All generations.

code to explanationThu, 25 May 2023

# Read the comma-separated feature table; the first line supplies the column
# names.
data <- read.table(
  "D:/USUARIO/OneDrive/Documents/Analisis de datos/Metaboanalist/importan Features POS/composite spectra.csv",
  header = TRUE,
  sep = ","
)

# Split the table by group: m.z, RT and the nine sample columns of each group
# ("Control", "Control.1" ... "Control.8", and likewise for "Treatment").
control_data <- data[, c("m.z", "RT", "Control", paste0("Control.", 1:8))]
treatment_data <- data[, c("m.z", "RT", "Treatment", paste0("Treatment.", 1:8))]

# NOTE(review): readMSData(), xcmsRaw() and normalize() are not defined or
# attached in this snippet (presumably MSnbase / xcms). Confirm that they
# accept a data frame before relying on the objects below.
myRawData <- readMSData(control_data, mzCol = 1, rtCol = 2, intensityCol = 3)
control_raw <- xcmsRaw(control_data, profmethod = "bin")
control_norm <- normalize(control_raw, method = "median")
control_normalized_data <- as.data.frame(control_norm)

# Normalise the sample columns only (the first two columns, m.z and RT, are
# dropped).
control_data_norm <- normalize(control_data[, -(1:2)])
treatment_data_norm <- normalize(treatment_data[, -(1:2)])

## Assess sample composition

### Visualize sample composition

code to explanationWed, 07 Jun 2023

# KEGG compound IDs to look up.
list_of_compounds <- list(c("C00001", "C00002", "C00003", "C00004"))

# Fetch the KEGG page of each compound and return the text of the first
# pathway link found on it.
#
# Args:
#   compound: one or more KEGG compound IDs, as a character vector or a
#     (possibly nested) list of IDs.
#
# Returns:
#   A character vector with one element per ID (named by ID when several IDs
#   are given, unnamed for a single ID). An element is NA when the page has no
#   node matching the selector.
#
# read_html(), html_node() and html_text() must already be available
# (presumably via rvest; it is not attached in this snippet).
# NOTE(review): confirm that this URL serves an HTML page that actually
# contains '#pathway_tab_1'.
get_compound_belongs_to <- function(compound) {
  ids <- as.character(unlist(compound, use.names = FALSE))
  if (length(ids) == 0L) {
    return(character(0))
  }

  # read_html() accepts a single URL, so each ID is fetched on its own.
  first_pathway <- function(id) {
    url <- paste0("https://www.genome.jp/dbget-bin/www_bget?-f+m+cpd:", id)
    page <- read_html(url)
    node <- html_node(page, "#pathway_tab_1 > ul > li > a")
    html_text(node)
  }

  result <- vapply(ids, first_pathway, character(1))
  if (length(ids) == 1L) unname(result) else result
}

get_compound_belongs_to(list_of_compounds)

The get_compound_belongs_to function takes KEGG compound IDs, fetches the corresponding KEGG page, and returns the text of the first pathway link found on that page (the first node matching '#pathway_tab_1 > ul > li > a').

fix invalid codeThu, 08 Jun 2023

get_kegg_info <- function(compound_formula) { compound_name <- met100$compound_name[met100$compound_formula == compound_formula] kegg_compound <- keggGet(compound_name, "cpd") kegg_id <- attr(kegg_compound, "kegg.code") pathway_ids <- keggLink("pathway", kegg_id)$Pathway pathway_info <- keggGet(pathway_ids) pathway_names <- lapply(pathway_info, function(x) x[[1]]$name) pathway_descriptions <- lapply(pathway_info, function(x) x[[1]]$definition) organisms <- keggGet(pathway_ids)$ORGANISM data.frame(compound_name = compound_name, pathway_id = pathway_ids, pathway_name = pathway_names, pathway_description = pathway_descriptions, organism = organisms, stringsAsFactors = FALSE)

get_kegg_info <- function(compound_formula) { compound_name <- met100$compound_name[met100$compound_formula == compound_formula] kegg_compound <- keggGet(compound_name, "cpd") kegg_id <- attr(kegg_compound, "kegg.code") pathway_ids <- keggLink("pathway", kegg_id)$Pathway pathway_info <- keggGet(pathway_ids) pathway_names <- lapply(pathway_info, function(x) x[[1]]$name) pathway_descriptions <- lapply(pathway_info, function(x) x[[1]]$definition) organisms <- keggGet(pathway_ids)$ORGANISM data.frame(compound_name = compound_name, pathway_id = pathway_ids, pathway_name = pathway_names, pathway_description = pathway_descriptions, organism = organisms, stringsAsFactors

fix invalid codeTue, 13 Jun 2023

# KEGG compound identifiers to look up.
compound_list <- c(
  "C00022",
  "C00024",
  "C00031"
)

compound_list <- c("C00022", "C00024", "C00031", "C00037", "C00038", "C00042", "C00049", "C00051", "C00068", "C00075", "C00087", "C00111", "C00140", "C00144", "C00162", "C00239", "C00242", "C00254", "C00267", "C00269", "C00272", "C00273", "C00274", "C00276", "C00280", "C00282", "C00283", "C00284", "C00285", "C00287", "C00288", "C00289", "C00290", "C00291", "C00292", "C00293", "C00294", "C00295", "C00296", "C00297", "C00298", "C00299", "C00300", "C00301", "C00302", "C00303", "C00304", "C00305", "

fix invalid codeThu, 08 Jun 2023

# Look up the KEGG pathways linked to the compound(s) that have a given
# formula.
#
# Args:
#   compound_formula: formula(s) matched against `met100$compound_formula`.
#
# Returns:
#   A data frame with one row per compound/pathway pair and the columns
#   compound_name, pathway_id, pathway_name, pathway_description and organism.
#   It has zero rows when no compound, KEGG hit or pathway is found. Fields
#   that a pathway entry does not carry are NA.
#
# `met100` is read from the calling environment. keggFind(), keggLink() and
# keggGet() are the KEGGREST functions and need network access.
get_kegg_info <- function(compound_formula) {
  empty <- data.frame(
    compound_name = character(0),
    pathway_id = character(0),
    pathway_name = character(0),
    pathway_description = character(0),
    organism = character(0),
    stringsAsFactors = FALSE
  )

  # %in% instead of == so that NA formulas in met100 never select a row.
  compound_name <- met100$compound_name[
    met100$compound_formula %in% compound_formula
  ]
  compound_name <- unique(as.character(compound_name[!is.na(compound_name)]))

  # Collapse one field of a KEGG entry to a single string (NA when absent).
  entry_field <- function(entry, key) {
    value <- entry[[key]]
    if (is.null(value)) NA_character_ else paste(value, collapse = "; ")
  }

  pathways_for <- function(cpd_name) {
    # keggFind() returns a character vector named by entry ID ("cpd:C....").
    # NOTE(review): a name search can return several compounds; the first
    # hit is used here. Confirm this is the wanted match.
    hits <- keggFind("compound", cpd_name)
    if (length(hits) == 0L) {
      return(NULL)
    }
    kegg_id <- names(hits)[1L]

    # keggLink() returns a character vector of "path:map....." IDs.
    pathway_ids <- unique(unname(keggLink("pathway", kegg_id)))
    if (length(pathway_ids) == 0L) {
      return(NULL)
    }

    # keggGet() returns at most 10 entries per call, so query in batches.
    batches <- unname(split(pathway_ids, ceiling(seq_along(pathway_ids) / 10)))
    entries <- unlist(lapply(batches, keggGet), recursive = FALSE,
                      use.names = FALSE)

    # Align the entries with the requested IDs through their ENTRY field, in
    # case KEGG skipped or reordered some of them.
    entry_ids <- vapply(
      entries,
      function(entry) as.character(entry$ENTRY)[1L],
      character(1)
    )
    info <- entries[match(sub("^path:", "", pathway_ids), entry_ids)]

    data.frame(
      compound_name = cpd_name,
      pathway_id = pathway_ids,
      pathway_name = vapply(info, entry_field, character(1),
                            key = "NAME", USE.NAMES = FALSE),
      pathway_description = vapply(info, entry_field, character(1),
                                   key = "DESCRIPTION", USE.NAMES = FALSE),
      # NOTE(review): reference maps ("map.....") are not expected to carry an
      # ORGANISM field, so this column is presumably NA for them.
      organism = vapply(info, entry_field, character(1),
                        key = "ORGANISM", USE.NAMES = FALSE),
      stringsAsFactors = FALSE
    )
  }

  per_compound <- Filter(Negate(is.null), lapply(compound_name, pathways_for))
  out <- do.call(rbind, c(list(empty), per_compound))
  rownames(out) <- NULL
  out
}

get_kegg_info <- function(compound_formula) { compound_name <- met100$compound_name[met100$compound_formula == compound_formula] kegg_compound <- keggGet(compound_name, "cpd") kegg_id <- attr(kegg_compound, "kegg.code") pathway_ids <- keggLink("pathway", kegg_id)$Pathway pathway_info <- keggGet(pathway_ids) pathway_names <- lapply(pathway_info, function(x) x[[1]]$name) pathway_descriptions <- lapply(pathway_info, function(x) x[[1]]$definition) organisms <- keggGet(pathway_ids)$ORGANISM data.frame(compound_name = compound_name, pathway_id = pathway_ids, pathway_name = pathway_names, pathway_description = pathway_descriptions, organism = organisms, stringsAsFact

code to explanationWed, 07 Jun 2023

# For every compound, record the ORGANISM element returned by kegg_find();
# when that element is empty, record NA instead.
# NOTE(review): kegg_find() and `organisms` are not defined in this snippet;
# both must exist before the loop runs.
for (compound in compounds) {
  organism_field <- kegg_find("compound", compound)$ORGANISM
  organisms[[compound]] <- if (length(organism_field) > 0) {
    # The compound has at least one associated organism
    organism_field
  } else {
    # The compound has no associated organisms
    NA
  }
}

The loop iterates over the compounds in `compounds` and fills `organisms` with one entry per compound. For each compound it runs the KEGG lookup (compound_info <- kegg_find("compound", compound)); this call is expected to return a table with several columns, of which only ORGANISM is used. The if statement checks whether the compound has at least one organism associated with it: if it does, the organism value is stored under the compound's name; if not, NA is stored instead.

generate functionWed, 14 Jun 2023

I have a list with the KEGG IDs of compounds and I need to link each compound with the metabolic pathway(s) it is involved in, using the KEGGREST R package.

library(KEGGREST)
library(data.table)

kg_id <- "C00008"

# Link KEGG compound IDs to the pathways they take part in.
#
# Args:
#   kg_id: one or more KEGG compound IDs ("C00008" or "cpd:C00008"), as a
#     character vector or a list.
#
# Returns:
#   A data frame with one row per compound/pathway link:
#     pw       - the linked pathway ID as returned by keggLink()
#     compound - the compound ID the link belongs to
#   The data frame has zero rows when nothing is linked.
#
# Uses KEGGREST::keggLink(), which needs network access.
get_pathway <- function(kg_id) {
  ids <- as.character(unlist(kg_id, use.names = FALSE))
  ids <- ids[!is.na(ids) & nzchar(ids)]
  if (length(ids) == 0L) {
    return(data.frame(pw = character(0), compound = character(0),
                      stringsAsFactors = FALSE))
  }

  # Make sure every ID carries the "cpd:" database prefix exactly once.
  query <- paste0("cpd:", sub("^cpd:", "", ids))

  # Query in small batches so that long ID lists do not build one huge request.
  batches <- unname(split(query, ceiling(seq_along(query) / 10)))
  links <- unlist(lapply(batches, function(batch) keggLink("pathway", batch)))

  # keggLink() returns a character vector of pathway IDs named by source ID.
  data.frame(
    pw = as.character(unname(links)),
    compound = as.character(names(links)),
    stringsAsFactors = FALSE
  )
}

get_pathway(kg_id)

code to explanationWed, 12 Apr 2023

# Read the comma-separated feature table; the first line supplies the column
# names.
data <- read.table(
  "D:/USUARIO/OneDrive/Documents/Analisis de datos/Metaboanalist/importan Features POS/composite spectra.csv",
  header = TRUE,
  sep = ","
)

# Split the table by group: m.z, RT and the nine sample columns of each group
# ("Control", "Control.1" ... "Control.8", and likewise for "Treatment").
control_data <- data[, c("m.z", "RT", "Control", paste0("Control.", 1:8))]
treatment_data <- data[, c("m.z", "RT", "Treatment", paste0("Treatment.", 1:8))]

# NOTE(review): readMSData(), xcmsRaw() and normalize() are not defined or
# attached in this snippet (presumably MSnbase / xcms). Confirm that they
# accept a data frame before relying on the objects below.
myRawData <- readMSData(control_data, mzCol = 1, rtCol = 2, intensityCol = 3)
control_raw <- xcmsRaw(control_data, profmethod = "bin")
control_norm <- normalize(control_raw, method = "median")
control_normalized_data <- as.data.frame(control_norm)

# Normalise the sample columns only (the first two columns, m.z and RT, are
# dropped).
control_data_norm <- normalize(control_data[, -(1:2)])
treatment_data_norm <- normalize(treatment_data[, -(1:2)])

The function readMSData allows reading data from several formats. Here, the data come from a CSV file, so the file is first read with the read.table function. The CSV file contains the m/z, retention time and intensity values of the metabolomic profiles for each sample (n = 9 samples per group), and the control and treatment columns are split into two separate tables. Then, the control table is passed to the xcmsRaw function. Finally, the normalize function normalizes the data to the median, so that the relative intensities can be compared between samples.

generate functionWed, 07 Jun 2023

I can find out if the compounds I have in a list belong to an organism using Keggrest R package.

# The Bioconductor package is called KEGGREST; package names are
# case-sensitive.
library(KEGGREST)

compounds <- c("C00159", "C00772", "C00678", "C00355", "C00022")

# Check whether a KEGG compound takes part in a pathway of an organism.
#
# KEGG has no direct compound -> organism link. The compound is therefore
# treated as belonging to the organism when at least one pathway map that
# contains the compound also exists for that organism (same five-digit map
# number). This is a heuristic: it shows that the pathway is annotated for the
# organism, not that the organism makes or uses the compound.
#
# Args:
#   compound: KEGG compound ID, e.g. "C00159" (a "cpd:" prefix is accepted).
#   organism: KEGG organism code, e.g. "eco".
#
# Returns:
#   TRUE or FALSE. When the KEGG lookup fails (unknown ID, no network), a
#   warning is raised and FALSE is returned.
is_in_organism <- function(compound, organism) {
  compound <- toString(compound)
  organism <- toString(organism)

  # The last five characters of a pathway ID are its map number
  # ("path:map00010" and "eco00010" both give "00010").
  map_number <- function(ids) substring(ids, nchar(ids) - 4L)

  tryCatch(
    {
      query <- paste0("cpd:", sub("^cpd:", "", compound))
      compound_maps <- map_number(unname(keggLink("pathway", query)))
      organism_maps <- map_number(names(keggList("pathway", organism)))

      shared <- intersect(compound_maps, organism_maps)
      # NOTE(review): map numbers 011xx / 012xx are presumably KEGG's global
      # and overview maps, which exist for nearly every organism; they are
      # ignored so that they do not make every metabolite match. Confirm.
      shared <- shared[!grepl("^01[12]", shared)]
      length(shared) > 0L
    },
    error = function(err) {
      warning(
        "KEGG lookup failed for compound '", compound, "' / organism '",
        organism, "': ", conditionMessage(err),
        call. = FALSE
      )
      FALSE
    }
  )
}

is_in_organism("C00159", "eco")

generate functionWed, 07 Jun 2023

# The Bioconductor package is called KEGGREST; package names are
# case-sensitive.
library(KEGGREST)

# Collect the organisms that have a gene for any enzyme acting on the given
# compounds.
#
# KEGG has no direct compound -> organism link, so the lookup goes
# compound -> enzymes (keggLink) -> GENES field of each enzyme entry
# (keggGet).
#
# Args:
#   compounds: KEGG compound IDs ("C00022" or "cpd:C00022").
#
# Returns:
#   A character vector of unique lower-case organism codes (NULL when nothing
#   is found). A compound whose lookup raises a warning contributes nothing.
#
# Needs network access; a compound with many enzymes triggers many requests.
find_organism <- function(compounds) {
  organisms_for <- function(compound) {
    query <- paste0("cpd:", sub("^cpd:", "", compound))
    enzymes <- unique(unname(keggLink("enzyme", query)))
    if (length(enzymes) == 0L) {
      return(NULL)
    }

    # keggGet() returns at most 10 entries per call, so query in batches.
    batches <- unname(split(enzymes, ceiling(seq_along(enzymes) / 10)))
    entries <- unlist(lapply(batches, keggGet), recursive = FALSE,
                      use.names = FALSE)
    genes <- unlist(lapply(entries, function(entry) entry$GENES),
                    use.names = FALSE)

    # assumes each GENES element starts with "ORG: " (e.g. "HSA: 124(ADH1A)")
    # - TODO confirm against real keggGet() output.
    genes <- genes[grepl("^[A-Za-z0-9_]+:", genes)]
    tolower(sub(":.*$", "", genes))
  }

  # The value of tryCatch() is used directly, so a warning yields NULL for
  # that compound. Assigning inside the handler would not reach this scope.
  per_compound <- lapply(compounds, function(compound) {
    tryCatch(organisms_for(compound), warning = function(warn) NULL)
  })

  unique(unlist(per_compound, use.names = FALSE))
}

# function that: collects the `genes` element of every requested gene set.
library(msigdb)

# Args:
#   msig: names (or indices) of the gene sets to look up in `db`.
#   db:   collection that is subset with `db[i]$genes`; defaults to
#         GREAT_HG19, which must exist when the default is used.
#
# Returns:
#   The genes of all requested sets concatenated in order, or NULL when
#   nothing is found. A set whose lookup raises a warning contributes nothing.
msig_genes <- function(msig, db = GREAT_HG19) {
  # The value of tryCatch() is used directly. Assigning `res <- NULL` inside
  # the handler only changes a variable local to the handler, which left the
  # previous set's genes (or no `res` at all) to be appended.
  per_set <- lapply(msig, function(i) {
    tryCatch(db[i]$genes, warning = function(warn) NULL)
  })
  do.call(c, unname(per_set))
}

fix invalid codeWed, 07 Jun 2023

compounds <- c("C00022", "C00153", "C00216", "C01167")
# NOTE(review): "M00008" has the form of a KEGG module ID, not an organism
# code such as "eco"; both kinds of ID are handled below.
organism <- c("M00008")

# Check whether one KEGG compound belongs to each of the given KEGG groups.
#
# Args:
#   compound: a single KEGG compound ID ("C00022" or "cpd:C00022").
#   organism: vector of organism codes (e.g. "eco") and/or module IDs
#     (e.g. "M00008").
#
# Returns:
#   A logical vector, one element per entry of `organism`:
#     - module ID: TRUE when the compound is linked to that module;
#     - organism code: TRUE when a pathway map containing the compound also
#       exists for that organism (a heuristic, since KEGG has no direct
#       compound -> organism link).
#   A failed KEGG lookup gives FALSE with a warning.
#
# Uses keggLink() / keggList() from KEGGREST, which must be attached.
compound_in_organisms <- function(compound, organism) {
  query <- paste0("cpd:", sub("^cpd:", "", compound))
  # The last five characters of a pathway ID are its map number.
  map_number <- function(ids) substring(ids, nchar(ids) - 4L)

  check_one <- function(id) {
    tryCatch(
      {
        if (grepl("^M[0-9]{5}$", id)) {
          # Strip the database prefix from the linked module IDs.
          modules <- sub("^.*:", "", unname(keggLink("module", query)))
          id %in% modules
        } else {
          compound_maps <- map_number(unname(keggLink("pathway", query)))
          organism_maps <- map_number(names(keggList("pathway", id)))
          length(intersect(compound_maps, organism_maps)) > 0L
        }
      },
      error = function(err) {
        warning(
          "KEGG lookup failed for '", compound, "' / '", id, "': ",
          conditionMessage(err),
          call. = FALSE
        )
        FALSE
      }
    )
  }

  vapply(organism, check_one, logical(1), USE.NAMES = FALSE)
}

# Each compound is looked up on its own; the original queried the whole
# `compounds` vector on every iteration.
for (compound in compounds) {
  member <- compound_in_organisms(compound, organism)
  if (any(member)) {
    cat(paste(compound, "belongs to",
              paste(organism[member], collapse = ", "), "\n"))
  } else {
    cat(paste(compound, "has no associated organism(s) in KEGG\n"))
  }
}

- Create a function that takes a KEGG compound ID and a vector of organism IDs.
- The function should return a logical vector indicating whether the compound belongs to each of the organism(s) passed to the function.
- Use the function to check whether the compounds in `compounds` belong to the organism(s) in `organism`.

code to explanationThu, 25 May 2023

header = TRUE, sep = ","

the header = TRUE means that the first line of the file is a header line. This will be used as the names of the data.frame columns. The sep = "," means that the fields in the file are separated by commas.

fix invalid codeTue, 13 Jun 2023

# Look up the KEGG pathways linked to the compound(s) that have a given
# formula.
#
# Args:
#   compound_formula: formula(s) matched against `met100$compound_formula`.
#
# Returns:
#   A data frame with one row per compound/pathway pair and the columns
#   compound_name, pathway_id, pathway_name, pathway_description and organism.
#   It has zero rows when no compound, KEGG hit or pathway is found. Fields
#   that a pathway entry does not carry are NA.
#
# `met100` is read from the calling environment. keggFind(), keggLink() and
# keggGet() are the KEGGREST functions and need network access.
get_kegg_info <- function(compound_formula) {
  empty <- data.frame(
    compound_name = character(0),
    pathway_id = character(0),
    pathway_name = character(0),
    pathway_description = character(0),
    organism = character(0),
    stringsAsFactors = FALSE
  )

  # %in% instead of == so that NA formulas in met100 never select a row.
  compound_name <- met100$compound_name[
    met100$compound_formula %in% compound_formula
  ]
  compound_name <- unique(as.character(compound_name[!is.na(compound_name)]))

  # Collapse one field of a KEGG entry to a single string (NA when absent).
  entry_field <- function(entry, key) {
    value <- entry[[key]]
    if (is.null(value)) NA_character_ else paste(value, collapse = "; ")
  }

  pathways_for <- function(cpd_name) {
    # keggFind() returns a character vector named by entry ID ("cpd:C....").
    # NOTE(review): a name search can return several compounds; the first
    # hit is used here. Confirm this is the wanted match.
    hits <- keggFind("compound", cpd_name)
    if (length(hits) == 0L) {
      return(NULL)
    }
    kegg_id <- names(hits)[1L]

    # keggLink() returns a character vector of "path:map....." IDs.
    pathway_ids <- unique(unname(keggLink("pathway", kegg_id)))
    if (length(pathway_ids) == 0L) {
      return(NULL)
    }

    # keggGet() returns at most 10 entries per call, so query in batches.
    batches <- unname(split(pathway_ids, ceiling(seq_along(pathway_ids) / 10)))
    entries <- unlist(lapply(batches, keggGet), recursive = FALSE,
                      use.names = FALSE)

    # Align the entries with the requested IDs through their ENTRY field, in
    # case KEGG skipped or reordered some of them.
    entry_ids <- vapply(
      entries,
      function(entry) as.character(entry$ENTRY)[1L],
      character(1)
    )
    info <- entries[match(sub("^path:", "", pathway_ids), entry_ids)]

    data.frame(
      compound_name = cpd_name,
      pathway_id = pathway_ids,
      pathway_name = vapply(info, entry_field, character(1),
                            key = "NAME", USE.NAMES = FALSE),
      pathway_description = vapply(info, entry_field, character(1),
                                   key = "DESCRIPTION", USE.NAMES = FALSE),
      # NOTE(review): reference maps ("map.....") are not expected to carry an
      # ORGANISM field, so this column is presumably NA for them.
      organism = vapply(info, entry_field, character(1),
                        key = "ORGANISM", USE.NAMES = FALSE),
      stringsAsFactors = FALSE
    )
  }

  per_compound <- Filter(Negate(is.null), lapply(compound_name, pathways_for))
  out <- do.call(rbind, c(list(empty), per_compound))
  rownames(out) <- NULL
  out
}

get_kegg_info <- function(compound_formula) { compound_name <- met100$compound_name[met100$compound_formula == compound_formula] kegg_compound <- keggGet(compound_name, "cpd") kegg_id <- attr(kegg_compound, "kegg.code") pathway_ids <- keggLink("pathway", kegg_id)$Pathway pathway_info <- keggGet(pathway_ids) pathway_names <- lapply(pathway_info, function(x) x[[1]]$name) pathway_descriptions <- lapply(pathway_info, function(x) x[[1]]$definition) organisms <- keggGet(pathway_ids)$ORGANISM data.frame(compound_name = compound_name, pathway_id = pathway_ids, pathway_name = pathway_names, pathway_description = pathway_descriptions, organism = organisms, stringsAsFact

generate functionWed, 07 Jun 2023

I can find out if the compounds I have in a list belong to an organism or not using Keggrest.

# List of KEGG compounds
compounds <- c("C06483", "C06484", "C06485")

# One keggrest() "image" result per compound; sapply() names the results by
# compound ID, which the loop below relies on.
# NOTE(review): keggrest() is not defined in this snippet and must exist
# before this runs.
compounds2 <- sapply(
  compounds,
  FUN = function(x) keggrest(paste0("compound:", x), "image")
)
compounds2

# Find out if the compounds belong to an organism or not
for (i in compounds) {
  image_url <- compounds2[[i]]
  verdict <- if (image_url == "http://rest.kegg.jp/biocyc/image/compound/C06483") {
    "this is not an organism"
  } else if (image_url == "http://rest.kegg.jp/biocyc/image/compound/C06484") {
    "This is an organism"
  } else {
    "This is not an organism"
  }
  print(verdict)
}

Want to kickstart your project?Use the new AI Studio to create your code