## ----setup, echo = FALSE--------------------------------------------------------------------------
# Chunk defaults for the rendered document; output width set to 100 columns.
knitr::opts_chunk$set(
  error = TRUE,
  cache = FALSE,
  eval = TRUE,
  out.width = "100%"
)
options(width = 100)

## ----useEnsembl-----------------------------------------------------------------------------------
library(biomaRt)

# Connect to the "genes" mart with the human gene dataset selected up front.
ensembl <- useEnsembl(
  dataset = "hsapiens_gene_ensembl",
  biomart = "genes"
)

## ----listEnsembl----------------------------------------------------------------------------------
listEnsembl()

## ----ensembl1-------------------------------------------------------------------------------------
# Re-create the connection without a dataset; one is chosen in chunk `ensembl3`.
ensembl <- useEnsembl(biomart = "genes")

## ----ensembl2-------------------------------------------------------------------------------------
# Auto-print the mart object.
ensembl

## ----listDatasets---------------------------------------------------------------------------------
datasets <- listDatasets(mart = ensembl)
head(datasets)

## ----searchDatasets, echo = TRUE, eval = TRUE-----------------------------------------------------
searchDatasets(pattern = "hsapiens", mart = ensembl)

## ----ensembl3, eval=TRUE--------------------------------------------------------------------------
# Attach the human gene dataset to the existing mart object.
ensembl <- useDataset(
  mart = ensembl,
  dataset = "hsapiens_gene_ensembl"
)

## ----ensembl4, eval = FALSE-----------------------------------------------------------------------
# ensembl <- useEnsembl(biomart = "genes", dataset = "hsapiens_gene_ensembl")

## ----mirrors, eval = FALSE------------------------------------------------------------------------
# ensembl <- useEnsembl(
#   biomart = "ensembl",
#   dataset = "hsapiens_gene_ensembl",
#   mirror = "useast"
# )

## ----archiveMarts, echo = TRUE, eval = TRUE-------------------------------------------------------
listEnsemblArchives()

## ----archiveMarts3, echo = TRUE, eval = TRUE------------------------------------------------------
# Same calls as above, pinned to version 110.
listEnsembl(version = 110)
ensembl_110 <- useEnsembl(
  version = 110,
  biomart = "genes",
  dataset = "hsapiens_gene_ensembl"
)

## ----listEnsemblGenomes---------------------------------------------------------------------------
listEnsemblGenomes()

## ----plants1--------------------------------------------------------------------------------------
ensembl_plants <- useEnsemblGenomes(biomart = "plants_mart")
searchDatasets(mart = ensembl_plants, pattern = "Arabidopsis")

## -------------------------------------------------------------------------------------------------
ensembl_arabidopsis <- useEnsemblGenomes(
  dataset = "athaliana_eg_gene",
  biomart = "plants_mart"
)

## ----filters--------------------------------------------------------------------------------------
# Show the first five rows of the filter table.
filters <- listFilters(mart = ensembl)
filters[seq_len(5), ]

## ----attributes-----------------------------------------------------------------------------------
# Show the first five rows of the attribute table.
attributes <- listAttributes(mart = ensembl)
attributes[seq_len(5), ]

## ----getBM1, echo=TRUE, eval=TRUE-----------------------------------------------------------------
# Query the mart using three Affymetrix probe IDs as filter values.
affyids <- c("202763_at", "209310_s_at", "207500_at")
getBM(
  mart = ensembl,
  filters = "affy_hg_u133_plus_2",
  values = affyids,
  attributes = c("affy_hg_u133_plus_2", "entrezgene_id")
)

## ----searchAttributes, echo = TRUE, eval = TRUE---------------------------------------------------
searchAttributes(pattern = "hgnc", mart = ensembl)

## ----searchFilters, echo = TRUE, eval = TRUE------------------------------------------------------
searchFilters(pattern = "ensembl.*id", mart = ensembl)

## ----filtervalues, fig.cap='The options available to the Chromosome/Scaffold field are limited to a predetermined list based on the values in this dataset.', echo = FALSE----
knitr::include_graphics("filtervalues.png")

## ----chromosomeNames, results = FALSE-------------------------------------------------------------
listFilterOptions(filter = "chromosome_name", mart = ensembl)

## ----listFilterOptions, results = TRUE------------------------------------------------------------
searchFilterOptions(
  mart = ensembl,
  filter = "chromosome_name",
  pattern = "^GL"
)
searchFilterOptions(
  mart = ensembl,
  filter = "phenotype_description",
  pattern = "Crohn"
)

## ----filterType-----------------------------------------------------------------------------------
filterType("with_affy_hg_u133_plus_2", ensembl)

## ----attributePages-------------------------------------------------------------------------------
pages <- attributePages(ensembl)
pages

## ----listAttributes-------------------------------------------------------------------------------
# Restrict the attribute listing to a single page.
head(listAttributes(ensembl, page = "feature_page"))

## ----columnsAndKeyTypes---------------------------------------------------------------------------
# A separate mart object used for the keytypes/columns/keys/select interface.
mart <- useEnsembl(dataset = "hsapiens_gene_ensembl", biomart = "ensembl")
head(keytypes(mart), n = 3)
head(columns(mart), n = 3)

## ----keys1----------------------------------------------------------------------------------------
k <- keys(mart, keytype = "chromosome_name")
head(k, n = 3)

## ----keys2----------------------------------------------------------------------------------------
# Same key type, narrowed with a pattern.
k <- keys(mart, keytype = "chromosome_name", pattern = "LRG")
head(k, n = 3)

## ----select---------------------------------------------------------------------------------------
affy <- c("202763_at", "209310_s_at", "207500_at")
select(
  mart,
  keys = affy,
  columns = c("affy_hg_u133_plus_2", "entrezgene_id"),
  keytype = "affy_hg_u133_plus_2"
)

## ----cacheInfo------------------------------------------------------------------------------------
biomartCacheInfo()

## ----cache-location, echo=1:2---------------------------------------------------------------------
# Point BIOMART_CACHE at the session temp directory, inspect, then unset it.
# NOTE(review): the unset is unconditional, so a BIOMART_CACHE value that was
# already set before this chunk ran is not restored. Left as-is because the
# chunk option echo=1:2 depends on the order of these three expressions.
Sys.setenv(BIOMART_CACHE = tempdir())
biomartCacheInfo()
Sys.unsetenv("BIOMART_CACHE")

## ----task1, echo=TRUE,eval=TRUE-------------------------------------------------------------------
affyids <- c("202763_at", "209310_s_at", "207500_at")
getBM(
  attributes = c(
    "affy_hg_u133_plus_2",
    "hgnc_symbol",
    "chromosome_name",
    "start_position",
    "end_position",
    "band"
  ),
  filters = "affy_hg_u133_plus_2",
  values = affyids,
  mart = ensembl
)

## ----task2, echo=TRUE,eval=TRUE-------------------------------------------------------------------
entrez <- c("673", "837")
goids <- getBM(
  attributes = c("entrezgene_id", "go_id"),
  filters = "entrezgene_id",
  values = entrez,
  mart = ensembl
)
head(goids)

## ----task3, echo=TRUE,eval=TRUE-------------------------------------------------------------------
go <- c("GO:0051330", "GO:0000080", "GO:0000114", "GO:0000082")
# Written as strings: mixing numbers with "Y" in c() yields a character vector
# anyway, so this is the same value with the coercion made explicit.
chrom <- c("17", "20", "Y")
# With several filters, `values` is a list holding one element per filter.
getBM(
  attributes = "hgnc_symbol",
  filters = c("go", "chromosome_name"),
  values = list(go, chrom),
  mart = ensembl
)

## ----task4, echo=TRUE,eval=TRUE-------------------------------------------------------------------
refseqids <- c("NM_005359", "NM_000546")
ipro <- getBM(
  attributes = c("refseq_mrna", "interpro", "interpro_description"),
  filters = "refseq_mrna",
  values = refseqids,
  mart = ensembl
)
ipro

## ----task5, eval = TRUE---------------------------------------------------------------------------
getBM(
  attributes = c("affy_hg_u133_plus_2", "ensembl_gene_id"),
  filters = c("chromosome_name", "start", "end"),
  values = list(16, 1100000, 1250000),
  mart = ensembl
)

## ----task6, echo=TRUE, eval = TRUE----------------------------------------------------------------
getBM(
  attributes = c("entrezgene_id", "hgnc_symbol"),
  filters = "go",
  values = "GO:0004707",
  mart = ensembl
)

## ----task7, eval=TRUE-----------------------------------------------------------------------------
entrez <- c("673", "7157", "837")
getSequence(
  id = entrez,
  type = "entrezgene_id",
  seqType = "coding_gene_flank",
  upstream = 100,
  mart = ensembl
)

## ----task8, echo=TRUE,eval=TRUE-------------------------------------------------------------------
utr5 <- getSequence(
  chromosome = 3,
  start = 185514033,
  end = 185535839,
  type = "entrezgene_id",
  seqType = "5utr",
  mart = ensembl
)
utr5

## ----task9, echo=TRUE, eval=TRUE------------------------------------------------------------------
protein <- getSequence(
  id = c(27112, 653067),
  type = "entrezgene_id",
  seqType = "peptide",
  mart = ensembl
)
protein

## ----task10, echo=TRUE, eval=TRUE-----------------------------------------------------------------
snpmart <- useEnsembl(biomart = "snp", dataset = "hsapiens_snp")

## ----task10b--------------------------------------------------------------------------------------
getBM(
  attributes = c("refsnp_id", "allele", "chrom_start", "chrom_strand"),
  filters = c("chr_name", "start", "end"),
  values = list(8, 148350, 148400),
  mart = snpmart
)

## ----homologs-1-----------------------------------------------------------------------------------
human <- useEnsembl("ensembl", dataset = "hsapiens_gene_ensembl")
# `values` is spelled out in full; the previous `value =` only worked through
# partial argument matching.
BRCA2_human <- getBM(
  mart = human,
  filters = "hgnc_symbol",
  values = "BRCA2",
  attributes = c("ensembl_gene_id", "chromosome_name", "start_position")
)
BRCA2_human

## ----homolog-2------------------------------------------------------------------------------------
homologs <- getHomologs(
  ensembl_gene_ids = BRCA2_human$ensembl_gene_id,
  species_from = "human",
  species_to = "mouse"
)
homologs

## ----homologs-3-----------------------------------------------------------------------------------
# Feed the homolog gene IDs from the previous chunk into a query on the mouse mart.
mouse <- useEnsembl("ensembl", dataset = "mmusculus_gene_ensembl")
BRCA2_mouse <- getBM(
  mart = mouse,
  filters = "ensembl_gene_id",
  values = homologs$mmusculus_homolog_ensembl_gene,
  attributes = c("refseq_mrna", "chromosome_name", "start_position")
)
BRCA2_mouse

## ----ssl-verifypeer, eval = FALSE-----------------------------------------------------------------
# setEnsemblSSL(list(ssl_verifypeer = FALSE))

## ----ssl-cipher-list, eval = FALSE----------------------------------------------------------------
# setEnsemblSSL(list(ssl_cipher_list = "DEFAULT@SECLEVEL=1"))

## ----sessionInfo----------------------------------------------------------------------------------
sessionInfo()