Skip to contents

Queries the provided regions and produces M and Coverage matrices and their genomic positions. Parallelized across files using threads from the "iscream.threads" option. The output of query_all may be used to create a BSseq object: do.call(BSseq, query_all(...)).

Usage

query_all(
  bedfiles,
  regions,
  aligner = "biscuit",
  merged = TRUE,
  sparse = FALSE,
  prealloc = 10000,
  nthreads = NULL
)

Arguments

bedfiles

A vector of bedfile paths

regions

A vector of genomic regions strings

aligner

The aligner used to produce the BED files - one of "biscuit", "bismark", "bsbolt".

merged

Whether the input strands have been merged/collapsed

sparse

Whether to return M and coverage matrices as sparse matrices ("dgCMatrix"). Set this TRUE only for scWGBS data

prealloc

The number of rows to initialize the matrices with. If the number of methyltion loci are approximately known, this can reduce runtime as fewer resizes need to be made.

nthreads

Set number of threads to use overriding the "iscream.threads" option. See ?set_threads for more information.

Value

A named list of

  • M and coverage matrices

  • a character vector of chromosomes and numeric vector of corresponding CpG base positions

  • a character vector of the input sample names

Examples

bedfiles <- system.file("extdata", package = "iscream") |>
  list.files(pattern = "[a|b|c|d].bed.gz$", full.names = TRUE)
# examine the bedfiles
colnames <- c("chr", "start", "end", "beta", "coverage")
lapply(bedfiles, function(i) knitr::kable(read.table(i, col.names = colnames)))
#> [[1]]
#> 
#> 
#> |chr  | start| end| beta| coverage|
#> |:----|-----:|---:|----:|--------:|
#> |chr1 |     1|   2|  1.0|        1|
#> |chr1 |     3|   4|  1.0|        1|
#> |chr1 |     5|   6|  0.0|        2|
#> |chr1 |     7|   8|  0.0|        1|
#> |chr1 |     9|  10|  0.5|        2|
#> |chr1 |    11|  12|  1.0|        2|
#> |chr1 |    13|  14|  1.0|        3|
#> 
#> [[2]]
#> 
#> 
#> |chr  | start| end| beta| coverage|
#> |:----|-----:|---:|----:|--------:|
#> |chr1 |     1|   2|    0|        2|
#> |chr1 |     5|   6|    1|        2|
#> |chr1 |     7|   8|    1|        1|
#> |chr1 |    11|  12|    0|        2|
#> |chr1 |    13|  14|    1|        1|
#> 
#> [[3]]
#> 
#> 
#> |chr  | start| end| beta| coverage|
#> |:----|-----:|---:|----:|--------:|
#> |chr1 |     3|   4|    1|        2|
#> |chr1 |     7|   8|    0|        2|
#> |chr1 |     9|  10|    1|        1|
#> 
#> [[4]]
#> 
#> 
#> |chr  | start| end| beta| coverage|
#> |:----|-----:|---:|----:|--------:|
#> |chr1 |     1|   2|  1.0|        1|
#> |chr1 |     3|   4|  1.0|        2|
#> |chr1 |     7|   8|  0.0|        1|
#> |chr1 |     9|  10|  0.5|        2|
#> |chr1 |    13|  14|  1.0|        1|
#> 

# make a vector of regions
regions <- c("chr1:1-6", "chr1:7-10", "chr1:11-14")
query_all(bedfiles, regions)
#> [20:33:26.490593] [iscream::query_all] [info] Querying 3 regions from 4 bedfiles
#> 
#> [20:33:26.491009] [iscream::query_all] [info] Creating metadata vectors
#> [20:33:26.491061] [iscream::query_all] [info] nrows 10000 - 9993 extra rows allocated with 0 resizes
#> [20:33:26.491066] [iscream::query_all] [info] Creating dense matrix
#> $M
#>         a b c d
#> chr1:2  1 0 0 1
#> chr1:4  1 0 2 2
#> chr1:6  0 2 0 0
#> chr1:8  0 1 0 0
#> chr1:10 1 0 1 1
#> chr1:12 2 0 0 0
#> chr1:14 3 1 0 1
#> 
#> $Cov
#>         a b c d
#> chr1:2  1 2 0 1
#> chr1:4  1 0 2 2
#> chr1:6  2 2 0 0
#> chr1:8  1 1 2 1
#> chr1:10 2 0 1 2
#> chr1:12 2 2 0 0
#> chr1:14 3 1 0 1
#> 
#> $pos
#> [1]  1  3  5  7  9 11 13
#> 
#> $chr
#> [1] "chr1" "chr1" "chr1" "chr1" "chr1" "chr1" "chr1"
#> 
#> $sampleNames
#> [1] "a" "b" "c" "d"
#> 
# for BSseq object run
if (FALSE) { # \dontrun{
library(bsseq)
do.call(BSseq, query_all(bedfiles, regions))
} # }