Skip to contents

Queries the provided regions and produces M and Coverage matrices and their genomic positions. Parallelized across files using threads from the "iscream.threads" option. The output of query_all may be used to create a BSseq object: do.call(BSseq, query_all(...)).

Usage

query_all(
  bedfiles,
  regions,
  bismark = FALSE,
  merged = TRUE,
  sparse = FALSE,
  nthreads = NULL
)

Arguments

bedfiles

A vector of bedfile paths

regions

A vector of genomic regions strings

bismark

hether the input is a bismark coverage (or BSbolt Bedgraph) file

merged

Whether the input strands have been merged/collapsed

sparse

Whether to return M and coverage matrices as sparse matrices ("dgCMatrix"). Set this TRUE only for scWGBS data

nthreads

Set number of threads to use overriding the "iscream.threads" option. See ?set_threads for more information.

Value

A named list of

  • M and coverage matrices

  • a character vector of chromosomes and numeric vector of corresponding CpG base positions

  • a character vector of the input sample names

Examples

bedfiles <- system.file("extdata", package = "iscream") |>
  list.files(pattern = "[a|b|c|d].bed.gz$", full.names = TRUE)
# examine the bedfiles
colnames <- c("chr", "start", "end", "beta", "coverage")
lapply(bedfiles, function(i) knitr::kable(read.table(i, col.names = colnames)))
#> [[1]]
#> 
#> 
#> |chr  | start| end| beta| coverage|
#> |:----|-----:|---:|----:|--------:|
#> |chr1 |     1|   2|  1.0|        1|
#> |chr1 |     3|   4|  1.0|        1|
#> |chr1 |     5|   6|  0.0|        2|
#> |chr1 |     7|   8|  0.0|        1|
#> |chr1 |     9|  10|  0.5|        2|
#> |chr1 |    11|  12|  1.0|        2|
#> |chr1 |    13|  14|  1.0|        3|
#> 
#> [[2]]
#> 
#> 
#> |chr  | start| end| beta| coverage|
#> |:----|-----:|---:|----:|--------:|
#> |chr1 |     1|   2|    0|        2|
#> |chr1 |     5|   6|    1|        2|
#> |chr1 |     7|   8|    1|        1|
#> |chr1 |    11|  12|    0|        2|
#> |chr1 |    13|  14|    1|        1|
#> 
#> [[3]]
#> 
#> 
#> |chr  | start| end| beta| coverage|
#> |:----|-----:|---:|----:|--------:|
#> |chr1 |     3|   4|    1|        2|
#> |chr1 |     7|   8|    0|        2|
#> |chr1 |     9|  10|    1|        1|
#> 
#> [[4]]
#> 
#> 
#> |chr  | start| end| beta| coverage|
#> |:----|-----:|---:|----:|--------:|
#> |chr1 |     1|   2|  1.0|        1|
#> |chr1 |     3|   4|  1.0|        2|
#> |chr1 |     7|   8|  0.0|        1|
#> |chr1 |     9|  10|  0.5|        2|
#> |chr1 |    13|  14|  1.0|        1|
#> 

# make a vector of regions
regions <- c("chr1:1-6", "chr1:7-10", "chr1:11-14")
query_all(bedfiles, regions)
#> [16:49:59.725572] [iscream::query_all] [info] Querying 3 regions from 4 bedfiles
#> 
#> [16:49:59.726032] [iscream::query_all] [info] Creating metadata vectors
#> [16:49:59.726077] [iscream::query_all] [info] nrows 2005 - 1998 extra rows allocated with 1 resizes
#> [16:49:59.726080] [iscream::query_all] [info] Creating dense matrix
#> $M
#>         a b c d
#> chr1:2  1 0 0 1
#> chr1:4  1 0 2 2
#> chr1:6  0 2 0 0
#> chr1:8  0 1 0 0
#> chr1:10 1 0 1 1
#> chr1:12 2 0 0 0
#> chr1:14 3 1 0 1
#> 
#> $Cov
#>         a b c d
#> chr1:2  1 2 0 1
#> chr1:4  1 0 2 2
#> chr1:6  2 2 0 0
#> chr1:8  1 1 2 1
#> chr1:10 2 0 1 2
#> chr1:12 2 2 0 0
#> chr1:14 3 1 0 1
#> 
#> $pos
#> [1]  1  3  5  7  9 11 13
#> 
#> $chr
#> [1] "chr1" "chr1" "chr1" "chr1" "chr1" "chr1" "chr1"
#> 
#> $sampleNames
#> [1] "a" "b" "c" "d"
#> 
# for BSseq object run
if (FALSE) { # \dontrun{
library(bsseq)
do.call(BSseq, query_all(bedfiles, regions))
} # }