Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add bedrecord/interval converters #27

Merged
merged 6 commits into from
Mar 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions pybedlite/bed_record.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,20 @@
- :class:`~pybedtools.bed_record.BedRecord` -- Lightweight class for storing information
pertaining to a BED record.
"""

from __future__ import annotations

import attr
import enum
from typing import Optional
from typing import Tuple
from typing import List
from typing import ClassVar
from typing import Type
from typing import TYPE_CHECKING

if TYPE_CHECKING:
from pybedlite.overlap_detector import Interval

Check warning on line 27 in pybedlite/bed_record.py

View check run for this annotation

Codecov / codecov/patch

pybedlite/bed_record.py#L27

Added line #L27 was not covered by tests
msto marked this conversation as resolved.
Show resolved Hide resolved


"""Maximum BED fields that can be present in a well formed BED file written to specification"""
Expand Down Expand Up @@ -188,3 +196,27 @@
)
fields = self.bed_fields[:number_of_output_fields]
return "\t".join(fields)

@classmethod
def from_interval(cls: Type["BedRecord"], interval: Interval) -> "BedRecord":
    """
    Construct a `BedRecord` from an `Interval` instance.

    **Note that `Interval` cannot represent a `BedRecord` with a missing strand.**
    Converting a record with no strand to `Interval` and then back to `BedRecord` will result in
    a record with **positive strand**.

    Args:
        interval: The `Interval` instance to convert.

    Returns:
        A `BedRecord` corresponding to the same region specified in the interval. The record is
        built through `cls`, so invoking this on a subclass returns an instance of that subclass.
    """
    # `Interval` only carries a boolean `negative` flag, so a strand is always assigned here.
    strand = BedStrand.Negative if interval.negative else BedStrand.Positive

    # Instantiate via `cls` (not a hard-coded `BedRecord`) so the alternate constructor honours
    # subclasses, matching how `Interval.from_bedrecord` is written.
    return cls(
        chrom=interval.refname,
        start=interval.start,
        end=interval.end,
        strand=strand,
        name=interval.name,
    )
1 change: 1 addition & 0 deletions pybedlite/bed_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
- :class:`~pybedtools.bed_source.BedSource` -- Reader class for parsing BED files and iterate
over their contained records
"""

import io
from typing import IO
from typing import Optional
Expand Down
40 changes: 30 additions & 10 deletions pybedlite/overlap_detector.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,12 +46,14 @@
from typing import List
from typing import Optional
from typing import Set
from typing import Type

import attr
import cgranges as cr

from pybedlite.bed_record import BedStrand
from pybedlite.bed_source import BedSource
from pybedlite.bed_record import BedRecord


@attr.s(frozen=True, auto_attribs=True)
Expand Down Expand Up @@ -100,11 +102,34 @@
"""Returns the length of the interval."""
return self.end - self.start

@classmethod
def from_bedrecord(cls: Type["Interval"], record: BedRecord) -> "Interval":
    """
    Build an `Interval` covering the same region as a `BedRecord`.

    If the `BedRecord` has no strand specified, the resulting `Interval` has its negative
    attribute set to False. This mimics the behavior of `OverlapDetector.from_bed()` when
    reading a record that does not have a specified strand.

    Args:
        record: The `BedRecord` instance to convert.

    Returns:
        An `Interval` corresponding to the same region specified in the record.
    """
    # Only an explicit negative strand sets the flag; positive and missing strands do not.
    is_negative = record.strand is BedStrand.Negative

    return cls(
        refname=record.chrom,
        start=record.start,
        end=record.end,
        negative=is_negative,
        name=record.name,
    )


class OverlapDetector(Iterable[Interval]):
"""Detects and returns overlaps between a set of genomic regions and another genomic region.

Since :class:`~samwell.overlap_detector.Interval` objects are used both to populate the
Since :class:`~pybedlite.overlap_detector.Interval` objects are used both to populate the
overlap detector and to query it, the coordinate system in use is also 0-based open-ended.

The same interval may be added multiple times, but only a single instance will be returned
Expand Down Expand Up @@ -236,20 +261,15 @@

@classmethod
def from_bed(cls, path: Path) -> "OverlapDetector":
    """Builds a :class:`~pybedlite.overlap_detector.OverlapDetector` from a BED file.

    Args:
        path: the path to the BED file

    Returns:
        An overlap detector for the regions in the BED file.
    """
    detector = OverlapDetector()

    # Convert each BED record with the shared `Interval.from_bedrecord` constructor instead of
    # building the `Interval` inline, so strand handling lives in one place.
    for region in BedSource(path):
        detector.add(Interval.from_bedrecord(region))

    return detector
52 changes: 52 additions & 0 deletions pybedlite/tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
import pytest
from typing import List
from pybedlite.bed_record import BedRecord
from pybedlite.bed_record import BedStrand


@pytest.fixture
def bed_records() -> List[BedRecord]:
    """Provide three BED records: positive strand, negative strand, and one with no strand."""
    # Fully populated record on the positive strand.
    positive_record = BedRecord(
        chrom="1",
        start=100,
        end=150,
        name="test_record1",
        score=100,
        strand=BedStrand.Positive,
        thick_start=100,
        thick_end=100,
        item_rgb=(0, 0, 0),
        block_count=1,
        block_sizes=[50],
        block_starts=[0],
    )

    # Fully populated record on the negative strand.
    negative_record = BedRecord(
        chrom="1",
        start=200,
        end=300,
        name="test_record2",
        score=100,
        strand=BedStrand.Negative,
        thick_start=210,
        thick_end=290,
        item_rgb=(0, 0, 0),
        block_count=1,
        block_sizes=[100],
        block_starts=[0],
    )

    # Record with every optional field, including the strand, left as None.
    unstranded_record = BedRecord(
        chrom="2",
        start=200,
        end=300,
        name="test_record3",
        score=None,
        strand=None,
        thick_start=None,
        thick_end=None,
        item_rgb=None,
        block_count=None,
        block_sizes=None,
        block_starts=None,
    )

    return [positive_record, negative_record, unstranded_record]
40 changes: 40 additions & 0 deletions pybedlite/tests/test_overlap_detector.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

from pybedlite.overlap_detector import Interval
from pybedlite.overlap_detector import OverlapDetector
from pybedlite.bed_record import BedStrand
from pybedlite.bed_record import BedRecord


def run_test(targets: List[Interval], query: Interval, results: List[Interval]) -> None:
Expand Down Expand Up @@ -148,3 +150,41 @@ def test_iterable() -> None:
assert list(detector) == [a]
detector.add_all([a, b, c, d, e])
assert list(detector) == [a, a, b, c, d, e]


def test_conversion_to_interval(bed_records: List[BedRecord]) -> None:
    """
    Check that each fixture `BedRecord` converts to an `Interval` with matching fields.
    """

    # The records come from a fixture, so they are looped over here instead of being
    # expanded through `pytest.mark.parametrize`.
    for bed_record in bed_records:
        converted = Interval.from_bedrecord(bed_record)
        expect_negative = bed_record.strand is BedStrand.Negative

        assert converted.refname == bed_record.chrom
        assert converted.start == bed_record.start
        assert converted.end == bed_record.end
        assert converted.negative is expect_negative
        assert converted.name == bed_record.name


def test_construction_from_interval(bed_records: List[BedRecord]) -> None:
    """
    Check the round trip `BedRecord` -> `Interval` -> `BedRecord` for each fixture record.
    """

    # The records come from a fixture, so they are looped over here instead of being
    # expanded through `pytest.mark.parametrize`.
    for original in bed_records:
        round_tripped = BedRecord.from_interval(Interval.from_bedrecord(original))

        assert round_tripped.chrom == original.chrom
        assert round_tripped.start == original.start
        assert round_tripped.end == original.end
        assert round_tripped.name == original.name

        # A missing strand cannot survive the round trip; it comes back as positive.
        expected_strand = BedStrand.Positive if original.strand is None else original.strand
        assert round_tripped.strand is expected_strand
49 changes: 0 additions & 49 deletions pybedlite/tests/test_pybedlite.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,55 +8,6 @@
from pybedlite.bed_writer import BedWriter
from pybedlite.bed_source import BedSource
from pybedlite.bed_record import BedRecord
from pybedlite.bed_record import BedStrand


@pytest.fixture
def bed_records() -> List[BedRecord]:
    """Return three sample BED records covering positive, negative, and absent strands."""
    # Positive-strand record with all twelve fields populated.
    first = BedRecord(
        chrom="1",
        start=100,
        end=150,
        name="test_record1",
        score=100,
        strand=BedStrand.Positive,
        thick_start=100,
        thick_end=100,
        item_rgb=(0, 0, 0),
        block_count=1,
        block_sizes=[50],
        block_starts=[0],
    )

    # Negative-strand record with all twelve fields populated.
    second = BedRecord(
        chrom="1",
        start=200,
        end=300,
        name="test_record2",
        score=100,
        strand=BedStrand.Negative,
        thick_start=210,
        thick_end=290,
        item_rgb=(0, 0, 0),
        block_count=1,
        block_sizes=[100],
        block_starts=[0],
    )

    # Record whose optional fields, strand included, are all None.
    third = BedRecord(
        chrom="2",
        start=200,
        end=300,
        name="test_record3",
        score=None,
        strand=None,
        thick_start=None,
        thick_end=None,
        item_rgb=None,
        block_count=None,
        block_sizes=None,
        block_starts=None,
    )

    return [first, second, third]


SNIPPET_BED = """\
Expand Down
Loading