From 18bc1c25d3e03db556d802e1df732c2f316c1747 Mon Sep 17 00:00:00 2001
From: Matt Stone
Date: Fri, 8 Mar 2024 12:55:27 -0500
Subject: [PATCH] feat: add bedrecord/interval converters

Add `BedRecord.to_interval()` and the `BedRecord.from_interval()` classmethod,
together with round-trip tests. `Interval` cannot represent a missing strand,
so a record without a strand comes back from a round trip with positive strand.

---
 pybedlite/bed_record.py           | 46 +++++++++++++++++++++++++++++++
 pybedlite/tests/test_pybedlite.py | 32 +++++++++++++++++++++
 2 files changed, 78 insertions(+)

diff --git a/pybedlite/bed_record.py b/pybedlite/bed_record.py
index 0ba692f..e4596ea 100644
--- a/pybedlite/bed_record.py
+++ b/pybedlite/bed_record.py
@@ -11,12 +11,16 @@
     - :class:`~pybedtools.bed_record.BedRecord` -- Lightweight class for storing information
         pertaining to a BED record.
 """
+
 import attr
 import enum
 from typing import Optional
 from typing import Tuple
 from typing import List
 from typing import ClassVar
+from typing import Type
+
+from pybedlite.overlap_detector import Interval
 
 
 """Maximum BED fields that can be present in a well formed BED file written to specification"""
@@ -188,3 +192,45 @@ def as_bed_line(self, number_of_output_fields: Optional[int] = None) -> str:
             )
         fields = self.bed_fields[:number_of_output_fields]
         return "\t".join(fields)
+
+    def to_interval(self) -> Interval:
+        """
+        Convert a `BedRecord` to an `Interval` instance.
+
+        Note that when the `BedRecord` does not have a specified strand, the `Interval`'s negative
+        attribute is set to False. This mimics the behavior of `OverlapDetector.from_bed()` when
+        reading a record that does not have a specified strand.
+
+        Returns:
+            An `Interval` corresponding to the same region specified in the record.
+        """
+        return Interval(
+            refname=self.chrom,
+            start=self.start,
+            end=self.end,
+            negative=self.strand is BedStrand.Negative,
+            name=self.name,
+        )
+
+    @classmethod
+    def from_interval(cls: Type["BedRecord"], interval: Interval) -> "BedRecord":
+        """
+        Construct a `BedRecord` instance from an `Interval` instance.
+
+        **Note that `Interval` cannot represent a `BedRecord` with a missing strand.**
+        Converting a record with no strand to `Interval` and then back to `BedRecord` will result in
+        a record with **positive strand**.
+
+        Args:
+            interval: The `Interval` instance to convert.
+
+        Returns:
+            A `BedRecord` corresponding to the same region specified in the interval.
+        """
+        return cls(
+            chrom=interval.refname,
+            start=interval.start,
+            end=interval.end,
+            strand=BedStrand.Negative if interval.negative else BedStrand.Positive,
+            name=interval.name,
+        )
diff --git a/pybedlite/tests/test_pybedlite.py b/pybedlite/tests/test_pybedlite.py
index e9d46f1..c15e26b 100644
--- a/pybedlite/tests/test_pybedlite.py
+++ b/pybedlite/tests/test_pybedlite.py
@@ -252,3 +252,35 @@ def test_preopened_bed_writing(
                 record_number=i,
                 num_fields=bed_field_number,
             )
+
+
+@pytest.mark.parametrize("record", bed_records())
+def test_conversion_to_interval(record: BedRecord) -> None:
+    """
+    Test that we can convert a BedRecord to an Interval.
+    """
+    interval = record.to_interval()
+
+    assert interval.refname == record.chrom
+    assert interval.start == record.start
+    assert interval.end == record.end
+    assert interval.negative is (record.strand is BedStrand.Negative)
+    assert interval.name == record.name
+
+
+@pytest.mark.parametrize("record", bed_records())
+def test_construction_from_interval(record: BedRecord) -> None:
+    """
+    Test that we can convert a BedRecord to an Interval and back.
+    """
+    new_record = BedRecord.from_interval(record.to_interval())
+
+    assert new_record.chrom == record.chrom
+    assert new_record.start == record.start
+    assert new_record.end == record.end
+    assert new_record.name == record.name
+
+    if record.strand is None:
+        assert new_record.strand is BedStrand.Positive
+    else:
+        assert new_record.strand is record.strand