Skip to content

Commit

Permalink
fixups
Browse files Browse the repository at this point in the history
  • Loading branch information
nh13 committed Jul 30, 2024
1 parent 878aa6f commit 49ff82c
Show file tree
Hide file tree
Showing 3 changed files with 62 additions and 95 deletions.
12 changes: 1 addition & 11 deletions pybedlite/bed_record.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,20 +183,10 @@ def bed_fields(self) -> List[str]:
]

@property
def reference_name(self) -> str:
def refname(self) -> str:
"""A reference sequence name."""
return self.chrom

Check warning on line 188 in pybedlite/bed_record.py

View check run for this annotation

Codecov / codecov/patch

pybedlite/bed_record.py#L188

Added line #L188 was not covered by tests

@property
def zero_based_start(self) -> int:
"""A 0-based start position."""
return self.start

@property
def zero_based_open_end(self) -> int:
"""A 0-based open-ended position."""
return self.end

@property
def is_negative(self) -> bool:
"""True if the interval is on the negative strand, False otherwise"""
Expand Down
78 changes: 23 additions & 55 deletions pybedlite/overlap_detector.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@
a set of genomic regions and another genomic region. The overlap detector may contain any
interval-like Python objects that have the following properties:
* `reference_name` (str): The reference sequence name
* `zero_based_start` (int): A 0-based start position
* `zero_based_end` (int): A 0-based exclusive end position
* `refname` (str): The reference sequence name
* `start` (int): A 0-based start position
* `end` (int): A 0-based exclusive end position
This is encapsulated in the :class:`~pybedlite.overlap_detector.GenomicSpan` protocol.
Expand Down Expand Up @@ -87,15 +87,15 @@ class GenomicSpan(Protocol):
"""

@property
def reference_name(self) -> str:
def refname(self) -> str:
"""A reference sequence name."""

@property
def zero_based_start(self) -> int:
def start(self) -> int:
"""A 0-based start position."""

@property
def zero_based_open_end(self) -> int:
def end(self) -> int:
"""A 0-based exclusive (open-ended) end position."""


Expand Down Expand Up @@ -135,6 +135,11 @@ def __attrs_post_init__(self) -> None:
if self.end <= self.start:
raise ValueError(f"end <= start: {self.end} <= {self.start}")

@property
def is_negative(self) -> bool:
"""True if the interval is on the negative strand, False otherwise"""
return self.negative

def overlap(self, other: "Interval") -> int:
"""Returns the overlap between this interval and the other, or zero if there is none.
Expand Down Expand Up @@ -174,25 +179,6 @@ def from_bedrecord(cls: Type["Interval"], record: BedRecord) -> "Interval":
name=record.name,
)

@property
def reference_name(self) -> str:
return self.refname

@property
def zero_based_start(self) -> int:
"""A 0-based start position."""
return self.start

@property
def zero_based_open_end(self) -> int:
"""A 0-based open-ended position."""
return self.end

@property
def is_negative(self) -> bool:
"""True if the interval is on the negative strand, False otherwise"""
return self.negative


GenericGenomicsSpan = TypeVar("GenericGenomicsSpan", bound=Union[GenomicSpan, StrandedGenomicSpan])
"""
Expand Down Expand Up @@ -231,7 +217,7 @@ def add(self, interval: GenericGenomicsSpan) -> None:
if not isinstance(interval, Hashable):
raise ValueError(f"Interval feature is not hashable but should be: {interval}")

refname = interval.reference_name
refname = interval.refname
if refname not in self._refname_to_tree:
self._refname_to_tree[refname] = cr.cgranges() # type: ignore
self._refname_to_indexed[refname] = False
Expand All @@ -244,7 +230,7 @@ def add(self, interval: GenericGenomicsSpan) -> None:

# Add the interval to the tree
tree = self._refname_to_tree[refname]
tree.add(refname, interval.zero_based_start, interval.zero_based_open_end, interval_idx)
tree.add(refname, interval.start, interval.end, interval_idx)

# Flag this tree as needing to be indexed after adding a new interval, but defer
# indexing
Expand All @@ -269,21 +255,15 @@ def overlaps_any(self, interval: GenomicSpan) -> bool:
True if and only if the given interval overlaps with any interval in this
detector.
"""
refname = interval.reference_name
refname = interval.refname
tree = self._refname_to_tree.get(refname)
if tree is None:
return False
else:
if not self._refname_to_indexed[refname]:
tree.index()
try:
next(
iter(
tree.overlap(
refname, interval.zero_based_start, interval.zero_based_open_end
)
)
)
next(iter(tree.overlap(refname, interval.start, interval.end)))
except StopIteration:
return False
else:
Expand All @@ -299,7 +279,7 @@ def get_overlaps(self, interval: GenomicSpan) -> List[GenericGenomicsSpan]:
The list of intervals in this detector that overlap the given interval, or the empty
list if no overlaps exist. The intervals will be returned in ascending genomic order.
"""
refname = interval.reference_name
refname = interval.refname
tree = self._refname_to_tree.get(refname)
if tree is None:
return []
Expand All @@ -310,17 +290,15 @@ def get_overlaps(self, interval: GenomicSpan) -> List[GenericGenomicsSpan]:
# NB: only return unique instances of intervals
intervals: Set[GenericGenomicsSpan] = {
ref_intervals[index]
for _, _, index in tree.overlap(
refname, interval.zero_based_start, interval.zero_based_open_end
)
for _, _, index in tree.overlap(refname, interval.start, interval.end)
}
return sorted(
intervals,
key=lambda intv: (
intv.zero_based_start,
intv.zero_based_open_end,
intv.start,
intv.end,
self._is_negative(intv),
intv.reference_name,
intv.refname,
),
)

Expand All @@ -330,7 +308,7 @@ def _is_negative(interval: GenomicSpan) -> bool:

def get_enclosing_intervals(self, interval: GenomicSpan) -> List[GenericGenomicsSpan]:
"""Returns the set of intervals in this detector that wholly enclose the query interval.
i.e. query.start >= target.start and query.end <= target.end.
i.e. `query.start >= target.start` and `query.end <= target.end`.
Args:
interval: the query interval
Expand All @@ -339,12 +317,7 @@ def get_enclosing_intervals(self, interval: GenomicSpan) -> List[GenericGenomics
The intervals will be returned in ascending genomic order.
"""
results = self.get_overlaps(interval)
return [
i
for i in results
if interval.zero_based_start >= i.zero_based_start
and interval.zero_based_open_end <= i.zero_based_open_end
]
return [i for i in results if interval.start >= i.start and interval.end <= i.end]

def get_enclosed(self, interval: GenomicSpan) -> List[GenericGenomicsSpan]:
"""Returns the set of intervals in this detector that are enclosed by the query
Expand All @@ -358,12 +331,7 @@ def get_enclosed(self, interval: GenomicSpan) -> List[GenericGenomicsSpan]:
The intervals will be returned in ascending genomic order.
"""
results = self.get_overlaps(interval)
return [
i
for i in results
if i.zero_based_start >= interval.zero_based_start
and i.zero_based_open_end <= interval.zero_based_open_end
]
return [i for i in results if i.start >= interval.start and i.end <= interval.end]

@classmethod
def from_bed(cls, path: Path) -> "OverlapDetector":
Expand Down
67 changes: 38 additions & 29 deletions pybedlite/tests/test_overlap_detector.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,9 +202,9 @@ def test_arbitrary_interval_types() -> None:

@dataclass(eq=True, frozen=True)
class ZeroBasedOpenEndedProtocol:
reference_name: str
zero_based_start: int
zero_based_open_end: int
refname: str
start: int
end: int

@property
def is_negative(self) -> bool:
Expand All @@ -213,22 +213,17 @@ def is_negative(self) -> bool:
@dataclass(eq=True, frozen=True)
class OneBasedProtocol:
contig: str
start: int
one_based_start: int
end: int

@property
def reference_name(self) -> str:
def refname(self) -> str:
return self.contig

@property
def zero_based_start(self) -> int:
def start(self) -> int:
"""A 0-based start position."""
return self.start - 1

@property
def zero_based_open_end(self) -> int:
"""A 0-based open-ended position."""
return self.end
return self.one_based_start - 1

@property
def is_negative(self) -> bool:
Expand All @@ -237,27 +232,33 @@ def is_negative(self) -> bool:

@dataclass(eq=True, frozen=True)
class ZeroBasedUnstranded:
reference_name: str
refname: str
zero_based_start: int
zero_based_open_end: int
end: int

@property
def start(self) -> int:
"""A 0-based start position."""
return self.zero_based_start

@dataclass(eq=True, frozen=True)
class ZeroBasedStranded:
reference_name: str
refname: str
zero_based_start: int
zero_based_open_end: int
end: int
is_negative: bool

@property
def start(self) -> int:
"""A 0-based start position."""
return self.zero_based_start

# Create minimal features of all supported structural types
zero_based_protocol = ZeroBasedOpenEndedProtocol(
reference_name="chr1", zero_based_start=1, zero_based_open_end=50
)
one_based_protocol = OneBasedProtocol(contig="chr1", start=10, end=60)
zero_based_unstranded = ZeroBasedUnstranded(
reference_name="chr1", zero_based_start=20, zero_based_open_end=70
)
zero_based_protocol = ZeroBasedOpenEndedProtocol(refname="chr1", start=1, end=50)
one_based_protocol = OneBasedProtocol(contig="chr1", one_based_start=10, end=60)
zero_based_unstranded = ZeroBasedUnstranded(refname="chr1", zero_based_start=20, end=70)
zero_based_stranded = ZeroBasedStranded(
reference_name="chr1", zero_based_start=30, zero_based_open_end=80, is_negative=True
refname="chr1", zero_based_start=30, end=80, is_negative=True
)
# Set up an overlap detector to hold all the features we care about
AllKinds: TypeAlias = Union[
Expand Down Expand Up @@ -314,11 +315,19 @@ def test_the_overlap_detector_wont_accept_a_non_hashable_feature() -> None:

@dataclass # With the defaults (`eq=True`, `frozen=False`) a dataclass sets `__hash__` to None, so instances are unhashable.
class ChromFeature:
reference_name: str
refname: str
zero_based_start: int
zero_based_open_end: int
end: int

@property
def start(self) -> int:
"""A 0-based start position."""
return self.zero_based_start

Check warning on line 325 in pybedlite/tests/test_overlap_detector.py

View check run for this annotation

Codecov / codecov/patch

pybedlite/tests/test_overlap_detector.py#L325

Added line #L325 was not covered by tests

@property
def is_negative(self) -> bool:
"""True if the interval is on the negative strand, False otherwise"""
return False

Check warning on line 330 in pybedlite/tests/test_overlap_detector.py

View check run for this annotation

Codecov / codecov/patch

pybedlite/tests/test_overlap_detector.py#L330

Added line #L330 was not covered by tests

with pytest.raises(ValueError):
OverlapDetector(
[ChromFeature(reference_name="chr1", zero_based_start=0, zero_based_open_end=30)]
)
OverlapDetector([ChromFeature(refname="chr1", zero_based_start=0, end=30)])

0 comments on commit 49ff82c

Please sign in to comment.