Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add FragmentWithOverhangs #387

Merged
merged 5 commits into from
Nov 15, 2023
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Added
- Alternative start codons can now be used in the `synthesis/codon` DNA -> protein translation package (#305)
- Added a parser and writer for the `pileup` sequence alignment format (#329)
- Added `FragmentWithOverhangs` to the `synthesis/fragment` package to fragment a sequence using only a caller-supplied set of overhangs (#387)

### Fixed
- `fastq` parser no longer becomes de-aligned when reading (#325)
Expand All @@ -19,4 +20,4 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
Oops, we weren't keeping a changelog before this tag!

[unreleased]: https://github.com/TimothyStiles/poly/compare/v0.26.0...main
[0.26.0]: https://github.com/TimothyStiles/poly/releases/tag/v0.26.0
[0.26.0]: https://github.com/TimothyStiles/poly/releases/tag/v0.26.0
28 changes: 24 additions & 4 deletions synthesis/fragment/fragment.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ func NextOverhang(currentOverhangs []string) string {
}

// optimizeOverhangIteration takes in a sequence and optimally fragments it.
func optimizeOverhangIteration(sequence string, minFragmentSize int, maxFragmentSize int, existingFragments []string, existingOverhangs []string) ([]string, float64, error) {
func optimizeOverhangIteration(sequence string, minFragmentSize int, maxFragmentSize int, existingFragments []string, existingOverhangs []string, buildOverhangs []string) ([]string, float64, error) {
// If the sequence is smaller than maxFragment size, stop iteration.
if len(sequence) < maxFragmentSize {
existingFragments = append(existingFragments, sequence)
Expand Down Expand Up @@ -136,6 +136,7 @@ func optimizeOverhangIteration(sequence string, minFragmentSize int, maxFragment
var bestOverhangEfficiency float64
var bestOverhangPosition int
var alreadyExists bool
var buildAvailable bool
for overhangOffset := 0; overhangOffset <= maxFragmentSize-minFragmentSize; overhangOffset++ {
// We go from max -> min, so we can maximize the size of our fragments
overhangPosition := maxFragmentSize - overhangOffset
Expand All @@ -148,7 +149,18 @@ func optimizeOverhangIteration(sequence string, minFragmentSize int, maxFragment
alreadyExists = true
}
}
if !alreadyExists {
// Make sure overhang is in set of buildOverhangs. If buildOverhangs is
// blank, skip this check.
buildAvailable = false
if len(buildOverhangs) == 0 {
buildAvailable = true
}
for _, buildOverhang := range buildOverhangs {
if buildOverhang == overhangToTest || transform.ReverseComplement(buildOverhang) == overhangToTest {
buildAvailable = true
}
}
if !alreadyExists && buildAvailable {
Koeng101 marked this conversation as resolved.
Show resolved Hide resolved
// See if this overhang is a palindrome
if !checks.IsPalindromic(overhangToTest) {
// Get this overhang set's efficiency
Expand All @@ -169,7 +181,7 @@ func optimizeOverhangIteration(sequence string, minFragmentSize int, maxFragment
existingFragments = append(existingFragments, sequence[:bestOverhangPosition])
existingOverhangs = append(existingOverhangs, sequence[bestOverhangPosition-4:bestOverhangPosition])
sequence = sequence[bestOverhangPosition-4:]
return optimizeOverhangIteration(sequence, minFragmentSize, maxFragmentSize, existingFragments, existingOverhangs)
return optimizeOverhangIteration(sequence, minFragmentSize, maxFragmentSize, existingFragments, existingOverhangs, buildOverhangs)
}

// Fragment fragments a sequence into fragments between the min and max size,
Expand All @@ -178,5 +190,13 @@ func optimizeOverhangIteration(sequence string, minFragmentSize int, maxFragment
// last 4 base pairs are the initial overhang set.
func Fragment(sequence string, minFragmentSize int, maxFragmentSize int, existingOverhangs []string) ([]string, float64, error) {
	// Normalize to upper case before fragmenting.
	sequence = strings.ToUpper(sequence)

	// The first and last 4 bases of the sequence seed the overhang set, ahead
	// of any caller-supplied existingOverhangs. NOTE: a sequence shorter than
	// 4 bases panics on these slice expressions.
	initialOverhangs := append([]string{sequence[:4], sequence[len(sequence)-4:]}, existingOverhangs...)

	// An empty buildOverhangs set disables the build-overhang restriction, so
	// every candidate overhang is eligible.
	return optimizeOverhangIteration(sequence, minFragmentSize, maxFragmentSize, []string{}, initialOverhangs, []string{})
}

// FragmentWithOverhangs fragments a sequence with only a certain overhang set.
// This is useful if you are constraining the set of possible overhangs when
// doing more advanced forms of cloning.
func FragmentWithOverhangs(sequence string, minFragmentSize int, maxFragmentSize int, existingOverhangs []string, buildOverhangs []string) ([]string, float64, error) {
Koeng101 marked this conversation as resolved.
Show resolved Hide resolved
Koeng101 marked this conversation as resolved.
Show resolved Hide resolved
sequence = strings.ToUpper(sequence)
Koeng101 marked this conversation as resolved.
Show resolved Hide resolved
return optimizeOverhangIteration(sequence, minFragmentSize, maxFragmentSize, []string{}, append([]string{sequence[:4], sequence[len(sequence)-4:]}, existingOverhangs...), buildOverhangs)
}
10 changes: 10 additions & 0 deletions synthesis/fragment/fragment_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,3 +85,13 @@ func TestRegressionTestMatching12(t *testing.T) {
t.Errorf("Expected efficiency of .99 - approximately matches NEB ligase fidelity viewer of .97. Got: %g", efficiency)
}
}

// TestFragmentWithOverhangs checks that a gene can be fragmented into 90-110
// base pair pieces without error when the usable overhangs are restricted to
// a fixed build set.
func TestFragmentWithOverhangs(t *testing.T) {
	defaultOverhangs := []string{"CGAG", "GTCT", "GGGG", "AAAA", "AACT", "AATG", "ATCC", "CGCT", "TTCT", "AAGC", "ATAG", "ATTA", "ATGT", "ACTC", "ACGA", "TATC", "TAGG", "TACA", "TTAC", "TTGA", "TGGA", "GAAG", "GACC", "GCCG", "TCTG", "GTTG", "GTGC", "TGCC", "CTGG", "TAAA", "TGAG", "AAGA", "AGGT", "TTCG", "ACTA", "TTAG", "TCTC", "TCGG", "ATAA", "ATCA", "TTGC", "CACG", "AATA", "ACAA", "ATGG", "TATG", "AAAT", "TCAC"}
	gene := "atgaaaaaatttaactggaagaaaatagtcgcgccaattgcaatgctaattattggcttactaggtggtttacttggtgcctttatcctactaacagcagccggggtatcttttaccaatacaacagatactggagtaaaaacggctaagaccgtctacaccaatataacagatacaactaaggctgttaagaaagtacaaaatgccgttgtttctgtcatcaattatcaagaaggttcatcttcagattctctaaatgacctttatggccgtatctttggcggaggggacagttctgattctagccaagaaaattcaaaagattcagatggtctacaggtcgctggtgaaggttctggagtcatctataaaaaagatggcaaagaagcctacatcgtaaccaataaccatgttgtcgatggggctaaaaaacttgaaatcatgctttcggatggttcgaaaattactggtgaacttgttggtaaagacacttactctgacctagcagttgtcaaagtatcttcagataaaataacaactgttgcagaatttgcagactcaaactcccttactgttggtgaaaaagcaattgctatcggtagcccacttggtaccgaatacgccaactcagtaacagaaggaatcgtttctagccttagccgtactataacgatgcaaaacgataatggtgaaactgtatcaacaaacgctatccaaacagatgcagccattaaccctggtaactctggtggtgccctagtcaatattgaaggacaagttatcggtattaattcaagtaaaatttcatcaacgtctgcagtcgctggtagtgctgttgaaggtatggggtttgccattccatcaaacgatgttgttgaaatcatcaatcaattagaaaaagatggtaaagttacacgaccagcactaggaatctcaatagcagatcttaatagcctttctagcagcgcaacttctaaattagatttaccagatgaggtcaaatccggtgttgttgtcggtagtgttcagaaaggtatgccagctgacggtaaacttcaagaatatgatgttatcactgagattgatggtaagaaaatcagctcaaaaactgatattcaaaccaatctttacagccatagtatcggagatactatcaaggtaaccttctatcgtggtaaagataagaaaactgtagatcttaaattaacaaaatctacagaagacatatctgattaa"

	_, _, err := FragmentWithOverhangs(gene, 90, 110, []string{}, defaultOverhangs)
	if err != nil {
		// Use a constant format string: passing err.Error() as the format
		// would misinterpret any '%' in the message and is flagged by go vet.
		t.Errorf("FragmentWithOverhangs returned an unexpected error: %v", err)
	}
}