pafpy.tag
A module for wrapping SAM-like optional fields (tags) generally used in PAF files.
The full specifications can be found here: https://samtools.github.io/hts-specs/SAMtags.pdf
The main class of interest in this module is Tag. It can be imported into your
project like so
from pafpy import Tag
Expand source code
"""A module for wrapping SAM-like optional fields (tags) generally used in PAF files.
The full specifications can be found [here][specs].
The main class of interest in this module is `pafpy.tag.Tag`. It can be imported into your
project like so
```py
from pafpy import Tag
```
[specs]: https://samtools.github.io/hts-specs/SAMtags.pdf
"""
import re
from typing import NamedTuple, Pattern, Type, Union
# Separator placed between the TAG, TYPE and VALUE parts of a tag string.
DELIM = ":"
class InvalidTagFormat(Exception):
    """Raised to signal that a tag string is not validly formatted."""
class UnknownTagTypeChar(Exception):
    """Raised to signal that the TYPE character passed from a `Tag` is not a known
    one.
    """
class TagType(NamedTuple):
    """Immutable record describing one kind of value a `Tag` can carry."""

    char: str
    """The one-letter code that identifies this tag type."""
    python_type: Type
    """The python type that a `Tag` value of this type is stored as."""
    value_regex: Pattern
    """The regular expression a `Tag` value of this type has to satisfy."""
# Registry of the supported tag types, keyed by their single TYPE character.
# Every `value_regex` exposes the matched text through a named group `value`.
TagTypes = {
    # a single printable character
    "A": TagType(
        char="A", python_type=str, value_regex=re.compile(r"(?P<value>[!-~])")
    ),
    # an optionally signed integer
    "i": TagType(
        char="i", python_type=int, value_regex=re.compile(r"(?P<value>[-+]?\d+)"),
    ),
    # an optionally signed floating point number or infinity
    "f": TagType(
        char="f",
        python_type=float,
        # The non-capturing group keeps the optional sign attached to BOTH the
        # numeric form and `inf`. Without it the `|` splits the whole pattern, so
        # a signed infinity such as "-inf" (which is what `str(float("-inf"))`
        # produces) would be rejected. The numbering of the existing capture
        # groups is left untouched.
        value_regex=re.compile(
            r"(?P<value>[-+]?(?:(\d*\.?\d+([eE][-+]?\d+)?)|inf))"
        ),
    ),
    # a string of printable characters, spaces included; may be empty
    "Z": TagType(
        char="Z", python_type=str, value_regex=re.compile(r"(?P<value>[ !-~]*)"),
    ),
}
# Pattern for a complete `TAG:TYPE:VALUE` string. It is assembled piece by piece:
# a two character key, one of the known TYPE characters, then the raw value text.
TAG_REGEX = re.compile(
    "^"
    rf"(?P<tag>\w{{2}}){DELIM}"
    rf"(?P<type>[{''.join(TagTypes)}]){DELIM}"
    r"(?P<value>.*)$"
)
class Tag(NamedTuple):
    """Class representing a single SAM-like optional field (tag).

    There are two ways to construct a `Tag`:

    1. The default constructor, where you specify each member variable manually.
    2. Using the `Tag.from_str` factory constructor method.

    > *Note: It is recommended that you construct tags using the `Tag.from_str`
    constructor as it validates the value against the tag's type and converts it to
    the matching python type.*

    ## Example
    ```py
    from pafpy import Tag

    # default constructor
    tag = Tag(tag="NM", type="i", value=50)
    assert str(tag) == "NM:i:50"
    assert tag.value == 50
    # from_str factory constructor
    tag_from_str = Tag.from_str("NM:i:50")
    assert tag_from_str == tag
    ```
    """

    tag: str
    """The two character key identifying the tag. e.g. "NM" or "cg"."""
    type: str
    """A single character indicating the type of `value`. e.g. "A" or "i"."""
    value: Union[str, float, int]
    """The value of the tag."""

    def __str__(self) -> str:
        """Return the tag in its `TAG:TYPE:VALUE` string form."""
        return DELIM.join([self.tag, self.type, str(self.value)])

    @staticmethod
    def from_str(string: str) -> "Tag":
        """Construct a `Tag` from a string.

        The string must be in `TAG:TYPE:VALUE` form and the *entire* VALUE must
        conform to the pattern registered for TYPE. The value is then converted to
        the python type associated with TYPE.

        ## Example
        ```py
        from pafpy import Tag

        string = "NM:i:50"
        tag = Tag.from_str(string)
        assert tag.tag == "NM"
        assert tag.type == "i"
        assert tag.value == 50
        ```

        ## Errors
        If `string` is not formatted according to the [specs][specs], an
        `InvalidTagFormat` exception will be raised.

        [specs]: https://samtools.github.io/hts-specs/SAMtags.pdf
        """
        match = TAG_REGEX.search(string)
        if not match:
            raise InvalidTagFormat(f"{string} is not in valid TAG:TYPE:VALUE format.")

        tag = match.group("tag")
        # this dict access should not fail as we would not have got a regex match if
        # there was an invalid tag type
        tag_type = TagTypes[match.group("type")]
        value_string = match.group("value")

        # The whole value has to conform, not just a prefix of it: a prefix-only
        # check would accept e.g. "50abc" for an integer or "PP" for a single
        # character, and would never reject anything for an empty-matching pattern.
        if not tag_type.value_regex.fullmatch(value_string):
            raise InvalidTagFormat(f"VALUE of tag {string} is not the expected TYPE")

        try:
            value = tag_type.python_type(value_string)
        except ValueError as err:
            # Defensive: keep the documented contract even if the conversion
            # rejects a value that passed the pattern check.
            raise InvalidTagFormat(
                f"VALUE of tag {string} is not the expected TYPE"
            ) from err

        return Tag(tag, tag_type.char, value)
Classes
class InvalidTagFormat (...)-
An exception used to indicate the format of a tag string is invalid.
Expand source code
class InvalidTagFormat(Exception):
    """An exception used to indicate the format of a tag string is invalid."""

    pass
Ancestors
- builtins.Exception
- builtins.BaseException
class Tag (tag: str, type: str, value: Union[str, float, int])-
Class representing a single SAM-like optional field (tag).
There are two ways to construct a Tag:
1. The default constructor, where you specify each member variable manually.
2. Using the Tag.from_str() factory constructor method.
Note: It is recommended that you construct tags using the Tag.from_str() constructor as it has some additional logic to ensure the type of the value is inferred correctly.
Example
from pafpy import Tag
# default constructor
tag = Tag(tag="NM", type="i", value=50)
assert str(tag) == "NM:i:50"
assert tag.value == 50
# from_str factory constructor
tag_from_str = Tag.from_str("NM:i:50")
assert tag_from_str == tag
Expand source code
class Tag(NamedTuple):
    """Class representing a single SAM-like optional field (tag).
    There are two ways to construct a `Tag`:
    1. The default constructor, where you specify each member variable manually.
    2. Using the `Tag.from_str` factory constructor method.
    > *Note: It is recommended that you construct tags using the `Tag.from_str`
    constructor as it has some additional logic to ensure the type of the value is
    inferred correctly.*
    ## Example
    ```py
    from pafpy import Tag
    # default constructor
    tag = Tag(tag="NM", type="i", value=50)
    assert str(tag) == "NM:i:50"
    assert tag.value == 50
    # from_str factory constructor
    tag_from_str = Tag.from_str("NM:i:50")
    assert tag_from_str == tag
    ```
    """

    tag: str
    """The two character key identifying the tag. e.g. "NM" or "cg"."""
    type: str
    """A single character indicating the type of `value`. e.g. "A" or "i"."""
    value: Union[str, float, int]
    """The value of the tag."""

    def __str__(self) -> str:
        return DELIM.join([self.tag, self.type, str(self.value)])

    @staticmethod
    def from_str(string: str) -> "Tag":
        """Construct a `Tag` from a string.
        ## Example
        ```py
        from pafpy import Tag
        string = "NM:i:50"
        tag = Tag.from_str(string)
        assert tag.tag == "NM"
        assert tag.type == "i"
        assert tag.value == 50
        ```
        ## Errors
        If `string` is not formatted according to the [specs][specs], an
        `InvalidTagFormat` exception will be raised.
        [specs]: https://samtools.github.io/hts-specs/SAMtags.pdf
        """
        match = TAG_REGEX.search(string)
        if not match:
            raise InvalidTagFormat(f"{string} is not in valid TAG:TYPE:VALUE format.")
        tag = match.group("tag")
        # this dict access should not fail as we would not have got a regex match if
        # there was an invalid tag type
        tag_type = TagTypes[match.group("type")]
        value_string = match.group("value")
        value_match = tag_type.value_regex.match(value_string)
        if not value_match:
            raise InvalidTagFormat(f"VALUE of tag {string} is not the expected TYPE")
        value = tag_type.python_type(value_string)
        return Tag(tag, tag_type.char, value)
Ancestors
- builtins.tuple
Static methods
def from_str(string: str) -> Tag-
Construct a Tag from a string.
Example
from pafpy import Tag
string = "NM:i:50"
tag = Tag.from_str(string)
assert tag.tag == "NM"
assert tag.type == "i"
assert tag.value == 50
Errors
If string is not formatted according to the specs, an InvalidTagFormat exception will be raised.
Expand source code
@staticmethod
def from_str(string: str) -> "Tag":
    """Construct a `Tag` from a string.
    ## Example
    ```py
    from pafpy import Tag
    string = "NM:i:50"
    tag = Tag.from_str(string)
    assert tag.tag == "NM"
    assert tag.type == "i"
    assert tag.value == 50
    ```
    ## Errors
    If `string` is not formatted according to the [specs][specs], an
    `InvalidTagFormat` exception will be raised.
    [specs]: https://samtools.github.io/hts-specs/SAMtags.pdf
    """
    match = TAG_REGEX.search(string)
    if not match:
        raise InvalidTagFormat(f"{string} is not in valid TAG:TYPE:VALUE format.")
    tag = match.group("tag")
    # this dict access should not fail as we would not have got a regex match if
    # there was an invalid tag type
    tag_type = TagTypes[match.group("type")]
    value_string = match.group("value")
    value_match = tag_type.value_regex.match(value_string)
    if not value_match:
        raise InvalidTagFormat(f"VALUE of tag {string} is not the expected TYPE")
    value = tag_type.python_type(value_string)
    return Tag(tag, tag_type.char, value)
Instance variables
var tag : str-
The two character key identifying the tag. e.g. "NM" or "cg".
var type : str-
A single character indicating the type of value. e.g. "A" or "i".
var value : Union[str, float, int]-
The value of the tag.
class TagType (char: str, python_type: Type, value_regex: Pattern[~AnyStr])-
A tuple holding information relating to a Tag's type.
Expand source code
class TagType(NamedTuple):
    """A tuple holding information relating to a `Tag`'s type."""

    char: str
    """The single character representing the tag type."""
    python_type: Type
    """The python type used to encode the associated `Tag` value."""
    value_regex: Pattern
    """A regular expression that the `Tag` value must conform to for this type."""
Ancestors
- builtins.tuple
Instance variables
class UnknownTagTypeChar (...)-
An exception used to indicate that the passed TYPE character from a Tag is not known.
Expand source code
class UnknownTagTypeChar(Exception):
    """An exception used to indicate that the passed TYPE character from a `Tag` is not
    known.
    """

    pass
Ancestors
- builtins.Exception
- builtins.BaseException