pafpy.tag

A module for wrapping SAM-like optional fields (tags) generally used in PAF files.

The full specifications can be found in the SAM optional fields specification: https://samtools.github.io/hts-specs/SAMtags.pdf

The main class of interest in this module is Tag. It can be imported into your project like so:

from pafpy import Tag
Expand source code
"""A module for wrapping SAM-like optional fields (tags) generally used in PAF files.

The full specifications can be found [here][specs].

The main class of interest in this module is `pafpy.tag.Tag`. It can be imported into your
project like so

```py
from pafpy import Tag
```
[specs]: https://samtools.github.io/hts-specs/SAMtags.pdf
"""
import re
from typing import NamedTuple, Pattern, Type, Union

# Separator placed between the TAG, TYPE and VALUE fields of a tag string; used both
# when parsing a tag (it is interpolated into `TAG_REGEX`) and when rendering one.
DELIM = ":"


class InvalidTagFormat(Exception):
    """Raised to signal that a tag string does not follow the expected format."""


class UnknownTagTypeChar(Exception):
    """Raised to signal that the TYPE character passed from a `Tag` is not one of the
    known tag types.
    """


class TagType(NamedTuple):
    """Immutable record describing one kind of `Tag` value: its TYPE character, the
    Python type used to hold the value, and the pattern the raw value must satisfy.
    """

    char: str
    """One-character code identifying the tag type."""
    python_type: Type
    """Python type that the value of a `Tag` of this type is converted to."""
    value_regex: Pattern
    """Compiled pattern describing the textual values permitted for this type."""


# Registry of the supported tag types, keyed by the TYPE character. Each entry pairs
# the Python type a value is converted to with the regular expression (exposing a
# named group `value`) that the raw VALUE text has to satisfy.
TagTypes = {
    # A single printable, non-space ASCII character.
    "A": TagType(
        char="A", python_type=str, value_regex=re.compile(r"(?P<value>[!-~])")
    ),
    # An optionally signed integer.
    "i": TagType(
        char="i", python_type=int, value_regex=re.compile(r"(?P<value>[-+]?\d+)"),
    ),
    # An optionally signed decimal number with optional exponent, or "inf".
    "f": TagType(
        char="f",
        python_type=float,
        # The alternation is grouped *after* the sign so that the sign applies to
        # "inf" too; previously `|inf` sat outside the signed branch, which meant
        # "-inf" and "+inf" were rejected even though `float()` parses them.
        value_regex=re.compile(r"(?P<value>[-+]?(\d*\.?\d+([eE][-+]?\d+)?|inf))"),
    ),
    # A possibly empty string of printable ASCII characters, spaces included.
    "Z": TagType(
        char="Z", python_type=str, value_regex=re.compile(r"(?P<value>[ !-~]*)"),
    ),
}


# Pattern for a complete TAG:TYPE:VALUE string. The three field patterns are glued
# together with DELIM, and the TYPE character class is generated from the keys of
# `TagTypes` so that only registered type characters can match.
TAG_REGEX = re.compile(
    DELIM.join(
        [
            r"^(?P<tag>\w{2})",
            "(?P<type>[" + "".join(TagTypes.keys()) + "])",
            r"(?P<value>.*)$",
        ]
    )
)


class Tag(NamedTuple):
    """Class representing a single SAM-like optional field (tag).

    There are two ways to construct a `Tag`:

    1. The default constructor, where you specify each member variable manually.
    2. Using the `Tag.from_str` factory constructor method.

    > *Note: It is recommended that you construct tags using the `Tag.from_str`
    constructor as it has some additional logic to ensure the type of the value is
    inferred correctly.*

    ## Example
    ```py
    from pafpy import Tag

    # default constructor
    tag = Tag(tag="NM", type="i", value=50)
    assert str(tag) == "NM:i:50"
    assert tag.value == 50

    # from_str factory constructor
    tag_from_str = Tag.from_str("NM:i:50")
    assert tag_from_str == tag
    ```
    """

    tag: str
    """The two character key identifying the tag. e.g. "NM" or "cg"."""
    type: str
    """A single character indicating the type of `value`. e.g. "A" or "i"."""
    value: Union[str, float, int]
    """The value of the tag."""

    def __str__(self) -> str:
        """Render the tag in its TAG:TYPE:VALUE string form."""
        return DELIM.join([self.tag, self.type, str(self.value)])

    @staticmethod
    def from_str(string: str) -> "Tag":
        """Construct a `Tag` from a string.

        The string is split into its TAG, TYPE and VALUE fields. The VALUE is
        validated, in its entirety, against the regular expression registered for
        the TYPE and is then converted to the Python type associated with it.

        ## Example
        ```py
        from pafpy import Tag

        string = "NM:i:50"
        tag = Tag.from_str(string)

        assert tag.tag == "NM"
        assert tag.type == "i"
        assert tag.value == 50
        ```

        ## Errors
        If `string` is not formatted according to the [specs][specs], an
        `InvalidTagFormat` exception will be raised. This covers a string that is
        not in TAG:TYPE:VALUE form, a TYPE character that is not supported, and a
        VALUE that does not conform as a whole to its TYPE (e.g. `NM:i:50x`).

        [specs]: https://samtools.github.io/hts-specs/SAMtags.pdf
        """
        match = TAG_REGEX.search(string)
        if not match:
            raise InvalidTagFormat(f"{string} is not in valid TAG:TYPE:VALUE format.")

        tag = match.group("tag")
        # this dict access should not fail as we would not have got a regex match if
        # there was an invalid tag type
        tag_type = TagTypes[match.group("type")]

        value_string = match.group("value")
        # fullmatch rather than match: match only anchors at the start, so a valid
        # prefix followed by junk ("50x" for an integer, "PP" for a single character)
        # would slip through validation.
        if not tag_type.value_regex.fullmatch(value_string):
            raise InvalidTagFormat(f"VALUE of tag {string} is not the expected TYPE")

        try:
            value = tag_type.python_type(value_string)
        except ValueError as err:
            # Conversion can still refuse a literal the regex accepts (for example
            # CPython's limit on the length of integer strings); report that with
            # the documented exception instead of leaking a bare ValueError.
            raise InvalidTagFormat(
                f"VALUE of tag {string} is not the expected TYPE"
            ) from err

        return Tag(tag, tag_type.char, value)

Classes

class InvalidTagFormat (...)

An exception used to indicate the format of a tag string is invalid.

Expand source code
class InvalidTagFormat(Exception):
    """Raised to signal that a tag string does not follow the expected format."""

Ancestors

  • builtins.Exception
  • builtins.BaseException
class Tag (tag: str, type: str, value: Union[str, float, int])

Class representing a single SAM-like optional field (tag).

There are two ways to construct a Tag:

  1. The default constructor, where you specify each member variable manually.
  2. Using the Tag.from_str() factory constructor method.

Note: It is recommended that you construct tags using the Tag.from_str() constructor as it has some additional logic to ensure the type of the value is inferred correctly.

Example

from pafpy import Tag

# default constructor
tag = Tag(tag="NM", type="i", value=50)
assert str(tag) == "NM:i:50"
assert tag.value == 50

# from_str factory constructor
tag_from_str = Tag.from_str("NM:i:50")
assert tag_from_str == tag
Expand source code
class Tag(NamedTuple):
    """Class representing a single SAM-like optional field (tag).

    There are two ways to construct a `Tag`:

    1. The default constructor, where you specify each member variable manually.
    2. Using the `Tag.from_str` factory constructor method.

    > *Note: It is recommended that you construct tags using the `Tag.from_str`
    constructor as it has some additional logic to ensure the type of the value is
    inferred correctly.*

    ## Example
    ```py
    from pafpy import Tag

    # default constructor
    tag = Tag(tag="NM", type="i", value=50)
    assert str(tag) == "NM:i:50"
    assert tag.value == 50

    # from_str factory constructor
    tag_from_str = Tag.from_str("NM:i:50")
    assert tag_from_str == tag
    ```
    """

    tag: str
    """The two character key identifying the tag. e.g. "NM" or "cg"."""
    type: str
    """A single character indicating the type of `value`. e.g. "A" or "i"."""
    value: Union[str, float, int]
    """The value of the tag."""

    def __str__(self) -> str:
        """Render the tag in its TAG:TYPE:VALUE string form."""
        return DELIM.join([self.tag, self.type, str(self.value)])

    @staticmethod
    def from_str(string: str) -> "Tag":
        """Construct a `Tag` from a string.

        The string is split into its TAG, TYPE and VALUE fields. The VALUE is
        validated, in its entirety, against the regular expression registered for
        the TYPE and is then converted to the Python type associated with it.

        ## Example
        ```py
        from pafpy import Tag

        string = "NM:i:50"
        tag = Tag.from_str(string)

        assert tag.tag == "NM"
        assert tag.type == "i"
        assert tag.value == 50
        ```

        ## Errors
        If `string` is not formatted according to the [specs][specs], an
        `InvalidTagFormat` exception will be raised. This covers a string that is
        not in TAG:TYPE:VALUE form, a TYPE character that is not supported, and a
        VALUE that does not conform as a whole to its TYPE (e.g. `NM:i:50x`).

        [specs]: https://samtools.github.io/hts-specs/SAMtags.pdf
        """
        match = TAG_REGEX.search(string)
        if not match:
            raise InvalidTagFormat(f"{string} is not in valid TAG:TYPE:VALUE format.")

        tag = match.group("tag")
        # this dict access should not fail as we would not have got a regex match if
        # there was an invalid tag type
        tag_type = TagTypes[match.group("type")]

        value_string = match.group("value")
        # fullmatch rather than match: match only anchors at the start, so a valid
        # prefix followed by junk ("50x" for an integer, "PP" for a single character)
        # would slip through validation.
        if not tag_type.value_regex.fullmatch(value_string):
            raise InvalidTagFormat(f"VALUE of tag {string} is not the expected TYPE")

        try:
            value = tag_type.python_type(value_string)
        except ValueError as err:
            # Conversion can still refuse a literal the regex accepts (for example
            # CPython's limit on the length of integer strings); report that with
            # the documented exception instead of leaking a bare ValueError.
            raise InvalidTagFormat(
                f"VALUE of tag {string} is not the expected TYPE"
            ) from err

        return Tag(tag, tag_type.char, value)

Ancestors

  • builtins.tuple

Static methods

def from_str(string: str) -> Tag

Construct a Tag from a string.

Example

from pafpy import Tag

string = "NM:i:50"
tag = Tag.from_str(string)

assert tag.tag == "NM"
assert tag.type == "i"
assert tag.value == 50

Errors

If string is not formatted according to the specs, an InvalidTagFormat exception will be raised.

Expand source code
@staticmethod
def from_str(string: str) -> "Tag":
    """Construct a `Tag` from a string.

    The string is split into its TAG, TYPE and VALUE fields. The VALUE is
    validated, in its entirety, against the regular expression registered for
    the TYPE and is then converted to the Python type associated with it.

    ## Example
    ```py
    from pafpy import Tag

    string = "NM:i:50"
    tag = Tag.from_str(string)

    assert tag.tag == "NM"
    assert tag.type == "i"
    assert tag.value == 50
    ```

    ## Errors
    If `string` is not formatted according to the [specs][specs], an
    `InvalidTagFormat` exception will be raised. This covers a string that is
    not in TAG:TYPE:VALUE form, a TYPE character that is not supported, and a
    VALUE that does not conform as a whole to its TYPE (e.g. `NM:i:50x`).

    [specs]: https://samtools.github.io/hts-specs/SAMtags.pdf
    """
    match = TAG_REGEX.search(string)
    if not match:
        raise InvalidTagFormat(f"{string} is not in valid TAG:TYPE:VALUE format.")

    tag = match.group("tag")
    # this dict access should not fail as we would not have got a regex match if
    # there was an invalid tag type
    tag_type = TagTypes[match.group("type")]

    value_string = match.group("value")
    # fullmatch rather than match: match only anchors at the start, so a valid
    # prefix followed by junk ("50x" for an integer, "PP" for a single character)
    # would slip through validation.
    if not tag_type.value_regex.fullmatch(value_string):
        raise InvalidTagFormat(f"VALUE of tag {string} is not the expected TYPE")

    try:
        value = tag_type.python_type(value_string)
    except ValueError as err:
        # Conversion can still refuse a literal the regex accepts (for example
        # CPython's limit on the length of integer strings); report that with
        # the documented exception instead of leaking a bare ValueError.
        raise InvalidTagFormat(
            f"VALUE of tag {string} is not the expected TYPE"
        ) from err

    return Tag(tag, tag_type.char, value)

Instance variables

var tag : str

The two character key identifying the tag. e.g. "NM" or "cg".

var type : str

A single character indicating the type of value. e.g. "A" or "i".

var value : Union[str, float, int]

The value of the tag.

class TagType (char: str, python_type: Type, value_regex: Pattern[~AnyStr])

A tuple holding information relating to a Tag's type.

Expand source code
class TagType(NamedTuple):
    """Immutable record describing one kind of `Tag` value: its TYPE character, the
    Python type used to hold the value, and the pattern the raw value must satisfy.
    """

    char: str
    """One-character code identifying the tag type."""
    python_type: Type
    """Python type that the value of a `Tag` of this type is converted to."""
    value_regex: Pattern
    """Compiled pattern describing the textual values permitted for this type."""

Ancestors

  • builtins.tuple

Instance variables

var char : str

The single character representing the tag type.

var python_type : Type

The python type used to encode the associated Tag value.

var value_regex : Pattern[~AnyStr]

A regular expression that the Tag value must conform to for this type.

class UnknownTagTypeChar (...)

An exception used to indicate that the passed TYPE character from a Tag is not known.

Expand source code
class UnknownTagTypeChar(Exception):
    """Raised to signal that the TYPE character passed from a `Tag` is not one of the
    known tag types.
    """

Ancestors

  • builtins.Exception
  • builtins.BaseException