pafpy.pafrecord

This module contains objects for working with single alignment records within a PAF file.

The main class of interest here is PafRecord. It provides a set of member variables relating to each field within the record, as well as some convenience methods for common tasks.

To use PafRecord within your code, import it like so

from pafpy import PafRecord
Expand source code
"""This module contains objects for working with single alignment records within a PAF
file.

The main class of interest here is `pafpy.pafrecord.PafRecord`. It provides a set of member
variables relating to each field within the record, as well as some convenience methods
for common tasks.

To use `PafRecord` within your code, import it like so

```py
from pafpy import PafRecord
```
"""
from enum import Enum
from typing import Dict, NamedTuple, Optional

from pafpy.strand import Strand
from pafpy.tag import Tag

Tags = Dict[str, Tag]

DELIM = "\t"
MIN_FIELDS = 12


class MalformattedRecord(Exception):
    """An exception indicating that a `PafRecord` is not in the expected format."""

    pass


class AlignmentType(Enum):
    """An enum for storing mappings between the value in the `tp` tag and the type of
    alignment this value indicates.
    """

    Primary = "P"
    Secondary = "S"
    Inversion = "I"
    Unknown = "*"


class PafRecord(NamedTuple):
    """A single entry (row) in a [PAF][paf] file.

    There are two ways to construct a `PafRecord`:
    
    1. The default constructor: where you specify each member variable manually.
    2. The `PafRecord.from_str` factory constructor: where you create a
       `PafRecord` directly from a `str`.

    ## Example
    ```py
    from pafpy import PafRecord, Strand, Tag

    # default constructor
    record1 = PafRecord(
            qname="query_name",
            qlen=1239,
            qstart=65,
            qend=1239,
            strand=Strand.Forward,
            tname="target_name",
            tlen=4378340,
            tstart=2555250,
            tend=2556472,
            mlen=1139,
            blen=1228,
            mapq=60,
            tags={"NM": Tag.from_str("NM:i:8")},
    )

    # from_str factory constructor
    line = "query_name\t1239\t65\t1239\t+\ttarget_name\t4378340\t2555250\t2556472\t1139\t1228\t60\tNM:i:8"
    record2 = PafRecord.from_str(line)

    assert record1 == record2
    ```

    [paf]: https://github.com/lh3/miniasm/blob/master/PAF.md
    """

    qname: str = "*"
    """Query sequence name."""
    qlen: int = 0
    """Query sequence length."""
    qstart: int = 0
    """Query start (0-based; BED-like; closed)."""
    qend: int = 0
    """Query end (0-based; BED-like; open)."""
    strand: Strand = Strand.Unmapped
    """`pafpy.strand.Strand.Forward` if query/target on the same strand; 
    `pafpy.strand.Strand.Reverse` if opposite; 
    `pafpy.strand.Strand.Unmapped` if unmapped."""
    tname: str = "*"
    """Target sequence name."""
    tlen: int = 0
    """Target sequence length."""
    tstart: int = 0
    """Target start on original strand (0-based)."""
    tend: int = 0
    """Target end on original strand (0-based)."""
    mlen: int = 0
    """Number of matching bases in the mapping."""
    blen: int = 0
    """Alignment block length. Number of bases, including gaps, in the mapping."""
    mapq: int = 255
    """Mapping quality (0-255; 255 for missing)."""
    tags: Optional[Tags] = None
    """[SAM-like optional fields (tags)](https://samtools.github.io/hts-specs/SAMtags.pdf). 
    It is recommended to use `PafRecord.get_tag` to retrieve individual tags."""

    def __str__(self) -> str:
        tag_str = "" if self.tags is None else DELIM.join(map(str, self.tags.values()))
        fields = [
            self.qname,
            self.qlen,
            self.qstart,
            self.qend,
            self.strand,
            self.tname,
            self.tlen,
            self.tstart,
            self.tend,
            self.mlen,
            self.blen,
            self.mapq,
            tag_str,
        ]
        return DELIM.join(map(str, fields)).rstrip()

    @staticmethod
    def from_str(line: str) -> "PafRecord":
        """Construct a `PafRecord` from a string.

        > *Note: If there are duplicate SAM-like tags, only the last one will be
        retained.*

        ## Example
        ```py
        from pafpy import PafRecord

        line = "query_name\t123\t65\t123\t+\ttname\t43783\t25552\t25564\t1139\t1228\t60"
        record = PafRecord.from_str(line)

        assert record.qname == "query_name"
        assert record.mapq == 60
        ```

        ## Errors
        - If there are less than the expected number of fields (12), this function will
        raise a `MalformattedRecord` exception.
        - If there is an invalid tag, an `pafpy.tag.InvalidTagFormat` exception will
        be raised.
        """
        fields = line.rstrip().split(DELIM)
        if len(fields) < MIN_FIELDS:
            raise MalformattedRecord(
                f"Expected {MIN_FIELDS} fields, but got {len(fields)}\n{line}"
            )
        tags: Tags = dict()
        for tag_str in fields[12:]:
            tag = Tag.from_str(tag_str)
            tags[tag.tag] = tag

        return PafRecord(
            qname=fields[0],
            qlen=int(fields[1]),
            qstart=int(fields[2]),
            qend=int(fields[3]),
            strand=Strand(fields[4]),
            tname=fields[5],
            tlen=int(fields[6]),
            tstart=int(fields[7]),
            tend=int(fields[8]),
            mlen=int(fields[9]),
            blen=int(fields[10]),
            mapq=int(fields[11]),
            tags=tags or None,
        )

    @property
    def query_aligned_length(self) -> int:
        """Length of the aligned query sequence.

        This is equal to the absolute value of `PafRecord.qend` - `PafRecord.qstart`.
        """
        return abs(self.qend - self.qstart)

    @property
    def query_coverage(self) -> float:
        """Proportion of the query sequence involved in the alignment.

        This is equal to `PafRecord.query_aligned_length` - `PafRecord.qlen`

        ## Example
        ```py
        from pafpy import PafRecord

        record = PafRecord(qlen=10, qstart=5, qend=9)
        assert record.query_coverage == 0.4
        ```
        """
        try:
            return self.query_aligned_length / self.qlen
        except ZeroDivisionError:
            return 0.0

    @property
    def target_coverage(self) -> float:
        """Proportion of the target sequence involved in the alignment.

        This is equal to `PafRecord.target_aligned_length` - `PafRecord.tlen`

        ## Example
        ```py
        from pafpy import PafRecord

        record = PafRecord(tlen=10, tstart=5, tend=9)
        assert record.target_coverage == 0.4
        ```
        """
        try:
            return self.target_aligned_length / self.tlen
        except ZeroDivisionError:
            return 0.0

    @property
    def target_aligned_length(self) -> int:
        """Length of the aligned target sequence.

        This is equal to the absolute value of `PafRecord.tend` - `PafRecord.tstart`.
        """
        return abs(self.tend - self.tstart)

    @property
    def relative_length(self) -> float:
        """Relative (aligned) length of the query sequence to the target.

        This is equal to `PafRecord.query_aligned_length` -
        `PafRecord.target_aligned_length`.

        ## Example
        ```py
        from pafpy import PafRecord

        record = PafRecord(qlen=50, qstart=10, qend=20, tlen=100, tstart=50, tend=90)
        assert record.relative_length == 0.25
        ```
        """
        try:
            return self.query_aligned_length / self.target_aligned_length
        except ZeroDivisionError:
            return 0.0

    def blast_identity(self) -> float:
        """Calculates the [BLAST identity][blast] for the record.

        BLAST identity is defined as the number of matching bases (`PafRecord.mlen`)
        over the number of alignment columns (`PafRecord.blen`).

         > *Note: If your PAF file was produced by minimap2, it is strongly advised
        that you ensure either the `-c` or `--cs` options were used when generating the
        alignment; otherwise the BLAST identity will be very inaccurate. See the
        [minimap2 FAQ][faq] for more information.*

        ## Example
        ```py
        from pafpy import PafRecord

        record = PafRecord(
            mlen=43,  # number of matches
            blen=50,  # number of alignment columns
        )
        assert record.blast_identity() == 0.86
        ```
        [blast]: https://lh3.github.io/2018/11/25/on-the-definition-of-sequence-identity#blast-identity
        [faq]: https://github.com/lh3/minimap2/blob/master/FAQ.md#1-alignment-different-with-option--a-or--c
        """
        try:
            return self.mlen / self.blen
        except ZeroDivisionError:
            return 0.0

    def is_unmapped(self) -> bool:
        """Is the record unmapped?

        A record is considered unmapped if the strand is `*`
        (`pafpy.strand.Strand.Unmapped`) - as per the minimap2
        [`--paf-no-hit`][io-opts] parameter behaviour.

        ## Example
        ```py
        from pafpy import PafRecord, Strand

        record = PafRecord(strand=Strand("*"))
        assert record.is_unmapped()
        ```

        [io-opts]: https://lh3.github.io/minimap2/minimap2.html#7
        """
        return self.strand is Strand.Unmapped

    def is_primary(self) -> bool:
        """Is the record a primary alignment?

        This is determined from the [`tp` tag][mm2-tags].

        > *Note: Supplementary alignments will also return `True`.*

        ## Example
        ```py
        from pafpy import PafRecord, Strand, Tag

        tag = Tag.from_str("tp:A:P")
        record = PafRecord(strand=Strand.Forward, tags={tag.tag: tag})
        assert record.is_primary()
        ```

        ## Errors
        If the value in the `tp` tag is unknown, a `ValueError` exception will be
        raised.

        [mm2-tags]: https://lh3.github.io/minimap2/minimap2.html#10
        """
        if self.is_unmapped():
            return False

        aln_tag = self.get_tag("tp")
        if aln_tag is None:
            raise ValueError("tp tag not in record.")
        else:
            aln_type = AlignmentType(aln_tag.value[0].upper())

        return aln_type is AlignmentType.Primary

    def is_secondary(self) -> bool:
        """Is the record a secondary alignment?

        This is determined from the [`tp` tag][mm2-tags].

        > *Note: Supplementary alignments will return `False` as they are considered
        primary.*

        ## Example
        ```py
        from pafpy import PafRecord, Strand, Tag

        tag = Tag.from_str("tp:A:S")
        record = PafRecord(strand=Strand.Forward, tags={tag.tag: tag})
        assert record.is_secondary()
        ```

        ## Errors
        If the value in the `tp` tag is unknown, a `ValueError` exception will be
        raised.

        [mm2-tags]: https://lh3.github.io/minimap2/minimap2.html#10
        """
        if self.is_unmapped():
            return False

        aln_tag = self.get_tag("tp")
        if aln_tag is None:
            raise ValueError("tp tag not in record.")
        else:
            aln_type = AlignmentType(aln_tag.value[0].upper())

        return aln_type is AlignmentType.Secondary

    def is_inversion(self) -> bool:
        """Is the alignment an inversion?

        This is determined from the [`tp` tag][mm2-tags]. For more information about
        inversions (from minimap2) refer to the [minimap2 alignment options][aln-opts].

        ## Example
        ```py
        from pafpy import PafRecord, Strand, Tag

        tag = Tag.from_str("tp:A:I")
        record = PafRecord(strand=Strand.Forward, tags={tag.tag: tag})
        assert record.is_inversion()
        ```

        ## Errors
        If the value in the `tp` tag is unknown, a `ValueError` exception will be
        raised.

        [mm2-tags]: https://lh3.github.io/minimap2/minimap2.html#10
        [aln-opts]: https://lh3.github.io/minimap2/minimap2.html#6
        """
        if self.is_unmapped():
            return False

        aln_tag = self.get_tag("tp")
        if aln_tag is None:
            raise ValueError("tp tag not in record.")
        else:
            aln_type = AlignmentType(aln_tag.value[0].upper())

        return aln_type is AlignmentType.Inversion

    def get_tag(self, tag: str, default: Optional[Tag] = None) -> Optional[Tag]:
        """Retrieve a tag from the record if it is present; otherwise, return `default`.

        If `default` is not specified, `None` will be used as the default.

        ## Example
        ```py
        from pafpy import PafRecord, Tag

        # tag is present
        expected = Tag.from_str("de:f:0.1")
        tags = {"de": expected}
        record = PafRecord(tags=tags)
        tag = "de"

        actual = record.get_tag(tag)

        assert actual == expected

        # tag is not present, returns default
        tag = "NM"
        default = Tag.from_str("NM:i:0")

        actual = record.get_tag(tag, default=default)

        assert actual == default
        ```
        """
        return default if self.tags is None else self.tags.get(tag, default)

Classes

class AlignmentType (value, names=None, *, module=None, qualname=None, type=None, start=1)

An enum for storing mappings between the value in the tp tag and the type of alignment this value indicates.

Expand source code
class AlignmentType(Enum):
    """An enum for storing mappings between the value in the `tp` tag and the type of
    alignment this value indicates.
    """

    Primary = "P"
    Secondary = "S"
    Inversion = "I"
    Unknown = "*"

Ancestors

  • enum.Enum

Class variables

var Inversion
var Primary
var Secondary
var Unknown
class MalformattedRecord (...)

An exception indicating that a PafRecord is not in the expected format.

Expand source code
class MalformattedRecord(Exception):
    """An exception indicating that a `PafRecord` is not in the expected format."""

    pass

Ancestors

  • builtins.Exception
  • builtins.BaseException
class PafRecord (qname: str = '*', qlen: int = 0, qstart: int = 0, qend: int = 0, strand: Strand = *, tname: str = '*', tlen: int = 0, tstart: int = 0, tend: int = 0, mlen: int = 0, blen: int = 0, mapq: int = 255, tags: Union[Dict[str, pafpy.tag.Tag], NoneType] = None)

A single entry (row) in a PAF file.

There are two ways to construct a PafRecord:

  1. The default constructor: where you specify each member variable manually.
  2. The PafRecord.from_str() factory constructor: where you create a PafRecord directly from a str.

Example

from pafpy import PafRecord, Strand, Tag

# default constructor
record1 = PafRecord(
        qname="query_name",
        qlen=1239,
        qstart=65,
        qend=1239,
        strand=Strand.Forward,
        tname="target_name",
        tlen=4378340,
        tstart=2555250,
        tend=2556472,
        mlen=1139,
        blen=1228,
        mapq=60,
        tags={"NM": Tag.from_str("NM:i:8")},
)

# from_str factory constructor
line = "query_name  1239    65      1239    +       target_name     4378340 2555250 2556472 1139    1228    60      NM:i:8"
record2 = PafRecord.from_str(line)

assert record1 == record2
Expand source code
class PafRecord(NamedTuple):
    """A single entry (row) in a [PAF][paf] file.

    There are two ways to construct a `PafRecord`:
    
    1. The default constructor: where you specify each member variable manually.
    2. The `PafRecord.from_str` factory constructor: where you create a
       `PafRecord` directly from a `str`.

    ## Example
    ```py
    from pafpy import PafRecord, Strand, Tag

    # default constructor
    record1 = PafRecord(
            qname="query_name",
            qlen=1239,
            qstart=65,
            qend=1239,
            strand=Strand.Forward,
            tname="target_name",
            tlen=4378340,
            tstart=2555250,
            tend=2556472,
            mlen=1139,
            blen=1228,
            mapq=60,
            tags={"NM": Tag.from_str("NM:i:8")},
    )

    # from_str factory constructor
    line = "query_name\t1239\t65\t1239\t+\ttarget_name\t4378340\t2555250\t2556472\t1139\t1228\t60\tNM:i:8"
    record2 = PafRecord.from_str(line)

    assert record1 == record2
    ```

    [paf]: https://github.com/lh3/miniasm/blob/master/PAF.md
    """

    qname: str = "*"
    """Query sequence name."""
    qlen: int = 0
    """Query sequence length."""
    qstart: int = 0
    """Query start (0-based; BED-like; closed)."""
    qend: int = 0
    """Query end (0-based; BED-like; open)."""
    strand: Strand = Strand.Unmapped
    """`pafpy.strand.Strand.Forward` if query/target on the same strand; 
    `pafpy.strand.Strand.Reverse` if opposite; 
    `pafpy.strand.Strand.Unmapped` if unmapped."""
    tname: str = "*"
    """Target sequence name."""
    tlen: int = 0
    """Target sequence length."""
    tstart: int = 0
    """Target start on original strand (0-based)."""
    tend: int = 0
    """Target end on original strand (0-based)."""
    mlen: int = 0
    """Number of matching bases in the mapping."""
    blen: int = 0
    """Alignment block length. Number of bases, including gaps, in the mapping."""
    mapq: int = 255
    """Mapping quality (0-255; 255 for missing)."""
    tags: Optional[Tags] = None
    """[SAM-like optional fields (tags)](https://samtools.github.io/hts-specs/SAMtags.pdf). 
    It is recommended to use `PafRecord.get_tag` to retrieve individual tags."""

    def __str__(self) -> str:
        tag_str = "" if self.tags is None else DELIM.join(map(str, self.tags.values()))
        fields = [
            self.qname,
            self.qlen,
            self.qstart,
            self.qend,
            self.strand,
            self.tname,
            self.tlen,
            self.tstart,
            self.tend,
            self.mlen,
            self.blen,
            self.mapq,
            tag_str,
        ]
        return DELIM.join(map(str, fields)).rstrip()

    @staticmethod
    def from_str(line: str) -> "PafRecord":
        """Construct a `PafRecord` from a string.

        > *Note: If there are duplicate SAM-like tags, only the last one will be
        retained.*

        ## Example
        ```py
        from pafpy import PafRecord

        line = "query_name\t123\t65\t123\t+\ttname\t43783\t25552\t25564\t1139\t1228\t60"
        record = PafRecord.from_str(line)

        assert record.qname == "query_name"
        assert record.mapq == 60
        ```

        ## Errors
        - If there are less than the expected number of fields (12), this function will
        raise a `MalformattedRecord` exception.
        - If there is an invalid tag, an `pafpy.tag.InvalidTagFormat` exception will
        be raised.
        """
        fields = line.rstrip().split(DELIM)
        if len(fields) < MIN_FIELDS:
            raise MalformattedRecord(
                f"Expected {MIN_FIELDS} fields, but got {len(fields)}\n{line}"
            )
        tags: Tags = dict()
        for tag_str in fields[12:]:
            tag = Tag.from_str(tag_str)
            tags[tag.tag] = tag

        return PafRecord(
            qname=fields[0],
            qlen=int(fields[1]),
            qstart=int(fields[2]),
            qend=int(fields[3]),
            strand=Strand(fields[4]),
            tname=fields[5],
            tlen=int(fields[6]),
            tstart=int(fields[7]),
            tend=int(fields[8]),
            mlen=int(fields[9]),
            blen=int(fields[10]),
            mapq=int(fields[11]),
            tags=tags or None,
        )

    @property
    def query_aligned_length(self) -> int:
        """Length of the aligned query sequence.

        This is equal to the absolute value of `PafRecord.qend` - `PafRecord.qstart`.
        """
        return abs(self.qend - self.qstart)

    @property
    def query_coverage(self) -> float:
        """Proportion of the query sequence involved in the alignment.

        This is equal to `PafRecord.query_aligned_length` - `PafRecord.qlen`

        ## Example
        ```py
        from pafpy import PafRecord

        record = PafRecord(qlen=10, qstart=5, qend=9)
        assert record.query_coverage == 0.4
        ```
        """
        try:
            return self.query_aligned_length / self.qlen
        except ZeroDivisionError:
            return 0.0

    @property
    def target_coverage(self) -> float:
        """Proportion of the target sequence involved in the alignment.

        This is equal to `PafRecord.target_aligned_length` - `PafRecord.tlen`

        ## Example
        ```py
        from pafpy import PafRecord

        record = PafRecord(tlen=10, tstart=5, tend=9)
        assert record.target_coverage == 0.4
        ```
        """
        try:
            return self.target_aligned_length / self.tlen
        except ZeroDivisionError:
            return 0.0

    @property
    def target_aligned_length(self) -> int:
        """Length of the aligned target sequence.

        This is equal to the absolute value of `PafRecord.tend` - `PafRecord.tstart`.
        """
        return abs(self.tend - self.tstart)

    @property
    def relative_length(self) -> float:
        """Relative (aligned) length of the query sequence to the target.

        This is equal to `PafRecord.query_aligned_length` -
        `PafRecord.target_aligned_length`.

        ## Example
        ```py
        from pafpy import PafRecord

        record = PafRecord(qlen=50, qstart=10, qend=20, tlen=100, tstart=50, tend=90)
        assert record.relative_length == 0.25
        ```
        """
        try:
            return self.query_aligned_length / self.target_aligned_length
        except ZeroDivisionError:
            return 0.0

    def blast_identity(self) -> float:
        """Calculates the [BLAST identity][blast] for the record.

        BLAST identity is defined as the number of matching bases (`PafRecord.mlen`)
        over the number of alignment columns (`PafRecord.blen`).

         > *Note: If your PAF file was produced by minimap2, it is strongly advised
        that you ensure either the `-c` or `--cs` options were used when generating the
        alignment; otherwise the BLAST identity will be very inaccurate. See the
        [minimap2 FAQ][faq] for more information.*

        ## Example
        ```py
        from pafpy import PafRecord

        record = PafRecord(
            mlen=43,  # number of matches
            blen=50,  # number of alignment columns
        )
        assert record.blast_identity() == 0.86
        ```
        [blast]: https://lh3.github.io/2018/11/25/on-the-definition-of-sequence-identity#blast-identity
        [faq]: https://github.com/lh3/minimap2/blob/master/FAQ.md#1-alignment-different-with-option--a-or--c
        """
        try:
            return self.mlen / self.blen
        except ZeroDivisionError:
            return 0.0

    def is_unmapped(self) -> bool:
        """Is the record unmapped?

        A record is considered unmapped if the strand is `*`
        (`pafpy.strand.Strand.Unmapped`) - as per the minimap2
        [`--paf-no-hit`][io-opts] parameter behaviour.

        ## Example
        ```py
        from pafpy import PafRecord, Strand

        record = PafRecord(strand=Strand("*"))
        assert record.is_unmapped()
        ```

        [io-opts]: https://lh3.github.io/minimap2/minimap2.html#7
        """
        return self.strand is Strand.Unmapped

    def is_primary(self) -> bool:
        """Is the record a primary alignment?

        This is determined from the [`tp` tag][mm2-tags].

        > *Note: Supplementary alignments will also return `True`.*

        ## Example
        ```py
        from pafpy import PafRecord, Strand, Tag

        tag = Tag.from_str("tp:A:P")
        record = PafRecord(strand=Strand.Forward, tags={tag.tag: tag})
        assert record.is_primary()
        ```

        ## Errors
        If the value in the `tp` tag is unknown, a `ValueError` exception will be
        raised.

        [mm2-tags]: https://lh3.github.io/minimap2/minimap2.html#10
        """
        if self.is_unmapped():
            return False

        aln_tag = self.get_tag("tp")
        if aln_tag is None:
            raise ValueError("tp tag not in record.")
        else:
            aln_type = AlignmentType(aln_tag.value[0].upper())

        return aln_type is AlignmentType.Primary

    def is_secondary(self) -> bool:
        """Is the record a secondary alignment?

        This is determined from the [`tp` tag][mm2-tags].

        > *Note: Supplementary alignments will return `False` as they are considered
        primary.*

        ## Example
        ```py
        from pafpy import PafRecord, Strand, Tag

        tag = Tag.from_str("tp:A:S")
        record = PafRecord(strand=Strand.Forward, tags={tag.tag: tag})
        assert record.is_secondary()
        ```

        ## Errors
        If the value in the `tp` tag is unknown, a `ValueError` exception will be
        raised.

        [mm2-tags]: https://lh3.github.io/minimap2/minimap2.html#10
        """
        if self.is_unmapped():
            return False

        aln_tag = self.get_tag("tp")
        if aln_tag is None:
            raise ValueError("tp tag not in record.")
        else:
            aln_type = AlignmentType(aln_tag.value[0].upper())

        return aln_type is AlignmentType.Secondary

    def is_inversion(self) -> bool:
        """Is the alignment an inversion?

        This is determined from the [`tp` tag][mm2-tags]. For more information about
        inversions (from minimap2) refer to the [minimap2 alignment options][aln-opts].

        ## Example
        ```py
        from pafpy import PafRecord, Strand, Tag

        tag = Tag.from_str("tp:A:I")
        record = PafRecord(strand=Strand.Forward, tags={tag.tag: tag})
        assert record.is_inversion()
        ```

        ## Errors
        If the value in the `tp` tag is unknown, a `ValueError` exception will be
        raised.

        [mm2-tags]: https://lh3.github.io/minimap2/minimap2.html#10
        [aln-opts]: https://lh3.github.io/minimap2/minimap2.html#6
        """
        if self.is_unmapped():
            return False

        aln_tag = self.get_tag("tp")
        if aln_tag is None:
            raise ValueError("tp tag not in record.")
        else:
            aln_type = AlignmentType(aln_tag.value[0].upper())

        return aln_type is AlignmentType.Inversion

    def get_tag(self, tag: str, default: Optional[Tag] = None) -> Optional[Tag]:
        """Retrieve a tag from the record if it is present; otherwise, return `default`.

        If `default` is not specified, `None` will be used as the default.

        ## Example
        ```py
        from pafpy import PafRecord, Tag

        # tag is present
        expected = Tag.from_str("de:f:0.1")
        tags = {"de": expected}
        record = PafRecord(tags=tags)
        tag = "de"

        actual = record.get_tag(tag)

        assert actual == expected

        # tag is not present, returns default
        tag = "NM"
        default = Tag.from_str("NM:i:0")

        actual = record.get_tag(tag, default=default)

        assert actual == default
        ```
        """
        return default if self.tags is None else self.tags.get(tag, default)

Ancestors

  • builtins.tuple

Static methods

def from_str(line: str) -> PafRecord

Construct a PafRecord from a string.

Note: If there are duplicate SAM-like tags, only the last one will be retained.

Example

from pafpy import PafRecord

line = "query_name      123     65      123     +       tname   43783   25552   25564   1139    1228    60"
record = PafRecord.from_str(line)

assert record.qname == "query_name"
assert record.mapq == 60

Errors

  • If there are less than the expected number of fields (12), this function will raise a MalformattedRecord exception.
  • If there is an invalid tag, an InvalidTagFormat exception will be raised.
Expand source code
@staticmethod
def from_str(line: str) -> "PafRecord":
    """Construct a `PafRecord` from a string.

    > *Note: If there are duplicate SAM-like tags, only the last one will be
    retained.*

    ## Example
    ```py
    from pafpy import PafRecord

    line = "query_name\t123\t65\t123\t+\ttname\t43783\t25552\t25564\t1139\t1228\t60"
    record = PafRecord.from_str(line)

    assert record.qname == "query_name"
    assert record.mapq == 60
    ```

    ## Errors
    - If there are less than the expected number of fields (12), this function will
    raise a `MalformattedRecord` exception.
    - If there is an invalid tag, an `pafpy.tag.InvalidTagFormat` exception will
    be raised.
    """
    fields = line.rstrip().split(DELIM)
    if len(fields) < MIN_FIELDS:
        raise MalformattedRecord(
            f"Expected {MIN_FIELDS} fields, but got {len(fields)}\n{line}"
        )
    tags: Tags = dict()
    for tag_str in fields[12:]:
        tag = Tag.from_str(tag_str)
        tags[tag.tag] = tag

    return PafRecord(
        qname=fields[0],
        qlen=int(fields[1]),
        qstart=int(fields[2]),
        qend=int(fields[3]),
        strand=Strand(fields[4]),
        tname=fields[5],
        tlen=int(fields[6]),
        tstart=int(fields[7]),
        tend=int(fields[8]),
        mlen=int(fields[9]),
        blen=int(fields[10]),
        mapq=int(fields[11]),
        tags=tags or None,
    )

Instance variables

var blen : int

Alignment block length. Number of bases, including gaps, in the mapping.

var mapq : int

Mapping quality (0-255; 255 for missing).

var mlen : int

Number of matching bases in the mapping.

var qend : int

Query end (0-based; BED-like; open).

var qlen : int

Query sequence length.

var qname : str

Query sequence name.

var qstart : int

Query start (0-based; BED-like; closed).

var query_aligned_length : int

Length of the aligned query sequence.

This is equal to the absolute value of PafRecord.qend - PafRecord.qstart.

Expand source code
@property
def query_aligned_length(self) -> int:
    """Length of the aligned query sequence.

    This is equal to the absolute value of `PafRecord.qend` - `PafRecord.qstart`.
    """
    return abs(self.qend - self.qstart)
var query_coverage : float

Proportion of the query sequence involved in the alignment.

This is equal to PafRecord.query_aligned_length - PafRecord.qlen

Example

from pafpy import PafRecord

record = PafRecord(qlen=10, qstart=5, qend=9)
assert record.query_coverage == 0.4
Expand source code
@property
def query_coverage(self) -> float:
    """Proportion of the query sequence involved in the alignment.

    This is equal to `PafRecord.query_aligned_length` - `PafRecord.qlen`

    ## Example
    ```py
    from pafpy import PafRecord

    record = PafRecord(qlen=10, qstart=5, qend=9)
    assert record.query_coverage == 0.4
    ```
    """
    try:
        return self.query_aligned_length / self.qlen
    except ZeroDivisionError:
        return 0.0
var relative_length : float

Relative (aligned) length of the query sequence to the target.

This is equal to PafRecord.query_aligned_length - PafRecord.target_aligned_length.

Example

from pafpy import PafRecord

record = PafRecord(qlen=50, qstart=10, qend=20, tlen=100, tstart=50, tend=90)
assert record.relative_length == 0.25
Expand source code
@property
def relative_length(self) -> float:
    """Relative (aligned) length of the query sequence to the target.

    This is equal to `PafRecord.query_aligned_length` -
    `PafRecord.target_aligned_length`.

    ## Example
    ```py
    from pafpy import PafRecord

    record = PafRecord(qlen=50, qstart=10, qend=20, tlen=100, tstart=50, tend=90)
    assert record.relative_length == 0.25
    ```
    """
    try:
        return self.query_aligned_length / self.target_aligned_length
    except ZeroDivisionError:
        return 0.0
var strandStrand

Strand.Forward if query/target on the same strand; Strand.Reverse if opposite; Strand.Unmapped if unmapped.

var tags : Union[Dict[str, Tag], NoneType]

SAM-like optional fields (tags). It is recommended to use PafRecord.get_tag() to retrieve individual tags.

var target_aligned_length : int

Length of the aligned target sequence.

This is equal to the absolute value of PafRecord.tend - PafRecord.tstart.

Expand source code
@property
def target_aligned_length(self) -> int:
    """Length of the aligned target sequence.

    This is equal to the absolute value of `PafRecord.tend` - `PafRecord.tstart`.
    """
    return abs(self.tend - self.tstart)
var target_coverage : float

Proportion of the target sequence involved in the alignment.

This is equal to PafRecord.target_aligned_length - PafRecord.tlen

Example

from pafpy import PafRecord

record = PafRecord(tlen=10, tstart=5, tend=9)
assert record.target_coverage == 0.4
Expand source code
@property
def target_coverage(self) -> float:
    """Proportion of the target sequence involved in the alignment.

    This is equal to `PafRecord.target_aligned_length` - `PafRecord.tlen`

    ## Example
    ```py
    from pafpy import PafRecord

    record = PafRecord(tlen=10, tstart=5, tend=9)
    assert record.target_coverage == 0.4
    ```
    """
    try:
        return self.target_aligned_length / self.tlen
    except ZeroDivisionError:
        return 0.0
var tend : int

Target end on original strand (0-based).

var tlen : int

Target sequence length.

var tname : str

Target sequence name.

var tstart : int

Target start on original strand (0-based).

Methods

def blast_identity(self) -> float

Calculates the BLAST identity for the record.

BLAST identity is defined as the number of matching bases (PafRecord.mlen) over the number of alignment columns (PafRecord.blen).

Note: If your PAF file was produced by minimap2, it is strongly advised that you ensure either the -c or --cs options were used when generating the alignment; otherwise the BLAST identity will be very inaccurate. See the minimap2 FAQ for more information.

Example

from pafpy import PafRecord

record = PafRecord(
    mlen=43,  # number of matches
    blen=50,  # number of alignment columns
)
assert record.blast_identity() == 0.86
Expand source code
def blast_identity(self) -> float:
    """Calculates the [BLAST identity][blast] for the record.

    BLAST identity is defined as the number of matching bases (`PafRecord.mlen`)
    over the number of alignment columns (`PafRecord.blen`).

     > *Note: If your PAF file was produced by minimap2, it is strongly advised
    that you ensure either the `-c` or `--cs` options were used when generating the
    alignment; otherwise the BLAST identity will be very inaccurate. See the
    [minimap2 FAQ][faq] for more information.*

    ## Example
    ```py
    from pafpy import PafRecord

    record = PafRecord(
        mlen=43,  # number of matches
        blen=50,  # number of alignment columns
    )
    assert record.blast_identity() == 0.86
    ```
    [blast]: https://lh3.github.io/2018/11/25/on-the-definition-of-sequence-identity#blast-identity
    [faq]: https://github.com/lh3/minimap2/blob/master/FAQ.md#1-alignment-different-with-option--a-or--c
    """
    try:
        return self.mlen / self.blen
    except ZeroDivisionError:
        return 0.0
def get_tag(self, tag: str, default: Union[pafpy.tag.Tag, NoneType] = None) -> Union[Tag, NoneType]

Retrieve a tag from the record if it is present; otherwise, return default.

If default is not specified, None will be used as the default.

Example

from pafpy import PafRecord, Tag

# tag is present
expected = Tag.from_str("de:f:0.1")
tags = {"de": expected}
record = PafRecord(tags=tags)
tag = "de"

actual = record.get_tag(tag)

assert actual == expected

# tag is not present, returns default
tag = "NM"
default = Tag.from_str("NM:i:0")

actual = record.get_tag(tag, default=default)

assert actual == default
Expand source code
def get_tag(self, tag: str, default: Optional[Tag] = None) -> Optional[Tag]:
    """Retrieve a tag from the record if it is present; otherwise, return `default`.

    If `default` is not specified, `None` will be used as the default.

    ## Example
    ```py
    from pafpy import PafRecord, Tag

    # tag is present
    expected = Tag.from_str("de:f:0.1")
    tags = {"de": expected}
    record = PafRecord(tags=tags)
    tag = "de"

    actual = record.get_tag(tag)

    assert actual == expected

    # tag is not present, returns default
    tag = "NM"
    default = Tag.from_str("NM:i:0")

    actual = record.get_tag(tag, default=default)

    assert actual == default
    ```
    """
    return default if self.tags is None else self.tags.get(tag, default)
def is_inversion(self) -> bool

Is the alignment an inversion?

This is determined from the tp tag. For more information about inversions (from minimap2) refer to the minimap2 alignment options.

Example

from pafpy import PafRecord, Strand, Tag

tag = Tag.from_str("tp:A:I")
record = PafRecord(strand=Strand.Forward, tags={tag.tag: tag})
assert record.is_inversion()

Errors

If the value in the tp tag is unknown, a ValueError exception will be raised.

Expand source code
def is_inversion(self) -> bool:
    """Is the alignment an inversion?

    This is determined from the [`tp` tag][mm2-tags]. For more information about
    inversions (from minimap2) refer to the [minimap2 alignment options][aln-opts].

    ## Example
    ```py
    from pafpy import PafRecord, Strand, Tag

    tag = Tag.from_str("tp:A:I")
    record = PafRecord(strand=Strand.Forward, tags={tag.tag: tag})
    assert record.is_inversion()
    ```

    ## Errors
    If the value in the `tp` tag is unknown, a `ValueError` exception will be
    raised.

    [mm2-tags]: https://lh3.github.io/minimap2/minimap2.html#10
    [aln-opts]: https://lh3.github.io/minimap2/minimap2.html#6
    """
    if self.is_unmapped():
        return False

    aln_tag = self.get_tag("tp")
    if aln_tag is None:
        raise ValueError("tp tag not in record.")
    else:
        aln_type = AlignmentType(aln_tag.value[0].upper())

    return aln_type is AlignmentType.Inversion
def is_primary(self) -> bool

Is the record a primary alignment?

This is determined from the tp tag.

Note: Supplementary alignments will also return True.

Example

from pafpy import PafRecord, Strand, Tag

tag = Tag.from_str("tp:A:P")
record = PafRecord(strand=Strand.Forward, tags={tag.tag: tag})
assert record.is_primary()

Errors

If the value in the tp tag is unknown, a ValueError exception will be raised.

Expand source code
def is_primary(self) -> bool:
    """Is the record a primary alignment?

    This is determined from the [`tp` tag][mm2-tags].

    > *Note: Supplementary alignments will also return `True`.*

    ## Example
    ```py
    from pafpy import PafRecord, Strand, Tag

    tag = Tag.from_str("tp:A:P")
    record = PafRecord(strand=Strand.Forward, tags={tag.tag: tag})
    assert record.is_primary()
    ```

    ## Errors
    If the value in the `tp` tag is unknown, a `ValueError` exception will be
    raised.

    [mm2-tags]: https://lh3.github.io/minimap2/minimap2.html#10
    """
    if self.is_unmapped():
        return False

    aln_tag = self.get_tag("tp")
    if aln_tag is None:
        raise ValueError("tp tag not in record.")
    else:
        aln_type = AlignmentType(aln_tag.value[0].upper())

    return aln_type is AlignmentType.Primary
def is_secondary(self) -> bool

Is the record a secondary alignment?

This is determined from the tp tag.

Note: Supplementary alignments will return False as they are considered primary.

Example

from pafpy import PafRecord, Strand, Tag

tag = Tag.from_str("tp:A:S")
record = PafRecord(strand=Strand.Forward, tags={tag.tag: tag})
assert record.is_secondary()

Errors

If the value in the tp tag is unknown, a ValueError exception will be raised.

Expand source code
def is_secondary(self) -> bool:
    """Is the record a secondary alignment?

    This is determined from the [`tp` tag][mm2-tags].

    > *Note: Supplementary alignments will return `False` as they are considered
    primary.*

    ## Example
    ```py
    from pafpy import PafRecord, Strand, Tag

    tag = Tag.from_str("tp:A:S")
    record = PafRecord(strand=Strand.Forward, tags={tag.tag: tag})
    assert record.is_secondary()
    ```

    ## Errors
    If the value in the `tp` tag is unknown, a `ValueError` exception will be
    raised.

    [mm2-tags]: https://lh3.github.io/minimap2/minimap2.html#10
    """
    if self.is_unmapped():
        return False

    aln_tag = self.get_tag("tp")
    if aln_tag is None:
        raise ValueError("tp tag not in record.")
    else:
        aln_type = AlignmentType(aln_tag.value[0].upper())

    return aln_type is AlignmentType.Secondary
def is_unmapped(self) -> bool

Is the record unmapped?

A record is considered unmapped if the strand is * (Strand.Unmapped) - as per the minimap2 --paf-no-hit parameter behaviour.

Example

from pafpy import PafRecord, Strand

record = PafRecord(strand=Strand("*"))
assert record.is_unmapped()
Expand source code
def is_unmapped(self) -> bool:
    """Is the record unmapped?

    A record is considered unmapped if the strand is `*`
    (`pafpy.strand.Strand.Unmapped`) - as per the minimap2
    [`--paf-no-hit`][io-opts] parameter behaviour.

    ## Example
    ```py
    from pafpy import PafRecord, Strand

    record = PafRecord(strand=Strand("*"))
    assert record.is_unmapped()
    ```

    [io-opts]: https://lh3.github.io/minimap2/minimap2.html#7
    """
    return self.strand is Strand.Unmapped