Python pysam 模块,TabixFile() 实例源码


项目:vcfpy    作者:bihealth    | 项目源码 | 文件源码
def __init__(self, stream, path=None, tabix_path=None,
                 record_checks=None, parsed_samples=None):
        """Initialise the reader state and parse the header from ``stream``.

        :param stream: ``file``-like object to read from
        :param str path: optional path to the stream
        :param str tabix_path: optional path to the tabix file
        :param record_checks: optional iterable of checks to perform on
            records, can contain 'FORMAT' and 'INFO'
        :param parsed_samples: if set, list of samples to parse for
        """
        #: stream (``file``-like object) to read from
        self.stream = stream
        #: optional ``str`` with the path to the stream
        self.path = path
        #: optional ``str`` with path to tabix file
        self.tabix_path = tabix_path
        #: checks to perform on records, can contain 'FORMAT' and 'INFO';
        #: stored as a tuple so that ``None`` becomes an empty tuple
        self.record_checks = tuple(record_checks or [])
        #: if set, list of samples to parse for
        self.parsed_samples = parsed_samples
        #: the ``pysam.TabixFile`` used for reading from index bgzip-ed VCF;
        #: constructed on the fly
        self.tabix_file = None
        # the iterator through the Tabix file to use
        self.tabix_iter = None
        #: the parser to use
        self.parser = parser.Parser(stream, self.path, self.record_checks)
        #: the Header, parsed eagerly from the stream by the parser
        self.header = self.parser.parse_header(parsed_samples)
项目:personal-identification-pipeline    作者:TeamErlich    | 项目源码 | 文件源码
def __init__(self, filename):
        """Remember ``filename`` and open it as a ``pysam.TabixFile``.

        The file name is stored on ``self.tabix_file_name`` before the file
        is opened; the opened handle is stored on ``self.tabix``.
        """
        self.tabix_file_name = filename
        # TODO: catch TABIX exceptions
        tabix_handle = pysam.TabixFile(filename)
        self.tabix = tabix_handle
项目:pheweb    作者:statgen    | 项目源码 | 文件源码
def IndexedVariantFileReader(phenocode):
    """Yield an ``_ivfr`` reader over the tabix-indexed file for ``phenocode``.

    The header row is read first to learn the column layout: its first name
    must start with '#', which is stripped, and every name must be listed in
    ``conf.parse.per_variant_fields`` or ``conf.parse.per_assoc_fields``.
    The ``pysam.TabixFile`` stays open only while the caller holds the
    yielded reader.

    NOTE(review): this is a generator; presumably it is wrapped as a context
    manager by a decorator outside this listing -- confirm.
    """
    filepath = common_filepaths['pheno_gz'](phenocode)

    # Only the first row (the header) is needed from the plain gzip reader.
    with read_gzip(filepath) as gz_handle:
        header_row = next(csv.reader(gz_handle, dialect='pheweb-internal-dialect'))

    leading_name = header_row[0]
    assert leading_name.startswith('#')
    header_row[0] = leading_name[1:]

    for name in header_row:
        assert name in conf.parse.per_variant_fields or name in conf.parse.per_assoc_fields, (name)

    # Map each column name to its position in the row.
    colidxs = dict(zip(header_row, range(len(header_row))))

    with pysam.TabixFile(filepath, parser=None) as tabix_handle:
        indexed_reader = _ivfr(tabix_handle, colidxs)
        yield indexed_reader
项目:pheweb    作者:statgen    | 项目源码 | 文件源码
def context(self):
        """Yield an ``_mr`` reader backed by a tabix file on ``self._filepath``.

        The ``pysam.TabixFile`` is opened with ``parser=None`` and stays open
        only while the caller holds the yielded reader.
        """
        with pysam.TabixFile(self._filepath, parser=None) as tabix_handle:
            matrix_reader = _mr(
                tabix_handle,
                self._colidxs,
                self._colidxs_for_pheno,
                self._info_for_pheno,
            )
            yield matrix_reader
项目:vcfpy    作者:bihealth    | 项目源码 | 文件源码
def fetch(self, chrom_or_region, begin=None, end=None):
        """Jump to the start position of the given chromosomal position
        and limit iteration to the end position

        :param str chrom_or_region: name of the chromosome to jump to if
            begin and end are given and a samtools region string otherwise
            (e.g. "chr1:123,456-123,900").
        :param int begin: 0-based begin position (inclusive)
        :param int end: 0-based end position (exclusive)
        :return: ``self``, with ``tabix_iter`` pointing at the new iterator
        :raises ValueError: if only one of ``begin`` and ``end`` is given
        """
        # Reject a lone ``begin`` as well as a lone ``end``; the latter would
        # otherwise be silently ignored by the region fetch below.
        if (begin is None) != (end is None):
            raise ValueError('begin and end must both be None or neither')
        # close tabix file if any and is open
        if self.tabix_file and not self.tabix_file.closed:
            self.tabix_file.close()
        # open tabix file if not yet open
        if not self.tabix_file or self.tabix_file.closed:
            self.tabix_file = pysam.TabixFile(
                filename=self.path, index=self.tabix_path)
        # jump to the next position
        if begin is None:
            # no coordinates given: treat the argument as a region string
            self.tabix_iter = self.tabix_file.fetch(region=chrom_or_region)
        else:
            self.tabix_iter = self.tabix_file.fetch(
                reference=chrom_or_region, start=begin, end=end)
        return self
项目:bioframe    作者:mirnylab    | 项目源码 | 文件源码
def read_tabix(fp, chrom=None, start=None, end=None):
    """Load rows of a tabix-indexed file into a ``pandas.DataFrame``.

    The rows returned by ``TabixFile.fetch(chrom, start, end)`` are joined
    with newlines and parsed as tab-separated text without a header row.
    Column names come from the tabix file's header lines when there are any;
    otherwise pandas picks its default names.
    """
    with closing(pysam.TabixFile(fp)) as tabix_handle:
        header_fields = list(tabix_handle.header)
        column_names = header_fields if header_fields else None
        fetched_text = '\n'.join(tabix_handle.fetch(chrom, start, end))
        buffer = io.StringIO(fetched_text)
        df = pd.read_csv(buffer, sep='\t', header=None, names=column_names)
    return df
项目:grocsvs    作者:grocsvs    | 项目源码 | 文件源码
def load_fragments(options, sample, dataset, chrom=None, start=None, end=None, usecols=None, 
    # NOTE(review): the signature above is cut off in this listing (no closing
    # parenthesis or colon). ``min_reads_per_frag`` is used below but never
    # bound here -- presumably it was a trailing parameter; confirm upstream.
    #
    # Purpose, as far as the visible code shows: open a tabix-indexed
    # readclouds file, fetch the rows for chrom/start/end, parse them into a
    # pandas DataFrame and filter the result on the "num_reads" column.

    # Reject a negative start coordinate.
    if start is not None:
        if start < 0:
            raise Exception("start coord is negative: {}:{}-{}".format(chrom, start, end))
    # Reject an end coordinate that does not lie after start.
    # NOTE(review): start can still be None here when only end is given.
    if end is not None:
        if start >= end:
            raise Exception("end coord is before start: {}:{}-{}".format(chrom, start, end))

    # NOTE(review): the arguments to os.path.join are missing from this listing.
    readclouds_path = os.path.join(

    tabix = pysam.TabixFile(readclouds_path)

    # A chromosome absent from the index yields an empty frame with the
    # expected column names instead of an error.
    if chrom is not None and chrom not in tabix.contigs:
        print("MISSING:", chrom)
        return pandas.DataFrame(columns="chrom start_pos end_pos bc num_reads obs_len hap".split())

    # NOTE(review): the body of this branch is missing from this listing;
    # presumably it adds "num_reads" to usecols so the filter below can run --
    # confirm against upstream.
    if usecols is not None and "num_reads" not in usecols:

    # Join the fetched rows into one in-memory text buffer for pandas.
    s = StringIO.StringIO("\n".join(tabix.fetch(chrom, start, end)))
    readclouds = pandas.read_table(s, header=None, names=Readcloud._fields, usecols=usecols)
    readclouds["chrom"] = readclouds["chrom"].astype("string")

    # Keep only rows whose read count is strictly greater than the threshold.
    if min_reads_per_frag > 0:
        readclouds = readclouds.loc[readclouds["num_reads"]>min_reads_per_frag]

    return readclouds
项目:grocsvs    作者:grocsvs    | 项目源码 | 文件源码
def validate(self):
        # Assert that the path stored in self.bam exists on disk.
        # NOTE(review): the assert statement below is cut off in this listing --
        # the arguments to .format() and its closing parenthesis are missing.
        assert os.path.exists(self.bam), "missing bam file '{}' for sample '{}' and dataset '{}'".format(

    # @staticmethod
    # def from_longranger_dir(self, longranger_dir):
    #     fragments = os.path.join(longranger_dir,
    #         "REPORT_SINGLE_PARTITION/fork0/files/fragments.h5")

    #     bam = os.path.join(longranger_dir,
    #         "fork0/files/phased_possorted_bam.bam")

    #     phased_fragments = os.path.join(longranger_dir,
    #         "_SNPINDEL_PHASER/PHASE_SNPINDELS/fork0/files/"
    #         "fragment_phasing.tsv.gz")

    #     self.validate()

    #     return TenXDataset(bam, fragments, phased_fragments)

    # def load_phased_fragments(self, chrom=None, start=None, end=None):

    #     columns = ["chrom", "start_pos", "end_pos", "phase_set", "ps_start", 
    #                "ps_end", "bc", "h0", "h1", "hmix", "unkn"]

    #     try:
    #         tabix = pysam.TabixFile(self.phased_fragments)
    #         s = StringIO.StringIO("\n".join(tabix.fetch(chrom, start, end)))
    #         frags = pandas.read_table(s)
    #         frags.columns = columns
    #     except (IOError, ValueError):
    #         frags = pandas.DataFrame(columns=columns)

    #     return frags

    # def load_fragments(self, chrom=None, start=None, end=None):
    #     tabix = pysam.TabixFile()

        # try:
        #     fragments = utilities.read_data_frame(self.fragments)
        #     goodbcs = utilities.get_good_barcodes(fragments)
        #     fragments = fragments.loc[fragments["bc"].isin(goodbcs)]
        #     # fragments = fragments.loc[fragments["num_reads"]>5]
        #     if chrom is not None:
        #         fragments = fragments.loc[fragments["chrom"]==chrom]

        #     return fragments
        # except:
        #     logging.exception("Unable to load fragments from fragments file "
        #         "'{}'".format(self.fragments))
        #     raise