Python pysam 模块,faidx() 实例源码

我们从Python开源项目中,提取了以下8个代码示例,用于说明如何使用pysam.faidx()

项目:CSI    作者:YangLab    | 项目源码 | 文件源码
def check_fasta(fa_f, pysam_flag=True):
    if not os.path.isfile(fa_f + '.fai'):
        pysam.faidx(fa_f)
    if pysam_flag:  # return pysam FastaFile object
        fa = pysam.FastaFile(fa_f)
        return fa
    else:  # return fasta file path
        return fa_f
项目:iCount    作者:tomazc    | 项目源码 | 文件源码
def chrom_length(fasta_in):
    """
    Compute chromosome lengths of fasta file and store them into a file.

    More about the .fai file format can be found here:
    http://www.htslib.org/doc/faidx.html

    Parameters
    ----------
    fasta_in : str
        Path to genome FASTA file (can be .gz).

    Returns
    -------
    str
        Absolute path to output file.

    """
    iCount.log_inputs(LOGGER, level=logging.INFO)

    temp = iCount.files.decompress_to_tempfile(fasta_in)
    pysam.faidx(temp)  # pylint: disable=no-member

    fai_file = os.path.abspath(fasta_in + '.fai')
    shutil.move(temp + '.fai', fai_file)
    LOGGER.info('Fai file saved to : %s', fai_file)
    return fai_file
项目:MANTIS    作者:OSU-SRLab    | 项目源码 | 文件源码
def get_sequence(self, locus):
        if self.genome_path is None:
            tprint('MSILocusLoader error: Can not use .get_sequence() without' 
                + ' specifying the path to the reference genome!')
            exit(1)

        sequence = []
        for subseq in pysam.faidx(self.genome_path, locus)[1:]:
            sequence.append(subseq.strip())
        return ''.join(sequence).upper()
        # end .get_sequence()    

    # end MSILocusLoader class definition.
项目:lncScore    作者:WGLab    | 项目源码 | 文件源码
def index_fasta(infile):
    if os.path.isfile(infile):
        pass
    else:
        print >>sys.stderr, "Indexing " + infile + ' ...',
        pysam.faidx(infile)
        print >>sys.stderr, "Done!"
#==============================================================================
# transfer the .bed file into a .fasta file
#==============================================================================
项目:lncScore    作者:WGLab    | 项目源码 | 文件源码
def bed_to_fasta(inbed,refgenome):
    '''extract features of sequence from bed line'''

    transtab = string.maketrans("ACGTNX","TGCANX")
    mRNA_seq = ''
    if inbed.strip():
        try:
            fields = inbed.split()
            chrom = fields[0]
            tx_start = int( fields[1] )
            geneName = fields[3]
            strand = fields[5].replace(" ","_")         
            exon_starts = map(int, fields[11].rstrip( ',\n' ).split( ',' ) )
            exon_starts = map((lambda x: x + tx_start ), exon_starts)
            exon_ends = map( int, fields[10].rstrip( ',\n' ).split( ',' ) )
            exon_ends = map((lambda x, y: x + y ), exon_starts, exon_ends);   
        except:
            print >>sys.stderr,"Wrong format!" + inbed 
            return None
        for st,end in zip(exon_starts, exon_ends):
            exon_coord = chrom + ':' + str(st +1) + '-' + str(end)
            tmp = pysam.faidx(refgenome,exon_coord)
            mRNA_seq += ''.join([i.rstrip('\n\r') for i in tmp[1:]])
        if strand =='-':
            mRNA_seq = mRNA_seq.upper().translate(transtab)[::-1]               
        return (geneName, mRNA_seq)
#==============================================================================
# transfer multiple-line fasta format into two-line fasta format
#==============================================================================
项目:lncScore    作者:WGLab    | 项目源码 | 文件源码
def index_fasta(infile):
    if os.path.isfile(infile):
        pass
    else:
        print >>sys.stderr, "Indexing " + infile + ' ...',
        pysam.faidx(infile)
        print >>sys.stderr, "Done!"
#==============================================================================
# transfer the .bed file into a .fasta file
#==============================================================================
项目:lncScore    作者:WGLab    | 项目源码 | 文件源码
def bed_to_fasta(inbed,refgenome):
    '''extract features of sequence from bed line'''

    transtab = string.maketrans("ACGTNX","TGCANX")
    mRNA_seq = ''
    if inbed.strip():
        try:
            fields = inbed.split()
            chrom = fields[0]
            tx_start = int( fields[1] )
            geneName = fields[3]
            strand = fields[5].replace(" ","_")         
            exon_starts = map(int, fields[11].rstrip( ',\n' ).split( ',' ) )
            exon_starts = map((lambda x: x + tx_start ), exon_starts)
            exon_ends = map( int, fields[10].rstrip( ',\n' ).split( ',' ) )
            exon_ends = map((lambda x, y: x + y ), exon_starts, exon_ends);   
        except:
            print >>sys.stderr,"Wrong format!" + inbed 
            return None
        for st,end in zip(exon_starts, exon_ends):
            exon_coord = chrom + ':' + str(st +1) + '-' + str(end)
            tmp = pysam.faidx(refgenome,exon_coord)
            mRNA_seq += ''.join([i.rstrip('\n\r') for i in tmp[1:]])
        if strand =='-':
            mRNA_seq = mRNA_seq.upper().translate(transtab)[::-1]               
        return (geneName, mRNA_seq)
#==============================================================================
# transfer multiple-line fasta format into two-line fasta format
#==============================================================================
项目:FALCON-polish    作者:PacificBiosciences    | 项目源码 | 文件源码
def run(referenceset, fastq, gff, fasta, contigset, alignmentset, options, log_level):
    #'--log-file foo.log',
    #'--verbose',
    #'--debug', # requires 'ipdb'
    #'-j NWORKERS',
    #'--algorithm quiver',
    #'--diploid', # binary
    #'--minConfidence 40',
    #'--minCoverage 5',
    #'--alignmentSetRefWindows',
    cmd = "variantCaller --log-level {log_level} {options} --referenceFilename {referenceset} -o {fastq} -o {gff} -o {fasta} {alignmentset}"
    system(cmd.format(**locals()))
    try:
        say('Converting fasta {!r} to contigset {!r}'.format(fasta, contigset))
        # Convert to contigset.xml

        import pysam
        pysam.faidx(fasta) # pylint: disable=no-member
        # I do not know why pylint does not see this defined.

        ds = ContigSet(fasta, strict=True)
        ds.write(contigset, relPaths=True)
        say('Successfully wrapped fasta {!r} in contigset {!r}'.format(fasta, contigset))
    except Exception:
        say(traceback.format_exc())
        say('Skipping conversion to contigset.')