Python tqdm module: tqdm_notebook() example source code

We extracted the following 36 code examples from open-source Python projects to illustrate how to use tqdm.tqdm_notebook().

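Before the project examples, here is a minimal sketch of the API itself. It assumes a Jupyter session with ipywidgets installed; note that recent tqdm releases deprecate tqdm_notebook in favor of tqdm.notebook.tqdm.

from tqdm import tqdm_notebook
import time

# Wrap any iterable to render an HTML progress bar in the notebook
for _ in tqdm_notebook(range(100), desc='processing'):
    time.sleep(0.01)

# Or drive the bar manually when the work is not a plain loop
pbar = tqdm_notebook(total=1000, unit='B', unit_scale=True)
for _ in range(10):
    time.sleep(0.01)
    pbar.update(100)  # advance by 100 units
pbar.close()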
Project: ssbio    Author: SBRG
def pdb_downloader_and_metadata(self, outdir=None, pdb_file_type=None, force_rerun=False):
        """Download ALL mapped experimental structures to each protein's structures directory.

        Args:
            outdir (str): Path to output directory, if GEM-PRO directories were not set or other output directory is
                desired
            pdb_file_type (str): Type of PDB file to download, if not already set or other format is desired
            force_rerun (bool): If files should be re-downloaded if they already exist

        """

        if not pdb_file_type:
            pdb_file_type = self.pdb_file_type

        counter = 0
        for g in tqdm(self.genes):
            pdbs = g.protein.pdb_downloader_and_metadata(outdir=outdir, pdb_file_type=pdb_file_type, force_rerun=force_rerun)

            if pdbs:
                counter += len(pdbs)

        log.info('Updated PDB metadata dataframe. See the "df_pdb_metadata" attribute for a summary dataframe.')
        log.info('Saved {} structures total'.format(counter))
Project: pyrsss    Author: butala
def tqdm_callback(N, notebook=True):
    """
    Return a :module:`tqdm` progress bar expecting *N* iterations,
    suitable for Jupyter if *notebook* is true and for the terminal
    otherwise. The progress bar includes an additional method
    :function:`callback` (a function of one ignored parameter) meant
    to be passed as a callback function called to update the progress bar.
    """
    if notebook:
        progress_bar = tqdm.tqdm_notebook(total=N)
    else:
        progress_bar = tqdm.tqdm(total=N)
    def callback(self, i):
        self.update()
    progress_bar.callback = partial(callback, progress_bar)
    return progress_bar
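
A hypothetical usage sketch (not from pyrsss): the bar's callback attribute fits any API that invokes a one-argument callback once per completed iteration, such as scipy.optimize.minimize.

import numpy as np
from scipy.optimize import minimize

max_iter = 50  # iteration budget, used as the bar's expected total
pbar = tqdm_callback(max_iter, notebook=False)
# minimize() calls the callback once per iteration with the current
# iterate, which tqdm_callback's inner callback simply ignores
minimize(lambda x: np.sum(x ** 2), x0=np.ones(5), method='BFGS',
         callback=pbar.callback, options={'maxiter': max_iter})
pbar.close()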
Project: catalearn    Author: Catalearn
def __download_competition_file(self, competition, file_name, browser):

        url = 'https://www.kaggle.com/c/%s/download/%s' % (competition, file_name)
        res = browser.get(url, stream=True)

        total_size = int(res.headers.get('content-length', 0))

        if res.status_code != 200:
            print('error downloading %s' % file_name)
            return False

        file_name = os.path.basename(url)

        pbar = tqdm(total=total_size, unit='B', unit_scale=True, desc=file_name)
        chunk_size = 32 * 1024

        with open(file_name, 'wb') as file_handle:
            for data in res.iter_content(chunk_size):
                file_handle.write(data)
                pbar.update(len(data))  # count actual bytes; the last chunk may be short
        pbar.close()

        return True
Project: Auspex    Author: BBN-Q
def reset(self, stream=None):
        """ Reset the progress bar(s) """
        logger.debug("Update stream descriptor for progress bars.")
        if stream is not None:
            self.stream = stream
        if self.stream is None:
            logger.warning("No stream is associated with the progress bars!")
            self.axes = []
        else:
            self.axes = self.stream.descriptor.axes
        self.num = min(self.num, len(self.axes))
        self.totals = [self.stream.descriptor.num_points_through_axis(axis) for axis in range(self.num)]
        self.chunk_sizes = [max(1,self.stream.descriptor.num_points_through_axis(axis+1)) for axis in range(self.num)]
        logger.debug("Reset the progress bars to initial states.")
        self.bars   = []
        for i in range(self.num):
            if self.notebook:
                self.bars.append(tqdm_notebook(total=self.totals[i]/self.chunk_sizes[i]))
            else:
                self.bars.append(tqdm(total=self.totals[i]/self.chunk_sizes[i]))
Project: Auspex    Author: BBN-Q
def update(self):
        """ Update the status of the progress bar(s) """
        if self.stream is None:
            logger.warning("No stream is associated with the progress bars!")
            num_data = 0
        else:
            num_data = self.stream.points_taken
        logger.debug("Update the progress bars.")
        for i in range(self.num):
            if num_data == 0:
                # Reset the progress bar with a new one
                if self.notebook:
                    self.bars[i].sp(close=True)
                    self.bars[i] = tqdm_notebook(total=self.totals[i]/self.chunk_sizes[i])
                else:
                    self.bars[i].close()
                    self.bars[i] = tqdm(total=self.totals[i]/self.chunk_sizes[i])
            pos = int(10*num_data / self.chunk_sizes[i])/10.0 # One decimal is good enough
            if pos > self.bars[i].n:
                self.bars[i].update(pos - self.bars[i].n)
            num_data = num_data % self.chunk_sizes[i]
Project: Kaggle_Buddy    Author: NickYi1990
def ka_bagging_2class_or_reg_lgbm(X_train, y_train, seed, bag_round, params
                                 , X_test, using_notebook=True, num_boost_round=0):
    '''
        early version
    '''
    # create array object to hold predictions
    baggedpred = np.zeros(shape=X_test.shape[0]).astype(np.float32)
    # loop for as many times as we want bags; pick the progress bar frontend
    progress = tqdm_notebook if using_notebook else tqdm
    for n in progress(range(bag_round)):
        # shuffle first, aids in increasing variance and forces different results
        X_train, y_train = shuffle(X_train, y_train, random_state=seed + n)
        params['seed'] = seed + n
        model = lightgbm.train(params, lightgbm.Dataset(X_train, y_train), num_boost_round=num_boost_round)
        pred = model.predict(X_test)
        baggedpred += pred / bag_round

    return baggedpred
Project: kaggle-quora-question-pairs    Author: stys
def compute_sgd(data):
    logging.info('Computing SGD')

    n_splits = 10
    folder = StratifiedKFold(n_splits=n_splits, shuffle=True)
    for ix_first, ix_second in tqdm_notebook(folder.split(np.zeros(data['y_train'].shape[0]), data['y_train']),
                                             total=n_splits):
        # {'en__l1_ratio': 0.0001, 'en__alpha': 1e-05}
        model = SGDClassifier(
            loss='log',
            penalty='elasticnet',
            fit_intercept=True,
            n_iter=100,
            shuffle=True,
            n_jobs=-1,
            l1_ratio=0.0001,
            alpha=1e-05,
            class_weight=None)
        model = model.fit(data['X_train'][ix_first, :], data['y_train'][ix_first])
        data['y_train_pred'][ix_second] = logit(model.predict_proba(data['X_train'][ix_second, :])[:, 1])
        data['y_test_pred'].append(logit(model.predict_proba(data['X_test'])[:, 1]))

    data['y_test_pred'] = np.array(data['y_test_pred']).T.mean(axis=1)

    return data
Project: ssbio    Author: SBRG
def download_patric_genomes(self, ids, force_rerun=False):
        """Download genome files from PATRIC given a list of PATRIC genome IDs and load them as strains.

        Args:
            ids (str, list): PATRIC ID or list of PATRIC IDs
            force_rerun (bool): If genome files should be downloaded again even if they exist

        """
        ids = ssbio.utils.force_list(ids)

        counter = 0
        log.info('Downloading sequences from PATRIC...')
        for patric_id in tqdm(ids):
            f = ssbio.databases.patric.download_coding_sequences(patric_id=patric_id, seqtype='protein',
                                                                 outdir=self.sequences_by_organism_dir,
                                                                 force_rerun=force_rerun)
            if f:
                self.load_strain(patric_id, f)
                counter += 1
                log.debug('{}: downloaded sequence'.format(patric_id))
            else:
                log.warning('{}: unable to download sequence'.format(patric_id))

        log.info('Created {} new strain GEM-PROs, accessible at "strains" attribute'.format(counter))
Project: ssbio    Author: SBRG
def build_strain_specific_models(self, save_models=False):
        """Using the orthologous genes matrix, create and modify the strain specific models based on if orthologous
            genes exist.

        Also store the sequences directly in the reference GEM-PRO protein sequence attribute for the strains.
        """

        if len(self.df_orthology_matrix) == 0:
            raise RuntimeError('Empty orthology matrix')

        # Create an emptied copy of the reference GEM-PRO
        for strain_gempro in tqdm(self.strains):
            log.debug('{}: building strain specific model'.format(strain_gempro.id))

            # For each genome, load the metabolic model or genes from the reference GEM-PRO
            logging.disable(logging.WARNING)
            if self._empty_reference_gempro.model:
                strain_gempro.load_cobra_model(self._empty_reference_gempro.model)
            elif self._empty_reference_gempro.genes:
                strain_gempro.genes = [x.id for x in self._empty_reference_gempro.genes]
            logging.disable(logging.NOTSET)

            # Get a list of genes which do not have orthology in the strain
            not_in_strain = self.df_orthology_matrix[pd.isnull(self.df_orthology_matrix[strain_gempro.id])][strain_gempro.id].index.tolist()

            # Mark genes non-functional
            self._pare_down_model(strain_gempro=strain_gempro, genes_to_remove=not_in_strain)

            # Load sequences into the base and strain models
            self._load_strain_sequences(strain_gempro=strain_gempro)

            if save_models:
                cobra.io.save_json_model(model=strain_gempro.model,
                                         filename=op.join(self.model_dir, '{}.json'.format(strain_gempro.id)))
                strain_gempro.save_pickle(op.join(self.model_dir, '{}_gp.pckl'.format(strain_gempro.id)))


        log.info('Created {} new strain-specific models and loaded in sequences'.format(len(self.strains)))
Project: ssbio    Author: SBRG
def align_orthologous_genes_pairwise(self, gapopen=10, gapextend=0.5):
        """For each gene in the base strain, run a pairwise alignment for all orthologous gene sequences to it."""
        for ref_gene in tqdm(self.reference_gempro.genes):
            if len(ref_gene.protein.sequences) > 1:
                alignment_dir = op.join(self.sequences_by_gene_dir, ref_gene.id)
                if not op.exists(alignment_dir):
                    os.mkdir(alignment_dir)
                ref_gene.protein.pairwise_align_sequences_to_representative(gapopen=gapopen, gapextend=gapextend,
                                                                            outdir=alignment_dir, parse=True)
Project: ssbio    Author: SBRG
def set_representative_sequence(self, force_rerun=False):
        """Automatically consolidate loaded sequences (manual, UniProt, or KEGG) and set a single representative sequence.

        Manually set representative sequences override all existing mappings. UniProt mappings override KEGG mappings
        except when KEGG mappings have PDBs associated with them and UniProt doesn't.

        Args:
            force_rerun (bool): Set to True to recheck stored sequences

        """

        # TODO: rethink use of multiple database sources - may lead to inconsistency with genome sources

        sequence_missing = []
        successfully_mapped_counter = 0
        for g in tqdm(self.genes):
            repseq = g.protein.set_representative_sequence(force_rerun=force_rerun)

            if not repseq:
                sequence_missing.append(g.id)
            elif not repseq.sequence_file:
                sequence_missing.append(g.id)
            else:
                successfully_mapped_counter += 1

        log.info('{}/{}: number of genes with a representative sequence'.format(len(self.genes_with_a_representative_sequence),
                                                                                len(self.genes)))
        log.info('See the "df_representative_sequences" attribute for a summary dataframe.')
Project: ssbio    Author: SBRG
def get_sequence_properties(self, representatives_only=True):
        """Run Biopython ProteinAnalysis and EMBOSS pepstats to summarize basic statistics of all protein sequences.
        Results are stored in the protein's respective SeqProp objects at ``.annotations``

        Args:
            representatives_only (bool): If analysis should only be run on the representative sequences

        """
        for g in tqdm(self.genes):
            g.protein.get_sequence_properties(representative_only=representatives_only)
Project: ssbio    Author: SBRG
def blast_seqs_to_pdb(self, seq_ident_cutoff=0, evalue=0.0001, all_genes=False, display_link=False,
                          outdir=None, force_rerun=False):
        """BLAST each representative protein sequence to the PDB. Saves raw BLAST results (XML files).

        Args:
            seq_ident_cutoff (float, optional): Cutoff results based on percent coverage (in decimal form)
            evalue (float, optional): Cutoff for the E-value - filters for significant hits. 0.001 is liberal,
                0.0001 is stringent (default).
            all_genes (bool): If all genes should be BLASTed, or only those without any structures currently mapped
            display_link (bool, optional): Set to True if links to the HTML results should be displayed
            outdir (str): Path to output directory of downloaded files, must be set if GEM-PRO directories
                were not created initially
            force_rerun (bool, optional): If existing BLAST results should not be used, set to True. Default is False

        """
        counter = 0

        for g in tqdm(self.genes_with_a_representative_sequence):
            # If all_genes=False, BLAST only genes without a uniprot -> pdb mapping
            if g.protein.num_structures_experimental > 0 and not all_genes and not force_rerun:
                log.debug('{}: skipping BLAST, {} experimental structures already mapped '
                          'and all_genes flag is False'.format(g.id,
                                                               g.protein.num_structures_experimental))
                continue

            # BLAST the sequence to the PDB
            new_pdbs = g.protein.blast_representative_sequence_to_pdb(seq_ident_cutoff=seq_ident_cutoff,
                                                                      evalue=evalue,
                                                                      display_link=display_link,
                                                                      outdir=outdir,
                                                                      force_rerun=force_rerun)

            if new_pdbs:
                counter += 1
                log.debug('{}: {} PDBs BLASTed'.format(g.id, len(new_pdbs)))
            else:
                log.debug('{}: no BLAST results'.format(g.id))

        log.info('Completed sequence --> PDB BLAST. See the "df_pdb_blast" attribute for a summary dataframe.')
        log.info('{}: number of genes with additional structures added from BLAST'.format(counter))
Project: ssbio    Author: SBRG
def get_dssp_annotations(self, representatives_only=True, force_rerun=False):
        """Run DSSP on structures and store calculations.

        Annotations are stored in the protein structure's chain sequence at:
        ``<chain_prop>.seq_record.letter_annotations['*-dssp']``

        Args:
            representatives_only (bool): If analysis should only be run on the representative structure
            force_rerun (bool): If calculations should be rerun even if an output file exists

        """
        for g in tqdm(self.genes):
            g.protein.get_dssp_annotations(representative_only=representatives_only, force_rerun=force_rerun)
Project: ssbio    Author: SBRG
def get_msms_annotations(self, representatives_only=True, force_rerun=False):
        """Run MSMS on structures and store calculations.

        Annotations are stored in the protein structure's chain sequence at:
        ``<chain_prop>.seq_record.letter_annotations['*-msms']``

        Args:
            representatives_only (bool): If analysis should only be run on the representative structure
            force_rerun (bool): If calculations should be rerun even if an output file exists

        """
        for g in tqdm(self.genes):
            g.protein.get_msms_annotations(representative_only=representatives_only, force_rerun=force_rerun)
Project: ssbio    Author: SBRG
def find_disulfide_bridges(self, representatives_only=True):
        """Run Biopython's disulfide bridge finder and store found bridges.

        Annotations are stored in the protein structure's chain sequence at:
        ``<chain_prop>.seq_record.annotations['SSBOND-biopython']``

        Args:
            representatives_only (bool): If analysis should only be run on the representative structure

        """
        for g in tqdm(self.genes):
            g.protein.find_disulfide_bridges(representative_only=representatives_only)

    ### END STRUCTURE RELATED METHODS ###
    ####################################################################################################################
Project: catalearn    Author: Catalearn
def download_progress(res, stream, save_name):
    total_size = int(res.headers.get('content-length', 0))

    chunk_size = 32 * 1024
    pbar = tqdm(total=total_size, unit='B', unit_scale=True, desc=save_name)

    for data in res.iter_content(chunk_size):
        stream.write(data)
        pbar.update(len(data))  # count actual bytes; the last chunk may be short
    pbar.close()
Project: catalearn    Author: Catalearn
def upload_data(gpu_ip, job_hash, data_path):
    url = 'http://%s:%s/runJobDecorator' % (gpu_ip, settings.GPU_PORT)
    file_size = path.getsize(data_path)
    pbar = tqdm(total=file_size, unit='B', unit_scale=True)

    def callback(monitor):
        progress = monitor.bytes_read - callback.last_bytes_read
        pbar.update(progress)
        callback.last_bytes_read = monitor.bytes_read
    callback.last_bytes_read = 0

    with open(data_path, 'rb') as f:
        data = {
            'file': ('uploads.pkl', f, 'application/octet-stream'),
            'hash': job_hash
        }
        encoder = MultipartEncoder(
            fields=data
        )
        monitor = MultipartEncoderMonitor(encoder, callback)
        r = requests.post(url, data=monitor, headers={
            'Content-Type': monitor.content_type})

    remove(data_path)
    # pbar might not close when the user interrupts, need to fix this
    pbar.close()
    status_check(r)
Project: catalearn    Author: Catalearn
def download_and_unzip_result(url, job_hash):
    r = requests.get(url, stream=True)
    status_check(r)
    total_size = int(r.headers.get('content-length', 0))
    with open('download.zip', 'wb') as f:
        pbar = tqdm(total=total_size, unit='B', unit_scale=True)

        chunk_size = 1024 * 32  # 32kb
        for data in r.iter_content(chunk_size):
            f.write(data)
            pbar.update(chunk_size)
        # again there might be a pbar issue here
        pbar.close()

    with open('download.zip', 'rb') as f:
        z = ZipFile(io.BytesIO(f.read()))
    z.extractall()
    remove('download.zip')

    result = None  # output of the script
    new_files = None  # names of new files created by the script

    pickle_path = path.abspath(path.join(job_hash, job_hash + '.pkl'))
    if path.isfile(pickle_path):
        with open(pickle_path, 'rb') as f:
            # Hack: a workaround for dill's pickling problem
            # import_all()
            result = dill.load(f)
            # unimport_all()
        remove(pickle_path)

    if path.isdir(job_hash):
        new_files = listdir(job_hash)
        for name in new_files:
            rename(path.join(job_hash, name), name)
        rmtree(job_hash)

    return result, new_files
Project: Auspex    Author: BBN-Q
async def run(self):
        self.stream = self.sink.input_streams[0]
        axes = self.stream.descriptor.axes
        num_axes = len(axes)
        totals = [self.stream.descriptor.num_points_through_axis(axis) for axis in range(num_axes)]
        chunk_sizes = [max(1,self.stream.descriptor.num_points_through_axis(axis+1)) for axis in range(num_axes)]
        self.num = min(self.num, num_axes)

        self.bars   = []
        for i in range(self.num):
            if self.notebook:
                self.bars.append(tqdm_notebook(total=totals[i]/chunk_sizes[i]))
            else:
                self.bars.append(tqdm(total=totals[i]/chunk_sizes[i]))
        self.w_id   = 0
        while True:
            if self.stream.done() and self.w_id==self.stream.num_points():
                break

            new_data = np.array(await self.stream.queue.get()).flatten()
            while self.stream.queue.qsize() > 0:
                new_data = np.append(new_data, np.array(self.stream.queue.get_nowait()).flatten())
            self.w_id += new_data.size
            num_data = self.stream.points_taken
            for i in range(self.num):
                if num_data == 0:
                    if self.notebook:
                        self.bars[i].sp(close=True)
                        # Reset the progress bar with a new one
                        self.bars[i] = tqdm_notebook(total=totals[i]/chunk_sizes[i])
                    else:
                        # Reset the progress bar with a new one
                        self.bars[i].close()
                        self.bars[i] = tqdm(total=totals[i]/chunk_sizes[i])
                pos = int(10*num_data / chunk_sizes[i])/10.0 # One decimal is good enough
                if pos > self.bars[i].n:
                    self.bars[i].update(pos - self.bars[i].n)
                num_data = num_data % chunk_sizes[i]
Project: autoreject    Author: autoreject
def _pbar(iterable, desc, leave=True, position=None, verbose='progressbar'):
    if verbose == 'progressbar':
        from mne.utils import ProgressBar

        _ProgressBar = ProgressBar
        if not version_is_greater_equal('mne', '0.14'):
            class _ProgressBar(ProgressBar):
                def __iter__(self):
                    """Iterate to auto-increment the pbar with 1."""
                    self.max_value = len(iterable)
                    for obj in iterable:
                        yield obj
                        self.update_with_increment_value(1)

        pbar = _ProgressBar(iterable, mesg=desc, spinner=True)
    elif verbose == 'tqdm':
        from tqdm import tqdm
        pbar = tqdm(iterable, desc=desc, leave=leave, position=position,
                    dynamic_ncols=True)
    elif verbose == 'tqdm_notebook':
        from tqdm import tqdm_notebook
        pbar = tqdm_notebook(iterable, desc=desc, leave=leave,
                             position=position, dynamic_ncols=True)
    elif verbose is False:
        pbar = iterable
    return pbar
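
A usage sketch: pass an iterable and a description; verbose='tqdm_notebook' assumes the call happens inside a notebook session.

results = []
for value in _pbar(range(1000), desc='Fitting', verbose='tqdm_notebook'):
    results.append(value ** 2)  # stand-in for the per-item work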
Project: keras-toolbox    Author: hadim
def augment(X, Y, n=5, elastic_alpha_range=20, rotation_range=30,
            shift_x_range=0.1, shift_y_range=0.1):
    """I admit this code is very ugly (much worse than Keras ImageDataGenerator API
       Note to myself : dont reinvent the wheel...
    """


    X_transformed = np.zeros([X.shape[0] * n * 3] + list(X.shape[1:]))
    Y_transformed = np.zeros([X.shape[0] * n * 3] + list(X.shape[1:]))

    z = 0
    for i in tnrange(n):

        for j, (XX, YY) in tqdm_notebook(enumerate(zip(X, Y)), total=len(X), disable=False, leave=False):
            X_transformed[z], Y_transformed[z] = elastic_transform([XX, YY], elastic_alpha_range, sigma=10)
            z += 1

        for j, (XX, YY) in tqdm_notebook(enumerate(zip(X, Y)), total=len(X), disable=False, leave=False):
            X_transformed[z], Y_transformed[z] = random_rotation([XX, YY], rg=rotation_range,
                                                                  row_index=1, col_index=2,
                                                                  channel_index=0,
                                                                  fill_mode='nearest', cval=0.)
            z += 1

        for j, (XX, YY) in tqdm_notebook(enumerate(zip(X, Y)), total=len(X), disable=False, leave=False):
            X_transformed[z], Y_transformed[z] = random_shift([XX, YY], shift_x_range, shift_y_range,
                                                               row_index=1, col_index=2,
                                                               channel_index=0,
                                                               fill_mode='nearest', cval=0.)
            z += 1

    return X_transformed, Y_transformed
Project: lang2program    Author: kelvinguu
def verboserate(iterable, *args, **kwargs):
    """Iterate verbosely.

    Args:
        desc (str): prefix for the progress bar
        total (int): total length of the iterable
        See more options for tqdm.tqdm.

    """
    progress = tqdm_notebook if in_ipython() else tqdm
    for val in progress(iterable, *args, **kwargs):
        yield val
Project: lang2program    Author: kelvinguu
def verboserate(iterable, *args, **kwargs):
    """Iterate verbosely.

    Args:
        desc (str): prefix for the progress bar
        total (int): total length of the iterable
        See more options for tqdm.tqdm.

    """
    progress = tqdm_notebook if in_ipython() else tqdm
    for val in progress(iterable, *args, **kwargs):
        yield val
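
Callers pass tqdm keyword arguments straight through, and the frontend (terminal vs. notebook) is picked automatically. A minimal sketch:

examples = list(range(10000))
checksum = 0
for ex in verboserate(examples, desc='examples', total=len(examples)):
    checksum += ex  # stand-in for the real per-example work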
Project: skorch    Author: dnouri
def on_epoch_begin(self, net, X=None, X_valid=None, **kwargs):
        # Assume it is a number until proven otherwise.
        batches_per_epoch = self.batches_per_epoch

        if self.batches_per_epoch == 'auto':
            batches_per_epoch = self._get_batches_per_epoch(net, X, X_valid)
        elif self.batches_per_epoch == 'count':
            # No limit is known until the end of the first epoch.
            batches_per_epoch = None

        if self._use_notebook():
            self.pbar = tqdm.tqdm_notebook(total=batches_per_epoch)
        else:
            self.pbar = tqdm.tqdm(total=batches_per_epoch)
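
This hook appears to come from skorch's ProgressBar callback, which is attached when constructing the net. A sketch assuming skorch's public API (the module and data below are made up):

import numpy as np
import torch.nn as nn
from skorch import NeuralNetClassifier
from skorch.callbacks import ProgressBar

class TinyModule(nn.Module):
    def __init__(self):
        super().__init__()
        self.dense = nn.Linear(20, 2)
        self.out = nn.LogSoftmax(dim=-1)

    def forward(self, X):
        return self.out(self.dense(X))

X = np.random.randn(200, 20).astype('float32')
y = np.random.randint(0, 2, 200).astype('int64')

net = NeuralNetClassifier(
    TinyModule,
    max_epochs=5,
    callbacks=[ProgressBar(batches_per_epoch='auto')],
)
net.fit(X, y)  # one bar per epoch; notebook vs. terminal is auto-detected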
Project: ssbio    Author: SBRG
def kegg_mapping_and_metadata(self, kegg_organism_code, custom_gene_mapping=None, outdir=None,
                                  set_as_representative=False, force_rerun=False):
        """Map all genes in the model to KEGG IDs using the KEGG service.

        Steps:
            1. Download all metadata and sequence files in the sequences directory
            2. Creates a KEGGProp object in the protein.sequences attribute
            3. Returns a Pandas DataFrame of mapping results

        Args:
            kegg_organism_code (str): The three letter KEGG code of your organism
            custom_gene_mapping (dict): If your model genes differ from the gene IDs you want to map,
                custom_gene_mapping allows you to input a dictionary which maps model gene IDs to new ones.
                Dictionary keys must match model gene IDs.
            outdir (str): Path to output directory of downloaded files, must be set if GEM-PRO directories
                were not created initially
            set_as_representative (bool): If mapped KEGG IDs should be set as representative sequences
            force_rerun (bool): If you want to overwrite any existing mappings and files

        """

        # First map all of the organism's KEGG genes to UniProt
        kegg_to_uniprot = ssbio.databases.kegg.map_kegg_all_genes(organism_code=kegg_organism_code, target_db='uniprot')

        successfully_mapped_counter = 0

        for g in tqdm(self.genes):
            if custom_gene_mapping:
                kegg_g = custom_gene_mapping[g.id]
            else:
                kegg_g = g.id

            # Download both FASTA and KEGG metadata files
            kegg_prop = g.protein.load_kegg(kegg_id=kegg_g, kegg_organism_code=kegg_organism_code,
                                            download=True, outdir=outdir, set_as_representative=set_as_representative,
                                            force_rerun=force_rerun)

            # Update potentially old UniProt ID
            if kegg_g in kegg_to_uniprot.keys():
                kegg_prop.uniprot = kegg_to_uniprot[kegg_g]
                if g.protein.representative_sequence:
                    if g.protein.representative_sequence.kegg == kegg_prop.kegg:
                        g.protein.representative_sequence.uniprot = kegg_to_uniprot[kegg_g]

            # Keep track of missing mappings - missing is defined by no available sequence
            if kegg_prop.sequence_file:
                successfully_mapped_counter += 1

            log.debug('{}: loaded KEGG information for gene'.format(g.id))

        log.info('{}/{}: number of genes mapped to KEGG'.format(successfully_mapped_counter, len(self.genes)))
        log.info('Completed ID mapping --> KEGG. See the "df_kegg_metadata" attribute for a summary dataframe.')
Project: ssbio    Author: SBRG
def uniprot_mapping_and_metadata(self, model_gene_source, custom_gene_mapping=None, outdir=None,
                                     set_as_representative=False, force_rerun=False):
        """Map all genes in the model to UniProt IDs using the UniProt mapping service.
        Also download all metadata and sequences.

        Args:
            model_gene_source (str): the database source of your model gene IDs.
                See: http://www.uniprot.org/help/api_idmapping
                Common model gene sources are:

                * Ensembl Genomes - ``ENSEMBLGENOME_ID`` (i.e. E. coli b-numbers)
                * Entrez Gene (GeneID) - ``P_ENTREZGENEID``
                * RefSeq Protein - ``P_REFSEQ_AC``

            custom_gene_mapping (dict): If your model genes differ from the gene IDs you want to map,
                custom_gene_mapping allows you to input a dictionary which maps model gene IDs to new ones.
                Dictionary keys must match model genes.
            outdir (str): Path to output directory of downloaded files, must be set if GEM-PRO directories
                were not created initially
            set_as_representative (bool): If mapped UniProt IDs should be set as representative sequences
            force_rerun (bool): If you want to overwrite any existing mappings and files

        """

        # Allow model gene --> custom ID mapping ({'TM_1012':'TM1012'})
        if custom_gene_mapping:
            genes_to_map = list(custom_gene_mapping.values())
        else:
            genes_to_map = [x.id for x in self.genes]

        # Map all IDs first to available UniProts
        genes_to_uniprots = bs_unip.mapping(fr=model_gene_source, to='ACC', query=genes_to_map)

        successfully_mapped_counter = 0
        for g in tqdm(self.genes):
            if custom_gene_mapping and g.id in custom_gene_mapping.keys():
                uniprot_gene = custom_gene_mapping[g.id]
            else:
                uniprot_gene = g.id

            if uniprot_gene not in list(genes_to_uniprots.keys()):
                log.debug('{}: unable to map to UniProt'.format(g.id))
            else:
                for mapped_uniprot in genes_to_uniprots[uniprot_gene]:
                    try:
                        uniprot_prop = g.protein.load_uniprot(uniprot_id=mapped_uniprot, download=True, outdir=outdir,
                                                              set_as_representative=set_as_representative,
                                                              force_rerun=force_rerun)
                    except HTTPError as e:
                        log.error('{}, {}: unable to complete web request'.format(g.id, mapped_uniprot))
                        print(e)
                        continue

                    if uniprot_prop.sequence_file or uniprot_prop.metadata_file:
                        successfully_mapped_counter += 1

        log.info('{}/{}: number of genes mapped to UniProt'.format(successfully_mapped_counter, len(self.genes)))
        log.info('Completed ID mapping --> UniProt. See the "df_uniprot_metadata" attribute for a summary dataframe.')
Project: ssbio    Author: SBRG
def get_scratch_predictions(self, path_to_scratch, results_dir, scratch_basename='scratch', num_cores=1,
                                exposed_buried_cutoff=25, custom_gene_mapping=None):
        """Run and parse ``SCRATCH`` results to predict secondary structure and solvent accessibility.
        Annotations are stored in the protein's representative sequence at:

            * ``.annotations``
            * ``.letter_annotations``

        Args:
            path_to_scratch (str): Path to SCRATCH executable
            results_dir (str): Path to SCRATCH results folder, which will have the files (scratch.ss, scratch.ss8,
                scratch.acc, scratch.acc20)
            scratch_basename (str): Basename of the SCRATCH results ('scratch' is default)
            num_cores (int): Number of cores to use to parallelize SCRATCH run
            exposed_buried_cutoff (int): Cutoff of exposed/buried for the acc20 predictions
            custom_gene_mapping (dict): Default parsing of SCRATCH output files is to look for the model gene IDs. If
                your output files contain IDs which differ from the model gene IDs, use this dictionary to map model
                gene IDs to result file IDs. Dictionary keys must match model genes.

        """
        if not self.genome_path:
            # Write all sequences as one file
            all_seqs = self.write_representative_sequences_file(outname=self.id)

        # Runs SCRATCH or loads existing results in results_dir
        scratch = SCRATCH(project_name=scratch_basename, seq_file=self.genome_path)
        scratch.run_scratch(path_to_scratch=path_to_scratch, num_cores=num_cores, outdir=results_dir)

        counter = 0

        # Adding the scratch annotations to the representative_sequences letter_annotations
        for g in tqdm(self.genes_with_a_representative_sequence):
            if custom_gene_mapping:
                g_id = custom_gene_mapping[g.id]
            else:
                g_id = g.id

            if g_id in scratch.sspro_summary():
                # Secondary structure
                g.protein.representative_sequence.annotations.update(scratch.sspro_summary()[g_id])
                g.protein.representative_sequence.annotations.update(scratch.sspro8_summary()[g_id])
                g.protein.representative_sequence.letter_annotations['SS-sspro'] = scratch.sspro_results()[g_id]
                g.protein.representative_sequence.letter_annotations['SS-sspro8'] = scratch.sspro8_results()[g_id]

                # Solvent accessibility
                g.protein.representative_sequence.annotations.update(scratch.accpro_summary()[g_id])
                g.protein.representative_sequence.annotations.update(scratch.accpro20_summary(exposed_buried_cutoff)[g_id])
                g.protein.representative_sequence.letter_annotations['RSA-accpro'] = scratch.accpro_results()[g_id]
                g.protein.representative_sequence.letter_annotations['RSA-accpro20'] = scratch.accpro20_results()[g_id]

                counter += 1
            else:
                log.error('{}: missing SCRATCH results'.format(g.id))

        log.info('{}/{}: number of genes with SCRATCH predictions loaded'.format(counter, len(self.genes)))
Project: ssbio    Author: SBRG
def get_tmhmm_predictions(self, tmhmm_results, custom_gene_mapping=None):
        """Parse TMHMM results and store in the representative sequences.

        This is a basic function to parse pre-run TMHMM results. Run TMHMM from the 
        web service (http://www.cbs.dtu.dk/services/TMHMM/) by doing the following:

            1. Write all representative sequences in the GEM-PRO using the function ``write_representative_sequences_file``
            2. Upload the file to http://www.cbs.dtu.dk/services/TMHMM/ and choose "Extensive, no graphics" as the output
            3. Copy and paste the results (ignoring the top header and above "HELP with output formats") into a file and save it
            4. Run this function on that file

        Args:
            tmhmm_results (str): Path to TMHMM results (long format)
            custom_gene_mapping (dict): Default parsing of TMHMM output is to look for the model gene IDs. If
                your output file contains IDs which differ from the model gene IDs, use this dictionary to map model
                gene IDs to result file IDs. Dictionary keys must match model genes.

        """
        # TODO: refactor to Protein class?
        tmhmm_dict = ssbio.protein.sequence.properties.tmhmm.parse_tmhmm_long(tmhmm_results)

        counter = 0
        for g in tqdm(self.genes_with_a_representative_sequence):
            if custom_gene_mapping:
                g_id = custom_gene_mapping[g.id]
            else:
                g_id = g.id

            if g_id in tmhmm_dict:
                log.debug('{}: loading TMHMM results'.format(g.id))
                if not tmhmm_dict[g_id]:
                    log.error("{}: missing TMHMM results".format(g.id))
                    continue
                g.protein.representative_sequence.annotations['num_tm_helix-tmhmm'] = tmhmm_dict[g_id]['num_tm_helices']
                g.protein.representative_sequence.letter_annotations['TM-tmhmm'] = tmhmm_dict[g_id]['sequence']
                counter += 1
            else:
                log.error("{}: missing TMHMM results".format(g.id))

        log.info('{}/{}: number of genes with TMHMM predictions loaded'.format(counter, len(self.genes)))

    ### END SEQUENCE RELATED METHODS ###
    ####################################################################################################################

    ####################################################################################################################
    ### STRUCTURE RELATED METHODS ###
Project: ssbio    Author: SBRG
def get_itasser_models(self, homology_raw_dir, custom_itasser_name_mapping=None, outdir=None, force_rerun=False):
        """Copy generated I-TASSER models from a directory to the GEM-PRO directory.

        Args:
            homology_raw_dir (str): Root directory of I-TASSER folders.
            custom_itasser_name_mapping (dict): Use this if your I-TASSER folder names differ from your model gene names.
                Input a dict of {model_gene: ITASSER_folder}.
            outdir (str): Path to output directory of downloaded files, must be set if GEM-PRO directories
                were not created initially
            force_rerun (bool): If homology files should be copied again even if they exist in the GEM-PRO directory

        """
        counter = 0

        for g in tqdm(self.genes):
            if custom_itasser_name_mapping and g.id in custom_itasser_name_mapping:
                hom_id = custom_itasser_name_mapping[g.id]
                if not op.exists(op.join(homology_raw_dir, hom_id)):
                    hom_id = g.id
            else:
                hom_id = g.id

            # The name of the actual pdb file will be $GENEID_model1.pdb
            new_itasser_name = hom_id + '_model1'
            orig_itasser_dir = op.join(homology_raw_dir, hom_id)

            try:
                itasser_prop = g.protein.load_itasser_folder(ident=hom_id, itasser_folder=orig_itasser_dir,
                                                             organize=True, outdir=outdir,
                                                             organize_name=new_itasser_name,
                                                             force_rerun=force_rerun)
            except OSError:
                log.debug('{}: homology model folder unavailable'.format(g.id))
                continue
            except IOError:
                log.debug('{}: homology model unavailable'.format(g.id))
                continue

            if itasser_prop.structure_file:
                counter += 1
            else:
                log.debug('{}: homology model file unavailable, perhaps modelling did not finish'.format(g.id))

        log.info('Completed copying of {} I-TASSER models to GEM-PRO directory. See the "df_homology_models" attribute for a summary dataframe.'.format(counter))
Project: ssbio    Author: SBRG
def set_representative_structure(self, seq_outdir=None, struct_outdir=None, pdb_file_type=None,
                                     engine='needle', always_use_homology=False, rez_cutoff=0.0,
                                     seq_ident_cutoff=0.5, allow_missing_on_termini=0.2,
                                     allow_mutants=True, allow_deletions=False,
                                     allow_insertions=False, allow_unresolved=True,
                                     clean=True, force_rerun=False):
        """Set all representative structure for proteins from a structure in the structures attribute.

        Each gene can have a combination of the following, which will be analyzed to set a representative structure.

            * Homology model(s)
            * Ranked PDBs
            * BLASTed PDBs

        If the ``always_use_homology`` flag is true, homology models are always set as representative when they exist.
        If there are multiple homology models, we rank by the percent sequence coverage.

        Args:
            seq_outdir (str): Path to output directory of sequence alignment files, must be set if GEM-PRO directories
                were not created initially
            struct_outdir (str): Path to output directory of structure files, must be set if GEM-PRO directories
                were not created initially
            pdb_file_type (str): ``pdb``, ``pdb.gz``, ``mmcif``, ``cif``, ``cif.gz``, ``xml.gz``, ``mmtf``, ``mmtf.gz`` -
                choose a file type for files downloaded from the PDB
            engine (str): ``biopython`` or ``needle`` - which pairwise alignment program to use.
                ``needle`` is the standard EMBOSS tool to run pairwise alignments.
                ``biopython`` is Biopython's implementation of needle. Results can differ!
            always_use_homology (bool): If homology models should always be set as the representative structure
            rez_cutoff (float): Resolution cutoff, in Angstroms (only if experimental structure)
            seq_ident_cutoff (float): Percent sequence identity cutoff, in decimal form
            allow_missing_on_termini (float): Percentage of the total length of the reference sequence which will be ignored
                when checking for modifications. Example: if 0.1, and reference sequence is 100 AA, then only residues
                5 to 95 will be checked for modifications.
            allow_mutants (bool): If mutations should be allowed or checked for
            allow_deletions (bool): If deletions should be allowed or checked for
            allow_insertions (bool): If insertions should be allowed or checked for
            allow_unresolved (bool): If unresolved residues should be allowed or checked for
            clean (bool): If structures should be cleaned
            force_rerun (bool): If sequence to structure alignment should be rerun

        """
        for g in tqdm(self.genes):
            repstruct = g.protein.set_representative_structure(seq_outdir=seq_outdir,
                                                               struct_outdir=struct_outdir,
                                                               pdb_file_type=pdb_file_type,
                                                               engine=engine,
                                                               rez_cutoff=rez_cutoff,
                                                               seq_ident_cutoff=seq_ident_cutoff,
                                                               always_use_homology=always_use_homology,
                                                               allow_missing_on_termini=allow_missing_on_termini,
                                                               allow_mutants=allow_mutants,
                                                               allow_deletions=allow_deletions,
                                                               allow_insertions=allow_insertions,
                                                               allow_unresolved=allow_unresolved,
                                                               clean=clean,
                                                               force_rerun=force_rerun)

        log.info('{}/{}: number of genes with a representative structure'.format(len(self.genes_with_a_representative_structure),
                                                                                 len(self.genes)))
        log.info('See the "df_representative_structures" attribute for a summary dataframe.')
Project: autoreject    Author: autoreject
def clean_by_interp(inst, picks=None, verbose='progressbar'):
    """Clean epochs/evoked by LOOCV.

    Parameters
    ----------
    inst : instance of mne.Evoked or mne.Epochs
        The evoked or epochs object.
    picks : ndarray, shape(n_channels,) | None
        The channels to be considered for autoreject. If None, defaults
        to data channels {'meg', 'eeg'}.
    verbose : 'tqdm', 'tqdm_notebook', 'progressbar' or False
        The verbosity of progress messages.
        If `'progressbar'`, use `mne.utils.ProgressBar`.
        If `'tqdm'`, use `tqdm.tqdm`.
        If `'tqdm_notebook'`, use `tqdm.tqdm_notebook`.
        If False, suppress all output messages.

    Returns
    -------
    inst_clean : instance of mne.Evoked or mne.Epochs
        Instance after interpolation of bad channels.
    """
    inst_interp = inst.copy()
    mesg = 'Creating augmented epochs'
    picks = _handle_picks(info=inst_interp.info, picks=picks)

    BaseEpochs = _get_epochs_type()
    ch_names = [inst.info['ch_names'][p] for p in picks]
    for ch_idx, (pick, ch) in enumerate(_pbar(list(zip(picks, ch_names)),
                                        desc=mesg, verbose=verbose)):
        inst_clean = inst.copy()
        inst_clean.info['bads'] = [ch]
        interpolate_bads(inst_clean, picks=picks, reset_bads=True, mode='fast')

        pick_interp = mne.pick_channels(inst_clean.info['ch_names'], [ch])[0]

        if isinstance(inst, mne.Evoked):
            inst_interp.data[pick] = inst_clean.data[pick_interp]
        elif isinstance(inst, BaseEpochs):
            inst_interp._data[:, pick] = inst_clean._data[:, pick_interp]
        else:
            raise ValueError('Unrecognized type for inst')
    return inst_interp
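
A usage sketch (the epochs file is hypothetical; the function accepts either Epochs or Evoked):

import mne

epochs = mne.read_epochs('subject01-epo.fif')  # hypothetical file
epochs_clean = clean_by_interp(epochs, verbose='tqdm_notebook')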
Project: tsne_animate    Author: hardkun
def animate(self,X,y,useTqdm=0,filename=None,return_anim=True):
        pos = self.getSteps(X,y)
        y_mapping = {i:n for n,i in enumerate(set(y))}

        last_iter = pos[len(pos)-1].reshape(-1, 2)
        lims = np.max(last_iter,axis=0),np.min(last_iter,axis=0)
        NCOLORS = len(y_mapping)
        fig = plt.figure()
        fig.set_tight_layout(True)
        ax = fig.add_subplot(111)
        jet = plt.get_cmap('jet') 
        cNorm  = colors.Normalize(vmin=0, vmax=NCOLORS)
        scalarMap = cmx.ScalarMappable(norm=cNorm, cmap=jet)

        A,B = np.array(list(zip(*pos[0].reshape(-1, 2))))
        dots_list = []
        for i in range(NCOLORS):
            colorVal = scalarMap.to_rgba(i)
            a,b = A[y == i],B[y == i]
            dots, = ax.plot(b,a,'o',color=colorVal)
            dots_list.append(dots)


        def init():
            ax.set_xlim([lims[0][0],lims[1][0]])
            ax.set_ylim([lims[0][1],lims[1][1]])
            return [i for i in dots_list]

        def update(i):
            for j in range(len(dots_list)):
                a,b = np.array(list(zip(*pos[i].reshape(-1, 2))))
                a,b = a[y == j],b[y == j]
                dots_list[j].set_xdata(a)
                dots_list[j].set_ydata(b) 
            return [i for i in dots_list]+[ax]

        if useTqdm==0:
            frames = np.arange(0, len(pos)-1)
        elif useTqdm==1:
            from tqdm import tqdm
            frames = tqdm(np.arange(0, len(pos)-1))
        elif useTqdm==2:
            from tqdm import tqdm_notebook
            frames = tqdm_notebook(np.arange(0, len(pos)-1))

        anim = FuncAnimation(fig, update, frames=frames, init_func=init, interval=50)
        if return_anim:
            return anim
        if filename is None:
            plt.show()
        else:
            #anim.save(filename, fps=20, codec='libx264')
            anim.save(filename, dpi=80, writer='imagemagick')
Project: tmtk    Author: thehyve
def create_tree_from_clinical(clinical_object, concept_tree=None):
    """

    :param clinical_object:
    :param concept_tree:
    :return:
    """
    if not concept_tree:
        concept_tree = ConceptTree()

    column_map_ids = clinical_object.ColumnMapping.ids
    no_bar = len(column_map_ids) < 200
    bar_format = '{l_bar}{bar} | {n_fmt}/{total_fmt} nodes ready, {rate_fmt}'

    for var_id, variable in tqdm.tqdm_notebook(clinical_object.all_variables.items(),
                                               bar_format=bar_format,
                                               unit=' nodes',
                                               leave=False,
                                               dynamic_ncols=True,
                                               disable=no_bar):
        data_args = variable.column_map_data

        # Don't need these, they're in the tree.
        for k in [Mappings.cat_cd_s, Mappings.data_label_s]:
            data_args.pop(k)
        concept_path = path_converter(variable.concept_path, to_internal=True)
        categories = {} if variable.is_numeric else variable.word_map_dict

        if categories:
            node_type = 'categorical'
        else:
            node_type = 'empty' if variable.is_empty else 'numeric'

        # Store node type in `data` so it can be changed back after renaming OMIT
        data_args.update({'ctype': node_type})

        # Store column header of variable.
        data_args.update({'dfh': variable.header})

        # Add filename to SUBJ_ID and OMIT, this is a work around for unique path constraint.
        if variable.data_label in {"SUBJ_ID", "OMIT"}:
            concept_path = concept_path.replace("SUBJ ID", "SUBJ_ID")
            node_type = 'codeleaf'

        # Add categorical values to concept tree (if any)
        for i, datafile_value in enumerate(categories):
            oid = var_id.create_category(i + 1)
            mapped = categories[datafile_value]
            mapped = mapped if not pd.isnull(mapped) else ''
            categorical_path = path_join(concept_path, mapped)
            concept_tree.add_node(categorical_path, oid,
                                  node_type='alpha',
                                  data_args={Mappings.df_value_s: datafile_value})

        concept_tree.add_node(concept_path, var_id,
                              node_type=node_type, data_args=data_args)

    return concept_tree
Project: jaccardSegment    Author: bermanmaxim
def parallel_process(array, function, n_jobs=8, use_kwargs=False, front_num=3):
    """
        A parallel version of the map function with a progress bar. 

        Args:
            array (array-like): An array to iterate over.
            function (function): A python function to apply to the elements of array
            n_jobs (int, default=8): The number of cores to use
            use_kwargs (boolean, default=False): Whether to consider the elements of array as dictionaries of 
                keyword arguments to function 
            front_num (int, default=3): The number of iterations to run serially before kicking off the parallel job. 
                Useful for catching bugs
        Returns:
            [function(array[0]), function(array[1]), ...]
    """
    # We run the first few iterations serially to catch bugs
    front = []
    if front_num > 0:
        front = [function(**a) if use_kwargs else function(a) for a in array[:front_num]]
    #If we set n_jobs to 1, just run a list comprehension. This is useful for benchmarking and debugging.
    if n_jobs==1:
        return front + [function(**a) if use_kwargs else function(a) for a in tqdm(array[front_num:])]
    #Assemble the workers
    with ProcessPoolExecutor(max_workers=n_jobs) as pool:
        #Pass the elements of array into function
        if use_kwargs:
            futures = [pool.submit(function, **a) for a in array[front_num:]]
        else:
            futures = [pool.submit(function, a) for a in array[front_num:]]
        kwargs = {
            'total': len(futures),
            'unit': 'it',
            'unit_scale': True,
            'leave': True,
            'smoothing': 0.1,
        }
        #Print out the progress as tasks complete
        for f in tqdm(as_completed(futures), **kwargs):
            pass
    out = []
    #Get the results from the futures. 
    for i, future in tqdm(enumerate(futures)):
        try:
            out.append(future.result())
        except Exception as e:
            out.append(e)
    return front + out
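
A usage sketch: with ProcessPoolExecutor the mapped function must be picklable, i.e. defined at module top level.

def square(x):
    return x * x

if __name__ == '__main__':
    # the first 3 items run serially (front_num default); the rest go to 4 workers
    results = parallel_process(list(range(1000)), square, n_jobs=4)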
Project: soundDB    Author: gjoseph92
def __iter__(self):
        state = self.prepareState(self._endpoint, self._filters, **self._prepareStateParams)
        entries = self._endpoint(sort= self._sort, n= self._n, **self._filters)

        if self._progbar:
            try:
                get_ipython
                inNotebook = True
            except NameError:
                inNotebook = False

            if not inNotebook:
                sys.stderr.write("Locating data...")

        entries = list(entries)

        if self._progbar and not inNotebook:
            sys.stderr.write("\r")

        if self._progbar:
            try:
                get_ipython # will fail faster and more reliably than tqdm_notebook
                entriesIterable = tqdm_notebook(entries, unit= "entries")
            except (NameError, AttributeError, TypeError):
                entriesIterable = tqdm(entries, unit= "entries")
        else:
            entriesIterable = entries

        def iterate():
            for entry in entriesIterable:
                try:
                    data = self.parse(entry, state= state) if state is not None else self.parse(entry)
                    yield entry, data
                except KeyboardInterrupt:
                    self._write('Interrupted while parsing "{}"'.format(entry.path))
                    break
                except GeneratorExit:
                    raise GeneratorExit
                except:
                    self._write('Error while parsing "{}":'.format(entry.path))
                    self._write( traceback.format_exc() )

        # chain the operations together
        # each function in self._chain is a generator which takes an iterator
        # (remember that you call a generator to "activate" it: calling a generator returns an iterator)
        # so end condition for the loop is that `iterate` refers to an iterator
        iterate = iterate()
        for do in self._chain:
            iterate = do(iterate)
        return iterate