Python tqdm module: tqdm() example source code

We have extracted the following 50 code examples from open-source Python projects to illustrate how to use tqdm.tqdm().

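The excerpts below come straight from the projects listed, so most of them assume surrounding imports and project-specific helpers. As a quick orientation, here is a minimal standalone sketch (not taken from any project below; the loop sizes and sleep() calls are placeholders) of the three tqdm patterns that recur throughout: wrapping an iterable, updating a bar manually against a known total, and using the bar as a context manager.

from time import sleep
from tqdm import tqdm

# Pattern 1: wrap any iterable; the bar advances once per iteration.
for _ in tqdm(range(100), desc='wrapped iterable'):
    sleep(0.01)

# Pattern 2: drive the bar manually when progress is reported from elsewhere.
pbar = tqdm(total=100, desc='manual updates')
for _ in range(100):
    sleep(0.01)
    pbar.update(1)
pbar.close()

# Pattern 3: context-manager form, so the bar is closed even on errors.
with tqdm(total=10 * 1024, unit='B', unit_scale=True, desc='download') as bar:
    for _ in range(10):
        sleep(0.01)
        bar.update(1024)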
Project: cloud-volume    Author: seung-lab    | Project source | File source
def test_gc_stresstest():
  with Storage('gs://seunglab-test/cloudvolume/connection_pool/', n_threads=0) as stor:
    stor.put_file('test', 'some string')

  n_trials = 500
  pbar = tqdm(total=n_trials)

  @retry
  def create_conn(interface):
    # assert GC_POOL.total_connections() <= GC_POOL.max_connections * 5
    bucket = GC_POOL.get_connection()
    blob = bucket.get_blob('cloudvolume/connection_pool/test')
    blob.download_as_string()
    GC_POOL.release_connection(bucket)
    pbar.update()

  with ThreadedQueue(n_threads=20) as tq:
    for _ in range(n_trials):
      tq.put(create_conn)

  pbar.close()
Project: human-rl    Author: gsastry    | Project source | File source
def action_label_counts(directory, data_loader, n_actions=18, n=None):
    episode_paths = frame.episode_paths(directory)
    label_counts = [0, 0]
    action_label_counts = [[0, 0] for i in range(n_actions)]
    if n is not None:
        np.random.shuffle(episode_paths)
        episode_paths = episode_paths[:n]
    for episode_path in tqdm.tqdm(episode_paths):
        try:
            features, labels = data_loader.load_features_and_labels([episode_path])
        except:
            traceback.print_exc()
        else:
            for label in range(len(label_counts)):
                label_counts[label] += np.count_nonzero(labels == label)
                for action in range(n_actions):
                    actions = np.reshape(np.array(features["action"]), [-1])
                    action_label_counts[action][label] += np.count_nonzero(
                        np.logical_and(labels == label, actions == action))
    return label_counts, action_label_counts
Project: twentybn-dl    Author: TwentyBN    | Project source | File source
def extract_chunks(files, num_images, out_path):
    with tqdm(total=num_images,
              unit='images',
              ncols=80,
              unit_scale=True) as pbar:
        process = tar(cat(files, _piped=True), 'xvz', _iter=True, _cwd=out_path)

        def kill():
            try:
                process.kill()
            except:
                pass
        atexit.register(kill)
        for line in process:
            if line.strip().endswith('.jpg'):
                pbar.update(1)
Project: cloud-volume    Author: seung-lab    | Project source | File source
def test_s3_stresstest():
  with Storage('s3://seunglab-test/cloudvolume/connection_pool/', n_threads=0) as stor:
    stor.put_file('test', 'some string')

  n_trials = 500
  pbar = tqdm(total=n_trials)

  @retry
  def create_conn(interface):
    conn = S3_POOL.get_connection()  
    # assert S3_POOL.total_connections() <= S3_POOL.max_connections * 5
    bucket = conn.get_object(
      Bucket='seunglab-test',
      Key='cloudvolume/connection_pool/test',
    )
    S3_POOL.release_connection(conn)
    pbar.update()

  with ThreadedQueue(n_threads=20) as tq:
    for _ in range(n_trials):
      tq.put(create_conn)

  pbar.close()
Project: astrobase    Author: waqasbhatti    | Project source | File source
def serial_varfeatures(lclist,
                       outdir,
                       maxobjects=None,
                       timecols=None,
                       magcols=None,
                       errcols=None,
                       mindet=1000,
                       lcformat='hat-sql',
                       nworkers=None):

    if maxobjects:
        lclist = lclist[:maxobjects]

    tasks = [(x, outdir, timecols, magcols, errcols, mindet, lcformat)
             for x in lclist]

    for task in tqdm(tasks):
        result = varfeatures_worker(task)
Project: DeepWorks    Author: daigo0927    | Project source | File source
def valid(self, batch_size = 128, weights_file = None):

        if weights_file is not None:
            self.saver.restore(self.sess, weights_file)

        data_size = self.x_test.shape[0]
        num_batches = int(data_size/batch_size)

        acc_vals = []
        permute_idx = np.random.permutation(np.arange(data_size))
        for b in tqdm(np.arange(num_batches)):
            x_val = self.x_test[permute_idx[b*batch_size:(b+1)*batch_size]]
            y_val = self.y_test[permute_idx[b*batch_size:(b+1)*batch_size]]

            acc_val = self.sess.run(self.accuracy,
                                    feed_dict = {self.images:x_val, self.labels:y_val})
            acc_vals.append(acc_val)

        print('validation accuracy : {}'.format(np.mean(acc_vals)))
Project: skymod    Author: DelusionalLogic    | Project source | File source
def download_file(self, name, url, headers, filename):
        r = super().getSession().get(
            url,
            allow_redirects=True,
            headers=headers,
            stream=True
        )

        if r.status_code != 200:
            raise RuntimeError(
                "Failed downloading file due to non 200 return code. "
                "Return code was " + str(r.status_code)
            )

        total_size = int(r.headers.get("content-length", 0))
        with tqdm(desc=name, total=total_size, unit='B',
                  unit_scale=True, miniters=1) as bar:
            with open(filename, 'wb') as fd:
                for chunk in r.iter_content(32*1024):
                    bar.update(len(chunk))
                    fd.write(chunk)
Project: ssbio    Author: SBRG    | Project source | File source
def pdb_downloader_and_metadata(self, outdir=None, pdb_file_type=None, force_rerun=False):
        """Download ALL mapped experimental structures to each protein's structures directory.

        Args:
            outdir (str): Path to output directory, if GEM-PRO directories were not set or other output directory is
                desired
            pdb_file_type (str): Type of PDB file to download, if not already set or other format is desired
            force_rerun (bool): If files should be re-downloaded if they already exist

        """

        if not pdb_file_type:
            pdb_file_type = self.pdb_file_type

        counter = 0
        for g in tqdm(self.genes):
            pdbs = g.protein.pdb_downloader_and_metadata(outdir=outdir, pdb_file_type=pdb_file_type, force_rerun=force_rerun)

            if pdbs:
                counter += len(pdbs)

        log.info('Updated PDB metadata dataframe. See the "df_pdb_metadata" attribute for a summary dataframe.')
        log.info('Saved {} structures total'.format(counter))
Project: scheduled-bots    Author: SuLab    | Project source | File source
def run(self, records, total=None, fast_run=True, write=True):
        # this shouldn't ever actually get used now
        raise ValueError()
        records = self.filter(records)
        for record in tqdm(records, mininterval=2, total=total):
            gene = self.GENE_CLASS(record, self.organism_info, self.login)
            try:
                gene.create_item(fast_run=fast_run, write=write)
            except Exception as e:
                exc_info = sys.exc_info()
                traceback.print_exception(*exc_info)
                msg = wdi_helpers.format_msg(gene.external_ids['Entrez Gene ID'], PROPS['Entrez Gene ID'], None,
                                             str(e), msg_type=type(e))
                wdi_core.WDItemEngine.log("ERROR", msg)
                gene.status = msg

            if gene.status is not True:
                self.failed.append(gene.entrez)
Project: scheduled-bots    Author: SuLab    | Project source | File source
def cleanup(self, releases, last_updated):
        """

        :param releases:
        :param last_updated:
        :param failed: list of entrez ids to skip
        :return:
        """
        print(self.failed)
        entrez_qid = wdi_helpers.id_mapper('P351', ((PROPS['found in taxon'], self.organism_info['wdid']),))
        print(len(entrez_qid))
        entrez_qid = {entrez: qid for entrez, qid in entrez_qid.items() if entrez not in self.failed}
        print(len(entrez_qid))
        filter = {PROPS['Entrez Gene ID']: '', PROPS['found in taxon']: self.organism_info['wdid']}
        frc = FastRunContainer(wdi_core.WDBaseDataType, wdi_core.WDItemEngine, base_filter=filter, use_refs=True)
        frc.clear()
        for qid in tqdm(entrez_qid.values()):
            remove_deprecated_statements(qid, frc, releases, last_updated, list(PROPS.values()), self.login)
Project: scheduled-bots    Author: SuLab    | Project source | File source
def run(self, records, total=None, fast_run=True, write=True):
        records = self.filter(records)
        for record in tqdm(records, mininterval=2, total=total):
            # print(record['entrezgene'])
            gene = self.GENE_CLASS(record, self.organism_info, self.chr_num_wdid, self.login)
            try:
                gene.create_item(fast_run=fast_run, write=write)
            except Exception as e:
                exc_info = sys.exc_info()
                traceback.print_exception(*exc_info)
                msg = wdi_helpers.format_msg(gene.external_ids['Entrez Gene ID'], PROPS['Entrez Gene ID'], None,
                                             str(e), msg_type=type(e))
                wdi_core.WDItemEngine.log("ERROR", msg)
                gene.status = msg
            if gene.status is not True:
                self.failed.append(gene.entrez)
Project: scheduled-bots    Author: SuLab    | Project source | File source
def run(self, records, total=None, fast_run=True, write=True):
        records = self.filter(records)
        for record in tqdm(records, mininterval=2, total=total):
            entrez_gene = str(record['entrezgene']['@value'])
            if entrez_gene not in self.gene_wdid_mapping:
                wdi_core.WDItemEngine.log("WARNING", format_msg(entrez_gene, "P351", None,
                                                                "Gene item not found during protein creation", None))
                continue
            gene_wdid = self.gene_wdid_mapping[entrez_gene]

            # handle multiple proteins
            if 'uniprot' in record and 'Swiss-Prot' in record['uniprot']['@value']:
                uniprots = record['uniprot']['@value']['Swiss-Prot']
                for uniprot in uniprots:
                    record['uniprot']['@value']['Swiss-Prot'] = uniprot
                    self.run_one(record, gene_wdid, write)
            else:
                self.run_one(record, gene_wdid, write)
Project: scheduled-bots    Author: SuLab    | Project source | File source
def lookupLabels(changes):
        pids = set(s.pid for s in changes)
        qids = set(s.qid for s in changes)
        values = set(s.value for s in changes if s.value and PROP_TYPE.get(s.pid) == "WikibaseItem")
        ref_qids = set(chain(*[
            [s['value'] for s in change.ref_list if s['value'] and PROP_TYPE.get(s['prop']) == "WikibaseItem"]
            for change in changes]))
        ref_pids = set(chain(*[[s['prop'] for s in change.ref_list] for change in changes]))
        labels = dict()
        x = pids | qids | values | ref_qids | ref_pids
        x = set(y for y in x if y)
        for chunk in tqdm(chunks(x, 500), total=len(x) / 500):
            l = getConceptLabels(tuple(chunk))
            labels.update(l)

        for c in changes:
            if c.pid and c.pid in labels:
                c.pid_label = labels[c.pid]
            if c.qid and c.qid in labels:
                c.qid_label = labels[c.qid]
            if c.value and c.value in labels:
                c.value_label = labels[c.value]
            for ref in c.ref_list:
                ref['value_label'] = labels.get(ref['value'], '')
                ref['prop_label'] = labels.get(ref['prop'], '')
Project: scheduled-bots    Author: SuLab    | Project source | File source
def get_revisions_past_weeks(qids, weeks):
    """
    Get the revision IDs for revisions on `qids` items in the past `weeks` weeks
    :param qids: set of qids
    :param weeks: int
    :return:
    """
    revisions = set()
    qids_str = '"' + '","'.join(qids) + '"'
    for week in tqdm(range(weeks)):
        query = '''select rev_id, rev_page, rev_timestamp, page_id, page_namespace, page_title, page_touched FROM revision
                           inner join page on revision.rev_page = page.page_id WHERE
                           rev_timestamp > DATE_FORMAT(DATE_SUB(DATE_SUB(NOW(),INTERVAL {week} WEEK), INTERVAL 1 WEEK),'%Y%m%d%H%i%s') AND
                           rev_timestamp < DATE_FORMAT(DATE_SUB(NOW(), INTERVAL {week} WEEK),'%Y%m%d%H%i%s') AND
                           page_content_model = "wikibase-item" AND
                           page.page_title IN({qids});
                    '''.format(qids=qids_str, week=week)
        revision_df = query_wikidata_mysql(query)
        print(len(revision_df))
        print(revision_df.head(2))
        print(revision_df.tail(2))
        revisions.update(set(revision_df.rev_id))
    return revisions
Project: scheduled-bots    Author: SuLab    | Project source | File source
def main(chebi_iedb_map, log_dir="./logs", fast_run=False, write=True):
    login = wdi_login.WDLogin(user=WDUSER, pwd=WDPASS)
    wdi_core.WDItemEngine.setup_logging(log_dir=log_dir, logger_name='WD_logger', log_name=log_name,
                                        header=json.dumps(__metadata__))

    chebi_qid_map = id_mapper(PROPS['ChEBI-ID'])

    for chebi, iedb in tqdm(chebi_iedb_map.items()):
        if chebi not in chebi_qid_map:
            msg = wdi_helpers.format_msg(iedb, PROPS['IEDB Epitope ID'], None, "ChEBI:{} not found".format(chebi), "ChEBI not found")
            print(msg)
            wdi_core.WDItemEngine.log("WARNING", msg)
            continue
        s = [wdi_core.WDExternalID(iedb, PROPS['IEDB Epitope ID'], references=create_references(iedb))]
        item = wdi_core.WDItemEngine(wd_item_id=chebi_qid_map[chebi], data=s, domain="drugs", fast_run=fast_run,
                                     fast_run_base_filter={PROPS['ChEBI-ID']: ''}, fast_run_use_refs=True,
                                     ref_handler=ref_handlers.update_retrieved_if_new, global_ref_mode="CUSTOM")
        wdi_helpers.try_write(item, iedb, PROPS['IEDB Epitope ID'], login, edit_summary="Add IEDB Epitope ID",
                              write=write)
Project: scheduled-bots    Author: SuLab    | Project source | File source
def create_uniprot_relationships(login, release_wdid, collection, taxon=None, write=True, run_one=False):
    # only do uniprot proteins that are already in wikidata
    # returns list of qids of items that are modified or skipped (excluding created)
    if taxon:
        uniprot2wd = wdi_helpers.id_mapper(UNIPROT, (("P703", taxon),))
        fast_run_base_filter = {UNIPROT: "", "P703": taxon}
    else:
        uniprot2wd = wdi_helpers.id_mapper(UNIPROT)
        fast_run_base_filter = {UNIPROT: ""}

    cursor = collection.find({'_id': {'$in': list(uniprot2wd.keys())}}).batch_size(20)
    qids = []
    for n, doc in tqdm(enumerate(cursor), total=cursor.count(), mininterval=10.0):
        wd_item = create_for_one_protein(login, doc, release_wdid, uniprot2wd, fast_run_base_filter, write=write)
        if wd_item and not wd_item.create_new_item:
            qids.append(wd_item.wd_item_id)
        if run_one:
            break
    return qids
Project: SharesData    Author: xjkj123    | Project source | File source
def UpDataShare():
    thread = []
    MaxThread = 3
    num=0
    code = Tools().GetShareCode()
    for x in code:
        y = threading.Thread(target=ChildThead, args=(x,))
        thread.append(y)
    try:
        for t in tqdm(thread):
            t.start()
            while True:
                time.sleep(0.05)
                if len(threading.enumerate()) < MaxThread:
                    if len(code) - num < 13:
                        t.join()
                    num = num + 1
                    break
    except:
        print "1223"
Project: holcrawl    Author: shaypal5    | Project source | File source
def crawl_by_file(file_path, verbose, year=None):
    """Crawls IMDB and builds movie profiles for a movies in the given file."""
    results = {res_type : 0 for res_type in _result.ALL_TYPES}
    titles = _titles_from_file(file_path)
    if verbose:
        print("Crawling over all {} IMDB movies in {}...".format(
            len(titles), file_path))
    movie_pbar = tqdm(titles, miniters=1, maxinterval=0.0001,
                      mininterval=0.00000000001, total=len(titles))
    for title in movie_pbar:
        res = crawl_by_title(title, verbose, year, movie_pbar)
        results[res] += 1
    print("{} IMDB movie profiles crawled.".format(len(titles)))
    for res_type in _result.ALL_TYPES:
        print('{} {}.'.format(results[res_type], res_type))


# === uniting movie profiles to csv ===
Project: holcrawl    Author: shaypal5    | Project source | File source
def build_united_profiles(verbose):
    """Build movie profiles with data from all resources."""
    os.makedirs(_UNITED_DIR_PATH, exist_ok=True)
    prof_names = sorted(_prof_names_in_all_resources())
    if verbose:
        print("Building movie profiles with data from all resources.")
        prof_names = tqdm(prof_names)
    for prof_name in prof_names:
        file_name = prof_name + '.json'
        imdb_prof_path = os.path.join(_IMDB_DIR_PATH, file_name)
        with open(imdb_prof_path, 'r') as imbd_prof_file:
            imdb_prof = json.load(imbd_prof_file)
        meta_prof_path = os.path.join(_METACRITIC_DIR_PATH, file_name)
        with open(meta_prof_path, 'r') as meta_prof_file:
            meta_prof = json.load(meta_prof_file)
        united_prof = {**imdb_prof, **meta_prof}
        united_prof_fpath = os.path.join(_UNITED_DIR_PATH, file_name)
        with open(united_prof_fpath, 'w+') as unite_prof_file:
            json.dump(united_prof, unite_prof_file, indent=2, sort_keys=True)
Project: pyrsss    Author: butala    | Project source | File source
def tqdm_callback(N, notebook=True):
    """
    Return a :mod:`tqdm` progress bar expecting *N* iterations,
    suitable for Jupyter notebooks if *notebook* is true and for the
    terminal otherwise. The progress bar includes an additional method
    :func:`callback` (a function of one ignored parameter) meant to
    be passed as a callback called to update the progress bar.
    """
    if notebook:
        progress_bar = tqdm.tqdm_notebook(total=N)
    else:
        progress_bar = tqdm.tqdm(total=N)
    def callback(self, i):
        self.update()
    progress_bar.callback = partial(callback, progress_bar)
    return progress_bar
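A short usage sketch for the helper above (the do_work routine and its step count are hypothetical, not part of pyrsss): the bar's callback attribute is intended to be handed to code that reports progress by invoking a one-argument callable once per completed step.

def do_work(n_steps, progress_callback):
    # Hypothetical long-running routine that accepts a progress callback.
    for i in range(n_steps):
        # ... real work would happen here ...
        progress_callback(i)

bar = tqdm_callback(50, notebook=False)  # terminal bar expecting 50 updates
do_work(50, bar.callback)                # each call advances the bar by one step
bar.close()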
Project: deeppavlov    Author: deepmipt    | Project source | File source
def _generate_all_features(self):
        """
            Generates all features for all mentions
            and frees self.embeddings and self.features from memory.

            All feature vectors are pregenerated to increase get_batch speed.
        """
        print('DataLoader: generating all features')
        # self.mention_features = {m: self._make_mention_features(m) for ms in self.document_mentions for m in ms}
        assert self.embeddings is not None
        assert self.features is not None

        for ms in tqdm(self.document_mentions):
            for m in ms:
                self.mention_features[m] = self._make_mention_features(m)

        self.features_size = len(self.mention_features[m])
        self.embeddings = None
        print('DataLoader: generating all features finished')
Project: wurst    Author: IndEcol    | Project source | File source
def add_input_info_for_external_exchanges(activities, names):
    """Add details on exchange inputs from other databases"""
    names = set(names)
    cache = {}

    for ds in tqdm(activities):
        for exc in ds['exchanges']:
            if 'input' not in exc or exc['input'][0] in names:
                continue
            if exc['input'] not in cache:
                cache[exc['input']] = ActivityDataset.get(
                    ActivityDataset.database == exc['input'][0],
                    ActivityDataset.code == exc['input'][1],
                )
            obj = cache[exc['input']]
            exc['name'] = obj.name
            exc['product'] = obj.product
            exc['unit'] = obj.data['unit']
            exc['location'] = obj.location
            if exc['type'] == 'biosphere':
                exc['categories'] = obj.data['categories']
Project: hadan-gcloud    Author: youkpan    | Project source | File source
def __init__(self, dirName):
        """
        Args:
            dirName (string): directory where to load the corpus
        """
        self.MAX_NUMBER_SUBDIR = 10
        self.conversations = []
        __dir = os.path.join(dirName, "dialogs")
        number_subdir = 0
        for sub in tqdm(os.scandir(__dir), desc="Ubuntu dialogs subfolders", total=len(os.listdir(__dir))):
            if number_subdir == self.MAX_NUMBER_SUBDIR:
                print("WARNING: Early stoping, only extracting {} directories".format(self.MAX_NUMBER_SUBDIR))
                return

            if sub.is_dir():
                number_subdir += 1
                for f in os.scandir(sub.path):
                    if f.name.endswith(".tsv"):
                        self.conversations.append({"lines": self.loadLines(f.path)})
Project: hadan-gcloud    Author: youkpan    | Project source | File source
def vec2bin(input_path, output_path):
    input_fd  = open(input_path, "rb")
    output_fd = open(output_path, "wb")

    header = input_fd.readline()
    output_fd.write(header)

    vocab_size, vector_size = map(int, header.split())

    for line in tqdm(range(vocab_size)):
        word = []
        while True:
            ch = input_fd.read(1)
            output_fd.write(ch)
            if ch == b' ':
                word = b''.join(word).decode('utf-8')
                break
            if ch != b'\n':
                word.append(ch)
        vector = np.fromstring(input_fd.readline(), sep=' ', dtype='float32')
        output_fd.write(vector.tostring())

    input_fd.close()
    output_fd.close()
Project: crema    Author: bmcfee    | Project source | File source
def evaluate(input_path, n_jobs):

    aud, ann = zip(*crema.utils.get_ann_audio(input_path))

    test_idx = set(pd.read_json('index_test.json')['id'])

    # drop anything not in the test set
    ann = [ann_i for ann_i in ann if crema.utils.base(ann_i) in test_idx]
    aud = [aud_i for aud_i in aud if crema.utils.base(aud_i) in test_idx]

    stream = tqdm(zip(ann, aud), desc='Evaluating test set', total=len(ann))

    results = Parallel(n_jobs=n_jobs)(delayed(track_eval)(ann_i, aud_i)
                                      for ann_i, aud_i in stream)
    df = pd.DataFrame.from_dict(dict(results), orient='index')

    print('Results')
    print('-------')
    print(df.describe())

    df.to_json(os.path.join(OUTPUT_PATH, 'test_scores.json'))
Project: treelstm.pytorch    Author: dasguptar    | Project source | File source
def train(self, dataset):
        self.model.train()
        self.optimizer.zero_grad()
        total_loss = 0.0
        indices = torch.randperm(len(dataset))
        for idx in tqdm(range(len(dataset)),desc='Training epoch ' + str(self.epoch + 1) + ''):
            ltree, lsent, rtree, rsent, label = dataset[indices[idx]]
            linput, rinput = Var(lsent), Var(rsent)
            target = Var(map_label_to_target(label, dataset.num_classes))
            if self.args.cuda:
                linput, rinput = linput.cuda(), rinput.cuda()
                target = target.cuda()
            output = self.model(ltree, linput, rtree, rinput)
            loss = self.criterion(output, target)
            total_loss += loss.data[0]
            loss.backward()
            if idx % self.args.batchsize == 0 and idx > 0:
                self.optimizer.step()
                self.optimizer.zero_grad()
        self.epoch += 1
        return total_loss / len(dataset)

    # helper function for testing
Project: treelstm.pytorch    Author: dasguptar    | Project source | File source
def test(self, dataset):
        self.model.eval()
        total_loss = 0
        predictions = torch.zeros(len(dataset))
        indices = torch.arange(1, dataset.num_classes + 1)
        for idx in tqdm(range(len(dataset)),desc='Testing epoch  ' + str(self.epoch) + ''):
            ltree, lsent, rtree, rsent, label = dataset[idx]
            linput, rinput = Var(lsent, volatile=True), Var(rsent, volatile=True)
            target = Var(map_label_to_target(label, dataset.num_classes), volatile=True)
            if self.args.cuda:
                linput, rinput = linput.cuda(), rinput.cuda()
                target = target.cuda()
            output = self.model(ltree, linput, rtree, rinput)
            loss = self.criterion(output, target)
            total_loss += loss.data[0]
            output = output.data.squeeze().cpu()
            predictions[idx] = torch.dot(indices, torch.exp(output))
        return total_loss / len(dataset), predictions
Project: PyCasia    Author: lucaskjaero    | Project source | File source
def load_dataset(self, dataset, verbose=True):
        """
        Load a directory of gnt files. Yields the image and label in tuples.
        :param dataset: The directory to load.
        :return:  Yields (Pillow.Image.Image, label) pairs.
        """
        assert self.get_dataset(dataset) is True, "Datasets aren't properly downloaded, " \
                                                  "rerun to try again or download datasets manually."

        if verbose:
            print("Loading %s" % dataset)

        dataset_path = self.base_dataset_path + dataset
        for path in tqdm(glob.glob(dataset_path + "/*.gnt")):
            for image, label in self.load_gnt_file(path):
                yield image, label
Project: catalearn    Author: Catalearn    | Project source | File source
def __download_competition_file(self, competition, file_name, browser):

        url = 'https://www.kaggle.com/c/%s/download/%s' % (competition, file_name)
        res = browser.get(url, stream=True)

        total_size = int(res.headers.get('content-length', 0))

        if res.status_code != 200:
            print('error downloading %s' % file_name)
            return False

        file_name = os.path.basename(url)

        pbar = tqdm(total=total_size, unit='B', unit_scale=True, desc=file_name)
        chunk_size = 32 * 1024

        with open(file_name, 'wb') as file_handle:
            for data in res.iter_content(chunk_size):
                file_handle.write(data) 
                pbar.update(chunk_size)

        return True
Project: traffic_detection_yolo2    Author: wAuner    | Project source | File source
def frames2video(name, path):
    """
    Merges images in path into a video

    :param name: prefix for the output video file (written as name + '_video.mp4')
    :param path: path with prediction images
    :return:
    """
    batch_size = 100
    fnames = os.listdir(path)
    fnames.sort()


    #images = np.array([plt.imread(os.path.join(path, fname)) for fname in fnames])
    # h, w, c = images[0].shape
    videowriter = imageio.get_writer(name + '_video.mp4', fps=25)

    for fname in tqdm.tqdm(fnames):
        videowriter.append_data(plt.imread(os.path.join(path, fname)))
    videowriter.close()
Project: pdpipe    Author: shaypal5    | Project source | File source
def _op(self, df, verbose):
        inter_df = df
        colnames = list(self._bin_map.keys())
        if verbose:
            colnames = tqdm.tqdm(colnames)
        for colname in colnames:
            if verbose:
                colnames.set_description(colname)
            source_col = df[colname]
            loc = df.columns.get_loc(colname) + 1
            new_name = colname + "_bin"
            if self._drop:
                inter_df = inter_df.drop(colname, axis=1)
                new_name = colname
                loc -= 1
            inter_df = out_of_place_col_insert(
                df=inter_df,
                series=source_col.apply(
                    self._get_col_binner(self._bin_map[colname])),
                loc=loc,
                column_name=new_name)
        return inter_df
Project: pdpipe    Author: shaypal5    | Project source | File source
def _op(self, df, verbose):
        columns_to_encode = self._columns
        if self._columns is None:
            columns_to_encode = list(set(df.select_dtypes(
                include=['object', 'category']).columns).difference(
                    self._exclude_columns))
        if verbose:
            columns_to_encode = tqdm.tqdm(columns_to_encode)
        inter_df = df
        for colname in columns_to_encode:
            lbl_enc = sklearn.preprocessing.LabelEncoder()
            source_col = df[colname]
            loc = df.columns.get_loc(colname) + 1
            new_name = colname + "_enc"
            if self._drop:
                inter_df = inter_df.drop(colname, axis=1)
                new_name = colname
                loc -= 1
            inter_df = out_of_place_col_insert(
                df=inter_df,
                series=lbl_enc.fit_transform(source_col),
                loc=loc,
                column_name=new_name)
            self.encoders[colname] = lbl_enc
        return inter_df
Project: text    Author: pytorch    | Project source | File source
def segment(self, *args):
        """Segment one or more datasets with this subword field.

        Arguments:
            Positional arguments: Dataset objects or other indexable
                mutable sequences to segment. If a Dataset object is provided,
                all columns corresponding to this field are used; individual
                columns can also be provided directly.
        """
        sources = []
        for arg in args:
            if isinstance(arg, Dataset):
                sources += [getattr(arg, name) for name, field in
                            arg.fields.items() if field is self]
            else:
                sources.append(arg)
        for data in sources:
            for x in tqdm(data, 'segmenting'):
                x[:] = self.vocab.segment(x)
Project: pytorch_word2vec    Author: bamtercelboo    | Project source | File source
def cbow_train(self):
        print("CBOW Training......")
        self.cbow_model.save_embedding(self.data.id2word, 'cbow_begin_embedding.txt')
        pos_all_pairs = self.data.get_cbow_batch_all_pairs(self.batch_size, self.context_size)
        pair_count = len(pos_all_pairs)
        process_bar = tqdm(range(int(pair_count / self.batch_size)))
        for _ in process_bar:
            pos_pairs = self.data.get_cbow_batch_pairs(self.batch_size, self.window_size)
            if self.using_hs:
                pos_pairs, neg_pairs = self.data.get_cbow_pairs_by_huffman(pos_pairs)
            else:
                pos_pairs, neg_pairs = self.data.get_cbow_pairs_by_neg_sampling(pos_pairs, self.context_size)

            pos_u = [pair[0] for pair in pos_pairs]
            pos_v = [int(pair[1]) for pair in pos_pairs]
            neg_u = [pair[0] for pair in neg_pairs]
            neg_v = [int(pair[1]) for pair in neg_pairs]

            self.optimizer.zero_grad()
            loss = self.cbow_model.forward(pos_u, pos_v, neg_u, neg_v)
            loss.backward()
            self.optimizer.step()
        print("CBOW Trained and Saving File......")
        self.cbow_model.save_embedding(self.data.id2word, self.output_file_name)
        print("CBOW Trained and Saved File.")
Project: embeddings    Author: vzhong    | Project source | File source
def load_word2emb(self, show_progress=True, batch_size=1000):
        fin_name = self.ensure_file(path.join('fasttext', '{}.zip'.format(self.lang)), url=self.url.format(self.lang))
        seen = set()

        with zipfile.ZipFile(fin_name) as fin:
            content = fin.read('wiki.{}.vec'.format(self.lang))
            lines = content.splitlines()
            if show_progress:
                lines = tqdm(lines)
            batch = []
            for line in lines:
                elems = line.decode().rstrip().split()
                vec = [float(n) for n in elems[-self.d_emb:]]
                word = ' '.join(elems[:-self.d_emb])
                if word in seen:
                    continue
                seen.add(word)
                batch.append((word, vec))
                if len(batch) == batch_size:
                    self.insert_batch(batch)
                    batch.clear()
            if batch:
                self.insert_batch(batch)
Project: embeddings    Author: vzhong    | Project source | File source
def load_word2emb(self, show_progress=True, batch_size=1000):
        fin_name = self.ensure_file(path.join('glove', '{}.zip'.format(self.name)), url=self.setting.url)
        seen = set()

        with zipfile.ZipFile(fin_name) as fin:
            fname_zipped = [fzipped.filename for fzipped in fin.filelist if str(self.d_emb) in fzipped.filename][0]
            content = fin.read(fname_zipped)
            lines = content.splitlines()
            if show_progress:
                lines = tqdm(lines, total=self.setting.size)
            batch = []
            for line in lines:
                elems = line.decode().rstrip().split()
                vec = [float(n) for n in elems[-self.d_emb:]]
                word = ' '.join(elems[:-self.d_emb])
                if word in seen:
                    continue
                seen.add(word)
                batch.append((word, vec))
                if len(batch) == batch_size:
                    self.insert_batch(batch)
                    batch.clear()
            if batch:
                self.insert_batch(batch)
Project: embeddings    Author: vzhong    | Project source | File source
def load_word2emb(self, show_progress=True, batch_size=1000):
        fin_name = self.ensure_file('kazuma.tar.gz', url=self.url)
        seen = set()

        with tarfile.open(fin_name, 'r:gz') as fzip:
            ftxt = fzip.extractfile('charNgram.txt')
            content = ftxt.read()
            ftxt.close()
            lines = content.splitlines()
            if show_progress:
                lines = tqdm(lines)
            batch = []
            for line in lines:
                elems = line.decode().rstrip().split()
                vec = [float(n) for n in elems[-self.d_emb:]]
                word = ' '.join(elems[:-self.d_emb])
                if word in seen:
                    continue
                seen.add(word)
                batch.append((word, vec))
                if len(batch) == batch_size:
                    self.insert_batch(batch)
                    batch.clear()
            if batch:
                self.insert_batch(batch)
Project: SiteFab    Author: ebursztein    | Project source | File source
def render_posts(self):
        """Render posts using jinja2 templates."""

        for post in tqdm(self.posts, unit=' pages', miniters=1, desc="Posts"):
            template_name = "%s.html" % post.meta.template
            template = self.jinja2.get_template(template_name)
            html = post.html.decode("utf-8", 'ignore')
            rv = template.render(content=html, meta=post.meta, posts=self.posts, plugin_data=self.plugin_data, config=self.config,
            categories=self.posts_by_category.get_as_dict(), tags=self.posts_by_tag.get_as_dict(), templates=self.posts_by_template.get_as_dict(), 
            microdata=self.posts_by_microdata.get_as_dict())

            # Linting
            linter_results = self.linter.lint(post, rv, self)
            # Are we stopping on linting errors?
            if linter_results.has_errors and self.config.linter.stop_on_error:
                print(post.filename)
                for error in linter_results.info:
                    print("\t-%s:%s" % (error[0], error[1]))
                sys.exit(-1)

            path = "%s%s/" % (self.get_output_dir(), post.meta.permanent_url)
            path = path.replace('//', '/')
            files.write_file(path, 'index.html', rv)

    ### Templates functions ###
Project: allennlp    Author: allenai    | Project source | File source
def read(self, file_path: str):
        # if `file_path` is a URL, redirect to the cache
        file_path = cached_path(file_path)

        instances = []
        with open(file_path, 'r') as snli_file:
            logger.info("Reading SNLI instances from jsonl dataset at: %s", file_path)
            for line in tqdm.tqdm(snli_file):
                example = json.loads(line)

                label = example["gold_label"]
                if label == '-':
                    # These were cases where the annotators disagreed; we'll just skip them.  It's
                    # like 800 out of 500k examples in the training data.
                    continue

                premise = example["sentence1"]
                hypothesis = example["sentence2"]
                instances.append(self.text_to_instance(premise, hypothesis, label))
        if not instances:
            raise ConfigurationError("No instances were read from the given filepath {}. "
                                     "Is the path correct?".format(file_path))
        return Dataset(instances)
Project: allennlp    Author: allenai    | Project source | File source
def read(self, file_path):
        instances = []
        with open(file_path, "r") as data_file:
            logger.info("Reading instances from lines in file at: %s", file_path)
            for line_num, line in enumerate(tqdm.tqdm(data_file)):
                line = line.strip("\n")

                if not line:
                    continue

                line_parts = line.split('\t')
                if len(line_parts) != 2:
                    raise ConfigurationError("Invalid line format: %s (line number %d)" % (line, line_num + 1))
                source_sequence, target_sequence = line_parts
                instances.append(self.text_to_instance(source_sequence, target_sequence))
        if not instances:
            raise ConfigurationError("No instances read!")
        return Dataset(instances)
Project: allennlp    Author: allenai    | Project source | File source
def evaluate(model: Model,
             dataset: Dataset,
             iterator: DataIterator,
             cuda_device: int) -> Dict[str, Any]:
    model.eval()

    generator = iterator(dataset, num_epochs=1, cuda_device=cuda_device, for_training=False)
    logger.info("Iterating over dataset")
    generator_tqdm = tqdm.tqdm(generator, total=iterator.get_num_batches(dataset))
    for batch in generator_tqdm:
        model(**batch)
        metrics = model.get_metrics()
        description = ', '.join(["%s: %.2f" % (name, value) for name, value in metrics.items()]) + " ||"
        generator_tqdm.set_description(description)

    return model.get_metrics()
Project: torch_light    Author: ne7ermore    | Project source | File source
def train():
    rnn.train()
    total_loss = 0
    hidden = rnn.init_hidden(args.batch_size)
    for data, label in tqdm(training_data, mininterval=1,
                desc='Train Processing', leave=False):
        optimizer.zero_grad()
        hidden = repackage_hidden(hidden)
        target, hidden = rnn(data, hidden)
        loss = criterion(target, label)

        loss.backward()
        torch.nn.utils.clip_grad_norm(rnn.parameters(), args.clip)
        optimizer.step()

        total_loss += loss.data
    return total_loss[0]/training_data.sents_size

# ##############################################################################
# Save Model
# ##############################################################################
Project: torch_light    Author: ne7ermore    | Project source | File source
def train():
    rnn.train()
    total_loss = 0
    hidden = rnn.init_hidden()
    for data, label in tqdm(training_data, mininterval=1,
                desc='Train Processing', leave=False):
        optimizer.zero_grad()
        hidden = repackage_hidden(hidden)
        target, hidden = rnn(data, hidden)
        loss = criterion(target, label)

        loss.backward()
        optimizer.step()

        total_loss += loss.data
    return total_loss[0]/training_data.sents_size

# ##############################################################################
# Save Model
# ##############################################################################
Project: AVSR-Deep-Speech    Author: pandeydivesh15    | Project source | File source
def fit(self, 
            data_x_train,
            data_x_dev=None,
            data_x_test=None,
            n_epochs=10,
            batch_size=10):
        assert n_epochs > 0
        assert batch_size < data_x_train.shape[0]

        size_x_train = data_x_train.shape[0]

        n_batches = size_x_train // batch_size

        for e in range(n_epochs):
            epoch_costs = np.zeros(n_batches)
            bar = tqdm(range(n_batches), desc='Epoch: {:d}'.format(e))

            for i in bar:
                batch_x = data_x_train[i*batch_size:(i+1)*batch_size]
                err = self.partial_fit(batch_x)
                epoch_costs[i] = err

            mean_cost = epoch_costs.mean()
            print('Train error: {:.4f}'.format(mean_cost))

            if data_x_dev is not None:
                random_indices = np.random.randint(0, data_x_dev.shape[0], batch_size)
                batch_x = data_x_dev[random_indices]
                err = self.get_cost(batch_x)
                print('Validation data error: {:.4f}'.format(err))

        if data_x_test is not None:
            err = self.get_cost(data_x_test)
            print('Test data error: {:.4f}'.format(err))
Project: spyking-circus    Author: spyking-circus    | Project source | File source
def get_tqdm_progressbar(iterator):
    sys.stderr.flush()
    return tqdm.tqdm(iterator, bar_format='{desc}{percentage:3.0f}%|{bar}|[{elapsed}<{remaining}, {rate_fmt}]'  , ncols=72)
Project: pytorch-semseg    Author: meetshah1995    | Project source | File source
def validate(args):

    # Setup Dataloader
    data_loader = get_loader(args.dataset)
    data_path = get_data_path(args.dataset)
    loader = data_loader(data_path, split=args.split, is_transform=True, img_size=(args.img_rows, args.img_cols))
    n_classes = loader.n_classes
    valloader = data.DataLoader(loader, batch_size=args.batch_size, num_workers=4)
    running_metrics = runningScore(n_classes)

    # Setup Model
    model = get_model(args.model_path[:args.model_path.find('_')], n_classes)
    state = convert_state_dict(torch.load(args.model_path)['model_state'])
    model.load_state_dict(state)
    model.eval()

    for i, (images, labels) in tqdm(enumerate(valloader)):
        model.cuda()
        images = Variable(images.cuda(), volatile=True)
        labels = Variable(labels.cuda(), volatile=True)

        outputs = model(images)
        pred = outputs.data.max(1)[1].cpu().numpy()
        gt = labels.data.cpu().numpy()

        running_metrics.update(gt, pred)

    score, class_iou = running_metrics.get_scores()

    for k, v in score.items():
        print(k, v)

    for i in range(n_classes):
        print(i, class_iou[i])
Project: pytorch-semseg    Author: meetshah1995    | Project source | File source
def setup(self, pre_encode=False):
        sbd_path = get_data_path('sbd')
        voc_path = get_data_path('pascal')

        target_path = self.root + '/SegmentationClass/pre_encoded/'
        if not os.path.exists(target_path):
            os.makedirs(target_path)

        sbd_train_list = tuple(open(sbd_path + 'dataset/train.txt', 'r'))
        sbd_train_list = [id_.rstrip() for id_ in sbd_train_list]

        self.files['train_aug'] = self.files['train'] + sbd_train_list

        if pre_encode:
            print("Pre-encoding segmentation masks...")
            for i in tqdm(sbd_train_list):
                lbl_path = sbd_path + 'dataset/cls/' + i + '.mat'
                lbl = io.loadmat(lbl_path)['GTcls'][0]['Segmentation'][0].astype(np.int32)
                lbl = m.toimage(lbl, high=lbl.max(), low=lbl.min())
                m.imsave(target_path + i + '.png', lbl)

            for i in tqdm(self.files['trainval']):
                lbl_path = self.root + '/SegmentationClass/' + i + '.png'
                lbl = self.encode_segmap(m.imread(lbl_path))
                lbl = m.toimage(lbl, high=lbl.max(), low=lbl.min())
                m.imsave(target_path + i + '.png', lbl)
Project: human-rl    Author: gsastry    | Project source | File source
def build_feature_files(base_directory,
                        new_directory,
                        data_loader,
                        n=None,
                        negative_example_keep_prob=1.0):
    os.makedirs(new_directory, exist_ok=False)
    episode_paths = frame.episode_paths(base_directory)
    label_counts = [0, 0]
    if n is not None:
        np.random.shuffle(episode_paths)
        episode_paths = episode_paths[:n]
    for episode_path in tqdm.tqdm(episode_paths):
        try:
            features, labels = data_loader.load_features_and_labels([episode_path])
        except:
            traceback.print_exc()
        else:
            keep = np.logical_or(labels, (np.less(
                np.random.rand(len(labels)), negative_example_keep_prob)))
            labels = labels[keep]

            for i in range(len(label_counts)):
                label_counts[i] += np.count_nonzero(labels == i)
            features = {k: v[keep] for k, v in features.items()}
            new_path = path_relative_to_new_directory(base_directory, new_directory, episode_path,
                                                      ".features")
            os.makedirs(os.path.dirname(new_path), exist_ok=True)
            with open(new_path, 'wb') as f:
                pickle.dump((features, labels), f)
    return label_counts
Project: human-rl    Author: gsastry    | Project source | File source
def copy_episodes(indir, outdir, n):
    episode_paths = frame.episode_paths(indir)
    np.random.shuffle(episode_paths)
    episode_paths = episode_paths[:n]
    start = len(indir)
    for p in tqdm.tqdm(episode_paths):
        assert p.startswith(indir), p
        outfile = outdir + p[start:]
        os.makedirs(os.path.dirname(outfile), exist_ok=True)
        shutil.copyfile(p, outfile)
Project: human-rl    Author: gsastry    | Project source | File source
def label_episodes(directory, classifier):
    episode_paths = frame.episode_paths(directory)
    data_loader = DataLoader(hparams=classifier.hparams)
    for episode_path in tqdm.tqdm(episode_paths):
        try:
            data_loader.predict_episodes(classifier, [episode_path], prefix="frame/classifier_")
        except EOFError as e:
            traceback.print_exc()
            print("Error reading {}".format(episode_path))
            os.remove(episode_path)