Python multiprocessing module: cpu_count() example source code

The code examples below, extracted from open-source Python projects, illustrate how multiprocessing.cpu_count() is used in practice.

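Before the project examples, a quick sketch of the call itself: multiprocessing.cpu_count() reports the number of logical CPUs and raises NotImplementedError when the count cannot be determined, while os.cpu_count() (Python 3.4+) returns None in that case, hence the common `or 1` fallback.

import multiprocessing
import os

# Number of logical CPUs visible to the OS; raises NotImplementedError
# if the count cannot be determined.
print(multiprocessing.cpu_count())

# os.cpu_count() reports the same value but returns None instead of
# raising, so callers often guard with `or 1`.
print(os.cpu_count() or 1)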
Project: Starfish    Author: BillWang139967
def uptime(self):
        with open('/proc/uptime', 'r') as f:
            uptime, idletime = f.readline().split()
            up_seconds = int(float(uptime))
            idle_seconds = int(float(idletime))
            # on some machines, such as Linode VPS, idle time may be bigger than uptime
            if idle_seconds > up_seconds:
                cpu_count = multiprocessing.cpu_count()
                idle_seconds = idle_seconds/cpu_count
                # on some VPS this value may still be bigger than uptime;
                # perhaps the dom0 host machine has more cores,
                # so we approximate a correction for it
                if idle_seconds > up_seconds:
                    for n in range(2,10):
                        if idle_seconds/n < up_seconds:
                            idle_seconds = idle_seconds/n
                            break
            fmt = '{days} days {hours} hours {minutes} minutes {seconds} seconds'
            uptime_string = strfdelta(datetime.timedelta(seconds = up_seconds), fmt)
            idletime_string = strfdelta(datetime.timedelta(seconds = idle_seconds), fmt)
        return {
            'up': uptime_string,
            'idle': idletime_string,
            'idle_rate': div_percent(idle_seconds, up_seconds),
        }
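This uptime() snippet relies on two helpers, strfdelta and div_percent, defined elsewhere in the project; a plausible reconstruction (names taken from the snippet, bodies assumed) might look like:

def strfdelta(tdelta, fmt):
    # Assumed helper: render a datetime.timedelta through a template such as
    # '{days} days {hours} hours {minutes} minutes {seconds} seconds'.
    values = {'days': tdelta.days}
    values['hours'], rem = divmod(tdelta.seconds, 3600)
    values['minutes'], values['seconds'] = divmod(rem, 60)
    return fmt.format(**values)

def div_percent(a, b):
    # Assumed helper: a / b as a percentage string, guarding against b == 0.
    return '{:.2f}%'.format(100.0 * a / b) if b else '0.00%'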
Project: ComicSpider    Author: QuantumLiu
def download_chapter_m(self):
        '''
        Download all pages of the chapter using multiprocessing
        '''
        results=[]
        if not self.pages:
            print('No page')
            return None
        mp=Pool(min(8,max(cpu_count(),4)))
        for page in self.pages:
            results.append(mp.apply_async(self.download_page,(page,)))
        mp.close()
        mp.join()
        num=sum([result.get() for result in results])
        print('Downloaded {} pages'.format(num))
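The pool size expression min(8,max(cpu_count(),4)) clamps the worker count into the range [4, 8]: a dual-core machine still gets four workers for the I/O-bound downloads, while a many-core machine is capped at eight. As a reusable sketch (function name ours):

from multiprocessing import cpu_count

def bounded_workers(lo=4, hi=8):
    # Clamp cpu_count() into [lo, hi]; same shape as the expression above.
    return min(hi, max(cpu_count(), lo))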
Project: NeoAnalysis    Author: neoanalysis
def suggestedWorkerCount():
        if 'linux' in sys.platform:
            ## I think we can do a little better here..
            ## cpu_count does not consider that there is little extra benefit to using hyperthreaded cores.
            try:
                cores = {}
                pid = None

                for line in open('/proc/cpuinfo'):
                    m = re.match(r'physical id\s+:\s+(\d+)', line)
                    if m is not None:
                        pid = m.groups()[0]
                    m = re.match(r'cpu cores\s+:\s+(\d+)', line)
                    if m is not None:
                        cores[pid] = int(m.groups()[0])
                return sum(cores.values())
            except:
                return multiprocessing.cpu_count()

        else:
            return multiprocessing.cpu_count()
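The /proc/cpuinfo parsing above counts physical cores so that hyperthreaded siblings are not oversubscribed. Assuming psutil is available, the same number is one call away; on Linux, the stdlib can additionally report how many CPUs the process is allowed to use:

import os
import psutil  # assumed third-party dependency

# Physical cores only (None if undeterminable), falling back to logical count.
physical = psutil.cpu_count(logical=False) or psutil.cpu_count()

# CPUs the current process may be scheduled on (Linux, Python 3.3+);
# respects affinity/cgroup limits that cpu_count() ignores.
if hasattr(os, 'sched_getaffinity'):
    usable = len(os.sched_getaffinity(0))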
Project: detox    Author: tox-dev
def tox_addoption(parser):
    def positive_integer(value):
        ivalue = int(value)
        if ivalue <= 0:
            raise argparse.ArgumentTypeError(
                "%s is an invalid positive int value" % value)
        return ivalue

    try:
        num_proc = multiprocessing.cpu_count()
    except Exception:
        num_proc = 2
    parser.add_argument(
        "-n", "--num",
        type=positive_integer,
        action="store",
        default=num_proc,
        dest="numproc",
        help="set the number of concurrent processes "
             "(default %s)." % num_proc)
Project: girder_worker    Author: girder
def test_celery_task_revoke_in_queue(self, params):
        # Fill up queue
        blockers = []
        for _ in range(0, multiprocessing.cpu_count()):
            blockers.append(cancelable.delay(sleep_interval=0.1))

        result = cancelable.delay()
        result.revoke()

        assert wait_for_status(self.getCurrentUser(), result.job, JobStatus.CANCELED)

        # Now clean up the blockers
        for blocker in blockers:
            blocker.revoke()

        return result.job
Project: girder_worker    Author: girder
def test_traditional_task_cancel_in_queue(self, params):
        # Fill up queue
        blockers = []
        for _ in range(0, multiprocessing.cpu_count()):
            blockers.append(cancelable.delay(sleep_interval=0.1))

        jobModel = self.model('job', 'jobs')
        job = jobModel.createJob(
            title='test_traditional_task_cancel',
            type='worker', handler='worker_handler',
            user=self.getCurrentUser(), public=False, args=(self.girder_worker_run_cancelable,),
            kwargs={'inputs': {},
                    'outputs': {}})

        job['kwargs']['jobInfo'] = utils.jobInfoSpec(job)

        jobModel.save(job)
        jobModel.scheduleJob(job)
        jobModel.cancelJob(job)

        # Now clean up the blockers
        for blocker in blockers:
            blocker.revoke()

        return job
Project: GANGogh    Author: rkjones4
def for_genre(genre,num):
    pool = ThreadPool(multiprocessing.cpu_count()-1)
    nums = list(range(1,num))
    results = pool.starmap(soupit,zip(nums,itertools.repeat(genre)))
    pool.close()
    pool.join()

    # gather the URLs returned by the sub-processes that succeeded into a single list
    new_results = []
    for j in results:
        if j:
            for i in j:
                new_results.append(i)

    pool = ThreadPool(multiprocessing.cpu_count()-1)
    pool.starmap(dwnld,zip(enumerate(new_results),itertools.repeat(genre)))
    pool.close()
    pool.join()
Project: fingerprint-securedrop    Author: freedomofpress
def __init__(self, model_timestamp, world, model_type, 
                 hyperparameters, feature_scaling=True,
                 n_cores=multiprocessing.cpu_count(), k=10):
        """
        Args:
            model_type [string]: machine learning algorithm to be used
            hyperparameters [dict]: hyperparameter set to be used for the
                                    machine learning algorithm
            k [int]: number of k-folds
            world [dict]: world type (open- or closed-world)
                          and parameters if necessary
        """

        self.model_timestamp = model_timestamp
        self.hyperparameters = hyperparameters
        self.model_type = model_type
        self.world_type = world["type"]
        self.frac_obs = world["observed_fraction"]
        self.n_cores = n_cores
        self.k = k
        self.feature_scaling = feature_scaling
        self.db = database.ModelStorage()
        self.train_class_balance = 'DEFAULT'
        self.base_rate = 'DEFAULT'
Project: ronin    Author: tliron
def __init__(self, command=None, jobs=None):
        """
        :param command: ``cargo`` command; defaults to the context's ``rust.cargo_command``
        :type command: basestring or ~types.FunctionType
        :param jobs: number of jobs; defaults to CPU count + 1
        :type jobs: int
        """

        super(CargoBuild, self).__init__()
        self.command = lambda ctx: which(ctx.fallback(command, 'rust.cargo_command',
                                                      DEFAULT_CARGO_COMMAND))
        self.add_argument('build')
        self.add_argument_unfiltered('--manifest-path', '$in')
        if jobs is None:
            jobs = cpu_count() + 1
        self.jobs(jobs)
        self.hooks.append(_cargo_output_path_hook)
        self.hooks.append(_cargo_debug_hook)
Project: TextRankPlus    Author: zuoxiaolei
def run():
    '''
    Train and save the word2vec model, then print similar words for a few test terms
    '''
    reload(sys)
    sys.setdefaultencoding('utf8')
    program = os.path.basename(sys.argv[0])
    logger = logging.getLogger(program)
    logging.basicConfig(format='%(asctime)s: %(levelname)s: %(message)s')
    logging.root.setLevel(level=logging.INFO)
    logger.info("running %s" % ' '.join(sys.argv))

    outp1 = r'wiki_model'
    outp2 = r'vector.txt'
    model = Word2Vec(sentences, size=400, window=5, min_count=5, workers=multiprocessing.cpu_count())
    model.save(outp1)
    model.wv.save_word2vec_format(outp2, binary=False)

    testData = ['??','??','??','??']
    for i in testData:
        temp = model.most_similar(i)
        for j in temp:
            print '%f %s'%(j[1],j[0])
        print ''
Project: mongodb_consistent_backup    Author: Percona-Lab
def __init__(self, task_name, manager, config, timer, base_dir, backup_dir, **kwargs):
        self.task_name  = task_name
        self.manager    = manager
        self.config     = config
        self.timer      = timer
        self.base_dir   = base_dir
        self.backup_dir = backup_dir
        self.args       = kwargs
        self.verbose    = self.config.verbose

        self.running   = False
        self.stopped   = False
        self.completed = False
        self.exit_code = 255

        self.thread_count          = None
        self.cpu_count             = cpu_count()
        self.compression_method    = 'none'
        self.compression_supported = ['none']
        self.timer_name            = self.__class__.__name__

        signal(SIGINT, SIG_IGN)
        signal(SIGTERM, self.close)
Project: onsager_deep_learning    Author: mborgerding
def random_access_problem(which=1):
    import raputil as ru
    if which == 1:
        opts = ru.Problem.scenario1()
    else:
        opts = ru.Problem.scenario2()

    p = ru.Problem(**opts)
    x1 = p.genX(1)
    y1 = p.fwd(x1)
    A = p.S
    M,N = A.shape
    nbatches = int(math.ceil(1000 /x1.shape[1]))
    prob = NumpyGenerator(p=p,nbatches=nbatches,A=A,opts=opts,iid=(which==1))
    if which==2:
        prob.maskX_ = tf.expand_dims( tf.constant( (np.arange(N) % (N//2) < opts['Nu']).astype(np.float32) ) , 1)

    _,prob.noise_var = p.add_noise(y1)

    unused = p.genYX(nbatches) # for legacy reasons -- want to compare against a previous run
    (prob.yval, prob.xval) = p.genYX(nbatches)
    (prob.yinit, prob.xinit) = p.genYX(nbatches)
    import multiprocessing as mp
    prob.nsubprocs = mp.cpu_count()
    return prob
Project: highfive    Author: abau171
def run_worker_pool(job_handler, host="localhost", port=48484,
                      *, max_workers=None):
    """
    Runs a pool of workers which connect to a remote HighFive master and begin
    executing calls.
    """

    if max_workers is None:
        max_workers = multiprocessing.cpu_count()

    processes = []
    for _ in range(max_workers):
        p = multiprocessing.Process(target=worker_main,
                args=(job_handler, host, port))
        p.start()
        processes.append(p)

    logger.debug("workers started")

    for p in processes:
        p.join()

    logger.debug("all workers completed")
Project: DataProperty    Author: thombashi
def to_dp_matrix(self, value_matrix):
        self.__update_dp_converter()
        logger.debug("max_workers = {}".format(self.max_workers))

        value_matrix = self.__strip_data_matrix(value_matrix)

        if self.__is_dp_matrix(value_matrix):
            logger.debug("already a dataproperty matrix")
            return value_matrix

        if not self.max_workers:
            self.max_workers = multiprocessing.cpu_count()

        if self.max_workers <= 1:
            return self.__to_dp_matrix_st(value_matrix)

        return self.__to_dp_matrix_mt(value_matrix)
Project: glassdoor-analysis    Author: THEdavehogue
def multi_scrub_text(reviews):
    '''
    Function to lemmatize text - utilizes multiprocessing for parallelization

    INPUT:
        reviews: array-like, pandas DataFrame column containing review texts

    OUTPUT:
        lemmatized: pandas DataFrame column with cleaned texts
    '''
    lemmatized = []
    cpus = cpu_count() - 1
    pool = Pool(processes=cpus)
    lemmatized = pool.map(lemmatize_text, reviews)
    pool.close()
    pool.join()
    return lemmatized
Project: glassdoor-analysis    Author: THEdavehogue
def multi_core_scrape(num_pages, db_coll):
    '''
    Map the API scrape across number of processors - 1 for performance boost.

    INPUT:
        num_pages: int, number of pages to scrape
        db_coll: pymongo collection object, collection to add documents to

    OUTPUT:
        None, records inserted into MongoDB
    '''
    cpus = cpu_count() - 1
    pool = Pool(processes=cpus)
    pages = range(1, num_pages + 1)
    employers = pool.map(scrape_api_page, pages)
    pool.close()
    pool.join()
    print 'Inserting Employer Records into MongoDB . . .'
    pbar = ProgressBar()
    for page in pbar(employers):
        db_coll.insert_many(page)
Project: GALEX    Author: rahul-aedula95
def degreetocart(data_f1):

    global df2
    df2 = data_f1.copy()
    print "phase 1"

    df2['X'] = np.nan
    df2['Y'] = np.nan
    df2['Z'] = np.nan
    df2 = df2.astype(float)

    print "phase 2"
    num_cores = multiprocessing.cpu_count()


    results_x = Parallel(n_jobs=num_cores)(delayed(xloop)(i) for i in xrange(0,len(df2)))
    print "phase 3"
    #print results_x

    #print results_x

    #print " this is "
    #print results_x[0]

    results_y = Parallel(n_jobs=num_cores)(delayed(yloop)(i) for i in xrange(0,len(df2)))
    print "phase 4"
    results_z = Parallel(n_jobs=num_cores)(delayed(zloop)(i) for i in xrange(0,len(df2)))
    print "phase 5"
    #print results_y



    #Parallel(n_jobs=num_cores)(delayed(adjloop)(i) for i in xrange(0,len(df2)))
    for i in xrange(0,len(df2)):
        print i
        df2['X'][i] = results_x[i]
        df2['Y'][i] = results_y[i]
        df2['Z'][i] = results_z[i]
Project: keras-text    Author: raghakot
def _parse_spacy_kwargs(**kwargs):
    """Supported args include:

    Args:
        n_threads/num_threads: Number of threads to use. Uses num_cpus - 1 by default.
        batch_size: The number of texts to accumulate into a common working set before processing.
            (Default value: 1000)
    """
    n_threads = kwargs.get('n_threads') or kwargs.get('num_threads')
    batch_size = kwargs.get('batch_size')

    if n_threads is None or n_threads == -1:
        n_threads = cpu_count() - 1
    if batch_size is None or batch_size == -1:
        batch_size = 1000
    return n_threads, batch_size
Project: avocado-misc-tests    Author: avocado-framework
def test(self):
        '''
        Test Execution with necessary args
        '''
        dir = self.params.get('dir', default='.')
        nprocs = self.params.get('nprocs', default=None)
        seconds = self.params.get('seconds', default=60)
        args = self.params.get('args', default='')
        if not nprocs:
            nprocs = multiprocessing.cpu_count()
        loadfile = os.path.join(self.srcdir, 'client.txt')
        cmd = '%s/dbench %s %s -D %s -c %s -t %d' % (self.srcdir, nprocs, args,
                                                     dir, loadfile, seconds)
        process.run(cmd)

        self.results = process.system_output(cmd)
        pattern = re.compile(r"Throughput (.*?) MB/sec (.*?) procs")
        (throughput, procs) = pattern.findall(self.results)[0]
        perf_json = {'throughput': throughput, 'procs': procs}
        output_path = os.path.join(self.outputdir, "perf.json")
        json.dump(perf_json, open(output_path, "w"))
Project: sequana    Author: sequana
def __init__(self, parent=None):
        super().__init__(parent=parent)


        self.ui = Ui_Snakemake()
        self.ui.setupUi(self)

        # This is for the --cluster-config case
        # Note the double underscore that is used later to be replaced by a dash
        self.ui.snakemake_options_cluster_cluster__config_value = FileBrowser()
        self.ui.horizontalLayout_4.addWidget(
            self.ui.snakemake_options_cluster_cluster__config_value)

        self._application = "sequana_gui"
        self._section = "snakemake_dialog"
        self.read_settings()

        # Set maximum of local cores to be used
        cpu = multiprocessing.cpu_count()
        self.ui.snakemake_options_local_cores_value.setMaximum(cpu)
Project: pytorch-skipthoughts    Author: kaniblu
def load_embeddings_mp(path, word_dim, processes=None):

    if processes is None:
        processes = multiprocessing.cpu_count()

    pool = mp.Pool(processes, initializer=_mp_initialize,
                   initargs=(word_dim,))

    with open(path, "r") as f:
        iterator = chunks(f, n=processes,
                          k=processes * 10000)
        ret = {}
        for batches in iterator:
            results = pool.map_async(_mp_process, batches)
            results = results.get()
            results = aggregate_dicts(*results)

            ret.update(results)

        return ret
Project: POT    Author: rflamary
def parmap(f, X, nprocs=multiprocessing.cpu_count()):
    """ paralell map for multiprocessing """
    q_in = multiprocessing.Queue(1)
    q_out = multiprocessing.Queue()

    proc = [multiprocessing.Process(target=fun, args=(f, q_in, q_out))
            for _ in range(nprocs)]
    for p in proc:
        p.daemon = True
        p.start()

    sent = [q_in.put((i, x)) for i, x in enumerate(X)]
    [q_in.put((None, None)) for _ in range(nprocs)]
    res = [q_out.get() for _ in range(len(sent))]

    [p.join() for p in proc]

    return [x for i, x in sorted(res)]
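One caveat in the signature above: nprocs=multiprocessing.cpu_count() is evaluated once, at import time, so the pool size is frozen from then on. The usual None-sentinel idiom defers the lookup to call time; a simplified sketch (it drops parmap's queue-based design, which lets f be a lambda or closure under the fork start method, something Pool.map cannot pickle):

import multiprocessing

def parmap_deferred(f, X, nprocs=None):
    # Resolve the core count when called, not when the module is imported.
    if nprocs is None:
        nprocs = multiprocessing.cpu_count()
    with multiprocessing.Pool(nprocs) as pool:
        return pool.map(f, X)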
Project: Dallinger    Author: Dallinger
def load_user_config(self):
        config = get_config()
        workers = config.get("threads")
        if workers == "auto":
            workers = str(multiprocessing.cpu_count() * 2 + 1)

        host = config.get("host")
        bind_address = "{}:{}".format(host, self.port)
        self.options = {
            'bind': bind_address,
            'workers': workers,
            'worker_class': WORKER_CLASS,
            'loglevels': self.loglevels,
            'loglevel': self.loglevels[config.get("loglevel")],
            'errorlog': '-',
            'accesslog': '-',
            'proc_name': 'dallinger_experiment_server',
            'limit_request_line': '0',
            'when_ready': when_ready,
        }
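The "auto" branch applies the classic gunicorn sizing rule of (2 x cores) + 1 workers; isolated as a sketch (function name ours):

import multiprocessing

def resolve_workers(setting):
    # "auto" maps to gunicorn's rule of thumb; anything else is an explicit count.
    if setting == "auto":
        return str(multiprocessing.cpu_count() * 2 + 1)
    return str(setting)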
Project: nimo    Author: wolfram2012
def __init__(self,fitness,args=[],kwargs={},population_size=100,n_processes="AUTO"):
        self.fitness = fitness
        self.args = args
        self.kwargs = kwargs
        self.population_size = population_size
        self.n_processes = n_processes
        if self.n_processes == "AUTO":
            self.n_processes = mp.cpu_count()

        self.run_data = None

        self.running_workers = 0

        self.best_score = np.inf
        self.population = []
        self.bests = []
        self.worsts = []
        self.history = []
        self.iter = 0
Project: swarm    Author: a7vinx
def get_do_task(self):
        proc=[]
        num=self._args.process_num or multiprocessing.cpu_count()
        for cur in range(num):
            p=multiprocessing.Process(target=self._get_do_task_proc)
            p.start()
            proc.append(p)
        # start a new thread to listen for commands from the master host;
        # daemon=True means we need not wait for this thread to exit
        t=threading.Thread(target=self._response_master)
        t.daemon=True
        t.start()
        for cur in proc:
            cur.join()
        LOG.debug('task completed')
Project: paragraph2vec    Author: thunlp
def __init__(self, fname, processes=None, lemmatize=utils.HAS_PATTERN, dictionary=None, filter_namespaces=('0',)):
        """
        Initialize the corpus. Unless a dictionary is provided, this scans the
        corpus once, to determine its vocabulary.

        If `pattern` package is installed, use fancier shallow parsing to get
        token lemmas. Otherwise, use simple regexp tokenization. You can override
        this automatic logic by forcing the `lemmatize` parameter explicitly.

        """
        self.fname = fname
        self.filter_namespaces = filter_namespaces
        self.metadata = False
        if processes is None:
            processes = max(1, multiprocessing.cpu_count() - 1)
        self.processes = processes
        self.lemmatize = lemmatize
        if dictionary is None:
            self.dictionary = Dictionary(self.get_texts())
        else:
            self.dictionary = dictionary
Project: veros    Author: dionhaefner
def parse_cli():
    parser = argparse.ArgumentParser(description="Run Veros benchmarks")
    parser.add_argument("-f", "--fortran-library", type=str, help="Path to pyOM2 fortran library")
    parser.add_argument("-s", "--sizes", nargs="*", type=float, required=True,
                        help="Problem sizes to test (total number of elements)")
    parser.add_argument("-c", "--components", nargs="*", choices=COMPONENTS, default=["numpy"], metavar="COMPONENT",
                        help="Numerical backend components to benchmark (possible values: {})".format(", ".join(COMPONENTS)))
    parser.add_argument("-n", "--nproc", type=int, default=multiprocessing.cpu_count(),
                        help="Number of processes / threads for parallel execution")
    parser.add_argument("-o", "--outfile", default="benchmark_{}.json".format(time.time()), help="JSON file to write timings to")
    parser.add_argument("-t", "--timesteps", default=1000, type=int, help="Number of time steps that each benchmark is run for")
    parser.add_argument("--only", nargs="*", default=AVAILABLE_BENCHMARKS,
                        help="Run only these benchmarks (possible values: {})".format(", ".join(AVAILABLE_BENCHMARKS)),
                        choices=AVAILABLE_BENCHMARKS, required=False, metavar="BENCHMARK")
    parser.add_argument("--mpiexec", default="mpiexec", help="Executable used for calling MPI (e.g. mpirun, mpiexec)")
    parser.add_argument("--slurm", action="store_true", help="Run benchmarks using SLURM scheduling command (srun)")
    parser.add_argument("--debug", action="store_true", help="Additionally print each command that is executed")
    parser.add_argument("--float-type", default="float64", help="Data type for floating point arrays in Veros components")
    parser.add_argument("--burnin", default=3, type=int, help="Number of iterations to exclude in timings")
    return parser.parse_args()
Project: demos    Author: dfirence
def run_concurrently( queue ):
    start = time.time()
    cpus  = mp.cpu_count()
    qsize = queue.qsize()
    procs = []
    with ProcessPoolExecutor( cpus ) as executor:
        for n in xrange( qsize ):
            proc = mp.Process( target=run_plugin, args=( queue.get(),) )
            procs.append( proc )
            proc.start()
            time.sleep( 0.05 )
        for proc in procs:
            proc.join()
            time.sleep( 0.05 )
    #end = '[+] Ends  {:30} {}: {:.2f}s'.format( 'Concurrency of', qsize, 'tasks',time.time() - start)
    t = '{:.2f}s'.format( time.time() - start )
    end = '[+] Ends  [ {} ] Concurrent Tasks'.format( qsize )

    print ('\033[1;32;40m' + '{:35}--> {}{}'.format(end, t, '\n'))
    print '{}{}'.format( '-' * 48, '\n' )
    #print '{}{}{}{}'.format( end, '\n', '-' * 48, '\n' )
    return
Project: MyCluster    Author: yinminggang
def trainWord2Vector(sentence_count, vector_dimension, train_count):

    lines, model_out, vector_out = "sources/splited_words.txt", "result/word2vec.model", "result/pre_word2vec.vector"
    logging.info("??????")
    sentences = LineSentence(lines)
    # min_count=3 would drop words occurring fewer than 3 times; the trained vectors go to word2vec.vector
    # workers is the number of training threads, set here to the CPU count (gensim defaults to 3)
    # sg selects the training algorithm; sg=1 means skip-gram
    model = Word2Vec(sentences, sg=1, size=vector_dimension, window=8,
                     min_count=0, workers=multiprocessing.cpu_count())
    # run several extra training passes to refine the model
    for i in range(train_count):
        model.train(sentences=sentences, total_examples=sentence_count, epochs=model.iter)

    # trim unneeded model memory = use(much) less RAM
    # model.init_sims(replace=True)
    model.save(model_out)
    model.wv.save_word2vec_format(vector_out)
Project: MyCluster    Author: yinminggang
def trainWord2Vector(sentence_count, vector_dimension, train_count):

    lines, model_out, vector_out = "com/com/test1/test1sources/splited_words.txt", \
                                   "com/com/test1/test1sources/word2vec.model", \
                                   "com/com/test1/test1sources/word2vec.vector"
    logging.info("??????")
    sentences = LineSentence(lines)
    # min_count=3 would drop words occurring fewer than 3 times; the trained vectors go to word2vec.vector
    # workers is the number of training threads, set here to the CPU count (gensim defaults to 3)
    model = Word2Vec(sentences, sg=1, size=vector_dimension, window=8,
                     min_count=0, workers=multiprocessing.cpu_count())
    # run several extra training passes to refine the model
    for i in range(train_count):
        model.train(sentences=sentences, total_examples=sentence_count, epochs=model.iter)

    # trim unneeded model memory = use(much) less RAM
    # model.init_sims(replace=True)
    model.save(model_out)
    model.wv.save_word2vec_format(vector_out)
Project: GAEDataExport    Author: nikhilsaraf
def main():
    input_dir, output_dir = getDirs()
    table_list = listFiles(input_dir)

    concurrency = cpu_count()
    print 'Using {0:d} Processes'.format(concurrency)
    pool = Pool(concurrency)

    # perform the passed in write action (function) for each csv row
    time_capture = TimeCapture(time.time())
    results = pool.map(
        multiprocess,
        izip(repeat(output_dir),
            [copy.deepcopy(time_capture) for i in range(len(table_list))],
            table_list,
            repeat(write)))
    time_capture.end(1)

    pool.close()
    pool.join()

    print 'Finished Successfully!'
    displayResults(results, time_capture.total_time)
Project: AshsSDK    Author: thehappydinoa
def __init__(self, max_workers=None):
        """Initializes a new ThreadPoolExecutor instance.

        Args:
            max_workers: The maximum number of threads that can be used to
                execute the given calls.
        """
        if max_workers is None:
            # Use this number because ThreadPoolExecutor is often
            # used to overlap I/O instead of CPU work.
            max_workers = (cpu_count() or 1) * 5
        if max_workers <= 0:
            raise ValueError("max_workers must be greater than 0")

        self._max_workers = max_workers
        self._work_queue = queue.Queue()
        self._threads = set()
        self._shutdown = False
        self._shutdown_lock = threading.Lock()
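This vendored executor mirrors the standard library: in Python 3.5-3.7, concurrent.futures.ThreadPoolExecutor used the same (cpu_count() or 1) * 5 default on the theory that threads overlap I/O rather than CPU work; Python 3.8 changed the default to min(32, cpu_count() + 4). The stdlib equivalent:

from concurrent.futures import ThreadPoolExecutor

# max_workers=None picks the version-specific default described above.
with ThreadPoolExecutor() as pool:
    results = list(pool.map(abs, [-1, -2, 3]))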
Project: python-GreyTheory    Author: Kalvar
def _execute_models(self, gmnn_queue=[], is_gm11=False): 
        if self._is_empty(gmnn_queue):
            return []
        pool        = mp.Pool()
        cpu_count   = self.cpu_count
        length      = len(gmnn_queue)
        block_count = long(math.ceil(length / float(cpu_count)))
        start_index = 0
        end_length  = cpu_count
        for block in xrange(0, block_count):
            for gm_model in gmnn_queue[start_index:end_length]:
                # pass the bound method itself; calling it inline would run
                # the analysis synchronously and hand apply_async its result
                if not is_gm11:
                    pool.apply_async(gm_model.analyze)
                else:
                    pool.apply_async(gm_model.forecast)

            start_index += cpu_count
            end_length  += cpu_count
            if end_length > length:
                end_length = length

        self._close_pool(pool)
        return gmnn_queue
Project: NebulaSolarDash    Author: toddlerya
def get_cpus(self):
        """
        Get the CPU model string and the number of CPUs
        """
        try:
            pipe = os.popen("cat /proc/cpuinfo |" + "grep 'model name'")
            data = pipe.read().strip().split(':')[-1]
            pipe.close()

            if not data:
                pipe = os.popen("cat /proc/cpuinfo |" + "grep 'Processor'")
                data = pipe.read().strip().split(':')[-1]
                pipe.close()

            cpus = multiprocessing.cpu_count()

            data = "{CPUS} x {CPU_TYPE}".format(CPUS=cpus, CPU_TYPE=data)

        except Exception as err:
            print err
            data = str(err)

        return data
Project: universe    Author: openai
def configure(self, n=1, pool_size=None, episode_limit=None):
        self.n = n
        self.envs = [self.spec.make() for _ in range(self.n)]

        if pool_size is None:
            pool_size = min(len(self.envs), multiprocessing.cpu_count() - 1)
            pool_size = max(1, pool_size)

        self.worker_n = []
        m = int((self.n + pool_size - 1) / pool_size)
        for i in range(0, self.n, m):
            envs = self.envs[i:i+m]
            self.worker_n.append(Worker(envs, i))

        if episode_limit is not None:
            self._episode_id.episode_limit = episode_limit
Project: songci    Author: wings27
def save_emblems_field(
            self, emblem_with_field_list, field_name, index=True):
        total_len = len(emblem_with_field_list)
        self.logger.info('Saving field [%s], total=%d', field_name, total_len)

        workers = (multiprocessing.cpu_count() or 1)
        emblem_freq_chunks = MapReduceDriver.chunks(
            emblem_with_field_list, int(total_len / workers))

        if index:
            self.data_source.create_index(
                self.COLLECTION_EMBLEM, 'name', unique=True)
            self.data_source.create_index(
                self.COLLECTION_EMBLEM, field_name)
            field = emblem_with_field_list[0][1]
            if isinstance(field, dict):
                for key in field.keys():
                    self.data_source.create_index(
                        self.COLLECTION_EMBLEM, field_name + '.' + key)

        with multiprocessing.Pool(processes=workers) as pool:
            pool.starmap(
                self._save_emblems_field,
                zip(emblem_freq_chunks, repeat(field_name)))
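One edge case worth noting: int(total_len / workers) is 0 whenever there are fewer items than workers, which would hand MapReduceDriver.chunks (a project-specific helper) a zero chunk size. A guarded sketch, assuming chunks() strides by the given size:

def safe_chunk_size(total_len, workers):
    # Never return 0, even when total_len < workers.
    return max(1, total_len // workers)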
Project: CSB    Author: csb-toolbox
def command_line(self):

        cmd = csb.apps.ArgHandler(self.program, __doc__)
        cpu = multiprocessing.cpu_count()

        cmd.add_scalar_option('database', 'd', str, 'PDBS25 database directory (containing PDBS25cs.scs)', required=True)
        cmd.add_scalar_option('shifts', 's', str, 'assigned chemical shifts table (NMR STAR file fragment)', required=True)    

        cmd.add_scalar_option('window', 'w', int, 'sliding window size', default=8)
        cmd.add_scalar_option('top', 't', int, 'maximum number per starting position', default=25)                
        cmd.add_scalar_option('cpu', 'c', int, 'maximum degree of parallelism', default=cpu)

        cmd.add_scalar_option('verbosity', 'v', int, 'verbosity level', default=1)        
        cmd.add_scalar_option('output', 'o', str, 'output directory', default='.')        
        cmd.add_boolean_option('filtered-map', 'f', 'make an additional filtered fragment map of centroids', default=False)

        cmd.add_positional_argument('QUERY', str, 'query sequence  (FASTA file)')

        return cmd
Project: CSB    Author: csb-toolbox
def command_line(self):

        cpu = multiprocessing.cpu_count()
        cmd = csb.apps.ArgHandler(self.program, __doc__)

        cmd.add_scalar_option('pdb', 'p', str, 'the PDB database (a directory containing all PDB files)', required=True)
        cmd.add_scalar_option('native', 'n', str, 'native structure of the target (PDB file)', required=True)
        cmd.add_scalar_option('chain', 'c', str, 'chain identifier (if not specified, the first chain)', default=None)

        cmd.add_scalar_option('top', 't', int, 'read top N fragments per position', default=25)
        cmd.add_scalar_option('cpu', 'C', int, 'maximum degree of parallelism', default=cpu)
        cmd.add_scalar_option('rmsd', 'r', float, 'RMSD cutoff for precision and coverage', default=1.5)         
        cmd.add_scalar_option('output', 'o', str, 'output directory', default='.')

        cmd.add_boolean_option('save-structures', 's', 'create a PDB file for each fragment, superimposed over the native', default=False)

        cmd.add_positional_argument('library', str, 'Fragment library file in Rosetta NNmake format')

        return cmd
Project: CSB    Author: csb-toolbox
def runmany(self, contexts, workers=mp.cpu_count(), cpu=1):

        if workers > len(contexts):
            workers = len(contexts)

        results = []
        taskargs = [(self.program, self.db, cpu, c) for c in contexts]

        pool = mp.Pool(workers)

        try:
            for c in pool.map(_task, taskargs):
                results.append(c)
        except KeyboardInterrupt:
            pass
        finally:
            pool.terminate()

        return results
Project: katana-sdk-python2    Author: kusanagi
def __init__(self, callbacks, args, **kwargs):
        """Constructor.

        :param callbacks: Callbacks for registered action handlers.
        :type callbacks: dict
        :param args: CLI arguments.
        :type args: dict
        :param error_callback: Callback to use when errors occur.
        :type error_callback: function
        :param source_file: Full path to component source file.
        :type source_file: str

        """

        self.__args = args
        self.__socket = None
        self.__schema_registry = get_schema_registry()
        self._pool = ThreadPool(cpu_count() * 5)

        self.callbacks = callbacks
        self.error_callback = kwargs.get('error_callback')
        self.source_file = kwargs.get('source_file')

        self.context = None
        self.poller = None
Project: avocado-misc-tests    Author: avocado-framework-tests
def test(self):
        '''
        Test Execution with necessary args
        '''
        dir = self.params.get('dir', default='.')
        nprocs = self.params.get('nprocs', default=None)
        seconds = self.params.get('seconds', default=60)
        args = self.params.get('args', default='')
        if not nprocs:
            nprocs = multiprocessing.cpu_count()
        loadfile = os.path.join(self.sourcedir, 'client.txt')
        cmd = '%s/dbench %s %s -D %s -c %s -t %d' % (self.sourcedir, nprocs,
                                                     args, dir, loadfile,
                                                     seconds)
        process.run(cmd)

        self.results = process.system_output(cmd)
        pattern = re.compile(r"Throughput (.*?) MB/sec (.*?) procs")
        (throughput, procs) = pattern.findall(self.results)[0]
        self.whiteboard = json.dumps({'throughput': throughput,
                                      'procs': procs})
Project: flickr_downloader    Author: Denisolt
def __init__(self, job_queue, initializer=None, auth_generator=None,
                 num_processes=None, session=requests.Session):
        if num_processes is None:
            num_processes = multiprocessing.cpu_count() or 1

        if num_processes < 1:
            raise ValueError("Number of processes should at least be 1.")

        self._job_queue = job_queue
        self._response_queue = queue.Queue()
        self._exc_queue = queue.Queue()
        self._processes = num_processes
        self._initializer = initializer or _identity
        self._auth = auth_generator or _identity
        self._session = session
        self._pool = [
            thread.SessionThread(self._new_session(), self._job_queue,
                                 self._response_queue, self._exc_queue)
            for _ in range(self._processes)
        ]
Project: Starfish    Author: BillWang139967
def cpuinfo(self):
        models = []
        bitss = []
        cpuids = []
        with open('/proc/cpuinfo', 'r') as f:
            for line in f:
                if 'model name' in line or 'physical id' in line or 'flags' in line:
                    item, value = line.strip().split(':')
                    item = item.strip()
                    value = value.strip()
                    if item == 'model name':
                        models.append(re.sub(r'\s+', ' ', value))
                    elif item == 'physical id':
                        cpuids.append(value)
                    elif item == 'flags':
                        if ' lm ' in value:
                            bitss.append('64bit')
                        else:
                            bitss.append('32bit')
        cores = [{'model': x, 'bits': y} for x, y in zip(models, bitss)]
        cpu_count = len(set(cpuids))
        if cpu_count == 0: cpu_count = 1
        return {
            'cores': cores,
            'cpu_count': cpu_count,
            'core_count': len(cores),
        }
Project: rca-evaluation    Author: sieve-microservices
def main():
    processes = []
    for i in range(int(cpu_count())):
        p = Process(target=worker)
        processes.append(p)
        p.start()

    for p in processes:
        p.join()
Project: core-framework    Author: RedhawkSDR
def get_nonnuma_affinity_ctx( affinity_ctx ):
    # test should run but affinity will be ignored
    import multiprocessing
    maxcpus=multiprocessing.cpu_count()
    maxnodes=1
    all_cpus='0-'+str(maxcpus-1)
    all_cpus_sans0='0-'+str(maxcpus-1)
    if maxcpus == 2:
        all_cpus_sans0='0-1'
    elif maxcpus == 1 :
        all_cpus='0'
        all_cpus_sans0=''

    numa_layout=[ all_cpus ]
    affinity_match={ "all" :  all_cpus,
             "sock0":  all_cpus,
             "sock1": all_cpus,
             "sock0sans0":  all_cpus_sans0,
             "sock1sans0":  all_cpus_sans0,
             "5" : all_cpus,
             "8-10" : all_cpus }

    affinity_ctx['maxcpus']=maxcpus
    affinity_ctx['maxnodes']=maxnodes
    affinity_ctx['all_cpus']=all_cpus
    affinity_ctx['all_cpus_sans0']=all_cpus_sans0
    affinity_ctx['numa_layout']=numa_layout
    affinity_ctx['affinity_match']=affinity_match
    affinity_ctx['affinity_dev_match']=copy.copy(affinity_match)
Project: ComicSpider    Author: QuantumLiu
def download_all_chapters_p(self):
        '''
        Download all chapters in parallel using multiprocessing
        '''
        mp=Pool(min(8,max(cpu_count(),4)))
        for key in self.chapters.keys():
            mp.apply_async(self.download_chapter,(key,False))
        mp.close()
        mp.join()
Project: gap_statistic    Author: milesgranger
def __init__(self, n_jobs: int=-1, parallel_backend: str='joblib') -> None:
        """
        Construct OptimalK to use n_jobs (parallelism via joblib, multiprocessing, or a single core).
        :param n_jobs - int: Number of CPU cores to use. Use all cores if n_jobs == -1
        """
        self.parallel_backend = parallel_backend if parallel_backend in ['joblib', 'multiprocessing'] else None
        self.n_jobs = n_jobs if 1 <= n_jobs <= cpu_count() else cpu_count()  # type: int
        self.n_jobs = 1 if parallel_backend is None else self.n_jobs