Python theano module: gpuarray() code examples

The following 9 code examples, extracted from open-source Python projects, show how to use the theano.gpuarray module.
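
Before the project listings, here is a minimal sketch of the pattern they all share: selecting a CUDA device through theano.gpuarray.use() and compiling a function on it. The device name "cuda0" and the toy expression are illustrative assumptions, not taken from any of the projects below.

import numpy as np
import theano
import theano.gpuarray
import theano.tensor as T

# Bind this process to a CUDA device via the gpuarray backend
# (device name is an assumption; pick one available on your machine).
theano.gpuarray.use("cuda0")

# Compile a trivial elementwise function; with the backend active,
# the graph is placed on the selected GPU.
x = T.fvector("x")
f = theano.function([x], T.exp(x))
print(f(np.ones(4, dtype="float32")))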

Project: Synkhronos    Author: astooke    | project source | file source
def test_multi_process_simultaneous(n_gpu=2, worker_func_maker=unpickle_func, bar_loop=False):
    barrier = mp.Barrier(n_gpu)
    if PROFILE:
        target = sim_profiling_worker
    else:
        target = simultaneous_worker
    procs = [mp.Process(target=target,
                        args=(rank, worker_func_maker, barrier, bar_loop))
            for rank in range(1, n_gpu)]
    for p in procs:
        p.start()

    theano.gpuarray.use("cuda0")
    f_train, name = build_train_func()

    barrier.wait()
    # workers build or unpickle
    time.sleep(1)
    barrier.wait()
    # workers are ready.
    test_the_function(f_train, name=name, barrier=barrier, bar_loop=bar_loop)

    for p in procs:
        p.join()
Project: Synkhronos    Author: astooke    | project source | file source
def init_gpus(rank, n_parallel=None):
    import theano
    import theano.gpuarray
    try:
        theano.gpuarray.use("cuda" + str(rank))
    except Exception as exc:
        if n_parallel is not None:
            raise RuntimeError("Master unable to use GPU.") from exc
        else:
            sync.workers_OK.value = False
            raise RuntimeError(
                "Worker rank {} unable to use GPU.".format(rank)) from exc
    finally:
        sync.barrier_out.wait()
    if n_parallel is not None:
        if sync.workers_OK.value:
            print("Synkhronos: {} GPUs initialized, master rank: {}".format(
                n_parallel, rank))
        else:
            raise RuntimeError("Workers did not initialize GPUs.")
Project: Synkhronos    Author: astooke    | project source | file source
def test_one_process(gpu=0):
    theano.gpuarray.use("cuda" + str(gpu))

    f_train, train_name = build_train_func()
    pickle_func(f_train)
    f_unpkl, unpkl_name = unpickle_func()

    test_the_function(f_train, train_name)
    test_the_function(f_unpkl, unpkl_name)
Project: Synkhronos    Author: astooke    | project source | file source
def test_multi_process_sequence(n_gpu=2, worker_func_maker=unpickle_func):
    barrier = mp.Barrier(n_gpu)
    if PROFILE:
        target = seq_profiling_worker
    else:
        target = sequence_worker
    procs = [mp.Process(target=target,
                        args=(rank, n_gpu, barrier, worker_func_maker))
        for rank in range(1, n_gpu)]
    for p in procs:
        p.start()

    theano.gpuarray.use("cuda0")
    f_train, name = build_train_func()
    pickle_func(f_train)

    barrier.wait()
    # workers make function (maybe unpickle).
    barrier.wait()
    for i in range(n_gpu):
        time.sleep(1)
        barrier.wait()
        if i == 0:
            test_the_function(f_train, name)

    for p in procs:
        p.join()
Project: Synkhronos    Author: astooke    | project source | file source
def sequence_worker(rank, n_gpu, barrier, function_maker):
    theano.gpuarray.use("cuda" + str(rank))
    # maybe master makes the function
    barrier.wait()
    f_train, name = function_maker(rank=rank)  # maybe unpickle
    barrier.wait()
    for i in range(n_gpu):
        time.sleep(1)
        barrier.wait()
        if i == rank:
            test_the_function(f_train, name=name, rank=rank)
Project: Synkhronos    Author: astooke    | project source | file source
def simultaneous_worker(rank, function_maker, barrier, bar_loop):
    theano.gpuarray.use("cuda" + str(rank))
    # maybe master makes the function
    barrier.wait()
    f_train, name = function_maker(rank)
    barrier.wait()
    test_the_function(f_train, name=name, rank=rank, barrier=barrier, bar_loop=bar_loop)
Project: Synkhronos    Author: astooke    | project source | file source
def __init__(self, n_gpu, rank, master_rank):
    gpu_ctx = theano.gpuarray.get_context(None)
    clique_id = gpu_coll.GpuCommCliqueId(gpu_ctx)
    if rank == master_rank:
        # Master publishes its clique id for the workers, then waits.
        sync.dict["gpu_comm_id"] = clique_id.comm_id
        sync.barrier.wait()
    else:
        # Workers wait for the master, then adopt its clique id.
        sync.barrier.wait()
        clique_id.comm_id = sync.dict["gpu_comm_id"]
    self.comm = gpu_coll.GpuComm(clique_id, n_gpu, rank)
    self.n_gpu = n_gpu
    self.avg_fac = 1. / n_gpu
    self.master_rank = master_rank
Project: Theano-MPI    Author: uoguelph-mlrg    | project source | file source
def init_device(device='gpu0'):

    if device.startswith('cuda'):

        import os
        if 'THEANO_FLAGS' in os.environ:
            raise ValueError('Use theanorc to set the theano config')

        os.environ['THEANO_FLAGS'] = 'device={0}'.format(device)
        import theano.gpuarray
        # This is a bit of black magic that may stop working in future
        # theano releases
        ctx = theano.gpuarray.type.get_context(None)
        drv = None

    elif device.startswith('gpu'):

        gpuid = int(device[-1])

        import pycuda.driver as drv
        drv.init()
        dev = drv.Device(gpuid)
        ctx = dev.make_context()
        import theano.sandbox.cuda
        theano.sandbox.cuda.use(device)
        import theano
    else:
        drv = None
        ctx = None
        import theano.sandbox.cuda
        theano.sandbox.cuda.use(device)
        import theano

    from theano import function, config, shared, sandbox, tensor
    import numpy as np

    vlen = 10 * 30 * 768  # 10 x #cores x # threads per core
    iters = 1000

    rng = np.random.RandomState(22)
    arr = rng.rand(vlen)

    shared_x = theano.shared(np.asarray(arr, config.floatX))
    shared_xx = theano.shared(np.asarray(arr, config.floatX))

    x = tensor.fvector("x")
    # compile a function so that shared_x becomes part of a computation graph on the GPU (CudaNdarray or GpuArray)
    f = function([], tensor.exp(x), givens=[(x, shared_x)])


    if np.any([isinstance(x.op, tensor.Elemwise) and
                  ('Gpu' not in type(x.op).__name__)
                  for x in f.maker.fgraph.toposort()]):
        print('Used the cpu')
    else:
        print('Used the gpu')

    # if np.any([isinstance(x.op, tensor.Elemwise) for x in f.maker.fgraph.toposort()]) and device!='cpu':
    #     raise TypeError('graph not compiled on GPU') 

    return drv, ctx, arr, shared_x, shared_xx
Project: Theano-Deep-learning    Author: GeekLiB    | project source | file source
def traverse(out, x, x_copy, d, visited=None):
    """
    Function used by scan to parse the tree and figure out which nodes
    it needs to replace.

    There are two options:
        1) x and x_copy are both on the host; then you replace x with x_copy.
        2) x is on the GPU and x_copy is on the host; then you need to replace
           host_from_gpu(x) with x_copy.
    This happens because initially shared variables are on GPU... which is
    fine for the main computational graph but confuses things a bit for the
    inner graph of scan.

    """
    # ``visited`` is a set of nodes that are already known and don't need to be
    # checked again, speeding up the traversal of multiply-connected graphs.
    # If a ``visited`` set is given, it is updated in-place so the caller
    # knows which nodes have already been seen.
    if visited is None:
        visited = set()
    if out in visited:
        return d
    visited.add(out)
    from theano.sandbox import cuda
    from theano.gpuarray.basic_ops import gpu_from_host, host_from_gpu
    from theano.gpuarray import pygpu_activated
    from theano.gpuarray.type import GpuArrayType
    if out == x:
        if isinstance(x.type, cuda.CudaNdarrayType):
            d[out] = cuda.gpu_from_host(x_copy)
        else:
            assert isinstance(x.type, GpuArrayType)
            d[out] = gpu_from_host(x.type.context_name)(x_copy)
        return d
    elif out.owner is None:
        return d
    elif (cuda.cuda_available and
          out.owner.op == cuda.host_from_gpu and
          out.owner.inputs == [x]):
        d[out] = tensor.as_tensor_variable(x_copy)
        return d
    elif (pygpu_activated and
          out.owner.op == host_from_gpu and
          out.owner.inputs == [x]):
        d[out] = tensor.as_tensor_variable(x_copy)
        return d
    else:
        for inp in out.owner.inputs:
            d = traverse(inp, x, x_copy, d, visited)
        return d

