Python numpy module: split() example source code

We have extracted the following 50 code examples from open-source Python projects to illustrate how to use numpy.split().
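Before the project examples, here is a minimal, self-contained sketch (not taken from any of the projects below) showing the two calling conventions of numpy.split(): an integer number of equal sections, and a list of split indices. Both forms, plus the axis argument, appear throughout the excerpts that follow.

import numpy as np

x = np.arange(12)

# Split into 3 equal sections: [0..3], [4..7], [8..11]
equal_parts = np.split(x, 3)

# Split at explicit indices: [0..4], [5..8], [9..11]
at_indices = np.split(x, [5, 9])

# 2-D split along axis=1, as used by several batch-creation snippets below
m = np.arange(24).reshape(4, 6)
columns = np.split(m, 3, axis=1)  # three blocks of shape (4, 2)

print([a.tolist() for a in equal_parts])
print([a.tolist() for a in at_indices])
print([c.shape for c in columns])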

Project: pybot    Author: spillai    | Project source | File source
def setup_ps3eye_dataset(filename, start_idx=0, max_length=None, every_k_frames=1, scale=1): 
    dataset = stereo_dataset(filename=filename, 
                             channel='CAMERA', start_idx=start_idx, max_length=max_length, 
                             every_k_frames=every_k_frames, scale=scale, split='horizontal')

    # Setup one-time calibration
    calib_params = setup_ps3eye(scale=scale)
    dataset.calib = calib_params
    dataset.scale = scale
    return dataset


# def bumblebee_stereo_calib_params_ming(scale=1.0): 
#     fx, fy = 809.53*scale, 809.53*scale
#     cx, cy = 321.819*scale, 244.555*scale
#     baseline = 0.119909
#     return get_calib_params(fx, fy, cx, cy, baseline=baseline)

# def bumblebee_stereo_calib_params(scale=1.0): 
#     fx, fy = 0.445057*640*scale, 0.59341*480*scale
#     cx, cy = 0.496427*640*scale, 0.519434*480*scale
#     baseline = 0.120018 
#     return get_calib_params(fx, fy, cx, cy, baseline=baseline)
Project: CausalGAN    Author: mkocaoglu    | Project source | File source
def save_figure_images(model_type, tensor, filename, size, padding=2, normalize=False, scale_each=False):

    print('[*] saving:',filename)

    #nrow=size[0]
    nrow=size[1]#Was this number per row and now number of rows?

    if model_type=='began':
        began_save_image(tensor,filename,nrow,padding,normalize,scale_each)
    elif model_type=='dcgan':
        #images = np.split(tensor,len(tensor))
        images=tensor
        dcgan_save_images(images,size,filename)


#Began originally
Project: NeoAnalysis    Author: neoanalysis    | Project source | File source
def __detect_spike_peak(self,ang_data,Thr,peak_before,peak_after):
        if Thr < 0:
            dd_0 = np.where(ang_data<Thr)[0]
        elif Thr >=0:
            dd_0 = np.where(ang_data>=Thr)[0]
        dd_1 = np.diff(dd_0,n=1)
        dd_2 = np.where(dd_1 > 1)[0]+1
        dd_3 = np.split(dd_0,dd_2)
        spike_peak = []
        if Thr < 0:
            for ite in dd_3:
                if ite.size:
                    potent_peak = ite[ang_data[ite].argmin()]
                    if (potent_peak + peak_after <= ang_data.shape[0]) and (potent_peak - peak_before >= 0):
                        spike_peak.append(potent_peak)
        elif Thr >=0:
            for ite in dd_3:
                if ite.size:
                    potent_peak = ite[ang_data[ite].argmax()]
                    if (potent_peak + peak_after <= ang_data.shape[0]) and (potent_peak - peak_before >= 0):
                        spike_peak.append(potent_peak)
        return np.array(spike_peak)
Project: NeoAnalysis    Author: neoanalysis    | Project source | File source
def __detect_spike_peak(self,ang_data,Thr,peak_before,peak_after):
        if Thr < 0:
            dd_0 = np.where(ang_data<Thr)[0]
        elif Thr >=0:
            dd_0 = np.where(ang_data>=Thr)[0]
        dd_1 = np.diff(dd_0,n=1)
        dd_2 = np.where(dd_1 > 1)[0]+1
        dd_3 = np.split(dd_0,dd_2)
        spike_peak = []
        if Thr < 0:
            for ite in dd_3:
                if ite.size:
                    potent_peak = ite[ang_data[ite].argmin()]
                    if (potent_peak + peak_after <= ang_data.shape[0]) and (potent_peak - peak_before >= 0):
                        spike_peak.append(potent_peak)
        elif Thr >=0:
            for ite in dd_3:
                if ite.size:
                    potent_peak = ite[ang_data[ite].argmax()]
                    if (potent_peak + peak_after <= ang_data.shape[0]) and (potent_peak - peak_before >= 0):
                        spike_peak.append(potent_peak)
        return np.array(spike_peak)
Project: SRGAN-tensorflow    Author: zoharli    | Project source | File source
def batch_ssim(dbatch):
    im1,im2=np.split(dbatch,2)
    imgsize=im1.shape[1]*im1.shape[2]
    avg1=im1.mean((1,2),keepdims=1)
    avg2=im2.mean((1,2),keepdims=1)
    std1=im1.std((1,2),ddof=1)
    std2=im2.std((1,2),ddof=1)
    cov=((im1-avg1)*(im2-avg2)).mean((1,2))*imgsize/(imgsize-1)
    avg1=np.squeeze(avg1)
    avg2=np.squeeze(avg2)
    k1=0.01
    k2=0.03
    c1=(k1*255)**2
    c2=(k2*255)**2
    c3=c2/2
    return np.mean((2*avg1*avg2+c1)*2*(cov+c3)/(avg1**2+avg2**2+c1)/(std1**2+std2**2+c2))
Project: structured-output-ae    Author: sbelharbi    | Project source | File source
def read_pts_file(self, pts_path):
        """Read a pts file that contains the coordinates of the landmarks.

        """
        with open(pts_path) as f:
            content = f.readlines()
        content = content[3:-1]  # skip the 3 header lines and the closing "}" line.
        nbr = len(content)
        X = np.zeros((nbr,1))
        Y = np.zeros((nbr,1))
        for i in xrange(nbr):
            line = content[i].split(' ')
            X[i] = np.float(line[0])
            Y[i] = np.float(line[1].replace('\n', ''))

        # remove 1 to start counting from 0 (python)        
        X = X - 1
        Y = Y - 1

        return X,Y
Project: kor-char-rnn-tensorflow    Author: insikk    | Project source | File source
def create_batches(self):
        self.num_batches = int(self.tensor.size / (self.batch_size *
                                                   self.seq_length))

        # When the data (tensor) is too small,
        # let's give them a better error message
        if self.num_batches == 0:
            assert False, "Not enough data. Make seq_length and batch_size small."

        self.tensor = self.tensor[:self.num_batches * self.batch_size * self.seq_length]
        xdata = self.tensor
        ydata = np.copy(self.tensor)
        ydata[:-1] = xdata[1:]
        ydata[-1] = xdata[0]
        self.x_batches = np.split(xdata.reshape(self.batch_size, -1),
                                  self.num_batches, 1)
        self.y_batches = np.split(ydata.reshape(self.batch_size, -1),
                                  self.num_batches, 1)
Project: Pytorch-Sketch-RNN    Author: alexis-jacq    | Project source | File source
def forward(self, inputs, batch_size, hidden_cell=None):
        if hidden_cell is None:
            # then must init with zeros
            if use_cuda:
                hidden = Variable(torch.zeros(2, batch_size, hp.enc_hidden_size).cuda())
                cell = Variable(torch.zeros(2, batch_size, hp.enc_hidden_size).cuda())
            else:
                hidden = Variable(torch.zeros(2, batch_size, hp.enc_hidden_size))
                cell = Variable(torch.zeros(2, batch_size, hp.enc_hidden_size))
            hidden_cell = (hidden, cell)
        _, (hidden,cell) = self.lstm(inputs.float(), hidden_cell)
        # hidden is (2, batch_size, hidden_size), we want (batch_size, 2*hidden_size):
        hidden_forward, hidden_backward = torch.split(hidden,1,0)
        hidden_cat = torch.cat([hidden_forward.squeeze(0), hidden_backward.squeeze(0)],1)
        # mu and sigma:
        mu = self.fc_mu(hidden_cat)
        sigma_hat = self.fc_sigma(hidden_cat)
        sigma = torch.exp(sigma_hat/2.)
        # N ~ N(0,1)
        z_size = mu.size()
        if use_cuda:
            N = Variable(torch.normal(torch.zeros(z_size),torch.ones(z_size)).cuda())
        else:
            N = Variable(torch.normal(torch.zeros(z_size),torch.ones(z_size)))
        z = mu + sigma*N
        # mu and sigma_hat are needed for LKL loss
        return z, mu, sigma_hat
Project: gtzan.keras    Author: Hguimaraes    | Project source | File source
def splitsongs_melspect(self, X, y, cnn_type = '1D'):
    temp_X = []
    temp_y = []

    for i, song in enumerate(X):
      song_slipted = np.split(song, self.augment_factor)
      for s in song_slipted:
        temp_X.append(s)
        temp_y.append(y[i])

    temp_X = np.array(temp_X)
    temp_y = np.array(temp_y)

    if not cnn_type == '1D':
      temp_X = temp_X[:, np.newaxis]

    return temp_X, temp_y
Project: scikit-kge    Author: mnick    | Project source | File source
def _optim(self, xys):
        idx = np.arange(len(xys))
        self.batch_size = np.ceil(len(xys) / self.nbatches)
        batch_idx = np.arange(self.batch_size, len(xys), self.batch_size)

        for self.epoch in range(1, self.max_epochs + 1):
            # shuffle training examples
            self._pre_epoch()
            shuffle(idx)

            # store epoch for callback
            self.epoch_start = timeit.default_timer()

            # process mini-batches
            for batch in np.split(idx, batch_idx):
                # select indices for current batch
                bxys = [xys[z] for z in batch]
                self._process_batch(bxys)

            # check callback function, if false return
            for f in self.post_epoch:
                if not f(self):
                    break
Project: self-augmented-net    Author: msraig    | Project source | File source
def RealUnlabelDataLoadProcess(pipe, datafile, params):
    path, file = os.path.split(datafile)
    batchSize = params['batchSize']
    dataset = RealDataLoaderSVBRDF(path, file)

    dataset.shuffle(params['randomSeed'])
    pipe.send(dataset.dataSize)
    counter = 0
    posInDataSet = 0
    epoch = 0

    while(True):
        imgbatch = dataset.GetBatch(posInDataSet, batchSize)
        for i in range(0, batchSize):
            imgbatch[i,:,:,:] = autoExposure(imgbatch[i,:,:,:])
        pipe.send(imgbatch)
        counter = counter + batchSize
        posInDataSet = (posInDataSet + batchSize) % dataset.dataSize
        newepoch = counter / dataset.dataSize
        if(newepoch != epoch):
            dataset.shuffle()
        epoch = newepoch
Project: ANN-PONR-Python3    Author: anon-42    | Project source | File source
def chooseErrorData(self, game, lesson=None):
        ''' 
        Choose saved error function data by lesson and game name in 
        history database.
        '''
        self.history.setGame(game)
        self.load()
        if lesson is not None:
            self.error_data_training = np.split(self.data[0,:], 
                np.argwhere(self.data[0,:] == -1))[lesson][1:]
            self.error_data_test = np.split(self.data[1,:], 
                np.argwhere(self.data[1,:] == -1))[lesson][1:]
        else:
            self.error_data_training = np.delete(self.data[0,:], 
                np.argwhere(self.data[0,:]==-1))
            self.error_data_test = np.delete(self.data[1,:], 
                np.argwhere(self.data[1,:]==-1))

# ------------------- for test and show reasons only ----------------------
Project: Tree-LSTM-LM    Author: vgene    | Project source | File source
def create_batches(self):
        self.num_batches = int(self.tensor.size / (self.batch_size *
                                                   self.seq_length))

        # When the data (tensor) is too small,
        # let's give them a better error message
        if self.num_batches == 0:
            assert False, "Not enough data. Make seq_length and batch_size small."

        self.tensor = self.tensor[:self.num_batches * self.batch_size * self.seq_length]
        xdata = self.tensor
        ydata = np.copy(self.tensor) # maybe useless?
        ydata[:-1] = xdata[1:]
        ydata[-1] = xdata[0]
        self.x_batches = np.split(xdata.reshape(self.batch_size, -1),
                                  self.num_batches, 1)
        self.y_batches = np.split(ydata.reshape(self.batch_size, -1),
                                  self.num_batches, 1)
Project: jitcdde    Author: neurophysik    | Project source | File source
def orthonormalise(self, n_lyap, delay):
        """
        Orthonormalise separation functions (with Gram-Schmidt) and return their norms after orthogonalisation (but before normalisation).
        """

        vectors = np.split(np.arange(self.n, dtype=int), n_lyap+1)[1:]

        norms = []
        for i,vector in enumerate(vectors):
            for j in range(i):
                sp = self.scalar_product(delay, vector, vectors[j])
                self.subtract_from_past(vector, vectors[j], sp)
            norm = self.norm(delay, vector)
            if norm > NORM_THRESHOLD:
                self.scale_past(vector, 1./norm)
            norms.append(norm)

        return np.array(norms)
Project: brainpipe    Author: EtienneCmb    | Project source | File source
def _fit(x, y, train, test, self, n_jobs):
    """Sub fit function
    """
    nsuj, nfeat = x.shape
    iteract = product(range(nfeat), zip(train, test))
    ya = Parallel(n_jobs=n_jobs)(delayed(_subfit)(
            np.concatenate(tuple(x[i].iloc[k[0]])),
            np.concatenate(tuple(x[i].iloc[k[1]])),
            np.concatenate(tuple(y[0].iloc[k[0]])),
            np.concatenate(tuple(y[0].iloc[k[1]])),
            self) for i, k in iteract)
    # Re-arrange ypred and ytrue:
    ypred, ytrue = zip(*ya)
    ypred = [np.concatenate(tuple(k)) for k in np.split(np.array(ypred), nfeat)]
    ytrue = [np.concatenate(tuple(k)) for k in np.split(np.array(ytrue), nfeat)]
    da = np.ravel([100*accuracy_score(ytrue[k], ypred[k]) for k in range(nfeat)])
    return da, ytrue, ypred
Project: densecap-tensorflow    Author: rampage644    | Project source | File source
def generate_batches(positive_batch, negative_batch, batch_size):
    positive_boxes, positive_scores, positive_labels = positive_batch
    negative_boxes, negative_scores, negative_labels = negative_batch

    half_batch = batch_size // 2

    pos_batch = np.concatenate([positive_boxes, positive_scores, positive_labels], axis=1)
    neg_batch = np.concatenate([negative_boxes, negative_scores, negative_labels], axis=1)

    np.random.shuffle(pos_batch)
    np.random.shuffle(neg_batch)

    pos_batch = pos_batch[:half_batch]
    pad_size = half_batch - len(pos_batch)
    pos_batch = np.concatenate([pos_batch, neg_batch[:pad_size]])
    neg_batch = neg_batch[pad_size:pad_size+half_batch]

    return (
        np.split(pos_batch, [4, 6], axis=1),
        np.split(neg_batch, [4, 6], axis=1)
    )
Project: stacked_generalization    Author: fukatani    | Project source | File source
def get_sample(self, N=600, scale=False):
        all_data = self.pre_process(self.file_name)
        #print('data_type: ' + str(all_data.dtypes))
        all_data = all_data.values
        xs = all_data[:, 2:]
        y = all_data[:, 1]
        if scale:
            xs = preprocessing.scale(xs)
        if N != -1:
            perm = np.random.permutation(xs.shape[0])
            xs = xs[perm]
            y = y[perm]
            xs_train, xs_test = np.split(xs, [N])
            y_train, y_test = np.split(y, [N])
            return xs_train, xs_test, y_train, y_test
        else:
            return xs, y
Project: bolero    Author: rock-learning    | Project source | File source
def set_params(self, params):
        """Utility function: set currently optimizable parameters."""
        weights, goals, goal_vels = np.split(params, (self.n_weights,
            self.n_weights + (self.n_dmps - 1) * self.n_task_dims))
        G = np.split(goals, [i * self.n_task_dims
                             for i in range(1, self.n_dmps - 1)])
        self.weights = [w.reshape(self.n_weights_per_dmp[i], self.n_task_dims)
                        for i, w in enumerate(np.split(
                            weights, self.split_weights * self.n_task_dims)[
                                :self.n_dmps])]

        for i in range(self.n_dmps - 1):
            self.subgoals[i + 1] = G[i]
        if self.learn_goal_velocities:
            self.subgoal_velocities = np.split(
                goal_vels, [i * self.n_task_dims
                            for i in xrange(1, self.n_dmps)])
Project: dl4nlp    Author: yohokuno    | Project source | File source
def flatten_cost_gradient(cost_gradient_hetero, shapes):
    """
    Allow cost function to have heterogeneous parameters (which is not allowed in numpy array)
    :param cost_gradient_hetero: cost function that receives heterogeneous parameters
    :param shapes: list of shapes of parameter
    :return: cost function that receives concatenated parameters and returns concatenated gradients
    """
    def cost_gradient_wrapper(concatenated_parameters, input, output):
        all_parameters = []

        for shape in shapes:
            split_index = np.prod(shape)
            single_parameter, concatenated_parameters = np.split(concatenated_parameters, [split_index])
            single_parameter = single_parameter.reshape(shape)
            all_parameters.append(single_parameter)

        cost, gradients = cost_gradient_hetero(all_parameters, input, output)
        flatten_gradients = [gradient.flatten() for gradient in gradients]
        concatenated_gradients = np.concatenate(flatten_gradients)
        return cost, concatenated_gradients

    return cost_gradient_wrapper
Project: hydrus    Author: mark-r-g    | Project source | File source
def ests_ll_quad(self, params):
        """
        Calculate the loglikelihood given model parameters `params`.

        This method uses Gaussian quadrature, and thus returns an *approximate*
        integral.
        """
        mu0, gamma0, err0 = np.split(params, 3)
        x = np.tile(self.z, (self.cfg.QCOUNT, 1, 1))  # (QCOUNTXnhospXnmeas)
        loc = mu0 + np.outer(QC1, gamma0)
        loc = np.tile(loc, (self.n, 1, 1))
        loc = np.transpose(loc, (1, 0, 2))
        scale = np.tile(err0, (self.cfg.QCOUNT, self.n, 1))
        zs = lpdf_3d(x=x, loc=loc, scale=scale)

        w2 = np.tile(self.w, (self.cfg.QCOUNT, 1, 1))
        wted = np.nansum(w2 * zs, axis=2).T  # (nhosp X QCOUNT)
        qh = np.tile(QC1, (self.n, 1))  # (nhosp X QCOUNT)
        combined = wted + norm.logpdf(qh)  # (nhosp X QCOUNT)

        return logsumexp(np.nan_to_num(combined), b=QC2, axis=1)  # (nhosp)
Project: hydrus    Author: mark-r-g    | Project source | File source
def ests_ll_exact(self, params):
        """
        Calculate the loglikelihood given model parameters `params`.

        This method uses an exact integral and returns exact ll values, i.e.
        it does not use quadrature to approximate the integral.
        """
        mu, gamma, err = np.split(params, 3)
        d = self.num2 - mu
        q = self.w2 / err**2
        r = d * q

        f = self.w2 @ (2 * np.log(abs(err)) + LOG2PI)
        a = q @ gamma**2
        b = r @ gamma
        c = nsum_row(d * r)

        return .5 * (b * b / (a+1) - c - f - np.log1p(a))
Project: tacotron    Author: jinfagang    | Project source | File source
def restore_shape(arry, step, r):
    '''Reduces and adjust the shape and content of `arry` according to r.

    Args:
      arry: A 2d array with shape of [T, C]
      step: An int. Overlapping span.
      r: Reduction factor

    Returns:
      A 2d array with shape of [-1, C*r]
    '''
    T, C = arry.shape
    sliced = np.split(arry, list(range(step, T, step)), axis=0)

    started = False
    for s in sliced:
        if not started:
            restored = np.vstack(np.split(s, r, axis=1))
            started = True
        else:
            restored = np.vstack((restored, np.vstack(np.split(s, r, axis=1))))

    # Trim zero paddings
    restored = restored[:np.count_nonzero(restored.sum(axis=1))]
    return restored
Project: varapp-backend-py    Author: varapp    | Project source | File source
def parallel_apply_bitwise(genotypes, variant_ids, conditions, active_idx, is_and):
        """Run c_apply_bitwise in parallel. Takes the same arguments."""
        N = len(genotypes)
        nprocs = mp.cpu_count()
        pool = mp.Pool(processes=nprocs)
        B = round(N/nprocs + 0.5)  # batch size
        # Split variant_ids in batches (genotype batches are equally-sized, but not
        #   variant ids, in case a subset was given)
        split_at = variant_ids.searchsorted([(k+1)*B+1 for k in range(nprocs-1)])
        variant_ids_batches = np.split(variant_ids, split_at)
        assert len(variant_ids_batches) == nprocs
        # Run one job for each batch
        passing = [pool.apply(c_apply_bitwise,
            args=(genotypes[k*B:(k+1)*B,:],
                   variant_ids_batches[k],
                   conditions, active_idx, is_and, B))
            for k in range(nprocs)]
        passing = np.concatenate(passing)
        pool.close()
        return passing

    #@timer
Project: factorix    Author: gbouchar    | Project source | File source
def create_minibatch_indices(n, minibatch_size, shuffling=True):
    """
    :param n: total number of indices from which to pick from
    :param minibatch_size: size of the minibatches (must be lower than n)
    :return: (list of random indices, number of random duplicate indices in the last minibatch to complete it)
    """
    if shuffling:
        all_indices = np.random.permutation(n)  # shuffle order randomly
    else:
        all_indices = np.arange(n)
    n_steps = (n - 1) // minibatch_size + 1  # how many batches fit per epoch
    n_rem = n_steps * minibatch_size - n  # remainder
    if n_rem > 0:
        inds_to_add = np.random.randint(0, n_rem, size=n_rem)
        all_indices = np.concatenate((all_indices, inds_to_add))
    return np.split(all_indices, n_steps), n_rem
Project: sciDT    Author: edvisees    | Project source | File source
def make_folds(train_X, train_Y, num_folds):
  num_points = train_X.shape[0]
  fol_len = num_points / num_folds
  rem = num_points % num_folds
  X_folds = numpy.split(train_X, num_folds) if rem == 0 else numpy.split(train_X[:-rem], num_folds)
  Y_folds = numpy.split(train_Y, num_folds) if rem == 0 else numpy.split(train_Y[:-rem], num_folds)
  cv_folds = []
  for i in range(num_folds):
    train_folds_X = []
    train_folds_Y = []
    for j in range(num_folds):
      if i != j:
        train_folds_X.append(X_folds[j])
        train_folds_Y.append(Y_folds[j])
    train_fold_X = numpy.concatenate(train_folds_X)
    train_fold_Y = numpy.concatenate(train_folds_Y)
    cv_folds.append(((train_fold_X, train_fold_Y), (X_folds[i], Y_folds[i])))
  return cv_folds
Project: rltools    Author: sisl    | Project source | File source
def __init__(self, arrays, lengths=None):
        if lengths is None:
            # Without provided lengths, `arrays` is interpreted as a list of arrays
            # and self.lengths is set to the list of lengths for those arrays
            self.arrays = arrays
            self.stacked = np.concatenate(arrays, axis=0)
            self.lengths = np.array([len(a) for a in arrays])
        else:
            # With provided lengths, `arrays` is interpreted as concatenated data
            # and self.lengths is set to the provided lengths.
            self.arrays = np.split(arrays, np.cumsum(lengths)[:-1])
            self.stacked = arrays
            self.lengths = np.asarray(lengths, dtype=int)
            assert all(len(a) == l for a, l in util.safezip(self.arrays, self.lengths))
            self.boundaries = np.concatenate([[0], np.cumsum(self.lengths)])
            assert self.boundaries[-1] == len(self.stacked)
Project: the-magical-csv-merge-machine    Author: entrepreneur-interet-general    | Project source | File source
def __init__(self, t, lexicon, maxTokens = 0, scorer = tokenization_based_score, distinctCount = 0, stopWords = None):
        super(TokenizedMatcher, self).__init__(t)
        currentMax = maxTokens
        self.scorer = scorer
        self.phrasesMap = validated_lexical_map(lexicon)
        self.tokenIdx = dict()
        self.distinctCount = distinctCount
        self.stopWords = stop_words_as_normalized_list(stopWords)
        for np in self.phrasesMap.keys():
            tokens = list([t for t in np.split(' ') if t not in self.stopWords])
            if len(tokens) < 1: continue
            if maxTokens < 1 and len(tokens) > currentMax:
                currentMax = len(tokens)
                if currentMax > DTC:
                    logging.warning('Full tokenization of lexicon: encountered token of length {}, above DTC!'.format(currentMax))
            matchedRefPhrase = ' '.join(tokens[:currentMax])
            if matchedRefPhrase not in self.tokenIdx or len(self.tokenIdx[matchedRefPhrase]) < len(np):
                self.tokenIdx[matchedRefPhrase] = np
        self.maxTokens = currentMax
        logging.info('SET UP %d-token matcher (%s-defined length) for <%s> with lexicon of size %d, total variants %d',
            self.maxTokens, 'user' if maxTokens > 0 else 'data', self.t, len(self.phrasesMap), len(self.tokenIdx))
Project: the-magical-csv-merge-machine    Author: entrepreneur-interet-general    | Project source | File source
def __init__(self, variantsMapFile, targetType, keepContext, domainType = None, scorer = tokenization_based_score):
        super(VariantExpander, self).__init__(targetType)
        self.domainType = domainType
        self.keepContext = keepContext # if true, then the main variant will be surrounded by original context in the normalized value
        self.variantsMap = file_to_variant_map(variantsMapFile) # map from original alternative variant to original main variant
        self.scorer = scorer
        self.tokenIdx = defaultdict(set) # map from alternative variant as joined-normalized-token-list to original alternative variant
        self.minTokens = 3
        self.maxTokens = DTC
        # map of alternative variant`s (including main or not!), from normalized string to list of original strings:
        phrasesMap = validated_lexical_map(self.variantsMap.keys(), tokenize = True)
        for (phrase, altVariants) in phrasesMap.items():
            tokens = phrase.split()
            l = len(tokens)
            if l < 1 or l > DTC: continue
            self.minTokens = min(self.minTokens, l)
            self.maxTokens = max(self.maxTokens, l)
            matchedVariantPhrase = ' '.join(tokens[:self.maxTokens])
            for altVariant in altVariants:
                self.tokenIdx[matchedVariantPhrase].add(altVariant)
                if altVariant not in self.variantsMap:
                    raise RuntimeError('Alternative variant {} not found in variants map'.format(altVariant))
Project: imgProcessor    Author: radjkarl    | Project source | File source
def _capture(f, t, t0, factor):
    '''
    capture signal and return its standard deviation
    #TODO: more detail
    '''
    n_per_sec = len(t) / t[-1]

    # len of one split:
    n = int(t0 * factor * n_per_sec)
    s = len(f) // n
    m = s * n
    f = f[:m]
    ff = np.split(f, s)
    m = np.mean(ff, axis=1)

    return np.std(m)
Project: dong_iccv_2017    Author: woozzu    | Project source | File source
def preprocess(img, desc, len_desc, txt_encoder):
    img = Variable(img.cuda() if not args.no_cuda else img)
    desc = Variable(desc.cuda() if not args.no_cuda else desc)

    len_desc = len_desc.numpy()
    sorted_indices = np.argsort(len_desc)[::-1]
    original_indices = np.argsort(sorted_indices)
    packed_desc = nn.utils.rnn.pack_padded_sequence(
        desc[sorted_indices, ...].transpose(0, 1),
        len_desc[sorted_indices]
    )
    _, txt_feat = txt_encoder(packed_desc)
    txt_feat = txt_feat.squeeze()
    txt_feat = txt_feat[original_indices, ...]

    txt_feat_np = txt_feat.data.cpu().numpy() if not args.no_cuda else txt_feat.data.numpy()
    txt_feat_mismatch = torch.Tensor(np.roll(txt_feat_np, 1, axis=0))
    txt_feat_mismatch = Variable(txt_feat_mismatch.cuda() if not args.no_cuda else txt_feat_mismatch)
    txt_feat_np_split = np.split(txt_feat_np, [txt_feat_np.shape[0] // 2])
    txt_feat_relevant = torch.Tensor(np.concatenate([
        np.roll(txt_feat_np_split[0], -1, axis=0),
        txt_feat_np_split[1]
    ]))
    txt_feat_relevant = Variable(txt_feat_relevant.cuda() if not args.no_cuda else txt_feat_relevant)
    return img, txt_feat, txt_feat_mismatch, txt_feat_relevant
Project: chainer-qrnn    Author: musyoku    | Project source | File source
def dump_source_translation(model, source_buckets, vocab_inv_source, vocab_inv_target, beam_width=8, normalization_alpha=0):
    for source_bucket in source_buckets:
        if beam_width == 1: # greedy
            batchsize = 24
            if len(source_bucket) > batchsize:
                num_sections = len(source_bucket) // batchsize - 1
                if len(source_bucket) % batchsize > 0:
                    num_sections += 1
                indices = [(i + 1) * batchsize for i in range(num_sections)]
                source_sections = np.split(source_bucket, indices, axis=0)
            else:
                source_sections = [source_bucket]

            for source_batch in source_sections:
                translation_batch = translate_greedy(model, source_batch, source_batch.shape[1] * 2, len(vocab_inv_target), beam_width)
                for index in range(len(translation_batch)):
                    source = source_batch[index]
                    translation = translation_batch[index]
                    dump_translation(vocab_inv_source, vocab_inv_target, source, translation)
        else:   # beam search
            for index in range(len(source_bucket)):
                source = source_bucket[index]
                translations = translate_beam_search(model, source, source.size * 2, len(vocab_inv_target), beam_width, normalization_alpha, return_all_candidates=True)
                dump_all_translation(vocab_inv_source, vocab_inv_target, source, translations)
Project: chainer-qrnn    Author: musyoku    | Project source | File source
def compute_accuracy(model, buckets, batchsize=100):
    result = []
    for bucket_index, dataset in enumerate(buckets):
        acc = []
        # split into minibatch
        if len(dataset) > batchsize:
            num_sections = len(dataset) // batchsize - 1
            if len(dataset) % batchsize > 0:
                num_sections += 1
            indices = [(i + 1) * batchsize for i in range(num_sections)]
            sections = np.split(dataset, indices, axis=0)
        else:
            sections = [dataset]
        # compute accuracy
        for batch_index, batch in enumerate(sections):
            printr("computing accuracy ... bucket {}/{} (batch {}/{})".format(bucket_index + 1, len(buckets), batch_index + 1, len(sections)))
            acc.append(compute_accuracy_batch(model, batch))

        result.append(sum(acc) / len(acc))
        printr("")

    return result
Project: chainer-qrnn    Author: musyoku    | Project source | File source
def compute_perplexity(model, buckets, batchsize=100):
    result = []
    for bucket_index, dataset in enumerate(buckets):
        ppl = []
        # split into minibatch
        if len(dataset) > batchsize:
            num_sections = len(dataset) // batchsize - 1
            if len(dataset) % batchsize > 0:
                num_sections += 1
            indices = [(i + 1) * batchsize for i in range(num_sections)]
            sections = np.split(dataset, indices, axis=0)
        else:
            sections = [dataset]
        # compute accuracy
        for batch_index, batch in enumerate(sections):
            sys.stdout.write("\rcomputing perplexity ... bucket {}/{} (batch {}/{})".format(bucket_index + 1, len(buckets), batch_index + 1, len(sections)))
            sys.stdout.flush()
            ppl.append(compute_perplexity_batch(model, batch))

        result.append(sum(ppl) / len(ppl))

        sys.stdout.write("\r" + stdout.CLEAR)
        sys.stdout.flush()
    return result
Project: visual_mpc    Author: febert    | Project source | File source
def __init__(self):

        dict_ = cPickle.load(open(file_path + '/dict_.pkl', "rb"))
        gen_images = dict_['gen_images']

        self.num_ex = 4
        self.row_list = []

        if 'ground_truth' in dict_:
            ground_truth = dict_['ground_truth']
            if not isinstance(ground_truth, list):
                ground_truth = np.split(ground_truth, ground_truth.shape[1], axis=1)
                ground_truth = [np.squeeze(g) for g in ground_truth]
            ground_truth = ground_truth[1:]

            self.row_list.append((ground_truth, 'Ground Truth'))

        self.row_list.append((gen_images, 'Gen Images'))

        self.build_figure()
Project: visual_mpc    Author: febert    | Project source | File source
def save_distrib_visual(self, full_images, use_genimg = True):
        #assumes full_images is already rescaled to [0,1]
        orig_images = np.split(full_images, full_images.shape[0], axis = 0)
        orig_images = [im.reshape(1,64,64,3) for im in orig_images]

        # the first image of corr_gen_images is the first image of the original images!
        file_path =self.policyparams['current_dir'] + '/videos_distrib'
        if use_genimg:
            cPickle.dump([orig_images, self.corr_gen_images, self.rec_input_distrib, self.desig_pix],
                         open(file_path + '/correction.pkl', 'wb'))
            distrib = make_color_scheme(self.rec_input_distrib)
            distrib = add_crosshairs(distrib, self.desig_pix)
            frame_list = assemble_gif([orig_images, self.corr_gen_images, distrib], num_exp=1)
        else:
            cPickle.dump([orig_images, self.rec_input_distrib],
                         open(file_path + '/correction.pkl', 'wb'))
            distrib = make_color_scheme(self.rec_input_distrib)
            distrib = add_crosshairs(distrib, self.desig_pix)
            frame_list = assemble_gif([orig_images, distrib], num_exp=1)

        npy_to_gif(frame_list, self.policyparams['rec_distrib'])
Project: nupic-example-code    Author: htm-community    | Project source | File source
def fft(self, audio, highpass, lowpass):
    """
    Fast fourier transform conditioning

    Output:
    'output' contains the strength of each frequency in the audio signal
    frequencies are marked by its position in 'output':
    frequency = index * rate / buffersize
    output.size = buffersize/2 
    Method:
    Use numpy's FFT (numpy.fft.fft)
    Find the magnitude of the complex numbers returned (abs value)
    Split the FFT array in half, because we have mirror frequencies
     (they're the complex conjugates)
    Use just the first half to apply the bandpass filter

    Great info here: http://stackoverflow.com/questions/4364823/how-to-get-frequency-from-fft-result
    """
    left,right = numpy.split(numpy.abs(numpy.fft.fft(audio)),2)
    output = left[highpass:lowpass]
    return output
Project: mean-teacher    Author: CuriousAI    | Project source | File source
def test_batches_from_two_sets():
    data1 = np.array(['a', 'b'])
    data2 = np.array(['c', 'd', 'e'])

    batch_generator = combine_batches(
        eternal_batches(data1, batch_size=1),
        eternal_batches(data2, batch_size=2)
    )

    first_six_batches = list(islice(batch_generator, 6))
    assert [len(batch) for batch in first_six_batches] == [3, 3, 3, 3, 3, 3]

    batch_portions1 = [batch[:1] for batch in first_six_batches]
    batch_portions2 = [batch[1:] for batch in first_six_batches]

    returned1 = np.concatenate(batch_portions1)
    returned2 = np.concatenate(batch_portions2)

    epochs1 = np.split(returned1, 3)
    epochs2 = np.split(returned2, 4)

    assert all(sorted(items) == ['a', 'b'] for items in epochs1)
    assert all(sorted(items) == ['c', 'd', 'e'] for items in epochs2)
Project: mean-teacher    Author: CuriousAI    | Project source | File source
def test_stratified_batches():
    data = np.array([('a', -1), ('b', 0), ('c', 1), ('d', -1), ('e', -1)],
                    dtype=[('x', np.str_, 8), ('y', np.int32)])

    assert list(data['x']) == ['a', 'b', 'c', 'd', 'e']
    assert list(data['y']) == [-1, 0, 1, -1, -1]

    batch_generator = training_batches(data, batch_size=3, n_labeled_per_batch=1)

    first_ten_batches = list(islice(batch_generator, 10))

    labeled_batch_portions = [batch[:1] for batch in first_ten_batches]
    unlabeled_batch_portions = [batch[1:] for batch in first_ten_batches]

    labeled_epochs = np.split(np.concatenate(labeled_batch_portions), 5)
    unlabeled_epochs = np.split(np.concatenate(unlabeled_batch_portions), 4)

    assert ([sorted(items['x'].tolist()) for items in labeled_epochs] ==
            [['b', 'c']] * 5)
    assert ([sorted(items['y'].tolist()) for items in labeled_epochs] ==
            [[0, 1]] * 5)
    assert ([sorted(items['x'].tolist()) for items in unlabeled_epochs] ==
            [['a', 'b', 'c', 'd', 'e']] * 4)
    assert ([sorted(items['y'].tolist()) for items in unlabeled_epochs] ==
            [[-1, -1, -1, -1, -1]] * 4)
Project: chinese-char-rnn    Author: indiejoseph    | Project source | File source
def create_batches(self):
    self.num_batches = int(self.train.size / (self.batch_size * self.seq_length))
    self.num_valid_batches = int(self.valid.size / (self.batch_size * self.seq_length))

    # When the data (tensor) is too small, let's give them a better error message
    if self.num_batches == 0:
      assert False, "Not enough data. Make seq_length and batch_size small."

    self.train = self.train[:self.num_batches * self.batch_size * self.seq_length]
    self.valid = self.valid[:self.num_valid_batches * self.batch_size * self.seq_length]
    xdata = self.train
    ydata = np.copy(self.train)
    ydata[:-1] = xdata[1:]
    ydata[-1] = xdata[0]
    x_valid = self.valid
    y_valid = np.copy(self.valid)
    y_valid[:-1] = x_valid[1:]
    y_valid[-1] = x_valid[0]
    self.x_valid = np.split(x_valid.reshape(self.batch_size, -1), self.num_valid_batches, 1)
    self.y_valid = np.split(y_valid.reshape(self.batch_size, -1), self.num_valid_batches, 1)
    self.x_batches = np.split(xdata.reshape(self.batch_size, -1), self.num_batches, 1)
    self.y_batches = np.split(ydata.reshape(self.batch_size, -1), self.num_batches, 1)
Project: unet-color    Author: 4g    | Project source | File source
def arrange_images(Y):
    concat_image = None
    Y = (Y + 1)/2
    for yi in np.split(Y, 10):
        image = None
        for y in yi:
            img = cv2.merge((y[0, :, :], y[1, :, :], y[2, :, :]))
            if image is None:
                image = img
            else:
                image = np.concatenate((image, img))
        if concat_image is None:
            concat_image = image
        else:
            concat_image = np.concatenate((concat_image, image), axis=1)
    return concat_image
Project: lsdc    Author: febert    | Project source | File source
def make_video(file_path, conf):
    print 'reading files from:', file_path
    ground_truth = cPickle.load(open(file_path + '/ground_truth.pkl', "rb"))
    gen_images = cPickle.load(open(file_path + '/gen_image_seq.pkl', "rb"))
    distrib = cPickle.load(open(file_path + '/output_distrib_list.pkl', "rb"))

    ground_truth = np.split(ground_truth, ground_truth.shape[1], axis=1)
    ground_truth = np.squeeze(ground_truth)

    fused_gif = video_prediction.utils_vpred.create_gif.assemble_gif([ground_truth, gen_images, distrib])

    import re
    itr_vis = re.match('.*?([0-9]+)$', conf['visualize']).group(1)
    video_prediction.utils_vpred.create_gif.npy_to_gif(fused_gif, file_path +'/' + conf['experiment_name'] + '_' + str(itr_vis))

    return fused_gif
Project: lsdc    Author: febert    | Project source | File source
def comp_video(file_path, conf, suffix = None):
    print 'reading files from:', file_path
    ground_truth = cPickle.load(open(file_path + '/ground_truth.pkl', "rb"))
    gen_images = cPickle.load(open(file_path + '/gen_image_seq.pkl', "rb"))

    ground_truth = np.split(ground_truth, ground_truth.shape[1], axis=1)
    ground_truth = np.squeeze(ground_truth)

    fused_gif = assemble_gif([ground_truth, gen_images])

    itr_vis = re.match('.*?([0-9]+)$', conf['visualize']).group(1)

    if not suffix:
        name = file_path + '/vid_' + conf['experiment_name'] + '_' + str(itr_vis)
    else: name = file_path + '/vid_' + conf['experiment_name'] + '_' + str(itr_vis) + suffix
    npy_to_gif(fused_gif, name)

    return fused_gif
Project: lsdc    Author: febert    | Project source | File source
def save_distrib_visual(self, full_images, use_genimg = True):
        #assumes full_images is already rescaled to [0,1]
        orig_images = np.split(full_images, full_images.shape[0], axis = 0)
        orig_images = [im.reshape(1,64,64,3) for im in orig_images]

        # the first image of corr_gen_images is the first image of the original images!
        file_path =self.policyparams['current_dir'] + '/videos_distrib'
        if use_genimg:
            cPickle.dump([orig_images, self.corr_gen_images, self.rec_input_distrib, self.desig_pix],
                         open(file_path + '/correction.pkl', 'wb'))
            distrib = makegif.pix_distrib_video(self.rec_input_distrib)
            distrib = makegif.add_crosshairs(distrib, self.desig_pix)
            frame_list = makegif.assemble_gif([orig_images, self.corr_gen_images, distrib], num_exp=1)
        else:
            cPickle.dump([orig_images, self.rec_input_distrib],
                         open(file_path + '/correction.pkl', 'wb'))
            distrib = makegif.pix_distrib_video(self.rec_input_distrib)
            distrib = makegif.add_crosshairs(distrib, self.desig_pix)
            frame_list = makegif.assemble_gif([orig_images, distrib], num_exp=1)

        makegif.npy_to_gif(frame_list, self.policyparams['rec_distrib'])
Project: MNIST-Neural-Net    Author: MLavrentyev    | Project source | File source
def genTrainData(self):
        data = []
        with open('../train-data.csv', 'r') as f:
            data = [list(map(int,rec)) for rec in csv.reader(f, delimiter=',')]

        data = np.array(data)
        labels = data[:,0]
        data = np.delete(data, 0, 1)

        data = np.split(data, [(int)(data.shape[0]*.75)])[0]
        labels = np.split(labels, [(int)(labels.shape[0]*.75)])[0]

        testData = np.split(data, [(int)(data.shape[0]*.75)])[1]
        testLabels = np.split(labels, [(int)(labels.shape[0]*.75)])[1]

        return data, labels, testData, testLabels
Project: Sisyphus    Author: davidbrandfonbrener    | Project source | File source
def run_trial(self, trial_input, t_connectivity = None, use_input = True):

        rnn_inputs = np.split(trial_input, trial_input.shape[0], axis=0)
        state = np.expand_dims(self.init_state[0, :], 0)
        rnn_outputs = []
        rnn_states = []
        for i, rnn_input in enumerate(rnn_inputs):
            if t_connectivity:
                output, state = self.rnn_step(state, rnn_input, t_connectivity[i], use_input)
            else:
                output, state = self.rnn_step(state, rnn_input, np.ones_like(self.W_rec), use_input)

            rnn_outputs.append(output)
            rnn_states.append(state)

        return np.array(rnn_outputs), np.array(rnn_states)


    # apply the RNN to a whole batch of inputs
Project: Sisyphus    Author: davidbrandfonbrener    | Project source | File source
def run_trials(self, trial_input, batch_size, t_connectivity = None, use_input = True):

        rnn_inputs = np.split(trial_input, trial_input.shape[1], axis=1)
        state = np.expand_dims(self.init_state[0, :], 0)
        state = np.repeat(state, batch_size, 0)
        rnn_outputs = []
        rnn_states = []
        for i, rnn_input in enumerate(rnn_inputs):
            if t_connectivity:
                output, state = self.rnn_step(state, rnn_input, t_connectivity[i], use_input)
            else:
                output, state = self.rnn_step(state, rnn_input, np.ones_like(self.W_rec), use_input)

            rnn_outputs.append(output)
            rnn_states.append(state)

        return np.array(rnn_outputs), np.array(rnn_states)
Project: char-classify    Author: ekatek    | Project source | File source
def __init__(self, data, target, hidden_layers, model_filename=""):
        """ Must submit either a net configuration, or something to load from """
        if hidden_layers == [] and model_filename == "":
            raise Exception("Must provide a net configuration or a file to load from")

        """ Divide the data into training and test """
        self.trainsize = int(len(data) * 5 / 6)
        self.testsize = len(data) - self.trainsize
        self.x_train, self.x_test = np.split(data, [self.trainsize])
        self.y_train, self.y_test = np.split(target, [self.trainsize])

        """ Create the underlying neural network model """
        self.sizes = [len(data[0])]
        self.sizes.extend(hidden_layers)
        self.sizes.append(len(set(target)))
        self.model = L.Classifier(BaseNetwork(self.sizes))

        """ Create the underlying optimizer """
        self.optimizer = optimizers.Adam()
        self.optimizer.setup(self.model)
Project: ieml    Author: IEMLdev    | Project source | File source
def _compute_table_rank(self, contained):
        logger.log(logging.DEBUG, "Computing tables relations")

        tables_rank = [([], []) for _ in range(6)]

        indices = [
            set(l) for l in np.split(contained.indices, contained.indptr)[1:-1]
        ]

        for root in self.dictionary.roots:
            for t0, t1 in combinations(self.dictionary.roots[root], 2):
                commons = [self.dictionary.index[i] for i in indices[t0.index] & indices[t1.index]]

                rank = max(map(lambda t: t.rank, commons))
                tables_rank[rank][0].extend((t0.index, t1.index))
                tables_rank[rank][1].extend((t1.index, t0.index))

        return [coo_matrix(([True]*len(i), (i, j)), shape=self.shape, dtype=np.bool) for i, j in tables_rank]
Project: Steal-ML    Author: ftramer    | Project source | File source
def prepare_faces():
    data = sklearn.datasets.fetch_olivetti_faces('../data', shuffle=False)
    X = data.data
    y = data.target

    X = np.split(X, 40)
    y = np.split(y, 40)

    X_train = [x[0:7, :] for x in X]
    X_test = [x[7:, :] for x in X]
    y_train = [a[0:7] for a in y]
    y_test = [a[7:] for a in y]
    X_train = np.concatenate(X_train)
    X_test = np.concatenate(X_test)
    y_train = pd.Series(np.concatenate(y_train))
    y_test = pd.Series(np.concatenate(y_test))

    scaler = MinMaxScaler(feature_range=(-1, 1))
    X_train = pd.DataFrame(scaler.fit_transform(X_train))
    X_test = pd.DataFrame(scaler.transform(X_test))

    return X_train, y_train, X_test, y_test, scaler
Project: Steal-ML    Author: ftramer    | Project source | File source
def prepare_faces():
    data = sklearn.datasets.fetch_olivetti_faces('../data', shuffle=False)
    X = data.data
    y = data.target

    X = np.split(X, 40)
    y = np.split(y, 40)

    X_train = [x[0:7, :] for x in X]
    X_test = [x[7:, :] for x in X]
    y_train = [a[0:7] for a in y]
    y_test = [a[7:] for a in y]
    X_train = np.concatenate(X_train)
    X_test = np.concatenate(X_test)
    y_train = np.concatenate(y_train)
    y_test = np.concatenate(y_test)

    scaler = MinMaxScaler(feature_range=(-1, 1))
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    return X_train, y_train, X_test, y_test, scaler