Python numpy 模块,unique() 实例源码


项目:pybot    作者:spillai    | 项目源码 | 文件源码
def __init__(self, filename, target_map, classifier='svm'): 
        """Initialise bookkeeping state and build the base classifier.

        Args:
            filename: Stored unchanged on ``self.filename_``.
            target_map: Mapping whose keys are the target ids.
            classifier: ``'svm'`` or ``'sgd'``.

        Raises:
            Exception: If ``classifier`` is not one of the handled names.
        """
        self.seed_ = 0
        self.filename_ = filename
        self.target_map_ = target_map
        # list(...) keeps this working on Python 3, where keys() is a view that
        # np.unique would otherwise wrap as a single opaque object.
        self.target_ids_ = np.unique(list(target_map.keys())).astype(np.int32)
        self.epoch_no_ = 0
        self.st_time_ = time.time()

        # Setup classifier
        print('====> Building Classifier, setting class weights') 
        if classifier == 'svm': 
            self.clf_hyparams_ = {'C':[0.01, 0.1, 1.0, 10.0, 100.0], 'class_weight': ['balanced']}
            self.clf_base_ = LinearSVC(random_state=self.seed_)
        elif classifier == 'sgd': 
            self.clf_hyparams_ = {'alpha':[0.0001, 0.001, 0.01, 0.1, 1.0, 10.0], 'class_weight':['auto']} # 'loss':['hinge'], 
            # NOTE(review): this branch stores the estimator on clf_ while the
            # svm branch uses clf_base_ -- confirm which attribute callers read.
            self.clf_ = SGDClassifier(loss='log', penalty='l2', shuffle=False, random_state=self.seed_, 
                                      warm_start=True, n_jobs=-1, n_iter=1, verbose=4)
        else:
            # Only reached for names that none of the branches above handle.
            raise Exception('Unknown classifier type %s. Choose from [sgd, svm, gradient-boosting, extra-trees]' 
                            % classifier)
项目:rca-evaluation    作者:sieve-microservices    | 项目源码 | 文件源码
def silhouette_score(series, clusters):
    """Compute a silhouette score for a clustering of ``series``.

    A pairwise distance matrix is filled from ``_sbd`` and passed to
    ``_silhouette_score`` as a precomputed metric.

    Args:
        series: 2-D array; each row is one series.
        clusters: Iterable of ``(cluster, indices)`` pairs; every index in
            ``indices`` is labelled with the pair's position in the iterable.

    Returns:
        ``(labels, score)``. ``score`` is -1 when there is exactly one label
        or at least as many labels as series.
    """
    n_series = series.shape[0]
    distances = np.zeros((n_series, n_series))
    for idx_a, metric_a in enumerate(series):
        for idx_b, metric_b in enumerate(series):
            distances[idx_a, idx_b] = _sbd(metric_a, metric_b)[0]
    labels = np.zeros(n_series)
    for i, (cluster, indicies) in enumerate(clusters):
        for index in indicies:
            labels[index] = i

    # silhouette is only defined, if we have 2 clusters with assignments at
    # minimum
    n_labels = len(np.unique(labels))
    if n_labels == 1 or n_labels >= n_series:
        return labels, -1
    return labels, _silhouette_score(distances, labels, metric='precomputed')
项目:pytorch-semseg    作者:meetshah1995    | 项目源码 | 文件源码
def transform(self, img, lbl):
        """Turn an image / label-map pair into torch tensors.

        The image has its last axis reversed, is mean-subtracted, resized to
        ``self.img_size``, divided by 255 and transposed so that its last
        axis comes first. The label map goes through ``self.encode_segmap``
        and is resized with nearest-neighbour interpolation.

        Returns:
            ``(img, lbl)`` as a float tensor and a long tensor.

        Raises:
            AssertionError: If resizing changed the set of label values.
        """
        img = img[:, :, ::-1].astype(np.float64)
        img -= self.mean
        out_size = (self.img_size[0], self.img_size[1])
        img = m.imresize(img, out_size)
        # Resize scales images from 0 to 255, thus we need
        # to divide by 255.0
        img = img.astype(float) / 255.0
        # Move the last axis to the front (H, W, C -> C, H, W for an HWC input).
        img = img.transpose(2, 0, 1)

        lbl = self.encode_segmap(lbl)
        classes_before = np.unique(lbl)
        lbl = m.imresize(lbl.astype(float), out_size, 'nearest', mode='F')
        lbl = lbl.astype(int)
        # Nearest-neighbour resizing must not create or drop label values.
        assert(np.all(classes_before == np.unique(lbl)))

        return torch.from_numpy(img).float(), torch.from_numpy(lbl).long()
项目:cellranger    作者:10XGenomics    | 项目源码 | 文件源码
def get_normalized_dispersion(mat_mean, mat_var, nbins=20):
    """Normalise dispersion by the median and MAD of its mean-quantile bin.

    Args:
        mat_mean: 1-D array of means.
        mat_var: 1-D array of variances, aligned with ``mat_mean``.
        nbins: Number of evenly spaced percentile bins of ``mat_mean``.

    Returns:
        Array aligned with ``mat_mean``. If all means are identical the raw
        dispersion ``(var - mean) / mean**2`` is returned unchanged.
    """
    mat_disp = (mat_var - mat_mean) / np.square(mat_mean)

    # linspace yields exactly nbins lower edges; a float-step arange can gain
    # or lose an edge, and `100 / nbins` truncates under Python 2.
    quantiles = np.percentile(mat_mean, np.linspace(0, 100, nbins, endpoint=False))
    quantiles = np.append(quantiles, mat_mean.max())

    # merge bins with no difference in value
    quantiles = np.unique(quantiles)

    if len(quantiles) <= 1:
        # pathological case: the means are all identical. just return raw dispersion.
        return mat_disp

    # calc median dispersion per bin
    (disp_meds, _, disp_bins) = scipy.stats.binned_statistic(mat_mean, mat_disp, statistic='median', bins=quantiles)

    # calc median absolute deviation of dispersion per bin
    disp_meds_arr = disp_meds[disp_bins-1] # 0th bin is empty since our quantiles start from 0
    disp_abs_dev = abs(mat_disp - disp_meds_arr)
    (disp_mads, _, disp_bins) = scipy.stats.binned_statistic(mat_mean, disp_abs_dev, statistic='median', bins=quantiles)

    # calculate normalized dispersion
    disp_mads_arr = disp_mads[disp_bins-1]
    disp_norm = (mat_disp - disp_meds_arr) / disp_mads_arr
    return disp_norm
项目:j3dview    作者:blank63    | 项目源码 | 文件源码
def gl_init(self,array_table):
        # Creates the GL vertex array and buffers for this object and builds a
        # de-duplicated vertex array plus the matching element (index) array.
        # NOTE(review): this excerpt is truncated -- the loop over
        # self.attributes below has no body, so vertex_array is never filled.
        self.gl_hide = False

        self.gl_vertex_array = gl.VertexArray()

        self.gl_vertex_buffer = gl.Buffer()

        # Three elements per triangle.
        self.gl_element_count = 3*gl_count_triangles(self)
        self.gl_element_buffer = gl.Buffer()

        # Structured dtype with one field per attribute, one record per vertex.
        vertex_type =  numpy.dtype([array_table[attribute].field() for attribute in self.attributes])
        vertex_count = sum(len(primitive.vertices) for primitive in self.primitives)
        vertex_array = numpy.empty(vertex_count,vertex_type)

        for attribute in self.attributes:

        # Collapse identical vertices; element_map maps each original vertex
        # to its position in the de-duplicated array.
        vertex_array,element_map = numpy.unique(vertex_array,return_inverse=True)
        element_array = gl_create_element_array(self,element_map,self.gl_element_count)

项目:Python-Machine-Learning-By-Example    作者:PacktPublishing    | 项目源码 | 文件源码
def get_best_split(X, y, criterion):
    """ Obtain the best splitting point and resulting children for the data set X, y

    Args:
        X, y (numpy.ndarray, data set)
        criterion (gini or entropy)

    Returns:
        dict {index: index of the feature, value: feature value, children: left and right children}
        All three entries are None when no candidate split has impurity below 1.
    """
    best_index, best_value, best_score, children = None, None, 1, None
    for index in range(len(X[0])):
        # np.unique already returns its values in sorted order.
        for value in np.unique(X[:, index]):
            groups = split_node(X, y, index, value)
            impurity = weighted_impurity([groups[0][1], groups[1][1]], criterion)
            if impurity < best_score:
                best_index, best_value, best_score, children = index, value, impurity, groups
    return {'index': best_index, 'value': best_value, 'children': children}
项目:tissue_analysis    作者:VirtualPlants    | 项目源码 | 文件源码
def consideronlylabels(self, list2consider, verbose = False):
        """
        Add labels to the ignoredlabels list (set) and update the self._labels cache.

        Every label found in ``self.image`` that is not in ``list2consider``
        is treated as a label to ignore.

        Args:
            list2consider: A single int label or an iterable of labels to keep.
            verbose: When true, print the labels being ignored.
        """
        if isinstance(list2consider, int):
            list2consider = [list2consider]

        toignore = set(np.unique(self.image))-set(list2consider)
        # Plain ints; a comprehension also copes with an empty set, which
        # np.vectorize rejects.
        toignore = [int(x) for x in toignore]
        # NOTE(review): toignore is only printed here; the statement that
        # records it on the instance appears to be missing from this excerpt.

        if verbose:
            print('Adding labels %s to the list of labels to ignore...' % (toignore,))
            print('Updating labels list...')
        self._labels = self.__labels()
项目:NumpyDL    作者:oujago    | 项目源码 | 文件源码
def main(max_iter):
    # Builds a two-layer dense network with npdl for the digits dataset.
    # NOTE(review): this excerpt is truncated -- both assignments below have
    # lost their right-hand sides and the training call is missing.
    # prepare

    # data
    digits = load_digits()

    X_train =
    # Scale the features by the largest value in X_train.
    X_train /= np.max(X_train)

    Y_train =
    # One output unit per distinct label.
    n_classes = np.unique(Y_train).size

    # model
    model = npdl.model.Model()
    model.add(npdl.layers.Dense(n_out=500, n_in=64, activation=npdl.activations.ReLU()))
    model.add(npdl.layers.Dense(n_out=n_classes, activation=npdl.activations.Softmax()))
    model.compile(loss=npdl.objectives.SCCE(), optimizer=npdl.optimizers.SGD(lr=0.005))

    # train,, max_iter=max_iter, validation_split=0.1)
项目:segmentation_DLMI    作者:imatge-upc    | 项目源码 | 文件源码
def get_weighted_mask(self, image_shape, mask_shape,ROI_mask=None, labels_mask=None):
        """Build one normalised sampling mask per label category.

        Args:
            image_shape: Shape of the image; only its length is used here, to
                broadcast the per-category totals.
            mask_shape: Forwarded to ``self.get_mask_boundaries``.
            ROI_mask: Forwarded to ``self.get_mask_boundaries``.
            labels_mask: Array of category indices; required.

        Returns:
            Float array of shape ``(n_categories,) + labels_mask.shape`` in
            which each category slice is divided by its own sum.

        Raises:
            ValueError: If ``labels_mask`` is None.
        """
        if labels_mask is  None:
            raise ValueError('SamplingScheme error: please specify a labels_mask for this sampling scheme')
        mask_boundaries = self.get_mask_boundaries(image_shape, mask_shape,ROI_mask)
        n_cat = self.n_categories

        final_mask = np.zeros((n_cat,) + labels_mask.shape, dtype="int16")
        for index_cat in range(n_cat):
            # Plain element-wise product; the original wrapped the boolean
            # mask in a 1-tuple and relied on broadcasting to undo it.
            final_mask[index_cat] = (labels_mask == index_cat) * mask_boundaries

        # NOTE(review): a category with no selected voxel has a zero total and
        # its slice becomes NaN -- confirm callers expect that.
        totals = np.sum(np.reshape(final_mask, (n_cat, -1)), axis=1)
        totals = np.reshape(totals, (n_cat,) + (1,) * len(image_shape))
        final_mask = 1.0 * final_mask / totals

        return final_mask
项目:NeoAnalysis    作者:neoanalysis    | 项目源码 | 文件源码
def get_channel_id_by_file_name(self, filename):
        """
        Checking parameters of NCS, NSE and NTT Files for given filename and
        return channel_id if result is consistent

        :param filename: file name to look up in the three parameter tables
        :return: the channel id if every match agrees on one id, None if the
            filename is not found
        :raises ValueError: if the filename maps to more than one channel id
        """
        channel_ids = []
        for params in (self.parameters_ncs, self.parameters_nse,
                       self.parameters_ntt):
            channel_ids += [k for k in params
                            if params[k]['filename'] == filename]
        # The same id may legitimately appear in more than one table.
        if len(set(channel_ids)) == 1:
            return channel_ids[0]
        elif len(channel_ids) > 1:
            raise ValueError(
                    'Ambiguous channel ids detected. Filename %s is associated'
                    ' to different channels of NCS and NSE and NTT %s'
                    '' % (filename, channel_ids))
        else:  # if filename was not detected
            return None
项目:NeoAnalysis    作者:neoanalysis    | 项目源码 | 文件源码
def __read_unit(self, unit_id, channel_idx):
        """
        Creates unit with unit id for given channel id.
        """
        # define a name for spiketrain
        # (unique identifier: 1000 * elid + unit_nb)
        name = "Unit {0}".format(1000 * channel_idx + unit_id)
        # define description for spiketrain
        desc = 'Unit from channel: {0}, id: {1}'.format(
            channel_idx, self.__get_unit_classification(unit_id))

        # NOTE(review): name and desc are built above but not passed on in
        # this excerpt; the Unit arguments that used them appear to be missing.
        un = Unit(
            file_origin='.'.join([self._filenames['nev'], 'nev']))

        # add additional annotations

        return un
项目:NeoAnalysis    作者:neoanalysis    | 项目源码 | 文件源码
def __draw_pk2(self):
        # Draws a scatter of two selected PCA components, one group per unit.
        # NOTE(review): this excerpt is truncated -- pca_1 / pca_2 have lost
        # their right-hand sides and both inner loops have lost the statements
        # that create and add the scatter items.
        if self.units is not None:
            unique_units = np.unique(self.units)
            unique_units = unique_units.tolist()
            # The combo-box text is split on "-" into the two component names.
            pca_1,pca_2 = self.PCAusedList.currentText().split("-")
            pca_1 =
            pca_2 =
            if self.wavePCAs[0].shape[0]>2:
                xs = self.wavePCAs[:,pca_1]
                ys = self.wavePCAs[:,pca_2]
                self.PcaScatterItem = []
                # Points of each unit are processed in chunks of seg_num.
                seg_num = 5000
                for i,ite_unit in enumerate(unique_units):
                    mask = self.units==ite_unit
                    temp_xs = xs[mask]
                    temp_ys = ys[mask]
                    segs = int(ceil(temp_xs.shape[0]/float(seg_num)))
                    for j in range(segs):
                        temp_xs_j = temp_xs[j*seg_num:(j+1)*seg_num]
                        temp_ys_j = temp_ys[j*seg_num:(j+1)*seg_num]
                for i in range(len(self.PcaScatterItem)):
项目:NeoAnalysis    作者:neoanalysis    | 项目源码 | 文件源码
def get_channel_id_by_file_name(self, filename):
        """
        Checking parameters of NCS, NSE and NTT Files for given filename and
        return channel_id if result is consistent

        :param filename: file name to look up in the three parameter tables
        :return: the channel id if every match agrees on one id, None if the
            filename is not found
        :raises ValueError: if the filename maps to more than one channel id
        """
        channel_ids = []
        for params in (self.parameters_ncs, self.parameters_nse,
                       self.parameters_ntt):
            channel_ids += [k for k in params
                            if params[k]['filename'] == filename]
        # The same id may legitimately appear in more than one table.
        if len(set(channel_ids)) == 1:
            return channel_ids[0]
        elif len(channel_ids) > 1:
            raise ValueError(
                    'Ambiguous channel ids detected. Filename %s is associated'
                    ' to different channels of NCS and NSE and NTT %s'
                    '' % (filename, channel_ids))
        else:  # if filename was not detected
            return None
项目:NeoAnalysis    作者:neoanalysis    | 项目源码 | 文件源码
def __read_unit(self, unit_id, channel_idx):
        """
        Creates unit with unit id for given channel id.
        """
        # define a name for spiketrain
        # (unique identifier: 1000 * elid + unit_nb)
        name = "Unit {0}".format(1000 * channel_idx + unit_id)
        # define description for spiketrain
        desc = 'Unit from channel: {0}, id: {1}'.format(
            channel_idx, self.__get_unit_classification(unit_id))

        # NOTE(review): name and desc are built above but not passed on in
        # this excerpt; the Unit arguments that used them appear to be missing.
        un = Unit(
            file_origin='.'.join([self._filenames['nev'], 'nev']))

        # add additional annotations

        return un
项目:spikefuel    作者:duguyue100    | 项目源码 | 文件源码
def cal_event_count(timestamps):
    """Calculate event count based on timestamps.

    Parameters
    ----------
    timestamps : numpy.ndarray
        timestamps array in 1D array

    Returns
    -------
    event_arr : numpy.ndarray
        array has 2 rows, first row contains the distinct timestamps in
        sorted order, second row consists of the corresponding event count
        at each of those timestamps
    """
    event_ts, event_count = np.unique(timestamps, return_counts=True)

    return np.asarray((event_ts, event_count))
项目:pscore_match    作者:kellieotto    | 项目源码 | 文件源码
def recode_groups(groups, propensity):
    """Recode a two-valued group column as booleans and split by group.

    ``groups`` becomes True where it equals its first unique value. The
    smaller of the two groups is always returned as ``N1`` / ``g1``.

    Returns:
        ``(groups, N1, N2, g1, g2)``: the recoded column, the index labels of
        each group, and the propensity entries of each group.
    """
    # Code groups as 0 and 1
    groups = groups == groups.unique()[0]
    in_first = groups == 1
    in_second = groups == 0
    N1, N2 = groups[in_first].index, groups[in_second].index
    g1, g2 = propensity[in_first], propensity[in_second]
    # Check if treatment groups got flipped - the smaller should correspond to N1/g1
    if len(N1) > len(N2):
        N1, N2, g1, g2 = N2, N1, g2, g1
    return groups, N1, N2, g1, g2

############################# Base Matching Class ##############################
项目:PyGPS    作者:gregstarr    | 项目源码 | 文件源码
def minScalErr(stec,el,z,thisBias):
    # NOTE(review): this excerpt is truncated -- the description below has
    # lost its docstring quotes, and the statement that builds `c` has lost
    # its first line, so only its tail remains.
    this determines the slope of the vTEC vs. Elevation line, which
    should be minimized in the minimum scalloping technique for
    receiver bias removal
        stec - time indexed Series of slant TEC values
        el - corresponding elevation values, also Series
        z - mapping function values to convert to vTEC from entire file, may
            contain nans, Series
        thisBias - the bias to be tested and minimized

    intel=np.asarray(el[stec.index],int) # bin the elevation values into int
    zmap = z[stec.index]
                              /zmap[intel==i])) for i in np.unique(intel) if i>30])

    # Slope of a first-degree fit of column 1 of c against column 0.
    return np.polyfit(c[:,0],c[:,1],1)[0]
项目:risk-slim    作者:ustunb    | 项目源码 | 文件源码
def filter_sort_unique(self, max_objval=float('Inf')):
        # Drops entries above max_objval, sorts by objective value, then keeps
        # one entry per unique row.
        # NOTE(review): this excerpt is truncated -- only self.objvals is
        # updated at each step, and the expression that builds `b` has lost
        # the array it is meant to view.
        # filter
        if max_objval < float('inf'):
            good_idx = self.objvals <= max_objval
            self.objvals = self.objvals[good_idx]

        if len(self.objvals) > 0:
            sort_idx = np.argsort(self.objvals)
            self.objvals = self.objvals[sort_idx]

            # unique
            b = np.ascontiguousarray(
                np.dtype((np.void, * self.P)))
            # unique_idx holds the first occurrence of each distinct item of b.
            _, unique_idx = np.unique(b, return_index=True)
            self.objvals = self.objvals[unique_idx]
项目:AutoSleepScorerDev    作者:skjerns    | 项目源码 | 文件源码
def reset(self):
        """Reset the generator and recompute its per-class batch layout.

        Class ids are the argmax of each row of ``self.Y``. The rarest class
        decides how many batches an epoch holds.
        """
        self.step = 0
        class_ids = np.argmax(self.Y, 1)
        labels = np.unique(class_ids)
        # NOTE(review): idx is stored empty; nothing in this excerpt fills it.
        idx = []
        fewest = len(class_ids)
        for position, label in enumerate(labels):
            count = len(np.where(class_ids == label)[0])
            # Strict comparison: slabel is only set once a class is smaller
            # than the running minimum.
            if count < fewest:
                self.slabel = position
                fewest = count
        self.idx = idx
        self.labels = labels
        self.n_per_class = int(self.batch_size // len(labels))
        self.n_batches = int(np.ceil((fewest // self.n_per_class))) + 1
项目:AutoSleepScorerDev    作者:skjerns    | 项目源码 | 文件源码
def __init__(self, X, Y, batch_size,cropsize=0, truncate=False, sequential=False,
                 random=True, val=False, class_weights=None):
        """Store the data and batching options of the generator.

        Args:
            X: Samples; must have the same length as ``Y``.
            Y: Per-sample rows whose argmax along axis 1 is the class id.
            batch_size: Number of samples per batch.
            cropsize: Crop length kept on the instance; 0 disables cropping.
            truncate: Drop the last partial batch when true.
            sequential: Keep sample order; disables shuffling.
            random: Shuffle, unless ``sequential`` or ``val`` is set.
            val: Validation mode; disables shuffling.
            class_weights: Optional mapping class id -> weight; defaults to
                weight 1 for every class present in ``Y``.
        """
        assert len(X) == len(Y), 'X and Y must be the same length {}!={}'.format(len(X),len(Y))
        if sequential: print('Using sequential mode')
        print ('starting normal generator')
        self.X = X
        self.Y = Y
        self.rnd_idx = np.arange(len(Y))
        self.Y_last_epoch = []
        self.val = val
        self.step = 0
        self.i = 0
        self.truncate = truncate
        # Batch generation reads self.cropsize, so the argument must be kept.
        self.cropsize = cropsize
        self.random = False if sequential or val else random
        self.batch_size = int(batch_size)
        self.sequential = sequential
        class_ids = np.argmax(Y, 1)
        present = np.unique(class_ids)
        self.c_weights = class_weights if class_weights else dict(zip(present, np.ones(len(present))))
        assert set(class_ids) == set([int(x) for x in self.c_weights.keys()]), 'not all labels in class weights'
        # float() keeps the division exact on Python 2 as well.
        self.n_batches = int(len(X)//batch_size if truncate else np.ceil(len(X)/float(batch_size)))
        if self.random: self.randomize()
项目:AutoSleepScorerDev    作者:skjerns    | 项目源码 | 文件源码
def next_normal(self):
        """Return the batch at ``self.step``, cropped if a crop size is set.

        Training mode crops each sample at a random offset that is a multiple
        of 5; validation mode takes the centre crop.

        Returns:
            ``x_batch`` alone in validation mode, otherwise
            ``(x_batch, y_batch)``.
        """
        lo = self.step * self.batch_size
        hi = lo + self.batch_size
        x_batch = self.X[lo:hi]
        y_batch = self.Y[lo:hi]

        diff = len(x_batch[0]) - self.cropsize
        if self.cropsize!=0 and not self.val:
            start = np.random.choice(np.arange(0,diff+5,5), len(x_batch))
            x_batch = [x[start[i]:start[i]+self.cropsize,:] for i,x in enumerate(x_batch)]
        elif self.cropsize !=0 and self.val:
            x_batch = [x[diff//2:diff//2+self.cropsize] for i,x in enumerate(x_batch)]

        x_batch = np.array(x_batch, dtype=np.float32)
        y_batch = np.array(y_batch, dtype=np.int32)
        if self.val:
            return x_batch # for validation generator, save the new y_labels
        else:
            # NOTE(review): weights is computed but not returned in this
            # excerpt -- confirm whether callers expect a third element.
            weights = np.ones(len(y_batch))
            batch_classes = np.argmax(y_batch, 1)
            for t in np.unique(batch_classes):
                weights[batch_classes == t] = self.c_weights[t]
            return (x_batch,y_batch)
项目:PersonalizedMultitaskLearning    作者:mitmedialab    | 项目源码 | 文件源码
def get_preds_true_for_task(self,train_tasks, test_tasks, param_dict):
        """Return predictions and true labels for the task in ``param_dict``.

        Args:
            train_tasks: Sequence of dicts with ``'X'`` and ``'Y'`` entries.
            test_tasks: Same layout as ``train_tasks``.
            param_dict: Must contain ``'task_num'``, the task index.

        Returns:
            ``(preds, true_y)``, or ``(None, None)`` when any of the train or
            test arrays for the task is empty.
        """
        t = param_dict['task_num']
        X = train_tasks[t]['X']
        y = train_tasks[t]['Y']

        test_X = test_tasks[t]['X']
        true_y = list(test_tasks[t]['Y'].flatten())

        if len(y)==0 or len(X)==0 or len(test_X) == 0 or len(true_y)==0:
            return None, None

        if self.cant_train_with_one_class and len(np.unique(y))==1:
            # Only one class to learn from: predict it for every test sample.
            preds = list(np.unique(y)[0]*np.ones(len(true_y)))
        else:
            preds = self.train_and_predict_task(t, X, y, test_X, param_dict)

        return preds, true_y
项目:a-cadmci    作者:florez87    | 项目源码 | 文件源码
def getClasses(labels):
        """
        Get unique values from a column of labels.

        Parameters
        ----------
        labels: array-like of shape = [number_samples] or [number_samples, number_outputs]
            The target values (class labels in classification).

        Returns
        -------
        classes: ndarray
            The sorted unique labels

        ids: ndarray
            For every entry of ``labels``, the position of its value in
            ``classes`` (so ``classes[ids]`` rebuilds the input).
        """
        # NOTE(review): the original description of ``ids`` read like
        # return_index (first occurrences); the code asks for return_inverse.
        uniques, ids = numpy.unique(labels, return_inverse=True)
        return uniques, ids
项目:OptML    作者:johannespetrat    | 项目源码 | 文件源码
def grid_spacing(self):
        """Check value ranges and per-parameter grid sizes of a grid search."""
        interval = [1,10]
        parameters = [
            Parameter('A', 'integer', lower=interval[0], upper=interval[1]),
            Parameter('B', 'continuous', lower=interval[0], upper=interval[1]),
            Parameter('C', 'categorical', possible_values=['Bla1', 'Bla2']),
            Parameter('D', 'boolean'),
        ]
        grid_sizes = {'A': 5, 'B': 6}
        grid_search = GridSearchOptimizer(model, parameters, clf_score, grid_sizes)
        grid = grid_search.grid
        for params in grid:
            self.assertIn(params['A'], range(*interval))
            self.assertIn(params['C'], ['Bla1', 'Bla2'])
            self.assertIn(params['D'], ['True', 'False'])
        # Count the distinct values of every parameter before asserting.
        names = ('A', 'B', 'C', 'D')
        counts = [len(np.unique([params[name] for params in grid])) for name in names]
        # NOTE(review): grid_sizes has no 'C' or 'D' entry, so those lookups
        # raise KeyError once the 'A' and 'B' checks pass.
        for name, count in zip(names, counts):
            self.assertTrue((count==grid_sizes[name]) or (count==grid_sizes[name]+1))
项目:audio_scripts    作者:audiofilter    | 项目源码 | 文件源码
def logscale_spec(spec, sr=44100, factor=20.):
    """Rebin the frequency axis of a spectrogram onto a power-law scale.

    Args:
        spec: 2-D array unpacked as ``(timebins, freqbins)``.
        sr: Sample rate used to compute the bin centre frequencies.
        factor: Exponent applied to the linear 0..1 ramp that defines the
            new bin edges.

    Returns:
        ``(newspec, freqs)``: the complex rebinned spectrogram with one
        column per new bin, and the list of mean frequencies of those bins.
    """
    timebins, freqbins = np.shape(spec)

    scale = np.linspace(0, 1, freqbins) ** factor
    scale *= (freqbins-1)/max(scale)
    # The edges are used as slice bounds below, so they must be integers.
    scale = np.unique(np.round(scale)).astype(int)
    last = len(scale) - 1

    # create spectrogram with new freq bins
    newspec = np.complex128(np.zeros([timebins, len(scale)]))
    for i in range(len(scale)):
        if i == last:
            # The final bin runs to the end of the frequency axis.
            newspec[:,i] = np.sum(spec[:,scale[i]:], axis=1)
        else:
            newspec[:,i] = np.sum(spec[:,scale[i]:scale[i+1]], axis=1)

    # list center freq of bins
    allfreqs = np.abs(np.fft.fftfreq(freqbins*2, 1./sr)[:freqbins+1])
    freqs = []
    for i in range(len(scale)):
        if i == last:
            freqs += [np.mean(allfreqs[scale[i]:])]
        else:
            freqs += [np.mean(allfreqs[scale[i]:scale[i+1]])]

    return newspec, freqs
项目:audio_scripts    作者:audiofilter    | 项目源码 | 文件源码
def logscale_spec(spec, sr=44100, factor=20.):
    """Rebin the frequency axis of a spectrogram onto a power-law scale.

    Args:
        spec: 2-D array unpacked as ``(timebins, freqbins)``.
        sr: Sample rate used to compute the bin centre frequencies.
        factor: Exponent applied to the linear 0..1 ramp that defines the
            new bin edges.

    Returns:
        ``(newspec, freqs)``: the complex rebinned spectrogram with one
        column per new bin, and the list of mean frequencies of those bins.
    """
    timebins, freqbins = np.shape(spec)

    scale = np.linspace(0, 1, freqbins) ** factor
    scale *= (freqbins-1)/max(scale)
    # The edges are used as slice bounds below, so they must be integers.
    scale = np.unique(np.round(scale)).astype(int)
    last = len(scale) - 1

    # create spectrogram with new freq bins
    newspec = np.complex128(np.zeros([timebins, len(scale)]))
    for i in range(len(scale)):
        if i == last:
            # The final bin runs to the end of the frequency axis.
            newspec[:,i] = np.sum(spec[:,scale[i]:], axis=1)
        else:
            newspec[:,i] = np.sum(spec[:,scale[i]:scale[i+1]], axis=1)

    # list center freq of bins
    allfreqs = np.abs(np.fft.fftfreq(freqbins*2, 1./sr)[:freqbins+1])
    freqs = []
    for i in range(len(scale)):
        if i == last:
            freqs += [np.mean(allfreqs[scale[i]:])]
        else:
            freqs += [np.mean(allfreqs[scale[i]:scale[i+1]])]

    return newspec, freqs
项目:kmeans-service    作者:MAYHEM-Lab    | 项目源码 | 文件源码
def free_parameters(self, data):
        """
        Compute free parameters for the model fit using K-Means

        Args:
            data: 2-D array; only its number of columns is used.

        Returns:
            The parameter count: ``(K - 1) + K * d`` plus the terms added
            for the configured metric and covariance structure.
        """
        K = np.unique(self.labels_).shape[0]  # number of clusters
        _, d = data.shape
        r = (K - 1) + (K * d)
        if self.metric == 'euclidean':
            r += 1  # one parameter for variance
        elif self.metric == 'mahalanobis':
            # Parameters of ONE covariance matrix of each supported type.
            per_matrix = {
                'full': d * (d + 1) * 0.5,  # half of the elements (including diagonal) in the matrix
                'diag': d,  # diagonal elements of the matrix
                'spher': 1,  # all diagonal elements are equal
            }.get(self.covar_type)
            if per_matrix is not None:
                # A tied covariance is shared; otherwise each cluster has its own.
                r += per_matrix if self.covar_tied else per_matrix * K
        return r
项目:sef    作者:passalis    | 项目源码 | 文件源码
def sim_target_supervised(target_data, target_labels, sigma, idx, target_params):
    """Build the supervised target similarity matrix and its weighting mask.

    Pairs with the same label get similarity 0.8 and weight 1; pairs with
    different labels get similarity 0.1 and weight ``0.8 / (n_labels - 1)``.

    Args:
        target_data: Unused.
        target_labels: Array of labels, one per sample.
        sigma: Unused.
        idx: Index selecting the samples of the current batch.
        target_params: Unused.

    Returns:
        ``(Gt, mask)``, two float32 arrays of shape ``(N, N)``.
    """
    cur_labels = target_labels[idx]
    N = cur_labels.shape[0]

    N_labels = len(np.unique(cur_labels))

    # One label per sample is assumed; ravel also accepts an (N, 1) column.
    flat = np.ravel(cur_labels)
    same = flat[:, None] == flat[None, :]

    # With a single class there are no different-label pairs, so the
    # off-class weight is never used and the division is skipped.
    other_weight = 0.8 / (N_labels - 1) if N_labels > 1 else 0.0

    Gt = np.where(same, 0.8, 0.1)
    mask = np.where(same, 1.0, other_weight)

    return np.float32(Gt), np.float32(mask)
项目:em_examples    作者:geoscixyz    | 项目源码 | 文件源码
def get_Surface_Potentials(mtrue, survey, src, field_obj):
    """Return the potential along the surface, referenced for pole surveys.

    For "Pole-Dipole" and "Pole-Pole" surveys the potential at the cell
    closest to ``[xmax + 60, 0]`` is subtracted from the surface values.

    Returns:
        ``(XLoc, phiSurface, phiScale)``; ``phiScale`` is 0. for other surveys.
    """
    phi = field_obj['phi']
    cell_centres = mesh.gridCC
    XLoc = np.unique(cell_centres[:, 0])
    surfaceInd, zsurfaceLoc = get_Surface(mtrue, XLoc)
    phiSurface = phi[surfaceInd]

    uses_pole = (survey == "Pole-Dipole" or survey == "Pole-Pole")
    if not uses_pole:
        return XLoc, phiSurface, 0.

    refInd = Utils.closestPoints(mesh, [xmax+60., 0.], gridLoc='CC')
    phiScale = phi[refInd]
    return XLoc, phiSurface - phiScale, phiScale
项目:em_examples    作者:geoscixyz    | 项目源码 | 文件源码
def Plot_ChargesDensity(XYZ, sig0, sig1, R, E0, ax):
    """Draw the charge density on ``ax`` with a dashed circle of radius R.

    Args:
        XYZ: Array whose first three columns are the x, y, z coordinates.
        sig0, sig1, R, E0: Forwarded to ``get_ElectricField`` and
            ``get_ChargesDensity``.
        ax: Matplotlib axes to draw on.

    Returns:
        The same ``ax``.
    """
    xr = np.unique(XYZ[:, 0])
    yr = np.unique(XYZ[:, 1])
    zr = np.unique(XYZ[:, 2])
    xcirc = xr[np.abs(xr) <= R]

    Et, Ep, Es = get_ElectricField(XYZ, sig0, sig1, R, E0)
    rho = get_ChargesDensity(XYZ, sig0, sig1, R, Et, Ep)

    ax.set_xlim([xr.min(), xr.max()])
    ax.set_ylim([yr.min(), yr.max()])
    Cplot = ax.pcolor(xr, yr, rho.reshape(xr.size, yr.size))
    cb1 = plt.colorbar(Cplot, ax=ax)
    cb1.set_label(label= 'Charge Density ($C/m^2$)', size=ftsize_label) #weight='bold')
    # Upper and lower half of the circle outline.
    half_circle = np.sqrt(R**2-xcirc**2)
    ax.plot(xcirc, half_circle, '--k', xcirc, -half_circle, '--k')
    ax.set_ylabel('Y coordinate ($m$)', fontsize=ftsize_label)
    ax.set_xlabel('X coordinate ($m$)', fontsize=ftsize_label)
    ax.set_title('Charges Density', fontsize=ftsize_title)

    return ax
项目:em_examples    作者:geoscixyz    | 项目源码 | 文件源码
def get_Surface_Potentials(mtrue, survey, src, field_obj):
    """Return the potential along the surface, referenced for pole surveys.

    For "Pole-Dipole" and "Pole-Pole" surveys the potential at the cell
    closest to ``[xmax + 60, 0]`` is subtracted from the surface values.

    Returns:
        ``(XLoc, phiSurface, phiScale)``; ``phiScale`` is 0. for other surveys.
    """
    phi = field_obj['phi']
    cell_centres = mesh.gridCC
    XLoc = np.unique(cell_centres[:, 0])
    surfaceInd, zsurfaceLoc = get_Surface(mtrue, XLoc)
    phiSurface = phi[surfaceInd]

    uses_pole = (survey == "Pole-Dipole" or survey == "Pole-Pole")
    if not uses_pole:
        return XLoc, phiSurface, 0.

    refInd = Utils.closestPoints(mesh, [xmax+60., 0.], gridLoc='CC')
    phiScale = phi[refInd]
    return XLoc, phiSurface - phiScale, phiScale
项目:lps-anchor-pos-estimator    作者:bitcraze    | 项目源码 | 文件源码
def unique(eq):
    """Return the distinct entries of ``eq`` together with index maps.

    Entries are compared through their ``hash``.

    Returns:
        ``(u, ia, ic)``: the distinct entries, the index of the first
        occurrence of each one, and for every entry of ``eq`` the position
        of its value among the distinct ones.
    """
    eq = eqsize(eq)
    # One hash per entry (the original multiplied a list by a shape tuple
    # and subscripted list.append, both of which raise TypeError).
    c1 = np.asarray([hash(eq[i]) for i in range(eq.size)])

    if c1.ndim == 1:
        _, ia, ic = np.unique(c1, return_index=True, return_inverse=True)
    else:
        # Row-wise uniqueness: view every row as one opaque void item.
        b = np.ascontiguousarray(c1).view(
            np.dtype((np.void, c1.dtype.itemsize * c1.shape[1])))
        _, ia, ic = np.unique(b, return_index=True, return_inverse=True)

    # Defined on both paths so the return below cannot hit an unbound name.
    u = eq[ia]
    return u, ia, ic
项目:AutoML5    作者:djajetic    | 项目源码 | 文件源码
def getTypeProblem (self, solution_filename):
            ''' Get the type of problem directly from the solution file (in case we do not have an info file)'''
        # NOTE(review): this excerpt is truncated -- the object that the
        # ['task'] / ['label_num'] / ['target_type'] subscripts belong to and
        # the else: lines between the branches are missing.
        if 'task' not in
            solution = np.array(data_converter.file_to_array(solution_filename))
            target_num = solution.shape[1]
            if target_num == 1: # if we have only one column
                solution = np.ravel(solution) # flatten
                nbr_unique_values = len(np.unique(solution))
                # Few distinct values relative to the sample count is taken
                # to mean classification.
                if nbr_unique_values < len(solution)/8:
                    # Classification
          ['label_num'] = nbr_unique_values
                    if nbr_unique_values == 2:
              ['task'] = 'binary.classification'
              ['target_type'] = 'Binary'
              ['task'] = 'multiclass.classification'
              ['target_type'] = 'Categorical'
                    # Regression
          ['label_num'] = 0
          ['task'] = 'regression'
          ['target_type'] = 'Numerical'     
                # Multilabel or multiclass       
      ['label_num'] = target_num
      ['target_type'] = 'Binary' 
                # Any row whose entries sum to more than 1 has several labels.
                if any(item > 1 for item in map(np.sum,solution.astype(int))):
          ['task'] = 'multilabel.classification'     
          ['task'] = 'multiclass.classification'        
项目:AutoML5    作者:djajetic    | 项目源码 | 文件源码
def tiedrank(a):
    ''' Return the ranks (with base 1) of a list resolving ties by averaging.
     This works for numpy arrays.'''    
    # NOTE(review): this excerpt is truncated -- the statements that define
    # m, uval, newval, oldval and S are missing, so only the outline remains.
    # Sort a in ascending order (sa=sorted vals, i=indices)
    # Find unique values
    # Test whether there are ties 
    R=np.arange(m, dtype=float)+1 # Ranks with base 1
    if len(uval)!=m:
        # Average the ranks for the ties 
        for k in range(1,m):
            if newval==oldval:
                # moving average
    # Invert the index
    return S
项目:AutoML5    作者:djajetic    | 项目源码 | 文件源码
def binarization (array):
    """Map a two-valued array onto 0/1: the maximum becomes 1, the minimum 0.

    Raises:
        ValueError: If more than two distinct values are present.
    """
    values = np.array(array, dtype=float)  # float so np.inf can be a marker
    n_classes = len(np.unique(values))
    if n_classes > 2:
        raise ValueError ("The argument must be a binary-class datafile. {} classes detected".format(len(np.unique(values))))

    # Park the maximum on +inf first, so that labels such as 1 and 2 cannot
    # collide with the 0/1 output values while they are being rewritten.
    top = np.amax(values)
    values[values == top] = np.inf
    bottom = np.amin(values)
    values[values == bottom] = 0
    values[values == np.inf] = 1
    return values.astype(int)
项目:IntroToDeepLearning    作者:robb-brown    | 项目源码 | 文件源码
def __init__(self, images, labels, fake_data=False):
    # Stores images and labels on the instance, flattening and rescaling the
    # images and one-hot encoding 1-D labels.
    # NOTE(review): this excerpt is truncated -- the else: lines are missing
    # and the assert message below has lost its closing part.
    if fake_data:
      self._num_examples = 10000
      assert images.shape[0] == labels.shape[0], (
          "images.shape: %s labels.shape: %s" % (images.shape,
      self._num_examples = images.shape[0]

      # Convert shape from [num examples, rows, columns, depth]
      # to [num examples, rows*columns] (assuming depth == 1)
      self.imageShape = images.shape[1:]
      self.imageChannels = self.imageShape[2]

      images = images.reshape(images.shape[0],
                              images.shape[1] * images.shape[2] * images.shape[3])
      # Convert from [0, 255] -> [0.0, 1.0].
      images = images.astype(numpy.float32)
      images = numpy.multiply(images, 1.0 / 255.0)
    self._images = images
    self._labels = labels
      # 1-D labels are one-hot encoded, one column per distinct label.
      if len(numpy.shape(self._labels)) == 1:
        self._labels = dense_to_one_hot(self._labels,len(numpy.unique(self._labels)))
    self._epochs_completed = 0
    self._index_in_epoch = 0
项目:IntroToDeepLearning    作者:robb-brown    | 项目源码 | 文件源码
def __init__(self, images, labels, fake_data=False):
    # Stores images and labels on the instance, flattening and rescaling the
    # images and one-hot encoding 1-D labels.
    # NOTE(review): this excerpt is truncated -- the else: lines are missing
    # and the assert message below has lost its closing part.
    if fake_data:
      self._num_examples = 10000
      assert images.shape[0] == labels.shape[0], (
          "images.shape: %s labels.shape: %s" % (images.shape,
      self._num_examples = images.shape[0]

      # Convert shape from [num examples, rows, columns, depth]
      # to [num examples, rows*columns] (assuming depth == 1)
      self.imageShape = images.shape[1:]
      self.imageChannels = self.imageShape[2]

      images = images.reshape(images.shape[0],
                              images.shape[1] * images.shape[2] * images.shape[3])
      # Convert from [0, 255] -> [0.0, 1.0].
      images = images.astype(numpy.float32)
      images = numpy.multiply(images, 1.0 / 255.0)
    self._images = images
    self._labels = labels
      # 1-D labels are one-hot encoded, one column per distinct label.
      if len(numpy.shape(self._labels)) == 1:
        self._labels = dense_to_one_hot(self._labels,len(numpy.unique(self._labels)))
    self._epochs_completed = 0
    self._index_in_epoch = 0
项目:rca-evaluation    作者:sieve-microservices    | 项目源码 | 文件源码
def cluster_service(path, service, cluster_size, prev_metadata=None):
    """Cluster one service's preprocessed metrics and record the result.

    Args:
        path: Directory holding the preprocessed file and the metadata.
        service: Dict with at least ``"name"`` and ``"preprocessed_filename"``.
        cluster_size: Requested number of clusters; replaced by the number of
            distinct initial assignments when ``prev_metadata`` yields one.
        prev_metadata: Optional earlier metadata used to seed the clustering.

    Returns:
        ``(service name, cluster_size)``, or ``(None, None)`` when the output
        already exists or ``cluster_size`` is below 2.
    """
    csv_path = os.path.join(path, service["preprocessed_filename"])
    df = pd.read_csv(csv_path, sep="\t", index_col='time', parse_dates=True)

    initial_idx = None
    if prev_metadata:
        initial_idx = get_initial_clustering(service["name"], prev_metadata, df.columns)
        # adjust cluster_size if an initial assigment has been found
        if initial_idx is not None:
            cluster_size = len(np.unique(initial_idx))

    prefix = "%s/%s-cluster-%d" % (path, service["name"], cluster_size)
    already_done = os.path.exists(prefix + "_1.png")
    if already_done:
        print("skip " + prefix)
        return (None, None)

    cluster_metrics, score, filenames = do_kshape(prefix, df, cluster_size, initial_idx)
    if cluster_size < 2:
        # no silhouette_score for cluster size 1
        return (None, None)
    print("silhouette_score: %f" % score)

    entry = dict(silhouette_score=score, filenames=filenames, metrics=cluster_metrics)
    # protect the write access to the metadata file
    with metadata.update(path) as data:
        for srv in data["services"]:
            if srv["name"] != service["name"]:
                continue
            if "clusters" not in srv:
                srv["clusters"] = {}
            srv["clusters"][cluster_size] = entry

    return (service["name"], cluster_size)
项目:spyking-circus    作者:spyking-circus    | 项目源码 | 文件源码
def view_waveforms_clusters(data, halo, threshold, templates, amps_lim, n_curves=200, save=False):
    """Plot, for every template, a sample of the waveforms assigned to it.

    One subplot is drawn per column of ``templates`` on a square grid.  Each
    panel shows up to ``n_curves`` randomly chosen rows of ``data`` whose
    ``halo`` value equals the matching cluster id (grey), the template itself
    (red), the template scaled by both entries of ``amps_lim[count]`` (blue),
    horizontal dashed lines at ``+/-threshold`` and a vertical dashed line at
    ``center``.

    If ``save`` is truthy it is indexed as ``(directory, suffix)`` and the
    figure is written to ``<directory>/waveforms_<suffix>``.
    """

    nb_templates = templates.shape[1]
    # side length of the smallest square grid that holds all panels
    # NOTE(review): numpy.ceil returns a float; confirm pylab.subplot accepts it
    n_panels     = numpy.ceil(numpy.sqrt(nb_templates))
    # cluster ids are taken only from entries with halo > -1
    mask         = numpy.where(halo > -1)[0]
    clust_idx    = numpy.unique(halo[mask])
    fig          = pylab.figure()    
    square       = True
    # NOTE(review): the "- 1" is applied to data[0] before len(), so it does not
    # change the length; (len(data[0]) - 1)//2 may have been intended - confirm
    center       = len(data[0] - 1)//2
    for count, i in enumerate(xrange(nb_templates)):
        if square:
            pylab.subplot(n_panels, n_panels, count + 1)
            # keep y ticks only on the first column of the grid
            if (numpy.mod(count, n_panels) != 0):
                pylab.setp(pylab.gca(), yticks=[])
            # keep x ticks only on the last row of the grid
            if (count < n_panels*(n_panels - 1)):
                pylab.setp(pylab.gca(), xticks=[])

        # indices of the curves that belong to the cluster shown in this panel
        subcurves = numpy.where(halo == clust_idx[count])[0]
        for k in numpy.random.permutation(subcurves)[:n_curves]:
            pylab.plot(data[k], '0.5')

        pylab.plot(templates[:, count], 'r')        
        # template scaled by the two amplitude limits
        pylab.plot(amps_lim[count][0]*templates[:, count], 'b', alpha=0.5)
        pylab.plot(amps_lim[count][1]*templates[:, count], 'b', alpha=0.5)

        xmin, xmax = pylab.xlim()
        pylab.plot([xmin, xmax], [-threshold, -threshold], 'k--')
        pylab.plot([xmin, xmax], [threshold, threshold], 'k--')
        #pylab.ylim(-1.5*threshold, 1.5*threshold)
        ymin, ymax = pylab.ylim()
        pylab.plot([center, center], [ymin, ymax], 'k--')
        pylab.title('Cluster %d' %i)

    # NOTE(review): the body of the following `if` is missing from this copy of
    # the code (syntax error as written); recover it from the upstream project
    if nb_templates > 0:
    if save:
        pylab.savefig(os.path.join(save[0], 'waveforms_%s' %save[1]))
    del fig
项目:spyking-circus    作者:spyking-circus    | 项目源码 | 文件源码
def check_consistent_length(*arrays):
    """Check that all arrays have consistent first dimensions.

    Checks whether all objects in arrays have the same shape or length.

    Parameters
    ----------
    *arrays : list or tuple of input objects.
        Objects that will be checked for consistent length.  ``None``
        entries are ignored.

    Raises
    ------
    ValueError
        If the non-``None`` inputs report more than one distinct number of
        samples.
    """
    # one sample count per non-None input; more than one distinct value
    # means the inputs disagree
    uniques = np.unique([_num_samples(X) for X in arrays if X is not None])
    if len(uniques) > 1:
        raise ValueError("Found arrays with inconsistent numbers of samples: "
                         "%s" % str(uniques))
项目:pytorch-semseg    作者:meetshah1995    | 项目源码 | 文件源码
def transform(self, img, lbl):
        """Convert an image / label pair into network-ready tensors.

        The image has its last axis reversed, is cast to float, has
        ``self.mean`` subtracted, is resized to ``self.img_size``, divided by
        255 and transposed so that the last axis comes first.  The label map
        is resized to the same size with nearest-neighbour interpolation and
        cast to ``int``.

        :param img: image array with the channel axis last
        :param lbl: label map with one class id per pixel
        :returns: ``(img, lbl)`` as a float tensor and a long tensor
        :raises ValueError: if the resized label map holds a value that is
            not below ``self.n_classes``
        """
        img = img[:, :, ::-1]
        img = img.astype(np.float64)
        img -= self.mean
        img = m.imresize(img, (self.img_size[0], self.img_size[1]))
        # Resize scales images from 0 to 255, thus we need
        # to divide by 255.0
        img = img.astype(float) / 255.0
        # HWC -> CHW
        img = img.transpose(2, 0, 1)

        classes = np.unique(lbl)
        lbl = lbl.astype(float)
        lbl = m.imresize(lbl, (self.img_size[0], self.img_size[1]), 'nearest', mode='F')
        lbl = lbl.astype(int)
        resized_classes = np.unique(lbl)

        # array_equal copes with class sets of different length; an
        # elementwise `==` on mismatched shapes is an error in recent NumPy
        if not np.array_equal(classes, resized_classes):
            print("WARN: resizing labels yielded fewer classes")

        if not np.all(resized_classes < self.n_classes):
            raise ValueError("Segmentation map contained invalid class values")

        img = torch.from_numpy(img).float()
        lbl = torch.from_numpy(lbl).long()

        return img, lbl
项目:pytorch-semseg    作者:meetshah1995    | 项目源码 | 文件源码
def transform(self, img, lbl):
        """Convert an image / label pair into network-ready tensors.

        The image has its last axis reversed, is cast to float, has
        ``self.mean`` subtracted, is resized to ``self.img_size``, divided by
        255 and transposed so that the last axis comes first.  The label map
        is resized to the same size with nearest-neighbour interpolation and
        cast to ``int``.

        :param img: image array with the channel axis last
        :param lbl: label map with one class id per pixel
        :returns: ``(img, lbl)`` as a float tensor and a long tensor
        :raises ValueError: if the resized label map holds a value that is
            not below ``self.n_classes``
        """
        img = img[:, :, ::-1]
        img = img.astype(np.float64)
        img -= self.mean
        img = m.imresize(img, (self.img_size[0], self.img_size[1]))
        # Resize scales images from 0 to 255, thus we need
        # to divide by 255.0
        img = img.astype(float) / 255.0
        # HWC -> CHW
        img = img.transpose(2, 0, 1)

        classes = np.unique(lbl)
        lbl = lbl.astype(float)
        lbl = m.imresize(lbl, (self.img_size[0], self.img_size[1]), 'nearest', mode='F')
        lbl = lbl.astype(int)
        resized_classes = np.unique(lbl)

        # array_equal copes with class sets of different length; an
        # elementwise `==` on mismatched shapes is an error in recent NumPy
        if not np.array_equal(classes, resized_classes):
            print("WARN: resizing labels yielded fewer classes")

        if not np.all(resized_classes < self.n_classes):
            raise ValueError("Segmentation map contained invalid class values")

        img = torch.from_numpy(img).float()
        lbl = torch.from_numpy(lbl).long()

        return img, lbl
项目:MKLMM    作者:omerwe    | 项目源码 | 文件源码
def fit(self, X, C, y, regions, kernelType, reml=True, maxiter=100):
        """Fit the model and return the fitted quantities as a dict.

        Kernel names are chosen per region (via ``buildKernelAdapt`` when
        ``kernelType == 'adapt'``, otherwise ``kernelType`` is repeated for
        every region), ``self.optimize`` is run, and the posterior is computed
        from the resulting training kernel.  When ``y`` has exactly two
        distinct values the first fixed effect is shifted by the minimiser of
        ``self.getNegLL``.

        Returns a dict with the keys ``'sig2e'``, ``'hyp_kernels'``,
        ``'fixedEffects'`` and ``'kernelNames'``.
        """

        #construct a list of kernel names (one for each region) 
        if (kernelType == 'adapt'): kernelNames = self.buildKernelAdapt(X, C, y, regions, reml, maxiter)
        else: kernelNames = [kernelType] * len(regions)         

        #perform optimization
        kernelObj, hyp_kernels, sig2e, fixedEffects = self.optimize(X, C, y, kernelNames, regions, reml, maxiter)

        #compute posterior distribution
        Ktraintrain = kernelObj.getTrainKernel(hyp_kernels)
        post = self.infExact_scipy_post(Ktraintrain, C, y, sig2e, fixedEffects)

        #fix intercept if phenotype is binary
        if (len(np.unique(y)) == 2):            
            # samples below the phenotype mean are treated as controls, the rest as cases
            controls = (y<y.mean())
            cases = ~controls
            # NOTE(review): the right-hand side of this assignment is missing in
            # this copy of the code (syntax error as written); restore it from
            # the upstream project before use
            meanVec =
            mu, var = self.getPosteriorMeanAndVar(np.diag(Ktraintrain), Ktraintrain, post, meanVec)                                     
            # shift the first fixed effect by the scalar minimising getNegLL (Brent's method)
            fixedEffects[0] -= optimize.minimize_scalar(self.getNegLL, args=(mu, np.sqrt(sig2e+var), controls, cases), method='brent').x                

        #construct trainObj
        trainObj = dict([])
        trainObj['sig2e'] = sig2e
        trainObj['hyp_kernels'] = hyp_kernels
        trainObj['fixedEffects'] = fixedEffects     
        trainObj['kernelNames'] = kernelNames

        return trainObj
项目:kaggle_dsb2017    作者:astoc    | 项目源码 | 文件源码
def load_scan(path):
    """Load the DICOM slices of one scan directory, sorted along z.

    Every file in ``path`` is read.  When the directory holds more than one
    acquisition, only the one with the most slices is kept; on a tie the
    acquisition with the higher number wins.  The kept slices are sorted by
    the z component of ``ImagePositionPatient`` and each one gets its
    ``SliceThickness`` overwritten with the spacing between the first two
    slices.

    Returns the list of kept slice objects.
    """
    slices = [dicom.read_file(os.path.join(path, s)) for s in os.listdir(path)]

    acquisitions = [x.AcquisitionNumber for x in slices]

    vals, counts = np.unique(acquisitions, return_counts=True)
    # np.unique returns ascending values; reverse both arrays so that argmax
    # (which picks the first maximum) prefers the later acquisition on a tie
    vals = vals[::-1]
    counts = counts[::-1]

    # take the acquisition with the most entries
    acq_val_sel = vals[np.argmax(counts)]

    if len(vals) > 1:
        print ("WARNING ##########: MULTIPLE acquisitions & counts, acq_val_sel, path: ", vals, counts, acq_val_sel, path)

    # example of a path with two acquisitions:
    # '../input/stage1/b8bb02d229361a623a4dc57aa0e5c485'
    slices = [x for x in slices if x.AcquisitionNumber == acq_val_sel]

    # positions must be compared as floats (an int() key was a bug up to v8)
    slices.sort(key = lambda x: float(x.ImagePositionPatient[2]))
    try:
        slice_thickness = np.abs(slices[0].ImagePositionPatient[2] - slices[1].ImagePositionPatient[2])
    except Exception:
        # fall back to SliceLocation when the positions cannot be subtracted
        slice_thickness = np.abs(slices[0].SliceLocation - slices[1].SliceLocation)

    for s in slices:
        s.SliceThickness = slice_thickness

    return slices
项目:kaggle_dsb2017    作者:astoc    | 项目源码 | 文件源码
def largest_label_volume(im, bg=-1):
    """Return the most frequent value in ``im`` other than ``bg``.

    ``im`` is any array-like of labels and ``bg`` the label to ignore.
    Returns ``None`` when no value other than ``bg`` is present.  On a tie
    the smallest value wins, because ``np.unique`` sorts ascending and
    ``argmax`` picks the first maximum.
    """
    vals, counts = np.unique(im, return_counts=True)

    foreground = vals != bg
    vals = vals[foreground]
    counts = counts[foreground]

    if len(counts) == 0:
        return None
    return vals[np.argmax(counts)]

项目:cellranger    作者:10XGenomics    | 项目源码 | 文件源码
def get_chunks_by_gem_group(self):
        """ Yield one ``(gem_group, chunk[0], chunk[1])`` tuple per gem group.

        The 'gem_group' column must be in non-decreasing order. """
        groups = self.get_column('gem_group')
        # chunking relies on the column being sorted
        assert np.all(np.diff(groups) >= 0)
        distinct_groups = np.unique(groups)

        def group_at(index):
            return groups[index]

        chunks = self.get_chunks_from_partition(distinct_groups, group_at)
        for group, bounds in zip(distinct_groups, chunks):
            yield (group, bounds[0], bounds[1])
项目:cellranger    作者:10XGenomics    | 项目源码 | 文件源码
def compute_readpairs_per_umi_threshold(reads, subsample_rate):
    ''' Compute a threshold above which the UMIs are unlikely to be PCR off-products.

        The read counts are binomially subsampled, log-transformed and split
        into two clusters with K-Means; the threshold is the largest
        subsampled count in the cluster with the smaller centre.

        reads (np.array(int)) - Read pairs for each UMI
        subsample_rate (float) - Subsample reads to this fraction.
        Returns threshold (int) - The RPPU threshold in the subsampled space.
            1 is returned when there are fewer than two distinct counts,
            either before or after subsampling. '''

    # print(...) with one argument behaves the same on Python 2 and 3
    if len(np.unique(reads)) < 2:
        print('Skipping RPPU threshold calculation.')
        return 1

    print('RPPU subsample rate: %0.4f' % subsample_rate)

    reads = np.random.binomial(reads, subsample_rate)
    # UMIs that lost all their reads drop out (and log(0) is avoided below)
    reads = reads[reads > 0]

    if len(np.unique(reads)) < 2:
        print('Subsampling gave a degenerate distribution of RPPU. Skipping RPPU threshold calculation.')
        return 1

    new_n50 = tk_stats.NX(reads, 0.5)

    print('New N50: %d:' % new_n50)

    # Log-transform counts
    log_reads = np.log(reads)

    # Run K-Means. Reshape necessary because kmeans takes a matrix.
    kmeans = sk_cluster.KMeans(2).fit(log_reads.reshape((-1, 1)))

    # Take the cluster with the smallest mean
    min_cluster = np.argsort(np.ravel(kmeans.cluster_centers_))[0]

    print('RPPU component means: ' + str(list(np.exp(kmeans.cluster_centers_))))
    print('RPPU component members: ' + str(np.bincount(kmeans.labels_)))

    # Take the max element in the min-cluster
    threshold = np.max(reads[kmeans.labels_ == min_cluster])

    return threshold
项目:cellranger    作者:10XGenomics    | 项目源码 | 文件源码
def append_data_column(ds, column):
    """Append the values of ``column`` to the end of the dataset ``ds``.

    ``ds`` is resized in place to hold the extra ``column.shape[0]`` entries.
    If ``get_levels(ds)`` returns levels, values of ``column`` that are not
    yet among them are added to the level list, the column is converted to
    an index array with ``make_index_array`` and the levels are written back
    with ``create_levels``.  Otherwise ``column`` is stored as is.

    NOTE(review): when the column is widened, ``ds`` is rebound to the value
    returned by ``widen_cat_column``; callers do not see that new object.
    """
    # Extend the dataset to fit the new data
    new_count = column.shape[0]
    existing_count = ds.shape[0]
    ds.resize((existing_count + new_count,))

    levels = get_levels(ds)

    if levels is not None:
        # update levels if we have new unique values
        if isinstance(column.values, p.Categorical):
            added_levels = set(column.values.categories) - set(levels)
        elif len(column) == 0:
            # Workaround for bug in pandas - get a crash in .unique() for an empty series
            added_levels = set()
        else:
            added_levels = set(column.unique()) - set(levels)

        # existing levels keep their positions; new ones go at the end
        new_levels = list(levels)
        new_levels.extend(added_levels)

        # Check if the new categorical column has more levels
        # than the current bit width supports.
        # If so, rewrite the existing column data w/ more bits
        if len(new_levels) > np.iinfo(ds.dtype).max:
            new_dtype = pick_cat_dtype(len(new_levels))
            ds = widen_cat_column(ds, new_dtype)

        # builtin `object` is what the removed `np.object` alias referred to
        new_levels = np.array(new_levels, dtype=object)
        new_data = make_index_array(new_levels, column.values, ds.dtype)

        create_levels(ds, new_levels)
    else:
        new_data = column

    # Append new data
    ds[existing_count:(existing_count + new_count)] = new_data
项目:FCN_train    作者:315386775    | 项目源码 | 文件源码
def _label2rgb_avg(label_field, image, bg_label=0, bg_color=(0, 0, 0)):
    """Visualise each segment in `label_field` with its mean color in `image`.

    label_field : array of int
        A segmentation of an image.
    image : array, shape ``label_field.shape + (3,)``
        A color image of the same spatial shape as `label_field`.
    bg_label : int, optional
        A value in `label_field` to be treated as background.
    bg_color : 3-tuple of int, optional
        The color for the background label

    out : array, same shape and type as `image`
        The output visualization.
    out = np.zeros_like(image)
    labels = np.unique(label_field)
    bg = (labels == bg_label)
    if bg.any():
        labels = labels[labels != bg_label]
        out[bg] = bg_color
    for label in labels:
        mask = (label_field == label).nonzero()
        color = image[mask].mean(axis=0)
        out[mask] = color
    return out
项目:soccerstan    作者:Torvaney    | 项目源码 | 文件源码
def stan_map(vector):
    """ Map every distinct item of vector to a 1-based id, in sorted order. """
    ids = {}
    for position, value in enumerate(np.unique(vector), start=1):
        ids[value] = position
    return ids