def split_data(self,n,v=5):
        ''' Split the sample indices 0..n-1 into v folds, whatever the number of samples per class.

        The indices are shuffled with a fixed seed (1) so that repeated calls
        build the same folds.  Note that this reseeds the global generator
        behind ``sp.random``.

        Input:
            n : the number of samples
            v : the number of folds
        Output: None.  For every fold, the testing indices are appended to
            ``self.iT`` and the training indices (all other folds) to ``self.it``.

        NOTE(review): this block was truncated in the file (docstring never
        closed, a ``for`` loop without a body, ``indices`` never defined, the
        folds never stored).  The fold construction follows from the surviving
        lines; the attribute names ``self.it`` / ``self.iT`` are an assumption
        and must be confirmed against the enclosing class.
        '''
        step = n // v  # Number of samples in each of the first v-1 folds
        sp.random.seed(1)   # Set the random generator to the same initial state
        t = sp.random.permutation(n)    # Generate random sampling of the indices

        # Group in v folds; the last fold also receives the n % v leftover samples
        indices = [t[i*step:(i+1)*step] for i in range(v-1)]
        indices.append(t[(v-1)*step:n])

        for i in range(v):
            # Fold i is the testing set ...
            self.iT.append(sp.asarray(indices[i]))
            # ... and every other fold, concatenated, forms the training set
            temp = sp.empty(0,dtype=sp.int64)
            for j in range(v):
                if j != i:
                    temp = sp.concatenate((temp,sp.asarray(indices[j])))
            self.it.append(temp)
def main():
    """Fit ``libmr.MR`` on the high tail of example scores and print w-scores.

    Fixed example scores are fitted with ``fit_high`` and the fitted model is
    evaluated on a fixed list of test distances with ``w_score_vector``.
    One line is printed per test distance (distance, w-score, ``mr.inv`` of
    that w-score), followed by a short legend and the raw arrays.

    Every ``print`` uses a single pre-formatted string so the output is the
    same text under Python 2 and Python 3.
    """
    posscores = sp.asarray([0.245 ,  0.2632,  0.3233,  0.3573,  0.4014,  0.4055,  0.4212, 0.5677])
    test_distances = sp.asarray([ 0.05,  0.1 ,  0.25,  0.4 ,  0.75,  1.  ,  1.5 ,  2.])

    mr = libmr.MR()
    # since higher is worse and we want to fit the higher tail,
    # use fit_high()
    mr.fit_high(posscores, posscores.shape[0])
    wscores = mr.w_score_vector(test_distances)
    for i in range(wscores.shape[0]):
        wscore = wscores[i]
        print("%.2f %.2f %.2f" % (test_distances[i], wscore, mr.inv(wscore)))
    # wscores are the ones to be used in the equation
    # s_i * (1 - rho_i)
    print("Low wscore --> Low probability that the score is outlier i.e. sample IS NOT outlier")
    print("High wscore --> High probability that the score is outlier i.e. sample IS an outlier")
    # Two spaces after the colon reproduce the output of the former
    # ``print "label: ", value`` statements (label's trailing space + separator).
    print("posscores:  %s" % (posscores,))
    print("test_distances:  %s" % (test_distances,))
    print("wscores:  %s" % (wscores,))
def split_data(self,n,v=5):
        ''' Split the sample indices 0..n-1 into v folds, whatever the number of samples per class.

        The indices are shuffled with a fixed seed (1) so that repeated calls
        build the same folds.  Note that this reseeds the global generator
        behind ``sp.random``.

        Input:
            n : the number of samples
            v : the number of folds
        Output: None.  For every fold, the testing indices are appended to
            ``self.iT`` and the training indices (all other folds) to ``self.it``.

        NOTE(review): this block was truncated in the file (docstring never
        closed, a ``for`` loop without a body, ``indices`` never defined, the
        folds never stored).  The fold construction follows from the surviving
        lines; the attribute names ``self.it`` / ``self.iT`` are an assumption
        and must be confirmed against the enclosing class.
        '''
        step = n // v  # Number of samples in each of the first v-1 folds
        sp.random.seed(1)   # Set the random generator to the same initial state
        t = sp.random.permutation(n)    # Generate random sampling of the indices

        # Group in v folds; the last fold also receives the n % v leftover samples
        indices = [t[i*step:(i+1)*step] for i in range(v-1)]
        indices.append(t[(v-1)*step:n])

        for i in range(v):
            # Fold i is the testing set ...
            self.iT.append(sp.asarray(indices[i]))
            # ... and every other fold, concatenated, forms the training set
            temp = sp.empty(0,dtype=sp.int64)
            for j in range(v):
                if j != i:
                    temp = sp.concatenate((temp,sp.asarray(indices[j])))
            self.it.append(temp)
def compute_mean_vector(category_name, labellist, layer = 'fc8'):
    """Compute and save the per-channel mean of ``layer`` for one category.

    Every ``*.mat`` file under ``<featurefilepath>/<category_name>/`` is
    loaded; a file contributes only when the label selected by the argmax of
    its ``'scores'`` entry equals ``category_name``.  For each channel (first
    axis of the layer array) the mean over all contributing files is taken,
    and the stacked result is written to ``<category_name>.mat`` under the key
    ``category_name``.

    category_name : category to process (also the sub-directory name)
    labellist     : sequence mapping a score index to a category name
    layer         : key of the feature array inside each .mat file

    Raises IndexError when no file contributes (``correct_features`` is empty).
    """
    print(category_name)
    featurefile_list = glob.glob('%s/%s/*.mat' %(featurefilepath, category_name))

    # gather all the training samples for which predicted category
    # was the category under consideration
    correct_features = []
    for featurefile in featurefile_list:
        try:
            img_arr = loadmat(featurefile)
            predicted_category = labellist[img_arr['scores'].argmax()]
            if predicted_category == category_name:
                correct_features += [img_arr[layer]]
        except TypeError:
            # NOTE(review): the ``try:`` line and this handler's body were
            # missing from the file; skipping the offending feature file is
            # assumed to be the intended behaviour -- confirm.
            continue

    # Now compute channel wise mean vector
    channel_mean_vec = []
    for channelid in range(correct_features[0].shape[0]):
        channel = sp.asarray([feature[channelid, :] for feature in correct_features])
        assert len(correct_features) == channel.shape[0]
        # Gather mean over each channel, to get mean channel vector
        channel_mean_vec += [sp.mean(channel, axis=0)]

    # this vector contains mean computed over correct classifications
    # for each channel separately
    channel_mean_vec = sp.asarray(channel_mean_vec)
    savemat('%s.mat' %category_name, {'%s'%category_name: channel_mean_vec})
def computeOpenMaxProbability(openmax_fc8, openmax_score_u):
    """ Convert the scores in probability value using openmax

    Input:
    ---------------
    openmax_fc8 : modified FC8 layer from Weibull based computation
    openmax_score_u : degree

    Output:
    ---------------
    modified_scores : probability values modified using OpenMax framework,
    by incorporating degree of uncertainity/openness for a given class

    Both inputs are indexed as ``[channel, category]``.  For each of the
    ``NCHANNELS`` channels the exponentiated class scores and the
    exponentiated sum of the unknown scores are divided by their common total;
    the per-channel results are then averaged.  The returned list holds the
    ``NCLASSES`` class probabilities followed by the unknown probability.
    """
    prob_scores, prob_unknowns = [], []
    for channel in range(NCHANNELS):
        channel_scores = [sp.exp(openmax_fc8[channel, category])
                          for category in range(NCLASSES)]

        # The unknown-class term appears in both numerator and denominator,
        # so compute it once.
        unknown_score = sp.exp(sp.sum(openmax_score_u[channel, :]))
        total_denominator = sp.sum(sp.exp(openmax_fc8[channel, :])) + unknown_score
        prob_scores += [sp.asarray(channel_scores) / total_denominator]
        prob_unknowns += [unknown_score / total_denominator]

    prob_scores = sp.asarray(prob_scores)
    prob_unknowns = sp.asarray(prob_unknowns)

    scores = sp.mean(prob_scores, axis = 0)
    unknowns = sp.mean(prob_unknowns, axis=0)
    modified_scores =  scores.tolist() + [unknowns]
    # One entry per known class plus one for "unknown" (1001 when NCLASSES is 1000)
    assert len(modified_scores) == NCLASSES + 1
    return modified_scores

def frame_to_vect(frame):
        """Turn an RGB frame into a channel-first float16 array for a CNN input layer.

        The frame is read as (height, width, channel): values are converted to
        float16 and divided by 255, then the channel axis is moved to the
        front.  The final reshape to (3, height, width) means exactly three
        channels are required.
        """
        height, width = frame.shape[:2]
        scaled = sp.asarray(frame, dtype=sp.float16) / 255.0
        channels_first = scaled.transpose(2, 0, 1)
        return channels_first.reshape(3, height, width)
def initialize(self, sample_from_prior, distance_to_ground_truth_function):
        """Set the epsilon for time point 0 in ``self._look_up``.

        When ``self._initial_epsilon`` equals ``'from_sample'`` the initial
        epsilon is the median of the distances of the prior samples to the
        ground truth, scaled by ``self.median_multiplier``; otherwise the
        configured ``self._initial_epsilon`` is used as is.

        sample_from_prior : iterable of samples passed one by one to the
            distance function
        distance_to_ground_truth_function : callable returning the distance of
            a single sample

        NOTE(review): the ``else:`` line and the start of the final logging
        call were lost in the file (two statements were fused on one line).
        The logging call is restored on ``eps_logger``; the ``info`` level is
        an assumption -- confirm.
        """
        eps_logger.debug("calc initial epsilon")
        # calculate initial epsilon if not given
        if self._initial_epsilon == 'from_sample':
            distances = sp.asarray([distance_to_ground_truth_function(x)
                                    for x in sample_from_prior])
            eps_t0 = sp.median(distances) * self.median_multiplier
            self._look_up = {0: eps_t0}
        else:
            self._look_up = {0: self._initial_epsilon}

        eps_logger.info("initial epsilon is {}".format(self._look_up[0]))
# Project: pyabc    Author: neuralyzer    | project source | file source
def _dict_to_to_vect(self, x):
        """Return the values of ``x`` for the keys in ``self.measures_to_use``, in that order, as an array."""
        selected = []
        for measure in self.measures_to_use:
            selected.append(x[measure])
        return sp.asarray(selected)
# Project: pyabc    Author: neuralyzer    | project source | file source
def _calculate_whitening_transformation_matrix(self, sample_from_prior):
        """Compute a whitening matrix from prior samples and store it on ``self``.

        Each sample is converted with ``self._dict_to_to_vect``; the stacked
        vectors are centred, a scatter matrix is built and eigen-decomposed
        with ``la.eigh``, and ``V diag(1/sqrt(w)) V^T`` is stored in
        ``self._whitening_transformation_matrix``.

        A zero eigenvalue (e.g. a constant feature) leads to a division by
        zero in ``1 / sqrt(w)``.

        NOTE(review): the right-hand side of ``covariance`` and the head of
        the final expression were missing from the file.  The final expression
        is rebuilt to match the surviving ``/ sp.sqrt(w))).dot(v.T))`` tail;
        the unnormalised scatter matrix ``centered.T.dot(centered)`` (no
        division by the sample count) is an assumption -- confirm.
        """
        samples_vec = sp.asarray([self._dict_to_to_vect(x)
                                  for x in sample_from_prior])
        # samples_vec is an array of shape nr_samples x nr_features
        means = samples_vec.mean(axis=0)
        centered = samples_vec - means
        covariance = centered.T.dot(centered)
        w, v = la.eigh(covariance)
        self._whitening_transformation_matrix = (
            v.dot(sp.diag(1. / sp.sqrt(w))).dot(v.T))
def split_data_class(self,y,v=5):
        ''' Split the data into v folds so that the samples of each class are spread approximately evenly over the folds.

        Class labels are taken to be 1..C with C = y.max().  For each class
        the sample indices are shuffled with a seed equal to the zero-based
        class index (so every fold sees the same ordering; this reseeds the
        global generator behind ``sp.random``) and cut into v consecutive
        slices; the last slice also receives the leftover samples.

        Input:
            y : the array of class labels, one per sample
            v : the number of folds
        Output: None.  For every fold, the list of testing indices is appended
            to ``self.iT`` and the list of training indices to ``self.it``.

        NOTE(review): this block was truncated in the file (docstring never
        closed, ``else:`` lines missing, test fold not excluded from the
        training folds, results never stored).  The ``else:`` branches follow
        from the surviving assignments; the attribute names ``self.it`` /
        ``self.iT`` are an assumption and must be confirmed against the
        enclosing class.
        '''
        # Get parameters
        C = y.max().astype('int')

        for j in range(v):
            tempit = []
            tempiT = []
            for i in range(C):
                # Get all samples for each class
                t  = sp.where(y==(i+1))[0]
                nc = t.size
                stepc = nc // v # Step size for each class
                if stepc == 0:
                    print("Not enough sample to build "+ str(v) +" folds in class " + str(i))
                sp.random.seed(i)   # Set the random generator to the same initial state
                tc = t[sp.random.permutation(nc)] # Random sampling of indices of samples for class i

                # Slice l goes to the testing set when l == j, otherwise to training
                for l in range(v):
                    if l < (v-1):
                        start,end = l*stepc,(l+1)*stepc
                    else:
                        start,end = l*stepc,nc
                    if l == j:
                        tempiT.extend(sp.asarray(tc[start:end])) #Testing
                    else:
                        tempit.extend(sp.asarray(tc[start:end])) #Training

            self.it.append(tempit)
            self.iT.append(tempiT)

def compute_features(imgname, args):
    """
    Instantiate a classifier class, pass the images through the network and save features.
    Features are saved in .mat format

    imgname : path of the image; the output path is derived from it by
        replacing 'imageNetForWeb' with 'imageNetForWeb_Features' and
        appending '.mat'
    args : options object; the attributes read here are images_dim,
        force_grayscale, channel_swap, mean_file, model_def, pretrained_model,
        gpu, input_scale and center_only

    The saved dictionary holds 'IMG_NAME', the 'fc7', 'fc8' and 'prob' blobs
    (squeezed over axes 2 and 3) and 'scores' as returned by
    ``classifier.predict``.

    NOTE(review): several lines were missing from the file: the docstring
    quotes, the ``else:`` of the grayscale switch, the body of the
    "output directory does not exist" check and the statement filling
    ``inputs``.  The directory creation and the ``caffe.io.load_image`` call
    below are reconstructions -- confirm against the original script.
    """
    import os  # local import: only needed for the restored makedirs call

    image_dims = [int(s) for s in args.images_dim.split(',')]
    if args.force_grayscale:
      channel_swap = None
      mean_file = None
    else:
      channel_swap = [int(s) for s in args.channel_swap.split(',')]
      mean_file = args.mean_file

    # Make classifier.
    classifier = caffe.Classifier(args.model_def, args.pretrained_model,
            image_dims=image_dims, gpu=args.gpu, mean_file=mean_file,
            input_scale=args.input_scale, channel_swap=channel_swap)

    if args.gpu:
        print('GPU mode')

    outfname = imgname.replace('imageNetForWeb', 'imageNetForWeb_Features') + ".mat"
    print(outfname)
    if not path.exists(path.dirname(outfname)):
        os.makedirs(path.dirname(outfname))

    inputs = [caffe.io.load_image(imgname)]

    if args.force_grayscale:
        inputs = [rgb2gray(img) for img in inputs]

    print("Classifying %d inputs." % len(inputs))

    scores = classifier.predict(inputs, not args.center_only)
    # Now save features
    feature_dict = {}
    feature_dict['IMG_NAME'] = path.join(path.dirname(imgname), path.basename(imgname))
    feature_dict['fc7'] = sp.asarray(classifier.blobs['fc7'].data.squeeze(axis=(2,3)))
    feature_dict['fc8'] = sp.asarray(classifier.blobs['fc8'].data.squeeze(axis=(2,3)))
    feature_dict['prob'] = sp.asarray(classifier.blobs['prob'].data.squeeze(axis=(2,3)))
    feature_dict['scores'] = sp.asarray(scores)
    savemat(outfname, feature_dict)
def compute_channel_distances(mean_train_channel_vector, features, category_name):
    mean_train_channel_vector : mean activation vector for a given class. 
                                It can be computed using file
    features: features for the category under consideration
    category_name: synset_id

    channel_distances: dict of distance distribution from MAV for each channel. 
    distances considered are eucos, cosine and euclidean

    eucos_dist, eu_dist, cos_dist = [], [], []
    for channel in range(features[0].shape[0]):
        eu_channel, cos_channel, eu_cos_channel = [], [], []
        # compute channel specific distances
        for feat in features:
            eu_channel += [spd.euclidean(mean_train_channel_vector[channel, :], feat[channel, :])]
            cos_channel += [spd.cosine(mean_train_channel_vector[channel, :], feat[channel, :])]
            eu_cos_channel += [spd.euclidean(mean_train_channel_vector[channel, :], feat[channel, :])/200. +
                               spd.cosine(mean_train_channel_vector[channel, :], feat[channel, :])]
        eu_dist += [eu_channel]
        cos_dist += [cos_channel]
        eucos_dist += [eu_cos_channel]

    # convert all arrays as scipy arrays
    eucos_dist = sp.asarray(eucos_dist)
    eu_dist = sp.asarray(eu_dist)
    cos_dist = sp.asarray(cos_dist)

    # assertions for length check
    assert eucos_dist.shape[0] == 10
    assert eu_dist.shape[0] == 10
    assert cos_dist.shape[0] == 10
    assert eucos_dist.shape[1] == len(features)
    assert eu_dist.shape[1] == len(features)
    assert cos_dist.shape[1] == len(features)

    channel_distances = {'eucos': eucos_dist, 'cosine': cos_dist, 'euclidean':eu_dist}
    return channel_distances

def split_data_class(self,y,v=5):
        ''' Split the data into v folds so that the samples of each class are spread approximately evenly over the folds.

        Class labels are taken to be 1..C with C = y.max().  For each class
        the sample indices are shuffled with a seed equal to the zero-based
        class index (so every fold sees the same ordering; this reseeds the
        global generator behind ``sp.random``) and cut into v consecutive
        slices; the last slice also receives the leftover samples.

        Input:
            y : the array of class labels, one per sample
            v : the number of folds
        Output: None.  For every fold, the list of testing indices is appended
            to ``self.iT`` and the list of training indices to ``self.it``.

        NOTE(review): this block was truncated in the file (docstring never
        closed, ``else:`` lines missing, test fold not excluded from the
        training folds, results never stored).  The ``else:`` branches follow
        from the surviving assignments; the attribute names ``self.it`` /
        ``self.iT`` are an assumption and must be confirmed against the
        enclosing class.
        '''
        # Get parameters
        C = y.max().astype('int')

        for j in range(v):
            tempit = []
            tempiT = []
            for i in range(C):
                # Get all samples for each class
                t  = sp.where(y==(i+1))[0]
                nc = t.size
                stepc = nc // v # Step size for each class
                if stepc == 0:
                    print("Not enough sample to build "+ str(v) +" folds in class " + str(i))
                sp.random.seed(i)   # Set the random generator to the same initial state
                tc = t[sp.random.permutation(nc)] # Random sampling of indices of samples for class i

                # Slice l goes to the testing set when l == j, otherwise to training
                for l in range(v):
                    if l < (v-1):
                        start,end = l*stepc,(l+1)*stepc
                    else:
                        start,end = l*stepc,nc
                    if l == j:
                        tempiT.extend(sp.asarray(tc[start:end])) #Testing
                    else:
                        tempit.extend(sp.asarray(tc[start:end])) #Training

            self.it.append(tempit)
            self.iT.append(tempiT)
