def computeF1_macro(confusion_matrix,matching, num_clusters):
    """
    Compute the macro F1 score of a clustering.

    The columns of ``confusion_matrix`` are first permuted so that column
    ``c`` holds the counts of the column matched to cluster ``c``; after the
    permutation the diagonal holds the true positives of every cluster.

    confusion_matrix : square array-like (``num_clusters`` x ``num_clusters``)
        that requires permutation
    matching : indexable ``cluster -> column`` according to which the matrix
        must be permuted
    num_clusters : number of clusters

    Returns the arithmetic mean of the per-cluster F1 scores. A cluster
    without any true positive contributes an F1 of 0 instead of triggering a
    0/0 division or a harmonic mean over a zero.
    """
    confusion_matrix = np.asarray(confusion_matrix, dtype=float)
    ##Permute the matrix columns
    column_order = [matching[cluster] for cluster in range(num_clusters)]
    permuted_confusion_matrix = confusion_matrix[:, column_order]
    ##Compute the F1 score for every cluster
    TP = np.diag(permuted_confusion_matrix)
    FP = permuted_confusion_matrix.sum(axis=0) - TP
    FN = permuted_confusion_matrix.sum(axis=1) - TP
    # The harmonic mean of precision and recall simplifies to
    # 2*TP / (2*TP + FP + FN), which stays well defined when TP == 0.
    denominator = 2 * TP + FP + FN
    f1 = np.divide(2 * TP, denominator,
                   out=np.zeros_like(denominator),
                   where=denominator > 0)
    return float(f1.sum()) / num_clusters
def _computer_harmoic_mean_of_probabilities_over_non_zero_in_category_count_terms(self,
        # NOTE(review): the parameter list appears to be truncated here. The body
        # reads cat_word_counts, p_word_given_category, p_category_given_word and
        # scaler, none of which are bound by the visible signature - TODO restore.
        #
        # Purpose (from the visible body): score every term with the harmonic
        # mean of its two scaled probabilities; terms whose category count is
        # not positive keep a score of 0.
        df = pd.DataFrame({
            'cat_word_counts': cat_word_counts,
            'p_word_given_category': p_word_given_category,
            'p_category_given_word': p_category_given_word
        # NOTE(review): the closing "})" of the DataFrame literal is missing - TODO restore.
        # Only rows with a positive category count take part in the scaling.
        df_with_count = df[df['cat_word_counts'] > 0]
        # NOTE(review): df_with_count is a filtered selection of df; assigning
        # new columns to it may raise pandas' SettingWithCopyWarning - confirm.
        df_with_count['scale p_word_given_category'] = scaler(df_with_count['p_word_given_category'])
        df_with_count['scale p_category_given_word'] = scaler(df_with_count['p_category_given_word'])
        # Copy the scaled values back into the full frame; other rows stay at 0.
        df['scale p_word_given_category'] = 0
        df.loc[df_with_count.index, 'scale p_word_given_category'] = df_with_count['scale p_word_given_category']
        df['scale p_category_given_word'] = 0
        df.loc[df_with_count.index, 'scale p_category_given_word'] \
            = df_with_count['scale p_category_given_word']
        # Harmonic mean of the two scaled columns, computed for counted rows only.
        score = hmean([df_with_count['scale p_category_given_word'],
                       df_with_count['scale p_word_given_category']])
        df['score'] = 0
        df.loc[df_with_count.index, 'score'] = score
        # Returns the 'score' column of the full frame.
        return df['score']
def compute_semeval_score(pearson_score, spearman_score):
    """
    Return NaN if a dataset can't be evaluated on a given frame. Return 0 if at least one similarity
    measure was 0 or negative. Otherwise, take a harmonic mean of a Pearson correlation coefficient
    and a Spearman correlation coefficient.

    Both arguments are looked up by the interval names 'acc', 'low' and 'high'. The result is a
    pandas Series holding one combined score per interval, labelled with the same three names.
    """
    intervals = ['acc', 'low', 'high']
    scores = []
    for interval in intervals:
        pair = [spearman_score[interval], pearson_score[interval]]
        if any(np.isnan(x) for x in pair):
            # One of the correlations could not be computed.
            scores.append(float('nan'))
        elif any(x <= 0 for x in pair):
            # The harmonic mean is only meaningful for positive values.
            scores.append(0)
        else:
            scores.append(hmean(pair))

    return pd.Series(scores, index=intervals)
def calcWeightage(Elo_count):
    """Split a total weight of 1 into three contributions.

    Elo_count : sequence of positive numbers passed to ``stats.hmean``.

    Returns the tuple ``(Rrel, pr_contri, elo_contri)``. ``Rrel`` is fixed at
    0.4; the remaining 0.6 is divided between ``elo_contri`` and
    ``pr_contri``. ``elo_contri`` grows with the harmonic mean of
    ``Elo_count`` (shifted down by 10) through a saturating exponential.
    """
    relevance_share = 0.4
    remaining_share = 1. - relevance_share

    shifted_mean = stats.hmean(Elo_count) - 10
    decay = math.exp(-shifted_mean / 2)

    # The confidence saturates at 0.4 as the shifted harmonic mean grows.
    elo_share = remaining_share * (0.4 * (1 - decay))

    return (relevance_share, remaining_share - elo_share, elo_share)
##The basic folder to be created
def check_coarsening_method(methods):
    """Validate the requested coarsening methods.

    methods : list of str or None
        Names of the coarsening methods to use. ``None`` selects every
        implemented method.

    Returns ``methods`` unchanged when every name is accepted, or the full
    list of accepted methods when ``methods`` is ``None``.

    Raises ValueError when a name is not one of the accepted methods.
    """
    accepted_methods = ['min', 'max', 'amean', 'hmean', 'gmean', 'median']
    if methods is None:
        # No explicit choice: fall back to all implemented methods.
        return accepted_methods

    for method in methods:
        if method not in accepted_methods:
            raise ValueError( ' Coarsening method {0} is not implemented..\
                \n Use these: {1}'.format(method, accepted_methods) )

    return methods
def my_metrics(y_test, y_pred):
    """Average the positive-class F1 score with its negative-class analogue.

    y_test : true labels, passed to ``confusion_matrix`` and ``f1_score``.
    y_pred : predicted labels, passed to the same two functions.

    Returns the arithmetic mean of ``f1_score(y_test, y_pred)`` and the
    harmonic mean of the negative-class precision and recall.
    """
    cm = confusion_matrix(y_test, y_pred)

    # This function reads the matrix as [[TN, FP], [FN, TP]].
    true_neg, false_pos = cm[0][0], cm[0][1]
    false_neg = cm[1][0]

    # Zero counts are replaced by 1 before the ratios are formed.
    false_neg = 1 if false_neg == 0 else false_neg
    true_neg = 1 if true_neg == 0 else true_neg

    # Share of the samples identified as negative that really are negative.
    negative_precision = true_neg / (true_neg + false_neg)

    # Share of the truly negative samples that were identified as such.
    negative_recall = true_neg / (false_pos + true_neg)

    negative_f = hmean([negative_recall, negative_precision])

    # Plain mean of the regular F1 score and its negative-class counterpart.
    return np.mean([f1_score(y_test, y_pred), negative_f])
def optimize_weights(func, *args):
    """
    Both eval_pairwise_analogies() and eval_semeval2012_analogies() have three
    weights that can be tuned (and therefore two free parameters, as the total
    weight does not matter):

    - The *direct weight*, comparing (b2 - a2) to (b1 - a1)
    - The *transpose weight*, comparing (b2 - b1) to (a2 - a1)
    - The *similarity weight*, comparing b2 to b1 and a2 to a1

    This function takes a function for which to optimize the weights as an
    argument and returns the optimal weights, `weight_direct` and
    `weight_transpose`.

    `func` is called as `func(*args, weight_direct, weight_transpose,
    subset='dev')` for every pair taken from a fixed grid of weights. It must
    return either an object whose `.loc['acc']` is the accuracy, or a list of
    two such objects, in which case the harmonic mean of both accuracies is
    optimized.

    Raises TypeError if no weight pair reaches an accuracy above 0, because
    there is then no best pair to unpack.
    """
    print('Tuning analogy weights')
    weights = [
        0., 0.05, 0.1, 0.15, 0.2, 0.3, 0.35, 0.4, 0.5, 0.6, 0.65, 0.7, 0.8,
        0.9, 1.0, 1.5, 2.0, 2.5, 3.0
    ]
    best_weights = None
    best_acc = 0.
    for weight_direct in weights:
        for weight_transpose in weights:
            scores = func(*args, weight_direct, weight_transpose, subset='dev')
            if isinstance(scores, list):
                # If a function to optimize returns two results, like eval_semeval2012_analogies(),
                #  take their harmonic mean to compute the weights optimal for both results
                acc = hmean([scores[0].loc['acc'], scores[1].loc['acc']])
            else:
                acc = scores.loc['acc']
            if acc > best_acc:
                print(weight_direct, weight_transpose, acc)
                best_weights = (weight_direct, weight_transpose)
                best_acc = acc
            elif acc == best_acc:
                # Ties are reported but the first pair found is kept.
                print(weight_direct, weight_transpose, acc)
    weight_direct, weight_transpose = best_weights
    return weight_direct, weight_transpose
def calculate_likelihood(sentence):
    # Runs the model once per character position of `sentence`, feeding it the
    # one-hot encoded text before the position (x) and after it (x2), then once
    # more for the end of the sentence.
    # NOTE(review): several statements appear to be missing from this function
    # (see the notes below); as written `v` is never filled - TODO restore.
    v = list()
    for t, char in enumerate(sentence):
        # x: '{' at index 0 followed by the first t characters of the sentence.
        x = np.zeros((1, t+1, len(chars)))
        # t2: number of characters after position t.
        t2 = len(sentence)-(t+1)
        # x2: the characters after position t, followed by '}' at the last index.
        x2 = np.zeros((1, t2+1, len(chars)))
        x[0, 0, char_indices['{']] = 1.
        x2[0, t2, char_indices['}']] = 1.
        for i in range(t):
            x[0, i+1, char_indices[sentence[i]]] = 1.
        for i in range(t2):
            x2[0, i, char_indices[sentence[t+i+1]]] = 1.
        preds = model.predict([x,x2], verbose=0)[0]
        # NOTE(review): `preds` is not used after this point in the visible code;
        # presumably the prediction for `char` was appended to `v` - TODO confirm.
        #print x
        #print preds
        #print char
        #print "char: %s" % preds[char_indices[char]]
        #print "agg:  %s." % p
    # Final step: x holds '{' plus the whole sentence, x2 holds a single ' '.
    x = np.zeros((1, len(sentence)+1, len(chars)))
    # NOTE(review): this always evaluates to -1 and t2 is not read afterwards.
    t2 = len(sentence)-(len(sentence)+1)
    x2 = np.zeros((1, 1, len(chars)))
    x2[0, 0, char_indices[' ']] = 1.
    x[0, 0, char_indices['{']] = 1.
    for i in range(len(sentence)):
        x[0, i+1, char_indices[sentence[i]]] = 1.
    preds = model.predict([x,x2], verbose=0)[0]
    #print x
    #print preds
    #print 'end'
    #print "char: %s" % preds[char_indices['}']]
    #print "agg:  %s" % p
    #print "avg:  %s" % (sum(v)/len(v))
    #print "min:  %s" % min(v)
    #return hmean(v)
    # NOTE(review): the `try:` line that belongs to the handler below is missing.
    # min() raises ValueError on an empty `v`, which the handler maps to 0.
        return min(v)
    except ValueError:
        return 0
def _PerformDataCoarsening(self, Chrom, resolution, coarsening_method):
        """Base method to perform data coarsening.

        This method reads the temporary Numpy array files and performs data
        coarsening using the given input method.

        .. warning::
            **Private method**. Use it at your own risk. It is used internally in :meth:`WigHandler._StoreInHdf5File`.

        Parameters
        ----------
        Chrom : str
            Chromosome name

        resolution : str
            Resolution in words; converted to a bin size with ``util.resolutionToBinsize``.

        coarsening_method : str
            Name of method to use for data coarsening. Accepted keywords: min, max, median, amean, gmean and hmean.

        Returns
        -------
        numpy.ndarray
            ``output`` converted with ``np.asarray``.

        """
        output = []
        binsize = util.resolutionToBinsize(resolution)
        size = self.chromSizeInfo[Chrom] + 1

        # Walk over the chromosome array in steps of one bin, starting at index 1.
        for i in range(1, size, binsize):
            tmpx = None
            if i+binsize >= size:
                # Last bin: the slice is clamped to the end of the chromosome.
                tmpx = self.tmpNumpyArrayFiles.arrays[Chrom][i : size]
                # NOTE(review): an ``else:`` appears to be missing before the next
                # line; as written it overwrites the clamped slice - TODO restore.
                tmpx = self.tmpNumpyArrayFiles.arrays[Chrom][i : i+binsize]

            # Indices of the strictly positive entries of the current bin.
            int_idx = np.nonzero(tmpx > 0)

            # Bin without any positive entry.
            # NOTE(review): the body of this branch is missing - TODO restore.
            if int_idx[0].shape[0] == 0:

            # NOTE(review): the body of every branch below is missing; presumably
            # each one appended the matching aggregate of the bin to ``output``
            # - TODO restore.
            #print(Chrom, tmpx.shape, i, i+binsize, tmpx)
            if coarsening_method == 'max':

            if coarsening_method == 'min':

            if coarsening_method == 'amean':

            if coarsening_method == 'hmean':

            if coarsening_method == 'gmean':

            if coarsening_method == 'median':

        # print(Chrom, resolution, coarse_method, size, binsize, size/binsize, len(output), np.amax(output))

        return np.asarray(output)
def saveAsH5(self, hdf5Out, title=None, resolutions=None, coarsening_methods=None, compression='lzf', keep_original=False):
        """To convert Wig files to hdf5 file

        Parameters
        ----------
        hdf5Out : :class:`HDF5Handler` or str
            Output hdf5 file name or :class:`HDF5Handler` instance
        title : str
            Title of the data
        resolutions : list of str
            Additional input resolutions other than these default resolutions:
            '1kb', '2kb', '4kb', '5kb', '8kb', '10kb', '20kb', '40kb', '80kb',
            '100kb', '160kb','200kb', '320kb', '500kb', '640kb',  and '1mb'.

            For Example: use ``resolutions=['25kb', '50kb', '75kb']`` to add
            additional 25kb, 50kb and 75kb resolution data.
        coarsening_methods : list of str
            Methods to coarse or downsample the data for converting from 1-base
            to coarser resolutions. Presently, six methods are implemented.

            * ``'min'``    -> Minimum value
            * ``'max'``    -> Maximum value
            * ``'amean'``  -> Arithmetic mean or average
            * ``'hmean'``  -> Harmonic mean
            * ``'gmean'``  -> Geometric mean
            * ``'median'`` -> Median

            In case of ``None``, all six methods will be considered. User may
            use only subset of these methods. For example:
            ``coarsening_methods=['max', 'amean']`` can be used for downsampling by
            only these two methods.
        compression : str
            data compression method in HDF5 file : ``lzf`` or ``gzip`` method.
        keep_original : bool
            Whether original data present in the Wig file should be incorporated in HDF5 file. This will significantly increase size of HDF5 file.

        """
        # NOTE(review): the body of this branch is missing; presumably the Wig
        # file is parsed here when that has not happened yet - TODO restore.
        if not self.isWigParsed:

        # Storing data in hdf5 file
        self._StoreInHdf5File(hdf5Out, title, compression=compression, coarsening_methods=coarsening_methods, resolutions=resolutions, keep_original=keep_original)
def _PerformDataCoarsening(self, Chrom, resolution, coarse_method):
        """Base method to perform data coarsening.

        This method reads the temporary Numpy array files and performs data
        coarsening using the given input method.

        .. warning::
            **Private method**. Use it at your own risk. It is used internally in :meth:`BEDHandler._StoreInHdf5File`.

        Parameters
        ----------
        Chrom : str
            Chromosome name

        resolution : str
            Resolution in words; converted to a bin size with ``util.resolutionToBinsize``.

        coarse_method : str
            Name of method to use for data coarsening. Accepted keywords: min, max, median, amean, gmean and hmean.

        Returns
        -------
        numpy.ndarray
            ``output`` converted with ``np.asarray``.

        """
        output = []
        binsize = util.resolutionToBinsize(resolution)
        size = self.chromSizeInfo[Chrom] + 1

        # Walk over the chromosome array in steps of one bin, starting at index 1.
        for i in range(1, size, binsize):
            tmpx = None
            if i+binsize >= size:
                # Last bin: the slice is clamped to the end of the chromosome.
                tmpx = self.tmpNumpyArrayFiles.arrays[Chrom][i : size]
                # NOTE(review): an ``else:`` appears to be missing before the next
                # line; as written it overwrites the clamped slice - TODO restore.
                tmpx = self.tmpNumpyArrayFiles.arrays[Chrom][i : i+binsize]

            # Indices of the strictly positive entries of the current bin.
            int_idx = np.nonzero(tmpx > 0)

            # Bin without any positive entry.
            # NOTE(review): the body of this branch is missing - TODO restore.
            if int_idx[0].shape[0] == 0:

            # NOTE(review): the body of every branch below is missing; presumably
            # each one appended the matching aggregate of the bin to ``output``
            # - TODO restore.
            #print(Chrom, tmpx.shape, i, i+binsize, tmpx)
            if coarse_method == 'max':

            if coarse_method == 'min':

            if coarse_method == 'amean':

            if coarse_method == 'hmean':

            if coarse_method == 'gmean':

            if coarse_method == 'median':

        # print(Chrom, resolution, coarse_method, size, binsize, size/binsize, len(output), np.amax(output))

        return np.asarray(output)
def saveAsH5(self, hdf5Out, title=None, resolutions=None, coarsening_methods=None, compression='lzf', keep_original=False):
        """To convert bed files to hdf5 file

        It parses bed files, coarsens the data and stores it in the given
        hdf5/h5 file.

        Parameters
        ----------
        hdf5Out : :class:`HDF5Handler` or str
            Output hdf5 file name or :class:`HDF5Handler` instance
        title : str
            Title of the data
        resolutions : list of str
            Additional input resolutions other than these default resolutions:
            '1kb', '2kb', '4kb', '5kb', '8kb', '10kb', '20kb', '40kb', '80kb',
            '100kb', '160kb','200kb', '320kb', '500kb', '640kb',  and '1mb'.

            For Example: use ``resolutions=['25kb', '50kb', '75kb']`` to add
            additional 25kb, 50kb and 75kb resolution data.
        coarsening_methods : list of str
            Methods to coarse or downsample the data for converting from 1-base
            to coarser resolutions. Presently, six methods are implemented.

            * ``'min'``    -> Minimum value
            * ``'max'``    -> Maximum value
            * ``'amean'``  -> Arithmetic mean or average
            * ``'hmean'``  -> Harmonic mean
            * ``'gmean'``  -> Geometric mean
            * ``'median'`` -> Median

            In case of ``None``, all six methods will be considered. User may
            use only subset of these methods. For example:
            ``coarsening_methods=['max', 'amean']`` can be used for downsampling by
            only these two methods.
        compression : str
            data compression method in HDF5 file : ``lzf`` or ``gzip`` method.
        keep_original : bool
            Whether original data present in the bed file should be incorporated in HDF5 file. This will significantly increase size of HDF5 file.

        """
        # NOTE(review): the body of this branch is missing; presumably the bed
        # file is parsed here when that has not happened yet - TODO restore.
        if not self.isBedParsed:

        # Storing data in hdf5 file
        self._StoreInHdf5File(hdf5Out, title, resolutions=resolutions, coarsening_methods=coarsening_methods, compression=compression, keep_original=keep_original)
def _build_lexicons(self):
        """Score every term per category and derive the sorted word lists.

        Works on the term/document count frame restricted to the ' freq'
        columns of category a, category b and the neutral categories, and
        adds one ' scores' column per lexicon using ``self.scorer.get_scores``.
        The results are stored on ``self`` as ``category_a_words_``,
        ``category_b_words_``, ``category_a_and_b_words_`` and their ``not_``
        counterparts.
        """
        tdf = (self.term_doc_matrix_.get_term_doc_count_df()
               [[t + ' freq' for t
                 in [self.category_a_, self.category_b_] + self.neutral_categories_]])
        # Drop terms that never occur in any of the selected categories.
        tdf = tdf[tdf.sum(axis=1) > 0]
        # Category score: frequency in the category against the row sum of all
        # other columns of tdf.
        tdf[self.category_a_ + ' scores'] = self.scorer.get_scores(
            tdf[self.category_a_ + ' freq'],
            tdf[[t for t in tdf.columns if t != self.category_a_ + ' freq']].sum(axis=1))
        # NOTE(review): from here on tdf.columns also contains the ' scores'
        # columns added above, so the "all other columns" sums below are no
        # longer sums of frequency columns only - confirm this is intended.
        tdf[self.category_b_ + ' scores'] = self.scorer.get_scores(
            tdf[self.category_b_ + ' freq'],
            tdf[[t for t in tdf.columns if t != self.category_b_ + ' freq']].sum(axis=1))
        # Combined score: harmonic mean of both category scores, or 0 as soon
        # as one of them is not positive.
        tdf[self.category_a_ + ' + ' + self.category_b_ + ' scores'] = tdf[
            [t + ' scores' for t in [self.category_a_, self.category_b_]]].apply(
            lambda x: hmean(x) if min(x) > 0 else 0, axis=1)

        # "not" scores: same scorer with the two arguments swapped.
        tdf["not " + self.category_a_ + ' scores'] = self.scorer.get_scores(
            tdf[[t for t in tdf.columns if t != self.category_a_ + ' freq']].sum(axis=1),
            tdf[self.category_a_ + ' freq'])
        tdf["not " + self.category_b_ + ' scores'] = self.scorer.get_scores(
            tdf[[t for t in tdf.columns if t != self.category_b_ + ' freq']].sum(axis=1),
            tdf[self.category_b_ + ' freq'])
        tdf["not " + self.category_a_ + ' + ' + self.category_b_ + ' scores'] = tdf[
            ['not ' + t + ' scores' for t in [self.category_a_, self.category_b_]]].apply(
            lambda x: hmean(x) if min(x) > 0 else 0, axis=1)

        # NOTE(review): every sort_values(...) call below is cut off - the
        # remaining arguments and the closing parentheses are missing, so the
        # sort order and any truncation of the lists cannot be told from here.
        # TODO restore.
        self.category_a_words_ = list(tdf.sort_values(by=self.category_a_ + ' scores',
        self.category_b_words_ = list(tdf.sort_values(by=self.category_b_ + ' scores',
        self.category_a_and_b_words_ = list(
            tdf.sort_values(by=self.category_a_ + ' + ' + self.category_b_ + ' scores',
        self.not_category_a_words_ = list(
            tdf.sort_values(by='not ' + self.category_a_ + ' scores',
        self.not_category_b_words_ = list(
            tdf.sort_values(by='not ' + self.category_b_ + ' scores',
        self.not_category_a_and_b_words_ = list(
            tdf.sort_values(by='not ' + self.category_a_ + ' + ' + self.category_b_ + ' scores',