Python scipy.stats module: zscore() example source code

The following 28 code examples, extracted from open-source Python projects, illustrate how to use scipy.stats.zscore().
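
For reference, a minimal usage sketch of scipy.stats.zscore itself (values in the comments are approximate):

import numpy as np
from scipy.stats import zscore

a = np.array([1.0, 2.0, 3.0, 4.0, 10.0])
z = zscore(a)                 # (a - a.mean()) / a.std(), population std (ddof=0)
print(z.mean(), z.std())      # ~0.0 and 1.0 after standardization

m = np.arange(12.0).reshape(4, 3)
z_cols = zscore(m, axis=0)            # standardize each column (the default)
z_rows = zscore(m, axis=1, ddof=1)    # per-row, using the sample std (ddof=1)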

Project: saapy    Author: ashapochka
def find_outliers(data):
    absolute_normalized = np.abs(zscore(data))
    return absolute_normalized > 3
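A short usage sketch for the helper above (the data here is invented; assumes numpy and zscore are imported as in the snippet):

rng = np.random.default_rng(0)
data = np.concatenate([rng.normal(0, 1, 200), [25.0]])   # 200 inliers plus one spike
mask = find_outliers(data)
print(np.where(mask)[0])   # -> [200]; only the appended spike exceeds |z| > 3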
Project: Waskom_PNAS_2017    Author: WagnerLabPapers
def X(self):

        # Scale the data across features
        X = stats.zscore(np.asarray(self.betas))

        # Remove zero-variance features
        X = X[:, self.good_voxels]
        assert not np.isnan(X).any()

        # Regress out behavioral confounds
        rt = np.asarray(self.rt)
        m, s = np.nanmean(rt), np.nanstd(rt)
        rt = np.nan_to_num((rt - m) / s)
        X = OLS(X, rt).fit().resid

        return X
Project: untwist    Author: IoSR-Surrey
def process(self, X):
        onset_func = zscore(self.func(X))
        onset_func = signal.filtfilt(self.moving_avg_filter, 1, onset_func)
        onset_func = onset_func - signal.medfilt(
            onset_func[:,np.newaxis], self.median_kernel
        )[:,0]
        peaks = signal.argrelmax(onset_func)
        onsets = peaks[0][np.where(onset_func[peaks[0]] >
            self.threshold)]
        return onsets
Project: brainiak    Author: brainiak
def _generate_noise_temporal_task(stimfunction_tr,
                                  motion_noise='gaussian',
                                  ):
    """Generate the signal dependent noise

    Create noise specific to the signal, for instance there is variability
    in how the signal manifests on each event

    Parameters
    ----------

    stimfunction_tr : 1 Dimensional array
        This is the timecourse of the stimuli in this experiment,
        each element represents a TR

    motion_noise : str
        What type of noise will you generate? Can be gaussian or rician

    Returns
    ----------

    noise_task : one dimensional array, float
        Generates the temporal task noise timecourse

    """

    # Make the noise to be added
    stimfunction_tr = stimfunction_tr != 0
    if motion_noise == 'gaussian':
        noise = stimfunction_tr * np.random.normal(0, 1, size=len(
            stimfunction_tr))
    elif motion_noise == 'rician':
        noise = stimfunction_tr * stats.rice.rvs(0, 1, size=len(
            stimfunction_tr))

    noise_task = stimfunction_tr + noise

    # Normalize
    noise_task = stats.zscore(noise_task)

    return noise_task
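A toy call of the helper above (the boxcar stimulus is made up for illustration; assumes numpy and scipy.stats are imported as np and stats in the module):

stimfunction_tr = np.array([0, 0, 1, 1, 0, 1, 0, 0, 1, 1])   # 1 during "on" TRs
noise_task = _generate_noise_temporal_task(stimfunction_tr, motion_noise='gaussian')
print(noise_task.shape)                    # (10,)
print(round(float(noise_task.std()), 3))   # ~1.0, because stats.zscore normalizes the output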
Project: brainiak    Author: brainiak
def calc_weighted_event_var(self, D, weights, event_pat):
        """Computes normalized weighted variance around event pattern

        Utility function for computing variance in a training set of weighted
        event examples. For each event, the sum of squared differences for all
        timepoints from the event pattern is computed, and then the weights
        specify how much each of these differences contributes to the
        variance (normalized by the number of voxels).

        Parameters
        ----------
        D : timepoint by voxel ndarray
            fMRI data for which to compute event variances

        weights : timepoint by event ndarray
            specifies relative weights of timepoints for each event

        event_pat : voxel by event ndarray
            mean event patterns to compute variance around

        Returns
        -------
        ev_var : ndarray of variances for each event
        """
        Dz = stats.zscore(D, axis=1, ddof=1)
        ev_var = np.empty(event_pat.shape[1])
        for e in range(event_pat.shape[1]):
            # Only compute variances for weights > 0.1% of max weight
            nz = weights[:, e] > np.max(weights[:, e])/1000
            sumsq = np.dot(weights[nz, e],
                           np.sum(np.square(Dz[nz, :] -
                                  event_pat[:, e]), axis=1))
            ev_var[e] = sumsq/(np.sum(weights[nz, e]) -
                               np.sum(np.square(weights[nz, e])) /
                               np.sum(weights[nz, e]))
        ev_var = ev_var / D.shape[1]
        return ev_var
Project: fexum    Author: KDD-OpenSource
def build_spectrogram(feature_id, width=256, height=128, frequency_base=1.0):
    """
    Builds a spectrogram using @Lichtso's ccwt library

    :param feature_id: The feature uuid to be analyzed.
    :param width: Width of the exported image (should be smaller than the number of samples)
    :param height: Height of the exported image
    :param frequency_base: Base for exponential frequency scales or 1.0 for linear scale
    """
    feature = Feature.objects.get(pk=feature_id)
    df = _get_dataframe(feature.dataset.id)

    feature_column = zscore(df[feature.name].values)
    fourier_transformed_signal = fft(feature_column)

    minimum_frequency = 0.001 * len(feature_column)
    maximum_frequency = 0.5 * len(feature_column)
    if frequency_base == 1.0:
        # Linear
        frequency_band_result = frequency_band(height, maximum_frequency - minimum_frequency, minimum_frequency)
    else:
        minimum_octave = log(minimum_frequency) / log(frequency_base)
        maximum_octave = log(maximum_frequency) / log(frequency_base)
        # Exponential
        frequency_band_result = frequency_band(height, maximum_octave - minimum_octave, minimum_octave, frequency_base)

    # Write into the spectrogram path
    filename = '{0}/spectrograms/{1}.png'.format(settings.MEDIA_ROOT, feature_id)
    os.makedirs(os.path.dirname(filename), exist_ok=True)
    with open(filename, 'w') as output_file:
        render_png(output_file, EQUIPOTENTIAL, 0.0, fourier_transformed_signal, frequency_band_result, width)

    Spectrogram.objects.create(
        feature=feature,
        width=width,
        height=height,
        image=filename
    )
Project: visually-grounded-speech    Author: gchrupala
def z_score(a, b):
    """
    Uses scipy.stats.zscore to transform values to z-scores.
    Expects two arrays corresponding to sentences a and sentences b of
    sentence pairs, and returns two arrays in which a and b have undergone
    the same scaling by the mean and SD of their concatenation.
    """
    if len(a) != len(b):
        print "warning: uneven nr of sentences in a and b"
    all_sents = numpy.concatenate((a, b))
    scaled = zscore(all_sents)
    return scaled[:len(a),:], scaled[len(a):,:]
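A small sketch of the joint scaling (arrays invented for illustration; the module imports numpy and scipy.stats.zscore as above):

a = numpy.array([[1.0, 2.0], [3.0, 4.0]])
b = numpy.array([[5.0, 6.0], [7.0, 8.0]])
za, zb = z_score(a, b)   # both halves are scaled by the mean/SD of their concatenation
both = numpy.concatenate((za, zb))
print(both.mean(axis=0))   # ~[0. 0.]
print(both.std(axis=0))    # ~[1. 1.]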
Project: mriqc    Author: poldracklab
def zscore_site(args):
    """ z-scores only one site """
    from scipy.stats import zscore
    dataframe, columns, site = args
    return zscore(dataframe.loc[dataframe.site == site, columns].values,
                  ddof=1, axis=0)
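A hedged driver sketch for the function above; the dataframe and the column names ('cjv', 'snr') are invented here purely for illustration:

import pandas as pd

df = pd.DataFrame({'site': ['A', 'A', 'A', 'B', 'B', 'B'],
                   'cjv':  [0.4, 0.5, 0.6, 1.0, 1.2, 1.4],
                   'snr':  [10., 12., 11., 20., 22., 24.]})

for site in df.site.unique():
    z = zscore_site((df, ['cjv', 'snr'], site))   # z-scores the metrics of one site at a time
    print(site, z.round(2))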
Project: dataframe    Author: dirmeier
def __call__(self, *args):
        vals = args[0].values
        return sps.zscore(vals).tolist()
Project: dataframe    Author: dirmeier
def __call__(self, *args):
        vals = args[0].values
        return sps.zscore(vals).tolist()
Project: dataframe    Author: dirmeier
def __call__(self, *args):
        vals = args[0].values
        return sps.zscore(vals).tolist()
Project: didi-competition    Author: udacity
def filter_outlier_points(points):
    kt = kdtree.KDTree(points)
    distances, i = kt.query(kt.data, k=9)
    z_distances = stats.zscore(np.mean(distances, axis=1))
    o_filter = abs(z_distances) < 1  # rather arbitrary
    return points[o_filter]
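A usage sketch (assuming the function above and its imports, numpy as np, scipy.spatial's kdtree and scipy.stats as stats, are in scope; the point cloud is synthetic):

rng = np.random.default_rng(42)
points = rng.normal(0, 1, size=(500, 3))
points[0] = [50.0, 50.0, 50.0]              # inject one far-away point

filtered = filter_outlier_points(points)
print(points.shape, '->', filtered.shape)   # the injected point is gone; the |z| < 1 cut
                                            # also drops other comparatively sparse points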
Project: Waskom_PNAS_2017    Author: WagnerLabPapers
def confound_design(exp, subj, run, hpf_kernel):
    """Build a matrix of confound variables."""
    analysis_dir = PROJECT["analysis_dir"]

    if exp == "dots":
        tr = 1
    elif exp == "sticks":
        tr = .72

    # Load in the confound information
    fstem = op.join(analysis_dir, exp, subj,
                    "preproc", "run_{}".format(run))
    motion = (pd.read_csv(op.join(fstem, "realignment_params.csv"))
                .filter(regex="rot|trans")
                .apply(stats.zscore))
    nuisance = (pd.read_csv(op.join(fstem, "nuisance_variables.csv"))
                  .filter(regex="wm_")
                  .apply(stats.zscore))
    artifacts = (pd.read_csv(op.join(fstem, "artifacts.csv"))
                   .any(axis=1))

    # Upsample dots data
    if exp == "dots":
        motion = pd.DataFrame(moss.upsample(motion, 2))
        nuisance = pd.DataFrame(moss.upsample(nuisance, 2))
        new_tps = np.arange(0, 229.5, .5)
        artifacts = artifacts.reindex(new_tps).ffill().reset_index(drop=True)

    # Build this portion of the design matrix
    confounds = pd.concat([motion, nuisance], axis=1)
    dmat = glm.DesignMatrix(confounds=confounds,
                            artifacts=artifacts,
                            hpf_kernel=hpf_kernel,
                            tr=tr)

    return dmat.design_matrix
Project: Waskom_PNAS_2017    Author: WagnerLabPapers
def signal_confound_design(self, run):
        """Build a matrix of signal confound variables."""
        analysis_dir = PROJECT["analysis_dir"]

        if self.exp == "dots":
            tr = 2
        elif self.exp == "sticks":
            tr = .72

        fstem = op.join(analysis_dir, self.exp, self.subj,
                        "preproc", "run_{}".format(run))

        motion = (pd.read_csv(op.join(fstem, "realignment_params.csv"))
                    .filter(regex="rot|trans")
                    .apply(stats.zscore))
        nuisance = (pd.read_csv(op.join(fstem, "nuisance_variables.csv"))
                      .filter(regex="wm_")
                      .apply(stats.zscore))
        artifacts = (pd.read_csv(op.join(fstem, "artifacts.csv"))
                       .any(axis=1))

        confounds = pd.concat([motion, nuisance], axis=1)
        dmat = glm.DesignMatrix(confounds=confounds,
                                artifacts=artifacts,
                                hpf_kernel=self.hpf_kernel,
                                tr=tr)

        return dmat.design_matrix
Project: Undertone    Author: kgugle
def california():
    calif = []
    with open('california.txt') as f:
        calif = f.readlines()
    out_array = []
    for ccs in calif:
        cal_array = ccs.split(',')
        piece = [cal_array[0].replace('"',''),cal_array[1].replace(' ',''),cal_array[2].replace(' ','').replace('\n','')]
        out_array.append(piece)
    array_county = []
    for x in out_array:
        array_county.append(x[0])
    set_array_county = set(array_county)
    final_arr = []
    for county in set_array_county:
        divisor = 0
        total_score = 0
        for y in out_array:
            if county == y[0]:
                divisor += int(y[1])
                if divisor == 0:
                    divisor = 1
                total_score += float(y[2])
        final_arr.append((total_score/divisor))
    pprint(final_arr)
    np_array = np.asarray(final_arr)
    zscore_arr = stats.zscore(np_array)

    fin = []
    for x in zscore_arr:
        num = percentage_of_area_under_std_normal_curve_from_zcore(x)
        fin.append(num)

    proj = zip(set_array_county,fin)
    for x in proj:
        print(x[0],x[1])
Project: sgrsea    Author: bchen4
def zStat(sg_row,pNull):
  #logging(sg_row)
  if pNull == 0 or pNull ==1:
    return None
  else:
    zscore = (sg_row.pmme-pNull) * math.sqrt(sg_row.treat+sg_row.ctrl) / math.sqrt(pNull*(1-pNull)) 
    return zscore
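The statistic above is the usual one-sample z test for a proportion, z = (p_hat - p0) * sqrt(n) / sqrt(p0 * (1 - p0)), with n = treat + ctrl and p_hat = sg_row.pmme. A small hypothetical check (SgRow is a made-up stand-in for the dataframe row the project passes in):

import math
from collections import namedtuple

SgRow = namedtuple('SgRow', ['pmme', 'treat', 'ctrl'])   # hypothetical stand-in row
row = SgRow(pmme=0.6, treat=300, ctrl=200)               # n = 500 reads, observed proportion 0.6

print(round(zStat(row, pNull=0.5), 3))   # (0.6 - 0.5) * sqrt(500) / sqrt(0.25) ~= 4.472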
Project: sgrsea    Author: bchen4
def standarizeMaxmeanDic(md):
  sfactor = {}
  snullmaxmean = []
  for k in md.keys():
    sfactor[k] = (np.mean(md[k]),np.std(md[k]))
    md[k] = stats.zscore(md[k])
    snullmaxmean += md[k].tolist()
  return (md,sfactor,snullmaxmean)
Project: decoding_challenge_cortana_2016_3rd    Author: kingjr
def find_outliers(X, threshold=3.0, max_iter=2):
    """Find outliers based on iterated Z-scoring

    This procedure compares the absolute z-score against the threshold.
    After excluding local outliers, the comparison is repeated until no
    local outlier is present any more.

    Parameters
    ----------
    X : np.ndarray of float, shape (n_elemenets,)
        The scores for which to find outliers.
    threshold : float
        The value above which a feature is classified as outlier.
    max_iter : int
        The maximum number of iterations.

    Returns
    -------
    bad_idx : np.ndarray of int, shape (n_features)
        The outlier indices.
    """
    from scipy.stats import zscore
    my_mask = np.zeros(len(X), dtype=bool)
    for _ in range(max_iter):
        X = np.ma.masked_array(X, my_mask)
        this_z = np.abs(zscore(X))
        local_bad = this_z > threshold
        my_mask = np.max([my_mask, local_bad], 0)
        if not np.any(local_bad):
            break

    bad_idx = np.where(my_mask)[0]
    return bad_idx
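A short usage sketch of the iterated outlier search (synthetic scores; assumes numpy is imported as np):

rng = np.random.default_rng(0)
scores = np.concatenate([rng.normal(0, 1, 100), [8.0, 9.0]])
print(find_outliers(scores))   # indices of the two appended extremes, e.g. [100 101]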
Project: server-2017    Author: frc1678
def sprZScores(self, PBC):
        spr_values = list(self.sprs.values())  # materialize for numpy/scipy under Python 3
        if np.std(spr_values) == 0:
            zscores = {k: (0.0, self.sprs[k]) for k in self.sprs.keys()}
        else:
            zscores = {k: (z, self.sprs[k]) for (k, z) in zip(self.sprs.keys(), stats.zscore(spr_values))}
        CSVExporter.CSVExportScoutZScores(zscores)
        PBC.sendExport('SPRExport.csv')
Project: server-2017    Author: frc1678
def rValuesForAverageFunctionForDict(self, averageFunction, d):
        values = list(map(averageFunction, self.cachedComp.teamsWithMatchesCompleted))
        for index, value in enumerate(values):
            if value is None:
                values[index] = 0
        if not values:
            return
        if not np.std(values):
            zscores = [0.0 for v in values] #Don't calculate z-score if the standard deviation is 0
        else:
            zscores = list(stats.zscore(values))
        [utils.setDictionaryValue(d, self.cachedComp.teamsWithMatchesCompleted[i].number, zscores[i]) for i in range(len(self.cachedComp.teamsWithMatchesCompleted))]
Project: tf-sparse-fit    Author: cmcneil
def pearson_correllations(y, y_pred):
    """Computes the pearson correllation of a set of predictions.

    Parameters
    ----------
    y:
        Actual test set values. A numpy array of shape (npoints, nvariables)
    y_pred:
        The predicted values. A numpy array of shape (npoints, nvariables)
    """
    return (zscore(y)*zscore(y_pred)).mean(axis=0)
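Because zscore standardizes each column to zero mean and unit (population) standard deviation, the mean of the elementwise product of the two z-scored arrays is exactly the Pearson correlation, computed column by column. A quick sanity check against numpy's corrcoef (synthetic data, illustrative only):

import numpy as np
from scipy.stats import zscore

rng = np.random.default_rng(1)
y = rng.normal(size=(200, 3))
y_pred = y + rng.normal(scale=0.5, size=(200, 3))

r_fast = pearson_correllations(y, y_pred)
r_ref = np.array([np.corrcoef(y[:, j], y_pred[:, j])[0, 1] for j in range(3)])
print(np.allclose(r_fast, r_ref))   # True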
Project: AutoSleepScorerDev    Author: skjerns
def normalize(signals, axis=None, groups=None, MP = False, comp=None):
    """
    Z-score signals, optionally per group (e.g. per patient).

    :param signals: 1D, 2D or 3D array of signals
    :param axis: axis along which to z-score (passed through to scipy.stats)
    :param groups: per-sample group labels (e.g. patient IDs); if given, each
                   group is z-scored separately
    :param MP: if True, z-score the groups in parallel worker processes
    :param comp: optional comparison array; if given, scipy.stats.zmap scores
                 signals against comp instead
    :returns: the z-scored signals
    """

    if comp is not None:
        print('zmapping with axis {}'.format(axis))
        return stats.zmap(signals, comp , axis=axis)

    if groups is None: 
        print('zscoring with axis {}'.format(axis))
        return stats.zscore(signals, axis=axis)

    if signals.ndim == 1: signals = np.expand_dims(signals,0) 
    if signals.ndim == 2: signals = np.expand_dims(signals,2)

    if MP:
        print('zscoring per patient using {} cores'.format(cpu_count()))
        p = Pool(cpu_count())  # use all available cores
        res = []
        new_signals = np.zeros_like(signals)
        for ID in np.unique(groups):
            idx = groups==ID
            job = p.apply_async(stats.zscore, args = (signals[idx],), kwds={'axis':None})
            res.append(job)
        start = 0
        for r in res:
            values = r.get(timeout=1200)
            end = start + len(values)
            new_signals[start:end] = values
            start = end  # advance the write offset to the next block
        return new_signals
    else:
        print('zscoring per patient')
        res = []
        for ID in np.unique(groups):
            idx = groups==ID
            job = stats.zscore(signals[idx], axis=None)
            res.append(job)
        new_signals = np.vstack(res)
        return new_signals
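A usage sketch of per-group z-scoring with the function above (two synthetic "patients" with different offsets and scales; assumes numpy as np and the function's own imports are in scope):

rng = np.random.default_rng(3)
signals = np.vstack([rng.normal(5, 2, (10, 4)),     # patient A
                     rng.normal(-3, 7, (10, 4))])   # patient B
groups = np.array(['A'] * 10 + ['B'] * 10)

z = normalize(signals, axis=None, groups=groups)
print(z.shape)                                      # (20, 4, 1) after the internal expand_dims
print(round(float(z[:10].std()), 3),
      round(float(z[10:].std()), 3))                # 1.0 within each patient block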
Project: brainiak    Author: brainiak
def _generate_noise_temporal_drift(trs,
                                   tr_duration,
                                   period=300,
                                   ):

    """Generate the drift noise

    Create a sinewave, of a given period and random phase, to represent the
    drift of the signal over time

    Parameters
    ----------

    trs : int
        How many volumes (aka TRs) are there

    tr_duration : float
        How long, in seconds, each volume acquisition takes

    period : int
        How many seconds is the period of oscillation of the drift

    Returns
    ----------
    noise_drift : one dimensional array, float
        The drift timecourse of activity

    """

    # Calculate the cycles of the drift for a given function.
    cycles = trs * tr_duration / period

    # Create a sine wave with a given number of cycles and random phase
    timepoints = np.linspace(0, trs - 1, trs)
    phaseshift = np.pi * 2 * np.random.random()
    phase = (timepoints / (trs - 1) * cycles * 2 * np.pi) + phaseshift
    noise_drift = np.sin(phase)

    # Normalize so the sigma is 1
    noise_drift = stats.zscore(noise_drift)

    # Return noise
    return noise_drift
Project: brainiak    Author: brainiak
def _generate_noise_temporal_phys(timepoints,
                                  resp_freq=0.2,
                                  heart_freq=1.17,
                                  ):
    """Generate the physiological noise.
    Create noise representing the heart rate and respiration of the data.
    Default values based on Walvaert, Durnez, Moerkerke, Verdoolaege and
    Rosseel, 2011

    Parameters
    ----------

    timepoints : 1 Dimensional array
        What time points, in seconds, are sampled by a TR

    resp_freq : float
        What is the frequency of respiration

    heart_freq : float
        What is the frequency of heart beat

    Returns
    ----------
    noise_phys : one dimensional array, float
        Generates the physiological temporal noise timecourse

    """

    noise_phys = []  # Preset
    resp_phase = (np.random.rand(1) * 2 * np.pi)[0]
    heart_phase = (np.random.rand(1) * 2 * np.pi)[0]
    for tr_counter in timepoints:

        # Calculate the radians for each variable at this
        # given TR
        resp_radians = resp_freq * tr_counter * 2 * np.pi + resp_phase
        heart_radians = heart_freq * tr_counter * 2 * np.pi + heart_phase

        # Combine the two types of noise and append
        noise_phys.append(np.cos(resp_radians) + np.sin(heart_radians))

    # Normalize
    noise_phys = stats.zscore(noise_phys)

    return noise_phys
Project: brainiak    Author: brainiak
def _logprob_obs(self, data, mean_pat, var):
        """Log probability of observing each timepoint under each event model

        Computes the log probability of each observed timepoint being
        generated by the Gaussian distribution for each event pattern

        Parameters
        ----------
        data: voxel by time ndarray
            fMRI data on which to compute log probabilities

        mean_pat: voxel by event ndarray
            Centers of the Gaussians for each event

        var: float or 1D array of length equal to the number of events
            Variance of the event Gaussians. If scalar, all events are
            assumed to have the same variance

        Returns
        -------
        logprob : time by event ndarray
            Log probability of each timepoint under each event Gaussian
        """

        n_vox = data.shape[0]
        t = data.shape[1]

        # z-score both data and mean patterns in space, so that Gaussians
        # are measuring Pearson correlations and are insensitive to overall
        # activity changes
        data_z = stats.zscore(data, axis=0, ddof=1)
        mean_pat_z = stats.zscore(mean_pat, axis=0, ddof=1)

        logprob = np.empty((t, self.n_events))

        if type(var) is not np.ndarray:
            var = var * np.ones(self.n_events)

        for k in range(self.n_events):
            logprob[:, k] = -0.5 * n_vox * np.log(
                2 * np.pi * var[k]) - 0.5 * np.sum(
                (data_z.T - mean_pat_z[:, k]).T ** 2, axis=0) / var[k]

        logprob /= n_vox
        return logprob
Project: frisbeer-backend    Author: Moetto
def calculate_ranks(game):
    logging.info("Calculating new ranks")
    players = Player.objects.all()
    ranks = list(Rank.objects.all())

    rank_distribution = OrderedDict()
    step = 6 / (len(ranks) - 2)
    for i in range(len(ranks) - 2):
        rank_distribution[-3 + i * step] = ranks[i]

    player_list = []
    for player in players:
        s1 = player.gameplayerrelation_set.filter(team=1).aggregate(
            Sum('game__team1_score'))["game__team1_score__sum"]
        s2 = player.gameplayerrelation_set.filter(team=2).aggregate(
            Sum('game__team2_score'))["game__team2_score__sum"]
        # aggregate() returns None when the player has no games on that team
        s1 = s1 if s1 else 0
        s2 = s2 if s2 else 0
        if s1 + s2 > 4:
            player_list.append(player)
    if not player_list:
        logging.debug("No players with four round victories")
        return
    scores = [player.score for player in player_list]
    if len(set(scores)) == 1:
        logging.debug("Only one player {} with rank".format(player_list[0]))
        z_scores = [0.0 for i in range(len(player_list))]
    else:
        z_scores = zscore(scores)
        logging.debug("Players: {}".format(player_list))
        logging.debug("Z_scores: {}".format(z_scores))

    for i in range(len(player_list)):
        player = player_list[i]
        if player not in list(game.players.all()):
            logging.debug("Not setting rank for %s because he didn't play", player)
            break
        player_z_score = z_scores[i]
        player = player_list[i]
        rank = None
        for required_z_score in rank_distribution.keys():
            if player_z_score > required_z_score:
                rank = rank_distribution[required_z_score]
            else:
                break
        logging.debug("Setting rank {} for {}".format(rank, player.name))
        player.rank = rank
        player.save()
Project: StockRecommendSystem    Author: doncat99
def normalization_scaler(norm, data, row_processing):
    '''
    data: N*M np.array
    N: sample
    M: features
    data_T: M*N
    data_T_scale: scaler for column by column, M*N
    data_T_scale_T: N*M
    '''
    if data.size == 0:
        return data 

    if '1' in norm:
        if row_processing:
            data_T = data.transpose()
            scaler = MinMaxScaler().fit_transform(StandardScaler().fit_transform(np.log(data+1)))
            #data_T_scale = scaler.transform(data_T)
            return scaler.transpose()
        else:
            #scaler = StandardScaler().fit(data)
            scaler =MinMaxScaler().fit_transform(StandardScaler().fit_transform(np.log(data+1)))
            return scaler

    elif '2' in norm:
        if row_processing:
            data_T = data.transpose()
            scaler = MinMaxScaler().fit(data_T)
            data_T_scale = scaler.transform(data_T)
            return data_T_scale.transpose()
        else:
            scaler = MinMaxScaler().fit(data)
            return scaler.transform(data)

    elif '3' in norm:
        # assumes `data` is a pandas DataFrame, so .apply(zscore) is available
        if row_processing:
            data_T = data.transpose()
            return data_T.apply(zscore).transpose()
        else:
            return data.apply(zscore)

    else: 
        return data
Project: webdataconnector_ml    Author: DoubleEE
def runClustering(cluster_df):
    from sklearn.cluster import KMeans
    from sklearn.metrics import silhouette_score

    Xcols = [col for col in cluster_df.columns if 'NOTMODEL' not in col.upper()]

    # Convert character columns to dummy variables
    X = cluster_df[Xcols]
    cols = X.columns
    num_cols = X._get_numeric_data().columns
    char_cols = list(set(cols) - set(num_cols))
    for col in char_cols:
        if len(X[col].unique()) <= 20:
            dummy = pd.get_dummies(X[col], prefix='dm' + col)
            column_name = X.columns.values.tolist()
            column_name.remove(col)
            X = X[column_name].join(dummy)
        else:
            if col in X.columns:    # If more than 20 distinct values then delete
                del X[col]

    # Standardize (Z-score normalize) all continuous variables
    from scipy.stats import zscore
    for col in X:
        if len(X[col].unique()) > 2:    # Standardize non-dummy variables
            col_zscore = 'z_' + col
            X[col_zscore] = zscore(X[col])
            del X[col]

    # Fill missing values with 0 = the mean in the z-normalize data
    # Obviously missing values can be handled in many different ways
    X.fillna(0, inplace=True)

    # convert to a numpy array to use in the KMeans clustering class
    data_for_clustering_matrix = X.values

    number_of_Clusters = []
    silhouette_value = []
    # Loop through 2 and 20 clusters and identify which has the highest silhouette score
    k = range(2, 21)
    for i in k:
        clustering_method = KMeans(n_clusters=i)
        clustering_method.fit(data_for_clustering_matrix)
        labels = clustering_method.predict(data_for_clustering_matrix)
        silhouette_average = silhouette_score(data_for_clustering_matrix, labels)
        silhouette_value.append(silhouette_average)
        number_of_Clusters.append(int(i))

    # Pick the cluster count with the highest silhouette score
    max_value = max(silhouette_value)
    indexMaxValue = silhouette_value.index(max_value)

    # Fit the KMeans model with the number of clusters that scored highest
    clustering_method = KMeans(n_clusters=number_of_Clusters[indexMaxValue])
    clustering_method.fit(data_for_clustering_matrix)
    labels = clustering_method.predict(data_for_clustering_matrix)

    # Score the dataframe
    cluster_df['cluster'] = labels
    return cluster_df