Python scipy.stats 模块，shapiro() 实例源码

我们从Python开源项目中，提取了以下7个代码示例，用于说明如何使用scipy.stats.shapiro()。

项目：physalia 作者：TQRG | 项目源码 | 文件源码

def samples_are_normal(*samples, **options):
    """Test whether each sample differs from a normal distribution.

    Use both Shapiro-Wilk test and D'Agostino and Pearson's test
    to test the null hypothesis that the sample is drawn from a
    normal distribution.

    Returns:
        List of tuples (is_normal(bool),(statistic(float),pvalue(float)).

    """
    alpha = options.get('alpha', 0.05)
    results = []
    for sample in samples:
        (_, shapiro_pvalue) = shapiro_result = shapiro(sample)
        (_, normaltest_pvalue) = normaltest_result = normaltest(sample)
        results.append((
            not (normaltest_pvalue < alpha and shapiro_pvalue < alpha),
            shapiro_result,
            normaltest_result
        ))
    return results

项目：ISM2017 作者：ybayle | 项目源码 | 文件源码

def assert_normality(data):
    """Description of assert_normality
    The Shapiro-Wilk test tests the null hypothesis that the data was drawn 
    from a normal distribution
    if test_stat and p_value close to 1 then data is normal
    """
    print("Values " + str(data))
    statistic, pvalue = stats.shapiro(data)
    print("Shapiro Statistic " + str(statistic) + " and p-value " + str(pvalue))
    if pvalue > 0.05:
        print("Normal")
        return True
    else:
        print("Not normal")
        return False

项目：TFG 作者：alu0100505078 | 项目源码 | 文件源码

def shapiroWilkTest(nAlgorithms,hyperVolumeList):
    shapiroWilk = []
    #print len(hyperVolumeList)
    for i in range(nAlgorithms):
        #print i
        algorithmList = np.array(hyperVolumeList[i])
        shapiroTest= stats.shapiro(algorithmList)
        #print shapiroTest
        shapiroWilk.append(shapiroTest)
    return shapiroWilk

项目：yellowbrick 作者：DistrictDataLabs | 项目源码 | 文件源码

def __init__(self, ax=None, algorithm='shapiro', features=None,
                 orient='h', show_feature_names=True, **kwargs):
        """
        Initialize the class with the options required to rank and
        order features as well as visualize the result.
        """
        super(Rank1D, self).__init__(
            ax=None, algorithm=algorithm, features=features,
            show_feature_names=show_feature_names, **kwargs
        )
        self.orientation_ = orient

项目：Isomantics 作者：mike-seeber | 项目源码 | 文件源码

def norm_EDA(vectors, lg, embedding):
    """EDA on the norm of the vectors
    vectors = word embedding vectors
    lg = language
    embedding = gensim, polyglot, etc."""
    # L2 norm of vectors, then normalize distribution of L2 norms
    vectors_norm = np.linalg.norm(vectors, axis=1)
    vectors_norm_normalized = (vectors_norm - vectors_norm.mean()) \
        / vectors_norm.std()

    # Histogram compared to normal dist
    plt.figure(figsize=(10, 6))
    plt.xlim((-3, 5))
    plt.hist(vectors_norm_normalized, bins=100, normed=True)
    x = np.linspace(-3, 3, 100)
    plt.plot(x, norm.pdf(x, 0, 1), color='r', linewidth=3)
    plt.savefig('../images/' + lg + '_' + embedding + '_norm.png')
    plt.close('all')

    # Anderson Darling
    # If test stat is greater than crit val, reject ho=normal
    # crit_val_1 is critical value for p-value of 1%
    ad = anderson(vectors_norm_normalized, 'norm')
    ad_test_stat = ad.statistic
    ad_crit_val_1 = ad.critical_values[-1]
    ad_result = 'Reject' if ad_test_stat > ad_crit_val_1 else 'Fail to Reject'

    # Kolmogorov-Smirnov
    ks_p_val = kstest(vectors_norm_normalized, 'norm')[1]
    ks_result = 'Reject' if ks_p_val < .01 else 'Fail to Reject'

    # Shapiro
    sh_p_val = shapiro(vectors_norm_normalized)[1]
    sh_result = 'Reject' if sh_p_val < .01 else 'Fail to Reject'

    result = (ad_test_stat, ad_crit_val_1, ad_result,
              ks_p_val, ks_result,
              sh_p_val, sh_result)
    return result

项目：rca-evaluation 作者：sieve-microservices | 项目源码 | 文件源码

def draw(path):
    data = metadata.load(path)
    p_values_pearson = []
    p_values_shapiro = []
    norm_dist_path = os.path.join(path, "normtest_distribution.png")
    if os.path.exists(norm_dist_path):
        print("path exists %s, skip" % norm_dist_path)
        #return
    for srv in data["services"]:
        filename = os.path.join(path, srv["filename"])
        df = load_timeseries(filename, srv)
        columns = []
        for c in df.columns:
            if (not df[c].isnull().all()) and df[c].var() != 0:
                columns.append(c)
        df = df[columns]
        n = len(columns)
        if n == 0:
            continue
        fig, axis = plt.subplots(n, 2)
        fig.set_figheight(n * 4)
        fig.set_figwidth(30)

        for i, col in enumerate(df.columns):
            serie = df[col].dropna()
            sns.boxplot(x=serie, ax=axis[i, 0])
            statistic_1, p_value_1 = normaltest(serie)
            p_values_pearson.append(p_value_1)
            statistic_2, p_value_2 = shapiro(serie)
            p_values_shapiro.append(p_value_2)
            templ = """Pearson's normtest:
statistic: %f
p-value: %E
-> %s

Shapiro-Wilk test for normality:
statistic: %f
p-value: %E
-> %s
"""
            outcome_1 = "not normal distributed" if p_value_1 < 0.05 else "normal distributed"
            outcome_2 = "not normal distributed" if p_value_2 < 0.05 else "normal distributed"
            text = templ % (statistic_1, p_value_1, outcome_1, statistic_2, p_value_2, outcome_2)
            axis[i, 1].axis('off')
            axis[i, 1].text(0.05, 0.05, text, fontsize=18)
        plot_path = os.path.join(path, "%s_normtest.png" % srv["name"])
        plt.savefig(plot_path)
        print(plot_path)

    fig, axis = plt.subplots(2)
    fig.set_figheight(8)
    measurement = os.path.dirname(os.path.join(path,''))
    name = "Distribution of p-value for Pearson's normtest for %s" % measurement
    plot = sns.distplot(pd.Series(p_values_pearson, name=name), rug=True, kde=False, norm_hist=False, ax=axis[0])
    name = "Distribution of p-value for Shapiro-Wilk's normtest for %s" % measurement
    plot = sns.distplot(pd.Series(p_values_shapiro, name=name), rug=True, kde=False, norm_hist=False, ax=axis[1])
    fig.savefig(norm_dist_path)
    print(norm_dist_path)

项目：yellowbrick 作者：DistrictDataLabs | 项目源码 | 文件源码

def rank1d(X, y=None, ax=None, algorithm='shapiro', features=None,
           orient='h', show_feature_names=True, **kwargs):
    """Scores each feature with the algorithm and ranks them in a bar plot.

    This helper function is a quick wrapper to utilize the Rank1D Visualizer
    (Transformer) for one-off analysis.

    Parameters
    ----------
    X : ndarray or DataFrame of shape n x m
        A matrix of n instances with m features

    y : ndarray or Series of length n
        An array or series of target or class values

    ax : matplotlib axes
        the axis to plot the figure on.

    algorithm : one of {'shapiro', }, default: 'shapiro'
        The ranking algorithm to use, default is 'Shapiro-Wilk.

    features : list
        A list of feature names to use.
        If a DataFrame is passed to fit and features is None, feature
        names are selected as the columns of the DataFrame.

    orient : 'h' or 'v'
        Specifies a horizontal or vertical bar chart.

    show_feature_names : boolean, default: True
        If True, the feature names are used to label the axis ticks in the
        plot.

    Returns
    -------
    ax : matplotlib axes
        Returns the axes that the parallel coordinates were drawn on.

    """
    # Instantiate the visualizer
    visualizer = Rank1D(ax, algorithm, features, orient, show_feature_names,
                        **kwargs)

    # Fit and transform the visualizer (calls draw)
    visualizer.fit(X, y, **kwargs)
    visualizer.transform(X)

    # Return the axes object on the visualizer
    return visualizer.ax