Python sklearn.metrics module: euclidean_distances() example source code

The following 9 code examples, extracted from open-source Python projects, illustrate how to use sklearn.metrics.euclidean_distances().
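
For orientation, here is a minimal, self-contained sketch of the basic call signature, using small made-up NumPy arrays rather than code from any of the projects below:

import numpy as np
from sklearn.metrics import euclidean_distances

# Two small, made-up feature matrices: 3 samples x 2 features and 2 samples x 2 features.
X = np.array([[0.0, 1.0], [1.0, 1.0], [2.0, 0.0]])
Y = np.array([[0.0, 0.0], [1.0, 0.0]])

# Pairwise distances between rows of X and rows of Y -> shape (3, 2).
D = euclidean_distances(X, Y)

# With a single argument, distances are computed between all rows of X -> shape (3, 3).
D_self = euclidean_distances(X)

# squared=True returns squared Euclidean distances (skips the square root).
D_sq = euclidean_distances(X, Y, squared=True)

print(D.shape, D_self.shape, D_sq.shape)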

Project: dask-ml    Author: dask    | Project source | File source
def test_euclidean_distances():
    X = da.random.uniform(size=(100, 4), chunks=50)
    Y = da.random.uniform(size=(100, 4), chunks=50)
    a = dm.euclidean_distances(X, Y)
    b = sm.euclidean_distances(X, Y)
    assert_eq(a, b)

    x_norm_squared = (X ** 2).sum(axis=1).compute()[:, np.newaxis]
    a = dm.euclidean_distances(X, Y, X_norm_squared=x_norm_squared)
    b = sm.euclidean_distances(X, Y, X_norm_squared=x_norm_squared)
    assert_eq(a, b)

    y_norm_squared = (Y ** 2).sum(axis=1).compute()[np.newaxis, :]
    a = dm.euclidean_distances(X, Y, Y_norm_squared=y_norm_squared)
    b = sm.euclidean_distances(X, Y, Y_norm_squared=y_norm_squared)
    assert_eq(a, b)
Project: project-template    Author: scikit-learn-contrib    | Project source | File source
def predict(self, X):
        """ A reference implementation of a prediction for a classifier.

        Parameters
        ----------
        X : array-like of shape = [n_samples, n_features]
            The input samples.

        Returns
        -------
        y : array of int of shape = [n_samples]
            The label for each sample is the label of the closest sample
            seen during fit.
        """
        # Check if fit has been called
        check_is_fitted(self, ['X_', 'y_'])

        # Input validation
        X = check_array(X)

        closest = np.argmin(euclidean_distances(X, self.X_), axis=1)
        return self.y_[closest]
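
The prediction step above is just a 1-nearest-neighbour lookup over the samples memorised during fit. A minimal standalone sketch of the same idea, with toy data standing in for the fitted estimator's X_ and y_:

import numpy as np
from sklearn.metrics import euclidean_distances

# Toy "training" data standing in for self.X_ and self.y_ (made up for illustration).
X_fit = np.array([[0.0, 0.0], [5.0, 5.0]])
y_fit = np.array([0, 1])

# New samples to classify.
X_new = np.array([[0.5, -0.2], [4.0, 6.0]])

# For each new sample, take the label of the closest training sample.
closest = np.argmin(euclidean_distances(X_new, X_fit), axis=1)
y_pred = y_fit[closest]  # -> array([0, 1])
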
Project: Word_Mover_Distance    Author: PragmaticLab    | Project source | File source
def get_wmd_distance(d1, d2, min_vocab=7, verbose=False):
    vocabulary = [w for w in set(d1.lower().split() + d2.lower().split()) if w in model.vocab and w not in stop_words.ENGLISH_STOP_WORDS]
    if len(vocabulary) < min_vocab:
        return 1
    vect = CountVectorizer(vocabulary=vocabulary).fit([d1, d2])
    W_ = np.array([model[w] for w in vect.get_feature_names() if w in model])
    D_ = euclidean_distances(W_)
    D_ = D_.astype(np.double)
    D_ /= D_.max()  # just for comparison purposes
    v_1, v_2 = vect.transform([d1, d2])
    v_1 = v_1.toarray().ravel()
    v_2 = v_2.toarray().ravel()
    # pyemd needs double precision input
    v_1 = v_1.astype(np.double)
    v_2 = v_2.astype(np.double)
    v_1 /= v_1.sum()
    v_2 /= v_2.sum()
    if verbose:
        print(vocabulary)
        print(v_1, v_2)
    return emd(v_1, v_2, D_)

# d1 = "Government speaks to the media in Illinois"
# d2 = "The state addresses the press in Chicago"
# print(get_wmd_distance(d1, d2))
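
In the snippet above, euclidean_distances() builds the ground-distance matrix D_ between word embeddings that pyemd's emd() then consumes. A small sketch of that step, with random vectors standing in for the word2vec embeddings (the model lookup, stop-word filtering and CountVectorizer parts are omitted):

import numpy as np
from sklearn.metrics import euclidean_distances

rng = np.random.RandomState(0)

# Stand-in for W_, the embedding rows of a 5-word shared vocabulary (50-dimensional).
W_ = rng.rand(5, 50)

# Ground-distance matrix between every pair of words, normalised as in the snippet.
D_ = euclidean_distances(W_).astype(np.double)
D_ /= D_.max()

# Toy normalised bag-of-words histograms for the two documents.
v_1 = np.array([2.0, 0.0, 1.0, 0.0, 1.0]); v_1 /= v_1.sum()
v_2 = np.array([0.0, 1.0, 1.0, 1.0, 0.0]); v_2 /= v_2.sum()

# pyemd.emd(v_1, v_2, D_) would then give the Word Mover's Distance.
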
Project: dask-ml    Author: dask    | Project source | File source
def test_euclidean_distances_same():
    X = da.random.uniform(size=(100, 4), chunks=50)
    a = dm.euclidean_distances(X, X)
    b = sm.euclidean_distances(X, X)
    assert_eq(a, b, atol=1e-4)

    x_norm_squared = (X ** 2).sum(axis=1).compute()[np.newaxis, :]
    a = dm.euclidean_distances(X, X, Y_norm_squared=x_norm_squared)
    b = sm.euclidean_distances(X, X, Y_norm_squared=x_norm_squared)
    assert_eq(a, b, atol=1e-4)
Project: icing    Author: slipguru    | Project source | File source
def fit(self, X, **kwargs):
        """Apply affinity propagation clustering.

        Create affinity matrix from negative euclidean distances if required.

        Parameters
        ----------
        X: array-like or sparse matrix,
                shape (n_samples, n_features) or (n_samples, n_samples)
            Data matrix or, if affinity is ``precomputed``, matrix of
            similarities / affinities.
        """
        if not issparse(X):
            return super(AffinityPropagation, self).fit(X, **kwargs)

        # Since X is sparse, convert it to a coo_matrix if required
        X = check_array(X, accept_sparse='coo')
        if self.affinity == "precomputed":
            self.affinity_matrix_ = X
        elif self.affinity == "euclidean":
            self.affinity_matrix_ = coo_matrix(
                -euclidean_distances(X, squared=True))
        else:
            raise ValueError("Affinity must be 'precomputed' or "
                             "'euclidean'. Got %s instead"
                             % str(self.affinity))

        self.cluster_centers_indices_, self.labels_, self.n_iter_ = \
            sparse_ap(
                self.affinity_matrix_, self.preference, max_iter=self.max_iter,
                convergence_iter=self.convergence_iter, damping=self.damping,
                copy=self.copy, verbose=self.verbose, return_n_iter=True,
                convergence_percentage=self.convergence_percentage)

        if self.affinity != "precomputed":
            self.cluster_centers_ = X.data[self.cluster_centers_indices_].copy()

        return self
Project: sentence-classification    Author: bgmartins    | Project source | File source
def _wmd(self, i, row, X_train):
        """Compute the WMD between training sample i and given test row.

        Assumes that `row` and train samples are sparse BOW vectors summing to 1.
        """
        union_idx = np.union1d(X_train[i].indices, row.indices) - 1
        W_minimal = self.W_embed[union_idx]
        W_dist = euclidean_distances(W_minimal)
        bow_i = X_train[i, union_idx].A.ravel()
        bow_j = row[:, union_idx].A.ravel()
        return emd(bow_i, bow_j, W_dist)
Project: hmm-reps    Author: rug-compling    | Project source | File source
def get_twodim_reps(reps, seed, distance=euclidean_distances):
    reps = reps.astype(np.float64)
    similarities = distance(reps)
    mds = MDS(n_components=2, dissimilarity="precomputed", random_state=seed)
    return mds.fit(similarities).embedding_
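
The helper above feeds a precomputed Euclidean distance matrix into MDS to obtain 2-D coordinates. A minimal sketch with random data standing in for reps:

import numpy as np
from sklearn.manifold import MDS
from sklearn.metrics import euclidean_distances

rng = np.random.RandomState(42)
reps = rng.rand(20, 10).astype(np.float64)  # made-up high-dimensional representations

# Pairwise Euclidean distances used as precomputed dissimilarities.
similarities = euclidean_distances(reps)

mds = MDS(n_components=2, dissimilarity="precomputed", random_state=42)
coords = mds.fit(similarities).embedding_  # shape (20, 2)
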
Project: Parallel-SGD    Author: angadgill    | Project source | File source
def test_random_projection_embedding_quality():
    data, _ = make_sparse_random_data(8, 5000, 15000)
    eps = 0.2

    original_distances = euclidean_distances(data, squared=True)
    original_distances = original_distances.ravel()
    non_identical = original_distances != 0.0

    # remove 0 distances to avoid division by 0
    original_distances = original_distances[non_identical]

    for RandomProjection in all_RandomProjection:
        rp = RandomProjection(n_components='auto', eps=eps, random_state=0)
        projected = rp.fit_transform(data)

        projected_distances = euclidean_distances(projected, squared=True)
        projected_distances = projected_distances.ravel()

        # remove 0 distances to avoid division by 0
        projected_distances = projected_distances[non_identical]

        distances_ratio = projected_distances / original_distances

        # check that the automatically tuned values for the density respect the
        # contract for eps: pairwise distances are preserved according to the
        # Johnson-Lindenstrauss lemma
        assert_less(distances_ratio.max(), 1 + eps)
        assert_less(1 - eps, distances_ratio.min())
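
A compact version of the same distance-preservation check, using a single GaussianRandomProjection on dense random data (make_sparse_random_data, all_RandomProjection and the assert_* helpers are specific to the test suite above):

import numpy as np
from sklearn.random_projection import GaussianRandomProjection
from sklearn.metrics import euclidean_distances

rng = np.random.RandomState(0)
data = rng.rand(30, 5000)  # few samples, many features
eps = 0.2

original = euclidean_distances(data, squared=True).ravel()
mask = original != 0.0  # drop zero self-distances to avoid division by zero

rp = GaussianRandomProjection(n_components='auto', eps=eps, random_state=0)
projected = rp.fit_transform(data)
ratio = euclidean_distances(projected, squared=True).ravel()[mask] / original[mask]

# By the Johnson-Lindenstrauss lemma the ratio should stay within [1 - eps, 1 + eps].
print(ratio.min(), ratio.max())
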
Project: Parallel-SGD    Author: angadgill    | Project source | File source
def test_affinity_propagation():
    # Affinity Propagation algorithm
    # Compute similarities
    S = -euclidean_distances(X, squared=True)
    preference = np.median(S) * 10
    # Compute Affinity Propagation
    cluster_centers_indices, labels = affinity_propagation(
        S, preference=preference)

    n_clusters_ = len(cluster_centers_indices)

    assert_equal(n_clusters, n_clusters_)

    af = AffinityPropagation(preference=preference, affinity="precomputed")
    labels_precomputed = af.fit(S).labels_

    af = AffinityPropagation(preference=preference, verbose=True)
    labels = af.fit(X).labels_

    assert_array_equal(labels, labels_precomputed)

    cluster_centers_indices = af.cluster_centers_indices_

    n_clusters_ = len(cluster_centers_indices)
    assert_equal(np.unique(labels).size, n_clusters_)
    assert_equal(n_clusters, n_clusters_)

    # Test also with no copy
    _, labels_no_copy = affinity_propagation(S, preference=preference,
                                             copy=False)
    assert_array_equal(labels, labels_no_copy)

    # Test input validation
    assert_raises(ValueError, affinity_propagation, S[:, :-1])
    assert_raises(ValueError, affinity_propagation, S, damping=0)
    af = AffinityPropagation(affinity="unknown")
    assert_raises(ValueError, af.fit, X)
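
For reference, the precomputed-affinity pattern exercised in this test boils down to the following minimal sketch, with made-up blob data standing in for the module-level X used above:

import numpy as np
from sklearn.cluster import AffinityPropagation
from sklearn.metrics import euclidean_distances

rng = np.random.RandomState(0)
# Two made-up, well-separated blobs of points.
X = np.vstack([rng.randn(20, 2), rng.randn(20, 2) + 10.0])

# Affinity propagation expects similarities, so use negative squared distances.
S = -euclidean_distances(X, squared=True)

af = AffinityPropagation(affinity="precomputed", preference=np.median(S)).fit(S)
print(np.unique(af.labels_))  # cluster labels recovered from the precomputed affinities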