Python sklearn.neighbors 模块,kneighbors_graph() 实例源码

我们从Python开源项目中,提取了以下17个代码示例,用于说明如何使用sklearn.neighbors.kneighbors_graph()

项目:TextCategorization    作者:Y-oHr-N    | 项目源码 | 文件源码
def _build_graph(self):
        """Compute the graph Laplacian."""

        # Graph sparsification
        if self.sparsify == 'epsilonNN':
            self.A_           = radius_neighbors_graph(self.X_, self.radius, include_self=False)
        else:
            Q                 = kneighbors_graph(
                self.X_,
                self.n_neighbors,
                include_self  = False
            ).astype(np.bool)

            if self.sparsify   == 'kNN':
                self.A_       = (Q + Q.T).astype(np.float64)
            elif self.sparsify == 'MkNN':
                self.A_       = (Q.multiply(Q.T)).astype(np.float64)

        # Edge re-weighting
        if self.reweight == 'rbf':
            W                 = rbf_kernel(self.X_, gamma=self.t)
            self.A_           = self.A_.multiply(W)

        return sp.csgraph.laplacian(self.A_, normed=self.normed)
项目:eezzy    作者:3Blades    | 项目源码 | 文件源码
def cluster_agglomerative(X_train, model_args=None, gridsearch=True, connectivity_graph=True, connectivity_graph_neighbors=10):
    from sklearn.cluster import AgglomerativeClustering
    from sklearn.neighbors import kneighbors_graph
    print('AgglomerativeClustering')

    if connectivity_graph:
        print('Creating k-neighbors graph for connectivity restraint')
        connectivity = kneighbors_graph(X_train, n_neighbors=connectivity_graph_neighbors)
        model_args['connectivity'] = connectivity

    if gridsearch is True:
        ## TODO:
        # add hyperparamter searching. No scoring method available for this model, 
        # so we can't easily use gridsearching.

        raise NotImplementedError('No hyperparameter optimization available yet for this model. Set gridsearch to False')
        # prune(param_grid, model_args)
    else:
        if 'n_clusters' not in model_args:
            raise KeyError('Need to define n_clusters for AgglomerativeClustering')
        param_grid = None

    return ModelWrapper(AgglomerativeClustering, X=X_train, model_args=model_args, param_grid=param_grid, unsupervised=True)
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def test_kneighbors_graph_sparse(seed=36):
    # Test kneighbors_graph to build the k-Nearest Neighbor graph
    # for sparse input.
    rng = np.random.RandomState(seed)
    X = rng.randn(10, 10)
    Xcsr = csr_matrix(X)

    for n_neighbors in [1, 2, 3]:
        for mode in ["connectivity", "distance"]:
            assert_array_almost_equal(
                neighbors.kneighbors_graph(X,
                                           n_neighbors,
                                           mode=mode).toarray(),
                neighbors.kneighbors_graph(Xcsr,
                                           n_neighbors,
                                           mode=mode).toarray())
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def test_isomap_simple_grid():
    # Isomap should preserve distances when all neighbors are used
    N_per_side = 5
    Npts = N_per_side ** 2
    n_neighbors = Npts - 1

    # grid of equidistant points in 2D, n_components = n_dim
    X = np.array(list(product(range(N_per_side), repeat=2)))

    # distances from each point to all others
    G = neighbors.kneighbors_graph(X, n_neighbors,
                                   mode='distance').toarray()

    for eigen_solver in eigen_solvers:
        for path_method in path_methods:
            clf = manifold.Isomap(n_neighbors=n_neighbors, n_components=2,
                                  eigen_solver=eigen_solver,
                                  path_method=path_method)
            clf.fit(X)

            G_iso = neighbors.kneighbors_graph(clf.embedding_,
                                               n_neighbors,
                                               mode='distance').toarray()
            assert_array_almost_equal(G, G_iso)
项目:FreeDiscovery    作者:FreeDiscovery    | 项目源码 | 文件源码
def ward_hc(self, n_clusters, n_neighbors=10):
        """
        Perform Ward hierarchical clustering

        Parameters
        ----------
        n_clusters : int
            number of clusters
        lsi_components : int
            apply LSA before the clustering algorithm
        n_neighbors : int
            N nearest neighbors used for computing the connectivity matrix
        """
        from sklearn.cluster import AgglomerativeClustering
        from sklearn.neighbors import kneighbors_graph
        pars = {'n_neighbors': n_neighbors, 'is_hierarchical': True,
                "metric": self.metric}
        if 'lsi' not in self.pipeline:
            raise ValueError("you must use lsi with birch clustering "
                             "for scaling reasons.")

        # This is really not efficient as
        # it's done a second time in _cluster_func
        X = self.pipeline.data
        connectivity = kneighbors_graph(X, n_neighbors=n_neighbors,
                                        include_self=False)

        km = AgglomerativeClustering(n_clusters=n_clusters, linkage='ward',
                                     connectivity=connectivity)

        return self._cluster_func(n_clusters, km, pars)
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def test_kneighbors_graph():
    # Test kneighbors_graph to build the k-Nearest Neighbor graph.
    X = np.array([[0, 1], [1.01, 1.], [2, 0]])

    # n_neighbors = 1
    A = neighbors.kneighbors_graph(X, 1, mode='connectivity',
                                   include_self=True)
    assert_array_equal(A.toarray(), np.eye(A.shape[0]))

    A = neighbors.kneighbors_graph(X, 1, mode='distance')
    assert_array_almost_equal(
        A.toarray(),
        [[0.00, 1.01, 0.],
         [1.01, 0., 0.],
         [0.00, 1.40716026, 0.]])

    # n_neighbors = 2
    A = neighbors.kneighbors_graph(X, 2, mode='connectivity',
                                   include_self=True)
    assert_array_equal(
        A.toarray(),
        [[1., 1., 0.],
         [1., 1., 0.],
         [0., 1., 1.]])

    A = neighbors.kneighbors_graph(X, 2, mode='distance')
    assert_array_almost_equal(
        A.toarray(),
        [[0., 1.01, 2.23606798],
         [1.01, 0., 1.40716026],
         [2.23606798, 1.40716026, 0.]])

    # n_neighbors = 3
    A = neighbors.kneighbors_graph(X, 3, mode='connectivity',
                                   include_self=True)
    assert_array_almost_equal(
        A.toarray(),
        [[1, 1, 1], [1, 1, 1], [1, 1, 1]])
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def test_non_euclidean_kneighbors():
    rng = np.random.RandomState(0)
    X = rng.rand(5, 5)

    # Find a reasonable radius.
    dist_array = pairwise_distances(X).flatten()
    np.sort(dist_array)
    radius = dist_array[15]

    # Test kneighbors_graph
    for metric in ['manhattan', 'chebyshev']:
        nbrs_graph = neighbors.kneighbors_graph(
            X, 3, metric=metric, mode='connectivity',
            include_self=True).toarray()
        nbrs1 = neighbors.NearestNeighbors(3, metric=metric).fit(X)
        assert_array_equal(nbrs_graph, nbrs1.kneighbors_graph(X).toarray())

    # Test radiusneighbors_graph
    for metric in ['manhattan', 'chebyshev']:
        nbrs_graph = neighbors.radius_neighbors_graph(
            X, radius, metric=metric, mode='connectivity',
            include_self=True).toarray()
        nbrs1 = neighbors.NearestNeighbors(metric=metric, radius=radius).fit(X)
        assert_array_equal(nbrs_graph, nbrs1.radius_neighbors_graph(X).A)

    # Raise error when wrong parameters are supplied,
    X_nbrs = neighbors.NearestNeighbors(3, metric='manhattan')
    X_nbrs.fit(X)
    assert_raises(ValueError, neighbors.kneighbors_graph, X_nbrs, 3,
                  metric='euclidean')
    X_nbrs = neighbors.NearestNeighbors(radius=radius, metric='manhattan')
    X_nbrs.fit(X)
    assert_raises(ValueError, neighbors.radius_neighbors_graph, X_nbrs,
                  radius, metric='euclidean')
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def test_k_and_radius_neighbors_train_is_not_query():
    # Test kneighbors et.al when query is not training data

    for algorithm in ALGORITHMS:

        nn = neighbors.NearestNeighbors(n_neighbors=1, algorithm=algorithm)

        X = [[0], [1]]
        nn.fit(X)
        test_data = [[2], [1]]

        # Test neighbors.
        dist, ind = nn.kneighbors(test_data)
        assert_array_equal(dist, [[1], [0]])
        assert_array_equal(ind, [[1], [1]])
        dist, ind = nn.radius_neighbors([[2], [1]], radius=1.5)
        check_object_arrays(dist, [[1], [1, 0]])
        check_object_arrays(ind, [[1], [0, 1]])

        # Test the graph variants.
        assert_array_equal(
            nn.kneighbors_graph(test_data).A, [[0., 1.], [0., 1.]])
        assert_array_equal(
            nn.kneighbors_graph([[2], [1]], mode='distance').A,
            np.array([[0., 1.], [0., 0.]]))
        rng = nn.radius_neighbors_graph([[2], [1]], radius=1.5)
        assert_array_equal(rng.A, [[0, 1], [1, 1]])
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def test_include_self_neighbors_graph():
    # Test include_self parameter in neighbors_graph
    X = [[2, 3], [4, 5]]
    kng = neighbors.kneighbors_graph(X, 1, include_self=True).A
    kng_not_self = neighbors.kneighbors_graph(X, 1, include_self=False).A
    assert_array_equal(kng, [[1., 0.], [0., 1.]])
    assert_array_equal(kng_not_self, [[0., 1.], [1., 0.]])

    rng = neighbors.radius_neighbors_graph(X, 5.0, include_self=True).A
    rng_not_self = neighbors.radius_neighbors_graph(
        X, 5.0, include_self=False).A
    assert_array_equal(rng, [[1., 1.], [1., 1.]])
    assert_array_equal(rng_not_self, [[0., 1.], [1., 0.]])
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def test_same_knn_parallel():
    X, y = datasets.make_classification(n_samples=30, n_features=5,
                                        n_redundant=0, random_state=0)
    X_train, X_test, y_train, y_test = train_test_split(X, y)

    def check_same_knn_parallel(algorithm):
        clf = neighbors.KNeighborsClassifier(n_neighbors=3,
                                             algorithm=algorithm)
        clf.fit(X_train, y_train)
        y = clf.predict(X_test)
        dist, ind = clf.kneighbors(X_test)
        graph = clf.kneighbors_graph(X_test, mode='distance').toarray()

        clf.set_params(n_jobs=3)
        clf.fit(X_train, y_train)
        y_parallel = clf.predict(X_test)
        dist_parallel, ind_parallel = clf.kneighbors(X_test)
        graph_parallel = \
            clf.kneighbors_graph(X_test, mode='distance').toarray()

        assert_array_equal(y, y_parallel)
        assert_array_almost_equal(dist, dist_parallel)
        assert_array_equal(ind, ind_parallel)
        assert_array_almost_equal(graph, graph_parallel)

    for algorithm in ALGORITHMS:
        yield check_same_knn_parallel, algorithm
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def test_isomap_reconstruction_error():
    # Same setup as in test_isomap_simple_grid, with an added dimension
    N_per_side = 5
    Npts = N_per_side ** 2
    n_neighbors = Npts - 1

    # grid of equidistant points in 2D, n_components = n_dim
    X = np.array(list(product(range(N_per_side), repeat=2)))

    # add noise in a third dimension
    rng = np.random.RandomState(0)
    noise = 0.1 * rng.randn(Npts, 1)
    X = np.concatenate((X, noise), 1)

    # compute input kernel
    G = neighbors.kneighbors_graph(X, n_neighbors,
                                   mode='distance').toarray()

    centerer = preprocessing.KernelCenterer()
    K = centerer.fit_transform(-0.5 * G ** 2)

    for eigen_solver in eigen_solvers:
        for path_method in path_methods:
            clf = manifold.Isomap(n_neighbors=n_neighbors, n_components=2,
                                  eigen_solver=eigen_solver,
                                  path_method=path_method)
            clf.fit(X)

            # compute output kernel
            G_iso = neighbors.kneighbors_graph(clf.embedding_,
                                               n_neighbors,
                                               mode='distance').toarray()

            K_iso = centerer.fit_transform(-0.5 * G_iso ** 2)

            # make sure error agrees
            reconstruction_error = np.linalg.norm(K - K_iso) / Npts
            assert_almost_equal(reconstruction_error,
                                clf.reconstruction_error())
项目:extract    作者:dblalock    | 项目源码 | 文件源码
def makeWard(X, k=2):
    # connectivity matrix for structured Ward
    connectivity = kneighbors_graph(X, n_neighbors=10)
    # make connectivity symmetric
    connectivity = 0.5 * (connectivity + connectivity.T)
    return cluster.AgglomerativeClustering(n_clusters=k,
                        linkage='ward', connectivity=connectivity)
项目:extract    作者:dblalock    | 项目源码 | 文件源码
def makeAvgLinkage(X=None, k=2):
    connectivity = kneighbors_graph(X, n_neighbors=10)
    # make connectivity symmetric
    connectivity = 0.5 * (connectivity + connectivity.T)
    return cluster.AgglomerativeClustering(linkage="average",
                                affinity="cityblock", n_clusters=k,
                                connectivity=connectivity)
项目:extract    作者:dblalock    | 项目源码 | 文件源码
def makeMaxLinkage(X=None, k=2):
    connectivity = kneighbors_graph(X, n_neighbors=10)
    # make connectivity symmetric
    connectivity = 0.5 * (connectivity + connectivity.T)
    return cluster.AgglomerativeClustering(linkage="complete",
                                affinity="cityblock", n_clusters=k,
                                connectivity=connectivity)
项目:icing    作者:slipguru    | 项目源码 | 文件源码
def fit(self, X, y=None):
        """Creates an affinity matrix for X using the selected affinity,
        then applies spectral clustering to this affinity matrix.

        Parameters
        ----------
        X : array-like or sparse matrix, shape (n_samples, n_features)
            OR, if affinity==`precomputed`, a precomputed affinity
            matrix of shape (n_samples, n_samples)
        """
        X = check_array(X, accept_sparse=['csr', 'csc', 'coo'],
                        dtype=np.float64)
        if X.shape[0] == X.shape[1] and self.affinity != "precomputed":
            warnings.warn("The spectral clustering API has changed. ``fit``"
                          "now constructs an affinity matrix from data. To use"
                          " a custom affinity matrix, "
                          "set ``affinity=precomputed``.")

        if self.affinity == 'nearest_neighbors':
            connectivity = kneighbors_graph(X, n_neighbors=self.n_neighbors, include_self=True)
            self.affinity_matrix_ = 0.5 * (connectivity + connectivity.T)
        elif self.affinity == 'precomputed':
            self.affinity_matrix_ = X
        else:
            params = self.kernel_params
            if params is None:
                params = {}
            if not callable(self.affinity):
                params['gamma'] = self.gamma
                params['degree'] = self.degree
                params['coef0'] = self.coef0
            self.affinity_matrix_ = pairwise_kernels(X, metric=self.affinity,
                                                     filter_params=True,
                                                     **params)

        random_state = check_random_state(self.random_state)
        self.labels_ = spectral_clustering(self.affinity_matrix_,
                                           n_clusters=self.n_clusters,
                                           eigen_solver=self.eigen_solver,
                                           random_state=random_state,
                                           n_init=self.n_init,
                                           eigen_tol=self.eigen_tol,
                                           assign_labels=self.assign_labels,
                                           norm_laplacian=self.norm_laplacian)
        return self
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def test_neighbors_badargs():
    # Test bad argument values: these should all raise ValueErrors
    assert_raises(ValueError,
                  neighbors.NearestNeighbors,
                  algorithm='blah')

    X = rng.random_sample((10, 2))
    Xsparse = csr_matrix(X)
    y = np.ones(10)

    for cls in (neighbors.KNeighborsClassifier,
                neighbors.RadiusNeighborsClassifier,
                neighbors.KNeighborsRegressor,
                neighbors.RadiusNeighborsRegressor):
        assert_raises(ValueError,
                      cls,
                      weights='blah')
        assert_raises(ValueError,
                      cls, p=-1)
        assert_raises(ValueError,
                      cls, algorithm='blah')
        nbrs = cls(algorithm='ball_tree', metric='haversine')
        assert_raises(ValueError,
                      nbrs.predict,
                      X)
        assert_raises(ValueError,
                      ignore_warnings(nbrs.fit),
                      Xsparse, y)
        nbrs = cls()
        assert_raises(ValueError,
                      nbrs.fit,
                      np.ones((0, 2)), np.ones(0))
        assert_raises(ValueError,
                      nbrs.fit,
                      X[:, :, None], y)
        nbrs.fit(X, y)
        assert_raises(ValueError,
                      nbrs.predict,
                      [[]])
        if (isinstance(cls, neighbors.KNeighborsClassifier) or
                isinstance(cls, neighbors.KNeighborsRegressor)):
            nbrs = cls(n_neighbors=-1)
            assert_raises(ValueError, nbrs.fit, X, y)

    nbrs = neighbors.NearestNeighbors().fit(X)

    assert_raises(ValueError, nbrs.kneighbors_graph, X, mode='blah')
    assert_raises(ValueError, nbrs.radius_neighbors_graph, X, mode='blah')
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def test_k_and_radius_neighbors_duplicates():
    # Test behavior of kneighbors when duplicates are present in query

    for algorithm in ALGORITHMS:
        nn = neighbors.NearestNeighbors(n_neighbors=1, algorithm=algorithm)
        nn.fit([[0], [1]])

        # Do not do anything special to duplicates.
        kng = nn.kneighbors_graph([[0], [1]], mode='distance')
        assert_array_equal(
            kng.A,
            np.array([[0., 0.], [0., 0.]]))
        assert_array_equal(kng.data, [0., 0.])
        assert_array_equal(kng.indices, [0, 1])

        dist, ind = nn.radius_neighbors([[0], [1]], radius=1.5)
        check_object_arrays(dist, [[0, 1], [1, 0]])
        check_object_arrays(ind, [[0, 1], [0, 1]])

        rng = nn.radius_neighbors_graph([[0], [1]], radius=1.5)
        assert_array_equal(rng.A, np.ones((2, 2)))

        rng = nn.radius_neighbors_graph([[0], [1]], radius=1.5,
                                        mode='distance')
        assert_array_equal(rng.A, [[0, 1], [1, 0]])
        assert_array_equal(rng.indices, [0, 1, 0, 1])
        assert_array_equal(rng.data, [0, 1, 1, 0])

        # Mask the first duplicates when n_duplicates > n_neighbors.
        X = np.ones((3, 1))
        nn = neighbors.NearestNeighbors(n_neighbors=1)
        nn.fit(X)
        dist, ind = nn.kneighbors()
        assert_array_equal(dist, np.zeros((3, 1)))
        assert_array_equal(ind, [[1], [0], [1]])

        # Test that zeros are explicitly marked in kneighbors_graph.
        kng = nn.kneighbors_graph(mode='distance')
        assert_array_equal(
            kng.A, np.zeros((3, 3)))
        assert_array_equal(kng.data, np.zeros(3))
        assert_array_equal(kng.indices, [1., 0., 1.])
        assert_array_equal(
            nn.kneighbors_graph().A,
            np.array([[0., 1., 0.], [1., 0., 0.], [0., 1., 0.]]))