Python sklearn.model_selection module: ParameterSampler() example source code

We extracted the following 11 code examples from open-source Python projects to illustrate how to use sklearn.model_selection.ParameterSampler().
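
ParameterSampler takes a dictionary mapping parameter names to either lists of candidate values or scipy distributions exposing an rvs method, and yields n_iter parameter dictionaries. A minimal self-contained example (values shown are illustrative):

from scipy.stats import uniform
from sklearn.model_selection import ParameterSampler

# Lists are sampled uniformly; frozen scipy distributions are
# sampled via their rvs method.
param_distributions = {"kernel": ["rbf", "linear"], "C": uniform(0, 1)}

sampler = ParameterSampler(param_distributions, n_iter=5, random_state=0)
for params in sampler:
    print(params)  # a dict such as {'C': ..., 'kernel': ...}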

Project: skutil    Author: tgsmith61591    | Project source | File source
def fit(self, X, y=None):
        """Run fit on the estimator with randomly drawn parameters.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]
            Training vector, where n_samples is the number of samples and
            n_features is the number of features.
        y : array-like, shape = [n_samples] or [n_samples, n_output], optional
            Target relative to X for classification or regression;
            None for unsupervised learning.
        """
        sampled_params = ParameterSampler(self.param_distributions,
                                          self.n_iter,
                                          random_state=self.random_state)

        # the super class will handle the X, y validation
        return self._fit(X, y, sampled_params)
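
The inherited _fit (defined in skutil's base search class and not shown in this excerpt) consumes the sampler as a plain iterable of parameter dicts. A minimal sketch of that consumption pattern, with a hypothetical search loop standing in for the real one:

from sklearn.base import clone

def _fit_sketch(estimator, X, y, sampled_params):
    # Hypothetical stand-in for the inherited _fit: ParameterSampler
    # yields one dict of parameter settings per candidate.
    fitted = []
    for params in sampled_params:
        model = clone(estimator).set_params(**params)
        model.fit(X, y)
        fitted.append((params, model))
    return fitted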
Project: spotlight    Author: maciejkula    | Project source | File source
def sample_hyperparameters(random_state, num):

    space = {
        'n_iter': N_ITER,
        'batch_size': BATCH_SIZE,
        'l2': L2,
        'learning_rate': LEARNING_RATES,
        'loss': LOSSES,
        'embedding_dim': EMBEDDING_DIM,
    }

    sampler = ParameterSampler(space,
                               n_iter=num,
                               random_state=random_state)

    for params in sampler:
        yield params
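
The module-level constants (N_ITER, BATCH_SIZE, L2, LEARNING_RATES, LOSSES, EMBEDDING_DIM) are not shown in this excerpt; they are candidate lists defined elsewhere in spotlight's experiment scripts. A runnable sketch with hypothetical stand-in values:

from sklearn.model_selection import ParameterSampler

# Hypothetical stand-ins for spotlight's module-level constants.
space = {
    'n_iter': [5, 10, 20],
    'batch_size': [128, 256],
    'l2': [0.0, 1e-6],
    'learning_rate': [1e-3, 1e-2],
    'loss': ['bpr', 'hinge'],
    'embedding_dim': [32, 64],
}

for params in ParameterSampler(space, n_iter=3, random_state=42):
    print(params)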
Project: spotlight    Author: maciejkula    | Project source | File source
def sample_cnn_hyperparameters(random_state, num):

    space = {
        'n_iter': N_ITER,
        'batch_size': BATCH_SIZE,
        'l2': L2,
        'learning_rate': LEARNING_RATES,
        'loss': LOSSES,
        'embedding_dim': EMBEDDING_DIM,
        'kernel_width': [3, 5, 7],
        'num_layers': list(range(1, 10)),
        'dilation_multiplier': [1, 2],
        'nonlinearity': ['tanh', 'relu'],
        'residual': [True, False]
    }

    sampler = ParameterSampler(space,
                               n_iter=num,
                               random_state=random_state)

    for params in sampler:
        params['dilation'] = [params['dilation_multiplier'] ** (i % 8)
                              for i in range(params['num_layers'])]

        yield params
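
The dilation entry is derived after sampling because it depends on two sampled values at once; the exponent wraps every 8 layers, so the schedule grows geometrically and then restarts. For example:

dilation_multiplier = 2
num_layers = 10
dilation = [dilation_multiplier ** (i % 8) for i in range(num_layers)]
# -> [1, 2, 4, 8, 16, 32, 64, 128, 1, 2]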
Project: spotlight    Author: maciejkula    | Project source | File source
def sample_lstm_hyperparameters(random_state, num):

    space = {
        'n_iter': N_ITER,
        'batch_size': BATCH_SIZE,
        'l2': L2,
        'learning_rate': LEARNING_RATES,
        'loss': LOSSES,
        'embedding_dim': EMBEDDING_DIM,
    }

    sampler = ParameterSampler(space,
                               n_iter=num,
                               random_state=random_state)

    for params in sampler:
        yield params
Project: spotlight    Author: maciejkula    | Project source | File source
def sample_pooling_hyperparameters(random_state, num):

    space = {
        'n_iter': N_ITER,
        'batch_size': BATCH_SIZE,
        'l2': L2,
        'learning_rate': LEARNING_RATES,
        'loss': LOSSES,
        'embedding_dim': EMBEDDING_DIM,
    }

    sampler = ParameterSampler(space,
                               n_iter=num,
                               random_state=random_state)

    for params in sampler:
        yield params
Project: Parallel-SGD    Author: angadgill    | Project source | File source
def test_param_sampler():
    # test basic properties of param sampler
    param_distributions = {"kernel": ["rbf", "linear"],
                           "C": uniform(0, 1)}
    sampler = ParameterSampler(param_distributions=param_distributions,
                               n_iter=10, random_state=0)
    samples = [x for x in sampler]
    assert_equal(len(samples), 10)
    for sample in samples:
        assert_true(sample["kernel"] in ["rbf", "linear"])
        assert_true(0 <= sample["C"] <= 1)

    # test that repeated calls yield identical parameters
    param_distributions = {"C": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}
    sampler = ParameterSampler(param_distributions=param_distributions,
                               n_iter=3, random_state=0)
    assert_equal([x for x in sampler], [x for x in sampler])

    if sp_version >= (0, 16):
        param_distributions = {"C": uniform(0, 1)}
        sampler = ParameterSampler(param_distributions=param_distributions,
                                   n_iter=10, random_state=0)
        assert_equal([x for x in sampler], [x for x in sampler])
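
The determinism checked here is easy to reproduce directly: with a fixed random_state, iterating the same ParameterSampler twice yields identical draws. (sp_version is scipy's version tuple; the guard exists because frozen scipy distributions gained direct seeding support only in scipy 0.16.)

from sklearn.model_selection import ParameterSampler

sampler = ParameterSampler({"C": [0, 1, 2, 3, 4]}, n_iter=3, random_state=0)
# Same seed, same draws: both passes produce the same list of dicts.
assert list(sampler) == list(sampler)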
Project: skutil    Author: tgsmith61591    | Project source | File source
def fit(self, frame):
        """Fit the grid search.

        Parameters
        ----------

        frame : H2OFrame, shape=(n_samples, n_features)
            The training frame on which to fit.
        """
        sampled_params = ParameterSampler(self.param_grid,
                                          self.n_iter,
                                          random_state=self.random_state)

        return self._fit(frame, sampled_params)
Project: skutil    Author: tgsmith61591    | Project source | File source
def fit(self, frame):
        """Fit the grid search.

        Parameters
        ----------

        frame : H2OFrame, shape=(n_samples, n_features)
            The training frame on which to fit.
        """
        sampled_params = ParameterSampler(self.param_grid,
                                          self.n_iter,
                                          random_state=self.random_state)

        # set our score class
        self.scoring_class_ = GainsStatisticalReport(**self.grsttngs_)

        # we can do this once to avoid many as_data_frame operations
        exp, loss, prem = _val_exp_loss_prem(self.exposure_feature, self.loss_feature, self.premium_feature)
        self.extra_args_ = {
            'expo': _as_numpy(frame[exp]),
            'loss': _as_numpy(frame[loss]),
            'prem': _as_numpy(frame[prem]) if prem is not None else None
        }

        # for validation set
        self.extra_names_ = {
            'expo': exp,
            'loss': loss,
            'prem': prem
        }

        # do fit
        the_fit = self._fit(frame, sampled_params)

        # clear extra_args_, because they might take lots of mem
        # we can do this because a re-fit will re-assign them anyways.
        # don't delete the extra_names_ though, because they're used in
        # scoring the incoming frame.
        del self.extra_args_

        return the_fit
Project: dask-searchcv    Author: dask    | Project source | File source
def _get_param_iterator(self):
        """Return ParameterSampler instance for the given distributions"""
        return model_selection.ParameterSampler(self.param_distributions,
                                                self.n_iter,
                                                random_state=self.random_state)
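
_get_param_iterator is the hook scikit-learn-style search classes use to produce their candidate parameter settings; scikit-learn's own RandomizedSearchCV defines the same method. A minimal sketch of the surrounding pattern (the class below is illustrative, not dask-searchcv's actual implementation):

from sklearn import model_selection

class RandomizedSearchSketch:
    # Illustrative skeleton only: real search classes also handle
    # cross-validation, scoring, and refitting.
    def __init__(self, param_distributions, n_iter=10, random_state=None):
        self.param_distributions = param_distributions
        self.n_iter = n_iter
        self.random_state = random_state

    def _get_param_iterator(self):
        return model_selection.ParameterSampler(self.param_distributions,
                                                self.n_iter,
                                                random_state=self.random_state)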
Project: Parallel-SGD    Author: angadgill    | Project source | File source
def test_parameters_sampler_replacement():
    # raise error if n_iter too large
    params = {'first': [0, 1], 'second': ['a', 'b', 'c']}
    sampler = ParameterSampler(params, n_iter=7)
    assert_raises(ValueError, list, sampler)
    # degenerates to GridSearchCV if n_iter the same as grid_size
    sampler = ParameterSampler(params, n_iter=6)
    samples = list(sampler)
    assert_equal(len(samples), 6)
    for values in ParameterGrid(params):
        assert_true(values in samples)

    # test sampling without replacement in a large grid
    params = {'a': range(10), 'b': range(10), 'c': range(10)}
    sampler = ParameterSampler(params, n_iter=99, random_state=42)
    samples = list(sampler)
    assert_equal(len(samples), 99)
    hashable_samples = ["a%db%dc%d" % (p['a'], p['b'], p['c'])
                        for p in samples]
    assert_equal(len(set(hashable_samples)), 99)

    # doesn't go into infinite loops
    params_distribution = {'first': bernoulli(.5), 'second': ['a', 'b', 'c']}
    sampler = ParameterSampler(params_distribution, n_iter=7)
    samples = list(sampler)
    assert_equal(len(samples), 7)
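
When every entry in the space is a list (a finite grid), ParameterSampler draws without replacement, which is what the duplicate check above verifies. A minimal reproduction:

from sklearn.model_selection import ParameterGrid, ParameterSampler

params = {'first': [0, 1], 'second': ['a', 'b', 'c']}
samples = list(ParameterSampler(params, n_iter=6, random_state=0))

# n_iter equals the grid size, so every grid point appears exactly once.
assert len(samples) == 6
assert all(point in samples for point in ParameterGrid(params))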
Project: Optimus    Author: Yatoom    | Project source | File source
def maximize(self, score_optimum=None, realize=True):
        """
        Find the next best hyper-parameter setting to optimize.

        Parameters
        ----------
        score_optimum: float
            An optional score to use inside the EI formula instead of the optimizer's current_best_score

        realize: bool
            Whether or not to give a more realistic estimate of the EI (default=True)

        Returns
        -------
        best_setting: dict
            The setting with the highest expected improvement

        best_score: float
            The highest EI (per second)
        """

        start = time.time()

        # Select a sample of parameters
        sampled_params = ParameterSampler(self.param_distributions, self.draw_samples)

        # Set score optimum
        if score_optimum is None:
            score_optimum = self.current_best_score

        # Determine the best parameters
        best_setting, best_score = self._maximize_on_sample(sampled_params, score_optimum)

        if self.local_search:
            best_setting, best_score = self._local_search(best_setting, best_score, score_optimum,
                                                          max_steps=self.ls_max_steps)

        if realize:
            best_setting, best_score = self._realize(best_setting, best_score, score_optimum)

        # Store running time
        running_time = (time.time() - start) / self.simulate_speedup
        self.maximize_times.append(running_time)

        return best_setting, best_score
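
Note that the sampler here is created without a random_state, so each call to maximize draws a fresh set of candidates; passing a seed would make runs reproducible. A minimal illustration of the difference:

from scipy.stats import uniform
from sklearn.model_selection import ParameterSampler

space = {'lr': uniform(1e-4, 1e-1)}

# Unseeded: two passes over the same sampler typically differ.
unseeded = ParameterSampler(space, n_iter=3)
print(list(unseeded) == list(unseeded))   # usually False

# Seeded: passes are reproducible.
seeded = ParameterSampler(space, n_iter=3, random_state=0)
print(list(seeded) == list(seeded))       # True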