Python sklearn.svm 模块,SVR 实例源码

我们从Python开源项目中,提取了以下50个代码示例,用于说明如何使用sklearn.svm.SVR

项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def test_regression():
    """Smoke-test BaggingRegressor across base estimators and settings.

    Every combination of base estimator and parameter-grid entry is fitted
    on a split of the first 50 rows of the module-level ``boston`` data and
    asked to predict; the test passes as long as nothing raises.
    """
    rng = check_random_state(0)
    features, targets = boston.data[:50], boston.target[:50]
    X_train, X_test, y_train, y_test = train_test_split(
        features, targets, random_state=rng)

    settings_grid = ParameterGrid({
        "max_samples": [0.5, 1.0],
        "max_features": [0.5, 1.0],
        "bootstrap": [True, False],
        "bootstrap_features": [True, False],
    })

    # None lets BaggingRegressor fall back to its own default estimator.
    candidates = [
        None,
        DummyRegressor(),
        DecisionTreeRegressor(),
        KNeighborsRegressor(),
        SVR(),
    ]

    for candidate in candidates:
        for settings in settings_grid:
            bagger = BaggingRegressor(base_estimator=candidate,
                                      random_state=rng,
                                      **settings)
            bagger.fit(X_train, y_train).predict(X_test)
项目:regression-stock-prediction    作者:chaitjo    | 项目源码 | 文件源码
def predict_price(dates, prices, x):
    """Fit three SVR models to a price series, plot them, and predict for ``x``.

    Args:
        dates: sequence of date values; reshaped into an (n, 1) column.
        prices: target values, one per date.
        x: input handed to each model's ``predict``.

    Returns:
        Tuple ``(rbf, linear, poly)`` holding the first element of each
        model's prediction for ``x``.
    """
    dates = np.reshape(dates, (len(dates), 1))  # n x 1 matrix

    # (model, line colour, legend label), in fit and plot order.
    fitted = [
        (SVR(kernel='rbf', C=1e3, gamma=0.1), 'red', 'RBF model'),
        (SVR(kernel='linear', C=1e3), 'green', 'Linear model'),
        (SVR(kernel='poly', C=1e3, degree=2), 'blue', 'Polynomial model'),
    ]
    for model, _, _ in fitted:
        model.fit(dates, prices)

    # Raw observations first, then one fitted curve per kernel.
    plt.scatter(dates, prices, color='black', label='Data')
    for model, colour, label in fitted:
        plt.plot(dates, model.predict(dates), color=colour, label=label)
    plt.xlabel('Date')
    plt.ylabel('Price')
    plt.title('Support Vector Regression')
    plt.legend()
    plt.show()

    return tuple(model.predict(x)[0] for model, _, _ in fitted)
项目:Electricity-Load-Forecasting    作者:palnabarun    | 项目源码 | 文件源码
def svrtrainsk(x, y, cost=1.0, epsilon=0.1):
    """Fit an SVR on ``(x, y)`` and return the fitted model.

    ``cost`` is forwarded as the SVR ``C`` argument and ``epsilon`` as its
    ``epsilon`` argument.
    """
    regressor = SVR(C=cost, epsilon=epsilon)
    regressor.fit(x, y)
    return regressor
项目:mlens    作者:flennerhag    | 项目源码 | 文件源码
def build_ensemble(**kwargs):
    """Build a SuperLearner with per-preprocessing estimators and a meta layer.

    ``kwargs`` are forwarded to the ``SuperLearner`` constructor. The first
    layer maps each named preprocessing case to its own estimators; a
    gradient-boosting regressor is added as the meta estimator.
    """
    ensemble = SuperLearner(**kwargs)

    # Case name -> list of transformers applied before that case's estimators.
    preprocessing = {}
    preprocessing['Standard Scaling'] = [StandardScaler()]
    preprocessing['Min Max Scaling'] = [MinMaxScaler()]
    preprocessing['No Preprocessing'] = []

    # Case name -> estimators trained on that case's transformed data.
    estimators = {}
    estimators['Standard Scaling'] = [ElasticNet(), Lasso(), KNeighborsRegressor()]
    estimators['Min Max Scaling'] = [SVR()]
    estimators['No Preprocessing'] = [
        RandomForestRegressor(random_state=SEED),
        GradientBoostingRegressor(),
    ]

    ensemble.add(estimators, preprocessing)
    ensemble.add(GradientBoostingRegressor(), meta=True)
    return ensemble
项目:time_series_modeling    作者:rheineke    | 项目源码 | 文件源码
def sample_pipelines(pca_kernels=None, svr_kernels=None):
    """
    Pipelines that can't be fit in a reasonable amount of time on the whole
    dataset.

    One pipeline is built per PCA kernel (StandardScaler -> KernelPCA ->
    LinearRegression) followed by one per SVR kernel (StandardScaler -> SVR).
    ``None`` selects the default kernel list for either argument.
    """
    if pca_kernels is None:
        pca_kernels = ['poly', 'rbf', 'sigmoid', 'cosine']
    if svr_kernels is None:
        svr_kernels = ['poly', 'rbf', 'sigmoid']

    # Each entry is the list of steps that follows the shared scaler.
    tails = [
        [KernelPCA(n_components=2, kernel=kernel_name), LinearRegression()]
        for kernel_name in pca_kernels
    ]
    tails.extend(
        [SVR(kernel=kernel_name, verbose=True, cache_size=1000)]
        for kernel_name in svr_kernels
    )

    # A fresh scaler per pipeline so no fitted state is shared.
    return [make_pipeline(StandardScaler(), *tail) for tail in tails]
项目:strategy    作者:kanghua309    | 项目源码 | 文件源码
def model_cross_valid(X,Y):
    """Print the mean 10-fold cross-validated score of each candidate model.

    Each model class is instantiated with its defaults and scored with
    ``neg_mean_squared_error``; the class and the mean score are printed.

    Args:
        X: feature matrix passed to ``cross_val_score``.
        Y: target values passed to ``cross_val_score``.
    """
    # The original passed random_state=7 without shuffle=True. The seed has no
    # effect on unshuffled folds and newer scikit-learn releases raise a
    # ValueError for that combination, so it is dropped; the folds produced
    # are the same contiguous, unshuffled splits as before.
    kfold = model_selection.KFold(n_splits=10)
    scoring = 'neg_mean_squared_error'

    # Other candidates tried previously: Ridge, Lasso, KNeighborsRegressor,
    # DecisionTreeRegressor, SVR, RandomForestRegressor, AdaBoostRegressor,
    # GradientBoostingRegressor.
    for model_name in [LinearRegression,ElasticNet]:
        model = model_name()
        results = model_selection.cross_val_score(model, X, Y, cv=kfold, scoring=scoring)
        print(model_name,results.mean())
项目:MENGEL    作者:CodeSpaceHQ    | 项目源码 | 文件源码
def train_support_vector_regression():
    """Return ``(ModelProperties(regression=True), svm.SVR())`` with default SVR settings."""
    properties = mp.ModelProperties(regression=True)
    estimator = svm.SVR()
    return properties, estimator


# http://xgboost.readthedocs.io/en/latest/python/python_api.html
项目:coremltools    作者:apple    | 项目源码 | 文件源码
def test_support_vector_regressor(self):
        """Check that the converted Core ML model matches scikit-learn's SVR.

        For every dtype key in ``self.number_data_type`` an RBF SVR is set up
        through ``self._sklearn_setup`` and both models predict the first data
        row. A ``RuntimeError`` raised while predicting is reported as an
        unsupported dtype instead of failing the test.
        """
        for dtype in self.number_data_type.keys():
            scikit_model = SVR(kernel='rbf')
            data = self.scikit_data['data'].astype(dtype)
            target = self.scikit_data['target'].astype(dtype)
            scikit_model, spec = self._sklearn_setup(scikit_model, dtype, data, target)
            test_data = data[0].reshape(1, -1)
            coreml_model = create_model(spec)
            try:
                # Predict once per model; the original repeated both
                # predictions just to build the failure message.
                expected = scikit_model.predict(test_data)[0]
                actual = coreml_model.predict({'data': test_data})['target']
                self.assertEqual(expected, actual,
                                 msg="{} != {} for Dtype: {}".format(
                                     expected, actual, dtype))
            except RuntimeError:
                print("{} not supported. ".format(dtype))
项目:job-salary-prediction    作者:soton-data-mining    | 项目源码 | 文件源码
def predict(self):
        """Fit an RBF SVR on the training set and predict train and test sets.

        The test predictions are also exported through
        ``BaseModel.export_prediction`` under the name
        ``'SVR_RBF_C1e3_Gamma01'``.

        Returns:
            Tuple ``(train_result, test_result)`` of predictions.
        """
        svr_rbf = SVM.SVR(kernel='rbf', C=1e3, gamma=0.1)
        # Fit once: the original refitted the same model on the same data
        # before each predict call, doubling the training cost.
        svr_rbf.fit(self.x_train, self.y_train)
        train_result = svr_rbf.predict(self.x_train)
        test_result = svr_rbf.predict(self.x_test)
        BaseModel.export_prediction(test_result, 'SVR_RBF_C1e3_Gamma01')
        return (train_result, test_result)
项目:yellowbrick    作者:DistrictDataLabs    | 项目源码 | 文件源码
def test_pred_error(self):
        """
        Prediction Error Plot integration: fit an SVR, score it through the
        visualizer, render, and compare against the baseline image.
        """
        regressor = SVR()
        splits = tts(X, y, test_size=0.5, random_state=42)
        X_train, X_test, y_train, y_test = splits
        regressor.fit(X_train, y_train)

        viz = PredictionError(regressor)
        viz.score(X_test, y_test)
        viz.poof()
        # Grid lines are switched off before the image comparison.
        viz.ax.grid(False)
        self.assert_images_similar(viz)

##########################################################################
## Residuals Plots test case
##########################################################################
项目:yellowbrick    作者:DistrictDataLabs    | 项目源码 | 文件源码
def test_clusterer_enforcement(self):
        """
        Only clustering estimators may be wrapped by ClusteringScoreVisualizer:
        other estimators must raise YellowbrickTypeError, clusterers must not.
        """
        rejected = (
            SVC, SVR, Ridge, RidgeCV, LinearRegression, RandomForestClassifier,
        )
        for estimator_cls in rejected:
            with self.assertRaises(YellowbrickTypeError):
                ClusteringScoreVisualizer(estimator_cls())

        accepted = (
            KMeans, MiniBatchKMeans, AffinityPropagation, MeanShift, DBSCAN, Birch,
        )
        for estimator_cls in accepted:
            try:
                ClusteringScoreVisualizer(estimator_cls())
            except YellowbrickTypeError:
                self.fail("could not pass clustering estimator to visualizer")
项目:Smart-Grid-Analytics    作者:Merit-Research    | 项目源码 | 文件源码
def __init__(self, num_features, training_window, training_interval):
        """
        Store the configuration and set up empty training buffers and the model.

        num_features: the length of the feature vector
        training_window: the number of previous data points to train on
        training_interval: the number of data points between training periods
        """
        # Configuration, stored as given.
        self.num_features = num_features
        self.training_window = training_window
        self.training_interval = training_interval

        # Bounded buffers: once training_window entries are held, appending
        # discards the oldest entry.
        self.targets = deque(maxlen=training_window)
        self.samples = deque(maxlen=training_window)

        # Regression model; an RBF SVR (C=1000) was used here previously.
        self.model = BayesianRidge()
        self.severity = blr.Severity()
        self.alpha = 1.0

        # Training bookkeeping.
        self.parameters = 0
        self.train_count = 0
        self.have_trained = False

        # Bounds for predictions: 0.0 up to +infinity.
        self.pred_range = [0.0, np.inf]
项目:jamespy_py3    作者:jskDr    | 项目源码 | 文件源码
def cv_SVR( xM, yV, svr_params, n_splits = 5, n_jobs = -1, grid_std = None, graph = True, shuffle = True):
    """
    Cross-validated SVR prediction.

    An SVR built from svr_params is evaluated with K-fold cross validation
    so that a prediction is generated for every input row.

    xM, yV: input matrix and target values
    svr_params: keyword arguments for sklearn.svm.SVR
    n_splits, shuffle: forwarded to KFold
    n_jobs: forwarded to cross_val_predict
    grid_std: forwarded to jutil.cv_show when graph is true
    graph: when true, show the predicted-vs-actual summary

    Returns the cross-validated predictions.
    """
    print(xM.shape, yV.shape)

    clf = svm.SVR( **svr_params)
    kf_n_c = model_selection.KFold( n_splits=n_splits, shuffle=shuffle)
    # Split with the KFold built just above; the original referenced
    # kf5_ext_c, a name that is never defined in this function.
    kf_n = kf_n_c.split( xM)
    yV_pred = model_selection.cross_val_predict( clf, xM, yV, cv = kf_n, n_jobs = n_jobs)

    if graph:
        print('The prediction output using cross-validation is given by:')
        jutil.cv_show( yV, yV_pred, grid_std = grid_std)

    return yV_pred
项目:jamespy_py3    作者:jskDr    | 项目源码 | 文件源码
def svm_SVR_C( xM, yV, c_l, graph = True):
    """
    Fit and evaluate one SVR per C value in c_l.

    Each model is trained and evaluated on the same data (xM, yV); the r2
    and sd reported by regress_show are collected per C value, and their
    averages are printed. With graph enabled, both are also plotted
    against log10(C).

    Returns the two lists (r2_l, sd_l).
    """
    r2_l = []
    sd_l = []

    for c_value in c_l:
        print('sklearn.svm.SVR(C={})'.format( c_value))
        regressor = svm.SVR( C = c_value)
        regressor.fit( xM, yV.A1)

        # regress_show receives the prediction as a column matrix.
        predicted = np.mat( regressor.predict( xM)).T
        r2, sd = regress_show( yV, predicted, graph = graph)
        r2_l.append( r2)
        sd_l.append( sd)

    print('average r2, sd are', np.mean( r2_l), np.mean( sd_l))

    if graph:
        summary = pd.DataFrame( { 'log10(C)': np.log10(c_l), 'r2': r2_l, 'sd': sd_l})
        summary.plot( x = 'log10(C)')

    return r2_l, sd_l
项目:jamespy_py3    作者:jskDr    | 项目源码 | 文件源码
def cv_SVR( xM, yV, svr_params, n_splits = 5, n_jobs = -1, grid_std = None, graph = True, shuffle = True):
    """
    Cross-validated SVR prediction.

    An SVR built from svr_params is evaluated with K-fold cross validation
    so that a prediction is generated for every input row.

    xM, yV: input matrix and target values
    svr_params: keyword arguments for sklearn.svm.SVR
    n_splits, shuffle: forwarded to KFold
    n_jobs: forwarded to cross_val_predict
    grid_std: forwarded to kutil.cv_show when graph is true
    graph: when true, show the predicted-vs-actual summary

    Returns the cross-validated predictions.
    """
    print(xM.shape, yV.shape)

    clf = svm.SVR( **svr_params)
    kf_n_c = model_selection.KFold( n_splits=n_splits, shuffle=shuffle)
    # Split with the KFold built just above; the original referenced
    # kf5_ext_c, a name that is never defined in this function.
    kf_n = kf_n_c.split( xM)
    yV_pred = model_selection.cross_val_predict( clf, xM, yV, cv = kf_n, n_jobs = n_jobs)

    if graph:
        print('The prediction output using cross-validation is given by:')
        kutil.cv_show( yV, yV_pred, grid_std = grid_std)

    return yV_pred
项目:jamespy_py3    作者:jskDr    | 项目源码 | 文件源码
def svm_SVR_C( xM, yV, c_l, graph = True):
    """
    Sweep the SVR regularisation constant over the values in c_l.

    For every C an SVR is fitted on (xM, yV) and evaluated on that same
    data through regress_show, which yields an r2 and an sd value. The
    averages are printed and, if graph is set, both series are plotted
    against log10(C).

    Returns (r2_l, sd_l), one entry per C value.
    """
    r2_l = []
    sd_l = []

    for c_value in c_l:
        print('sklearn.svm.SVR(C={})'.format( c_value))
        model = svm.SVR( C = c_value)
        model.fit( xM, yV.A1)

        # Column-matrix form of the prediction, as regress_show is given.
        yV_pred = np.mat( model.predict( xM)).T
        r2, sd = regress_show( yV, yV_pred, graph = graph)
        r2_l.append( r2)
        sd_l.append( sd)

    print('average r2, sd are', np.mean( r2_l), np.mean( sd_l))

    if graph:
        table = pd.DataFrame( { 'log10(C)': np.log10(c_l), 'r2': r2_l, 'sd': sd_l})
        table.plot( x = 'log10(C)')

    return r2_l, sd_l
项目:jamespy_py3    作者:jskDr    | 项目源码 | 文件源码
def cv_SVR(xM, yV, svr_params, n_folds=5, n_jobs=-1, grid_std=None, graph=True, shuffle=True):
    """
    Cross-validated SVR prediction.

    An SVR built from svr_params is evaluated with K-fold cross validation
    so that a prediction is generated for every input row.

    xM, yV: input matrix and target values (yV.A1 is used as the target)
    svr_params: keyword arguments for sklearn.svm.SVR
    n_folds: number of folds
    n_jobs: forwarded to cross_val_predict
    grid_std: forwarded to jutil.cv_show when graph is true
    graph: when true, show the predicted-vs-actual summary
    shuffle: whether KFold shuffles the rows before splitting

    Returns the cross-validated predictions.
    """
    print(xM.shape, yV.shape)

    clf = svm.SVR(**svr_params)
    # Honour the caller's shuffle flag; the original hard-coded shuffle=True,
    # so shuffle=False was silently ignored. The default is unchanged.
    kf_n_c = model_selection.KFold(n_splits=n_folds, shuffle=shuffle)
    kf_n = kf_n_c.split(xM)

    yV_pred = model_selection.cross_val_predict(
        clf, xM, yV.A1, cv=kf_n, n_jobs=n_jobs)

    if graph:
        print('The prediction output using cross-validation is given by:')
        jutil.cv_show(yV, yV_pred, grid_std=grid_std)

    return yV_pred
项目:ML-note    作者:JasonK93    | 项目源码 | 文件源码
def test_SVR_rbf(*data):
    '''
    Plot train and test scores of an RBF-kernel SVR for gamma = 1..19.

    :param data: train_data, test_data, train_target, test_target
    :return: None
    '''
    X_train, X_test, y_train, y_test = data
    gammas = range(1, 20)

    train_scores, test_scores = [], []
    for gamma in gammas:
        model = svm.SVR(kernel='rbf', gamma=gamma)
        model.fit(X_train, y_train)
        train_scores.append(model.score(X_train, y_train))
        test_scores.append(model.score(X_test, y_test))

    figure = plt.figure()
    axes = figure.add_subplot(1, 1, 1)
    # (scores, legend label, marker) for each curve, in drawing order.
    curves = (
        (train_scores, "Training score ", '+'),
        (test_scores, " Testing  score ", 'o'),
    )
    for scores, label, marker in curves:
        axes.plot(gammas, scores, label=label, marker=marker)
    axes.set_title("SVR_rbf")
    axes.set_xlabel(r"$\gamma$")
    axes.set_ylabel("score")
    axes.set_ylim(-1, 1)
    axes.legend(loc="best", framealpha=0.5)
    plt.show()
项目:Informed-Finance-Canary    作者:Darthone    | 项目源码 | 文件源码
def regressorOp(x, y):
    """
    Grid-search the C, gamma and epsilon parameters of an RBF-kernel SVR.

    The search is scored with r2 and fitted on (x, y). The best estimator is
    printed together with its type and then returned.

    NOTE(review): this block uses Python 2 print statements and the
    ``grid_search`` module name — confirm the targeted interpreter and
    scikit-learn version before porting.
    """
    regr_rbf = svm.SVR(kernel="rbf")
    # Candidate values explored by the exhaustive search.
    C = [1000, 10, 1]
    gamma = [0.005, 0.004, 0.003, 0.002, 0.001]
    epsilon = [0.1, 0.01]
    parameters = {"C":C, "gamma":gamma, "epsilon":epsilon}

    gs = grid_search.GridSearchCV(regr_rbf, parameters, scoring="r2")   
    gs.fit(x, y)

    print "Best Estimator:\n", gs.best_estimator_
    print "Type: ", type(gs.best_estimator_)

    return gs.best_estimator_
项目:bitcoin-forecast    作者:roksela    | 项目源码 | 文件源码
def __init__(self, model_type=DEFAULT_MODEL_TYPE):
        """
        Set up the scaler, the SVR model and the pipeline joining them.

        :param model_type: only 'SVR' model is supported for now; any other
            value fails the assertion below
        """
        unsupported = "Model '{}' is not supported. We support only SVR for now."
        assert model_type == 'SVR', unsupported.format(model_type)

        params = BTCForecast.DEFAULT_SVR_MODEL_PARAMS
        self._model_type = model_type
        self._model_params = params

        # Standardise the inputs, then regress with the SVR.
        self._scaler = preprocessing.StandardScaler(copy=True, with_mean=True, with_std=True)
        self._model = SVR(
            kernel=params['kernel'],
            epsilon=params['epsilon'],
            C=params['c'],
            gamma=params['gamma'],
        )
        self._pipeline = make_pipeline(self._scaler, self._model)

        self.has_learned = False
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def test_ovr_single_label_predict_proba():
    """Check predict_proba of OneVsRestClassifier on single-label iris data."""
    X, Y = iris.data, iris.target
    X_train, Y_train, X_test = X[:80], Y[:80], X[80:]

    clf = OneVsRestClassifier(MultinomialNB(alpha=1)).fit(X_train, Y_train)

    # A base estimator without predict_proba (SVR) must make the wrapper's
    # predict_proba raise AttributeError.
    decision_only = OneVsRestClassifier(svm.SVR()).fit(X_train, Y_train)
    assert_raises(AttributeError, decision_only.predict_proba, X_test)

    Y_pred = clf.predict(X_test)
    Y_proba = clf.predict_proba(X_test)

    # Each row of probabilities sums to one.
    assert_almost_equal(Y_proba.sum(axis=1), 1.0)

    # The predicted label is the column holding the highest probability.
    most_likely = Y_proba.argmax(axis=1)
    assert_false((most_likely - Y_pred).any())
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def test_rfe_min_step():
    """RFE keeps half of the features for each of the tested step settings."""
    X, y = make_friedman1(n_samples=50, n_features=10, random_state=0)
    n_features = X.shape[1]
    estimator = SVR(kernel="linear")
    expected_kept = n_features // 2

    # 0.01 -> floor(step * n_features) <= 0
    # 0.20 -> step in (0, 1) with floor(step * n_features) > 0
    # 5    -> integer step
    for step in (0.01, 0.20, 5):
        fitted = RFE(estimator, step=step).fit(X, y)
        assert_equal(fitted.support_.sum(), expected_kept)
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def test_svr():
    """Support Vector Regression: linear regressors must beat a minimal score."""
    diabetes = datasets.load_diabetes()
    data, target = diabetes.data, diabetes.target

    regressors = [
        svm.NuSVR(kernel='linear', nu=.4, C=1.0),
        svm.NuSVR(kernel='linear', nu=.4, C=10.),
        svm.SVR(kernel='linear', C=10.),
        svm.LinearSVR(C=10.),
        svm.LinearSVR(C=10.),
    ]
    for regressor in regressors:
        regressor.fit(data, target)
        assert_greater(regressor.score(data, target), 0.02)

    # non-regression test; previously, BaseLibSVM would check that
    # len(np.unique(y)) < 2, which must only be done for SVC
    constant_target = np.ones(len(data))
    svm.SVR().fit(data, constant_target)
    svm.LinearSVR().fit(data, np.ones(len(data)))
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def test_svr_predict():
    """SVR.predict must match the decision function recomputed in Python.

    The prediction is rebuilt from the fitted attributes for a linear and an
    RBF kernel and compared with what ``predict`` returns.
    """
    features, target = iris.data, iris.target

    # Linear kernel: X . coef^T + intercept
    linear_reg = svm.SVR(kernel='linear', C=0.1).fit(features, target)
    manual = np.dot(features, linear_reg.coef_.T) + linear_reg.intercept_
    assert_array_almost_equal(manual.ravel(),
                              linear_reg.predict(features).ravel())

    # RBF kernel: K(X, support vectors) . dual_coef^T + intercept
    rbf_reg = svm.SVR(kernel='rbf', gamma=1).fit(features, target)
    gram = rbf_kernel(features, rbf_reg.support_vectors_, gamma=rbf_reg.gamma)
    manual = np.dot(gram, rbf_reg.dual_coef_.T) + rbf_reg.intercept_
    assert_array_almost_equal(manual.ravel(),
                              rbf_reg.predict(features).ravel())
项目:kaggle    作者:RankingAI    | 项目源码 | 文件源码
def train(self):
        """Fit a linear SVR on the outlier-trimmed training data.

        Rows whose 'logerror' lies outside the open interval
        (self._low, self._up) are dropped, the remaining features are cast to
        float32 and the fitted model is stored in self._model. Afterwards the
        validation rows are appended to self.TrainData.
        """
        started = time.time()

        print('size before truncated outliers is %d ' % len(self.TrainData))
        logerror = self.TrainData['logerror']
        within_bounds = (logerror > self._low) & (logerror < self._up)
        self.TrainData = self.TrainData[within_bounds]
        print('size after truncated outliers is %d ' % len(self.TrainData))

        features = self.TrainData.drop(self._l_drop_cols, axis=1)
        target = self.TrainData['logerror']
        self._l_train_columns = features.columns
        features = features.values.astype(np.float32, copy=False)

        regressor = SVR(C = self._C, epsilon= self._epsilon, tol= 1e-3, kernel= 'linear',max_iter= 100, verbose= True)
        self._model = regressor.fit(features, target)

        elapsed = time.time() - started
        print('time consumed %d ' % elapsed)

        # Only the target path is recorded; writing the pickle is disabled.
        timestamp = datetime.now().strftime('%Y%m%d-%H:%M:%S')
        self._f_eval_train_model = '{0}/{1}_{2}.pkl'.format(
            self.OutputDir, self.__class__.__name__, timestamp)

        # ignore_index resets the index so the two frames' indices cannot overlap.
        validation_rows = self.ValidData[self.TrainData.columns]
        self.TrainData = pd.concat([self.TrainData, validation_rows],
                                   ignore_index=True)
项目:sanergy-public    作者:dssg    | 项目源码 | 文件源码
def define_model(self):
        """Return an unfitted regressor matching ``self.modeltype``.

        Supported types are "RandomForest", "LinearRegression", "Lasso",
        "ElasticNet" and "SVR"; their hyper-parameters are read from
        ``self.parameters``. Any other type raises ``ConfigError``.
        (AR, StaticModel, AdvancedStaticModel and SGDRegressor branches
        existed as commented-out code and are not supported.)
        """
        kind = self.modeltype

        if kind == "RandomForest":
            return ensemble.RandomForestRegressor(
                n_estimators=self.parameters['n_estimators'])

        if kind == "LinearRegression":
            return linear_model.LinearRegression()

        if kind == "Lasso":
            return linear_model.Lasso(alpha=self.parameters['alpha'])

        if kind == "ElasticNet":
            return linear_model.ElasticNet(
                alpha=self.parameters['alpha'],
                l1_ratio=self.parameters['l1_ratio'])

        if kind == "SVR":
            return SVR(
                C=self.parameters['C'],
                epsilon=self.parameters['epsilon'],
                kernel=self.parameters['kernel'])

        raise ConfigError("Unsupported model {0}".format(kind))
项目:OpenAPS    作者:medicinexlab    | 项目源码 | 文件源码
def lasso_regression_model(parameter_array):
    """Return a Lasso model whose alpha is the first entry of ``parameter_array``."""
    return linear_model.Lasso(
        alpha=parameter_array[0],
        fit_intercept=True,
        normalize=True,
        precompute=False,
        copy_X=True,
        max_iter=1000,
        tol=0.0001,
        warm_start=False,
        positive=False,
        random_state=None,
        selection='cyclic',
    )

#Returns the SVR Linear Kernel model
项目:OpenAPS    作者:medicinexlab    | 项目源码 | 文件源码
def svr_linear_regression(parameter_array):
    """Return a linear-kernel SVR whose C is the first entry of ``parameter_array``.

    Epsilon is fixed at 0.1; reading it from ``parameter_array[1]`` is disabled.
    """
    return svm.SVR(
        kernel='linear',
        degree=3,
        gamma='auto',
        coef0=0.0,
        tol=0.001,
        C=parameter_array[0],
        epsilon=0.1,
        shrinking=True,
        cache_size=200,
        verbose=False,
        max_iter=-1,
    )

#Returns the mlp regression model
项目:XTREE    作者:ai-se    | 项目源码 | 文件源码
def SVM(train, test, tunings=None, smoteit=True, bin=True, regress=False):
  """Train a default SVC (or SVR) on ``train`` and predict on ``test``.

  Args:
    train: DataFrame, or anything ``csv2DF`` can load; the last column is
      the target and all other columns are features.
    test: DataFrame, or anything ``csv2DF`` can load; same column layout.
    tunings: accepted for interface compatibility; it is not used, the
      model is always built with default settings.
    smoteit: when true, ``train`` is resampled with ``SMOTE`` first.
    bin: forwarded as ``toBin`` when loading ``train``.
    regress: use ``SVR`` instead of ``SVC``.

  Returns:
    Tuple ``(actual, preds)``: the test targets and the model's predictions.
  """
  if not isinstance(train, pd.core.frame.DataFrame):
    train = csv2DF(train, as_mtx=False, toBin=bin)

  if not isinstance(test, pd.core.frame.DataFrame):
    test = csv2DF(test, as_mtx=False, toBin=True)

  if smoteit:
    train = SMOTE(train, resample=True)

  # The original branched on ``tunings`` but built the same default model in
  # both branches; the duplicate branch is collapsed.
  clf = SVR() if regress else SVC()

  features = train.columns[:-1]
  klass = train[train.columns[-1]]
  clf.fit(train[features], klass)

  # ``.values`` replaces ``as_matrix()``, which pandas 1.0 removed.
  actual = test[test.columns[-1]].values
  try:
    preds = clf.predict(test[test.columns[:-1]])
  except Exception:
    # Keep the debugging hook, then re-raise the real error; the original
    # fell through to the return and failed on the unbound ``preds``.
    set_trace()
    raise
  return actual, preds
项目:Supply-demand-forecasting    作者:LevinJ    | 项目源码 | 文件源码
def setClf(self):
        """Set ``self.clf`` to a pipeline of min-max scaling followed by an SVR."""
        steps = [
            ('scaler', preprocessing.MinMaxScaler()),
            ('estimator', SVR(C=100, epsilon=0.1, gamma = 0.0001,cache_size = 10240)),
        ]
        # Pipeline order matters: the scaler must run before the estimator.
        steps[0], steps[1] = (steps[0][0], steps[0][1]), (steps[1][0], steps[1][1])
        self.clf = Pipeline(steps)
项目:finance-ml    作者:Omarkhursheed    | 项目源码 | 文件源码
def train():
    """Train and pickle SVR, linear-regression and MLP models for every stock.

    For each file name in ``onlyfiles`` the CSV under ``data_files`` is
    loaded, two percentage features are derived, and the adjusted close
    shifted ``forecast_out`` rows ahead is used as the label. Every model is
    pickled once before fitting (``<tag>_unfit``) and once after
    (``<tag>_fit``) below ``dname/models``.
    """
    def _save(model, folder, filename):
        # ``with`` closes the handle even if pickling fails; the original
        # passed a bare open() to pickle.dump and never closed the file.
        with open(join(dname+'/models/' + folder + '/', filename), 'wb') as handle:
            pickle.dump(model, handle)

    os.chdir(dname)
    for selected_stock in onlyfiles:
        df = pd.read_csv(os.path.join('data_files',selected_stock))
        # Keep only the adjusted price/volume columns.
        df = df[['Adj. Open',  'Adj. High',  'Adj. Low',  'Adj. Close', 'Adj. Volume']]
        # High-low spread as a percentage of the low (a volatility measure).
        df['HL_PCT'] = (df['Adj. High'] - df['Adj. Low']) / df['Adj. Low'] * 100.0
        df['PCT_change'] = (df['Adj. Close'] - df['Adj. Open']) / df['Adj. Open'] * 100.0
        df = df[['Adj. Close', 'HL_PCT', 'PCT_change', 'Adj. Volume']]
        forecast_col = 'Adj. Close'
        df.fillna(value=-99999, inplace=True)
        # Forecast horizon: 1% of the series length, rounded up.
        forecast_out = int(math.ceil(0.01 * len(df)))
        df['label'] = df[forecast_col].shift(-forecast_out)

        # ``axis=1`` by keyword: the positional form was removed in pandas 2.0.
        X = np.array(df.drop(['label'], axis=1))
        X = preprocessing.scale(X)
        # The last forecast_out rows have no label (shifted past the end).
        X = X[:-forecast_out]

        df.dropna(inplace=True)
        y = np.array(df['label'])
        X_train, X_test, y_train, y_test = cross_validation.train_test_split(X, y, test_size=0.2)

        models = (
            ('svr', SVR()),
            ('lr', LinearRegression()),
            ('mlp', MLPRegressor()),
        )
        # Snapshot each model untrained, then fit it.
        for tag, model in models:
            _save(model, tag + '_unfit', selected_stock + tag + '.sav')
            model.fit(X_train, y_train)

        # Snapshot the fitted models.
        for tag, model in models:
            _save(model, tag + '_fit', selected_stock + tag + '.sav')

        print(selected_stock+" - trained")
项目:HorseRacingPrediction    作者:dominicplouffe    | 项目源码 | 文件源码
def train(self):
        """Fit an SVR on the training CSV and pickle it to ``finish_pos.model``.

        The training data comes from ``self._get_data`` for the file
        'training-2016-12-01-2017-02-28.csv'.
        """
        clf = SVR(C=1.0, epsilon=0.1, cache_size=1000)
        X, y, = self._get_data('training-2016-12-01-2017-02-28.csv')

        # Fit the model
        clf.fit(X, y)

        # Pickle the model so we can save and reuse it
        s = pickle.dumps(clf)

        # Save the model to a file; ``with`` closes the handle even when the
        # write fails, which the original open()/close() pair did not.
        with open('finish_pos.model', 'wb') as f:
            f.write(s)
项目:drone    作者:arunsoman    | 项目源码 | 文件源码
def learn(x, y, c=1e3, gamma=0.1):
    """Fit an RBF-kernel SVR on ``(x, y)`` and return what ``fit`` returns.

    ``c`` is forwarded as the SVR ``C`` argument, ``gamma`` as ``gamma``.
    """
    regressor = SVR(kernel='rbf', C=c, gamma=gamma)
    return regressor.fit(x, y)
项目:stock_price_prediction    作者:inaciomdrs    | 项目源码 | 文件源码
def svr_lin():
    """Return an unfitted linear-kernel SVR with C=1e3."""
    return SVR(kernel='linear', C=1e3)
项目:stock_price_prediction    作者:inaciomdrs    | 项目源码 | 文件源码
def svr_poly():
    """Return an unfitted degree-2 polynomial-kernel SVR with C=1e3."""
    return SVR(kernel='poly', C=1e3, degree=2)
项目:stock_price_prediction    作者:inaciomdrs    | 项目源码 | 文件源码
def svr_rbf():
    """Return an unfitted RBF-kernel SVR with C=1e3 and gamma=0.1."""
    return SVR(kernel='rbf', C=1e3, gamma=0.1)
项目:papers    作者:jeffheaton    | 项目源码 | 文件源码
def svr_grid():
    """Return an unfitted grid search over C and gamma for an RBF-kernel SVR.

    Verbosity and parallelism come from the module-level ``VERBOSE`` and
    ``THREADS`` values.
    """
    search_space = {
        'C': [1e-2, 1, 1e2],
        'gamma': [1e-1, 1, 1e1],
    }
    base_model = SVR(kernel='rbf')
    return GridSearchCV(base_model, verbose=VERBOSE, n_jobs=THREADS,
                        param_grid=search_space)


# Perform an experiment for a single model type.
项目:Smart-Trader    作者:i-sultan    | 项目源码 | 文件源码
def __init__(self):
        """ Initialize predictive model with its names, indicators, and parameter grid. """
        self.name = "Support Vector"
        self.summary_name = "SVR"

        # Indicator name -> sample count, for the reduced and the full set.
        self.indicators_samples = {'Daily': 42}
        self.full_indicators_samples = {
            'Daily': 42,
            'Volume': 10,
            'Open': 10,
            'High': 10,
            'Low': 10,
            'SMA': 5,
            'EWMA': 5,
            'MOM': 5,
            'STD': 5,
        }

        # Candidate values per model parameter.
        self.model_params = {
            'kernel': ['poly', 'rbf'],
            'C': [1e-2, 0.1, 1, 10],
            'tolerance': [.001, 0.1],
            'full_indicators': [True, False],
            'sample_presentation': [SamplePresentation.cumulative],
        }

        # Slot for a pretrained model kept for future use.
        self.pretrained_model = None
项目:strategy    作者:kanghua309    | 项目源码 | 文件源码
def model_fit_and_test(TrainX,TrainY,TestX,TestY):
    """Fit each candidate regressor and print residual diagnostics on the test set.

    For every model class the model is fitted on (TrainX, TrainY); the mean
    squared residual on the test set is printed, a residual-vs-prediction
    scatter plot is shown, and the model's score plus Jarque-Bera,
    heteroskedasticity and Ljung-Box results on the residuals are printed.

    NOTE(review): this block mixes ``print(...)`` calls with Python 2 print
    statements, so it only parses under Python 2 — confirm the target
    interpreter before porting.
    """
    def bulid_model(model_name):
        # Instantiate the given model class with its default arguments.
        model = model_name()
        return model
    #for model_name in [LinearRegression, Ridge, Lasso, ElasticNet, KNeighborsRegressor, DecisionTreeRegressor, SVR,RandomForestRegressor, AdaBoostRegressor, GradientBoostingRegressor]:
    for model_name in [LinearRegression, ElasticNet]:
        model = bulid_model(model_name)
        model.fit(TrainX,TrainY)
        print(model_name)
        # Residuals on the held-out data: prediction minus truth.
        resid = model.predict(TestX) - TestY
        #print resid
        # The label says "sum" but the value printed is the mean of the squares.
        print("Residual sum of squares: %f"% np.mean(resid ** 2))
        #print model.predict(TestX)
        #print TestY
        # Explained variance score: 1 is perfect prediction
        plt.scatter(model.predict(TestX), resid);
        plt.axhline(0, color='red')
        plt.xlabel('Predicted Values')
        plt.ylabel('Residuals')
        #plt.xlim([1, 50])
        plt.show()

        print('Variance score: %.2f' % model.score(TestX, TestY))

        # Normality of the residuals.
        from statsmodels.stats.stattools import jarque_bera
        _, pvalue, _, _ = jarque_bera(resid)
        print ("Test Residuals Normal", pvalue)

        from statsmodels import regression, stats
        import statsmodels.api as sms
        import statsmodels.stats.diagnostic as smd
        # xs_with_constant = sms.add_constant(np.column_stack((X1,X2,X3,X4)))
        xs_with_constant = sms.add_constant(TestX)
        # NOTE(review): ``het_breushpagan`` is the spelling used here; verify
        # it exists in the installed statsmodels version.
        _, pvalue1, _, _ = stats.diagnostic.het_breushpagan(resid, xs_with_constant)
        print ("Test Heteroskedasticity", pvalue1)
        # Autocorrelation of the residuals over the first 10 lags.
        ljung_box = smd.acorr_ljungbox(resid, lags=10)

        #print "Lagrange Multiplier Statistics:", ljung_box[0]
        print "Test Autocorrelation P-values:", ljung_box[1]
        # Any lag with p < 0.05 is reported as autocorrelation.
        if any(ljung_box[1] < 0.05):
            print "The residuals are autocorrelated."
        else:
            print "The residuals are not autocorrelated."
项目:MENGEL    作者:CodeSpaceHQ    | 项目源码 | 文件源码
def train_bayesian_ridge():
    """Return ``(ModelProperties(regression=True), BayesianRidge())`` with default settings."""
    properties = mp.ModelProperties(regression=True)
    estimator = linear_model.BayesianRidge()
    return properties, estimator


# http://scikit-learn.org/stable/modules/generated/sklearn.svm.SVR.html
项目:coremltools    作者:apple    | 项目源码 | 文件源码
def setUpClass(self):
        """
        Load the Boston dataset and train a linear-kernel SVR for the tests.

        Nothing is set up when scikit-learn is unavailable (HAS_SKLEARN false).
        """
        if HAS_SKLEARN:
            dataset = load_boston()
            regressor = SVR(kernel='linear')
            regressor.fit(dataset['data'], dataset['target'])

            # Keep the data and the fitted model for the test methods.
            self.scikit_data = dataset
            self.scikit_model = regressor
项目:coremltools    作者:apple    | 项目源码 | 文件源码
def test_conversion_bad_inputs(self):
        """The converter must raise TypeError for unsupported inputs.

        Covers an untrained SVR and an estimator of an unexpected class
        (OneHotEncoder).
        """
        for estimator_cls in (SVR, OneHotEncoder):
            with self.assertRaises(TypeError):
                unfit_model = estimator_cls()
                sklearn_converter.convert(unfit_model, 'data', 'out')
项目:stock-price-prediction    作者:chinuy    | 项目源码 | 文件源码
def training(self, c, g):
        """Fit an RBF-kernel SVR (``C=c``, ``gamma=g``) on the training dates and prices.

        The fitted model is stored in ``self.model``.
        """
        regressor = SVR(kernel= 'rbf', C= c, gamma= g)
        self.model = regressor
        regressor.fit(self.train_date, self.train_price)
项目:stock-price-prediction    作者:chinuy    | 项目源码 | 文件源码
def draw(self):
        """Plot the raw prices and the model's predictions over ``self.dates``."""
        dates = self.dates
        fitted_curve = None

        plt.scatter(dates, self.prices, color= 'black', label= 'Data')
        fitted_curve = self.model.predict(self.dates)
        plt.plot(self.dates, fitted_curve, color= 'red', label= 'RBF model')
        plt.xlabel('Date')
        plt.ylabel('Price')
        plt.title('SVR test for SPY trimmed data (2014, 2015)')
        # The legend is intentionally left off (plt.legend() is disabled).
        plt.show()
项目:KDD-2017-Travel-Time-Prediction    作者:InfiniteWing    | 项目源码 | 文件源码
def getWeatherPredict(predict,nowww):
    """Scale ``predict`` by the square root of a factor selected by ``nowww``.

    The factor is chosen by threshold: ``nowww >= 2``, ``nowww >= 1.0``,
    ``nowww > 0``, and otherwise 0.98.
    (Presumably ``nowww`` is a weather measurement — confirm with the caller.)
    """
    if nowww >= 2:
        factor = 1.110727412879317
    elif nowww >= 1.0:
        factor = 1.0960104809326925
    elif nowww > 0:
        factor = 1.0730721851729204
    else:
        factor = 0.98
    return predict*math.sqrt(factor)

# NOTE(review): the original comment here was lost to mis-encoding; presumably it introduced the SVR model below.
项目:KDD-2017-Travel-Time-Prediction    作者:InfiniteWing    | 项目源码 | 文件源码
def SVR_Model(fw,train_lines,test_train_lines,test_lines,mn_time):
    """Train an SVR on ``train_lines`` and print weighted predictions for ``test_lines``.

    Every line is parsed with ``parsePoint`` into a label and a feature
    vector. Each prediction is multiplied by a weighting derived from the
    sample's time relative to the average training time and then passed to
    ``printResult``. ``test_train_lines`` is accepted but not used.
    """
    labels_train = []
    features_train = []
    for line in train_lines:
        label, feature = parsePoint(line)
        labels_train.append(label)
        features_train.append(feature)

    labels_test = []
    features_test = []
    for line in test_lines:
        label, feature = parsePoint(line)
        labels_test.append(label)
        features_test.append(feature)

    X = np.array(features_train)
    y = np.array(labels_train)
    X_test = np.array(features_test)

    regressor = SVR(kernel=KERNEL, C=C_VALUE)
    predictions = regressor.fit(X, y).predict(X_test)

    avgTime = getAvgTime(features_train)

    for feature, label, predict in zip(features_test, labels_test, predictions):
        sample_time = getTime(feature)
        # Ratio of this sample's time to the average, damped by three nested
        # square roots.
        weighting = 1-(avgTime-sample_time)/avgTime
        weighting = math.sqrt(math.sqrt(math.sqrt((weighting+weighting)/2)))
        # For mn_time 2 and 4 the prediction is left unweighted (weighting=1).
        if mn_time in (2, 4):
            weighting = 1
        printResult(fw, label, predict*weighting, mn_time)
项目:mlens    作者:flennerhag    | 项目源码 | 文件源码
def build_ensemble(kls, **kwargs):
    """Return a ``kls(**kwargs)`` ensemble with four SVRs and an SVR meta estimator."""
    ensemble = kls(**kwargs)
    base_layer = [SVR() for _ in range(4)]
    ensemble.add(base_layer)
    ensemble.add_meta(SVR())
    return ensemble
项目:mlens    作者:flennerhag    | 项目源码 | 文件源码
def build_ensemble(kls, **kwargs):
    """Return a ``kls(**kwargs)`` ensemble of five mixed regressors with a Lasso meta estimator."""
    ensemble = kls(**kwargs)

    base_layer = [
        SVR(),
        RandomForestRegressor(),
        GradientBoostingRegressor(),
        Lasso(copy_X=False),
        MLPRegressor(shuffle=False, alpha=0.001),
    ]
    ensemble.add(base_layer)

    ensemble.add_meta(Lasso(copy_X=False))
    return ensemble
项目:eezzy    作者:3Blades    | 项目源码 | 文件源码
def spot_check(X, y):
    """Score a set of regressors with ``check_model`` and print one line per model.

    All models are scored first (5 splits each); the names and scores are
    printed afterwards in the same order.

    NOTE(review): ``type`` is not defined in this function, so the guard
    compares a module-level ``type`` (or the builtin) with 'regression';
    when it is false ``models`` is never assigned — confirm the intent.
    """
    if type == 'regression':
        # Random forest, bagging, gradient boosting and SVR were also
        # candidates here but are disabled.
        models = [
            (LinearRegression(), 'Ordinary Least Squares'),
            (Ridge(alpha=0.1), 'Ridge (alpha 0.1)'),
            (Ridge(), 'Ridge (alpha 1.0)'),
            (Lasso(alpha=0.1), 'Lasso (alpha 0.1)'),
            (Lasso(), 'Lasso (alpha 1.0)'),
            (ElasticNet(alpha=0.1), 'ElasticNet (alpha 0.1)'),
            (ElasticNet(), 'ElasticNet (alpha 1.0)'),
            (DecisionTreeRegressor(), 'Decision Tree'),
            (KNeighborsRegressor(), 'K-Nearest Neighbors'),
        ]

    splits = 5
    scores = [check_model(candidate, splits, X, y) for candidate, _ in models]

    for (_, label), score in zip(models, scores):
        print('%s: %f' % (label, score))
项目:poormining    作者:bowenpay    | 项目源码 | 文件源码
def get_classifier(self, X, Y):
        """ Fit a linear-kernel SVR on the given data.
        :param X: training inputs passed to ``fit``
        :param Y: training targets passed to ``fit``
        :return: the fitted model
        """
        model = SVR(kernel='linear')
        model.fit(X, Y)
        return model