Python sklearn.linear_model 模块,LinearRegression() 实例源码

我们从 Python 开源项目中提取了以下 50 个代码示例,用于说明如何使用 sklearn.linear_model.LinearRegression()。

项目:stacked_generalization    作者:fukatani    | 项目源码 | 文件源码
def test_stacked_regressor(self):
    """Check that a stacked regressor reaches a test MSE below 6.0 on Friedman #1."""
    # Friedman1: first 200 samples are used for training, the rest for testing.
    features, target = datasets.make_friedman1(n_samples=1200,
                                               random_state=1,
                                               noise=1.0)
    n_train = 200

    base_estimators = [
        RandomForestRegressor(n_estimators=50, random_state=1),
        GradientBoostingRegressor(n_estimators=25, random_state=1),
        Ridge(random_state=1),
    ]
    stacker = StackedRegressor(LinearRegression(),
                               base_estimators,
                               n_folds=3,
                               verbose=0,
                               oob_score_flag=True)
    stacker.fit(features[:n_train], target[:n_train])

    predictions = stacker.predict(features[n_train:])
    assert_less(mean_squared_error(target[n_train:], predictions), 6.0)
项目:stacked_generalization    作者:fukatani    | 项目源码 | 文件源码
def test_fwls_regressor(self):
    """Check that the FWLS regressor reaches a test MSE below 6.0 on Friedman #1."""
    def feature_func(x):
        # Constant meta-features: an array of ones shaped like the input.
        return np.ones(x.shape)

    # Friedman1: first 200 samples are used for training, the rest for testing.
    features, target = datasets.make_friedman1(n_samples=1200,
                                               random_state=1,
                                               noise=1.0)
    n_train = 200

    base_estimators = [
        RandomForestRegressor(n_estimators=50, random_state=1),
        GradientBoostingRegressor(n_estimators=25, random_state=1),
        Ridge(random_state=1),
    ]
    regressor = FWLSRegressor(LinearRegression(),
                              base_estimators,
                              feature_func,
                              n_folds=3,
                              verbose=0,
                              oob_score_flag=True)
    regressor.fit(features[:n_train], target[:n_train])

    predictions = regressor.predict(features[n_train:])
    assert_less(mean_squared_error(target[n_train:], predictions), 6.0)
项目:FFS-ANN    作者:GVLABHernandez    | 项目源码 | 文件源码
def scatter_regresion_Plot(X, Y, testName):
    """Scatter Y against X, overlay a least-squares line and put R^2 in the title.

    X is plotted on the axis labelled 'True Values' and Y on the axis labelled
    'Predicted Values'. The R^2 shown is ``r2_score(X, Y)`` on the column-shaped
    inputs; it is not the score of the fitted line.
    """
    plt.scatter(X, Y, c = 'b', label = '_nolegend_', s = 1)

    x_col = X.reshape(-1, 1)
    y_col = Y.reshape(-1, 1)
    score = r2_score(x_col, y_col)

    line = linear_model.LinearRegression().fit(x_col, y_col)
    plt.plot(x_col, line.predict(x_col), "--", label = 'Regression', color = 'r')

    title = testName + ' ($R^2$: ' + format(score, '.3f') + ")"
    plt.title(title, fontsize = 14)
    plt.xlabel('True Values', fontsize = 12, weight = 'bold')
    plt.ylabel('Predicted Values', fontsize = 12, weight = 'bold')
    plt.legend(loc = 'upper left', bbox_to_anchor = (0, 1.0), fancybox = True, shadow = True, fontsize = 10)
    plt.subplots_adjust(left = 0.2, right = 0.9, bottom = 0.05, top = 0.97, wspace = 0.15, hspace = 0.3)
项目:strategy    作者:kanghua309    | 项目源码 | 文件源码
def model_cross_valid(X,Y):
    """Print the mean 10-fold negative-MSE score of each candidate model.

    :param X: Feature matrix passed to ``cross_val_score``.
    :param Y: Target values passed to ``cross_val_score``.
    """
    # The folds were never shuffled, so a random_state had no effect on them.
    # Recent scikit-learn raises ValueError when random_state is given without
    # shuffle=True; omitting it keeps the same contiguous folds on all versions.
    kfold = model_selection.KFold(n_splits=10)
    scoring = 'neg_mean_squared_error'

    # Further candidates that were considered: Ridge, Lasso, KNeighborsRegressor,
    # DecisionTreeRegressor, SVR, RandomForestRegressor, AdaBoostRegressor,
    # GradientBoostingRegressor.
    for model_name in [LinearRegression,ElasticNet]:
        model = model_name()
        results = model_selection.cross_val_score(model, X, Y, cv=kfold, scoring=scoring)
        print(model_name,results.mean())
项目:PonyGE2    作者:PonyGE    | 项目源码 | 文件源码
def fit_lr(train_X, train_y, test_X):
    """
    Fit a linear regression on the training data and predict both data sets.

    :param train_X: Training inputs.
    :param train_y: Training targets.
    :param test_X: Test inputs.
    :return: Tuple of (model description string, predictions on train_X,
        predictions on test_X).
    """
    regressor = LinearRegression().fit(train_X, train_y)
    train_predictions = regressor.predict(train_X)
    test_predictions = regressor.predict(test_X)

    # NOTE(review): if `pprint` here is the stdlib pprint.pprint it returns
    # None, so the description would end in "None" - confirm which helper
    # is imported.
    description = "LR int %.2f coefs %s" % (regressor.intercept_,
                                            pprint(regressor.coef_))
    return description, train_predictions, test_predictions
项目:abcpy    作者:eth-cscs    | 项目源码 | 文件源码
def __init__(self, model, statistics_calc, backend, n_samples = 1000, seed = None):
    """Sample (parameter, statistics) pairs and regress each parameter on the statistics.

    The fitted coefficients are stored in ``self.coefficients_learnt`` with one
    row per parameter and one column per statistic.
    """
    self.model = model
    self.statistics_calc = statistics_calc
    self.backend = backend
    self.rng = np.random.RandomState(seed)
    self.model.prior.reseed(self.rng.randint(np.iinfo(np.uint32).max, dtype=np.uint32))

    # One seed per sample, distributed through the backend.
    seeds = self.rng.randint(1, n_samples*n_samples, size=n_samples, dtype=np.int32)
    distributed_seeds = self.backend.parallelize(seeds)
    mapped = self.backend.map(self._sample_parameter_statistics, distributed_seeds)
    collected = self.backend.collect(mapped)

    parameter_rows, statistic_blocks = [list(t) for t in zip(*collected)]
    parameters = np.array(parameter_rows)
    statistics = np.concatenate(statistic_blocks)

    n_parameters = parameters.shape[1]
    self.coefficients_learnt = np.zeros(shape=(n_parameters, statistics.shape[1]))
    regr = linear_model.LinearRegression(fit_intercept=True)
    for column in range(n_parameters):
        regr.fit(statistics, parameters[:, column])
        self.coefficients_learnt[column, :] = regr.coef_
项目:covar_me_app    作者:CovarMe    | 项目源码 | 文件源码
def calculate_residual_correlation_matrix(returns):
    """Return the correlation matrix of residuals after removing the first PCA factor.

    The covariance matrix of the columns of ``returns`` is projected onto its
    first principal component. Each covariance column is then regressed on
    that component, and the correlation of the residuals is returned.

    :param returns: pandas DataFrame whose columns label the rows and columns
        of the result.
    :return: pandas DataFrame indexed and labelled by ``returns.columns``.
    """
    # DataFrame.as_matrix() was removed from pandas; .values works on every version.
    returns_matrix = returns.values.transpose()
    covar_matrix = np.cov(returns_matrix)

    pca = decomposition.PCA(n_components=1)
    pca.fit(covar_matrix)
    X = pca.transform(covar_matrix)

    dim = covar_matrix.shape[1]
    res = np.zeros(shape=(dim, dim))
    for x in range(dim):
        # A fresh estimator per column; residual = observed - fitted.
        regr = linear_model.LinearRegression().fit(X, covar_matrix[:, x])
        res[:, x] = covar_matrix[:, x] - regr.predict(X)

    res_corr = np.corrcoef(res)
    return pd.DataFrame(res_corr, index = returns.columns, columns = returns.columns)
项目:DSI-personal-reference-kit    作者:teb311    | 项目源码 | 文件源码
def fit_regression(X, y, regression_class=LinearRegression, regularization_const=.001):
    '''
        Fit a regression model on (X, y) and cross-validate it.

        X (pandas DataFrame): The data.
        y (pandas DataFrame or Series): The answers.
        regression_class (class): One of sklearn.linear_model.[LinearRegression, Ridge, Lasso]
        regularization_const: regularization strength, passed as ``alpha`` to any
                              class other than LinearRegression (together with
                              ``normalize=True``).

        Return:
            tuple, (the_fitted_regressor, mean over folds of sqrt(-neg_mean_squared_error)).
    '''
    # LinearRegression takes no constructor arguments here; every other class
    # gets the regularization settings.
    constructor_kwargs = {}
    if regression_class is not LinearRegression:
        constructor_kwargs = dict(alpha=regularization_const, normalize=True)
    predictor = regression_class(**constructor_kwargs)

    predictor.fit(X, y)

    neg_mse_per_fold = cross_val_score(predictor, X, y=y, scoring='neg_mean_squared_error')
    # The scorer returns negated MSE, so flip the sign before taking the root.
    rmse_per_fold = np.sqrt(-1 * neg_mse_per_fold)

    return (predictor, np.mean(rmse_per_fold))
项目:ESL-Model    作者:littlezz    | 项目源码 | 文件源码
def test_least_square_model(prostate_data):
    """Compare LeastSquareModel's test MSE with scikit-learn's LinearRegression."""
    from esl_model.ch3.models import LeastSquareModel

    train_x, train_y, test_x, test_y, features = prostate_data
    model = LeastSquareModel(train_x=train_x, train_y=train_y, features_name=features)
    model.pre_processing()
    model.train()

    print(model.beta_hat)
    print('rss:', model.rss)
    print('F-statistic', model.F_statistic(remove_cols=['age', 'lcp', 'gleason', 'pgg45']))
    print('z-score', model.z_score)

    outcome = model.test(test_x, test_y)
    print('test error: ', outcome.mse)

    from sklearn.linear_model import LinearRegression

    reference = LinearRegression()
    reference.fit(train_x, train_y)
    print('std error', outcome.std_error)

    # The reference MSE is computed by hand from scikit-learn's predictions.
    reference_mse = np.mean((reference.predict(test_x) - test_y) ** 2)
    assert np.isclose(outcome.mse, reference_mse)
项目:algotrading    作者:alifanov    | 项目源码 | 文件源码
def rolling_beta(X, y, idx, window=100):
    """Return the rolling OLS slope of y on X as a DataFrame with a 'beta' column.

    Each window covers positions [start, start + window). Its slope is indexed
    by ``idx[start + window]``, the first position after the window.
    """
    assert len(X) == len(y)

    regression = linear_model.LinearRegression()
    dates = []
    betas = []

    for start in range(len(X) - window):
        stop = start + window

        x_window = X[start:stop].values.reshape(-1, 1)
        y_window = y[start:stop].values.reshape(-1, 1)
        regression.fit(x_window, y_window)

        dates.append(idx[stop])
        betas.append(regression.coef_[0][0])

    return pd.DataFrame({'beta': betas}, index=dates)
项目:algotrading    作者:alifanov    | 项目源码 | 文件源码
def rolling_beta(X, y, idx, window=100):
    """Return the rolling OLS slope of y on X as a DataFrame with a 'beta' column.

    Each window covers positions [start, start + window). Its slope is indexed
    by ``idx[start + window]``, the first position after the window.
    """
    assert len(X) == len(y)

    regression = linear_model.LinearRegression()
    dates = []
    betas = []

    for start in range(len(X) - window):
        stop = start + window

        x_window = X[start:stop].values.reshape(-1, 1)
        y_window = y[start:stop].values.reshape(-1, 1)
        regression.fit(x_window, y_window)

        dates.append(idx[stop])
        betas.append(regression.coef_[0][0])

    return pd.DataFrame({'beta': betas}, index=dates)
项目:coremltools    作者:apple    | 项目源码 | 文件源码
def test_linear_regressor(self):
    """For every data type, compare one scikit-learn prediction with the converted model's."""
    for dtype in self.number_data_type.keys():
        estimator = LinearRegression(normalize=True)
        data = self.scikit_data['data'].astype(dtype)
        target = self.scikit_data['target'].astype(dtype)
        fitted, spec = self._sklearn_setup(estimator, dtype, data, target)
        sample = data[0].reshape(1, -1)
        coreml_model = create_model(spec)
        try:
            expected = fitted.predict(sample)[0]
            actual = coreml_model.predict({'data': sample})['target']
            self.assertEqual(expected.dtype, type(actual))
            self.assertAlmostEqual(expected,
                                   actual,
                                   msg="{} != {} for Dtype: {}".format(expected, actual, dtype))
        except RuntimeError:
            # A RuntimeError in the comparison is reported as an unsupported dtype.
            print("{} not supported. ".format(dtype))
项目:coremltools    作者:apple    | 项目源码 | 文件源码
def setUpClass(self):
    """
    Set up the unit test by loading the dataset and training a model.

    Does nothing when scikit-learn is unavailable (HAS_SKLEARN is false).
    """
    if not HAS_SKLEARN:
        return

    scikit_data = load_boston()

    scikit_model = LinearRegression()
    scikit_model.fit(scikit_data['data'], scikit_data['target'])

    # Save the data and the model
    self.scikit_data = scikit_data
    self.scikit_model = scikit_model
项目:coremltools    作者:apple    | 项目源码 | 文件源码
def setUpClass(self):
    """
    Set up the unit test by loading the dataset and training a model.

    The model is a one-step Pipeline wrapping LinearRegression. Does nothing
    when scikit-learn is unavailable (HAS_SKLEARN is false).
    """
    if not HAS_SKLEARN:
        return
    scikit_data = load_boston()

    scikit_model = Pipeline(steps = [
              ('linear' , LinearRegression())
    ])
    scikit_model.fit(scikit_data['data'], scikit_data['target'])

    # Save the data and the model
    self.scikit_data = scikit_data
    self.scikit_model = scikit_model
项目:coremltools    作者:apple    | 项目源码 | 文件源码
def test_linear_regression_evaluation(self):
    """
    Check that the evaluation results are the same in scikit learn and coremltools
    """
    input_names = self.scikit_data.feature_names
    df = pd.DataFrame(self.scikit_data.data, columns=input_names)

    for normalize_value in (True, False):
        cur_model = LinearRegression(normalize=normalize_value)
        cur_model.fit(self.scikit_data['data'], self.scikit_data['target'])
        spec = convert(cur_model, input_names, 'target')

        df['prediction'] = cur_model.predict(self.scikit_data.data)

        metrics = evaluate_regressor(spec, df)
        # assertAlmostEquals is a deprecated alias that was removed in Python 3.12.
        self.assertAlmostEqual(metrics['max_error'], 0)
项目:stock    作者:dmegbert    | 项目源码 | 文件源码
def find_parameters_w(X, Y):
    """Fit a linear regression of Y on X and return its five parameters.

    Args:
        X: A 2-dimensional numpy array representing the independent variables
            in the linear regression model. The fit must yield exactly four
            coefficients.
        Y: A numpy array of floats representing the dependent variables in the
            linear regression model.

    Returns:
        A tuple (w0, w1, w2, w3, w4): the intercept followed by the four
        coefficients.
    """
    regression = linear_model.LinearRegression().fit(X, Y)
    w1, w2, w3, w4 = regression.coef_
    return regression.intercept_, w1, w2, w3, w4
项目:regression-stock-prediction    作者:chaitjo    | 项目源码 | 文件源码
def predict_price(dates, prices, x):
    """Fit price on date, plot the data with the fitted line, and predict at x.

    Returns a tuple (prediction for the first row of x, slope, intercept).
    """
    # Both inputs become n x 1 column matrices.
    date_column = np.reshape(dates, (len(dates), 1))
    price_column = np.reshape(prices, (len(prices), 1))

    regressor = linear_model.LinearRegression()
    regressor.fit(date_column, price_column)

    plt.scatter(date_column, price_column, color= 'black', label= 'Data')
    fitted_line = regressor.predict(date_column)
    plt.plot(date_column, fitted_line, color= 'red', label= 'Linear model')
    plt.xlabel('Date')
    plt.ylabel('Price')
    plt.title('Linear Regression')
    plt.legend()
    plt.show()

    predicted = regressor.predict(x)[0][0]
    slope = regressor.coef_[0][0]
    intercept = regressor.intercept_[0]
    return predicted, slope, intercept
项目:healthcareai-py    作者:HealthCatalyst    | 项目源码 | 文件源码
def prepare_fit_model_for_factors(model_type, x_train, y_train):
    """
    Build the estimator that matches the model type and fit it on the training data.

    Args:
        model_type (str): 'classification' or 'regression'
        x_train: Training features passed to ``fit``.
        y_train: Training targets passed to ``fit``.

    Returns:
        (sklearn.base.BaseEstimator): A fit model, or None when model_type is
        neither 'classification' nor 'regression'.
    """
    if model_type == 'classification':
        estimator = LogisticRegression()
    elif model_type == 'regression':
        estimator = LinearRegression()
    else:
        # Unknown model types yield no estimator and nothing is fitted.
        return None

    estimator.fit(x_train, y_train)
    return estimator
项目:challenges    作者:py-study-group    | 项目源码 | 文件源码
def regression_murder(year):  # applies linear regression on murder rates
    """Fit murder rate against the index of ``crime_rate_df``, print a prediction and plot.

    The last 10% of rows (rounded down) are held out of the fit. The
    prediction for ``year`` is printed, and the training points are shown
    together with the fitted line.

    :param year: A single year or a 1-D sequence of years to predict for.
    """
    X = np.array(crime_rate_df.index.values.tolist())
    y = np.array(crime_rate_df['Murder and\nnonnegligent \nmanslaughter'])

    # Slice by an explicit end position: with fewer than 10 rows the hold-out
    # size is 0, and a [:-0] slice would empty the training set.
    n_train = len(y) - int(0.1 * len(y))
    X_train = X[:n_train].reshape(-1, 1)
    y_train = y[:n_train]

    clf = LinearRegression()
    clf.fit(X_train, y_train)

    # One vectorised call replaces the per-row predict loop.
    regression_line = clf.predict(X_train)
    # predict() needs a 2-D array, so a scalar year becomes a 1 x 1 column.
    print(clf.predict(np.asarray(year).reshape(-1, 1)))

    plt.scatter(X_train, y_train)
    plt.plot(X_train, regression_line)
    plt.show()
项目:House-Pricing    作者:playing-kaggle    | 项目源码 | 文件源码
def linear_regression():
    """Fit a linear regression, print training metrics, show two diagnostic plots.

    Relies on the module-level names X_train, y_train, train_split and y.

    :return: The fitted LinearRegression instance.
    """
    model = LinearRegression()
    model.fit(X_train, y_train)

    # Look at predictions on training and validation set
    print("RMSE on Training set :", rmse_cv(model, train_split, y).mean())
    fitted = model.predict(train_split)
    print('rmsle calculate by self:', rmsle(list(np.exp(y) - 1), list(np.exp(fitted) - 1)))

    def _decorate(y_label):
        # Title, axis labels and legend shared by both figures.
        plt.title("Linear regression")
        plt.xlabel("Predicted values")
        plt.ylabel(y_label)
        plt.legend(loc="upper left")

    # Residuals against predictions
    plt.scatter(fitted, fitted - y, c="blue", marker="s", label="Training data")
    _decorate("Residuals")
    plt.hlines(y=0, xmin=10.5, xmax=13.5, color="red")
    plt.show()

    # Real values against predictions
    plt.scatter(fitted, y, c="blue", marker="s", label="Training data")
    _decorate("Real values")
    plt.plot([10.5, 13.5], [10.5, 13.5], c="red")
    plt.show()
    return model
项目:cloud-ml-sdk    作者:XiaoMi    | 项目源码 | 文件源码
def main():
  """Fit a linear regression on one diabetes feature and print its test metrics."""
  dataset = datasets.load_diabetes()
  # Keep only the feature at column 2, as a single-column matrix.
  feature = dataset.data[:, np.newaxis, 2]

  # The last 20 samples form the test split.
  holdout = 20
  x_train, x_test = feature[:-holdout], feature[-holdout:]
  y_train, y_test = dataset.target[:-holdout], dataset.target[-holdout:]

  model = linear_model.LinearRegression()
  model.fit(x_train, y_train)

  print('Coefficients: \n', model.coef_)
  squared_errors = (model.predict(x_test) - y_test) ** 2
  print("Mean squared error: %.2f" % np.mean(squared_errors))
  print('Variance score: %.2f' % model.score(x_test, y_test))
项目:bayesian_bootstrap    作者:lmc2179    | 项目源码 | 文件源码
def test_parameter_estimation_low_memory(self):
    """With low_mem=True, the bagged slope is near 1 and the intercept near 0."""
    X = np.random.uniform(0, 4, 1000)
    y = X + np.random.normal(0, 1, 1000)
    m = BayesianBootstrapBagging(LinearRegression(), 10000, 1000, low_mem=True)
    m.fit(X.reshape(-1, 1), y)
    coef_samples = [b.coef_ for b in m.base_models_]
    intercept_samples = [b.intercept_ for b in m.base_models_]

    # Each parameter's mean must be close to the truth, and both interval
    # kinds must bracket it.
    for samples, truth in ((coef_samples, 1), (intercept_samples, 0)):
        for interval in (central_credible_interval, highest_density_interval):
            self.assertAlmostEqual(np.mean(samples), truth, delta=0.3)
            lower, upper = interval(samples, alpha=0.05)
            self.assertLess(lower, truth)
            self.assertGreater(upper, truth)
项目:bayesian_bootstrap    作者:lmc2179    | 项目源码 | 文件源码
def test_parameter_estimation(self):
    """With low_mem=False, the bagged slope is near 1 and the intercept near 0."""
    X = np.random.uniform(0, 4, 1000)
    y = X + np.random.normal(0, 1, 1000)
    m = BayesianBootstrapBagging(LinearRegression(), 10000, 1000, low_mem=False)
    m.fit(X.reshape(-1, 1), y)
    coef_samples = [b.coef_ for b in m.base_models_]
    intercept_samples = [b.intercept_ for b in m.base_models_]

    # Each parameter's mean must be close to the truth, and both interval
    # kinds must bracket it.
    for samples, truth in ((coef_samples, 1), (intercept_samples, 0)):
        for interval in (central_credible_interval, highest_density_interval):
            self.assertAlmostEqual(np.mean(samples), truth, delta=0.3)
            lower, upper = interval(samples, alpha=0.05)
            self.assertLess(lower, truth)
            self.assertGreater(upper, truth)
项目:ConversationalQA    作者:btjhjeon    | 项目源码 | 文件源码
def train_regressor(options, embed_map, wordvecs, worddict):
    """
    Return regressor to map word2vec to RNN word space

    :param options: Mapping that provides 'n_words' and 'dim_word'.
    :param embed_map: word2vec model; ``embed_map.vocab`` lists its words and
        ``embed_map[w]`` yields a word's vector (stored as 300 values).
    :param wordvecs: Mapping from word to its vector in the RNN word space.
    :param worddict: Dictionary whose first ``n_words - 2`` keys are considered.
    :return: LinearRegression fitted from word2vec vectors to RNN vectors.
    """
    # Gather all words from word2vec that appear in wordvecs
    known_words = set(embed_map.vocab.keys())

    # Dictionary views cannot be sliced on Python 3, so materialise the keys first.
    candidates = list(worddict.keys())[:options['n_words']-2]
    shared = OrderedDict()
    for w in candidates:
        if w in known_words:
            # Row index = number of shared words seen so far.
            shared[w] = len(shared)

    # Get the vectors for all words in 'shared'
    w2v = numpy.zeros((len(shared), 300), dtype='float32')
    sg = numpy.zeros((len(shared), options['dim_word']), dtype='float32')
    for w, row in shared.items():
        w2v[row] = embed_map[w]
        sg[row] = wordvecs[w]

    clf = LinearRegression()
    clf.fit(w2v, sg)
    return clf
项目:heamy    作者:rushter    | 项目源码 | 文件源码
def test_stacking():
    """Stacked datasets keep the row counts of the source dataset and contain no NaNs."""
    def make_model():
        return Regressor(estimator=LinearRegression, parameters={}, dataset=RealDataset)

    def check_row_counts(stacked, source):
        assert stacked.X_train.shape[0] == source.dataset.X_train.shape[0]
        assert stacked.X_test.shape[0] == source.dataset.X_test.shape[0]
        assert stacked.y_train.shape[0] == source.dataset.y_train.shape[0]

    model = make_model()
    ds = model.stack(10)
    check_row_counts(ds, model)

    model = make_model()
    ds = model.stack(10, full_test=False)
    assert np.isnan(ds.X_train).sum() == 0
    check_row_counts(ds, model)

    model = make_model()
    model.dataset.load()
    ds = model.stack(10, full_test=False)
    # Check cache
    assert np.isnan(ds.X_train).sum() == 0
    check_row_counts(ds, model)
项目:fabric8-analytics-worker    作者:fabric8-analytics    | 项目源码 | 文件源码
def _get_trend(cls, log, starting_date):
    """Get commit count trend based on log.

    Entries are counted into consecutive buckets and the slope of a linear
    fit of count against bucket number is returned.

    :param log: a log on which the trend should be computed
    :param starting_date: starting date of log
    :return: computed trend
    """
    day = cls._SECONDS_PER_DAY
    counts = [0]
    bucket_start = starting_date
    for entry in log:
        # NOTE(review): the bucket advances by at most one day per entry,
        # whatever the gap to the entry's date - confirm this is intended.
        if entry['author']['date'] > bucket_start + day:
            bucket_start += day
            counts.append(0)
        counts[-1] += 1

    bucket_numbers = np.arange(len(counts)).reshape(-1, 1)
    regression = LinearRegression()
    regression.fit(bucket_numbers, np.array(counts))

    return regression.coef_[0]
项目:MachineLearningDemo    作者:MichaelLinn    | 项目源码 | 文件源码
def linear_model_manual(prediction_value, data_path='E://Spyder/LinearRegression/data/data.csv'):
    """Fit y = intercept + coefficient * x by least squares and predict one value.

    :param prediction_value: x value for which a prediction is computed.
    :param data_path: CSV file with 'x' and 'y' columns. Defaults to the
        original hard-coded location, so existing callers are unaffected.
    :return: dict with keys 'coefficient', 'intercept' and 'predictions_result'.
    """
    data = pd.read_csv(data_path)
    # x values are truncated to int and y values converted to float.
    x = np.array([int(value) for value in data['x']])
    y = np.array([float(value) for value in data['y']])

    x_avg = x.mean()
    y_avg = y.mean()

    predictions = {}
    # Method of least squares: slope = cov(x, y) / var(x)
    predictions['coefficient'] = ((x * y).mean() - x_avg * y_avg) / ((x * x).mean() - x_avg * x_avg)
    predictions['intercept'] = y_avg - predictions['coefficient'] * x_avg
    predictions['predictions_result'] = predictions['intercept'] + predictions['coefficient'] * prediction_value
    return predictions
项目:MachineLearningDemo    作者:MichaelLinn    | 项目源码 | 文件源码
def linear_model_multivariate(data_path='E://Spyder/LinearRegression/data/data.csv'):
    """Solve the normal equations for y = slope * x + bias.

    The design matrix has the x values in column 0 and a constant 1 in
    column 1, so the solution vector is ``[slope, bias]``.

    :param data_path: CSV file with 'x' and 'y' columns. Defaults to the
        original hard-coded location, so existing callers are unaffected.
    :return: dict with 'coefficient' (the full ``[slope, bias]`` vector) and
        'intercept' (the bias term).
    """
    data = pd.read_csv(data_path)
    # x values are truncated to int and y values converted to float.
    x_values = [int(value) for value in data['x']]
    Y_parameters = np.array([float(value) for value in data['y']])

    X_parameters = np.ones((len(x_values), 2))
    X_parameters[:, 0] = x_values

    # Formula
    # coefficient = inv(X.T*X) * X.T * y
    gram = np.dot(X_parameters.T, X_parameters)
    coefficient = np.dot(np.dot(np.linalg.inv(gram), X_parameters.T), Y_parameters)

    linearModel = {}
    linearModel['coefficient'] = coefficient
    # The intercept is the weight of the constant column. The previous
    # mean(y) + coefficient * 1 expression produced a 2-element array that
    # was not the intercept.
    linearModel['intercept'] = coefficient[1]
    return linearModel
项目:MachineLearningDemo    作者:MichaelLinn    | 项目源码 | 文件源码
def get_loss():
    """Return the sum of squared residuals of a linear fit to the CSV data."""
    data = pd.read_csv('E://Spyder/LinearRegression/data/data.csv')

    # x becomes a single-column matrix of ints, y a vector of floats.
    x_data = np.array([[int(value)] for value in data['x']])
    y_data = np.array([float(value) for value in data['y']])

    regr = linear_model.LinearRegression()
    regr.fit(x_data, y_data)

    residuals = y_data - regr.predict(x_data)
    return np.sum(residuals ** 2)



#Function to show the result of linear fit model
项目:sport_movements_analysis    作者:guillaumeAssogba    | 项目源码 | 文件源码
def plot2dRegression(x,y, nameX, nameY, namePlot):
    """Plot y against x with a fitted line, save the figure, and return R squared.

    R squared is the squared ``rvalue`` of ``stats.linregress`` between the
    fitted values and y. The figure is saved under "plot/loadings/" with
    namePlot as the file name, then shown.
    """
    fitted_values = LinearRegression().fit(x, y).predict(x)

    plt.scatter(x,y, color='g')
    plt.plot(x, fitted_values, color='k')
    plt.xlabel(nameX)
    plt.ylabel(nameY)

    r_squared = stats.linregress(fitted_values, y).rvalue**2
    print("The squared of the correlation coefficient R^2 is " + str(r_squared))

    plt.savefig("plot/loadings/"+namePlot, bbox_inches='tight')
    plt.show()
    return r_squared

#plot the 2D regression between the performance values and the loadings.
#return the correlation factor: R squared
项目:yellowbrick    作者:DistrictDataLabs    | 项目源码 | 文件源码
def test_select_best(self):
    """
    Test the select best fit estimator
    """
    # ANSCOMBE[1] is expected to select a Pipeline, ANSCOMBE[3] a plain LinearRegression.
    for index, expected_type in ((1, Pipeline), (3, LinearRegression)):
        X, y = ANSCOMBE[index]
        X = np.array(X)[:, np.newaxis]
        y = np.array(y)

        model = fit_select_best(X, y)
        self.assertIsNotNone(model)
        self.assertIsInstance(model, expected_type)
项目:yellowbrick    作者:DistrictDataLabs    | 项目源码 | 文件源码
def test_estimator_instance(self):
    """
    Test that isestimator works for instances
    """
    estimator_instances = [
        LinearRegression(),
        LogisticRegression(),
        KMeans(),
        LSHForest(),
        PCA(),
        RidgeCV(),
        LassoCV(),
        RandomForestClassifier(),
    ]

    for instance in estimator_instances:
        self.assertTrue(isestimator(instance))
项目:yellowbrick    作者:DistrictDataLabs    | 项目源码 | 文件源码
def test_estimator_class(self):
    """
    Test that isestimator works for classes
    """
    estimator_classes = [
        LinearRegression,
        LogisticRegression,
        KMeans,
        LSHForest,
        PCA,
        RidgeCV,
        LassoCV,
        RandomForestClassifier,
    ]

    for klass in estimator_classes:
        self.assertTrue(inspect.isclass(klass))
        self.assertTrue(isestimator(klass))
项目:yellowbrick    作者:DistrictDataLabs    | 项目源码 | 文件源码
def test_clusterer_enforcement(self):
    """
    Assert that only clustering estimators can be passed to cluster viz
    """
    rejected = (SVC, SVR, Ridge, RidgeCV, LinearRegression, RandomForestClassifier)
    for estimator_class in rejected:
        with self.assertRaises(YellowbrickTypeError):
            ClusteringScoreVisualizer(estimator_class())

    accepted = (KMeans, MiniBatchKMeans, AffinityPropagation, MeanShift, DBSCAN, Birch)
    for estimator_class in accepted:
        try:
            ClusteringScoreVisualizer(estimator_class())
        except YellowbrickTypeError:
            self.fail("could not pass clustering estimator to visualizer")
项目:Market-Neutral-Model    作者:SunJiaxuan    | 项目源码 | 文件源码
def GetBeta(f,*args):
    """Regress the log return over the following 30 calendar days on a factor value.

    ``f(*args)`` supplies the factor values; ``args[0]`` is the stock list and
    ``args[1]`` the start date as a 'YYYY-MM-DD' string. Returns the fitted
    regression coefficient array.
    """
    factor_frame = pd.DataFrame(f(*args))
    stock, date = args[0], args[1]

    # Price window: from `date` to 30 calendar days later.
    window_end = datetime.datetime.strptime(date, '%Y-%m-%d') + datetime.timedelta(days=30)
    opening = get_price(list(stock), date, "{:%Y-%m-%d}".format(window_end),
                        frequency='1d', fields=None)['OpeningPx']
    log_return = np.log(opening.iloc[-1]/opening.iloc[0])

    # Align factor and return, dropping rows where either is missing.
    aligned = pd.concat([factor_frame, log_return], axis = 1).dropna()
    aligned.columns = ['f','p']

    regr = linear_model.LinearRegression()
    regr.fit(np.transpose(np.matrix(aligned['f'])), np.transpose(np.matrix(aligned['p'])))
    return regr.coef_
项目:Market-Neutral-Model    作者:SunJiaxuan    | 项目源码 | 文件源码
def GetResiduals(stock,enddate):
    """Regress 30-day log returns on four factors and return the residuals.

    The factors come from EquityOCFP, EquitySize, RSIIndividual and Min130Day.
    Rows with any missing value are dropped before fitting.

    :param stock: Iterable of stock identifiers.
    :param enddate: Start of the return window as a 'YYYY-MM-DD' string; also
        used as the name of the single result column.
    :return: DataFrame of (prediction - actual), indexed like the rows that
        survived the drop.
    """
    Xinput = [EquityOCFP(stock,enddate), EquitySize(stock,enddate), RSIIndividual(stock,enddate), Min130Day(stock,enddate)]
    X = pd.concat(Xinput, axis=1)

    window_end = datetime.datetime.strptime(enddate, '%Y-%m-%d') + datetime.timedelta(days=30)
    tempprice = get_price(list(stock), enddate, "{:%Y-%m-%d}".format(window_end), frequency='1d', fields=None)['OpeningPx']
    y = np.log(tempprice.iloc[-1]/tempprice.iloc[0])

    DataAll = pd.concat([X,y],axis = 1).dropna()

    # .ix was removed from pandas; the intent is positional (first four
    # columns are factors, the fifth is the return), so use .iloc. Plain
    # ndarrays are used because recent scikit-learn rejects np.matrix input.
    features = DataAll.iloc[:, 0:4].values
    target = DataAll.iloc[:, 4].values.reshape(-1, 1)

    regr = linear_model.LinearRegression()
    regr.fit(features, target)
    residuals = regr.predict(features) - target

    return pd.DataFrame(data = residuals, index = DataAll.index.values, columns = [enddate])

#This function is used in the later function
项目:aliMusic    作者:wangqingbaidu    | 项目源码 | 文件源码
def getDataSet(self, max_value_threshold = 1000, train_length_threshold = 30):
    """Return ``self.data_set``, generating it first if it does not exist yet.

    :param max_value_threshold: Forwarded to the generator when the data set
        has to be built.
    :param train_length_threshold: Forwarded to the generator when the data
        set has to be built.
    """
    try:
        return self.data_set
    except AttributeError:
        # Only a missing attribute means "not generated yet". A bare except
        # would also swallow KeyboardInterrupt and unrelated errors.
        self.__gen_data_set(max_value_threshold = max_value_threshold,
                            train_length_threshold = train_length_threshold)
        return self.data_set

#     def __gen_model(self, model = LinearRegression()):
#         X_train, y_train, _ = self.getDataSet(10000, 60)
#         model.fit(X_train, y_train)
#         if self.ifPlotTrain:
#             y_pred = model.predict(X_train)
#             df = pd.DataFrame(np.hstack((y_train.reshape(-1,1), y_pred.reshape(-1,1))))
#             df.columns = ['Train', 'Predict']
#             df[:60].plot()
#             plt.title('train_all')
#             fig = plt.gcf()
#             fig.savefig('./img/train_all.png')
#             plt.close(fig)
#         self.model = model
项目:aliMusic    作者:wangqingbaidu    | 项目源码 | 文件源码
def getDataSet(self, max_value_threshold = 1000, train_length_threshold = 30):
    """Return ``self.data_set``, generating it first if it does not exist yet.

    :param max_value_threshold: Forwarded to the generator when the data set
        has to be built.
    :param train_length_threshold: Forwarded to the generator when the data
        set has to be built.
    """
    try:
        return self.data_set
    except AttributeError:
        # Only a missing attribute means "not generated yet". A bare except
        # would also swallow KeyboardInterrupt and unrelated errors.
        self.__gen_data_set(max_value_threshold = max_value_threshold,
                            train_length_threshold = train_length_threshold)
        return self.data_set

#     def __gen_model(self, model = LinearRegression()):
#         X_train, y_train, _ = self.getDataSet(10000, 60)
#         model.fit(X_train, y_train)
#         if self.ifPlotTrain:
#             y_pred = model.predict(X_train)
#             df = pd.DataFrame(np.hstack((y_train.reshape(-1,1), y_pred.reshape(-1,1))))
#             df.columns = ['Train', 'Predict']
#             df[:60].plot()
#             plt.title('train_all')
#             fig = plt.gcf()
#             fig.savefig('./img/train_all.png')
#             plt.close(fig)
#         self.model = model
项目:aliMusic    作者:wangqingbaidu    | 项目源码 | 文件源码
def analysis():
    """Fit a linear trend to one artist's daily play counts and plot a forecast.

    Reads the number of plays per day (``action_type = '1'``) for a single
    hard-coded artist with ``ds`` between '20150805' and '20150830' from the
    ``music`` MySQL database, fits a ``LinearRegression`` on the day index
    0..25, then prints and plots the prediction for day indices 26..49.
    """
    query = '''
    SELECT COUNT(*) as plays, ds from user_actions JOIN songs
    on user_actions.song_id = songs.song_id
    WHERE ds >= '20150805' AND ds <= '20150830' AND action_type = '1' 
    AND artist_id = 'c026b84e8f23a7741d9b670e3d8973f0'
    GROUP BY artist_id, ds 
    ORDER BY ds
    '''
    # NOTE(review): host and credentials are hard-coded; consider moving them
    # to configuration.
    mysql_cn= pymysql.connect(host='10.25.0.119', port=3306,user='root', passwd='111111', db='music')
    try:
        df = pd.read_sql(query, mysql_cn)
    finally:
        # The connection is only needed for the query; release it even when
        # the query fails.
        mysql_cn.close()

    # NOTE(review): the fit assumes the query returned exactly 26 rows (one
    # per day of the range); a day without plays would make X and y differ
    # in length -- confirm against the data.
    X = np.arange(26)
    df.columns = ['plays', 'ds']
    y = df['plays'].values
    # Formatted so the output is the same under Python 2 and Python 3.
    print("%s %s" % (X, y))

    model = LinearRegression()
    model.fit(X.reshape(X.shape[0], 1), y.reshape(y.shape[0]))

    x = np.arange(26, 50)
    Y = model.predict(x.reshape(x.shape[0], 1))
    df = pd.DataFrame(Y)
    print(Y)
    df.plot()
    plt.show()
项目:pactools    作者:pactools    | 项目源码 | 文件源码
def test_pink_noise_slope():
    """Check that the spectrum of ``pink_noise`` decays with the requested slope.

    For each slope, a linear regression of log10(periodogram) against
    log10(frequency) must give a coefficient equal to ``-slope`` to one
    decimal place. When scikit-learn cannot be imported the check is skipped
    and the function returns ``True``.
    """
    try:
        from sklearn.linear_model import LinearRegression
    except ImportError:
        return True

    sampling_rate = 500.0
    n_samples = 10000

    for expected_slope in (1, 1.5, 2):
        signal = pink_noise(n_samples, slope=expected_slope)
        estimator = Spectrum(fs=sampling_rate)
        power = estimator.periodogram(signal).T

        # Frequency axis from 0 to fs / 2, as a column vector.
        frequencies = np.linspace(0, sampling_rate / 2., power.size)[:, None]

        # Fit in the log domain; the first bin (0 Hz) is dropped before
        # taking logarithms.
        log_frequencies = np.log10(frequencies[1:])
        log_power = np.log10(power[1:])
        fit = LinearRegression()
        fit.fit(log_frequencies, log_power)
        assert_almost_equal(fit.coef_[0][0], -expected_slope, decimal=1)
项目:jamespy_py3    作者:jskDr    | 项目源码 | 文件源码
def mlr_val( RM, yE, disp = True, graph = True, rate = 2, more_train = True, center = None):
    """
    Fit a linear regression on a training split and evaluate it on the
    held-out validation split.

    The split is produced by jchem.get_valid_mode_data() from rate,
    more_train and center. Both splits are reported through mlr_show();
    the pair (r_sqr, RMSE) it returns for the validation split is returned.
    """
    train_X, train_y, valid_X, valid_y = jchem.get_valid_mode_data(
        RM, yE, rate = rate, more_train = more_train, center = center)

    regressor = linear_model.LinearRegression()
    regressor.fit( train_X, train_y)

    print('Training result')
    mlr_show( regressor, train_X, train_y, disp = disp, graph = graph)

    print('Validation result')
    valid_r_sqr, valid_rmse = mlr_show( regressor, valid_X, valid_y, disp = disp, graph = graph)
    return valid_r_sqr, valid_rmse
项目:jamespy_py3    作者:jskDr    | 项目源码 | 文件源码
def cv_train_test( xMa, yVa, tr, ts):
    """
    Train a linear regression on the rows selected by tr and predict the
    rows selected by ts (one cross-validation fold).

    Returns the flattened test targets, taken from the first column of yVa,
    together with the flattened predictions.
    """
    model = linear_model.LinearRegression()
    model.fit( xMa[ tr, :], yVa[ tr, 0])

    # Held-out part of the fold.
    held_out_X = xMa[ ts, :]
    held_out_y = yVa[ ts, 0]

    # .A1 flattens to 1-D; presumably yVa is a numpy matrix -- TODO confirm.
    actual = held_out_y.A1
    predicted = model.predict( held_out_X).ravel()
    return actual, predicted
项目:jamespy_py3    作者:jskDr    | 项目源码 | 文件源码
def gs_param( model, X, y, param_grid, n_splits=5, shuffle=True, n_jobs=-1, graph=False):
    """
    gs = gs_param( model, X, y, param_grid, n_splits=5, shuffle=True, n_jobs=-1)

    Run a grid search with K-fold cross-validation and return the fitted
    GridSearchCV object.

    Inputs
    ======
    model = svm.SVC(), or linear_model.LinearRegression(), for example
    param_grid = {"C": np.logspace(-2,2,5)}, for example
    n_splits, shuffle = forwarded to model_selection.KFold
    n_jobs = forwarded to model_selection.GridSearchCV
    graph = if True, plot the mean train and test scores of every candidate
    """
    kf5_c = model_selection.KFold( n_splits=n_splits, shuffle=shuffle)
    # Train scores are requested explicitly: recent scikit-learn releases do
    # not compute them by default, which would make the "mean_train_score"
    # lookup below fail with a KeyError.
    gs = model_selection.GridSearchCV( model, param_grid, cv=kf5_c, n_jobs=n_jobs,
                                       return_train_score=True)
    gs.fit( X, y)

    if graph:
        plt.plot( gs.cv_results_["mean_train_score"], label='E[Train]')
        plt.plot( gs.cv_results_["mean_test_score"], label='E[Test]')
        plt.legend(loc=0)
        plt.grid()

    return gs
项目:jamespy_py3    作者:jskDr    | 项目源码 | 文件源码
def cv_pilot_only(self):
        """
        Evaluate leave-one-out cross-validation scores of a plain linear
        regression on the received pilot data (self.rx_p["yT_a"] and
        self.rx_p["x_a"]).
        SNRpilot is equal to SNR, which is SNRdata.

        Returns a one-row DataFrame with the model name, alpha (always 0),
        the metric name, the mean and standard deviation of the scores, and
        the raw scores.
        """
        received = self.rx_p
        yT_a = received["yT_a"]
        x_a = received["x_a"]

        loo_scores = codes.cross_val_score_loo(
            linear_model.LinearRegression(), yT_a, x_a)

        # One-row summary, filled column by column.
        summary = pd.DataFrame()
        for column, value in (
                ("model", ["LinearRegression"]),
                ("alpha", [0]),
                ("metric", ["mean_squared_error"]),
                ("E[scores]", [np.mean(loo_scores)]),
                ("std[scores]", [np.std(loo_scores)]),
                ("scores", [loo_scores])):
            summary[column] = value

        return summary
项目:jamespy_py3    作者:jskDr    | 项目源码 | 文件源码
def cv_pilot_reg_only(self, alpha = 0):
        """
        Evaluate leave-one-out cross-validation scores on the received pilot
        data for the model named by self.model.

        With alpha == 0 a plain LinearRegression is used; otherwise the class
        called self.model is looked up in linear_model and built with alpha
        as its only argument.

        Returns a one-row DataFrame. The "model" column always holds
        self.model, "E[scores]" holds the mean of the squared scores and
        "std[scores]" is the placeholder string "t.b.d.".
        """
        model = self.model
        received = self.rx_p
        yT_a = received["yT_a"]
        x_a = received["x_a"]

        lm = (linear_model.LinearRegression() if alpha == 0
              else getattr( linear_model, model)(alpha))
        loo_scores = codes.cross_val_score_loo( lm, yT_a, x_a)

        # One-row summary, filled column by column.
        summary = pd.DataFrame()
        for column, value in (
                ("model", [model]),
                ("alpha", [alpha]),
                ("metric", ["mean_squared_error"]),
                ("E[scores]", [np.mean(np.power(loo_scores,2))]),  # MSE
                ("std[scores]", ["t.b.d."]),
                ("scores", [loo_scores])):
            summary[column] = value

        return summary
项目:SharesData    作者:xjkj123    | 项目源码 | 文件源码
def Beta(self):
        """Compute rolling, exponentially weighted regression coefficients.

        ``self.sharedf`` (with a constant daily rate subtracted from its
        ``change`` column) is inner-joined on ``date`` with the day data
        returned by ``ShareClass().GetDayData(code='000001', zs=True)``.
        For every window of 252 consecutive merged rows a weighted
        ``LinearRegression`` is fitted with ``change_x`` (from
        ``self.sharedf``) as the feature and ``change_y`` as the target.
        The weights halve every 63 rows, the newest row having weight 1.

        Returns:
            A DataFrame with columns ``date``, ``beta`` (slope), ``alpha``
            (intercept) and ``residues``; the first 251 rows are NaN because
            no full window is available. ``None`` when the merged data has
            252 rows or fewer.
        """
        window_size = 252
        half_life = 63

        # Daily rate derived from 0.03637 spread over 365 days.
        prixe = math.log(0.03637 / float(365) + 1)

        # Work on a copy: adjusting self.sharedf in place would subtract the
        # rate again on every call.
        df1 = self.sharedf.copy()
        df1['change'] = df1['change'] - prixe
        # NOTE(review): presumably code '000001' with zs=True selects the
        # benchmark index -- confirm against ShareClass.
        df2 = ShareClass().GetDayData(code='000001',zs=True)
        # Debug marker kept so the console output stays unchanged.
        print(11111111111)

        ret = pandas.merge(df1,df2,how='inner',on='date')
        if len(ret) <= window_size:
            return None

        # Exponential-decay weights, oldest row first.
        decay = math.pow(float(1) / 2, float(1 / float(half_life)))
        weights = [math.pow(decay, window_size - z - 1) for z in range(window_size)]

        # Rows before the first complete window have no estimate.
        coef = [numpy.nan] * (window_size - 1)
        intercept = [numpy.nan] * (window_size - 1)
        residues = [numpy.nan] * (window_size - 1)

        for c in range(window_size, len(ret) + 1):
            window = ret.iloc[c - window_size:c]
            clf = linear_model.LinearRegression()
            clf.fit(X=window['change_x'].values.reshape(-1, 1),
                    y=window['change_y'],
                    sample_weight=weights)
            # Index explicitly: float() on a non-0-d array is deprecated.
            coef.append(float(numpy.ravel(clf.coef_)[0]))
            # NOTE(review): _residues is a private scikit-learn attribute.
            residues.append(clf._residues)
            intercept.append(float(numpy.ravel(clf.intercept_)[0]))

        ret['beta'] = coef
        ret['alpha'] = intercept
        ret['residues'] = residues
        return ret[['date','beta','alpha','residues']]
项目:sanergy-public    作者:dssg    | 项目源码 | 文件源码
def define_model(self):
        """Instantiate the regressor named by ``self.modeltype``.

        Hyper-parameters are read from ``self.parameters`` for the model
        types that need them. Supported types: "RandomForest",
        "LinearRegression", "Lasso", "ElasticNet" and "SVR".

        Raises:
            ConfigError: for any other model type.
        """
        # NOTE: AR, StaticModel, AdvancedStaticModel and SGDRegressor are
        # not handled by this factory.
        kind = self.modeltype

        if kind == "RandomForest":
            params = self.parameters
            return ensemble.RandomForestRegressor(n_estimators=params['n_estimators'])

        if kind == "LinearRegression":
            return linear_model.LinearRegression()

        if kind == "Lasso":
            params = self.parameters
            return linear_model.Lasso(alpha=params['alpha'])

        if kind == "ElasticNet":
            params = self.parameters
            return linear_model.ElasticNet(
                alpha=params['alpha'],
                l1_ratio=params['l1_ratio'])

        if kind == "SVR":
            params = self.parameters
            return SVR(
                C=params['C'],
                epsilon=params['epsilon'],
                kernel=params['kernel'])

        raise ConfigError("Unsupported model {0}".format(self.modeltype))
项目:deep_arb    作者:mhernan88    | 项目源码 | 文件源码
def regressionDistance(vec1,vec2):
    """Return the fitted coefficient array of a linear regression of vec2 on vec1.

    vec1 is reshaped into a single feature column of length len(vec1).
    """
    feature_column = np.asarray(vec1).reshape(len(vec1),1)
    targets = np.asarray(vec2)

    fitted = linear_model.LinearRegression()
    fitted.fit(feature_column, targets)
    return fitted.coef_
项目:sef    作者:passalis    | 项目源码 | 文件源码
def outofsample_extensions(method='linear-regression'):
    """Evaluate out-of-sample extensions of an Isomap embedding of MNIST.

    A 10-dimensional Isomap embedding is learned on the first 5000 training
    samples. A projection is then fitted to reproduce it and scored with
    ``evaluate_svm``; the resulting accuracy is printed.

    Args:
        method: 'linear-regression' for the LinearRegression baseline, or
            'c-ISOMAP-10d' / 'c-ISOMAP-20d' for a LinearSEF with 10 or 20
            output dimensions.

    Raises:
        ValueError: if ``method`` is not one of the supported names.
    """
    supported_methods = ('linear-regression', 'c-ISOMAP-10d', 'c-ISOMAP-20d')
    if method not in supported_methods:
        # Fail before the expensive Isomap fit instead of crashing at the
        # final print with an unbound accuracy.
        raise ValueError("Unsupported method {!r}; expected one of {}".format(
            method, ', '.join(supported_methods)))

    # Load the data and init seeds
    train_data, train_labels, test_data, test_labels = load_mnist()
    np.random.seed(1)
    # NOTE(review): check_random_state only returns a RandomState object; as
    # the result is discarded this call does not seed anything.
    sklearn.utils.check_random_state(1)
    n_train_samples = 5000

    train_subset = train_data[:n_train_samples, :]
    train_subset_labels = train_labels[:n_train_samples]

    # Learn a new space using Isomap
    isomap = Isomap(n_components=10, n_neighbors=20)
    train_data_isomap = np.float32(isomap.fit_transform(train_subset))

    if method == 'linear-regression':
        # Use linear regression to provide baseline out-of-sample extensions
        proj = LinearRegression()
        proj.fit(np.float64(train_subset), np.float64(train_data_isomap))
        acc = evaluate_svm(proj.predict(train_subset), train_subset_labels,
                           proj.predict(test_data), test_labels)
    else:
        # Use the SEF to provide out-of-sample extensions
        output_dimensionality = 10 if method == 'c-ISOMAP-10d' else 20
        proj = LinearSEF(train_data.shape[1], output_dimensionality=output_dimensionality)
        proj.cuda()
        proj.fit(data=train_subset, target_data=train_data_isomap, target='copy',
                 epochs=50, batch_size=128, verbose=True, learning_rate=0.001, regularizer_weight=1)
        acc = evaluate_svm(proj.transform(train_subset), train_subset_labels,
                           proj.transform(test_data), test_labels)

    print("Method: ", method, " Test accuracy: ", 100 * acc, " %")
项目:sef    作者:passalis    | 项目源码 | 文件源码
def outofsample_extensions(method=None, dataset=None):
    """Evaluate out-of-sample extensions of an Isomap embedding of a dataset.

    A 10-dimensional Isomap embedding is learned on the training data
    returned by ``dataset_loader(dataset, seed=1)``. A projection is then
    fitted to reproduce it and scored with ``evaluate_svm``; the resulting
    accuracy is printed.

    Args:
        method: 'linear-regression' for the LinearRegression baseline on
            standardized data, or 'c-ISOMAP-10d' / 'c-ISOMAP-20d' for a
            LinearSEF with 10 or 20 output dimensions. The default ``None``
            is not a valid choice and must be overridden.
        dataset: dataset identifier passed to ``dataset_loader``.

    Raises:
        ValueError: if ``method`` is not one of the supported names.
    """
    supported_methods = ('linear-regression', 'c-ISOMAP-10d', 'c-ISOMAP-20d')
    if method not in supported_methods:
        # Fail before loading data and fitting Isomap instead of crashing at
        # the final print with an unbound accuracy.
        raise ValueError("Unsupported method {!r}; expected one of {}".format(
            method, ', '.join(supported_methods)))

    np.random.seed(1)
    # NOTE(review): check_random_state only returns a RandomState object; as
    # the result is discarded this call does not seed anything.
    sklearn.utils.check_random_state(1)

    train_data, train_labels, test_data, test_labels = dataset_loader(dataset, seed=1)

    # Learn a new space using Isomap
    isomap = Isomap(n_components=10, n_neighbors=20)
    train_data_isomap = np.float32(isomap.fit_transform(train_data))

    if method == 'linear-regression':
        from sklearn.preprocessing import StandardScaler
        # Standardize with statistics of the training data only.
        std = StandardScaler()
        train_data = std.fit_transform(train_data)
        test_data = std.transform(test_data)

        # Use linear regression to provide baseline out-of-sample extensions
        proj = LinearRegression()
        proj.fit(np.float64(train_data), np.float64(train_data_isomap))
        acc = evaluate_svm(proj.predict(train_data), train_labels,
                           proj.predict(test_data), test_labels)
    else:
        # Use the SEF to provide out-of-sample extensions
        output_dimensionality = 10 if method == 'c-ISOMAP-10d' else 20
        proj = LinearSEF(train_data.shape[1], output_dimensionality=output_dimensionality)
        proj.cuda()
        proj.fit(data=train_data, target_data=train_data_isomap, target='copy',
                 epochs=50, batch_size=1024, verbose=False, learning_rate=0.001, regularizer_weight=1)
        acc = evaluate_svm(proj.transform(train_data), train_labels,
                           proj.transform(test_data), test_labels)

    print("Method: ", method, " Test accuracy: ", 100 * acc, " %")