Python sklearn.linear_model 模块，LinearRegression() 实例源码

我们从Python开源项目中，提取了以下50个代码示例，用于说明如何使用sklearn.linear_model.LinearRegression()。

项目：stacked_generalization 作者：fukatani | 项目源码 | 文件源码

def test_stacked_regressor(self):
        """Check that a StackedRegressor reaches a test MSE below 6.0.

        Three base regressors are blended by a LinearRegression on the
        Friedman #1 synthetic data set (1200 samples, noise 1.0).
        """
        # Friedman1: the first 200 samples train, the remaining ones test.
        features, targets = datasets.make_friedman1(n_samples=1200,
                                                    random_state=1,
                                                    noise=1.0)
        n_train = 200
        X_train, X_test = features[:n_train], features[n_train:]
        y_train, y_test = targets[:n_train], targets[n_train:]

        blender = LinearRegression()
        base_estimators = [
            RandomForestRegressor(n_estimators=50, random_state=1),
            GradientBoostingRegressor(n_estimators=25, random_state=1),
            Ridge(random_state=1),
        ]

        stacker = StackedRegressor(blender,
                                   base_estimators,
                                   n_folds=3,
                                   verbose=0,
                                   oob_score_flag=True)
        stacker.fit(X_train, y_train)
        predictions = stacker.predict(X_test)
        assert_less(mean_squared_error(y_test, predictions), 6.0)

项目：stacked_generalization 作者：fukatani | 项目源码 | 文件源码

def test_fwls_regressor(self):
        """Check that an FWLSRegressor reaches a test MSE below 6.0.

        The meta-feature function returns an all-ones array with the same
        shape as its input; the data is the Friedman #1 synthetic set.
        """
        def feature_func(x):
            # Constant meta-features: one "1.0" per input entry.
            return np.ones(x.shape)

        # Friedman1: the first 200 samples train, the remaining ones test.
        features, targets = datasets.make_friedman1(n_samples=1200,
                                                    random_state=1,
                                                    noise=1.0)
        n_train = 200
        X_train, X_test = features[:n_train], features[n_train:]
        y_train, y_test = targets[:n_train], targets[n_train:]

        blender = LinearRegression()
        base_estimators = [
            RandomForestRegressor(n_estimators=50, random_state=1),
            GradientBoostingRegressor(n_estimators=25, random_state=1),
            Ridge(random_state=1),
        ]

        stacker = FWLSRegressor(blender,
                                base_estimators,
                                feature_func,
                                n_folds=3,
                                verbose=0,
                                oob_score_flag=True)
        stacker.fit(X_train, y_train)
        predictions = stacker.predict(X_test)
        assert_less(mean_squared_error(y_test, predictions), 6.0)

项目：FFS-ANN 作者：GVLABHernandez | 项目源码 | 文件源码

def scatter_regresion_Plot(X, Y, testName):
    """Scatter Y against X and overlay a least-squares regression line.

    The plot title is ``testName`` followed by the R^2 score computed as
    ``r2_score(X, Y)`` (X is labelled 'True Values', Y 'Predicted Values').
    Draws on the current matplotlib figure; nothing is returned.
    """
    plt.scatter(X, Y, c = 'b', label = '_nolegend_', s = 1)

    # scikit-learn expects column vectors for both the fit and the score.
    x_col = X.reshape(-1, 1)
    y_col = Y.reshape(-1, 1)
    score_text = "{0:.3f}".format(r2_score(x_col, y_col))

    fitted_line = linear_model.LinearRegression()
    fitted_line.fit(x_col, y_col)
    plt.plot(x_col, fitted_line.predict(x_col), "--", label = 'Regression', color = 'r')

    plt.title(testName + ' ($R^2$: ' + score_text + ")", fontsize = 14)
    plt.xlabel('True Values', fontsize = 12, weight = 'bold')
    plt.ylabel('Predicted Values', fontsize = 12, weight = 'bold')
    plt.legend(loc = 'upper left', bbox_to_anchor = (0, 1.0), fancybox = True, shadow = True, fontsize = 10)
    plt.subplots_adjust(left = 0.2, right = 0.9, bottom = 0.05, top = 0.97, wspace = 0.15, hspace = 0.3)

项目：strategy 作者：kanghua309 | 项目源码 | 文件源码

def model_cross_valid(X,Y):
    """Print the mean 10-fold cross-validated score of each candidate model.

    Each model class is instantiated with its default arguments and scored
    with 'neg_mean_squared_error'; the class and the mean score are printed.
    """
    # NOTE(review): newer scikit-learn releases may reject random_state
    # when shuffle is left at its default -- confirm against the version used.
    kfold = model_selection.KFold(n_splits=10, random_state=7)
    scoring = 'neg_mean_squared_error'

    # Further candidates once tried here: Ridge, Lasso, KNeighborsRegressor,
    # DecisionTreeRegressor, SVR, RandomForestRegressor, AdaBoostRegressor,
    # GradientBoostingRegressor.
    candidates = (LinearRegression, ElasticNet)
    for model_name in candidates:
        estimator = model_name()
        fold_scores = model_selection.cross_val_score(estimator, X, Y, cv=kfold, scoring=scoring)
        print(model_name,fold_scores.mean())

项目：PonyGE2 作者：PonyGE | 项目源码 | 文件源码

def fit_lr(train_X, train_y, test_X):
    """
    Fit a LinearRegression on the training data and predict on both sets.

    :param train_X: training inputs passed to ``fit`` and ``predict``.
    :param train_y: training targets passed to ``fit``.
    :param test_X: test inputs passed to ``predict``.
    :return: a tuple ``(model, yhat_train, yhat_test)`` where ``model`` is a
    text summary of the intercept and coefficients.
    """
    regressor = LinearRegression()
    regressor.fit(train_X, train_y)

    train_predictions = regressor.predict(train_X)
    test_predictions = regressor.predict(test_X)

    # NOTE(review): pprint is resolved from the enclosing module; presumably
    # it returns a string rendering of the coefficients -- confirm.
    summary = "LR int %.2f coefs %s" % (regressor.intercept_, pprint(regressor.coef_))
    return summary, train_predictions, test_predictions

项目：abcpy 作者：eth-cscs | 项目源码 | 文件源码

def __init__(self, model, statistics_calc, backend, n_samples = 1000, seed = None):
        """Learn linear coefficients mapping summary statistics to parameters.

        Draws ``n_samples`` seeds, maps ``_sample_parameter_statistics`` over
        them through ``backend`` and regresses every parameter column on the
        collected statistics. The fitted coefficients are stored row-wise in
        ``self.coefficients_learnt`` (one row per parameter).
        """
        self.model = model
        self.statistics_calc = statistics_calc
        self.backend = backend
        self.rng = np.random.RandomState(seed)

        # The prior is reseeded first, so the RNG draw order is fixed.
        prior_seed = self.rng.randint(np.iinfo(np.uint32).max, dtype=np.uint32)
        self.model.prior.reseed(prior_seed)

        # main algorithm: one seed per simulated sample
        seed_arr = self.rng.randint(1, n_samples*n_samples, size=n_samples, dtype=np.int32)
        seed_pds = self.backend.parallelize(seed_arr)

        mapped_pds = self.backend.map(self._sample_parameter_statistics, seed_pds)
        collected = self.backend.collect(mapped_pds)
        parameter_rows, statistic_rows = [list(column) for column in zip(*collected)]
        sample_parameters = np.array(parameter_rows)
        sample_statistics = np.concatenate(statistic_rows)

        n_parameters = sample_parameters.shape[1]
        n_statistics = sample_statistics.shape[1]
        self.coefficients_learnt = np.zeros(shape=(n_parameters, n_statistics))

        regr = linear_model.LinearRegression(fit_intercept=True)
        for ind in range(n_parameters):
            regr.fit(sample_statistics, sample_parameters[:,ind])
            self.coefficients_learnt[ind,:] = regr.coef_

项目：covar_me_app 作者：CovarMe | 项目源码 | 文件源码

def calculate_residual_correlation_matrix(returns):
    """Return the correlation matrix of residuals after removing one factor.

    The covariance matrix of the return series is projected on its first
    principal component; every covariance column is regressed on that
    component and the correlation of the regression residuals is returned.

    Args:
        returns: pandas DataFrame whose columns label the output matrix.

    Returns:
        pandas DataFrame of residual correlations, indexed and labelled by
        ``returns.columns``.
    """
    # find the market return constraining on the selected companies (first PCA)
    # regress each stock on that and find correlation of residuals
    # ``DataFrame.as_matrix()`` no longer exists in current pandas; ``.values``
    # yields the same ndarray.
    returns_matrix = returns.values.transpose()
    covar_matrix = np.cov(returns_matrix)

    pca = decomposition.PCA(n_components=1)
    pca.fit(covar_matrix)
    X = pca.transform(covar_matrix)

    dim = covar_matrix.shape[1]
    res = np.zeros(shape=(dim, dim))
    for x in range(dim):
        column = covar_matrix[:, x]
        # A fresh estimator per column; no estimator is needed before the loop.
        regr = linear_model.LinearRegression()
        regr.fit(X, column)
        res[:, x] = column - regr.predict(X)

    res_corr = np.corrcoef(res)
    return pd.DataFrame(res_corr, index = returns.columns, columns = returns.columns)

项目：DSI-personal-reference-kit 作者：teb311 | 项目源码 | 文件源码

def fit_regression(X, y, regression_class=LinearRegression, regularization_const=.001):
    '''
        Fit a regression model on (X, y) and report its cross-validated RMSE.

        X (pandas DataFrame): The data.
        y (pandas DataFrame or Series): The answers.
        regression_class (class): One of sklearn.linear_model.[LinearRegression, Ridge, Lasso]
        regularization_const: regularization strength, forwarded as ``alpha``
                              to any class other than LinearRegression.

        Return:
            tuple, (the_fitted_regressor, mean(cross_val_score)).
    '''
    # Only the regularized models take alpha / normalize arguments.
    if regression_class is not LinearRegression:
        predictor = regression_class(alpha=regularization_const, normalize=True)
    else:
        predictor = regression_class()

    predictor.fit(X, y)

    # The scorer yields negated MSE values; flip the sign and take the root.
    negated_mse = cross_val_score(predictor, X, y=y, scoring='neg_mean_squared_error')
    rmse_per_fold = np.sqrt(-1 * negated_mse)

    return (predictor, np.mean(rmse_per_fold))

项目：ESL-Model 作者：littlezz | 项目源码 | 文件源码

def test_least_square_model(prostate_data):
    """Check LeastSquareModel's test MSE against scikit-learn's LinearRegression."""
    from esl_model.ch3.models import LeastSquareModel

    train_x, train_y, test_x, test_y, features = prostate_data

    model = LeastSquareModel(train_x=train_x, train_y=train_y, features_name=features)
    model.pre_processing()
    model.train()

    # Diagnostic output of the fitted model.
    print(model.beta_hat)
    print('rss:',model.rss)
    print('F-statistic', model.F_statistic(remove_cols=['age', 'lcp', 'gleason', 'pgg45']))
    print('z-score', model.z_score)

    result = model.test(test_x, test_y)
    print('test error: ', result.mse)

    from sklearn.linear_model import LinearRegression

    reference = LinearRegression()
    reference.fit(train_x, train_y)
    print('std error', result.std_error)

    reference_mse = np.mean(((reference.predict(test_x)) - test_y) **2)
    assert np.isclose(result.mse, reference_mse)

项目：algotrading 作者：alifanov | 项目源码 | 文件源码

def rolling_beta(X, y, idx, window=100):
    """Return the rolling regression slope of ``y`` on ``X``.

    For every start offset a LinearRegression is fitted on ``window``
    consecutive observations; the slope is stored under ``idx[stop]``, where
    ``stop`` is the first position after the window.

    Returns a DataFrame with a single 'beta' column.
    """
    assert len(X) == len(y)

    estimator = linear_model.LinearRegression()
    dates, betas = [], []

    for start in range(len(X) - window):
        stop = start + window

        # Column vectors, as required by the estimator.
        x_window = X[start:stop].values.reshape(-1, 1)
        y_window = y[start:stop].values.reshape(-1, 1)
        estimator.fit(x_window, y_window)

        dates.append(idx[stop])
        betas.append(estimator.coef_[0][0])

    return pd.DataFrame({'beta': betas}, index=dates)

项目：algotrading 作者：alifanov | 项目源码 | 文件源码

def rolling_beta(X, y, idx, window=100):
    """Return the rolling regression slope of ``y`` on ``X``.

    For every start offset a LinearRegression is fitted on ``window``
    consecutive observations; the slope is stored under ``idx[stop]``, where
    ``stop`` is the first position after the window.

    Returns a DataFrame with a single 'beta' column.
    """
    assert len(X) == len(y)

    estimator = linear_model.LinearRegression()
    dates, betas = [], []

    for start in range(len(X) - window):
        stop = start + window

        # Column vectors, as required by the estimator.
        x_window = X[start:stop].values.reshape(-1, 1)
        y_window = y[start:stop].values.reshape(-1, 1)
        estimator.fit(x_window, y_window)

        dates.append(idx[stop])
        betas.append(estimator.coef_[0][0])

    return pd.DataFrame({'beta': betas}, index=dates)

项目：coremltools 作者：apple | 项目源码 | 文件源码

def test_linear_regressor(self):
        """Compare scikit-learn and converted model predictions per dtype.

        For every dtype in ``self.number_data_type`` the data is cast, a
        normalized LinearRegression is set up through ``_sklearn_setup`` and
        the first sample's prediction is compared in type and value.
        A RuntimeError is reported as an unsupported dtype.
        """
        for dtype in self.number_data_type.keys():
            data = self.scikit_data['data'].astype(dtype)
            target = self.scikit_data['target'].astype(dtype)
            scikit_model, spec = self._sklearn_setup(LinearRegression(normalize=True),
                                                     dtype, data, target)
            test_data = data[0].reshape(1, -1)
            coreml_model = create_model(spec)

            def sklearn_value():
                return scikit_model.predict(test_data)[0]

            def coreml_value():
                return coreml_model.predict({'data': test_data})['target']

            try:
                self.assertEqual(sklearn_value().dtype, type(coreml_value()))
                self.assertAlmostEqual(
                    sklearn_value(),
                    coreml_value(),
                    msg="{} != {} for Dtype: {}".format(sklearn_value(),
                                                        coreml_value(),
                                                        dtype))
            except RuntimeError:
                print("{} not supported. ".format(dtype))

项目：coremltools 作者：apple | 项目源码 | 文件源码

def setUpClass(self):
        """
        Load the Boston data set and fit a LinearRegression for the tests.

        Does nothing when scikit-learn is unavailable (HAS_SKLEARN is falsy).
        """
        if not HAS_SKLEARN:
            return

        boston = load_boston()
        feature_names = boston.feature_names  # read but not used further here

        regressor = LinearRegression()
        regressor.fit(boston['data'], boston['target'])

        # Keep both the data and the fitted model for the test methods.
        self.scikit_data = boston
        self.scikit_model = regressor

项目：coremltools 作者：apple | 项目源码 | 文件源码

def setUpClass(self):
        """
        Load the Boston data set and fit a one-step Pipeline for the tests.

        The pipeline holds a single 'linear' step (LinearRegression). Does
        nothing when scikit-learn is unavailable (HAS_SKLEARN is falsy).
        """
        if not(HAS_SKLEARN):
            return

        boston = load_boston()
        feature_names = boston.feature_names  # read but not used further here

        steps = [('linear' , LinearRegression())]
        pipeline = Pipeline(steps = steps)
        pipeline.fit(boston['data'], boston['target'])

        # Keep both the data and the fitted model for the test methods.
        self.scikit_data = boston
        self.scikit_model = pipeline

项目：coremltools 作者：apple | 项目源码 | 文件源码

def test_linear_regression_evaluation(self):
        """
        Check that the evaluation results are the same in scikit learn and coremltools

        A LinearRegression is fitted with and without ``normalize``, converted,
        and evaluated against its own predictions; the reported 'max_error'
        must be (almost) zero.
        """
        input_names = self.scikit_data.feature_names
        df = pd.DataFrame(self.scikit_data.data, columns=input_names)

        for normalize_value in (True, False):
            cur_model = LinearRegression(normalize=normalize_value)
            cur_model.fit(self.scikit_data['data'], self.scikit_data['target'])
            spec = convert(cur_model, input_names, 'target')

            # Reference predictions the converted model is evaluated against.
            df['prediction'] = cur_model.predict(self.scikit_data.data)

            metrics = evaluate_regressor(spec, df)
            # assertAlmostEquals is a deprecated alias that newer Python
            # versions removed; use the canonical name.
            self.assertAlmostEqual(metrics['max_error'], 0)

项目：stock 作者：dmegbert | 项目源码 | 文件源码

def find_parameters_w(X, Y):
    """Fit a linear regression and return its intercept and four slopes.

    Args:
        X: A 2-dimensional numpy array representing the independent variables
            in the linear regression model.
        Y: A numpy array of floats representing the dependent variables in the
            linear regression model.

    Returns:
        A tuple (w0, w1, w2, w3, w4): the intercept followed by the four
        fitted coefficients.
    """
    regressor = linear_model.LinearRegression()
    regressor.fit(X, Y)

    # Unpacking enforces that exactly four coefficients were fitted.
    slope_1, slope_2, slope_3, slope_4 = regressor.coef_
    intercept = regressor.intercept_
    return intercept, slope_1, slope_2, slope_3, slope_4

项目：regression-stock-prediction 作者：chaitjo | 项目源码 | 文件源码

def predict_price(dates, prices, x):
    """Fit price ~ date linearly, plot the fit and predict the price at ``x``.

    Returns a tuple ``(prediction, slope, intercept)``: the first predicted
    value for ``x`` and the fitted coefficient and intercept.
    """
    # Both series become n x 1 column matrices for the estimator.
    date_column = np.reshape(dates, (len(dates), 1))
    price_column = np.reshape(prices, (len(prices), 1))

    regressor = linear_model.LinearRegression()
    regressor.fit(date_column, price_column)

    # Raw data points plus the fitted line.
    plt.scatter(date_column, price_column, color= 'black', label= 'Data')
    fitted_prices = regressor.predict(date_column)
    plt.plot(date_column, fitted_prices, color= 'red', label= 'Linear model')
    plt.xlabel('Date')
    plt.ylabel('Price')
    plt.title('Linear Regression')
    plt.legend()
    plt.show()

    prediction = regressor.predict(x)[0][0]
    slope = regressor.coef_[0][0]
    intercept = regressor.intercept_[0]
    return prediction, slope, intercept

项目：healthcareai-py 作者：HealthCatalyst | 项目源码 | 文件源码

def prepare_fit_model_for_factors(model_type, x_train, y_train):
    """
    Fit a default linear model matching the requested model type.

    Args:
        model_type (str): 'classification' or 'regression'
        x_train: training inputs passed to ``fit``.
        y_train: training targets passed to ``fit``.

    Returns:
        (sklearn.base.BaseEstimator): A fit model, or None when ``model_type``
        is neither 'classification' nor 'regression'.
    """
    if model_type == 'classification':
        estimator = LogisticRegression()
    elif model_type == 'regression':
        estimator = LinearRegression()
    else:
        # Unknown model types yield no model at all.
        return None

    estimator.fit(x_train, y_train)
    return estimator

项目：challenges 作者：py-study-group | 项目源码 | 文件源码

def regression_murder(year):  # applies linear regression on murder rates
    """Fit murder rate against the index of ``crime_rate_df`` and plot the fit.

    The last 10% of the rows are held out of the fit. The prediction for
    ``year`` is printed, then the training points and the fitted line are
    shown.

    Args:
        year: a scalar (or sequence of scalars) to predict the rate for.
    """
    dates = crime_rate_df.index.values.tolist()
    rates = crime_rate_df['Murder and\nnonnegligent \nmanslaughter']

    X = np.array(dates)
    y = np.array(rates)

    # Slice by an explicit end index: with fewer than 10 rows the hold-out
    # size is 0 and ``[:-0]`` would leave an empty training set.
    prediction_size = int(0.1 * len(y))
    n_train = len(X) - prediction_size
    X_train = X[:n_train]
    y_train = y[:n_train]

    clf = LinearRegression()
    features = X_train.reshape(-1, 1)
    clf.fit(features, y_train)

    # One vectorized call instead of predicting row by row.
    regression_line = clf.predict(features)

    # predict() needs a 2-D array; reshape accepts scalars and sequences.
    print(clf.predict(np.reshape(year, (-1, 1))))

    plt.scatter(X_train, y_train)
    plt.plot(X_train, regression_line)
    plt.show()

项目：House-Pricing 作者：playing-kaggle | 项目源码 | 文件源码

def linear_regression():
    """Fit a LinearRegression on the module-level training data and plot it.

    Prints the cross-validated RMSE and a self-computed RMSLE, then shows a
    residual plot and a predicted-vs-real plot. Returns the fitted model.

    NOTE(review): the model is fitted on X_train / y_train but evaluated on
    train_split / y, all module-level names -- confirm they refer to the
    same split.
    """
    model = LinearRegression()
    model.fit(X_train, y_train)

    # Scores on the training split.
    print("RMSE on Training set :", rmse_cv(model, train_split, y).mean())
    fitted = model.predict(train_split)
    actual_prices = list(np.exp(y) - 1)
    fitted_prices = list(np.exp(fitted) - 1)
    print('rmsle calculate by self:', rmsle(actual_prices, fitted_prices))

    # Residuals against predictions.
    residuals = fitted - y
    plt.scatter(fitted, residuals, c="blue", marker="s", label="Training data")
    plt.title("Linear regression")
    plt.xlabel("Predicted values")
    plt.ylabel("Residuals")
    plt.legend(loc="upper left")
    plt.hlines(y=0, xmin=10.5, xmax=13.5, color="red")
    plt.show()

    # Predictions against real values, with the identity line for reference.
    plt.scatter(fitted, y, c="blue", marker="s", label="Training data")
    plt.title("Linear regression")
    plt.xlabel("Predicted values")
    plt.ylabel("Real values")
    plt.legend(loc="upper left")
    plt.plot([10.5, 13.5], [10.5, 13.5], c="red")
    plt.show()

    return model

项目：cloud-ml-sdk 作者：XiaoMi | 项目源码 | 文件源码

def main():
  """Fit a one-feature linear regression on the diabetes data and report it.

  Feature column 2 is used; the last 20 samples form the test set. Prints
  the coefficients, the test mean squared error and the test score.
  """
  diabetes = datasets.load_diabetes()
  single_feature = diabetes.data[:, np.newaxis, 2]
  n_test = 20

  X_train, X_test = single_feature[:-n_test], single_feature[-n_test:]
  y_train, y_test = diabetes.target[:-n_test], diabetes.target[-n_test:]

  model = linear_model.LinearRegression()
  model.fit(X_train, y_train)

  print('Coefficients: \n', model.coef_)
  squared_errors = (model.predict(X_test) - y_test)**2
  print("Mean squared error: %.2f" % np.mean(squared_errors))
  print('Variance score: %.2f' % model.score(X_test, y_test))

项目：bayesian_bootstrap 作者：lmc2179 | 项目源码 | 文件源码

def test_parameter_estimation_low_memory(self):
        """Check slope and intercept recovery with ``low_mem=True``.

        Data follows y = X + N(0, 1); the bagged LinearRegression models should
        average a slope near 1 and an intercept near 0, and both interval
        estimators should bracket those values.
        """
        X = np.random.uniform(0, 4, 1000)
        y = X + np.random.normal(0, 1, 1000)
        bagger = BayesianBootstrapBagging(LinearRegression(), 10000, 1000, low_mem=True)
        bagger.fit(X.reshape(-1, 1), y)
        coef_samples = [est.coef_ for est in bagger.base_models_]
        intercept_samples = [est.intercept_ for est in bagger.base_models_]

        def assert_brackets(interval_fn, samples, target):
            # The 95% interval must strictly contain the target value.
            lower, upper = interval_fn(samples, alpha=0.05)
            self.assertLess(lower, target)
            self.assertGreater(upper, target)

        self.assertAlmostEqual(np.mean(coef_samples), 1, delta=0.3)
        assert_brackets(central_credible_interval, coef_samples, 1)
        assert_brackets(highest_density_interval, coef_samples, 1)

        self.assertAlmostEqual(np.mean(intercept_samples), 0, delta=0.3)
        assert_brackets(central_credible_interval, intercept_samples, 0)
        self.assertAlmostEqual(np.mean(intercept_samples), 0, delta=0.3)
        assert_brackets(highest_density_interval, intercept_samples, 0)

项目：bayesian_bootstrap 作者：lmc2179 | 项目源码 | 文件源码

def test_parameter_estimation(self):
        """Check slope and intercept recovery with ``low_mem=False``.

        Data follows y = X + N(0, 1); the bagged LinearRegression models should
        average a slope near 1 and an intercept near 0, and both interval
        estimators should bracket those values.
        """
        X = np.random.uniform(0, 4, 1000)
        y = X + np.random.normal(0, 1, 1000)
        bagger = BayesianBootstrapBagging(LinearRegression(), 10000, 1000, low_mem=False)
        bagger.fit(X.reshape(-1, 1), y)
        coef_samples = [est.coef_ for est in bagger.base_models_]
        intercept_samples = [est.intercept_ for est in bagger.base_models_]

        def assert_brackets(interval_fn, samples, target):
            # The 95% interval must strictly contain the target value.
            lower, upper = interval_fn(samples, alpha=0.05)
            self.assertLess(lower, target)
            self.assertGreater(upper, target)

        self.assertAlmostEqual(np.mean(coef_samples), 1, delta=0.3)
        assert_brackets(central_credible_interval, coef_samples, 1)
        assert_brackets(highest_density_interval, coef_samples, 1)

        self.assertAlmostEqual(np.mean(intercept_samples), 0, delta=0.3)
        assert_brackets(central_credible_interval, intercept_samples, 0)
        self.assertAlmostEqual(np.mean(intercept_samples), 0, delta=0.3)
        assert_brackets(highest_density_interval, intercept_samples, 0)

项目：ConversationalQA 作者：btjhjeon | 项目源码 | 文件源码

def train_regressor(options, embed_map, wordvecs, worddict):
    """
    Return regressor to map word2vec to RNN word space

    Words are taken from the first ``options['n_words'] - 2`` keys of
    ``worddict``; those also present in ``embed_map.vocab`` form the training
    pairs (word2vec vector -> ``wordvecs`` vector) for a LinearRegression.
    """
    # Gather all words from word2vec that appear in wordvecs
    known_words = set(embed_map.vocab.keys())

    # dict.keys() is a non-sliceable view on Python 3, so materialize it
    # before slicing.
    candidates = list(worddict.keys())[:options['n_words']-2]

    # Map each shared word to its row index, in encounter order.
    shared = OrderedDict()
    for w in candidates:
        if w in known_words:
            shared[w] = len(shared)

    # Get the vectors for all words in 'shared'
    # NOTE(review): 300 is presumably the word2vec dimensionality -- confirm.
    w2v = numpy.zeros((len(shared), 300), dtype='float32')
    sg = numpy.zeros((len(shared), options['dim_word']), dtype='float32')
    for w, row in shared.items():
        w2v[row] = embed_map[w]
        sg[row] = wordvecs[w]

    clf = LinearRegression()
    clf.fit(w2v, sg)
    return clf

项目：heamy 作者：rushter | 项目源码 | 文件源码

def test_stacking():
    """Check that stacking preserves the row counts of the source dataset.

    Covers the default stack, ``full_test=False``, and ``full_test=False``
    after an explicit ``dataset.load()`` (cache path).
    """
    def new_model():
        return Regressor(estimator=LinearRegression, parameters={}, dataset=RealDataset)

    def assert_same_rows(stacked, source):
        assert stacked.X_train.shape[0] == source.dataset.X_train.shape[0]
        assert stacked.X_test.shape[0] == source.dataset.X_test.shape[0]
        assert stacked.y_train.shape[0] == source.dataset.y_train.shape[0]

    # Default stacking.
    model = new_model()
    ds = model.stack(10)
    assert_same_rows(ds, model)

    # Without the full test set: no NaN may appear in the stacked features.
    model = new_model()
    ds = model.stack(10, full_test=False)
    assert np.isnan(ds.X_train).sum() == 0
    assert_same_rows(ds, model)

    # Check cache
    model = new_model()
    model.dataset.load()
    ds = model.stack(10, full_test=False)
    assert np.isnan(ds.X_train).sum() == 0
    assert_same_rows(ds, model)

项目：fabric8-analytics-worker 作者：fabric8-analytics | 项目源码 | 文件源码

def _get_trend(cls, log, starting_date):
        """Get commit count trend based on log.

        Entries are counted into buckets; a new bucket is opened whenever an
        entry's author date is more than ``cls._SECONDS_PER_DAY`` past the
        current bucket start. The trend is the slope of a linear fit of the
        bucket counts against the bucket index.

        :param log: a log on which the trend should be computed
        :param starting_date: starting date of log
        :return: computed trend
        """
        day = cls._SECONDS_PER_DAY
        bucket_start = starting_date
        counts = [0]
        for entry in log:
            # At most one bucket is opened per entry.
            if entry['author']['date'] > bucket_start + day:
                bucket_start += day
                counts.append(0)
            counts[-1] += 1

        positions = np.arange(len(counts)).reshape(-1, 1)
        regressor = LinearRegression()
        regressor.fit(positions, np.array(counts))

        return regressor.coef_[0]

项目：MachineLearningDemo 作者：MichaelLinn | 项目源码 | 文件源码

def linear_model_manual(prediction_value):
    """Fit y = a*x + b by the closed-form least-squares formulas.

    Reads columns 'x' and 'y' from the hard-coded CSV file and returns a dict
    with 'coefficient', 'intercept' and 'predictions_result' (the fitted
    line evaluated at ``prediction_value``).
    """
    data = pd.read_csv('E://Spyder/LinearRegression/data/data.csv')
    x_values = np.array([int(value) for value in data['x']])
    y_values = np.array([float(value) for value in data['y']])

    mean_x = x_values.mean()
    mean_y = y_values.mean()
    mean_xy = (x_values*y_values).mean()
    mean_xx = (x_values*x_values).mean()

    # Method of least squares: slope = cov(x, y) / var(x)
    slope = (mean_xy - mean_x*mean_y) / (mean_xx - mean_x*mean_x)
    offset = mean_y - slope*mean_x

    predictions = {}
    predictions['coefficient'] = slope
    predictions['intercept'] = offset
    predictions['predictions_result'] = offset + slope*prediction_value
    return predictions

项目：MachineLearningDemo 作者：MichaelLinn | 项目源码 | 文件源码

def linear_model_multivariate():
    """Fit y = a*x + b through the normal equations.

    Reads columns 'x' and 'y' from the hard-coded CSV file, builds the design
    matrix ``[x, 1]`` and solves ``inv(X.T X) X.T y``.

    Returns:
        dict with 'coefficient' (the full solution vector ``[a, b]``) and
        'intercept' (the scalar ``b``).
    """
    data = pd.read_csv('E://Spyder/LinearRegression/data/data.csv')
    x_values = [int(value) for value in data['x']]
    y_values = [float(value) for value in data['y']]

    # Design matrix: first column x, second column the constant 1.
    X_parameters = np.ones((len(x_values), 2))
    X_parameters[:, 0] = x_values
    Y_parameters = np.array(y_values)

    # Formula
    # coefficient = inv(X.T*X) * X.T * y
    gram_inverse = np.linalg.inv(np.dot(X_parameters.T, X_parameters))
    coefficient = np.dot(np.dot(gram_inverse, X_parameters.T), Y_parameters)

    # The weight of the constant column IS the intercept; it equals
    # mean(y) - slope * mean(x). The previous expression added the mean of y
    # to the whole coefficient vector, which is not an intercept.
    intercept = coefficient[1]

    linearModel = {}
    linearModel['coefficient'] = coefficient
    linearModel['intercept'] = intercept
    return linearModel

项目：MachineLearningDemo 作者：MichaelLinn | 项目源码 | 文件源码

def get_loss():
    """Return the residual sum of squares of a linear fit on the CSV data.

    Reads columns 'x' and 'y' from the hard-coded CSV file, fits a
    LinearRegression and sums the squared differences between ``y`` and the
    fitted values.
    """
    data = pd.read_csv('E://Spyder/LinearRegression/data/data.csv')

    # One single-feature row per sample, as the estimator expects 2-D input.
    x_data = np.array([[int(value)] for value in data['x']])
    y_data = np.array([float(value) for value in data['y']])

    regr = linear_model.LinearRegression()
    regr.fit(x_data, y_data)

    residuals = y_data - regr.predict(x_data)
    return np.sum(residuals ** 2)



#Function to show the result of linear fit model

项目：sport_movements_analysis 作者：guillaumeAssogba | 项目源码 | 文件源码

def plot2dRegression(x,y, nameX, nameY, namePlot):
    """Plot a linear fit of y on x, save the figure and return R squared.

    The figure is written to "plot/loadings/" + namePlot and then shown. The
    returned value is the squared correlation between the fitted values and
    ``y`` as reported by ``stats.linregress``.
    """
    fitted_model = LinearRegression().fit(x, y)
    fitted_values = fitted_model.predict(x)

    plt.scatter(x,y, color='g')
    plt.plot(x, fitted_values, color='k')
    plt.xlabel(nameX)
    plt.ylabel(nameY)

    regression_stats = stats.linregress(fitted_values,y)
    r_squared_text = str(regression_stats.rvalue**2)
    print("The squared of the correlation coefficient R^2 is " + r_squared_text)

    plt.savefig("plot/loadings/"+namePlot, bbox_inches='tight')
    plt.show()
    return regression_stats.rvalue**2

#plot the 2D regression between the performance values and the loadings.
#return the correlation factor: R squared

项目：yellowbrick 作者：DistrictDataLabs | 项目源码 | 文件源码

def test_select_best(self):
        """
        Test the select best fit estimator

        ANSCOMBE[1] is expected to select a Pipeline, ANSCOMBE[3] a plain
        LinearRegression.
        """
        expectations = (
            (1, Pipeline),
            (3, LinearRegression),
        )

        for dataset_index, expected_type in expectations:
            X, y = ANSCOMBE[dataset_index]
            # A single feature column is required by the estimators.
            X = np.array(X)[:,np.newaxis]
            y = np.array(y)

            model = fit_select_best(X, y)
            self.assertIsNotNone(model)
            self.assertIsInstance(model, expected_type)

项目：yellowbrick 作者：DistrictDataLabs | 项目源码 | 文件源码

def test_estimator_instance(self):
        """
        Test that isestimator works for instances
        """
        # All instances are created up front, before any assertion runs.
        instances = (
            LinearRegression(),
            LogisticRegression(),
            KMeans(),
            LSHForest(),
            PCA(),
            RidgeCV(),
            LassoCV(),
            RandomForestClassifier(),
        )

        for instance in instances:
            self.assertTrue(isestimator(instance))

项目：yellowbrick 作者：DistrictDataLabs | 项目源码 | 文件源码

def test_estimator_class(self):
        """
        Test that isestimator works for classes
        """
        estimator_classes = (
            LinearRegression,
            LogisticRegression,
            KMeans,
            LSHForest,
            PCA,
            RidgeCV,
            LassoCV,
            RandomForestClassifier,
        )

        for estimator_class in estimator_classes:
            # Guard against an instance slipping into the tuple above.
            self.assertTrue(inspect.isclass(estimator_class))
            self.assertTrue(isestimator(estimator_class))

项目：yellowbrick 作者：DistrictDataLabs | 项目源码 | 文件源码

def test_clusterer_enforcement(self):
        """
        Assert that only clustering estimators can be passed to cluster viz
        """
        non_clusterers = [
            SVC, SVR, Ridge, RidgeCV, LinearRegression, RandomForestClassifier
        ]
        clusterers = [
            KMeans, MiniBatchKMeans, AffinityPropagation, MeanShift, DBSCAN, Birch
        ]

        # Every non-clustering estimator must be rejected.
        for rejected in non_clusterers:
            with self.assertRaises(YellowbrickTypeError):
                ClusteringScoreVisualizer(rejected())

        # Every clustering estimator must be accepted.
        for accepted in clusterers:
            try:
                ClusteringScoreVisualizer(accepted())
            except YellowbrickTypeError:
                self.fail("could not pass clustering estimator to visualizer")

项目：Market-Neutral-Model 作者：SunJiaxuan | 项目源码 | 文件源码

def GetBeta(f,*args):
    """Regress the forward log return on a factor and return the slope.

    ``f(*args)`` supplies the factor values; ``args[0]`` is the stock list and
    ``args[1]`` the start date ('%Y-%m-%d'). The return is the log ratio of
    the last to the first opening price within the 30 calendar days after
    the start date. Rows with missing values are dropped before the fit.
    """
    FactorValue = f(*args)
    stock, date = args[0], args[1]

    # 30 calendar days ahead (about 20 business days of data).
    window_end = "{:%Y-%m-%d}".format(datetime.datetime.strptime(date, '%Y-%m-%d') + datetime.timedelta(days=30))
    opening = get_price(list(stock), date, window_end, frequency='1d', fields=None)['OpeningPx']
    log_return = np.log(opening.iloc[-1]/opening.iloc[0])

    combined = pd.concat([pd.DataFrame(FactorValue), log_return], axis = 1)
    combined = combined.dropna()
    combined.columns = ['f','p']

    # Column matrices for the single-feature regression.
    factor_column = np.transpose(np.matrix(combined['f']))
    return_column = np.transpose(np.matrix(combined['p']))
    regr = linear_model.LinearRegression()
    regr.fit(factor_column, return_column)
    return regr.coef_

项目：Market-Neutral-Model 作者：SunJiaxuan | 项目源码 | 文件源码

def GetResiduals(stock,enddate):
    """Regress the forward log return on four factors and return residuals.

    The factors are EquityOCFP, EquitySize, RSIIndividual and Min130Day; the
    response is the log ratio of the last to the first opening price within
    the 30 calendar days after ``enddate``. The result is a one-column
    DataFrame (column label ``enddate``) of ``prediction - actual`` values,
    indexed like the rows that survive ``dropna``.
    """
    factor_frames = [EquityOCFP(stock,enddate), EquitySize(stock,enddate), RSIIndividual(stock,enddate), Min130Day(stock,enddate)]
    X = pd.concat(factor_frames, axis=1)

    date = enddate
    window_end = "{:%Y-%m-%d}".format(datetime.datetime.strptime(date, '%Y-%m-%d') + datetime.timedelta(days=30))
    tempprice = get_price(list(stock), date, window_end, frequency='1d', fields=None)['OpeningPx']
    y = np.log(tempprice.iloc[-1]/tempprice.iloc[0])

    DataAll = pd.concat([X,y],axis = 1).dropna()

    # First four columns are the factors, the fifth is the response.
    design = np.matrix(DataAll.ix[:,0:4])
    response = np.transpose(np.matrix(DataAll.ix[:,4]))

    regr = linear_model.LinearRegression()
    regr.fit(design, response)
    residuals = regr.predict(design) - response

    residuals = pd.DataFrame(data = residuals, index = np.transpose(np.matrix(DataAll.index.values)))
    residuals.index = DataAll.index.values
    residuals.columns = [enddate]
    return residuals

#This function is used in the later function

项目：aliMusic 作者：wangqingbaidu | 项目源码 | 文件源码

def getDataSet(self, max_value_threshold = 1000, train_length_threshold = 30):
        """Return ``self.data_set``, generating it on first access.

        When the attribute does not exist yet, ``__gen_data_set`` is called
        with the two thresholds and the freshly stored data set is returned.
        The thresholds are ignored once a data set exists.
        """
        try:
            return self.data_set
        except AttributeError:
            # Only a missing attribute means "not generated yet"; any other
            # error is a real failure and must not be swallowed.
            self.__gen_data_set(max_value_threshold = max_value_threshold,
                                train_length_threshold = train_length_threshold)
            return self.data_set

#     def __gen_model(self, model = LinearRegression()):
#         X_train, y_train, _ = self.getDataSet(10000, 60)
#         model.fit(X_train, y_train)
#         if self.ifPlotTrain:
#             y_pred = model.predict(X_train)
#             df = pd.DataFrame(np.hstack((y_train.reshape(-1,1), y_pred.reshape(-1,1))))
#             df.columns = ['Train', 'Predict']
#             df[:60].plot()
#             plt.title('train_all')
#             fig = plt.gcf()
#             fig.savefig('./img/train_all.png')
#             plt.close(fig)
#         self.model = model

项目：aliMusic 作者：wangqingbaidu | 项目源码 | 文件源码

def getDataSet(self, max_value_threshold = 1000, train_length_threshold = 30):
        """Return ``self.data_set``, generating it on first access.

        When the attribute does not exist yet, ``__gen_data_set`` is called
        with the two thresholds and the freshly stored data set is returned.
        The thresholds are ignored once a data set exists.
        """
        try:
            return self.data_set
        except AttributeError:
            # Only a missing attribute means "not generated yet"; any other
            # error is a real failure and must not be swallowed.
            self.__gen_data_set(max_value_threshold = max_value_threshold,
                                train_length_threshold = train_length_threshold)
            return self.data_set

#     def __gen_model(self, model = LinearRegression()):
#         X_train, y_train, _ = self.getDataSet(10000, 60)
#         model.fit(X_train, y_train)
#         if self.ifPlotTrain:
#             y_pred = model.predict(X_train)
#             df = pd.DataFrame(np.hstack((y_train.reshape(-1,1), y_pred.reshape(-1,1))))
#             df.columns = ['Train', 'Predict']
#             df[:60].plot()
#             plt.title('train_all')
#             fig = plt.gcf()
#             fig.savefig('./img/train_all.png')
#             plt.close(fig)
#         self.model = model

项目：aliMusic 作者：wangqingbaidu | 项目源码 | 文件源码

def analysis():
    """Fit a linear trend to one artist's daily play counts and plot a forecast.

    Queries the play counts per day for 2015-08-05 .. 2015-08-30, fits a
    LinearRegression on day positions 0..25 and plots the prediction for
    positions 26..49.
    """
    # NOTE(review): the connection credentials are hard-coded; consider
    # loading them from configuration or the environment.
    mysql_cn= pymysql.connect(host='10.25.0.119', port=3306,user='root', passwd='111111', db='music')
    try:
        df = pd.read_sql('''
    SELECT COUNT(*) as plays, ds from user_actions JOIN songs
    on user_actions.song_id = songs.song_id
    WHERE ds >= '20150805' AND ds <= '20150830' AND action_type = '1' 
    AND artist_id = 'c026b84e8f23a7741d9b670e3d8973f0'
    GROUP BY artist_id, ds 
    ORDER BY ds
    ''',mysql_cn)
    finally:
        # Release the connection even when the query fails.
        mysql_cn.close()

    # One position per day of the queried range (26 days).
    X = np.arange(26)
    df.columns = ['plays', 'ds']
    y = df['plays'].values
    print(X, y)

    model = LinearRegression()
    model.fit(X.reshape(X.shape[0], 1), y.reshape(y.shape[0]))

    # Forecast the 24 positions following the observed range.
    x = np.arange(26, 50)
    Y = model.predict(x.reshape(x.shape[0], 1))
    df = pd.DataFrame(Y)
    print(Y)
    df.plot()
    plt.show()

项目：pactools 作者：pactools | 项目源码 | 文件源码

def test_pink_noise_slope():
    """Check that pink noise has the requested spectral slope.

    For each slope the periodogram is fitted by a line in log-log space and
    the fitted coefficient must match ``-slope`` to one decimal. Returns True
    without testing when scikit-learn cannot be imported.
    """
    try:
        from sklearn.linear_model import LinearRegression
    except ImportError:
        return True

    fs = 500.0
    n_points = 10000

    # test the slope
    for slope in [1, 1.5, 2]:
        noise = pink_noise(n_points, slope=slope)
        psd = Spectrum(fs=fs).periodogram(noise).T
        freq = np.linspace(0, fs / 2., psd.size)[:, None]

        # linear regression fit in the log domain; the first bin (frequency
        # 0) is skipped because its logarithm is undefined
        log_freq = np.log10(freq[1:])
        log_psd = np.log10(psd[1:])
        reg = LinearRegression()
        reg.fit(log_freq, log_psd)
        assert_almost_equal(reg.coef_[0][0], -slope, decimal=1)