import time
import numpy as np
import xgboost as xgb
from xgboost import plot_importance,plot_tree
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.metrics import mean_squared_error
%matplotlib inline

def dropun(X):
    """Return ``X`` without the columns whose label starts with ``'Unnamed'``.

    Such labels (``'Unnamed: 0'`` ...) are what ``pandas.read_csv`` assigns to
    columns that have no header, e.g. an index written out by ``to_csv``.

    Args:
        X: a ``pandas.DataFrame``. It is not modified.

    Returns:
        ``X`` itself when there is nothing to drop, otherwise a new DataFrame
        without the ``'Unnamed...'`` columns.
    """
    # Only string labels can match; the isinstance guard keeps integer or
    # tuple labels from raising while being inspected.
    unnamed = [
        label for label in X.columns
        if isinstance(label, str) and label.startswith('Unnamed')
    ]
    if not unnamed:
        return X
    return X.drop(columns=unnamed)

def hist(L):
    """Draw one histogram per dataset in ``L`` with a shared look.

    Every histogram uses 40 bins, is density-normalised and is drawn as a
    filled step outline with alpha 0.3, so overlapping datasets stay visible.

    Args:
        L: iterable of datasets, each passed unchanged to ``plt.hist``.
    """
    style = {
        'histtype': 'stepfilled',
        'density': True,
        'alpha': 0.3,
        'bins': 40,
    }
    for series in L:
        plt.hist(series, **style)
        
def prediction(y_pred, y_test, plot=True):
    """Print and return the mean squared error between two value sequences.

    Args:
        y_pred: predicted values.
        y_test: reference values, same length as ``y_pred``.
        plot: when truthy, also draw both sequences against their position
            in a new figure (predictions in blue, reference values in red).

    Returns:
        The value computed by ``sklearn.metrics.mean_squared_error``.
    """
    # https://scikit-learn.org/stable/modules/generated/sklearn.metrics.mean_squared_error.html#sklearn.metrics.mean_squared_error
    sum_erro = mean_squared_error(y_pred, y_test)
    print ("MSE:", sum_erro)

    if not plot:
        return sum_erro

    # Line plot of predicted vs. reference values, indexed by sample position.
    positions = range(len(y_pred))
    plt.figure()
    plt.plot(positions, y_pred, 'b', label="predict")
    plt.plot(positions, y_test, 'r', label="test")
    plt.legend(loc="upper right")  # show the series labels

    return sum_erro
# Load the factor table (X) and the daily table (Y), then strip the
# header-less 'Unnamed...' columns from both.
X = dropun(pd.read_csv('data/factors2013-0-2-1.csv'))
Y = dropun(pd.read_csv('data/daily2011-2017-1.csv'))

# Every column of X except the two key columns is treated as a factor.
factors = X.columns.tolist()
factors.remove('ts_code')
factors.remove('trade_date')

# Distinct trade dates present in X, in ascending order.
days = sorted(set(X['trade_date']))
def get_split_by_trade_date(date, state=0, remove_factors=None):
    """Build a random 80/20 train/test split from the rows of one trade date.

    Rows of the module-level ``X`` and ``Y`` whose ``trade_date`` equals
    ``date`` are joined on ``ts_code`` (``pd.merge`` default, i.e. an inner
    join). The features are the ``factors`` columns minus ``remove_factors``,
    kept in the order of ``factors``; the target is the ``yield`` column
    multiplied by 100.

    Args:
        date: value compared against the ``trade_date`` column,
            e.g. ``'2013-03-01'``.
        state: missing-value handling applied to the joined rows.
            0 - leave missing values untouched;
            1 - drop every row that contains a missing value;
            2 - fill missing values with that column's mean over the
                joined rows of this date.
            Any other value behaves like 0.
        remove_factors: optional iterable of factor names to leave out of
            the features. ``None`` (the default) keeps every factor.

    Returns:
        ``(x_train, x_test, y_train, y_test)`` as produced by
        ``train_test_split`` with ``test_size=0.2``. No ``random_state`` is
        passed, so the split differs from call to call.
    """
    removed = [] if remove_factors is None else list(remove_factors)

    x = X[X['trade_date'] == date].drop(columns=['trade_date'] + removed)
    y = Y[Y['trade_date'] == date].drop(columns=['trade_date'])
    z = pd.merge(x, y, on='ts_code')

    if state == 1:
        z.dropna(inplace=True)
    elif state == 2:
        # numeric_only skips the string column ts_code, which would make
        # mean() raise on pandas >= 2.0.
        z.fillna(value=dict(z.mean(numeric_only=True)), inplace=True)

    # Select with a list in `factors` order: a set has arbitrary iteration
    # order (so feature columns would not line up with `factors` when callers
    # label per-feature results positionally) and pandas >= 2.0 rejects a set
    # as an indexer.
    excluded = set(removed)
    kept_factors = [name for name in factors if name not in excluded]
    x = z[kept_factors]
    y = z['yield'] * 100

    # Split the data set (pass random_state=0 here for a reproducible split).
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)
    # print('x_train.shape={}\ny_train.shape ={}\nx_test.shape={}\ny_test.shape={}'.format(x_train.shape, y_train.shape, x_test.shape, y_test.shape))
    return x_train, x_test, y_train, y_test

去除无用特征

根据permutation importance求在2013年全年各因子对于预测结果的重要性(使用随机random_state)。

首先根据线性模型计算一遍,再用XGBoost计算一遍

如果全年超过一半都为负数,则直接删去该因子

linear模型2013年各因子重要性

def get_linear_importance(date):
    """Return the permutation importance of a linear model for one trade date.

    The data comes from ``get_split_by_trade_date(date, 1)``, i.e. rows with
    missing values are dropped. A ``LinearRegression`` is fitted on the
    training part and the ``PermutationImportance`` wrapper is fitted on the
    test part.

    Args:
        date: trade date forwarded to ``get_split_by_trade_date``.

    Returns:
        The fitted ``PermutationImportance`` object.
    """
    # Split the data set.
    train_x, test_x, train_y, test_y = get_split_by_trade_date(date, 1)

    # Train the model.
    regressor = LinearRegression()
    regressor.fit(train_x, train_y)

    # Compute the permutation importance on the held-out rows.
    return PermutationImportance(regressor).fit(test_x, test_y)
# Permutation importance of every factor for a single trade date.
perm = get_linear_importance('2013-01-04')
print(pd.DataFrame(perm.feature_importances_, index=factors))
# Show all factors: deriving the count from `factors` keeps the table
# complete if the factor list changes (it was hard-coded to 56).
eli5.show_weights(perm, feature_names=factors, top=len(factors))
x_train.shape=(1185, 56)
y_train.shape =(1185,)
x_test.shape=(297, 56)
y_test.shape=(297,)
                  0
size       0.022446
beta       0.000113
betad      0.001902
idvol     -0.003462
total_vol  0.003035
idskew     0.140981
skew      -0.002844
coskew     0.008199
turn      -0.002669
std_turn   0.018550
volumed    0.000761
std_dvol   0.146898
retnmax    0.002345
illq      -0.005696
LM         0.018519
sharechg  -0.000700
age        0.000593
mom12      0.089085
mom6       0.029796
momchg     0.009969
imom       0.005457
lagretn   -0.002747
BM         0.005480
AM        -0.005562
LEV        0.006296
EP         0.012142
CFP        0.031895
OCFP      -0.000878
DP        -0.000697
SP         0.039042
AG         0.027988
LG         0.013685
BVEG       0.004491
INVG      -0.006270
INVchg     0.004587
SG         0.000570
SgINVg    -0.000105
PMG        0.003184
TAXchg     0.010312
ACC        0.065508
ACCP       0.164036
ROE        0.005988
ROA        0.006740
PA         0.003271
CT         0.009938
cash       0.012838
cashpr     0.132102
RD        -0.001856
RDsales    0.002344
CR        -0.004083
QR         0.007043
CFdebt     0.002388
salecash  -0.003473
saleinv    0.086358
CRG        0.025812
QRG       -0.005456






<style>
table.eli5-weights tr:hover {
    filter: brightness(85%);
}
</style>
    <table class="eli5-weights eli5-feature-importances" style="border-collapse: collapse; border: none; margin-top: 0em; table-layout: auto;">
<thead>
<tr style="border: none;">
    <th style="padding: 0 1em 0 0.5em; text-align: right; border: none;">Weight</th>
    <th style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">Feature</th>
</tr>
</thead>
<tbody>

    <tr style="background-color: hsl(120, 100.00%, 80.00%); border: none;">
        <td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
            0.1640

                &plusmn; 0.0249

        </td>
        <td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
            ACCP
        </td>
    </tr>

    <tr style="background-color: hsl(120, 100.00%, 81.49%); border: none;">
        <td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
            0.1469

                &plusmn; 0.0485

        </td>
        <td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
            std_dvol
        </td>
    </tr>

    <tr style="background-color: hsl(120, 100.00%, 82.01%); border: none;">
        <td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
            0.1410

                &plusmn; 0.0263

        </td>
        <td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
            idskew
        </td>
    </tr>

    <tr style="background-color: hsl(120, 100.00%, 82.81%); border: none;">
        <td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
            0.1321

                &plusmn; 0.0312

        </td>
        <td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
            cashpr
        </td>
    </tr>

    <tr style="background-color: hsl(120, 100.00%, 86.96%); border: none;">
        <td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
            0.0891

                &plusmn; 0.0471

        </td>
        <td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
            mom12
        </td>
    </tr>

    <tr style="background-color: hsl(120, 100.00%, 87.24%); border: none;">
        <td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
            0.0864

                &plusmn; 0.0665

        </td>
        <td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
            saleinv
        </td>
    </tr>

    <tr style="background-color: hsl(120, 100.00%, 89.48%); border: none;">
        <td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
            0.0655

                &plusmn; 0.0407

        </td>
        <td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
            ACC
        </td>
    </tr>

    <tr style="background-color: hsl(120, 100.00%, 92.68%); border: none;">
        <td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
            0.0390

                &plusmn; 0.0297

        </td>
        <td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
            SP
        </td>
    </tr>

    <tr style="background-color: hsl(120, 100.00%, 93.64%); border: none;">
        <td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
            0.0319

                &plusmn; 0.0159

        </td>
        <td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
            CFP
        </td>
    </tr>

    <tr style="background-color: hsl(120, 100.00%, 93.94%); border: none;">
        <td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
            0.0298

                &plusmn; 0.0300

        </td>
        <td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
            mom6
        </td>
    </tr>

    <tr style="background-color: hsl(120, 100.00%, 94.20%); border: none;">
        <td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
            0.0280

                &plusmn; 0.0271

        </td>
        <td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
            AG
        </td>
    </tr>

    <tr style="background-color: hsl(120, 100.00%, 94.52%); border: none;">
        <td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
            0.0258

                &plusmn; 0.0220

        </td>
        <td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
            CRG
        </td>
    </tr>

    <tr style="background-color: hsl(120, 100.00%, 95.03%); border: none;">
        <td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
            0.0224

                &plusmn; 0.0311

        </td>
        <td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
            size
        </td>
    </tr>

    <tr style="background-color: hsl(120, 100.00%, 95.65%); border: none;">
        <td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
            0.0185

                &plusmn; 0.0286

        </td>
        <td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
            std_turn
        </td>
    </tr>

    <tr style="background-color: hsl(120, 100.00%, 95.66%); border: none;">
        <td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
            0.0185

                &plusmn; 0.0124

        </td>
        <td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
            LM
        </td>
    </tr>

    <tr style="background-color: hsl(120, 100.00%, 96.48%); border: none;">
        <td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
            0.0137

                &plusmn; 0.0407

        </td>
        <td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
            LG
        </td>
    </tr>

    <tr style="background-color: hsl(120, 100.00%, 96.64%); border: none;">
        <td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
            0.0128

                &plusmn; 0.0083

        </td>
        <td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
            cash
        </td>
    </tr>

    <tr style="background-color: hsl(120, 100.00%, 96.77%); border: none;">
        <td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
            0.0121

                &plusmn; 0.0262

        </td>
        <td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
            EP
        </td>
    </tr>

    <tr style="background-color: hsl(120, 100.00%, 97.12%); border: none;">
        <td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
            0.0103

                &plusmn; 0.0119

        </td>
        <td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
            TAXchg
        </td>
    </tr>

    <tr style="background-color: hsl(120, 100.00%, 97.18%); border: none;">
        <td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
            0.0100

                &plusmn; 0.0262

        </td>
        <td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
            momchg
        </td>
    </tr>

    <tr style="background-color: hsl(120, 100.00%, 97.19%); border: none;">
        <td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
            0.0099

                &plusmn; 0.0095

        </td>
        <td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
            CT
        </td>
    </tr>

    <tr style="background-color: hsl(120, 100.00%, 97.54%); border: none;">
        <td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
            0.0082

                &plusmn; 0.0189

        </td>
        <td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
            coskew
        </td>
    </tr>

    <tr style="background-color: hsl(120, 100.00%, 97.79%); border: none;">
        <td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
            0.0070

                &plusmn; 0.0130

        </td>
        <td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
            QR
        </td>
    </tr>

    <tr style="background-color: hsl(120, 100.00%, 97.86%); border: none;">
        <td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
            0.0067

                &plusmn; 0.0082

        </td>
        <td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
            ROA
        </td>
    </tr>

    <tr style="background-color: hsl(120, 100.00%, 97.96%); border: none;">
        <td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
            0.0063

                &plusmn; 0.0178

        </td>
        <td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
            LEV
        </td>
    </tr>

    <tr style="background-color: hsl(120, 100.00%, 98.03%); border: none;">
        <td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
            0.0060

                &plusmn; 0.0132

        </td>
        <td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
            ROE
        </td>
    </tr>

    <tr style="background-color: hsl(120, 100.00%, 98.15%); border: none;">
        <td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
            0.0055

                &plusmn; 0.0190

        </td>
        <td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
            BM
        </td>
    </tr>

    <tr style="background-color: hsl(120, 100.00%, 98.15%); border: none;">
        <td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
            0.0055

                &plusmn; 0.0057

        </td>
        <td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
            imom
        </td>
    </tr>

    <tr style="background-color: hsl(120, 100.00%, 98.36%); border: none;">
        <td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
            0.0046

                &plusmn; 0.0078

        </td>
        <td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
            INVchg
        </td>
    </tr>

    <tr style="background-color: hsl(120, 100.00%, 98.39%); border: none;">
        <td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
            0.0045

                &plusmn; 0.0073

        </td>
        <td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
            BVEG
        </td>
    </tr>

    <tr style="background-color: hsl(120, 100.00%, 98.71%); border: none;">
        <td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
            0.0033

                &plusmn; 0.0058

        </td>
        <td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
            PA
        </td>
    </tr>

    <tr style="background-color: hsl(120, 100.00%, 98.73%); border: none;">
        <td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
            0.0032

                &plusmn; 0.0051

        </td>
        <td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
            PMG
        </td>
    </tr>

    <tr style="background-color: hsl(120, 100.00%, 98.78%); border: none;">
        <td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
            0.0030

                &plusmn; 0.0116

        </td>
        <td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
            total_vol
        </td>
    </tr>

    <tr style="background-color: hsl(120, 100.00%, 98.96%); border: none;">
        <td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
            0.0024

                &plusmn; 0.0032

        </td>
        <td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
            CFdebt
        </td>
    </tr>

    <tr style="background-color: hsl(120, 100.00%, 98.98%); border: none;">
        <td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
            0.0023

                &plusmn; 0.0133

        </td>
        <td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
            retnmax
        </td>
    </tr>

    <tr style="background-color: hsl(120, 100.00%, 98.98%); border: none;">
        <td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
            0.0023

                &plusmn; 0.0028

        </td>
        <td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
            RDsales
        </td>
    </tr>

    <tr style="background-color: hsl(120, 100.00%, 99.12%); border: none;">
        <td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
            0.0019

                &plusmn; 0.0067

        </td>
        <td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
            betad
        </td>
    </tr>

    <tr style="background-color: hsl(120, 100.00%, 99.53%); border: none;">
        <td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
            0.0008

                &plusmn; 0.0018

        </td>
        <td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
            volumed
        </td>
    </tr>

    <tr style="background-color: hsl(120, 100.00%, 99.61%); border: none;">
        <td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
            0.0006

                &plusmn; 0.0005

        </td>
        <td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
            age
        </td>
    </tr>

    <tr style="background-color: hsl(120, 100.00%, 99.62%); border: none;">
        <td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
            0.0006

                &plusmn; 0.0063

        </td>
        <td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
            SG
        </td>
    </tr>

    <tr style="background-color: hsl(120, 100.00%, 99.88%); border: none;">
        <td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
            0.0001

                &plusmn; 0.0053

        </td>
        <td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
            beta
        </td>
    </tr>

    <tr style="background-color: hsl(0, 100.00%, 99.88%); border: none;">
        <td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
            -0.0001

                &plusmn; 0.0010

        </td>
        <td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
            SgINVg
        </td>
    </tr>

    <tr style="background-color: hsl(0, 100.00%, 99.56%); border: none;">
        <td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
            -0.0007

                &plusmn; 0.0062

        </td>
        <td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
            DP
        </td>
    </tr>

    <tr style="background-color: hsl(0, 100.00%, 99.56%); border: none;">
        <td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
            -0.0007

                &plusmn; 0.0017

        </td>
        <td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
            sharechg
        </td>
    </tr>

    <tr style="background-color: hsl(0, 100.00%, 99.49%); border: none;">
        <td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
            -0.0009

                &plusmn; 0.0172

        </td>
        <td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
            OCFP
        </td>
    </tr>

    <tr style="background-color: hsl(0, 100.00%, 99.13%); border: none;">
        <td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
            -0.0019

                &plusmn; 0.0071

        </td>
        <td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
            RD
        </td>
    </tr>

    <tr style="background-color: hsl(0, 100.00%, 98.88%); border: none;">
        <td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
            -0.0027

                &plusmn; 0.0048

        </td>
        <td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
            turn
        </td>
    </tr>

    <tr style="background-color: hsl(0, 100.00%, 98.86%); border: none;">
        <td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
            -0.0027

                &plusmn; 0.0026

        </td>
        <td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
            lagretn
        </td>
    </tr>

    <tr style="background-color: hsl(0, 100.00%, 98.83%); border: none;">
        <td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
            -0.0028

                &plusmn; 0.0025

        </td>
        <td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
            skew
        </td>
    </tr>

    <tr style="background-color: hsl(0, 100.00%, 98.66%); border: none;">
        <td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
            -0.0035

                &plusmn; 0.0107

        </td>
        <td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
            idvol
        </td>
    </tr>

    <tr style="background-color: hsl(0, 100.00%, 98.65%); border: none;">
        <td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
            -0.0035

                &plusmn; 0.0025

        </td>
        <td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
            salecash
        </td>
    </tr>

    <tr style="background-color: hsl(0, 100.00%, 98.49%); border: none;">
        <td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
            -0.0041

                &plusmn; 0.0069

        </td>
        <td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
            CR
        </td>
    </tr>

    <tr style="background-color: hsl(0, 100.00%, 98.15%); border: none;">
        <td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
            -0.0055

                &plusmn; 0.0050

        </td>
        <td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
            QRG
        </td>
    </tr>

    <tr style="background-color: hsl(0, 100.00%, 98.13%); border: none;">
        <td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
            -0.0056

                &plusmn; 0.0190

        </td>
        <td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
            AM
        </td>
    </tr>

    <tr style="background-color: hsl(0, 100.00%, 98.10%); border: none;">
        <td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
            -0.0057

                &plusmn; 0.0074

        </td>
        <td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
            illq
        </td>
    </tr>

    <tr style="background-color: hsl(0, 100.00%, 97.96%); border: none;">
        <td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
            -0.0063

                &plusmn; 0.0067

        </td>
        <td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
            INVG
        </td>
    </tr>


</tbody>
</table>
# Per-factor running totals over all trade dates:
#   'sum'      - accumulated permutation importance
#   'negative' - number of days on which the importance was below zero
importance = pd.DataFrame({'sum': 0, 'negative': 0}, index=factors)

for day in days:
    print(day)
    perm = get_linear_importance(day)
    daily_scores = perm.feature_importances_
    importance['sum'] += daily_scores
    importance['negative'] += (daily_scores < 0)
2013-01-04
x_train.shape=(1185, 56)
y_train.shape =(1185,)
x_test.shape=(297, 56)
y_test.shape=(297,)
2013-01-07
x_train.shape=(1175, 56)
y_train.shape =(1175,)
x_test.shape=(294, 56)
y_test.shape=(294,)
2013-01-08
x_train.shape=(1176, 56)
y_train.shape =(1176,)
x_test.shape=(294, 56)
y_test.shape=(294,)
2013-01-09
x_train.shape=(1178, 56)
y_train.shape =(1178,)
x_test.shape=(295, 56)
y_test.shape=(295,)
2013-01-10
x_train.shape=(1170, 56)
y_train.shape =(1170,)
x_test.shape=(293, 56)
y_test.shape=(293,)
2013-01-11
x_train.shape=(1172, 56)
y_train.shape =(1172,)
x_test.shape=(293, 56)
y_test.shape=(293,)
2013-01-14
x_train.shape=(1161, 56)
y_train.shape =(1161,)
x_test.shape=(291, 56)
y_test.shape=(291,)
2013-01-15
x_train.shape=(1176, 56)
y_train.shape =(1176,)
x_test.shape=(294, 56)
y_test.shape=(294,)
2013-01-16
x_train.shape=(1164, 56)
y_train.shape =(1164,)
x_test.shape=(291, 56)
y_test.shape=(291,)
2013-01-17
x_train.shape=(1174, 56)
y_train.shape =(1174,)
x_test.shape=(294, 56)
y_test.shape=(294,)
2013-01-18
x_train.shape=(1175, 56)
y_train.shape =(1175,)
x_test.shape=(294, 56)
y_test.shape=(294,)
2013-01-21
x_train.shape=(1186, 56)
y_train.shape =(1186,)
x_test.shape=(297, 56)
y_test.shape=(297,)
2013-01-22
x_train.shape=(1181, 56)
y_train.shape =(1181,)
x_test.shape=(296, 56)
y_test.shape=(296,)
2013-01-23
x_train.shape=(1168, 56)
y_train.shape =(1168,)
x_test.shape=(293, 56)
y_test.shape=(293,)
2013-01-24
x_train.shape=(1184, 56)
y_train.shape =(1184,)
x_test.shape=(297, 56)
y_test.shape=(297,)
2013-01-25
x_train.shape=(1167, 56)
y_train.shape =(1167,)
x_test.shape=(292, 56)
y_test.shape=(292,)

...
2013-12-31
x_train.shape=(1257, 56)
y_train.shape =(1257,)
x_test.shape=(315, 56)
y_test.shape=(315,)
# Do not truncate columns when the transposed table is displayed.
pd.set_option('display.max_columns', None)
# Factors ordered by accumulated importance, largest first.
importance.sort_values(by="sum", ascending=False).T
ROE mom12 ACCP std_dvol momchg BM AG SP idskew sharechg SgINVg saleinv CRG total_vol ACC LM LG cashpr LEV betad mom6 imom CFdebt QRG std_turn coskew PA beta ROA QR INVchg size DP TAXchg PMG volumed BVEG CR CT OCFP SG lagretn retnmax illq cash CFP skew EP INVG RDsales RD salecash idvol age AM turn
sum 450.179329 430.119666 74.490326 72.665519 18.17473 18.007253 17.610628 16.867773 15.003665 14.761029 14.236372 14.163207 13.581656 11.512615 10.269702 9.761518 7.554256 6.340006 5.963265 5.642828 3.519301 2.888595 2.681334 2.456618 2.447205 2.4201 2.337622 2.337583 2.138391 2.084859 1.935707 1.856355 1.847884 1.705292 1.395641 1.257528 1.239606 1.188467 1.143138 1.113026 0.890586 0.826029 0.81739 0.780101 0.593657 0.590286 0.501265 0.443186 0.340089 0.211641 0.153811 0.098633 0.090249 0.017987 -0.00769 -0.100969
negative 9.000000 6.000000 12.000000 7.000000 30.00000 26.000000 29.000000 40.000000 36.000000 35.000000 32.000000 17.000000 33.000000 40.000000 28.000000 45.000000 47.000000 50.000000 55.000000 50.000000 62.000000 56.000000 67.000000 60.000000 55.000000 51.0000 59.000000 65.000000 57.000000 64.000000 56.000000 71.000000 64.000000 61.000000 87.000000 76.000000 75.000000 86.000000 67.000000 78.000000 87.000000 84.000000 79.00000 87.000000 95.000000 103.000000 95.000000 96.000000 106.000000 110.000000 108.000000 127.000000 116.000000 120.000000 125.00000 130.000000
# Factors ordered by the number of negative-importance days, fewest first.
importance.sort_values(by="negative").T
mom12 std_dvol ROE ACCP saleinv BM ACC AG momchg SgINVg CRG sharechg idskew total_vol SP LM LG betad cashpr coskew std_turn LEV imom INVchg ROA PA QRG TAXchg mom6 DP QR beta CT CFdebt size BVEG volumed OCFP retnmax lagretn CR illq SG PMG skew cash EP CFP INVG RD RDsales idvol age AM salecash turn
sum 430.119666 72.665519 450.179329 74.490326 14.163207 18.007253 10.269702 17.610628 18.17473 14.236372 13.581656 14.761029 15.003665 11.512615 16.867773 9.761518 7.554256 5.642828 6.340006 2.4201 2.447205 5.963265 2.888595 1.935707 2.138391 2.337622 2.456618 1.705292 3.519301 1.847884 2.084859 2.337583 1.143138 2.681334 1.856355 1.239606 1.257528 1.113026 0.81739 0.826029 1.188467 0.780101 0.890586 1.395641 0.501265 0.593657 0.443186 0.590286 0.340089 0.153811 0.211641 0.090249 0.017987 -0.00769 0.098633 -0.100969
negative 6.000000 7.000000 9.000000 12.000000 17.000000 26.000000 28.000000 29.000000 30.00000 32.000000 33.000000 35.000000 36.000000 40.000000 40.000000 45.000000 47.000000 50.000000 50.000000 51.0000 55.000000 55.000000 56.000000 56.000000 57.000000 59.000000 60.000000 61.000000 62.000000 64.000000 64.000000 65.000000 67.000000 67.000000 71.000000 75.000000 76.000000 78.000000 79.00000 84.000000 86.000000 87.000000 87.000000 87.000000 95.000000 95.000000 96.000000 103.000000 106.000000 108.000000 110.000000 116.000000 120.000000 125.00000 127.000000 130.000000

def get_last_N(importance, n):
    """Return the factors that rank in the bottom ``n`` by both criteria.

    A factor is returned when it is among the ``n`` smallest values of the
    ``'sum'`` column and also among the ``n`` largest values of the
    ``'negative'`` column.

    Args:
        importance: DataFrame indexed by factor name with ``'sum'`` and
            ``'negative'`` columns.
        n: how many of the worst-ranked factors to consider per criterion.

    Returns:
        A ``set`` of index labels; empty when ``n`` is zero or negative.
    """
    # index[-0:] would select everything, so non-positive n is handled here.
    # An empty set (not {} which is a dict) keeps the return type uniform.
    if n <= 0:
        return set()
    lowest_sum = importance.sort_values(by="sum", ascending=False).index[-n:]
    most_negative = importance.sort_values(by="negative", ascending=True).index[-n:]
    return set(lowest_sum) & set(most_negative)

# Factors in the bottom 10 by both criteria. Sorted so the list order is
# reproducible; plain set iteration order can differ between interpreter runs.
remove_factors = sorted(get_last_N(importance, 10))
remove_factors
['salecash', 'idvol', 'EP', 'INVG', 'turn', 'AM', 'RD', 'RDsales', 'age']
# Combined rank per factor: its 1-based position when sorted by descending
# 'sum' plus its 1-based position when sorted by ascending 'negative'.
# A low mark therefore means a large total importance and few negative days.
importance["mark"] = 0

by_sum = importance.sort_values(by="sum", ascending=False).index
for position, factor in enumerate(by_sum, start=1):
    importance.loc[factor, "mark"] += position

by_negative = importance.sort_values(by="negative", ascending=True).index
for position, factor in enumerate(by_negative, start=1):
    importance.loc[factor, "mark"] += position

importance.sort_values(by="mark", ascending=True).transpose()
mom12 ROE std_dvol ACCP BM momchg AG saleinv SgINVg sharechg ACC idskew SP CRG total_vol LM LG cashpr betad LEV imom std_turn coskew mom6 QRG PA ROA INVchg CFdebt beta QR TAXchg DP size CT volumed BVEG OCFP CR PMG lagretn retnmax SG illq cash skew CFP EP INVG RD RDsales idvol age salecash AM turn
sum 430.119666 450.179329 72.665519 74.490326 18.007253 18.17473 17.610628 14.163207 14.236372 14.761029 10.269702 15.003665 16.867773 13.581656 11.512615 9.761518 7.554256 6.340006 5.642828 5.963265 2.888595 2.447205 2.4201 3.519301 2.456618 2.337622 2.138391 1.935707 2.681334 2.337583 2.084859 1.705292 1.847884 1.856355 1.143138 1.257528 1.239606 1.113026 1.188467 1.395641 0.826029 0.81739 0.890586 0.780101 0.593657 0.501265 0.590286 0.443186 0.340089 0.153811 0.211641 0.090249 0.017987 0.098633 -0.00769 -0.100969
negative 6.000000 9.000000 7.000000 12.000000 26.000000 30.00000 29.000000 17.000000 32.000000 35.000000 28.000000 36.000000 40.000000 33.000000 40.000000 45.000000 47.000000 50.000000 50.000000 55.000000 56.000000 55.000000 51.0000 62.000000 60.000000 59.000000 57.000000 56.000000 67.000000 65.000000 64.000000 61.000000 64.000000 71.000000 67.000000 76.000000 75.000000 78.000000 86.000000 87.000000 84.000000 79.00000 87.000000 87.000000 95.000000 95.000000 103.000000 96.000000 106.000000 108.000000 110.000000 116.000000 120.000000 127.000000 125.00000 130.000000
mark 3.000000 4.000000 6.000000 7.000000 12.000000 14.00000 15.000000 17.000000 21.000000 22.000000 22.000000 22.000000 23.000000 24.000000 28.000000 32.000000 34.000000 37.000000 38.000000 41.000000 45.000000 46.000000 46.0000 50.000000 51.000000 53.000000 54.000000 55.000000 57.000000 60.000000 61.000000 62.000000 63.000000 67.000000 72.000000 73.000000 73.000000 78.000000 79.000000 79.000000 82.000000 82.00000 84.000000 86.000000 91.000000 92.000000 94.000000 95.000000 98.000000 101.000000 101.000000 105.000000 107.000000 107.000000 109.00000 112.000000
def get_trivial(n):
    """Return the names of the ``n`` factors with the largest "mark" value.

    The module-level ``importance`` frame is sorted by its "mark" column in
    descending order and the first ``n`` index labels are returned as a list.
    ``n == 0`` short-circuits to an empty list.
    """
    if n == 0:
        return []
    ranked = importance.sort_values(by="mark", ascending=False)
    return ranked.index[:n].tolist()
# Preview: the five factors with the highest "mark" value.
get_trivial(5)
['turn', 'AM', 'salecash', 'age', 'idvol']

Linear模型去除无用特征后计算误差(MSE)

# Collect the value returned by linear_train for every trading day, with the
# factors in `remove_factors` excluded and state=1.
# NOTE(review): state=1 is presumably the "drop rows with missing values" mode
# described in get_split_by_trade_date — confirm against linear_train.
linear_err1 = []
for day in days:
    print(day)
    linear_err1.append(linear_train(day, 1, remove_factors))
2013-01-04
x_train.shape=(1284, 47)
y_train.shape =(1284,)
x_test.shape=(322, 47)
y_test.shape=(322,)
MSE: 3.8900002458152043
2013-01-07
x_train.shape=(1276, 47)
y_train.shape =(1276,)
x_test.shape=(320, 47)
y_test.shape=(320,)
MSE: 3.6450571284258366
2013-01-08
x_train.shape=(1272, 47)
y_train.shape =(1272,)
x_test.shape=(319, 47)
y_test.shape=(319,)
MSE: 3.4844858023817724
2013-01-09
x_train.shape=(1279, 47)
y_train.shape =(1279,)
x_test.shape=(320, 47)
y_test.shape=(320,)
MSE: 4.385839023354379
2013-01-10
x_train.shape=(1267, 47)
y_train.shape =(1267,)
x_test.shape=(317, 47)
y_test.shape=(317,)
MSE: 3.2301927988080217
2013-01-11
x_train.shape=(1276, 47)
y_train.shape =(1276,)
x_test.shape=(319, 47)
y_test.shape=(319,)
MSE: 3.3160646057613286
2013-01-14
x_train.shape=(1264, 47)
y_train.shape =(1264,)
x_test.shape=(317, 47)
y_test.shape=(317,)
MSE: 2.6315416654139416
2013-01-15
x_train.shape=(1280, 47)
y_train.shape =(1280,)
x_test.shape=(320, 47)
y_test.shape=(320,)
MSE: 3.161195649230965
2013-01-16
x_train.shape=(1267, 47)
y_train.shape =(1267,)
x_test.shape=(317, 47)
y_test.shape=(317,)
MSE: 3.648510598956155
2013-01-17
x_train.shape=(1277, 47)
y_train.shape =(1277,)
x_test.shape=(320, 47)
y_test.shape=(320,)
MSE: 3.6928441679882473
2013-01-18
x_train.shape=(1278, 47)
y_train.shape =(1278,)
x_test.shape=(320, 47)
y_test.shape=(320,)
MSE: 2.6165099237166096
2013-01-21
x_train.shape=(1288, 47)
y_train.shape =(1288,)
x_test.shape=(322, 47)
y_test.shape=(322,)
MSE: 3.894516190209926
2013-01-22
x_train.shape=(1284, 47)
y_train.shape =(1284,)
x_test.shape=(322, 47)
y_test.shape=(322,)
MSE: 4.286606469881523
2013-01-23
x_train.shape=(1270, 47)
y_train.shape =(1270,)
x_test.shape=(318, 47)
y_test.shape=(318,)
MSE: 3.5002474096789133
2013-01-24
x_train.shape=(1289, 47)
y_train.shape =(1289,)
x_test.shape=(323, 47)
y_test.shape=(323,)
MSE: 4.337442107992211
2013-01-25
x_train.shape=(1268, 47)
y_train.shape =(1268,)
x_test.shape=(317, 47)
y_test.shape=(317,)
MSE: 4.233215798264175
2013-01-28
x_train.shape=(1285, 47)
y_train.shape =(1285,)
x_test.shape=(322, 47)
y_test.shape=(322,)
MSE: 2.1029862362265517
2013-01-29
x_train.shape=(1291, 47)
y_train.shape =(1291,)
x_test.shape=(323, 47)
y_test.shape=(323,)
MSE: 3.0675884909138524
2013-01-30
x_train.shape=(1288, 47)
y_train.shape =(1288,)
x_test.shape=(323, 47)
y_test.shape=(323,)
MSE: 3.390301459355334
2013-01-31
x_train.shape=(1298, 47)
y_train.shape =(1298,)
x_test.shape=(325, 47)
y_test.shape=(325,)
MSE: 4.195368336294644
2013-02-01
x_train.shape=(1283, 47)
y_train.shape =(1283,)
x_test.shape=(321, 47)
y_test.shape=(321,)
MSE: 4.275662003906857
2013-02-04
x_train.shape=(1284, 47)
y_train.shape =(1284,)
x_test.shape=(321, 47)
y_test.shape=(321,)
MSE: 4.889704561843679
2013-02-05
x_train.shape=(1287, 47)
y_train.shape =(1287,)
x_test.shape=(322, 47)
y_test.shape=(322,)
MSE: 3.308904702025854
2013-02-06
x_train.shape=(1302, 47)
y_train.shape =(1302,)
x_test.shape=(326, 47)
y_test.shape=(326,)
MSE: 2.3231021846206152
2013-02-07
x_train.shape=(1291, 47)
y_train.shape =(1291,)
x_test.shape=(323, 47)
y_test.shape=(323,)
MSE: 3.0897366381997493
2013-02-08
x_train.shape=(1291, 47)
y_train.shape =(1291,)
x_test.shape=(323, 47)
y_test.shape=(323,)
MSE: 3.0095948358334943
2013-02-18
x_train.shape=(1306, 47)
y_train.shape =(1306,)
x_test.shape=(327, 47)
y_test.shape=(327,)
MSE: 3.7638168877810902
2013-02-19
x_train.shape=(1289, 47)
y_train.shape =(1289,)
x_test.shape=(323, 47)
y_test.shape=(323,)
MSE: 3.9436390461992437
2013-02-20
x_train.shape=(1276, 47)
y_train.shape =(1276,)
x_test.shape=(320, 47)
y_test.shape=(320,)
MSE: 2.8167905873185783
2013-02-21
x_train.shape=(1316, 47)
y_train.shape =(1316,)
x_test.shape=(330, 47)
y_test.shape=(330,)
MSE: 4.046686848841419
2013-02-22
x_train.shape=(1298, 47)
y_train.shape =(1298,)
x_test.shape=(325, 47)
y_test.shape=(325,)
MSE: 4.683281982924828
2013-02-25
x_train.shape=(1292, 47)
y_train.shape =(1292,)
x_test.shape=(323, 47)
y_test.shape=(323,)
MSE: 3.465463687553005
2013-02-26
x_train.shape=(1298, 47)
y_train.shape =(1298,)
x_test.shape=(325, 47)
y_test.shape=(325,)
MSE: 3.895290637571066
2013-02-27
x_train.shape=(1298, 47)
y_train.shape =(1298,)
x_test.shape=(325, 47)
y_test.shape=(325,)
MSE: 3.1285588354082488
2013-02-28
x_train.shape=(1304, 47)
y_train.shape =(1304,)
x_test.shape=(326, 47)
y_test.shape=(326,)
MSE: 2.250473626020243
2013-03-01
x_train.shape=(1292, 47)
y_train.shape =(1292,)
x_test.shape=(324, 47)
y_test.shape=(324,)
MSE: 2.8604010827684423
2013-03-04
x_train.shape=(1296, 47)
y_train.shape =(1296,)
x_test.shape=(324, 47)
y_test.shape=(324,)
MSE: 3.6787565887350375
2013-03-05
x_train.shape=(1292, 47)
y_train.shape =(1292,)
x_test.shape=(323, 47)
y_test.shape=(323,)
MSE: 2.581324233766885
2013-03-06
x_train.shape=(1291, 47)
y_train.shape =(1291,)
x_test.shape=(323, 47)
y_test.shape=(323,)
MSE: 3.626701292445271
2013-03-07
x_train.shape=(1293, 47)
y_train.shape =(1293,)
x_test.shape=(324, 47)
y_test.shape=(324,)
MSE: 4.545620279447667
2013-03-08
x_train.shape=(1304, 47)
y_train.shape =(1304,)
x_test.shape=(327, 47)
y_test.shape=(327,)
MSE: 3.928400420077943
2013-03-11
x_train.shape=(1316, 47)
y_train.shape =(1316,)
x_test.shape=(329, 47)
y_test.shape=(329,)
MSE: 3.166664175089738
2013-03-12
x_train.shape=(1301, 47)
y_train.shape =(1301,)
x_test.shape=(326, 47)
y_test.shape=(326,)
MSE: 4.5616688994028
2013-03-13
x_train.shape=(1309, 47)
y_train.shape =(1309,)
x_test.shape=(328, 47)
y_test.shape=(328,)
MSE: 4.472122380897298
2013-03-14
x_train.shape=(1284, 47)
y_train.shape =(1284,)
x_test.shape=(322, 47)
y_test.shape=(322,)
MSE: 3.188998539129929
2013-03-15
x_train.shape=(1293, 47)
y_train.shape =(1293,)
x_test.shape=(324, 47)
y_test.shape=(324,)
MSE: 3.040507215859806
2013-03-18
x_train.shape=(1318, 47)
y_train.shape =(1318,)
x_test.shape=(330, 47)
y_test.shape=(330,)
MSE: 4.55967705671932
2013-03-19
x_train.shape=(1320, 47)
y_train.shape =(1320,)
x_test.shape=(331, 47)
y_test.shape=(331,)
MSE: 3.6700105115883552
2013-03-20
x_train.shape=(1322, 47)
y_train.shape =(1322,)
x_test.shape=(331, 47)
y_test.shape=(331,)
MSE: 1.8441303047839912
...
2013-12-31
x_train.shape=(1391, 47)
y_train.shape =(1391,)
x_test.shape=(348, 47)
y_test.shape=(348,)
MSE: 2.682600346871959
# Mean of the per-day results collected in linear_err1.
np.array(linear_err1).mean()
3.8178624023374335
# Same sweep as linear_err1 but with state=2.
# NOTE(review): state=2 is presumably the "fill missing values with the daily
# mean" mode described in get_split_by_trade_date — confirm against linear_train.
linear_err2 = []
for day in days:
    print(day)
    linear_err2.append(linear_train(day, 2, remove_factors))
2013-01-04
x_train.shape=(1820, 47)
y_train.shape =(1820,)
x_test.shape=(455, 47)
y_test.shape=(455,)
MSE: 5.068830718368795
2013-01-07
x_train.shape=(1793, 47)
y_train.shape =(1793,)
x_test.shape=(449, 47)
y_test.shape=(449,)
MSE: 3.107165891835445
2013-01-08
x_train.shape=(1797, 47)
y_train.shape =(1797,)
x_test.shape=(450, 47)
y_test.shape=(450,)
MSE: 3.965400340309687
...
2013-12-27
x_train.shape=(1771, 47)
y_train.shape =(1771,)
x_test.shape=(443, 47)
y_test.shape=(443,)
MSE: 2.829811584378174
2013-12-30
x_train.shape=(1783, 47)
y_train.shape =(1783,)
x_test.shape=(446, 47)
y_test.shape=(446,)
MSE: 3.7229692784887587
2013-12-31
x_train.shape=(1781, 47)
y_train.shape =(1781,)
x_test.shape=(446, 47)
y_test.shape=(446,)
MSE: 2.9187787604137077
# Mean of the per-day results collected in linear_err2.
np.array(linear_err2).mean()
3.9034814254319627

XGBoost模型2013年各因子重要性

def get_XGBoost_importance(date):
    """Fit an XGBoost regressor on one trading day and return its permutation importance.

    The split for ``date`` comes from ``get_split_by_trade_date`` with its
    default arguments. The test part of that split is used both as the
    early-stopping evaluation set and as the data on which
    ``PermutationImportance`` is fitted.

    Returns the fitted ``PermutationImportance`` object.
    """
    # Split the data set for this trading day
    split = get_split_by_trade_date(date)
    x_train, x_test, y_train, y_test = split

    # Train the model
    booster_params = dict(
        max_depth=6,
        learning_rate=0.05,
        n_estimators=100,
        objective='reg:squarederror',
        tree_method='gpu_hist',
    )
    booster = xgb.XGBRegressor(**booster_params)
    booster.fit(
        x_train,
        y_train,
        eval_set=[(x_test, y_test)],
        eval_metric='rmse',
        early_stopping_rounds=5,
    )

    # Compute the permutation importance on the held-out part
    return PermutationImportance(booster).fit(x_test, y_test)
# Per-factor accumulator over all trading days:
#   'sum'      - total permutation importance
#   'negative' - number of days on which the importance was negative
importance_XGBoost = pd.DataFrame(index = factors)
importance_XGBoost['sum'] = 0
importance_XGBoost['negative'] = 0

for day in days:
    print(day)
    # Use the XGBoost helper here; the previous call to get_linear_importance
    # made this table a second copy of the linear-model importance.
    perm = get_XGBoost_importance(day)
    # NOTE(review): the arrays are added positionally, so this assumes
    # perm.feature_importances_ follows the order of `factors` — confirm against
    # the column order produced by get_split_by_trade_date.
    importance_XGBoost['sum']+=perm.feature_importances_
    importance_XGBoost['negative']+=(perm.feature_importances_<0)
2013-01-04
x_train.shape=(1185, 56)
y_train.shape =(1185,)
x_test.shape=(297, 56)
y_test.shape=(297,)
...
2013-12-30
x_train.shape=(1257, 56)
y_train.shape =(1257,)
x_test.shape=(315, 56)
y_test.shape=(315,)
2013-12-31
x_train.shape=(1257, 56)
y_train.shape =(1257,)
x_test.shape=(315, 56)
y_test.shape=(315,)
# Lift pandas' column display limit so the transposed table is shown in full,
# then display the factors ordered by total importance, largest first.
pd.set_option('display.max_columns', None)
importance_XGBoost.sort_values(by="sum" , ascending=False).transpose()
ROE mom12 ACCP std_dvol momchg BM AG SP idskew sharechg saleinv SgINVg CRG total_vol ACC LM LG cashpr LEV betad mom6 imom CFdebt QRG std_turn beta coskew PA ROA QR size DP INVchg TAXchg BVEG PMG volumed CT CR OCFP SG lagretn illq retnmax cash CFP skew EP INVG RDsales RD idvol age salecash AM turn
sum 448.405801 429.702717 74.743502 72.6 18.79974 17.990883 17.463805 17.166179 14.967974 14.685136 14.330699 14.089602 13.72714 11.487408 10.214821 9.876215 7.523135 6.356613 5.795162 5.624204 3.459836 3.01425 2.780895 2.529968 2.471124 2.463948 2.385884 2.328784 2.254455 2.201655 1.965896 1.916151 1.891923 1.651909 1.406529 1.3165 1.236694 1.194309 1.187277 1.122339 0.874006 0.854623 0.823669 0.776181 0.64295 0.596978 0.521497 0.438793 0.3598 0.192119 0.139393 0.052621 0.03791 0.015843 -0.000126 -0.101014
negative 7.000000 5.000000 15.000000 13.0 25.00000 23.000000 30.000000 34.000000 41.000000 31.000000 21.000000 41.000000 35.00000 44.000000 28.000000 41.000000 48.000000 53.000000 59.000000 50.000000 63.000000 61.00000 67.000000 59.000000 69.000000 67.000000 54.000000 59.000000 60.000000 64.000000 71.000000 61.000000 57.000000 69.000000 69.000000 79.0000 81.000000 59.000000 90.000000 67.000000 82.000000 94.000000 85.000000 75.000000 88.00000 99.000000 94.000000 89.000000 93.0000 117.000000 110.000000 111.000000 113.00000 137.000000 122.000000 134.000000
# Display the factors ordered by the number of negative-importance days, fewest first.
importance_XGBoost.sort_values(by="negative" , ascending=True).transpose()
mom12 ROE std_dvol ACCP saleinv BM momchg ACC AG sharechg SP CRG LM idskew SgINVg total_vol LG betad cashpr coskew INVchg PA LEV QRG CT ROA DP imom mom6 QR CFdebt OCFP beta BVEG std_turn TAXchg size retnmax PMG volumed SG illq cash EP CR INVG skew lagretn CFP RD idvol age RDsales AM turn salecash
sum 429.702717 448.405801 72.6 74.743502 14.330699 17.990883 18.79974 10.214821 17.463805 14.685136 17.166179 13.72714 9.876215 14.967974 14.089602 11.487408 7.523135 5.624204 6.356613 2.385884 1.891923 2.328784 5.795162 2.529968 1.194309 2.254455 1.916151 3.01425 3.459836 2.201655 2.780895 1.122339 2.463948 1.406529 2.471124 1.651909 1.965896 0.776181 1.3165 1.236694 0.874006 0.823669 0.64295 0.438793 1.187277 0.3598 0.521497 0.854623 0.596978 0.139393 0.052621 0.03791 0.192119 -0.000126 -0.101014 0.015843
negative 5.000000 7.000000 13.0 15.000000 21.000000 23.000000 25.00000 28.000000 30.000000 31.000000 34.000000 35.00000 41.000000 41.000000 41.000000 44.000000 48.000000 50.000000 53.000000 54.000000 57.000000 59.000000 59.000000 59.000000 59.000000 60.000000 61.000000 61.00000 63.000000 64.000000 67.000000 67.000000 67.000000 69.000000 69.000000 69.000000 71.000000 75.000000 79.0000 81.000000 82.000000 85.000000 88.00000 89.000000 90.000000 93.0000 94.000000 94.000000 99.000000 110.000000 111.000000 113.00000 117.000000 122.000000 134.000000 137.000000
# Factors to drop for the XGBoost runs, taken from get_last_N on the XGBoost table.
# NOTE(review): get_last_N is defined elsewhere; the printed results are sets that
# can hold fewer than N names — confirm its selection rule there.
remove_factors_XGBoost = list(get_last_N(importance_XGBoost, 10))
# Print the selection from both importance tables for comparison.
print(get_last_N(importance, 10))
print(get_last_N(importance_XGBoost, 10))
{'salecash', 'idvol', 'INVG', 'turn', 'AM', 'RD', 'RDsales', 'age'}
{'salecash', 'idvol', 'turn', 'skew', 'AM', 'RD', 'RDsales', 'age'}
# Same comparison of the two importance tables with N=20.
print(get_last_N(importance, 20))
print(get_last_N(importance_XGBoost, 20))
{'salecash', 'CR', 'volumed', 'CFP', 'SG', 'idvol', 'EP', 'turn', 'INVG', 'lagretn', 'cash', 'skew', 'age', 'retnmax', 'AM', 'illq', 'RDsales', 'RD'}
{'salecash', 'CR', 'volumed', 'CFP', 'SG', 'idvol', 'EP', 'turn', 'INVG', 'lagretn', 'cash', 'skew', 'age', 'retnmax', 'AM', 'illq', 'RDsales', 'RD'}

XGBoost得到的结果与Linear模型不一样:但差别不大

XGBoost去除无用特征

# Collect the value returned by XGBoost_train for every trading day, with the
# factors in `remove_factors_XGBoost` excluded and state=0.
# NOTE(review): state=0 is presumably the "leave missing values untouched" mode
# described in get_split_by_trade_date — confirm against XGBoost_train.
XGBoost_err1 = []
for day in days:
    print(day)
    XGBoost_err1.append(XGBoost_train(day, 0, remove_factors_XGBoost))
2013-01-04
x_train.shape=(1820, 48)
y_train.shape =(1820,)
x_test.shape=(455, 48)
y_test.shape=(455,)
[0]	validation_0-rmse:2.71745
[1]	validation_0-rmse:2.67359
[2]	validation_0-rmse:2.63451
[3]	validation_0-rmse:2.60041
[4]	validation_0-rmse:2.57119
[5]	validation_0-rmse:2.54164
[6]	validation_0-rmse:2.51318
[7]	validation_0-rmse:2.48936
[8]	validation_0-rmse:2.46562
[9]	validation_0-rmse:2.44546
[10]	validation_0-rmse:2.42487
[11]	validation_0-rmse:2.40673
[12]	validation_0-rmse:2.38718
[13]	validation_0-rmse:2.37333
[14]	validation_0-rmse:2.36220
[15]	validation_0-rmse:2.35255
[16]	validation_0-rmse:2.34168
[17]	validation_0-rmse:2.32783
[18]	validation_0-rmse:2.31884
[19]	validation_0-rmse:2.30887
[20]	validation_0-rmse:2.30385
[21]	validation_0-rmse:2.30015
[22]	validation_0-rmse:2.29274
[23]	validation_0-rmse:2.28728
[24]	validation_0-rmse:2.28071
[25]	validation_0-rmse:2.27549
[26]	validation_0-rmse:2.27096
[27]	validation_0-rmse:2.26446
[28]	validation_0-rmse:2.25991
[29]	validation_0-rmse:2.25987
[30]	validation_0-rmse:2.25923
[31]	validation_0-rmse:2.25714
[32]	validation_0-rmse:2.25545
[33]	validation_0-rmse:2.25242
[34]	validation_0-rmse:2.25230
[35]	validation_0-rmse:2.25038
[36]	validation_0-rmse:2.24694
[37]	validation_0-rmse:2.24920
[38]	validation_0-rmse:2.24767
[39]	validation_0-rmse:2.24455
[40]	validation_0-rmse:2.24335
[41]	validation_0-rmse:2.24286
[42]	validation_0-rmse:2.24344
[43]	validation_0-rmse:2.24169
[44]	validation_0-rmse:2.23983
[45]	validation_0-rmse:2.23949
[46]	validation_0-rmse:2.24040
[47]	validation_0-rmse:2.23639
[48]	validation_0-rmse:2.23753
[49]	validation_0-rmse:2.23831
[50]	validation_0-rmse:2.23831
[51]	validation_0-rmse:2.23881
[52]	validation_0-rmse:2.23851
MSE: 5.001454124013577
2013-01-07
x_train.shape=(1793, 48)
y_train.shape =(1793,)
x_test.shape=(449, 48)
y_test.shape=(449,)
[0]	validation_0-rmse:2.00898
[1]	validation_0-rmse:1.98336
[2]	validation_0-rmse:1.96136
[3]	validation_0-rmse:1.93963
[4]	validation_0-rmse:1.92541
[5]	validation_0-rmse:1.90738
[6]	validation_0-rmse:1.89212
[7]	validation_0-rmse:1.87982
[8]	validation_0-rmse:1.87254
[9]	validation_0-rmse:1.86130
[10]	validation_0-rmse:1.85293
[11]	validation_0-rmse:1.84307
[12]	validation_0-rmse:1.83806
[13]	validation_0-rmse:1.83238
[14]	validation_0-rmse:1.82658
[15]	validation_0-rmse:1.82079
[16]	validation_0-rmse:1.81401
[17]	validation_0-rmse:1.81253
[18]	validation_0-rmse:1.80937
[19]	validation_0-rmse:1.80658
[20]	validation_0-rmse:1.80209
[21]	validation_0-rmse:1.79880
[22]	validation_0-rmse:1.79395
[23]	validation_0-rmse:1.79342
[24]	validation_0-rmse:1.79180
[25]	validation_0-rmse:1.78960
[26]	validation_0-rmse:1.79045
[27]	validation_0-rmse:1.79132
[28]	validation_0-rmse:1.78716
[29]	validation_0-rmse:1.78690
[30]	validation_0-rmse:1.78452
[31]	validation_0-rmse:1.78472
[32]	validation_0-rmse:1.78501
[33]	validation_0-rmse:1.78616
[34]	validation_0-rmse:1.78586
[35]	validation_0-rmse:1.78812
MSE: 3.1845212488497667
2013-01-08
x_train.shape=(1797, 48)
y_train.shape =(1797,)
x_test.shape=(450, 48)
y_test.shape=(450,)
[0]	validation_0-rmse:2.21991
[1]	validation_0-rmse:2.19453
[2]	validation_0-rmse:2.17296
[3]	validation_0-rmse:2.15496
[4]	validation_0-rmse:2.14025
[5]	validation_0-rmse:2.12435
[6]	validation_0-rmse:2.11047
[7]	validation_0-rmse:2.09894
[8]	validation_0-rmse:2.08667
[9]	validation_0-rmse:2.07808
[10]	validation_0-rmse:2.07004
[11]	validation_0-rmse:2.06143
[12]	validation_0-rmse:2.05203
[13]	validation_0-rmse:2.04436
[14]	validation_0-rmse:2.03613
[15]	validation_0-rmse:2.03236
[16]	validation_0-rmse:2.02629
[17]	validation_0-rmse:2.02123
[18]	validation_0-rmse:2.02001
[19]	validation_0-rmse:2.01185
[20]	validation_0-rmse:2.01016
[21]	validation_0-rmse:2.00876
[22]	validation_0-rmse:2.00288
[23]	validation_0-rmse:2.00041
[24]	validation_0-rmse:1.99874
[25]	validation_0-rmse:1.99588
[26]	validation_0-rmse:1.99229
[27]	validation_0-rmse:1.98823
[28]	validation_0-rmse:1.98213
[29]	validation_0-rmse:1.98026
[30]	validation_0-rmse:1.97804
[31]	validation_0-rmse:1.97848
[32]	validation_0-rmse:1.97623
[33]	validation_0-rmse:1.97624
[34]	validation_0-rmse:1.97415
[35]	validation_0-rmse:1.97366
[36]	validation_0-rmse:1.97279
[37]	validation_0-rmse:1.97119
[38]	validation_0-rmse:1.97256
[39]	validation_0-rmse:1.97306
[40]	validation_0-rmse:1.97376
[41]	validation_0-rmse:1.97482
MSE: 3.8855736293546372
2013-01-09
x_train.shape=(1791, 48)
y_train.shape =(1791,)
x_test.shape=(448, 48)
y_test.shape=(448,)
[0]	validation_0-rmse:1.90462
[1]	validation_0-rmse:1.90026
[2]	validation_0-rmse:1.89663
[3]	validation_0-rmse:1.89111
[4]	validation_0-rmse:1.88658
[5]	validation_0-rmse:1.88702
[6]	validation_0-rmse:1.88244
[7]	validation_0-rmse:1.88211
[8]	validation_0-rmse:1.87815
[9]	validation_0-rmse:1.87744
[10]	validation_0-rmse:1.87602
[11]	validation_0-rmse:1.87436
[12]	validation_0-rmse:1.87080
[13]	validation_0-rmse:1.86997
[14]	validation_0-rmse:1.86840
[15]	validation_0-rmse:1.86853
[16]	validation_0-rmse:1.86896
[17]	validation_0-rmse:1.86809
[18]	validation_0-rmse:1.86592
[19]	validation_0-rmse:1.86313
[20]	validation_0-rmse:1.86108
[21]	validation_0-rmse:1.86272
[22]	validation_0-rmse:1.86133
[23]	validation_0-rmse:1.85858
[24]	validation_0-rmse:1.85833
[25]	validation_0-rmse:1.85615
[26]	validation_0-rmse:1.85641
[27]	validation_0-rmse:1.85625
[28]	validation_0-rmse:1.85526
[29]	validation_0-rmse:1.85653
[30]	validation_0-rmse:1.85803
[31]	validation_0-rmse:1.85546
[32]	validation_0-rmse:1.85270
[33]	validation_0-rmse:1.85378
[34]	validation_0-rmse:1.85455
[35]	validation_0-rmse:1.85685
[36]	validation_0-rmse:1.85556
MSE: 3.4324928107513224
2013-01-10
x_train.shape=(1791, 48)
y_train.shape =(1791,)
x_test.shape=(448, 48)
y_test.shape=(448,)
[0]	validation_0-rmse:1.74036
[1]	validation_0-rmse:1.73539
[2]	validation_0-rmse:1.72730
[3]	validation_0-rmse:1.72252
[4]	validation_0-rmse:1.71576
[5]	validation_0-rmse:1.71449
[6]	validation_0-rmse:1.70838
[7]	validation_0-rmse:1.70582
[8]	validation_0-rmse:1.70224
[9]	validation_0-rmse:1.70405
[10]	validation_0-rmse:1.70169
[11]	validation_0-rmse:1.70120
[12]	validation_0-rmse:1.69680
[13]	validation_0-rmse:1.69821
[14]	validation_0-rmse:1.69667
[15]	validation_0-rmse:1.69463
[16]	validation_0-rmse:1.69219
[17]	validation_0-rmse:1.68838
[18]	validation_0-rmse:1.68872
[19]	validation_0-rmse:1.68787
[20]	validation_0-rmse:1.68485
[21]	validation_0-rmse:1.68401
[22]	validation_0-rmse:1.68272
[23]	validation_0-rmse:1.68308
[24]	validation_0-rmse:1.68362
[25]	validation_0-rmse:1.68241
[26]	validation_0-rmse:1.68232
[27]	validation_0-rmse:1.68231
[28]	validation_0-rmse:1.68390
[29]	validation_0-rmse:1.68406
[30]	validation_0-rmse:1.68301
[31]	validation_0-rmse:1.68244
MSE: 2.8301813631073527
2013-01-11
x_train.shape=(1798, 48)
y_train.shape =(1798,)
x_test.shape=(450, 48)
y_test.shape=(450,)
[0]	validation_0-rmse:3.31175
[1]	validation_0-rmse:3.21892
[2]	validation_0-rmse:3.13651
[3]	validation_0-rmse:3.06162
[4]	validation_0-rmse:2.98938
[5]	validation_0-rmse:2.92879
[6]	validation_0-rmse:2.86506
[7]	validation_0-rmse:2.81064
[8]	validation_0-rmse:2.76554
[9]	validation_0-rmse:2.71910
[10]	validation_0-rmse:2.67275
[11]	validation_0-rmse:2.62927
[12]	validation_0-rmse:2.58945
[13]	validation_0-rmse:2.55245
[14]	validation_0-rmse:2.51939
[15]	validation_0-rmse:2.49457
[16]	validation_0-rmse:2.47158
[17]	validation_0-rmse:2.44465
[18]	validation_0-rmse:2.42555
[19]	validation_0-rmse:2.41074
[20]	validation_0-rmse:2.39081
[21]	validation_0-rmse:2.37692
[22]	validation_0-rmse:2.36240
[23]	validation_0-rmse:2.34919
[24]	validation_0-rmse:2.33831
[25]	validation_0-rmse:2.32760
[26]	validation_0-rmse:2.31880
[27]	validation_0-rmse:2.30908
[28]	validation_0-rmse:2.29942
[29]	validation_0-rmse:2.29173
[30]	validation_0-rmse:2.28906
[31]	validation_0-rmse:2.28330
[32]	validation_0-rmse:2.27824
[33]	validation_0-rmse:2.27168
[34]	validation_0-rmse:2.26974
[35]	validation_0-rmse:2.26434
[36]	validation_0-rmse:2.26080
[37]	validation_0-rmse:2.25779
[38]	validation_0-rmse:2.25554
[39]	validation_0-rmse:2.25194
[40]	validation_0-rmse:2.24950
[41]	validation_0-rmse:2.24582
[42]	validation_0-rmse:2.24212
[43]	validation_0-rmse:2.23924
[44]	validation_0-rmse:2.23693
[45]	validation_0-rmse:2.23408
[46]	validation_0-rmse:2.23157
[47]	validation_0-rmse:2.22886
[48]	validation_0-rmse:2.22640
[49]	validation_0-rmse:2.22447
[50]	validation_0-rmse:2.22414
[51]	validation_0-rmse:2.22391
[52]	validation_0-rmse:2.22305
[53]	validation_0-rmse:2.22291
[54]	validation_0-rmse:2.22153
[55]	validation_0-rmse:2.21619
[56]	validation_0-rmse:2.21558
[57]	validation_0-rmse:2.21659
[58]	validation_0-rmse:2.21574
[59]	validation_0-rmse:2.21568
[60]	validation_0-rmse:2.21477
[61]	validation_0-rmse:2.21468
[62]	validation_0-rmse:2.21416
[63]	validation_0-rmse:2.21339
[64]	validation_0-rmse:2.21196
[65]	validation_0-rmse:2.21234
[66]	validation_0-rmse:2.21082
[67]	validation_0-rmse:2.20976
[68]	validation_0-rmse:2.20896
[69]	validation_0-rmse:2.20771
[70]	validation_0-rmse:2.20623
[71]	validation_0-rmse:2.20521
[72]	validation_0-rmse:2.20464
[73]	validation_0-rmse:2.20457
[74]	validation_0-rmse:2.20517
[75]	validation_0-rmse:2.20470
[76]	validation_0-rmse:2.20315
[77]	validation_0-rmse:2.20307
[78]	validation_0-rmse:2.20312
[79]	validation_0-rmse:2.20367
[80]	validation_0-rmse:2.20395
[81]	validation_0-rmse:2.20458
MSE: 4.853516475539035
2013-01-14
x_train.shape=(1788, 48)
y_train.shape =(1788,)
x_test.shape=(448, 48)
y_test.shape=(448,)
[0]	validation_0-rmse:3.54992
[1]	validation_0-rmse:3.41347
[2]	validation_0-rmse:3.28376
[3]	validation_0-rmse:3.16036
[4]	validation_0-rmse:3.04717
[5]	validation_0-rmse:2.94013
[6]	validation_0-rmse:2.83867
[7]	validation_0-rmse:2.74747
[8]	validation_0-rmse:2.66053
[9]	validation_0-rmse:2.57977
[10]	validation_0-rmse:2.50311
[11]	validation_0-rmse:2.43331
[12]	validation_0-rmse:2.36710
[13]	validation_0-rmse:2.30966
[14]	validation_0-rmse:2.25344
[15]	validation_0-rmse:2.20568
[16]	validation_0-rmse:2.15862
[17]	validation_0-rmse:2.11274
[18]	validation_0-rmse:2.07126
[19]	validation_0-rmse:2.03326
[20]	validation_0-rmse:1.99911
[21]	validation_0-rmse:1.96901
[22]	validation_0-rmse:1.93961
[23]	validation_0-rmse:1.91477
[24]	validation_0-rmse:1.89042
[25]	validation_0-rmse:1.86812
[26]	validation_0-rmse:1.84573
[27]	validation_0-rmse:1.82862
[28]	validation_0-rmse:1.81004
[29]	validation_0-rmse:1.79396
[30]	validation_0-rmse:1.78030
[31]	validation_0-rmse:1.76868
[32]	validation_0-rmse:1.75735
[33]	validation_0-rmse:1.74560
[34]	validation_0-rmse:1.73647
[35]	validation_0-rmse:1.72792
[36]	validation_0-rmse:1.71951
[37]	validation_0-rmse:1.71065
[38]	validation_0-rmse:1.70197
[39]	validation_0-rmse:1.69403
[40]	validation_0-rmse:1.68678
[41]	validation_0-rmse:1.68097
[42]	validation_0-rmse:1.67777
[43]	validation_0-rmse:1.67243
[44]	validation_0-rmse:1.66799
[45]	validation_0-rmse:1.66378
[46]	validation_0-rmse:1.66015
[47]	validation_0-rmse:1.65685
[48]	validation_0-rmse:1.65420
[49]	validation_0-rmse:1.65186
[50]	validation_0-rmse:1.64992
[51]	validation_0-rmse:1.64827
[52]	validation_0-rmse:1.64639
[53]	validation_0-rmse:1.64575
[54]	validation_0-rmse:1.64446
[55]	validation_0-rmse:1.64233
[56]	validation_0-rmse:1.64053
[57]	validation_0-rmse:1.63897
[58]	validation_0-rmse:1.63678
[59]	validation_0-rmse:1.63496
[60]	validation_0-rmse:1.63408
[61]	validation_0-rmse:1.63270
[62]	validation_0-rmse:1.63197
[63]	validation_0-rmse:1.63001
[64]	validation_0-rmse:1.63058
[65]	validation_0-rmse:1.63124
[66]	validation_0-rmse:1.63046
[67]	validation_0-rmse:1.63053
[68]	validation_0-rmse:1.62815
[69]	validation_0-rmse:1.62808
[70]	validation_0-rmse:1.62787
[71]	validation_0-rmse:1.62731
[72]	validation_0-rmse:1.62736
[73]	validation_0-rmse:1.62676
[74]	validation_0-rmse:1.62728
[75]	validation_0-rmse:1.62717
[76]	validation_0-rmse:1.62791
[77]	validation_0-rmse:1.62786
[78]	validation_0-rmse:1.62667
[79]	validation_0-rmse:1.62613
[80]	validation_0-rmse:1.62612
[81]	validation_0-rmse:1.62547
[82]	validation_0-rmse:1.62677
[83]	validation_0-rmse:1.62850
[84]	validation_0-rmse:1.62882
[85]	validation_0-rmse:1.62957
[86]	validation_0-rmse:1.63227
MSE: 2.642140469381674
...
2013-12-31
x_train.shape=(1781, 48)
y_train.shape =(1781,)
x_test.shape=(446, 48)
y_test.shape=(446,)
[0]	validation_0-rmse:1.88690
[1]	validation_0-rmse:1.86526
[2]	validation_0-rmse:1.85005
[3]	validation_0-rmse:1.84162
[4]	validation_0-rmse:1.83684
[5]	validation_0-rmse:1.82044
[6]	validation_0-rmse:1.80873
[7]	validation_0-rmse:1.79836
[8]	validation_0-rmse:1.79244
[9]	validation_0-rmse:1.78463
[10]	validation_0-rmse:1.77731
[11]	validation_0-rmse:1.77208
[12]	validation_0-rmse:1.76650
[13]	validation_0-rmse:1.76504
[14]	validation_0-rmse:1.76111
[15]	validation_0-rmse:1.76009
[16]	validation_0-rmse:1.75637
[17]	validation_0-rmse:1.75409
[18]	validation_0-rmse:1.75204
[19]	validation_0-rmse:1.75110
[20]	validation_0-rmse:1.74982
[21]	validation_0-rmse:1.74916
[22]	validation_0-rmse:1.74626
[23]	validation_0-rmse:1.74462
[24]	validation_0-rmse:1.74572
[25]	validation_0-rmse:1.74326
[26]	validation_0-rmse:1.74332
[27]	validation_0-rmse:1.74115
[28]	validation_0-rmse:1.73936
[29]	validation_0-rmse:1.73940
[30]	validation_0-rmse:1.73854
[31]	validation_0-rmse:1.73787
[32]	validation_0-rmse:1.73685
[33]	validation_0-rmse:1.73552
[34]	validation_0-rmse:1.73746
[35]	validation_0-rmse:1.73781
[36]	validation_0-rmse:1.73913
[37]	validation_0-rmse:1.73897
MSE: 3.01203762063307
# Mean of the per-day results collected in XGBoost_err1.
np.array(XGBoost_err1).mean()
3.909562275559899
# Same sweep as XGBoost_err1 but with state=1.
# NOTE(review): state=1 is presumably the "drop rows with missing values" mode
# described in get_split_by_trade_date — confirm against XGBoost_train.
XGBoost_err2 = []
for day in days:
    print(day)
    XGBoost_err2.append(XGBoost_train(day, 1, remove_factors_XGBoost))
2013-01-04
x_train.shape=(1185, 48)
y_train.shape =(1185,)
x_test.shape=(297, 48)
y_test.shape=(297,)
[0]	validation_0-rmse:2.44807
[1]	validation_0-rmse:2.39745
[2]	validation_0-rmse:2.35292
[3]	validation_0-rmse:2.30882
[4]	validation_0-rmse:2.27245
[5]	validation_0-rmse:2.23914
[6]	validation_0-rmse:2.20729
[7]	validation_0-rmse:2.16935
[8]	validation_0-rmse:2.14256
[9]	validation_0-rmse:2.12006
[10]	validation_0-rmse:2.09587
[11]	validation_0-rmse:2.07339
[12]	validation_0-rmse:2.04927
[13]	validation_0-rmse:2.03210
[14]	validation_0-rmse:2.01497
[15]	validation_0-rmse:2.00233
[16]	validation_0-rmse:1.98758
[17]	validation_0-rmse:1.97418
[18]	validation_0-rmse:1.96359
[19]	validation_0-rmse:1.95194
[20]	validation_0-rmse:1.94411
[21]	validation_0-rmse:1.93538
[22]	validation_0-rmse:1.92844
[23]	validation_0-rmse:1.92259
[24]	validation_0-rmse:1.91411
[25]	validation_0-rmse:1.90856
[26]	validation_0-rmse:1.90620
[27]	validation_0-rmse:1.90025
[28]	validation_0-rmse:1.89549
[29]	validation_0-rmse:1.89190
[30]	validation_0-rmse:1.88803
[31]	validation_0-rmse:1.88336
[32]	validation_0-rmse:1.87740
[33]	validation_0-rmse:1.87631
[34]	validation_0-rmse:1.87285
[35]	validation_0-rmse:1.87090
[36]	validation_0-rmse:1.86915
[37]	validation_0-rmse:1.86485
[38]	validation_0-rmse:1.86454
[39]	validation_0-rmse:1.86267
[40]	validation_0-rmse:1.85725
[41]	validation_0-rmse:1.85345
[42]	validation_0-rmse:1.85321
[43]	validation_0-rmse:1.85299
[44]	validation_0-rmse:1.85393
[45]	validation_0-rmse:1.85441
[46]	validation_0-rmse:1.85222
[47]	validation_0-rmse:1.85128
[48]	validation_0-rmse:1.85197
[49]	validation_0-rmse:1.84963
[50]	validation_0-rmse:1.84854
[51]	validation_0-rmse:1.84855
[52]	validation_0-rmse:1.84776
[53]	validation_0-rmse:1.84315
[54]	validation_0-rmse:1.84244
[55]	validation_0-rmse:1.84082
[56]	validation_0-rmse:1.84197
[57]	validation_0-rmse:1.84060
[58]	validation_0-rmse:1.84109
[59]	validation_0-rmse:1.84236
[60]	validation_0-rmse:1.84235
[61]	validation_0-rmse:1.84354
[62]	validation_0-rmse:1.84242
MSE: 3.387808379507812
...
# Mean of the per-day results collected in XGBoost_err2.
np.array(XGBoost_err2).mean()
3.7494466498176644

准确率对比

| 2013年全年MSE平均值 | 不去除 | 去除低重要性因子 |
| --- | --- | --- |
| Linear使用平均值 | 3.921 | 3.903 |
| Linear去除缺失值 | 3.801 | 3.818 |
| XGBoost未去除缺失值 | 3.888 | 3.910 |
| XGBoost去除缺失值 | 3.742 | 3.749 |

选择最佳因子组合

逐个排除因子(预降维)

(使用全年的数据在Linear模型上)对无用特征进行排序过后,考虑逐个排除因子,查看准确率是否有较大的影响:

# Sweep j = 0..39: for each j, run linear_train on every trading day with the
# factors returned by get_trivial(j) excluded (state=1), and store the mean of
# the per-day results in output[j].
remove_factors1 = []  # not used within this cell
output = {}

for j in range(0, 40):
    linear_err = []
    for day in days:
        print(day)
        linear_err.append(linear_train(day, 1, get_trivial(j)))
    
    output[j] = np.array(linear_err).mean()
2013-01-04
x_train.shape=(1185, 56)
y_train.shape =(1185,)
x_test.shape=(297, 56)
y_test.shape=(297,)
MSE: 3.617532364939728
2013-01-07
x_train.shape=(1175, 56)
y_train.shape =(1175,)
x_test.shape=(294, 56)
y_test.shape=(294,)
MSE: 3.767207686481283
...
2013-09-06
x_train.shape=(1262, 54)
y_train.shape =(1262,)
x_test.shape=(316, 54)
y_test.shape=(316,)
MSE: 4.479822455337999
2013-09-09
x_train.shape=(1245, 54)
y_train.shape =(1245,)
x_test.shape=(312, 54)
y_test.shape=(312,)
MSE: 2.6867597216311045
2013-09-10
# Display the mapping: number of removed factors -> mean of the per-day results.
output
{0: 3.8005630796082426,
 1: 3.800560102538793,
 2: 3.7993222041207915,
 3: 3.796581734282571,
 4: 3.810176307276948,
 5: 3.821326643110776,
 6: 3.8207592247949775,
 7: 3.8226390947166196,
 8: 3.8198824229201382,
 9: 3.8178624023374335,
 10: 3.8164553373810577,
 11: 3.817581805700733,
 12: 3.8162670045691693,
 13: 3.8173551938010513,
 14: 3.8128217050940854,
 15: 3.8171463773589944,
 16: 3.8382788440258135,
 17: 3.8355687130078513,
 18: 3.8323229320104573,
 19: 3.8318099337863303,
 20: 3.827302483819496,
 21: 3.84656562478651,
 22: 3.845318679687666,
 23: 3.8486123394339438,
 24: 3.9034638992606205,
 25: 3.9005512092481425,
 26: 3.90396465347381,
 27: 3.918746734890699,
 28: 3.9180623909064027,
 29: 3.917919924138114,
 30: 3.8968072309036286,
 31: 3.8957130947549037,
 32: 3.8947024917348525,
 33: 3.8949611266433943,
 34: 3.9093928642932676,
 35: 3.924233068042662,
 36: 3.922568663342627,
 37: 3.9293781688167835,
 38: 3.967771645777158,
 39: 4.045240388664313}
# Plot the mean result (y) against the number of removed factors (x).
plt.plot(list(output.keys()), list(output.values()))
[<matplotlib.lines.Line2D at 0x7f3f48772ee0>]


png

如图所示,在去除20个因子以前,误差变化较小,在去除3个因子时,误差达到最小值。

而去除因子达到35个以上,误差会急剧增大,可能因为去除了有效的因子。

这说明在56个因子中,真正有效的因子数量甚至不到一半,直接去除掉不但提高了模型的可解释性,甚至同时能提升准确性

动态排除因子(后降维)

预降维是在全年排除相同因子采用的,而后降维将对每天的数据进行评估,对于每天的模型去除不重要的因子。

因此,可以预想到,去除的因子数量会更多(接近一半的负排序重要性因子都会被去除),准确性是一定会提升的。

此处采取的策略是:对每天因子重要性进行三次计算,均为负则去除该因子。然后再次回归

def remove_and_fit(date, state=1, n_rounds=3):
    """Drop factors with consistently negative permutation importance, then refit.

    A linear regression is fitted on the split returned by
    ``get_split_by_trade_date(date, state)``.  Permutation importance is then
    computed ``n_rounds`` times on the test part (seeds ``0 .. n_rounds - 1``).
    Every factor whose importance is negative in all rounds is removed, and
    ``linear_train`` is called again without those factors.

    Args:
        date: Trading date forwarded to ``get_split_by_trade_date`` and
            ``linear_train``.
        state: Missing-value handling mode forwarded to both helpers.
        n_rounds: Number of permutation-importance rounds; a factor is removed
            only if it is negative in every round.  Defaults to 3.

    Returns:
        A tuple ``(number_of_removed_factors, result_of_linear_train)``.

    Raises:
        ValueError: If ``n_rounds`` is smaller than 1 (with zero rounds every
            factor would vacuously count as "always negative").
    """
    if n_rounds < 1:
        raise ValueError("n_rounds must be at least 1")

    # Split the data set for the requested trading day.
    x_train, x_test, y_train, y_test = get_split_by_trade_date(date, state)

    # Train the baseline model on all factors.
    model = LinearRegression()
    model.fit(x_train, y_train)

    # Compute permutation importance, one column per round.
    # The importances are assigned positionally, so the index must follow the
    # column order of x_test.  That order is not guaranteed to match the
    # module-level `factors` list, because the split selects columns via a set.
    importances = pd.DataFrame(index=x_test.columns)
    for seed in range(n_rounds):
        perm = PermutationImportance(model, random_state=seed).fit(x_test, y_test)
        importances[seed] = perm.feature_importances_

    # Keep only the factors that were negative in every single round.
    always_negative = (importances < 0).all(axis=1)
    remove_factors = list(always_negative[always_negative].index)
    print(remove_factors)

    # Use the `date` argument here, not a global loop variable.
    # Original author's note: linear_train splits the data set differently
    # from the split used above.
    return len(remove_factors), linear_train(date, state, remove_factors)
# Run remove_and_fit for every trading day in `days` and record, per day, the
# number of removed factors and the value returned by linear_train.
linear_remove = pd.DataFrame(columns = ['removed', 'err'])
for day in days:
    # No newline: the factor list printed inside remove_and_fit follows on the same line.
    print(day, end = ' ')
    removed, err = remove_and_fit(day)
    # Assigning to a new .loc label appends one row indexed by the trading day.
    linear_remove.loc[day] = {'removed':removed, 'err':err}
2013-01-04 ['beta', 'turn', 'sharechg', 'age', 'lagretn', 'AM', 'DP', 'BVEG', 'INVG', 'CT', 'RD', 'RDsales', 'salecash']
MSE: 3.7858729998590723
2013-01-07 ['beta', 'idvol', 'skew', 'turn', 'std_turn', 'age', 'lagretn', 'AM', 'SG', 'PMG', 'cash', 'RDsales', 'QR', 'saleinv', 'CRG']
MSE: 4.025161449626848
2013-01-08 ['size', 'beta', 'skew', 'coskew', 'turn', 'LM', 'age', 'BM', 'CFP', 'INVG', 'ROA', 'PA', 'salecash']
MSE: 4.162825913159681
2013-01-09 ['size', 'beta', 'idvol', 'skew', 'std_turn', 'retnmax', 'age', 'AM', 'PMG', 'ROA', 'RDsales']
MSE: 2.9702129885547928
2013-01-10 ['beta', 'betad', 'idvol', 'sharechg', 'lagretn', 'AM', 'INVG', 'SgINVg', 'cash', 'cashpr', 'salecash']
MSE: 2.4174113537403037
2013-01-11 ['beta', 'turn', 'age', 'mom6', 'LEV', 'CFP', 'TAXchg', 'cash', 'RDsales', 'CR', 'salecash']
MSE: 3.751365716737459
2013-01-14 ['volumed', 'std_dvol', 'AM', 'OCFP', 'INVchg', 'PMG', 'ROA', 'RD', 'saleinv']
MSE: 2.4185355766852887
2013-01-15 ['beta', 'betad', 'idvol', 'age', 'lagretn', 'INVG', 'PMG', 'cashpr', 'CFdebt']
MSE: 3.370724501216639
2013-01-16 ['idvol', 'turn', 'std_turn', 'volumed', 'EP', 'CFP', 'DP', 'INVchg', 'QRG']
MSE: 4.447045990712184
2013-01-17 ['beta', 'total_vol', 'retnmax', 'illq', 'sharechg', 'age', 'AM', 'LEV', 'INVG', 'SG', 'PMG', 'TAXchg', 'CT', 'cash', 'CR', 'QR', 'salecash']
MSE: 4.082304969198799
2013-01-18 ['turn', 'CFP', 'OCFP', 'PMG', 'ROA']
MSE: 2.1234872477788023
2013-01-21 ['idvol', 'idskew', 'skew', 'std_dvol', 'illq', 'lagretn', 'PMG', 'ROA', 'CR']
MSE: 4.075557103299469
2013-01-22 ['betad', 'skew', 'mom6', 'AM', 'CFP', 'OCFP', 'PA', 'cash', 'RD', 'CR', 'salecash']
MSE: 3.0421280038462126
2013-01-23 ['size', 'betad', 'idskew', 'turn', 'mom6', 'lagretn', 'AM', 'EP', 'LG', 'TAXchg', 'ROA', 'PA', 'RDsales', 'salecash']
MSE: 3.8397380865824173
2013-01-24 ['std_turn', 'age', 'AM', 'INVG', 'PMG', 'ROA']
MSE: 4.3652342753144415
2013-01-25 ['skew', 'LM', 'BM', 'AM', 'INVG', 'INVchg', 'PMG', 'cash', 'salecash', 'CRG']
MSE: 3.649042345177247
2013-01-28 ['beta', 'skew', 'EP', 'CFP', 'BVEG', 'INVchg', 'SG', 'PMG', 'ACCP', 'RD', 'RDsales', 'QR']
MSE: 1.998549925155473
2013-01-29 ['idvol', 'total_vol', 'skew', 'turn', 'illq', 'age', 'AM', 'EP', 'OCFP', 'LG', 'INVchg', 'SgINVg', 'ROA', 'PA', 'cashpr', 'RDsales', 'CR', 'CFdebt', 'salecash', 'CRG']
MSE: 2.9341143357828474
2013-01-30 ['beta', 'idskew', 'retnmax', 'age', 'mom6', 'PA', 'cash']
MSE: 3.37135529969674
2013-01-31 ['beta', 'idvol', 'turn', 'AM', 'INVG', 'SG', 'TAXchg', 'ACC', 'RD']
MSE: 4.029868334639127
2013-02-01 ['size', 'skew', 'coskew', 'turn', 'std_turn', 'LEV', 'BVEG', 'INVG', 'INVchg', 'CT', 'cash', 'CFdebt']
MSE: 2.9752105980905337
2013-02-04 ['lagretn', 'SG', 'PMG', 'RDsales', 'salecash']
MSE: 5.564731556991589
2013-02-05 ['beta', 'age', 'mom6', 'CFP', 'DP', 'AG', 'INVG', 'INVchg', 'cash', 'cashpr', 'QRG']
MSE: 3.015906109352361
2013-02-06 ['AM', 'cash', 'QR']
MSE: 2.2884328731540524
2013-02-07 ['LM', 'CFP', 'OCFP', 'BVEG', 'PA', 'RDsales', 'salecash']
MSE: 2.6887185072823114
2013-02-08 ['idvol', 'skew', 'volumed', 'retnmax', 'ACC', 'cash']
MSE: 2.4370138796593364
2013-02-18 ['illq', 'AM', 'OCFP', 'LG', 'RD', 'CRG']
MSE: 3.0436795852997207
2013-02-19 ['age', 'lagretn', 'AM', 'EP', 'OCFP', 'INVG', 'QR']
MSE: 3.589772885371211
2013-02-20 ['betad', 'turn', 'lagretn', 'AM', 'CT', 'RD', 'RDsales', 'CR']
MSE: 2.376644549747271
2013-02-21 ['idvol', 'sharechg', 'EP', 'CFP', 'DP', 'SP', 'BVEG', 'INVG', 'RDsales']
MSE: 4.382255648644994
2013-02-22 ['skew', 'volumed', 'retnmax', 'LM', 'mom6', 'imom', 'AM', 'CFP', 'PA', 'RD']
MSE: 3.984581871361109
2013-02-25 ['skew', 'volumed', 'mom12', 'EP', 'CFP', 'DP', 'LG', 'cash', 'RD', 'QR', 'CFdebt']
MSE: 2.628439990574723
2013-02-26 ['skew', 'coskew', 'retnmax', 'sharechg', 'age', 'imom', 'lagretn', 'LEV', 'EP', 'DP', 'BVEG', 'INVG', 'cash', 'RDsales', 'salecash', 'CRG']
MSE: 3.617891237938386
2013-02-27 ['size', 'idvol', 'coskew', 'retnmax', 'age', 'lagretn', 'AM', 'LEV', 'SG', 'PMG', 'TAXchg', 'CFdebt']
MSE: 2.9805103770521884
2013-02-28 ['betad', 'volumed', 'imom', 'lagretn', 'DP', 'AG', 'TAXchg', 'cash', 'CFdebt']
MSE: 2.4335964170626463
2013-03-01 ['betad', 'skew', 'turn', 'std_turn', 'volumed', 'illq', 'LM', 'age', 'mom6', 'lagretn', 'EP', 'INVG', 'CFdebt', 'salecash']
MSE: 3.308268260769943
2013-03-04 ['betad', 'skew', 'turn', 'volumed', 'AM', 'LEV', 'SP', 'QR', 'salecash']
MSE: 3.091217982194591
2013-03-05 ['skew', 'turn', 'retnmax', 'lagretn', 'CFP', 'DP', 'INVG', 'CT']
MSE: 3.0806149519588337
2013-03-06 ['idvol', 'turn', 'retnmax', 'age', 'momchg', 'AM', 'RDsales']
MSE: 4.669136460161346
2013-03-07 ['idvol', 'volumed', 'LM', 'AM', 'LEV', 'OCFP', 'INVchg', 'RD', 'salecash']
MSE: 4.602641402312919
2013-03-08 ['turn', 'age', 'AM', 'EP', 'INVchg', 'SG', 'PMG', 'CFdebt', 'salecash']
MSE: 3.559080961750365
2013-03-11 ['idvol', 'skew', 'EP', 'CFP', 'PMG', 'RD', 'QR']
MSE: 3.1585437676637826
2013-03-12 ['skew', 'turn', 'volumed', 'imom', 'AM', 'LEV', 'EP', 'BVEG', 'SgINVg', 'PA', 'CT', 'cash', 'CFdebt']
MSE: 4.33071164653929
2013-03-13 ['skew', 'volumed', 'illq', 'mom6', 'lagretn', 'AM', 'EP', 'OCFP', 'DP', 'TAXchg', 'saleinv', 'QRG']
MSE: 4.228758398689912
2013-03-14 ['total_vol', 'skew', 'retnmax', 'illq', 'mom6', 'AM', 'EP', 'CFP', 'SP', 'RD', 'QRG']
MSE: 2.919961024320269
2013-03-15 ['turn', 'illq', 'LM', 'BM', 'BVEG', 'salecash']
MSE: 3.2026742871048857
2013-03-18 ['LM', 'age', 'imom', 'CFP', 'INVchg', 'CFdebt']
MSE: 4.825258612532052
2013-03-19 ['turn', 'age', 'BM', 'AM', 'LEV', 'EP', 'ACC', 'QRG']
MSE: 4.917209643944105
2013-03-20 ['size', 'beta', 'idvol', 'skew', 'age', 'lagretn', 'CFP', 'SP', 'LG', 'INVG', 'SG', 'ROA', 'CT', 'QRG']
MSE: 2.2047808208664144
2013-03-21 ['beta', 'betad', 'idvol', 'turn', 'AM', 'CFP', 'DP', 'PMG', 'TAXchg', 'ACC', 'PA', 'RDsales', 'CFdebt', 'QRG']
MSE: 2.507678699246893
2013-03-22 ['skew', 'lagretn', 'EP', 'DP', 'INVG', 'PMG', 'ROE', 'saleinv']
MSE: 2.8863295573117242
2013-03-25 ['betad', 'skew', 'turn', 'std_turn', 'volumed', 'lagretn', 'CFP', 'DP', 'INVG', 'SG', 'ROA', 'cashpr']
MSE: 3.316622703962829
2013-03-26 ['skew', 'age', 'mom6', 'AM', 'EP', 'CFP', 'INVG', 'QR']
MSE: 4.50087835556555
2013-03-27 ['skew', 'turn', 'volumed', 'age', 'mom6', 'LEV', 'EP', 'SG', 'SgINVg', 'PMG', 'TAXchg', 'CT', 'RDsales']
MSE: 3.8974965957604497
2013-03-28 ['skew', 'turn', 'volumed', 'LM', 'QR']
MSE: 3.344769615966232
2013-03-29 ['beta', 'turn', 'age', 'mom6', 'EP', 'OCFP', 'LG', 'SG', 'cash', 'RD']
MSE: 3.6517181917715344
2013-04-01 ['betad', 'turn', 'volumed', 'std_dvol', 'retnmax', 'imom', 'lagretn', 'LEV', 'EP', 'INVchg', 'cashpr', 'RD', 'CR', 'QR', 'salecash']
MSE: 3.39461678703113
2013-04-02 ['size', 'idvol', 'age', 'mom6', 'lagretn', 'AM', 'EP', 'SgINVg', 'TAXchg', 'cash', 'salecash', 'saleinv']
MSE: 5.006330832570682
2013-04-03 ['std_turn', 'retnmax', 'illq', 'LM', 'EP', 'CFP', 'OCFP', 'SgINVg', 'PMG', 'ROA', 'RD', 'CR', 'QRG']
MSE: 4.595119149833135
2013-04-08 ['beta', 'illq', 'mom6', 'CFP', 'OCFP', 'INVG', 'cash']
MSE: 4.27367902986036
2013-04-09 ['size', 'idvol', 'retnmax', 'illq', 'EP', 'QRG']
MSE: 4.311125099788795
2013-04-10 ['coskew', 'turn', 'retnmax', 'AM', 'OCFP', 'INVG', 'PMG', 'CT', 'cashpr', 'saleinv', 'CRG', 'QRG']
MSE: 2.943656923181127
2013-04-11 ['beta', 'skew', 'coskew', 'turn', 'std_turn', 'LM', 'sharechg', 'AM', 'EP', 'CFP', 'OCFP', 'PMG', 'ACC', 'ROA', 'CT', 'CFdebt', 'salecash']
MSE: 3.0621524306130676
2013-04-12 ['turn', 'age', 'mom6', 'AM', 'AG', 'LG', 'INVG', 'INVchg', 'RDsales', 'QR', 'CFdebt', 'CRG']
MSE: 2.594489069882756
2013-04-15 ['coskew', 'INVG', 'cash']
MSE: 4.122738534906467
2013-04-16 ['idvol', 'total_vol', 'skew', 'coskew', 'volumed', 'std_dvol', 'retnmax', 'age', 'lagretn', 'SG', 'SgINVg', 'ACCP', 'PA', 'CT', 'RDsales', 'CR', 'salecash']
MSE: 2.9841222620046266
2013-04-17 ['coskew', 'turn', 'retnmax', 'imom', 'AM', 'CFP', 'INVG', 'RD', 'CR']
MSE: 2.4707997412930944
2013-04-18 ['skew', 'AM', 'LEV', 'OCFP', 'PMG', 'cashpr', 'QR', 'salecash', 'QRG']
MSE: 3.1476566645198756
2013-04-19 ['lagretn', 'EP', 'BVEG', 'CT', 'RD', 'RDsales', 'CRG']
MSE: 1.750884841826081
2013-04-22 ['idvol', 'coskew', 'PMG', 'cash', 'RDsales', 'QR']
MSE: 4.292764260288389
2013-04-23 ['size', 'betad', 'idvol', 'skew', 'turn', 'std_dvol', 'AM', 'CFP', 'BVEG', 'RDsales', 'QR']
MSE: 3.1710658821709057
2013-04-24 ['turn', 'retnmax', 'LM', 'momchg', 'BM', 'DP', 'BVEG', 'SG', 'SgINVg', 'ACCP', 'PA', 'cash', 'RD', 'CR', 'QR', 'salecash']
MSE: 3.351606083122748
2013-04-25 ['beta', 'volumed', 'std_dvol', 'retnmax', 'age', 'AM', 'INVchg', 'PMG', 'CT', 'cashpr', 'RD', 'saleinv', 'QRG']
MSE: 4.790360563735561
2013-04-26 ['size', 'turn', 'retnmax', 'illq', 'INVG', 'PA', 'cash', 'RD', 'salecash']
MSE: 4.628211756526541
2013-05-02 ['idvol', 'volumed', 'retnmax', 'sharechg', 'SG', 'PMG', 'CR']
MSE: 5.240087277472901
2013-05-03 ['idvol', 'skew', 'LM', 'EP', 'CFP', 'SP', 'cash']
MSE: 2.5405350578236368
2013-05-06 ['size', 'betad', 'skew', 'coskew', 'turn', 'retnmax', 'illq', 'momchg', 'AM', 'DP', 'SP', 'INVchg', 'SG', 'TAXchg', 'PA', 'cashpr', 'RD', 'RDsales', 'CR', 'CRG']
MSE: 3.0044574721981134
2013-05-07 ['idvol', 'skew', 'illq', 'LM', 'mom6', 'OCFP', 'AG', 'PMG', 'ACC', 'cash', 'CR', 'salecash']
MSE: 2.771803937426694
2013-05-08 ['turn', 'age', 'mom6', 'imom', 'AM', 'EP', 'BVEG', 'SG', 'CT', 'RD', 'QR', 'salecash']
MSE: 2.7455414666450255
2013-05-09 ['beta', 'idvol', 'turn', 'age', 'mom6', 'momchg', 'lagretn', 'CFP', 'SP', 'SgINVg', 'TAXchg', 'ROA', 'PA', 'cashpr', 'RDsales']
MSE: 4.6174483990466335
2013-05-10 ['size', 'betad', 'idvol', 'skew', 'illq', 'age', 'EP', 'CFP', 'DP', 'SP', 'INVG', 'PMG', 'cashpr', 'CFdebt', 'salecash']
MSE: 4.2289625201434475
2013-05-13 ['beta', 'volumed', 'retnmax', 'momchg', 'BVEG', 'INVG', 'INVchg', 'SgINVg', 'ROA', 'RD', 'RDsales']
MSE: 3.2821573771656642
2013-05-14 ['betad', 'turn', 'volumed', 'age', 'momchg', 'AM', 'DP', 'BVEG', 'SG', 'PMG']
MSE: 4.236639304204587
2013-05-15 ['turn', 'LM', 'mom6', 'lagretn', 'EP', 'DP', 'SgINVg', 'cash', 'RD', 'QR', 'salecash', 'QRG']
MSE: 2.308756438792809
2013-05-16 ['retnmax', 'SG', 'RD', 'RDsales', 'salecash']
MSE: 3.66247839778964
2013-05-17 ['idvol', 'total_vol', 'momchg', 'AM', 'CFP', 'AG', 'BVEG', 'INVG', 'INVchg', 'cashpr', 'RD', 'RDsales', 'salecash']
MSE: 3.4005687977375905
2013-05-20 ['idvol', 'turn', 'retnmax', 'INVG', 'SG', 'CRG']
MSE: 4.251271852207275
2013-05-21 ['idvol', 'volumed', 'retnmax', 'age', 'lagretn', 'EP', 'OCFP', 'INVchg', 'PMG', 'ROA', 'PA', 'cashpr', 'CR', 'CFdebt', 'QRG']
MSE: 3.7684960172846664
2013-05-22 ['volumed', 'EP', 'INVchg', 'PMG', 'ACC', 'CT', 'cash', 'RDsales', 'QR']
MSE: 4.730338570373537
2013-05-23 ['skew', 'coskew', 'retnmax', 'age', 'lagretn', 'AM', 'DP', 'SP', 'RD', 'salecash']
MSE: 3.981534716882582
2013-05-24 ['total_vol', 'skew', 'turn', 'lagretn', 'AM', 'CFP', 'LG', 'INVG', 'SG', 'RDsales', 'QR', 'CRG']
MSE: 3.3530280927986165
2013-05-27 ['size', 'idvol', 'volumed', 'mom6', 'imom', 'SG', 'CT', 'RD']
MSE: 4.061919240761789
2013-05-28 ['turn', 'retnmax', 'illq', 'LM', 'age', 'mom6', 'LEV', 'CFP', 'PA', 'CT', 'RD', 'QR', 'CFdebt', 'salecash']
MSE: 5.243846264700614
2013-05-29 ['betad', 'total_vol', 'illq', 'age', 'AM', 'INVG', 'RDsales']
MSE: 2.7711284059099497
2013-05-30 ['size', 'idvol', 'skew', 'turn', 'std_turn', 'volumed', 'lagretn', 'EP', 'INVchg', 'SG', 'ACC', 'ROA', 'CR']
MSE: 3.5887124438798774
2013-05-31 ['beta', 'skew', 'LM', 'mom6', 'CFP', 'INVG', 'RD', 'CFdebt', 'QRG']
MSE: 3.9045752522526107
2013-06-03 ['sharechg', 'AM', 'LEV', 'EP', 'DP', 'SP', 'SgINVg', 'TAXchg', 'PA', 'RD', 'RDsales', 'salecash']
MSE: 4.7864433498877705
2013-06-04 ['total_vol', 'volumed', 'age', 'lagretn', 'BVEG', 'CR']
MSE: 4.2228459858796015
2013-06-05 ['size', 'coskew', 'age', 'momchg', 'AM', 'EP', 'DP', 'LG', 'BVEG', 'PA', 'cash', 'QR']
MSE: 3.1121881665080253
2013-06-06 ['idskew', 'volumed', 'illq', 'mom6', 'momchg', 'SP', 'SG', 'PMG', 'CT', 'RD', 'CR']
MSE: 3.283001630711237
2013-06-07 ['betad', 'idvol', 'skew', 'retnmax', 'AM', 'EP', 'DP', 'BVEG', 'SgINVg', 'TAXchg', 'RD', 'QR', 'salecash']
MSE: 4.962202056166642
2013-06-13 ['retnmax', 'illq', 'LG', 'SG', 'cashpr', 'CR', 'CFdebt', 'salecash']
MSE: 5.353840468483478
2013-06-14 ['coskew', 'illq', 'mom6', 'AM', 'INVG', 'SgINVg', 'PMG', 'ACC', 'salecash', 'CRG']
MSE: 2.5061345638899355
2013-06-17 ['idvol', 'skew', 'coskew', 'turn', 'volumed', 'age', 'momchg', 'SP', 'BVEG', 'INVG', 'SG', 'PA', 'cash', 'CR', 'salecash']
MSE: 3.9994831436523994
2013-06-18 ['idvol', 'idskew', 'coskew', 'retnmax', 'age', 'lagretn', 'DP', 'LG', 'SgINVg', 'ROA', 'PA', 'CT', 'cashpr', 'RDsales']
MSE: 3.723980540254007
2013-06-19 ['beta', 'idvol', 'std_dvol', 'retnmax', 'age', 'lagretn', 'AM', 'OCFP', 'INVchg', 'SG', 'PA', 'cash', 'cashpr', 'RD']
MSE: 4.331620733088984
2013-06-20 ['skew', 'illq', 'sharechg', 'age', 'AM', 'LEV', 'SgINVg', 'CR', 'QR', 'salecash']
MSE: 2.856545736980471
2013-06-21 ['skew', 'volumed', 'age', 'BM', 'AM', 'CFP', 'BVEG', 'INVchg', 'PMG', 'PA', 'RDsales', 'CFdebt']
MSE: 4.2808016622111165
2013-06-24 ['size', 'turn', 'std_turn', 'LM', 'AM', 'CFP', 'DP', 'cashpr', 'CR', 'QRG']
MSE: 4.300456944090253
2013-06-25 ['turn', 'lagretn', 'DP', 'BVEG', 'INVG', 'ROA', 'RD']
MSE: 5.634483683169749
2013-06-26 ['idvol', 'skew', 'volumed', 'retnmax', 'AM', 'DP', 'LG', 'CT', 'QR', 'CFdebt']
MSE: 3.889013796976845
2013-06-27 ['beta', 'betad', 'skew', 'turn', 'std_turn', 'retnmax', 'mom6', 'DP', 'PMG']
MSE: 6.958856578714843
2013-06-28 ['turn', 'lagretn', 'AM', 'BVEG', 'SgINVg', 'cash', 'cashpr']
MSE: 4.021165719370021
2013-07-01 ['betad', 'skew', 'turn', 'age', 'mom6', 'CFP', 'CFdebt', 'salecash', 'saleinv']
MSE: 3.0490661274428366
2013-07-02 ['coskew', 'OCFP', 'AG', 'BVEG', 'INVG', 'PMG', 'CR']
MSE: 3.2681444178466332
2013-07-03 ['idvol', 'skew', 'volumed', 'lagretn', 'AM', 'CFP', 'OCFP', 'BVEG', 'INVchg', 'CR']
MSE: 4.458588490993542
2013-07-04 ['std_dvol', 'illq', 'EP', 'OCFP', 'BVEG', 'INVchg', 'PMG', 'ACCP', 'cash', 'RDsales', 'QR']
MSE: 4.5351058807630364
2013-07-05 ['beta', 'betad', 'lagretn', 'INVG', 'PA', 'RD', 'RDsales']
MSE: 4.107400239809365
2013-07-08 ['turn', 'illq', 'imom', 'AM', 'SP', 'QR', 'salecash']
MSE: 5.079964916060136
2013-07-09 ['skew', 'coskew', 'turn', 'retnmax', 'lagretn', 'EP', 'RDsales']
MSE: 4.48783743996065
2013-07-10 ['beta', 'idvol', 'turn', 'volumed', 'sharechg', 'imom', 'lagretn']
MSE: 2.91120609934653
2013-07-11 ['idskew', 'skew', 'lagretn', 'AG', 'TAXchg', 'CT', 'RDsales', 'QRG']
MSE: 2.476220330100001
2013-07-12 ['skew', 'turn', 'std_turn', 'mom6', 'LEV', 'SG']
MSE: 2.9211273543349447
2013-07-15 ['size', 'turn', 'AM', 'EP', 'CFP', 'OCFP', 'DP', 'BVEG', 'INVchg', 'TAXchg', 'ROE', 'ROA', 'CR', 'QRG']
MSE: 3.4050465131682772
2013-07-16 ['skew', 'std_turn', 'volumed', 'retnmax', 'age', 'SP', 'BVEG', 'INVG', 'INVchg', 'PA', 'RD', 'QR']
MSE: 3.7945392668841373
2013-07-17 ['skew', 'turn', 'LEV', 'EP', 'SgINVg', 'cash', 'RD', 'RDsales', 'salecash']
MSE: 5.203289917919129
2013-07-18 ['total_vol', 'coskew', 'volumed', 'retnmax', 'AM', 'INVchg', 'RDsales', 'CR']
MSE: 4.509474728031157
2013-07-19 ['turn', 'retnmax', 'imom', 'LEV', 'INVG', 'ACC', 'ROA', 'salecash']
MSE: 4.979871792755535
2013-07-22 ['idvol', 'coskew', 'turn', 'age', 'AM', 'CFP', 'cashpr']
MSE: 4.324260298617077
2013-07-23 ['momchg', 'AM', 'SP', 'SG', 'TAXchg', 'ACC', 'cash', 'CR', 'CFdebt', 'CRG']
MSE: 2.955548999586859
2013-07-24 ['size', 'betad', 'idvol', 'turn', 'retnmax', 'imom', 'LEV', 'BVEG', 'SG', 'TAXchg', 'ROA']
MSE: 3.9297805684821476
2013-07-25 ['lagretn', 'AM', 'EP', 'PMG']
MSE: 6.005756235116204
2013-07-26 ['idvol', 'total_vol', 'coskew', 'volumed', 'retnmax', 'CFP', 'BVEG', 'INVchg', 'SG', 'TAXchg', 'RD', 'CR', 'salecash', 'saleinv']
MSE: 3.652047181874535
2013-07-29 ['size', 'idvol', 'turn', 'LEV', 'OCFP', 'SgINVg', 'PA', 'RD', 'CFdebt']
MSE: 4.084588835171696
2013-07-30 ['idvol', 'skew', 'turn', 'volumed', 'age', 'mom6', 'imom', 'lagretn', 'CFP', 'BVEG', 'SG', 'PMG', 'CT', 'cashpr', 'RD', 'QR']
MSE: 5.427820478672383
2013-07-31 ['size', 'idvol', 'skew', 'turn', 'std_turn', 'volumed', 'retnmax', 'LM', 'lagretn', 'OCFP', 'INVG', 'INVchg', 'SgINVg', 'PMG', 'CT', 'CR']
MSE: 4.857858414184512
2013-08-01 ['skew', 'std_turn', 'age', 'EP', 'CFP', 'SG', 'ROA', 'RDsales', 'QR', 'CFdebt']
MSE: 2.013532319730573
2013-08-02 ['turn', 'sharechg', 'BVEG', 'INVG', 'INVchg', 'SG', 'cash', 'RD']
MSE: 3.5316282738391513
2013-08-05 ['skew', 'retnmax', 'illq', 'age', 'AM', 'DP', 'SP', 'ROA', 'cashpr']
MSE: 3.698425872146903
2013-08-06 ['volumed', 'std_dvol', 'retnmax', 'mom6', 'lagretn', 'BVEG', 'INVG', 'TAXchg', 'ACCP', 'ROA', 'PA', 'CT', 'cash', 'cashpr', 'RDsales', 'CFdebt', 'QRG']
MSE: 2.8973094805224786
2013-08-07 ['idvol', 'age', 'lagretn', 'EP', 'CFP', 'INVchg', 'ACCP', 'salecash']
MSE: 3.593317809201272
2013-08-08 ['idvol', 'lagretn', 'AM', 'CFP', 'SG', 'PMG', 'ROA', 'CR', 'salecash']
MSE: 3.351461269519372
2013-08-09 ['coskew', 'mom6', 'OCFP', 'INVG', 'ROA', 'RDsales']
MSE: 4.0261812796161855
2013-08-12 ['betad', 'std_turn', 'EP', 'LG', 'INVchg', 'PMG', 'CT', 'RDsales', 'CR', 'salecash']
MSE: 2.9240852956829206
2013-08-13 ['size', 'idvol', 'retnmax', 'age', 'momchg', 'lagretn', 'AM', 'AG', 'BVEG', 'CRG']
MSE: 2.769309375178218
2013-08-14 ['beta', 'skew', 'std_turn', 'imom', 'lagretn', 'CFP', 'QR']
MSE: 3.1386489115048763
2013-08-15 ['coskew', 'illq', 'age', 'mom6', 'imom', 'OCFP', 'TAXchg', 'CT', 'cash', 'RD', 'CFdebt', 'salecash']
MSE: 3.1872869055781012
2013-08-16 ['sharechg', 'lagretn', 'LEV', 'EP', 'cash']
MSE: 4.331717368162058
2013-08-19 ['idvol', 'retnmax', 'lagretn', 'AM', 'EP', 'OCFP', 'LG', 'INVchg', 'TAXchg', 'ACC', 'ROA', 'saleinv']
MSE: 3.1894871655441053
2013-08-20 ['idvol', 'skew', 'illq', 'age', 'momchg', 'AM', 'EP', 'cash', 'RDsales', 'QR']
MSE: 3.64489260107602
2013-08-21 ['idskew', 'coskew', 'turn', 'sharechg', 'imom', 'EP', 'CFP', 'SG', 'CT', 'RD', 'salecash']
MSE: 3.133218730832844
2013-08-22 ['skew', 'turn', 'DP', 'BVEG', 'INVchg', 'PA', 'cash', 'RD', 'RDsales', 'CR']
MSE: 2.9998075331609875
2013-08-23 ['total_vol', 'age', 'imom', 'LEV', 'EP', 'INVchg', 'CFdebt']
MSE: 4.128531566187385
2013-08-26 ['beta', 'std_turn', 'age', 'momchg', 'AM', 'SG', 'cash', 'CR', 'QR', 'salecash']
MSE: 3.1464418828179266
2013-08-27 ['size', 'beta', 'skew', 'turn', 'std_dvol', 'AM', 'CFP', 'SG', 'PMG', 'CT', 'cashpr', 'RD', 'QR']
MSE: 3.451404811937106
2013-08-28 ['size', 'idvol', 'turn', 'illq', 'LEV', 'EP', 'PMG', 'cash', 'salecash']
MSE: 5.279398156725779
2013-08-29 ['beta', 'idvol', 'skew', 'turn', 'age', 'imom', 'LEV', 'INVchg', 'TAXchg', 'cash', 'RDsales', 'QR']
MSE: 5.070080414799744
2013-08-30 ['size', 'idvol', 'BVEG', 'INVG', 'PMG', 'RD', 'salecash']
MSE: 6.2538219549019525
2013-09-02 ['age', 'mom6', 'TAXchg', 'PA', 'RD', 'salecash']
MSE: 6.5811019041794205
2013-09-03 ['beta', 'skew', 'turn', 'mom6', 'PMG', 'ROA', 'CT', 'RDsales']
MSE: 4.43396936602181
2013-09-04 ['size', 'turn', 'sharechg', 'age', 'LG']
MSE: 4.650476080237525
2013-09-05 ['idvol', 'illq', 'age', 'imom', 'OCFP', 'SP', 'INVG', 'SG', 'TAXchg', 'PA', 'cash', 'RD']
MSE: 4.547262023813774
2013-09-06 ['idskew', 'skew', 'std_dvol', 'mom6', 'CFP', 'ACCP']
MSE: 4.244393336537197
2013-09-09 ['size', 'beta', 'betad', 'idvol', 'volumed', 'age', 'imom', 'DP', 'AG', 'INVG', 'PMG', 'salecash', 'saleinv', 'CRG']
MSE: 4.132922887031511
2013-09-10 ['skew', 'turn', 'std_turn', 'illq', 'AM', 'EP', 'TAXchg', 'RD']
MSE: 4.124117314719291
2013-09-11 ['turn', 'std_turn', 'age', 'mom6', 'BM', 'BVEG', 'SgINVg', 'CR', 'CFdebt']
MSE: 5.6085128303163545
2013-09-12 ['skew', 'illq', 'INVG', 'SG', 'RD']
MSE: 3.4850675051252296
2013-09-13 ['LEV', 'CFP', 'SG', 'SgINVg', 'PA', 'CT', 'cash', 'RDsales', 'CR', 'QRG']
MSE: 4.029990953218458
2013-09-16 ['betad', 'skew', 'turn', 'retnmax', 'imom', 'lagretn', 'EP', 'DP', 'INVG', 'SG', 'ROA', 'QR', 'CFdebt', 'salecash']
MSE: 3.801293360185082
2013-09-17 ['betad', 'illq', 'LEV', 'CFP', 'BVEG', 'cash', 'RDsales', 'salecash']
MSE: 4.813227650270847
2013-09-18 ['betad', 'idvol', 'turn', 'std_turn', 'momchg', 'AM', 'CFP', 'INVchg', 'ACC', 'CT', 'CFdebt', 'salecash', 'saleinv']
MSE: 3.5081704961882854
2013-09-23 ['mom6', 'PMG', 'TAXchg', 'PA', 'cash']
MSE: 2.7720720592228916
2013-09-24 ['total_vol', 'retnmax', 'illq', 'AM', 'SG', 'CT', 'cash', 'RDsales']
MSE: 4.876084895763892
2013-09-25 ['idskew', 'turn', 'std_turn', 'retnmax', 'illq', 'LEV', 'OCFP', 'DP', 'SP', 'CT', 'RD', 'CFdebt', 'salecash']
MSE: 5.0340127061687925
2013-09-26 ['beta', 'idvol', 'turn', 'age', 'SP', 'INVG', 'cashpr', 'QR', 'CRG']
MSE: 5.773339755545188
2013-09-27 ['size', 'idskew', 'turn', 'age', 'LEV', 'INVG', 'ROA', 'CT', 'RD', 'CR']
MSE: 4.931646845338142
2013-09-30 ['total_vol', 'turn', 'retnmax', 'sharechg', 'AM', 'CFP', 'INVchg', 'ROA', 'PA', 'CR']
MSE: 2.853335134669498
2013-10-08 ['idvol', 'retnmax', 'AM', 'OCFP', 'INVG', 'TAXchg', 'CT', 'RDsales', 'CR']
MSE: 3.8198359428496502
2013-10-09 ['turn', 'volumed', 'lagretn', 'CFP', 'LG', 'SG', 'PMG', 'ROA', 'CT', 'RDsales', 'QRG']
MSE: 3.1539746575072862
2013-10-10 ['idvol', 'turn', 'illq', 'age', 'lagretn', 'AG', 'TAXchg', 'cash', 'cashpr', 'RDsales', 'CR', 'CRG']
MSE: 6.029162821566159
2013-10-11 ['beta', 'idskew', 'turn', 'volumed', 'age', 'AM', 'INVchg', 'SG', 'PMG', 'CT', 'RD', 'CR']
MSE: 4.083926097141506
2013-10-14 ['coskew', 'turn', 'CFP', 'DP', 'SG', 'TAXchg', 'ROE', 'salecash']
MSE: 5.358110375884738
2013-10-15 ['beta', 'idvol', 'volumed', 'retnmax', 'LM', 'LEV', 'EP', 'CFP', 'SP', 'LG', 'INVG', 'SG']
MSE: 4.379737994755032
2013-10-16 ['skew', 'turn', 'mom6', 'EP', 'SG', 'PA', 'RDsales', 'CFdebt']
MSE: 5.418096078492006
2013-10-17 ['idvol', 'mom6', 'imom', 'AG', 'PMG', 'TAXchg', 'cash', 'CR', 'CRG']
MSE: 4.744623539638933
2013-10-18 ['turn', 'age', 'AM', 'ROA', 'RD', 'QRG']
MSE: 3.8896567621758504
2013-10-21 ['betad', 'total_vol', 'idskew', 'lagretn', 'AM', 'cash']
MSE: 2.7790111037241867
2013-10-22 ['idvol', 'momchg', 'imom', 'AM', 'EP', 'DP', 'SP', 'INVG', 'INVchg', 'CT', 'cash', 'RD', 'RDsales']
MSE: 6.093988664010849
2013-10-23 ['idvol', 'turn', 'retnmax', 'mom6', 'imom', 'lagretn', 'BM', 'AM', 'LEV', 'EP', 'CFP', 'DP', 'LG', 'BVEG', 'SgINVg', 'TAXchg', 'PA', 'cashpr', 'RD', 'RDsales', 'QR', 'QRG']
MSE: 5.786239798513934
2013-10-24 ['betad', 'idvol', 'skew', 'coskew', 'turn', 'illq', 'mom6', 'imom', 'AM', 'INVG', 'CT', 'RD', 'RDsales']
MSE: 5.030191474831782
2013-10-25 ['idskew', 'std_turn', 'mom6', 'AM', 'CFP', 'OCFP', 'AG', 'BVEG', 'INVG', 'CR', 'salecash', 'QRG']
MSE: 4.639727451186666
2013-10-28 ['skew', 'coskew', 'SP', 'ROA', 'PA', 'RD', 'RDsales', 'CR', 'saleinv']
MSE: 6.330372509834726
2013-10-29 ['skew', 'retnmax', 'sharechg', 'age', 'momchg', 'imom', 'AM', 'LEV', 'CFP', 'AG', 'LG', 'BVEG', 'INVG', 'ROA', 'cashpr', 'RD', 'salecash']
MSE: 7.175615556082851
2013-10-30 ['age', 'momchg', 'lagretn', 'AM', 'INVchg', 'CT', 'cash', 'CR', 'CRG']
MSE: 3.0270851338446665
2013-10-31 ['turn', 'mom6', 'imom', 'AM', 'ACCP', 'PA', 'RD', 'RDsales', 'CR']
MSE: 5.288040276830776
2013-11-01 ['coskew', 'turn', 'age', 'INVchg', 'SG', 'CT', 'cash', 'RD', 'RDsales', 'QR', 'CRG', 'QRG']
MSE: 4.1196453322319
2013-11-04 ['size', 'skew', 'std_turn', 'illq', 'age', 'mom6', 'momchg', 'AM', 'OCFP', 'BVEG', 'cash', 'RDsales', 'CFdebt']
MSE: 3.0966750440768926
2013-11-05 ['total_vol', 'skew', 'std_turn', 'age', 'mom6', 'CFP', 'INVchg', 'SG', 'RDsales', 'salecash', 'CRG']
MSE: 2.662112847030313
2013-11-06 ['idvol', 'turn', 'illq', 'LEV', 'EP', 'OCFP', 'BVEG', 'INVchg', 'cash', 'RD', 'RDsales', 'CR', 'salecash']
MSE: 2.990025202465042
2013-11-07 ['size', 'beta', 'std_turn', 'SG', 'ROA', 'QRG']
MSE: 3.711989594519554
2013-11-08 ['betad', 'idvol', 'skew', 'volumed', 'std_dvol', 'lagretn', 'AM', 'DP', 'INVG', 'PMG', 'CR']
MSE: 4.592895702838832
2013-11-11 ['size', 'turn', 'INVchg', 'ROA', 'CT', 'QR', 'saleinv']
MSE: 4.454780865729851
2013-11-12 ['idvol', 'skew', 'std_dvol', 'CFP', 'OCFP', 'DP', 'SP', 'AG', 'INVchg', 'PMG', 'PA', 'cash', 'cashpr', 'RD', 'QR', 'salecash', 'QRG']
MSE: 3.131677788337437
2013-11-13 ['coskew', 'turn', 'std_turn', 'LM', 'age', 'mom6', 'lagretn', 'LEV', 'CFP', 'OCFP', 'DP', 'BVEG', 'cash', 'RDsales', 'QR']
MSE: 2.767840492935834
2013-11-14 ['turn', 'retnmax', 'age', 'lagretn', 'AM', 'SP', 'AG', 'SG', 'PMG', 'CR', 'QR', 'CRG']
MSE: 3.7624903972143158
2013-11-15 ['idvol', 'coskew', 'turn', 'volumed', 'LM', 'lagretn', 'AM', 'EP', 'LG', 'INVG', 'SG', 'PMG', 'PA', 'CT', 'CR', 'CFdebt', 'QRG']
MSE: 2.234910451900615
2013-11-18 ['std_turn', 'volumed', 'EP', 'DP']
MSE: 3.286022743901893
2013-11-19 ['betad', 'total_vol', 'volumed', 'AM', 'CFP', 'PMG', 'ROA', 'QR', 'CFdebt', 'salecash']
MSE: 3.050946341562079
2013-11-20 ['retnmax', 'LM', 'age', 'ROA', 'cash', 'RD']
MSE: 3.334620743562507
2013-11-21 ['idvol', 'imom', 'AM', 'OCFP', 'BVEG', 'INVchg', 'cashpr', 'RD', 'CR', 'CRG']
MSE: 3.8894040528171714
2013-11-22 ['skew', 'turn', 'age', 'EP', 'cash', 'RDsales']
MSE: 4.045378809032109
2013-11-25 ['size', 'idvol', 'std_turn', 'volumed', 'age', 'mom6', 'CFP', 'OCFP', 'DP', 'PMG', 'TAXchg', 'ROE', 'cashpr', 'RD', 'QR', 'salecash', 'saleinv']
MSE: 4.3597700589470145
2013-11-26 ['beta', 'skew', 'illq', 'LM', 'AM', 'EP', 'PMG', 'RDsales', 'QR']
MSE: 4.716572482662422
2013-11-27 ['illq', 'sharechg', 'age', 'imom', 'SG', 'ROA', 'CR', 'CRG']
MSE: 2.688063586386144
2013-11-28 ['skew', 'illq', 'age', 'imom', 'SgINVg']
MSE: 2.995623491216169
2013-11-29 ['beta', 'imom', 'lagretn', 'AM', 'CFP', 'INVG', 'CFdebt', 'salecash']
MSE: 3.071047354868936
2013-12-02 ['idvol', 'retnmax', 'lagretn', 'AM', 'TAXchg', 'CT', 'RDsales', 'CFdebt', 'salecash']
MSE: 5.16301029674257
2013-12-03 ['beta', 'skew', 'lagretn', 'AM', 'INVG', 'SgINVg', 'ROA', 'CR', 'CRG', 'QRG']
MSE: 3.6562350393532226
2013-12-04 ['turn', 'std_turn', 'mom6', 'INVG', 'INVchg', 'SgINVg', 'cash', 'CR']
MSE: 3.429245762918336
2013-12-05 ['EP', 'CFP', 'SG', 'CFdebt', 'salecash']
MSE: 3.2732258037626787
2013-12-06 ['idskew', 'coskew', 'volumed', 'age', 'AM', 'LEV', 'EP', 'INVG', 'PMG', 'ROA', 'cash', 'RD', 'QR', 'CFdebt']
MSE: 3.0892243267207697
2013-12-09 ['beta', 'coskew', 'illq', 'age', 'INVchg', 'ACCP', 'cash', 'cashpr', 'QRG']
MSE: 2.909975588127043
2013-12-10 ['coskew', 'turn', 'imom', 'lagretn', 'EP', 'CFP', 'DP', 'PMG', 'CT', 'RD', 'QR', 'CFdebt']
MSE: 3.429970656413309
2013-12-11 ['idvol', 'skew', 'LM', 'AM', 'LG', 'SgINVg', 'RD', 'RDsales', 'QR']
MSE: 3.899990774640407
2013-12-12 ['size', 'beta', 'idvol', 'age', 'AM', 'CFP', 'OCFP', 'INVG', 'INVchg', 'SG', 'CR', 'CRG']
MSE: 3.2466069131941597
2013-12-13 ['retnmax', 'LM', 'sharechg', 'mom6', 'lagretn', 'AM', 'LEV', 'EP', 'CFP', 'DP', 'SP', 'BVEG', 'INVG', 'ACC', 'salecash', 'saleinv', 'QRG']
MSE: 2.7994189887519934
2013-12-16 ['size', 'idskew', 'turn', 'LM', 'AM', 'EP', 'CFP', 'BVEG', 'SG', 'PMG', 'RDsales']
MSE: 4.5553817848811144
2013-12-17 ['beta', 'turn', 'std_turn', 'volumed', 'retnmax', 'mom6', 'AM', 'SG', 'PMG', 'RD']
MSE: 3.5561130332117106
2013-12-18 ['turn', 'lagretn', 'LEV', 'PA', 'CT', 'CR', 'salecash']
MSE: 2.474691764611882
2013-12-19 ['size', 'idvol', 'skew', 'coskew', 'turn', 'LM', 'BM', 'LG', 'INVchg', 'SG', 'PMG', 'CT', 'CR']
MSE: 2.5083141184554707
2013-12-20 ['betad', 'idvol', 'skew', 'coskew', 'std_turn', 'LEV', 'OCFP', 'TAXchg', 'CT', 'RD', 'salecash']
MSE: 3.7529575553876904
2013-12-23 ['turn', 'retnmax', 'AM', 'LEV', 'CFP', 'DP', 'CR']
MSE: 5.605109218355425
2013-12-24 ['coskew', 'turn', 'age', 'SG', 'QR', 'salecash']
MSE: 2.609969578928021
2013-12-25 ['mom6', 'lagretn', 'INVG', 'TAXchg', 'cash', 'CFdebt', 'salecash']
MSE: 2.9110866706614567
2013-12-26 ['idvol', 'skew', 'turn', 'sharechg', 'AM', 'PMG', 'ACCP', 'CT', 'RDsales', 'CR', 'salecash']
MSE: 4.76824086903283
2013-12-27 ['lagretn', 'LEV', 'INVG', 'RDsales', 'CFdebt']
MSE: 2.4008224944342778
2013-12-30 ['skew', 'turn', 'retnmax', 'mom6', 'momchg', 'INVG', 'CR', 'QRG']
MSE: 3.3727608487381446
2013-12-31 ['volumed', 'mom6', 'lagretn', 'DP', 'INVchg', 'SG', 'TAXchg', 'RD', 'RDsales', 'CFdebt', 'salecash']
MSE: 2.764648018725637
# Average number of factors removed per trading day.
linear_remove['removed'].mean()
10.084033613445378
# Average over all trading days of the value returned by linear_train after factor removal.
linear_remove['err'].mean()
3.808543809919859