import time
import numpy as np
import xgboost as xgb
from xgboost import plot_importance,plot_tree
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.metrics import mean_squared_error
%matplotlib inline
def dropun(X):
    """Return ``X`` without the columns whose label starts with ``'Unnamed'``.

    Parameters
    ----------
    X : pandas.DataFrame
        Frame to clean; it is not modified.

    Returns
    -------
    pandas.DataFrame
        ``X`` itself when there is nothing to drop, otherwise a new frame
        without the matching columns.
    """
    # str() keeps non-string labels (e.g. integers) from raising on the
    # prefix test; all matches are dropped in one call instead of copying
    # the frame once per column.
    unnamed = [col for col in X.columns if str(col).startswith('Unnamed')]
    if not unnamed:
        return X
    return X.drop(columns=unnamed)
def hist(L):
    """Overlay one density-normalised, semi-transparent histogram per data set in ``L``."""
    for series in L:
        plt.hist(series, histtype='stepfilled', density=True, alpha=0.3, bins=40)
def prediction(y_pred, y_test, plot=True):
    """Print and return the mean squared error between predictions and targets.

    Parameters
    ----------
    y_pred : array-like
        Predicted values.
    y_test : array-like
        Observed values, same length as ``y_pred``.
    plot : bool, default True
        When true, also draw both series against the sample position in a
        new figure.

    Returns
    -------
    float
        The mean squared error.
    """
    # mean_squared_error is documented as (y_true, y_pred); the value is the
    # same either way, but the call now matches the documented order.
    sum_erro = mean_squared_error(y_test, y_pred)
    # https://scikit-learn.org/stable/modules/generated/sklearn.metrics.mean_squared_error.html#sklearn.metrics.mean_squared_error
    print ("MSE:", sum_erro)
    if plot:
        # Line plot of predicted vs. observed values (this is not an ROC curve).
        positions = range(len(y_pred))
        plt.figure()
        plt.plot(positions, y_pred, 'b', label="predict")
        plt.plot(positions, y_test, 'r', label="test")
        plt.legend(loc="upper right")  # show the line labels
    return sum_erro
# Factor table (features) and daily table (holds the 'yield' target), each
# stripped of 'Unnamed...' columns.
X = dropun(pd.read_csv('data/factors2013-0-2-1.csv'))
Y = dropun(pd.read_csv('data/daily2011-2017-1.csv'))

# Every column of X except the stock code and the trade date is a factor.
factors = list(X.columns)
factors.remove('ts_code')
factors.remove('trade_date')

# Distinct trade dates present in X, in ascending order.
days = sorted(set(X['trade_date']))
def get_split_by_trade_date(date, state=0, remove_factors=None):
    """Build a random 80/20 train/test split of the rows for one trade date.

    Rows of the module-level ``X`` and ``Y`` frames whose ``trade_date``
    equals ``date`` are merged on ``ts_code``.

    Parameters
    ----------
    date
        Value compared against the ``trade_date`` column, e.g. ``'2013-03-01'``.
    state : int, default 0
        Missing-value handling applied to the merged frame:
        0 leaves missing values untouched, 1 drops every row that contains a
        missing value, 2 fills missing values with that day's column mean.
    remove_factors : list of str, optional
        Factor columns to exclude from the features.

    Returns
    -------
    x_train, x_test, y_train, y_test
        Feature frames whose columns follow the order of ``factors`` (minus
        ``remove_factors``) and target series equal to ``yield * 100``.
    """
    # A fresh list per call avoids sharing one mutable default between calls.
    remove_factors = [] if remove_factors is None else list(remove_factors)

    x = X[X['trade_date'] == date].drop(columns=['trade_date'] + remove_factors)
    y = Y[Y['trade_date'] == date].drop(columns=['trade_date'])
    z = pd.merge(x, y, on='ts_code')

    if state == 1:
        z = z.dropna()
    elif state == 2:
        # numeric_only skips text columns such as ts_code, which have no mean.
        z = z.fillna(value=z.mean(numeric_only=True))

    # Select with an ordered list rather than a set: the column order must
    # match `factors`, because callers label positional outputs (such as
    # permutation importances) with `factors`; a set has no defined order and
    # is rejected as an indexer by newer pandas.
    excluded = set(remove_factors)
    kept = [name for name in factors if name not in excluded]
    x = z[kept]
    y = z['yield'] * 100

    # Unseeded split: every call produces a different random partition.
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)
    return x_train, x_test, y_train, y_test
去除无用特征
根据permutation importance求在2013年全年各因子对于预测结果的重要性(使用随机random_state)。
首先根据线性模型计算一遍,再用XGBoost计算一遍
如果全年超过一半都为负数,则直接删去该因子
linear模型2013年各因子重要性
def get_linear_importance(date):
    """Fit a linear regression for one trade date and measure permutation importance.

    The split is requested with ``state=1``, i.e. rows containing missing
    values are dropped. Returns the ``PermutationImportance`` object fitted on
    the test part of the split.
    """
    train_x, test_x, train_y, test_y = get_split_by_trade_date(date, 1)

    regressor = LinearRegression()
    regressor.fit(train_x, train_y)

    # Importance is evaluated on the held-out rows, not on the training rows.
    importance_estimator = PermutationImportance(regressor)
    return importance_estimator.fit(test_x, test_y)
# Check on a single trade date: print the raw importances, then render eli5's weight table.
# NOTE(review): labelling with `factors` assumes the split's columns come back in `factors` order - confirm.
perm = get_linear_importance('2013-01-04')
print(pd.DataFrame(perm.feature_importances_, index = factors))
eli5.show_weights(perm, feature_names = factors, top=56)
x_train.shape=(1185, 56)
y_train.shape =(1185,)
x_test.shape=(297, 56)
y_test.shape=(297,)
0
size 0.022446
beta 0.000113
betad 0.001902
idvol -0.003462
total_vol 0.003035
idskew 0.140981
skew -0.002844
coskew 0.008199
turn -0.002669
std_turn 0.018550
volumed 0.000761
std_dvol 0.146898
retnmax 0.002345
illq -0.005696
LM 0.018519
sharechg -0.000700
age 0.000593
mom12 0.089085
mom6 0.029796
momchg 0.009969
imom 0.005457
lagretn -0.002747
BM 0.005480
AM -0.005562
LEV 0.006296
EP 0.012142
CFP 0.031895
OCFP -0.000878
DP -0.000697
SP 0.039042
AG 0.027988
LG 0.013685
BVEG 0.004491
INVG -0.006270
INVchg 0.004587
SG 0.000570
SgINVg -0.000105
PMG 0.003184
TAXchg 0.010312
ACC 0.065508
ACCP 0.164036
ROE 0.005988
ROA 0.006740
PA 0.003271
CT 0.009938
cash 0.012838
cashpr 0.132102
RD -0.001856
RDsales 0.002344
CR -0.004083
QR 0.007043
CFdebt 0.002388
salecash -0.003473
saleinv 0.086358
CRG 0.025812
QRG -0.005456
<style>
table.eli5-weights tr:hover {
filter: brightness(85%);
}
<table class="eli5-weights eli5-feature-importances" style="border-collapse: collapse; border: none; margin-top: 0em; table-layout: auto;">
<thead>
<tr style="border: none;">
<th style="padding: 0 1em 0 0.5em; text-align: right; border: none;">Weight</th>
<th style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">Feature</th>
</tr>
</thead>
<tbody>
<tr style="background-color: hsl(120, 100.00%, 80.00%); border: none;">
<td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
0.1640
± 0.0249
</td>
<td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
ACCP
</td>
</tr>
<tr style="background-color: hsl(120, 100.00%, 81.49%); border: none;">
<td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
0.1469
± 0.0485
</td>
<td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
std_dvol
</td>
</tr>
<tr style="background-color: hsl(120, 100.00%, 82.01%); border: none;">
<td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
0.1410
± 0.0263
</td>
<td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
idskew
</td>
</tr>
<tr style="background-color: hsl(120, 100.00%, 82.81%); border: none;">
<td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
0.1321
± 0.0312
</td>
<td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
cashpr
</td>
</tr>
<tr style="background-color: hsl(120, 100.00%, 86.96%); border: none;">
<td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
0.0891
± 0.0471
</td>
<td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
mom12
</td>
</tr>
<tr style="background-color: hsl(120, 100.00%, 87.24%); border: none;">
<td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
0.0864
± 0.0665
</td>
<td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
saleinv
</td>
</tr>
<tr style="background-color: hsl(120, 100.00%, 89.48%); border: none;">
<td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
0.0655
± 0.0407
</td>
<td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
ACC
</td>
</tr>
<tr style="background-color: hsl(120, 100.00%, 92.68%); border: none;">
<td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
0.0390
± 0.0297
</td>
<td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
SP
</td>
</tr>
<tr style="background-color: hsl(120, 100.00%, 93.64%); border: none;">
<td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
0.0319
± 0.0159
</td>
<td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
CFP
</td>
</tr>
<tr style="background-color: hsl(120, 100.00%, 93.94%); border: none;">
<td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
0.0298
± 0.0300
</td>
<td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
mom6
</td>
</tr>
<tr style="background-color: hsl(120, 100.00%, 94.20%); border: none;">
<td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
0.0280
± 0.0271
</td>
<td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
AG
</td>
</tr>
<tr style="background-color: hsl(120, 100.00%, 94.52%); border: none;">
<td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
0.0258
± 0.0220
</td>
<td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
CRG
</td>
</tr>
<tr style="background-color: hsl(120, 100.00%, 95.03%); border: none;">
<td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
0.0224
± 0.0311
</td>
<td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
size
</td>
</tr>
<tr style="background-color: hsl(120, 100.00%, 95.65%); border: none;">
<td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
0.0185
± 0.0286
</td>
<td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
std_turn
</td>
</tr>
<tr style="background-color: hsl(120, 100.00%, 95.66%); border: none;">
<td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
0.0185
± 0.0124
</td>
<td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
LM
</td>
</tr>
<tr style="background-color: hsl(120, 100.00%, 96.48%); border: none;">
<td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
0.0137
± 0.0407
</td>
<td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
LG
</td>
</tr>
<tr style="background-color: hsl(120, 100.00%, 96.64%); border: none;">
<td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
0.0128
± 0.0083
</td>
<td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
cash
</td>
</tr>
<tr style="background-color: hsl(120, 100.00%, 96.77%); border: none;">
<td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
0.0121
± 0.0262
</td>
<td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
EP
</td>
</tr>
<tr style="background-color: hsl(120, 100.00%, 97.12%); border: none;">
<td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
0.0103
± 0.0119
</td>
<td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
TAXchg
</td>
</tr>
<tr style="background-color: hsl(120, 100.00%, 97.18%); border: none;">
<td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
0.0100
± 0.0262
</td>
<td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
momchg
</td>
</tr>
<tr style="background-color: hsl(120, 100.00%, 97.19%); border: none;">
<td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
0.0099
± 0.0095
</td>
<td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
CT
</td>
</tr>
<tr style="background-color: hsl(120, 100.00%, 97.54%); border: none;">
<td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
0.0082
± 0.0189
</td>
<td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
coskew
</td>
</tr>
<tr style="background-color: hsl(120, 100.00%, 97.79%); border: none;">
<td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
0.0070
± 0.0130
</td>
<td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
QR
</td>
</tr>
<tr style="background-color: hsl(120, 100.00%, 97.86%); border: none;">
<td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
0.0067
± 0.0082
</td>
<td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
ROA
</td>
</tr>
<tr style="background-color: hsl(120, 100.00%, 97.96%); border: none;">
<td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
0.0063
± 0.0178
</td>
<td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
LEV
</td>
</tr>
<tr style="background-color: hsl(120, 100.00%, 98.03%); border: none;">
<td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
0.0060
± 0.0132
</td>
<td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
ROE
</td>
</tr>
<tr style="background-color: hsl(120, 100.00%, 98.15%); border: none;">
<td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
0.0055
± 0.0190
</td>
<td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
BM
</td>
</tr>
<tr style="background-color: hsl(120, 100.00%, 98.15%); border: none;">
<td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
0.0055
± 0.0057
</td>
<td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
imom
</td>
</tr>
<tr style="background-color: hsl(120, 100.00%, 98.36%); border: none;">
<td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
0.0046
± 0.0078
</td>
<td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
INVchg
</td>
</tr>
<tr style="background-color: hsl(120, 100.00%, 98.39%); border: none;">
<td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
0.0045
± 0.0073
</td>
<td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
BVEG
</td>
</tr>
<tr style="background-color: hsl(120, 100.00%, 98.71%); border: none;">
<td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
0.0033
± 0.0058
</td>
<td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
PA
</td>
</tr>
<tr style="background-color: hsl(120, 100.00%, 98.73%); border: none;">
<td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
0.0032
± 0.0051
</td>
<td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
PMG
</td>
</tr>
<tr style="background-color: hsl(120, 100.00%, 98.78%); border: none;">
<td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
0.0030
± 0.0116
</td>
<td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
total_vol
</td>
</tr>
<tr style="background-color: hsl(120, 100.00%, 98.96%); border: none;">
<td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
0.0024
± 0.0032
</td>
<td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
CFdebt
</td>
</tr>
<tr style="background-color: hsl(120, 100.00%, 98.98%); border: none;">
<td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
0.0023
± 0.0133
</td>
<td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
retnmax
</td>
</tr>
<tr style="background-color: hsl(120, 100.00%, 98.98%); border: none;">
<td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
0.0023
± 0.0028
</td>
<td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
RDsales
</td>
</tr>
<tr style="background-color: hsl(120, 100.00%, 99.12%); border: none;">
<td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
0.0019
± 0.0067
</td>
<td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
betad
</td>
</tr>
<tr style="background-color: hsl(120, 100.00%, 99.53%); border: none;">
<td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
0.0008
± 0.0018
</td>
<td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
volumed
</td>
</tr>
<tr style="background-color: hsl(120, 100.00%, 99.61%); border: none;">
<td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
0.0006
± 0.0005
</td>
<td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
age
</td>
</tr>
<tr style="background-color: hsl(120, 100.00%, 99.62%); border: none;">
<td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
0.0006
± 0.0063
</td>
<td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
SG
</td>
</tr>
<tr style="background-color: hsl(120, 100.00%, 99.88%); border: none;">
<td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
0.0001
± 0.0053
</td>
<td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
beta
</td>
</tr>
<tr style="background-color: hsl(0, 100.00%, 99.88%); border: none;">
<td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
-0.0001
± 0.0010
</td>
<td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
SgINVg
</td>
</tr>
<tr style="background-color: hsl(0, 100.00%, 99.56%); border: none;">
<td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
-0.0007
± 0.0062
</td>
<td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
DP
</td>
</tr>
<tr style="background-color: hsl(0, 100.00%, 99.56%); border: none;">
<td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
-0.0007
± 0.0017
</td>
<td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
sharechg
</td>
</tr>
<tr style="background-color: hsl(0, 100.00%, 99.49%); border: none;">
<td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
-0.0009
± 0.0172
</td>
<td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
OCFP
</td>
</tr>
<tr style="background-color: hsl(0, 100.00%, 99.13%); border: none;">
<td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
-0.0019
± 0.0071
</td>
<td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
RD
</td>
</tr>
<tr style="background-color: hsl(0, 100.00%, 98.88%); border: none;">
<td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
-0.0027
± 0.0048
</td>
<td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
turn
</td>
</tr>
<tr style="background-color: hsl(0, 100.00%, 98.86%); border: none;">
<td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
-0.0027
± 0.0026
</td>
<td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
lagretn
</td>
</tr>
<tr style="background-color: hsl(0, 100.00%, 98.83%); border: none;">
<td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
-0.0028
± 0.0025
</td>
<td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
skew
</td>
</tr>
<tr style="background-color: hsl(0, 100.00%, 98.66%); border: none;">
<td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
-0.0035
± 0.0107
</td>
<td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
idvol
</td>
</tr>
<tr style="background-color: hsl(0, 100.00%, 98.65%); border: none;">
<td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
-0.0035
± 0.0025
</td>
<td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
salecash
</td>
</tr>
<tr style="background-color: hsl(0, 100.00%, 98.49%); border: none;">
<td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
-0.0041
± 0.0069
</td>
<td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
CR
</td>
</tr>
<tr style="background-color: hsl(0, 100.00%, 98.15%); border: none;">
<td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
-0.0055
± 0.0050
</td>
<td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
QRG
</td>
</tr>
<tr style="background-color: hsl(0, 100.00%, 98.13%); border: none;">
<td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
-0.0056
± 0.0190
</td>
<td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
AM
</td>
</tr>
<tr style="background-color: hsl(0, 100.00%, 98.10%); border: none;">
<td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
-0.0057
± 0.0074
</td>
<td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
illq
</td>
</tr>
<tr style="background-color: hsl(0, 100.00%, 97.96%); border: none;">
<td style="padding: 0 1em 0 0.5em; text-align: right; border: none;">
-0.0063
± 0.0067
</td>
<td style="padding: 0 0.5em 0 0.5em; text-align: left; border: none;">
INVG
</td>
</tr>
</tbody>
# Per-factor running totals over all trade days: 'sum' accumulates the
# permutation importances, 'negative' counts the days with importance < 0.
importance = pd.DataFrame({'sum': 0, 'negative': 0}, index=factors)
for day in days:
    print(day)
    perm = get_linear_importance(day)
    importance['sum'] += perm.feature_importances_
    importance['negative'] += perm.feature_importances_ < 0
2013-01-04
x_train.shape=(1185, 56)
y_train.shape =(1185,)
x_test.shape=(297, 56)
y_test.shape=(297,)
2013-01-07
x_train.shape=(1175, 56)
y_train.shape =(1175,)
x_test.shape=(294, 56)
y_test.shape=(294,)
2013-01-08
x_train.shape=(1176, 56)
y_train.shape =(1176,)
x_test.shape=(294, 56)
y_test.shape=(294,)
2013-01-09
x_train.shape=(1178, 56)
y_train.shape =(1178,)
x_test.shape=(295, 56)
y_test.shape=(295,)
2013-01-10
x_train.shape=(1170, 56)
y_train.shape =(1170,)
x_test.shape=(293, 56)
y_test.shape=(293,)
2013-01-11
x_train.shape=(1172, 56)
y_train.shape =(1172,)
x_test.shape=(293, 56)
y_test.shape=(293,)
2013-01-14
x_train.shape=(1161, 56)
y_train.shape =(1161,)
x_test.shape=(291, 56)
y_test.shape=(291,)
2013-01-15
x_train.shape=(1176, 56)
y_train.shape =(1176,)
x_test.shape=(294, 56)
y_test.shape=(294,)
2013-01-16
x_train.shape=(1164, 56)
y_train.shape =(1164,)
x_test.shape=(291, 56)
y_test.shape=(291,)
2013-01-17
x_train.shape=(1174, 56)
y_train.shape =(1174,)
x_test.shape=(294, 56)
y_test.shape=(294,)
2013-01-18
x_train.shape=(1175, 56)
y_train.shape =(1175,)
x_test.shape=(294, 56)
y_test.shape=(294,)
2013-01-21
x_train.shape=(1186, 56)
y_train.shape =(1186,)
x_test.shape=(297, 56)
y_test.shape=(297,)
2013-01-22
x_train.shape=(1181, 56)
y_train.shape =(1181,)
x_test.shape=(296, 56)
y_test.shape=(296,)
2013-01-23
x_train.shape=(1168, 56)
y_train.shape =(1168,)
x_test.shape=(293, 56)
y_test.shape=(293,)
2013-01-24
x_train.shape=(1184, 56)
y_train.shape =(1184,)
x_test.shape=(297, 56)
y_test.shape=(297,)
2013-01-25
x_train.shape=(1167, 56)
y_train.shape =(1167,)
x_test.shape=(292, 56)
y_test.shape=(292,)
...
2013-12-31
x_train.shape=(1257, 56)
y_train.shape =(1257,)
x_test.shape=(315, 56)
y_test.shape=(315,)
# Show every column, then list the factors from largest to smallest summed importance.
pd.set_option('display.max_columns', None)
importance.sort_values(by="sum", ascending=False).T
ROE | mom12 | ACCP | std_dvol | momchg | BM | AG | SP | idskew | sharechg | SgINVg | saleinv | CRG | total_vol | ACC | LM | LG | cashpr | LEV | betad | mom6 | imom | CFdebt | QRG | std_turn | coskew | PA | beta | ROA | QR | INVchg | size | DP | TAXchg | PMG | volumed | BVEG | CR | CT | OCFP | SG | lagretn | retnmax | illq | cash | CFP | skew | EP | INVG | RDsales | RD | salecash | idvol | age | AM | turn | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
sum | 450.179329 | 430.119666 | 74.490326 | 72.665519 | 18.17473 | 18.007253 | 17.610628 | 16.867773 | 15.003665 | 14.761029 | 14.236372 | 14.163207 | 13.581656 | 11.512615 | 10.269702 | 9.761518 | 7.554256 | 6.340006 | 5.963265 | 5.642828 | 3.519301 | 2.888595 | 2.681334 | 2.456618 | 2.447205 | 2.4201 | 2.337622 | 2.337583 | 2.138391 | 2.084859 | 1.935707 | 1.856355 | 1.847884 | 1.705292 | 1.395641 | 1.257528 | 1.239606 | 1.188467 | 1.143138 | 1.113026 | 0.890586 | 0.826029 | 0.81739 | 0.780101 | 0.593657 | 0.590286 | 0.501265 | 0.443186 | 0.340089 | 0.211641 | 0.153811 | 0.098633 | 0.090249 | 0.017987 | -0.00769 | -0.100969 |
negative | 9.000000 | 6.000000 | 12.000000 | 7.000000 | 30.00000 | 26.000000 | 29.000000 | 40.000000 | 36.000000 | 35.000000 | 32.000000 | 17.000000 | 33.000000 | 40.000000 | 28.000000 | 45.000000 | 47.000000 | 50.000000 | 55.000000 | 50.000000 | 62.000000 | 56.000000 | 67.000000 | 60.000000 | 55.000000 | 51.0000 | 59.000000 | 65.000000 | 57.000000 | 64.000000 | 56.000000 | 71.000000 | 64.000000 | 61.000000 | 87.000000 | 76.000000 | 75.000000 | 86.000000 | 67.000000 | 78.000000 | 87.000000 | 84.000000 | 79.00000 | 87.000000 | 95.000000 | 103.000000 | 95.000000 | 96.000000 | 106.000000 | 110.000000 | 108.000000 | 127.000000 | 116.000000 | 120.000000 | 125.00000 | 130.000000 |
# Factors ordered from fewest to most days with a negative importance.
importance.sort_values(by="negative").T
mom12 | std_dvol | ROE | ACCP | saleinv | BM | ACC | AG | momchg | SgINVg | CRG | sharechg | idskew | total_vol | SP | LM | LG | betad | cashpr | coskew | std_turn | LEV | imom | INVchg | ROA | PA | QRG | TAXchg | mom6 | DP | QR | beta | CT | CFdebt | size | BVEG | volumed | OCFP | retnmax | lagretn | CR | illq | SG | PMG | skew | cash | EP | CFP | INVG | RD | RDsales | idvol | age | AM | salecash | turn | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
sum | 430.119666 | 72.665519 | 450.179329 | 74.490326 | 14.163207 | 18.007253 | 10.269702 | 17.610628 | 18.17473 | 14.236372 | 13.581656 | 14.761029 | 15.003665 | 11.512615 | 16.867773 | 9.761518 | 7.554256 | 5.642828 | 6.340006 | 2.4201 | 2.447205 | 5.963265 | 2.888595 | 1.935707 | 2.138391 | 2.337622 | 2.456618 | 1.705292 | 3.519301 | 1.847884 | 2.084859 | 2.337583 | 1.143138 | 2.681334 | 1.856355 | 1.239606 | 1.257528 | 1.113026 | 0.81739 | 0.826029 | 1.188467 | 0.780101 | 0.890586 | 1.395641 | 0.501265 | 0.593657 | 0.443186 | 0.590286 | 0.340089 | 0.153811 | 0.211641 | 0.090249 | 0.017987 | -0.00769 | 0.098633 | -0.100969 |
negative | 6.000000 | 7.000000 | 9.000000 | 12.000000 | 17.000000 | 26.000000 | 28.000000 | 29.000000 | 30.00000 | 32.000000 | 33.000000 | 35.000000 | 36.000000 | 40.000000 | 40.000000 | 45.000000 | 47.000000 | 50.000000 | 50.000000 | 51.0000 | 55.000000 | 55.000000 | 56.000000 | 56.000000 | 57.000000 | 59.000000 | 60.000000 | 61.000000 | 62.000000 | 64.000000 | 64.000000 | 65.000000 | 67.000000 | 67.000000 | 71.000000 | 75.000000 | 76.000000 | 78.000000 | 79.00000 | 84.000000 | 86.000000 | 87.000000 | 87.000000 | 87.000000 | 95.000000 | 95.000000 | 96.000000 | 103.000000 | 106.000000 | 108.000000 | 110.000000 | 116.000000 | 120.000000 | 125.00000 | 127.000000 | 130.000000 |
def get_last_N(importance, n):
    """Return the factors that are among the worst ``n`` on both criteria.

    Parameters
    ----------
    importance : pandas.DataFrame
        Frame indexed by factor name with the columns ``'sum'`` and
        ``'negative'``.
    n : int
        How many of the lowest-``sum`` and of the highest-``negative``
        factors to consider.

    Returns
    -------
    set
        Index labels present in both bottom-``n`` groups; it may contain
        fewer than ``n`` labels and is empty when ``n == 0``.
    """
    if n == 0:
        # An empty *set*, so every call returns the same type
        # (the literal {} would be a dict).
        return set()
    lowest_sum = importance.sort_values(by="sum", ascending=False).index[-n:]
    most_negative = importance.sort_values(by="negative", ascending=True).index[-n:]
    return set(lowest_sum) & set(most_negative)
# Factors that fall in the bottom 10 on both criteria; being an intersection,
# the list can hold fewer than 10 names.
remove_factors = list(get_last_N(importance, 10))
remove_factors
['salecash', 'idvol', 'EP', 'INVG', 'turn', 'AM', 'RD', 'RDsales', 'age']
# Combined rank score: position when ordered by descending 'sum' plus position
# when ordered by ascending 'negative' (both 1-based), so a low mark is good.
importance["mark"] = 0
for rank, factor in enumerate(importance.sort_values(by="sum", ascending=False).index, start=1):
    importance.loc[factor, "mark"] += rank
for rank, factor in enumerate(importance.sort_values(by="negative", ascending=True).index, start=1):
    importance.loc[factor, "mark"] += rank
importance.sort_values(by="mark", ascending=True).transpose()
mom12 | ROE | std_dvol | ACCP | BM | momchg | AG | saleinv | SgINVg | sharechg | ACC | idskew | SP | CRG | total_vol | LM | LG | cashpr | betad | LEV | imom | std_turn | coskew | mom6 | QRG | PA | ROA | INVchg | CFdebt | beta | QR | TAXchg | DP | size | CT | volumed | BVEG | OCFP | CR | PMG | lagretn | retnmax | SG | illq | cash | skew | CFP | EP | INVG | RD | RDsales | idvol | age | salecash | AM | turn | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
sum | 430.119666 | 450.179329 | 72.665519 | 74.490326 | 18.007253 | 18.17473 | 17.610628 | 14.163207 | 14.236372 | 14.761029 | 10.269702 | 15.003665 | 16.867773 | 13.581656 | 11.512615 | 9.761518 | 7.554256 | 6.340006 | 5.642828 | 5.963265 | 2.888595 | 2.447205 | 2.4201 | 3.519301 | 2.456618 | 2.337622 | 2.138391 | 1.935707 | 2.681334 | 2.337583 | 2.084859 | 1.705292 | 1.847884 | 1.856355 | 1.143138 | 1.257528 | 1.239606 | 1.113026 | 1.188467 | 1.395641 | 0.826029 | 0.81739 | 0.890586 | 0.780101 | 0.593657 | 0.501265 | 0.590286 | 0.443186 | 0.340089 | 0.153811 | 0.211641 | 0.090249 | 0.017987 | 0.098633 | -0.00769 | -0.100969 |
negative | 6.000000 | 9.000000 | 7.000000 | 12.000000 | 26.000000 | 30.00000 | 29.000000 | 17.000000 | 32.000000 | 35.000000 | 28.000000 | 36.000000 | 40.000000 | 33.000000 | 40.000000 | 45.000000 | 47.000000 | 50.000000 | 50.000000 | 55.000000 | 56.000000 | 55.000000 | 51.0000 | 62.000000 | 60.000000 | 59.000000 | 57.000000 | 56.000000 | 67.000000 | 65.000000 | 64.000000 | 61.000000 | 64.000000 | 71.000000 | 67.000000 | 76.000000 | 75.000000 | 78.000000 | 86.000000 | 87.000000 | 84.000000 | 79.00000 | 87.000000 | 87.000000 | 95.000000 | 95.000000 | 103.000000 | 96.000000 | 106.000000 | 108.000000 | 110.000000 | 116.000000 | 120.000000 | 127.000000 | 125.00000 | 130.000000 |
mark | 3.000000 | 4.000000 | 6.000000 | 7.000000 | 12.000000 | 14.00000 | 15.000000 | 17.000000 | 21.000000 | 22.000000 | 22.000000 | 22.000000 | 23.000000 | 24.000000 | 28.000000 | 32.000000 | 34.000000 | 37.000000 | 38.000000 | 41.000000 | 45.000000 | 46.000000 | 46.0000 | 50.000000 | 51.000000 | 53.000000 | 54.000000 | 55.000000 | 57.000000 | 60.000000 | 61.000000 | 62.000000 | 63.000000 | 67.000000 | 72.000000 | 73.000000 | 73.000000 | 78.000000 | 79.000000 | 79.000000 | 82.000000 | 82.00000 | 84.000000 | 86.000000 | 91.000000 | 92.000000 | 94.000000 | 95.000000 | 98.000000 | 101.000000 | 101.000000 | 105.000000 | 107.000000 | 107.000000 | 109.00000 | 112.000000 |
def get_trivial(n):
    """Return the ``n`` factors with the highest ``mark`` score, highest first.

    Reads the module-level ``importance`` frame; ``n == 0`` gives an empty list.
    """
    if n != 0:
        highest_mark_first = importance.sort_values(by="mark", ascending=False).index
        return list(highest_mark_first[:n])
    return []
# Show the five factors with the highest combined rank score.
get_trivial(5)
['turn', 'AM', 'salecash', 'age', 'idvol']
linear模型去除无用特征后计算准确率
# Per-day result of linear_train with `remove_factors` excluded.
# NOTE(review): linear_train is not defined in this part of the file; presumably
# its second argument is the missing-value state (1 = drop incomplete rows) and
# it returns the day's MSE - confirm.
linear_err1 = []
for day in days:
    print(day)
    day_error = linear_train(day, 1, remove_factors)
    linear_err1.append(day_error)
2013-01-04
x_train.shape=(1284, 47)
y_train.shape =(1284,)
x_test.shape=(322, 47)
y_test.shape=(322,)
MSE: 3.8900002458152043
2013-01-07
x_train.shape=(1276, 47)
y_train.shape =(1276,)
x_test.shape=(320, 47)
y_test.shape=(320,)
MSE: 3.6450571284258366
2013-01-08
x_train.shape=(1272, 47)
y_train.shape =(1272,)
x_test.shape=(319, 47)
y_test.shape=(319,)
MSE: 3.4844858023817724
2013-01-09
x_train.shape=(1279, 47)
y_train.shape =(1279,)
x_test.shape=(320, 47)
y_test.shape=(320,)
MSE: 4.385839023354379
2013-01-10
x_train.shape=(1267, 47)
y_train.shape =(1267,)
x_test.shape=(317, 47)
y_test.shape=(317,)
MSE: 3.2301927988080217
2013-01-11
x_train.shape=(1276, 47)
y_train.shape =(1276,)
x_test.shape=(319, 47)
y_test.shape=(319,)
MSE: 3.3160646057613286
2013-01-14
x_train.shape=(1264, 47)
y_train.shape =(1264,)
x_test.shape=(317, 47)
y_test.shape=(317,)
MSE: 2.6315416654139416
2013-01-15
x_train.shape=(1280, 47)
y_train.shape =(1280,)
x_test.shape=(320, 47)
y_test.shape=(320,)
MSE: 3.161195649230965
2013-01-16
x_train.shape=(1267, 47)
y_train.shape =(1267,)
x_test.shape=(317, 47)
y_test.shape=(317,)
MSE: 3.648510598956155
2013-01-17
x_train.shape=(1277, 47)
y_train.shape =(1277,)
x_test.shape=(320, 47)
y_test.shape=(320,)
MSE: 3.6928441679882473
2013-01-18
x_train.shape=(1278, 47)
y_train.shape =(1278,)
x_test.shape=(320, 47)
y_test.shape=(320,)
MSE: 2.6165099237166096
2013-01-21
x_train.shape=(1288, 47)
y_train.shape =(1288,)
x_test.shape=(322, 47)
y_test.shape=(322,)
MSE: 3.894516190209926
2013-01-22
x_train.shape=(1284, 47)
y_train.shape =(1284,)
x_test.shape=(322, 47)
y_test.shape=(322,)
MSE: 4.286606469881523
2013-01-23
x_train.shape=(1270, 47)
y_train.shape =(1270,)
x_test.shape=(318, 47)
y_test.shape=(318,)
MSE: 3.5002474096789133
2013-01-24
x_train.shape=(1289, 47)
y_train.shape =(1289,)
x_test.shape=(323, 47)
y_test.shape=(323,)
MSE: 4.337442107992211
2013-01-25
x_train.shape=(1268, 47)
y_train.shape =(1268,)
x_test.shape=(317, 47)
y_test.shape=(317,)
MSE: 4.233215798264175
2013-01-28
x_train.shape=(1285, 47)
y_train.shape =(1285,)
x_test.shape=(322, 47)
y_test.shape=(322,)
MSE: 2.1029862362265517
2013-01-29
x_train.shape=(1291, 47)
y_train.shape =(1291,)
x_test.shape=(323, 47)
y_test.shape=(323,)
MSE: 3.0675884909138524
2013-01-30
x_train.shape=(1288, 47)
y_train.shape =(1288,)
x_test.shape=(323, 47)
y_test.shape=(323,)
MSE: 3.390301459355334
2013-01-31
x_train.shape=(1298, 47)
y_train.shape =(1298,)
x_test.shape=(325, 47)
y_test.shape=(325,)
MSE: 4.195368336294644
2013-02-01
x_train.shape=(1283, 47)
y_train.shape =(1283,)
x_test.shape=(321, 47)
y_test.shape=(321,)
MSE: 4.275662003906857
2013-02-04
x_train.shape=(1284, 47)
y_train.shape =(1284,)
x_test.shape=(321, 47)
y_test.shape=(321,)
MSE: 4.889704561843679
2013-02-05
x_train.shape=(1287, 47)
y_train.shape =(1287,)
x_test.shape=(322, 47)
y_test.shape=(322,)
MSE: 3.308904702025854
2013-02-06
x_train.shape=(1302, 47)
y_train.shape =(1302,)
x_test.shape=(326, 47)
y_test.shape=(326,)
MSE: 2.3231021846206152
2013-02-07
x_train.shape=(1291, 47)
y_train.shape =(1291,)
x_test.shape=(323, 47)
y_test.shape=(323,)
MSE: 3.0897366381997493
2013-02-08
x_train.shape=(1291, 47)
y_train.shape =(1291,)
x_test.shape=(323, 47)
y_test.shape=(323,)
MSE: 3.0095948358334943
2013-02-18
x_train.shape=(1306, 47)
y_train.shape =(1306,)
x_test.shape=(327, 47)
y_test.shape=(327,)
MSE: 3.7638168877810902
2013-02-19
x_train.shape=(1289, 47)
y_train.shape =(1289,)
x_test.shape=(323, 47)
y_test.shape=(323,)
MSE: 3.9436390461992437
2013-02-20
x_train.shape=(1276, 47)
y_train.shape =(1276,)
x_test.shape=(320, 47)
y_test.shape=(320,)
MSE: 2.8167905873185783
2013-02-21
x_train.shape=(1316, 47)
y_train.shape =(1316,)
x_test.shape=(330, 47)
y_test.shape=(330,)
MSE: 4.046686848841419
2013-02-22
x_train.shape=(1298, 47)
y_train.shape =(1298,)
x_test.shape=(325, 47)
y_test.shape=(325,)
MSE: 4.683281982924828
2013-02-25
x_train.shape=(1292, 47)
y_train.shape =(1292,)
x_test.shape=(323, 47)
y_test.shape=(323,)
MSE: 3.465463687553005
2013-02-26
x_train.shape=(1298, 47)
y_train.shape =(1298,)
x_test.shape=(325, 47)
y_test.shape=(325,)
MSE: 3.895290637571066
2013-02-27
x_train.shape=(1298, 47)
y_train.shape =(1298,)
x_test.shape=(325, 47)
y_test.shape=(325,)
MSE: 3.1285588354082488
2013-02-28
x_train.shape=(1304, 47)
y_train.shape =(1304,)
x_test.shape=(326, 47)
y_test.shape=(326,)
MSE: 2.250473626020243
2013-03-01
x_train.shape=(1292, 47)
y_train.shape =(1292,)
x_test.shape=(324, 47)
y_test.shape=(324,)
MSE: 2.8604010827684423
2013-03-04
x_train.shape=(1296, 47)
y_train.shape =(1296,)
x_test.shape=(324, 47)
y_test.shape=(324,)
MSE: 3.6787565887350375
2013-03-05
x_train.shape=(1292, 47)
y_train.shape =(1292,)
x_test.shape=(323, 47)
y_test.shape=(323,)
MSE: 2.581324233766885
2013-03-06
x_train.shape=(1291, 47)
y_train.shape =(1291,)
x_test.shape=(323, 47)
y_test.shape=(323,)
MSE: 3.626701292445271
2013-03-07
x_train.shape=(1293, 47)
y_train.shape =(1293,)
x_test.shape=(324, 47)
y_test.shape=(324,)
MSE: 4.545620279447667
2013-03-08
x_train.shape=(1304, 47)
y_train.shape =(1304,)
x_test.shape=(327, 47)
y_test.shape=(327,)
MSE: 3.928400420077943
2013-03-11
x_train.shape=(1316, 47)
y_train.shape =(1316,)
x_test.shape=(329, 47)
y_test.shape=(329,)
MSE: 3.166664175089738
2013-03-12
x_train.shape=(1301, 47)
y_train.shape =(1301,)
x_test.shape=(326, 47)
y_test.shape=(326,)
MSE: 4.5616688994028
2013-03-13
x_train.shape=(1309, 47)
y_train.shape =(1309,)
x_test.shape=(328, 47)
y_test.shape=(328,)
MSE: 4.472122380897298
2013-03-14
x_train.shape=(1284, 47)
y_train.shape =(1284,)
x_test.shape=(322, 47)
y_test.shape=(322,)
MSE: 3.188998539129929
2013-03-15
x_train.shape=(1293, 47)
y_train.shape =(1293,)
x_test.shape=(324, 47)
y_test.shape=(324,)
MSE: 3.040507215859806
2013-03-18
x_train.shape=(1318, 47)
y_train.shape =(1318,)
x_test.shape=(330, 47)
y_test.shape=(330,)
MSE: 4.55967705671932
2013-03-19
x_train.shape=(1320, 47)
y_train.shape =(1320,)
x_test.shape=(331, 47)
y_test.shape=(331,)
MSE: 3.6700105115883552
2013-03-20
x_train.shape=(1322, 47)
y_train.shape =(1322,)
x_test.shape=(331, 47)
y_test.shape=(331,)
MSE: 1.8441303047839912
...
2013-12-31
x_train.shape=(1391, 47)
y_train.shape =(1391,)
x_test.shape=(348, 47)
y_test.shape=(348,)
MSE: 2.682600346871959
# Average of the per-day results collected in linear_err1.
np.mean(linear_err1)
3.8178624023374335
# Same loop as for linear_err1, but passing 2 as the second argument.
# NOTE(review): linear_train is not defined in this part of the file; presumably
# 2 selects mean-filling of missing values - confirm.
linear_err2 = []
for day in days:
    print(day)
    day_error = linear_train(day, 2, remove_factors)
    linear_err2.append(day_error)
2013-01-04
x_train.shape=(1820, 47)
y_train.shape =(1820,)
x_test.shape=(455, 47)
y_test.shape=(455,)
MSE: 5.068830718368795
2013-01-07
x_train.shape=(1793, 47)
y_train.shape =(1793,)
x_test.shape=(449, 47)
y_test.shape=(449,)
MSE: 3.107165891835445
2013-01-08
x_train.shape=(1797, 47)
y_train.shape =(1797,)
x_test.shape=(450, 47)
y_test.shape=(450,)
MSE: 3.965400340309687
...
2013-12-27
x_train.shape=(1771, 47)
y_train.shape =(1771,)
x_test.shape=(443, 47)
y_test.shape=(443,)
MSE: 2.829811584378174
2013-12-30
x_train.shape=(1783, 47)
y_train.shape =(1783,)
x_test.shape=(446, 47)
y_test.shape=(446,)
MSE: 3.7229692784887587
2013-12-31
x_train.shape=(1781, 47)
y_train.shape =(1781,)
x_test.shape=(446, 47)
y_test.shape=(446,)
MSE: 2.9187787604137077
# Average of the per-day results collected in linear_err2.
np.mean(linear_err2)
3.9034814254319627
XGBoost模型2013年各因子重要性
def get_XGBoost_importance(date):
    """Fit an XGBoost regressor on one trading day and score its features.

    The day's data is split with ``get_split_by_trade_date`` using its
    default state (no missing-value removal or filling). The held-out split
    serves both as the early-stopping validation set and as the data on
    which permutation importance is computed.

    Args:
        date: Trade date passed through to ``get_split_by_trade_date``.

    Returns:
        The fitted ``PermutationImportance`` object.
    """
    # Split the data set.
    x_train, x_test, y_train, y_test = get_split_by_trade_date(date)

    # Train the model.
    regressor_kwargs = dict(
        max_depth=6,
        learning_rate=0.05,
        n_estimators=100,
        objective='reg:squarederror',
        tree_method='gpu_hist',
    )
    model = xgb.XGBRegressor(**regressor_kwargs)
    model.fit(
        x_train,
        y_train,
        eval_set=[(x_test, y_test)],
        eval_metric='rmse',
        early_stopping_rounds=5,
    )

    # Compute the permutation importance on the held-out split.
    return PermutationImportance(model).fit(x_test, y_test)
# Per factor, accumulate over all trading days:
#   'sum'      - the summed permutation importance
#   'negative' - the number of days on which the importance was negative
importance_XGBoost = pd.DataFrame(index=factors)
importance_XGBoost['sum'] = 0
importance_XGBoost['negative'] = 0
for day in days:
    print(day)
    # Score with the XGBoost model; this cell previously called
    # get_linear_importance, so the "XGBoost" table was built from the
    # linear model.
    perm = get_XGBoost_importance(day)
    importance_XGBoost['sum'] += perm.feature_importances_
    importance_XGBoost['negative'] += (perm.feature_importances_ < 0)
# NOTE(review): the importances are added positionally, which assumes the
# feature columns produced by get_split_by_trade_date are in the same order
# as `factors`. That helper selects its columns through a set, so confirm
# the order really matches.
2013-01-04
x_train.shape=(1185, 56)
y_train.shape =(1185,)
x_test.shape=(297, 56)
y_test.shape=(297,)
...
2013-12-30
x_train.shape=(1257, 56)
y_train.shape =(1257,)
x_test.shape=(315, 56)
y_test.shape=(315,)
2013-12-31
x_train.shape=(1257, 56)
y_train.shape =(1257,)
x_test.shape=(315, 56)
y_test.shape=(315,)
# Show every column, then list the factors from highest to lowest summed
# importance (factors as columns).
pd.set_option('display.max_columns', None)
importance_XGBoost.sort_values("sum", ascending=False).T
ROE | mom12 | ACCP | std_dvol | momchg | BM | AG | SP | idskew | sharechg | saleinv | SgINVg | CRG | total_vol | ACC | LM | LG | cashpr | LEV | betad | mom6 | imom | CFdebt | QRG | std_turn | beta | coskew | PA | ROA | QR | size | DP | INVchg | TAXchg | BVEG | PMG | volumed | CT | CR | OCFP | SG | lagretn | illq | retnmax | cash | CFP | skew | EP | INVG | RDsales | RD | idvol | age | salecash | AM | turn | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
sum | 448.405801 | 429.702717 | 74.743502 | 72.6 | 18.79974 | 17.990883 | 17.463805 | 17.166179 | 14.967974 | 14.685136 | 14.330699 | 14.089602 | 13.72714 | 11.487408 | 10.214821 | 9.876215 | 7.523135 | 6.356613 | 5.795162 | 5.624204 | 3.459836 | 3.01425 | 2.780895 | 2.529968 | 2.471124 | 2.463948 | 2.385884 | 2.328784 | 2.254455 | 2.201655 | 1.965896 | 1.916151 | 1.891923 | 1.651909 | 1.406529 | 1.3165 | 1.236694 | 1.194309 | 1.187277 | 1.122339 | 0.874006 | 0.854623 | 0.823669 | 0.776181 | 0.64295 | 0.596978 | 0.521497 | 0.438793 | 0.3598 | 0.192119 | 0.139393 | 0.052621 | 0.03791 | 0.015843 | -0.000126 | -0.101014 |
negative | 7.000000 | 5.000000 | 15.000000 | 13.0 | 25.00000 | 23.000000 | 30.000000 | 34.000000 | 41.000000 | 31.000000 | 21.000000 | 41.000000 | 35.00000 | 44.000000 | 28.000000 | 41.000000 | 48.000000 | 53.000000 | 59.000000 | 50.000000 | 63.000000 | 61.00000 | 67.000000 | 59.000000 | 69.000000 | 67.000000 | 54.000000 | 59.000000 | 60.000000 | 64.000000 | 71.000000 | 61.000000 | 57.000000 | 69.000000 | 69.000000 | 79.0000 | 81.000000 | 59.000000 | 90.000000 | 67.000000 | 82.000000 | 94.000000 | 85.000000 | 75.000000 | 88.00000 | 99.000000 | 94.000000 | 89.000000 | 93.0000 | 117.000000 | 110.000000 | 111.000000 | 113.00000 | 137.000000 | 122.000000 | 134.000000 |
# List the factors from fewest to most negative-importance days
# (factors as columns).
importance_XGBoost.sort_values("negative").T
mom12 | ROE | std_dvol | ACCP | saleinv | BM | momchg | ACC | AG | sharechg | SP | CRG | LM | idskew | SgINVg | total_vol | LG | betad | cashpr | coskew | INVchg | PA | LEV | QRG | CT | ROA | DP | imom | mom6 | QR | CFdebt | OCFP | beta | BVEG | std_turn | TAXchg | size | retnmax | PMG | volumed | SG | illq | cash | EP | CR | INVG | skew | lagretn | CFP | RD | idvol | age | RDsales | AM | turn | salecash | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
sum | 429.702717 | 448.405801 | 72.6 | 74.743502 | 14.330699 | 17.990883 | 18.79974 | 10.214821 | 17.463805 | 14.685136 | 17.166179 | 13.72714 | 9.876215 | 14.967974 | 14.089602 | 11.487408 | 7.523135 | 5.624204 | 6.356613 | 2.385884 | 1.891923 | 2.328784 | 5.795162 | 2.529968 | 1.194309 | 2.254455 | 1.916151 | 3.01425 | 3.459836 | 2.201655 | 2.780895 | 1.122339 | 2.463948 | 1.406529 | 2.471124 | 1.651909 | 1.965896 | 0.776181 | 1.3165 | 1.236694 | 0.874006 | 0.823669 | 0.64295 | 0.438793 | 1.187277 | 0.3598 | 0.521497 | 0.854623 | 0.596978 | 0.139393 | 0.052621 | 0.03791 | 0.192119 | -0.000126 | -0.101014 | 0.015843 |
negative | 5.000000 | 7.000000 | 13.0 | 15.000000 | 21.000000 | 23.000000 | 25.00000 | 28.000000 | 30.000000 | 31.000000 | 34.000000 | 35.00000 | 41.000000 | 41.000000 | 41.000000 | 44.000000 | 48.000000 | 50.000000 | 53.000000 | 54.000000 | 57.000000 | 59.000000 | 59.000000 | 59.000000 | 59.000000 | 60.000000 | 61.000000 | 61.00000 | 63.000000 | 64.000000 | 67.000000 | 67.000000 | 67.000000 | 69.000000 | 69.000000 | 69.000000 | 71.000000 | 75.000000 | 79.0000 | 81.000000 | 82.000000 | 85.000000 | 88.00000 | 89.000000 | 90.000000 | 93.0000 | 94.000000 | 94.000000 | 99.000000 | 110.000000 | 111.000000 | 113.00000 | 117.000000 | 122.000000 | 134.000000 | 137.000000 |
# Factors to drop for the XGBoost runs, as selected by get_last_N with N=10.
remove_factors_XGBoost = list(get_last_N(importance_XGBoost, 10))
# Print the linear-model selection followed by the XGBoost selection.
for _table in (importance, importance_XGBoost):
    print(get_last_N(_table, 10))
{'salecash', 'idvol', 'INVG', 'turn', 'AM', 'RD', 'RDsales', 'age'}
{'salecash', 'idvol', 'turn', 'skew', 'AM', 'RD', 'RDsales', 'age'}
# Same comparison with N=20: linear-model selection, then XGBoost selection.
for _table in (importance, importance_XGBoost):
    print(get_last_N(_table, 20))
{'salecash', 'CR', 'volumed', 'CFP', 'SG', 'idvol', 'EP', 'turn', 'INVG', 'lagretn', 'cash', 'skew', 'age', 'retnmax', 'AM', 'illq', 'RDsales', 'RD'}
{'salecash', 'CR', 'volumed', 'CFP', 'SG', 'idvol', 'EP', 'turn', 'INVG', 'lagretn', 'cash', 'skew', 'age', 'retnmax', 'AM', 'illq', 'RDsales', 'RD'}
XGBoost得到的结果与Linear模型略有不同,但差别不大:N=10时两者只有一个因子不同(Linear为INVG,XGBoost为skew),N=20时两者得到的因子集合完全一致。
XGBoost去除无用特征
# Train XGBoost for every trading day without the factors in
# `remove_factors_XGBoost`. The second argument 0 is presumably the
# missing-value state (0 = leave missing values as they are) - confirm
# against XGBoost_train.
XGBoost_err1 = list()
for day in days:
    print(day)
    XGBoost_err1.append(
        XGBoost_train(day, 0, remove_factors_XGBoost)
    )
2013-01-04
x_train.shape=(1820, 48)
y_train.shape =(1820,)
x_test.shape=(455, 48)
y_test.shape=(455,)
[0] validation_0-rmse:2.71745
[1] validation_0-rmse:2.67359
[2] validation_0-rmse:2.63451
[3] validation_0-rmse:2.60041
[4] validation_0-rmse:2.57119
[5] validation_0-rmse:2.54164
[6] validation_0-rmse:2.51318
[7] validation_0-rmse:2.48936
[8] validation_0-rmse:2.46562
[9] validation_0-rmse:2.44546
[10] validation_0-rmse:2.42487
[11] validation_0-rmse:2.40673
[12] validation_0-rmse:2.38718
[13] validation_0-rmse:2.37333
[14] validation_0-rmse:2.36220
[15] validation_0-rmse:2.35255
[16] validation_0-rmse:2.34168
[17] validation_0-rmse:2.32783
[18] validation_0-rmse:2.31884
[19] validation_0-rmse:2.30887
[20] validation_0-rmse:2.30385
[21] validation_0-rmse:2.30015
[22] validation_0-rmse:2.29274
[23] validation_0-rmse:2.28728
[24] validation_0-rmse:2.28071
[25] validation_0-rmse:2.27549
[26] validation_0-rmse:2.27096
[27] validation_0-rmse:2.26446
[28] validation_0-rmse:2.25991
[29] validation_0-rmse:2.25987
[30] validation_0-rmse:2.25923
[31] validation_0-rmse:2.25714
[32] validation_0-rmse:2.25545
[33] validation_0-rmse:2.25242
[34] validation_0-rmse:2.25230
[35] validation_0-rmse:2.25038
[36] validation_0-rmse:2.24694
[37] validation_0-rmse:2.24920
[38] validation_0-rmse:2.24767
[39] validation_0-rmse:2.24455
[40] validation_0-rmse:2.24335
[41] validation_0-rmse:2.24286
[42] validation_0-rmse:2.24344
[43] validation_0-rmse:2.24169
[44] validation_0-rmse:2.23983
[45] validation_0-rmse:2.23949
[46] validation_0-rmse:2.24040
[47] validation_0-rmse:2.23639
[48] validation_0-rmse:2.23753
[49] validation_0-rmse:2.23831
[50] validation_0-rmse:2.23831
[51] validation_0-rmse:2.23881
[52] validation_0-rmse:2.23851
MSE: 5.001454124013577
2013-01-07
x_train.shape=(1793, 48)
y_train.shape =(1793,)
x_test.shape=(449, 48)
y_test.shape=(449,)
[0] validation_0-rmse:2.00898
[1] validation_0-rmse:1.98336
[2] validation_0-rmse:1.96136
[3] validation_0-rmse:1.93963
[4] validation_0-rmse:1.92541
[5] validation_0-rmse:1.90738
[6] validation_0-rmse:1.89212
[7] validation_0-rmse:1.87982
[8] validation_0-rmse:1.87254
[9] validation_0-rmse:1.86130
[10] validation_0-rmse:1.85293
[11] validation_0-rmse:1.84307
[12] validation_0-rmse:1.83806
[13] validation_0-rmse:1.83238
[14] validation_0-rmse:1.82658
[15] validation_0-rmse:1.82079
[16] validation_0-rmse:1.81401
[17] validation_0-rmse:1.81253
[18] validation_0-rmse:1.80937
[19] validation_0-rmse:1.80658
[20] validation_0-rmse:1.80209
[21] validation_0-rmse:1.79880
[22] validation_0-rmse:1.79395
[23] validation_0-rmse:1.79342
[24] validation_0-rmse:1.79180
[25] validation_0-rmse:1.78960
[26] validation_0-rmse:1.79045
[27] validation_0-rmse:1.79132
[28] validation_0-rmse:1.78716
[29] validation_0-rmse:1.78690
[30] validation_0-rmse:1.78452
[31] validation_0-rmse:1.78472
[32] validation_0-rmse:1.78501
[33] validation_0-rmse:1.78616
[34] validation_0-rmse:1.78586
[35] validation_0-rmse:1.78812
MSE: 3.1845212488497667
2013-01-08
x_train.shape=(1797, 48)
y_train.shape =(1797,)
x_test.shape=(450, 48)
y_test.shape=(450,)
[0] validation_0-rmse:2.21991
[1] validation_0-rmse:2.19453
[2] validation_0-rmse:2.17296
[3] validation_0-rmse:2.15496
[4] validation_0-rmse:2.14025
[5] validation_0-rmse:2.12435
[6] validation_0-rmse:2.11047
[7] validation_0-rmse:2.09894
[8] validation_0-rmse:2.08667
[9] validation_0-rmse:2.07808
[10] validation_0-rmse:2.07004
[11] validation_0-rmse:2.06143
[12] validation_0-rmse:2.05203
[13] validation_0-rmse:2.04436
[14] validation_0-rmse:2.03613
[15] validation_0-rmse:2.03236
[16] validation_0-rmse:2.02629
[17] validation_0-rmse:2.02123
[18] validation_0-rmse:2.02001
[19] validation_0-rmse:2.01185
[20] validation_0-rmse:2.01016
[21] validation_0-rmse:2.00876
[22] validation_0-rmse:2.00288
[23] validation_0-rmse:2.00041
[24] validation_0-rmse:1.99874
[25] validation_0-rmse:1.99588
[26] validation_0-rmse:1.99229
[27] validation_0-rmse:1.98823
[28] validation_0-rmse:1.98213
[29] validation_0-rmse:1.98026
[30] validation_0-rmse:1.97804
[31] validation_0-rmse:1.97848
[32] validation_0-rmse:1.97623
[33] validation_0-rmse:1.97624
[34] validation_0-rmse:1.97415
[35] validation_0-rmse:1.97366
[36] validation_0-rmse:1.97279
[37] validation_0-rmse:1.97119
[38] validation_0-rmse:1.97256
[39] validation_0-rmse:1.97306
[40] validation_0-rmse:1.97376
[41] validation_0-rmse:1.97482
MSE: 3.8855736293546372
2013-01-09
x_train.shape=(1791, 48)
y_train.shape =(1791,)
x_test.shape=(448, 48)
y_test.shape=(448,)
[0] validation_0-rmse:1.90462
[1] validation_0-rmse:1.90026
[2] validation_0-rmse:1.89663
[3] validation_0-rmse:1.89111
[4] validation_0-rmse:1.88658
[5] validation_0-rmse:1.88702
[6] validation_0-rmse:1.88244
[7] validation_0-rmse:1.88211
[8] validation_0-rmse:1.87815
[9] validation_0-rmse:1.87744
[10] validation_0-rmse:1.87602
[11] validation_0-rmse:1.87436
[12] validation_0-rmse:1.87080
[13] validation_0-rmse:1.86997
[14] validation_0-rmse:1.86840
[15] validation_0-rmse:1.86853
[16] validation_0-rmse:1.86896
[17] validation_0-rmse:1.86809
[18] validation_0-rmse:1.86592
[19] validation_0-rmse:1.86313
[20] validation_0-rmse:1.86108
[21] validation_0-rmse:1.86272
[22] validation_0-rmse:1.86133
[23] validation_0-rmse:1.85858
[24] validation_0-rmse:1.85833
[25] validation_0-rmse:1.85615
[26] validation_0-rmse:1.85641
[27] validation_0-rmse:1.85625
[28] validation_0-rmse:1.85526
[29] validation_0-rmse:1.85653
[30] validation_0-rmse:1.85803
[31] validation_0-rmse:1.85546
[32] validation_0-rmse:1.85270
[33] validation_0-rmse:1.85378
[34] validation_0-rmse:1.85455
[35] validation_0-rmse:1.85685
[36] validation_0-rmse:1.85556
MSE: 3.4324928107513224
2013-01-10
x_train.shape=(1791, 48)
y_train.shape =(1791,)
x_test.shape=(448, 48)
y_test.shape=(448,)
[0] validation_0-rmse:1.74036
[1] validation_0-rmse:1.73539
[2] validation_0-rmse:1.72730
[3] validation_0-rmse:1.72252
[4] validation_0-rmse:1.71576
[5] validation_0-rmse:1.71449
[6] validation_0-rmse:1.70838
[7] validation_0-rmse:1.70582
[8] validation_0-rmse:1.70224
[9] validation_0-rmse:1.70405
[10] validation_0-rmse:1.70169
[11] validation_0-rmse:1.70120
[12] validation_0-rmse:1.69680
[13] validation_0-rmse:1.69821
[14] validation_0-rmse:1.69667
[15] validation_0-rmse:1.69463
[16] validation_0-rmse:1.69219
[17] validation_0-rmse:1.68838
[18] validation_0-rmse:1.68872
[19] validation_0-rmse:1.68787
[20] validation_0-rmse:1.68485
[21] validation_0-rmse:1.68401
[22] validation_0-rmse:1.68272
[23] validation_0-rmse:1.68308
[24] validation_0-rmse:1.68362
[25] validation_0-rmse:1.68241
[26] validation_0-rmse:1.68232
[27] validation_0-rmse:1.68231
[28] validation_0-rmse:1.68390
[29] validation_0-rmse:1.68406
[30] validation_0-rmse:1.68301
[31] validation_0-rmse:1.68244
MSE: 2.8301813631073527
2013-01-11
x_train.shape=(1798, 48)
y_train.shape =(1798,)
x_test.shape=(450, 48)
y_test.shape=(450,)
[0] validation_0-rmse:3.31175
[1] validation_0-rmse:3.21892
[2] validation_0-rmse:3.13651
[3] validation_0-rmse:3.06162
[4] validation_0-rmse:2.98938
[5] validation_0-rmse:2.92879
[6] validation_0-rmse:2.86506
[7] validation_0-rmse:2.81064
[8] validation_0-rmse:2.76554
[9] validation_0-rmse:2.71910
[10] validation_0-rmse:2.67275
[11] validation_0-rmse:2.62927
[12] validation_0-rmse:2.58945
[13] validation_0-rmse:2.55245
[14] validation_0-rmse:2.51939
[15] validation_0-rmse:2.49457
[16] validation_0-rmse:2.47158
[17] validation_0-rmse:2.44465
[18] validation_0-rmse:2.42555
[19] validation_0-rmse:2.41074
[20] validation_0-rmse:2.39081
[21] validation_0-rmse:2.37692
[22] validation_0-rmse:2.36240
[23] validation_0-rmse:2.34919
[24] validation_0-rmse:2.33831
[25] validation_0-rmse:2.32760
[26] validation_0-rmse:2.31880
[27] validation_0-rmse:2.30908
[28] validation_0-rmse:2.29942
[29] validation_0-rmse:2.29173
[30] validation_0-rmse:2.28906
[31] validation_0-rmse:2.28330
[32] validation_0-rmse:2.27824
[33] validation_0-rmse:2.27168
[34] validation_0-rmse:2.26974
[35] validation_0-rmse:2.26434
[36] validation_0-rmse:2.26080
[37] validation_0-rmse:2.25779
[38] validation_0-rmse:2.25554
[39] validation_0-rmse:2.25194
[40] validation_0-rmse:2.24950
[41] validation_0-rmse:2.24582
[42] validation_0-rmse:2.24212
[43] validation_0-rmse:2.23924
[44] validation_0-rmse:2.23693
[45] validation_0-rmse:2.23408
[46] validation_0-rmse:2.23157
[47] validation_0-rmse:2.22886
[48] validation_0-rmse:2.22640
[49] validation_0-rmse:2.22447
[50] validation_0-rmse:2.22414
[51] validation_0-rmse:2.22391
[52] validation_0-rmse:2.22305
[53] validation_0-rmse:2.22291
[54] validation_0-rmse:2.22153
[55] validation_0-rmse:2.21619
[56] validation_0-rmse:2.21558
[57] validation_0-rmse:2.21659
[58] validation_0-rmse:2.21574
[59] validation_0-rmse:2.21568
[60] validation_0-rmse:2.21477
[61] validation_0-rmse:2.21468
[62] validation_0-rmse:2.21416
[63] validation_0-rmse:2.21339
[64] validation_0-rmse:2.21196
[65] validation_0-rmse:2.21234
[66] validation_0-rmse:2.21082
[67] validation_0-rmse:2.20976
[68] validation_0-rmse:2.20896
[69] validation_0-rmse:2.20771
[70] validation_0-rmse:2.20623
[71] validation_0-rmse:2.20521
[72] validation_0-rmse:2.20464
[73] validation_0-rmse:2.20457
[74] validation_0-rmse:2.20517
[75] validation_0-rmse:2.20470
[76] validation_0-rmse:2.20315
[77] validation_0-rmse:2.20307
[78] validation_0-rmse:2.20312
[79] validation_0-rmse:2.20367
[80] validation_0-rmse:2.20395
[81] validation_0-rmse:2.20458
MSE: 4.853516475539035
2013-01-14
x_train.shape=(1788, 48)
y_train.shape =(1788,)
x_test.shape=(448, 48)
y_test.shape=(448,)
[0] validation_0-rmse:3.54992
[1] validation_0-rmse:3.41347
[2] validation_0-rmse:3.28376
[3] validation_0-rmse:3.16036
[4] validation_0-rmse:3.04717
[5] validation_0-rmse:2.94013
[6] validation_0-rmse:2.83867
[7] validation_0-rmse:2.74747
[8] validation_0-rmse:2.66053
[9] validation_0-rmse:2.57977
[10] validation_0-rmse:2.50311
[11] validation_0-rmse:2.43331
[12] validation_0-rmse:2.36710
[13] validation_0-rmse:2.30966
[14] validation_0-rmse:2.25344
[15] validation_0-rmse:2.20568
[16] validation_0-rmse:2.15862
[17] validation_0-rmse:2.11274
[18] validation_0-rmse:2.07126
[19] validation_0-rmse:2.03326
[20] validation_0-rmse:1.99911
[21] validation_0-rmse:1.96901
[22] validation_0-rmse:1.93961
[23] validation_0-rmse:1.91477
[24] validation_0-rmse:1.89042
[25] validation_0-rmse:1.86812
[26] validation_0-rmse:1.84573
[27] validation_0-rmse:1.82862
[28] validation_0-rmse:1.81004
[29] validation_0-rmse:1.79396
[30] validation_0-rmse:1.78030
[31] validation_0-rmse:1.76868
[32] validation_0-rmse:1.75735
[33] validation_0-rmse:1.74560
[34] validation_0-rmse:1.73647
[35] validation_0-rmse:1.72792
[36] validation_0-rmse:1.71951
[37] validation_0-rmse:1.71065
[38] validation_0-rmse:1.70197
[39] validation_0-rmse:1.69403
[40] validation_0-rmse:1.68678
[41] validation_0-rmse:1.68097
[42] validation_0-rmse:1.67777
[43] validation_0-rmse:1.67243
[44] validation_0-rmse:1.66799
[45] validation_0-rmse:1.66378
[46] validation_0-rmse:1.66015
[47] validation_0-rmse:1.65685
[48] validation_0-rmse:1.65420
[49] validation_0-rmse:1.65186
[50] validation_0-rmse:1.64992
[51] validation_0-rmse:1.64827
[52] validation_0-rmse:1.64639
[53] validation_0-rmse:1.64575
[54] validation_0-rmse:1.64446
[55] validation_0-rmse:1.64233
[56] validation_0-rmse:1.64053
[57] validation_0-rmse:1.63897
[58] validation_0-rmse:1.63678
[59] validation_0-rmse:1.63496
[60] validation_0-rmse:1.63408
[61] validation_0-rmse:1.63270
[62] validation_0-rmse:1.63197
[63] validation_0-rmse:1.63001
[64] validation_0-rmse:1.63058
[65] validation_0-rmse:1.63124
[66] validation_0-rmse:1.63046
[67] validation_0-rmse:1.63053
[68] validation_0-rmse:1.62815
[69] validation_0-rmse:1.62808
[70] validation_0-rmse:1.62787
[71] validation_0-rmse:1.62731
[72] validation_0-rmse:1.62736
[73] validation_0-rmse:1.62676
[74] validation_0-rmse:1.62728
[75] validation_0-rmse:1.62717
[76] validation_0-rmse:1.62791
[77] validation_0-rmse:1.62786
[78] validation_0-rmse:1.62667
[79] validation_0-rmse:1.62613
[80] validation_0-rmse:1.62612
[81] validation_0-rmse:1.62547
[82] validation_0-rmse:1.62677
[83] validation_0-rmse:1.62850
[84] validation_0-rmse:1.62882
[85] validation_0-rmse:1.62957
[86] validation_0-rmse:1.63227
MSE: 2.642140469381674
...
2013-12-31
x_train.shape=(1781, 48)
y_train.shape =(1781,)
x_test.shape=(446, 48)
y_test.shape=(446,)
[0] validation_0-rmse:1.88690
[1] validation_0-rmse:1.86526
[2] validation_0-rmse:1.85005
[3] validation_0-rmse:1.84162
[4] validation_0-rmse:1.83684
[5] validation_0-rmse:1.82044
[6] validation_0-rmse:1.80873
[7] validation_0-rmse:1.79836
[8] validation_0-rmse:1.79244
[9] validation_0-rmse:1.78463
[10] validation_0-rmse:1.77731
[11] validation_0-rmse:1.77208
[12] validation_0-rmse:1.76650
[13] validation_0-rmse:1.76504
[14] validation_0-rmse:1.76111
[15] validation_0-rmse:1.76009
[16] validation_0-rmse:1.75637
[17] validation_0-rmse:1.75409
[18] validation_0-rmse:1.75204
[19] validation_0-rmse:1.75110
[20] validation_0-rmse:1.74982
[21] validation_0-rmse:1.74916
[22] validation_0-rmse:1.74626
[23] validation_0-rmse:1.74462
[24] validation_0-rmse:1.74572
[25] validation_0-rmse:1.74326
[26] validation_0-rmse:1.74332
[27] validation_0-rmse:1.74115
[28] validation_0-rmse:1.73936
[29] validation_0-rmse:1.73940
[30] validation_0-rmse:1.73854
[31] validation_0-rmse:1.73787
[32] validation_0-rmse:1.73685
[33] validation_0-rmse:1.73552
[34] validation_0-rmse:1.73746
[35] validation_0-rmse:1.73781
[36] validation_0-rmse:1.73913
[37] validation_0-rmse:1.73897
MSE: 3.01203762063307
# Average daily MSE over the whole year for the XGBoost_err1 run.
np.mean(XGBoost_err1)
3.909562275559899
# Same sweep as XGBoost_err1 but with second argument 1, presumably the
# missing-value state (1 = drop rows containing missing values) - confirm
# against XGBoost_train.
XGBoost_err2 = list()
for day in days:
    print(day)
    XGBoost_err2.append(
        XGBoost_train(day, 1, remove_factors_XGBoost)
    )
2013-01-04
x_train.shape=(1185, 48)
y_train.shape =(1185,)
x_test.shape=(297, 48)
y_test.shape=(297,)
[0] validation_0-rmse:2.44807
[1] validation_0-rmse:2.39745
[2] validation_0-rmse:2.35292
[3] validation_0-rmse:2.30882
[4] validation_0-rmse:2.27245
[5] validation_0-rmse:2.23914
[6] validation_0-rmse:2.20729
[7] validation_0-rmse:2.16935
[8] validation_0-rmse:2.14256
[9] validation_0-rmse:2.12006
[10] validation_0-rmse:2.09587
[11] validation_0-rmse:2.07339
[12] validation_0-rmse:2.04927
[13] validation_0-rmse:2.03210
[14] validation_0-rmse:2.01497
[15] validation_0-rmse:2.00233
[16] validation_0-rmse:1.98758
[17] validation_0-rmse:1.97418
[18] validation_0-rmse:1.96359
[19] validation_0-rmse:1.95194
[20] validation_0-rmse:1.94411
[21] validation_0-rmse:1.93538
[22] validation_0-rmse:1.92844
[23] validation_0-rmse:1.92259
[24] validation_0-rmse:1.91411
[25] validation_0-rmse:1.90856
[26] validation_0-rmse:1.90620
[27] validation_0-rmse:1.90025
[28] validation_0-rmse:1.89549
[29] validation_0-rmse:1.89190
[30] validation_0-rmse:1.88803
[31] validation_0-rmse:1.88336
[32] validation_0-rmse:1.87740
[33] validation_0-rmse:1.87631
[34] validation_0-rmse:1.87285
[35] validation_0-rmse:1.87090
[36] validation_0-rmse:1.86915
[37] validation_0-rmse:1.86485
[38] validation_0-rmse:1.86454
[39] validation_0-rmse:1.86267
[40] validation_0-rmse:1.85725
[41] validation_0-rmse:1.85345
[42] validation_0-rmse:1.85321
[43] validation_0-rmse:1.85299
[44] validation_0-rmse:1.85393
[45] validation_0-rmse:1.85441
[46] validation_0-rmse:1.85222
[47] validation_0-rmse:1.85128
[48] validation_0-rmse:1.85197
[49] validation_0-rmse:1.84963
[50] validation_0-rmse:1.84854
[51] validation_0-rmse:1.84855
[52] validation_0-rmse:1.84776
[53] validation_0-rmse:1.84315
[54] validation_0-rmse:1.84244
[55] validation_0-rmse:1.84082
[56] validation_0-rmse:1.84197
[57] validation_0-rmse:1.84060
[58] validation_0-rmse:1.84109
[59] validation_0-rmse:1.84236
[60] validation_0-rmse:1.84235
[61] validation_0-rmse:1.84354
[62] validation_0-rmse:1.84242
MSE: 3.387808379507812
...
# Average daily MSE over the whole year for the XGBoost_err2 run.
np.mean(XGBoost_err2)
3.7494466498176644
准确率对比(以MSE衡量,数值越低越好)
2013年全年MSE平均值 | 不去除 | 去除低重要性因子 |
---|---|---|
Linear使用平均值 | 3.921 | 3.903 |
Linear去除缺失值 | 3.801 | 3.818 |
XGBoost未去除缺失值 | 3.888 | 3.910 |
XGBoost去除缺失值 | 3.742 | 3.749 |
选择最佳因子组合
逐个排除因子(预降维)
(使用全年的数据在Linear模型上)对无用特征进行排序过后,考虑逐个排除因子,查看准确率是否有较大的影响:
# Pre-selection sweep: for each j in 0..39, run the linear model on every
# trading day (state 1) with the factor list get_trivial(j) removed, and
# record the mean of the daily errors under key j.
# get_trivial(j) presumably returns the j least important factors - confirm
# against its definition.
remove_factors1 = []
output = {}
for j in range(40):
    linear_err = []
    for day in days:
        print(day)
        linear_err.append(
            linear_train(day, 1, get_trivial(j))
        )
    output[j] = np.mean(linear_err)
2013-01-04
x_train.shape=(1185, 56)
y_train.shape =(1185,)
x_test.shape=(297, 56)
y_test.shape=(297,)
MSE: 3.617532364939728
2013-01-07
x_train.shape=(1175, 56)
y_train.shape =(1175,)
x_test.shape=(294, 56)
y_test.shape=(294,)
MSE: 3.767207686481283
...
2013-09-06
x_train.shape=(1262, 54)
y_train.shape =(1262,)
x_test.shape=(316, 54)
y_test.shape=(316,)
MSE: 4.479822455337999
2013-09-09
x_train.shape=(1245, 54)
y_train.shape =(1245,)
x_test.shape=(312, 54)
y_test.shape=(312,)
MSE: 2.6867597216311045
2013-09-10
# Display the mean error recorded for each sweep index j.
output
{0: 3.8005630796082426,
1: 3.800560102538793,
2: 3.7993222041207915,
3: 3.796581734282571,
4: 3.810176307276948,
5: 3.821326643110776,
6: 3.8207592247949775,
7: 3.8226390947166196,
8: 3.8198824229201382,
9: 3.8178624023374335,
10: 3.8164553373810577,
11: 3.817581805700733,
12: 3.8162670045691693,
13: 3.8173551938010513,
14: 3.8128217050940854,
15: 3.8171463773589944,
16: 3.8382788440258135,
17: 3.8355687130078513,
18: 3.8323229320104573,
19: 3.8318099337863303,
20: 3.827302483819496,
21: 3.84656562478651,
22: 3.845318679687666,
23: 3.8486123394339438,
24: 3.9034638992606205,
25: 3.9005512092481425,
26: 3.90396465347381,
27: 3.918746734890699,
28: 3.9180623909064027,
29: 3.917919924138114,
30: 3.8968072309036286,
31: 3.8957130947549037,
32: 3.8947024917348525,
33: 3.8949611266433943,
34: 3.9093928642932676,
35: 3.924233068042662,
36: 3.922568663342627,
37: 3.9293781688167835,
38: 3.967771645777158,
39: 4.045240388664313}
# Plot the mean error (y) against the sweep index j (x).
plt.plot(list(output), list(output.values()))
[<matplotlib.lines.Line2D at 0x7f3f48772ee0>]
如图所示,在去除20个因子以前,误差变化较小(约3.80–3.84);在去除3个因子时,误差达到最小值(约3.797)。
而当去除的因子达到35个以上时,误差会急剧增大(去除39个时约为4.045),可能是因为去除了有效的因子。
这说明在56个因子中,真正有效的因子数量甚至不到一半;直接去除无效因子不但提高了模型的可解释性,还能小幅降低误差。
动态排除因子(后降维)
预降维对全年的每一天都排除同一组因子;而后降维则对每天的数据单独进行评估,针对当天的模型去除不重要的因子。
因此可以预想,去除的因子数量会更多(接近一半排序重要性为负的因子都会被去除),准确性预计也会有所提升。
此处采取的策略是:对每天的因子重要性计算三次(使用不同的随机种子),三次均为负则去除该因子,然后用剩余因子重新回归。
def remove_and_fit(date, state=1):
    """Drop factors with consistently negative permutation importance, then refit.

    A linear regression is fitted on the train split for ``date``. Permutation
    importance is then computed on the test split three times, each with a
    different ``random_state``. Factors whose importance is negative in every
    run are printed and passed to ``linear_train`` as the factors to remove.

    Args:
        date: Trading date forwarded to ``get_split_by_trade_date`` and
            ``linear_train``.
        state: Missing-value handling mode forwarded to both helpers.

    Returns:
        A tuple ``(n_removed, err)``: the number of removed factors and the
        value returned by ``linear_train`` for the reduced factor set.
    """
    n_runs = 3  # number of permutation-importance repetitions

    # Split the data set for this date.
    x_train, x_test, y_train, y_test = get_split_by_trade_date(date, state)

    # Fit the baseline model on all factors.
    model = LinearRegression()
    model.fit(x_train, y_train)

    # Compute permutation importance once per seed. The rows are labelled with
    # the columns of x_test because feature_importances_ follows the column
    # order of the data passed to fit(), which need not match `factors`.
    df = pd.DataFrame(index=x_test.columns)
    for i in range(n_runs):
        perm = PermutationImportance(model, random_state=i).fit(x_test, y_test)
        df[i] = perm.feature_importances_

    # Keep only the factors whose importance is negative in every run.
    always_negative = (df < 0).all(axis=1)
    remove_factors = list(always_negative[always_negative].index)
    print(remove_factors)

    # Original author's note: the data set is split differently here
    # (presumably linear_train performs its own train/test split -- confirm).
    # Use the `date` argument rather than a global loop variable.
    return len(remove_factors), linear_train(date, state, remove_factors)
# Collect, for every trading day, how many factors were removed and the
# resulting error; rows are indexed by the trading date.
linear_remove = pd.DataFrame(columns = ['removed', 'err'])
for day in days:
    # Print the date without a newline so remove_and_fit's own output
    # continues on the same line.
    print(day, end = ' ')
    removed, err = remove_and_fit(day)
    # Pair the two results with the frame's column labels to form the new row.
    linear_remove.loc[day] = dict(zip(linear_remove.columns, (removed, err)))
2013-01-04 ['beta', 'turn', 'sharechg', 'age', 'lagretn', 'AM', 'DP', 'BVEG', 'INVG', 'CT', 'RD', 'RDsales', 'salecash']
MSE: 3.7858729998590723
2013-01-07 ['beta', 'idvol', 'skew', 'turn', 'std_turn', 'age', 'lagretn', 'AM', 'SG', 'PMG', 'cash', 'RDsales', 'QR', 'saleinv', 'CRG']
MSE: 4.025161449626848
2013-01-08 ['size', 'beta', 'skew', 'coskew', 'turn', 'LM', 'age', 'BM', 'CFP', 'INVG', 'ROA', 'PA', 'salecash']
MSE: 4.162825913159681
2013-01-09 ['size', 'beta', 'idvol', 'skew', 'std_turn', 'retnmax', 'age', 'AM', 'PMG', 'ROA', 'RDsales']
MSE: 2.9702129885547928
2013-01-10 ['beta', 'betad', 'idvol', 'sharechg', 'lagretn', 'AM', 'INVG', 'SgINVg', 'cash', 'cashpr', 'salecash']
MSE: 2.4174113537403037
2013-01-11 ['beta', 'turn', 'age', 'mom6', 'LEV', 'CFP', 'TAXchg', 'cash', 'RDsales', 'CR', 'salecash']
MSE: 3.751365716737459
2013-01-14 ['volumed', 'std_dvol', 'AM', 'OCFP', 'INVchg', 'PMG', 'ROA', 'RD', 'saleinv']
MSE: 2.4185355766852887
2013-01-15 ['beta', 'betad', 'idvol', 'age', 'lagretn', 'INVG', 'PMG', 'cashpr', 'CFdebt']
MSE: 3.370724501216639
2013-01-16 ['idvol', 'turn', 'std_turn', 'volumed', 'EP', 'CFP', 'DP', 'INVchg', 'QRG']
MSE: 4.447045990712184
2013-01-17 ['beta', 'total_vol', 'retnmax', 'illq', 'sharechg', 'age', 'AM', 'LEV', 'INVG', 'SG', 'PMG', 'TAXchg', 'CT', 'cash', 'CR', 'QR', 'salecash']
MSE: 4.082304969198799
2013-01-18 ['turn', 'CFP', 'OCFP', 'PMG', 'ROA']
MSE: 2.1234872477788023
2013-01-21 ['idvol', 'idskew', 'skew', 'std_dvol', 'illq', 'lagretn', 'PMG', 'ROA', 'CR']
MSE: 4.075557103299469
2013-01-22 ['betad', 'skew', 'mom6', 'AM', 'CFP', 'OCFP', 'PA', 'cash', 'RD', 'CR', 'salecash']
MSE: 3.0421280038462126
2013-01-23 ['size', 'betad', 'idskew', 'turn', 'mom6', 'lagretn', 'AM', 'EP', 'LG', 'TAXchg', 'ROA', 'PA', 'RDsales', 'salecash']
MSE: 3.8397380865824173
2013-01-24 ['std_turn', 'age', 'AM', 'INVG', 'PMG', 'ROA']
MSE: 4.3652342753144415
2013-01-25 ['skew', 'LM', 'BM', 'AM', 'INVG', 'INVchg', 'PMG', 'cash', 'salecash', 'CRG']
MSE: 3.649042345177247
2013-01-28 ['beta', 'skew', 'EP', 'CFP', 'BVEG', 'INVchg', 'SG', 'PMG', 'ACCP', 'RD', 'RDsales', 'QR']
MSE: 1.998549925155473
2013-01-29 ['idvol', 'total_vol', 'skew', 'turn', 'illq', 'age', 'AM', 'EP', 'OCFP', 'LG', 'INVchg', 'SgINVg', 'ROA', 'PA', 'cashpr', 'RDsales', 'CR', 'CFdebt', 'salecash', 'CRG']
MSE: 2.9341143357828474
2013-01-30 ['beta', 'idskew', 'retnmax', 'age', 'mom6', 'PA', 'cash']
MSE: 3.37135529969674
2013-01-31 ['beta', 'idvol', 'turn', 'AM', 'INVG', 'SG', 'TAXchg', 'ACC', 'RD']
MSE: 4.029868334639127
2013-02-01 ['size', 'skew', 'coskew', 'turn', 'std_turn', 'LEV', 'BVEG', 'INVG', 'INVchg', 'CT', 'cash', 'CFdebt']
MSE: 2.9752105980905337
2013-02-04 ['lagretn', 'SG', 'PMG', 'RDsales', 'salecash']
MSE: 5.564731556991589
2013-02-05 ['beta', 'age', 'mom6', 'CFP', 'DP', 'AG', 'INVG', 'INVchg', 'cash', 'cashpr', 'QRG']
MSE: 3.015906109352361
2013-02-06 ['AM', 'cash', 'QR']
MSE: 2.2884328731540524
2013-02-07 ['LM', 'CFP', 'OCFP', 'BVEG', 'PA', 'RDsales', 'salecash']
MSE: 2.6887185072823114
2013-02-08 ['idvol', 'skew', 'volumed', 'retnmax', 'ACC', 'cash']
MSE: 2.4370138796593364
2013-02-18 ['illq', 'AM', 'OCFP', 'LG', 'RD', 'CRG']
MSE: 3.0436795852997207
2013-02-19 ['age', 'lagretn', 'AM', 'EP', 'OCFP', 'INVG', 'QR']
MSE: 3.589772885371211
2013-02-20 ['betad', 'turn', 'lagretn', 'AM', 'CT', 'RD', 'RDsales', 'CR']
MSE: 2.376644549747271
2013-02-21 ['idvol', 'sharechg', 'EP', 'CFP', 'DP', 'SP', 'BVEG', 'INVG', 'RDsales']
MSE: 4.382255648644994
2013-02-22 ['skew', 'volumed', 'retnmax', 'LM', 'mom6', 'imom', 'AM', 'CFP', 'PA', 'RD']
MSE: 3.984581871361109
2013-02-25 ['skew', 'volumed', 'mom12', 'EP', 'CFP', 'DP', 'LG', 'cash', 'RD', 'QR', 'CFdebt']
MSE: 2.628439990574723
2013-02-26 ['skew', 'coskew', 'retnmax', 'sharechg', 'age', 'imom', 'lagretn', 'LEV', 'EP', 'DP', 'BVEG', 'INVG', 'cash', 'RDsales', 'salecash', 'CRG']
MSE: 3.617891237938386
2013-02-27 ['size', 'idvol', 'coskew', 'retnmax', 'age', 'lagretn', 'AM', 'LEV', 'SG', 'PMG', 'TAXchg', 'CFdebt']
MSE: 2.9805103770521884
2013-02-28 ['betad', 'volumed', 'imom', 'lagretn', 'DP', 'AG', 'TAXchg', 'cash', 'CFdebt']
MSE: 2.4335964170626463
2013-03-01 ['betad', 'skew', 'turn', 'std_turn', 'volumed', 'illq', 'LM', 'age', 'mom6', 'lagretn', 'EP', 'INVG', 'CFdebt', 'salecash']
MSE: 3.308268260769943
2013-03-04 ['betad', 'skew', 'turn', 'volumed', 'AM', 'LEV', 'SP', 'QR', 'salecash']
MSE: 3.091217982194591
2013-03-05 ['skew', 'turn', 'retnmax', 'lagretn', 'CFP', 'DP', 'INVG', 'CT']
MSE: 3.0806149519588337
2013-03-06 ['idvol', 'turn', 'retnmax', 'age', 'momchg', 'AM', 'RDsales']
MSE: 4.669136460161346
2013-03-07 ['idvol', 'volumed', 'LM', 'AM', 'LEV', 'OCFP', 'INVchg', 'RD', 'salecash']
MSE: 4.602641402312919
2013-03-08 ['turn', 'age', 'AM', 'EP', 'INVchg', 'SG', 'PMG', 'CFdebt', 'salecash']
MSE: 3.559080961750365
2013-03-11 ['idvol', 'skew', 'EP', 'CFP', 'PMG', 'RD', 'QR']
MSE: 3.1585437676637826
2013-03-12 ['skew', 'turn', 'volumed', 'imom', 'AM', 'LEV', 'EP', 'BVEG', 'SgINVg', 'PA', 'CT', 'cash', 'CFdebt']
MSE: 4.33071164653929
2013-03-13 ['skew', 'volumed', 'illq', 'mom6', 'lagretn', 'AM', 'EP', 'OCFP', 'DP', 'TAXchg', 'saleinv', 'QRG']
MSE: 4.228758398689912
2013-03-14 ['total_vol', 'skew', 'retnmax', 'illq', 'mom6', 'AM', 'EP', 'CFP', 'SP', 'RD', 'QRG']
MSE: 2.919961024320269
2013-03-15 ['turn', 'illq', 'LM', 'BM', 'BVEG', 'salecash']
MSE: 3.2026742871048857
2013-03-18 ['LM', 'age', 'imom', 'CFP', 'INVchg', 'CFdebt']
MSE: 4.825258612532052
2013-03-19 ['turn', 'age', 'BM', 'AM', 'LEV', 'EP', 'ACC', 'QRG']
MSE: 4.917209643944105
2013-03-20 ['size', 'beta', 'idvol', 'skew', 'age', 'lagretn', 'CFP', 'SP', 'LG', 'INVG', 'SG', 'ROA', 'CT', 'QRG']
MSE: 2.2047808208664144
2013-03-21 ['beta', 'betad', 'idvol', 'turn', 'AM', 'CFP', 'DP', 'PMG', 'TAXchg', 'ACC', 'PA', 'RDsales', 'CFdebt', 'QRG']
MSE: 2.507678699246893
2013-03-22 ['skew', 'lagretn', 'EP', 'DP', 'INVG', 'PMG', 'ROE', 'saleinv']
MSE: 2.8863295573117242
2013-03-25 ['betad', 'skew', 'turn', 'std_turn', 'volumed', 'lagretn', 'CFP', 'DP', 'INVG', 'SG', 'ROA', 'cashpr']
MSE: 3.316622703962829
2013-03-26 ['skew', 'age', 'mom6', 'AM', 'EP', 'CFP', 'INVG', 'QR']
MSE: 4.50087835556555
2013-03-27 ['skew', 'turn', 'volumed', 'age', 'mom6', 'LEV', 'EP', 'SG', 'SgINVg', 'PMG', 'TAXchg', 'CT', 'RDsales']
MSE: 3.8974965957604497
2013-03-28 ['skew', 'turn', 'volumed', 'LM', 'QR']
MSE: 3.344769615966232
2013-03-29 ['beta', 'turn', 'age', 'mom6', 'EP', 'OCFP', 'LG', 'SG', 'cash', 'RD']
MSE: 3.6517181917715344
2013-04-01 ['betad', 'turn', 'volumed', 'std_dvol', 'retnmax', 'imom', 'lagretn', 'LEV', 'EP', 'INVchg', 'cashpr', 'RD', 'CR', 'QR', 'salecash']
MSE: 3.39461678703113
2013-04-02 ['size', 'idvol', 'age', 'mom6', 'lagretn', 'AM', 'EP', 'SgINVg', 'TAXchg', 'cash', 'salecash', 'saleinv']
MSE: 5.006330832570682
2013-04-03 ['std_turn', 'retnmax', 'illq', 'LM', 'EP', 'CFP', 'OCFP', 'SgINVg', 'PMG', 'ROA', 'RD', 'CR', 'QRG']
MSE: 4.595119149833135
2013-04-08 ['beta', 'illq', 'mom6', 'CFP', 'OCFP', 'INVG', 'cash']
MSE: 4.27367902986036
2013-04-09 ['size', 'idvol', 'retnmax', 'illq', 'EP', 'QRG']
MSE: 4.311125099788795
2013-04-10 ['coskew', 'turn', 'retnmax', 'AM', 'OCFP', 'INVG', 'PMG', 'CT', 'cashpr', 'saleinv', 'CRG', 'QRG']
MSE: 2.943656923181127
2013-04-11 ['beta', 'skew', 'coskew', 'turn', 'std_turn', 'LM', 'sharechg', 'AM', 'EP', 'CFP', 'OCFP', 'PMG', 'ACC', 'ROA', 'CT', 'CFdebt', 'salecash']
MSE: 3.0621524306130676
2013-04-12 ['turn', 'age', 'mom6', 'AM', 'AG', 'LG', 'INVG', 'INVchg', 'RDsales', 'QR', 'CFdebt', 'CRG']
MSE: 2.594489069882756
2013-04-15 ['coskew', 'INVG', 'cash']
MSE: 4.122738534906467
2013-04-16 ['idvol', 'total_vol', 'skew', 'coskew', 'volumed', 'std_dvol', 'retnmax', 'age', 'lagretn', 'SG', 'SgINVg', 'ACCP', 'PA', 'CT', 'RDsales', 'CR', 'salecash']
MSE: 2.9841222620046266
2013-04-17 ['coskew', 'turn', 'retnmax', 'imom', 'AM', 'CFP', 'INVG', 'RD', 'CR']
MSE: 2.4707997412930944
2013-04-18 ['skew', 'AM', 'LEV', 'OCFP', 'PMG', 'cashpr', 'QR', 'salecash', 'QRG']
MSE: 3.1476566645198756
2013-04-19 ['lagretn', 'EP', 'BVEG', 'CT', 'RD', 'RDsales', 'CRG']
MSE: 1.750884841826081
2013-04-22 ['idvol', 'coskew', 'PMG', 'cash', 'RDsales', 'QR']
MSE: 4.292764260288389
2013-04-23 ['size', 'betad', 'idvol', 'skew', 'turn', 'std_dvol', 'AM', 'CFP', 'BVEG', 'RDsales', 'QR']
MSE: 3.1710658821709057
2013-04-24 ['turn', 'retnmax', 'LM', 'momchg', 'BM', 'DP', 'BVEG', 'SG', 'SgINVg', 'ACCP', 'PA', 'cash', 'RD', 'CR', 'QR', 'salecash']
MSE: 3.351606083122748
2013-04-25 ['beta', 'volumed', 'std_dvol', 'retnmax', 'age', 'AM', 'INVchg', 'PMG', 'CT', 'cashpr', 'RD', 'saleinv', 'QRG']
MSE: 4.790360563735561
2013-04-26 ['size', 'turn', 'retnmax', 'illq', 'INVG', 'PA', 'cash', 'RD', 'salecash']
MSE: 4.628211756526541
2013-05-02 ['idvol', 'volumed', 'retnmax', 'sharechg', 'SG', 'PMG', 'CR']
MSE: 5.240087277472901
2013-05-03 ['idvol', 'skew', 'LM', 'EP', 'CFP', 'SP', 'cash']
MSE: 2.5405350578236368
2013-05-06 ['size', 'betad', 'skew', 'coskew', 'turn', 'retnmax', 'illq', 'momchg', 'AM', 'DP', 'SP', 'INVchg', 'SG', 'TAXchg', 'PA', 'cashpr', 'RD', 'RDsales', 'CR', 'CRG']
MSE: 3.0044574721981134
2013-05-07 ['idvol', 'skew', 'illq', 'LM', 'mom6', 'OCFP', 'AG', 'PMG', 'ACC', 'cash', 'CR', 'salecash']
MSE: 2.771803937426694
2013-05-08 ['turn', 'age', 'mom6', 'imom', 'AM', 'EP', 'BVEG', 'SG', 'CT', 'RD', 'QR', 'salecash']
MSE: 2.7455414666450255
2013-05-09 ['beta', 'idvol', 'turn', 'age', 'mom6', 'momchg', 'lagretn', 'CFP', 'SP', 'SgINVg', 'TAXchg', 'ROA', 'PA', 'cashpr', 'RDsales']
MSE: 4.6174483990466335
2013-05-10 ['size', 'betad', 'idvol', 'skew', 'illq', 'age', 'EP', 'CFP', 'DP', 'SP', 'INVG', 'PMG', 'cashpr', 'CFdebt', 'salecash']
MSE: 4.2289625201434475
2013-05-13 ['beta', 'volumed', 'retnmax', 'momchg', 'BVEG', 'INVG', 'INVchg', 'SgINVg', 'ROA', 'RD', 'RDsales']
MSE: 3.2821573771656642
2013-05-14 ['betad', 'turn', 'volumed', 'age', 'momchg', 'AM', 'DP', 'BVEG', 'SG', 'PMG']
MSE: 4.236639304204587
2013-05-15 ['turn', 'LM', 'mom6', 'lagretn', 'EP', 'DP', 'SgINVg', 'cash', 'RD', 'QR', 'salecash', 'QRG']
MSE: 2.308756438792809
2013-05-16 ['retnmax', 'SG', 'RD', 'RDsales', 'salecash']
MSE: 3.66247839778964
2013-05-17 ['idvol', 'total_vol', 'momchg', 'AM', 'CFP', 'AG', 'BVEG', 'INVG', 'INVchg', 'cashpr', 'RD', 'RDsales', 'salecash']
MSE: 3.4005687977375905
2013-05-20 ['idvol', 'turn', 'retnmax', 'INVG', 'SG', 'CRG']
MSE: 4.251271852207275
2013-05-21 ['idvol', 'volumed', 'retnmax', 'age', 'lagretn', 'EP', 'OCFP', 'INVchg', 'PMG', 'ROA', 'PA', 'cashpr', 'CR', 'CFdebt', 'QRG']
MSE: 3.7684960172846664
2013-05-22 ['volumed', 'EP', 'INVchg', 'PMG', 'ACC', 'CT', 'cash', 'RDsales', 'QR']
MSE: 4.730338570373537
2013-05-23 ['skew', 'coskew', 'retnmax', 'age', 'lagretn', 'AM', 'DP', 'SP', 'RD', 'salecash']
MSE: 3.981534716882582
2013-05-24 ['total_vol', 'skew', 'turn', 'lagretn', 'AM', 'CFP', 'LG', 'INVG', 'SG', 'RDsales', 'QR', 'CRG']
MSE: 3.3530280927986165
2013-05-27 ['size', 'idvol', 'volumed', 'mom6', 'imom', 'SG', 'CT', 'RD']
MSE: 4.061919240761789
2013-05-28 ['turn', 'retnmax', 'illq', 'LM', 'age', 'mom6', 'LEV', 'CFP', 'PA', 'CT', 'RD', 'QR', 'CFdebt', 'salecash']
MSE: 5.243846264700614
2013-05-29 ['betad', 'total_vol', 'illq', 'age', 'AM', 'INVG', 'RDsales']
MSE: 2.7711284059099497
2013-05-30 ['size', 'idvol', 'skew', 'turn', 'std_turn', 'volumed', 'lagretn', 'EP', 'INVchg', 'SG', 'ACC', 'ROA', 'CR']
MSE: 3.5887124438798774
2013-05-31 ['beta', 'skew', 'LM', 'mom6', 'CFP', 'INVG', 'RD', 'CFdebt', 'QRG']
MSE: 3.9045752522526107
2013-06-03 ['sharechg', 'AM', 'LEV', 'EP', 'DP', 'SP', 'SgINVg', 'TAXchg', 'PA', 'RD', 'RDsales', 'salecash']
MSE: 4.7864433498877705
2013-06-04 ['total_vol', 'volumed', 'age', 'lagretn', 'BVEG', 'CR']
MSE: 4.2228459858796015
2013-06-05 ['size', 'coskew', 'age', 'momchg', 'AM', 'EP', 'DP', 'LG', 'BVEG', 'PA', 'cash', 'QR']
MSE: 3.1121881665080253
2013-06-06 ['idskew', 'volumed', 'illq', 'mom6', 'momchg', 'SP', 'SG', 'PMG', 'CT', 'RD', 'CR']
MSE: 3.283001630711237
2013-06-07 ['betad', 'idvol', 'skew', 'retnmax', 'AM', 'EP', 'DP', 'BVEG', 'SgINVg', 'TAXchg', 'RD', 'QR', 'salecash']
MSE: 4.962202056166642
2013-06-13 ['retnmax', 'illq', 'LG', 'SG', 'cashpr', 'CR', 'CFdebt', 'salecash']
MSE: 5.353840468483478
2013-06-14 ['coskew', 'illq', 'mom6', 'AM', 'INVG', 'SgINVg', 'PMG', 'ACC', 'salecash', 'CRG']
MSE: 2.5061345638899355
2013-06-17 ['idvol', 'skew', 'coskew', 'turn', 'volumed', 'age', 'momchg', 'SP', 'BVEG', 'INVG', 'SG', 'PA', 'cash', 'CR', 'salecash']
MSE: 3.9994831436523994
2013-06-18 ['idvol', 'idskew', 'coskew', 'retnmax', 'age', 'lagretn', 'DP', 'LG', 'SgINVg', 'ROA', 'PA', 'CT', 'cashpr', 'RDsales']
MSE: 3.723980540254007
2013-06-19 ['beta', 'idvol', 'std_dvol', 'retnmax', 'age', 'lagretn', 'AM', 'OCFP', 'INVchg', 'SG', 'PA', 'cash', 'cashpr', 'RD']
MSE: 4.331620733088984
2013-06-20 ['skew', 'illq', 'sharechg', 'age', 'AM', 'LEV', 'SgINVg', 'CR', 'QR', 'salecash']
MSE: 2.856545736980471
2013-06-21 ['skew', 'volumed', 'age', 'BM', 'AM', 'CFP', 'BVEG', 'INVchg', 'PMG', 'PA', 'RDsales', 'CFdebt']
MSE: 4.2808016622111165
2013-06-24 ['size', 'turn', 'std_turn', 'LM', 'AM', 'CFP', 'DP', 'cashpr', 'CR', 'QRG']
MSE: 4.300456944090253
2013-06-25 ['turn', 'lagretn', 'DP', 'BVEG', 'INVG', 'ROA', 'RD']
MSE: 5.634483683169749
2013-06-26 ['idvol', 'skew', 'volumed', 'retnmax', 'AM', 'DP', 'LG', 'CT', 'QR', 'CFdebt']
MSE: 3.889013796976845
2013-06-27 ['beta', 'betad', 'skew', 'turn', 'std_turn', 'retnmax', 'mom6', 'DP', 'PMG']
MSE: 6.958856578714843
2013-06-28 ['turn', 'lagretn', 'AM', 'BVEG', 'SgINVg', 'cash', 'cashpr']
MSE: 4.021165719370021
2013-07-01 ['betad', 'skew', 'turn', 'age', 'mom6', 'CFP', 'CFdebt', 'salecash', 'saleinv']
MSE: 3.0490661274428366
2013-07-02 ['coskew', 'OCFP', 'AG', 'BVEG', 'INVG', 'PMG', 'CR']
MSE: 3.2681444178466332
2013-07-03 ['idvol', 'skew', 'volumed', 'lagretn', 'AM', 'CFP', 'OCFP', 'BVEG', 'INVchg', 'CR']
MSE: 4.458588490993542
2013-07-04 ['std_dvol', 'illq', 'EP', 'OCFP', 'BVEG', 'INVchg', 'PMG', 'ACCP', 'cash', 'RDsales', 'QR']
MSE: 4.5351058807630364
2013-07-05 ['beta', 'betad', 'lagretn', 'INVG', 'PA', 'RD', 'RDsales']
MSE: 4.107400239809365
2013-07-08 ['turn', 'illq', 'imom', 'AM', 'SP', 'QR', 'salecash']
MSE: 5.079964916060136
2013-07-09 ['skew', 'coskew', 'turn', 'retnmax', 'lagretn', 'EP', 'RDsales']
MSE: 4.48783743996065
2013-07-10 ['beta', 'idvol', 'turn', 'volumed', 'sharechg', 'imom', 'lagretn']
MSE: 2.91120609934653
2013-07-11 ['idskew', 'skew', 'lagretn', 'AG', 'TAXchg', 'CT', 'RDsales', 'QRG']
MSE: 2.476220330100001
2013-07-12 ['skew', 'turn', 'std_turn', 'mom6', 'LEV', 'SG']
MSE: 2.9211273543349447
2013-07-15 ['size', 'turn', 'AM', 'EP', 'CFP', 'OCFP', 'DP', 'BVEG', 'INVchg', 'TAXchg', 'ROE', 'ROA', 'CR', 'QRG']
MSE: 3.4050465131682772
2013-07-16 ['skew', 'std_turn', 'volumed', 'retnmax', 'age', 'SP', 'BVEG', 'INVG', 'INVchg', 'PA', 'RD', 'QR']
MSE: 3.7945392668841373
2013-07-17 ['skew', 'turn', 'LEV', 'EP', 'SgINVg', 'cash', 'RD', 'RDsales', 'salecash']
MSE: 5.203289917919129
2013-07-18 ['total_vol', 'coskew', 'volumed', 'retnmax', 'AM', 'INVchg', 'RDsales', 'CR']
MSE: 4.509474728031157
2013-07-19 ['turn', 'retnmax', 'imom', 'LEV', 'INVG', 'ACC', 'ROA', 'salecash']
MSE: 4.979871792755535
2013-07-22 ['idvol', 'coskew', 'turn', 'age', 'AM', 'CFP', 'cashpr']
MSE: 4.324260298617077
2013-07-23 ['momchg', 'AM', 'SP', 'SG', 'TAXchg', 'ACC', 'cash', 'CR', 'CFdebt', 'CRG']
MSE: 2.955548999586859
2013-07-24 ['size', 'betad', 'idvol', 'turn', 'retnmax', 'imom', 'LEV', 'BVEG', 'SG', 'TAXchg', 'ROA']
MSE: 3.9297805684821476
2013-07-25 ['lagretn', 'AM', 'EP', 'PMG']
MSE: 6.005756235116204
2013-07-26 ['idvol', 'total_vol', 'coskew', 'volumed', 'retnmax', 'CFP', 'BVEG', 'INVchg', 'SG', 'TAXchg', 'RD', 'CR', 'salecash', 'saleinv']
MSE: 3.652047181874535
2013-07-29 ['size', 'idvol', 'turn', 'LEV', 'OCFP', 'SgINVg', 'PA', 'RD', 'CFdebt']
MSE: 4.084588835171696
2013-07-30 ['idvol', 'skew', 'turn', 'volumed', 'age', 'mom6', 'imom', 'lagretn', 'CFP', 'BVEG', 'SG', 'PMG', 'CT', 'cashpr', 'RD', 'QR']
MSE: 5.427820478672383
2013-07-31 ['size', 'idvol', 'skew', 'turn', 'std_turn', 'volumed', 'retnmax', 'LM', 'lagretn', 'OCFP', 'INVG', 'INVchg', 'SgINVg', 'PMG', 'CT', 'CR']
MSE: 4.857858414184512
2013-08-01 ['skew', 'std_turn', 'age', 'EP', 'CFP', 'SG', 'ROA', 'RDsales', 'QR', 'CFdebt']
MSE: 2.013532319730573
2013-08-02 ['turn', 'sharechg', 'BVEG', 'INVG', 'INVchg', 'SG', 'cash', 'RD']
MSE: 3.5316282738391513
2013-08-05 ['skew', 'retnmax', 'illq', 'age', 'AM', 'DP', 'SP', 'ROA', 'cashpr']
MSE: 3.698425872146903
2013-08-06 ['volumed', 'std_dvol', 'retnmax', 'mom6', 'lagretn', 'BVEG', 'INVG', 'TAXchg', 'ACCP', 'ROA', 'PA', 'CT', 'cash', 'cashpr', 'RDsales', 'CFdebt', 'QRG']
MSE: 2.8973094805224786
2013-08-07 ['idvol', 'age', 'lagretn', 'EP', 'CFP', 'INVchg', 'ACCP', 'salecash']
MSE: 3.593317809201272
2013-08-08 ['idvol', 'lagretn', 'AM', 'CFP', 'SG', 'PMG', 'ROA', 'CR', 'salecash']
MSE: 3.351461269519372
2013-08-09 ['coskew', 'mom6', 'OCFP', 'INVG', 'ROA', 'RDsales']
MSE: 4.0261812796161855
2013-08-12 ['betad', 'std_turn', 'EP', 'LG', 'INVchg', 'PMG', 'CT', 'RDsales', 'CR', 'salecash']
MSE: 2.9240852956829206
2013-08-13 ['size', 'idvol', 'retnmax', 'age', 'momchg', 'lagretn', 'AM', 'AG', 'BVEG', 'CRG']
MSE: 2.769309375178218
2013-08-14 ['beta', 'skew', 'std_turn', 'imom', 'lagretn', 'CFP', 'QR']
MSE: 3.1386489115048763
2013-08-15 ['coskew', 'illq', 'age', 'mom6', 'imom', 'OCFP', 'TAXchg', 'CT', 'cash', 'RD', 'CFdebt', 'salecash']
MSE: 3.1872869055781012
2013-08-16 ['sharechg', 'lagretn', 'LEV', 'EP', 'cash']
MSE: 4.331717368162058
2013-08-19 ['idvol', 'retnmax', 'lagretn', 'AM', 'EP', 'OCFP', 'LG', 'INVchg', 'TAXchg', 'ACC', 'ROA', 'saleinv']
MSE: 3.1894871655441053
2013-08-20 ['idvol', 'skew', 'illq', 'age', 'momchg', 'AM', 'EP', 'cash', 'RDsales', 'QR']
MSE: 3.64489260107602
2013-08-21 ['idskew', 'coskew', 'turn', 'sharechg', 'imom', 'EP', 'CFP', 'SG', 'CT', 'RD', 'salecash']
MSE: 3.133218730832844
2013-08-22 ['skew', 'turn', 'DP', 'BVEG', 'INVchg', 'PA', 'cash', 'RD', 'RDsales', 'CR']
MSE: 2.9998075331609875
2013-08-23 ['total_vol', 'age', 'imom', 'LEV', 'EP', 'INVchg', 'CFdebt']
MSE: 4.128531566187385
2013-08-26 ['beta', 'std_turn', 'age', 'momchg', 'AM', 'SG', 'cash', 'CR', 'QR', 'salecash']
MSE: 3.1464418828179266
2013-08-27 ['size', 'beta', 'skew', 'turn', 'std_dvol', 'AM', 'CFP', 'SG', 'PMG', 'CT', 'cashpr', 'RD', 'QR']
MSE: 3.451404811937106
2013-08-28 ['size', 'idvol', 'turn', 'illq', 'LEV', 'EP', 'PMG', 'cash', 'salecash']
MSE: 5.279398156725779
2013-08-29 ['beta', 'idvol', 'skew', 'turn', 'age', 'imom', 'LEV', 'INVchg', 'TAXchg', 'cash', 'RDsales', 'QR']
MSE: 5.070080414799744
2013-08-30 ['size', 'idvol', 'BVEG', 'INVG', 'PMG', 'RD', 'salecash']
MSE: 6.2538219549019525
2013-09-02 ['age', 'mom6', 'TAXchg', 'PA', 'RD', 'salecash']
MSE: 6.5811019041794205
2013-09-03 ['beta', 'skew', 'turn', 'mom6', 'PMG', 'ROA', 'CT', 'RDsales']
MSE: 4.43396936602181
2013-09-04 ['size', 'turn', 'sharechg', 'age', 'LG']
MSE: 4.650476080237525
2013-09-05 ['idvol', 'illq', 'age', 'imom', 'OCFP', 'SP', 'INVG', 'SG', 'TAXchg', 'PA', 'cash', 'RD']
MSE: 4.547262023813774
2013-09-06 ['idskew', 'skew', 'std_dvol', 'mom6', 'CFP', 'ACCP']
MSE: 4.244393336537197
2013-09-09 ['size', 'beta', 'betad', 'idvol', 'volumed', 'age', 'imom', 'DP', 'AG', 'INVG', 'PMG', 'salecash', 'saleinv', 'CRG']
MSE: 4.132922887031511
2013-09-10 ['skew', 'turn', 'std_turn', 'illq', 'AM', 'EP', 'TAXchg', 'RD']
MSE: 4.124117314719291
2013-09-11 ['turn', 'std_turn', 'age', 'mom6', 'BM', 'BVEG', 'SgINVg', 'CR', 'CFdebt']
MSE: 5.6085128303163545
2013-09-12 ['skew', 'illq', 'INVG', 'SG', 'RD']
MSE: 3.4850675051252296
2013-09-13 ['LEV', 'CFP', 'SG', 'SgINVg', 'PA', 'CT', 'cash', 'RDsales', 'CR', 'QRG']
MSE: 4.029990953218458
2013-09-16 ['betad', 'skew', 'turn', 'retnmax', 'imom', 'lagretn', 'EP', 'DP', 'INVG', 'SG', 'ROA', 'QR', 'CFdebt', 'salecash']
MSE: 3.801293360185082
2013-09-17 ['betad', 'illq', 'LEV', 'CFP', 'BVEG', 'cash', 'RDsales', 'salecash']
MSE: 4.813227650270847
2013-09-18 ['betad', 'idvol', 'turn', 'std_turn', 'momchg', 'AM', 'CFP', 'INVchg', 'ACC', 'CT', 'CFdebt', 'salecash', 'saleinv']
MSE: 3.5081704961882854
2013-09-23 ['mom6', 'PMG', 'TAXchg', 'PA', 'cash']
MSE: 2.7720720592228916
2013-09-24 ['total_vol', 'retnmax', 'illq', 'AM', 'SG', 'CT', 'cash', 'RDsales']
MSE: 4.876084895763892
2013-09-25 ['idskew', 'turn', 'std_turn', 'retnmax', 'illq', 'LEV', 'OCFP', 'DP', 'SP', 'CT', 'RD', 'CFdebt', 'salecash']
MSE: 5.0340127061687925
2013-09-26 ['beta', 'idvol', 'turn', 'age', 'SP', 'INVG', 'cashpr', 'QR', 'CRG']
MSE: 5.773339755545188
2013-09-27 ['size', 'idskew', 'turn', 'age', 'LEV', 'INVG', 'ROA', 'CT', 'RD', 'CR']
MSE: 4.931646845338142
2013-09-30 ['total_vol', 'turn', 'retnmax', 'sharechg', 'AM', 'CFP', 'INVchg', 'ROA', 'PA', 'CR']
MSE: 2.853335134669498
2013-10-08 ['idvol', 'retnmax', 'AM', 'OCFP', 'INVG', 'TAXchg', 'CT', 'RDsales', 'CR']
MSE: 3.8198359428496502
2013-10-09 ['turn', 'volumed', 'lagretn', 'CFP', 'LG', 'SG', 'PMG', 'ROA', 'CT', 'RDsales', 'QRG']
MSE: 3.1539746575072862
2013-10-10 ['idvol', 'turn', 'illq', 'age', 'lagretn', 'AG', 'TAXchg', 'cash', 'cashpr', 'RDsales', 'CR', 'CRG']
MSE: 6.029162821566159
2013-10-11 ['beta', 'idskew', 'turn', 'volumed', 'age', 'AM', 'INVchg', 'SG', 'PMG', 'CT', 'RD', 'CR']
MSE: 4.083926097141506
2013-10-14 ['coskew', 'turn', 'CFP', 'DP', 'SG', 'TAXchg', 'ROE', 'salecash']
MSE: 5.358110375884738
2013-10-15 ['beta', 'idvol', 'volumed', 'retnmax', 'LM', 'LEV', 'EP', 'CFP', 'SP', 'LG', 'INVG', 'SG']
MSE: 4.379737994755032
2013-10-16 ['skew', 'turn', 'mom6', 'EP', 'SG', 'PA', 'RDsales', 'CFdebt']
MSE: 5.418096078492006
2013-10-17 ['idvol', 'mom6', 'imom', 'AG', 'PMG', 'TAXchg', 'cash', 'CR', 'CRG']
MSE: 4.744623539638933
2013-10-18 ['turn', 'age', 'AM', 'ROA', 'RD', 'QRG']
MSE: 3.8896567621758504
2013-10-21 ['betad', 'total_vol', 'idskew', 'lagretn', 'AM', 'cash']
MSE: 2.7790111037241867
2013-10-22 ['idvol', 'momchg', 'imom', 'AM', 'EP', 'DP', 'SP', 'INVG', 'INVchg', 'CT', 'cash', 'RD', 'RDsales']
MSE: 6.093988664010849
2013-10-23 ['idvol', 'turn', 'retnmax', 'mom6', 'imom', 'lagretn', 'BM', 'AM', 'LEV', 'EP', 'CFP', 'DP', 'LG', 'BVEG', 'SgINVg', 'TAXchg', 'PA', 'cashpr', 'RD', 'RDsales', 'QR', 'QRG']
MSE: 5.786239798513934
2013-10-24 ['betad', 'idvol', 'skew', 'coskew', 'turn', 'illq', 'mom6', 'imom', 'AM', 'INVG', 'CT', 'RD', 'RDsales']
MSE: 5.030191474831782
2013-10-25 ['idskew', 'std_turn', 'mom6', 'AM', 'CFP', 'OCFP', 'AG', 'BVEG', 'INVG', 'CR', 'salecash', 'QRG']
MSE: 4.639727451186666
2013-10-28 ['skew', 'coskew', 'SP', 'ROA', 'PA', 'RD', 'RDsales', 'CR', 'saleinv']
MSE: 6.330372509834726
2013-10-29 ['skew', 'retnmax', 'sharechg', 'age', 'momchg', 'imom', 'AM', 'LEV', 'CFP', 'AG', 'LG', 'BVEG', 'INVG', 'ROA', 'cashpr', 'RD', 'salecash']
MSE: 7.175615556082851
2013-10-30 ['age', 'momchg', 'lagretn', 'AM', 'INVchg', 'CT', 'cash', 'CR', 'CRG']
MSE: 3.0270851338446665
2013-10-31 ['turn', 'mom6', 'imom', 'AM', 'ACCP', 'PA', 'RD', 'RDsales', 'CR']
MSE: 5.288040276830776
2013-11-01 ['coskew', 'turn', 'age', 'INVchg', 'SG', 'CT', 'cash', 'RD', 'RDsales', 'QR', 'CRG', 'QRG']
MSE: 4.1196453322319
2013-11-04 ['size', 'skew', 'std_turn', 'illq', 'age', 'mom6', 'momchg', 'AM', 'OCFP', 'BVEG', 'cash', 'RDsales', 'CFdebt']
MSE: 3.0966750440768926
2013-11-05 ['total_vol', 'skew', 'std_turn', 'age', 'mom6', 'CFP', 'INVchg', 'SG', 'RDsales', 'salecash', 'CRG']
MSE: 2.662112847030313
2013-11-06 ['idvol', 'turn', 'illq', 'LEV', 'EP', 'OCFP', 'BVEG', 'INVchg', 'cash', 'RD', 'RDsales', 'CR', 'salecash']
MSE: 2.990025202465042
2013-11-07 ['size', 'beta', 'std_turn', 'SG', 'ROA', 'QRG']
MSE: 3.711989594519554
2013-11-08 ['betad', 'idvol', 'skew', 'volumed', 'std_dvol', 'lagretn', 'AM', 'DP', 'INVG', 'PMG', 'CR']
MSE: 4.592895702838832
2013-11-11 ['size', 'turn', 'INVchg', 'ROA', 'CT', 'QR', 'saleinv']
MSE: 4.454780865729851
2013-11-12 ['idvol', 'skew', 'std_dvol', 'CFP', 'OCFP', 'DP', 'SP', 'AG', 'INVchg', 'PMG', 'PA', 'cash', 'cashpr', 'RD', 'QR', 'salecash', 'QRG']
MSE: 3.131677788337437
2013-11-13 ['coskew', 'turn', 'std_turn', 'LM', 'age', 'mom6', 'lagretn', 'LEV', 'CFP', 'OCFP', 'DP', 'BVEG', 'cash', 'RDsales', 'QR']
MSE: 2.767840492935834
2013-11-14 ['turn', 'retnmax', 'age', 'lagretn', 'AM', 'SP', 'AG', 'SG', 'PMG', 'CR', 'QR', 'CRG']
MSE: 3.7624903972143158
2013-11-15 ['idvol', 'coskew', 'turn', 'volumed', 'LM', 'lagretn', 'AM', 'EP', 'LG', 'INVG', 'SG', 'PMG', 'PA', 'CT', 'CR', 'CFdebt', 'QRG']
MSE: 2.234910451900615
2013-11-18 ['std_turn', 'volumed', 'EP', 'DP']
MSE: 3.286022743901893
2013-11-19 ['betad', 'total_vol', 'volumed', 'AM', 'CFP', 'PMG', 'ROA', 'QR', 'CFdebt', 'salecash']
MSE: 3.050946341562079
2013-11-20 ['retnmax', 'LM', 'age', 'ROA', 'cash', 'RD']
MSE: 3.334620743562507
2013-11-21 ['idvol', 'imom', 'AM', 'OCFP', 'BVEG', 'INVchg', 'cashpr', 'RD', 'CR', 'CRG']
MSE: 3.8894040528171714
2013-11-22 ['skew', 'turn', 'age', 'EP', 'cash', 'RDsales']
MSE: 4.045378809032109
2013-11-25 ['size', 'idvol', 'std_turn', 'volumed', 'age', 'mom6', 'CFP', 'OCFP', 'DP', 'PMG', 'TAXchg', 'ROE', 'cashpr', 'RD', 'QR', 'salecash', 'saleinv']
MSE: 4.3597700589470145
2013-11-26 ['beta', 'skew', 'illq', 'LM', 'AM', 'EP', 'PMG', 'RDsales', 'QR']
MSE: 4.716572482662422
2013-11-27 ['illq', 'sharechg', 'age', 'imom', 'SG', 'ROA', 'CR', 'CRG']
MSE: 2.688063586386144
2013-11-28 ['skew', 'illq', 'age', 'imom', 'SgINVg']
MSE: 2.995623491216169
2013-11-29 ['beta', 'imom', 'lagretn', 'AM', 'CFP', 'INVG', 'CFdebt', 'salecash']
MSE: 3.071047354868936
2013-12-02 ['idvol', 'retnmax', 'lagretn', 'AM', 'TAXchg', 'CT', 'RDsales', 'CFdebt', 'salecash']
MSE: 5.16301029674257
2013-12-03 ['beta', 'skew', 'lagretn', 'AM', 'INVG', 'SgINVg', 'ROA', 'CR', 'CRG', 'QRG']
MSE: 3.6562350393532226
2013-12-04 ['turn', 'std_turn', 'mom6', 'INVG', 'INVchg', 'SgINVg', 'cash', 'CR']
MSE: 3.429245762918336
2013-12-05 ['EP', 'CFP', 'SG', 'CFdebt', 'salecash']
MSE: 3.2732258037626787
2013-12-06 ['idskew', 'coskew', 'volumed', 'age', 'AM', 'LEV', 'EP', 'INVG', 'PMG', 'ROA', 'cash', 'RD', 'QR', 'CFdebt']
MSE: 3.0892243267207697
2013-12-09 ['beta', 'coskew', 'illq', 'age', 'INVchg', 'ACCP', 'cash', 'cashpr', 'QRG']
MSE: 2.909975588127043
2013-12-10 ['coskew', 'turn', 'imom', 'lagretn', 'EP', 'CFP', 'DP', 'PMG', 'CT', 'RD', 'QR', 'CFdebt']
MSE: 3.429970656413309
2013-12-11 ['idvol', 'skew', 'LM', 'AM', 'LG', 'SgINVg', 'RD', 'RDsales', 'QR']
MSE: 3.899990774640407
2013-12-12 ['size', 'beta', 'idvol', 'age', 'AM', 'CFP', 'OCFP', 'INVG', 'INVchg', 'SG', 'CR', 'CRG']
MSE: 3.2466069131941597
2013-12-13 ['retnmax', 'LM', 'sharechg', 'mom6', 'lagretn', 'AM', 'LEV', 'EP', 'CFP', 'DP', 'SP', 'BVEG', 'INVG', 'ACC', 'salecash', 'saleinv', 'QRG']
MSE: 2.7994189887519934
2013-12-16 ['size', 'idskew', 'turn', 'LM', 'AM', 'EP', 'CFP', 'BVEG', 'SG', 'PMG', 'RDsales']
MSE: 4.5553817848811144
2013-12-17 ['beta', 'turn', 'std_turn', 'volumed', 'retnmax', 'mom6', 'AM', 'SG', 'PMG', 'RD']
MSE: 3.5561130332117106
2013-12-18 ['turn', 'lagretn', 'LEV', 'PA', 'CT', 'CR', 'salecash']
MSE: 2.474691764611882
2013-12-19 ['size', 'idvol', 'skew', 'coskew', 'turn', 'LM', 'BM', 'LG', 'INVchg', 'SG', 'PMG', 'CT', 'CR']
MSE: 2.5083141184554707
2013-12-20 ['betad', 'idvol', 'skew', 'coskew', 'std_turn', 'LEV', 'OCFP', 'TAXchg', 'CT', 'RD', 'salecash']
MSE: 3.7529575553876904
2013-12-23 ['turn', 'retnmax', 'AM', 'LEV', 'CFP', 'DP', 'CR']
MSE: 5.605109218355425
2013-12-24 ['coskew', 'turn', 'age', 'SG', 'QR', 'salecash']
MSE: 2.609969578928021
2013-12-25 ['mom6', 'lagretn', 'INVG', 'TAXchg', 'cash', 'CFdebt', 'salecash']
MSE: 2.9110866706614567
2013-12-26 ['idvol', 'skew', 'turn', 'sharechg', 'AM', 'PMG', 'ACCP', 'CT', 'RDsales', 'CR', 'salecash']
MSE: 4.76824086903283
2013-12-27 ['lagretn', 'LEV', 'INVG', 'RDsales', 'CFdebt']
MSE: 2.4008224944342778
2013-12-30 ['skew', 'turn', 'retnmax', 'mom6', 'momchg', 'INVG', 'CR', 'QRG']
MSE: 3.3727608487381446
2013-12-31 ['volumed', 'mom6', 'lagretn', 'DP', 'INVchg', 'SG', 'TAXchg', 'RD', 'RDsales', 'CFdebt', 'salecash']
MSE: 2.764648018725637
# Average number of factors removed per trading day.
linear_remove['removed'].mean()
10.084033613445378
# Average over all trading days of the error returned by remove_and_fit
# (presumably the test MSE reported by linear_train -- confirm).
linear_remove['err'].mean()
3.808543809919859
评论区