
“该策略涉及使用LASSO回归根据历史数据预测商品期货回报,做多预测排名前五的商品,做空预测排名后五的商品,每月重新平衡。”
资产类别: 差价合约、期货 | 地区: 美国 | 周期: 每月 | 市场: 大宗商品 | 关键词: 大宗商品
I. 策略概要
投资范围包括27种活跃交易的商品期货,包括谷物、软商品、牲畜、能源和金属。该策略涉及计算每个期货合约的每月超额回报。使用60个月的滚动窗口创建一个回归模型,其中超额回报为因变量,滞后回报为自变量。应用LASSO回归方法选择显著的预测因子,使用AICc准则确定收缩参数。确定重要预测因子后,使用OLS回归估计参数。根据预测的回报,投资者做多排名前五的商品,做空排名后五的商品。投资组合每月重新平衡。
II. 策略合理性
本文指出了商品期货回报中领先-滞后关系的两个关键原因。首先,一些关系可以用经济学解释,例如,由于消费联系,瘦猪的回报可以预测小麦和大豆等农产品的回报。另一个例子是汽油和玉米之间的关系,它们通过乙醇生产联系在一起。其次,金融化,尤其是指数交易,加强了这些关系,领先-滞后策略在指数期货中被证明是有效的。ETF和ETN的引入进一步放大了这种效应。此外,LASSO机器学习方法有助于识别关键预测因子,避免过度拟合,并提高盈利交易策略的预测准确性。
III. 来源论文
The Serial Dependence of the Commodity Futures Returns: A Machine Learning Approach [点击查看论文]
- Han, Yufeng 和 Kong, Lingfei,北卡罗来纳大学夏洛特分校,圣路易斯华盛顿大学
<摘要>
我们感谢Yulin Feng、Chulwoo Han、Liyan Han、Gene Lai、Christopher Kirby、Tao-Hsien Dolly King、David Mauer、Mihail Velikov、Kiplan S. Womack、Yilei Zhang、Chengli Zheng、Xun Zhong和Guofu Zhou,以及2020年商品市场发展与风险管理研讨会、2020年金融管理协会年会、2021年迈阿密赫伯特商学院机器学习与商业冬季研究会议、2021年国际期货与衍生品会议的研讨会参与者,以及北卡罗来纳大学夏洛特分校的研讨会参与者,他们提出了有益的意见和建议。


IV. 回测表现
| 年化回报 | 15.15% |
| 波动率 | 16.24% |
| β值 | 0.036 |
| 夏普比率 | 0.93 |
| 索提诺比率 | -0.076 |
| 最大回撤 | -8.28% |
| 胜率 | 51% |
V. 完整的 Python 代码
from AlgorithmImports import *
# but it is a normal regression with added penalization term). The shrinkage parameter lambda for each regression is found by minimizing the AICc information criterion (equation 3.12). AICc
# is corrected AIC for small-sample bias. After the LASSO selects the important predictors, OLS is used to estimate the regression, using only the selected predictors to overcome the
# underfitting of LASSO parameter estimates. Prediction of the returns in the month t+1 is based on OLS estimated parameters of month t (using returns of month t and t-1; naturally, the OLS
# regression to obtain parameters at time t, uses returns of months t-1 and t-2). If LASSO does not select any predictor for some commodity, the commodity is omitted. Based on the predicted
# returns, long top five commodities and short bottom five commodities. The portfolio is rebalanced monthly.
#
# QC implementation:
# - The shrinkage parameter is found by cross-validation instead of by minimizing AICc; code for AIC minimization is also included (commented out).
import statsmodels.api as sm
from sklearn import linear_model
class TheSerialDependenceoftheCommodityFuturesReturns(QCAlgorithm):
    """Monthly long/short commodity-futures strategy based on LASSO-selected lagged returns.

    Once per calendar month the algorithm regresses each commodity's monthly
    return on the one- and two-month lagged returns of every eligible
    commodity, keeps the predictors LASSO leaves non-zero, re-estimates them
    with OLS, and goes long the five highest / short the five lowest
    predicted returns.
    """

    def Initialize(self):
        """Configure dates, cash, the futures universe, indicators and warm-up."""
        self.SetStartDate(2000, 1, 1)
        self.SetCash(100000)

        # the full list - table 1
        self.symbols = [
            "CME_BO1",  # Soybean Oil Futures, Continuous Contract
            "CME_C1",   # Corn Futures, Continuous Contract
            "CME_KW2",  # Wheat Kansas, Continuous Contract
            "CME_O1",   # Oats Futures, Continuous Contract
            "CME_RR1",  # Rough Rice Futures, Continuous Contract
            "CME_S1",   # Soybean Futures, Continuous Contract
            "CME_SM1",  # Soybean Meal Futures, Continuous Contract
            "CME_W1",   # Wheat Futures, Continuous Contract
            "ICE_CC1",  # Cocoa Futures, Continuous Contract
            "ICE_CT1",  # Cotton No. 2 Futures, Continuous Contract
            "CME_DA1",  # Class III Milk Futures
            "ICE_OJ1",  # Orange Juice Futures, Continuous Contract
            "ICE_KC1",  # Coffee C Futures, Continuous Contract
            "CME_LB1",  # Random Length Lumber Futures, Continuous Contract
            "ICE_SB1",  # Sugar No. 11 Futures, Continuous Contract
            "CME_CL1",  # Crude Oil Futures, Continuous Contract
            "ICE_O1",   # Heating Oil Futures, Continuous Contract
            "CME_NG1",  # Natural Gas (Henry Hub) Physical Futures, Continuous Contract
            "CME_RB2",  # Gasoline Futures, Continuous Contract
            "CME_FC1",  # Feeder Cattle Futures, Continuous Contract
            "CME_LC1",  # Live Cattle Futures, Continuous Contract
            "CME_LN1",  # Lean Hog Futures, Continuous Contract
            "CME_GC1",  # Gold Futures, Continuous Contract
            "CME_HG1",  # Copper Futures, Continuous Contract
            "CME_PA1",  # Palladium Futures, Continuous Contract
            "CME_PL1",  # Platinum Futures, Continuous Contract
            "CME_SI1",  # Silver Futures, Continuous Contract
        ]

        # 21 daily bars stand in for one month; 60 regression months plus one
        # extra observation so that both lag series have 60 values.
        self.daily_period = 21
        self.period = 60 + 1
        self.SetWarmUp(self.period * self.daily_period, Resolution.Daily)

        self.perf_data = {}  # ticker -> rolling window of monthly returns
        self.roc = {}        # ticker -> rate-of-change indicator over daily_period bars
        for ticker in self.symbols:
            security = self.AddData(QuantpediaFutures, ticker, Resolution.Daily)
            security.SetFeeModel(CustomFeeModel())
            security.SetLeverage(5)
            self.roc[ticker] = self.ROC(ticker, self.daily_period, Resolution.Daily)
            self.perf_data[ticker] = RollingWindow[float](self.period)

        # Month of the last rebalance; -1 guarantees the first post-warm-up bar trades.
        self.recent_month = -1

    def OnData(self, data):
        """Rebalance on the first data event of each calendar month.

        Args:
            data: the current slice; only used to check that a symbol has
                fresh data before a target is created for it.
        """
        if self.IsWarmingUp:
            return

        # rebalance once a month
        current_month = self.Time.month
        if current_month == self.recent_month:
            return
        self.recent_month = current_month

        dependent_symbols = []  # commodities that get their own regression
        self.x = []             # two rows (lag 1, lag 2) per eligible commodity

        for symbol in self.symbols:
            window = self.perf_data[symbol]
            eligible = (
                window.IsReady
                and self.Securities[symbol].GetLastData()
                and self.Time.date() < QuantpediaFutures.get_last_update_date()[symbol]
            )
            if eligible:
                # The lag series are taken BEFORE this month's return is
                # appended, so they trail the dependent variable built below.
                # NOTE(review): assumes RollingWindow index 0 is the newest value.
                history = list(window)
                self.x.append(history[:-1])
                self.x.append(history[1:])
                dependent_symbols.append(symbol)

            # Store this month's return whenever the indicator is ready,
            # regardless of whether the symbol was eligible above.
            momentum = self.roc[symbol]
            if momentum.IsReady:
                window.Add(momentum.Current.Value)

        # regression estimates
        predicted_return = {}
        predictors = np.array(self.x)  # identical for every dependent symbol
        design = predictors.T
        for symbol in dependent_symbols:
            y = np.array(list(self.perf_data[symbol])[:self.period - 1])

            # lasso with cross-validation
            cv_search = linear_model.LassoCV()
            cv_search.fit(design, y)
            selector = linear_model.Lasso(alpha=cv_search.alpha_)
            selector.fit(design, y)

            # Alternative (disabled): find alpha by minimizing AIC instead.
            # aic_search = linear_model.LassoLarsIC(criterion='aic')
            # aic_search.fit(design, y)
            # selector = linear_model.Lasso(alpha=aic_search.alpha_)
            # selector.fit(design, y)

            # select only important predictors (non-zero LASSO coefficients)
            selected = np.flatnonzero(selector.coef_)
            if selected.size == 0:
                # No predictor survived shrinkage: the commodity is omitted.
                continue

            # OLS on the selected predictors: the newest predictor value is
            # held out of the fit and used as the prediction input instead.
            ols_model = self.multiple_linear_regression(predictors[selected, 1:], y[:-1])
            ols_x_pred = [1, *predictors[selected, 0]]  # leading 1 is the intercept term
            predicted_return[symbol] = ols_model.predict(ols_x_pred)[0]

        long_leg = []
        short_leg = []
        count = 5
        if len(predicted_return) >= count * 2:
            ranked = sorted(predicted_return, key=predicted_return.get, reverse=True)
            long_leg = ranked[:count]
            short_leg = ranked[-count:]

        # trade execution: equal weight inside each leg, +1 long / -1 short
        targets: List[PortfolioTarget] = [
            PortfolioTarget(symbol, direction / len(leg))
            for direction, leg in ((1, long_leg), (-1, short_leg))
            for symbol in leg
            if symbol in data and data[symbol]
        ]

        # Second argument True: liquidate holdings that are not in `targets`.
        self.SetHoldings(targets, True)

    def multiple_linear_regression(self, x, y):
        """Fit OLS of ``y`` on the rows of ``x`` plus an intercept.

        Args:
            x: predictors, one row per variable (transposed before fitting).
            y: dependent variable, one value per observation.

        Returns:
            The fitted statsmodels OLS results object.
        """
        design = sm.add_constant(np.array(x).T)
        return sm.OLS(endog=y, exog=design).fit()
# Quantpedia data.
# NOTE: IMPORTANT: Data order must be ascending (datewise)
class QuantpediaFutures(PythonData):
    """Custom daily data type for Quantpedia continuous-futures CSV files.

    Each data row is ``date;back_adjusted;spliced`` with the date written as
    ``dd.mm.YYYY``. The class also records, per ticker, the latest bar date
    that has been read so far.
    """

    # Latest bar date read so far, keyed by ticker string (config.Symbol.Value).
    _last_update_date: Dict[str, datetime.date] = {}

    @staticmethod
    def get_last_update_date() -> Dict[str, datetime.date]:
        """Return the shared ticker -> latest-bar-date mapping (not a copy)."""
        return QuantpediaFutures._last_update_date

    def GetSource(self, config, date, isLiveMode):
        """Return the remote CSV location for the requested symbol."""
        return SubscriptionDataSource(
            "data.quantpedia.com/backtesting_data/futures/{0}.csv".format(config.Symbol.Value),
            SubscriptionTransportMedium.RemoteFile,
            FileFormat.Csv,
        )

    def Reader(self, config, line, date, isLiveMode):
        """Parse one CSV line into a data point.

        Returns:
            A populated ``QuantpediaFutures`` instance, or ``None`` for blank
            lines and lines that do not start with a digit (e.g. a header).
        """
        # Guard against empty lines before indexing the first character.
        if not line or not line[0].isdigit():
            return None

        data = QuantpediaFutures()
        data.Symbol = config.Symbol

        split = line.split(';')
        # The bar is stamped one day after the date in the file.
        data.Time = datetime.strptime(split[0], "%d.%m.%Y") + timedelta(days=1)
        data['back_adjusted'] = float(split[1])
        data['spliced'] = float(split[2])
        data.Value = float(split[1])

        # Track the newest bar date seen for this ticker.
        ticker = config.Symbol.Value
        last_seen = QuantpediaFutures._last_update_date
        bar_date = data.Time.date()
        last_seen[ticker] = max(bar_date, last_seen.get(ticker, datetime(1, 1, 1).date()))

        return data
# Custom fee model.
class CustomFeeModel(FeeModel):
    """Fee model charging a fixed fraction (0.00005) of the traded notional."""

    def GetOrderFee(self, parameters):
        """Return the order fee in USD: price * absolute quantity * 0.00005."""
        notional = parameters.Security.Price * parameters.Order.AbsoluteQuantity
        return OrderFee(CashAmount(notional * 0.00005, "USD"))
class SymbolData():
    """Per-symbol state container.

    The previous constructor read ``days_to_liquidate`` and ``long_flag`` as
    free names that were never defined, so every instantiation raised
    ``NameError``. They are now optional keyword parameters.
    """

    def __init__(self, monthly_period: int, days_to_liquidate: int = 0, long_flag: bool = False):
        """Store the supplied settings on the instance.

        Args:
            monthly_period: period length supplied by the caller; it was
                previously accepted but discarded.
            days_to_liquidate: NOTE(review): default of 0 is an assumption -
                confirm the intended initial value.
            long_flag: NOTE(review): default of False is an assumption -
                confirm the intended initial value.
        """
        self.monthly_period = monthly_period
        self.days_to_liquidate = days_to_liquidate
        self.long_flag = long_flag