
“该策略使用谷歌趋势数据计算‘危害恐惧’指标,将商品分为五分位,然后做多低恐惧五分位,做空高恐惧五分位,每周重新平衡以实现盈利。”
资产类别: 差价合约、期货 | 地区: 全球 | 周期: 每周 | 市场: 大宗商品 | 关键词: 大宗商品
I. 策略概要
投资策略使用28种商品期货,并分析来自谷歌趋势的149个关键词的搜索量数据(周度数据,表1)。谷歌搜索量指数(GSVI)代表相对搜索强度。GSVI的周度对数变化被标准化,并通过回顾性回归确定搜索量与商品回报之间的历史关系。
“危害恐惧”(CFEAR)指标通过将具有显著t统计量(绝对t统计量 > 临界值)的关键词的回归贝塔值求和来计算。正的CFEAR表示与价格上涨相关的危害恐惧,而负的CFEAR则表示价格下跌。该指标通过减去均值并除以标准差进行标准化。
商品根据标准化的CFEAR分为五分位。该策略对最高五分位(最高危害恐惧)建立空头头寸,对最低五分位(最低危害恐惧)建立多头头寸,并采用等权重。头寸每周重新平衡。这种方法利用市场情绪数据有效预测价格变动。
II. 策略合理性
与危害相关的搜索查询反映了对影响商品的危害的担忧,从而影响期货价格。正向同向运动表明供应减少或需求增加的危害,导致价格过高,而负向同向运动则表明相反情况。价格很可能根据这些关系进行调整。“危害恐惧”与偏度风险或基差动量等已知因素不同,尽管面临流动性风险,但仍带有显著的无法解释的溢价。该策略在交易成本、信号测量和投资组合构建方法方面表现出稳健性,表明其在根据危害恐惧动态预测商品期货价格走势方面的可靠性。
III. 来源论文
Hazard Fear in Commodity Markets [点击查看论文]
- 费尔南德斯-佩雷斯(Adrian Fernandez-Perez)、富埃尔特斯(Ana-Maria Fuertes)、冈萨雷斯-费尔南德斯(Marcos González-Fernández)和米弗雷(Joelle Miffre),都柏林大学学院(UCD)银行与金融系,伦敦城市大学贝叶斯商学院,莱昂大学,奥登西亚商学院。
<摘要>
我们通过149个查询词的互联网搜索量来衡量对天气、疾病、地缘政治或经济威胁的积极关注,即“危害恐惧”,并以此来检验商品期货的定价作用。一种根据危害恐惧信号对商品期货合约横截面进行排序的做多-做空投资组合策略获得了显著的溢价。这种商品危害恐惧溢价反映了对现有基本面、尾部、波动性和流动性风险因素的补偿,但并未被它们所包含。在商品投资组合的横截面中,危害恐惧的敞口被强烈定价。危害恐惧溢价在金融市场情绪不利或悲观时期加剧。


IV. 回测表现
| 年化回报 | 9.28% |
| 波动率 | 10.3% |
| β值 | 0.064 |
| 夏普比率 | 0.9 |
| 索提诺比率 | N/A |
| 最大回撤 | -18.81% |
| 胜率 | 48% |
V. 完整的 Python 代码
from AlgorithmImports import *
import numpy as np
import statsmodels.api as sm
#endregion
class HazardFearInCommodityMarkets(QCAlgorithm):
    """Commodity 'hazard fear' (CFEAR) long-short strategy.

    For each commodity future, the trailing monthly returns are regressed
    (one univariate OLS per keyword) on the scaled monthly changes in
    Google search volume of 149 hazard-related keywords. Betas whose
    |t-statistic| exceeds a critical value are summed into a CFEAR score;
    scores are standardized cross-sectionally and the strategy goes long
    the lowest-CFEAR quintile and short the highest-CFEAR quintile,
    equally weighted on each side.

    NOTE(review): the source paper rebalances weekly; this implementation
    rebalances at the start of each month (see the MonthStart schedule).
    """

    def Initialize(self):
        # TODO put these into csv
        file_names = ['ADVERSE_WEATHER', 'ADVERSE_WEATHER_CONDITIONS', 'ADVERSE_WEATHER_WARNING', 'BLIZZARD', 'BLIZZARD_RISK', 'BLIZZARD_WARNING', 'CATASTROPHIC_EVENTS', 'CATASTROPHIC_WEATHER', 'CATASTROPHIC_WEATHER_EVENTS', 'CLIMATE_CHANGE', 'CLIMATE_DISTURBANCE', 'COLD', 'COLD_SPELL', 'COLD_WEATHER', 'CYCLOGENESIS', 'CYCLONE', 'CYCLONE_RISK', 'CYCLONE_WARNING', 'DROUGHT', 'DROUGHT_RISK', 'DROUGHT_WARNING', 'DROUGHTS', 'DRY_WEATHER', 'EL_NINO_WEATHER', 'EXTREME_COLD', 'EXTREME_COLD_TEMPERATURES', 'EXTREME_HEAT', 'EXTREME_RAIN', 'EXTREME_TEMPERATURES', 'EXTREME_WEATHER', 'EXTREME_WIND', 'FLOOD', 'FLOOD_RISK', 'FLOOD_WARNING', 'FLOODING', 'FLOODS', 'FOREST_FIRE', 'FOREST_FIRES', 'FREEZE_WARNING', 'FROST', 'FROST_RISK', 'FROST_WARNING', 'FROSTS', 'GLOBAL_WARMING', 'GUST', 'GUSTS', 'HAIL', 'HAIL_DAMAGE', 'HAIL_RISK', 'HAIL_STORM', 'HAIL_STORM_WARNING', 'HAIL_WARNING', 'HARMATTAN_WIND', 'HEAT', 'HEAT_WAVE', 'HEAT_WAVES', 'HEATWAVE', 'HEATWAVES', 'HEAVY_RAIN', 'HEAVY_RAIN_FALL', 'HEAVY_RAIN_RISK', 'HEAVY_RAIN_WARNING', 'HIGH_TEMPERATURE', 'HIGH_TEMPERATURES', 'HOT_WEATHER', 'HURRICANE', 'HURRICANE_RISK', 'HURRICANE_WARNING', 'HURRICANES', 'NATURAL_DISASTER', 'NATURAL_HAZARD', 'RAIN', 'SEVERE_HEAT', 'SEVERE_WEATHER', 'SEVERE_WEATHER_RISK', 'SNOW', 'SNOW_RISK', 'SNOW_STORM_WARNING', 'SNOW_WARNING', 'STORM', 'STORM_RISK', 'STORM_WARNING', 'STRONG_WIND', 'STRONG_WIND_GUST', 'TORNADO', 'TORNADO_RISK', 'TORNADO_WARNING', 'TORRENTIAL_RAIN', 'TROPICAL_CYCLONE', 'TROPICAL_CYCLONE_RISK', 'TROPICAL_CYCLONE_WARNING', 'TROPICAL_STORM', 'TROPICAL_STORM_RISK', 'TROPICAL_STORM_WARNING', 'TROPICAL_WEATHER', 'TYPHOON', 'TYPHOON_RISK', 'TYPHOON_WARNING', 'WEATHER_BLIZZARD_WARNING', 'WEATHER_RISK', 'WEATHER_WARNING', 'WET_WEATHER', 'WILDFIRE', 'WILDFIRE_RISK', 'WILDFIRE_WARNING', 'WILDFIRES', 'WIND', 'WIND_GUST', 'WIND_GUSTS', 'WIND_RISK', 'WIND_SPEED', 'WIND_STORM', 'WIND_WARNING', 'CROP_DISEASES', 'CROP_PEST', 'CROP_PEST_RISK', 'CROP_PESTS', 'EBOLA', 'INSECT_PEST', 'LA_ROYA', 'PEST_CONTROL',
                      'PEST_RISK', 'RUST_COFFE', 'AFRICA_INSTABILITY', 'AFRICA_TERRORISM', 'LIBYAN_CRISIS', 'MIDDLE_EAST_CONFLICT', 'MIDDLE_EAST_INSTABILITY', 'MIDDLE_EAST_TERRORISM', 'OIL_CRISIS', 'OIL_EMBARGO', 'OIL_OUTAGE', 'RUSSIAN_CRISIS', 'SYRIAN_WAR', 'TERRORISM', 'TERRORIST_ATTACK', 'TERRORIST_ATTACKS', 'CRISIS', 'ECONOMIC_CRISIS', 'ECONOMIC_RECESSION', 'FINANCIAL_CRISIS', 'RECESSION', 'RECESSION_2008', 'RECESSION_DEPRESSION', 'THE_RECESSION', 'UNEMPLOYMENT', 'UNEMPLOYMENT_RATE', 'US_RECESSION', 'US_UNEMPLOYMENT']

        self.SetStartDate(2004, 1, 1)  # Google search data are available since 2004
        self.SetCash(100000)

        self.months = 36            # regression look-back in monthly observations
        self.quantile = 5           # quintile sorting
        self.max_missing_days = 31  # search data older than this is considered stale

        self.data = {}                   # commodity ticker -> SymbolData (monthly returns)
        self.google_search_values = {}   # search symbol -> GoogleSearchValues

        # NOTE(review): the paper works with 28 commodities; this list has 32
        # tickers -- confirm against the data vendor's coverage.
        self.symbols = [
            "CME_S1",   # Soybean Futures, Continuous Contract
            "CME_W1",   # Wheat Futures, Continuous Contract
            "CME_SM1",  # Soybean Meal Futures, Continuous Contract
            "CME_BO1",  # Soybean Oil Futures, Continuous Contract
            "CME_C1",   # Corn Futures, Continuous Contract
            "CME_O1",   # Oats Futures, Continuous Contract
            "CME_LC1",  # Live Cattle Futures, Continuous Contract
            "CME_FC1",  # Feeder Cattle Futures, Continuous Contract
            "CME_LN1",  # Lean Hog Futures, Continuous Contract
            "CME_GC1",  # Gold Futures, Continuous Contract
            "CME_SI1",  # Silver Futures, Continuous Contract
            "CME_PL1",  # Platinum Futures, Continuous Contract
            "CME_CL1",  # Crude Oil Futures, Continuous Contract
            "CME_HG1",  # Copper Futures, Continuous Contract
            "CME_LB1",  # Random Length Lumber Futures, Continuous Contract
            "CME_NG1",  # Natural Gas (Henry Hub) Physical Futures, Continuous Contract
            "CME_PA1",  # Palladium Futures, Continuous Contract
            "CME_RR1",  # Rough Rice Futures, Continuous Contract
            "CME_DA1",  # Class III Milk Futures
            "ICE_CC1",  # Cocoa Futures, Continuous Contract
            "ICE_CT1",  # Cotton No. 2 Futures, Continuous Contract
            "ICE_KC1",  # Coffee C Futures, Continuous Contract
            "ICE_O1",   # Heating Oil Futures, Continuous Contract
            "ICE_OJ1",  # Orange Juice Futures, Continuous Contract
            "ICE_SB1",  # Sugar No. 11 Futures, Continuous Contract
            # Fix: the original was missing the comma after "ICE_TF1", which
            # silently concatenated it with "ICE_RS1" into one invalid ticker.
            "ICE_TF1",  # ICE Endex Dutch TTF 1-Month Calendar Spread Options
            "ICE_RS1",  # Canola Futures, Continuous Contract
            "ICE_GO1",  # Gas Oil Futures, Continuous Contract
            "ICE_WT1",  # WTI Crude Futures, Continuous Contract
            "CME_RB2",  # Gasoline Futures, Continuous Contract
            "CME_KW2",  # Wheat Kansas, Continuous Contract
        ]

        # Subscribe to all commodity futures.
        for symbol in self.symbols:
            security = self.AddData(QuantpediaFutures, symbol, Resolution.Daily)
            security.SetFeeModel(CustomFeeModel())
            security.SetLeverage(5)
            self.data[symbol] = SymbolData(self.months)

        # Subscribe to Google search volume data (csv file name == keyword).
        for search_value in file_names:
            symbol = self.AddData(QuantpediaGoogleSearch, search_value, Resolution.Daily).Symbol
            self.google_search_values[symbol] = GoogleSearchValues(self.months)

        # SPY is used only as a calendar anchor for the monthly schedule.
        self.symbol = self.AddEquity('SPY', Resolution.Daily).Symbol
        self.selection_flag = False
        self.Schedule.On(self.DateRules.MonthStart(self.symbol), self.TimeRules.AfterMarketOpen(self.symbol), self.Selection)

    def OnData(self, data):
        # Google search data comes at the start of each month.
        for symbol in self.google_search_values:
            if symbol in data and data[symbol]:
                value = data[symbol].Value
                self.google_search_values[symbol].update(value, self.Time.date())

        # Rebalance monthly: only proceed on the scheduled selection day.
        if not self.selection_flag:
            return
        self.selection_flag = False

        # Calculate and store the monthly return for each Quantpedia future.
        for symbol in self.data:
            if symbol in data and data[symbol]:
                value = data[symbol].Value
                # Calculate the monthly return once last_price is initialized.
                if self.data[symbol].last_price != 0:
                    self.data[symbol].update(value)
                # Store the new last_price for the next monthly return calculation.
                self.data[symbol].last_price = value

        # Wait until the first future has a full history of monthly returns.
        if not self.data[self.symbols[0]].is_ready():
            return

        # Calculate the weekly (monthly in this implementation) change in the
        # Google search volume for each keyword; it is later scaled by the
        # summed standard deviation of search volumes across keywords.
        keywords_std = []
        keywords_monthly_changes = {}

        # Collect std and monthly changes for every keyword with fresh data.
        for symbol, google_search in self.google_search_values.items():
            if google_search.is_ready(self.Time.date(), self.max_missing_days):
                # Normalized search volumes in ascending date order.
                search_values = [x for x in google_search.n_search_values][::-1]
                keywords_std.append(np.std(search_values))
                keywords_monthly_changes[symbol] = self.CalculateMonthlyChanges(search_values)

        # Regression is impossible when no keyword has usable changes.
        if len(keywords_monthly_changes) == 0:
            self.Liquidate()
            return

        # Summed standard deviation across keywords, used as a common scale.
        sum_keywords_std = sum(keywords_std)

        # regression_x maps each keyword symbol to its regressor series.
        regression_x = self.CreateRegressionXForEachKeyWord(
            keywords_monthly_changes,
            sum_keywords_std
        )

        # CFEAR score keyed by commodity symbol.
        CFEAR = {}
        for symbol, symbol_data in self.data.items():
            # Regress only commodities with a full return history.
            if not symbol_data.is_ready():
                continue

            significant_betas = []
            # Monthly returns in ascending date order, matching regression_x.
            monthly_returns = [x for x in symbol_data.monthly_returns][::-1]

            # One univariate regression per keyword.
            for x in regression_x.values():
                regression_model = self.MultipleLinearRegression(x, monthly_returns)
                beta = regression_model.params[-1]
                t_stat_beta = regression_model.tvalues[-1]
                # Keep only betas whose t-statistic clears the critical value.
                if abs(t_stat_beta) > 0.8:
                    significant_betas.append(beta)

            if len(significant_betas) != 0:
                # CFEAR = sum of all significant keyword betas.
                CFEAR[symbol] = sum(significant_betas)

        # Continue only with enough commodities for a quintile selection.
        if len(CFEAR) < self.quantile:
            self.Liquidate()
            return

        CFEAR_mean, CFEAR_std = self.MeanAndStdCFEAR(CFEAR)
        # Fix: a degenerate cross-section (zero dispersion) cannot be
        # standardized -- previously this produced nan/inf scores.
        if CFEAR_std == 0:
            self.Liquidate()
            return

        # Standardize CFEAR by subtracting the cross-sectional mean and
        # dividing by the cross-sectional standard deviation.
        standardized_CFEAR = {key: (value - CFEAR_mean) / CFEAR_std for key, value in CFEAR.items()}

        # Quintile selection based on standardized CFEAR.
        quintile = int(len(standardized_CFEAR) / self.quantile)
        sorted_by_standardized_CFEAR = [x[0] for x in sorted(standardized_CFEAR.items(), key=lambda item: item[1])]

        # Long the bottom quintile (lowest hazard fear), short the top one.
        long_symbols = sorted_by_standardized_CFEAR[:quintile]
        short_symbols = sorted_by_standardized_CFEAR[-quintile:]

        # Liquidate anything no longer selected.
        invested = [x.Key.Value for x in self.Portfolio]
        for symbol in invested:
            if symbol not in long_symbols + short_symbols:
                self.Liquidate(symbol)

        # Equally weighted positions on each side.
        long_length = len(long_symbols)
        short_length = len(short_symbols)
        for symbol in long_symbols:
            if symbol in data and data[symbol]:
                self.SetHoldings(symbol, 1 / long_length)
        for symbol in short_symbols:
            if symbol in data and data[symbol]:
                self.SetHoldings(symbol, -1 / short_length)

    def CalculateMonthlyChanges(self, search_values):
        """Return consecutive period-over-period changes of search_values.

        search_values is expected in ascending date order; the result is
        aligned the same way (oldest change first), matching the order of
        the regression's dependent variable.

        Fixes two defects in the original implementation: prev_value was
        never advanced (every change was measured against the first
        element), and the series was reversed out of date alignment with
        the ascending monthly returns.

        NOTE(review): the paper describes *log* changes; simple percentage
        changes are computed here -- confirm which is intended.
        """
        monthly_changes = []
        prev_value = search_values[0]
        for value in search_values[1:]:
            # Prevent division by zero for an all-zero search volume.
            monthly_change = (value - prev_value) / prev_value if prev_value != 0 else 0
            monthly_changes.append(monthly_change)
            prev_value = value
        return monthly_changes

    def CreateRegressionXForEachKeyWord(self, keywords_monthly_changes, sum_keywords_std):
        """Scale each keyword's monthly changes by the summed standard
        deviation of search volumes across all keywords."""
        return {
            symbol: [change / sum_keywords_std for change in monthly_changes]
            for symbol, monthly_changes in keywords_monthly_changes.items()
        }

    def MultipleLinearRegression(self, x, y):
        """OLS regression of y on x with an intercept; returns the fitted
        statsmodels results object (params, tvalues, ...)."""
        x = np.array(x).T
        x = sm.add_constant(x)
        return sm.OLS(endog=y, exog=x).fit()

    def MeanAndStdCFEAR(self, CFEAR_dictionary):
        """Return the cross-sectional mean and std of the CFEAR scores."""
        CFEAR_values = list(CFEAR_dictionary.values())
        return np.mean(CFEAR_values), np.std(CFEAR_values)

    def Selection(self):
        # Scheduled at month start; OnData performs the actual rebalance.
        self.selection_flag = True
class SymbolData():
    """Rolling store of one commodity's monthly returns."""

    def __init__(self, period):
        # Most recent `period` monthly returns (RollingWindow is newest-first).
        self.monthly_returns = RollingWindow[float](period)
        # Price at the previous rebalance; 0 means "not initialized yet".
        self.last_price = 0
        self.last_update_time = None

    def update(self, current_price):
        """Record the simple return from last_price to current_price."""
        price_change = current_price - self.last_price
        self.monthly_returns.Add(price_change / self.last_price)

    def is_ready(self):
        """True once a full `period` of monthly returns has accumulated."""
        return self.monthly_returns.IsReady
class GoogleSearchValues():
    """Rolling store of raw and max-normalized Google search volumes
    for one keyword."""

    def __init__(self, period):
        # We need (period + 1) monthly observations because monthly changes
        # are computed from them and their count must match the length of
        # SymbolData.monthly_returns.
        self.search_values = []  # full raw history (grows without bound)
        self.n_search_values = RollingWindow[float](period + 1)  # normalized values
        self.period = period
        # Fix: initialize here so is_ready() does not raise AttributeError
        # when called before the first update().
        self.last_update_time = None

    def update(self, search_value, update_time: datetime.date):
        """Record a new observation; once a full period of raw history
        exists, also store its value normalized by the running maximum."""
        self.last_update_time = update_time
        self.search_values.append(search_value)
        if len(self.search_values) >= self.period:
            max_search_vol = max(self.search_values)
            # Guard against an all-zero history.
            n_search_vol = search_value / max_search_vol if max_search_vol != 0 else 0
            self.n_search_values.Add(n_search_vol)

    def is_ready(self, curr_date, max_missing_days):
        """True when the normalized window is full and the latest data
        point is no staler than max_missing_days."""
        if self.last_update_time is None:
            return False
        days_stale = (curr_date - self.last_update_time).days
        return self.n_search_values.IsReady and days_stale <= max_missing_days
# Quantpedia data.
# NOTE: IMPORTANT: Data order must be ascending (datewise)
# NOTE: IMPORTANT: Name of the csv file has to be in upper case
class QuantpediaGoogleSearch(PythonData):
    """Custom data reader for Quantpedia Google search volume csv files
    (one file per keyword, named in upper case, ascending by date)."""

    def GetSource(self, config, date, isLiveMode):
        return SubscriptionDataSource("data.quantpedia.com/backtesting_data/google_search/{0}.csv".format(config.Symbol.Value), SubscriptionTransportMedium.RemoteFile, FileFormat.Csv)

    def Reader(self, config, line, date, isLiveMode):
        data = QuantpediaGoogleSearch()
        data.Symbol = config.Symbol

        # Skip headers and non-data rows.
        # Fix: also guard against an empty line, which previously raised
        # IndexError on line[0].
        if not line or not line[0].isdigit():
            return None

        split = line.split(';')

        # Shift by one day so the point becomes tradable after it is known.
        data.Time = datetime.strptime(split[0], "%d.%m.%Y") + timedelta(days=1)
        data['value'] = float(split[1])
        data.Value = float(split[1])
        return data
# Quantpedia data.
# NOTE: IMPORTANT: Data order must be ascending (datewise)
class QuantpediaFutures(PythonData):
    """Custom data reader for Quantpedia continuous futures csv files
    (ascending by date; back-adjusted and spliced price columns)."""

    def GetSource(self, config, date, isLiveMode):
        return SubscriptionDataSource("data.quantpedia.com/backtesting_data/futures/{0}.csv".format(config.Symbol.Value), SubscriptionTransportMedium.RemoteFile, FileFormat.Csv)

    def Reader(self, config, line, date, isLiveMode):
        data = QuantpediaFutures()
        data.Symbol = config.Symbol

        # Skip headers and non-data rows.
        # Fix: also guard against an empty line, which previously raised
        # IndexError on line[0].
        if not line or not line[0].isdigit():
            return None

        split = line.split(';')

        # Shift by one day so the point becomes tradable after it is known.
        data.Time = datetime.strptime(split[0], "%d.%m.%Y") + timedelta(days=1)
        data['back_adjusted'] = float(split[1])
        data['spliced'] = float(split[2])
        # Back-adjusted price is used as the security price.
        data.Value = float(split[1])
        return data
# Custom fee model
class CustomFeeModel(FeeModel):
    """Flat fee of 0.00005 (0.5 basis point) of traded notional, in USD."""

    def GetOrderFee(self, parameters):
        security_price = parameters.Security.Price
        traded_quantity = parameters.Order.AbsoluteQuantity
        fee_amount = security_price * traded_quantity * 0.00005
        return OrderFee(CashAmount(fee_amount, "USD"))