该策略使用谷歌趋势数据计算“危害恐惧”指标,将商品分为五分位,然后做多低恐惧五分位,做空高恐惧五分位,每周重新平衡以实现盈利。

I. 策略概要

投资策略使用28种商品期货,并分析来自谷歌趋势的149个关键词的搜索量数据(周度数据,表1)。谷歌搜索量指数(GSVI)代表相对搜索强度。GSVI的周度对数变化被标准化,并通过回顾性回归确定搜索量与商品回报之间的历史关系。

“危害恐惧”(CFEAR)指标通过将具有显著t统计量(绝对t统计量 > 临界值)的关键词的回归贝塔值求和来计算。正的CFEAR表示与价格上涨相关的危害恐惧,而负的CFEAR则表示与价格下跌相关的危害恐惧。该指标通过减去均值并除以标准差进行标准化。

商品根据标准化的CFEAR分为五分位。该策略对最高五分位(最高危害恐惧)建立空头头寸,对最低五分位(最低危害恐惧)建立多头头寸,并采用等权重。头寸每周重新平衡。这种方法利用市场情绪数据有效预测价格变动。

II. 策略合理性

与危害相关的搜索查询反映了对影响商品的危害的担忧,从而影响期货价格。正向同向运动表明供应减少或需求增加的危害,导致价格过高,而负向同向运动则表明相反情况。价格很可能根据这些关系进行调整。“危害恐惧”与偏度风险或基差动量等已知因素不同,尽管面临流动性风险,但仍带有显著的无法解释的溢价。该策略在交易成本、信号测量和投资组合构建方法方面表现出稳健性,表明其在根据危害恐惧动态预测商品期货价格走势方面的可靠性。

III. 来源论文

Hazard Fear in Commodity Markets [点击查看论文]

<摘要>

我们通过149个查询词的互联网搜索量来衡量对天气、疾病、地缘政治或经济威胁的积极关注,即“危害恐惧”,并以此来检验商品期货的定价作用。一种根据危害恐惧信号对商品期货合约横截面进行排序的做多-做空投资组合策略获得了显著的溢价。这种商品危害恐惧溢价反映了对现有基本面、尾部、波动性和流动性风险因素的补偿,但并未被它们所包含。在商品投资组合的横截面中,危害恐惧的敞口被强烈定价。危害恐惧溢价在金融市场情绪不利或悲观时期加剧。

IV. 回测表现

年化回报9.28%
波动率10.3%
β值0.064
夏普比率0.9
索提诺比率N/A
最大回撤-18.81%
胜率48%

V. 完整的 Python 代码

from AlgorithmImports import *
import numpy as np
import statsmodels.api as sm
#endregion
class HazardFearInCommodityMarkets(QCAlgorithm):
    """CFEAR ("hazard fear") commodity futures strategy.

    Regresses each commodity's monthly returns on the normalized changes in
    Google search volume for 149 hazard-related keywords, sums the betas
    whose t-statistics are significant into a CFEAR score per commodity,
    standardizes the scores cross-sectionally, and then goes long the bottom
    quintile (lowest hazard fear) and short the top quintile (highest hazard
    fear) with equal weights. Positions are rebalanced monthly.
    """

    def Initialize(self):
        
        # TODO put these into csv
        file_names = ['ADVERSE_WEATHER', 'ADVERSE_WEATHER_CONDITIONS', 'ADVERSE_WEATHER_WARNING', 'BLIZZARD', 'BLIZZARD_RISK', 'BLIZZARD_WARNING', 'CATASTROPHIC_EVENTS', 'CATASTROPHIC_WEATHER', 'CATASTROPHIC_WEATHER_EVENTS', 'CLIMATE_CHANGE', 'CLIMATE_DISTURBANCE', 'COLD', 'COLD_SPELL', 'COLD_WEATHER', 'CYCLOGENESIS', 'CYCLONE', 'CYCLONE_RISK', 'CYCLONE_WARNING', 'DROUGHT', 'DROUGHT_RISK', 'DROUGHT_WARNING', 'DROUGHTS', 'DRY_WEATHER', 'EL_NINO_WEATHER', 'EXTREME_COLD', 'EXTREME_COLD_TEMPERATURES', 'EXTREME_HEAT', 'EXTREME_RAIN', 'EXTREME_TEMPERATURES', 'EXTREME_WEATHER', 'EXTREME_WIND', 'FLOOD', 'FLOOD_RISK', 'FLOOD_WARNING', 'FLOODING', 'FLOODS', 'FOREST_FIRE', 'FOREST_FIRES', 'FREEZE_WARNING', 'FROST', 'FROST_RISK', 'FROST_WARNING', 'FROSTS', 'GLOBAL_WARMING', 'GUST', 'GUSTS', 'HAIL', 'HAIL_DAMAGE', 'HAIL_RISK', 'HAIL_STORM', 'HAIL_STORM_WARNING', 'HAIL_WARNING', 'HARMATTAN_WIND', 'HEAT', 'HEAT_WAVE', 'HEAT_WAVES', 'HEATWAVE', 'HEATWAVES', 'HEAVY_RAIN', 'HEAVY_RAIN_FALL', 'HEAVY_RAIN_RISK', 'HEAVY_RAIN_WARNING', 'HIGH_TEMPERATURE', 'HIGH_TEMPERATURES', 'HOT_WEATHER', 'HURRICANE', 'HURRICANE_RISK', 'HURRICANE_WARNING', 'HURRICANES', 'NATURAL_DISASTER', 'NATURAL_HAZARD', 'RAIN', 'SEVERE_HEAT', 'SEVERE_WEATHER', 'SEVERE_WEATHER_RISK', 'SNOW', 'SNOW_RISK', 'SNOW_STORM_WARNING', 'SNOW_WARNING', 'STORM', 'STORM_RISK', 'STORM_WARNING', 'STRONG_WIND', 'STRONG_WIND_GUST', 'TORNADO', 'TORNADO_RISK', 'TORNADO_WARNING', 'TORRENTIAL_RAIN', 'TROPICAL_CYCLONE', 'TROPICAL_CYCLONE_RISK', 'TROPICAL_CYCLONE_WARNING', 'TROPICAL_STORM', 'TROPICAL_STORM_RISK', 'TROPICAL_STORM_WARNING', 'TROPICAL_WEATHER', 'TYPHOON', 'TYPHOON_RISK', 'TYPHOON_WARNING', 'WEATHER_BLIZZARD_WARNING', 'WEATHER_RISK', 'WEATHER_WARNING', 'WET_WEATHER', 'WILDFIRE', 'WILDFIRE_RISK', 'WILDFIRE_WARNING', 'WILDFIRES', 'WIND', 'WIND_GUST', 'WIND_GUSTS', 'WIND_RISK', 'WIND_SPEED', 'WIND_STORM', 'WIND_WARNING', 'CROP_DISEASES', 'CROP_PEST', 'CROP_PEST_RISK', 'CROP_PESTS', 'EBOLA', 'INSECT_PEST', 'LA_ROYA', 
'PEST_CONTROL', 'PEST_RISK', 'RUST_COFFE', 'AFRICA_INSTABILITY', 'AFRICA_TERRORISM', 'LIBYAN_CRISIS', 'MIDDLE_EAST_CONFLICT', 'MIDDLE_EAST_INSTABILITY', 'MIDDLE_EAST_TERRORISM', 'OIL_CRISIS', 'OIL_EMBARGO', 'OIL_OUTAGE', 'RUSSIAN_CRISIS', 'SYRIAN_WAR', 'TERRORISM', 'TERRORIST_ATTACK', 'TERRORIST_ATTACKS', 'CRISIS', 'ECONOMIC_CRISIS', 'ECONOMIC_RECESSION', 'FINANCIAL_CRISIS', 'RECESSION', 'RECESSION_2008', 'RECESSION_DEPRESSION', 'THE_RECESSION', 'UNEMPLOYMENT', 'UNEMPLOYMENT_RATE', 'US_RECESSION', 'US_UNEMPLOYMENT']
        
        self.SetStartDate(2004, 1, 1) # Google search data are since 2004
        self.SetCash(100000)
        
        self.months = 36                # look-back window length, in months
        self.quantile = 5               # number of quantiles (quintiles)
        self.max_missing_days = 31      # max staleness tolerated for search data
        
        self.data = {} # Storing commodities prices
        self.google_search_values = {} # Storing search values for each search
        
        self.symbols = [
                        "CME_S1", # Soybean Futures, Continuous Contract
                        "CME_W1", # Wheat Futures, Continuous Contract
                        "CME_SM1", # Soybean Meal Futures, Continuous Contract
                        "CME_BO1", # Soybean Oil Futures, Continuous Contract
                        "CME_C1", # Corn Futures, Continuous Contract
                        "CME_O1", # Oats Futures, Continuous Contract
                        "CME_LC1", # Live Cattle Futures, Continuous Contract
                        "CME_FC1", # Feeder Cattle Futures, Continuous Contract
                        "CME_LN1", # Lean Hog Futures, Continuous Contract
                        "CME_GC1", # Gold Futures, Continuous Contract
                        "CME_SI1", # Silver Futures, Continuous Contract
                        "CME_PL1", # Platinum Futures, Continuous Contract
                        "CME_CL1", # Crude Oil Futures, Continuous Contract
                        "CME_HG1", # Copper Futures, Continuous Contract
                        "CME_LB1", # Random Length Lumber Futures, Continuous Contract
                        "CME_NG1", # Natural Gas (Henry Hub) Physical Futures, Continuous Contract
                        "CME_PA1", # Palladium Futures, Continuous Contract
                        "CME_RR1", # Rough Rice Futures, Continuous Contract
                        "CME_DA1", # Class III Milk Futures
                        
                        "ICE_CC1", # Cocoa Futures, Continuous Contract
                        "ICE_CT1", # Cotton No. 2 Futures, Continuous Contract
                        "ICE_KC1", # Coffee C Futures, Continuous Contract
                        "ICE_O1", # Heating Oil Futures, Continuous Contract
                        "ICE_OJ1", # Orange Juice Futures, Continuous Contract
                        "ICE_SB1", # Sugar No. 11 Futures, Continuous Contract
                        # BUG FIX: a missing trailing comma here used to merge
                        # "ICE_TF1" and "ICE_RS1" into one invalid symbol via
                        # implicit string concatenation, silently dropping both.
                        "ICE_TF1", # ICE Endex Dutch TTF 1-Month Calendar Spread Options
                        "ICE_RS1",  # Canola Futures, Continuous Contract
                        "ICE_GO1",  # Gas Oil Futures, Continuous Contract
                        "ICE_WT1",  # WTI Crude Futures, Continuous Contract
                        "CME_RB2",  # Gasoline Futures, Continuous Contract
                        "CME_KW2",  # Wheat Kansas, Continuous Contract
        ]
            
        # Subscribe to all symbols
        for symbol in self.symbols:
            data = self.AddData(QuantpediaFutures, symbol, Resolution.Daily)
            data.SetFeeModel(CustomFeeModel())
            data.SetLeverage(5)
            
            self.data[symbol] = SymbolData(self.months)
        
        for search_value in file_names:
            # Subscribe to QuantpediaGoogleSearch with csv name
            symbol = self.AddData(QuantpediaGoogleSearch, search_value, Resolution.Daily).Symbol
            
            # Add subscribed symbol to self.google_search_values
            self.google_search_values[symbol] = GoogleSearchValues(self.months)
        
        # SPY is used only as a scheduling anchor for the rebalance.
        self.symbol = self.AddEquity('SPY', Resolution.Daily).Symbol
        
        self.selection_flag = False
        self.Schedule.On(self.DateRules.MonthStart(self.symbol), self.TimeRules.AfterMarketOpen(self.symbol), self.Selection)

    def OnData(self, data):
        """Collects incoming data and, on the first bar after each month
        start, recomputes CFEAR scores and rebalances the portfolio."""
        # Google search data comes at the start of each month
        for symbol in self.google_search_values:
            if symbol in data and data[symbol]:
                value = data[symbol].Value
                self.google_search_values[symbol].update(value, self.Time.date())
        
        # Rebalance monthly
        if not self.selection_flag:
            return
        self.selection_flag = False
        
        # Calculate and store monthly return for each Quantpedia Future
        for symbol in self.data:
            if symbol in data and data[symbol]:
                value = data[symbol].Value
                
                # Calculate monthly return, if last_price is ready
                if self.data[symbol].last_price != 0:
                    self.data[symbol].update(value)
                    
                # Initialize new last_price for next monthly return calculation
                self.data[symbol].last_price = value
        
        # Check if at least one monthly returns for Quantpedia Futures are ready.
        if not self.data[self.symbols[0]].is_ready():
            return
        
        # Next, calculate the monthly log change in the Google search volume
        # for each keyword and divide it by the standard deviation of search
        # volume for each keyword.
        
        keywords_std = []
        keywords_monthly_changes = {}
        
        # Calculate std for each google search value,
        # which has enough data and they aren't only zeros
        for symbol, google_search in self.google_search_values.items():
            # Check if google search value is ready
            if google_search.is_ready(self.Time.date(), self.max_missing_days):
                search_values = [x for x in google_search.n_search_values][::-1]  # normalized search volumes
                
                # Calculate std for current google search value
                google_search_std = np.std(search_values)
                keywords_std.append(google_search_std)
                
                # Calculate monthly changes for each google search
                monthly_changes = self.CalculateMonthlyChanges(search_values)
                
                # Store monthly changes of google search under its symbol
                keywords_monthly_changes[symbol] = monthly_changes  
        
        # We can't perform regression, because monthly changes for any keyword aren't ready        
        if len(keywords_monthly_changes) == 0:
            self.Liquidate()
            return
                    
        # Standard deviation of search volume across all keywords
        sum_keywords_std = sum(keywords_std)
        
        # Guard against the degenerate case where every ready keyword series
        # is constant (all stds zero) - avoids a division by zero below.
        if sum_keywords_std == 0:
            self.Liquidate()
            return
        
        # regression_x dictionary has stored x regression value for each keyword.
        # NOTE(review): the changes are divided by the SUM of all keyword stds,
        # while the comment above and the paper describe a per-keyword std -
        # confirm this scaling is intentional.
        regression_x = self.CreateRegressionXForEachKeyWord(
            keywords_monthly_changes,
            sum_keywords_std
        )
        
        # Storing CFEAR as value under commodity symbol as a key
        CFEAR = {}
        
        # Calculation for each commodity
        for symbol, symbol_data in self.data.items():
            # Make regression only if commodity has ready data
            if not symbol_data.is_ready():
                continue
            
            significant_betas = [] 
            monthly_returns = [x for x in symbol_data.monthly_returns][::-1]
            
            # Make regression for each keyword
            for google_search_symbol, x in regression_x.items():
                regression_model = self.MultipleLinearRegression(x, monthly_returns)
                
                # Get regression beta and t-stat for this beta from regression model of this keyword
                beta = regression_model.params[-1]
                t_stat_beta = regression_model.tvalues[-1]
                
                # Store betas, which t-stat is statistically significant
                if abs(t_stat_beta) > 0.8:
                    significant_betas.append(beta)
            
            if len(significant_betas) != 0:
                # CFEAR = sum of all significant betas for each keyword
                CFEAR[symbol] = sum(significant_betas)
        
        # Continue only if we will have enough data for quintile selection
        if len(CFEAR) < self.quantile:
            self.Liquidate()
            return    
            
        # Calculate CFEAR mean and standard deviation
        CFEAR_mean, CFEAR_std = self.MeanAndStdCFEAR(CFEAR)
        
        # Guard: if every commodity has the same CFEAR, the cross-sectional
        # std is zero and standardization is undefined.
        if CFEAR_std == 0:
            self.Liquidate()
            return
        
        # Standardize the CFEAR by subtracting the cross-sectional mean and dividing it by the cross-sectional standard deviation.
        standardized_CFEAR = {key: (value - CFEAR_mean) / CFEAR_std for key, value in CFEAR.items()}
        
        # Make quintile selection based on standardized CFEAR
        quintile = int(len(standardized_CFEAR) / self.quantile)
        sorted_by_standardized_CFEAR = [x[0] for x in sorted(standardized_CFEAR.items(), key=lambda item: item[1])]
        
        # Take a short position in the top quintile and long position in the bottom quintile.
        long = sorted_by_standardized_CFEAR[:quintile]
        short = sorted_by_standardized_CFEAR[-quintile:]
        
        # Trade execution: first close positions that fell out of both legs
        invested = [x.Key.Value for x in self.Portfolio]
        for symbol in invested:
            if symbol not in long + short:
                self.Liquidate(symbol)
                
        long_length = len(long)
        short_length = len(short)
        
        # Equally weighted
        for symbol in long:
            if symbol in data and data[symbol]:
                self.SetHoldings(symbol, 1 / long_length)
            
        for symbol in short:
            if symbol in data and data[symbol]:
                self.SetHoldings(symbol, -1 / short_length)
        
    def CalculateMonthlyChanges(self, search_values):
        """Return month-over-month relative changes of a search-volume series.

        Note: reverses `search_values` in place (callers pass a fresh list).
        A change is reported as 0 when the previous value is 0.
        """
        # Reverse search_values for easier approach with for loop
        # Now search values in list will be descending according to date
        search_values.reverse()
        
        # Initialize needed variables
        prev_value = search_values[0]
        monthly_changes = []
        
        # Now go through each search value except first one
        # and calculate monthly change
        for value in search_values[1:]:
            monthly_change = 0
            
            # Prevent division by zero
            if prev_value != 0:
                # Calculate monthly change
                monthly_change = (value - prev_value) / prev_value
            
            # add monthly change to list of all google search monthly changes
            monthly_changes.append(monthly_change)
        
        return monthly_changes
        
    def CreateRegressionXForEachKeyWord(self, keywords_monthly_changes, sum_keywords_std):
        """Scale each keyword's monthly changes by `sum_keywords_std` and
        return them keyed by keyword symbol, for use as regression X."""
        regression_x = {}
        
        # Monthly change in the Google search volume for each keyword, divided
        # by the combined standard deviation of search volumes.
        for symbol, monthly_changes in keywords_monthly_changes.items():
            # Create regression x for each keyword
            monthly_changes = [x / sum_keywords_std for x in monthly_changes]
            regression_x[symbol] = monthly_changes
            
        return regression_x
        
    def MultipleLinearRegression(self, x, y):
        """Fit OLS of y on x (with intercept) and return the fitted results."""
        x = np.array(x).T
        x = sm.add_constant(x)
        result = sm.OLS(endog=y, exog=x).fit()
        return result
        
    def MeanAndStdCFEAR(self, CFEAR_dictionary):
        """Return the cross-sectional mean and std of the CFEAR scores."""
        CFEAR_values = list(CFEAR_dictionary.values())
        
        # Calculate CFEAR mean and std
        CFEAR_mean = np.mean(CFEAR_values)
        CFEAR_std = np.std(CFEAR_values)
        
        return CFEAR_mean, CFEAR_std
    
    def Selection(self):
        """Scheduled at each month start: arms the rebalance flag read in OnData."""
        self.selection_flag = True
        
        
class SymbolData():
    """Tracks the monthly-return history of a single commodity future.

    Keeps a rolling window of simple monthly returns together with the
    price observed at the previous rebalance, which serves as the base
    for the next return calculation.
    """

    def __init__(self, period):
        # Rolling window holding the most recent `period` monthly returns.
        self.monthly_returns = RollingWindow[float](period)
        # Price from the previous rebalance; 0 means "not yet initialized".
        self.last_price = 0
        self.last_update_time = None

    def update(self, current_price):
        # Simple return of the new price relative to the stored base price.
        # Caller is responsible for ensuring last_price is non-zero.
        price_change = current_price - self.last_price
        self.monthly_returns.Add(price_change / self.last_price)

    def is_ready(self):
        # Ready once the rolling window is completely filled.
        return self.monthly_returns.IsReady
        
class GoogleSearchValues():
    """Stores raw and max-normalized Google search volumes for one keyword."""

    def __init__(self, period):
        # Raw history of every search value received so far.
        self.search_values = []
        # (period + 1) normalized values are stored so that the monthly
        # changes derived from them line up in length with the
        # monthly_returns window kept in SymbolData.
        self.n_search_values = RollingWindow[float](period + 1)     # normalized values
        self.period = period

    def update(self, search_value, update_time:datetime.date):
        self.last_update_time = update_time
        self.search_values.append(search_value)
        # Normalized values are emitted only after enough raw history exists.
        if len(self.search_values) < self.period:
            return
        peak = max(self.search_values)
        # Normalize by the all-time maximum; fall back to 0 for an all-zero series.
        normalized = search_value / peak if peak != 0 else 0
        self.n_search_values.Add(normalized)

    def is_ready(self, curr_date, max_missing_days):
        # Window must be full before staleness is even checked (this also
        # guarantees last_update_time exists).
        if not self.n_search_values.IsReady:
            return False
        return (curr_date - self.last_update_time).days <= max_missing_days
        
# Quantpedia data.
# NOTE: IMPORTANT: Data order must be ascending (datewise)
# NOTE: IMPORTANT: Name of the csv file has to be in upper case
# Quantpedia data.
# NOTE: IMPORTANT: Data order must be ascending (datewise)
# NOTE: IMPORTANT: Name of the csv file has to be in upper case
class QuantpediaGoogleSearch(PythonData):
    """Custom data type that loads Google search volume CSVs from Quantpedia."""

    def GetSource(self, config, date, isLiveMode):
        # CSV file name is the subscribed symbol value (upper-case keyword).
        url = "data.quantpedia.com/backtesting_data/google_search/{0}.csv".format(config.Symbol.Value)
        return SubscriptionDataSource(url, SubscriptionTransportMedium.RemoteFile, FileFormat.Csv)

    def Reader(self, config, line, date, isLiveMode):
        # Data rows start with a digit; skip headers and blanks.
        if not line[0].isdigit():
            return None

        parts = line.split(';')

        point = QuantpediaGoogleSearch()
        point.Symbol = config.Symbol
        # Shift by one day so the value becomes available the following day.
        point.Time = datetime.strptime(parts[0], "%d.%m.%Y") + timedelta(days=1)
        point['value'] = float(parts[1])
        point.Value = float(parts[1])
        return point
# Quantpedia data.
# NOTE: IMPORTANT: Data order must be ascending (datewise)
class QuantpediaFutures(PythonData):
    """Custom data type that loads continuous futures CSVs from Quantpedia.

    NOTE: IMPORTANT: Data order must be ascending (datewise).
    """

    def GetSource(self, config, date, isLiveMode):
        # CSV file name is the subscribed symbol value.
        url = "data.quantpedia.com/backtesting_data/futures/{0}.csv".format(config.Symbol.Value)
        return SubscriptionDataSource(url, SubscriptionTransportMedium.RemoteFile, FileFormat.Csv)

    def Reader(self, config, line, date, isLiveMode):
        # Data rows start with a digit; skip headers and blanks.
        if not line[0].isdigit():
            return None

        parts = line.split(';')

        point = QuantpediaFutures()
        point.Symbol = config.Symbol
        # Shift by one day so the value becomes available the following day.
        point.Time = datetime.strptime(parts[0], "%d.%m.%Y") + timedelta(days=1)
        point['back_adjusted'] = float(parts[1])
        point['spliced'] = float(parts[2])
        # Back-adjusted series is used as the primary value.
        point.Value = float(parts[1])
        return point
# Custom fee model
# Custom fee model
class CustomFeeModel(FeeModel):
    """Charges a flat 0.5 bps of traded notional, denominated in USD."""

    def GetOrderFee(self, parameters):
        notional = parameters.Security.Price * parameters.Order.AbsoluteQuantity
        return OrderFee(CashAmount(notional * 0.00005, "USD"))

发表评论

了解 Quant Buffet 的更多信息

立即订阅以继续阅读并访问完整档案。

继续阅读