当前位置:首页 > 旅游 > 正文

jrue holiday(排球中的自由人是什么意思)

本文主要包括以下几个方面。

1. 使用基础 Prophet 模型训练并预测;2. 尝试加入一些特征,比如节假日、changepoint 等;3. 调整模型的参数,选择更优参数;4. 尝试使用 add_regressor 加入一些其他特征。

希望大家看完这篇文章后,能够学会如何训练出一个相对较好的 Prophet 模型,并运用到自己的项目中去。本文的主要代码参考了 Kaggle 上的示例,并加入了自己的理解;最后一段代码的结果有明显的提升。

一:分析数据

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import warnings
import itertools
import numpy as np
import random
import statsmodels.api as sm
# prophet by Facebook
from fbprophet import Prophet
# time series analysis
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from sklearn.metrics import mean_squared_error, mean_absolute_error
import matplotlib.pyplot as plt
import seaborn as sns
warnings.filterwarnings("ignore")
plt.style.use('fivethirtyeight')

# 读取数据集
df = pd.read_excel("../input/groceries-sales-data/Groceries_Sales_data.xlsx",parse_dates=[0])
print('-'*60)
print('*** Head of the dataframe ***')
print('-'*60)
print(df.head())
print('-'*60)
print('*** Tail of the dataframe ***')
print('-'*60)
print(df.tail())

## Plot the Time series data 画图看销售趋势
fig, ax = plt.subplots(figsize=(20,7))
a = sns.lineplot(x="Date", y="Sales", data=df)
a.set_title("Daily Sales Data",fontsize=15)
plt.show()

将numpy作为np #线性代数导入pandas作为pd #数据处理、CSV文件I/O(例如pd.read_csv)导入warnings导入ITER tools将numpy作为np导入randomimport stat *** odels.api作为 *** # Prophet by face book from fbprophet导入Prophet#时序分析from stat *** odels.tsa.seasonal导入seasonal _ decompose from stats models . graphics . TSA plots导入plot_acf、plot_pacffrom sklearn.metrics导入mean_squared_error、mean _ absolute _ error导入matplot忽略& # 34;)PLT . style . use(& # 39;538 & # 39;)#读取数据集df = PD . Read _ excel(& # 34;../input/杂货-销售-数据/杂货_销售_数据. xlsx & # 34;,parse_dates=[0])打印(& # 39;-'*60)打印(& # 39;***数据帧的头部* * * & # 39;)打印(& # 39;-'* 60)print(df . head())print(& # 39;-'*60)打印(& # 39;***数据帧的尾部* * * & # 39;)打印(& # 39;-'* 60)打印(df。tail ()) # #绘制时间序列数据绘制并查看销售趋势FIG,ax = PLT.subplots (FIG size = (20,7))A = SNS . line plot(x = & # 34;日期& # 34;,y = & # 34销售& # 34;,data = df)a . set _ title(& # 34;每日销售数据& # 34;,fontsize=15)plt.show()

描绘特征并观察历史趋势

销售趋势存在按天和按季度的变化规律;一周内的不同天也展示出不同的峰值,即每天有各自的趋势。

fig, ax = plt.subplots(figsize=(14,5))
palette = sns.color_palette("mako_r", 4)
a = sns.barplot(x="month", y="Sales",hue = 'year',data=df_new)
a.set_title("Store Sales Data",fontsize=15)
plt.legend(loc='upper right')
plt.show()

fig,ax = plt.subplots(figsize=(14,5))palette = SNS . color _ palette(& # 34;mako _ r & # 34,4)a = SNS . bar plot(x = & # 34;月& # 34;,y = & # 34销售& # 34;,hue = & # 39年& # 39;,data = df _ new)a . set _ title(& # 34;商店销售数据& # 34;,font size = 15)PLT . legend(loc = & # 39;右上& # 39;)plt.show()

将数据分为训练集和测试集。

end_date = '2019-12-31'
mask1 = (df['ds'] <= end_date)
mask2 = (df['ds'] > end_date)
X_tr = df.loc[mask1]
X_tst = df.loc[mask2]
print("train shape",X_tr.shape)
print("test shape",X_tst.shape)
pd.plotting.register_matplotlib_converters()
f, ax = plt.subplots(figsize=(14,5))
X_tr.plot(kind='line', x='ds', y='y', color='blue', label='Train', ax=ax)
X_tst.plot(kind='line', x='ds', y='y', color='red', label='Test', ax=ax)
plt.title('Sales Amount Traning and Test data')
plt.show()

end _ date = & # 392019-12-31'mask 1 =(df[& # 39;ds & # 39]& lt;=结束日期)mask 2 =(df[& # 39;ds & # 39]& gt;end _ date)X _ tr = df . loc[mask 1]X _ tst = df . loc[mask 2]print(& # 34;火车造型& # 34;,X_tr.shape)打印(& # 34;测试形状& # 34;,X _ tst . shape)PD . plotting . register _ matplotlib _ converters()f,ax = plt.subplots(figsize=(14,5))X _ tr . plot(kind = & # 39;行& # 39;,x = & # 39ds & # 39,y = & # 39y & # 39,color = & # 39蓝色& # 39;,label = & # 39火车& # 39;,ax = ax)X _ tst . plot(kind = & # 39;行& # 39;,x = & # 39ds & # 39,y = & # 39y & # 39,color = & # 39红色& # 39;,label = & # 39测试& # 39;,ax = ax)PLT . title(& # 39;销售金额培训和测试数据& # 39;)plt.show()

评价指标定义:MAPE

def mean_absolute_percentage_error(y_true, y_pred):
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

二:训练简单的prophet

model =Prophet()
model.fit(X_tr)
## 构建预测DF,并预测销量
X_tst_forecast = model.predict(X_tst)
X_tst_forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail(7)
# Plot the forecast with the actuals
f, ax = plt.subplots(1)
f.set_figheight(5)
f.set_figwidth(15)
ax.scatter(X_tst.ds, X_tst['y'], color='r')
fig = model.plot(X_tst_forecast, ax=ax)

def mean _ absolute _ percentage _ error(y _ true,y_pred): y_true,y_pred = np.array(y_true),np。Array (Y _ PRED)返回NP。均值(NP。ABS((Y _ true-Y _ PRED)/Y _ true))* 100二:训练简单先知模型=先知()模型。Fit (X _ TR) # #建立预测DF并预测销售量X _ ds & # 39, 'yhat & # 39, 'yhat _ lower & # 39, 'yhat _ upper & # 39]].tail(7)#用实际绘制预测sf,ax = PLT . subplots(1)f . set _ fig height(5)f . set _ fig width(15)ax . scatter(X _ tst . ds,X _ tst[& # 39;y & # 39],color = & # 39r & # 39)fig = model.plot(X_tst_forecast,ax=ax)

这时,MAPE的结果是:129.1593

三:加入节假日信息并重新训练模型

import holidays
holiday = pd.DataFrame([])
for date, name in sorted(holidays.UnitedStates(years=[2018,2019,2020]).items()):
    holiday = holiday.append(pd.DataFrame({'ds': date, 'holiday': "US-Holidays"}, index=[0]), ignore_index=True)
holiday['ds'] = pd.to_datetime(holiday['ds'], format='%Y-%m-%d', errors='ignore')
# Setup and train model with holidays
model_with_holidays = Prophet(holidays=holiday)
model_with_holidays.fit(X_tr)
X_tst_forecast_holiday = model_with_holidays.predict(X_tst)
X_tst_forecast_holiday[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail(7)
f, ax = plt.subplots(figsize=(14,5))
f.set_figheight(5)
f.set_figwidth(15)
X_tst.plot(kind='line',x='ds', y='y', color='red', label='Actual', ax=ax)
X_tst_forecast_holiday.plot(kind='line',x='ds',y='yhat', color='green',label='Forecast', ax=ax)
plt.title('Jan & Feb 2020 Forecast vs Actuals')
plt.show()

导入假日假日= pd。DataFrame([])为日期,名称按(节假日)排序。美国(年份=[2018,2019,2020])。items()):holiday = holiday . append(PD。data frame({ & # 39;ds & # 39:日期,& # 39;假日& # 39;: "美国假期& # 34;},index=[0]),ignore_index=True)假日[& # 39;ds & # 39]= PD . to _ datetime(holiday[& # 39;ds & # 39],格式= & # 39;% Y-% m-% d & # 39;,错误= & # 39;忽略& # 39;)#用holidays设置和训练模型model _ with _ holidays = Prophet(holidays = holidays)model _ with _ holidays . fit(X _ tr)X _ tst _ forecast _ holiday = model _ with _ holidays . predict(X _ tst)X _ tst _ forecast _ holiday[[[& # 39;ds & # 39, 'yhat & # 39, 'yhat _ lower & # 39, 'yhat _ upper & # 39]].tail(7)f,ax = plt.subplots(figsize=(14,5))f . set _ fig height(5)f . set _ fig width(15)X _ tst . plot(kind = & # 39;行& # 39;,x = & # 39ds & # 39,y = & # 39y & # 39,color = & # 39红色& # 39;,label = & # 39实际& # 39;,ax = ax)X _ tst _ forecast _ holiday . plot(kind = & # 39;行& # 39;,x = & # 39ds & # 39,y = & # 39yhat & # 39,color = & # 39绿色& # 39;,label = & # 39预测& # 39;,ax = ax)PLT . title(& # 39;2020年1月和2月预测与实际& # 39;)plt.show()

MAPE结果:130.6785

原因:测试集在这段时间没有假期。或者节假日对数据影响不大。

四:调参过程

在这一部分,我们将开始调整模型参数,选择更佳参数。

除了节假日之外,还有很多参数需要调优,以帮助我们得到更好的 Prophet 模型。以下参数的含义参见 Prophet 官方文档。

节假日(Holidays);Changepoints:(n_changepoints, changepoint_prior_scale);seasonality_mode;holidays_prior_scale;Seasonalities with fourier_order

from sklearn.model_selection import ParameterGrid
params_grid = {'seasonality_mode':('multiplicative','additive'),
               'changepoint_prior_scale':[0.1,0.2,0.3,0.4,0.5],
               'holidays_prior_scale':[0.1,0.2,0.3,0.4,0.5],
               'n_changepoints' : [100,150,200]}
grid = ParameterGrid(params_grid)
cnt = 0
for p in grid:
    cnt = cnt+1
print('Total Possible Models',cnt)

strt='2019-12-31'
end='2020-02-26'
model_parameters = pd.DataFrame(columns = ['MAPE','Parameters'])
for p in grid:
    test = pd.DataFrame()
    print(p)
    random.seed(0)
    train_model =Prophet(changepoint_prior_scale = p['changepoint_prior_scale'],
                         holidays_prior_scale = p['holidays_prior_scale'],
                         n_changepoints = p['n_changepoints'],
                         seasonality_mode = p['seasonality_mode'],
                         weekly_seasonality=True,
                         daily_seasonality = True,
                         yearly_seasonality = True,
                         holidays=holiday,
                         interval_width=0.95)
    train_model.add_country_holidays(country_name='US')
    train_model.fit(X_tr)
    train_forecast = train_model.make_future_dataframe(periods=57, freq='D',include_history = False)
    train_forecast = train_model.predict(train_forecast)
    test=train_forecast[['ds','yhat']]
    Actual = df[(df['ds']>strt) & (df['ds']<=end)]
    MAPE = mean_absolute_percentage_error(Actual['y'],abs(test['yhat']))
    print('Mean Absolute Percentage Error(MAPE)------------------------------------',MAPE)
    model_parameters = model_parameters.append({'MAPE':MAPE,'Parameters':p},ignore_index=True)

parameters = model_parameters.sort_values(by=['MAPE'])
parameters = parameters.reset_index(drop=True)
parameters.head()
# 更优的模型参数
parameters['Parameters'][0]
# Setup and train model with holidays
final_model = Prophet(holidays=holiday,
                      changepoint_prior_scale= 0.5,
                      holidays_prior_scale = 0.1,
                      n_changepoints = 200,
                      seasonality_mode = 'multiplicative',
                      weekly_seasonality=True,
                      daily_seasonality = True,
                      yearly_seasonality = True,
                      interval_width=0.95)
final_model.add_country_holidays(country_name='US')
final_model.fit(X_tr)
future = final_model.make_future_dataframe(periods=122, freq='D')
X_tst_final= final_model.predict(X_tst)
X_tst_final[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail(7)

from sklearn.model_selection导入参数grid params _ grid = { & # 39;季节性_模式& # 39;:('乘法& # 39;,'添加剂& # 39;), 'changepoint _ prior _ scale & # 39:[0.1,0.2,0.3,0.4,0.5], 'holidays _ prior _ scale & # 39:[0.1,0.2,0.3,0.4,0.5], 'n _ changepoints & # 39:[100,150,200]} grid = parameter grid(params _ grid)CNT = 0 for p in grid:CNT = CNT+1 print(& # 39;所有可能的型号& # 39;,CNT)strt = & # 39;2019-12-31'end = & # 392020-02-26'模型参数= pd。DataFrame(列=[& # 39;MAPE & # 39;,'参数& # 39;])对于网格中的p:test = PD。data frame()print(p)random . seed(0)train _ model = Prophet(change point _ prior _ scale = p[& # 39;changepoint _ prior _ scale & # 39],holidays _ prior _ scale = p[& # 39;holidays _ prior _ scale & # 39],n _ change points = p[& # 39;n _ changepoints & # 39],季节性_模式= p[& # 39;季节性_模式& # 39;],weekly _季节性=True,daily _季节性= True,yearly _季节性= True,节假日=节假日,interval _ width = 0.95)train _ model . add _ country _ holidays(country _ name = & # 39;美国& # 39;)train _ model . fit(X _ tr)train _ forecast = train _ model . make _ future _ data frame(periods = 57,freq = & # 39D & # 39,include _ history = False)train _ forecast = train _ model . predict(train _ forecast)test = train _ forecast[[& # 39;ds & # 39,'yhat & # 39]]实际= df[(df[& # 39;ds & # 39]& gt;strt)&(df[& # 39;ds & # 39]& lt;= end)]MAPE = mean _ absolute _ percentage _ error(实际[& # 39;y & # 39]、abs(测试[& # 39;yhat & # 39]))打印(& # 39;平均绝对百分比误差(MAPE)-& # 39;,MAPE)model _ parameters = model _ parameters . append({ & # 39;MAPE & # 39;:MAPE,& # 39;参数& # 39;:p},ignore _ index = True)parameters = model _ parameters . sort _ values(by =[& # 39;MAPE & # 39;])parameters = parameters . reset _ index(drop = true)parameters . head()#更优模型参数[& # 39;参数& # 39;][0]#用holidays final _ model = Prophet(holidays = holidays,changepoint_prior_scale= 0.5,holidays_prior_scale = 0.1,n_changepoints = 200,seasonality _ mode = & # 39乘法& # 39;,weekly _季节性=True,daily _季节性= True,yearly _季节性= True,interval _ width = 0.95)final _ model . 
add _ country _ holidays(country _ name = & # 39;美国& # 39;)final _ model . fit(X _ tr)future = final _ model . make _ future _ data frame(periods = 122,freq = & # 39D & # 39)X _ tst _ final = final _ model . predict(X _ tst)X _ tst _ final[[& # 39;ds & # 39, 'yhat & # 39, 'yhat _ lower & # 39, 'yhat _ upper & # 39]].尾巴(7)

MAPE:106。与之前约 130 的结果相比,MAPE 降低了 20 多个点,效果明显改善。

五:加入一些其他特征

从图中(红线)我们可以看到,月初销量很小,而月末销量很高,所以我们加入了变量 days(日期是当月的第几天)。我们还可以添加一些其他变量,如周、当月的第几周等。

end_date = '2019-12-31'
df1 = df.copy()
df1['ds'] = pd.to_datetime(df1['ds'])
df1['days'] = df1['ds'].dt.day
mask1 = (df1['ds'] <= end_date)
mask2 = (df1['ds'] > end_date)
X_tr1 = df1.loc[mask1]
X_tst1 = df1.loc[mask2]
print("train shape",X_tr1.shape)
print("test shape",X_tst1.shape)

final_model = Prophet(holidays=holiday,
                      changepoint_prior_scale= 0.2,
                      holidays_prior_scale = 0.1,
                      n_changepoints = 100,
                      seasonality_mode = 'multiplicative',
                      weekly_seasonality=True,
                      daily_seasonality = True,
                      yearly_seasonality = True,
                      interval_width=0.95)
final_model.add_country_holidays(country_name='US')
final_model.add_regressor('days')
final_model.fit(X_tr1)
# 预测过程
forecast1 = final_model.predict(X_tst1)
f, ax = plt.subplots(figsize=(14,5))
f.set_figheight(5)
f.set_figwidth(15)
X_tst.plot(kind='line',x='ds', y='y', color='red', label='Actual', ax=ax)
forecast1.plot(kind='line',x='ds',y='yhat', color='green',label='Forecast', ax=ax)
plt.title('Jan & Feb 2020 Forecast vs Actuals')
plt.show()

end _ date = & # 392019-12-31'df1 = df . copy()df1[& # 39;ds & # 39]= PD . to _ datetime(df1[& # 39;ds & # 39])df1[& # 39;天& # 39;]= df1[& # 39;ds & # 39]. dt . day mask 1 =(df1[& # 39;ds & # 39]& lt;= end _ date)mask 2 =(df1[& # 39;ds & # 39]& gt;end _ date)X _ tr1 = df1 . loc[mask 1]X _ ts t1 = df1 . loc[mask 2]print(& # 34;火车造型& # 34;,X_tr1.shape)打印(& # 34;测试形状& # 34;,X _ ts t1 . shape)final _ model = Prophet(holidays = holiday,changepoint_prior_scale= 0.2,holidays_prior_scale = 0.1,n_changepoints = 100,季节性_ mode = & # 39乘法& # 39;,weekly _季节性=True,daily _季节性= True,yearly _季节性= True,interval _ width = 0.95)final _ model . add _ country _ holidays(country _ name = & # 39;美国& # 39;)final _ model . add _ regressor(& # 39;天& # 39;)final _ model . fit(X _ tr1)# forecast process forecast 1 = final _ model . predict(X _ ts t1)f,ax = PLT.subplots (fig size = (14,5))f . set _ fig height(5)f . set _ fig width(11 line & # 39;,x = & # 39ds & # 39,y = & # 39y & # 39,color = & # 39红色& # 39;,label = & # 39实际& # 39;,ax = ax)forecast 1 . plot(kind = & # 39;行& # 39;,x = & # 39ds & # 39,y = & # 39yhat & # 39,color = & # 39绿色& # 39;,label = & # 39预测& # 39;,ax = ax)PLT . title(& # 39;2020年1月和2月预测与实际& # 39;)plt.show()

MAPE:71。我们预测的整体趋势与实际值非常吻合,取得了令人满意的结果。

如果想看完整的代码:https://github.com/13293824182/timeseries-mothod-code/blob/main/prophet/prophet.ipynb

如果有帮助,请点个星。我稍后会更新这个仓库中与时间序列相关的算法代码。

0