Logging best practices

The following code showcases how to control logging when using HCrystalBall, both for the library itself and for the libraries it wraps. Feel free to play around with the commented-out code to find the setup that works best for your use case.

There are two levels to customize:

1. logging, available through the `hcb_verbose` flag in the wrappers' initialization (and also through the grid search specification)
2. warnings from the statsmodels library, available through the `filter_statsmodels_warnings` function

[1]:
import pandas as pd
import matplotlib.pyplot as plt
import logging
# import warnings
# warnings.simplefilter('ignore')
# Matplotlib 3.6 renamed the bundled seaborn style sheets to 'seaborn-v0_8'
# and deprecated the plain 'seaborn' name (later removed), so use whichever
# name the installed version provides.
seaborn_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(seaborn_style)
# Default figure size as [width, height] in inches.
plt.rcParams['figure.figsize'] = [12, 6]
[2]:
# Uncomment to filter warnings coming from the statsmodels library
# (the second customization level described in the introduction).
# from hcrystalball.utils import filter_statsmodels_warnings
# filter_statsmodels_warnings()
[3]:
# Uncomment to raise the level of the "py.warnings" logger to CRITICAL.
# NOTE(review): "py.warnings" is the logger that receives warnings when
# logging.captureWarnings(True) is active; presumably this is meant to quieten
# statsmodels warnings that were redirected to logging - confirm.
# log_stats = logging.getLogger("py.warnings")
# log_stats.setLevel(logging.CRITICAL)
[4]:
# Uncomment to set the "fbprophet" logger to WARNING, so that records below
# that level from this logger are dropped.
# log_prophet = logging.getLogger("fbprophet")
# log_prophet.setLevel(logging.WARNING)
[5]:
from hcrystalball.utils import get_sales_data

# Load the sample sales data used throughout this notebook.
df = get_sales_data(
    n_dates=100,
    n_assortments=2,
    n_states=2,
    n_stores=2,
)
[6]:
from hcrystalball.model_selection import ModelSelector

# Model selector with a horizon of 10 and frequency 'D'; the country code is
# read from the 'HolidayCode' column of the data.
ms = ModelSelector(
    horizon=10,
    frequency='D',
    country_code_column='HolidayCode',
)

# Every built-in model family and ensemble option is switched off here; the
# models to compare are added explicitly in the following cell.
# hcb_verbose=False is the logging flag passed through the grid search
# specification.
ms.create_gridsearch(
    sklearn_models=False,
    n_splits=2,
    between_split_lag=None,
    sklearn_models_optimize_for_horizon=False,
    autosarimax_models=False,
    prophet_models=False,
    tbats_models=False,
    exp_smooth_models=False,
    average_ensembles=False,
    stacking_ensembles=False,
    exog_cols=['Open', 'Promo', 'SchoolHoliday', 'Promo2'],
    hcb_verbose=False,
    # Optional holiday-related settings, left disabled:
    # holidays_days_before=2,
    # holidays_days_after=1,
    # holidays_bridge_days=True,
)
[7]:
from hcrystalball.wrappers import get_sklearn_wrapper, ProphetWrapper, SarimaxWrapper, ExponentialSmoothingWrapper, ThetaWrapper
from sklearn.linear_model import LinearRegression

# One switch for the logging flag of every wrapper created below.
hcb_verbose = False

# Candidate models, registered in the grid search in the order listed.
candidate_models = [
    get_sklearn_wrapper(LinearRegression, hcb_verbose=hcb_verbose),
    ProphetWrapper(hcb_verbose=hcb_verbose),
    SarimaxWrapper(init_with_autoarima=True, hcb_verbose=hcb_verbose),
    SarimaxWrapper(order=(1, 0, 1), hcb_verbose=hcb_verbose),
    ThetaWrapper(hcb_verbose=hcb_verbose),
    ExponentialSmoothingWrapper(hcb_verbose=hcb_verbose),
]
for candidate in candidate_models:
    ms.add_model_to_gridsearch(candidate)
[8]:
df
[8]:
Store Sales Open Promo SchoolHoliday StoreType Assortment Promo2 State HolidayCode
Date
2015-04-23 817 17520 True False False a a False BE DE-BE
2015-04-23 251 16573 True False False a c False NW DE-NW
2015-04-23 335 11189 True False False b a True NW DE-NW
2015-04-23 380 10761 True False False a a True NW DE-NW
2015-04-23 788 15793 True False False a c False BE DE-BE
... ... ... ... ... ... ... ... ... ... ...
2015-07-31 523 15349 True True True c c False BE DE-BE
2015-07-31 513 19959 True True True a a False BE DE-BE
2015-07-31 380 17133 True True True a a True NW DE-NW
2015-07-31 335 17867 True True True b a True NW DE-NW
2015-07-31 251 22205 True True True a c False NW DE-NW

800 rows × 10 columns

[9]:
# Run model selection on df with 'Sales' as the target, partitioning by
# Assortment, State and Store.
# The commented lines show the optional parallel_over_columns / executor
# arguments; the executor needs the import below.
# from prefect.executors import LocalDaskExecutor
ms.select_model(
    df=df,
    target_col_name='Sales',
    partition_columns=['Assortment', 'State', 'Store'],
    # parallel_over_columns=['Assortment'],
    # executor=LocalDaskExecutor(),
)
[10]:
# Same selection as before, now passing parallel_over_columns=['Assortment'].
# The executor argument stays commented out; enabling it needs the import
# below.
# from prefect.executors import LocalDaskExecutor
ms.select_model(
    df=df,
    target_col_name='Sales',
    partition_columns=['Assortment', 'State', 'Store'],
    parallel_over_columns=['Assortment'],
    # executor=LocalDaskExecutor(),
)
[2021-06-03 10:38:43+0000] INFO - prefect.FlowRunner | Beginning Flow run for 'model selection'
[2021-06-03 10:38:43+0000] INFO - prefect.TaskRunner | Task 'include_rules': Starting task run...
[2021-06-03 10:38:43+0000] INFO - prefect.TaskRunner | Task 'include_rules': Finished task run for task with final state: 'Success'
[2021-06-03 10:38:43+0000] INFO - prefect.TaskRunner | Task 'parallel_over_columns': Starting task run...
[2021-06-03 10:38:43+0000] INFO - prefect.TaskRunner | Task 'parallel_over_columns': Finished task run for task with final state: 'Success'
[2021-06-03 10:38:43+0000] INFO - prefect.TaskRunner | Task 'frequency': Starting task run...
[2021-06-03 10:38:43+0000] INFO - prefect.TaskRunner | Task 'frequency': Finished task run for task with final state: 'Success'
[2021-06-03 10:38:43+0000] INFO - prefect.TaskRunner | Task 'country_code_column': Starting task run...
[2021-06-03 10:38:43+0000] INFO - prefect.TaskRunner | Task 'country_code_column': Finished task run for task with final state: 'Success'
[2021-06-03 10:38:43+0000] INFO - prefect.TaskRunner | Task 'persist_cv_data': Starting task run...
[2021-06-03 10:38:43+0000] INFO - prefect.TaskRunner | Task 'persist_cv_data': Finished task run for task with final state: 'Success'
[2021-06-03 10:38:43+0000] INFO - prefect.TaskRunner | Task 'persist_model_reprs': Starting task run...
[2021-06-03 10:38:43+0000] INFO - prefect.TaskRunner | Task 'persist_model_reprs': Finished task run for task with final state: 'Success'
[2021-06-03 10:38:43+0000] INFO - prefect.TaskRunner | Task 'persist_partition': Starting task run...
[2021-06-03 10:38:43+0000] INFO - prefect.TaskRunner | Task 'persist_partition': Finished task run for task with final state: 'Success'
[2021-06-03 10:38:43+0000] INFO - prefect.TaskRunner | Task 'data': Starting task run...
[2021-06-03 10:38:43+0000] INFO - prefect.TaskRunner | Task 'data': Finished task run for task with final state: 'Success'
[2021-06-03 10:38:43+0000] INFO - prefect.TaskRunner | Task 'persist_model_selector_results': Starting task run...
[2021-06-03 10:38:43+0000] INFO - prefect.TaskRunner | Task 'persist_model_selector_results': Finished task run for task with final state: 'Success'
[2021-06-03 10:38:43+0000] INFO - prefect.TaskRunner | Task 'exclude_rules': Starting task run...
[2021-06-03 10:38:43+0000] INFO - prefect.TaskRunner | Task 'exclude_rules': Finished task run for task with final state: 'Success'
[2021-06-03 10:38:43+0000] INFO - prefect.TaskRunner | Task 'partition_columns': Starting task run...
[2021-06-03 10:38:43+0000] INFO - prefect.TaskRunner | Task 'partition_columns': Finished task run for task with final state: 'Success'
[2021-06-03 10:38:43+0000] INFO - prefect.TaskRunner | Task 'persist_cv_results': Starting task run...
[2021-06-03 10:38:43+0000] INFO - prefect.TaskRunner | Task 'persist_cv_results': Finished task run for task with final state: 'Success'
[2021-06-03 10:38:43+0000] INFO - prefect.TaskRunner | Task 'grid_search': Starting task run...
[2021-06-03 10:38:43+0000] INFO - prefect.TaskRunner | Task 'grid_search': Finished task run for task with final state: 'Success'
[2021-06-03 10:38:43+0000] INFO - prefect.TaskRunner | Task 'output_path': Starting task run...
[2021-06-03 10:38:43+0000] INFO - prefect.TaskRunner | Task 'output_path': Finished task run for task with final state: 'Success'
[2021-06-03 10:38:43+0000] INFO - prefect.TaskRunner | Task 'target_col_name': Starting task run...
[2021-06-03 10:38:43+0000] INFO - prefect.TaskRunner | Task 'target_col_name': Finished task run for task with final state: 'Success'
[2021-06-03 10:38:43+0000] INFO - prefect.TaskRunner | Task 'persist_best_model': Starting task run...
[2021-06-03 10:38:43+0000] INFO - prefect.TaskRunner | Task 'persist_best_model': Finished task run for task with final state: 'Success'
[2021-06-03 10:38:44+0000] INFO - prefect.TaskRunner | Task 'filter_data': Starting task run...
[2021-06-03 10:38:44+0000] INFO - prefect.TaskRunner | Task 'filter_data': Finished task run for task with final state: 'Success'
[2021-06-03 10:38:44+0000] INFO - prefect.TaskRunner | Task 'partition_data': Starting task run...
[2021-06-03 10:38:44+0000] INFO - prefect.TaskRunner | Task 'partition_data': Finished task run for task with final state: 'Success'
[2021-06-03 10:38:44+0000] INFO - prefect.TaskRunner | Task 'partition_data['labels']': Starting task run...
[2021-06-03 10:38:44+0000] INFO - prefect.TaskRunner | Task 'partition_data['labels']': Finished task run for task with final state: 'Success'
[2021-06-03 10:38:44+0000] INFO - prefect.TaskRunner | Task 'partition_data['data']': Starting task run...
[2021-06-03 10:38:44+0000] INFO - prefect.TaskRunner | Task 'partition_data['data']': Finished task run for task with final state: 'Success'
[2021-06-03 10:38:44+0000] INFO - prefect.TaskRunner | Task 'prepare_data_for_training': Starting task run...
[2021-06-03 10:38:44+0000] INFO - prefect.TaskRunner | Task 'prepare_data_for_training': Finished task run for task with final state: 'Mapped'
[2021-06-03 10:38:44+0000] INFO - prefect.TaskRunner | Task 'prepare_data_for_training[0]': Starting task run...
[2021-06-03 10:38:44+0000] INFO - prefect.TaskRunner | Task 'prepare_data_for_training[0]': Finished task run for task with final state: 'Success'
[2021-06-03 10:38:44+0000] INFO - prefect.TaskRunner | Task 'prepare_data_for_training[1]': Starting task run...
[2021-06-03 10:38:44+0000] INFO - prefect.TaskRunner | Task 'prepare_data_for_training[1]': Finished task run for task with final state: 'Success'
[2021-06-03 10:38:44+0000] INFO - prefect.TaskRunner | Task 'select_model': Starting task run...
[2021-06-03 10:38:44+0000] INFO - prefect.TaskRunner | Task 'select_model': Finished task run for task with final state: 'Mapped'
[2021-06-03 10:38:44+0000] INFO - prefect.TaskRunner | Task 'select_model[0]': Starting task run...
[2021-06-03 10:39:05+0000] INFO - prefect.TaskRunner | Task 'select_model[0]': Finished task run for task with final state: 'Success'
[2021-06-03 10:39:05+0000] INFO - prefect.TaskRunner | Task 'select_model[1]': Starting task run...
[2021-06-03 10:39:31+0000] INFO - prefect.TaskRunner | Task 'select_model[1]': Finished task run for task with final state: 'Success'
[2021-06-03 10:39:31+0000] INFO - prefect.TaskRunner | Task 'persist_experts_in_physical_partition': Starting task run...
[2021-06-03 10:39:31+0000] INFO - prefect.TaskRunner | Task 'persist_experts_in_physical_partition': Finished task run for task with final state: 'Mapped'
[2021-06-03 10:39:31+0000] INFO - prefect.TaskRunner | Task 'persist_experts_in_physical_partition[0]': Starting task run...
[2021-06-03 10:39:31+0000] INFO - prefect.TaskRunner | Task 'persist_experts_in_physical_partition[0]': Finished task run for task with final state: 'Success'
[2021-06-03 10:39:31+0000] INFO - prefect.TaskRunner | Task 'persist_experts_in_physical_partition[1]': Starting task run...
[2021-06-03 10:39:31+0000] INFO - prefect.TaskRunner | Task 'persist_experts_in_physical_partition[1]': Finished task run for task with final state: 'Success'
[2021-06-03 10:39:31+0000] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[11]:
from prefect.executors import LocalDaskExecutor

# Same selection again, this time also handing a LocalDaskExecutor instance
# to select_model through the executor argument.
ms.select_model(
    df=df,
    target_col_name='Sales',
    partition_columns=['Assortment', 'State', 'Store'],
    parallel_over_columns=['Assortment'],
    executor=LocalDaskExecutor(),
)
[2021-06-03 10:39:31+0000] INFO - prefect.FlowRunner | Beginning Flow run for 'model selection'
[2021-06-03 10:39:31+0000] INFO - prefect.TaskRunner | Task 'data': Starting task run...
[2021-06-03 10:39:31+0000] INFO - prefect.TaskRunner | Task 'exclude_rules': Starting task run...
[2021-06-03 10:39:31+0000] INFO - prefect.TaskRunner | Task 'data': Finished task run for task with final state: 'Success'
[2021-06-03 10:39:31+0000] INFO - prefect.TaskRunner | Task 'exclude_rules': Finished task run for task with final state: 'Success'
[2021-06-03 10:39:31+0000] INFO - prefect.TaskRunner | Task 'include_rules': Starting task run...
[2021-06-03 10:39:31+0000] INFO - prefect.TaskRunner | Task 'parallel_over_columns': Starting task run...
[2021-06-03 10:39:31+0000] INFO - prefect.TaskRunner | Task 'include_rules': Finished task run for task with final state: 'Success'
[2021-06-03 10:39:31+0000] INFO - prefect.TaskRunner | Task 'parallel_over_columns': Finished task run for task with final state: 'Success'
[2021-06-03 10:39:31+0000] INFO - prefect.TaskRunner | Task 'country_code_column': Starting task run...
[2021-06-03 10:39:31+0000] INFO - prefect.TaskRunner | Task 'frequency': Starting task run...
[2021-06-03 10:39:31+0000] INFO - prefect.TaskRunner | Task 'country_code_column': Finished task run for task with final state: 'Success'
[2021-06-03 10:39:31+0000] INFO - prefect.TaskRunner | Task 'frequency': Finished task run for task with final state: 'Success'
[2021-06-03 10:39:31+0000] INFO - prefect.TaskRunner | Task 'partition_columns': Starting task run...
[2021-06-03 10:39:31+0000] INFO - prefect.TaskRunner | Task 'filter_data': Starting task run...
[2021-06-03 10:39:31+0000] INFO - prefect.TaskRunner | Task 'partition_columns': Finished task run for task with final state: 'Success'
[2021-06-03 10:39:31+0000] INFO - prefect.TaskRunner | Task 'filter_data': Finished task run for task with final state: 'Success'
[2021-06-03 10:39:31+0000] INFO - prefect.TaskRunner | Task 'partition_data': Starting task run...
[2021-06-03 10:39:31+0000] INFO - prefect.TaskRunner | Task 'partition_data': Finished task run for task with final state: 'Success'
[2021-06-03 10:39:31+0000] INFO - prefect.TaskRunner | Task 'partition_data['data']': Starting task run...
[2021-06-03 10:39:31+0000] INFO - prefect.TaskRunner | Task 'partition_data['data']': Finished task run for task with final state: 'Success'
[2021-06-03 10:39:31+0000] INFO - prefect.TaskRunner | Task 'prepare_data_for_training': Starting task run...
[2021-06-03 10:39:31+0000] INFO - prefect.TaskRunner | Task 'prepare_data_for_training': Finished task run for task with final state: 'Mapped'
[2021-06-03 10:39:31+0000] INFO - prefect.TaskRunner | Task 'partition_data['labels']': Starting task run...
[2021-06-03 10:39:31+0000] INFO - prefect.TaskRunner | Task 'grid_search': Starting task run...
[2021-06-03 10:39:31+0000] INFO - prefect.TaskRunner | Task 'partition_data['labels']': Finished task run for task with final state: 'Success'
[2021-06-03 10:39:31+0000] INFO - prefect.TaskRunner | Task 'grid_search': Finished task run for task with final state: 'Success'
[2021-06-03 10:39:31+0000] INFO - prefect.TaskRunner | Task 'target_col_name': Starting task run...
[2021-06-03 10:39:31+0000] INFO - prefect.TaskRunner | Task 'target_col_name': Finished task run for task with final state: 'Success'
[2021-06-03 10:39:31+0000] INFO - prefect.TaskRunner | Task 'select_model': Starting task run...
[2021-06-03 10:39:31+0000] INFO - prefect.TaskRunner | Task 'select_model': Finished task run for task with final state: 'Mapped'
[2021-06-03 10:39:31+0000] INFO - prefect.TaskRunner | Task 'output_path': Starting task run...
[2021-06-03 10:39:31+0000] INFO - prefect.TaskRunner | Task 'persist_best_model': Starting task run...
[2021-06-03 10:39:31+0000] INFO - prefect.TaskRunner | Task 'output_path': Finished task run for task with final state: 'Success'
[2021-06-03 10:39:31+0000] INFO - prefect.TaskRunner | Task 'persist_best_model': Finished task run for task with final state: 'Success'
[2021-06-03 10:39:31+0000] INFO - prefect.TaskRunner | Task 'persist_cv_data': Starting task run...
[2021-06-03 10:39:31+0000] INFO - prefect.TaskRunner | Task 'persist_cv_results': Starting task run...
[2021-06-03 10:39:31+0000] INFO - prefect.TaskRunner | Task 'persist_cv_data': Finished task run for task with final state: 'Success'
[2021-06-03 10:39:31+0000] INFO - prefect.TaskRunner | Task 'persist_cv_results': Finished task run for task with final state: 'Success'
[2021-06-03 10:39:31+0000] INFO - prefect.TaskRunner | Task 'persist_model_reprs': Starting task run...
[2021-06-03 10:39:31+0000] INFO - prefect.TaskRunner | Task 'persist_model_selector_results': Starting task run...
[2021-06-03 10:39:31+0000] INFO - prefect.TaskRunner | Task 'persist_model_reprs': Finished task run for task with final state: 'Success'
[2021-06-03 10:39:31+0000] INFO - prefect.TaskRunner | Task 'persist_model_selector_results': Finished task run for task with final state: 'Success'
[2021-06-03 10:39:31+0000] INFO - prefect.TaskRunner | Task 'persist_partition': Starting task run...
[2021-06-03 10:39:31+0000] INFO - prefect.TaskRunner | Task 'persist_partition': Finished task run for task with final state: 'Success'
[2021-06-03 10:39:31+0000] INFO - prefect.TaskRunner | Task 'persist_experts_in_physical_partition': Starting task run...
[2021-06-03 10:39:31+0000] INFO - prefect.TaskRunner | Task 'persist_experts_in_physical_partition': Finished task run for task with final state: 'Mapped'
[2021-06-03 10:39:31+0000] INFO - prefect.TaskRunner | Task 'prepare_data_for_training[0]': Starting task run...
[2021-06-03 10:39:31+0000] INFO - prefect.TaskRunner | Task 'prepare_data_for_training[1]': Starting task run...
[2021-06-03 10:39:32+0000] INFO - prefect.TaskRunner | Task 'prepare_data_for_training[0]': Finished task run for task with final state: 'Success'
[2021-06-03 10:39:32+0000] INFO - prefect.TaskRunner | Task 'prepare_data_for_training[1]': Finished task run for task with final state: 'Success'
[2021-06-03 10:39:32+0000] INFO - prefect.TaskRunner | Task 'select_model[0]': Starting task run...
[2021-06-03 10:39:32+0000] INFO - prefect.TaskRunner | Task 'select_model[1]': Starting task run...
[2021-06-03 10:40:21+0000] INFO - prefect.TaskRunner | Task 'select_model[0]': Finished task run for task with final state: 'Success'
[2021-06-03 10:40:27+0000] INFO - prefect.TaskRunner | Task 'select_model[1]': Finished task run for task with final state: 'Success'
[2021-06-03 10:40:27+0000] INFO - prefect.TaskRunner | Task 'persist_experts_in_physical_partition[1]': Starting task run...
[2021-06-03 10:40:27+0000] INFO - prefect.TaskRunner | Task 'persist_experts_in_physical_partition[0]': Starting task run...
[2021-06-03 10:40:27+0000] INFO - prefect.TaskRunner | Task 'persist_experts_in_physical_partition[1]': Finished task run for task with final state: 'Success'
[2021-06-03 10:40:27+0000] INFO - prefect.TaskRunner | Task 'persist_experts_in_physical_partition[0]': Finished task run for task with final state: 'Success'
[2021-06-03 10:40:27+0000] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded