Transforming Target

One of the common requirements is to transform the target variable itself. hcrystalball implements the TargetTransformer for such cases: it internally scales the target before fitting the wrapped model and returns the predictions transformed back to the original (un-scaled) units.

[1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Matplotlib 3.6 renamed its bundled seaborn style sheets to 'seaborn-v0_8*' and the
# old names were removed in later releases, so use whichever name this installation
# provides (older Matplotlib versions only know plain 'seaborn').
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')
# Default figure size (width, height in inches) for every plot in this notebook.
plt.rcParams['figure.figsize'] = [12, 6]
[2]:
from hcrystalball.utils import get_sales_data

# Request a small sample: 100 dates for one assortment, one state and one store.
df = get_sales_data(n_dates=100, n_assortments=1, n_states=1, n_stores=1)

# Features start as an empty frame that only carries the index of the sales data;
# the target is the 'Sales' column.
X = pd.DataFrame(index=df.index)
y = df['Sales']
[3]:
from hcrystalball.wrappers import get_sklearn_wrapper
from hcrystalball.compose import TSColumnTransformer
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor
[4]:
from hcrystalball.preprocessing import TargetTransformer

With Wrapper

[5]:
# Random forest regressor exposed through hcrystalball's sklearn wrapper; the seed is
# fixed so repeated runs use the same random state.
rf_model = get_sklearn_wrapper(RandomForestRegressor, random_state=42)
# Combine the wrapped model with a StandardScaler that TargetTransformer applies to the
# target (scaling internally and returning un-scaled values, as described above).
scaled = TargetTransformer(rf_model, StandardScaler())
[6]:
# Train on everything except the last 10 observations, then forecast those 10.
fitted = scaled.fit(X[:-10], y[:-10])
forecast = fitted.predict(X[-10:])

# Put the forecast next to the actual sales (outer join on the index) and keep the
# last 50 rows for plotting.
preds = forecast.merge(y, left_index=True, right_index=True, how='outer').tail(50)

# Mean absolute error between the actual 'Sales' and the 'sklearn' forecast column.
mae = (preds['Sales'] - preds['sklearn']).abs().mean().round(3)
preds.plot(title=f"MAE:{mae}");
[6]:
<AxesSubplot:title={'center':'MAE:2564.6'}, xlabel='Date'>
../../../_images/examples_tutorial_wrappers_09_transforming_target_7_1.png

With Pipeline

[7]:
# Add a 'trend' feature counting rows 0..len(X)-1. This modifies X in place, so the
# cells above will also see the new column if they are re-run after this one.
X['trend'] = np.arange(len(X))
[8]:
# Preprocessing step: standardise the 'trend' feature.
# NOTE(review): the original comment said the date column passes through in raw form -
# presumably TSColumnTransformer leaves the datetime index untouched; confirm.
preprocessing = TSColumnTransformer(transformers=[('scaler', StandardScaler(), ['trend'])])

# Estimator step: random forest behind the hcrystalball sklearn wrapper, fixed seed.
rf_model = get_sklearn_wrapper(RandomForestRegressor, random_state=42)

# Chain both steps: preprocessing first, then the model.
sklearn_model_pipeline = Pipeline([('preprocessing', preprocessing), ('model', rf_model)])
[9]:
# Same target scaling as in the wrapper example, this time wrapped around the whole
# pipeline (feature preprocessing + model) instead of a single wrapped estimator.
scaled_pipeline = TargetTransformer(sklearn_model_pipeline, StandardScaler())
[10]:
# Train the target-scaled pipeline on everything except the last 10 observations,
# then forecast those 10.
fitted_pipeline = scaled_pipeline.fit(X[:-10], y[:-10])
forecast = fitted_pipeline.predict(X[-10:])

# Put the forecast next to the actual sales (outer join on the index) and keep the
# last 50 rows for plotting.
preds = forecast.merge(y, left_index=True, right_index=True, how='outer').tail(50)

# Mean absolute error between the actual 'Sales' and the 'sklearn' forecast column.
mae = (preds['Sales'] - preds['sklearn']).abs().mean().round(3)
preds.plot(title=f"MAE:{mae}");
[10]:
<AxesSubplot:title={'center':'MAE:2201.033'}, xlabel='Date'>
../../../_images/examples_tutorial_wrappers_09_transforming_target_12_1.png