Transforming Target

One of the common requirements is to transform the target variable itself. hcrystalball implements the TargetTransformer for such cases; it internally scales the input data and returns the result un-scaled again.

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# The style call had been fused onto the import line above (a syntax error);
# it is restored here as its own statement.
# NOTE(review): matplotlib >= 3.6 renamed this style to 'seaborn-v0_8' —
# confirm which matplotlib version this example targets.
plt.style.use('seaborn')
# Default figure size for the plots produced in this example.
plt.rcParams['figure.figsize'] = [12, 6]
from hcrystalball.utils import get_sales_data

# Load the sample sales data used in the rest of this example.
# NOTE(review): the original call was truncated after `n_dates=100,`. The
# remaining arguments are reconstructed on the assumption that a single
# series (one assortment, one state, one store) is intended — confirm
# against the original notebook.
df = get_sales_data(
    n_dates=100,
    n_assortments=1,
    n_states=1,
    n_stores=1,
)
# X starts out with df's index only (no feature columns); y is the 'Sales' column.
X, y = pd.DataFrame(index=df.index), df['Sales']
from hcrystalball.wrappers import get_sklearn_wrapper
from hcrystalball.compose import TSColumnTransformer
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor
from hcrystalball.preprocessing import TargetTransformer

With Wrapper

rf_model = get_sklearn_wrapper(RandomForestRegressor, random_state=42)
# Wrap the model together with the scaler that is applied to the target.
scaled = TargetTransformer(rf_model, StandardScaler())
# Hold out the last 10 observations: fit on the rest, predict the hold-out,
# then outer-join the predictions with the actual values in y.
# NOTE(review): the `scaled.fit(X` prefix, the `.predict(...)` step and the
# closing parenthesis were missing from the garbled original and are
# reconstructed here — confirm against the original notebook.
preds = (
    scaled.fit(X[:-10], y[:-10])
    .predict(X[-10:])
    .merge(y, left_index=True, right_index=True, how='outer')
)
# NOTE(review): the AxesSubplot repr in the cell output implies a plot call
# ended this cell; its exact arguments are unknown.
preds.plot()
<matplotlib.axes._subplots.AxesSubplot at 0x7f3e5bd22990>

With Pipeline

# Add a simple linear trend feature (0, 1, 2, ...) to X.
X['trend'] = np.arange(len(X))
# add standard scaler to the trend and let the date column pass in raw form
# NOTE(review): the list wrapper and closing brackets were missing from the
# original call and are reconstructed here.
preprocessing = TSColumnTransformer(
    transformers=[
        ('scaler', StandardScaler(), ['trend']),
    ]
)
# define random forest model
rf_model = get_sklearn_wrapper(RandomForestRegressor, random_state=42)
# glue it together: preprocessing runs first, then the wrapped model
# (the closing `])` was missing from the original and is restored here)
sklearn_model_pipeline = Pipeline([
    ('preprocessing', preprocessing),
    ('model', rf_model),
])
# Wrap the whole pipeline together with the scaler that is applied to the target.
scaled_pipeline = TargetTransformer(sklearn_model_pipeline, StandardScaler())
# Hold out the last 10 observations: fit on the rest, predict the hold-out,
# then outer-join the predictions with the actual values in y.
# NOTE(review): the `scaled_pipeline.fit(X` prefix, the `.predict(...)` step
# and the closing parenthesis were missing from the garbled original and are
# reconstructed here — confirm against the original notebook.
preds = (
    scaled_pipeline.fit(X[:-10], y[:-10])
    .predict(X[-10:])
    .merge(y, left_index=True, right_index=True, how='outer')
)
# NOTE(review): the AxesSubplot repr in the cell output implies a plot call
# ended this cell; its exact arguments are unknown.
preds.plot()
<matplotlib.axes._subplots.AxesSubplot at 0x7f3e53bb9990>