Skip to content

[BUG] historical_forecasts() can't handle timeseries with rangeindex that doesn't start at 0 #974

@TamerAbdelmigid

Description

@TamerAbdelmigid

Describe the bug
There is a problem with RegressionModel and time series that have a range index that does not start at zero.
When I try to use any of these methods (predict, historical_forecasts, backtest), I get errors, which go away when I create a pandas date range index instead.

To Reproduce

series1 = TimeSeries.from_dataframe(df1)
series2= TimeSeries.from_dataframe(df2)
series0= TimeSeries.from_dataframe(df0)
future_cov = series1.concatenate(series2, axis=1)

scaler1 = Scaler()

idx = int(len(series0) * 0.9)
train, val = series0[:idx], series0[idx:]
train_scaled = scaler1.fit_transform(train).astype(np.float32)
val_scaled = scaler1.transform(val).astype(np.float32)
series_scaled = train_scaled.concatenate(val_scaled)

fcov_train, fcov_val = future_cov[:idx], future_cov[idx:]
fcov_train_scaled = scaler1.fit_transform(fcov_train).astype(np.float32)
fcov_val_scaled = scaler1.transform(fcov_val).astype(np.float32)
fcov_series_scaled = fcov_train_scaled.concatenate(fcov_val_scaled)

ensemble_model = LinearRegressionModel(
    lags=15, lags_lags_future_covariates=(15, 15), output_chunk_length=1,
)

ensemble_model.fit(series=train_scaled, future_covariates=fcov_series_scaled)

ensemble_model.backtest(
    series=series_scaled,
    future_covariates=fcov_series_scaled,
    start=0.9,
    forecast_horizon=15,
    stride=15,
    retrain=False,
    verbose=True,
    metric=r2_score,
)

pred_series = ensemble_model.historical_forecasts(
    series=series_scaled,
    future_covariates=fcov_series_scaled,
    start=idx,
    forecast_horizon=15,
    stride=15,
    retrain=False,
    verbose=True,
    overlap_end=False,
    last_points_only=False,
)

ensemble_model.predict(n=15, series=train_scaled, future_covariates=fcov_series_scaled )

Expected behavior
The methods should work normally and not produce any errors.

System (please complete the following information):

  • Python version: [e.g. 3.8]
  • darts version [e.g. 0.19.0]

Additional context
series 0, series 1, series 2 shape = (360090, 1)

error in case of backtest and historical_forecasts:

[2022-05-26 13:47:53,106] ERROR | darts.timeseries | ValueError: point (int) should be a valid index in series
2022-05-26 13:47:53 darts.timeseries ERROR: ValueError: point (int) should be a valid index in series
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_13184/3025966321.py in <module>
----> 1 pred_series = ensemble_model.historical_forecasts(
      2     series=series_scaled,
      3     future_covariates=fcov_series_scaled,
      4     start=idx,
      5     forecast_horizon=15,

~\anaconda3\envs\GPU\lib\site-packages\darts\utils\utils.py in sanitized_method(self, *args, **kwargs)
    170 
    171                 getattr(self, sanity_check_method)(*only_args.values(), **only_kwargs)
--> 172             return method_to_sanitize(self, *only_args.values(), **only_kwargs)
    173 
    174         return sanitized_method

~\anaconda3\envs\GPU\lib\site-packages\darts\models\forecasting\forecasting_model.py in historical_forecasts(self, series, past_covariates, future_covariates, num_samples, train_length, start, forecast_horizon, stride, retrain, overlap_end, last_points_only, verbose)
    430         for pred_time in iterator:
    431             # build the training series
--> 432             train = series.drop_after(pred_time)
    433             if train_length and len(train) > train_length:
    434                 train = train[-train_length:]

~\anaconda3\envs\GPU\lib\site-packages\darts\timeseries.py in drop_after(self, split_point)
   1541             A new TimeSeries, after `ts`.
   1542         """
-> 1543         return self.split_before(split_point)[0]
   1544 
   1545     def drop_before(self, split_point: Union[pd.Timestamp, float, int]):

~\anaconda3\envs\GPU\lib\site-packages\darts\timeseries.py in split_before(self, split_point)
   1524             and the second contains the remaining ones.
   1525         """
-> 1526         return self._split_at(split_point, after=False)
   1527 
   1528     def drop_after(self, split_point: Union[pd.Timestamp, float, int]):

~\anaconda3\envs\GPU\lib\site-packages\darts\timeseries.py in _split_at(self, split_point, after)
   1474     ) -> Tuple["TimeSeries", "TimeSeries"]:
   1475 
-> 1476         point_index = self.get_index_at_point(split_point, after)
   1477         return (
   1478             self[: point_index + (1 if after else 0)],

~\anaconda3\envs\GPU\lib\site-packages\darts\timeseries.py in get_index_at_point(self, point, after)
   1420             point_index = int((len(self) - 1) * point)
   1421         elif isinstance(point, (int, np.int64)):
-> 1422             raise_if(
   1423                 point not in range(len(self)),
   1424                 "point (int) should be a valid index in series",

~\anaconda3\envs\GPU\lib\site-packages\darts\logging.py in raise_if(condition, message, logger)
    108         if `condition` is satisfied
    109     """
--> 110     raise_if_not(not condition, message, logger)
    111 
    112 

~\anaconda3\envs\GPU\lib\site-packages\darts\logging.py in raise_if_not(condition, message, logger)
     82     if not condition:
     83         logger.error("ValueError: " + message)
---> 84         raise ValueError(message)
     85 
     86 

ValueError: point (int) should be a valid index in series

Error in case of predict:

[2022-05-26 13:49:26,639] ERROR | darts.timeseries | ValueError: The time series array must not be empty.
2022-05-26 13:49:26 darts.timeseries ERROR: ValueError: The time series array must not be empty.
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_13184/393503889.py in <module>
----> 1 ensemble_model.predict(n=15, series=train_scaled, future_covariates=fcov_series_scaled)

~\anaconda3\envs\GPU\lib\site-packages\darts\models\forecasting\regression_model.py in predict(self, n, series, past_covariates, future_covariates, num_samples, **kwargs)
    555                         # include last_req_ts when slicing series with integer indices
    556                         covariate_matrices[cov_type].append(
--> 557                             cov[first_req_ts : last_req_ts + 1].values()
    558                         )
    559 

~\anaconda3\envs\GPU\lib\site-packages\darts\timeseries.py in __getitem__(self, key)
   3282                     xa_
   3283                 )  # indexing may discard the freq so we restore it...
-> 3284                 return self.__class__(xa_)
   3285             elif isinstance(key.start, pd.Timestamp) or isinstance(
   3286                 key.stop, pd.Timestamp

~\anaconda3\envs\GPU\lib\site-packages\darts\timeseries.py in __init__(self, xa)
     75             logger,
     76         )
---> 77         raise_if_not(xa.size > 0, "The time series array must not be empty.", logger)
     78         raise_if_not(
     79             len(xa.shape) == 3,

~\anaconda3\envs\GPU\lib\site-packages\darts\logging.py in raise_if_not(condition, message, logger)
     82     if not condition:
     83         logger.error("ValueError: " + message)
---> 84         raise ValueError(message)
     85 
     86 

ValueError: The time series array must not be empty.

Metadata

Metadata

Assignees

Labels

bug: Something isn't working

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions