def estimate_factor_returns(
    returns_df: pl.DataFrame,
    mkt_cap_df: pl.DataFrame,
    sector_df: pl.DataFrame,
    style_df: pl.DataFrame,
    winsor_factor: float | None = 0.05,
    residualize_styles: bool = True,
) -> tuple[pl.DataFrame, pl.DataFrame] | pl.DataFrame:
    """Estimate factor and residual returns for every date present in the joined inputs.

    The four inputs are inner-joined on (`date`, `symbol`); each date's cross-section,
    ordered by symbol, is then passed to `_factor_returns`.

    Parameters
    ----------
    returns_df: Polars DataFrame containing | date | symbol | asset_returns |
    mkt_cap_df: Polars DataFrame containing | date | symbol | market_cap |
    sector_df: Polars DataFrame containing | date | symbol | followed by one column for each sector
    style_df: Polars DataFrame containing | date | symbol | followed by one column for each style
    winsor_factor: proportion passed to `winsorize` for each date's asset returns;
        `None` skips winsorization
    residualize_styles: bool indicating if style returns should be orthogonalized to market + sector
        returns (forwarded unchanged to `_factor_returns`)

    Returns
    -------
    tuple of Polars DataFrames with one row per date, in ascending date order:
        - factor returns: columns `market`, each sector (sorted by name), each style
          (sorted by name), then `date`
        - residual returns: one column per symbol, then `date`

    Raises
    ------
    TypeError: if an input lacks the DataFrame attributes used here
    ValueError: if a required column is missing from an input
    """
    try:
        sectors = sorted(sector_df.select(pl.exclude("date", "symbol")).columns)
    except AttributeError as e:
        raise TypeError("`sector_df` must be a Polars DataFrame, but it's missing required attributes") from e
    except pl_exc.ColumnNotFoundError as e:
        raise ValueError("`sector_df` must have columns for 'date' and 'symbol' in addition to each sector") from e
    try:
        styles = sorted(style_df.select(pl.exclude("date", "symbol")).columns)
    except AttributeError as e:
        raise TypeError("`style_df` must be a Polars DataFrame, but it's missing required attributes") from e
    except pl_exc.ColumnNotFoundError as e:
        raise ValueError("`style_df` must have columns for 'date' and 'symbol' in addition to each style") from e

    # Only the input handling lives inside this `try`, so failures raised later by
    # `winsorize` / `_factor_returns` are not misreported as malformed inputs.
    try:
        joined = (
            returns_df.join(mkt_cap_df, on=["date", "symbol"])
            .join(sector_df, on=["date", "symbol"])
            .join(style_df, on=["date", "symbol"])
        )
        # Look the value columns up once here so a missing column surfaces as the
        # ValueError below rather than part-way through the per-date loop.
        joined.select(["asset_returns", "market_cap"])
        # One global sort followed by an order-preserving partition yields the dates
        # chronologically, each already ordered by symbol. `Series.unique()` gives no
        # ordering guarantee, and filtering per date rescans the whole frame each time.
        daily_frames = joined.sort(["date", "symbol"]).partition_by("date", maintain_order=True)
    except AttributeError as e:
        raise TypeError(
            "`returns_df` and `mkt_cap_df` must be Polars DataFrames, but there are missing attributes"
        ) from e
    except pl_exc.ColumnNotFoundError as e:
        raise ValueError(
            "`returns_df` must have columns 'date', 'symbol' and 'asset_returns'; "
            "`mkt_cap_df` must have 'date', 'symbol' and 'market_cap' columns"
        ) from e

    dates, factor_rows, residual_rows = [], [], []
    for ddf in daily_frames:
        asset_rets = ddf["asset_returns"].to_numpy()
        if winsor_factor is not None:
            asset_rets = winsorize(asset_rets, winsor_factor)
        factor_rets, resid = _factor_returns(
            asset_rets,
            ddf["market_cap"].to_numpy(),
            ddf.select(sectors).to_numpy(),
            ddf.select(styles).to_numpy(),
            residualize_styles,
        )
        # Every row of a partition shares the same date, so the first one labels it.
        dates.append(ddf["date"][0])
        factor_rows.append(factor_rets)
        # `resid` is paired positionally with the symbol-sorted rows of `ddf`.
        residual_rows.append(dict(zip(ddf["symbol"].to_list(), resid)))

    ret_df = pl.DataFrame(np.array(factor_rows))
    ret_df.columns = ["market"] + sectors + styles
    ret_df = ret_df.with_columns(pl.Series(dates).alias("date"))
    eps_df = pl.DataFrame(residual_rows).with_columns(pl.Series(dates).alias("date"))
    return ret_df, eps_df