Skip to content

Torashii Model

Complete implementation of the factor model.

estimate_factor_returns(returns_df, mkt_cap_df, sector_df, style_df, winsor_factor=0.05, residualize_styles=True)

Estimate factor and residual returns across all time periods using input asset factor scores.

Parameters

- returns_df: Polars DataFrame containing | date | symbol | asset_returns |
- mkt_cap_df: Polars DataFrame containing | date | symbol | market_cap |
- sector_df: Polars DataFrame containing | date | symbol | followed by one column for each sector
- style_df: Polars DataFrame containing | date | symbol | followed by one column for each style
- winsor_factor: winsorization proportion
- residualize_styles: bool indicating if style returns should be orthogonalized to market + sector returns

Returns

tuple of Polars DataFrames melted by date: (factor returns, residual returns)

Source code in torashii/model.py
def estimate_factor_returns(
    returns_df: pl.DataFrame,
    mkt_cap_df: pl.DataFrame,
    sector_df: pl.DataFrame,
    style_df: pl.DataFrame,
    winsor_factor: float | None = 0.05,
    residualize_styles: bool = True,
) -> tuple[pl.DataFrame, pl.DataFrame]:
    """Estimate factor and residual returns across all time periods using input asset factor scores.

    The four input frames are inner-joined on ("date", "symbol"); each distinct
    date of the joined frame is then processed independently, in ascending
    date order, by `_factor_returns`.

    Parameters
    ----------
    returns_df: Polars DataFrame containing | date | symbol | asset_returns |
    mkt_cap_df: Polars DataFrame containing | date | symbol | market_cap |
    sector_df: Polars DataFrame containing | date | symbol | followed by one column for each sector
    style_df: Polars DataFrame containing | date | symbol | followed by one column for each style
    winsor_factor: winsorization proportion passed to `winsorize`; `None` skips winsorization
    residualize_styles: bool indicating if style returns should be orthogonalized to market + sector returns

    Returns
    -------
    tuple of Polars DataFrames with one row per date, sorted by date:
    (factor returns, residual returns). The factor frame has the columns
    "market", each sector (sorted by name), each style (sorted by name) and
    "date"; the residual frame has one column per symbol plus "date".

    Raises
    ------
    TypeError: if an input is missing the DataFrame attributes used here
    ValueError: if a required column is missing, or if the joined inputs
        contain no rows
    """
    try:
        sectors = sorted(sector_df.select(pl.exclude("date", "symbol")).columns)
    except AttributeError as exc:
        raise TypeError("`sector_df` must be a Polars DataFrame, but it's missing required attributes") from exc
    except pl_exc.ColumnNotFoundError as exc:
        raise ValueError("`sector_df` must have columns for 'date' and 'symbol' in addition to each sector") from exc
    try:
        styles = sorted(style_df.select(pl.exclude("date", "symbol")).columns)
    except AttributeError as exc:
        raise TypeError("`style_df` must be a Polars DataFrame, but it's missing required attributes") from exc
    except pl_exc.ColumnNotFoundError as exc:
        raise ValueError("`style_df` must have columns for 'date' and 'symbol' in addition to each style") from exc

    # Only the join and the required-column check live inside this `try`, so
    # that errors raised later by `winsorize` / `_factor_returns` are not
    # misreported as malformed inputs.
    try:
        joined = (
            returns_df.join(mkt_cap_df, on=["date", "symbol"])
            .join(sector_df, on=["date", "symbol"])
            .join(style_df, on=["date", "symbol"])
        )
        # Touch every required column now so schema problems surface here.
        joined.select("date", "symbol", "asset_returns", "market_cap")
        # `unique()` gives no ordering guarantee; sort so the output rows are
        # deterministic and chronological.
        dates = joined["date"].unique().sort().to_list()
    except AttributeError as exc:
        raise TypeError(
            "`returns_df` and `mkt_cap_df` must be Polars DataFrames, but there are missing attributes"
        ) from exc
    except pl_exc.ColumnNotFoundError as exc:
        raise ValueError(
            "`returns_df` must have columns 'date', 'symbol' and 'asset_returns'; "
            "`mkt_cap_df` must have 'date', 'symbol' and 'market_cap' columns"
        ) from exc

    if not dates:
        # Without this guard the column assignment below fails with an opaque
        # shape error.
        raise ValueError("no rows remain after joining the input frames on 'date' and 'symbol'")

    factor_rows, residual_rows = [], []
    # iterate through, one day at a time
    # this could probably be made more efficient with Polars' `.map_groups` method
    for dt in dates:
        # Sort by symbol so the arrays handed to `_factor_returns` and the
        # symbol list zipped with its residual output share one ordering.
        ddf = joined.filter(pl.col("date") == dt).sort("symbol")
        asset_rets = ddf["asset_returns"].to_numpy()
        if winsor_factor is not None:
            asset_rets = winsorize(asset_rets, winsor_factor)
        fac_rets, eps = _factor_returns(
            asset_rets,
            ddf["market_cap"].to_numpy(),
            ddf.select(sectors).to_numpy(),
            ddf.select(styles).to_numpy(),
            residualize_styles,
        )
        factor_rows.append(fac_rets)
        residual_rows.append(dict(zip(ddf["symbol"].to_list(), eps)))

    ret_df = pl.DataFrame(np.array(factor_rows))
    ret_df.columns = ["market"] + sectors + styles
    ret_df = ret_df.with_columns(pl.Series(dates).alias("date"))
    # Scan every row when inferring the schema: the symbol universe can change
    # over time, and the default bounded scan would silently drop symbols that
    # first appear after the inspected rows.
    eps_df = pl.DataFrame(residual_rows, infer_schema_length=None).with_columns(
        pl.Series(dates).alias("date")
    )
    return ret_df, eps_df